# -*- coding: utf-8 -*-
"""
Created on Tue May 23 10:27:15 2017
@author: phung
"""
from math import log
import operator
def createDataSet():
    """Return the training data and its feature labels.

    Each row is [age, bmi, bmd, ictp, pinp, class] where class is
    1 (at risk of bone fracture) or 0 (not at risk).

    Fix: several rows in the original used European decimal commas
    (e.g. ``24,8358`` instead of ``24.8358``), which Python parsed as
    extra list elements — those rows silently had 7 or 8 columns,
    corrupting the 6-column schema every other function assumes.
    """
    dataSet = [[79, 24.7252, 0.818, 9.170, 37.383, 1],
               [89, 25.9909, 0.871, 7.561, 24.685, 1],
               [70, 25.3934, 1.358, 5.347, 40.620, 1],
               [88, 23.2254, 0.714, 7.354, 56.782, 1],
               [85, 24.6097, 0.748, 6.760, 58.358, 1],
               [68, 25.0762, 0.935, 4.939, 67.123, 0],
               [70, 19.8839, 1.040, 4.321, 26.399, 0],
               [69, 25.0593, 1.002, 4.212, 47.515, 0],
               [74, 25.6544, 0.987, 5.605, 26.132, 0],
               [79, 19.9594, 0.863, 5.204, 60.267, 0],
               [76, 22.5981, 0.889, 4.704, 27.026, 1],
               [76, 26.4236, 0.886, 5.115, 43.256, 0],
               [62, 20.3223, 0.889, 5.741, 51.097, 1],
               [69, 19.3698, 0.790, 3.880, 49.678, 0],
               [72, 24.2215, 0.988, 5.844, 41.672, 0],
               [67, 32.1120, 1.119, 4.160, 60.356, 0],
               [74, 25.3934, 1.037, 6.728, 40.225, 0],
               [69, 23.8895, 0.893, 4.203, 27.334, 0],
               [78, 24.6755, 0.850, 7.347, 28.893, 1],
               [71, 27.1314, 0.790, 4.467, 38.173, 0],
               [74, 23.0518, 0.597, 4.835, 35.141, 1],
               [76, 23.4568, 0.889, 5.345, 27.568, 1],
               [75, 23.5457, 0.803, 3.773, 36.726, 1],
               [70, 23.3234, 0.919, 3.672, 40.093, 0],
               [69, 22.8625, 0.870, 4.552, 29.627, 1],
               [71, 22.0384, 0.811, 4.286, 30.380, 0],
               [80, 24.6914, 0.859, 5.706, 37.529, 1],
               [79, 26.8519, 0.867, 3.563, 43.924, 1],
               [72, 27.1809, 0.717, 3.760, 39.714, 0],
               [78, 23.9512, 0.822, 3.453, 27.294, 0],
               [80, 28.3874, 1.004, 5.948, 33.376, 1],
               [79, 23.5102, 0.738, 4.193, 65.640, 0],
               [67, 19.7232, 0.865, 4.443, 36.252, 1],
               [84, 27.4406, 0.808, 5.482, 33.539, 1],
               [78, 28.6661, 0.955, 8.815, 42.398, 0],
               [65, 23.7812, 0.912, 4.704, 39.254, 0],
               [70, 23.4493, 0.857, 4.138, 75.947, 0],
               [67, 25.5354, 0.855, 3.727, 41.851, 0],
               [74, 24.7409, 0.959, 3.967, 42.293, 0],
               [73, 22.2291, 1.036, 4.438, 40.222, 0],
               [74, 34.4753, 1.092, 7.271, 45.434, 0],
               [68, 32.1929, 0.000, 4.269, 50.841, 1],
               [80, 23.3355, 0.759, 4.856, 31.114, 0],
               [78, 22.7903, 0.757, 4.831, 73.343, 0],
               [79, 24.6097, 0.671, 4.870, 68.924, 1],
               [72, 27.5802, 0.814, 3.021, 27.088, 0],
               [67, 30.1205, 1.101, 7.538, 35.487, 1],
               [70, 25.8166, 0.818, 3.564, 36.001, 0],
               [69, 30.4218, 1.088, 3.826, 33.833, 0],
               [67, 28.7132, 0.934, 3.996, 56.167, 0],
               [74, 34.5429, 0.969, 6.762, 43.099, 0],
               [71, 24.6097, 0.794, 4.350, 39.023, 0],
               [67, 23.5294, 0.830, 3.176, 36.595, 0],
               [67, 25.6173, 1.057, 3.738, 32.550, 0],
               [65, 25.3086, 1.160, 3.060, 44.757, 0],
               [66, 24.8358, 0.811, 3.263, 26.941, 0],  # was 24,8358 / 3,263
               [69, 22.3094, 0.977, 3.106, 27.951, 0],  # was 3,106
               [72, 26.5285, 1.063, 6.970, 41.188, 0],  # was 26,5285
               [75, 25.8546, 1.091, 4.798, 36.045, 0],
               [70, 20.6790, 0.741, 3.908, 30.198, 0],
               [74, 28.3675, 1.045, 4.784, 31.339, 0],
               [71, 29.0688, 1.066, 4.527, 24.252, 0],  # was 1,066 / 4,527
               [65, 23.9995, 0.841, 3.089, 79.910, 0],
               [77, 22.9819, 1.015, 4.041, 57.147, 0],
               [67, 33.3598, 1.129, 7.239, 67.103, 1],
               [66, 27.1314, 1.030, 4.096, 29.435, 0],
               [70, 24.7676, 0.896, 4.352, 44.291, 0],  # was 0,896
               [70, 24.4193, 1.106, 2.823, 37.348, 0]]
    labels = ['age', 'bmi', 'bmd', 'ictp', 'pinp']
    return dataSet, labels
def calcShannonEnt(dataSet):
    """Compute the Shannon entropy (base 2) of the class labels.

    The class label is assumed to be the last element of each row.
    Returns 0.0 for a perfectly pure dataset.
    """
    total = len(dataSet)
    # Tally occurrences of each distinct class label.
    counts = {}
    for row in dataSet:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    # H = -sum(p * log2(p)) over the label distribution.
    entropy = 0.0
    for occurrences in counts.values():
        p = float(occurrences) / total
        entropy -= p * log(p, 2)
    return entropy
def splitDataSet(dataSet, axis, value):
    """Return the rows whose column `axis` equals `value`, with that
    column removed from each returned row.

    The input dataSet is not modified; each result row is a new list.
    """
    matched = []
    for row in dataSet:
        if row[axis] != value:
            continue
        # Drop the splitting column, keeping everything around it.
        matched.append(row[:axis] + row[axis + 1:])
    return matched
def chooseBestFeatureToSplit(dataSet):
    """Return the index of the feature with the highest information gain.

    Returns -1 when no split yields a positive gain over the base
    entropy of the whole dataset.
    """
    featureCount = len(dataSet[0]) - 1  # final column is the class label
    baseEntropy = calcShannonEnt(dataSet)
    bestGain = 0.0
    bestIndex = -1
    for idx in range(featureCount):
        # Weighted entropy after partitioning on each distinct value
        # of this feature.
        distinctValues = {row[idx] for row in dataSet}
        weightedEntropy = 0.0
        for v in distinctValues:
            subset = splitDataSet(dataSet, idx, v)
            weight = len(subset) / float(len(dataSet))
            weightedEntropy += weight * calcShannonEnt(subset)
        gain = baseEntropy - weightedEntropy  # reduction in entropy
        if gain > bestGain:
            bestGain = gain
            bestIndex = idx
    return bestIndex
def majorityCnt(classList):
    """Return the most frequent class label in classList (majority vote).

    Fix: the original called ``classCount.iteritems()``, which exists
    only in Python 2 and raises AttributeError on Python 3; ``items()``
    works on both.
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    # Sort by count, descending; the first entry is the majority label.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
def createTree(dataSet, labels):
    """Recursively build an ID3 decision tree as nested dicts.

    Tree shape: {featureLabel: {featureValue: subtree-or-classLabel}}.
    Leaves are raw class labels.

    Fix: the original executed ``del labels[bestFeat]`` on the caller's
    list, silently mutating it across the call. We now delete from a
    copy, preserving the exact ``del`` indexing semantics (including a
    possible -1 from chooseBestFeatureToSplit) without the side effect.
    """
    classList = [example[-1] for example in dataSet]
    # Stop: every remaining row has the same class -> leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Stop: no features left to split on -> majority-vote leaf.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)
    # Split on the feature with the highest information gain.
    bestFeat = chooseBestFeatureToSplit(dataSet)
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}
    remaining = labels[:]          # copy so the caller's list is untouched
    del remaining[bestFeat]
    featValues = [example[bestFeat] for example in dataSet]
    for value in set(featValues):
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), remaining[:])
    return myTree
def classify(inputTree, featLabels, testVec):
    """Walk the decision tree and return the class label for testVec.

    inputTree  -- nested dict produced by createTree
    featLabels -- feature names, in the same order as testVec's columns
    testVec    -- one sample (extra trailing columns are ignored)

    Raises KeyError if the sample's feature value has no branch.

    Fixes:
    - ``inputTree.keys()[0]`` raises TypeError on Python 3 (dict views
      are not subscriptable); ``next(iter(...))`` works everywhere.
    - ``inputTree[first]`` referenced an undefined name ``first``
      (NameError at runtime); it must be ``firstStr``.
    """
    firstStr = next(iter(inputTree))      # feature tested at this node
    secondDict = inputTree[firstStr]      # branches keyed by feature value
    featIndex = featLabels.index(firstStr)
    key = testVec[featIndex]
    valueOfFeat = secondDict[key]
    if isinstance(valueOfFeat, dict):
        # Internal node: descend into the matching branch.
        return classify(valueOfFeat, featLabels, testVec)
    # Leaf node: this is the class label.
    return valueOfFeat
def storeTree(inputTree, filename):
    """Serialize inputTree to `filename` using pickle.

    Fix: pickle writes bytes, so the file must be opened in binary
    mode ('wb'); text mode raises TypeError on Python 3. A ``with``
    block guarantees the handle is closed even if dump fails.
    """
    import pickle
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)
def grabTree(filename):
    """Load and return a pickled tree from `filename`.

    Fix: pickle data is bytes, so the file must be opened in binary
    mode ('rb'); the original's default text mode fails on Python 3.
    A ``with`` block ensures the handle is closed.

    NOTE(review): pickle.load can execute arbitrary code from a
    malicious file — only load files written by storeTree.
    """
    import pickle
    with open(filename, 'rb') as fr:
        return pickle.load(fr)
# collect data
myDat, labels = createDataSet()
# build a tree (note: createTree may consume `labels`, so classify below
# uses a fresh label list)
mytree = createTree(myDat, labels)
print(mytree)
# classify one sample; the trailing 0 (the sample's true class) is simply
# ignored by classify(), which indexes by feature position
answer = classify(mytree, ['age', 'bmi', 'bmd', 'ictp', 'pinp'],
                  [70, 24.4193, 1.106, 2.823, 37.348, 0])
# Fix: `answer` is an int (0/1); concatenating str + int raises
# "TypeError: cannot concatenate 'str' and 'int' objects" — exactly the
# traceback the author pasted below. Convert explicitly.
print("Ban " + str(answer) + "bi nguy co gay xuong")
# ---------------------------------------------------------------------------
# NOTE(review): the text below was a forum question pasted verbatim into the
# source file — it is not Python and made the whole file a SyntaxError. It is
# preserved here as comments.
#
# "Got an error (I'm using Python 2.7)":
#   Traceback (most recent call last):
#     File "C:\Users\phung\Desktop\ve.py", line 217, in <module>
#       print ("Hi, the answer is "+ answer + ", it is winter family photo")
#   TypeError: cannot concatenate 'str' and 'int' objects
#   >>>
# "Update: I've now fixed the dict error. Now it reports this instead —
#  please help":
#     File "C:/Users/phung/untitled1.py", line 92, in calcShannonEnt
#       prob = float(labelCounts.keys) / numEntries
#   TypeError: float() argument must be a string or a number
# ---------------------------------------------------------------------------