Code bị lỗi "NameError: name 'createDataSet' is not defined"

from math import log
import operator

def createDataset():
    dataSet = [[79, 24.7252, 0.818, 9.170, 37.383, yes],
               [89, 25.9909, 0.871, 7.561, 24.685, yes],
               [70, 25.3934, 1.358, 5.347, 40.620, yes],
               [88, 23.2254, 0.714, 7.354, 56.782, yes],
               [85, 24.6097, 0.748, 6.760, 58.358, yes],
               [68, 25.0762, 0.935, 4.939, 67.123, no],
               [70, 19.8839, 1.040, 4.321, 26.399, no],
               [69, 25.0593, 1.002, 4.212, 47.515, no],
               [74, 25.6544, 0.987, 5.605, 26.132, no],
               [79, 19.9594, 0.863, 5.204, 60.267, no],
               [76, 22.5981, 0.889, 4.704, 27.026, yes],
               [76, 26.4236, 0.886, 5.115, 43.256, no],
               [62, 20.3223, 0.889, 5.741, 51.097, yes],
               [69, 19.3698, 0.790, 3.880, 49.678, no],
               [72, 24.2215, 0.988, 5.844, 41.672, no],
               [67, 32.1120, 1.119, 4.160, 60.356, no],
               [74, 25.3934, 1.037, 6.728, 40.225, no],
               [69, 23.8895, 0.893, 4.203, 27.334, no],
               [78, 24.6755, 0.850, 7.347, 28.893, yes],
               [71, 27.1314, 0.790, 4.467, 38.173, no],
               [74, 23.0518, 0.597, 4.835, 35.141, yes],
               [76, 23.4568, 0.889, 5.345, 27.568, yes],
               [75, 23.5457, 0.803, 3.773, 36.726, yes],
               [70, 23.3234, 0.919, 3.672, 40.093, no],
               [69, 22.8625, 0.870, 4.552, 29.627, yes],
               [71, 22.0384, 0.811, 4.286, 30.380, no],
               [80, 24.6914, 0.859, 5.706, 37.529, yes],
               [79, 26.8519, 0.867, 3.563, 43.924, yes],
               [72, 27.1809, 0.717, 3.760, 39.714, no],
               [78, 23.9512, 0.822, 3.453, 27.294, no],
               [80, 28.3874, 1.004, 5.948, 33.376, yes],
               [79, 23.5102, 0.738, 4.193, 65.640, no],
               [67, 19.7232, 0.865, 4.443, 36.252, yes],
               [84, 27.4406, 0.808, 5.482, 33.539, yes],
               [78, 28.6661, 0.955, 8.815, 42.398, no],
               [65, 23.7812, 0.912, 4.704, 39.254, no],
               [70, 23.4493, 0.857, 4.138, 75.947, no],
               [67, 25.5354, 0.855, 3.727, 41.851, no],
               [74, 24.7409, 0.959, 3.967, 42.293, no],
               [73, 22.2291, 1.036, 4.438, 40.222, no],
               [74, 34.4753, 1.092, 7.271, 45.434, no],
               [68, 32.1929, 0.000, 4.269, 50.841, yes],
               [80, 23.3355, 0.759, 4.856, 31.114, no],
               [78, 22.7903, 0.757, 4.831, 73.343, no],
               [79, 24.6097, 0.671, 4.870, 68.924, yes],
               [72, 27.5802, 0.814, 3.021, 27.088, no,]
               [67, 30.1205, 1.101, 7.538, 35.487, yes],
               [70, 25.8166, 0.818, 3.564, 36.001, no],
               [69, 30.4218, 1.088, 3.826, 33.833, no],
               [67, 28.7132, 0.934, 3.996, 56.167, no],
               [74, 34.5429, 0.969, 6.762, 43.099, no],
               [71, 24.6097, 0.794, 4.350, 39.023, no],
               [67, 23.5294, 0.830, 3.176, 36.595, no],
               [67, 25.6173, 1.057, 3.738, 32.550, no],
               [65, 25.3086, 1.160, 3.060, 44.757, no],
               [66, 24,8358, 0.811, 3,263, 26.941, no],
               [69, 22.3094, 0.977, 3,106, 27.951, no],
               [72, 26,5285, 1.063, 6.970, 41.188, no],
               [75, 25.8546, 1.091, 4.798, 36.045, no],
               [70, 20.6790, 0.741, 3.908, 30.198, no],
               [74, 28.3675, 1.045, 4.784, 31.339, no],
               [71, 29.0688, 1,066, 4,527, 24.252, no],
               [65, 23.9995, 0.841, 3.089, 79.910, no],
               [77, 22.9819, 1.015, 4.041, 57.147, no],
               [67, 33.3598, 1.129, 7.239, 67.103, yes],
               [66, 27.1314, 1.030, 4.096, 29.435, no],
               [70, 24.7676, 0,896, 4.352, 44.291, no],
               [70, 24.4193, 1.106, 2.823, 37.348, no]]
    labels = ['age', 'bmi', 'bmd', 'ictp', 'pinp']
    # change to discrete values
    return dataSet, labels

def calcShannonEnt(dataSet):
    numEntries = len(dataSet)
    labelCounts = {}
    for featVec in dataSet:  # the the number of unique elements and their occurance
        currentLabel = featVec[-1]
        if currentLabel not in labelCounts.keys(): labelCounts[currentLabel] = 0
        labelCounts[currentLabel] += 1
    shannonEnt = 0.0
    for key in labelCounts:
        prob = float(labelCounts[key]) / numEntries
        shannonEnt -= prob * log(prob, 2)  # log base 2
    return shannonEnt

def splitDataSet(dataSet, axis, value):
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis]  # chop out axis used for splitting
            reducedFeatVec.extend(featVec[axis + 1:])
    return retDataSet

def chooseBestFeatureToSplit(dataSet):
    numFeatures = len(dataSet[0]) - 1  # the last column is used for the labels
    baseEntropy = calcShannonEnt(dataSet)
    bestInfoGain = 0.0;
    bestFeature = -1
    for i in range(numFeatures):  # iterate over all the features
        featList = [example[i] for example in dataSet]  # create a list of all the examples of this feature
        uniqueVals = set(featList)  # get a set of unique values
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            newEntropy += prob * calcShannonEnt(subDataSet)

        infoGain = baseEntropy - newEntropy  # calculate the info gain; ie reduction in entropy
        print("feature : " + str(i))
        print("baseEntropy : "+str(baseEntropy))
        print("newEntropy : " + str(newEntropy))
        print("infoGain : " + str(infoGain))
        if (infoGain > bestInfoGain):  # compare this to the best gain so far
            bestInfoGain = infoGain  # if better than current best, set to best
            bestFeature = i
    return bestFeature  # returns an integer

def majorityCnt(classList):
    classCount = {}
    for vote in classList:
        if vote not in classCount.keys(): classCount[vote] = 0
        classCount[vote] += 1
    sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def createTree(dataSet, labels):
    # extracting data
    classList = [example[-1] for example in dataSet]
    if classList.count(classList[0]) == len(classList):
        return classList[0]  # stop splitting when all of the classes are equal
    if len(dataSet[0]) == 1:  # stop splitting when there are no more features in dataSet
        return majorityCnt(classList)
    # use Information Gain
    bestFeat = chooseBestFeatureToSplit(dataSet)
    bestFeatLabel = labels[bestFeat]

    #build a tree recursively
    myTree = {bestFeatLabel: {}}
    #print("myTree : "+labels[bestFeat])
    del (labels[bestFeat])
    featValues = [example[bestFeat] for example in dataSet]
    #print("featValues: "+str(featValues))
    uniqueVals = set(featValues)
    #print("uniqueVals: " + str(uniqueVals))
    for value in uniqueVals:
        subLabels = labels[:]  # copy all of labels, so trees don't mess up existing labels
        myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), subLabels)
        #print("myTree : " + str(myTree))
    return myTree

def classify(inputTree, featLabels, testVec):
    firstStr = inputTree.keys()[0]
    #print("fistStr : "+firstStr)
    secondDict = inputTree[firstStr]
    #print("secondDict : " + str(secondDict))
    featIndex = featLabels.index(firstStr)
    #print("featIndex : " + str(featIndex))
    key = testVec[featIndex]
    #print("key : " + str(key))
    valueOfFeat = secondDict[key]
    #print("valueOfFeat : " + str(valueOfFeat))
    if isinstance(valueOfFeat, dict):
        #print("is instance: "+str(valueOfFeat))
        classLabel = classify(valueOfFeat, featLabels, testVec)
        #print("is Not instance: " + valueOfFeat)
        classLabel = valueOfFeat
    return classLabel

def storeTree(inputTree, filename):
    import pickle
    fw = open(filename, 'w')
    pickle.dump(inputTree, fw)

def grabTree(filename):
    import pickle
    fr = open(filename)
    return pickle.load(fr)

# collect data
myDat, labels = createDataSet()

#build a tree
mytree = createTree(myDat, labels)

#run test
answer = classifi(mytree, ['age', 'bmi', 'bmd', 'ictp', 'pinp'], [79, 24.7252, 0.818, 9.170, 37.383])
print(" Ban "+answer+" bi nguy co gay xuong")

đoạn code của m bị lôi:
Traceback (most recent call last):
  File "C:\Users\phung\Desktop\", line 198, in <module>
    myDat, labels = createDataSet()
NameError: name 'createDataSet' is not defined

M.n xem giùm m với ạ

Chưa có hàm createDataSet() ?
Mà bạn format lại code đi.
Thêm 3 ` ở đầu và cuối code.

cái ‘yes’ và ‘no’ trong hàm createDataset chưa được định nghĩa. Bạn gõ sai về hoa thường cho chữ 'set' trong createDataSet.

Sai tên hàm nhé bạn, chỉnh S hoa thành s thường

