# How to build a decision tree (ID3)
# Core code: generate the decision tree recursively (the construction relies on recursion)
def createTree(dataSet, labels):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Args:
        dataSet: Non-empty sequence of rows. Each row holds the feature
            values followed by the class label in its last position.
        labels: List of feature names; ``labels[i]`` names feature column
            ``i`` of ``dataSet``. The list is not modified.

    Returns:
        A class label when the recursion stops at a leaf, otherwise a dict
        of the form ``{feature_name: {feature_value: subtree_or_label}}``.
    """
    # Collect the class column row by row so this works for any row-iterable
    # data set and yields a list (which supports ``.count``).
    classList = [row[-1] for row in dataSet]

    # Stop: every remaining sample belongs to the same class.
    if classList.count(classList[0]) == len(classList):
        return classList[0]  # return the category label

    # Stop: each row is down to a single column, so nothing is left to
    # split on. majorityCnt is defined elsewhere; it is expected to return
    # the most frequent category.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)

    bestFeat = chooseBestFeatureToSplit(dataSet)
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}

    # Build the reduced label list as a new list instead of deleting in
    # place: the caller's ``labels`` stays intact and sibling branches do not
    # corrupt each other's labels.
    # NOTE(review): assumes splitDataSet drops column ``bestFeat`` from the
    # rows it returns, so labels and columns stay aligned -- confirm.
    subLabels = [label for i, label in enumerate(labels) if i != bestFeat]

    # Distinct values of the chosen feature (a column, not a row).
    uniqueVals = {row[bestFeat] for row in dataSet}

    # One subtree per feature value, built the same way a tree is built
    # recursively in a data-structures course.
    for value in uniqueVals:
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels
        )
    return myTree
# Classification function
def classify(inputTree, featLabels, testVec):
    """Classify one sample by walking a nested-dict decision tree.

    Args:
        inputTree: Dict whose first key is a feature name and whose value is
            a dict mapping feature values to either a class label or a
            nested tree of the same form.
        featLabels: List of feature names; the position of a name in this
            list is the index of that feature's value in ``testVec``.
        testVec: Sequence of feature values for the sample to classify.

    Returns:
        The class label stored at the leaf reached by ``testVec``.

    Raises:
        ValueError: If the node's feature name is not in ``featLabels``
            (raised by ``list.index``), or if the sample's value for that
            feature matches no branch of the tree.
    """
    # Feature name tested at this node (first key of the tree).
    firstStr = next(iter(inputTree))
    # Branches of this node: feature value -> subtree or class label.
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)
    featValue = testVec[featIndex]

    for key, branch in secondDict.items():
        if featValue == key:
            # isinstance also recognises dict subclasses as subtrees.
            if isinstance(branch, dict):
                return classify(branch, featLabels, testVec)
            return branch

    # No branch matched: fail with a clear message instead of falling
    # through to an unbound local variable.
    raise ValueError(
        "no branch for value %r of feature %r" % (featValue, firstStr)
    )