Project Address:
https://github.com/TheOneAC/ML.git
dataset in ML/ML_ation/tree
### 决策树
- 优点:计算复杂度低,对中间值缺失不敏感,可处理不相关特征数据
- 缺点:可能产生过度匹配(过拟合)问题
- 适用:数值型和标称型
决策树伪代码createbranch
|
|
递归结束条件:所有属性遍历完,或者数据集属于同一分类
香农熵
|
|
数据集划分与最优特征选择(信息增益最大,即划分后熵最小)
|
|
所有特征用尽仍无法确定类标签时: 多数表决决定叶子节点的分类
|
|
创建树
|
|
测试
|
|
|
|
### 存储与重载 (storing and reloading the tree)
def storeTree(inputTree, filename):
    """Serialize a decision tree to ``filename`` with pickle.

    Args:
        inputTree: The tree object to persist (any picklable object).
        filename: Path of the file to write; it is overwritten if it exists.
    """
    import pickle

    # pickle produces bytes, so the file must be opened in binary mode;
    # text mode ('w') raises TypeError on Python 3.
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)


def grabTree(filename):
    """Load and return a tree previously written by ``storeTree``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    import pickle

    # NOTE(security): pickle.load can execute arbitrary code; only load
    # files that this program (or another trusted source) produced.
    # Binary mode matches storeTree, and the context manager closes the handle.
    with open(filename, 'rb') as fr:
        return pickle.load(fr)
### test
#!/usr/bin/python
# Round-trip check: build a tree, pickle it to disk, load it back and print it.
# Relies on the project-local ``trees`` module (createDataSet, createTree,
# storeTree, grabTree), which is not shown here.
import trees

myDat, labels = trees.createDataSet()
myTree = trees.createTree(myDat, labels)
trees.storeTree(myTree, 'classifierStorage.txt')
print(trees.grabTree('classifierStorage.txt'))
图形化显示树结构
|
|
创建节点
|
|
Run the following commands in the Python interactive shell:
# The imported module name must match the name used in the call below;
# importing it as ``treeplotter`` would leave ``treePlotter`` undefined.
import treePlotter
treePlotter.createPlot()
# - result like this
def getNumLeafs(myTree):
    """Count the leaf nodes of a nested-dict decision tree.

    The tree is read as ``{feature: {value: subtree_or_label, ...}}``; every
    value that is not a dict counts as one leaf.

    Args:
        myTree: Nested dict whose first key is the root feature.

    Returns:
        The number of leaves as an int.
    """
    # Only the first key is used; next(iter(...)) works on Python 2 and 3,
    # whereas ``myTree.keys()[0]`` fails on Python 3.
    firstStr = next(iter(myTree))
    secondDict = myTree[firstStr]
    numLeafs = 0
    for child in secondDict.values():
        if isinstance(child, dict):
            numLeafs += getNumLeafs(child)
        else:
            numLeafs += 1
    return numLeafs


def getTreeDepth(myTree):
    """Return the number of decision levels on the deepest path of the tree.

    Args:
        myTree: Nested dict in the same format accepted by ``getNumLeafs``.

    Returns:
        The depth as an int; a root whose children are all leaves has depth 1.
    """
    firstStr = next(iter(myTree))
    secondDict = myTree[firstStr]
    maxDepth = 0
    for child in secondDict.values():
        if isinstance(child, dict):
            thisDepth = 1 + getTreeDepth(child)
        else:
            thisDepth = 1
        if thisDepth > maxDepth:
            maxDepth = thisDepth
    return maxDepth


def retrieveTree(i):
    """Return one of two hard-coded sample trees, selected by index ``i``."""
    listOfTrees = [
        {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}},
        {'no surfacing': {0: 'no', 1: {'flippers':
                                       {0: {'head': {0: 'no', 1: 'yes'}},
                                        1: 'no'}}}},
    ]
    return listOfTrees[i]


def plotMidText(cntrPt, parentPt, txtString):
    """Write ``txtString`` at the midpoint between ``cntrPt`` and ``parentPt``.

    Both points are indexable as ``(x, y)``. The text is drawn on
    ``createPlot.ax1``, which ``createPlot`` sets before plotting.
    """
    xMid = (parentPt[0] - cntrPt[0]) / 2.0 + cntrPt[0]
    yMid = (parentPt[1] - cntrPt[1]) / 2.0 + cntrPt[1]
    createPlot.ax1.text(xMid, yMid, txtString)


def plotTree(myTree, parentPt, nodeTxt):
    """Recursively draw ``myTree`` below ``parentPt``.

    Layout state lives in function attributes that ``createPlot`` initialises:
    ``plotTree.totalW`` / ``plotTree.totalD`` (leaf count and depth of the
    whole tree) and ``plotTree.xOff`` / ``plotTree.yOff`` (current cursor).

    Args:
        myTree: Nested dict in the format accepted by ``getNumLeafs``.
        parentPt: ``(x, y)`` of the parent node.
        nodeTxt: Label written midway between this node and its parent.
    """
    # plotNode, decisionNode and leafNode are defined elsewhere in the module.
    numLeafs = getNumLeafs(myTree)
    firstStr = next(iter(myTree))
    # Horizontal position is derived from the cursor and this subtree's leaf count.
    cntrPt = (plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
              plotTree.yOff)
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    # Step down one level before drawing the children ...
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key in secondDict.keys():
        if isinstance(secondDict[key], dict):
            plotTree(secondDict[key], cntrPt, str(key))
        else:
            # Each leaf advances the horizontal cursor by one slot.
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff),
                     cntrPt, leafNode)
            plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
    # ... and restore the level once this subtree is finished.
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD


def createPlot(inTree):
    """Create a blank figure, draw ``inTree`` on it and show it.

    Args:
        inTree: Nested dict in the format accepted by ``getNumLeafs``.
    """
    # ``plt`` is assumed to be matplotlib.pyplot imported at module level -- TODO confirm.
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
    plotTree.totalW = float(getNumLeafs(inTree))
    plotTree.totalD = float(getTreeDepth(inTree))
    # Start half a slot to the left of the axes, at the top.
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.show()
扩展测试 lens.py
|
|
|
|