def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Input vector to classify (array-like with one value per
            feature).
        dataSet: Training samples, one row per sample (array-like that
            converts to a 2-D NumPy array).
        labels: Label of each training sample; ``labels[i]`` belongs to
            row ``i`` of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` nearest samples.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number
            of training samples.
    """
    # Imported locally so the function does not depend on a star-import
    # (``from numpy import *``) being present at module level.
    import operator

    import numpy as np

    samples = np.asarray(dataSet)
    sample_count = samples.shape[0]
    if not 1 <= k <= sample_count:
        raise ValueError(
            "k must be between 1 and the number of samples ({0}), got {1}"
            .format(sample_count, k)
        )

    # Broadcasting subtracts inX from every row; no explicit tile() needed.
    diffs = np.asarray(inX) - samples
    # Summing along axis=1 adds up the squared differences of each row.
    distances = (diffs ** 2).sum(axis=1) ** 0.5

    # Tally the labels of the k closest samples.
    votes = {}
    for idx in distances.argsort()[:k]:
        label = labels[idx]
        votes[label] = votes.get(label, 0) + 1

    # items() works on Python 2 and 3; iteritems() is Python 2 only.
    ranked = sorted(votes.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[0][0]
然後我們進行測試,重新打開 Python 編譯環境:
>>> import kNN
>>> group, labels = kNN.createDataSet()
>>> kNN.classify0([0, 0], group, labels, 3)
'B'
>>> kNN.classify0([0.3, 0], group, labels, 3)
'B'
>>> kNN.classify0([0.8, 0.9], group, labels, 3)
'A'
我們看到,一個簡單的分類器就這樣搞定了。這時,我們來將電影數據進行樣本寫入:
def createDataSet(): group = array([ [3, 104], [2, 100], [1, 81], [101, 10], [99, 5], [98, 2] ]) labels = ["love", "love", "love", "action", "action",