# kNN

# k-Nearest Neighbors (kNN)
from numpy import *
import operator
def createDataSet():
    """Return a small hard-coded sample set for trying out the classifier.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 array of points and
        ``labels`` is a list with the class name of each row, in row order.
    """
    points = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    class_names = ['A', 'A', 'B', 'B']
    return array(points), class_names
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The point to classify; it is tiled once per row of ``dataSet``,
            so it must have one value per column of ``dataSet``.
        dataSet: 2-D array of training points, one point per row.
        labels: Sequence of labels, indexed by row number of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the k nearest rows. On a tie the
        label that was encountered first (i.e. the nearer one) wins, because
        the sort below is stable.

    Raises:
        IndexError: If ``k`` is less than 1 or greater than the number of
            rows in ``dataSet``.
    """
    dataSetSize = dataSet.shape[0]  # number of rows == number of samples

    # Repeat inX once per sample so it can be subtracted row by row.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet

    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # argsort() returns the row indices ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()

    classCount = {}
    for i in range(k):
        # Label of the i-th nearest sample casts one vote.
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Order (label, votes) pairs by vote count, highest first. ``reverse``
    # belongs to sorted(), not to itemgetter(), and items() replaces the
    # Python 2-only iteritems().
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]



# You may also be interested in: (kNN)