k-NN回顾
k-NN基本实现
import numpy as npimport pandas as pd
## Load the CIFAR-10 data
# NOTE(review): these batch files are pickles; unpickling executes arbitrary
# code, so only load archives obtained from a trusted source.
root = '../cifar-10-batches-py/'

# Training set: 5 batches of 10000 images, each image flattened to 32*32*3 values.
Xtr = np.zeros((50000, 32*32*3))
# Labels are class indices, so keep them integral (matches yte and lets
# np.bincount consume them directly).
ytr = np.zeros((50000, 1), dtype=np.int64)
for i in range(1, 6):
    # Unpickle each batch file once and reuse it for both data and labels.
    # The path is passed positionally because the keyword name differs
    # between pandas versions (`path` vs `filepath_or_buffer`).
    batch = pd.read_pickle(root + 'data_batch_' + str(i))
    rows = slice((i - 1) * 10000, i * 10000)
    Xtr[rows, :] = batch['data']
    ytr[rows] = np.reshape(batch['labels'], (10000, 1))

# Test set: a single batch, also read only once.
test_batch = pd.read_pickle(root + 'test_batch')
Xte = test_batch['data']
yte = np.reshape(test_batch['labels'], (10000, 1))
## Define the k-NN classifier
class kNN(object):
    """k-nearest-neighbour classifier using the L1 (Manhattan) distance."""

    def __init__(self, X, y):
        """Remember the training set.

        X: training samples, one flattened sample per row.
        y: training labels (non-negative class indices), either 1-D or a
           column array whose first column holds the label.
        """
        self.Xtr = X
        self.ytr = y

    def forward(self, Xte, K):
        """Predict a label for every row of ``Xte`` by majority vote.

        Xte: test samples, one flattened sample per row.
        K:   number of nearest neighbours that vote (must be >= 1).

        Returns an array of shape ``(Xte.shape[0], K)`` with the dtype of the
        training labels. Every column of a row holds the same predicted
        label; the shape is kept for backward compatibility and broadcasts
        against an ``(N, 1)`` label column.

        Raises ValueError if ``K`` is smaller than 1.
        """
        if K < 1:
            raise ValueError("K must be at least 1, got %r" % (K,))

        # Work in float64 so that differences of unsigned pixel data
        # (e.g. uint8) cannot wrap around. No copy is made when the
        # training data is already float64.
        train = np.asarray(self.Xtr, dtype=np.float64)

        label_arr = np.asarray(self.ytr)
        # Labels may be stored as a column; voting only needs the first column.
        flat_labels = label_arr[:, 0] if label_arr.ndim > 1 else label_arr
        # np.bincount only accepts integers, but labels are often kept as floats.
        vote_labels = flat_labels.astype(np.intp)

        Ypred = np.zeros((Xte.shape[0], K), dtype=label_arr.dtype)
        for row, sample in enumerate(Xte):
            query = np.asarray(sample, dtype=np.float64)
            # L1 distance between this test sample and every training sample.
            distances = np.sum(np.abs(train - query), axis=1)
            nearest = np.argsort(distances)[:K]
            # Majority vote; ties resolve to the smallest label (argmax).
            Ypred[row] = np.argmax(np.bincount(vote_labels[nearest]))
        return Ypred
# Fit the classifier on the full training set.
nn = kNN(Xtr, ytr)

# Evaluate on the first N test samples only, using 5 neighbours.
N = 10
Yte_predict = nn.forward(Xte[:N], 5)

# Fraction of predictions that match the ground-truth labels.
accuracy = np.mean(Yte_predict == yte[:N])
print('ACC:%f' % accuracy)
ACC:0.500000
!jupyter nbconvert --to markdown k-NN.ipynb
[NbConvertApp] Converting notebook k-NN.ipynb to markdown
[NbConvertApp] Support files will be in k-NN_files\
[NbConvertApp] Making directory k-NN_files
[NbConvertApp] Writing 4017 bytes to k-NN.md