新建一个 matrics.py 文件,将预测准确度的计算封装成函数 get_accuracy:
import numpy as np
def get_accuracy(y_test, y_predict):
    """
    Compute the fraction of predictions that equal their target label.

    Both arguments are coerced with ``np.asarray``, so plain Python sequences
    are accepted in addition to NumPy arrays.

    :param y_test: true target labels
    :param y_predict: labels predicted by the algorithm, same length as y_test
    :return: accuracy ratio in [0, 1] as ``np.float64``
    :raises AssertionError: if the two inputs differ in length
    :raises ZeroDivisionError: if the inputs are empty
    """
    y_test = np.asarray(y_test)
    y_predict = np.asarray(y_predict)
    assert y_test.shape[0] == y_predict.shape[0], "预测值和目标值的大小必须相等"
    # count_nonzero tallies the matches inside NumPy instead of iterating the
    # boolean array element by element with the builtin sum().
    n_correct = np.count_nonzero(y_predict == y_test)
    # Plain int division keeps the ZeroDivisionError on empty input; the
    # np.float64 wrapper keeps the return type of the element-wise version.
    return np.float64(n_correct / len(y_test))
在自己的分类器中添加一个计算分数的方法score:
class KNNClassifier:
    """
    k-nearest-neighbours classifier.

    Each query vector is labelled by a majority vote among the ``k`` training
    samples with the smallest Euclidean distance to it.
    """

    def __init__(self, k):
        """
        Create an untrained classifier.

        :param k: number of nearest neighbours that take part in the vote,
                  must be at least 1
        """
        assert k >= 1, "k必须为合法值"
        self.k = k
        # A leading underscore marks these as non-public by convention only;
        # Python does not actually prevent outside access.
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """
        Train the classifier by storing the training set.

        :param X_train: feature matrix of the training set, one row per sample
        :param y_train: target labels of the training set
        :return: the classifier itself, so calls can be chained
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "训练集X必须和y的大小一致"
        assert self.k <= X_train.shape[0], \
            "训练集X必须至少k个样本"
        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """
        Predict a label for every row of a data set.

        :param X_predict: matrix of samples to classify, one row per sample
        :return: array holding the predicted label of each row
        """
        assert self._X_train is not None and self._y_train is not None, \
            "在预测前必须先训练"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "预测数据的特征数必须和训练集X的一致"
        return np.array([self._predict(x) for x in X_predict])

    def _predict(self, x):
        """
        Predict the label of a single feature vector.

        :param x: one sample, with as many features as the training set
        :return: the most common label among the k nearest training samples
        """
        assert x.shape[0] == self._X_train.shape[1], \
            "预测数据的特征数必须和训练集X的一致"
        # Subtract in float64: with unsigned integer features (e.g. uint8
        # pixels) a plain ``X - x`` wraps around and corrupts the distances.
        # One vectorised call also replaces the Python loop over training rows.
        diffs = np.subtract(self._X_train, x, dtype=np.float64)
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))
        nearest = np.argsort(distances)
        top_k = [self._y_train[i] for i in nearest[:self.k]]
        # most_common lists equal counts in first-seen order, so a tied vote
        # goes to the label whose neighbour comes first in the sorted order.
        votes = Counter(top_k)
        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        """
        Predict on a test set and measure the accuracy of the predictions.

        :param X_test: feature matrix of the test set
        :param y_test: true labels of the test set
        :return: whatever get_accuracy returns for y_test and the predictions
        """
        y_predict = self.predict(X_test)
        return get_accuracy(y_test, y_predict)
则可以通过以下两种方法计算准确度:
# Option 1: pass the true labels and already-computed predictions to the
# standalone helper.
get_accuracy(y_test, y_predict)
# Option 2: hand the test set to the classifier's score method.
my_knn_clf.score(X_test, y_test)
完整代码如下:
from sklearn import datasets
from model_selection import train_test_split # 使用自己的数据集划分函数
from knn import KNNClassifier # 使用自己的KNN分类器
from matrics import get_accuracy # 使用自己的计算准确度函数
if __name__ == '__main__':
    # Load the digits dataset shipped with scikit-learn.
    digits = datasets.load_digits()
    X, y = digits.data, digits.target

    # Split the data with the project's own train_test_split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_radio=0.2)

    # Train the project's own KNN classifier.
    my_knn_clf = KNNClassifier(k=3)
    my_knn_clf.fit(X_train, y_train)

    # Route 1: predict explicitly, then score with the standalone function.
    y_predict = my_knn_clf.predict(X_test)
    accuracy_from_function = get_accuracy(y_test, y_predict)
    print('预测的准确度:', accuracy_from_function)

    # Route 2: score through the classifier's own method.
    accuracy_from_method = my_knn_clf.score(X_test, y_test)
    print('预测的准确度:', accuracy_from_method)
运行结果:
预测的准确度: 0.9832869080779945
预测的准确度: 0.9832869080779945
作为对比,下面使用scikit-learn自带的train_test_split、KNeighborsClassifier和accuracy_score完成同样的流程:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
if __name__ == '__main__':
    # Load the digits dataset shipped with scikit-learn.
    digits = datasets.load_digits()
    X, y = digits.data, digits.target

    # Split with scikit-learn's train_test_split; random_state is fixed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=666)

    # Train scikit-learn's KNN classifier with 3 neighbours.
    knn_clf = KNeighborsClassifier(n_neighbors=3)
    knn_clf.fit(X_train, y_train)

    # Predict on the test split and print the accuracy.
    y_predict = knn_clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_predict)
    print(accuracy)
运行结果:
0.9888888888888889