# Environment: Windows 10 64-bit, Python 3.6, PyCharm 2018.1.1
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors,datasets,cross_validation
def load_classification_data():
    """Load the sklearn digits dataset and split it for classification.

    Returns:
        X_train, X_test, y_train, y_test: a 75/25 stratified split of the
        digits data (stratify keeps per-class proportions in both halves).
        random_state=0 makes the split reproducible.
    """
    # sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
    # prefer model_selection, falling back for very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:  # scikit-learn < 0.18
        from sklearn.cross_validation import train_test_split
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    return train_test_split(X, y, test_size=0.25, random_state=0, stratify=y)
def create_regression_data(n):
    """Create a noisy 1-D sine-curve regression dataset and split it.

    Args:
        n: number of samples to generate.

    Returns:
        X_train, X_test, y_train, y_test: a 75/25 split (random_state=0)
        of n points with X uniform on [0, 5) and y = sin(X) plus uniform
        noise in [-0.5, 0.5) added to every 5th sample.
    """
    # See load_classification_data: cross_validation was removed in 0.20.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:  # scikit-learn < 0.18
        from sklearn.cross_validation import train_test_split
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # Size the noise vector from the slice itself: y[::5] has ceil(n/5)
    # elements, so the original int(n/5) raised a shape-mismatch
    # ValueError whenever n was not a multiple of 5.
    y[::5] += 1 * (0.5 - np.random.rand(y[::5].size))
    return train_test_split(X, y, test_size=0.25, random_state=0)
def test_KNeighborsClassifier(*data):
    """Fit a default KNeighborsClassifier and print train/test accuracy.

    Args:
        *data: X_train, X_test, y_train, y_test (as produced by
        load_classification_data).
    """
    X_train, X_test, y_train, y_test = data
    model = neighbors.KNeighborsClassifier()
    model.fit(X_train, y_train)
    train_acc = model.score(X_train, y_train)
    test_acc = model.score(X_test, y_test)
    print('Training Score:%f' % train_acc)
    print('Testing score:%f' % test_acc)

X_train, X_test, y_train, y_test = load_classification_data()
test_KNeighborsClassifier(X_train, X_test, y_train, y_test)

def test_KNeighborsClassifier_k_p(*data):
    """Plot KNeighborsClassifier train/test accuracy over n_neighbors and p.

    For each Minkowski exponent p in {1, 2, 10}, sweeps 50 integer values
    of n_neighbors in [1, len(y_train)) (np.linspace default num=50) and
    plots training and testing accuracy against K.

    Args:
        *data: X_train, X_test, y_train, y_test.
    """
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, endpoint=False, dtype='int')
    Ps = [1, 2, 10]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for p in Ps:
        training_scores = []
        testing_scores = []
        for k in Ks:
            clf = neighbors.KNeighborsClassifier(p=p, n_neighbors=k)
            clf.fit(X_train, y_train)
            testing_scores.append(clf.score(X_test, y_test))
            training_scores.append(clf.score(X_train, y_train))
        # Labels: dropped the stray trailing ':' the originals carried.
        ax.plot(Ks, testing_scores, label='testing score:p=%d' % p)
        ax.plot(Ks, training_scores, label='training score:p=%d' % p)
    ax.legend(loc='best')
    ax.set_xlabel('K')
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.05)
    # Fixed title typo: was 'KNeighborsClassifiter'.
    ax.set_title('KNeighborsClassifier')
    plt.show()

X_train, X_test, y_train, y_test = load_classification_data()
test_KNeighborsClassifier_k_p(X_train, X_test, y_train, y_test)

def test_KNeighborsRegressor(*data):
    """Fit a default KNeighborsRegressor and print train/test R^2 scores.

    Args:
        *data: X_train, X_test, y_train, y_test (as produced by
        create_regression_data).
    """
    X_train, X_test, y_train, y_test = data
    model = neighbors.KNeighborsRegressor()
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    print('Training Score:%f' % train_score)
    print('Testing score:%f' % test_score)

X_train, X_test, y_train, y_test = create_regression_data(1000)
test_KNeighborsRegressor(X_train, X_test, y_train, y_test)
def test_KNeighborsRegressor_k_w(*data):
    """Plot KNeighborsRegressor train/test R^2 over n_neighbors and weights.

    For each weighting scheme ('uniform', 'distance'), sweeps 100 integer
    values of n_neighbors in [1, len(y_train)) and plots training and
    testing R^2 scores against K.

    Args:
        *data: X_train, X_test, y_train, y_test.
    """
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_scores = []
        testing_scores = []
        for k in Ks:
            regr = neighbors.KNeighborsRegressor(weights=weight, n_neighbors=k)
            regr.fit(X_train, y_train)
            testing_scores.append(regr.score(X_test, y_test))
            training_scores.append(regr.score(X_train, y_train))
        ax.plot(Ks, testing_scores, label='testing score:weight=%s:' % weight)
        # Fixed label typo: was 'traing score'.
        ax.plot(Ks, training_scores, label='training score:weight=%s:' % weight)
    ax.legend(loc='best')
    ax.set_xlabel('K')
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.05)
    ax.set_title('KNeighborsRegressor')
    plt.show()

X_train, X_test, y_train, y_test = create_regression_data(1000)
test_KNeighborsRegressor_k_w(X_train, X_test, y_train, y_test)

def test_KNeighborsRegressor_k_p(*data):
    """Plot KNeighborsRegressor train/test R^2 over n_neighbors and p.

    For each Minkowski exponent p in {1, 2, 10}, sweeps 50 integer values
    of n_neighbors in [1, len(y_train)) (np.linspace default num=50) and
    plots training and testing R^2 scores against K.

    Args:
        *data: X_train, X_test, y_train, y_test.
    """
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, endpoint=False, dtype='int')
    Ps = [1, 2, 10]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for p in Ps:
        training_scores = []
        testing_scores = []
        for k in Ks:
            regr = neighbors.KNeighborsRegressor(p=p, n_neighbors=k)
            regr.fit(X_train, y_train)
            testing_scores.append(regr.score(X_test, y_test))
            training_scores.append(regr.score(X_train, y_train))
        ax.plot(Ks, testing_scores, label='testing score:p=%d' % p)
        # Fixed label typo: was 'traing score'.
        ax.plot(Ks, training_scores, label='training score:p=%d' % p)
    ax.legend(loc='best')
    ax.set_xlabel('K')
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.05)
    ax.set_title('KNeighborsRegressor')
    plt.show()

X_train, X_test, y_train, y_test = create_regression_data(1000)
test_KNeighborsRegressor_k_p(X_train, X_test, y_train, y_test)
