2019-12-01

利用 Python 实现一元与多元线性回归(机器学习)

1、简单一元线性回归

import numpy as np

import matplotlib.pyplot as plt

# Simple (one-feature) linear regression fitted by ordinary least squares.

# Sample data: five (x, y) observations.
x = np.array([1., 2., 3., 4., 5.])
y = np.array([1., 3., 2., 3., 5.])

# Show the raw observations.
plt.scatter(x, y)
plt.axis([0, 6, 0, 6])
plt.show()

x_mean = np.mean(x)  # mean of the inputs
y_mean = np.mean(y)  # mean of the targets

# Closed-form least-squares solution:
#   a = sum((x_i - x_mean) * (y_i - y_mean)) / sum((x_i - x_mean) ** 2)
#   b = y_mean - a * x_mean
# The sums are accumulated with vectorised NumPy operations, and a, b and
# y_hat are computed once, after both sums are complete.
x_centered = x - x_mean
num = np.sum(x_centered * (y - y_mean))
d = np.sum(x_centered ** 2)

a = num / d               # slope
b = y_mean - a * x_mean   # intercept
y_hat = a * x + b         # fitted values for the training inputs

plt.scatter(x, y)               # scatter plot of the observations
plt.plot(x, y_hat, color='r')   # fitted regression line
plt.axis([0, 6, 0, 6])
plt.show()

# Predict the target for a new input.
x_predict = 6
y_predict = a * x_predict + b
print(y_predict)

2、多元线性回归


import numpy as np

from .metrics import r2_score

class LinearRegression:
    """Multivariate linear regression solved with the normal equation.

    After fitting, ``interception_`` holds the intercept (theta[0]),
    ``coef_`` holds the per-feature coefficients (theta[1:]) and
    ``_theta`` holds the full parameter vector.
    """

    def __init__(self):
        """Initialise an unfitted Linear Regression model."""
        self.coef_ = None          # coefficients, i.e. theta[1:]
        self.interception_ = None  # intercept, i.e. theta[0]
        self._theta = None         # full parameter vector theta

    def fit_normal(self, X_train, y_train):
        """Fit the model on X_train / y_train using the normal equation.

        Args:
            X_train: 2-D array with one row per sample.
            y_train: targets; its first dimension must match X_train's.

        Returns:
            self, so that calls can be chained.

        Raises:
            AssertionError: if X_train and y_train have different lengths.
            numpy.linalg.LinAlgError: if X_b.T @ X_b is singular.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        # Prepend a column of ones so that theta[0] acts as the intercept.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])

        # Normal equation: (X_b^T X_b) theta = X_b^T y.  Solving the linear
        # system avoids forming an explicit matrix inverse.
        self._theta = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y_train))

        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def predict(self, X_predict):
        """Return the vector of predictions for the samples in X_predict.

        Args:
            X_predict: 2-D array whose column count equals len(self.coef_).

        Raises:
            AssertionError: if the model is not fitted yet, or the number
                of features does not match the fitted coefficients.
        """
        assert self.interception_ is not None and self.coef_ is not None, \
            "must fit before predict"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        y_predict = X_b.dot(self._theta)
        return y_predict

    def score(self, X_test, y_test):
        """Score the model on X_test / y_test with r2_score.

        Returns whatever ``r2_score(y_test, y_predict)`` returns, where
        ``y_predict`` is this model's prediction for X_test.
        """
        # predict is a bound method: self must not be passed explicitly.
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        """Return the constructor-style representation of the model."""
        return "LinearRegression()"

import numpy as np

import matplotlib.pyplot as plt

from sklearn import datasets

# Demo: fit the LinearRegression model on the Boston housing data.
# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2 --
# confirm the installed version still provides it.
boston = datasets.load_boston()

# Keep only the samples whose target is below 50.0.  The mask is built
# once from the unfiltered target and applied to both arrays.
keep = boston.target < 50.0
X = boston.data[keep]
y = boston.target[keep]

# Bare expressions: echoed in an interactive session, no effect in a script.
X.shape
y.shape

# Project-local helpers (not defined in this file).
from myAlgorithm.model_selection import train_test_split
from myAlgorithm.LinearRegression import LinearRegression

# Split with a fixed seed, then fit via the normal equation.
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
reg = LinearRegression()
reg.fit_normal(X_train, y_train)

# Inspect the fitted coefficients and the score on the held-out split.
reg.coef_
reg.score(X_test, y_test)

总结

    线性回归模型有着比较清晰的数学推导过程,也是许多复杂模型的基础。线性回归算法是典型的参数学习方法。虽然线性回归本身只能解决回归问题,但它是很多分类算法(如逻辑回归)的基础。此外,线性回归假设数据之间存在一定的线性关系:线性关系越强,拟合效果越好。

你可能感兴趣的:(2019-12-01)