import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
2. 生成模拟数据
生成具有非线性关系的数据集:
# Build a reproducible synthetic data set with a cubic trend.
np.random.seed(42)  # seed the global NumPy RNG so every run draws the same sample
x = np.random.uniform(low=-10, high=10, size=200)
# Single-column (200, 1) form of x, used as the feature matrix.
X = np.reshape(x, (-1, 1))
# Gaussian noise with mean -8 and standard deviation 8.
# NOTE(review): the non-zero mean shifts the expected constant term of y from
# 2 to -6 -- confirm this offset is intended.
noise = np.random.normal(loc=-8, scale=8, size=200)
y = 0.1 * x**3 + 0.1 * x**2 + x + 2 + noise
# Hold out 20% of the samples for testing; random_state pins the shuffle so the
# split is reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts
获取权重(`coef_`)和偏置(`intercept_`):
# Build and fit the model whose parameters are reported below.
# The original snippet referenced `pr` without ever defining it (NameError);
# a degree-3 polynomial pipeline ending in a 'lin_reg' step is what the rest of
# the script expects (named_steps['lin_reg'], cubic-fit plot label).
# NOTE(review): the 'poly' step name and include_bias=False are assumptions --
# LinearRegression already fits the constant term via intercept_, so the bias
# column is left out to keep coef_ aligned with x, x^2, x^3. Confirm against
# the original tutorial.
pr = Pipeline([
    ('poly', PolynomialFeatures(degree=3, include_bias=False)),
    ('lin_reg', LinearRegression()),
])
pr.fit(X_train, y_train)

# Report the fitted linear step and its learned parameters.
lin_reg = pr.named_steps['lin_reg']
print("模型对象:", lin_reg)
print("权重 (coef_):", lin_reg.coef_)
print("偏置 (intercept_):", lin_reg.intercept_)
计算预测值及评估指标(MSE、R²):
# Predict on both splits.
# NOTE(review): `pr` is expected to be an already-fitted estimator defined
# earlier in the script -- verify it exists before this point.
y_train_pred, y_test_pred = pr.predict(X_train), pr.predict(X_test)

# Mean squared error and R^2 for each split.
mse_train, r2_train = (
    mean_squared_error(y_train, y_train_pred),
    r2_score(y_train, y_train_pred),
)
mse_test, r2_test = (
    mean_squared_error(y_test, y_test_pred),
    r2_score(y_test, y_test_pred),
)

# Print the four metrics: training set first, then test set.
for label, value in (
    ("\n训练集 MSE:", mse_train),
    ("训练集 R²:", r2_train),
    ("测试集 MSE:", mse_test),
    ("测试集 R²:", r2_test),
):
    print(label, value)
展示原始数据与模型预测曲线:
# Evaluate the model on 100 evenly spaced points in [-10, 10] so the fitted
# curve is drawn smoothly.
# NOTE(review): `pr` is expected to be an already-fitted estimator defined
# earlier in the script -- verify it exists before this point.
x_plot = np.reshape(np.linspace(-10, 10, num=100), (-1, 1))
y_plot = pr.predict(x_plot)

# One figure: both data splits as scatter points with the fitted curve on top.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_train, y_train, color='blue', label='训练集')
ax.scatter(X_test, y_test, color='green', label='测试集')
ax.plot(x_plot, y_plot, color='red', linewidth=2, label='三次多项式拟合')
ax.set_title("多项式回归拟合效果")
ax.set_xlabel("特征值")
ax.set_ylabel("目标值")
ax.legend()
plt.show()
权重与偏置
`coef_`:对应多项式特征(如 $x^3$、$x^2$、$x$)的系数。
`intercept_`:模型的偏置项(常数项)。
评估指标
可视化