tensorflow2.0,通过sklearn实现神经网络预测模型的参数搜索

这里将 Keras 模型封装为 sklearn 估计器,只实现了回归(预测)模型的超参数搜索;该方法的详情以及分类模型的做法请参考 TensorFlow 官方文档,本文只贴出代码供笔者复习。
数据集为 sklearn 提供的加利福尼亚房价预测数据集。

import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf
import matplotlib as mpl
import pandas as pd
import numpy as np
import sklearn
import time
import sys
import os

# Load the California housing dataset that ships with scikit-learn
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
# Prints the feature-matrix shape, then the target-vector shape:
#   (20640, 8)
#   (20640,)
print(housing.data.shape, housing.target.shape, sep="\n")

# Split the data: hold out a test set first, then carve a validation set out
# of the remaining training data (fixed seeds keep both splits repeatable)
from sklearn.model_selection import train_test_split
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state=7)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state=11)
# One output line per split, feature shape followed by target shape:
#   (11610, 8) (11610,)    train
#   (3870, 8) (3870,)      validation
#   (5160, 8) (5160,)      test
print(*(f"{xs.shape} {ys.shape}"
        for xs, ys in ((x_train, y_train), (x_valid, y_valid), (x_test, y_test))),
      sep="\n")

# Standardize the features: x = (x - mean) / std.
# The statistics are fitted on the training split only and then reused for
# the validation and test splits.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_valid_scaled, x_test_scaled = map(scaler.transform, (x_valid, x_test))

# 建模
# RandomizedSearchCV
# 1. 转化为sklearn的model
# 2. 定义参数集合
# 3. 搜索参数

def build_model(hidden_layers=1, layer_size=30, learning_rate=3e-3):
    """Build and compile a fully connected regression network.

    The first hidden layer is always created and takes its input shape from
    the module-level ``x_train``; ``hidden_layers - 1`` further hidden layers
    of the same width follow, then a single output unit with no activation.

    Args:
        hidden_layers: Total number of ReLU hidden layers. Values below 1
            still yield one hidden layer because the first is unconditional.
        layer_size: Number of units in every hidden layer.
        learning_rate: Learning rate handed to the SGD optimizer.

    Returns:
        A ``keras.models.Sequential`` model compiled with MSE loss.
    """
    feature_shape = x_train.shape[1:]
    stack = [keras.layers.Dense(layer_size, activation='relu',
                                input_shape=feature_shape)]
    stack.extend(keras.layers.Dense(layer_size, activation='relu')
                 for _ in range(hidden_layers - 1))
    stack.append(keras.layers.Dense(1))

    network = keras.models.Sequential(stack)
    network.compile(loss='mse', optimizer=keras.optimizers.SGD(learning_rate))
    return network

# Wrap the Keras builder so it exposes the scikit-learn estimator interface
sklearn_model = keras.wrappers.scikit_learn.KerasRegressor(build_fn = build_model)
# Stop training once the monitored loss has not improved by at least 1e-2
# for 5 consecutive epochs
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = sklearn_model.fit(x_train_scaled, y_train,
                            epochs = 10,
                            validation_data = (x_valid_scaled, y_valid),
                            callbacks = callbacks)
# The wrapper itself has no summary() method; the underlying Keras model is
# only built inside fit() and is then available as `.model`.
sklearn_model.model.summary()

# Hyperparameter search space
from scipy.stats import reciprocal
# reciprocal(a, b) is log-uniform: f(x) = 1 / (x * log(b / a)) for a <= x <= b
#
# Every candidate set is a plain Python list so that the sampled values are
# native int/float objects rather than NumPy scalars.
# NOTE(review): the Keras scikit-learn wrapper deep-copies its parameters in
# get_params(), and the identity check in sklearn's clone() is known to reject
# NumPy scalars on newer scikit-learn releases during the final refit --
# confirm against the installed versions.
# The learning rate is therefore drawn from 1000 pre-sampled log-uniform
# values instead of from the continuous distribution.
param_distribution = {
    "hidden_layers": [1, 2, 3, 4],
    "layer_size": np.arange(1, 100).tolist(),
    "learning_rate": reciprocal(1e-4, 1e-2).rvs(1000).tolist(),
}

from sklearn.model_selection import RandomizedSearchCV
# Randomized search: 10 sampled configurations, each scored with 3-fold
# cross-validation (the training data is cut into `cv` parts; each part is
# used once for validation while the remaining parts are used for training).
random_search_cv = RandomizedSearchCV(
    estimator=sklearn_model,
    param_distributions=param_distribution,
    n_iter=10,
    cv=3,
    n_jobs=1,
)
# Keyword arguments beyond X and y are passed on to the estimator's fit()
random_search_cv.fit(
    x_train_scaled,
    y_train,
    epochs=100,
    validation_data=(x_valid_scaled, y_valid),
    callbacks=callbacks,
)
print(
    random_search_cv.best_params_,     # best hyperparameter combination
    # best mean cross-validated score
    # NOTE(review): the Keras wrapper's score() appears to return the negated
    # loss, so expect -MSE here (higher is better) -- confirm
    random_search_cv.best_score_,
    random_search_cv.best_estimator_,  # estimator holding the best model
    sep="\n",
)

# Pull the underlying Keras model out of the best estimator and evaluate it
# on the held-out test set
model = random_search_cv.best_estimator_.model
model.evaluate(x=x_test_scaled, y=y_test)

你可能感兴趣的:(tensorflow)