Because all of the parameters to be optimized will be packed into a single list, this file needs to define two functions: create_lstm(inputs, units, return_sequences), which creates an LSTM layer, and create_dense(inputs, units), which creates a fully connected block (including a BN layer and a dropout layer).
# LSTM-layer helper
def create_lstm(inputs, units, return_sequences):
    lstm = layers.Bidirectional(layers.LSTM(units, return_sequences=return_sequences))(inputs)
    print('Lstm', lstm.shape)
    return lstm
# Dense-layer helper (Dense + Dropout + BatchNormalization)
def create_dense(inputs, units):
    dense = layers.Dense(units, kernel_regularizer=keras.regularizers.l2(0.001), activation='relu')(inputs)
    print('Dense', dense.shape)
    dense_dropout = layers.Dropout(0.2)(dense)
    dense_batch = layers.BatchNormalization()(dense_dropout)
    return dense, dense_dropout, dense_batch
When configuring the LSTM layers, only the last layer keeps just the output of the final time step (return_sequences=False); every other layer returns the full sequence (return_sequences=True).
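As a quick illustration of why this matters (a minimal standalone sketch, not part of the article's model), a bidirectional LSTM with return_sequences=True emits one vector per time step, while return_sequences=False emits only the final step, which is the shape the downstream Dense layers expect:

from tensorflow.keras import layers

x = layers.Input(shape=(28, 28))  # 28 time steps of 28 features (one MNIST row per step)
seq = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
last = layers.Bidirectional(layers.LSTM(128, return_sequences=False))(seq)
print(seq.shape)   # (None, 28, 256): one 256-dim vector per time step
print(last.shape)  # (None, 256): only the final step, ready for Dense layers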
# LSTM-layer parameters
lstm_num_layers = 2
lstm_units = [128, 128]
lstm_name = list(np.zeros((lstm_num_layers,)))
# Dense-layer (after the LSTM stack) parameters
lstm_dense_num_layers = 2
lstm_dense_units = [128, 64]
lstm_dense_name = list(np.zeros((lstm_dense_num_layers,)))
lstm_dense_dropout_name = list(np.zeros((lstm_dense_num_layers,)))
lstm_dense_batch_name = list(np.zeros((lstm_dense_num_layers,)))
Build the network model following the explanation given when the two functions were introduced.
inputs_lstm = layers.Input(shape=(x_train.shape[1], x_train.shape[2]))
print(inputs_lstm.shape)
for i in range(lstm_num_layers):
    if i == 0:
        inputs = inputs_lstm
    else:
        inputs = lstm_name[i - 1]
    if i == lstm_num_layers - 1:
        return_sequences = False
    else:
        return_sequences = True
    lstm_name[i] = create_lstm(inputs, lstm_units[i], return_sequences)
for i in range(lstm_dense_num_layers):
    if i == 0:
        inputs = lstm_name[lstm_num_layers - 1]
    else:
        inputs = lstm_dense_batch_name[i - 1]
    lstm_dense_name[i], lstm_dense_dropout_name[i], lstm_dense_batch_name[i] = create_dense(inputs, lstm_dense_units[i])
outputs_lstm = layers.Dense(10, activation='softmax')(lstm_dense_batch_name[lstm_dense_num_layers - 1])
print('Outputs:', outputs_lstm.shape)
The code above does not use the list num; the number of layers is hard-coded to 2 and the neuron counts are given directly, purely to make the explanation easier to follow. The complete code is given below:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import models, layers, optimizers
import matplotlib.pyplot as plt
import numpy as np

# LSTM-layer helper
def create_lstm(inputs, units, return_sequences):
    lstm = layers.Bidirectional(layers.LSTM(units, return_sequences=return_sequences))(inputs)
    return lstm

# Dense-layer helper (Dense + Dropout + BatchNormalization)
def create_dense(inputs, units):
    dense = layers.Dense(units, kernel_regularizer=keras.regularizers.l2(0.001), activation='relu')(inputs)
    dense_dropout = layers.Dropout(0.2)(dense)
    dense_batch = layers.BatchNormalization()(dense_dropout)
    return dense, dense_dropout, dense_batch

def load():
    # Load the MNIST dataset
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
    # Simple normalization to [0, 1]
    x_train, x_test = x_train / 255.0, x_test / 255.0
    print('Load finished!')
    return x_train, y_train, x_test, y_test

def classify(x_train, y_train, x_test, y_test, num):
    # LSTM-layer parameters
    lstm_num_layers = num[0]
    lstm_units = num[2: 2 + lstm_num_layers]
    lstm_name = list(np.zeros((lstm_num_layers,)))
    # Dense-layer parameters
    lstm_dense_num_layers = num[1]
    lstm_dense_units = num[2 + lstm_num_layers: 2 + lstm_num_layers + lstm_dense_num_layers]
    lstm_dense_name = list(np.zeros((lstm_dense_num_layers,)))
    lstm_dense_dropout_name = list(np.zeros((lstm_dense_num_layers,)))
    lstm_dense_batch_name = list(np.zeros((lstm_dense_num_layers,)))
    inputs_lstm = layers.Input(shape=(x_train.shape[1], x_train.shape[2]))
    for i in range(lstm_num_layers):
        if i == 0:
            inputs = inputs_lstm
        else:
            inputs = lstm_name[i - 1]
        if i == lstm_num_layers - 1:
            return_sequences = False
        else:
            return_sequences = True
        lstm_name[i] = create_lstm(inputs, lstm_units[i], return_sequences)
    for i in range(lstm_dense_num_layers):
        if i == 0:
            inputs = lstm_name[lstm_num_layers - 1]
        else:
            inputs = lstm_dense_batch_name[i - 1]
        lstm_dense_name[i], lstm_dense_dropout_name[i], lstm_dense_batch_name[i] = create_dense(inputs, lstm_dense_units[i])
    outputs_lstm = layers.Dense(10, activation='softmax')(lstm_dense_batch_name[lstm_dense_num_layers - 1])
    LSTM_model = keras.Model(inputs_lstm, outputs_lstm)
    LSTM_model.compile(optimizer=keras.optimizers.Adam(),
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])
    history = LSTM_model.fit(x_train, y_train, batch_size=32, epochs=1, validation_split=0.1, verbose=0)
    print('LSTM finished!')
    # Evaluate the model on the test set
    results = LSTM_model.evaluate(x_test, y_test, verbose=0)
    return results[1]
The first two elements of the list num give the number of LSTM layers and the number of fully connected layers, respectively; the remaining elements give the number of neurons in each layer. The function returns the accuracy on the test set.
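For example, the following hypothetical call (assuming the functions above are in scope) encodes two LSTM layers of 128 units each, followed by Dense layers of 128 and 64 units:

x_train, y_train, x_test, y_test = load()
# num = [LSTM layers, Dense layers, units per LSTM layer..., units per Dense layer...]
num = [2, 2, 128, 128, 128, 64]
acc = classify(x_train, y_train, x_test, y_test, num)
print('Test accuracy:', acc)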
For an introduction to the standard genetic algorithm, see my other article 遗传算法求解最大值问题详解(附python代码) (a detailed walkthrough of solving a maximum-value problem with a genetic algorithm, with Python code).
A standard genetic algorithm is hard to apply directly to this network-architecture optimization problem: the number of layers is itself a gene, so chromosomes vary in length, and crossover and mutation must be prevented from touching the layer-count genes or the unused (padding) positions.
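The workaround used in the code below is to zero-pad every chromosome to a fixed length DNA_SIZE_MAX, so that a fixed-length GA can still operate on it. A minimal sketch of that encoding (the gene values here are hypothetical):

import numpy as np

DNA_SIZE_MAX = 8  # 2 layer-count genes + up to 6 neuron-count genes

# Chromosome layout: [LSTM layers, Dense layers, units..., zero padding]
# e.g. 2 LSTM layers (128, 64 units) and 1 Dense layer (32 units):
chromosome = np.zeros((DNA_SIZE_MAX,))
genes = [2, 1, 128, 64, 32]
chromosome[:len(genes)] = genes
print(chromosome)  # [  2.   1. 128.  64.  32.   0.   0.   0.]

With this encoding in place, the full GA script is: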
import numpy as np
import deep_learning as project
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

DNA_SIZE = 2        # number of layer-count genes
DNA_SIZE_MAX = 8    # fixed chromosome length (zero-padded)
POP_SIZE = 20
CROSS_RATE = 0.5
MUTATION_RATE = 0.01
N_GENERATIONS = 40

train_x, train_y, test_x, test_y = project.load()

def get_fitness(x):
    return project.classify(train_x, train_y, test_x, test_y, num=x)

def select(pop, fitness):
    # Roulette-wheel selection: sample with probability proportional to fitness
    idx = np.random.choice(np.arange(POP_SIZE), size=POP_SIZE, replace=True, p=fitness / fitness.sum())
    return pop[idx]

def crossover(parent, pop):
    if np.random.rand() < CROSS_RATE:
        i_ = np.random.randint(0, POP_SIZE, size=1)  # pick another individual
        cross_points = np.random.randint(0, 2, size=DNA_SIZE_MAX).astype(bool)
        for i, point in enumerate(cross_points):
            # Never cross a position where either parent has a zero (padding) gene
            if point and pop[i_, i] * parent[i] == 0:
                cross_points[i] = False
            # Never cross the two layer-count genes
            if point and i < 2:
                cross_points[i] = False
        parent[cross_points] = pop[i_, cross_points]
    return parent

def mutate(child):
    for point in range(DNA_SIZE_MAX):
        if np.random.rand() < MUTATION_RATE:
            # Only mutate neuron-count genes (indices >= 2) that are not padding
            if point >= 2 and child[point] != 0:
                child[point] = np.random.randint(32, 257)
    return child

# Initialize the population: layer counts in [1, 3], then neuron counts in [32, 256]
pop_layers = np.zeros((POP_SIZE, DNA_SIZE), np.int32)
pop_layers[:, 0] = np.random.randint(1, 4, size=(POP_SIZE,))
pop_layers[:, 1] = np.random.randint(1, 4, size=(POP_SIZE,))
pop = np.zeros((POP_SIZE, DNA_SIZE_MAX))
for i in range(POP_SIZE):
    pop_neurons = np.random.randint(32, 257, size=(pop_layers[i].sum(),))
    pop_stack = np.hstack((pop_layers[i], pop_neurons))
    for j, gene in enumerate(pop_stack):
        pop[i][j] = gene

for each_generation in range(N_GENERATIONS):
    fitness = np.zeros([POP_SIZE, ])
    for i in range(POP_SIZE):
        # Strip the zero padding, then convert the genes to ints
        pop_list = list(pop[i])
        for j, each in enumerate(pop_list):
            if each == 0.0:
                pop_list = pop_list[:j]
                break
        for k, each in enumerate(pop_list):
            pop_list[k] = int(each)
        fitness[i] = get_fitness(pop_list)
        print('Generation %d, chromosome %d: fitness = %f' % (each_generation + 1, i + 1, fitness[i]))
        print('Chromosome:', pop_list)
    print("Generation:", each_generation + 1, "Most fitted DNA:", pop[np.argmax(fitness), :],
          "fitness:", fitness[np.argmax(fitness)])
    pop = select(pop, fitness)
    pop_copy = pop.copy()
    for parent in pop:
        child = crossover(parent, pop_copy)
        child = mutate(child)
        parent[:] = child  # write the child back into the population row in place
Within the main loop, the code below strips the zero (padding) elements from a chromosome; for the implementation details, see my other article 删掉ndarray数组中的所有零元素 (removing all zero elements from an ndarray).
for each_generation in range(N_GENERATIONS):
    fitness = np.zeros([POP_SIZE, ])
    for i in range(POP_SIZE):
        pop_list = list(pop[i])
        # Truncate at the first zero gene (the start of the padding)
        for j, each in enumerate(pop_list):
            if each == 0.0:
                pop_list = pop_list[:j]
                break
        # The remaining genes are floats; classify() expects ints
        for k, each in enumerate(pop_list):
            pop_list[k] = int(each)
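For reference (a more compact alternative, not from the original snippet), the strip-and-cast can be done with a single list comprehension. Since layer counts are at least 1 and neuron counts at least 32, zeros only ever appear as trailing padding, so filtering them out is equivalent to truncating at the first zero:

import numpy as np

chromosome = np.array([2., 1., 128., 64., 32., 0., 0., 0.])
pop_list = [int(g) for g in chromosome if g != 0]
print(pop_list)  # [2, 1, 128, 64, 32]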