这次我们来讨论CNN——卷积神经网络
CNN广泛应用在图像识别等领域
之所以CNN能有效识别图像,是因为图片具有以下三个特点:(1)要识别的模式通常远小于整张图片(局部性);(2)相同的模式可能出现在图片的不同位置(平移性);(3)对图片做下采样(缩小)不会改变其中的物体。前两点由卷积层利用,第三点由池化层利用
以下是CNN的模型定义
class CNN(tf.keras.Model):
    """A small convolutional network: two conv/pool stages followed by an MLP head."""

    def __init__(self):
        super().__init__()
        # filters: number of convolution kernels, i.e. the number of output channels.
        # kernel_size: spatial size of each kernel.
        # strides: step the kernel moves each time (defaults to [1, 1]).
        # padding: 'same' pads the borders so the spatial size is preserved;
        #          'valid' adds no padding, so the spatial size shrinks.
        self.conv1 = tf.keras.layers.Conv2D(
            filters=16, kernel_size=[8, 8], strides=[1, 1], padding='same')
        # pool_size=[2, 2]: keep the max of every 2x2 patch, halving each spatial side.
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2])
        self.conv2 = tf.keras.layers.Conv2D(
            filters=32, kernel_size=[8, 8], strides=[1, 1], padding='same')
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2])
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=50, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10, activation=tf.nn.softmax)

    def call(self, inputs):
        """Forward pass: conv/pool twice, flatten, then the dense head."""
        features = self.pool1(self.conv1(inputs))
        features = self.pool2(self.conv2(features))
        hidden = self.dense1(self.flatten(features))
        return self.dense2(hidden)
注意:卷积层的输入需要形如 [batch, height, width, channels] 的四维张量,而 MNIST 灰度图本身只有 (28, 28) 两维,所以要在末尾给图片增加一个通道维度
注意:CNN输出的张量要经过flatten层才能输入MLP
程序的其他部分和上节的MLP一样
完整代码如下
'''
@Description:
@Version: Python 3.7.4 , tensorflow 2.0.0
@Author: Wu Nannan
@Email: [email protected]
@Date: 2020/3/1 13:47
'''
import numpy as np
import tensorflow as tf
class CNN(tf.keras.Model):
    """CNN classifier for MNIST: (Conv2D -> MaxPool) x 2, then Flatten -> Dense -> Dense."""

    def __init__(self):
        super().__init__()
        # Two convolution/pooling stages: 'same' padding keeps the spatial size,
        # and each 2x2 max-pool halves it.
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=[8, 8],
                                            strides=[1, 1], padding='same')
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2])
        self.conv2 = tf.keras.layers.Conv2D(filters=32, kernel_size=[8, 8],
                                            strides=[1, 1], padding='same')
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2])
        # Flatten the feature maps before the fully-connected head.
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=50, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10, activation=tf.nn.softmax)

    def call(self, inputs):
        """Forward pass; returns per-class probabilities (softmax over 10 digits)."""
        x = inputs
        # Apply the layers strictly in their defined order.
        for layer in (self.conv1, self.pool1, self.conv2, self.pool2,
                      self.flatten, self.dense1, self.dense2):
            x = layer(x)
        return x
class MNISTLoader():
    """Loads MNIST and serves random mini-batches of the training split."""

    def __init__(self):
        (self.x_train, self.y_train), (self.x_test, self.y_test) = \
            tf.keras.datasets.mnist.load_data()
        # Normalize pixels to [0, 1] and append a trailing channel axis, so a
        # batch of images is the 4-D (batch, 28, 28, 1) tensor Conv2D expects.
        self.x_train = np.expand_dims(self.x_train.astype(np.float32) / 255.0, axis=-1)
        self.x_test = np.expand_dims(self.x_test.astype(np.float32) / 255.0, axis=-1)
        # Integer labels, as required by the sparse categorical loss/metric.
        self.y_train = self.y_train.astype(np.int32)
        self.y_test = self.y_test.astype(np.int32)
        self.train_nums = self.x_train.shape[0]
        self.test_nums = self.x_test.shape[0]

    def get_batch(self, batch_size):
        """Return a uniformly random (with replacement) batch of training pairs."""
        picks = np.random.randint(0, self.train_nums, batch_size)
        return self.x_train[picks, :, :], self.y_train[picks]
model = CNN()                                        # instantiate the model
optimizer = tf.optimizers.Adam(learning_rate=0.001)  # instantiate the optimizer
DataLoader = MNISTLoader()                           # instantiate the data loader
epochs = 5
batch_size = 50

# ---- Training ----
# Total gradient steps = batches per epoch * number of epochs.
num_batches = int(DataLoader.train_nums // batch_size * epochs)
for batch_index in range(num_batches):
    x, y = DataLoader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(x)
        # Sparse CE: labels are integer class ids, predictions are softmax probs.
        loss = tf.keras.losses.sparse_categorical_crossentropy(
            y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    # Use trainable_variables, not variables: gradients exist only for trainable
    # weights, and `variables` would include (and try to update) non-trainable
    # state if the model ever gained any (e.g. BatchNorm moving averages).
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))

# ---- Evaluation ----
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(DataLoader.test_nums // batch_size)
for batch_index in range(num_batches):
    # Walk the test set in contiguous, non-overlapping slices.
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    y_pred = model.predict(DataLoader.x_test[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=DataLoader.y_test[start_index: end_index],
                                             y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())
利用 sparse_categorical_accuracy.result() 输出最终的正确率,大约为 98.8%