Next, let's continue with neural networks.
Here is a code example that uses Keras to implement handwritten digit recognition:
from keras.models import Sequential
from keras.layers import Dense
from keras.datasets import mnist
from keras.utils import to_categorical
import numpy as np
# Load the MNIST dataset of handwritten digit images and their labels
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Flatten each image into a 1-D array, since an MLP expects vector input.
# Each image is originally a 28x28 matrix; flattened, it becomes a 784-dimensional vector
train_images = train_images.reshape((-1, 28 * 28))
test_images = test_images.reshape((-1, 28 * 28))
# Normalize pixel values to [0, 1], which helps the model converge faster,
# since gradient-based training behaves better with inputs in a small range
train_images = train_images.astype('float32') / 255
test_images = test_images.astype('float32') / 255
# One-hot encode the labels so the network can treat them as class-probability targets.
# For example, the digit 3 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)
# Build the MLP model
model = Sequential()
# First fully connected layer: 512 neurons with ReLU activation for non-linearity.
# The input shape (28 * 28,) matches the flattened image vector
model.add(Dense(512, activation='relu', input_shape=(28 * 28,)))
# Output layer: 10 neurons for the 10 digit classes; softmax yields a probability distribution
model.add(Dense(10, activation='softmax'))
# Compile the model, specifying the loss function, optimizer, and evaluation metric
model.compile(optimizer='adam',
              # Categorical cross-entropy is the standard loss for multi-class classification
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Train for 5 epochs with batches of 64 samples
model.fit(train_images, train_labels, epochs=5, batch_size=64)
# Evaluate performance on the test set
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('Test accuracy:', test_acc)
The following example builds a simple convolutional neural network (CNN) in Keras for binary image classification (for instance, cats vs. dogs), using randomly generated stand-in data:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import numpy as np
# Assume X_train, y_train are the training data and X_test, y_test the test data,
# with shape (num_samples, height, width, channels).
# Here we simply generate some random stand-in data
X_train = np.random.random((100, 100, 100, 3))
y_train = np.random.randint(0, 2, (100,))
X_test = np.random.random((20, 100, 100, 3))
y_test = np.random.randint(0, 2, (20,))
model = Sequential()
# First convolutional layer: 32 filters of size (3, 3) with ReLU activation;
# the input shape is (100, 100, 3)
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
# Max-pooling layer with a (2, 2) window to reduce spatial dimensions while keeping the dominant features
model.add(MaxPooling2D((2, 2)))
# Second convolutional layer: 64 filters of size (3, 3) to extract higher-level features
model.add(Conv2D(64, (3, 3), activation='relu'))
# Another max-pooling layer
model.add(MaxPooling2D((2, 2)))
# Flatten the convolutional output into a vector so it can feed the dense layers
model.add(Flatten())
# Fully connected layer with 64 neurons and ReLU activation
model.add(Dense(64, activation='relu'))
# Output layer: a single neuron with sigmoid activation, outputting the probability for binary classification (cat vs. dog)
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              # Binary cross-entropy is the standard loss for binary classification
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32)
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
Here is a Keras example of simple time-series forecasting (predicting a sine-wave series):
import numpy as np
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
import matplotlib.pyplot as plt
# Generate a sine-wave time series
time_steps = 100
data = np.sin(np.linspace(0, 10 * np.pi, time_steps))
X = []
y = []
# Build the training data: each sample contains 10 consecutive time steps,
# and the label is the value at the 11th step
for i in range(len(data) - 10):
    X.append(data[i:i + 10])
    y.append(data[i + 10])
X = np.array(X).reshape(-1, 10, 1)
y = np.array(y)
model = Sequential()
# Simple RNN layer with 32 units; input shape (10, 1) means each sample has
# 10 time steps with 1 feature per step
model.add(SimpleRNN(32, input_shape=(10, 1)))
# Fully connected output layer with 1 neuron, predicting the next value
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=50, batch_size=32)
# Predict the next 10 time steps autoregressively
future_prediction = []
current_sequence = data[-10:].reshape(1, 10, 1)
for _ in range(10):
    next_value = model.predict(current_sequence, verbose=0)
    future_prediction.append(next_value[0, 0])
    # Slide the window: shift the sequence left by one step and append the prediction at the end
    current_sequence = np.roll(current_sequence, -1, axis=1)
    current_sequence[0, -1, 0] = next_value[0, 0]
plt.plot(range(len(data)), data, label='Original')
plt.plot(range(len(data), len(data) + len(future_prediction)), future_prediction, label='Prediction')
plt.legend()
plt.show()
Here is a Keras example of simple text sentiment analysis (assuming preprocessed text data and labels are available):
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np
# texts holds the text data; labels holds the sentiment labels (0 = negative, 1 = positive)
texts = ["This is a great product", "I hate this service", "The movie was awesome"]
labels = np.array([1, 0, 1])
# Create a Tokenizer to convert text into integer sequences;
# num_words=1000 keeps only the 1000 most frequent words
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(texts)
X = tokenizer.texts_to_sequences(texts)
# Pad all sequences to the same length, with maxlen set to 100
X = pad_sequences(X, maxlen=100)
model = Sequential()
# Embedding layer mapping word indices to dense low-dimensional vectors;
# 1000 is the vocabulary size and 128 the embedding dimension
model.add(Embedding(1000, 128))
# LSTM layer with 128 units
model.add(LSTM(128))
# Output layer: a single neuron with sigmoid activation, outputting the probability for binary classification
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              # Binary cross-entropy is the standard loss for binary classification
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(X, labels, epochs=10, batch_size=32)
The following example uses the Hugging Face transformers library to generate text with GPT-2:
from transformers import pipeline
# Use the GPT-2 model for text generation. pipeline builds a text-generation pipeline:
# 'text-generation' is the task type, and model='gpt2' selects the pretrained GPT-2 model
generator = pipeline('text-generation', model='gpt2')
# Generate text starting from "Once upon a time", with a maximum length of 50 tokens
# and a single returned sequence
generated_text = generator("Once upon a time", max_length=50, num_return_sequences=1)
print(generated_text[0]['generated_text'])
The following PyTorch example trains a Vision Transformer (ViT) from the timm library on a custom flower dataset:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
import timm
# Define the data transforms: resize images to 224x224, convert to tensors, and normalize
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Load a custom flower dataset: train_dataset for training, test_dataset for testing
train_dataset = datasets.ImageFolder('path_to_train_data', transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataset = datasets.ImageFolder('path_to_test_data', transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)
# Create the ViT model; 'vit_base_patch16_224' is a predefined ViT architecture,
# and num_classes is set from the number of flower categories
model = timm.create_model('vit_base_patch16_224', num_classes=len(train_dataset.classes))
# Cross-entropy loss, the standard choice for multi-class classification
criterion = nn.CrossEntropyLoss()
# Adam optimizer with a learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Train the model
for epoch in range(10):
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        # Compute the loss between the model outputs and the true labels
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            # Take the class index with the highest predicted score
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Epoch {epoch + 1}, Accuracy: {correct / total}')
The following PyTorch example trains a simple generative adversarial network (GAN) to produce MNIST-style handwritten digits:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.utils import save_image
import os
# Preprocess the data: convert images to tensors and normalize to [-1, 1]
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
# Load the MNIST dataset
mnist_dataset = datasets.MNIST(root='./data', train=True,
                               download=True, transform=transform)
dataloader = torch.utils.data.DataLoader(mnist_dataset, batch_size=64, shuffle=True)
# Make sure the output directory for generated images exists
os.makedirs('images', exist_ok=True)
# Generator: maps a random noise vector to an image
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(100, 128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 784),
            nn.Tanh()
        )
    def forward(self, x):
        return self.model(x)
# Discriminator: decides whether an input image is real or generated
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.model(x)
# Instantiate the generator and the discriminator
generator = Generator()
discriminator = Discriminator()
# Binary cross-entropy loss
criterion = nn.BCELoss()
# Adam optimizers for the generator and the discriminator
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
# Training loop
for epoch in range(50):
    for i, (images, _) in enumerate(dataloader):
        images = images.view(-1, 784)
        # Train the discriminator
        optimizer_D.zero_grad()
        real_labels = torch.ones(images.size(0), 1)
        fake_labels = torch.zeros(images.size(0), 1)
        real_outputs = discriminator(images)
        real_loss = criterion(real_outputs, real_labels)
        noise = torch.randn(images.size(0), 100)
        fake_images = generator(noise)
        # detach() so the discriminator update does not backpropagate into the generator
        fake_outputs = discriminator(fake_images.detach())
        fake_loss = criterion(fake_outputs, fake_labels)
        d_loss = real_loss + fake_loss
        d_loss.backward()
        optimizer_D.step()
        # Train the generator: try to make the discriminator label fakes as real
        optimizer_G.zero_grad()
        fake_outputs = discriminator(fake_images)
        g_loss = criterion(fake_outputs, real_labels)
        g_loss.backward()
        optimizer_G.step()
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/50], Step [{i + 1}/{len(dataloader)}], '
                  f'd_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}')
    # Save a grid of generated images after each epoch
    with torch.no_grad():
        noise = torch.randn(64, 100)
        fake_images = generator(noise)
        save_image(fake_images.view(-1, 1, 28, 28),
                   os.path.join('images', f'epoch_{epoch + 1}.png'), nrow=8, normalize=True)
The following PyTorch example trains a simple autoencoder to compress and reconstruct MNIST digits:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
# Preprocess the data: convert images to tensors normalized to [0, 1]
transform = transforms.Compose([
    transforms.ToTensor()
])
# Load the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True,
                               download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = datasets.MNIST(root='./data', train=False,
                              download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)
# Define the autoencoder model
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder: compresses the 784-dimensional input down to a 32-dimensional code
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 32)
        )
        # Decoder: reconstructs the image from the code; sigmoid keeps the output
        # in [0, 1] so it matches the normalized input range
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, 28 * 28),
            nn.Sigmoid()
        )
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# Instantiate the autoencoder
model = Autoencoder()
# Mean squared error between the input and its reconstruction
criterion = nn.MSELoss()
# Adam optimizer with a learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Train the autoencoder
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, _ = data
        inputs = inputs.view(-1, 28 * 28)
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, inputs)
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')
# Evaluate on the test set
model.eval()
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        inputs, _ = data
        inputs = inputs.view(-1, 28 * 28)
        outputs = model(inputs)
        # Plot original and reconstructed images for the first batch
        if i == 0:
            n = 10
            plt.figure(figsize=(20, 4))
            for j in range(n):
                # Original image
                ax = plt.subplot(2, n, j + 1)
                plt.imshow(inputs[j].view(28, 28).numpy(), cmap='gray')
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                # Reconstructed image
                ax = plt.subplot(2, n, j + 1 + n)
                plt.imshow(outputs[j].view(28, 28).numpy(), cmap='gray')
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
            plt.show()
The following PyTorch example implements a simplified capsule network (CapsNet) with dynamic routing-by-agreement and trains it on MNIST:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
# Preprocess the data: convert images to tensors normalized to [0, 1]
transform = transforms.Compose([
    transforms.ToTensor()
])
# Load the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True,
                               download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = datasets.MNIST(root='./data', train=False,
                              download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)
# Capsule layer: a convolutional "primary capsule" layer when kernel_size is given,
# otherwise a fully connected capsule layer with dynamic routing-by-agreement
class CapsuleLayer(nn.Module):
    def __init__(self, in_capsules, in_channels, out_capsules, out_channels,
                 kernel_size=None, stride=None, num_routing=3):
        super(CapsuleLayer, self).__init__()
        self.in_capsules = in_capsules
        self.in_channels = in_channels
        self.out_capsules = out_capsules
        self.out_channels = out_channels
        self.num_routing = num_routing
        if kernel_size:
            # Primary capsules: a convolution whose output is regrouped into capsule vectors
            self.conv = nn.Conv2d(in_channels, out_capsules * out_channels,
                                  kernel_size=kernel_size, stride=stride)
        else:
            # One transformation matrix per (input capsule, output capsule) pair
            self.W = nn.Parameter(0.01 * torch.randn(1, in_capsules, out_capsules,
                                                     out_channels, in_channels))
    def squash(self, tensor, dim=-1):
        # Non-linearity that shrinks short vectors toward 0 and long vectors toward unit length
        squared_norm = (tensor ** 2).sum(dim=dim, keepdim=True)
        scale = squared_norm / (1 + squared_norm)
        return scale * tensor / torch.sqrt(squared_norm + 1e-8)
    def forward(self, x):
        batch_size = x.size(0)
        if hasattr(self, 'conv'):
            # (batch, out_capsules * out_channels, H, W) -> (batch, num_capsules, out_channels)
            x = self.conv(x)
            x = x.view(batch_size, self.out_capsules, self.out_channels, -1)
            x = x.permute(0, 1, 3, 2).contiguous().view(batch_size, -1, self.out_channels)
            return self.squash(x)
        # Prediction vectors u_hat: (batch, in_capsules, out_capsules, out_channels)
        u = x.unsqueeze(2).unsqueeze(-1)
        u_hat = (self.W @ u).squeeze(-1)
        # Routing-by-agreement: iteratively update the coupling logits b_ij
        b_ij = torch.zeros(batch_size, self.in_capsules, self.out_capsules, 1, device=x.device)
        for _ in range(self.num_routing):
            c_ij = torch.softmax(b_ij, dim=2)   # coupling coefficients over output capsules
            s_j = (c_ij * u_hat).sum(dim=1)     # weighted sum: (batch, out_capsules, out_channels)
            v_j = self.squash(s_j)
            # Increase the logit for predictions that agree with the output capsule
            b_ij = b_ij + (u_hat * v_j.unsqueeze(1)).sum(dim=-1, keepdim=True)
        return v_j
# Capsule network: conv layer -> primary capsules -> digit capsules
class CapsuleNet(nn.Module):
    def __init__(self):
        super(CapsuleNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 256, kernel_size=9, stride=1)
        # 32 capsule types of dimension 8 on a 6x6 grid -> 32 * 6 * 6 = 1152 primary capsules
        self.primary_capsules = CapsuleLayer(1, 256, 32, 8, kernel_size=9, stride=2)
        self.digit_capsules = CapsuleLayer(32 * 6 * 6, 8, 10, 16)
    def forward(self, x):
        x = F.relu(self.conv1(x), inplace=True)
        x = self.primary_capsules(x)   # (batch, 1152, 8)
        x = self.digit_capsules(x)     # (batch, 10, 16)
        # The length of each digit capsule serves as the class score
        return torch.sqrt((x ** 2).sum(dim=-1))
# Instantiate the capsule network
model = CapsuleNet()
# Cross-entropy loss on the capsule lengths (the original CapsNet paper uses a margin loss)
criterion = nn.CrossEntropyLoss()
# Adam optimizer with a learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Train the capsule network
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')
# Evaluate on the test set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')
As the examples above show, TensorFlow (via Keras) and PyTorch can express a wide variety of neural networks.
TensorFlow is built around static computation graphs: the complete graph is constructed before any computation runs (this is the classic TensorFlow 1.x model; TensorFlow 2 executes eagerly by default but recovers static graphs through tf.function). This gives it significant advantages in distributed training and optimization, like erecting a large building from a precise blueprint drawn in advance. In Google's cloud services, TensorFlow uses static graphs to distribute computation efficiently across many servers and GPUs. Once the graph is built, it can be automatically parallelized and memory-optimized, laying a solid foundation for complex networks. For example, when training a large-scale image recognition network, the static graph's optimization machinery can process massive amounts of image data quickly.
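As a minimal sketch of this idea, assuming TensorFlow 2, where tf.function traces ordinary Python code into a reusable static graph (the shapes and names here are illustrative only):
import tensorflow as tf
# tf.function traces the Python function once and compiles it into a static graph,
# which TensorFlow can then optimize and re-execute without re-running the Python code
@tf.function
def dense_forward(x, w, b):
    return tf.nn.relu(tf.matmul(x, w) + b)
x = tf.random.normal((4, 784))
w = tf.Variable(tf.random.normal((784, 512)))
b = tf.Variable(tf.zeros((512,)))
y = dense_forward(x, w, b)  # the first call triggers tracing; later calls reuse the compiled graph
print(dense_forward.get_concrete_function(x, w, b).graph)  # the underlying static tf.Graph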
PyTorch uses dynamic computation graphs, built on the fly as the code runs. This is more like improvisation: direct and flexible. Researchers write neural network code much as they would ordinary Python, which makes debugging and rapid model iteration easy. When exploring new architectures, they can adjust the network structure at any point based on experimental results, without rebuilding an entire static graph. For instance, when studying a new recurrent network structure, connections and parameters can be modified in real time while the dynamic graph tracks the computation, helping researchers understand how the model behaves.
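A minimal, hypothetical sketch of what this define-by-run style makes possible (the layer sizes and the data-dependent loop are purely illustrative):
import torch
import torch.nn as nn
class DynamicNet(nn.Module):
    def __init__(self):
        super(DynamicNet, self).__init__()
        self.fc1 = nn.Linear(10, 32)
        self.fc2 = nn.Linear(32, 32)
        self.out = nn.Linear(32, 1)
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        # Ordinary Python control flow: the graph is rebuilt on every forward pass,
        # so the depth of the network can depend on the input data itself
        n_layers = int(x.abs().mean().item() * 3) + 1
        for _ in range(n_layers):
            x = torch.relu(self.fc2(x))
        return self.out(x)
model = DynamicNet()
y = model(torch.randn(4, 10))  # autograd records exactly the operations that actually ran
y.sum().backward()             # gradients flow through the dynamically chosen path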
In practice, if your focus is production, where large-scale distributed training must be efficient and stable, such as industrial-grade image recognition systems or speech recognition services, TensorFlow is a solid choice. Developers first define the nodes and operations of the computation graph, for example specifying the convolution and pooling layers of a CNN and how they connect, then configure the optimizer, loss function, and other parameters to train the model.
If your focus is research and exploration, where you want to try new ideas and architectures quickly, as universities and research institutes do for cutting-edge network research, PyTorch is a better fit. Taking a generative adversarial network (GAN) as an example, a developer can define the generator and discriminator step by step in logical order, as in the GAN example above, easily adjusting structure and parameters during training and refining the model through immediate feedback.
Of course, frameworks and libraries are encapsulations of theory; to implement more complex models, learning the underlying theory is essential.