A common flow for a convolutional stage is:
1. Input → convolution layer → batch normalization layer (optional) → activation function → pooling layer → next layer
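As a minimal sketch (the channel counts here are arbitrary), one such block in PyTorch, in the same order the model below uses:

import torch.nn as nn

block = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=3, padding=1),  # convolution
    nn.BatchNorm2d(32),                          # optional batch normalization
    nn.ReLU(),                                   # activation
    nn.MaxPool2d(kernel_size=2, stride=2)        # pooling halves height and width
)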
Homework: manually try different schedulers and CNN structures, and observe how training differs.
Core modifications:
1. The CNN model class: change the __init__ method so it accepts a configuration list and dynamically builds convolutional blocks of different depths and widths.
2. A run_experiment function: a single driver function that takes the model configuration, optimizer type, learning rate, and scheduler configuration as parameters, encapsulating the complete pipeline from data loading to final evaluation.
3. The main block (if __name__ == "__main__":): define two or more experiment configurations (different CNN structures or schedulers), then call run_experiment on each and compare the results.

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import time
import warnings
warnings.filterwarnings("ignore")
# --- Step 1: Data preparation ---
def get_cifar10_loaders(batch_size=128):
    """Build CIFAR-10 data loaders, with data augmentation for training."""
    print("--- Preparing CIFAR-10 data loaders ---")
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])
    train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
    test_dataset = datasets.CIFAR10(root='./data', train=False, transform=test_transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
    print("✅ Data loaders ready.")
    return train_loader, test_loader
# --- Step 2: Define a more flexible CNN model ---
class FlexibleCNN(nn.Module):
    def __init__(self, config):
        """
        Build the CNN dynamically from a configuration.
        config: a list in which each element is the output channel count of one
        convolutional block, e.g. [32, 64] builds two blocks.
        """
        super(FlexibleCNN, self).__init__()
        layers = []
        in_channels = 3  # the input starts with 3 channels (RGB)
        # Dynamically create the convolutional blocks
        for out_channels in config:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = out_channels  # input channel count for the next block
        self.conv_part = nn.Sequential(*layers)
        # Compute the flattened size after the convolutional part
        # by passing a dummy input tensor through it
        dummy_input = torch.randn(1, 3, 32, 32)
        conv_output_shape = self.conv_part(dummy_input).shape
        flattened_size = conv_output_shape[1] * conv_output_shape[2] * conv_output_shape[3]
        # Create the fully connected classifier
        self.classifier = nn.Sequential(
            nn.Linear(flattened_size, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.conv_part(x)
        x = x.view(x.size(0), -1)  # flatten
        x = self.classifier(x)
        return x
# --- Step 3: Training and evaluation logic ---
def run_experiment(config_name, model_config, optimizer_name, lr, scheduler_config, epochs):
    """Run one complete experiment."""
    print(f"\n{'='*25} Starting experiment: {config_name} {'='*25}")
    # Select the device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    # Get the data
    train_loader, test_loader = get_cifar10_loaders()
    # Initialize the model
    model = FlexibleCNN(model_config).to(device)
    print("\n--- Model structure ---")
    print(model)
    # Initialize the optimizer
    if optimizer_name.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:  # default to SGD
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    print(f"Optimizer: {optimizer_name}, learning rate: {lr}")
    # Initialize the loss function
    criterion = nn.CrossEntropyLoss()
    # Initialize the learning-rate scheduler
    if scheduler_config['name'].lower() == 'steplr':
        scheduler = optim.lr_scheduler.StepLR(optimizer, **scheduler_config['params'])
        print("Scheduler: StepLR")
    elif scheduler_config['name'].lower() == 'reducelronplateau':
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, **scheduler_config['params'])
        print("Scheduler: ReduceLROnPlateau")
    else:
        scheduler = None  # no scheduler
        print("No scheduler")
    # Start training
    start_time = time.time()
    for epoch in range(1, epochs + 1):
        model.train()
        loop = tqdm(train_loader, desc=f"Epoch [{epoch}/{epochs}]", leave=False)
        for data, target in loop:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            loop.set_postfix(loss=loss.item())
        loop.close()
        # Evaluate after each epoch, then update the scheduler
        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += criterion(output, target).item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
        avg_test_loss = test_loss / len(test_loader)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Epoch {epoch} done | avg test loss: {avg_test_loss:.4f} | test accuracy: {accuracy:.2f}%")
        # Update the scheduler
        if scheduler:
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(avg_test_loss)  # ReduceLROnPlateau monitors a metric
            else:
                scheduler.step()  # other schedulers step unconditionally
    end_time = time.time()
    print(f"\n✅ Experiment '{config_name}' done, total time: {end_time - start_time:.2f} s")
    print(f"{'='*60}")
# --- Step 4: Define and run different experiments ---
if __name__ == "__main__":
    # Experiment 1: shallow CNN + StepLR scheduler
    experiment_1_config = {
        "name": "Shallow CNN + StepLR",
        "model_config": [32, 64],  # two conv blocks with 32 and 64 output channels
        "optimizer": "SGD",
        "lr": 0.01,
        "scheduler": {
            "name": "StepLR",
            "params": {"step_size": 5, "gamma": 0.5}  # halve the LR every 5 epochs
        },
        "epochs": 15
    }
    # Experiment 2: deeper CNN + ReduceLROnPlateau scheduler
    experiment_2_config = {
        "name": "Deep CNN + ReduceLROnPlateau",
        "model_config": [32, 64, 128],  # three conv blocks, deeper
        "optimizer": "Adam",
        "lr": 0.001,
        "scheduler": {
            "name": "ReduceLROnPlateau",
            "params": {"mode": 'min', "factor": 0.5, "patience": 2}  # halve the LR after 2 epochs without test-loss improvement
        },
        "epochs": 15
    }
    # Run the experiments
    run_experiment(
        config_name=experiment_1_config["name"],
        model_config=experiment_1_config["model_config"],
        optimizer_name=experiment_1_config["optimizer"],
        lr=experiment_1_config["lr"],
        scheduler_config=experiment_1_config["scheduler"],
        epochs=experiment_1_config["epochs"]
    )
    run_experiment(
        config_name=experiment_2_config["name"],
        model_config=experiment_2_config["model_config"],
        optimizer_name=experiment_2_config["optimizer"],
        lr=experiment_2_config["lr"],
        scheduler_config=experiment_2_config["scheduler"],
        epochs=experiment_2_config["epochs"]
    )
The flexible CNN model (FlexibleCNN): the __init__ method takes a config list such as [32, 64, 128] and automatically builds three convolutional blocks with 32, 64, and 128 output channels, which makes trying networks of different depths very convenient. It also creates a dummy_input, passes it through all convolutional layers, and reads the output shape to automatically compute the flattened size fed into the fully connected classifier, so no matter how you change the number or parameters of the convolutional layers, you never need to compute this value by hand.
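To see this shape inference in action, here is a minimal sketch (assuming the FlexibleCNN class above is in scope; the shapes follow from each MaxPool halving the 32×32 input):

import torch

for cfg in ([32, 64], [32, 64, 128]):
    model = FlexibleCNN(cfg)
    feat = model.conv_part(torch.randn(1, 3, 32, 32))
    print(cfg, "->", tuple(feat.shape))
    # [32, 64]      -> (1, 64, 8, 8),  flattened size 4096
    # [32, 64, 128] -> (1, 128, 4, 4), flattened size 2048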
The experiment framework (the run_experiment function): it builds the configured scheduler (StepLR or ReduceLROnPlateau) and calls the correct .step() method for each (ReduceLROnPlateau must be passed the monitored metric, here avg_test_loss, while the others step unconditionally). Clear experiment comparison: in the main block (if __name__ == "__main__":), the two completely different experiment setups are defined as plain dictionaries.
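As a starting point for the homework, a hypothetical third configuration (illustrative values, not from the lesson) can reuse run_experiment unchanged: any scheduler name other than StepLR or ReduceLROnPlateau falls through to the no-scheduler branch, giving a fixed-learning-rate baseline to compare against.

# Hypothetical baseline: same deep model, no LR scheduling
experiment_3_config = {
    "name": "Deep CNN + fixed LR (baseline)",
    "model_config": [32, 64, 128],
    "optimizer": "Adam",
    "lr": 0.001,
    "scheduler": {"name": "none", "params": {}},  # hits the scheduler = None branch
    "epochs": 15
}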