# 作业:day43的时候我们安排大家对自己找的数据集用简单cnn训练,现在可以尝试下借助这几天的知识来实现精度的进一步提高
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
# 1. Dataset definition
class MusicInstrumentDataset(Dataset):
    """Folder-per-class image dataset driven by a stats CSV.

    The CSV is expected to provide a 'class' column (one row per class
    folder under `root_dir`) and an 'image_count' column. Images are the
    numerically-named `*.jpg` files inside each class folder.
    """

    def __init__(self, root_dir, csv_file, transform=None):
        self.root_dir = root_dir
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        # Class order follows first appearance in the CSV.
        self.classes = self.data['class'].unique().tolist()
        self.total_images = sum(self.data['image_count'])
        self.image_paths = []
        self.labels = []
        # Map class name -> integer label once, instead of list.index() per row.
        label_of = {name: idx for idx, name in enumerate(self.classes)}
        for _, row in self.data.iterrows():
            class_name = row['class']
            class_dir = os.path.join(self.root_dir, class_name)
            # Numeric sort, e.g. 2.jpg before 10.jpg. Assumes filenames
            # are "<int>.jpg" — TODO confirm against the actual folders.
            jpg_files = sorted(
                (f for f in os.listdir(class_dir) if f.endswith('.jpg')),
                key=lambda fname: int(fname.split('.')[0]),
            )
            for fname in jpg_files:
                self.image_paths.append(os.path.join(class_dir, fname))
                self.labels.append(label_of[class_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Force RGB so grayscale/CMYK files still yield 3 channels.
        img = Image.open(self.image_paths[idx]).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, self.labels[idx]
# 2. Data preprocessing: augmentation + ImageNet normalization.
# NOTE(review): this single pipeline is also used for the test split
# (see the loader section) — evaluation therefore sees random
# augmentation; confirm that is intended.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # color jitter (added)
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # random crop + resize to 224x224
    transforms.ToTensor(),
    # ImageNet channel statistics, matching the normalization the
    # visualization code later inverts.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
# 3. Build the dataset and train/test loaders.
dataset = MusicInstrumentDataset(
    root_dir="d:\\python打卡\\day43\\music_instruments",
    csv_file="d:\\python打卡\\day43\\music_instruments\\dataset_stats.csv",
    transform=transform
)
# 80/20 split; the seeded generator makes the split reproducible.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=generator
)
# Shuffle only the training loader; evaluation order is fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
# 4. CNN model definition
class MusicInstrumentCNN(nn.Module):
    """Two-stage CNN: conv/pool feature extractor + MLP classifier.

    Expects 3x224x224 input; two 2x-pools reduce it to 64x56x56 before
    the fully-connected head.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Feature extractor: two conv blocks, each halving spatial size.
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classifier head with dropout regularization.
        self.classifier = nn.Sequential(
            nn.Linear(64 * 56 * 56, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        feats = self.features(x)
        flat = feats.flatten(1)  # keep batch dim, flatten the rest
        return self.classifier(flat)
# 5. Training setup
# Use the GPU when available; model and batches are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MusicInstrumentCNN(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
# Adam with a small weight decay for L2-style regularization.
optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-4)
def train_model(model, train_loader, test_loader, criterion, optimizer, device, num_epochs):
    """Train `model`, evaluating on `test_loader` after every epoch.

    A ReduceLROnPlateau scheduler halves the learning rate after two
    epochs without test-loss improvement. When training finishes, the
    loss/accuracy curves are plotted.

    Args:
        model: network to optimize (already on `device`).
        train_loader: DataLoader yielding (images, labels) for training.
        test_loader: DataLoader for per-epoch evaluation.
        criterion: loss function (e.g. CrossEntropyLoss).
        optimizer: optimizer whose learning rate the scheduler adjusts.
        device: torch.device the batches are moved to.
        num_epochs: number of epochs; must be >= 1.

    Returns:
        Final-epoch test accuracy in percent.

    Raises:
        ValueError: if num_epochs < 1 (previously a bare IndexError).
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be >= 1")
    train_losses, test_losses = [], []
    train_accs, test_accs = [], []
    # Halve the LR after `patience` epochs without test-loss improvement.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)
    for epoch in range(num_epochs):
        epoch_loss, epoch_acc = _train_one_epoch(
            model, train_loader, criterion, optimizer, device, epoch, num_epochs)
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)
        test_epoch_loss, test_epoch_acc = _evaluate(model, test_loader, criterion, device)
        test_losses.append(test_epoch_loss)
        test_accs.append(test_epoch_acc)
        # The scheduler steps on the *test* loss, not the train loss.
        scheduler.step(test_epoch_loss)
        print(f'Epoch {epoch+1}/{num_epochs} - '
              f'Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.2f}%, '
              f'Test Loss: {test_epoch_loss:.4f}, Test Acc: {test_epoch_acc:.2f}%')
    _plot_history(train_losses, test_losses, train_accs, test_accs)
    return test_accs[-1]


def _train_one_epoch(model, loader, criterion, optimizer, device, epoch, num_epochs):
    """Run one optimization pass over `loader`; return (mean loss, accuracy %)."""
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (images, labels) in enumerate(loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()
        running_loss += batch_loss
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # Progress log every 50 batches.
        if (batch_idx + 1) % 50 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(loader)}], '
                  f'Batch Loss: {batch_loss:.4f}, Avg Loss: {running_loss/(batch_idx+1):.4f}')
    return running_loss / len(loader), 100 * correct / total


def _evaluate(model, loader, criterion, device):
    """Evaluate `model` on `loader` without gradients; return (mean loss, accuracy %)."""
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return total_loss / len(loader), 100 * correct / total


def _plot_history(train_losses, test_losses, train_accs, test_accs):
    """Plot side-by-side loss and accuracy curves for train/test."""
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Test Loss')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(train_accs, label='Train Accuracy')
    plt.plot(test_accs, label='Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Training and Test Accuracy')
    plt.legend()
    plt.tight_layout()
    plt.show()
# Run training for 30 epochs and report the final test accuracy.
num_epochs = 30
print("开始训练模型...")
final_acc = train_model(model, train_loader, test_loader, criterion, optimizer, device, num_epochs)
print(f"训练完成! 最终测试准确率: {final_acc:.2f}%")
# 6. Grad-CAM visualization
# Target the LAST Conv2d in the feature extractor. The original code used
# model.features[-2], which is actually the second ReLU, not a conv layer,
# contradicting its own comment; select the conv module explicitly instead.
conv_layers = [m for m in model.features if isinstance(m, nn.Conv2d)]
target_layer = conv_layers[-1]
cam = GradCAM(model=model, target_layers=[target_layer])
# Take the first image of the first test batch for visualization.
images, labels = next(iter(test_loader))
input_tensor = images[0:1].to(device)
# targets=None -> explain the model's highest-scoring class.
grayscale_cam = cam(input_tensor=input_tensor, targets=None)
# Min-max rescale the normalized tensor into [0, 1] for display; the
# epsilon guards against division by zero on a constant image.
rgb_img = images[0].permute(1, 2, 0).cpu().numpy()
rgb_img = (rgb_img - rgb_img.min()) / (rgb_img.max() - rgb_img.min() + 1e-8)
visualization = show_cam_on_image(rgb_img, grayscale_cam[0], use_rgb=True)
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.title("Original Image")
plt.imshow(rgb_img)
plt.axis('off')
plt.subplot(1, 2, 2)
plt.title("Grad-CAM")
plt.imshow(visualization)
plt.axis('off')
plt.show()
# (二)改进
def visualize_feature_maps(model, test_loader, device, layer_names, num_images=3, num_channels=9):
    """Visualize feature maps of selected layers inside `model.features`.

    Args:
        model: trained network exposing a `features` nn.Sequential.
        test_loader: DataLoader yielding (images, labels) test batches.
        device: device to run the forward pass on.
        layer_names: child names inside model.features. nn.Sequential
            children are addressed by string index, e.g. ['0', '3'].
        num_images: number of test images to visualize (one figure each);
            clamped to the number of available samples.
        num_channels: how many leading channels to show per layer.
    """
    model.eval()
    class_names = ['accordion', 'banjo', 'drum', 'flute', 'guitar', 'harmonica', 'saxophone', 'sitar', 'tabla', 'violin']
    # Collect just enough batches to cover num_images. Count actual
    # samples: the original used len(batches) * loader.batch_size, which
    # over-counts when the last batch is short.
    images_list, labels_list = [], []
    collected = 0
    for images, labels in test_loader:
        images_list.append(images)
        labels_list.append(labels)
        collected += images.size(0)
        if collected >= num_images:
            break
    images = torch.cat(images_list, dim=0)[:num_images].to(device)
    labels = torch.cat(labels_list, dim=0)[:num_images].to(device)
    # Clamp in case the test set holds fewer than num_images samples.
    num_images = images.size(0)

    # Capture the chosen layers' outputs via forward hooks.
    feature_maps = {}
    hooks = []

    def hook(module, input, output, name):
        feature_maps[name] = output.cpu()

    for name in layer_names:
        module = getattr(model.features, name, None)
        if module is not None:
            # n=name binds the current value (avoids late-binding closures).
            hooks.append(module.register_forward_hook(lambda m, i, o, n=name: hook(m, i, o, n)))
    try:
        with torch.no_grad():
            _ = model(images)  # forward pass triggers the hooks
    finally:
        # Always detach hooks, even if the forward pass raises.
        for hook_handle in hooks:
            hook_handle.remove()

    for img_idx in range(num_images):
        img = images[img_idx].cpu().permute(1, 2, 0).numpy()
        # Undo the ImageNet normalization for display.
        img = img * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
        img = np.clip(img, 0, 1)
        num_layers = len(layer_names)
        fig, axes = plt.subplots(1, num_layers + 1, figsize=(4 * (num_layers + 1), 4))
        # subplots returns a bare Axes when there is a single panel.
        axes = np.atleast_1d(axes)
        axes[0].imshow(img)
        # int(...) so a 0-dim tensor indexes the Python list safely.
        axes[0].set_title(f'原始图像\n类别: {class_names[int(labels[img_idx])]}')
        axes[0].axis('off')
        for layer_idx, layer_name in enumerate(layer_names):
            layer_ax = axes[layer_idx + 1]
            layer_ax.axis('off')
            if layer_name not in feature_maps:
                continue  # layer name did not resolve; leave the panel blank
            layer_ax.set_title(f'{layer_name}特征图 \n')
            fm = feature_maps[layer_name][img_idx][:num_channels]
            shown = fm.shape[0]
            num_rows = max(1, int(np.sqrt(shown)))
            # Ceil so every channel gets a cell (the original floor
            # division dropped channels for non-square counts).
            num_cols = int(np.ceil(shown / num_rows))
            for ch_idx, channel in enumerate(fm):
                ax = layer_ax.inset_axes([ch_idx % num_cols / num_cols,
                                          (num_rows - 1 - ch_idx // num_cols) / num_rows,
                                          1 / num_cols, 1 / num_rows])
                ax.imshow(channel.numpy(), cmap='viridis')
                ax.set_title(f'通道 {ch_idx + 1}')
                ax.axis('off')
        plt.tight_layout()
        plt.show()
# Example call (adjust parameters as needed).
# NOTE(review): model.features children are Conv(0), ReLU(1), MaxPool(2),
# Conv(3), ReLU(4), MaxPool(5) — so '2' addresses the first MaxPool2d,
# not the second conv layer; use '3' if conv feature maps are intended.
layer_names = ['0', '2']
visualize_feature_maps(
    model=model,
    test_loader=test_loader,
    device=device,
    layer_names=layer_names,
    num_images=5,  # visualize 5 test images -> 5 figures
    num_channels=9  # show the first 9 channels per layer
)