DAY 43 复习日 CNN训练与Grad-CAM可视化(模块化实现)

目录

Kaggle图像分类项目:

项目结构

一、数据准备模块

1. config/paths.py

2. data/preprocessing.py

3. data/dataset.py

二、模型定义模块

1. models/cnn_model.py

2. models/grad_cam.py

三、训练脚本

train.py

四、可视化模块

1. utils/visualization.py

2. visualize.py

五、实用工具

utils/logger.py

项目执行流程

@浙大疏锦行


Kaggle图像分类项目:

项目结构

text

kaggle_cnn_gradcam/
├── config/
│   ├── paths.py            # 路径配置
│   └── params.py           # 超参数配置
├── data/
│   ├── dataset.py          # 数据集加载
│   └── preprocessing.py    # 数据预处理
├── models/
│   ├── cnn_model.py        # CNN模型定义
│   └── grad_cam.py         # Grad-CAM实现
├── utils/
│   ├── visualization.py    # 可视化工具
│   └── logger.py           # 日志记录
├── train.py                # 训练脚本
└── visualize.py            # 可视化主程序

一、数据准备模块

1. config/paths.py

from pathlib import Path

class Paths:
    """Central registry of the filesystem locations used by the project.

    All locations are relative to the current working directory.

    Attributes:
        dataset_dir: Root folder of the dataset, ``data/<dataset_name>``.
        train_dir: ``train`` sub-folder of ``dataset_dir``.
        test_dir: ``test`` sub-folder of ``dataset_dir``.
        model_save: File the best checkpoint is written to.
        gradcam_output: Folder that receives the Grad-CAM overlays.
    """

    def __init__(self, dataset_name="cats_vs_dogs"):
        """Derive all paths for ``dataset_name`` and create the output folders.

        Args:
            dataset_name: Name of the dataset folder below ``data/``.
        """
        self.dataset_dir = Path(f"data/{dataset_name}")
        self.train_dir = self.dataset_dir / "train"
        self.test_dir = self.dataset_dir / "test"
        self.model_save = Path("saved_models/best_model.pth")
        self.gradcam_output = Path("output/gradcam")

        # Create every directory the project writes into. The checkpoint
        # folder is included because saving to a file inside a missing
        # directory fails instead of creating it.
        self.gradcam_output.mkdir(parents=True, exist_ok=True)
        self.model_save.parent.mkdir(parents=True, exist_ok=True)

2. data/preprocessing.py

from torchvision import transforms

def get_transforms(input_size=224):
    """Build the preprocessing pipelines for training and validation.

    Both pipelines end with the same two steps: conversion to a tensor and
    per-channel normalisation with fixed mean/std constants. They differ
    only in the leading spatial steps: the training pipeline uses a random
    resized crop plus a random horizontal flip, the validation pipeline a
    resize to 256 followed by a center crop.

    Args:
        input_size: Side length passed to the crop transforms.

    Returns:
        A ``(train_transform, val_transform)`` tuple of ``Compose`` objects.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    def _with_tensor_tail(spatial_steps):
        # Append the tensor conversion and normalisation shared by both pipelines.
        tail = [
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ]
        return transforms.Compose(spatial_steps + tail)

    # Random augmentation for training.
    training_pipeline = _with_tensor_tail([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
    ])

    # Deterministic resize + crop for validation.
    validation_pipeline = _with_tensor_tail([
        transforms.Resize(256),
        transforms.CenterCrop(input_size),
    ])

    return training_pipeline, validation_pipeline

3. data/dataset.py

from torchvision.datasets import ImageFolder
from config.paths import Paths

class KaggleDataset(ImageFolder):
    """``ImageFolder`` dataset with a helper that builds a train/validation split."""

    def __init__(self, root, transform=None):
        """Load the images below ``root``, applying ``transform`` to each one."""
        super().__init__(root=root, transform=transform)

    @classmethod
    def create_datasets(cls, input_size=224, train_ratio=0.8, seed=None):
        """Create disjoint training and validation subsets of the train folder.

        The folder is loaded twice, once with the training transform and once
        with the validation transform. A single random permutation of the
        sample indices is then cut in two, so the subsets never share an
        image and the validation part is a random sample rather than the
        tail of the folder listing.

        Args:
            input_size: Crop size forwarded to ``get_transforms``.
            train_ratio: Fraction of the samples assigned to the training
                subset; must lie strictly between 0 and 1.
            seed: Optional integer seed that makes the split reproducible.

        Returns:
            A ``(train_set, val_set)`` tuple of ``torch.utils.data.Subset``.

        Raises:
            ValueError: If ``train_ratio`` is not strictly between 0 and 1.
        """
        # Imported here because the module-level imports of this file do not
        # provide these names.
        import torch
        from torch.utils.data import Subset
        from data.preprocessing import get_transforms

        if not 0.0 < train_ratio < 1.0:
            raise ValueError(
                f"train_ratio must be between 0 and 1, got {train_ratio!r}"
            )

        paths = Paths()
        train_tf, val_tf = get_transforms(input_size)

        augmented_view = cls(root=paths.train_dir, transform=train_tf)
        plain_view = cls(root=paths.train_dir, transform=val_tf)

        generator = None
        if seed is not None:
            generator = torch.Generator().manual_seed(seed)

        # One shared permutation guarantees the two index sets are disjoint.
        total = len(augmented_view)
        permutation = torch.randperm(total, generator=generator).tolist()
        split_at = int(train_ratio * total)

        train_set = Subset(augmented_view, permutation[:split_at])
        val_set = Subset(plain_view, permutation[split_at:])
        return train_set, val_set

二、模型定义模块

1. models/cnn_model.py

import torch.nn as nn

class CNNClassifier(nn.Module):
    """Small three-stage convolutional image classifier.

    ``features`` stacks three ``Conv2d -> ReLU -> MaxPool2d`` stages with
    32, 64 and 128 output channels. The result is globally average-pooled
    to one value per channel and mapped to class scores by one linear layer.

    Args:
        num_classes: Number of output scores produced per image.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # The ReLUs are deliberately not in-place: an in-place ReLU overwrites
        # the output of the preceding convolution, which PyTorch rejects with
        # a RuntimeError when a full backward hook is registered on that
        # convolution (as Grad-CAM style tooling does).
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the class scores for a batch of 3-channel images.

        Args:
            x: Input batch; the first convolution expects 3 channels.

        Returns:
            Tensor with one row per image and ``num_classes`` columns.
        """
        x = self.features(x)  # convolutional feature maps
        x = self.avgpool(x)   # one value per channel
        x = x.flatten(1)      # drop the 1x1 spatial dimensions
        return self.classifier(x)

2. models/grad_cam.py

import torch
import torch.nn.functional as F

class GradCAM:
    """Grad-CAM heatmap generator for one target layer of a model.

    A forward hook on ``target_layer`` stores a copy of the layer output and
    attaches a gradient hook to that output tensor, so the gradient reaching
    the layer output during ``backward`` is stored as well.

    Args:
        model: Module that is called on the input tensor.
        target_layer: Sub-module of ``model`` whose output is visualised.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.activations = None
        self.gradients = None

        # Keep the handle so the hook can be detached again. The gradient is
        # captured with a tensor hook (registered in the forward hook) rather
        # than a module backward hook, because module backward hooks raise an
        # error when a following layer modifies the output in place.
        self._hook_handles = [
            target_layer.register_forward_hook(self._forward_hook),
        ]

    def _forward_hook(self, module, inputs, output):
        """Store the layer output and subscribe to its gradient."""
        # clone(): a later in-place op must not overwrite the stored values.
        self.activations = output.detach().clone()
        if output.requires_grad:
            output.register_hook(self._store_gradient)

    def _store_gradient(self, grad):
        """Store the gradient that arrives at the target layer's output."""
        self.gradients = grad.detach()

    def remove_hooks(self):
        """Detach the forward hook from the target layer."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def generate(self, input_tensor, class_idx=None):
        """Compute the Grad-CAM heatmap for the first sample of the batch.

        Args:
            input_tensor: Batch fed to the model; only sample 0 is explained.
            class_idx: Index of the class score to explain. Defaults to the
                highest-scoring class of sample 0.

        Returns:
            2-D NumPy array with the spatial size of the target layer's
            output, min-max scaled into ``[0, 1]``.

        Raises:
            RuntimeError: If no activation or gradient was captured for the
                target layer during the forward/backward pass.
        """
        self.activations = None
        self.gradients = None

        # Gradients are required even if the caller is inside torch.no_grad().
        with torch.enable_grad():
            output = self.model(input_tensor)
            if class_idx is None:
                class_idx = output[0].argmax()

            # Back-propagate only the selected class score of sample 0.
            self.model.zero_grad()
            one_hot = torch.zeros_like(output)
            one_hot[0, class_idx] = 1
            output.backward(gradient=one_hot)

        if self.activations is None or self.gradients is None:
            raise RuntimeError(
                "Grad-CAM captured no activations/gradients; check that "
                "target_layer is used in the model's forward pass"
            )

        # Channel weights: gradient averaged over the spatial dimensions.
        weights = self.gradients.mean(dim=(2, 3))

        # Weighted sum over channels yields a single (H, W) map.
        cam = (weights[0, :, None, None] * self.activations[0]).sum(dim=0)

        cam = F.relu(cam)
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam.cpu().numpy()

三、训练脚本

train.py

import torch
from torch.utils.data import DataLoader
from models.cnn_model import CNNClassifier
from data.dataset import KaggleDataset
from config.paths import Paths
from utils.logger import setup_logger

def train(num_epochs=10, batch_size=32, lr=0.001):
    """Train the CNN classifier and keep the best checkpoint.

    After every epoch the model is evaluated on the validation subset; the
    weights are written to ``Paths().model_save`` whenever the validation
    accuracy improves on the best value seen so far.

    Args:
        num_epochs: Number of passes over the training subset.
        batch_size: Batch size of both data loaders.
        lr: Learning rate of the Adam optimizer.

    Raises:
        ValueError: If the validation subset is empty, because no accuracy
            can be computed for it.
    """
    logger = setup_logger()
    paths = Paths()

    # Data loading
    train_set, val_set = KaggleDataset.create_datasets()
    if len(val_set) == 0:
        raise ValueError("validation set is empty; cannot compute accuracy")
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)

    # Model initialisation
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CNNClassifier().to(device)

    # Training configuration
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Saving into a missing directory fails, so make sure it exists.
    paths.model_save.parent.mkdir(parents=True, exist_ok=True)

    # Training loop
    best_acc = 0.0
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                preds = model(inputs).argmax(dim=1)
                correct += (preds == labels).sum().item()

        acc = 100 * correct / len(val_set)
        logger.info(f"Epoch {epoch+1}: Val Acc={acc:.2f}%")

        # Keep the best model
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), paths.model_save)
            logger.info(f"New best model saved (Acc={acc:.2f}%)")

if __name__ == "__main__":
    train()

四、可视化模块

1. utils/visualization.py

import matplotlib.pyplot as plt
import numpy as np
import cv2

def plot_gradcam(image, heatmap, output_path, alpha=0.5):
    """Overlay a Grad-CAM heatmap on an image and write the result to disk.

    Args:
        image: Image array in RGB channel order (it is converted RGB -> BGR
            right before writing); ``image.shape[:2]`` sets the output size.
        heatmap: 2-D array of values in ``[0, 1]``; it is resized to the
            image size before being colour-mapped.
        output_path: Destination file; missing parent folders are created.
        alpha: Blend weight of the heatmap; the image gets ``1 - alpha``.

    Raises:
        OSError: If OpenCV reports that the file could not be written.
    """
    from pathlib import Path

    # Resize the heatmap to the image size and scale it to 8-bit.
    heatmap = np.asarray(heatmap, dtype=np.float32)
    heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
    heatmap = np.uint8(255 * np.clip(heatmap, 0.0, 1.0))

    # applyColorMap returns BGR; convert to RGB so both blend operands use the
    # same channel order (otherwise hot regions come out blue, not red).
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    # Blend with the image.
    superimposed_img = heatmap * alpha + image * (1 - alpha)
    superimposed_img = np.clip(superimposed_img, 0, 255).astype(np.uint8)

    # Save the result; imwrite expects BGR and signals failure by returning
    # False instead of raising.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    written = cv2.imwrite(
        str(output_path), cv2.cvtColor(superimposed_img, cv2.COLOR_RGB2BGR)
    )
    if not written:
        raise OSError(f"could not write Grad-CAM image to {output_path}")

2. visualize.py

import torch
from PIL import Image
from config.paths import Paths
from models.cnn_model import CNNClassifier
from models.grad_cam import GradCAM
from data.preprocessing import get_transforms
from utils.visualization import plot_gradcam

def visualize(image_path=None, output_name="cat_example.jpg"):
    """Generate and save a Grad-CAM overlay for one image.

    The best checkpoint is loaded into a fresh ``CNNClassifier``, a heatmap
    is computed for the last convolution of ``model.features``, and the
    overlay is written into ``Paths().gradcam_output``.

    Args:
        image_path: Image to explain. Defaults to ``cat/1.jpg`` inside the
            training folder.
        output_name: File name of the overlay inside the Grad-CAM folder.
    """
    # Imported here because the module-level imports do not provide numpy.
    import numpy as np

    # Setup
    paths = Paths()
    _, val_tf = get_transforms()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the model
    model = CNNClassifier()
    model.load_state_dict(torch.load(paths.model_save, map_location=device))
    model.to(device).eval()

    # Set up Grad-CAM
    target_layer = model.features[-3]  # last convolution (third module from the end)
    gradcam = GradCAM(model, target_layer)

    # Prepare the sample image
    if image_path is None:
        image_path = paths.train_dir / "cat/1.jpg"
    sample_image = Image.open(image_path).convert("RGB")
    input_tensor = val_tf(sample_image).unsqueeze(0).to(device)

    # Compute the heatmap
    heatmap = gradcam.generate(input_tensor)

    # The heatmap describes the resized and center-cropped view the model saw,
    # so overlay it on that view instead of the uncropped original. The last
    # two steps of val_tf are ToTensor and Normalize; the steps before them
    # are the purely geometric ones.
    display_image = sample_image
    for step in val_tf.transforms[:-2]:
        display_image = step(display_image)

    # Visualise
    original_img = np.array(display_image)
    output_path = paths.gradcam_output / output_name
    plot_gradcam(original_img, heatmap, output_path)
    print(f"Grad-CAM结果已保存至: {output_path}")

if __name__ == "__main__":
    visualize()

五、实用工具

utils/logger.py

import logging
from datetime import datetime

def setup_logger(log_dir="logs"):
    """Return the ``CNN_Trainer`` logger, configuring it on first use.

    On the first call a timestamped log file is created inside ``log_dir``
    (the directory is created if missing) and a file handler plus a console
    handler are attached. Later calls return the already configured logger
    unchanged, so messages are never emitted more than once per handler.

    Args:
        log_dir: Directory that receives the log file.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    from pathlib import Path

    logger = logging.getLogger("CNN_Trainer")
    logger.setLevel(logging.INFO)

    # Already configured by an earlier call: do not stack duplicate handlers.
    if logger.handlers:
        return logger

    # File handler; FileHandler does not create missing directories itself.
    log_directory = Path(log_dir)
    log_directory.mkdir(parents=True, exist_ok=True)
    log_file = log_directory / f"train_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(logging.INFO)

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)

    # Shared format
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    for handler in (file_handler, console_handler):
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

项目执行流程

  1. 准备数据

    mkdir -p data/cats_vs_dogs/train
    # 将Kaggle数据集按类别放入train目录:每个类别一个子目录(例如 train/cat/1.jpg),ImageFolder 以子目录名作为类别标签

  2. 训练模型

    python train.py

  3. 可视化结果

    python visualize.py

@浙大疏锦行

你可能感兴趣的:(cnn,人工智能,神经网络)