6.2 打卡

DAY 43 复习日

作业:

kaggle找到一个图像数据集,用cnn网络进行训练并且用grad-cam做可视化

进阶:将代码拆分成多个文件

image_classification_gradcam/
├── config.py             # 配置文件:路径、超参数等
├── data_loader.py        # 数据加载和预处理
├── model.py              # CNN 模型定义
├── train.py              # 训练和评估逻辑
├── visualize.py          # Grad-CAM 可视化逻辑
├── main.py               # 项目主入口,协调各模块
├── requirements.txt      # Python 依赖
├── README.md             # 项目说明
├── trained_model/        # 训练好的模型将保存在这里
├── gradcam_output/       # Grad-CAM 可视化结果将保存在这里
└── data/                 # 你的数据集应该放在这里
    ├── train/
    │   ├── class_A/
    │   │   ├── img1.jpg
    │   │   └── ...
    │   └── class_B/
    │       ├── img2.jpg
    │       └── ...
    └── val/
        ├── class_A/
        └── class_B/
# config.py
import os

# --- Project paths ---
# Everything is resolved relative to the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, 'data')
TRAINED_MODEL_DIR = os.path.join(BASE_DIR, 'trained_model')
GRADCAM_OUTPUT_DIR = os.path.join(BASE_DIR, 'gradcam_output')

# Make sure the output directories exist before anything tries to write to them.
os.makedirs(TRAINED_MODEL_DIR, exist_ok=True)
os.makedirs(GRADCAM_OUTPUT_DIR, exist_ok=True)

# --- Data parameters ---
IMAGE_SIZE = (224, 224)  # Image size as (H, W)
NUM_CLASSES = 2          # Cats and dogs: two classes
CLASS_NAMES = ['cat', 'dog']  # Class names, used when labelling visualizations

# --- Training parameters ---
BATCH_SIZE = 32
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
MODEL_SAVE_PATH = os.path.join(TRAINED_MODEL_DIR, 'best_cnn_model.pth')

# --- Grad-CAM parameters ---
# Name of the target layer, exactly as reported by `model.named_modules()`.
# SimpleCNN stores its convolutional stack in one flat nn.Sequential called
# `conv_blocks`, so its sub-modules are addressed by integer index:
#   0 Conv, 1 BN, 2 ReLU, 3 Pool, 4 Conv, ..., 12 Conv, 13 BN, 14 ReLU, 15 Pool
# The last convolution (block 4) is therefore 'conv_blocks.12'. A name such as
# 'conv_blocks.3.conv' does not exist in that model and makes Grad-CAM abort.
# If you change the architecture, visualize.py prints every available layer
# name when the lookup fails, which helps to pick the right one.
GRAD_CAM_TARGET_LAYER = 'conv_blocks.12'
# Explicit list of image paths to visualize; leave empty to sample randomly
# from the validation set, e.g. ['./data/val/cat/some_cat_image.jpg', ...].
GRAD_CAM_IMAGES_TO_VISUALIZE = []
NUM_GRAD_CAM_IMAGES = 5  # Number of images to draw when sampling randomly
# data_loader.py
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

def get_dataloaders(data_dir, image_size, batch_size, num_workers=4):
    """
    Build the training and validation DataLoaders.

    Both splits are read with ``ImageFolder`` from ``<data_dir>/train`` and
    ``<data_dir>/val``. Training images are additionally flipped horizontally
    at random; both splits are resized, converted to tensors and normalized
    with the same mean/std constants.

    Args:
        data_dir (str): Dataset root directory (e.g., './data').
        image_size (tuple): Target image size (H, W).
        batch_size (int): Batch size for both loaders.
        num_workers (int): Number of loader worker processes. Defaults to 4,
            the value that used to be hard-coded; use 0 to load in the main
            process.

    Returns:
        tuple: (train_loader, val_loader, class_names)
    """
    # Shared by both pipelines so the two splits can never drift apart.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    val_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=train_transform)
    val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Class names as discovered from the training folder (typically alphabetical).
    class_names = train_dataset.classes
    if val_dataset.classes != class_names:
        # Label indices are assigned per dataset from its own folder list, so a
        # mismatch means validation labels do not line up with training labels.
        print(f"Warning: validation classes {val_dataset.classes} differ from training classes {class_names}")
    print(f"Detected classes: {class_names}")
    print(f"Number of training samples: {len(train_dataset)}")
    print(f"Number of validation samples: {len(val_dataset)}")

    return train_loader, val_loader, class_names

if __name__ == '__main__':
    # Smoke test: build the loaders from the project config and peek at one batch.
    from config import DATA_DIR, IMAGE_SIZE, BATCH_SIZE
    train_loader, val_loader, class_names = get_dataloaders(DATA_DIR, IMAGE_SIZE, BATCH_SIZE)

    # Only the first batch is inspected; an empty loader prints nothing.
    first_batch = next(iter(train_loader), None)
    if first_batch is not None:
        batch_images, batch_labels = first_batch
        print(f"Batch images shape: {batch_images.shape}")
        print(f"Batch labels shape: {batch_labels.shape}")
        print(f"First 5 labels: {batch_labels[:5]}")
# model.py
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """
    Four-block convolutional classifier for RGB images.

    Each block is Conv(3x3, padding 1) -> BatchNorm -> ReLU -> MaxPool(2x2),
    with 32, 64, 128 and 256 output channels. The blocks live in one flat
    ``nn.Sequential`` named ``conv_blocks``, so the convolutions are addressed
    as ``conv_blocks.0``, ``conv_blocks.4``, ``conv_blocks.8`` and
    ``conv_blocks.12`` (the last one is the usual Grad-CAM target).

    Args:
        num_classes (int): Number of output logits.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.conv_blocks = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: (32, 112, 112) for 224x224 input

            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: (64, 56, 56)

            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: (128, 28, 28)

            # Block 4 (its Conv layer is the usual Grad-CAM target)
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # Output: (256, 14, 14)
        )

        # The classifier below expects a 256 x 14 x 14 feature map, which is
        # what a 224x224 input produces after four 2x poolings (224 / 16 = 14).
        # Adaptive pooling pins the map to 14x14 for any other input size; for
        # 224x224 inputs it averages 1x1 windows, i.e. it is the identity. It
        # has no parameters, so existing checkpoints still load unchanged.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((14, 14))

        # Fully connected classifier on the flattened 256 * 14 * 14 features.
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 14 * 14, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.conv_blocks(x)
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)  # Flatten everything except the batch dimension
        x = self.fc_layers(x)
        return x

if __name__ == '__main__':
    # Smoke test: print the architecture and push one random image through it.
    net = SimpleCNN(num_classes=2)
    print(net)

    # One 3-channel 224x224 image; the logits should come out as torch.Size([1, 2]).
    sample = torch.randn(1, 3, 224, 224)
    logits = net(sample)
    print(f"Output shape: {logits.shape}")
# train.py
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import os

def _run_epoch(model, loader, criterion, device, progress_desc, optimizer=None):
    """
    Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    The pass is a training pass (gradients on, weights updated) when an
    ``optimizer`` is given, and an evaluation pass (eval mode, no gradients)
    otherwise.

    Raises:
        ValueError: If the loader yields no samples, since loss and accuracy
            would be undefined.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    num_correct = 0
    num_samples = 0

    progress = tqdm(loader, desc=progress_desc)
    with torch.set_grad_enabled(is_training):
        for inputs, labels in progress:
            inputs, labels = inputs.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            # The criterion returns the batch mean, so weight it by the batch
            # size to get a correct per-sample average over the whole epoch.
            loss_sum += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            num_samples += labels.size(0)
            num_correct += (predicted == labels).sum().item()
            progress.set_postfix(loss=loss.item())

    if num_samples == 0:
        raise ValueError(f"No samples were produced by the data loader for '{progress_desc}'.")
    return loss_sum / num_samples, num_correct / num_samples


def train_model(model, train_loader, val_loader, num_epochs, learning_rate, device, model_save_path):
    """
    Train the model and evaluate it after every epoch.

    Uses cross-entropy loss with the Adam optimizer. The weights
    (``state_dict``) of the epoch with the highest validation accuracy so far
    are written to ``model_save_path``; the first epoch is always saved so a
    checkpoint exists even if validation accuracy never rises above zero.

    Args:
        model (nn.Module): CNN model.
        train_loader (DataLoader): Training data loader.
        val_loader (DataLoader): Validation data loader.
        num_epochs (int): Number of training epochs.
        learning_rate (float): Learning rate.
        device (torch.device): Training device (CPU or GPU).
        model_save_path (str): Where to save the best model weights.

    Raises:
        ValueError: If either loader yields no samples.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # torch.save does not create missing parent directories.
    save_dir = os.path.dirname(model_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Start below any reachable accuracy so the first epoch is always saved.
    best_val_accuracy = float("-inf")

    model.to(device)

    print(f"Starting training on {device}...")
    for epoch in range(num_epochs):
        epoch_train_loss, epoch_train_accuracy = _run_epoch(
            model, train_loader, criterion, device,
            f"Epoch {epoch+1}/{num_epochs} [Train]", optimizer=optimizer,
        )
        print(f"Epoch {epoch+1} Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_accuracy:.4f}")

        epoch_val_loss, epoch_val_accuracy = _run_epoch(
            model, val_loader, criterion, device,
            f"Epoch {epoch+1}/{num_epochs} [Validation]",
        )
        print(f"Epoch {epoch+1} Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_accuracy:.4f}")

        # Keep only the best checkpoint (strict improvement, so ties keep the earlier epoch).
        if epoch_val_accuracy > best_val_accuracy:
            best_val_accuracy = epoch_val_accuracy
            torch.save(model.state_dict(), model_save_path)
            print(f"Saved best model with Val Acc: {best_val_accuracy:.4f} to {model_save_path}")

    print("Training finished!")
# visualize.py
import torch
import numpy as np
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
from pytorch_grad_cam import GradCAM, HiResCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
import random

def get_target_layer_from_name(model, layer_name):
    """
    Look up a sub-module of ``model`` by its ``named_modules()`` name.

    Returns the matching module. If no module has that name, the available
    names are printed to help pick a valid one and a ValueError is raised.
    """
    modules_by_name = dict(model.named_modules())

    if layer_name in modules_by_name:
        found = modules_by_name[layer_name]
        print(f"Found target layer: {layer_name} (Type: {type(found)})")
        return found

    print(f"Error: Target layer '{layer_name}' not found in model.")
    print("Available layers:")
    for available_name in modules_by_name:
        print(f"- {available_name}")
    raise ValueError(f"Target layer '{layer_name}' not found.")

def visualize_grad_cam(model, image_paths, class_names, target_layer_name, output_dir, device, image_size):
    """
    Run Grad-CAM on the given images and save the heat-map overlays.

    For every image the model's predicted class is used as the Grad-CAM
    target. The heat map is blended onto the image resized to ``image_size``
    (the same pixels the model saw) and written to ``output_dir`` as
    ``cam_<class>_<prob>_<original name>.jpg``. A failure on one image is
    reported and does not stop the remaining images.

    Args:
        model (nn.Module): Trained model.
        image_paths (list): Paths of the image files to visualize.
        class_names (list): Class names (e.g., ['cat', 'dog']).
        target_layer_name (str): ``named_modules()`` name of the Grad-CAM target layer.
        output_dir (str): Directory where the results are saved.
        device (torch.device): Device to run on (CPU or GPU).
        image_size (tuple): Model input image size (H, W).
    """
    device = torch.device(device)  # also accept plain strings such as 'cpu'
    model.eval()
    model.to(device)

    # Resolve the target layer; without it there is nothing to visualize.
    try:
        target_layer = get_target_layer_from_name(model, target_layer_name)
    except ValueError as e:
        print(e)
        return

    os.makedirs(output_dir, exist_ok=True)

    # Preprocessing must match training. It is split in two so the resized
    # PIL image can be reused for the overlay.
    resize = transforms.Resize(image_size)
    to_model_input = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Older pytorch-grad-cam releases take `use_cuda`; newer ones removed it
    # and follow the model's device. Try the old signature, then the new one.
    try:
        cam = GradCAM(model=model, target_layers=[target_layer], use_cuda=(device.type == 'cuda'))
    except TypeError:
        cam = GradCAM(model=model, target_layers=[target_layer])

    print(f"\nGenerating Grad-CAM visualizations for {len(image_paths)} images...")
    for img_path in image_paths:
        print(f"Processing {img_path}...")
        try:
            with Image.open(img_path) as img_file:
                rgb_img = img_file.convert('RGB')

            # The CAM has the spatial size of the model input, so the overlay
            # must be built from the resized image, not the original one;
            # otherwise show_cam_on_image fails on mismatched shapes.
            resized_img = resize(rgb_img)
            # show_cam_on_image expects float32 in the [0, 1] range.
            rgb_img_np = np.float32(resized_img) / 255

            input_tensor = to_model_input(resized_img).unsqueeze(0).to(device)  # add batch dimension

            # Plain forward pass to find the predicted class and its probability.
            with torch.no_grad():
                output = model(input_tensor)
            probabilities = torch.softmax(output, dim=1)[0]
            predicted_index = torch.argmax(probabilities).item()
            predicted_class = class_names[predicted_index]
            predicted_prob = probabilities[predicted_index].item()

            # Explain the predicted class. Use another index here to explain a
            # class the model did not predict.
            targets = [ClassifierOutputTarget(predicted_index)]

            grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
            grayscale_cam = grayscale_cam[0, :]  # drop the batch dimension

            # Blend the heat map onto the (resized) image.
            cam_image = show_cam_on_image(rgb_img_np, grayscale_cam, use_rgb=True)

            filename = os.path.basename(img_path)
            output_filename = f"cam_{predicted_class}_{predicted_prob:.2f}_{os.path.splitext(filename)[0]}.jpg"
            output_path = os.path.join(output_dir, output_filename)

            try:
                plt.imshow(cam_image)
                plt.title(f"Predicted: {predicted_class} ({predicted_prob:.2f})")
                plt.axis('off')
                plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
            finally:
                plt.close()  # always release the figure, even if saving failed
            print(f"Saved Grad-CAM image to: {output_path}")

        except Exception as e:
            # Best effort: report the failure and continue with the next image.
            print(f"Error processing {img_path}: {e}")

def get_random_image_paths(data_dir, num_images):
    """
    Randomly pick image paths from the validation set.

    Scans every class sub-directory of ``<data_dir>/val`` for files ending in
    .png, .jpg or .jpeg (case-insensitive).

    Args:
        data_dir (str): Dataset root directory containing the 'val' split.
        num_images (int): Number of image paths to return.

    Returns:
        list: ``num_images`` distinct paths chosen at random; all available
        paths if fewer exist; an empty list if the 'val' directory is missing
        or ``num_images`` is not positive.
    """
    val_dir = os.path.join(data_dir, 'val')
    if not os.path.isdir(val_dir):
        # Callers treat an empty result as "nothing to visualize".
        print(f"Warning: Validation directory not found: {val_dir}")
        return []
    if num_images <= 0:
        return []

    image_extensions = ('.png', '.jpg', '.jpeg')
    all_image_paths = []
    # Sorted listings make the candidate order, and therefore a seeded
    # random.sample, reproducible across file systems.
    for class_folder in sorted(os.listdir(val_dir)):
        class_path = os.path.join(val_dir, class_folder)
        if not os.path.isdir(class_path):
            continue
        all_image_paths.extend(
            os.path.join(class_path, img_name)
            for img_name in sorted(os.listdir(class_path))
            if img_name.lower().endswith(image_extensions)
        )

    if len(all_image_paths) < num_images:
        print(f"Warning: Only {len(all_image_paths)} images found, requested {num_images}. Using all available.")
        return all_image_paths
    return random.sample(all_image_paths, num_images)

if __name__ == '__main__':
    # Stand-alone smoke test for the Grad-CAM visualization.
    # GRAD_CAM_IMAGES_TO_VISUALIZE must be imported by name: `config` itself is
    # never imported as a module, so `config.<name>` would raise NameError.
    from config import (
        DATA_DIR, IMAGE_SIZE, GRAD_CAM_TARGET_LAYER, GRADCAM_OUTPUT_DIR, CLASS_NAMES,
        MODEL_SAVE_PATH, NUM_GRAD_CAM_IMAGES, GRAD_CAM_IMAGES_TO_VISUALIZE,
    )
    from model import SimpleCNN

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device for visualization: {device}")

    # Instantiate the model with one output per class.
    model = SimpleCNN(num_classes=len(CLASS_NAMES))

    # Load the trained weights; without them the heat maps would be meaningless.
    if not os.path.exists(MODEL_SAVE_PATH):
        print(f"Error: Model not found at {MODEL_SAVE_PATH}. Please train the model first.")
        raise SystemExit(1)
    print(f"Loading trained model from {MODEL_SAVE_PATH}")
    model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))

    # Use the images listed in the config if any, otherwise sample from the
    # validation set.
    if GRAD_CAM_IMAGES_TO_VISUALIZE:
        images_to_visualize = GRAD_CAM_IMAGES_TO_VISUALIZE
    else:
        images_to_visualize = get_random_image_paths(DATA_DIR, NUM_GRAD_CAM_IMAGES)
        if not images_to_visualize:
            print("No images found for visualization. Please check your data directory.")
            raise SystemExit(1)

    visualize_grad_cam(model, images_to_visualize, CLASS_NAMES, GRAD_CAM_TARGET_LAYER, GRADCAM_OUTPUT_DIR, device, IMAGE_SIZE)
# main.py
import torch
import argparse
import os

from config import (
    DATA_DIR, IMAGE_SIZE, BATCH_SIZE, NUM_CLASSES, CLASS_NAMES,
    NUM_EPOCHS, LEARNING_RATE, MODEL_SAVE_PATH,
    GRAD_CAM_TARGET_LAYER, GRADCAM_OUTPUT_DIR,
    GRAD_CAM_IMAGES_TO_VISUALIZE, NUM_GRAD_CAM_IMAGES
)
from data_loader import get_dataloaders
from model import SimpleCNN
from train import train_model
from visualize import visualize_grad_cam, get_random_image_paths

def main():
    """
    Command-line entry point: train the CNN or run Grad-CAM on a trained model.

    ``--mode train`` (default) builds the data loaders and trains the model,
    saving the best weights to MODEL_SAVE_PATH. ``--mode visualize`` loads
    those weights and writes Grad-CAM overlays for either the images listed in
    the config or a random sample of the validation set.
    """
    parser = argparse.ArgumentParser(description="CNN Training and Grad-CAM Visualization")
    parser.add_argument('--mode', type=str, default='train', choices=['train', 'visualize'],
                        help="Mode to run: 'train' for training the model, 'visualize' for Grad-CAM.")
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Build the model; weights are either trained or loaded below.
    model = SimpleCNN(num_classes=NUM_CLASSES)

    if args.mode == 'train':
        print("\n--- Training Mode ---")
        train_loader, val_loader, detected_classes = get_dataloaders(DATA_DIR, IMAGE_SIZE, BATCH_SIZE)
        # A dataset with a different number of classes than the model's output
        # layer would otherwise fail deep inside the loss computation.
        if len(detected_classes) != NUM_CLASSES:
            print(f"Error: Found {len(detected_classes)} classes in the dataset {detected_classes}, "
                  f"but NUM_CLASSES is {NUM_CLASSES}. Please update config.py.")
            return
        train_model(model, train_loader, val_loader, NUM_EPOCHS, LEARNING_RATE, device, MODEL_SAVE_PATH)
        return

    # argparse `choices` rejects anything else, so the mode here is 'visualize'.
    print("\n--- Visualization Mode (Grad-CAM) ---")

    # Load the trained weights.
    if not os.path.exists(MODEL_SAVE_PATH):
        print(f"Error: Model not found at {MODEL_SAVE_PATH}. Please train the model first using 'python main.py --mode train'.")
        return
    print(f"Loading trained model from {MODEL_SAVE_PATH}")
    model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))

    # Decide which images to visualize.
    if GRAD_CAM_IMAGES_TO_VISUALIZE:
        images_to_visualize = GRAD_CAM_IMAGES_TO_VISUALIZE
        print(f"Using specified images for Grad-CAM: {images_to_visualize}")
    else:
        images_to_visualize = get_random_image_paths(DATA_DIR, NUM_GRAD_CAM_IMAGES)
        if not images_to_visualize:
            print("No images found for visualization. Please check your data directory and ensure it contains 'val' split.")
            return
        print(f"Randomly selected {len(images_to_visualize)} images for Grad-CAM.")

    visualize_grad_cam(model, images_to_visualize, CLASS_NAMES, GRAD_CAM_TARGET_LAYER, GRADCAM_OUTPUT_DIR, device, IMAGE_SIZE)


if __name__ == '__main__':
    main()

你可能感兴趣的:(python)