目录
Kaggle图像分类项目:
项目结构
一、数据准备模块
1. config/paths.py
2. data/preprocessing.py
3. data/dataset.py
二、模型定义模块
1. models/cnn_model.py
2. models/grad_cam.py
三、训练脚本
train.py
四、可视化模块
1. utils/visualization.py
2. visualize.py
五、实用工具
utils/logger.py
项目执行流程
@浙大疏锦行
```text
kaggle_cnn_gradcam/
├── config/
│   ├── paths.py          # 路径配置
│   └── params.py         # 超参数配置
├── data/
│   ├── dataset.py        # 数据集加载
│   └── preprocessing.py  # 数据预处理
├── models/
│   ├── cnn_model.py      # CNN模型定义
│   └── grad_cam.py       # Grad-CAM实现
├── utils/
│   ├── visualization.py  # 可视化工具
│   └── logger.py         # 日志记录
├── train.py              # 训练脚本
└── visualize.py          # 可视化主程序
```
config/paths.py
from pathlib import Path
class Paths:
    """Filesystem locations used by the training and visualization scripts.

    Args:
        dataset_name: Name of the dataset folder under ``data/``.

    Attributes are plain ``pathlib.Path`` objects relative to the current
    working directory. Constructing an instance creates the output
    directories the pipeline writes into.
    """

    def __init__(self, dataset_name="cats_vs_dogs"):
        self.dataset_dir = Path(f"data/{dataset_name}")
        self.train_dir = self.dataset_dir / "train"
        self.test_dir = self.dataset_dir / "test"
        self.model_save = Path("saved_models/best_model.pth")
        self.gradcam_output = Path("output/gradcam")
        # Create every directory that later steps write into. The checkpoint
        # directory is included because torch.save does not create missing
        # parent directories and would otherwise fail on a fresh checkout.
        for out_dir in (self.gradcam_output, self.model_save.parent):
            out_dir.mkdir(parents=True, exist_ok=True)
data/preprocessing.py
from torchvision import transforms
def get_transforms(input_size=224):
    """Build the preprocessing pipelines for training and validation.

    Args:
        input_size: Side length of the square crop fed to the model.

    Returns:
        A ``(train_transform, val_transform)`` tuple of ``transforms.Compose``
        objects. The training pipeline uses a random resized crop and a random
        horizontal flip; the validation pipeline resizes to 256 and takes a
        center crop. Both end with tensor conversion and normalization.
    """

    def _tensor_and_normalize():
        # Shared tail of both pipelines; fresh objects are built on each call.
        # The constants are presumably the usual ImageNet channel mean/std.
        return [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]

    train_steps = [
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
    ]
    train_transform = transforms.Compose(train_steps + _tensor_and_normalize())

    val_steps = [
        transforms.Resize(256),
        transforms.CenterCrop(input_size),
    ]
    val_transform = transforms.Compose(val_steps + _tensor_and_normalize())

    return train_transform, val_transform
data/dataset.py
from torchvision.datasets import ImageFolder
from config.paths import Paths
class KaggleDataset(ImageFolder):
    """``ImageFolder`` dataset with a helper that builds a train/val split."""

    def __init__(self, root, transform=None):
        super().__init__(root=root, transform=transform)

    @classmethod
    def create_datasets(cls, input_size=224, train_ratio=0.8, seed=None):
        """Create disjoint training and validation subsets of the train folder.

        The same image folder is opened twice so that each subset can use its
        own transform (augmenting for training, deterministic for validation).
        A single random permutation is then cut in two, which guarantees that
        no sample appears in both subsets.

        Args:
            input_size: Crop size forwarded to ``get_transforms``.
            train_ratio: Fraction of samples assigned to the training subset;
                must lie strictly between 0 and 1.
            seed: Optional integer seed for a reproducible split. When
                ``None`` the global torch RNG is used.

        Returns:
            ``(train_set, val_set)`` as ``torch.utils.data.Subset`` objects.

        Raises:
            ValueError: If ``train_ratio`` is not in the open interval (0, 1).
        """
        # Imported locally: this module's import block provides neither name.
        import torch
        from data.preprocessing import get_transforms

        if not 0.0 < train_ratio < 1.0:
            raise ValueError(f"train_ratio must be in (0, 1), got {train_ratio}")

        paths = Paths()
        train_tf, val_tf = get_transforms(input_size)
        augmented = cls(root=paths.train_dir, transform=train_tf)
        plain = cls(root=paths.train_dir, transform=val_tf)

        n_total = len(augmented)
        n_train = int(train_ratio * n_total)

        generator = None
        if seed is not None:
            generator = torch.Generator().manual_seed(seed)
        order = torch.randperm(n_total, generator=generator).tolist()

        # Both subsets index into the same permutation, so they cannot overlap.
        train_set = torch.utils.data.Subset(augmented, order[:n_train])
        val_set = torch.utils.data.Subset(plain, order[n_train:])
        return train_set, val_set
models/cnn_model.py
import torch.nn as nn
class CNNClassifier(nn.Module):
    """Small three-stage convolutional classifier.

    Each stage is a 3x3 convolution (padding 1), an in-place ReLU and a 2x2
    max-pool. The stages widen the channels 3 -> 32 -> 64 -> 128. A global
    average pool and one linear layer produce the class scores.

    Args:
        num_classes: Number of output scores.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        channel_plan = [(3, 32), (32, 64), (64, 128)]
        stages = []
        for in_ch, out_ch in channel_plan:
            stages += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ]
        self.features = nn.Sequential(*stages)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for ``x``."""
        feature_maps = self.features(x)  # convolutional feature maps
        pooled = self.avgpool(feature_maps)
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)
models/grad_cam.py
import torch
import torch.nn.functional as F
class GradCAM:
    """Grad-CAM heatmap generator for one layer of a model.

    A forward hook on ``target_layer`` stores a copy of the layer's output and
    attaches a tensor hook to that output to capture its gradient during the
    backward pass.

    A tensor hook is used instead of ``register_full_backward_hook`` because a
    module-level full backward hook raises a RuntimeError when the layer's
    output is modified in place by the next module (for example
    ``nn.ReLU(inplace=True)`` directly after a convolution).

    Args:
        model: The network to explain.
        target_layer: A sub-module of ``model`` whose output is a 4-D feature
            map; ``generate`` averages gradients over dims 2 and 3.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.activations = None
        self.gradients = None
        # Keep the handle so the hook can be removed via remove_hooks().
        self._forward_handle = target_layer.register_forward_hook(self._forward_hook)

    def _forward_hook(self, module, input, output):
        # Clone: a following in-place op would otherwise overwrite the stored
        # activations, because detach() alone shares storage with ``output``.
        self.activations = output.detach().clone()
        if output.requires_grad:
            output.register_hook(self._save_gradient)

    def _save_gradient(self, grad):
        self.gradients = grad.detach()

    def remove_hooks(self):
        """Detach the forward hook from the target layer."""
        self._forward_handle.remove()

    def generate(self, input_tensor, class_idx=None):
        """Compute the Grad-CAM heatmap for the first sample of a batch.

        Args:
            input_tensor: Model input; only sample 0 is explained.
            class_idx: Class to explain. Defaults to the predicted class of
                sample 0. May be an int or a single-element tensor.

        Returns:
            A 2-D ``numpy`` array with the spatial size of the target layer's
            output, min-max scaled to ``[0, 1]``.

        Raises:
            RuntimeError: If the target layer produced no activations or no
                gradient reached it.
        """
        self.activations = None
        self.gradients = None

        # Gradients are required even if the caller is inside torch.no_grad().
        with torch.enable_grad():
            output = self.model(input_tensor)
            if class_idx is None:
                class_idx = output[0].argmax()
            class_idx = int(class_idx)

            # Back-propagate only the selected class score of sample 0.
            self.model.zero_grad()
            one_hot = torch.zeros_like(output)
            one_hot[0, class_idx] = 1
            output.backward(gradient=one_hot)

        if self.activations is None or self.gradients is None:
            raise RuntimeError(
                "GradCAM captured no activations/gradients; check that "
                "target_layer is used by the model and requires gradients."
            )

        # Channel weights: gradient averaged over the spatial dimensions.
        weights = torch.mean(self.gradients, dim=[2, 3])
        # Weighted sum over channels gives a single (H, W) map.
        cam = (weights[0][:, None, None] * self.activations[0]).sum(dim=0)
        cam = F.relu(cam)
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam.cpu().numpy()
train.py
import torch
from torch.utils.data import DataLoader
from models.cnn_model import CNNClassifier
from data.dataset import KaggleDataset
from config.paths import Paths
from utils.logger import setup_logger
def train(epochs=10, batch_size=32, lr=0.001):
    """Train ``CNNClassifier`` and keep the checkpoint with the best val accuracy.

    Args:
        epochs: Number of passes over the training subset.
        batch_size: Batch size for both the training and validation loaders.
        lr: Learning rate of the Adam optimizer.

    The state dict of the best model so far is written to ``paths.model_save``
    whenever the validation accuracy improves.
    """
    logger = setup_logger()
    paths = Paths()
    # torch.save does not create missing parent directories.
    paths.model_save.parent.mkdir(parents=True, exist_ok=True)

    # Data loading
    train_set, val_set = KaggleDataset.create_datasets()
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)

    # Model setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CNNClassifier().to(device)

    # Loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_acc = 0.0
    n_val = len(val_set)
    for epoch in range(epochs):
        # Training pass
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation pass
        model.eval()
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs.to(device))
                preds = outputs.argmax(dim=1)
                correct += (preds == labels.to(device)).sum().item()
        # Guard against an empty validation subset (tiny datasets).
        acc = 100 * correct / n_val if n_val else 0.0
        logger.info(f"Epoch {epoch+1}: Val Acc={acc:.2f}%")

        # Keep only the best-performing weights.
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), paths.model_save)
            logger.info(f"New best model saved (Acc={acc:.2f}%)")


if __name__ == "__main__":
    train()
utils/visualization.py
import matplotlib.pyplot as plt
import numpy as np
import cv2
def plot_gradcam(image, heatmap, output_path, alpha=0.5):
    """Overlay a Grad-CAM heatmap on an image and write the result to disk.

    Args:
        image: RGB image as an ``(H, W, 3)`` array with values in 0-255
            (the result is converted RGB -> BGR before saving).
        heatmap: 2-D float array, expected in ``[0, 1]``; it is resized to the
            image's height and width.
        output_path: Destination file; passed to ``cv2.imwrite`` as a string.
        alpha: Blend weight of the heatmap; the image gets ``1 - alpha``.
    """
    # Resize the heatmap to the image resolution (cv2 expects (width, height)).
    heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
    # Clip first so out-of-range values cannot wrap around in the uint8 cast.
    heatmap = np.uint8(255 * np.clip(heatmap, 0.0, 1.0))
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    # applyColorMap returns BGR; convert to RGB so both blend inputs share the
    # same channel order. Without this, hot regions come out blue.
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    # Blend heatmap and image.
    superimposed_img = heatmap * alpha + image * (1 - alpha)
    superimposed_img = np.clip(superimposed_img, 0, 255).astype(np.uint8)

    # cv2.imwrite expects BGR channel order.
    cv2.imwrite(str(output_path), cv2.cvtColor(superimposed_img, cv2.COLOR_RGB2BGR))
visualize.py
import torch
from PIL import Image
from config.paths import Paths
from models.cnn_model import CNNClassifier
from models.grad_cam import GradCAM
from data.preprocessing import get_transforms
from utils.visualization import plot_gradcam
def visualize(image_path=None, output_name="cat_example.jpg"):
    """Generate and save a Grad-CAM overlay for one image.

    Args:
        image_path: Image to explain. Defaults to ``cat/1.jpg`` inside the
            training directory.
        output_name: File name of the overlay inside the Grad-CAM output
            directory.
    """
    # Imported locally: this module's import block does not provide numpy.
    import numpy as np

    # Setup
    paths = Paths()
    _, val_tf = get_transforms()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the trained weights
    model = CNNClassifier()
    model.load_state_dict(torch.load(paths.model_save, map_location=device))
    model.to(device).eval()

    # features[-3] is the last convolution of the feature extractor; the two
    # modules after it are its ReLU and max-pool.
    target_layer = model.features[-3]
    gradcam = GradCAM(model, target_layer)

    # Load and preprocess the sample image
    if image_path is None:
        image_path = paths.train_dir / "cat/1.jpg"
    sample_image = Image.open(image_path).convert("RGB")
    input_tensor = val_tf(sample_image).unsqueeze(0).to(device)

    # Compute the heatmap
    heatmap = gradcam.generate(input_tensor)

    # NOTE(review): the model sees a resized center crop while the overlay
    # uses the full original image, so the heatmap may not align exactly with
    # the image borders; confirm whether this is acceptable.
    original_img = np.array(sample_image)
    output_path = paths.gradcam_output / output_name
    plot_gradcam(original_img, heatmap, output_path)
    print(f"Grad-CAM结果已保存至: {output_path}")


if __name__ == "__main__":
    visualize()
utils/logger.py
import logging
from datetime import datetime
def setup_logger():
    """Return the ``CNN_Trainer`` logger, configuring it on first use.

    The logger writes INFO-level records to the console and to a timestamped
    file ``logs/train_<YYYYmmdd_HHMMSS>.log``. The ``logs`` directory is
    created if it does not exist. Calling the function again returns the
    already configured logger without adding duplicate handlers.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    # Imported locally: this module's import block does not provide pathlib.
    from pathlib import Path

    logger = logging.getLogger("CNN_Trainer")
    logger.setLevel(logging.INFO)

    # Already configured: adding handlers again would duplicate every record.
    if logger.handlers:
        return logger

    # File handler; FileHandler does not create the directory itself.
    Path("logs").mkdir(parents=True, exist_ok=True)
    log_file = f"logs/train_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)

    # Shared record format
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    return logger
准备数据:
mkdir -p data/cats_vs_dogs/train
# 将Kaggle数据集按类别放入train目录下的子目录(例如 train/cat/ 和 train/dog/);ImageFolder 以子目录名作为类别标签,visualize.py 默认读取 train/cat/1.jpg
训练模型:
python train.py
可视化结果:
python visualize.py