作业:
在 Kaggle 上找一个图像数据集,用 CNN 网络进行训练,并用 Grad-CAM 做可视化
进阶:将代码拆分成多个文件
image_classification_gradcam/
├── config.py # 配置文件:路径、超参数等
├── data_loader.py # 数据加载和预处理
├── model.py # CNN 模型定义
├── train.py # 训练和评估逻辑
├── visualize.py # Grad-CAM 可视化逻辑
├── main.py # 项目主入口,协调各模块
├── requirements.txt # Python 依赖
├── README.md # 项目说明
├── trained_model/ # 训练好的模型将保存在这里
├── gradcam_output/ # Grad-CAM 可视化结果将保存在这里
└── data/ # 你的数据集应该放在这里
    ├── train/
    │   ├── class_A/
    │   │   ├── img1.jpg
    │   │   └── ...
    │   └── class_B/
    │       ├── img2.jpg
    │       └── ...
    └── val/
        ├── class_A/
        └── class_B/
# config.py
import os
# --- Project paths ---
# Everything is resolved relative to the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, 'data')
TRAINED_MODEL_DIR = os.path.join(BASE_DIR, 'trained_model')
GRADCAM_OUTPUT_DIR = os.path.join(BASE_DIR, 'gradcam_output')

# Make sure the output directories exist (runs as an import-time side effect).
os.makedirs(TRAINED_MODEL_DIR, exist_ok=True)
os.makedirs(GRADCAM_OUTPUT_DIR, exist_ok=True)

# --- Data parameters ---
IMAGE_SIZE = (224, 224)  # image size as (H, W)
CLASS_NAMES = ['cat', 'dog']  # class names, used when labelling visualizations
NUM_CLASSES = len(CLASS_NAMES)  # derived so the two settings cannot drift apart

# --- Training parameters ---
BATCH_SIZE = 32
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
MODEL_SAVE_PATH = os.path.join(TRAINED_MODEL_DIR, 'best_cnn_model.pth')

# --- Grad-CAM parameters ---
# Name of the target layer as reported by `model.named_modules()`.
# SimpleCNN.conv_blocks is a flat nn.Sequential, so its children are named by
# position ('conv_blocks.0' ... 'conv_blocks.15'); index 12 is the Conv2d of
# the fourth (last) block. A name that does not exist makes the visualization
# step print the available layer names and return without producing output.
GRAD_CAM_TARGET_LAYER = 'conv_blocks.12'
# Explicit image paths to visualize; leave empty to pick images at random,
# or fill in e.g. ['./data/val/cat/some_cat_image.jpg', ...].
GRAD_CAM_IMAGES_TO_VISUALIZE = []
NUM_GRAD_CAM_IMAGES = 5  # how many images to pick when selecting at random
# data_loader.py
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
def get_dataloaders(data_dir, image_size, batch_size, num_workers=4):
    """
    Build the training and validation DataLoaders.

    Args:
        data_dir (str): Dataset root containing 'train' and 'val' sub-folders
            (e.g., './data'), each with one sub-folder per class.
        image_size (tuple): Target image size (H, W).
        batch_size (int): Batch size for both loaders.
        num_workers (int): Worker processes per loader; tune to the number of
            CPU cores, or pass 0 to load in the main process.

    Returns:
        tuple: (train_loader, val_loader, class_names)
    """
    # One Normalize instance shared by both pipelines so they cannot diverge.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # Training adds a random horizontal flip; validation stays deterministic.
    train_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=train_transform)
    val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transform=val_transform)

    # Labels come from folder names; if the two splits disagree, the same
    # integer label would mean different classes in train and val.
    if val_dataset.class_to_idx != train_dataset.class_to_idx:
        print(
            f"Warning: train classes {train_dataset.classes} and "
            f"val classes {val_dataset.classes} differ; validation labels will be inconsistent."
        )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    class_names = train_dataset.classes  # class names as discovered in the 'train' folder
    print(f"Detected classes: {class_names}")
    print(f"Number of training samples: {len(train_dataset)}")
    print(f"Number of validation samples: {len(val_dataset)}")
    return train_loader, val_loader, class_names
if __name__ == '__main__':
    # Quick smoke test for the data loader: build the loaders from the
    # project configuration and describe the first training batch.
    from config import BATCH_SIZE, DATA_DIR, IMAGE_SIZE

    train_loader, val_loader, class_names = get_dataloaders(DATA_DIR, IMAGE_SIZE, BATCH_SIZE)

    # Only the first batch is inspected; nothing is printed for an empty loader.
    first_batch = next(iter(train_loader), None)
    if first_batch is not None:
        images, labels = first_batch
        print(f"Batch images shape: {images.shape}")
        print(f"Batch labels shape: {labels.shape}")
        print(f"First 5 labels: {labels[:5]}")
# model.py
import torch
import torch.nn as nn
import torch.nn.functional as F
class SimpleCNN(nn.Module):
    """
    Small four-block CNN classifier.

    Each block is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> MaxPool2d(2),
    with channel widths 3 -> 32 -> 64 -> 128 -> 256. All layers live in one flat
    nn.Sequential (`conv_blocks`), so they are addressed by position:
    'conv_blocks.0' ... 'conv_blocks.15'. The Conv2d of the last block, the
    usual Grad-CAM target, is 'conv_blocks.12'.

    Args:
        num_classes (int): Number of output logits.
    """

    # Spatial size of the feature map consumed by the classifier head.
    # A 224x224 input halved by four max-pools gives exactly 14x14.
    _FEATURE_MAP_SIZE = (14, 14)

    def __init__(self, num_classes=2):
        super().__init__()
        channels = (3, 32, 64, 128, 256)
        layers = []
        # Layers are created in the same order as a hand-written Sequential,
        # so positional names and state_dict keys stay 'conv_blocks.<index>'.
        for in_channels, out_channels in zip(channels, channels[1:]):
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        self.conv_blocks = nn.Sequential(*layers)

        # Parameter-free pooling to the fixed 14x14 grid. For 224x224 inputs
        # the feature map is already 14x14, so this is a no-op there; for other
        # input sizes it keeps the first Linear layer's input size valid.
        # It adds no entries to the state_dict.
        self.adaptive_pool = nn.AdaptiveAvgPool2d(self._FEATURE_MAP_SIZE)

        # Fully connected head on the flattened 256 * 14 * 14 feature vector.
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 14 * 14, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch `x`."""
        x = self.conv_blocks(x)
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)  # flatten everything except the batch dimension
        x = self.fc_layers(x)
        return x
if __name__ == '__main__':
    # Quick smoke test: print the architecture, then push one random
    # 224x224 RGB image (batch size 1) through the network.
    net = SimpleCNN(num_classes=2)
    print(net)

    logits = net(torch.randn(1, 3, 224, 224))
    # Expected: torch.Size([1, 2])
    print(f"Output shape: {logits.shape}")
# train.py
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import os
def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one pass over `loader`; train if `optimizer` is given, otherwise evaluate.

    Returns:
        tuple: (mean loss per sample, accuracy) over the samples seen.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is equivalent to eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0
    progress = tqdm(loader, desc=desc)
    # Gradients are only tracked while training.
    with torch.set_grad_enabled(is_training):
        for inputs, labels in progress:
            inputs, labels = inputs.to(device), labels.to(device)
            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()
            # The criterion returns a batch mean; weight it by the batch size
            # so a smaller final batch does not skew the epoch average.
            loss_sum += loss.item() * inputs.size(0)
            num_correct += (outputs.argmax(dim=1) == labels).sum().item()
            num_seen += labels.size(0)
            progress.set_postfix(loss=loss.item())
    num_seen = max(num_seen, 1)  # an empty loader must not divide by zero
    return loss_sum / num_seen, num_correct / num_seen


def train_model(model, train_loader, val_loader, num_epochs, learning_rate, device, model_save_path):
    """
    Train the model and evaluate it after every epoch.

    Uses cross-entropy loss with the Adam optimizer. The state_dict of the
    epoch with the highest validation accuracy is written to
    `model_save_path`; the first epoch is always saved so a checkpoint exists
    even if validation accuracy never rises above zero.

    Args:
        model (nn.Module): CNN model.
        train_loader (DataLoader): Training data loader.
        val_loader (DataLoader): Validation data loader.
        num_epochs (int): Number of training epochs.
        learning_rate (float): Learning rate for Adam.
        device (torch.device): Training device (CPU or GPU).
        model_save_path (str): Path the best checkpoint is written to.

    Returns:
        float: Best validation accuracy reached (0.0 if no epoch was run).
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    best_val_accuracy = 0.0
    model.to(device)

    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(model_save_path) or '.', exist_ok=True)

    print(f"Starting training on {device}...")
    for epoch in range(num_epochs):
        # Training pass
        epoch_train_loss, epoch_train_accuracy = _run_epoch(
            model, train_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{num_epochs} [Train]", optimizer=optimizer,
        )
        print(f"Epoch {epoch+1} Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_accuracy:.4f}")

        # Validation pass
        epoch_val_loss, epoch_val_accuracy = _run_epoch(
            model, val_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{num_epochs} [Validation]",
        )
        print(f"Epoch {epoch+1} Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_accuracy:.4f}")

        # Keep the best model seen so far.
        if epoch == 0 or epoch_val_accuracy > best_val_accuracy:
            best_val_accuracy = epoch_val_accuracy
            torch.save(model.state_dict(), model_save_path)
            print(f"Saved best model with Val Acc: {best_val_accuracy:.4f} to {model_save_path}")
    print("Training finished!")
    return best_val_accuracy
# visualize.py
import torch
import numpy as np
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
from pytorch_grad_cam import GradCAM, HiResCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
import random
def get_target_layer_from_name(model, layer_name):
    """
    Look up a sub-module of `model` by its `named_modules()` name.

    Prints the match when found. Otherwise prints every available module
    name to help pick a valid one, then raises ValueError.
    """
    found = next(
        (candidate for candidate_name, candidate in model.named_modules()
         if candidate_name == layer_name),
        None,
    )
    if found is not None:
        print(f"Found target layer: {layer_name} (Type: {type(found)})")
        return found

    # No match: list what the model actually contains before failing.
    print(f"Error: Target layer '{layer_name}' not found in model.")
    print("Available layers:")
    for available_name, _module in model.named_modules():
        print(f"- {available_name}")
    raise ValueError(f"Target layer '{layer_name}' not found.")
def visualize_grad_cam(model, image_paths, class_names, target_layer_name, output_dir, device, image_size):
    """
    Run Grad-CAM on the given images and save the overlays.

    For every image, the model's top predicted class is used as the Grad-CAM
    target. The heat-map is blended onto the resized image and written to
    `output_dir` as 'cam_<class>_<prob>_<original name>.jpg'. A failure on one
    image is reported and the remaining images are still processed.

    Args:
        model (nn.Module): Trained model.
        image_paths (list): Paths of the image files to visualize.
        class_names (list): Class names indexed by label (e.g., ['cat', 'dog']).
        target_layer_name (str): `named_modules()` name of the Grad-CAM target layer.
        output_dir (str): Directory the result images are written to.
        device (torch.device): Device (CPU or GPU).
        image_size (tuple): Model input size (H, W).
    """
    model.eval()
    model.to(device)

    # Resolve the target layer; an unknown name is reported and nothing is done.
    try:
        target_layer = get_target_layer_from_name(model, target_layer_name)
    except ValueError as e:
        print(e)
        return

    os.makedirs(output_dir, exist_ok=True)

    # Preprocessing must match validation-time preprocessing. It is split in
    # two so the *resized* image can also serve as the overlay background:
    # the CAM has the spatial size of the model input, and the image it is
    # blended with must have that same size.
    resize = transforms.Resize(image_size)
    to_model_input = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    print(f"\nGenerating Grad-CAM visualizations for {len(image_paths)} images...")
    # `use_cuda` is deliberately not passed: recent pytorch-grad-cam releases
    # removed that argument, and the model and inputs are already on `device`.
    # The context manager removes the hooks GradCAM registers on the layer.
    with GradCAM(model=model, target_layers=[target_layer]) as cam:
        for img_path in image_paths:
            print(f"Processing {img_path}...")
            try:
                # Close the file handle promptly; convert() returns a loaded copy.
                with Image.open(img_path) as raw_img:
                    resized_img = resize(raw_img.convert('RGB'))

                # show_cam_on_image expects a float32 image in the [0, 1] range.
                rgb_img_np = np.float32(resized_img) / 255
                input_tensor = to_model_input(resized_img).unsqueeze(0).to(device)  # add batch dim

                # Plain forward pass to obtain the predicted class and its probability.
                with torch.no_grad():
                    output = model(input_tensor)
                probabilities = torch.softmax(output, dim=1)[0]
                predicted_index = torch.argmax(probabilities).item()
                predicted_class = class_names[predicted_index]
                predicted_prob = probabilities[predicted_index].item()

                # Explain the predicted class. To explain a different class,
                # pass its index to ClassifierOutputTarget instead.
                targets = [ClassifierOutputTarget(predicted_index)]
                grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
                grayscale_cam = grayscale_cam[0, :]  # drop the batch dimension

                # Blend the CAM onto the (resized) image.
                cam_image = show_cam_on_image(rgb_img_np, grayscale_cam, use_rgb=True)

                # Save the result.
                filename = os.path.basename(img_path)
                output_filename = f"cam_{predicted_class}_{predicted_prob:.2f}_{os.path.splitext(filename)[0]}.jpg"
                output_path = os.path.join(output_dir, output_filename)

                fig, ax = plt.subplots()
                try:
                    ax.imshow(cam_image)
                    ax.set_title(f"Predicted: {predicted_class} ({predicted_prob:.2f})")
                    ax.axis('off')
                    fig.savefig(output_path, bbox_inches='tight', pad_inches=0)
                finally:
                    plt.close(fig)  # always release the figure, even if saving fails
                print(f"Saved Grad-CAM image to: {output_path}")
            except Exception as e:
                # Best effort per image: report and continue with the next one.
                print(f"Error processing {img_path}: {e}")
def get_random_image_paths(data_dir, num_images):
    """
    Pick `num_images` random image paths from the validation split.

    Scans '<data_dir>/val/<class folder>/' for files ending in .png, .jpg or
    .jpeg (case-insensitive). Files directly inside 'val' are ignored.

    Args:
        data_dir (str): Dataset root that contains the 'val' folder.
        num_images (int): Number of paths to sample.

    Returns:
        list: Sampled paths. All available paths are returned (with a warning)
        when fewer than `num_images` exist; an empty list is returned when the
        'val' folder is missing.
    """
    val_dir = os.path.join(data_dir, 'val')
    if not os.path.isdir(val_dir):
        # Callers treat an empty result as "nothing to visualize", so report
        # the problem instead of letting os.listdir raise.
        print(f"Warning: Validation directory '{val_dir}' not found.")
        return []

    image_suffixes = ('.png', '.jpg', '.jpeg')
    all_image_paths = []
    # Sorted listings make the candidate order independent of the filesystem,
    # so seeding `random` yields a reproducible selection.
    for class_folder in sorted(os.listdir(val_dir)):
        class_path = os.path.join(val_dir, class_folder)
        if not os.path.isdir(class_path):
            continue
        all_image_paths.extend(
            os.path.join(class_path, img_name)
            for img_name in sorted(os.listdir(class_path))
            if img_name.lower().endswith(image_suffixes)
        )

    if len(all_image_paths) < num_images:
        print(f"Warning: Only {len(all_image_paths)} images found, requested {num_images}. Using all available.")
        return all_image_paths
    return random.sample(all_image_paths, num_images)
if __name__ == '__main__':
    # Stand-alone entry point: load the trained model and run Grad-CAM on
    # either the images listed in the config or a random validation sample.
    # GRAD_CAM_IMAGES_TO_VISUALIZE is imported by name; only names from
    # `config` are imported here, so `config.<attr>` would be a NameError.
    from config import (
        DATA_DIR, IMAGE_SIZE, GRAD_CAM_TARGET_LAYER, GRADCAM_OUTPUT_DIR, CLASS_NAMES,
        MODEL_SAVE_PATH, NUM_GRAD_CAM_IMAGES, GRAD_CAM_IMAGES_TO_VISUALIZE,
    )
    from model import SimpleCNN

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device for visualization: {device}")

    # Instantiate the model.
    model = SimpleCNN(num_classes=len(CLASS_NAMES))

    # Load the trained weights; without them there is nothing to visualize.
    if not os.path.exists(MODEL_SAVE_PATH):
        print(f"Error: Model not found at {MODEL_SAVE_PATH}. Please train the model first.")
        raise SystemExit(1)
    print(f"Loading trained model from {MODEL_SAVE_PATH}")
    model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))

    # Use the images named in the config if any; otherwise sample at random
    # from the validation set.
    if GRAD_CAM_IMAGES_TO_VISUALIZE:
        images_to_visualize = GRAD_CAM_IMAGES_TO_VISUALIZE
    else:
        images_to_visualize = get_random_image_paths(DATA_DIR, NUM_GRAD_CAM_IMAGES)
    if not images_to_visualize:
        print("No images found for visualization. Please check your data directory.")
        raise SystemExit(1)

    visualize_grad_cam(model, images_to_visualize, CLASS_NAMES, GRAD_CAM_TARGET_LAYER, GRADCAM_OUTPUT_DIR, device, IMAGE_SIZE)
# main.py
import torch
import argparse
import os
from config import (
DATA_DIR, IMAGE_SIZE, BATCH_SIZE, NUM_CLASSES, CLASS_NAMES,
NUM_EPOCHS, LEARNING_RATE, MODEL_SAVE_PATH,
GRAD_CAM_TARGET_LAYER, GRADCAM_OUTPUT_DIR,
GRAD_CAM_IMAGES_TO_VISUALIZE, NUM_GRAD_CAM_IMAGES
)
from data_loader import get_dataloaders
from model import SimpleCNN
from train import train_model
from visualize import visualize_grad_cam, get_random_image_paths
def main():
    """
    Command-line entry point.

    `--mode train` (default) builds the data loaders and trains the model.
    `--mode visualize` loads the saved checkpoint and writes Grad-CAM overlays
    for the images listed in the config, or for a random sample of the
    validation set when that list is empty.
    """
    parser = argparse.ArgumentParser(description="CNN Training and Grad-CAM Visualization")
    parser.add_argument('--mode', type=str, default='train', choices=['train', 'visualize'],
                        help="Mode to run: 'train' for training the model, 'visualize' for Grad-CAM.")
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Initialize the model.
    model = SimpleCNN(num_classes=NUM_CLASSES)

    if args.mode == 'train':
        print("\n--- Training Mode ---")
        train_loader, val_loader, detected_classes = get_dataloaders(DATA_DIR, IMAGE_SIZE, BATCH_SIZE)
        # Labels come from the dataset folders, while the model's output size
        # and the visualization labels come from the config; flag any drift.
        if list(detected_classes) != list(CLASS_NAMES):
            print(f"Warning: dataset classes {detected_classes} do not match configured CLASS_NAMES {CLASS_NAMES}.")
        train_model(model, train_loader, val_loader, NUM_EPOCHS, LEARNING_RATE, device, MODEL_SAVE_PATH)
        return

    # argparse `choices` guarantees that the only remaining mode is 'visualize'.
    print("\n--- Visualization Mode (Grad-CAM) ---")

    # Load the trained model.
    if not os.path.exists(MODEL_SAVE_PATH):
        print(f"Error: Model not found at {MODEL_SAVE_PATH}. Please train the model first using 'python main.py --mode train'.")
        return
    print(f"Loading trained model from {MODEL_SAVE_PATH}")
    model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))

    # Decide which images to visualize.
    if GRAD_CAM_IMAGES_TO_VISUALIZE:
        images_to_visualize = GRAD_CAM_IMAGES_TO_VISUALIZE
        print(f"Using specified images for Grad-CAM: {images_to_visualize}")
    else:
        images_to_visualize = get_random_image_paths(DATA_DIR, NUM_GRAD_CAM_IMAGES)
        if not images_to_visualize:
            print("No images found for visualization. Please check your data directory and ensure it contains 'val' split.")
            return
        print(f"Randomly selected {len(images_to_visualize)} images for Grad-CAM.")

    visualize_grad_cam(model, images_to_visualize, CLASS_NAMES, GRAD_CAM_TARGET_LAYER, GRADCAM_OUTPUT_DIR, device, IMAGE_SIZE)


if __name__ == '__main__':
    main()