DAY 47 注意力热图可视化

昨天代码中注意力热图的部分顺延至今天。

知识点回顾:

热力图

作业:对比不同卷积层热图可视化的结果

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2 # OpenCV for resizing and colormap
 
# --- 0. 辅助函数和设置 ---
def preprocess_image(img_path, image_size=(224, 224)):
    """Load an image from disk and convert it into a normalized model input.

    Args:
        img_path: Path of the image file, passed to ``Image.open``.
        image_size: Target size handed to ``transforms.Resize``.

    Returns:
        A tuple ``(batch, img)``. ``batch`` is the resized, normalized tensor
        with an extra leading batch dimension of size 1. ``img`` is the RGB
        PIL image at its original (un-resized) size, kept for display.

    Raises:
        FileNotFoundError: Propagated from ``Image.open`` when ``img_path``
            does not exist; the calling script relies on this to fall back to
            a placeholder image.
    """
    # Image.open is lazy and keeps the underlying file open. convert() loads
    # the pixel data and returns an independent image, so the file handle can
    # be released immediately instead of lingering until garbage collection.
    with Image.open(img_path) as source:
        img = source.convert('RGB')

    preprocess = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img_tensor = preprocess(img)
    # Return the batched tensor plus the PIL image used for display.
    return img_tensor.unsqueeze(0), img
 
def load_imagenet_labels(labels_path="imagenet_classes.txt"):
    """Read class names, one per line, from ``labels_path``.

    Args:
        labels_path: Path of a text file holding one class name per line.

    Returns:
        A list of the file's lines with surrounding whitespace stripped. The
        list index is the line's position in the file. If the file does not
        exist, a warning is printed and 1000 placeholder names
        (``"Class 0"`` ... ``"Class 999"``) are returned instead.
    """
    try:
        # Pin the encoding so the result does not depend on the platform's
        # default locale, and iterate the file lazily rather than building an
        # intermediate list with readlines().
        with open(labels_path, "r", encoding="utf-8") as f:
            labels = [line.strip() for line in f]
    except FileNotFoundError:
        print(f"Warning: {labels_path} not found. Predictions will be class indices.")
        # Without the file (searching for "imagenet_classes.txt" online will
        # find one), fall back to generic placeholder names so the rest of
        # the script can still run.
        print("Using a placeholder for labels. Download 'imagenet_classes.txt' for actual names.")
        labels = [f"Class {i}" for i in range(1000)]  # Placeholder
    return labels
 
def show_feature_maps(activations, layer_name, num_maps_to_show=8, grid_size=(2, 4)):
    """Plot the first few channels of a layer's activations in a grid.

    Args:
        activations: Activation tensor of the layer, expected to have shape
            ``(1, C, H, W)``; the leading dimension is squeezed away. ``None``
            prints a message and returns without plotting.
        layer_name: Name used in the figure title and in messages.
        num_maps_to_show: Upper bound on the number of channels to draw.
        grid_size: ``(rows, cols)`` of the subplot grid.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    if activations is None:
        print(f"No activations found for {layer_name}")
        return

    # detach() makes this safe for tensors that still track gradients.
    activations = activations.detach().squeeze(0).cpu().numpy()  # (C, H, W)
    rows, cols = grid_size
    # Never claim (or try to draw) more maps than there are channels or grid
    # cells, so the title always matches what is actually shown.
    num_maps_to_show = min(num_maps_to_show, activations.shape[0], rows * cols)

    # squeeze=False guarantees a 2-D array of axes even for a 1x1 grid, where
    # plt.subplots would otherwise return a bare Axes without ravel().
    fig, axes = plt.subplots(rows, cols, figsize=(12, 6), squeeze=False)
    axes = axes.ravel()  # flatten for simple indexing
    fig.suptitle(f'Feature Maps from {layer_name} (First {num_maps_to_show} Channels)', fontsize=16)

    for i, ax in enumerate(axes[:num_maps_to_show]):
        ax.imshow(activations[i], cmap='viridis')  # viridis, gray, etc.
        ax.set_title(f'Channel {i+1}')
        ax.axis('off')

    # Remove the grid cells that received no feature map.
    for unused_ax in axes[num_maps_to_show:]:
        fig.delaxes(unused_ax)

    plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave room for the suptitle
    plt.show()
 
def generate_grad_cam_heatmap(gradient, activation, original_image_np, target_size=(224, 224)):
    """Build a Grad-CAM heatmap and blend it over the original image.

    Args:
        gradient: 4-D gradient tensor for the target layer's output; it is
            averaged over dimensions 2 and 3 to obtain per-channel weights.
        activation: 4-D activation tensor of the same layer.
        original_image_np: Image array blended with the heatmap; it must
            broadcast against the resized RGB heatmap (assumed to be an
            H x W x 3 array in the 0-255 range - confirm against the caller).
        target_size: Size passed to ``cv2.resize``, i.e. ``(width, height)``.

    Returns:
        ``(heatmap, superimposed_img)``: the RGB colour-mapped heatmap and the
        uint8 blend of 40% heatmap with 60% original image. ``(None, None)``
        is returned when either ``gradient`` or ``activation`` is ``None``.
    """
    if activation is None or gradient is None:
        print("Gradients or activations are None, cannot generate Grad-CAM.")
        return None, None

    # Channel importance: spatial average of the gradients.
    channel_weights = gradient.mean(dim=(2, 3), keepdim=True)

    # Weighted sum of the feature maps over the channel dimension, keeping
    # only the positive evidence.
    class_map = torch.relu((channel_weights * activation).sum(dim=1, keepdim=True))

    # Min-max scale into [0, 1]; the epsilon guards against a flat map.
    lowest = class_map.min()
    highest = class_map.max()
    class_map = (class_map - lowest) / (highest - lowest + 1e-8)
    class_map_np = class_map.squeeze().cpu().numpy()

    # Upsample to the requested size and colourize. OpenCV returns BGR, while
    # matplotlib expects RGB.
    upsampled = cv2.resize(class_map_np, target_size)
    colored_bgr = cv2.applyColorMap(np.uint8(255 * upsampled), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(colored_bgr, cv2.COLOR_BGR2RGB)

    # Blend with the original image and bring the result back to uint8.
    blended = heatmap * 0.4 + original_image_np * 0.6
    superimposed_img = np.clip(blended, 0, 255).astype(np.uint8)

    return heatmap, superimposed_img
 
# --- 1. Load the model and the image ---
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1) # uses the newer `weights=` API
# Switch to evaluation mode and move the model to the selected device.
model.eval().to(device)
 
# An input image is required, for example 'cat_dog.jpg'.
# If you do not have one, download any picture (e.g. search for "cat dog image").
# The path below is only a placeholder; replace it with your own image path.
IMG_PATH = 'cat_dog.jpg' # <--- change this to your image path!
try:
    img_tensor, original_pil_img = preprocess_image(IMG_PATH)
except FileNotFoundError:
    print(f"Error: Image file not found at {IMG_PATH}")
    print("Please provide a valid image path.")
    # Generate a random image as a stand-in so the rest of the script can still run.
    print("Using a random noise image as a placeholder.")
    random_img_data = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
    original_pil_img = Image.fromarray(random_img_data)
    # Preprocess the random image; no resize step is used here because the
    # array is already created at 224x224.
    preprocess_for_random = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img_tensor = preprocess_for_random(original_pil_img).unsqueeze(0)
 
 
# Put the input on the same device as the model.
img_tensor = img_tensor.to(device)
# 224x224 NumPy copy of the image, used later for display and for the heatmap overlay.
original_np_img = np.array(original_pil_img.resize((224, 224)))
 
# Class names used to label the prediction in the figure titles.
imagenet_labels = load_imagenet_labels()
 
# --- 2. Choose the target layers ---
# Printing the model structure helps when picking layers:
# print(model) 
# Some candidate layers for ResNet18:
# model.conv1
# model.layer1[0].conv2  (or layer1[-1].conv2)
# model.layer2[0].conv2
# model.layer3[0].conv2
# model.layer4[0].conv2  (or layer4[-1].conv2, which is the last convolution)
 
# Maps a display name to the module whose activations and gradients are
# captured; the processing loop visits the entries in this order.
target_layers_dict = {
    "conv1": model.conv1,
    "layer1_last_conv": model.layer1[-1].conv2,
    "layer2_last_conv": model.layer2[-1].conv2,
    "layer3_last_conv": model.layer3[-1].conv2,
    "layer4_last_conv": model.layer4[-1].conv2  # the final convolution, the usual choice for Grad-CAM
}
 
# --- 3. Module-level stores and hook factories for activations and gradients ---
# Both dictionaries are keyed by the layer name given to the hook factories.
activations_store = {}
gradients_store = {}


def forward_hook(layer_name):
    """Return a forward hook that records the module output under ``layer_name``."""
    def _record_activation(module, inputs, result):
        # Detach so the stored copy is not tied to the autograd graph.
        activations_store[layer_name] = result.detach()
    return _record_activation


def backward_hook(layer_name):
    """Return a backward hook that records the output gradient under ``layer_name``."""
    def _record_gradient(module, grads_wrt_input, grads_wrt_output):
        # The output gradients arrive as a tuple; only the first entry is kept.
        first_output_grad = grads_wrt_output[0]
        gradients_store[layer_name] = first_output_grad.detach()
    return _record_gradient
 
# --- 4. Process each target layer in turn ---
# For every layer: hook it, run one forward and one backward pass for the
# predicted class, then plot its feature maps and its Grad-CAM heatmap.
for layer_name, target_layer_module in target_layers_dict.items():
    print(f"\n--- Processing Layer: {layer_name} ---")

    # Start from empty stores so nothing from the previous layer is mixed in.
    activations_store.clear()
    gradients_store.clear()
    hook_handles = []

    try:
        # Register the hooks. register_full_backward_hook delivers the
        # gradient with respect to the module output; very old PyTorch
        # versions only offer the legacy register_backward_hook.
        hook_handles.append(
            target_layer_module.register_forward_hook(forward_hook(layer_name))
        )
        hook_handles.append(
            target_layer_module.register_full_backward_hook(backward_hook(layer_name))
        )

        # a. Forward pass
        model.zero_grad()  # clear gradients left over from the previous layer
        output = model(img_tensor)

        # b. Predicted class
        pred_probabilities = torch.softmax(output, dim=1)
        pred_score, pred_class_idx = torch.max(pred_probabilities, 1)
        pred_class_idx = pred_class_idx.item()
        target_category_score = output[0, pred_class_idx]  # raw logit as the target score

        # c. Backward pass. Every iteration runs its own forward pass, so the
        # graph is used exactly once and does not need to be retained.
        target_category_score.backward()
    finally:
        # g. Always remove the hooks, even if the forward/backward pass
        # failed, so they cannot fire during later iterations or other code.
        for handle in hook_handles:
            handle.remove()

    # d. Fetch what the hooks captured
    current_activation = activations_store.get(layer_name)
    current_gradient = gradients_store.get(layer_name)

    # e. Visualize the feature maps
    print(f"Visualizing feature maps for {layer_name}...")
    if current_activation is not None:
        show_feature_maps(current_activation, layer_name, num_maps_to_show=8, grid_size=(2,4))
    else:
        print(f"Could not retrieve activations for {layer_name}")

    # f. Generate and visualize the Grad-CAM heatmap
    print(f"Generating Grad-CAM for {layer_name}...")
    if current_gradient is not None and current_activation is not None:
        heatmap, superimposed_img = generate_grad_cam_heatmap(
            current_gradient,
            current_activation,
            original_np_img,
            target_size=(original_np_img.shape[1], original_np_img.shape[0]) # (width, height) for cv2.resize
        )

        if heatmap is not None and superimposed_img is not None:
            fig, axs = plt.subplots(1, 3, figsize=(18, 6))
            # Fall back to the numeric class when the labels list is empty or
            # shorter than the predicted index (e.g. a truncated labels file).
            if 0 <= pred_class_idx < len(imagenet_labels):
                pred_label_name = imagenet_labels[pred_class_idx]
            else:
                pred_label_name = f"Class {pred_class_idx}"
            fig.suptitle(f'Grad-CAM for {layer_name}\nPredicted: {pred_label_name} ({pred_score.item():.2f})', fontsize=16)

            axs[0].imshow(original_np_img)
            axs[0].set_title('Original Image')
            axs[0].axis('off')

            axs[1].imshow(heatmap)
            axs[1].set_title('Grad-CAM Heatmap')
            axs[1].axis('off')

            axs[2].imshow(superimposed_img)
            axs[2].set_title('Superimposed Image')
            axs[2].axis('off')

            plt.tight_layout(rect=[0, 0, 1, 0.90])
            plt.show()
        else:
            print(f"Failed to generate Grad-CAM for {layer_name}")
    else:
        print(f"Missing gradients or activations for Grad-CAM on {layer_name}")

print("\n--- All layers processed. ---")

你可能感兴趣的:(Python入门(坚持),人工智能)