The attention-heatmap part of yesterday's code has been moved to today.
Knowledge point review:
Heatmaps (Grad-CAM)
Homework: compare the heatmap visualization results across different convolutional layers (see the comparison sketch after the script below).
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2 # OpenCV for resizing and colormap
# --- 0. Helper functions and setup ---
def preprocess_image(img_path, image_size=(224, 224)):
    """Load and preprocess an image."""
    img = Image.open(img_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img_tensor = preprocess(img)
    return img_tensor.unsqueeze(0), img  # return the tensor plus the original PIL image for display
def load_imagenet_labels(labels_path="imagenet_classes.txt"):
    """Load the ImageNet class labels."""
    try:
        with open(labels_path, "r") as f:
            labels = [line.strip() for line in f.readlines()]
    except FileNotFoundError:
        print(f"Warning: {labels_path} not found. Predictions will be class indices.")
        # If you do not have this file, search for "imagenet_classes.txt" online and download it,
        # or use the official ImageNet-1k class list directly.
        # A simplified placeholder is used here; use the full list in practice.
        print("Using a placeholder for labels. Download 'imagenet_classes.txt' for actual names.")
        labels = [f"Class {i}" for i in range(1000)]  # Placeholder
    return labels
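# Note: a copy of this label file is commonly hosted in the pytorch/hub repository.
# The exact URL below is an assumption on my part, so verify it before relying on it:
# import urllib.request
# urllib.request.urlretrieve(
#     "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt",
#     "imagenet_classes.txt")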
def show_feature_maps(activations, layer_name, num_maps_to_show=8, grid_size=(2, 4)):
    """Visualize feature maps."""
    if activations is None:
        print(f"No activations found for {layer_name}")
        return
    activations = activations.squeeze(0).cpu().numpy()  # (C, H, W)
    num_channels = activations.shape[0]
    num_maps_to_show = min(num_maps_to_show, num_channels)
    fig, axes = plt.subplots(grid_size[0], grid_size[1], figsize=(12, 6))
    axes = axes.ravel()  # flatten for easy indexing
    plt.suptitle(f'Feature Maps from {layer_name} (First {num_maps_to_show} Channels)', fontsize=16)
    for i in range(num_maps_to_show):
        if i < len(axes):
            ax = axes[i]
            feature_map = activations[i, :, :]
            ax.imshow(feature_map, cmap='viridis')  # viridis, gray, etc.
            ax.set_title(f'Channel {i+1}')
            ax.axis('off')
        else:
            break
    # Turn off any unused subplots
    for j in range(num_maps_to_show, len(axes)):
        fig.delaxes(axes[j])
    plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave room for the suptitle
    plt.show()
def generate_grad_cam_heatmap(gradient, activation, original_image_np, target_size=(224, 224)):
    """Generate a Grad-CAM heatmap and overlay it on the original image."""
    if gradient is None or activation is None:
        print("Gradients or activations are None, cannot generate Grad-CAM.")
        return None, None
    # 1. Compute the channel weights (alpha_k)
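    #    Grad-CAM weights (Selvaraju et al.): alpha_k^c = (1/Z) * sum_{i,j} dy^c / dA_{ij}^k,
    #    i.e. a global average pool of the gradients over the spatial dimensions; the map is
    #    L^c = ReLU(sum_k alpha_k^c * A^k), which the weighted sum and ReLU below implement.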
    weights = torch.mean(gradient, dim=(2, 3), keepdim=True)  # (1, C, 1, 1)
    # 2. Weighted combination of the feature maps
    cam = torch.sum(weights * activation, dim=1, keepdim=True)  # (1, 1, H, W)
    # 3. ReLU
    cam = torch.relu(cam)
    # 4. Normalize to [0, 1]
    cam_min = torch.min(cam)
    cam_max = torch.max(cam)
    cam = (cam - cam_min) / (cam_max - cam_min + 1e-8)  # avoid division by zero
    cam = cam.squeeze().cpu().numpy()  # (H, W)
    # 5. Upsample to the original image size and apply a colormap
    cam_resized = cv2.resize(cam, target_size)
    heatmap = cv2.applyColorMap(np.uint8(255 * cam_resized), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)  # Matplotlib expects RGB
    # 6. Overlay on the original image
    superimposed_img = heatmap * 0.4 + original_image_np * 0.6
    superimposed_img = np.clip(superimposed_img, 0, 255).astype(np.uint8)
    return heatmap, superimposed_img
# --- 1. Load the model and the image ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)  # use the new weights API
model.eval().to(device)
# You need an image, e.g. 'cat_dog.jpg'.
# If you do not have one, download any image (for example, search for "cat dog image").
# A placeholder path is used here; replace it with your own image path.
IMG_PATH = 'cat_dog.jpg'  # <--- change this to your image path!
try:
    img_tensor, original_pil_img = preprocess_image(IMG_PATH)
except FileNotFoundError:
    print(f"Error: Image file not found at {IMG_PATH}")
    print("Please provide a valid image path.")
    # Generate a random noise image as a placeholder so the code can still run
    print("Using a random noise image as a placeholder.")
    random_img_data = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
    original_pil_img = Image.fromarray(random_img_data)
    # Preprocess the random image
    preprocess_for_random = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img_tensor = preprocess_for_random(original_pil_img).unsqueeze(0)
img_tensor = img_tensor.to(device)
original_np_img = np.array(original_pil_img.resize((224, 224)))
imagenet_labels = load_imagenet_labels()
# --- 2. Select the target layers ---
# You can print the model structure to pick layers:
# print(model)
# For ResNet18, some reasonable choices are:
# model.conv1
# model.layer1[0].conv2 (or layer1[-1].conv2)
# model.layer2[0].conv2
# model.layer3[0].conv2
# model.layer4[0].conv2 (or layer4[-1].conv2, the last convolutional layer)
target_layers_dict = {
    "conv1": model.conv1,
    "layer1_last_conv": model.layer1[-1].conv2,
    "layer2_last_conv": model.layer2[-1].conv2,
    "layer3_last_conv": model.layer3[-1].conv2,
    "layer4_last_conv": model.layer4[-1].conv2  # the last conv layer, the usual choice for Grad-CAM
}
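# For a 224x224 input, the spatial resolution of these outputs shrinks from 112x112 (conv1)
# to 56x56 (layer1), 28x28 (layer2), 14x14 (layer3) and 7x7 (layer4). Earlier layers therefore
# give finer but less class-specific heatmaps, while layer4 gives coarse but semantically
# focused ones; this is exactly the effect the homework asks you to compare.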
# --- 3. Global stores for activations and gradients, plus the hooks ---
activations_store = {}
gradients_store = {}
def forward_hook(layer_name):
    def hook(module, input, output):
        activations_store[layer_name] = output.detach()  # .detach() matters here
    return hook
def backward_hook(layer_name):
    def hook(module, grad_input, grad_output):
        # grad_output is a tuple; we usually need its first element
        gradients_store[layer_name] = grad_output[0].detach()
    return hook
# --- 4. Process each target layer in turn ---
for layer_name, target_layer_module in target_layers_dict.items():
    print(f"\n--- Processing Layer: {layer_name} ---")
    # Clear old hooks and stored tensors (important, to avoid mixing layers up)
    activations_store.clear()
    gradients_store.clear()
    hook_handles = []
    # Register the hooks
    handle_fwd = target_layer_module.register_forward_hook(forward_hook(layer_name))
    handle_bwd = target_layer_module.register_full_backward_hook(backward_hook(layer_name))  # register_full_backward_hook gives the module's grad_output
    # handle_bwd = target_layer_module.register_backward_hook(backward_hook(layer_name))  # legacy API, occasionally needed on old PyTorch versions
    hook_handles.extend([handle_fwd, handle_bwd])
    # a. Forward pass
    model.zero_grad()  # clear any previous gradients
    output = model(img_tensor)
    # b. Get the predicted class
    pred_probabilities = torch.softmax(output, dim=1)
    pred_score, pred_class_idx = torch.max(pred_probabilities, 1)
    pred_class_idx = pred_class_idx.item()
    target_category_score = output[0, pred_class_idx]  # use the raw logit as the target score
    # c. Backward pass (compute the gradients)
    target_category_score.backward(retain_graph=True)  # retain_graph=True if you later backprop again or target multiple classes
    # d. Fetch the stored activation and gradient
    current_activation = activations_store.get(layer_name)
    current_gradient = gradients_store.get(layer_name)
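    # Both tensors have shape (1, C, H', W') for the chosen layer, where H'xW' is that
    # layer's spatial resolution (e.g. 7x7 for layer4 of ResNet18 with a 224x224 input).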
    # e. Visualize the feature maps
    print(f"Visualizing feature maps for {layer_name}...")
    if current_activation is not None:
        show_feature_maps(current_activation, layer_name, num_maps_to_show=8, grid_size=(2, 4))
    else:
        print(f"Could not retrieve activations for {layer_name}")
    # f. Generate and visualize the Grad-CAM heatmap
    print(f"Generating Grad-CAM for {layer_name}...")
    if current_gradient is not None and current_activation is not None:
        heatmap, superimposed_img = generate_grad_cam_heatmap(
            current_gradient,
            current_activation,
            original_np_img,
            target_size=(original_np_img.shape[1], original_np_img.shape[0])  # (width, height) for cv2.resize
        )
        if heatmap is not None and superimposed_img is not None:
            fig, axs = plt.subplots(1, 3, figsize=(18, 6))
            pred_label_name = imagenet_labels[pred_class_idx] if imagenet_labels else f"Class {pred_class_idx}"
            fig.suptitle(f'Grad-CAM for {layer_name}\nPredicted: {pred_label_name} ({pred_score.item():.2f})', fontsize=16)
            axs[0].imshow(original_np_img)
            axs[0].set_title('Original Image')
            axs[0].axis('off')
            axs[1].imshow(heatmap)
            axs[1].set_title('Grad-CAM Heatmap')
            axs[1].axis('off')
            axs[2].imshow(superimposed_img)
            axs[2].set_title('Superimposed Image')
            axs[2].axis('off')
            plt.tight_layout(rect=[0, 0, 1, 0.90])
            plt.show()
        else:
            print(f"Failed to generate Grad-CAM for {layer_name}")
    else:
        print(f"Missing gradients or activations for Grad-CAM on {layer_name}")
    # g. Remove the hooks (important, so they don't interfere with later runs or other models)
    for handle in hook_handles:
        handle.remove()
print("\n--- All layers processed. ---")