在 YOLOv8 中添加注意力机制可以显著提升模型对关键特征的关注能力,从而提高检测精度。以下是几种主流注意力机制的实现方法和集成策略:
根据计算效率和效果,推荐以下几种注意力模块:
import torch
import torch.nn as nn
import math
class ECA(nn.Module):
    """Efficient Channel Attention: gate channels with a 1-D conv over pooled statistics.

    Each channel is reduced to a single value by global average pooling, a
    1-D convolution is slid across the channel axis, and the sigmoid of the
    result rescales the input channel-wise.

    Args:
        channels: Number of input channels. Only used to derive the kernel size.
        gamma: Divisor in the adaptive kernel-size formula.
        b: Offset in the adaptive kernel-size formula.
    """

    def __init__(self, channels, gamma=2, b=1):
        super(ECA, self).__init__()
        # Adaptive kernel size: grows with log2(channels) and is bumped to the
        # next odd number so the symmetric padding below preserves the length.
        # math.log2 is used rather than math.log(channels, 2) because the
        # two-argument form divides two natural logs and may land just below
        # an integer, which int() would then truncate to the wrong value.
        t = int(abs((math.log2(channels) + b) / gamma))
        kernel_size = t if t % 2 else t + 1
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(
            1,
            1,
            kernel_size=kernel_size,
            padding=(kernel_size - 1) // 2,
            bias=False,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` rescaled per channel; the output has the same shape as ``x``."""
        # Pool the two trailing (spatial) dims down to 1x1.
        pooled = self.avg_pool(x)
        # Drop the last singleton dim and swap the remaining two so the channel
        # axis becomes the sequence axis that Conv1d slides over.
        seq = pooled.squeeze(-1).transpose(-1, -2)
        # Undo the swap and restore the trailing singleton dim.
        gate = self.conv(seq).transpose(-1, -2).unsqueeze(-1)
        gate = self.sigmoid(gate)
        return x * gate.expand_as(x)
将注意力机制集成到骨干网络的 C2f 模块中:
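# ultralytics/nn/modules/block.py(C2f、Bottleneck 的定义所在文件,而非 models/yolo/detect/predict.py)
# 注意:新模块还需在 ultralytics/nn/tasks.py 的 parse_model 中注册后,才能在 YAML 配置中使用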
from .attention import ECA # 导入注意力模块
class C2f_Attention(nn.Module):
    """C2f block whose output is passed through an attention layer.

    ``Conv``, ``Bottleneck`` and ``ECA`` are not defined in this block; they
    are expected to be in scope in the enclosing module.

    Args:
        c1: Input channels.
        c2: Output channels.
        n: Number of ``Bottleneck`` layers.
        shortcut: Forwarded to each ``Bottleneck``.
        g: Forwarded to each ``Bottleneck``.
        e: Ratio used to derive the hidden width ``int(c2 * e)``.
        attn_type: ``'eca'`` to append an ``ECA`` layer, or ``None`` / ``'none'``
            to append no attention (identity).

    Raises:
        ValueError: If ``attn_type`` is not one of the supported values.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, attn_type='eca'):
        super().__init__()
        self.c = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        # cv2 consumes the two halves of cv1 plus one output per bottleneck.
        self.cv2 = Conv((2 + n) * self.c, c2, 1)
        self.m = nn.ModuleList(
            Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0)
            for _ in range(n)
        )
        # Always define self.attention so forward() never hits a missing
        # attribute; unsupported names fail here instead of on the first call.
        if attn_type == 'eca':
            self.attention = ECA(c2)
        elif attn_type is None or attn_type == 'none':
            self.attention = nn.Identity()
        else:
            raise ValueError(
                f"Unsupported attn_type {attn_type!r}; expected 'eca' or None"
            )

    def forward(self, x):
        """Run the C2f body, then apply the attention layer to its output."""
        # Split cv1's output into two chunks of self.c channels along dim 1.
        y = list(self.cv1(x).split((self.c, self.c), 1))
        # Each bottleneck consumes the most recently produced chunk.
        y.extend(m(y[-1]) for m in self.m)
        return self.attention(self.cv2(torch.cat(y, 1)))
在 ultralytics/models/v8 目录(较新版本为 ultralytics/cfg/models/v8)下找到对应的模型配置文件(如 yolov8n.yaml),将 C2f 模块替换为 C2f_Attention:
# 原配置
backbone:
[[-1, 1, Conv, [64, 3, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C2f, [128]], # 2
...
# 修改后
backbone:
[[-1, 1, Conv, [64, 3, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C2f_Attention, [128, False, 1, 0.5, 'eca']], # 2-使用带 ECA 注意力的模块(parse_model 按位置传参,依次为 shortcut, g, e, attn_type;不能使用字典形式的关键字参数)
...
class CBAM(nn.Module):
    """Convolutional Block Attention Module: a channel gate followed by a spatial gate.

    Args:
        channel: Number of input channels.
        reduction: Bottleneck ratio of the shared MLP. The hidden width is
            ``channel // reduction``, clamped to at least 1 so that small
            channel counts do not produce an empty (zero-width) MLP.
    """

    def __init__(self, channel, reduction=16):
        super(CBAM, self).__init__()
        # Without the clamp, channel < reduction yields a zero-width hidden
        # layer and the channel gate degenerates to a constant 0.5.
        hidden = max(1, channel // reduction)
        # Channel attention: one MLP shared by the avg- and max-pooled vectors.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.mlp = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
        )
        # Spatial attention: 7x7 conv over the [mean, max] channel summaries.
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply channel then spatial gating to a ``(b, c, h, w)`` tensor."""
        b, c, _, _ = x.size()
        # Channel attention
        avg_vec = self.mlp(self.avg_pool(x).view(b, c))
        max_vec = self.mlp(self.max_pool(x).view(b, c))
        channel_gate = self.sigmoid(avg_vec + max_vec).view(b, c, 1, 1)
        x = x * channel_gate
        # Spatial attention
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = self.sigmoid(self.conv(torch.cat([mean_map, max_map], dim=1)))
        return x * spatial_gate
class ShuffleAttention(nn.Module):
    """Shuffle Attention: grouped channel/spatial gating followed by a channel shuffle.

    The input channels are divided into ``G`` groups. Each group is split in
    half: one half is gated by a pooled (channel) statistic, the other by a
    group-normalised (spatial) statistic. The halves are concatenated, the
    groups are merged back, and the channels are shuffled.

    Args:
        channel: Number of input channels; must be a positive multiple of ``2 * G``.
        reduction: Unused; retained so existing call sites keep working.
        G: Number of groups.

    Raises:
        ValueError: If ``channel`` is not a positive multiple of ``2 * G``.
    """

    def __init__(self, channel=512, reduction=16, G=8):
        super().__init__()
        if G <= 0 or channel <= 0 or channel % (2 * G) != 0:
            raise ValueError(
                f"channel ({channel}) must be a positive multiple of 2 * G (G={G})"
            )
        self.G = G
        self.channel = channel
        half = channel // (2 * G)  # channels per branch within one group
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # num_groups == num_channels: every channel is normalised on its own.
        self.gn = nn.GroupNorm(half, half)
        # Zero weights and unit biases make both gates start at sigmoid(1).
        self.cweight = nn.Parameter(torch.zeros(1, half, 1, 1))
        self.cbias = nn.Parameter(torch.ones(1, half, 1, 1))
        self.sweight = nn.Parameter(torch.zeros(1, half, 1, 1))
        self.sbias = nn.Parameter(torch.ones(1, half, 1, 1))
        self.sigmoid = nn.Sigmoid()

    def channel_shuffle(self, x, groups):
        """Interleave the channels of ``x`` across ``groups`` equal chunks."""
        batchsize, num_channels, height, width = x.size()
        channels_per_group = num_channels // groups
        # reshape (not view) so non-contiguous inputs are accepted as well.
        x = x.reshape(batchsize, groups, channels_per_group, height, width)
        x = torch.transpose(x, 1, 2).contiguous()
        return x.view(batchsize, -1, height, width)

    def forward(self, x):
        """Return the attended tensor; the output shape equals the input ``(b, c, h, w)``."""
        b, c, h, w = x.size()
        # Fold the groups into the batch dim. reshape copies when needed, so
        # non-contiguous inputs (e.g. permuted tensors) do not raise.
        x = x.reshape(b * self.G, -1, h, w)  # [bG, c/G, h, w]
        # Split each group into its two branches.
        x_0, x_1 = x.chunk(2, dim=1)  # [bG, c/(2G), h, w] each
        # Channel attention branch
        x_channel = self.avg_pool(x_0)  # [bG, c/(2G), 1, 1]
        x_channel = self.cweight * x_channel + self.cbias
        x_channel = x_0 * self.sigmoid(x_channel)  # [bG, c/(2G), h, w]
        # Spatial attention branch
        x_spatial = self.gn(x_1)  # [bG, c/(2G), h, w]
        x_spatial = self.sweight * x_spatial + self.sbias
        x_spatial = x_1 * self.sigmoid(x_spatial)  # [bG, c/(2G), h, w]
        # Concatenate the branches and merge the groups back into channels
        # BEFORE shuffling: shuffling while groups are still folded into the
        # batch dim would only permute channels inside each group and never
        # exchange information between groups.
        out = torch.cat([x_channel, x_spatial], dim=1)  # [bG, c/G, h, w]
        out = out.reshape(b, c, h, w)
        return self.channel_shuffle(out, 2)
修改后需要重新训练模型:
# 使用修改后的配置训练模型
yolo train model=models/yolov8n_attention.yaml data=coco128.yaml epochs=100 imgsz=640
评估注意力机制的效果:
轻量级模型(YOLOv5n/s 或 YOLOv8n/s):
中大型模型(YOLOv5m/l/x 或 YOLOv8m/l/x):
特定场景:
通过合理集成注意力机制,YOLOv8 可以在不显著增加计算开销的情况下提升检测精度,特别是对小目标和低对比度目标的检测能力。