- This post is a study log from the 365天深度学习训练营 (365-Day Deep Learning Training Camp)
- Original author: K同学啊
This week's task:
Design a network architecture that combines ResNet and DenseNet, aiming to strike a balance between performance and complexity while keeping training speed comparable to DenseNet-121. The steps below lead to a new architecture, tentatively named ResDenseNet, which combines the residual connections of ResNet with the dense connections of DenseNet while keeping complexity under control.
Design approach
Combining residual and dense modules:
- At different stages of the network, use residual blocks (ResBlock) to capture shallow features.
- Toward the end of each stage, introduce dense blocks (DenseBlock) for efficient feature reuse.
- Adjust the number of channels in each layer to avoid excessive computation and memory consumption.
Bottleneck design (Bottleneck Block):
- Use a bottleneck layer in each module to reduce computational complexity.
- Compress and expand the number of feature channels with 1x1 convolutions.
Hybrid connection scheme:
- Introduce local dense connections that only link layers within the same module, avoiding the memory overhead of DenseNet's full connectivity.
- Use residual connections between modules to ease information flow.
Balancing network depth and width:
- Reduce DenseNet's growth rate so that the number of feature-map channels grows more slowly.
- Insert transition layers (Transition Layer) between modules to compress the spatial size and channel count of the feature maps. For example, with a growth rate of 16 and 4 bottleneck layers, the first stage grows from 64 to 64 + 4 × 16 = 128 channels before its transition layer halves the spatial resolution.
import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    """DenseNet-style bottleneck: BN-ReLU-1x1 conv, then BN-ReLU-3x3 conv producing growth_rate channels."""
    def __init__(self, in_channels, growth_rate):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(in_channels, 4 * growth_rate, kernel_size=1, stride=1, bias=False)
        self.bn2 = nn.BatchNorm2d(4 * growth_rate)
        self.conv2 = nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)

    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x)))    # 1x1 conv compresses the input channels
        out = self.conv2(F.relu(self.bn2(out)))  # 3x3 conv produces growth_rate new feature maps
        return torch.cat([x, out], dim=1)        # dense connection: concatenate new features with the input


class DenseBlock(nn.Module):
    def __init__(self, num_layers, in_channels, growth_rate):
        super(DenseBlock, self).__init__()
        self.layers = nn.ModuleList()
        for i in range(num_layers):
            self.layers.append(Bottleneck(in_channels + i * growth_rate, growth_rate))
        # For the residual connection, project the input so its channel count matches the block output
        self.residual = nn.Conv2d(in_channels, in_channels + num_layers * growth_rate, kernel_size=1, bias=False)

    def forward(self, x):
        identity = self.residual(x)  # align the input channels with the DenseBlock output
        for layer in self.layers:
            x = layer(x)             # local dense connections: each layer concatenates its output
        return x + identity          # residual connection: add the projected input to the output


class TransitionLayer(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TransitionLayer, self).__init__()
        self.bn = nn.BatchNorm2d(in_channels)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.conv(F.relu(self.bn(x)))  # compress the channel count
        return self.pool(x)                # halve the spatial resolution


class ResDenseNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(ResDenseNet, self).__init__()
        self.stem = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.stage1 = self._make_stage(64, 128, num_layers=4, growth_rate=16)
        self.stage2 = self._make_stage(128, 256, num_layers=4, growth_rate=16)
        self.stage3 = self._make_stage(256, 512, num_layers=6, growth_rate=12)
        self.stage4 = self._make_stage(512, 1024, num_layers=6, growth_rate=12)
        self.classifier = nn.Linear(1024, num_classes)

    def _make_stage(self, in_channels, out_channels, num_layers, growth_rate):
        dense_block = DenseBlock(num_layers, in_channels, growth_rate)
        transition = TransitionLayer(in_channels + num_layers * growth_rate, out_channels)
        return nn.Sequential(dense_block, transition)

    def forward(self, x):
        x = self.stem(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = torch.mean(x, dim=[2, 3])  # global average pooling
        return self.classifier(x)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResDenseNet().to(device)
model
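To verify that the network runs end to end and to get a rough sense of how its size compares with the DenseNet-121 target mentioned above, a quick sanity check can be run right after building the model. This is a minimal sketch that continues the script above; the comparison assumes torchvision is installed and uses its densenet121 constructor only as a reference point.

from torchvision import models

# Dummy forward pass: a batch of 2 RGB images at 224x224 resolution
dummy = torch.randn(2, 3, 224, 224).to(device)
with torch.no_grad():
    out = model(dummy)
print(out.shape)  # expected: torch.Size([2, 1000])

# Rough complexity check: compare parameter counts with torchvision's DenseNet-121
def count_params(m):
    return sum(p.numel() for p in m.parameters())

print(f"ResDenseNet parameters:  {count_params(model) / 1e6:.2f} M")
print(f"DenseNet-121 parameters: {count_params(models.densenet121()) / 1e6:.2f} M")

Parameter count is only a coarse proxy for training speed; the real comparison against DenseNet-121 should be measured as images per second on the target hardware and dataset.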
Printing the model gives the following structure:
ResDenseNet(
  (stem): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (stage1): Sequential(
    (0): DenseBlock(
      (layers): ModuleList(
        (0): Bottleneck(
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (1): Bottleneck(
          (bn1): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(80, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (2): Bottleneck(
          (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(96, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (3): Bottleneck(
          (bn1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(112, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
      )
      (residual): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
    )
    (1): TransitionLayer(
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
    )
  )
  (stage2): Sequential(
    (0): DenseBlock(
      (layers): ModuleList(
        (0): Bottleneck(
          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (1): Bottleneck(
          (bn1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(144, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1