# Depthwise-separable convolution layer. PyTorch does not ship one as a single
# layer (TensorFlow does), so it is composed here from two nn.Conv2d modules.
class DepthWiseConv2d(nn.Module):
    """Depthwise-separable 2-D convolution.

    A depthwise convolution (one filter group per input channel) is followed
    by a 1x1 pointwise convolution that maps ``in_channels`` to
    ``out_channels``.

    Args:
        in_channels: Number of channels of the input.
        out_channels: Number of channels produced by the pointwise step.
        kernel_size: Kernel size of the depthwise step (forwarded to nn.Conv2d).
        stride: Stride of the depthwise step (forwarded to nn.Conv2d).
        padding: Padding of the depthwise step (forwarded to nn.Conv2d).
        bias: Whether the pointwise convolution has a bias term. The
            depthwise convolution is always created without one.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        # Equivalent to super(DepthWiseConv2d, self).__init__().
        super().__init__()
        # groups=in_channels makes every input channel be convolved on its
        # own, so the channel count is unchanged by this step.
        self.depthwise_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=in_channels,
            bias=False,
        )
        # The 1x1 convolution mixes the channels and sets the output width.
        self.pointwise_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=bias,
        )

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise convolution."""
        x = self.depthwise_conv(x)
        x = self.pointwise_conv(x)
        return x
这里自定义一个 DepthWiseConv2d 模块,分两步完成卷积:第一步是 depthwise 卷积,通道数保持不变,所以输入和输出通道数都是 in_channels,并且需要加上 groups=in_channels,让卷积对每个通道分别进行运算;第二步是 pointwise 卷积,用 1×1 的卷积核把通道数从 in_channels 变为 out_channels。
class CNN(nn.Module):
    """Convolutional network: one regular convolution, then depthwise-separable ones.

    The network has three stages of two convolutions followed by 2x2 max
    pooling (32, 64 and 128 channels), then two fully connected layers that
    produce 10 outputs per sample.

    Args:
        activation: ``"relu"`` selects ``F.relu``; any other value selects
            ``F.selu``.
    """

    def __init__(self, activation="relu"):
        super().__init__()
        self.activation = F.relu if activation == "relu" else F.selu
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding="same")
        self.conv2 = DepthWiseConv2d(in_channels=32, out_channels=32, kernel_size=3, padding="same")
        self.pool = nn.MaxPool2d(2, 2)
        self.conv3 = DepthWiseConv2d(in_channels=32, out_channels=64, kernel_size=3, padding="same")
        self.conv4 = DepthWiseConv2d(in_channels=64, out_channels=64, kernel_size=3, padding="same")
        self.conv5 = DepthWiseConv2d(in_channels=64, out_channels=128, kernel_size=3, padding="same")
        self.conv6 = DepthWiseConv2d(in_channels=128, out_channels=128, kernel_size=3, padding="same")
        self.flatten = nn.Flatten()
        # Input is (batch_size, 1, 28, 28); the three 2x2 poolings shrink the
        # spatial size 28 -> 14 -> 7 -> 3, so fc1 receives 128 * 3 * 3 features.
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.fc2 = nn.Linear(128, 10)

        self.init_weights()

    def init_weights(self):
        """Initialise Linear and Conv2d weights with Xavier uniform and zero their biases."""
        # self.modules() also yields the nn.Conv2d layers nested inside other
        # submodules; layers created with bias=False have bias None.
        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Conv2d)):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        """Map a (batch_size, 1, 28, 28) input to (batch_size, 10) outputs."""
        act = self.activation
        # x: (batch_size, 1, 28, 28)
        x = self.pool(act(self.conv2(act(self.conv1(x)))))  # (batch_size, 32, 14, 14)
        x = self.pool(act(self.conv4(act(self.conv3(x)))))  # (batch_size, 64, 7, 7)
        x = self.pool(act(self.conv6(act(self.conv5(x)))))  # (batch_size, 128, 3, 3)
        x = self.flatten(x)  # (batch_size, 128 * 3 * 3)
        x = act(self.fc1(x))  # (batch_size, 128)
        x = self.fc2(x)  # (batch_size, 10)
        return x
# Print the number of elements in each named parameter of a freshly built CNN.
for key, value in CNN().named_parameters():
    print(f"{key}\tparameters num: {value.numel()}")
然后搭建模型:第一层使用普通卷积,后面的各层再使用深度可分离卷积,因为如果第一层也用深度可分离卷积,效果不太好。输出通道数每经过两层卷积翻一倍(32 → 64 → 128),这样效果会好一点。