在某些情况下,使用 ML/DL 库中已经存在的模型可能会很便捷。但为了更好地控制和理解模型,你应该自己去实现它们。本文展示了如何仅使用 NumPy 库来实现 CNN。
卷积神经网络(CNN)是分析图像等多维信号的当前最优技术。目前已有很多库可以实现 CNN,如 TensorFlow 和 Keras 等。这种库仅提供一个抽象的 API,因此可以大大降低开发难度,并避免实现的复杂性,不过使用这种库的开发人员无法接触到一些细节,这些细节可能在实践中很重要。
有时,数据科学家必须仔细查看这些细节才能提高性能。在这种情况下,最好自己亲手构建此类模型,这可以帮助你最大程度地控制网络。因此在本文中,我们将仅使用 NumPy 尝试创建 CNN。我们会创建三个层,即卷积层(简称 conv)、ReLU 层和最大池化层。所涉及的主要步骤如下:
import skimage.data
from skimage import io
import matplotlib.pyplot as plt
# Reading the image
img = skimage.data.chelsea()
# Converting the image into gray.
img = skimage.color.rgb2gray(img)
print(img.shape)
io.imshow(img)
plt.show()
代码参考:https://github.com/ahmedfgad/NumPyCNN
def conv_(img, conv_filter):
filter_size = conv_filter.shape[1]
result = numpy.zeros((img.shape))
#Looping through the image to apply the convolution operation.
for r in numpy.uint16(numpy.arange(filter_size/2,
img.shape[0]-filter_size/2)):
for c in numpy.uint16(numpy.arange(filter_size/2,
img.shape[1]-filter_size/2)):
"""
Getting the current region to get multiplied with the filter.
How to loop through the image and get the region based on
the image and filer sizes is the most tricky part of convolution.
"""
curr_region = img[r-numpy.uint16(numpy.floor(filter_size/2)):r+numpy.uint16(numpy.ceil(filter_size/2)),
c-numpy.uint16(numpy.floor(filter_size/2)):c+numpy.uint16(numpy.ceil(filter_size/2))]
#Element-wise multipliplication between the current region and the filter.
curr_result = curr_region * conv_filter
conv_sum = numpy.sum(curr_result) #Summing the result of multiplication.
result[r, c] = conv_sum #Saving the summation in the convolution layer feature map.
#Clipping the outliers of the result matrix.
final_result = result[numpy.uint16(filter_size/2):result.shape[0]-numpy.uint16(filter_size/2),
numpy.uint16(filter_size/2):result.shape[1]-numpy.uint16(filter_size/2)]
return final_result
def conv(img, conv_filter):
if len(img.shape) > 2 or len(conv_filter.shape) > 3: # Check if number of image channels matches the filter depth.
if img.shape[-1] != conv_filter.shape[-1]:
print("Error: Number of channels in both image and filter must match.")
sys.exit()
if conv_filter.shape[1] != conv_filter.shape[2]: # Check if filter dimensions are equal.
print('Error: Filter must be a square matrix. I.e. number of rows and columns must match.')
sys.exit()
if conv_filter.shape[1]%2==0: # Check if filter diemnsions are odd.
print('Error: Filter must have an odd size. I.e. number of rows and columns must be odd.')
sys.exit()
# An empty feature map to hold the output of convolving the filter(s) with the image.
feature_maps = numpy.zeros((img.shape[0]-conv_filter.shape[1]+1,
img.shape[1]-conv_filter.shape[1]+1,
conv_filter.shape[0]))
# Convolving the image by the filter(s).
for filter_num in range(conv_filter.shape[0]):
print("Filter ", filter_num + 1)
curr_filter = conv_filter[filter_num, :] # getting a filter from the bank.
"""
Checking if there are mutliple channels for the single filter.
If so, then each channel will convolve the image.
The result of all convolutions are summed to return a single feature map.
"""
if len(curr_filter.shape) > 2:
conv_map = conv_(img[:, :, 0], curr_filter[:, :, 0]) # Array holding the sum of all feature maps.
for ch_num in range(1, curr_filter.shape[-1]): # Convolving each channel with the image and summing the results.
conv_map = conv_map + conv_(img[:, :, ch_num],
curr_filter[:, :, ch_num])
else: # There is just a single channel in the filter.
conv_map = conv_(img, curr_filter)
feature_maps[:, :, filter_num] = conv_map # Holding feature map with the current filter.
return feature_maps # Returning all feature maps.
def pooling(feature_map, size=2, stride=2):
#Preparing the output of the pooling operation.
pool_out = numpy.zeros((numpy.uint16((feature_map.shape[0]-size+1)/stride),
numpy.uint16((feature_map.shape[1]-size+1)/stride),
feature_map.shape[-1]))
for map_num in range(feature_map.shape[-1]):
r2 = 0
for r in numpy.arange(0,feature_map.shape[0]-size-1, stride):
c2 = 0
for c in numpy.arange(0, feature_map.shape[1]-size-1, stride):
pool_out[r2, c2, map_num] = numpy.max(feature_map[r:r+size, c:c+size])
c2 = c2 + 1
r2 = r2 +1
return pool_out
def relu(feature_map):
#Preparing the output of the ReLU activation function.
relu_out = numpy.zeros(feature_map.shape)
for map_num in range(feature_map.shape[-1]):
for r in numpy.arange(0,feature_map.shape[0]):
for c in numpy.arange(0, feature_map.shape[1]):
relu_out[r, c, map_num] = numpy.max(feature_map[r, c, map_num], 0)
return relu_out
import skimage.data
import numpy
import matplotlib
# Reading the image
#img = skimage.io.imread("test.jpg")
#img = skimage.data.checkerboard()
img = skimage.data.chelsea()
#img = skimage.data.camera()
# Converting the image into gray.
img = skimage.color.rgb2gray(img)
# First conv layer
#l1_filter = numpy.random.rand(2,7,7)*20 # Preparing the filters randomly.
l1_filter = numpy.zeros((2,3,3))
l1_filter[0, :, :] = numpy.array([[[-1, 0, 1],
[-1, 0, 1],
[-1, 0, 1]]])
l1_filter[1, :, :] = numpy.array([[[1, 1, 1],
[0, 0, 0],
[-1, -1, -1]]])
print("\n**Working with conv layer 1**")
l1_feature_map =conv(img, l1_filter)
print("\n**ReLU**")
l1_feature_map_relu =relu(l1_feature_map)
print("\n**Pooling**")
l1_feature_map_relu_pool =pooling(l1_feature_map_relu, 2, 2)
print("**End of conv layer 1**\n")
# Second conv layer
l2_filter = numpy.random.rand(3, 5, 5, l1_feature_map_relu_pool.shape[-1])
print("\n**Working with conv layer 2**")
l2_feature_map = conv(l1_feature_map_relu_pool, l2_filter)
print("\n**ReLU**")
l2_feature_map_relu = relu(l2_feature_map)
print("\n**Pooling**")
l2_feature_map_relu_pool = pooling(l2_feature_map_relu, 2, 2)
print("**End of conv layer 2**\n")
# Third conv layer
l3_filter = numpy.random.rand(1, 7, 7, l2_feature_map_relu_pool.shape[-1])
print("\n**Working with conv layer 3**")
l3_feature_map =conv(l2_feature_map_relu_pool, l3_filter)
print("\n**ReLU**")
l3_feature_map_relu =relu(l3_feature_map)
print("\n**Pooling**")
l3_feature_map_relu_pool =pooling(l3_feature_map_relu, 2, 2)
print("**End of conv layer 3**\n")
# Graphing results
fig0, ax0 = matplotlib.pyplot.subplots(nrows=1, ncols=1)
ax0.imshow(img).set_cmap("gray")
ax0.set_title("Input Image")
ax0.get_xaxis().set_ticks([])
ax0.get_yaxis().set_ticks([])
matplotlib.pyplot.savefig("in_img.png", bbox_inches="tight")
matplotlib.pyplot.close(fig0)
# Layer 1
fig1, ax1 = matplotlib.pyplot.subplots(nrows=3, ncols=2)
ax1[0, 0].imshow(l1_feature_map[:, :, 0]).set_cmap("gray")
ax1[0, 0].get_xaxis().set_ticks([])
ax1[0, 0].get_yaxis().set_ticks([])
ax1[0, 0].set_title("L1-Map1")
ax1[0, 1].imshow(l1_feature_map[:, :, 1]).set_cmap("gray")
ax1[0, 1].get_xaxis().set_ticks([])
ax1[0, 1].get_yaxis().set_ticks([])
ax1[0, 1].set_title("L1-Map2")
ax1[1, 0].imshow(l1_feature_map_relu[:, :, 0]).set_cmap("gray")
ax1[1, 0].get_xaxis().set_ticks([])
ax1[1, 0].get_yaxis().set_ticks([])
ax1[1, 0].set_title("L1-Map1ReLU")
ax1[1, 1].imshow(l1_feature_map_relu[:, :, 1]).set_cmap("gray")
ax1[1, 1].get_xaxis().set_ticks([])
ax1[1, 1].get_yaxis().set_ticks([])
ax1[1, 1].set_title("L1-Map2ReLU")
ax1[2, 0].imshow(l1_feature_map_relu_pool[:, :, 0]).set_cmap("gray")
ax1[2, 0].get_xaxis().set_ticks([])
ax1[2, 0].get_yaxis().set_ticks([])
ax1[2, 0].set_title("L1-Map1ReLUPool")
ax1[2, 1].imshow(l1_feature_map_relu_pool[:, :, 1]).set_cmap("gray")
ax1[2, 0].get_xaxis().set_ticks([])
ax1[2, 0].get_yaxis().set_ticks([])
ax1[2, 1].set_title("L1-Map2ReLUPool")
matplotlib.pyplot.savefig("L1.png", bbox_inches="tight")
matplotlib.pyplot.close(fig1)
# Layer 2
fig2, ax2 = matplotlib.pyplot.subplots(nrows=3, ncols=3)
ax2[0, 0].imshow(l2_feature_map[:, :, 0]).set_cmap("gray")
ax2[0, 0].get_xaxis().set_ticks([])
ax2[0, 0].get_yaxis().set_ticks([])
ax2[0, 0].set_title("L2-Map1")
ax2[0, 1].imshow(l2_feature_map[:, :, 1]).set_cmap("gray")
ax2[0, 1].get_xaxis().set_ticks([])
ax2[0, 1].get_yaxis().set_ticks([])
ax2[0, 1].set_title("L2-Map2")
ax2[0, 2].imshow(l2_feature_map[:, :, 2]).set_cmap("gray")
ax2[0, 2].get_xaxis().set_ticks([])
ax2[0, 2].get_yaxis().set_ticks([])
ax2[0, 2].set_title("L2-Map3")
ax2[1, 0].imshow(l2_feature_map_relu[:, :, 0]).set_cmap("gray")
ax2[1, 0].get_xaxis().set_ticks([])
ax2[1, 0].get_yaxis().set_ticks([])
ax2[1, 0].set_title("L2-Map1ReLU")
ax2[1, 1].imshow(l2_feature_map_relu[:, :, 1]).set_cmap("gray")
ax2[1, 1].get_xaxis().set_ticks([])
ax2[1, 1].get_yaxis().set_ticks([])
ax2[1, 1].set_title("L2-Map2ReLU")
ax2[1, 2].imshow(l2_feature_map_relu[:, :, 2]).set_cmap("gray")
ax2[1, 2].get_xaxis().set_ticks([])
ax2[1, 2].get_yaxis().set_ticks([])
ax2[1, 2].set_title("L2-Map3ReLU")
ax2[2, 0].imshow(l2_feature_map_relu_pool[:, :, 0]).set_cmap("gray")
ax2[2, 0].get_xaxis().set_ticks([])
ax2[2, 0].get_yaxis().set_ticks([])
ax2[2, 0].set_title("L2-Map1ReLUPool")
ax2[2, 1].imshow(l2_feature_map_relu_pool[:, :, 1]).set_cmap("gray")
ax2[2, 1].get_xaxis().set_ticks([])
ax2[2, 1].get_yaxis().set_ticks([])
ax2[2, 1].set_title("L2-Map2ReLUPool")
ax2[2, 2].imshow(l2_feature_map_relu_pool[:, :, 2]).set_cmap("gray")
ax2[2, 2].get_xaxis().set_ticks([])
ax2[2, 2].get_yaxis().set_ticks([])
ax2[2, 2].set_title("L2-Map3ReLUPool")
matplotlib.pyplot.savefig("L2.png", bbox_inches="tight")
matplotlib.pyplot.close(fig2)
# Layer 3
fig3, ax3 = matplotlib.pyplot.subplots(nrows=1, ncols=3)
ax3[0].imshow(l3_feature_map[:, :, 0]).set_cmap("gray")
ax3[0].get_xaxis().set_ticks([])
ax3[0].get_yaxis().set_ticks([])
ax3[0].set_title("L3-Map1")
ax3[1].imshow(l3_feature_map_relu[:, :, 0]).set_cmap("gray")
ax3[1].get_xaxis().set_ticks([])
ax3[1].get_yaxis().set_ticks([])
ax3[1].set_title("L3-Map1ReLU")
ax3[2].imshow(l3_feature_map_relu_pool[:, :, 0]).set_cmap("gray")
ax3[2].get_xaxis().set_ticks([])
ax3[2].get_yaxis().set_ticks([])
ax3[2].set_title("L3-Map1ReLUPool")
matplotlib.pyplot.savefig("L3.png", bbox_inches="tight")
matplotlib.pyplot.close(fig3)