觉得好在GitHub里面给个star
1.导入依赖的包
详细代码见这里
数据处理可参考博客https://blog.csdn.net/weixin_43886213/article/details/97615527
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D,BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2
2.yolov3中最基础的模块Darknetconv2D_BN_Leaky
def Darknetconv2D_BN_Leaky(input_layer, filters_shape, downsample=False, activate=True, bn=True):
if downsample:
input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
padding = 'valid'
strides = 2
else:
strides = 1
padding = 'same'
conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size = filters_shape[0], strides=strides, padding=padding,
use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
kernel_initializer=tf.random_normal_initializer(stddev=0.01),
bias_initializer=tf.constant_initializer(0.))(input_layer)
if bn:
conv = BatchNormalization()(conv)
if activate == True:
conv = tf.nn.leaky_relu(conv, alpha=0.1)
return conv
3.resnet_block的最小单元
def res_unit(input_layer, input_channel, filter_num1, filter_num2):
short_cut = input_layer
conv = Darknetconv2D_BN_Leaky(input_layer, filters_shape=(1, 1, input_channel, filter_num1))
conv = Darknetconv2D_BN_Leaky(conv , filters_shape=(3, 3, filter_num1, filter_num2))
residual_output = short_cut + conv
return residual_output
4.res_block组成的darknet_53
def darknet53(input_data):
input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3, 3, 32))
input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3, 32, 64), downsample=True)
for i in range(1):
input_data = res_unit(input_data, 64, 32, 64)
input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3, 64, 128), downsample=True)
for i in range(2):
input_data = res_unit(input_data, 128, 64, 128)
input_data = common.convolutional(input_data, (3, 3, 128, 256), downsample=True)
for i in range(8):
input_data = res_unit(input_data, 256, 128, 256)
route_1 = input_data
input_data = common.convolutional(input_data, (3, 3, 256, 512), downsample=True)
for i in range(8):
input_data = res_unit(input_data, 512, 256, 512)
route_2 = input_data
input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3, 512, 1024), downsample=True)
for i in range(4):
input_data = res_unit(input_data, 1024, 512, 1024)
return route_1, route_2, input_data
5.上采样upsample函数
def upsample(input_layer):
return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
6.主函数yolo
def YOLOv3(input_layer):
route_1, route_2, conv = darknet53(input_layer)
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 1024, 512))
conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 512, 1024))
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 1024, 512))
conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 512, 1024))
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 1024, 512))
conv_lobj_branch = Darknetconv2D_BN_Leaky(conv, (3, 3, 512, 1024))
conv_lbbox = Darknetconv2D_BN_Leaky(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 512, 256))
conv = upsample(conv)
conv = tf.concat([conv, route_2], axis=-1)
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 768, 256))
conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 256, 512))
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 512, 256))
conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 256, 512))
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 512, 256))
conv_mobj_branch = Darknetconv2D_BN_Leaky(conv, (3, 3, 256, 512))
conv_mbbox =Darknetconv2D_BN_Leaky(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 256, 128))
conv = upsample(conv)
conv = tf.concat([conv, route_1], axis=-1)
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 384, 128))
conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 128, 256))
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 256, 128))
conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 128, 256))
conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 256, 128))
conv_sobj_branch = Darknetconv2D_BN_Leaky(conv, (3, 3, 128, 256))
conv_sbbox = Darknetconv2D_BN_Leaky(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS +5)), activate=False, bn=False)
return [conv_sbbox, conv_mbbox, conv_lbbox]#y3,y2,y1
输出y3,y2,y1
yolov3最重要的一个预测方法就是锚框(anchors)即根据预选框预测锚框的长宽和锚框的中心有无物体的概率,以及锚框的中心是哪种物体的概率
代码
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
"""Convert final layer features to bounding box parameters."""
num_anchors = len(anchors)
# Reshape to batch, height, width, num_anchors, box_params.
anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
grid_shape = K.shape(feats)[1:3] # height, width
grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
[1, grid_shape[1], 1, 1])
grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
[grid_shape[0], 1, 1, 1])
grid = K.concatenate([grid_x, grid_y])
grid = K.cast(grid, K.dtype(feats))
feats = K.reshape(
feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
# Adjust preditions to each spatial grid point and anchor size.
box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
box_confidence = K.sigmoid(feats[..., 4:5])
box_class_probs = K.sigmoid(feats[..., 5:])
if calc_loss == True:
return grid, feats, box_xy, box_wh
return box_xy, box_wh, box_confidence, box_class_probs
锚框的位置预测,通过确定锚框的左上角和右下角,来确定一个锚框的位置
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
'''Get corrected boxes'''
box_yx = box_xy[..., ::-1]
box_hw = box_wh[..., ::-1]
input_shape = K.cast(input_shape, K.dtype(box_yx))
image_shape = K.cast(image_shape, K.dtype(box_yx))
new_shape = K.round(image_shape * K.min(input_shape/image_shape))
offset = (input_shape-new_shape)/2./input_shape
scale = input_shape/new_shape
box_yx = (box_yx - offset) * scale
box_hw *= scale
box_mins = box_yx - (box_hw / 2.)
box_maxes = box_yx + (box_hw / 2.)
boxes = K.concatenate([
box_mins[..., 0:1], # y_min
box_mins[..., 1:2], # x_min
box_maxes[..., 0:1], # y_max
box_maxes[..., 1:2] # x_max
])
# Scale boxes back to original image shape.
boxes *= K.concatenate([image_shape, image_shape])
return boxes
确对锚框盒中有无物体以及是何种类别的物体的概率
def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
'''Process Conv layer output'''
box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
anchors, num_classes, input_shape)
boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
boxes = K.reshape(boxes, [-1, 4])
box_scores = box_confidence * box_class_probs
box_scores = K.reshape(box_scores, [-1, num_classes])
return boxes, box_scores
判断网格是否有物体及输出标签
def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
'''Preprocess true boxes to training input format
Parameters
----------
true_boxes: array, shape=(m, T, 5)
Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
input_shape: array-like, hw, multiples of 32
anchors: array, shape=(N, 2), wh
num_classes: integer
Returns
-------
y_true: list of array, shape like yolo_outputs, xywh are reletive value
'''
assert (true_boxes[..., 4]0
for b in range(m):
# Discard zero rows.
wh = boxes_wh[b, valid_mask[b]]
if len(wh)==0: continue
# Expand dim to apply broadcasting.
wh = np.expand_dims(wh, -2)
box_maxes = wh / 2.
box_mins = -box_maxes
intersect_mins = np.maximum(box_mins, anchor_mins)
intersect_maxes = np.minimum(box_maxes, anchor_maxes)
intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
box_area = wh[..., 0] * wh[..., 1]
anchor_area = anchors[..., 0] * anchors[..., 1]
iou = intersect_area / (box_area + anchor_area - intersect_area)
# Find best anchor for each true box
best_anchor = np.argmax(iou, axis=-1)
for t, n in enumerate(best_anchor):
for l in range(num_layers):
if n in anchor_mask[l]:
i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
k = anchor_mask[l].index(n)
c = true_boxes[b,t, 4].astype('int32')
y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
y_true[l][b, j, i, k, 4] = 1
y_true[l][b, j, i, k, 5+c] = 1
return y_true
iou的计算简单来说就是两个锚框的交集除以两个锚框的并集就是了
def box_iou(b1, b2):
'''Return iou tensor
Parameters
----------
b1: tensor, shape=(i1,...,iN, 4), xywh
b2: tensor, shape=(j, 4), xywh
Returns
-------
iou: tensor, shape=(i1,...,iN, j)
'''
# Expand dim to apply broadcasting.
b1 = K.expand_dims(b1, -2)
b1_xy = b1[..., :2]
b1_wh = b1[..., 2:4]
b1_wh_half = b1_wh/2.
b1_mins = b1_xy - b1_wh_half
b1_maxes = b1_xy + b1_wh_half
# Expand dim to apply broadcasting.
b2 = K.expand_dims(b2, 0)
b2_xy = b2[..., :2]
b2_wh = b2[..., 2:4]
b2_wh_half = b2_wh/2.
b2_mins = b2_xy - b2_wh_half
b2_maxes = b2_xy + b2_wh_half
intersect_mins = K.maximum(b1_mins, b2_mins)
intersect_maxes = K.minimum(b1_maxes, b2_maxes)
intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
b1_area = b1_wh[..., 0] * b1_wh[..., 1]
b2_area = b2_wh[..., 0] * b2_wh[..., 1]
iou = intersect_area / (b1_area + b2_area - intersect_area)
return iou
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
'''Return yolo_loss tensor
Parameters
----------
yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
y_true: list of array, the output of preprocess_true_boxes
anchors: array, shape=(N, 2), wh
num_classes: integer
ignore_thresh: float, the iou threshold whether to ignore object confidence loss
Returns
-------
loss: tensor, shape=(1,)
'''
num_layers = len(anchors)//3 # default setting
yolo_outputs = args[:num_layers]
y_true = args[num_layers:]
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
loss = 0
m = K.shape(yolo_outputs[0])[0] # batch size, tensor
mf = K.cast(m, K.dtype(yolo_outputs[0]))
for l in range(num_layers):
object_mask = y_true[l][..., 4:5]
true_class_probs = y_true[l][..., 5:]
grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
pred_box = K.concatenate([pred_xy, pred_wh])
# Darknet raw box to calculate loss.
raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]
# Find ignore mask, iterate over each of batch.
ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
object_mask_bool = K.cast(object_mask, 'bool')
def loop_body(b, ignore_mask):
true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
iou = box_iou(pred_box[b], true_box)
best_iou = K.max(iou, axis=-1)
ignore_mask = ignore_mask.write(b, K.cast(best_iou
就先到这里