Implementing YOLOv3 with TensorFlow 2

If you find this useful, please give the project a star on GitHub.
1. Import the required packages
The full code is available here.
For data preprocessing, see the blog post at https://blog.csdn.net/weixin_43886213/article/details/97615527


import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D,BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2

2. The basic YOLOv3 building block: Darknetconv2D_BN_Leaky (Conv2D + optional BatchNormalization + LeakyReLU)

def Darknetconv2D_BN_Leaky(input_layer, filters_shape, downsample=False, activate=True, bn=True):
    # Conv2D + (optional) BatchNormalization + LeakyReLU, the basic Darknet block.
    if downsample:
        # Darknet-style downsampling: pad top/left by 1, then use a stride-2 'valid' convolution.
        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
        padding = 'valid'
        strides = 2
    else:
        strides = 1
        padding = 'same'

    conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size=filters_shape[0], strides=strides, padding=padding,
                                  use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                                  bias_initializer=tf.constant_initializer(0.))(input_layer)

    if bn:
        conv = BatchNormalization()(conv)
    if activate:
        conv = tf.nn.leaky_relu(conv, alpha=0.1)

    return conv
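As a quick sanity check (a minimal sketch, not part of the original code), calling the block on a dummy tensor shows that filters_shape[-1] sets the output channels and downsample=True halves the spatial resolution:

x = tf.zeros((1, 416, 416, 3))                                   # dummy NHWC input
y = Darknetconv2D_BN_Leaky(x, (3, 3, 3, 32))                     # same resolution, 32 channels
z = Darknetconv2D_BN_Leaky(y, (3, 3, 32, 64), downsample=True)   # stride 2 halves H and W
print(y.shape, z.shape)                                          # (1, 416, 416, 32) (1, 208, 208, 64)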

3. res_unit: the smallest unit of a residual block

def res_unit(input_layer, input_channel, filter_num1, filter_num2):
    # Residual unit: a 1x1 conv reduces the channels, a 3x3 conv restores them, then the shortcut is added.
    short_cut = input_layer
    conv = Darknetconv2D_BN_Leaky(input_layer, filters_shape=(1, 1, input_channel, filter_num1))
    conv = Darknetconv2D_BN_Leaky(conv, filters_shape=(3, 3, filter_num1, filter_num2))

    residual_output = short_cut + conv
    return residual_output

4. darknet53, built from residual blocks

def darknet53(input_data):
    input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3,  3,  32))
    input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3, 32,  64), downsample=True)
    for i in range(1):
        input_data = res_unit(input_data,  64,  32, 64)

    input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3,  64, 128), downsample=True)

    for i in range(2):
        input_data = res_unit(input_data, 128,  64, 128)

    input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3, 128, 256), downsample=True)

    for i in range(8):
        input_data = res_unit(input_data, 256, 128, 256)

    route_1 = input_data
    input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3, 256, 512), downsample=True)

    for i in range(8):
        input_data = res_unit(input_data, 512, 256, 512)

    route_2 = input_data
    input_data = Darknetconv2D_BN_Leaky(input_data, (3, 3, 512, 1024), downsample=True)

    for i in range(4):
        input_data = res_unit(input_data, 1024, 512, 1024)

    return route_1, route_2, input_data
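A hedged check of what darknet53 returns for a 416x416 input (any multiple of 32 works; run eagerly in TF 2.x): route_1 and route_2 are the intermediate feature maps that the detection heads reuse later.

x = tf.zeros((1, 416, 416, 3))                  # dummy image batch
route_1, route_2, out = darknet53(x)
print(route_1.shape, route_2.shape, out.shape)
# (1, 52, 52, 256) (1, 26, 26, 512) (1, 13, 13, 1024)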

5. The upsample function

def upsample(input_layer):
    return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
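For example, a quick eager check that a 13x13 feature map is doubled to 26x26 by nearest-neighbor resizing:

print(upsample(tf.zeros((1, 13, 13, 256))).shape)   # (1, 26, 26, 256)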

6. The main YOLOv3 function

def YOLOv3(input_layer):
    route_1, route_2, conv = darknet53(input_layer)

    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 1024,  512))
    conv = Darknetconv2D_BN_Leaky(conv, (3, 3,  512, 1024))
    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 1024,  512))
    conv = Darknetconv2D_BN_Leaky(conv, (3, 3,  512, 1024))
    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 1024,  512))

    conv_lobj_branch = Darknetconv2D_BN_Leaky(conv, (3, 3, 512, 1024))
    conv_lbbox = Darknetconv2D_BN_Leaky(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    conv = Darknetconv2D_BN_Leaky(conv, (1, 1,  512,  256))
    conv = upsample(conv)

    conv = tf.concat([conv, route_2], axis=-1)

    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 768, 256))
    conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 256, 512))
    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 512, 256))
    conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 256, 512))
    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 512, 256))

    conv_mobj_branch = Darknetconv2D_BN_Leaky(conv, (3, 3, 256, 512))
    conv_mbbox = Darknetconv2D_BN_Leaky(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 256, 128))
    conv = upsample(conv)

    conv = tf.concat([conv, route_1], axis=-1)

    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 384, 128))
    conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 128, 256))
    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 256, 128))
    conv = Darknetconv2D_BN_Leaky(conv, (3, 3, 128, 256))
    conv = Darknetconv2D_BN_Leaky(conv, (1, 1, 256, 128))

    conv_sobj_branch = Darknetconv2D_BN_Leaky(conv, (3, 3, 128, 256))
    conv_sbbox = Darknetconv2D_BN_Leaky(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    return [conv_sbbox, conv_mbbox, conv_lbbox]  # y3, y2, y1
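To inspect the three heads, the function can be wrapped into a tf.keras.Model (a minimal sketch; NUM_CLASS is assumed to be a global holding the number of classes, e.g. 80 for COCO, so each head has 3*(80+5) = 255 channels):

NUM_CLASS = 80                                    # assumed number of classes (COCO)
inputs = tf.keras.layers.Input((416, 416, 3))
conv_sbbox, conv_mbbox, conv_lbbox = YOLOv3(inputs)
model = Model(inputs, [conv_sbbox, conv_mbbox, conv_lbbox])
print(model.output_shape)
# [(None, 52, 52, 255), (None, 26, 26, 255), (None, 13, 13, 255)]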

The three outputs are y3, y2, and y1 (the small, medium, and large-scale detection heads).
The core prediction mechanism of YOLOv3 is the anchor box: starting from a set of predefined anchors, the network predicts each box's width, height, and center, the probability that the box contains an object, and the per-class probabilities.
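Concretely, for a grid cell offset (cx, cy) and an anchor of size (pw, ph), the raw network outputs (tx, ty, tw, th, to, tc) are decoded as in the original YOLOv3 paper; the divisions by the grid and input sizes normalize the results to [0, 1], which is exactly what yolo_head below computes:

bx = (sigmoid(tx) + cx) / grid_w
by = (sigmoid(ty) + cy) / grid_h
bw = pw * exp(tw) / input_w
bh = ph * exp(th) / input_h
objectness = sigmoid(to),  class probabilities = sigmoid(tc)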
The code:

def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust preditions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs

Box coordinate correction: a box is located by its top-left and bottom-right corners (y_min, x_min, y_max, x_max), scaled back to the original image shape by undoing the letterbox resize.

def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    '''Get corrected boxes'''
    box_yx = box_xy[..., ::-1]
    box_hw = box_wh[..., ::-1]
    input_shape = K.cast(input_shape, K.dtype(box_yx))
    image_shape = K.cast(image_shape, K.dtype(box_yx))
    new_shape = K.round(image_shape * K.min(input_shape/image_shape))
    offset = (input_shape-new_shape)/2./input_shape
    scale = input_shape/new_shape
    box_yx = (box_yx - offset) * scale
    box_hw *= scale

    box_mins = box_yx - (box_hw / 2.)
    box_maxes = box_yx + (box_hw / 2.)
    boxes =  K.concatenate([
        box_mins[..., 0:1],  # y_min
        box_mins[..., 1:2],  # x_min
        box_maxes[..., 0:1],  # y_max
        box_maxes[..., 1:2]  # x_max
    ])

    # Scale boxes back to original image shape.
    boxes *= K.concatenate([image_shape, image_shape])
    return boxes

Next, compute each box's scores: the probability that it contains an object multiplied by the per-class probabilities.

def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    '''Process Conv layer output'''
    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
        anchors, num_classes, input_shape)
    boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
    boxes = K.reshape(boxes, [-1, 4])
    box_scores = box_confidence * box_class_probs
    box_scores = K.reshape(box_scores, [-1, num_classes])
    return boxes, box_scores

Build the training targets: decide which grid cell and anchor are responsible for each ground-truth box and write out the label tensors (y_true).

def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
    '''Preprocess true boxes to training input format
    Parameters
    ----------
    true_boxes: array, shape=(m, T, 5)
        Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
    input_shape: array-like, hw, multiples of 32
    anchors: array, shape=(N, 2), wh
    num_classes: integer

    Returns
    -------
    y_true: list of array, shape like yolo_outputs, xywh are reletive value

    '''
    assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'
    num_layers = len(anchors)//3  # default setting
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]

    true_boxes = np.array(true_boxes, dtype='float32')
    input_shape = np.array(input_shape, dtype='int32')
    # Convert corner coordinates to center/size and normalize by the input shape.
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]

    m = true_boxes.shape[0]
    grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
    y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5+num_classes),
        dtype='float32') for l in range(num_layers)]

    # Expand dim to apply broadcasting.
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    valid_mask = boxes_wh[..., 0] > 0

    for b in range(m):
        # Discard zero rows.
        wh = boxes_wh[b, valid_mask[b]]
        if len(wh)==0: continue
        # Expand dim to apply broadcasting.
        wh = np.expand_dims(wh, -2)
        box_maxes = wh / 2.
        box_mins = -box_maxes

        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)

        # Find best anchor for each true box
        best_anchor = np.argmax(iou, axis=-1)

        for t, n in enumerate(best_anchor):
            for l in range(num_layers):
                if n in anchor_mask[l]:
                    i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
                    j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
                    k = anchor_mask[l].index(n)
                    c = true_boxes[b,t, 4].astype('int32')
                    y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
                    y_true[l][b, j, i, k, 4] = 1
                    y_true[l][b, j, i, k, 5+c] = 1

    return y_true
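A hedged usage sketch (the anchor values are the standard YOLOv3 COCO anchors, used here only for illustration): one image with a single ground-truth box produces three label tensors, one per scale.

anchors = np.array([[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                    [59, 119], [116, 90], [156, 198], [373, 326]], dtype='float32')
# One image, one box: x_min, y_min, x_max, y_max, class_id in absolute pixels.
true_boxes = np.array([[[50., 60., 200., 180., 0.]]], dtype='float32')
y_true = preprocess_true_boxes(true_boxes, (416, 416), anchors, num_classes=20)
print([y.shape for y in y_true])
# [(1, 13, 13, 3, 25), (1, 26, 26, 3, 25), (1, 52, 52, 3, 25)]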

Computing IoU is simple: it is the area of the intersection of the two boxes divided by the area of their union; a numeric check follows the function below.

def box_iou(b1, b2):
    '''Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''

    # Expand dim to apply broadcasting.
    b1 = K.expand_dims(b1, -2)
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # Expand dim to apply broadcasting.
    b2 = K.expand_dims(b2, 0)
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)

    return iou
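A quick numeric check of the definition above: two 2x2 boxes whose centers are offset by (1, 1) overlap in a 1x1 region, so IoU = 1 / (4 + 4 - 1) ≈ 0.143.

b1 = K.constant([[0., 0., 2., 2.]])   # xywh: center (0, 0), width 2, height 2
b2 = K.constant([[1., 1., 2., 2.]])   # xywh: center (1, 1), width 2, height 2
print(box_iou(b1, b2))                # [[0.143]] approximately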

The loss computation. The total loss sums four terms over the three scales: a center (xy) loss, a width/height (wh) loss, an objectness (confidence) loss with an ignore mask for predictions whose best IoU with any ground-truth box exceeds ignore_thresh, and a classification loss.

def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0] # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        # tf.while_loop replaces the TF1-era K.control_flow_ops.while_loop.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            tf.print('loss:', loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask))
    return loss

That's all for now.
