Step-by-Step Tutorial: Deploying a YOLO Object Detection Model on RK3588


The board used in this article is the LubanCat-4, which is based on a Rockchip RK3588-series processor.

The official documentation is fairly detailed, but it covers many inconsistent versions and spans several code repositories, which makes it somewhat messy. In the spirit of minimal code, this article records the whole practical process while cloning only a single repository.

The process splits into two parts:

  • Part A: on the PC, train a YOLO object detection model (.pt format); convert it to .onnx; finally convert it to .rknn.
  • Part B: on the board, run the .rknn model.

Part A: Producing the model in .rknn format

1. Dataset preparation and model training

There are plenty of tutorials on this, so it is omitted here; training yields a model in .pt format. (This article uses YOLOv8.) A minimal sketch is shown below for completeness.
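
A minimal training sketch with the ultralytics API might look like this (data.yaml and the hyperparameters are placeholders for your own dataset):

# Minimal YOLOv8 training sketch (assumes the ultralytics package is installed;
# data.yaml, epochs and imgsz are placeholders for your own setup).
from ultralytics import YOLO

model = YOLO('yolov8n.pt')  # start from a pretrained checkpoint
model.train(data='data.yaml', epochs=100, imgsz=640)
# The best weights are saved under runs/detect/train*/weights/best.pt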

2. Converting .pt to .onnx

Although the official ultralytics library also provides an interface to export ONNX models directly, the exported model layout differs and does not match the downstream code, so we use the repository below (a modified fork of the official ultralytics library).

Clone the repository: https://github.com/airockchip/ultralytics_yolov8

# Edit ./ultralytics/cfg/default.yaml and point the model entry at your trained model

export PYTHONPATH=./
python ./ultralytics/engine/exporter.py

# When it finishes, an ONNX model is generated: if the original model is yolov8n.pt, you get yolov8n.onnx.
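
To sanity-check the export, you can inspect the graph outputs with the onnx package (a quick sketch; the modified exporter emits several per-branch outputs, which is what the post_process code further below expects, rather than the single concatenated head of stock ultralytics):

# Print the names and shapes of the exported model's outputs (assumes `pip install onnx`).
import onnx

m = onnx.load('yolov8n.onnx')
for o in m.graph.output:
    dims = [d.dim_value for d in o.type.tensor_type.shape.dim]
    print(o.name, dims)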

3. Converting .onnx to .rknn

3.1 Environment setup

Depending on your Python version, download requirements_xxx.txt and rknn_toolkit2-xxx.whl from https://github.com/airockchip/rknn-toolkit2/tree/master/rknn-toolkit2/packages/x86_64 (no need to clone the whole repository; it is large and mostly unneeded).

# Using Python 3.9 as an example
pip install -r requirements_cp39-2.3.2.txt
pip install rknn_toolkit2-2.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
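
You can verify the installation with a quick import check (this is the same import convert.py uses below):

python -c "from rknn.api import RKNN; print('rknn-toolkit2 ok')"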

3.2 Conversion

The conversion script convert.py (adapted from the official example):

import sys
from rknn.api import RKNN

DATASET_PATH = './quant_dataset.txt'
DEFAULT_RKNN_PATH = './yolov8.rknn'  # path of the converted model
DEFAULT_QUANT = True  # whether to quantize

def parse_arg():
    if len(sys.argv) < 3:
        print("Usage: python3 {} onnx_model_path [platform] [dtype(optional)] [output_rknn_path(optional)]".format(sys.argv[0]));
        print("       platform choose from [rk3562, rk3566, rk3568, rk3576, rk3588, rv1126b, rv1109, rv1126, rk1808]")
        print("       dtype choose from [i8, fp] for [rk3562, rk3566, rk3568, rk3576, rk3588, rv1126b]")
        print("       dtype choose from [u8, fp] for [rv1109, rv1126, rk1808]")
        exit(1)

    model_path = sys.argv[1]
    platform = sys.argv[2]

    do_quant = DEFAULT_QUANT
    if len(sys.argv) > 3:
        model_type = sys.argv[3]
        if model_type not in ['i8', 'u8', 'fp']:
            print("ERROR: Invalid model type: {}".format(model_type))
            exit(1)
        elif model_type in ['i8', 'u8']:
            do_quant = True
        else:
            do_quant = False

    if len(sys.argv) > 4:
        output_path = sys.argv[4]
    else:
        output_path = DEFAULT_RKNN_PATH

    return model_path, platform, do_quant, output_path

def gen_quant_dataset():
    import glob
    import random

    # Randomly pick up to 50 training images as the calibration set
    image_paths = glob.glob("./dataset/images/train/*.jpg")
    calib_paths = random.sample(image_paths, min(50, len(image_paths)))

    with open(DATASET_PATH, "w") as f:
        for path in calib_paths:
            f.write(path + "\n")

if __name__ == '__main__':
    model_path, platform, do_quant, output_path = parse_arg()
    if do_quant:
        gen_quant_dataset()
    # Create RKNN object
    rknn = RKNN(verbose=False)

    # Pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[
                    [255, 255, 255]], target_platform=platform)
    print('done')

    # Load model
    print('--> Loading model')
    ret = rknn.load_onnx(model=model_path)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=do_quant, dataset=DATASET_PATH)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export rknn model')
    ret = rknn.export_rknn(output_path)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Release
    rknn.release()

If quantization is enabled, you need a txt file listing the paths of a number of images: the toolkit runs them through the model to collect the activation statistics used to calibrate int8 quantization. The gen_quant_dataset() method was added to the script above to generate this file.
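
The generated quant_dataset.txt is just a plain list of image paths, one per line, for example (hypothetical paths):

./dataset/images/train/0001.jpg
./dataset/images/train/0013.jpg
./dataset/images/train/0042.jpg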

python convert.py yolov8n.onnx rk3588
# After it runs, you get yolov8n.rknn
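
As parse_arg() shows, you can also pass an explicit dtype and output path:

python convert.py yolov8n.onnx rk3588 i8 ./yolov8n.rknn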

4. Simulation on the PC

The following three Python files are needed.

1. test.py

# test.py
import os
from rknn.api import RKNN
import cv2 
import numpy as np
from yolov8 import post_process, draw
from coco_utils import COCO_test_helper


if __name__ == '__main__':
    IMG_SIZE = 640
    ONNX_MODEL = './yolov8n.onnx'
    QUANTIZE_ON = False
    DATASET = './quant_dataset.txt'
    img_path = 'dataset/images/val/test_image.jpg'
    # Create the RKNN object
    rknn = RKNN(verbose=True)

    # Configure conversion parameters; target_platform selects the chip (default is rk3566,
    # so pass target_platform='rk3588' here). mean_values/std_values define input normalization.
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
    print('done')

    # Load the ONNX model; the model argument is the path to the .onnx file
    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build the RKNN model; do_quantization=True enables quantization, dataset is the calibration image list
    print('--> Building model')
    ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Call init_runtime to initialize the runtime; by default this simulates on the PC
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Prepare the input image for inference
    co_helper = COCO_test_helper(enable_letter_box=True)
    img_src = cv2.imread(img_path)
    pad_color = (0, 0, 0)
    img = co_helper.letter_box(im=img_src.copy(), new_shape=(IMG_SIZE, IMG_SIZE), pad_color=pad_color)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Run inference (no target is set, so the simulator is used), then post-process the outputs and save the result
    print('--> Running model')
    outputs = rknn.inference(inputs=[img])
    #print(outputs)
    print("len(outputs):", len(outputs))
    boxes, classes, scores = post_process(outputs)
    img_p = img_src.copy()
    if boxes is not None:
        draw(img_p, co_helper.get_real_box(boxes), scores, classes)
    img_name = os.path.basename(img_path)
    result_path = 'output_' + img_name
    cv2.imwrite(result_path, img_p)
    print('Detection result saved to {}'.format(result_path))

    print('done')
    rknn.release()

2. yolov8.py, adapted from the official version; only the necessary code is kept.

import os
import cv2
import sys
import argparse
from coco_utils import COCO_test_helper
import numpy as np


OBJ_THRESH = 0.25
NMS_THRESH = 0.45

# The following two parameters are for mAP testing
# OBJ_THRESH = 0.001
# NMS_THRESH = 0.65

IMG_SIZE = (640, 640)  # (width, height), such as (1280, 736)

CLASSES = ("person", "bicycle", "car")  # adjust to your own model's classes

coco_id_list = [1, 2, 3]  # COCO category ids matching CLASSES; adjust likewise


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with object threshold.
    """
    box_confidences = box_confidences.reshape(-1)
    candidate, class_num = box_class_probs.shape

    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)

    _class_pos = np.where(class_max_score * box_confidences >= OBJ_THRESH)
    scores = (class_max_score * box_confidences)[_class_pos]

    boxes = boxes[_class_pos]
    classes = classes[_class_pos]

    return boxes, classes, scores

def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.
    # Returns
        keep: ndarray, index of effective boxes.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep

def dfl(position):
    # Distribution Focal Loss (DFL)
    import torch
    x = torch.tensor(position)
    n,c,h,w = x.shape
    p_num = 4
    mc = c//p_num
    y = x.reshape(n,p_num,mc,h,w)
    y = y.softmax(2)
    acc_metrix = torch.tensor(range(mc)).float().reshape(1,1,mc,1,1)
    y = (y*acc_metrix).sum(2)
    return y.numpy()


def box_process(position):
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    grid = np.concatenate((col, row), axis=1)
    stride = np.array([IMG_SIZE[1]//grid_h, IMG_SIZE[0]//grid_w]).reshape(1,2,1,1)

    position = dfl(position)
    box_xy = grid + 0.5 - position[:, 0:2, :, :]
    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
    xyxy = np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)

    return xyxy

def post_process(input_data):
    boxes, scores, classes_conf = [], [], []
    default_branch = 3
    pair_per_branch = len(input_data) // default_branch
    # The Python path ignores the score_sum output
    for i in range(default_branch):
        boxes.append(box_process(input_data[pair_per_branch * i]))
        classes_conf.append(input_data[pair_per_branch * i + 1])
        scores.append(np.ones_like(input_data[pair_per_branch * i + 1][:, :1, :, :], dtype=np.float32))

    def sp_flatten(_in):
        ch = _in.shape[1]
        _in = _in.transpose(0,2,3,1)
        return _in.reshape(-1, ch)

    boxes = [sp_flatten(_v) for _v in boxes]
    classes_conf = [sp_flatten(_v) for _v in classes_conf]
    scores = [sp_flatten(_v) for _v in scores]

    boxes = np.concatenate(boxes)
    classes_conf = np.concatenate(classes_conf)
    scores = np.concatenate(scores)

    # filter according to threshold
    boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)

    # nms
    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]
        keep = nms_boxes(b, s)

        if len(keep) != 0:
            nboxes.append(b[keep])
            nclasses.append(c[keep])
            nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)

    return boxes, classes, scores


def draw(image, boxes, scores, classes):
    # boxes are in (x1, y1, x2, y2) pixel coordinates
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = [int(_b) for _b in box]
        print("%s @ (%d %d %d %d) %.3f" % (CLASSES[cl], x1, y1, x2, y2, score))
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

3. coco_utils.py: the official code, used unmodified.

Note: in PC simulation you cannot call rknn.load_rknn directly; you must first rknn.load_onnx and then build, as in the flow in test.py.

Once these three files are in place, fill in the model path and the test image in test.py and run it to see the detection result.
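
That is, with the paths filled in:

python test.py
# prints len(outputs) and saves the annotated image as output_<image name> in the current directory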

Part B: Deployment and inference on the board

1. Environment setup

Connect the PC to the board and install the dependencies:

pip3 install rknn-toolkit-lite2
pip3 install opencv-python  # used to read/write images; any equivalent library works
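
As before, a quick import check confirms the installation (this is the import run.py uses below):

python3 -c "from rknnlite.api import RKNNLite; print('rknnlite ok')"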

2. Writing the test code

The test script run.py:

from rknnlite.api import RKNNLite
import cv2
import os
import numpy as np
from yolov8 import post_process, draw
from coco_utils import COCO_test_helper


def test():
    img_path = 'test_image.jpg'
    IMG_SIZE = 640
    rknn_model = 'yolov8n.rknn'
    rknn_lite = RKNNLite()
    # Load the RKNN model
    print('--> Load RKNN model')
    ret = rknn_lite.load_rknn(rknn_model)
    if ret != 0:
        print('Load RKNN model failed')
        exit(ret)
    print('done')
    print('--> Init runtime environment')
    # Run on NPU core 0. RK3588 has three NPU cores; RKNNLite also offers
    # NPU_CORE_AUTO and NPU_CORE_0_1_2 for automatic or multi-core scheduling.
    ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')
    co_helper = COCO_test_helper(enable_letter_box=True)
    img_src = cv2.imread(img_path)
    pad_color = (0, 0, 0)
    img = co_helper.letter_box(im=img_src.copy(), new_shape=(IMG_SIZE, IMG_SIZE), pad_color=pad_color)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_exp = np.expand_dims(img, axis=0)
    print('--> Running model')
    print(img_exp.shape)
    outputs = rknn_lite.inference(inputs=[img_exp])
    #print(outputs)
    print("len(outputs):", len(outputs))
    boxes, classes, scores = post_process(outputs)
    img_p = img_src.copy()
    if boxes is not None:
        draw(img_p, co_helper.get_real_box(boxes), scores, classes)
    img_name = os.path.basename(img_path)
    os.makedirs('./result', exist_ok=True)  # cv2.imwrite fails silently if the directory is missing
    result_path = os.path.join('./result', img_name)
    cv2.imwrite(result_path, img_p)
    print('Detection result saved to {}'.format(result_path))
    #cv2.imshow('i', img_p)
    #cv2.waitKey(3000)


if __name__ == '__main__':
    test()

Copy the exported .rknn model, the image to test, and the yolov8.py and coco_utils.py files above to the board, then run python3 run.py.
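
For example, over SSH from the PC (the user name, board address, and target directory are placeholders):

scp yolov8n.rknn test_image.jpg run.py yolov8.py coco_utils.py cat@192.168.1.100:~/yolo_demo/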

Note: the dfl method in yolov8.py uses torch. If you would rather not install torch on the device, you can replace it with a NumPy implementation:

def dfl(position):
    """
    Distribution Focal Loss (DFL) decoding - NumPy implementation
    Args:
        position: input tensor of shape (n, c, h, w)
    Returns:
        y: output tensor of shape (n, p_num, h, w)
    """
    x = np.array(position)
    n, c, h, w = x.shape
    p_num = 4
    mc = c // p_num
    y = x.reshape(n, p_num, mc, h, w)
    # numerically stable softmax over the mc axis
    y_exp = np.exp(y - np.max(y, axis=2, keepdims=True))
    y_softmax = y_exp / np.sum(y_exp, axis=2, keepdims=True)
    # expectation over the discrete bins 0..mc-1
    acc_metrix = np.arange(mc).reshape(1, 1, mc, 1, 1).astype(np.float32)
    y = np.sum(y_softmax * acc_metrix, axis=2)

    return y
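
A quick way to sanity-check the replacement on the PC (a sketch assuming torch is still available there; dfl_torch is a hypothetical alias for the original torch-based dfl, renamed so both versions can coexist):

# Compare the NumPy dfl with the original torch version on random input
import numpy as np
x = np.random.randn(1, 64, 80, 80).astype(np.float32)  # c=64 -> p_num=4, mc=16
ref = dfl_torch(x)   # hypothetical alias for the torch implementation shown earlier
out = dfl(x)         # the NumPy replacement above
print(np.abs(ref - out).max())  # should be on the order of 1e-6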
