The board used in this article is the LubanCat-4, built around a Rockchip RK3588-series SoC.
The official documentation is fairly detailed, but it spans too many inconsistent versions and multiple code repositories, which makes it messy. Following a minimum-code principle, only one repository needs to be cloned; this post records the whole hands-on process.
The work splits into two main parts:
Training tutorials abound, so that part is skipped here; training yields a model in .pt format (this article uses YOLOv8).
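For completeness, a minimal training starting point with the ultralytics package might look like the sketch below (the dataset yaml name and hyperparameters are placeholders for your own setup):
# Minimal sketch: train YOLOv8 with the official ultralytics package.
# "mydata.yaml" and the hyperparameters below are placeholders.
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # start from a pretrained checkpoint
model.train(data="mydata.yaml", epochs=100, imgsz=640)
# the best weights end up under runs/detect/train*/weights/best.pt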
Although YOLO's official ultralytics library also provides an interface for exporting ONNX models directly, the exported model layout differs and does not match the code that follows, so we use the repository below instead (a fork of the official ultralytics library with the necessary modifications).
Clone the repository: https://github.com/airockchip/ultralytics_yolov8
# Set the model path in ./ultralytics/cfg/default.yaml to your own trained model
export PYTHONPATH=./
python ./ultralytics/engine/exporter.py
# When this finishes, an ONNX model is generated: yolov8n.pt produces yolov8n.onnx.
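To sanity-check the export, you can list the model's output heads with the onnx package (assuming pip install onnx; the exact output names and count depend on the fork's export logic):
import onnx

m = onnx.load("yolov8n.onnx")
print([o.name for o in m.graph.output])
# the fork exports several per-branch heads (boxes, class scores, score sum)
# instead of the single concatenated output of stock ultralytics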
Based on your Python version, download requirements_xxx.txt and rknn_toolkit2-xxx.whl from https://github.com/airockchip/rknn-toolkit2/tree/master/rknn-toolkit2/packages/x86_64 (no need to clone the whole repository; it is huge and mostly unneeded).
# Using Python 3.9 as an example
pip install -r requirements_cp39-2.3.2.txt
pip install rknn_toolkit2-2.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
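A quick way to confirm the toolkit installed correctly:
# if this import succeeds, the conversion environment is ready
from rknn.api import RKNN
print("rknn-toolkit2 OK")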
convert.py (adapted from the official example):
import sys
from rknn.api import RKNN

DATASET_PATH = './quant_dataset.txt'
DEFAULT_RKNN_PATH = './yolov8.rknn'  # output path of the converted model
DEFAULT_QUANT = True                 # whether to quantize

def parse_arg():
    if len(sys.argv) < 3:
        print("Usage: python3 {} onnx_model_path [platform] [dtype(optional)] [output_rknn_path(optional)]".format(sys.argv[0]))
        print("       platform choose from [rk3562, rk3566, rk3568, rk3576, rk3588, rv1126b, rv1109, rv1126, rk1808]")
        print("       dtype choose from [i8, fp] for [rk3562, rk3566, rk3568, rk3576, rk3588, rv1126b]")
        print("       dtype choose from [u8, fp] for [rv1109, rv1126, rk1808]")
        exit(1)

    model_path = sys.argv[1]
    platform = sys.argv[2]

    do_quant = DEFAULT_QUANT
    if len(sys.argv) > 3:
        model_type = sys.argv[3]
        if model_type not in ['i8', 'u8', 'fp']:
            print("ERROR: Invalid model type: {}".format(model_type))
            exit(1)
        elif model_type in ['i8', 'u8']:
            do_quant = True
        else:
            do_quant = False

    if len(sys.argv) > 4:
        output_path = sys.argv[4]
    else:
        output_path = DEFAULT_RKNN_PATH

    return model_path, platform, do_quant, output_path

def gen_quant_dataset():
    import glob
    import random
    # randomly pick up to 50 training images as the calibration set
    image_paths = glob.glob("./dataset/images/train/*.jpg")
    calib_paths = random.sample(image_paths, min(50, len(image_paths)))
    with open(DATASET_PATH, "w") as f:
        for path in calib_paths:
            f.write(path + "\n")

if __name__ == '__main__':
    model_path, platform, do_quant, output_path = parse_arg()
    if do_quant:
        gen_quant_dataset()

    # Create RKNN object
    rknn = RKNN(verbose=False)

    # Pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform=platform)
    print('done')

    # Load model
    print('--> Loading model')
    ret = rknn.load_onnx(model=model_path)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=do_quant, dataset=DATASET_PATH)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export rknn model')
    ret = rknn.export_rknn(output_path)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Release
    rknn.release()
If quantization is enabled, rknn.build needs a txt file listing the paths of a set of sample images; as far as I can tell, the toolkit feeds them through the model to calibrate the quantization ranges. The gen_quant_dataset() helper in the code above generates this file by sampling images from the training set.
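The generated file is simply one image path per line, e.g. (file names here are just examples):
./dataset/images/train/img_0001.jpg
./dataset/images/train/img_0042.jpg
./dataset/images/train/img_0137.jpg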
python convert.py yolov8n.onnx rk3588
# this produces yolov8n.rknn
The following three Python files are needed.
1. test.py
# test.py
import os
from rknn.api import RKNN
import cv2
import numpy as np
from yolov8 import post_process, draw
from coco_utils import COCO_test_helper

if __name__ == '__main__':
    IMG_SIZE = 640
    ONNX_MODEL = './yolov8n.onnx'
    QUANTIZE_ON = False
    DATASET = './quant_dataset.txt'
    img_path = 'dataset/images/val/test_image.jpg'

    # Create the RKNN object
    rknn = RKNN(verbose=True)

    # Configure the conversion. target_platform selects the chip (default is rk3566);
    # mean_values/std_values define the input normalization.
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
    print('done')

    # Load the ONNX model via the model path
    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build the RKNN model. Set do_quantization=True to enable quantization;
    # dataset points to the calibration image list.
    print('--> Building model')
    ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Initialize the runtime. With no target set, this simulates on the PC.
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Prepare the input image for inference
    co_helper = COCO_test_helper(enable_letter_box=True)
    img_src = cv2.imread(img_path)
    img = co_helper.letter_box(im=img_src.copy(), new_shape=(IMG_SIZE, IMG_SIZE), pad_color=(0, 0, 0))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Run inference (on the simulator, since no target was set),
    # then post-process the outputs and save the result.
    print('--> Running model')
    outputs = rknn.inference(inputs=[img])
    print("len(outputs):", len(outputs))

    boxes, classes, scores = post_process(outputs)
    img_p = img_src.copy()
    if boxes is not None:
        draw(img_p, co_helper.get_real_box(boxes), scores, classes)
    img_name = os.path.basename(img_path)
    result_path = 'output_' + img_name
    cv2.imwrite(result_path, img_p)
    print('Detection result save to {}'.format(result_path))
    print('done')

    rknn.release()
2. yolov8.py, adapted from the official example with only the necessary code kept.
import cv2
import numpy as np

OBJ_THRESH = 0.25
NMS_THRESH = 0.45
# The following two values are for mAP testing
# OBJ_THRESH = 0.001
# NMS_THRESH = 0.65

IMG_SIZE = (640, 640)  # (width, height), such as (1280, 736)

CLASSES = ("person", "bicycle", "car")
coco_id_list = [1, 2, 3]

def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with object threshold."""
    box_confidences = box_confidences.reshape(-1)
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)

    _class_pos = np.where(class_max_score * box_confidences >= OBJ_THRESH)
    scores = (class_max_score * box_confidences)[_class_pos]

    boxes = boxes[_class_pos]
    classes = classes[_class_pos]
    return boxes, classes, scores

def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.
    # Returns
        keep: ndarray, index of effective boxes.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep

def dfl(position):
    # Distribution Focal Loss (DFL)
    import torch
    x = torch.tensor(position)
    n, c, h, w = x.shape
    p_num = 4
    mc = c // p_num
    y = x.reshape(n, p_num, mc, h, w)
    y = y.softmax(2)
    acc_matrix = torch.tensor(range(mc)).float().reshape(1, 1, mc, 1, 1)
    y = (y * acc_matrix).sum(2)
    return y.numpy()

def box_process(position):
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    grid = np.concatenate((col, row), axis=1)
    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)

    position = dfl(position)
    box_xy = grid + 0.5 - position[:, 0:2, :, :]
    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
    xyxy = np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)

    return xyxy

def post_process(input_data):
    boxes, scores, classes_conf = [], [], []
    default_branch = 3
    pair_per_branch = len(input_data) // default_branch
    # The Python demo ignores the score_sum output
    for i in range(default_branch):
        boxes.append(box_process(input_data[pair_per_branch * i]))
        classes_conf.append(input_data[pair_per_branch * i + 1])
        scores.append(np.ones_like(input_data[pair_per_branch * i + 1][:, :1, :, :], dtype=np.float32))

    def sp_flatten(_in):
        ch = _in.shape[1]
        _in = _in.transpose(0, 2, 3, 1)
        return _in.reshape(-1, ch)

    boxes = [sp_flatten(_v) for _v in boxes]
    classes_conf = [sp_flatten(_v) for _v in classes_conf]
    scores = [sp_flatten(_v) for _v in scores]

    boxes = np.concatenate(boxes)
    classes_conf = np.concatenate(classes_conf)
    scores = np.concatenate(scores)

    # filter according to threshold
    boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)

    # per-class NMS
    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]
        keep = nms_boxes(b, s)

        if len(keep) != 0:
            nboxes.append(b[keep])
            nclasses.append(c[keep])
            nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)

    return boxes, classes, scores

def draw(image, boxes, scores, classes):
    for box, score, cl in zip(boxes, scores, classes):
        left, top, right, bottom = [int(_b) for _b in box]
        print("%s @ (%d %d %d %d) %.3f" % (CLASSES[cl], left, top, right, bottom, score))
        cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (left, top - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
3. coco_utils.py, taken from the official code unmodified (it lives in the py_utils directory of the rknn_model_zoo repository).
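If you only need the two methods used in this walkthrough, a minimal stand-in for the helper could look roughly like the sketch below (a simplified, hypothetical SimpleLetterBox class, not the official implementation; the official helper tracks more state and options):
import cv2
import numpy as np

class SimpleLetterBox:
    """Minimal stand-in for COCO_test_helper: letter-box an image and
    map detected boxes back to the original image coordinates."""
    def __init__(self):
        self.scale = 1.0
        self.pad = (0, 0)  # (left, top)

    def letter_box(self, im, new_shape=(640, 640), pad_color=(0, 0, 0)):
        h, w = im.shape[:2]
        r = min(new_shape[0] / h, new_shape[1] / w)
        nh, nw = int(round(h * r)), int(round(w * r))
        top = (new_shape[0] - nh) // 2
        left = (new_shape[1] - nw) // 2
        self.scale, self.pad = r, (left, top)
        out = np.full((new_shape[0], new_shape[1], 3), pad_color, dtype=im.dtype)
        out[top:top + nh, left:left + nw] = cv2.resize(im, (nw, nh))
        return out

    def get_real_box(self, boxes):
        # boxes: (N, 4) xyxy on the letter-boxed image -> original coordinates
        boxes = boxes.copy()
        boxes[:, [0, 2]] -= self.pad[0]
        boxes[:, [1, 3]] -= self.pad[1]
        return boxes / self.scale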
Note: when simulating on the PC, you cannot call rknn.load_rknn directly; you must first rknn.load_onnx and then build, as shown in the flow in test.py.
Once the three files above are ready, fill in the model path and test image in test.py and run it to see the detection result.
Connect to the board from your PC and install the dependencies on the board:
pip3 install rknn-toolkit-lite2
pip3 install opencv-python  # used to read/write images; any equivalent library works
Test script run.py:
from rknnlite.api import RKNNLite
import cv2
import os
import numpy as np
from yolov8 import post_process, draw
from coco_utils import COCO_test_helper

def test():
    img_path = 'test_image.jpg'
    IMG_SIZE = 640
    rknn_model = 'yolov8n.rknn'
    rknn_lite = RKNNLite()

    # load RKNN model
    print('--> Load RKNN model')
    ret = rknn_lite.load_rknn(rknn_model)
    if ret != 0:
        print('Load RKNN model failed')
        exit(ret)
    print('done')

    print('--> Init runtime environment')
    ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # letter-box the input image and convert BGR -> RGB
    co_helper = COCO_test_helper(enable_letter_box=True)
    img_src = cv2.imread(img_path)
    img = co_helper.letter_box(im=img_src.copy(), new_shape=(IMG_SIZE, IMG_SIZE), pad_color=(0, 0, 0))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_exp = np.expand_dims(img, axis=0)

    print('--> Running model')
    print(img_exp.shape)
    outputs = rknn_lite.inference(inputs=[img_exp])
    print("len(outputs):", len(outputs))

    boxes, classes, scores = post_process(outputs)
    img_p = img_src.copy()
    if boxes is not None:
        draw(img_p, co_helper.get_real_box(boxes), scores, classes)
    img_name = os.path.basename(img_path)
    os.makedirs('./result', exist_ok=True)  # cv2.imwrite fails silently if the directory is missing
    result_path = os.path.join('./result', img_name)
    cv2.imwrite(result_path, img_p)
    print('Detection result save to {}'.format(result_path))
    # cv2.imshow('i', img_p)
    # cv2.waitKey(3000)

if __name__ == '__main__':
    test()
Copy the exported .rknn model, the test image, and the yolov8.py and coco_utils.py files above to the board, then run python3 run.py.
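A side note on run.py: it pins inference to NPU core 0 via core_mask. The RK3588 has three NPU cores, and rknn-toolkit-lite2 exposes masks for using more of them, e.g.:
# alternative core masks on RK3588 (3 NPU cores)
ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)   # run across all three cores
# ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)  # let the runtime pick an idle core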
Note: the dfl function in yolov8.py uses torch. If you would rather not install torch on the device, it can be replaced with a NumPy version:
def dfl(position):
    """
    Distribution Focal Loss (DFL) - NumPy implementation
    Args:
        position: input tensor of shape (n, c, h, w)
    Returns:
        y: output tensor of shape (n, p_num, h, w)
    """
    x = np.array(position)
    n, c, h, w = x.shape
    p_num = 4
    mc = c // p_num
    y = x.reshape(n, p_num, mc, h, w)
    # numerically stable softmax over the distribution axis
    y_exp = np.exp(y - np.max(y, axis=2, keepdims=True))
    y_softmax = y_exp / np.sum(y_exp, axis=2, keepdims=True)
    acc_matrix = np.arange(mc).reshape(1, 1, mc, 1, 1).astype(np.float32)
    y = np.sum(y_softmax * acc_matrix, axis=2)
    return y
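Before deploying, it is worth verifying on the PC that the NumPy version agrees with the torch version. A quick check, assuming torch is available there and the snippet runs in the same file as the NumPy dfl above:
import numpy as np
import torch

def dfl_torch(position):
    # reference implementation, same math as the torch-based dfl in yolov8.py
    x = torch.tensor(position)
    n, c, h, w = x.shape
    mc = c // 4
    y = x.reshape(n, 4, mc, h, w).softmax(2)
    acc = torch.arange(mc).float().reshape(1, 1, mc, 1, 1)
    return (y * acc).sum(2).numpy()

pos = np.random.randn(1, 64, 80, 80).astype(np.float32)
print(np.allclose(dfl(pos), dfl_torch(pos), atol=1e-5))  # expect True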