提示:本文主要讲述利用SVP ACL提供的API实现板端推理,用于实现图像的目标检测。源码下载链接在文末
目录
前言
一、什么是 SVP ACL?
二、接口调用流程
2.1 基本概念
2.2 调用流程
2.3 安装CANN
2.4 根据调用流程编写代码
总结
前文已经讲过了在Hi3516上实现读取摄像头数据和模型量化。完成以上任务后,接下来就可以实现板端的模型推理了。海思提供了SVP ACL(Smart Vision Processing Advanced Computing Language)C语言API库,基于该API我们可以进行模型的推理,用于实现目标识别、图像分类等功能。
SVP ACL(Smart Vision Processing Advanced Computing Language)提供Device管理、Context管理、Stream管理、内存管理、模型加载与执行等C语言API库供用户开发图像分析工具应用,用于实现目标识别、图像分类等功能。用户可以通过第三方框架调用SVP ACL接口,以便使用SoC的计算能力;用户还可以使用SVP ACL封装实现第三方lib库,以便提供SoC的运行管理、资源管理能力。
注意:
具体调用流程及其对应接口如下:
SVP_ACL依赖CANN,具体安装方式参见《华为海思Hi3516进行ATC模型量化》
创建main.cpp
#include <cstdint>
#include <cstdio>
#include <atomic>
#include <chrono>
#include <fstream>
#include <iostream>
#include <string>
#include <thread>
#include "atomic_queue/atomic_queue.h"
#include "acl/svp_acl.h"
#include "acl/svp_acl_mdl.h"
// Logging helpers. Each macro prints one line to stdout (ERROR_LOG included),
// prefixed with its severity tag and terminated by a newline.
// `##__VA_ARGS__` is the GNU extension that drops the trailing comma when the
// macro is invoked with a format string only.
#define INFO_LOG(fmt, ...) fprintf(stdout, "[INFO] " fmt "\n", ##__VA_ARGS__)
#define WARN_LOG(fmt, ...) fprintf(stdout, "[WARN] " fmt "\n", ##__VA_ARGS__)
#define ERROR_LOG(fmt, ...) fprintf(stdout, "[ERROR] " fmt "\n", ##__VA_ARGS__)
// Status code returned by the helper functions in this file.
// SUCCESS is 0 and FAILED is 1.
enum Result {
    SUCCESS = 0,
    FAILED = 1
};
using namespace std;
using namespace atomic_queue;

// Queue capacity (must be a power of two).
constexpr size_t QUEUE_CAPACITY = 128;
// Queue carrying heap-allocated messages from producer() to consumer();
// the consumer deletes each message after reading it.
AtomicQueue<string*, QUEUE_CAPACITY> queue;
// Not referenced by the code shown in this file.
atomic<bool> done{false};

// Device id passed to svp_acl_rt_set_device / svp_acl_rt_reset_device.
int32_t deviceId_ { 0 };
// Context and stream handles; DestroyResource() destroys them when non-null.
svp_acl_rt_context context_ { nullptr };
svp_acl_rt_stream stream_ { nullptr };

// Id written by svp_acl_mdl_load_from_mem and used for execute/unload.
uint32_t modelId_ { 0 };
// Buffer (and its size) holding the model file contents; freed in Unload().
size_t modelMemSize_ { 0 };
size_t modelWeightSize_ { 0 };
void *modelMemPtr_ { nullptr };
void *modelWeightPtr_ { nullptr };
// True between a successful load and Unload().
bool loadFlag_ { false };
// Model description plus the input/output datasets handed to svp_acl_mdl_execute.
svp_acl_mdl_desc *modelDesc_ { nullptr };
svp_acl_mdl_dataset *input_ { nullptr };
svp_acl_mdl_dataset *output_ { nullptr };
// Zero-fills the first `dataSize` bytes starting at `data`.
static void InitData(int8_t* data, size_t dataSize)
{
    int8_t* cursor = data;
    int8_t* const stop = data + dataSize;
    while (cursor != stop) {
        *cursor = 0;
        ++cursor;
    }
}
static void* ReadBinFile(const std::string& fileName, uint32_t &fileSize)
{
ifstream binFile(fileName, ifstream::binary);
if (binFile.is_open() == false) {
ERROR_LOG("open file %s failed", fileName.c_str());
return nullptr;
}
binFile.seekg(0, binFile.end);
int binFileBufferLen = binFile.tellg();
if (binFileBufferLen == 0) {
ERROR_LOG("binfile is empty, filename is %s", fileName.c_str());
binFile.close();
return nullptr;
}
binFile.seekg(0, binFile.beg);
void* binFileBufferData = nullptr;
svp_acl_error ret = svp_acl_rt_malloc(&binFileBufferData, binFileBufferLen, SVP_ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != SVP_ACL_SUCCESS) {
ERROR_LOG("malloc device buffer failed. size is %u", binFileBufferLen);
binFile.close();
return nullptr;
}
InitData(static_cast(binFileBufferData), binFileBufferLen);
binFile.read(static_cast(binFileBufferData), binFileBufferLen);
binFile.close();
fileSize = static_cast(binFileBufferLen);
return binFileBufferData;
}
// Calls svp_acl_init with a null argument and reports the outcome.
// Returns SUCCESS when the call succeeds, FAILED otherwise.
Result InitResource()
{
    if (svp_acl_init(nullptr) == SVP_ACL_SUCCESS) {
        INFO_LOG("acl init success");
        return SUCCESS;
    }
    ERROR_LOG("acl init failed");
    return FAILED;
}
/**
 * Reads the model file into memory and loads it with svp_acl_mdl_load_from_mem.
 *
 * On success modelMemPtr_/modelMemSize_ describe the model buffer, modelId_
 * holds the loaded model id and loadFlag_ is set. On failure the buffer is
 * released and the globals are reset.
 *
 * @param modelPath  Path of the model file.
 * @return SUCCESS on success, FAILED otherwise.
 */
Result LoadModelFromFileWithMem(const std::string& modelPath)
{
    if (loadFlag_) {
        // Loading again would overwrite modelMemPtr_ and leak the first buffer.
        ERROR_LOG("has already loaded a model");
        return FAILED;
    }

    uint32_t fileSize = 0;
    modelMemPtr_ = ReadBinFile(modelPath, fileSize);
    if (modelMemPtr_ == nullptr) {
        ERROR_LOG("read model file failed, model file is %s", modelPath.c_str());
        modelMemSize_ = 0;
        return FAILED;
    }
    modelMemSize_ = fileSize;

    svp_acl_error ret = svp_acl_mdl_load_from_mem(modelMemPtr_, modelMemSize_, &modelId_);
    if (ret != SVP_ACL_SUCCESS) {
        svp_acl_rt_free(modelMemPtr_);
        // Do not leave a dangling pointer behind for later cleanup code.
        modelMemPtr_ = nullptr;
        modelMemSize_ = 0;
        ERROR_LOG("load model from file failed, model file is %s", modelPath.c_str());
        return FAILED;
    }

    loadFlag_ = true;
    INFO_LOG("load model %s success", modelPath.c_str());
    return SUCCESS;
}
// Creates the model description object and fills it for modelId_.
// Returns FAILED if either the creation or the query fails.
Result CreateDesc()
{
    modelDesc_ = svp_acl_mdl_create_desc();
    if (!modelDesc_) {
        ERROR_LOG("create model description failed");
        return FAILED;
    }

    const bool filled = (svp_acl_mdl_get_desc(modelDesc_, modelId_) == SVP_ACL_SUCCESS);
    if (!filled) {
        ERROR_LOG("get model description failed");
        return FAILED;
    }

    INFO_LOG("create model description success");
    return SUCCESS;
}
/**
 * Builds the output dataset: one zero-initialised buffer per model output,
 * sized and strided as reported by the model description.
 *
 * Buffers already added to output_ before a failure stay attached to it and
 * are released by DestroyOutput().
 *
 * @return SUCCESS when every output buffer was created and added, FAILED otherwise.
 */
Result CreateOutput()
{
    if (modelDesc_ == nullptr) {
        ERROR_LOG("no model description, create output failed");
        return FAILED;
    }

    output_ = svp_acl_mdl_create_dataset();
    if (output_ == nullptr) {
        ERROR_LOG("can't create dataset, create output failed");
        return FAILED;
    }

    const size_t outputSize = svp_acl_mdl_get_num_outputs(modelDesc_);
    for (size_t i = 0; i < outputSize; ++i) {
        const size_t stride = svp_acl_mdl_get_output_default_stride(modelDesc_, i);
        if (stride == 0) {
            ERROR_LOG("Error, output default stride is %zu.", stride);
            return FAILED;
        }
        const size_t bufferSize = svp_acl_mdl_get_output_size_by_index(modelDesc_, i);
        if (bufferSize == 0) {
            ERROR_LOG("Error, output size is %zu.", bufferSize);
            return FAILED;
        }

        void *outputBuffer = nullptr;
        // Post-processing reads output_ frequently, so the cached allocator
        // (svp_acl_rt_malloc_cached) is used here.
        svp_acl_error ret = svp_acl_rt_malloc_cached(&outputBuffer, bufferSize, SVP_ACL_MEM_MALLOC_NORMAL_ONLY);
        if (ret != SVP_ACL_SUCCESS) {
            ERROR_LOG("can't malloc buffer, size is %zu, create output failed", bufferSize);
            return FAILED;
        }
        InitData(static_cast<int8_t*>(outputBuffer), bufferSize);

        svp_acl_data_buffer* outputData = svp_acl_create_data_buffer(outputBuffer, bufferSize, stride);
        if (outputData == nullptr) {
            ERROR_LOG("can't create data buffer, create output failed");
            svp_acl_rt_free(outputBuffer);
            return FAILED;
        }

        ret = svp_acl_mdl_add_dataset_buffer(output_, outputData);
        if (ret != SVP_ACL_SUCCESS) {
            ERROR_LOG("can't add data buffer, create output failed");
            svp_acl_rt_free(outputBuffer);
            svp_acl_destroy_data_buffer(outputData);
            return FAILED;
        }
    }

    INFO_LOG("create model output success");
    return SUCCESS;
}
// Wraps `inputDataBuffer` in a data buffer object and appends it to input_.
// The caller keeps ownership of `inputDataBuffer` when FAILED is returned.
Result CreateInput(void *inputDataBuffer, size_t bufferSize, int stride)
{
    svp_acl_data_buffer* wrapped = svp_acl_create_data_buffer(inputDataBuffer, bufferSize, stride);
    if (wrapped != nullptr) {
        if (svp_acl_mdl_add_dataset_buffer(input_, wrapped) == SVP_ACL_SUCCESS) {
            return SUCCESS;
        }
        ERROR_LOG("add input dataset buffer failed");
        svp_acl_destroy_data_buffer(wrapped);
        wrapped = nullptr;
        return FAILED;
    }

    ERROR_LOG("can't create data buffer, create input failed");
    return FAILED;
}
// Queries the dimensions, default stride and byte size of model input `index`
// from modelDesc_. A zero stride or zero size is treated as an error.
Result GetInputStrideParam(int index, size_t& bufSize, size_t& stride, svp_acl_mdl_io_dims& dims)
{
    if (svp_acl_mdl_get_input_dims(modelDesc_, index, &dims) != SVP_ACL_SUCCESS) {
        ERROR_LOG("svp_acl_mdl_get_input_dims error!");
        return FAILED;
    }

    stride = svp_acl_mdl_get_input_default_stride(modelDesc_, index);
    if (!stride) {
        ERROR_LOG("svp_acl_mdl_get_input_default_stride error!");
        return FAILED;
    }

    bufSize = svp_acl_mdl_get_input_size_by_index(modelDesc_, index);
    if (!bufSize) {
        ERROR_LOG("svp_acl_mdl_get_input_size_by_index error!");
        return FAILED;
    }

    return SUCCESS;
}
/**
 * Allocates a zero-initialised buffer for model input `index` (size and stride
 * taken from the model description) and appends it to input_.
 *
 * @param index  Index of the model input to create a buffer for.
 * @return SUCCESS on success, FAILED otherwise; the buffer is freed on failure.
 */
Result CreateBuf(int index)
{
    size_t bufSize = 0;
    size_t bufStride = 0;
    svp_acl_mdl_io_dims inDims;
    // Helper results are `Result` values; keep them apart from svp_acl_error
    // codes instead of funnelling both through one variable.
    if (GetInputStrideParam(index, bufSize, bufStride, inDims) != SUCCESS) {
        ERROR_LOG("Error, GetInputStrideParam failed");
        return FAILED;
    }

    void *bufPtr = nullptr;
    const svp_acl_error ret = svp_acl_rt_malloc(&bufPtr, bufSize, SVP_ACL_MEM_MALLOC_NORMAL_ONLY);
    if (ret != SVP_ACL_SUCCESS) {
        ERROR_LOG("malloc device buffer failed. size is %zu", bufSize);
        return FAILED;
    }
    InitData(static_cast<int8_t*>(bufPtr), bufSize);

    // CreateInput takes the stride as int.
    if (CreateInput(bufPtr, bufSize, static_cast<int>(bufStride)) != SUCCESS) {
        ERROR_LOG("execute CreateInput failed");
        svp_acl_rt_free(bufPtr);
        return FAILED;
    }
    return SUCCESS;
}
void Unload()
{
if (!loadFlag_) {
WARN_LOG("no model had been loaded, unload failed");
return;
}
svp_acl_error ret = svp_acl_mdl_unload(modelId_);
if (ret != SVP_ACL_SUCCESS) {
ERROR_LOG("unload model failed, modelId is %u", modelId_);
}
if (modelDesc_ != nullptr) {
(void)svp_acl_mdl_destroy_desc(modelDesc_);
modelDesc_ = nullptr;
}
if (modelMemPtr_ != nullptr) {
svp_acl_rt_free(modelMemPtr_);
modelMemPtr_ = nullptr;
modelMemSize_ = 0;
}
if (modelWeightPtr_ != nullptr) {
svp_acl_rt_free(modelWeightPtr_);
modelWeightPtr_ = nullptr;
modelWeightSize_ = 0;
}
loadFlag_ = false;
INFO_LOG("unload model success, modelId is %u", modelId_);
}
void DestroyInput()
{
if (input_ == nullptr) {
return;
}
for (size_t i = 0; i < svp_acl_mdl_get_dataset_num_buffers(input_); ++i) {
svp_acl_data_buffer* dataBuffer = svp_acl_mdl_get_dataset_buffer(input_, i);
void* tmp = svp_acl_get_data_buffer_addr(dataBuffer);
svp_acl_rt_free(tmp);
svp_acl_destroy_data_buffer(dataBuffer);
}
svp_acl_mdl_destroy_dataset(input_);
input_ = nullptr;
}
void DestroyOutput()
{
if (output_ == nullptr) {
return;
}
for (size_t i = 0; i < svp_acl_mdl_get_dataset_num_buffers(output_); ++i) {
svp_acl_data_buffer* dataBuffer = svp_acl_mdl_get_dataset_buffer(output_, i);
void* data = svp_acl_get_data_buffer_addr(dataBuffer);
(void)svp_acl_rt_free(data);
(void)svp_acl_destroy_data_buffer(dataBuffer);
}
(void)svp_acl_mdl_destroy_dataset(output_);
output_ = nullptr;
}
// Tears down runtime resources in order: stream, context, device, then the
// ACL runtime itself. Each failure is logged and teardown continues.
void DestroyResource()
{
    if (stream_ != nullptr) {
        if (svp_acl_rt_destroy_stream(stream_) != SVP_ACL_SUCCESS) {
            ERROR_LOG("destroy stream failed");
        }
        stream_ = nullptr;
    }
    INFO_LOG("end to destroy stream");

    if (context_ != nullptr) {
        if (svp_acl_rt_destroy_context(context_) != SVP_ACL_SUCCESS) {
            ERROR_LOG("destroy context failed");
        }
        context_ = nullptr;
    }
    INFO_LOG("end to destroy context");

    if (svp_acl_rt_reset_device(deviceId_) != SVP_ACL_SUCCESS) {
        ERROR_LOG("reset device failed");
    }
    INFO_LOG("end to reset device is %d", deviceId_);

    if (svp_acl_finalize() != SVP_ACL_SUCCESS) {
        ERROR_LOG("finalize acl failed");
    }
    INFO_LOG("end to finalize acl");
}
// 消费者线程函数
void consumer() {
svp_acl_error ret = svp_acl_rt_set_device(deviceId_);
INFO_LOG("open device %d success", deviceId_);
if (ret != SVP_ACL_SUCCESS) {
ERROR_LOG("acl open device %d failed", deviceId_);
}
while (true) {
string* msg;
while (!queue.try_pop(msg)) {
// 队列为空,等待(可替换为条件变量等机制)
this_thread::sleep_for(chrono::milliseconds(1));
}
if (*msg == "exit") {
cout << "[Consumer] Received exit signal. Exiting.\n";
delete msg;
break;
}
cout << "[Consumer] Got message: " << *msg << endl;
delete msg;
}
}
// 生产者线程函数
void producer() {
svp_acl_error ret = svp_acl_rt_set_device(deviceId_);
if (ret != SVP_ACL_SUCCESS) {
ERROR_LOG("acl open device %d failed", deviceId_);
}
INFO_LOG("open device %d success", deviceId_);
LoadModelFromFileWithMem("model/yolov8_cpu_original.om");
CreateDesc();
CreateOutput();
input_ = svp_acl_mdl_create_dataset();
if (input_ == nullptr) {
ERROR_LOG("can't create dataset, create input failed");
}
void* inputDataBuffer = nullptr;
ret = svp_acl_rt_malloc(&inputDataBuffer, 640 * 352 * 3 / 2, SVP_ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret!= SVP_ACL_SUCCESS) {
ERROR_LOG("malloc input buffer failed");
}
uint32_t bufferSize = 640 * 352 * 3 / 2;
size_t stride = 640;
svp_acl_data_buffer* inputData = svp_acl_create_data_buffer(inputDataBuffer, bufferSize, stride);
if (inputData == nullptr) {
ERROR_LOG("can't create data buffer, create input failed");
}
ret = svp_acl_mdl_add_dataset_buffer(input_, inputData);
if (ret != SVP_ACL_SUCCESS) {
ERROR_LOG("add input dataset buffer failed");
svp_acl_destroy_data_buffer(inputData);
inputData = nullptr;
}
// 2 is stand taskbuf and workbuf
if (svp_acl_mdl_get_num_inputs(modelDesc_) <= 2) {
ERROR_LOG("input dataset Num is error.");
}
size_t datasetSize = svp_acl_mdl_get_dataset_num_buffers(input_);
if (datasetSize == 0) {
ERROR_LOG("input dataset Num is 0.");
}
for (size_t loop = datasetSize; loop < svp_acl_mdl_get_num_inputs(modelDesc_); loop++) {
Result ret = CreateBuf(loop);
if (ret != SUCCESS) {
ERROR_LOG("execute Create taskBuffer and workBuffer failed");
}
}
for (int i = 0; i < 10; ++i) {
ret = svp_acl_mdl_execute(modelId_, input_, output_);
if (ret != SVP_ACL_SUCCESS) {
ERROR_LOG("execute model failed, modelId is %u", modelId_);
}
auto* msg = new string("Message #" + to_string(i));
queue.push(msg);
cout << "[Producer] Sent: " << *msg << endl;
this_thread::sleep_for(chrono::milliseconds(1));
}
// 发送退出信号
auto* exitMsg = new string("exit");
queue.push(exitMsg);
cout << "[Producer] Sent exit signal.\n";
DestroyInput();
DestroyOutput();
}
int main() {
InitResource();
thread prod(producer);
thread cons(consumer);
prod.join();
cons.join();
cout << "All threads completed.\n";
Unload();
DestroyResource();
return 0;
}
CMakeLists.txt中添加SVP_ACL库并链接
# Build script for the `main` executable.
cmake_minimum_required(VERSION 3.10)
project(main)
# NOTE(review): the compiler is set after project() and names the gcc driver
# rather than g++; stdc++ is linked explicitly below — confirm this is intended.
set(CMAKE_CXX_COMPILER "arm-v01c02-linux-musleabi-gcc")
# Header search paths: ACL headers under $DDK_PATH and the bundled atomic_queue headers.
include_directories(
$ENV{DDK_PATH}/acllib/include/
${PROJECT_SOURCE_DIR}/atomic_queue/include/
)
# Overrides NPU_HOST_LIB for this configure run with the lib32/stub directory
# under $DDK_PATH and uses it as the library search path.
set(ENV{NPU_HOST_LIB} $ENV{DDK_PATH}/acllib/lib32/stub)
set(LIB_PATH $ENV{NPU_HOST_LIB})
MESSAGE(NPU_HOST_LIB=$ENV{NPU_HOST_LIB})
link_directories(${LIB_PATH})
add_executable(main main.cpp)
# Link the C++ runtime, math, SVP ACL and its supporting libraries.
target_link_libraries(main stdc++ m svp_acl ss_mpi_sysmem pthread securec dl protobuf-c)
注:
安装CANN会自动添加${DDK_PATH}和${NPU_HOST_LIB}环境变量
板端运行结果如下:
/workspace/install/zytracker_board_musl # ./main
[INFO] acl init success
[INFO] open device 0 success
[INFO] open device 0 success
[INFO] load model model/yolov8_cpu_original.om success
[INFO] create model description success
[INFO] create model output success
[Producer] Sent: Message #0
[Consumer] Got message: Message #0
[Producer] Sent: Message #1
[Consumer] Got message: Message #1
[Producer] Sent: Message #2
[Consumer] Got message: Message #2
[Producer] Sent: Message #3
[Consumer] Got message: Message #3
[Producer] Sent: Message #4
[Consumer] Got message: Message #4
[Producer] Sent: Message #5
[Consumer] Got message: Message #5
[Producer] Sent: Message #6
[Consumer] Got message: Message #6
[Producer] Sent: Message #7
[Consumer] Got message: Message #7
[Producer] Sent: Message #8
[Consumer] Got message: Message #8
[Producer] Sent: Message #9
[Producer] Sent exit signal.
[Consumer] Got message: Message #9
[Consumer] Received exit signal. Exiting.
All threads completed.
[INFO] unload model success, modelId is 11
[INFO] end to destroy stream
[INFO] end to destroy context
[INFO] end to reset device is 0
[INFO] end to finalize acl
本文介绍了如何在Hi3516开发板上使用SVP ACL API实现YOLOv8模型的板端推理。主要内容包括:1) SVP ACL基础概念,如Device、Context、Stream等核心组件;2) 详细的API调用流程,涵盖资源初始化、模型加载、输入输出创建等关键步骤;3) 实际代码实现,展示了多线程生产者-消费者模式下的模型推理过程;4) 环境配置和编译说明。通过SVP ACL库可以有效利用NPU计算资源,实现高效的目标检测功能。文章提供了完整的代码框架和运行结果,为开发者在海思平台上部署深度学习模型提供了实用参考。
源码下载链接
链接: https://pan.baidu.com/s/1mCCZJCeobt3sbda5hIg8QA 提取码: 2eqn