ONNX Runtime and TensorRT are the two inference frameworks we use most often. The two inference examples below are provided for reference only.
ONNX Runtime example
#include <iostream>
#include <vector>
#include <onnxruntime_c_api.h>
#include <onnxruntime_cxx_api.h> // for Ort::Exception
#include <cuda_runtime_api.h>    // for cudaMalloc / cudaMemcpyAsync
class ModelInfo {
public:
ModelInfo() {};
~ModelInfo() {};
public:
size_t num_input_nodes; // number of input nodes
std::vector<const char*> input_node_names; // input node names
std::vector<std::vector<int64_t>> input_node_dims; // input node dimensions
std::vector<ONNXTensorElementDataType> input_types; // input node element types
std::vector<OrtValue*> input_tensors; // input node tensors
size_t num_output_nodes; // number of output nodes
std::vector<const char*> output_node_names; // output node names
std::vector<std::vector<int64_t>> output_node_dims; // output node dimensions
std::vector<ONNXTensorElementDataType> output_types; // output node element types
std::vector<OrtValue*> output_tensors; // output node tensors
public:
inline void InitialInput() {
this->input_node_names.resize(this->num_input_nodes);
this->input_node_dims.resize(this->num_input_nodes);
this->input_types.resize(this->num_input_nodes);
this->input_tensors.resize(this->num_input_nodes);
}
inline void InitialOutput() {
this->output_node_names.resize(this->num_output_nodes);
this->output_node_dims.resize(this->num_output_nodes);
this->output_types.resize(this->num_output_nodes);
this->output_tensors.resize(this->num_output_nodes);
}
};
bool CheckStatus(const OrtApi* g_ort, OrtStatus* status) {
if (status != nullptr) {
const char* msg = g_ort->GetErrorMessage(status);
std::cerr << msg << std::endl;
g_ort->ReleaseStatus(status);
throw Ort::Exception(msg, OrtErrorCode::ORT_EP_FAIL);
}
return true;
}
void GetModelInputInfo(const OrtApi* g_ort, OrtSession* session, OrtAllocator* allocator, ModelInfo* model_info) {
//********** Input info **********//
CheckStatus(g_ort, g_ort->SessionGetInputCount(session, &model_info->num_input_nodes)); // Get input count for the session.
model_info->InitialInput();
for (size_t i = 0; i < model_info->num_input_nodes; i++) {
// Get input node names
char* input_name;
CheckStatus(g_ort, g_ort->SessionGetInputName(session, i, allocator, &input_name)); // Get the name of the current input.
model_info->input_node_names[i] = input_name;
// Get input tensor info
OrtTypeInfo* typeinfo;
CheckStatus(g_ort, g_ort->SessionGetInputTypeInfo(session, i, &typeinfo)); // Get type information of the current input.
const OrtTensorTypeAndShapeInfo* tensor_info;
CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info)); // Cast OrtTypeInfo to OrtTensorTypeAndShapeInfo.
// Get input tensor type
ONNXTensorElementDataType type;
CheckStatus(g_ort, g_ort->GetTensorElementType(tensor_info, &type)); // Get the element type from the tensor info.
model_info->input_types[i] = type;
// Get input shapes/dims
size_t num_dims;
CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims)); // Get the dimension count from the tensor info.
model_info->input_node_dims[i].resize(num_dims);
CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, model_info->input_node_dims[i].data(), num_dims)); // Get the dimensions from the tensor info.
size_t tensor_size;
CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size)); // Get the total number of elements in the tensor shape.
if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
}
}
void GetModelOutputInfo(const OrtApi* g_ort, OrtSession* session, OrtAllocator* allocator, ModelInfo* model_info) {
//********** Output info **********//
CheckStatus(g_ort, g_ort->SessionGetOutputCount(session, &model_info->num_output_nodes)); // Get output count for the session.
model_info->InitialOutput();
for (size_t i = 0; i < model_info->num_output_nodes; i++) {
// Get output node names
char* output_name;
CheckStatus(g_ort, g_ort->SessionGetOutputName(session, i, allocator, &output_name)); // Get the name of the current output.
model_info->output_node_names[i] = output_name;
// Get output tensor info
OrtTypeInfo* typeinfo;
CheckStatus(g_ort, g_ort->SessionGetOutputTypeInfo(session, i, &typeinfo)); // Get type information of the current output.
const OrtTensorTypeAndShapeInfo* tensor_info;
CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info)); // Cast OrtTypeInfo to OrtTensorTypeAndShapeInfo.
// Get output tensor type
ONNXTensorElementDataType type;
CheckStatus(g_ort, g_ort->GetTensorElementType(tensor_info, &type)); // Get the element type from the tensor info.
model_info->output_types[i] = type;
// Get output shapes/dims
size_t num_dims;
CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims)); // Get the dimension count from the tensor info.
model_info->output_node_dims[i].resize(num_dims);
CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, model_info->output_node_dims[i].data(), num_dims)); // Get the dimensions from the tensor info.
size_t tensor_size;
CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size)); // Get the total number of elements in the tensor shape.
if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
}
}
const wchar_t* model_path = L"model.onnx";
static const bool USE_CUDA = true; // toggle for the CUDA execution provider used below
int main()
{
/*
* 1. Load the model
*/
// Create the ORT environment
const OrtApiBase* ptr_api_base = OrtGetApiBase();
const OrtApi* g_ort = ptr_api_base->GetApi(ORT_API_VERSION);
OrtEnv* env = NULL;
OrtSession* session = NULL;
OrtSessionOptions* session_options = NULL;
OrtAllocator* allocator = NULL;
CheckStatus(g_ort, g_ort->CreateEnv(ORT_LOGGING_LEVEL_ERROR, "INFERENCE", &env)); // Create an OrtEnv.
CheckStatus(g_ort, g_ort->CreateSessionOptions(&session_options)); // Create an OrtSessionOptions object.
CheckStatus(g_ort, g_ort->SetIntraOpNumThreads(session_options, 0)); // Number of threads used to parallelize execution within nodes (0 = default).
CheckStatus(g_ort, g_ort->SetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_ALL)); // Set the graph optimization level applied when loading the graph.
// CUDA acceleration
if (USE_CUDA) {
//CUDA option set
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
CheckStatus(g_ort, g_ort->SessionOptionsAppendExecutionProvider_CUDA(session_options, &cuda_option)); // Append the CUDA execution provider to the session options.
}
// Create the session
CheckStatus(g_ort, g_ort->CreateSession(env, model_path, session_options, &session)); // Create an OrtSession from a model file.
CheckStatus(g_ort, g_ort->GetAllocatorWithDefaultOptions(&allocator)); // Get the default allocator.
/*
* 2. Model info overview
*/
ModelInfo* model_info = new ModelInfo;
GetModelInputInfo(g_ort, session, allocator, model_info);
GetModelOutputInfo(g_ort, session, allocator, model_info);
/*
* 3. Build the inputs and outputs
*/
// Create input/output buffers
float* host_input = nullptr;
void* host_output; // = new int64_t[imageHeight * imageWidth];
GetInputData(host_input, img_path);
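// Note (assumption, not part of the original code): GetInputData and img_path are user-provided.
// GetInputData is assumed to take host_input by reference, allocate it, and fill it with the
// preprocessed 400x424x3 float image that matches the buffer sizes used below.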
float* dev_input;
cudaMalloc((void**)&dev_input, 400 * 424 * 3 * sizeof(float));
cudaMemcpyAsync(dev_input, host_input, 400 * 424 * 3 * sizeof(float), cudaMemcpyHostToDevice);
int input_tensor_size = 400 * 424 * 3;
OrtMemoryInfo* memory_info = NULL;
//CheckStatus(g_ort, g_ort->CreateMemoryInfo("CUDA", OrtDeviceAllocator, 0, OrtMemTypeCPU, &memory_info)); // Create an OrtMemoryInfo describing GPU memory.
CheckStatus(g_ort, g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); // Create an OrtMemoryInfo for CPU memory.
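// Caution: dev_input below is device memory, while the memory_info created here describes CPU
// memory. Either pass the host buffer (host_input) with this CPU memory_info, or create an
// OrtMemoryInfo that describes CUDA device memory (as in the commented-out CreateMemoryInfo call)
// so the OrtValue is correctly labeled as device-resident.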
OrtValue* input_tensor = NULL;
OrtValue* output_tensor = NULL;
for (size_t i = 0; i < model_info->num_input_nodes; i++)
{
CheckStatus(g_ort, g_ort->CreateTensorWithDataAsOrtValue(memory_info, dev_input, input_tensor_size * sizeof(float),
model_info->input_node_dims[i].data(), model_info->input_node_dims[i].size(), model_info->input_types[i], &input_tensor)); // Create a tensor backed by a user-supplied buffer.
// TODO: store each input tensor in a container instead of reusing a single OrtValue* (see the note after this loop)
}
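// A minimal sketch (assumption, not from the original) of the TODO above: for models with several
// inputs, keep each OrtValue in ModelInfo::input_tensors and pass the whole array to Run(), e.g.
//   model_info->input_tensors[i] = input_tensor;            // inside the loop above
//   g_ort->Run(session, NULL, model_info->input_node_names.data(),
//              (const OrtValue* const*)model_info->input_tensors.data(),
//              model_info->num_input_nodes, ...);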
cudaDeviceSynchronize();
CheckStatus(g_ort, g_ort->Run(session, NULL, model_info->input_node_names.data(), (const OrtValue* const*)&input_tensor, model_info->num_input_nodes,
model_info->output_node_names.data(), model_info->num_output_nodes, &output_tensor)); // Run the model in the OrtSession.
CheckStatus(g_ort, g_ort->GetTensorMutableData(output_tensor, (void**)&host_output)); // Get a pointer to the raw data inside the output tensor; no explicit device-to-host copy is written here.
delete[] host_input;
// Release ORT objects and device memory
g_ort->ReleaseValue(output_tensor);
g_ort->ReleaseValue(input_tensor);
g_ort->ReleaseMemoryInfo(memory_info);
g_ort->ReleaseSession(session);
g_ort->ReleaseSessionOptions(session_options);
g_ort->ReleaseEnv(env);
cudaFree(dev_input);
delete model_info;
return 0;
}
TensorRT example
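The example below loads an already-serialized engine (model.trt). One common way to produce such an engine from model.onnx is the trtexec tool that ships with TensorRT; a minimal sketch (file names are placeholders):
trtexec --onnx=model.onnx --saveEngine=model.trt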
#include <fstream>
#include <iostream>
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <pcl/point_types.h>
#include <pcl/point_cloud.h>
#include "utils.h"
/*
* Logger that records TensorRT log messages
*/
class Logger : public nvinfer1::ILogger {
public:
void log(Severity severity, const char* msg) noexcept override {
using namespace std;
string s;
bool printMsg = true;
switch (severity) {
case Severity::kINTERNAL_ERROR:
s = "INTERNAL_ERROR";
break;
case Severity::kERROR:
s = "ERROR";
break;
case Severity::kWARNING:
s = "WARNING";
break;
case Severity::kINFO:
s = "INFO";
printMsg = m_printVerbose;
break;
case Severity::kVERBOSE:
s = "VERBOSE";
printMsg = m_printVerbose;
break;
}
if (printMsg)
std::cout << s << ": " << msg << endl;
}
public:
void setPrintVerbose(bool printVerbose) {
this->m_printVerbose = printVerbose;
};
private:
bool m_printVerbose = true;
};
Logger gLogger;
const wchar_t* trtModelName = L"model.trt";
int main() {
/*
* 1. Read the serialized TRT model
*/
std::ifstream trtModelFile(trtModelName, std::ios_base::in | std::ios_base::binary);
if (!trtModelFile)
throw "TRT Model Path Error!";
trtModelFile.seekg(0, std::ios::end);
int m_size = (int)trtModelFile.tellg();
trtModelFile.seekg(0, std::ios::beg);
char* p_modelBuff = new char[m_size];
trtModelFile.read(p_modelBuff, m_size);
trtModelFile.close();
/*
* 2. Build the inference environment
*/
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); // Create an IRuntime instance.
nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine((void*)p_modelBuff, m_size, NULL); // Deserialize an engine from a byte array.
if (engine == NULL)
throw "Deserialize model failed!\n";
nvinfer1::IExecutionContext* p_context = engine->createExecutionContext(); // Create an execution context.
// Create a CUDA stream
cudaStream_t stream;
cudaStreamCreate(&stream);
/*
* 3. Create the inputs and outputs
*/
pcl::PointCloud<pcl::PointXYZ>::Ptr cloud(new pcl::PointCloud<pcl::PointXYZ>);
std::string pointcloudPath = "pointcloud.xyz";
ReadCloudFromTxt(pointcloudPath, cloud);
NormalizePoints(cloud);
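// ReadCloudFromTxt and NormalizePoints come from utils.h (not shown). They are assumed to parse
// one "x y z" point per line and to normalize the cloud (e.g. center and rescale it) before
// inference; the exact preprocessing is an assumption, not part of this example.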
int size = cloud->points.size();
float* host_input = new float[size * 3];
float* host_output = new float[size * 7];
for (int i = 0; i < size; ++i) { // flatten to x y z x y z ...
host_input[i * 3] = cloud->points[i].x;
host_input[i * 3 + 1] = cloud->points[i].y;
host_input[i * 3 + 2] = cloud->points[i].z;
}
//for (int i = 0; i < engine->getNbBindings(); i++){
// nvinfer1::Dims dims = engine->getBindingDimensions(i);
// printf("index %d, dims: (");
// for (int d = 0; d < dims.nbDims; d++)
// {
// if (d < dims.nbDims - 1)
// printf("%d,", dims.d[d]);
// else
// printf("%d", dims.d[d]);
// }
// printf(")\n");
//}
void* buffers[2]{ 0 };
cudaMalloc(&buffers[0], size * 3 * sizeof(float));
cudaMalloc(&buffers[1], size * 7 * sizeof(float)); // size of the output buffer
cudaMemcpyAsync(buffers[0], host_input, size * 3 * sizeof(float), cudaMemcpyHostToDevice);
/*
* 4. Fix the dynamic input dimensions
*/
nvinfer1::Dims dims4;
dims4.nbDims = 4; // number of dimensions
dims4.d[0] = 1;
dims4.d[1] = 1;
dims4.d[2] = size;
dims4.d[3] = 3;
p_context->setBindingDimensions(0, dims4); // Set the dynamic dimensions of an input binding. Dynamic dimensions must be fixed before inference!
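// Note: binding index 0 is assumed to be the model input here; if in doubt, look it up by tensor
// name with engine->getBindingIndex(...) before calling setBindingDimensions.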
/*
* 5. Run inference
*/
p_context->enqueueV2(buffers, (cudaStream_t)stream, nullptr);
cudaStreamSynchronize(stream);
/*
* 6. Post-process the results
*/
cudaMemcpyAsync(host_output, buffers[1], size * 7 * sizeof(float), cudaMemcpyDeviceToHost);
int* label = new int[size];
for (int i = 0; i < size; i++) {
label[i] = 0; // initialize the label array
}
GetFinalLabel(host_output, label, size);
// Export the result for visualization
std::fstream fs;
fs.open("result_.txt", std::ios::out);
if (!fs)
return -1;
for (size_t i = 0; i < size; i++){
fs << cloud->points[i].x << " " << cloud->points[i].y << " " << cloud->points[i].z << " " << label[i] << std::endl;
}
fs.close();
// Free device buffers, the CUDA stream, and host memory
cudaFree(buffers[0]);
cudaFree(buffers[1]);
cudaStreamDestroy(stream);
delete[] p_modelBuff;
delete[] label;
delete[] host_input;
delete[] host_output;
std::cout << "Inference finished." << std::endl;
return 0;
}
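GetFinalLabel also comes from utils.h and is not shown above. A minimal sketch of what it is assumed to do, given that host_output holds 7 class scores per point in point-major order (both the layout and the helper body are assumptions, not the original implementation):
void GetFinalLabel(const float* host_output, int* label, int size) {
const int num_classes = 7; // matches the size * 7 output buffer above
for (int i = 0; i < size; ++i) {
const float* scores = host_output + i * num_classes;
int best = 0;
for (int c = 1; c < num_classes; ++c) { // arg-max over the per-point class scores
if (scores[c] > scores[best]) best = c;
}
label[i] = best;
}
}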