Export to ONNX:

```bash
yolo export model=D:/zou/wjzz/workDoc/screwsDetech/runs/detect/train/weights/best.pt format=onnx opset=12 dynamic=True simplify=True
```
| Node | Explanation |
|---|---|
| Blue Conv | Convolution layers that extract features |
| Black Add / Mul | Position offsets and grid mapping |
| Green Gather / Shape | Pull width, height, and other dimension info out of the shape |
| Red Concat / Reshape | Concatenate tensors and reshape the output |
| Purple Softmax / Transpose | Class probabilities or layout conversion |
Because, inside the ONNX graph, this part is:

- hard to understand and hard to debug
- unfriendly to optimization and acceleration
- sometimes not even usable with GPU inference

many people export with `export(onnx, simplify=True, dynamic=False, nms=False)` and do the post-processing in their own code instead (the way you wrote it in Python).
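Before writing any post-processing, it can help to confirm what the exported model actually expects and produces. Below is a minimal sketch (not part of the original program; the class name `InspectModel` is just for illustration) that prints the input/output names and shapes with ONNX Runtime, using the same `best.onnx` path as the rest of these notes:

```csharp
using System;
using Microsoft.ML.OnnxRuntime;

class InspectModel
{
    static void Main()
    {
        using var session = new InferenceSession(
            @"D:\zou\wjzz\workDoc\screwsDetech\runs\detect\train\weights\best.onnx");

        foreach (var kv in session.InputMetadata)
            Console.WriteLine($"input  {kv.Key}: [{string.Join(", ", kv.Value.Dimensions)}] {kv.Value.ElementType}");

        foreach (var kv in session.OutputMetadata)
            Console.WriteLine($"output {kv.Key}: [{string.Join(", ", kv.Value.Dimensions)}] {kv.Value.ElementType}");

        // With nms=False the detection head is left raw, so the output should look
        // like [1, 5, 8400] (box coordinates plus score per column) rather than a
        // list of final, NMS-filtered boxes.
    }
}
```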
A tensor is just a multi-dimensional array. You can think of it as:
| Dimensionality | Example | Analogue in NumPy |
|---|---|---|
| Scalar (0-D) | 3.14 | np.array(3.14) |
| Vector (1-D) | [1, 2, 3] | np.array([1, 2, 3]) |
| Matrix (2-D) | [[1,2],[3,4]] | np.array([[1,2],[3,4]]) |
| 3-D tensor | Color image (H, W, C) | img[height][width][channel] |
| 4-D tensor | Batch of images: [batch, channel, height, width] | img[batch][channel][H][W] |
So the one-liner to remember: a tensor is an "array with dimensions", the container used to hold images, feature maps, and model inputs/outputs.
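In the C# code further down, this shows up as `DenseTensor<float>` from `Microsoft.ML.OnnxRuntime.Tensors`. A minimal sketch of creating the 4-D input tensor used in these notes and reading its shape (the class name `TensorDemo` is just for illustration):

```csharp
using System;
using Microsoft.ML.OnnxRuntime.Tensors;

class TensorDemo
{
    static void Main()
    {
        // A 4-D tensor: 1 image, 3 channels, 640x640 pixels, all zeros for now.
        var data = new float[1 * 3 * 640 * 640];
        var tensor = new DenseTensor<float>(data, new[] { 1, 3, 640, 640 });

        Console.WriteLine(string.Join(" x ", tensor.Dimensions.ToArray())); // 1 x 3 x 640 x 640
        Console.WriteLine(tensor.Length);                                   // 1228800 elements

        // Indexing follows the same [n, c, y, x] order as the shape.
        tensor[0, 2, 10, 20] = 1.0f; // channel 2, row 10, column 20
    }
}
```

The flat `float[]` backing array is exactly what the preprocessing code fills in before wrapping it in a tensor.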
✅ What are NCHW and NHWC?

They are two ways of ordering the dimensions of image data, and they show up constantly in model inputs and outputs.

| Format | Meaning | Notes | Example (YOLO) |
|---|---|---|---|
| NCHW | Batch, Channels, Height, Width | PyTorch default | [1, 3, 640, 640] |
| NHWC | Batch, Height, Width, Channels | TensorFlow default | [1, 640, 640, 3] |
For example, when a single RGB image is fed into the model:

- NCHW: channels first, then height and width: `[1, 3, 640, 640]`
- NHWC: height and width first, then channels: `[1, 640, 640, 3]`
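The difference only matters once the pixels live in a flat buffer; the index arithmetic below is a minimal sketch of where element `(n, c, y, x)` ends up under each layout (the class and method names are illustrative, not from the project code):

```csharp
static class LayoutIndex
{
    // NCHW: one channel plane after another (all R values, then all G, then all B).
    public static int Nchw(int n, int c, int y, int x, int C, int H, int W)
        => ((n * C + c) * H + y) * W + x;

    // NHWC: the channels of each pixel sit next to each other (R,G,B, R,G,B, ...).
    public static int Nhwc(int n, int y, int x, int c, int H, int W, int C)
        => ((n * H + y) * W + x) * C + c;
}
```

The preprocessing loops in the programs below compute exactly the NCHW form of this offset when they copy pixels into the 1x3x640x640 input buffer.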
Suppose you have 100 images to feed to the model, but your GPU memory is limited and cannot process all 100 at once, so you process 16 at a time. That 16 is the batch size (N).

NCHW: (16, 3, 640, 640)

- N = 16: 16 images processed per pass
- C = 3: each image has 3 channels (RGB)
- H = 640: height of 640 pixels
- W = 640: width of 640 pixels
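As a sketch only (the programs below always use a batch of 1), packing such a batch could look like this, assuming each image has already been preprocessed into a `C*H*W` float array as in the code further down; `PackBatch` is a hypothetical helper, not part of the project:

```csharp
using System;
using Microsoft.ML.OnnxRuntime.Tensors;

static class Batching
{
    // Hypothetical helper: pack preprocessed CHW images into one NCHW batch tensor.
    public static DenseTensor<float> PackBatch(float[][] chwImages, int C = 3, int H = 640, int W = 640)
    {
        int N = chwImages.Length;                 // e.g. 16
        var data = new float[N * C * H * W];
        for (int n = 0; n < N; n++)
            Array.Copy(chwImages[n], 0, data, n * C * H * W, C * H * W);
        return new DenseTensor<float>(data, new[] { N, C, H, W });
    }
}
```

Note that the exported model has to accept a batch dimension other than 1 for this to work, which is why the export command above uses `dynamic=True`.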
A first version that already runs end to end, with no optimization yet:
```csharp
using OpenCvSharp;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Generic;
using System.Linq;

class Program
{
    static void Main(string[] args)
    {
        // 1. Load the model
        var session = new InferenceSession(@"D:\zou\wjzz\workDoc\screwsDetech\runs\detect\train\weights\best.onnx");

        // 2. Read the image
        string imgPath = @"D:\zou\wjzz\workDoc\screwsDetech\test_images\screw_keshang_bad_130.png";
        Mat img = Cv2.ImRead(imgPath);
        Mat originalImg = img.Clone();
        int h0 = img.Rows;
        int w0 = img.Cols;

        // 3. Preprocess: resize to 640x640, BGR -> RGB, normalize to [0, 1]
        Mat imgResized = new Mat();
        Cv2.Resize(img, imgResized, new Size(640, 640));
        Cv2.CvtColor(imgResized, imgResized, ColorConversionCodes.BGR2RGB);
        imgResized.ConvertTo(imgResized, MatType.CV_32FC3, 1.0 / 255);

        // 4. HWC -> CHW, packed into a 1x3x640x640 tensor
        var inputData = new float[1 * 3 * 640 * 640];
        for (int y = 0; y < 640; y++)
        {
            for (int x = 0; x < 640; x++)
            {
                Vec3f pixel = imgResized.At<Vec3f>(y, x);
                inputData[0 * 3 * 640 * 640 + 0 * 640 * 640 + y * 640 + x] = pixel.Item0; // R plane at offset 0
                inputData[0 * 3 * 640 * 640 + 1 * 640 * 640 + y * 640 + x] = pixel.Item1; // G plane at offset 640*640
                inputData[0 * 3 * 640 * 640 + 2 * 640 * 640 + y * 640 + x] = pixel.Item2; // B plane at offset 640*640*2
            }
        }
        var inputTensor = new DenseTensor<float>(inputData, new[] { 1, 3, 640, 640 });

        // 5. Run inference
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(session.InputMetadata.Keys.First(), inputTensor)
        };
        using var results = session.Run(inputs);
        var output = results.First().AsTensor<float>();

        // 6. Parse the output; expected shape (1, 5, N), e.g. (1, 5, 8400)
        var dims = output.Dimensions;
        int numPreds = dims[2];
        float[,] preds = new float[numPreds, 5];
        for (int i = 0; i < 5; i++)
            for (int j = 0; j < numPreds; j++)
                preds[j, i] = output[0, i, j];

        // 7. Filter by confidence
        float confThreshold = 0.25f;
        var boxes = new List<Rect2d>();
        var scores = new List<float>();
        for (int i = 0; i < numPreds; i++)
        {
            float objness = preds[i, 4];
            if (objness > confThreshold)
            {
                float cx = preds[i, 0];
                float cy = preds[i, 1];
                float w = preds[i, 2];
                float h = preds[i, 3];
                float x1 = cx - w / 2;
                float y1 = cy - h / 2;
                float x2 = cx + w / 2;
                float y2 = cy + h / 2;
                // Scale back to the original image size
                x1 *= (float)w0 / 640; x2 *= (float)w0 / 640;
                y1 *= (float)h0 / 640; y2 *= (float)h0 / 640;
                boxes.Add(new Rect2d(x1, y1, x2 - x1, y2 - y1));
                scores.Add(objness);
            }
        }

        // 8. Draw the boxes
        for (int i = 0; i < boxes.Count; i++)
        {
            var rect = boxes[i];
            Cv2.Rectangle(originalImg, new Point(rect.X, rect.Y),
                new Point(rect.X + rect.Width, rect.Y + rect.Height), Scalar.Green, 2);
            Cv2.PutText(originalImg, $"{scores[i]:0.00}", new Point((int)rect.X, (int)rect.Y - 10),
                HersheyFonts.HersheySimplex, 0.5, Scalar.Green, 1);
        }

        // 9. Show the result
        Cv2.ImShow("Result", originalImg);
        Cv2.WaitKey();
        Cv2.DestroyAllWindows();
    }
}
```
Refactored version, with the pipeline split into Preprocess / Inference / Postprocess / NMS:

```csharp
using OpenCvSharp;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Generic;
using System.Linq;

class Program
{
    static readonly int inputWidth = 640;
    static readonly int inputHeight = 640;
    static readonly float confThreshold = 0.25f;
    static readonly float iouThreshold = 0.45f;

    static readonly InferenceSession session = new InferenceSession(
        @"D:\zou\wjzz\workDoc\screwsDetech\runs\detect\train\weights\best.onnx");

    static void Main(string[] args)
    {
        string imgPath = @"D:\zou\wjzz\workDoc\screwsDetech\test_images\screw_keshang_bad_130.png";
        Mat img = Cv2.ImRead(imgPath);
        Mat originalImg = img.Clone();
        int h0 = img.Rows;
        int w0 = img.Cols;

        var inputTensor = Preprocess(img);
        var output = Inference(inputTensor);
        var (boxes, scores) = Postprocess(output, w0, h0);
        var keepIdx = NMS(boxes, scores, iouThreshold);

        foreach (int i in keepIdx)
        {
            var box = boxes[i];
            Cv2.Rectangle(originalImg, new Point(box.X, box.Y),
                new Point(box.X + box.Width, box.Y + box.Height), Scalar.Green, 2);
            Cv2.PutText(originalImg, $"{scores[i]:0.00}", new Point((int)box.X, (int)box.Y - 10),
                HersheyFonts.HersheySimplex, 0.5, Scalar.Green, 1);
        }

        Cv2.ImShow("Result", originalImg);
        Cv2.WaitKey();
        Cv2.DestroyAllWindows();
    }

    // Resize to 640x640, BGR -> RGB, normalize to [0, 1], HWC -> CHW.
    static DenseTensor<float> Preprocess(Mat img)
    {
        Mat resized = new Mat();
        Cv2.Resize(img, resized, new Size(inputWidth, inputHeight));
        Cv2.CvtColor(resized, resized, ColorConversionCodes.BGR2RGB);
        resized.ConvertTo(resized, MatType.CV_32FC3, 1.0 / 255);

        Mat[] channels = Cv2.Split(resized);
        float[] inputData = new float[3 * inputHeight * inputWidth];
        for (int c = 0; c < 3; c++)
        {
            var indexer = channels[c].GetGenericIndexer<float>();
            for (int y = 0; y < inputHeight; y++)
                for (int x = 0; x < inputWidth; x++)
                    inputData[c * inputHeight * inputWidth + y * inputWidth + x] = indexer[y, x];
        }
        return new DenseTensor<float>(inputData, new[] { 1, 3, inputHeight, inputWidth });
    }

    static Tensor<float> Inference(DenseTensor<float> inputTensor)
    {
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(session.InputMetadata.Keys.First(), inputTensor)
        };
        using var results = session.Run(inputs);
        // Copy the output before `results` is disposed; otherwise the returned
        // tensor would reference native memory that has already been released.
        return results.First().AsTensor<float>().ToDenseTensor();
    }

    // Filter raw predictions by confidence and map boxes back to the original image size.
    static (List<Rect2d>, List<float>) Postprocess(Tensor<float> output, int w0, int h0)
    {
        var dims = output.Dimensions;   // expected (1, 5, N)
        int numPreds = dims[2];
        var boxes = new List<Rect2d>();
        var scores = new List<float>();
        for (int i = 0; i < numPreds; i++)
        {
            float obj = output[0, 4, i];
            if (obj < confThreshold) continue;
            float cx = output[0, 0, i];
            float cy = output[0, 1, i];
            float w = output[0, 2, i];
            float h = output[0, 3, i];
            float x1 = (cx - w / 2) * w0 / inputWidth;
            float y1 = (cy - h / 2) * h0 / inputHeight;
            float x2 = (cx + w / 2) * w0 / inputWidth;
            float y2 = (cy + h / 2) * h0 / inputHeight;
            boxes.Add(new Rect2d(x1, y1, x2 - x1, y2 - y1));
            scores.Add(obj);
        }
        return (boxes, scores);
    }

    // Greedy non-maximum suppression: keep the highest-scoring box, drop overlaps above the IoU threshold.
    static List<int> NMS(List<Rect2d> boxes, List<float> scores, float iouThreshold)
    {
        var indices = scores
            .Select((score, idx) => new { score, idx })
            .OrderByDescending(s => s.score)
            .Select(s => s.idx)
            .ToList();

        var keep = new List<int>();
        var removed = new bool[boxes.Count];
        for (int i = 0; i < indices.Count; i++)
        {
            int idx = indices[i];
            if (removed[idx]) continue;
            keep.Add(idx);
            for (int j = i + 1; j < indices.Count; j++)
            {
                int idx2 = indices[j];
                if (removed[idx2]) continue;
                if (ComputeIOU(boxes[idx], boxes[idx2]) > iouThreshold)
                    removed[idx2] = true;
            }
        }
        return keep;
    }

    static double ComputeIOU(Rect2d box1, Rect2d box2)
    {
        double xx1 = Math.Max(box1.X, box2.X);
        double yy1 = Math.Max(box1.Y, box2.Y);
        double xx2 = Math.Min(box1.X + box1.Width, box2.X + box2.Width);
        double yy2 = Math.Min(box1.Y + box1.Height, box2.Y + box2.Height);
        double interArea = Math.Max(0, xx2 - xx1) * Math.Max(0, yy2 - yy1);
        double unionArea = box1.Width * box1.Height + box2.Width * box2.Height - interArea;
        return interArea / unionArea;
    }
}
```