Mastering YOLOv10: Multi-Scale, Multi-Channel Feature Maps

You can inspect, and build your own tooling on top of, YOLOv10's feature maps inside the forward method of the AutoBackend class in yolov10-main/ultralytics/nn/autobackend.py.

In the full code listing at the end of this post, the following three toggles appear:

    activate_state = 0
    if activate_state == 1:

    save_feature_map = 0
    if save_feature_map == 1 and torch.sum(im) > 0:

    big_feature_map = 0
    if big_feature_map == 1 and torch.sum(im) > 0:

These three blocks are my own additions.
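Judging from the indexing these blocks use (y['one2one'][1][0] has shape (1, 68, 80, 80), and so on), with the native PyTorch backend the raw output y is a dict whose 'one2one' entry holds, at index 1, the three pyramid-level feature maps at strides 8/16/32. That is an inference from the code rather than a documented API, so here is a minimal probe you can drop in right after y = self.model(...) to confirm the structure on your own model:

    # Minimal probe (assumption: PyTorch backend, so y is the dict indexed below)
    if isinstance(y, dict) and 'one2one' in y:
        for k, t in enumerate(y['one2one'][1]):
            print(f'scale {k}: {tuple(t.shape)}')  # e.g. (1, 68, 80, 80), (1, 68, 40, 40), (1, 68, 20, 20)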

① With activate_state set to 1, the per-channel mean activations of the feature maps are saved to an Excel file:

[Figure 1]
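If you want to work with these statistics afterwards, here is a minimal sketch (assuming the .xlsx file written by block ①; the path is a placeholder) that reloads it and bar-plots the per-channel means of the 80×80 scale:

    import pandas as pd
    import matplotlib.pyplot as plt

    # Reload the per-channel mean activations written by block ①
    df = pd.read_excel('mean_values_train_basedon_yolov10n.xlsx')  # placeholder path

    # One bar per channel for the 80x80 scale (the 'mean_1_0' column)
    plt.bar(range(len(df)), df['mean_1_0'])
    plt.xlabel('channel index')
    plt.ylabel('mean activation')
    plt.show()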

② With save_feature_map set to 1, the multi-scale, multi-channel feature maps are saved as individual images:

[Figure 2]

For example, the 80×80 scale:

[Figure 3]
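One caveat: plt.imshow rescales each image to its own min/max, so colors are not comparable between the saved channels. If you need a shared color scale, a small tweak (my suggestion, not part of the code below) is to compute one value range per scale and pass it as vmin/vmax inside the channel loop:

    # Shared color scale across all channels of one scale (here the 80x80 one);
    # use this inside the per-channel loop in place of the plain imshow call
    fmap = y_one2one_1_0[0].cpu().detach().numpy()  # (68, 80, 80)
    vmin, vmax = fmap.min(), fmap.max()
    plt.imshow(fmap[i], cmap='viridis', vmin=vmin, vmax=vmax)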

③ With big_feature_map set to 1, feature maps from all three scales are tiled into one long composite image:

[Figure 4]
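The row counts in block ③ are not arbitrary: 5, 10, and 20 tiles are chosen so that every row comes out the same width, since 5×80 = 10×40 = 20×20 = 400 pixels; otherwise the final concatenation along the height would fail. A toy check of the arithmetic:

    import numpy as np

    # Rows of 5, 10 and 20 tiles are all exactly 400 px wide
    row1 = np.concatenate([np.zeros((80, 80))] * 5, axis=1)   # (80, 400)
    row2 = np.concatenate([np.zeros((40, 40))] * 10, axis=1)  # (40, 400)
    row3 = np.concatenate([np.zeros((20, 20))] * 20, axis=1)  # (20, 400)
    merged = np.concatenate([row1, row2, row3], axis=0)       # (140, 400)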

Usage notes:

1. Enable only one of the three flags at a time; don't turn them all on at once.

2. Change the save paths to ones that exist on your own machine.

3. The added code uses os, pandas, and matplotlib; if autobackend.py does not already import them, add import os, import pandas as pd, and import matplotlib.pyplot as plt at the top of the file.
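AutoBackend.forward runs on every inference call, so any normal prediction will trigger whichever block you enabled. A minimal way to kick it off (assuming the repo's usual API; yolov10n.pt and bus.jpg are placeholders for your own weights and image):

    from ultralytics import YOLOv10

    # Any predict call goes through AutoBackend.forward and hits the enabled block
    model = YOLOv10('yolov10n.pt')  # placeholder weights
    model.predict('bus.jpg')        # placeholder image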

The forward method of the AutoBackend class in YOLOv10's autobackend.py, with my three additions in place:

    def forward(self, im, augment=False, visualize=False, embed=None):
        """
        Runs inference on the YOLOv8 MultiBackend model.

        Args:
            im (torch.Tensor): The image tensor to perform inference on.
            augment (bool): whether to perform data augmentation during inference, defaults to False
            visualize (bool): whether to visualize the output predictions, defaults to False
            embed (list, optional): A list of feature vectors/embeddings to return.

        Returns:
            (tuple): Tuple containing the raw output tensor, and processed output for visualization (if visualize=True)
        """
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)


        # I have to say, Ultralytics has built a really nice ecosystem

        # print(f"self.pt: {self.pt}\nself.nn_module: {self.nn_module}\nself.jit: {self.jit}\n"
        #       f"self.dnn: {self.dnn}\nself.onnx: {self.onnx}\nself.xml: {self.xml}\n"
        #       f"self.engine: {self.engine}\nself.coreml: {self.coreml}\nself.paddle: {self.paddle}\n"
        #       f"self.ncnn: {self.ncnn}\nself.triton: {self.triton}")


        # PyTorch
        if self.pt or self.nn_module:
            y = self.model(im, augment=augment, visualize=visualize, embed=embed)

        # TorchScript
        elif self.jit:
            y = self.model(im)

        # ONNX OpenCV DNN
        elif self.dnn:
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()

        # ONNX Runtime
        elif self.onnx:
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})

        # OpenVINO
        elif self.xml:
            im = im.cpu().numpy()  # FP32

            if self.inference_mode in {"THROUGHPUT", "CUMULATIVE_THROUGHPUT"}:  # optimized for larger batch-sizes
                n = im.shape[0]  # number of images in batch
                results = [None] * n  # preallocate list with None to match the number of images

                def callback(request, userdata):
                    """Places result in preallocated list using userdata index."""
                    results[userdata] = request.results

                # Create AsyncInferQueue, set the callback and start asynchronous inference for each input image
                async_queue = self.ov.runtime.AsyncInferQueue(self.ov_compiled_model)
                async_queue.set_callback(callback)
                for i in range(n):
                    # Start async inference with userdata=i to specify the position in results list
                    async_queue.start_async(inputs={self.input_name: im[i : i + 1]}, userdata=i)  # keep image as BCHW
                async_queue.wait_all()  # wait for all inference requests to complete
                y = np.concatenate([list(r.values())[0] for r in results])

            else:  # inference_mode = "LATENCY", optimized for fastest first result at batch-size 1
                y = list(self.ov_compiled_model(im).values())

        # TensorRT
        elif self.engine:
            if self.dynamic and im.shape != self.bindings["images"].shape:
                i = self.model.get_binding_index("images")
                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                for name in self.output_names:
                    i = self.model.get_binding_index(name)
                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings["images"].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs["images"] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]

        # CoreML
        # For Apple devices
        elif self.coreml:
            im = im[0].cpu().numpy()
            im_pil = Image.fromarray((im * 255).astype("uint8"))
            # im = im.resize((192, 320), Image.BILINEAR)
            y = self.model.predict({"image": im_pil})  # coordinates are xywh normalized
            if "confidence" in y:
                raise TypeError(
                    "Ultralytics only supports inference of non-pipelined CoreML models exported with "
                    f"'nms=False', but 'model={w}' has an NMS pipeline created by an 'nms=True' export."
                )
                # TODO: CoreML NMS inference handling
                # from ultralytics.utils.ops import xywh2xyxy
                # box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                # conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
                # y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            elif len(y) == 1:  # classification model
                y = list(y.values())
            elif len(y) == 2:  # segmentation model
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)

        # PaddlePaddle
        elif self.paddle:
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]

        # NCNN
        elif self.ncnn:
            mat_in = self.pyncnn.Mat(im[0].cpu().numpy())
            with self.net.create_extractor() as ex:
                ex.input(self.net.input_names()[0], mat_in)
                y = [np.array(ex.extract(x)[1])[None] for x in self.net.output_names()]

        # NVIDIA Triton Inference Server
        elif self.triton:
            im = im.cpu().numpy()  # torch to numpy
            y = self.model(im)

        # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
        else:
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
                if not isinstance(y, list):
                    y = [y]
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
                if len(y) == 2 and len(self.names) == 999:  # segments and names not defined
                    ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0)  # index of protos, boxes
                    nc = y[ib].shape[1] - y[ip].shape[3] - 4  # y = (1, 160, 160, 32), (1, 116, 8400)
                    self.names = {i: f"class{i}" for i in range(nc)}
            else:  # Lite or Edge TPU
                details = self.input_details[0]
                integer = details["dtype"] in (np.int8, np.int16)  # is TFLite quantized int8 or int16 model
                if integer:
                    scale, zero_point = details["quantization"]
                    im = (im / scale + zero_point).astype(details["dtype"])  # de-scale
                self.interpreter.set_tensor(details["index"], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output["index"])
                    if integer:
                        scale, zero_point = output["quantization"]
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    if x.ndim > 2:  # if task is not classification
                        # Denormalize xywh by image size. See https://github.com/ultralytics/ultralytics/pull/1695
                        # xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models
                        x[:, [0, 2]] *= w
                        x[:, [1, 3]] *= h
                    y.append(x)
            # TF segment fixes: export is reversed vs ONNX export and protos are transposed
            if len(y) == 2:  # segment with (det, proto) output order reversed
                if len(y[1].shape) != 4:
                    y = list(reversed(y))  # should be y = (1, 116, 8400), (1, 160, 160, 32)
                y[1] = np.transpose(y[1], (0, 3, 1, 2))  # should be y = (1, 116, 8400), (1, 32, 160, 160)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]

        # Inspect activation statistics
        activate_state = 0
        if activate_state == 1:
            y_one2one = y['one2one']
            y_one2one_1_0 = y_one2one[1][0]
            y_one2one_1_1 = y_one2one[1][1]
            y_one2one_1_2 = y_one2one[1][2]

            # Average over the H and W dimensions, i.e. over each 80x80 (or 40x40, 20x20) feature map
            mean_1_0 = y_one2one_1_0.mean(dim=[2, 3])
            mean_1_1 = y_one2one_1_1.mean(dim=[2, 3])
            mean_1_2 = y_one2one_1_2.mean(dim=[2, 3])

            # y_one2one_1_0 is (1, 68, 80, 80); we only want the per-channel means,
            # so each result is a tensor of shape (68,)
            mean_1_0 = mean_1_0.squeeze().tolist()  # drop the singleton batch dim and convert to a list
            mean_1_1 = mean_1_1.squeeze().tolist()
            mean_1_2 = mean_1_2.squeeze().tolist()

            data = {
                'mean_1_0': mean_1_0,
                'mean_1_1': mean_1_1,
                'mean_1_2': mean_1_2
            }

            # Build a DataFrame
            df = pd.DataFrame(data)

            # Save to an Excel file (raw string so the backslashes are not treated as escapes)
            file_path = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\mean_values_train_basedon_yolov10n.xlsx'
            df.to_excel(file_path, index=False)



        #########  Save feature maps  ###########
        # y['one2one'][1][0] -> 80x80
        # y['one2one'][1][1] -> 40x40
        # y['one2one'][1][2] -> 20x20
        save_feature_map = 0
        if save_feature_map == 1 and torch.sum(im) > 0:
            y_one2one_1_0 = y['one2one'][1][0]
            y_one2one_1_1 = y['one2one'][1][1]
            y_one2one_1_2 = y['one2one'][1][2]

            # Folders to save the images into, one per scale
            save_dir_0 = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\80_80'
            save_dir_1 = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\40_40'
            save_dir_2 = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\20_20'
            for d in (save_dir_0, save_dir_1, save_dir_2):
                os.makedirs(d, exist_ok=True)  # create the folders if they don't exist yet

            # Number of channels in the feature maps (the same at all three scales)
            num_channels = y_one2one_1_0.shape[1]

            # Save every channel of every scale as its own image
            for i in range(num_channels):
                for fmap, save_dir in zip((y_one2one_1_0, y_one2one_1_1, y_one2one_1_2),
                                          (save_dir_0, save_dir_1, save_dir_2)):
                    feature_map = fmap[0, i].cpu().detach().numpy()  # convert to a NumPy array

                    # Render and save the channel
                    plt.imshow(feature_map, cmap='viridis')  # viridis colormap
                    plt.axis('off')  # hide the axes
                    plt.savefig(os.path.join(save_dir, f'feature_map_{i}.png'), bbox_inches='tight', pad_inches=0)
                    plt.close()  # close the figure so memory doesn't grow



        #########  Save feature maps  ###########




        ###### Save the 3 scales tiled into one image #######
        big_feature_map = 0
        if big_feature_map == 1 and torch.sum(im) > 0:

            y_one2one_1_0 = y['one2one'][1][0]
            y_one2one_1_1 = y['one2one'][1][1]
            y_one2one_1_2 = y['one2one'][1][2]

            # Select the 5 most strongly activated of the 68 channels in y_one2one_1_0
            first_row_activations = [y_one2one_1_0[0, i].mean().item() for i in range(68)]
            top_5_first_row_indices = np.argsort(first_row_activations)[-5:]  # indices of the 5 largest mean activations
            first_row = [y_one2one_1_0[0, i].cpu().detach().numpy() for i in top_5_first_row_indices]

            # Select the 10 most strongly activated channels in y_one2one_1_1
            second_row_activations = [y_one2one_1_1[0, i].mean().item() for i in range(68)]
            top_10_second_row_indices = np.argsort(second_row_activations)[-10:]  # indices of the 10 largest
            second_row = [y_one2one_1_1[0, i].cpu().detach().numpy() for i in top_10_second_row_indices]

            # Select the 20 most strongly activated channels in y_one2one_1_2
            third_row_activations = [y_one2one_1_2[0, i].mean().item() for i in range(68)]
            top_20_third_row_indices = np.argsort(third_row_activations)[-20:]  # indices of the 20 largest
            third_row = [y_one2one_1_2[0, i].cpu().detach().numpy() for i in top_20_third_row_indices]

            def set_edge_to_zero(image, edge_width=1):
                """
                Set the edge pixels of the image to 0 so each tile gets a visible border.
                `edge_width` is the border width in pixels, 1 by default.
                Note: this mutates `image` in place.
                """
                image[:edge_width, :] = 0  # top edge
                image[-edge_width:, :] = 0  # bottom edge
                image[:, :edge_width] = 0  # left edge
                image[:, -edge_width:] = 0  # right edge
                return image

            # Draw a border on every tile; because the function mutates in place,
            # first_row/second_row/third_row themselves are modified
            first_row_with_edges = [set_edge_to_zero(image) for image in first_row]
            second_row_with_edges = [set_edge_to_zero(image) for image in second_row]
            third_row_with_edges = [set_edge_to_zero(image) for image in third_row]

            # Tile row 1: 5 images of 80x80, concatenated along the width (axis=1)
            first_row_image = np.concatenate(first_row, axis=1)

            # Tile row 2: 10 images of 40x40
            second_row_image = np.concatenate(second_row, axis=1)

            # Tile row 3: 20 images of 20x20
            third_row_image = np.concatenate(third_row, axis=1)

            # Stack the three rows (all 400 px wide) along the height (axis=0)
            merged_image = np.concatenate([first_row_image, second_row_image, third_row_image], axis=0)

            # Plot and save the composite
            # plt.figure(figsize=(4, 16))  # optionally set the figure size
            plt.figure()
            plt.imshow(merged_image, cmap='viridis')
            plt.axis('off')  # hide the axes
            save_path = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\6.png'
            plt.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
            plt.show()

            print(f"Merged image saved to '{save_path}'")

        ###### Save the 3 scales tiled into one image #######



        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        else:
            return self.from_numpy(y)
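Finally, rather than editing autobackend.py every time you want to flip a flag, a small quality-of-life tweak (my suggestion, not in the original code) is to read the three toggles from environment variables:

    import os

    # Flip the flags without touching the source, e.g. `set YOLO_SAVE_FMAP=1` on Windows
    activate_state = int(os.environ.get('YOLO_ACT_STATE', 0))
    save_feature_map = int(os.environ.get('YOLO_SAVE_FMAP', 0))
    big_feature_map = int(os.environ.get('YOLO_BIG_FMAP', 0))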
