YOLOv10's feature maps can be inspected, and extended for your own experiments, in the forward function of the AutoBackend class in yolov10-main/ultralytics/nn/autobackend.py.
Near the bottom of that function, the three guarded blocks
activate_state = 0
if activate_state == 1:
save_feature_map = 0
if save_feature_map == 1 and torch.sum(im) > 0:
big_feature_map = 0
if big_feature_map == 1 and torch.sum(im) > 0:
are the parts I added myself.
① Setting activate_state to 1 saves the per-channel activation statistics of the feature maps:
② Setting save_feature_map to 1 saves the multi-scale, multi-channel feature maps:
For example, the 80×80 scale:
③ Setting big_feature_map to 1 saves one long image stitched together from the multi-scale, multi-channel feature maps:
Usage notes:
1. Only set one of the three flags to 1 at a time; don't enable them all at once.
2. Change the save paths to paths that exist on your own machine.
3. The added blocks use pandas, matplotlib, and os on top of the torch and numpy that autobackend.py already imports, so add import os, import pandas as pd, and import matplotlib.pyplot as plt at the top of the file. A minimal way to trigger these code paths is shown in the sketch below.
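For reference, here is a minimal sketch of how to exercise these code paths: run an ordinary prediction so that AutoBackend.forward is called once per image. This assumes the Ultralytics-style API bundled with yolov10-main; the checkpoint yolov10n.pt and the image test.jpg are placeholders for your own files.

# Minimal sketch (assumed API): run one prediction so that the
# modified AutoBackend.forward executes with the flags above.
from ultralytics import YOLOv10

model = YOLOv10("yolov10n.pt")       # placeholder checkpoint path
results = model.predict("test.jpg")  # placeholder image; forward() runs here
print(results[0].boxes)              # print detections to confirm the run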
The full forward of the AutoBackend class in YOLOv10's autobackend.py:
def forward(self, im, augment=False, visualize=False, embed=None):
    """
    Runs inference on the YOLOv8 MultiBackend model.

    Args:
        im (torch.Tensor): The image tensor to perform inference on.
        augment (bool): whether to perform data augmentation during inference, defaults to False
        visualize (bool): whether to visualize the output predictions, defaults to False
        embed (list, optional): A list of feature vectors/embeddings to return.

    Returns:
        (tuple): Tuple containing the raw output tensor, and processed output for visualization (if visualize=True)
    """
    b, ch, h, w = im.shape  # batch, channel, height, width
    if self.fp16 and im.dtype != torch.float16:
        im = im.half()  # to FP16
    if self.nhwc:
        im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)
    # I have to say, Ultralytics' multi-backend ecosystem is really well built
    # print(f"self.pt: {self.pt}\nself.nn_module: {self.nn_module}\nself.jit: {self.jit}\n"
    #       f"self.dnn: {self.dnn}\nself.onnx: {self.onnx}\nself.xml: {self.xml}\n"
    #       f"self.engine: {self.engine}\nself.coreml: {self.coreml}\nself.paddle: {self.paddle}\n"
    #       f"self.ncnn: {self.ncnn}\nself.triton: {self.triton}")

    # PyTorch
    if self.pt or self.nn_module:
        y = self.model(im, augment=augment, visualize=visualize, embed=embed)
    # TorchScript
    elif self.jit:
        y = self.model(im)
    # ONNX OpenCV DNN
    elif self.dnn:
        im = im.cpu().numpy()  # torch to numpy
        self.net.setInput(im)
        y = self.net.forward()
    # ONNX Runtime
    elif self.onnx:
        im = im.cpu().numpy()  # torch to numpy
        y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
    # OpenVINO
    elif self.xml:
        im = im.cpu().numpy()  # FP32
        if self.inference_mode in {"THROUGHPUT", "CUMULATIVE_THROUGHPUT"}:  # optimized for larger batch-sizes
            n = im.shape[0]  # number of images in batch
            results = [None] * n  # preallocate list with None to match the number of images

            def callback(request, userdata):
                """Places result in preallocated list using userdata index."""
                results[userdata] = request.results

            # Create AsyncInferQueue, set the callback and start asynchronous inference for each input image
            async_queue = self.ov.runtime.AsyncInferQueue(self.ov_compiled_model)
            async_queue.set_callback(callback)
            for i in range(n):
                # Start async inference with userdata=i to specify the position in results list
                async_queue.start_async(inputs={self.input_name: im[i : i + 1]}, userdata=i)  # keep image as BCHW
            async_queue.wait_all()  # wait for all inference requests to complete
            y = np.concatenate([list(r.values())[0] for r in results])
        else:  # inference_mode = "LATENCY", optimized for fastest first result at batch-size 1
            y = list(self.ov_compiled_model(im).values())
    # TensorRT
    elif self.engine:
        if self.dynamic and im.shape != self.bindings["images"].shape:
            i = self.model.get_binding_index("images")
            self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
            self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
            for name in self.output_names:
                i = self.model.get_binding_index(name)
                self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
        s = self.bindings["images"].shape
        assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
        self.binding_addrs["images"] = int(im.data_ptr())
        self.context.execute_v2(list(self.binding_addrs.values()))
        y = [self.bindings[x].data for x in sorted(self.output_names)]
    # CoreML
    # For Apple devices
    elif self.coreml:
        im = im[0].cpu().numpy()
        im_pil = Image.fromarray((im * 255).astype("uint8"))
        # im = im.resize((192, 320), Image.BILINEAR)
        y = self.model.predict({"image": im_pil})  # coordinates are xywh normalized
        if "confidence" in y:
            raise TypeError(
                "Ultralytics only supports inference of non-pipelined CoreML models exported with "
                f"'nms=False', but 'model={w}' has an NMS pipeline created by an 'nms=True' export."
            )
            # TODO: CoreML NMS inference handling
            # from ultralytics.utils.ops import xywh2xyxy
            # box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
            # conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
            # y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
        elif len(y) == 1:  # classification model
            y = list(y.values())
        elif len(y) == 2:  # segmentation model
            y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
    # PaddlePaddle
    elif self.paddle:
        im = im.cpu().numpy().astype(np.float32)
        self.input_handle.copy_from_cpu(im)
        self.predictor.run()
        y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
    # NCNN
    elif self.ncnn:
        mat_in = self.pyncnn.Mat(im[0].cpu().numpy())
        with self.net.create_extractor() as ex:
            ex.input(self.net.input_names()[0], mat_in)
            y = [np.array(ex.extract(x)[1])[None] for x in self.net.output_names()]
    # NVIDIA Triton Inference Server
    elif self.triton:
        im = im.cpu().numpy()  # torch to numpy
        y = self.model(im)
    # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
    else:
        im = im.cpu().numpy()
        if self.saved_model:  # SavedModel
            y = self.model(im, training=False) if self.keras else self.model(im)
            if not isinstance(y, list):
                y = [y]
        elif self.pb:  # GraphDef
            y = self.frozen_func(x=self.tf.constant(im))
            if len(y) == 2 and len(self.names) == 999:  # segments and names not defined
                ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0)  # index of protos, boxes
                nc = y[ib].shape[1] - y[ip].shape[3] - 4  # y = (1, 160, 160, 32), (1, 116, 8400)
                self.names = {i: f"class{i}" for i in range(nc)}
        else:  # Lite or Edge TPU
            details = self.input_details[0]
            integer = details["dtype"] in (np.int8, np.int16)  # is TFLite quantized int8 or int16 model
            if integer:
                scale, zero_point = details["quantization"]
                im = (im / scale + zero_point).astype(details["dtype"])  # de-scale
            self.interpreter.set_tensor(details["index"], im)
            self.interpreter.invoke()
            y = []
            for output in self.output_details:
                x = self.interpreter.get_tensor(output["index"])
                if integer:
                    scale, zero_point = output["quantization"]
                    x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                if x.ndim > 2:  # if task is not classification
                    # Denormalize xywh by image size. See https://github.com/ultralytics/ultralytics/pull/1695
                    # xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models
                    x[:, [0, 2]] *= w
                    x[:, [1, 3]] *= h
                y.append(x)
            # TF segment fixes: export is reversed vs ONNX export and protos are transposed
            if len(y) == 2:  # segment with (det, proto) output order reversed
                if len(y[1].shape) != 4:
                    y = list(reversed(y))  # should be y = (1, 116, 8400), (1, 160, 160, 32)
                y[1] = np.transpose(y[1], (0, 3, 1, 2))  # should be y = (1, 116, 8400), (1, 32, 160, 160)
        y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
    # Inspect activation statistics
    activate_state = 0
    if activate_state == 1:
        y_one2one = y['one2one']
        y_one2one_1_0 = y_one2one[1][0]  # 80x80 feature map, shape (1, 68, 80, 80)
        y_one2one_1_1 = y_one2one[1][1]  # 40x40 feature map
        y_one2one_1_2 = y_one2one[1][2]  # 20x20 feature map
        # Average over the H and W dimensions, i.e. over each 80x80 (or 40x40, 20x20) map.
        # Since y_one2one_1_0 is (1, 68, 80, 80) we only care about the per-channel means,
        # so each result is a tensor of shape (68,).
        mean_1_0 = y_one2one_1_0.mean(dim=[2, 3])
        mean_1_1 = y_one2one_1_1.mean(dim=[2, 3])
        mean_1_2 = y_one2one_1_2.mean(dim=[2, 3])
        mean_1_0 = mean_1_0.squeeze().tolist()  # drop the batch dimension and convert to a list
        mean_1_1 = mean_1_1.squeeze().tolist()
        mean_1_2 = mean_1_2.squeeze().tolist()
        data = {
            'mean_1_0': mean_1_0,
            'mean_1_1': mean_1_1,
            'mean_1_2': mean_1_2
        }
        # Build a DataFrame and save it to an Excel file
        # (a raw string keeps the backslashes in the Windows path intact)
        df = pd.DataFrame(data)
        file_path = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\mean_values_train_basedon_yolov10n.xlsx'
        df.to_excel(file_path, index=False)
        print(f"Saved activation means to {file_path}")
    ######### Save feature maps ###########
    # y['one2one'][1][0]  80x80
    # y['one2one'][1][1]  40x40
    # y['one2one'][1][2]  20x20
    save_feature_map = 0
    if save_feature_map == 1 and torch.sum(im) > 0:
        y_one2one_1_0 = y['one2one'][1][0]
        y_one2one_1_1 = y['one2one'][1][1]
        y_one2one_1_2 = y['one2one'][1][2]
        # Folders to save the images into (created if they don't exist yet)
        save_dir_0 = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\80_80'
        save_dir_1 = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\40_40'
        save_dir_2 = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\20_20'
        for d in (save_dir_0, save_dir_1, save_dir_2):
            os.makedirs(d, exist_ok=True)
        # Number of channels in the feature maps
        num_channels = y_one2one_1_0.shape[1]
        # Loop over the channels and save each scale's map for channel i
        for i in range(num_channels):
            # Channel i of the 80x80 feature map, converted to a NumPy array
            feature_map = y_one2one_1_0[0, i].cpu().detach().numpy()
            plt.imshow(feature_map, cmap='viridis')  # viridis colormap
            plt.axis('off')  # hide the axes
            plt.savefig(os.path.join(save_dir_0, f'feature_map_{i}.png'), bbox_inches='tight', pad_inches=0)
            plt.close()  # close the figure to avoid leaking memory
            # Channel i of the 40x40 feature map
            feature_map = y_one2one_1_1[0, i].cpu().detach().numpy()
            plt.imshow(feature_map, cmap='viridis')
            plt.axis('off')
            plt.savefig(os.path.join(save_dir_1, f'feature_map_{i}.png'), bbox_inches='tight', pad_inches=0)
            plt.close()
            # Channel i of the 20x20 feature map
            feature_map = y_one2one_1_2[0, i].cpu().detach().numpy()
            plt.imshow(feature_map, cmap='viridis')
            plt.axis('off')
            plt.savefig(os.path.join(save_dir_2, f'feature_map_{i}.png'), bbox_inches='tight', pad_inches=0)
            plt.close()
    ######### Save feature maps ###########
    ###### Save all three scales of feature maps in one image #######
    big_feature_map = 0
    if big_feature_map == 1 and torch.sum(im) > 0:
        y_one2one_1_0 = y['one2one'][1][0]
        y_one2one_1_1 = y['one2one'][1][1]
        y_one2one_1_2 = y['one2one'][1][2]
        num_channels = y_one2one_1_0.shape[1]  # 68 channels per scale here
        # Pick the 5 most strongly activated 80x80 maps
        first_row_activations = [y_one2one_1_0[0, i].mean().item() for i in range(num_channels)]
        top_5_first_row_indices = np.argsort(first_row_activations)[-5:]  # indices of the 5 largest means
        first_row = [y_one2one_1_0[0, i].cpu().detach().numpy() for i in top_5_first_row_indices]
        # Pick the 10 most strongly activated 40x40 maps
        second_row_activations = [y_one2one_1_1[0, i].mean().item() for i in range(num_channels)]
        top_10_second_row_indices = np.argsort(second_row_activations)[-10:]  # indices of the 10 largest means
        second_row = [y_one2one_1_1[0, i].cpu().detach().numpy() for i in top_10_second_row_indices]
        # Pick the 20 most strongly activated 20x20 maps
        third_row_activations = [y_one2one_1_2[0, i].mean().item() for i in range(num_channels)]
        top_20_third_row_indices = np.argsort(third_row_activations)[-20:]  # indices of the 20 largest means
        third_row = [y_one2one_1_2[0, i].cpu().detach().numpy() for i in top_20_third_row_indices]

        def darken_edges(image, edge_width=1):
            """
            Set the edge pixels of an image to 0 so the tiles are visibly
            separated in the merged figure. `edge_width` is the border width
            in pixels, 1 by default. Modifies the array in place.
            """
            image[:edge_width, :] = 0  # top edge
            image[-edge_width:, :] = 0  # bottom edge
            image[:, :edge_width] = 0  # left edge
            image[:, -edge_width:] = 0  # right edge
            return image

        # Draw the separator borders on every tile
        first_row = [darken_edges(image) for image in first_row]
        second_row = [darken_edges(image) for image in second_row]
        third_row = [darken_edges(image) for image in third_row]
        # Concatenate each row along the width (axis=1); 5x80 = 10x40 = 20x20 = 400,
        # so all three rows end up 400 pixels wide and can be stacked.
        first_row_image = np.concatenate(first_row, axis=1)  # 5 maps of 80x80
        second_row_image = np.concatenate(second_row, axis=1)  # 10 maps of 40x40
        third_row_image = np.concatenate(third_row, axis=1)  # 20 maps of 20x20
        # Stack the three rows into one tall image along the height (axis=0)
        merged_image = np.concatenate([first_row_image, second_row_image, third_row_image], axis=0)
        # Plot and save the merged figure
        # plt.figure(figsize=(4, 16))  # optionally set the figure size
        plt.figure()
        plt.imshow(merged_image, cmap='viridis')
        plt.axis('off')  # hide the axes
        save_path = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\6.png'
        plt.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
        plt.show()
        print(f"Merged image saved to {save_path}")
    ###### Save all three scales of feature maps in one image #######
    if isinstance(y, (list, tuple)):
        return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
    else:
        return self.from_numpy(y)
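As a companion to ①, here is a small sketch, again only an illustrative example, of reading the Excel file of per-channel means back in and plotting one curve per scale. The column names match the data dict in the activate_state block, and the path is the same user-specific one used above.

# Sketch: load the per-channel activation means saved by the
# activate_state block and plot one curve per scale.
import pandas as pd
import matplotlib.pyplot as plt

file_path = r'D:\实验室\论文\论文-多信号参数估计\JCR3\Principle\特征图\mean_values_train_basedon_yolov10n.xlsx'  # change to your own path
df = pd.read_excel(file_path)
for col in ('mean_1_0', 'mean_1_1', 'mean_1_2'):  # 80x80, 40x40, 20x20 scales
    plt.plot(df[col], label=col)
plt.xlabel('channel index')
plt.ylabel('mean activation')
plt.legend()
plt.show()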