本文介绍两个用于可视化目标检测数据集的脚本,它们会把样本标签以边界框的形式画在原图上进行展示:YOLO 工具适用于 YOLOv5-v11 等现代检测框架所使用的 txt 标签格式,VOC 工具则兼容传统的 PASCAL VOC(XML 标注)数据集。完整代码可以直接从下文复制,也可以通过网盘下载。
通过网盘分享的文件:目标检测数据集画框可视化.zip
链接: https://pan.baidu.com/s/1D91zOFwxggZaLaOHoZYTJw?pwd=sbcy 提取码: sbcy
| 主要参数 | 参数用途 | 默认值 |
|---|---|---|
| --images-dir | 输入图像文件夹 | images 文件夹 |
| --labels-dir | 输入标签文件夹 | labels 文件夹 |
| --output-dir | 输出可视化结果文件夹 | output 文件夹 |
| --class-names | 类别名称文件路径(每行一个类别名),用于把数字类别编号转化为文字标签 | 无 |
| --conf-threshold | 置信度阈值,低于该值的框不显示 | 0.0 |
| --font-scale | 字体大小 | 0.5 |
| --thickness | 线宽 | 2 |
| --no-label | 不显示标签 | False |
data/
├── images/
│   ├── train/
│   │   ├── image1.jpg
│   │   └── image2.jpg
│   └── val/
│       └── image3.jpg
└── labels/
    ├── train/
    │   ├── image1.txt
    │   └── image2.txt
    └── val/
        └── image3.txt
import cv2
import os
import argparse
import glob
import numpy as np
from pathlib import Path
from tqdm import tqdm
def parse_arguments(argv=None):
    """Parse the command-line options of the YOLO label visualizer.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            original behavior. Passing a list lets the parser be driven
            programmatically, e.g. from a notebook or a test.

    Returns:
        argparse.Namespace with the attributes ``images_dir``,
        ``labels_dir``, ``output_dir``, ``class_names``, ``conf_threshold``,
        ``font_scale``, ``thickness`` and ``no_label``.
    """
    parser = argparse.ArgumentParser(description='Visualize YOLO format bounding boxes on images')
    parser.add_argument('--images-dir', type=str, default='images',
                        help='Directory containing input images')
    parser.add_argument('--labels-dir', type=str, default='labels',
                        help='Directory containing YOLO format label files')
    parser.add_argument('--output-dir', type=str, default='output',
                        help='Output directory for visualized images')
    parser.add_argument('--class-names', type=str, default=None,
                        help='Path to class names file (one class per line)')
    parser.add_argument('--conf-threshold', type=float, default=0.0,
                        help='Confidence threshold for displaying boxes (default: 0.0)')
    parser.add_argument('--font-scale', type=float, default=0.5,
                        help='Font scale for labels (default: 0.5)')
    parser.add_argument('--thickness', type=int, default=2,
                        help='Line thickness for boxes (default: 2)')
    parser.add_argument('--no-label', action='store_true',
                        help='Do not display class labels')
    return parser.parse_args(argv)
def load_class_names(file_path):
    """Load class names from a text file with one class name per line.

    The position of a line is the class ID it names, so blank lines inside
    the file are kept as empty names to preserve that mapping; only blank
    lines at the end of the file are dropped.

    Args:
        file_path: Path to the class names file, or ``None``/empty when no
            file was supplied.

    Returns:
        A list of class names (index == class ID), or ``None`` when no path
        was given or the file does not exist.
    """
    if not file_path:
        return None
    if not os.path.exists(file_path):
        # A path was requested explicitly, so say why it is being ignored.
        print(f"Warning: Class names file {file_path} not found")
        return None

    try:
        # utf-8-sig reads plain UTF-8 and also strips a leading BOM, which
        # would otherwise end up glued to the first class name.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            class_names = [line.strip() for line in f]
    except UnicodeDecodeError:
        # Not UTF-8: fall back to the platform default encoding, which is
        # what this function used before.
        with open(file_path, 'r') as f:
            class_names = [line.strip() for line in f]

    # Trailing blank lines would otherwise count as extra, empty classes.
    while class_names and not class_names[-1]:
        class_names.pop()
    return class_names
def _read_yolo_boxes(label_path, img_width, img_height, conf_threshold):
    """Parse a YOLO label file into pixel-space boxes clipped to the image.

    Returns a list of ``(xmin, ymin, xmax, ymax, class_id, confidence)``
    tuples. Lines with fewer than five fields are ignored; lines whose
    fields are not numeric are skipped with a warning instead of aborting.
    """
    boxes = []
    with open(label_path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            parts = line.split()
            if len(parts) < 5:
                continue
            try:
                # Format: class_id, center_x, center_y, width, height, [confidence]
                # with the four geometry values normalized to [0, 1].
                # int(float(...)) also accepts IDs written as "0.0".
                # NOTE(review): a 6th field is always read as a confidence;
                # polygon/segmentation label lines would be misread here.
                class_id = int(float(parts[0]))
                center_x = float(parts[1]) * img_width
                center_y = float(parts[2]) * img_height
                width = float(parts[3]) * img_width
                height = float(parts[4]) * img_height
                confidence = float(parts[5]) if len(parts) >= 6 else 1.0

                # Skip if below confidence threshold
                if confidence < conf_threshold:
                    continue

                # Convert center/size to corner coordinates
                xmin = int(center_x - width / 2)
                ymin = int(center_y - height / 2)
                xmax = int(center_x + width / 2)
                ymax = int(center_y + height / 2)
            except (ValueError, OverflowError):
                print(f"Warning: Skipping malformed line {line_no} in {label_path}")
                continue

            # Clip coordinates to image boundaries
            xmin = max(0, min(xmin, img_width - 1))
            ymin = max(0, min(ymin, img_height - 1))
            xmax = max(0, min(xmax, img_width - 1))
            ymax = max(0, min(ymax, img_height - 1))
            boxes.append((xmin, ymin, xmax, ymax, class_id, confidence))
    return boxes


def visualize_yolo_labels(image_path, label_path, output_path, class_names=None,
                          conf_threshold=0.0, font_scale=0.5, thickness=2, show_label=True):
    """Draw the boxes of a YOLO label file on an image and save the result.

    Args:
        image_path: Path of the image to annotate.
        label_path: Path of the YOLO-format label file for that image.
        output_path: Where the annotated image is written; its parent
            directory is created when needed.
        class_names: Optional list mapping class ID to a display name.
            IDs outside the list are shown as the number itself.
        conf_threshold: Boxes with a confidence below this value are not
            drawn. Lines without a confidence field count as 1.0.
        font_scale: Font scale of the label text.
        thickness: Line thickness of the boxes and the label text.
        show_label: When False only the rectangles are drawn.

    Returns:
        True when the annotated image was written; False when the image
        could not be read, the label file does not exist, or writing the
        output failed.
    """
    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not read image {image_path}")
        return False
    img_height, img_width = img.shape[:2]

    # Read the label file
    if not os.path.exists(label_path):
        print(f"Warning: Label file {label_path} not found")
        return False
    boxes = _read_yolo_boxes(label_path, img_width, img_height, conf_threshold)

    font = cv2.FONT_HERSHEY_SIMPLEX
    for xmin, ymin, xmax, ymax, class_id, confidence in boxes:
        # A private RandomState seeded with the class ID yields the same
        # per-class color as seeding the global generator, without resetting
        # global NumPy random state. The modulo keeps the seed in the
        # accepted 0..2**32-1 range for unusual IDs.
        rng = np.random.RandomState(class_id % 2 ** 32)
        color = tuple(int(channel) for channel in rng.randint(0, 255, 3))

        # Draw bounding box
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, thickness)
        if not show_label:
            continue

        # The lower bound matters: a negative ID must not index from the end.
        if class_names and 0 <= class_id < len(class_names):
            class_name = class_names[class_id]
        else:
            class_name = str(class_id)
        label = f"{class_name} {confidence:.2f}" if confidence < 1.0 else class_name

        (text_width, text_height), baseline = cv2.getTextSize(
            label, font, font_scale, thickness
        )
        label_height = text_height + baseline + 5
        # Put the label above the box; when the box touches the top of the
        # image there is no room, so draw it just inside the box instead.
        label_bottom = ymin if ymin >= label_height else ymin + label_height

        # Draw label background
        cv2.rectangle(img,
                      (xmin, label_bottom - label_height),
                      (xmin + text_width, label_bottom),
                      color, -1)
        # Draw label text
        cv2.putText(img, label,
                    (xmin, label_bottom - baseline - 5),
                    font, font_scale,
                    (255, 255, 255), thickness)

    # os.makedirs('') raises, so only create a directory when the output
    # path actually has a directory component.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(output_path, img):
        print(f"Error: Could not write image {output_path}")
        return False
    return True
def _find_image_for_label(images_dir, image_rel_path, extensions):
    """Return the image path for a label's extension-less relative path, or None."""
    # Fast path: same relative path with one of the known extensions.
    for ext in extensions:
        candidate = os.path.join(images_dir, image_rel_path + ext)
        if os.path.exists(candidate):
            return candidate

    # Slow path: scan the directory so extensions in another case (".JPG")
    # are found too. The stem must match exactly; a prefix test would pair
    # "image1.txt" with "image10.jpg".
    dir_path = os.path.dirname(os.path.join(images_dir, image_rel_path))
    base_name = os.path.basename(image_rel_path)
    if not os.path.isdir(dir_path):
        return None
    for file_name in sorted(os.listdir(dir_path)):
        stem, ext = os.path.splitext(file_name)
        if ext.lower() not in extensions:
            continue
        # file_name == base_name covers labels named like "image1.jpg.txt".
        if stem == base_name or file_name == base_name:
            return os.path.join(dir_path, file_name)
    return None


def main():
    """Visualize every YOLO label file found under the labels directory.

    Label files are discovered recursively; each one is matched to an image
    at the same relative path under the images directory and the annotated
    copy is written to the output directory as ``<stem>_visualized.jpg``.
    A summary of processed and skipped files is printed at the end.
    """
    args = parse_arguments()

    # Load class names if provided
    class_names = load_class_names(args.class_names)

    # Find all label files (sorted so runs are reproducible)
    label_files = sorted(
        glob.glob(os.path.join(args.labels_dir, '**', '*.txt'), recursive=True)
    )
    print(f"Found {len(label_files)} label files")
    print(f"Images directory: {args.images_dir}")
    print(f"Labels directory: {args.labels_dir}")
    print(f"Output directory: {args.output_dir}")
    if class_names:
        print(f"Loaded {len(class_names)} class names")
    else:
        print("No class names provided - using class IDs")

    # Order sets the precedence when several images share a stem.
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp', '.tif')
    processed_count = 0
    skipped_count = 0

    # Process each label file
    for label_path in tqdm(label_files, desc="Processing images"):
        # Path of the label relative to the labels root, without extension
        rel_path = os.path.relpath(label_path, args.labels_dir)
        image_rel_path = os.path.splitext(rel_path)[0]

        image_path = _find_image_for_label(args.images_dir, image_rel_path, image_extensions)
        if image_path is None:
            # tqdm.write keeps the progress bar intact
            tqdm.write(f"Warning: Could not find image for {rel_path}")
            skipped_count += 1
            continue

        # Mirror the label's relative location inside the output directory
        output_path = os.path.join(args.output_dir, image_rel_path + '_visualized.jpg')

        # Visualize the labels
        success = visualize_yolo_labels(
            image_path=image_path,
            label_path=label_path,
            output_path=output_path,
            class_names=class_names,
            conf_threshold=args.conf_threshold,
            font_scale=args.font_scale,
            thickness=args.thickness,
            show_label=not args.no_label
        )
        if success:
            processed_count += 1
        else:
            skipped_count += 1

    print("\nProcessing complete!")
    print(f"Processed: {processed_count} images")
    print(f"Skipped: {skipped_count} images")
    print(f"Output saved to: {args.output_dir}")


# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
project/
├── images/
│ ├── image1.jpg
│ └── image2.jpg
├── labels/
│ ├── image1.xml
│ └── image2.xml
└── visualize_voc.py
import cv2
import xml.etree.ElementTree as ET
import os
# Path configuration
image_dir = 'images' # directory holding the input images
label_dir = 'labels' # directory holding the XML label files
output_dir = 'output' # directory that receives the annotated images
os.makedirs(output_dir, exist_ok=True) # create the output directory if it does not exist yet
# Parse a VOC XML annotation and draw its bounding boxes
def draw_bboxes(image_path, xml_path, output_path):
    """Draw every object of a PASCAL VOC XML annotation on an image and save it.

    Each ``<object>`` element contributes one green rectangle taken from its
    ``<bndbox>`` (``xmin``/``ymin``/``xmax``/``ymax``) and a text label taken
    from its ``<name>``. Objects with a missing or non-numeric box are
    skipped with a warning instead of aborting the whole image.

    Args:
        image_path: Path of the image to annotate.
        xml_path: Path of the VOC XML annotation file.
        output_path: Path the annotated image is written to.

    Returns:
        None. Success and failure are reported on stdout.
    """
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"错误:无法读取图像 {image_path}")
        return

    # Parse the XML; a broken or unreadable file must not stop a batch run
    try:
        root = ET.parse(xml_path).getroot()
    except (ET.ParseError, OSError) as exc:
        print(f"错误:无法解析标注文件 {xml_path}: {exc}")
        return

    color = (0, 255, 0)  # green (BGR order)
    thickness = 2
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7

    # Iterate over all annotated objects
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        try:
            # findtext returns None for a missing tag, which float() rejects
            # with TypeError; empty or non-numeric text raises ValueError.
            if bbox is None:
                raise ValueError('missing bndbox')
            xmin, ymin, xmax, ymax = (
                int(float(bbox.findtext(tag)))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
        except (TypeError, ValueError, OverflowError):
            print(f"警告:跳过无效的标注目标 {xml_path}")
            continue

        # Draw the rectangle on the image
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)

        # Add the label text. A missing <name> yields an empty label.
        # NOTE: OpenCV's Hershey fonts cannot render non-ASCII names.
        label = obj.findtext('name') or ''
        (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
        text_y = ymin - 10
        if text_y < text_height:
            # No room above the box: draw the label just inside its top edge.
            text_y = ymin + text_height + 10
        cv2.putText(image, label, (xmin, text_y),
                    font, font_scale, color, thickness)

    # Save the result; cv2.imwrite signals failure through its return value
    if cv2.imwrite(output_path, image):
        print(f"已保存结果到: {output_path}")
    else:
        print(f"错误:无法保存结果到 {output_path}")
# Batch driver; place it after the draw_bboxes function
def process_all_images():
    """Annotate the image belonging to every XML file in ``label_dir``.

    For each ``*.xml`` file the image with the same stem is looked up in
    ``image_dir`` (trying several common extensions, ``.jpg`` first) and the
    annotated copy is written to ``output_dir`` as ``annotated_<image name>``.
    XML files without a matching image produce a warning and are skipped.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')

    # sorted() makes the processing order reproducible
    for xml_file in sorted(os.listdir(label_dir)):
        # splitext touches only the real extension; str.replace would also
        # rewrite a ".xml" that appears in the middle of the file name.
        stem, ext = os.path.splitext(xml_file)
        if ext.lower() != '.xml':
            continue

        xml_path = os.path.join(label_dir, xml_file)
        image_file = None
        for image_ext in image_extensions:
            if os.path.exists(os.path.join(image_dir, stem + image_ext)):
                image_file = stem + image_ext
                break

        if image_file is None:
            expected_file = stem + '.jpg'
            print(f"警告:找不到对应图像 {expected_file}")
            continue

        image_path = os.path.join(image_dir, image_file)
        output_path = os.path.join(output_dir, f"annotated_{image_file}")
        draw_bboxes(image_path, xml_path, output_path)
# Run the batch processing over every XML file in label_dir
process_all_images()