Yolo/VOC样本在原图上画框代码及样本示例

文章目录

  • YOLO格式可视化
    • 参数说明
    • 目录结构
    • 完整代码
  • VOC格式可视化
    • 目录结构
    • 完整代码

本文介绍两个用于可视化目标检测数据集的脚本,它们将样本标签以边界框的形式画在原图上:YOLO脚本适用于YOLOv5~v11等现代检测框架使用的标注格式,VOC脚本则兼容传统的PASCAL VOC(XML)标注格式。完整代码可直接从下文复制,也可通过网盘下载。

通过网盘分享的文件:目标检测数据集画框可视化.zip
链接: https://pan.baidu.com/s/1D91zOFwxggZaLaOHoZYTJw?pwd=sbcy 提取码: sbcy

YOLO格式可视化

参数说明

主要参数 参数用途 默认值
images-dir 输入图像文件夹 images文件夹
labels-dir 输入标签文件夹 labels文件夹
output-dir 输出可视化结果文件夹 output文件夹
class-names 类别名称文件路径(每行一个类别名),用于将类别编号转换为文字标签 None
conf-threshold 置信度阈值 0.0
font-scale 字体大小 0.5
thickness 线宽 2
no-label 不显示标签 False

目录结构

data/
├── images/
│   ├── train/
│   │   ├── image1.jpg
│   │   └── image2.jpg
│   └── val/
│       └── image3.jpg
└── labels/
    ├── train/
    │   ├── image1.txt
    │   └── image2.txt
    └── val/
        └── image3.txt

完整代码

import cv2
import os
import argparse
import glob
import numpy as np
from pathlib import Path
from tqdm import tqdm


def parse_arguments():
    """Build the command-line interface and parse ``sys.argv``.

    Returns:
        argparse.Namespace: Parsed options with the attributes
        ``images_dir``, ``labels_dir``, ``output_dir``, ``class_names``,
        ``conf_threshold``, ``font_scale``, ``thickness`` and ``no_label``.
    """
    cli = argparse.ArgumentParser(description='Visualize YOLO format bounding boxes on images')

    # One row per value-taking option: (flag, value type, default, help text).
    valued_options = (
        ('--images-dir', str, 'images',
         'Directory containing input images'),
        ('--labels-dir', str, 'labels',
         'Directory containing YOLO format label files'),
        ('--output-dir', str, 'output',
         'Output directory for visualized images'),
        ('--class-names', str, None,
         'Path to class names file (one class per line)'),
        ('--conf-threshold', float, 0.0,
         'Confidence threshold for displaying boxes (default: 0.0)'),
        ('--font-scale', float, 0.5,
         'Font scale for labels (default: 0.5)'),
        ('--thickness', int, 2,
         'Line thickness for boxes (default: 2)'),
    )
    for flag, value_type, fallback, description in valued_options:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)

    # Boolean switch: when present, class labels are not drawn.
    cli.add_argument('--no-label', action='store_true',
                     help='Do not display class labels')

    return cli.parse_args()


def load_class_names(file_path):
    """Load class names from a text file, one name per line.

    The zero-based line index is used by the caller as the class ID, so
    blank lines in the middle of the file are kept (as empty names) to
    preserve that mapping. Only trailing blank lines are dropped.

    Args:
        file_path: Path to the class names file, or ``None``/empty.

    Returns:
        list[str] | None: The stripped class names, or ``None`` when no path
        was given or the path does not exist.
    """
    if not file_path or not os.path.exists(file_path):
        return None

    # 'utf-8-sig' decodes plain UTF-8 and also strips a leading BOM, so the
    # first class name is not polluted with '\ufeff' and non-ASCII names do
    # not depend on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        class_names = [line.strip() for line in f]

    # Trailing blank lines are editor artefacts, not classes.
    while class_names and not class_names[-1]:
        class_names.pop()

    return class_names


def _parse_yolo_boxes(label_path, img_width, img_height, conf_threshold):
    """Read a YOLO label file and return clipped pixel-space boxes.

    Each returned item is ``(xmin, ymin, xmax, ymax, class_id, confidence)``.
    Lines with fewer than five fields are ignored; lines whose fields cannot
    be parsed are reported and skipped instead of aborting the whole file.
    """
    boxes = []
    with open(label_path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            parts = line.split()
            if len(parts) < 5:
                continue

            # YOLO format: class_id, center_x, center_y, width, height, [confidence]
            try:
                class_value = float(parts[0])
                # Accept "0" and "0.0", reject "1.7", NaN and negative IDs.
                if not class_value.is_integer() or class_value < 0:
                    raise ValueError(f"invalid class id {parts[0]!r}")
                class_id = int(class_value)

                center_x = float(parts[1]) * img_width
                center_y = float(parts[2]) * img_height
                width = float(parts[3]) * img_width
                height = float(parts[4]) * img_height
                confidence = float(parts[5]) if len(parts) >= 6 else 1.0

                # int() raises on NaN/inf, so the conversion stays in the try.
                xmin = int(center_x - width / 2)
                ymin = int(center_y - height / 2)
                xmax = int(center_x + width / 2)
                ymax = int(center_y + height / 2)
            except (ValueError, OverflowError) as err:
                print(f"Warning: Skipping malformed line {line_no} in {label_path}: {err}")
                continue

            # Skip if below confidence threshold
            if confidence < conf_threshold:
                continue

            # Clip coordinates to image boundaries
            xmin = max(0, min(xmin, img_width - 1))
            ymin = max(0, min(ymin, img_height - 1))
            xmax = max(0, min(xmax, img_width - 1))
            ymax = max(0, min(ymax, img_height - 1))

            boxes.append((xmin, ymin, xmax, ymax, class_id, confidence))
    return boxes


def _class_color(class_id):
    """Return a pseudo-random BGR color that is stable for a given class ID."""
    # A private RandomState yields the same sequence as seeding the global
    # generator, without disturbing np.random for the rest of the program.
    # The modulo keeps very large IDs inside the valid seed range.
    rng = np.random.RandomState(class_id % (2 ** 32))
    return tuple(int(channel) for channel in rng.randint(0, 255, 3))


def _draw_label(img, label, xmin, ymin, color, font_scale, thickness):
    """Draw a filled label tag at the top-left corner of a box."""
    (text_width, text_height), baseline = cv2.getTextSize(
        label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness
    )
    tag_height = text_height + baseline + 5

    # Normally the tag sits directly above the box. When that would fall
    # outside the image (box touching the top edge), place it just inside
    # the box so the label stays visible.
    tag_top = ymin - tag_height
    if tag_top < 0:
        tag_top = ymin
    tag_bottom = tag_top + tag_height

    cv2.rectangle(img, (xmin, tag_top), (xmin + text_width, tag_bottom), color, -1)
    cv2.putText(img, label,
                (xmin, tag_bottom - baseline - 5),
                cv2.FONT_HERSHEY_SIMPLEX, font_scale,
                (255, 255, 255), thickness)


def visualize_yolo_labels(image_path, label_path, output_path, class_names=None,
                          conf_threshold=0.0, font_scale=0.5, thickness=2, show_label=True):
    """Draw YOLO format bounding boxes on an image and save the result.

    Args:
        image_path: Path of the image to annotate.
        label_path: Path of the YOLO label file (normalized center/size
            coordinates, optional sixth field interpreted as confidence).
        output_path: Where the annotated image is written.
        class_names: Optional list mapping class ID to a display name; IDs
            outside the list are shown as numbers.
        conf_threshold: Boxes with a lower confidence are not drawn.
        font_scale: Font scale for the label text.
        thickness: Line thickness for boxes and text.
        show_label: Whether to draw the class label next to each box.

    Returns:
        bool: ``True`` if the annotated image was written, ``False`` if the
        image could not be read, the label file is missing, or the write
        failed.
    """
    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not read image {image_path}")
        return False

    img_height, img_width = img.shape[:2]

    # Read the label file
    if not os.path.exists(label_path):
        print(f"Warning: Label file {label_path} not found")
        return False

    boxes = _parse_yolo_boxes(label_path, img_width, img_height, conf_threshold)

    # Draw boxes on the image
    for xmin, ymin, xmax, ymax, class_id, confidence in boxes:
        color = _class_color(class_id)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, thickness)

        if show_label:
            if class_names and class_id < len(class_names):
                class_name = class_names[class_id]
            else:
                class_name = str(class_id)

            # Ground-truth labels carry no confidence (treated as 1.0), so
            # the score is only shown when it is informative.
            label = f"{class_name} {confidence:.2f}" if confidence < 1.0 else class_name
            _draw_label(img, label, xmin, ymin, color, font_scale, thickness)

    # Create output directory if needed; dirname is '' for a bare file name
    # and os.makedirs('') would raise.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the result; imwrite reports failure through its return value.
    if not cv2.imwrite(output_path, img):
        print(f"Error: Could not write image {output_path}")
        return False
    return True


def _find_image_for_label(images_dir, image_rel_path, extensions):
    """Return the image path matching a label's extension-less relative path, or None."""
    # Fast path: same relative path with one of the known lowercase extensions.
    for ext in extensions:
        candidate = os.path.join(images_dir, image_rel_path + ext)
        if os.path.exists(candidate):
            return candidate

    # Fallback: the extension may differ in case (e.g. ".JPG"). The file stem
    # must match exactly; a prefix test would pair "image1" with "image10.jpg".
    dir_path = os.path.dirname(os.path.join(images_dir, image_rel_path))
    base_name = os.path.basename(image_rel_path)
    if os.path.isdir(dir_path):
        for entry in sorted(os.listdir(dir_path)):
            stem, ext = os.path.splitext(entry)
            if stem == base_name and ext.lower() in extensions:
                return os.path.join(dir_path, entry)

    return None


def main():
    """Visualize every YOLO label file found under the labels directory.

    For each ``*.txt`` file below ``--labels-dir`` (searched recursively) the
    image with the same relative path is looked up under ``--images-dir``,
    annotated, and written to ``--output-dir`` as ``<name>_visualized.jpg``
    with the same sub-directory layout. A summary is printed at the end.
    """
    args = parse_arguments()

    # Load class names if provided
    class_names = load_class_names(args.class_names)

    # Find all label files; sorted so runs are reproducible.
    label_files = sorted(
        glob.glob(os.path.join(args.labels_dir, '**', '*.txt'), recursive=True)
    )

    print(f"Found {len(label_files)} label files")
    print(f"Images directory: {args.images_dir}")
    print(f"Labels directory: {args.labels_dir}")
    print(f"Output directory: {args.output_dir}")
    if class_names:
        print(f"Loaded {len(class_names)} class names")
    elif args.class_names:
        # A path was given but yielded nothing; say so instead of claiming
        # that no file was provided.
        print(f"Warning: Could not load class names from {args.class_names} - using class IDs")
    else:
        print("No class names provided - using class IDs")

    possible_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp')
    processed_count = 0
    skipped_count = 0

    # Process each label file
    for label_path in tqdm(label_files, desc="Processing images"):
        # Path of the label relative to the labels root, mirrored under the
        # images root and the output root.
        rel_path = os.path.relpath(label_path, args.labels_dir)
        image_rel_path = os.path.splitext(rel_path)[0]

        image_path = _find_image_for_label(args.images_dir, image_rel_path, possible_extensions)
        if image_path is None:
            tqdm.write(f"Warning: Could not find image for {rel_path}")
            skipped_count += 1
            continue

        # Determine output path
        output_path = os.path.join(args.output_dir, image_rel_path + '_visualized.jpg')

        # Visualize the labels
        success = visualize_yolo_labels(
            image_path=image_path,
            label_path=label_path,
            output_path=output_path,
            class_names=class_names,
            conf_threshold=args.conf_threshold,
            font_scale=args.font_scale,
            thickness=args.thickness,
            show_label=not args.no_label
        )

        if success:
            processed_count += 1
        else:
            skipped_count += 1

    print("\nProcessing complete!")
    print(f"Processed: {processed_count} images")
    print(f"Skipped: {skipped_count} images")
    print(f"Output saved to: {args.output_dir}")


if __name__ == "__main__":
    main()

VOC格式可视化

目录结构

project/
├── images/
│   ├── image1.jpg
│   └── image2.jpg
├── labels/
│   ├── image1.xml
│   └── image2.xml
└── visualize_voc.py

完整代码

import cv2
import xml.etree.ElementTree as ET
import os

# Path configuration, relative to the working directory:
#   image_dir  - folder holding the input images
#   label_dir  - folder holding the VOC XML label files
#   output_dir - folder that receives the annotated images
image_dir, label_dir, output_dir = 'images', 'labels', 'output'

# Create the output folder up front; a no-op when it already exists.
os.makedirs(output_dir, exist_ok=True)


# Parse a VOC XML annotation and draw its bounding boxes
def draw_bboxes(image_path, xml_path, output_path):
    """Draw every ``<object>`` box of a VOC XML file on an image and save it.

    Each object's ``<bndbox>`` (xmin/ymin/xmax/ymax) is drawn as a green
    rectangle with the ``<name>`` text next to it. Objects with a missing or
    non-numeric box are reported and skipped so one bad entry does not abort
    the image.

    Args:
        image_path: Path of the image to annotate.
        xml_path: Path of the VOC XML annotation file.
        output_path: Where the annotated image is written.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"错误:无法读取图像 {image_path}")
        return

    # Parse the XML; a broken file is reported instead of raising.
    try:
        root = ET.parse(xml_path).getroot()
    except (ET.ParseError, OSError) as err:
        print(f"错误:无法解析标注文件 {xml_path}: {err}")
        return

    color = (0, 255, 0)  # green (BGR order)
    thickness = 2
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7

    # Iterate over all annotated objects
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        if bbox is None:
            print(f"警告:{xml_path} 中的目标缺少 bndbox,已跳过")
            continue

        try:
            # findtext returns None for a missing tag (TypeError in float)
            # and '' for an empty one (ValueError in float).
            xmin, ymin, xmax, ymax = (
                int(float(bbox.findtext(tag)))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
        except (TypeError, ValueError, OverflowError):
            print(f"警告:{xml_path} 中的目标坐标无效,已跳过")
            continue

        # Draw the rectangle on the image
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)

        # Add the label text; the box is still drawn when the name is absent.
        label = (obj.findtext('name') or '').strip()
        if not label:
            continue

        (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
        text_y = ymin - 10
        # For boxes near the top edge the usual position is off-image, so
        # move the text just inside the box instead.
        if text_y - text_height < 0:
            text_y = ymin + text_height + 10
        cv2.putText(image, label, (xmin, text_y),
                    font, font_scale, color, thickness)

    # Make sure the destination folder exists; dirname is '' for a bare
    # file name and os.makedirs('') would raise.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Save the result; imwrite signals failure through its return value.
    if cv2.imwrite(output_path, image):
        print(f"已保存结果到: {output_path}")
    else:
        print(f"错误:无法保存结果到 {output_path}")


# Batch driver: annotate every image that has a matching XML label file
def process_all_images():
    """Run ``draw_bboxes`` for every XML file in ``label_dir``.

    For each ``<stem>.xml`` the image ``<stem>`` plus a known image extension
    is looked up in ``image_dir`` (``.jpg`` first). The result is written to
    ``output_dir`` as ``annotated_<image file name>``. Labels without a
    matching image are reported and skipped.
    """
    # '.jpg' stays first so existing datasets resolve exactly as before.
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp',
                        '.JPG', '.JPEG', '.PNG', '.BMP')

    # Sorted for a reproducible processing order.
    for xml_file in sorted(os.listdir(label_dir)):
        # Split off only the final extension; str.replace would also rewrite
        # any other '.xml' occurrence inside the file name.
        stem, ext = os.path.splitext(xml_file)
        if ext.lower() != '.xml':
            continue

        xml_path = os.path.join(label_dir, xml_file)
        image_file = next(
            (stem + image_ext for image_ext in image_extensions
             if os.path.exists(os.path.join(image_dir, stem + image_ext))),
            None,
        )

        if image_file is None:
            print(f"警告:找不到对应图像 {stem + '.jpg'}")
            continue

        image_path = os.path.join(image_dir, image_file)
        output_path = os.path.join(output_dir, f"annotated_{image_file}")
        draw_bboxes(image_path, xml_path, output_path)


# Run the batch processing: annotate every image that has an XML label file.
# NOTE(review): this call is not behind an `if __name__ == "__main__":` guard,
# so it also runs when the module is imported.
process_all_images()

你可能感兴趣的:(写过的小程序,计算机视觉基础,YOLO,样本可视化,标签可视化,VOC)