将NWPU VHR-10数据集转化为YOLO格式

第一个程序:要在YOLO模型中使用这个数据集,需要将标注转换为"类别(编号), x, y, w, h"的形式,其中x、y是目标框中心点坐标相对于图片宽、高的比例,w、h是目标框的宽、高相对于图片宽、高的比例。转换一共分两步:第一步,先将其转换为"类别(编号), xmin, ymin, xmax, ymax"的形式(坐标同样按图片宽高归一化),使用如下脚本即可:

————————————————
版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。

原文链接:https://blog.csdn.net/Fan1534/article/details/135227192

from PIL import Image
import os
import re

# TODO: directory holding the original NWPU VHR-10 ground-truth .txt files
old_label_path = r'D:\学习资料\NWPU VHR-10 dataset\old_labels'
# TODO: directory holding the positive image set (.jpg files)
f_path = r'D:\学习资料\NWPU VHR-10 dataset\positive image set'
# TODO: directory where the converted label files are written
new_file_path = r'D:\学习资料\NWPU VHR-10 dataset\new_labels\\'

# Target classes and their indices
# class_to_index = {'aircraft': 0, 'oiltank': 1, 'overpass': 2, 'playground': 3}


def parse_label_line(line, width, height):
    """Convert one ground-truth line to ``[class, xmin, ymin, xmax, ymax]``.

    Every run of digits in *line* is read as an integer; the first five are
    taken, in order, as x1, y1, x2, y2 and the 1-based class number.

    Args:
        line: One line of an original label file.
        width: Image width in pixels, used to normalise the x coordinates.
        height: Image height in pixels, used to normalise the y coordinates.

    Returns:
        A list ``[class - 1, x1 / width, y1 / height, x2 / width,
        y2 / height]``, or ``None`` when the line holds fewer than five
        integers (for example a blank line).
    """
    numbers = [int(num) for num in re.findall(r'\d+', line)]
    if len(numbers) < 5:
        return None
    x_min, y_min, x_max, y_max, class_id = numbers[:5]
    # The new class index is the original class number minus one.
    return [
        class_id - 1,
        x_min / width,
        y_min / height,
        x_max / width,
        y_max / height,
    ]


def convert_label_file(src_path, dst_path, width, height):
    """Convert every label line of *src_path* and write them to *dst_path*.

    The destination is opened once, so all objects of an image end up in the
    output file (reopening it in ``'w'`` mode per line would keep only the
    last one). Blank lines are skipped rather than ending the conversion.

    Args:
        src_path: Path of the original label file.
        dst_path: Path of the converted label file (overwritten).
        width: Width in pixels of the image the labels belong to.
        height: Height in pixels of the image the labels belong to.

    Returns:
        The number of label lines written.
    """
    written = 0
    with open(src_path, 'r') as src, open(dst_path, 'w') as dst:
        for line in src:
            new_data = parse_label_line(line, width, height)
            if new_data is None:
                if line.strip():
                    print(f"Ignoring invalid data in {src_path}: {line.strip()}")
                continue
            print(new_data)
            dst.write(' '.join(str(item) for item in new_data) + '\n')
            written += 1
    return written


def convert_all_labels(label_dir, image_dir, output_dir):
    """Convert every ``.txt`` label file in *label_dir*.

    For each label file the image with the same stem and a ``.jpg`` extension
    is opened from *image_dir* to obtain its size, then the converted labels
    are written to a file of the same name in *output_dir*.

    Args:
        label_dir: Directory containing the original label files.
        image_dir: Directory containing the matching ``.jpg`` images.
        output_dir: Directory receiving the converted label files; created
            if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    txt_files = [file for file in os.listdir(label_dir) if file.endswith(".txt")]

    for txt_file in txt_files:
        old_label_file_path = os.path.join(label_dir, txt_file)
        print('当前转换的是图片:', old_label_file_path)

        # Locate the image through image_dir instead of rewriting the label path.
        img_path = os.path.join(image_dir, os.path.splitext(txt_file)[0] + ".jpg")
        with Image.open(img_path) as img:
            width, height = img.size
            print('图片的长为:', height, "  ,宽为", width)

        convert_label_file(
            old_label_file_path,
            os.path.join(output_dir, txt_file),
            width,
            height,
        )
        print(f"处理完成: {txt_file}")


if __name__ == "__main__":
    convert_all_labels(old_label_path, f_path, new_file_path)

第二个程序:将坐标从左上右下转换为中心+宽高

import os


def convert_coordinates(input_folder, output_folder):
    """Rewrite corner-format label files as centre-format label files.

    Each ``.txt`` file in *input_folder* is read line by line. A line with
    exactly five whitespace-separated fields ``class x1 y1 x2 y2`` is written
    to the same-named file in *output_folder* as ``class cx cy width height``.
    Any other line is reported on stdout and left out.

    Args:
        input_folder: Directory containing the corner-format label files.
        output_folder: Directory receiving the converted files; created when
            it does not exist yet.
    """
    # Make sure the destination directory is there before writing into it.
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for name in os.listdir(input_folder):
        # Only label files are of interest.
        if not name.endswith(".txt"):
            continue

        source = os.path.join(input_folder, name)
        target = os.path.join(output_folder, name)

        with open(source, 'r') as reader, open(target, 'w') as writer:
            for raw in reader:
                fields = raw.strip().split()

                if len(fields) != 5:
                    print(f"Ignoring invalid data in {name}: {raw.strip()}")
                    continue

                # Class label followed by the two box corners.
                label = int(fields[0])
                left, top, right, bottom = (float(value) for value in fields[1:5])

                # Box centre and extent derived from the corners.
                center_x = (left + right) / 2
                center_y = (top + bottom) / 2
                box_width = right - left
                box_height = bottom - top

                writer.write(
                    f"{label} {center_x} {center_y} {box_width} {box_height}\n"
                )


if __name__ == "__main__":
    # Input folder (corner-format labels) and output folder (centre-format
    # labels). Raw strings keep the Windows backslash literal: in a normal
    # string "\学" is an invalid escape sequence, which newer Python versions
    # warn about. The resulting path text is unchanged.
    input_folder = r'D:\学习资料/NWPU VHR-10 dataset/new_labels'
    output_folder = r'D:\学习资料/NWPU VHR-10 dataset/newest_labels'

    # Run the coordinate conversion
    convert_coordinates(input_folder, output_folder)

第三个程序:将negative images重命名

import os

def rename_files(folder_name, start_sequence=651):
    """Rename every ``.jpg`` file in *folder_name* to a running number.

    The files are processed in sorted name order, so the numbering is
    reproducible (``os.listdir`` itself returns entries in arbitrary order).
    The first file becomes ``<start_sequence>.jpg``, the next one
    ``<start_sequence + 1>.jpg``, and so on.

    Args:
        folder_name: Directory whose ``.jpg`` files are renamed in place.
        start_sequence: Number given to the first file. Defaults to 651.

    Raises:
        FileExistsError: If a target name is already taken by another file.
            Renaming stops at that point instead of overwriting it.
    """
    jpg_files = sorted(
        file for file in os.listdir(folder_name) if file.endswith('.jpg')
    )

    for sequence, file_name in enumerate(jpg_files, start=start_sequence):
        file_path = os.path.join(folder_name, file_name)
        new_file_path = os.path.join(folder_name, str(sequence) + '.jpg')

        # A file that already carries its target name needs no work.
        if new_file_path == file_path:
            continue

        # os.rename silently replaces an existing file on POSIX; refuse to
        # destroy data and fail the same way on every platform.
        if os.path.exists(new_file_path):
            raise FileExistsError(
                f"Cannot rename {file_path}: {new_file_path} already exists"
            )

        os.rename(file_path, new_file_path)

    print('文件重命名完成!')

if __name__ == "__main__":
    # Guarded so the prompt only runs when this script is executed directly,
    # not when the module is imported.
    # Ask the user for the folder name
    folder_name = input('请输入文件夹名:')

    # Rename the files in that folder
    rename_files(folder_name)

第四个程序:创建negative image对应的空白txt

import os

def create_empty_txt_files(folder_name, start_sequence=651, end_sequence=800):
    """Create empty ``<n>.txt`` files for a range of sequence numbers.

    One file is created in *folder_name* for every number from
    *start_sequence* to *end_sequence*, both inclusive. A file that already
    exists under one of these names is truncated to zero length.

    Args:
        folder_name: Existing directory in which the files are created.
        start_sequence: First sequence number. Defaults to 651.
        end_sequence: Last sequence number (inclusive). Defaults to 800.
    """
    for sequence in range(start_sequence, end_sequence + 1):
        file_path = os.path.join(folder_name, str(sequence) + '.txt')

        # Opening in write mode is enough to create (or empty) the file.
        with open(file_path, 'w'):
            pass

    print('空白TXT文件创建完成!')

if __name__ == "__main__":
    # Guarded so the prompt only runs when this script is executed directly,
    # not when the module is imported.
    # Ask the user for the folder name
    folder_name = input('请输入文件夹名:')

    # Create the empty TXT files in that folder
    create_empty_txt_files(folder_name)

第五个程序:按7:2:1随机切分训练集、测试集和验证集

import os
import random
import shutil

# Source folders holding the images and their label files.
# Raw strings keep the backslash literal ("\学" is an invalid escape sequence
# in a normal string); the path text itself is unchanged.
image_folder = r"D:\学习资料/NWPU VHR-10 dataset/positive image set"
label_folder = r"D:\学习资料/NWPU VHR-10 dataset/newest_labels"

# Destination root folders; train/val/test subfolders are created below them.
images_folder = r"D:\学习资料/NWPU VHR-10 dataset/images"
labels_folder = r"D:\学习资料/NWPU VHR-10 dataset/labels"

# Split ratios. The test split receives whatever is left after train and val,
# so test_ratio is informational only.
train_ratio = 0.7
val_ratio = 0.1
test_ratio = 0.2


def split_dataset(image_folder, label_folder, images_folder, labels_folder,
                  train_ratio=0.7, val_ratio=0.1):
    """Randomly split images and their labels into train/val/test folders.

    The files in *image_folder* are shuffled, then copied to
    ``<images_folder>/train``, ``/val`` and ``/test``. For every image the
    label file with the same stem and a ``.txt`` extension is copied from
    *label_folder* to the matching subfolder of *labels_folder*. The
    subfolders are created when missing; copying into a directory that does
    not exist would otherwise fail.

    Args:
        image_folder: Directory containing the source images.
        label_folder: Directory containing the source label files.
        images_folder: Root directory receiving the split images.
        labels_folder: Root directory receiving the split labels.
        train_ratio: Fraction of the images placed in the train split.
        val_ratio: Fraction of the images placed in the val split; the
            remaining images form the test split.

    Returns:
        A dict mapping ``"train"``, ``"val"`` and ``"test"`` to the list of
        image file names assigned to that split.
    """
    # Only regular files can be copied; skip any subdirectories.
    image_files = [
        name for name in os.listdir(image_folder)
        if os.path.isfile(os.path.join(image_folder, name))
    ]

    # Shuffle so the split is random.
    random.shuffle(image_files)

    num_samples = len(image_files)
    num_train = int(num_samples * train_ratio)
    num_val = int(num_samples * val_ratio)

    splits = {
        "train": image_files[:num_train],
        "val": image_files[num_train:num_train + num_val],
        "test": image_files[num_train + num_val:],
    }

    for split_name, split_images in splits.items():
        image_target = os.path.join(images_folder, split_name)
        label_target = os.path.join(labels_folder, split_name)
        os.makedirs(image_target, exist_ok=True)
        os.makedirs(label_target, exist_ok=True)

        for image_file in split_images:
            shutil.copy(
                os.path.join(image_folder, image_file),
                os.path.join(image_target, image_file),
            )

            # The label shares the image's stem; splitext copes with
            # extensions of any length.
            label_file = os.path.splitext(image_file)[0] + ".txt"
            label_source = os.path.join(label_folder, label_file)
            if not os.path.exists(label_source):
                print(f"No label file found for {image_file}, skipping its label")
                continue
            shutil.copy(label_source, os.path.join(label_target, label_file))

    return splits


if __name__ == "__main__":
    split_dataset(
        image_folder,
        label_folder,
        images_folder,
        labels_folder,
        train_ratio,
        val_ratio,
    )
    ```

# 第六个程序:将文件夹中所有jpg图片的路径写入txt文件
    ```
import os


def write_jpg_paths_to_txt(folder_name, output_file):
    """Write the path of every ``.jpg`` file in a folder to a text file.

    Each entry of *folder_name* whose name ends with ``.jpg`` is joined with
    *folder_name* and written to *output_file*, one path per line, in the
    order ``os.listdir`` returns them.

    Args:
        folder_name: Directory to scan (not recursive).
        output_file: Path of the text file to write (overwritten).
    """
    # The directory is listed before the output file is opened.
    lines = [
        os.path.join(folder_name, entry) + "\n"
        for entry in os.listdir(folder_name)
        if entry.endswith(".jpg")
    ]

    with open(output_file, "w") as handle:
        handle.writelines(lines)


# Example usage
# Raw strings keep every backslash literal: "\学", "\i" and "\o" are invalid
# escape sequences in a normal string, which newer Python versions warn about.
# The resulting path text is unchanged.
folder_name = r"D:\学习资料/NWPU VHR-10 dataset\images/train"
output_file = r"D:\学习资料/NWPU VHR-10 dataset\images\output.txt"

if __name__ == "__main__":
    write_jpg_paths_to_txt(folder_name, output_file)
    ```

你可能感兴趣的:(YOLO,人工智能,算法)