使用pyradiomics库对dcm图像进行特征提取

一 制作掩膜图像

医学掩膜图像的制作可以使用labelme和3D Slicer获得,下文中仅介绍使用labelme对dcm图像标注后获得json文件的一些处理,至于labelme的使用请参考其他博客。

1. 文件批量重命名

#coding=gbk
#文件批量重命名
import os
import sys
def rename(path=r"E:\3D slicerTest\NC_OVER51_MRI\3004368644\ScalarVolume_96",
           name=10000, mask='_mask', fileType=".dcm"):
    """Rename every regular file in ``path`` to ``<number><mask><fileType>``.

    Files are numbered consecutively starting at ``name + 1`` in sorted
    (lexicographic) file-name order; sub-directories are left untouched.

    Args:
        path: Folder whose files are renamed in place.
        name: Base number; the first file gets ``name + 1``.
        mask: Suffix inserted between the number and the extension.
        fileType: Extension (including the dot) given to every renamed file.

    Raises:
        FileExistsError: If the target name already belongs to a different
            file, so that no file is silently overwritten.
    """
    print("正在生成文件")
    renamed = 0
    # Sort so the numbering is reproducible; os.listdir order is arbitrary.
    for files in sorted(os.listdir(path)):
        Olddir = os.path.join(path, files)
        if os.path.isdir(Olddir):
            continue
        Newdir = os.path.join(path, str(name + renamed + 1) + mask + fileType)
        # On POSIX os.rename silently replaces an existing file; refuse that.
        if os.path.exists(Newdir) and not os.path.samefile(Olddir, Newdir):
            raise FileExistsError("target already exists: " + Newdir)
        os.rename(Olddir, Newdir)
        renamed += 1
    # Report the number of files actually renamed (the old counter started
    # at 1 and therefore over-reported by one).
    print("一共修改了" + str(renamed) + "个文件")

# Run the batch rename only when this file is executed as a script, so that
# importing it does not rename files as a side effect.
if __name__ == '__main__':
    rename()

2. 分离dcm和png图像

由于labelme只能对png、jpg等格式的图像进行标注,所以需先将dcm图像转换为png。这里使用MicroDicom进行批量转换,也可使用下面的python脚本进行格式转换:

# dcm转png
import pydicom
import matplotlib.pyplot as plt
import scipy.misc
import imageio
import pandas as pd
import numpy as np
import os


def DcmToPng(file_path,png_path):
    """Convert the DICOM slices found under ``file_path`` to PNG images.

    The input is walked as ``file_path/<case>/<series>/<slice>.dcm``. Each
    slice is written to ``png_path/<case>/<slice>_mask.png`` (all series of
    one case share the same output folder). Entries that are not directories
    at the case or series level are skipped.

    Args:
        file_path: Root folder containing one sub-folder per case.
        png_path: Root folder receiving the PNG files; per-case sub-folders
            are created on demand.
    """
    for case_name in os.listdir(file_path):
        case_dir = os.path.join(file_path, case_name)
        if not os.path.isdir(case_dir):
            continue
        out_dir = os.path.join(png_path, case_name)
        print(case_dir)
        for series_name in os.listdir(case_dir):
            series_dir = os.path.join(case_dir, series_name)
            if not os.path.isdir(series_dir):
                continue
            for files in os.listdir(series_dir):
                if not files.lower().endswith('.dcm'):
                    continue
                fullpath = os.path.join(series_dir, files)
                print(fullpath)
                # splitext keeps dotted names (e.g. DICOM UIDs) intact,
                # unlike split('.')[0] which cut at the first dot.
                stem = os.path.splitext(files)[0]
                out_path = os.path.join(out_dir, stem + "_mask.png")
                os.makedirs(out_dir, exist_ok=True)
                # dcmread replaces the deprecated pydicom.read_file.
                ds = pydicom.dcmread(fullpath)
                img = ds.pixel_array  # pixel data of the slice
                imageio.imsave(out_path, img)

    print('all is changed')

# Script entry point: convert the DICOM tree under src_path into PNG files
# under des_path.
if __name__ == '__main__':
    src_path = r'E:\3D slicerTest\test\dcm'
    des_path = r'E:\3D slicerTest\test\png'
    DcmToPng(src_path,des_path)

分离dcm和png(将png图像及其同名的dcm文件分别复制到各自的输出目录)

#coding:utf8
import os
import shutil
# Paths of the DICOM files that have a PNG counterpart. Filled by png_copy()
# and consumed by dcm_copy(); only the part before the extension is compared.
indexs = list()


def png_copy(path,png_path):
    """Copy every PNG from ``path/<case>/`` to ``png_path/<case>/``.

    For each copied PNG, the path of the DICOM file expected to sit next to
    it (same name, ``.DCM`` extension) is appended to the module-level
    ``indexs`` list so that dcm_copy() can pick up the matching slices.

    Args:
        path: Root folder with one sub-folder per case, holding PNG and
            DICOM files side by side.
        png_path: Root folder receiving the PNG copies.
    """
    for dirs in os.listdir(path):
        new_dir = os.path.join(path, dirs)
        if not os.path.isdir(new_dir):
            continue
        new_png_path = os.path.join(png_path, dirs)
        for file in os.listdir(new_dir):
            if not file.lower().endswith('.png'):
                continue
            # Without the folder, shutil.copy would create a *file* named
            # like the case instead of copying into it.
            os.makedirs(new_png_path, exist_ok=True)
            shutil.copy(os.path.join(new_dir, file), new_png_path)
            # splitext keeps dotted names intact (split('.')[0] did not).
            stem = os.path.splitext(file)[0]
            indexs.append(os.path.join(new_dir, stem + '.DCM'))


def dcm_copy(path,data_path):
    """Copy the DICOM files recorded in ``indexs`` to ``data_path/<case>/``.

    A DICOM file is copied when its path without the extension equals that
    of an entry in ``indexs``, so ``.dcm`` and ``.DCM`` both match.

    Args:
        path: Root folder with one sub-folder per case.
        data_path: Root folder receiving the selected DICOM files.
    """
    # Build the lookup once; set membership replaces the per-file list scan.
    wanted = {os.path.normcase(os.path.splitext(entry)[0]) for entry in indexs}
    for dirs in os.listdir(path):
        new_dir = os.path.join(path, dirs)
        if not os.path.isdir(new_dir):
            continue
        new_dcm_path = os.path.join(data_path, dirs)
        for files in os.listdir(new_dir):
            if not files.lower().endswith('.dcm'):
                continue
            fullpath = os.path.join(new_dir, files)
            key = os.path.normcase(os.path.splitext(fullpath)[0])
            if key in wanted:
                os.makedirs(new_dcm_path, exist_ok=True)
                shutil.copy(fullpath, new_dcm_path)

def mk_dir1(path1,path2):
    """Create in ``path2`` one folder for every sub-folder of ``path1``.

    Plain files in ``path1`` are ignored. Existing target folders are kept,
    so the function can be run repeatedly, and missing parents of ``path2``
    are created.

    Args:
        path1: Folder whose sub-folder names are mirrored.
        path2: Folder in which the mirrored (empty) folders are created.
    """
    for entry in os.listdir(path1):
        if not os.path.isdir(os.path.join(path1, entry)):
            continue
        file_name = path2 + '/' + str(entry)
        os.makedirs(file_name, exist_ok=True)

def mk_dir2(path1,path2):
    """Create in ``path2`` one folder for every sub-folder of ``path1``.

    Plain files in ``path1`` are ignored. Existing target folders are kept,
    so the function can be run repeatedly, and missing parents of ``path2``
    are created.

    Args:
        path1: Folder whose sub-folder names are mirrored.
        path2: Folder in which the mirrored (empty) folders are created.
    """
    for entry in os.listdir(path1):
        if not os.path.isdir(os.path.join(path1, entry)):
            continue
        file_name = path2 + '/' + str(entry)
        os.makedirs(file_name, exist_ok=True)

# Script entry point: mirror the case folders, then copy the PNG files and
# the DICOM files that have a PNG counterpart into their own trees.
if __name__ == '__main__':
    path = r'E:\1Test\test2\dcm' # folder holding all DICOM files
    png_path = r'E:\1Test\test2\png' # where the PNG files are saved
    data_path = r'E:\1Test\test2\data' # where the selected DICOM series are saved
    mk_dir1(path,png_path) # create the same sub-folders as in path
    mk_dir2(path,data_path)
    png_copy(path,png_path)
    # print(indexs)
    dcm_copy(path,data_path)


3. 批量解析json

#!/usr/bin/env python
# _*_ coding: UTF-8 _*_
#!/bin/bash
''' 批量解析json 对指定路径中的json文件进行解析,生成相应的数据'''
import os
import natsort
import subprocess

# Path of the labelme_json_to_dataset.exe program.
labelme_json = r"C:\\Users\\18943\.conda\envs\labelme\Scripts\labelme_json_to_dataset.exe"

file_path = r"E:\1Test\json_test"   # folder containing one sub-folder per case

dir_list = os.listdir(file_path)

for dirs in dir_list:
    new_path = os.path.join(file_path,dirs)
    if not os.path.isdir(new_path):
        continue
    for file_name in os.listdir(new_path):
        # Only labelme annotation files are passed to the converter.
        if not file_name.lower().endswith('.json'):
            continue
        file_name = os.path.join(new_path,file_name)
        # Run the converter from inside the case folder. The old
        # os.system('cd'+''+new_path) produced the invalid command
        # "cd<path>", and a cd in one os.system call would not have affected
        # the next call anyway. The argument list keeps paths with spaces
        # intact.
        subprocess.run([labelme_json, file_name], cwd=new_path)

4. 提取label图像

#coding:utf8
import os
import shutil

def file_rename(path, despath=r'E:\1Test\4label\3004325563\\'):
    """Move each ``label.png`` out of its sub-folder into ``despath``.

    Every sub-folder of ``path`` that contains a ``label.png`` has that file
    moved to ``despath/<prefix>_mask.png``, where ``<prefix>`` is the
    sub-folder name up to its first underscore. Plain files in ``path`` are
    ignored.

    Args:
        path: Folder containing one sub-folder per parsed JSON file.
        despath: Folder receiving the renamed label images; created if
            missing.
    """
    for files in os.listdir(path):
        sub_path = os.path.join(path, files)
        if not os.path.isdir(sub_path):
            continue
        label_file = os.path.join(sub_path, "label.png")
        if not os.path.isfile(label_file):
            continue
        # Use the current entry itself; indexing the listing with a separate
        # counter picked the wrong entry whenever plain files were present.
        newname = files.split("_")[0]
        os.makedirs(despath, exist_ok=True)
        os.rename(label_file, os.path.join(despath, newname + '_mask' + ".png"))


# Script entry point: collect the label images of one case.
if __name__ == '__main__':
    path = (r'E:\1Test\3json\3004325563')  # folder containing all parsed JSON sub-folders
    file_rename(path)

5. 批量二值化

#coding:utf8
import os
import shutil
import cv2 as cv
import numpy as np

# Folder in which the binarized images are saved.
des_path = r'E:\1Test\5data\3004325563\\'

def file_rename(path, out_dir=None):
    """Binarize every ``*_mask.png`` in ``path`` and save it under the same name.

    Each image is converted to grayscale and thresholded at its own mean
    gray value: pixels above the mean become 255, all others 0.

    Args:
        path: Folder containing the ``*_mask.png`` label images.
        out_dir: Folder receiving the binarized images; defaults to the
            module-level ``des_path``. Created if missing.
    """
    if out_dir is None:
        out_dir = des_path
    for files in sorted(os.listdir(path)):
        if not files.lower().endswith('_mask.png'):
            continue
        image = cv.imread(os.path.join(path, files))
        if image is None:
            # cv.imread signals an unreadable file by returning None.
            print("skip unreadable image:", files)
            continue
        # cv.imread returns channels in BGR order.
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        mean = gray.sum() / gray.size
        print("mean:", mean)
        ret, binary = cv.threshold(gray, mean, 255, cv.THRESH_BINARY)
        # cv.imwrite fails silently when the folder does not exist.
        os.makedirs(out_dir, exist_ok=True)
        cv.imwrite(os.path.join(out_dir, files), binary)



# Script entry point: binarize the label images of one case.
if __name__ == '__main__':
    path = r'E:\1Test\4label\3004325563\\' # folder containing all label images
    file_rename(path)

二 特征提取

# -*- coding: utf-8 -*-

import SimpleITK as sitk
import radiomics
from radiomics.featureextractor import RadiomicsFeatureExtractor
import os
import numpy as np
import pandas as pd
import cv2

# ------------------------------------
# All relative paths below are resolved against this working directory.
os.chdir(r'E:\毕业论文\问题2问题3代码')
os.getcwd()  # get the current working directory (the result is not used)

yaml_path = './Params.yaml'  # configuration file passed to the pyradiomics extractor
dataset_path = "./Test_CT_patient/"  # one sub-folder per patient
excel_lc_path = './临床数据.csv'  # clinical data; only referenced by the commented-out code at the end of the script

zoom_prixl = 5  # number of pixels by which each mask is shrunk (prepare_images draws contours of twice this thickness)


# predict

def predict_features(image, mask, option_yaml_path):
    """Run pyradiomics feature extraction on one image/mask pair.

    A ``RadiomicsFeatureExtractor`` is built from the parameter file and its
    ``execute`` method is called with the original image and the matching
    mask. The parameter file is where the extraction is customised: which
    feature classes to compute, which image types (original or derived) to
    use and which preprocessing to apply.

    Args:
        image: Original image handed to the extractor.
        mask: Mask belonging to ``image``.
        option_yaml_path: Path of the pyradiomics parameter file.

    Returns:
        Whatever ``RadiomicsFeatureExtractor.execute`` returns for the pair.
    """
    feature_extractor = RadiomicsFeatureExtractor(option_yaml_path)
    extracted = feature_extractor.execute(image, mask)
    return extracted


# -----------------------
# path=phase_path
# transform dcm and mask_png to image main function
def prepare_images(path):
    """Load the DICOM series and the PNG masks stored in ``path``.

    The first DICOM series found in ``path`` is read as one image. Every
    ``*_mask.png`` in ``path`` (in sorted file-name order) is thresholded at
    127, shrunk by painting its contours black with a line thickness of
    ``2 * zoom_prixl``, and converted to a 0/1 slice. The slices are stacked
    into a mask image that takes over the geometry of the DICOM image.

    NOTE(review): masks are ordered by file name while the DICOM slices are
    ordered by the series reader; this assumes both orders agree - confirm
    against the naming scheme of the data.

    Args:
        path: Folder of one patient containing the DICOM files and the
            ``*_mask.png`` files.

    Returns:
        Tuple ``(images_dcm, images_mask)`` of SimpleITK images.

    Raises:
        RuntimeError: If ``path`` contains no DICOM series, or if the number
            of mask files differs from the number of DICOM slices.
    """
    # prepare dcm
    reader = sitk.ImageSeriesReader()
    # A folder may hold several series; each has its own series ID.
    seriesIDs = reader.GetGDCMSeriesIDs(path)
    if not seriesIDs:
        raise RuntimeError("no DICOM series found in: " + str(path))

    # Use the first series and collect the file names that belong to it.
    dicom_names = reader.GetGDCMSeriesFileNames(path, seriesIDs[0])
    reader.SetFileNames(dicom_names)
    images_dcm = reader.Execute()
    print(images_dcm.GetSize())

    # prepare mask
    tmp = []
    for item in sorted(os.listdir(path)):
        if not item.lower().endswith('_mask.png'):
            continue
        tmp_array = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path, item)))
        # Gray values above 127 become 255, everything else 0.
        ret, thresh = cv2.threshold(tmp_array, 127, 255, 0)

        # [-2] selects the contour list for both the 2-value (OpenCV 4.x)
        # and the 3-value (OpenCV 3.x) return of findContours.
        contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
        # Paint the contours black on the thresholded mask to shrink it.
        # Drawing on the thresholded copy keeps the result consistent with
        # the 127 threshold (the raw array lost values between 128 and 254).
        imgnew = cv2.drawContours(thresh.copy(), contours, -1, (0, 0, 0), int(zoom_prixl * 2))
        tmp.append((imgnew // 255).astype(np.int32))  # 0/255 -> 0/1

    slice_count = images_dcm.GetSize()[-1]
    if len(tmp) != slice_count:
        raise RuntimeError(
            "found %d mask file(s) but %d DICOM slice(s) in: %s"
            % (len(tmp), slice_count, path))

    images_mask = sitk.GetImageFromArray(np.array(tmp))
    print(images_mask.GetSize())
    images_mask.CopyInformation(images_dcm)

    return images_dcm, images_mask


results = list()  # one feature mapping per patient
indexs = list()   # patient IDs, used as the row index of the output table
if __name__ == '__main__':

    # Sort so the rows of the CSV come out in a reproducible order.
    for patients_id in sorted(os.listdir(dataset_path)):
        patients_path = dataset_path + patients_id + '/'
        if not os.path.isdir(patients_path):
            continue
        indexs.append(str(patients_id))
        images, masks = prepare_images(patients_path)
        results.append(predict_features(images, masks, yaml_path))

    df = pd.DataFrame(results)
    # Drop the non-feature columns. pyradiomics prefixes them with
    # "diagnostics_"; their number depends on the configuration, so they are
    # selected by name rather than by a fixed count of leading columns.
    diagnostic_columns = [col for col in df.columns
                          if str(col).startswith('diagnostics_')]
    df.drop(columns=diagnostic_columns, inplace=True)
    df.index = indexs
    df.to_csv('5_1_ct_patient.csv')

    #暂时不用
    # target = pd.DataFrame()
    # target['metastasis'] = None
    # df['age'] = None  # add col
    # df['sex'] = None  # add col
    #
    # excel_lc = pd.read_csv(open(excel_lc_path), header=0, encoding='utf-8')
    # for i in df.index:
    #     for v in excel_lc.values:
    #         if int(i[:4]) == v[2]:
    #             sex = 1 if v[0] == 'm' else 0
    #             df.loc[i, ['age', 'sex']] = int(v[1][:2]), sex
    #             target.loc[i, ['metastasis']] = 1 if v[3] == '+' else 0
    #             #    x, y = df.iloc[:, :], np.ravel(target).astype(np.int8)
    # df_all = pd.concat([df, target], axis=1)
    # df_all.to_csv('3.csv')

你可能感兴趣的:(python,图像识别)