医学掩膜图像可以使用 labelme 或 3D Slicer 制作。下文仅介绍用 labelme 对 dcm 图像(转换为 png 后)进行标注、得到 json 文件之后的一些处理步骤;labelme 本身的使用方法请参考其他博客。
#coding=gbk
#文件批量重命名
import os
import sys
def rename(path=r"E:\3D slicerTest\NC_OVER51_MRI\3004368644\ScalarVolume_96",
           name=10000, mask='_mask', fileType=".dcm"):
    """Rename every regular file directly inside *path* to a numbered name.

    Files are processed in sorted name order; the n-th file (1-based) becomes
    ``<name + n><mask><fileType>``, e.g. ``10001_mask.dcm`` with the defaults.
    Sub-directories are left untouched.

    Args:
        path: Folder whose files are renamed in place.
        name: Base number; the running index is added to it.
        mask: Text inserted between the number and the extension.
        fileType: Extension (including the dot) given to every renamed file.

    Returns:
        The number of files that were renamed.

    Raises:
        FileExistsError: If a target name is already taken by another file.
    """
    print("正在生成文件")
    renamed = 0
    # Sorted so that the numbering is reproducible between runs.
    for entry in sorted(os.listdir(path)):
        old_path = os.path.join(path, entry)
        if os.path.isdir(old_path):
            continue
        new_path = os.path.join(path, str(name + renamed + 1) + mask + fileType)
        # os.rename silently overwrites on POSIX; refuse instead of losing data.
        if new_path != old_path and os.path.exists(new_path):
            raise FileExistsError(new_path)
        os.rename(old_path, new_path)
        renamed += 1
    # Report the real number of renamed files (the counter used to start at 1).
    print("一共修改了" + str(renamed) + "个文件")
    return renamed
# Only rename when executed as a script, not when this module is imported.
if __name__ == '__main__':
    rename()
由于 labelme 只能对 png、jpg 等格式的图像进行标注,所以需要先将 dcm 图像转换为 png。这里使用 MicroDicom 进行批量转换,也可以使用下面的 python 脚本进行格式转换。
# dcm转png
import pydicom
import matplotlib.pyplot as plt
import scipy.misc
import imageio
import pandas as pd
import numpy as np
import os
def DcmToPng(file_path, png_path):
    """Convert the DICOM files found two folder levels below *file_path* to PNG.

    Expected layout: ``file_path/<level1>/<level2>/*.dcm``. Every image is
    written to ``png_path/<level1>/<stem>_mask.png`` where ``<stem>`` is the
    DICOM file name without its extension.

    Args:
        file_path: Root folder of the DICOM tree.
        png_path: Root folder that receives the PNG files.
    """
    for level1 in os.listdir(file_path):
        level1_dir = os.path.join(file_path, level1)
        if not os.path.isdir(level1_dir):
            continue  # stray files at this level would crash os.listdir below
        # NOTE(review): all <level2> folders of one <level1> share this output
        # folder, so equal file names overwrite each other - confirm intended.
        out_dir = os.path.join(png_path, level1)
        print(level1_dir)
        for level2 in os.listdir(level1_dir):
            level2_dir = os.path.join(level1_dir, level2)
            if not os.path.isdir(level2_dir):
                continue
            for fname in os.listdir(level2_dir):
                if not fname.lower().endswith('.dcm'):
                    continue
                fullpath = os.path.join(level2_dir, fname)
                print(fullpath)
                # splitext keeps dotted names such as "1.2.840.1.dcm" distinct;
                # split('.')[0] would map all of them to "1".
                stem = os.path.splitext(fname)[0]
                out_path = os.path.join(out_dir, stem + "_mask.png")
                os.makedirs(out_dir, exist_ok=True)
                # dcmread replaces read_file, which was removed in pydicom 3.0.
                ds = pydicom.dcmread(fullpath)
                img = ds.pixel_array  # pixel data as an array
                imageio.imsave(out_path, img)
    print('all is changed')
if __name__ == '__main__':
    # Root of the DICOM tree and root folder that receives the PNG files.
    src_path = r'E:\3D slicerTest\test\dcm'
    des_path = r'E:\3D slicerTest\test\png'
    DcmToPng(src_path, des_path)
#coding:utf8
import os
import shutil
# Paths of the DICOM files that have a PNG next to them.
# Filled by png_copy() and read by dcm_copy().
indexs = list()


def png_copy(path, png_path):
    """Copy the PNG files of every sub-folder of *path* into *png_path*.

    ``path/<sub>/*.png`` is copied to ``png_path/<sub>/``. For each copied PNG
    the path of the DICOM file with the same stem in the same folder is
    appended to the module-level list ``indexs``.

    Args:
        path: Folder whose sub-folders hold the DICOM files and their PNGs.
        png_path: Folder that receives one sub-folder of PNGs per source
            sub-folder.
    """
    for sub in os.listdir(path):
        src_dir = os.path.join(path, sub)
        if not os.path.isdir(src_dir):
            continue  # a stray file here would crash os.listdir below
        dst_dir = os.path.join(png_path, sub)
        names = os.listdir(src_dir)
        # Real DICOM file name per stem, so the recorded path matches the file
        # on disk whatever the case of its extension (.dcm / .DCM).
        dcm_by_stem = {
            os.path.splitext(n)[0]: n for n in names if n.lower().endswith('.dcm')
        }
        for fname in names:
            if not fname.lower().endswith('.png'):
                continue
            # Without the folder, shutil.copy would create a *file* named dst_dir.
            os.makedirs(dst_dir, exist_ok=True)
            shutil.copy(os.path.join(src_dir, fname), dst_dir)
            stem = os.path.splitext(fname)[0]
            # Fall back to the historical "<stem>.DCM" name when no sibling exists.
            dcm_name = dcm_by_stem.get(stem, stem + '.DCM')
            indexs.append(os.path.join(src_dir, dcm_name))
def dcm_copy(path, data_path, index_paths=None):
    """Copy the DICOM files listed in the index from *path* to *data_path*.

    ``path/<sub>/<file>.dcm`` is copied to ``data_path/<sub>/`` when its path
    appears in *index_paths*. Paths are compared with the separator style and
    the case of the extension ignored, so an index entry ending in ``.DCM``
    matches a file stored as ``.dcm``.

    Args:
        path: Folder whose sub-folders hold the DICOM files.
        data_path: Folder that receives one sub-folder per source sub-folder.
        index_paths: Paths of the files to copy. Defaults to the module-level
            list ``indexs``.

    Returns:
        The number of files copied.
    """
    if index_paths is None:
        index_paths = indexs

    def _match_key(file_name):
        # Same key for "dir\\x.DCM" and "dir/x.dcm".
        root, ext = os.path.splitext(file_name.replace('\\', '/'))
        return root + ext.lower()

    # One set lookup per file instead of scanning the whole index each time.
    wanted = {_match_key(entry) for entry in index_paths}
    copied = 0
    for sub in os.listdir(path):
        src_dir = os.path.join(path, sub)
        if not os.path.isdir(src_dir):
            continue
        dst_dir = os.path.join(data_path, sub)
        for fname in os.listdir(src_dir):
            if not fname.lower().endswith('.dcm'):
                continue
            fullpath = os.path.join(src_dir, fname)
            if _match_key(fullpath) not in wanted:
                continue
            os.makedirs(dst_dir, exist_ok=True)
            shutil.copy(fullpath, dst_dir)
            copied += 1
    return copied
def mk_dir1(path1, path2):
    """Create under *path2* one empty folder per sub-folder of *path1*.

    Folders that already exist are kept, so the function can be run again.
    Plain files in *path1* are ignored.

    Args:
        path1: Folder whose first-level sub-folder names are mirrored.
        path2: Folder in which the mirrored folders are created.
    """
    for entry in os.listdir(path1):
        if not os.path.isdir(os.path.join(path1, entry)):
            continue
        # exist_ok: os.mkdir raised FileExistsError on every second run.
        os.makedirs(os.path.join(path2, entry), exist_ok=True)
def mk_dir2(path1, path2):
    """Create under *path2* one empty folder per sub-folder of *path1*.

    Folders that already exist are kept, so the function can be run again.
    Plain files in *path1* are ignored.

    Args:
        path1: Folder whose first-level sub-folder names are mirrored.
        path2: Folder in which the mirrored folders are created.
    """
    for entry in os.listdir(path1):
        if not os.path.isdir(os.path.join(path1, entry)):
            continue
        # exist_ok: os.mkdir raised FileExistsError on every second run.
        os.makedirs(os.path.join(path2, entry), exist_ok=True)
if __name__ == '__main__':
    path = r'E:\1Test\test2\dcm'  # folder holding all DICOM files
    png_path = r'E:\1Test\test2\png'  # folder that receives the converted PNGs
    data_path = r'E:\1Test\test2\data'  # folder that receives the selected DICOM series
    mk_dir1(path, png_path)  # create the same sub-folders as in path
    mk_dir2(path, data_path)
    png_copy(path, png_path)
    # print(indexs)
    dcm_copy(path, data_path)
#!/usr/bin/env python
# _*_ coding: UTF-8 _*_
#!/bin/bash
''' 批量解析json 对指定路径中的json文件进行解析,生成相应的数据'''
import os
import natsort
import subprocess

labelme_json = r"C:\\Users\\18943\.conda\envs\labelme\Scripts\labelme_json_to_dataset.exe"  # path of labelme_json_to_dataset.exe
file_path = r"E:\1Test\json_test"  # folder whose sub-folders hold the json files


def convert_json_dirs(root, converter):
    """Run *converter* on every ``.json`` file one folder level below *root*.

    The converter is started once per file, with the file's folder as working
    directory. A failing conversion does not stop the batch.

    Args:
        root: Folder whose sub-folders contain the labelme json files.
        converter: Path of the ``labelme_json_to_dataset`` executable.

    Returns:
        A dict mapping each processed json path to the converter's exit code.
    """
    exit_codes = {}
    for sub in os.listdir(root):
        sub_dir = os.path.join(root, sub)
        if not os.path.isdir(sub_dir):
            continue
        for fname in os.listdir(sub_dir):
            # Only json files: on a second run the folder also contains the
            # converter's own output.
            if not fname.lower().endswith('.json'):
                continue
            json_path = os.path.join(sub_dir, fname)
            # Argument list + cwd instead of os.system: paths with spaces stay
            # intact, and a "cd" in a separate shell never affected the next
            # command anyway.
            completed = subprocess.run([converter, json_path], cwd=sub_dir)
            exit_codes[json_path] = completed.returncode
            if completed.returncode != 0:
                print("conversion failed:", json_path)
    return exit_codes


if __name__ == '__main__':
    convert_json_dirs(file_path, labelme_json)
#coding:utf8
import os
import shutil
def file_rename(path, despath=r'E:\1Test\4label\3004325563\\'):
    """Collect the ``label.png`` of every sub-folder of *path* into *despath*.

    ``path/<sub>/label.png`` is moved to ``despath/<prefix>_mask.png`` where
    ``<prefix>`` is the part of ``<sub>`` before its first underscore.
    Sub-folders without a ``label.png`` and plain files in *path* are skipped.

    Args:
        path: Folder holding one sub-folder per converted json file.
        despath: Folder that receives the renamed label images; created when
            missing.

    Returns:
        The list of destination paths, in processing order.
    """
    moved = []
    for entry in os.listdir(path):
        sub_path = os.path.join(path, entry)
        if not os.path.isdir(sub_path):
            continue
        label_png = os.path.join(sub_path, "label.png")
        if not os.path.isfile(label_png):
            continue
        # Use the loop entry itself: a separate counter into the listing went
        # out of step as soon as the folder contained a plain file.
        prefix = entry.split("_")[0]
        target = os.path.join(despath, prefix + '_mask' + ".png")
        os.makedirs(despath, exist_ok=True)
        os.rename(label_png, target)
        moved.append(target)
    return moved
if __name__ == '__main__':
    path = (r'E:\1Test\3json\3004325563')  # folder holding all converted json folders
    file_rename(path)
#coding:utf8
import os
import shutil
import cv2 as cv
import numpy as np
# 二值化后图像保存路径
# Folder that receives the binarized masks.
des_path = r'E:\1Test\5data\3004325563\\'


def file_rename(path, out_dir=None):
    """Binarize every ``*_mask.png`` in *path* and save it under the same name.

    Each image is converted to grayscale and thresholded at its own mean gray
    value: pixels above the mean become 255, all others 0.

    Args:
        path: Folder holding the mask images.
        out_dir: Folder that receives the binarized images; created when
            missing. Defaults to the module-level ``des_path``.

    Returns:
        The list of written file paths, in sorted file-name order.
    """
    if out_dir is None:
        out_dir = des_path
    written = []
    for fname in sorted(os.listdir(path)):
        if not fname.lower().endswith('_mask.png'):
            continue
        # os.path.join also works when path has no trailing separator.
        image = cv.imread(os.path.join(path, fname))
        if image is None:
            # imread reports an unreadable file by returning None.
            print("skip unreadable image:", fname)
            continue
        # imread delivers the channels in B, G, R order.
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        mean = gray.mean()
        print("mean:", mean)
        ret, binary = cv.threshold(gray, mean, 255, cv.THRESH_BINARY)
        os.makedirs(out_dir, exist_ok=True)
        target = os.path.join(out_dir, fname)
        cv.imwrite(target, binary)
        written.append(target)
    return written
if __name__ == '__main__':
    path = r'E:\1Test\4label\3004325563\\'  # folder holding all mask images
    file_rename(path)
# -*- coding: utf-8 -*-
import SimpleITK as sitk
import radiomics
from radiomics.featureextractor import RadiomicsFeatureExtractor
import os
import numpy as np
import pandas as pd
import cv2
# ------------------------------------
# The relative paths below are resolved against this working directory,
# so the chdir has to stay ahead of any code that uses them.
os.chdir(r'E:\毕业论文\问题2问题3代码')
os.getcwd() # returns the current working directory; the result is not used here
yaml_path = './Params.yaml' # parameter file handed to the pyradiomics feature extractor
# Iterated by the main block: each entry is treated as one patient folder.
dataset_path = "./Test_CT_patient/"
# Clinical data table; only referenced by the commented-out code at the end of the file.
excel_lc_path = './临床数据.csv'
zoom_prixl = 5 # number of pixels by which the mask is shrunk
# predict
def predict_features(image, mask, option_yaml_path):
    """Extract the radiomics features of *image* inside *mask*.

    A ``RadiomicsFeatureExtractor`` is built from the parameter file and its
    ``execute()`` method is called with the original image and the
    corresponding mask. The parameter file is where the extraction is
    customized: which feature classes and features to compute, which image
    types (original and/or derived) to use, and which preprocessing to apply.

    Args:
        image: Image to extract the features from.
        mask: Mask that belongs to *image*.
        option_yaml_path: Path of the extractor's parameter file.

    Returns:
        Whatever ``RadiomicsFeatureExtractor.execute`` returns for this pair.
    """
    extractor = RadiomicsFeatureExtractor(option_yaml_path)
    return extractor.execute(image, mask)
# -----------------------
# path=phase_path
# transform dcm and mask_png to image main function
def prepare_images(path):
    """Load the DICOM series and the matching mask volume of one folder.

    The folder must contain the DICOM slices of the series and one
    ``*_mask.png`` per slice. Every mask is shrunk by drawing its contours in
    black with a line width of ``2 * zoom_prixl`` and is then scaled to 0/1.

    Args:
        path: Folder holding the DICOM files and the mask PNGs.

    Returns:
        A tuple ``(images_dcm, images_mask)`` of SimpleITK images; the mask
        carries the geometry information copied from the DICOM volume.

    Raises:
        ValueError: If the folder holds no DICOM series, or if the number of
            masks differs from the number of slices.
    """
    # --- DICOM volume ---
    reader = sitk.ImageSeriesReader()
    # A folder can hold several series; list their IDs and read the first one.
    series_ids = reader.GetGDCMSeriesIDs(path)
    if not series_ids:
        raise ValueError("no DICOM series found in " + str(path))
    dicom_names = reader.GetGDCMSeriesFileNames(path, series_ids[0])
    reader.SetFileNames(dicom_names)
    images_dcm = reader.Execute()
    print(images_dcm.GetSize())

    # --- mask volume ---
    # NOTE(review): masks are stacked in sorted file-name order, which is
    # assumed to match the slice order of the series - confirm for your data.
    mask_slices = []
    for item in sorted(os.listdir(path)):
        if not item.lower().endswith('_mask.png'):
            continue
        # os.path.join also works when path has no trailing separator.
        tmp_array = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path, item)))
        # Gray values above 127 become 255, all others 0 (type 0 = binary).
        ret, thresh = cv2.threshold(tmp_array, 127, 255, 0)
        # [-2:] keeps (contours, hierarchy) on every OpenCV version; 3.x
        # returns the image as an additional first element.
        contours, hierarchy = cv2.findContours(
            thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
        # Black contour lines eat into the mask from its border.
        imgnew = cv2.drawContours(tmp_array, contours, -1, (0, 0, 0), int(zoom_prixl * 2))
        imgnew = imgnew / 255
        mask_slices.append(imgnew.astype(np.int32))

    slice_count = images_dcm.GetSize()[2]
    if len(mask_slices) != slice_count:
        raise ValueError(
            "found %d mask image(s) for %d DICOM slice(s) in %s"
            % (len(mask_slices), slice_count, path))
    images_mask = sitk.GetImageFromArray(np.array(mask_slices))
    print(images_mask.GetSize())
    images_mask.CopyInformation(images_dcm)
    return images_dcm, images_mask
results = list()  # one feature dict per patient
indexs = list()  # patient ids, in the same order as results
if __name__ == '__main__':
    for patients_id in os.listdir(dataset_path):
        patients_path = dataset_path + patients_id + '/'
        if not os.path.isdir(patients_path):
            continue  # ignore stray files next to the patient folders
        # for phase in os.listdir(patients_path):
        # phase_path = patients_path + phase + '/'
        indexs.append(str(patients_id))
        images, masks = prepare_images(patients_path)
        results.append(predict_features(images, masks, yaml_path))
    df = pd.DataFrame(results)
    # Drop the non-feature columns by name rather than "the first 22": the
    # number of diagnostics entries depends on the extractor settings.
    feature_columns = [c for c in df.columns if not str(c).startswith('diagnostics_')]
    df = df[feature_columns]
    df.index = indexs
    df.to_csv('5_1_ct_patient.csv')
# Not used for now: merges the clinical data (age, sex, metastasis) into the feature table.
# target = pd.DataFrame()
# target['metastasis'] = None
# df['age'] = None # add col
# df['sex'] = None # add col
#
# excel_lc = pd.read_csv(open(excel_lc_path), header=0, encoding='utf-8')
# for i in df.index:
# for v in excel_lc.values:
# if int(i[:4]) == v[2]:
# sex = 1 if v[0] == 'm' else 0
# df.loc[i, ['age', 'sex']] = int(v[1][:2]), sex
# target.loc[i, ['metastasis']] = 1 if v[3] == '+' else 0
# # x, y = df.iloc[:, :], np.ravel(target).astype(np.int8)
# df_all = pd.concat([df, target], axis=1)
# df_all.to_csv('3.csv')