基于Python的Scrublet工具去除双细胞-批量运行

参考生信技能树教程:https://mp.weixin.qq.com/s/i6_x1yeMbXawfKm36ewnKQ

对原链接中混合使用shell和py脚本的方法进行改进,避免了不必要的错误,运行更高效

注意修改路径:读取路径为cellranger对各个样本的输出目录所在的文件夹(每个样本子目录下需包含matrix.mtx.gz和barcodes.tsv.gz),同时也要修改脚本末尾保存结果的输出路径

import scrublet as scr
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd 
import sys
import os, sys
# Root directory that is scanned for per-sample sub-directories.
file_to_search = "/biodata_01_4T/scRNA-seq_raw_data/Esophagus/PRJNA777911/result/"
# The working directory is still switched to the root, exactly as before,
# so anything that relies on the current directory keeps working.
os.chdir(file_to_search)
# Collect the names of all sub-directories of the root. Each entry is tested
# against its full path so the check no longer depends on the current working
# directory; sorting makes the processing order reproducible, since
# os.listdir() returns entries in arbitrary order.
dirlist = sorted(
    filename
    for filename in os.listdir(file_to_search)
    if os.path.isdir(os.path.join(file_to_search, filename))
)
print(dirlist)
# Run Scrublet on every sample directory listed in `dirlist` and write one
# table per sample with the columns barcode, doublet_scores, predicted_doublets.

# Directory containing one sub-directory per sample.
path = "/biodata_01_4T/scRNA-seq_raw_data/Esophagus/PRJNA777911/result/"
# Directory receiving the per-sample result tables; created up front so that
# to_csv() does not fail on a fresh run.
out_dir = '/biodata_01_4T/scRNA-seq_raw_data/Esophagus/PRJNA777911/scrublet_result/'
os.makedirs(out_dir, exist_ok=True)

for i in dirlist:
    input_dir = os.path.join(path, i)
    matrix_file = os.path.join(input_dir, 'matrix.mtx.gz')
    barcodes_file = os.path.join(input_dir, 'barcodes.tsv.gz')

    # A sub-directory without the expected input files would otherwise abort
    # the whole batch; report it and continue with the next sample.
    if not (os.path.isfile(matrix_file) and os.path.isfile(barcodes_file)):
        print('Skipping ' + i + ': matrix.mtx.gz or barcodes.tsv.gz not found')
        continue

    # The matrix is transposed after loading and converted to CSC format.
    # NOTE(review): presumably the file is genes x cells and Scrublet expects
    # cells x genes -- confirm against the cellranger/Scrublet documentation.
    counts_matrix = scipy.io.mmread(matrix_file).T.tocsc()
    # One barcode per line, no header row in the file.
    out_df = pd.read_csv(barcodes_file, header=None, index_col=None, names=['barcode'])

    scrub = scr.Scrublet(counts_matrix, expected_doublet_rate=0.06)
    doublet_scores, predicted_doublets = scrub.scrub_doublets(
        min_counts=2,
        min_cells=3,
        min_gene_variability_pctl=85,
        n_prin_comps=30,
    )
    # Fraction of cells called as doublets.
    print(scrub.detected_doublet_rate_)

    # Attach the per-cell results to the barcode table; this requires the
    # barcode file to have exactly one row per row of counts_matrix.
    out_df['doublet_scores'] = doublet_scores
    out_df['predicted_doublets'] = predicted_doublets

    # File name is the sample directory name directly followed by
    # 'doublet.txt' (no separator), unchanged from the original script.
    out_df.to_csv(os.path.join(out_dir, i + 'doublet.txt'), index=False, header=True)
    print(out_df["predicted_doublets"].value_counts())

经比对,本方法的结果与DoubletFinder R包的结果有较高的一致性,且运行速度大幅提升

你可能感兴趣的:(基于Python的Scrublet工具去除双细胞-批量运行)