本流程分析基于spot的空间转录组(ST)数据集,以估计每个spot特异的转录因子(TF)活性,并识别与特定细胞类型、空间结构域、病理区域以及配体/受体相关的TF。大多数基于spot的ST分析方法的一个关键限制是缺乏单细胞分辨率:每个spot的空间RNA-seq测量值混合了多个细胞的信号,因此推断出的spot特异性TF活性反映的也是多个细胞的混合结果。为了克服这一限制,首先使用反卷积方法,将空间RNA-seq数据与从scRNA-seq谱中获得的细胞类型参考转录组特征进行计算整合,估计每个spot的细胞类型比例;然后通过线性回归,以反卷积得到的细胞类型比例为自变量分别预测每个TF的活性,从而进行TF与细胞类型之间的关联分析;在此基础上,确定活性在不同病理区域和空间结构域之间存在差异的TF,并确定基因表达与邻近位置TF活性相关的配体/受体。
细胞行为受到周围微环境信号的复杂影响。这些信号通常以配体的形式出现,即由邻近细胞产生的细胞外蛋白质信号分子。配体与接收细胞上表达的受体相互作用,引发一系列分子事件,这些事件可以深刻地影响基因表达程序并调节TF的活性。
from anndata import AnnData
from scipy.stats import pearsonr, spearmanr, wilcoxon
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import squidpy as sq
import warnings
warnings.filterwarnings('ignore')
import sys
sys.path.append("../")
import auxiliary_lymphnode as auxln
import auxiliary_plot as auxpl
# Reuse the figure size and font size defined in the shared plotting helper module.
figsize, fontsize = auxpl.figsize, auxpl.fontsize
#### Loading ST dataset
# Input of decoupleR.
adata_raw = sc.read_h5ad("results_lymphnode/lymphnode.h5ad")
# Output of decoupleR.
adata_coup = sc.read_h5ad("results_lymphnode/lymphnode_decoupler.h5ad")

# Build an AnnData whose X is the 'ulm_estimate' entry of the decoupleR output.
# The obsm entries of adata_coup are split by key: those whose name contains
# "tf" become layers, all others are kept as obsm. obs and uns are taken from
# adata_coup.
adata_tfa = AnnData(
    X=adata_coup.obsm['ulm_estimate'],
    obs=adata_coup.obs,
    obsm={key: val for key, val in adata_coup.obsm.items() if "tf" not in key},
    layers={key: val for key, val in adata_coup.obsm.items() if "tf" in key},
)
adata_tfa.uns = adata_coup.uns

# For each dataset: apply scanpy's total-count normalisation, then store the
# element-wise square root of the resulting matrix as the 'scaled' layer.
for _adata in (adata_raw, adata_coup):
    sc.pp.normalize_total(_adata)
    _adata.layers['scaled'] = np.sqrt(_adata.to_df())
#### We rank TFs and genes for germinal centers and non-germinal centers.
# rank_genes_groups needs a categorical grouping column, so cast it first.
_gc_key = 'germinal_center'
adata_tfa.obs[_gc_key] = adata_tfa.obs[_gc_key].astype('category')
sc.tl.rank_genes_groups(adata_tfa, groupby=_gc_key, method="wilcoxon")
sc.pl.rank_genes_groups(adata_tfa, fontsize=fontsize, n_genes=10)

# Attach the table returned by auxln.merge_celltypes to every AnnData object
# under the same obsm key, so later steps can look it up from any of them.
df_celltype = auxln.merge_celltypes(adata_raw)
for _adata in (adata_raw, adata_coup, adata_tfa):
    _adata.obsm['celltype_major'] = df_celltype
def plot_spatial_ct_tf(ct, tf):
    """Draw three side-by-side spatial maps for one cell type / TF pair.

    Panels, left to right:
      1. column ``ct`` of ``adata_tfa.obsm['celltype_major']``;
      2. ``tf`` coloured from ``adata_tfa`` (titled "activity");
      3. ``tf`` coloured from ``adata_raw`` (titled "mRNA expr").

    The titles of panels 2 and 3 also show the value stored at ``[tf, ct]``
    in the module-level tables ``mat_cor_tfa`` and ``mat_cor_rna``, so both
    tables must exist before this function is called.

    Parameters
    ----------
    ct : str
        Column name in ``obsm['celltype_major']``; also a column label of
        the correlation tables.
    tf : str
        Variable name present in both ``adata_tfa`` and ``adata_raw``; also
        a row label of the correlation tables.
    """
    _, (ax_ct, ax_tfa, ax_rna) = plt.subplots(
        1, 3, figsize=(figsize * 3, figsize), dpi=100
    )

    # Options common to all three panels; the tissue image is hidden
    # (alpha_img=0) and show=False keeps the figure open for further edits.
    shared = dict(alpha_img=0, show=False, size=1.8, legend_fontsize=fontsize)

    sc.pl.spatial(
        sq.pl.extract(adata_tfa, "celltype_major"),
        color=ct, ax=ax_ct, cmap="rocket", **shared
    )
    sc.pl.spatial(adata_tfa, color=tf, ax=ax_tfa, cmap="plasma", **shared)
    sc.pl.spatial(adata_raw, color=tf, ax=ax_rna, cmap="viridis", **shared)

    # Titles are set after plotting so they replace the ones scanpy assigns.
    ax_ct.set_title(ct + '\n', fontsize=fontsize)
    ax_tfa.set_title(
        tf + ' activity\npearson=%.4f' % mat_cor_tfa.loc[tf, ct],
        fontsize=fontsize,
    )
    ax_rna.set_title(
        tf + " mRNA expr\npearson=%.4f" % mat_cor_rna.loc[tf, ct],
        fontsize=fontsize,
    )

    # Remove the axis labels from every panel.
    for panel in (ax_ct, ax_tfa, ax_rna):
        panel.set_xlabel("")
        panel.set_ylabel("")

    plt.tight_layout(pad=0.6)
# Correlation tables read by plot_spatial_ct_tf (indexed as [tf, celltype]).
mat_cor_tfa, mat_cor_rna = auxpl.make_cor_dataframe(
    adata_raw, adata_tfa, celltype_label='celltype_major'
)
df_ct_tf = auxpl.make_ct_tf_dataframe(adata_tfa, celltype_label='celltype_major')

# Example cell type / TF pairs shown as spatial maps.
for _ct, _tf in (("T_Treg", "FOXP3"), ("B_IFN", "STAT2")):
    plot_spatial_ct_tf(ct=_ct, tf=_tf)

# NOTE(review): same call with the same arguments as above; it looks redundant,
# but confirm make_ct_tf_dataframe has no side effects before removing it.
df_ct_tf = auxpl.make_ct_tf_dataframe(adata_tfa, celltype_label='celltype_major')

# TFs and cell types passed to the heatmap, in the order listed here.
tf_list = [
    'BACH2', 'BCL11A', 'CDX2', 'CREB1', 'E2F1', 'E2F7',
    'EBF1', 'ETS1', 'ETV6', 'FOXM1', 'FOXP2', 'FOXP3',
    'GTF2B', 'IRF1', 'IRF2', 'KLF1', 'KMT2A', 'MAX',
    'MYB', 'NR5A2', 'PAX5', 'PBX1', 'PGR', 'POU2F2',
    'PPARG', 'RFX3', 'SPIB', 'STAT1', 'STAT2', 'STAT3',
    'STAT4', 'STAT5B', 'TBX21', 'TCF21',
]
ct_list = [
    'B_Cycling', 'B_GC', 'T_TIM3+', 'B_preGC', 'B_activated',
    'NK', 'T_CD8+', 'NKT', 'VSMC', 'FDC',
    'T_TfR', 'T_Treg', 'B_mem', 'B_plasma', 'Monocytes',
    'B_IFN', 'DC', 'ILC', 'Macrophages', 'T_CD4+',
]
auxpl.plot_heatmap(df_ct_tf, tf_list, ct_list)