BiocParallel 学习

我好像做了一个愚蠢的测试

# Clear every object from the current environment before running the test.
# NOTE(review): this also wipes anything the calling session had defined --
# confirm that is acceptable wherever this script is sourced.
rm(list=ls())
# Attach the packages used by this script, hiding their start-up messages.
suppressPackageStartupMessages({
  library(SingleCellExperiment)
  library(scMerge)
  library(scater)
  library(Matrix)
})

# Machine-specific working directory; the relative "./data/..." paths below
# are resolved against it.
setwd("/Users/yxk/Desktop/test/R_parallel/")
# Restore previously saved objects into the workspace.
# NOTE(review): presumably these files provide `exprsMat`, `clust`,
# `pseudobulk_sample_list`, `pseudobulk_sample` and `use_bpparam`, which the
# code further down references without defining -- verify the object names
# stored in each file.
load("./data/exprsMat.RData")
load("./data/clust.RData")
load("./data/pseudobulk_sample_list.RData")
load("./data/pseudobulk_sample.RData")
load("./data/use_bpparm.RData")
load("./data/res.RData")

#' Aggregate the rows of a matrix by group
#'
#' Multiplies the transpose of a sparse group-mapping matrix with `x`. When
#' there is a single group the mapping is a column of ones, so the result is
#' the column sums of `x`. With several groups the mapping comes from
#' `ruv::replicate.matrix()` (expected to be a row-by-group membership
#' matrix -- see the ruv documentation).
#'
#' @param x A `Matrix` object, or any object `as.matrix()` can convert; inputs
#'   that are not already a `Matrix` are converted to a `CsparseMatrix`.
#' @param groupings Vector with one group label per row of `x`. `NULL` (the
#'   default) puts every row of `x` into one unnamed group.
#' @return A matrix-like object with one row per group and one column per
#'   column of `x`. With several groups the rows follow
#'   `levels(factor(groupings))`, so `NA` labels get no row.
#' @importFrom ruv replicate.matrix
#' @importFrom methods as is

aggregate.Matrix <- function(x, groupings = NULL) {
  n_obs <- NROW(x)
  # Fail early with a readable message instead of a non-conformable `%*%`.
  if (!is.null(groupings) && length(groupings) != n_obs) {
    stop(
      "`groupings` must have one entry per row of `x` (got ",
      length(groupings), " entries for ", n_obs, " rows)",
      call. = FALSE
    )
  }

  if (!methods::is(x, "Matrix")) {
    x <- methods::as(as.matrix(x), "CsparseMatrix")
  }

  # The "A" prefix is stripped again from the mapping's column names below.
  # NOTE(review): presumably it protects labels that would not survive as
  # column names inside ruv::replicate.matrix() -- confirm.
  prefixed <- paste0("A", groupings)

  if (length(unique(prefixed)) > 1) {
    mapping <- methods::as(ruv::replicate.matrix(prefixed), "CsparseMatrix")
    colnames(mapping) <- substring(colnames(mapping), 2)
    # drop = FALSE keeps a one-column mapping (and its group name) intact,
    # e.g. when the only other label is NA and therefore not a factor level.
    mapping <- mapping[, levels(factor(groupings)), drop = FALSE]
  } else {
    # Single group: a column of ones turns the product into column sums.
    mapping <- methods::as(matrix(rep(1, n_obs), ncol = 1), "CsparseMatrix")
    group_name <- unique(groupings)
    if (length(group_name) == 1L) {
      colnames(mapping) <- as.character(group_name)
    }
  }

  t(mapping) %*% x
}


#' Build k-fold pseudo-bulk profiles, one fold per BiocParallel task
#'
#' The columns of `exprsMat` are split into folds with `cvTools::cvFolds()`.
#' Within each fold the columns are summed per label with
#' `aggregate.Matrix()` and divided by the number of columns carrying that
#' label, giving a per-label mean profile. Fold results are stacked row-wise.
#'
#' The fold assignment is drawn in the calling process, so call `set.seed()`
#' beforehand to make the result reproducible.
#'
#' @param exprsMat Matrix-like object; its columns are the units being folded
#'   and its rows become the columns of the result.
#' @param cell_info Vector of labels, indexed in parallel with the columns of
#'   `exprsMat`.
#' @param k_fold Requested number of folds; capped at `ncol(exprsMat)`.
#' @param use_bpparam BiocParallel back-end passed to `bplapply()` as
#'   `BPPARAM`.
#' @return The row-bound fold results; row names are `<label>_<fold index>`.
create_pseudoBulk_parallel <- function(exprsMat, cell_info, k_fold = 30,
                                       use_bpparam = BiocParallel::SerialParam()) {
  k_fold <- min(ncol(exprsMat), k_fold)
  cv <- cvTools::cvFolds(ncol(exprsMat), K = k_fold)

  # Per-label mean profile of the columns assigned to fold `i`.
  pseudo_bulk_fold <- function(i) {
    subset_idx <- cv$subsets[cv$which == i]
    fold_labels <- cell_info[subset_idx]

    # Number of columns per label, repeated once for every row of exprsMat so
    # that it lines up element-wise with the aggregated sums.
    cellType_tab <- table(droplevels(factor(fold_labels)))
    cellTypes_n_mat <- matrix(
      rep(cellType_tab, nrow(exprsMat)),
      nrow = length(cellType_tab),
      byrow = FALSE
    )
    rownames(cellTypes_n_mat) <- names(cellType_tab)

    # drop = FALSE keeps the matrix shape when exprsMat has a single row;
    # otherwise t() would produce a 1 x n_cells matrix and aggregation fails.
    res <- aggregate.Matrix(
      t(exprsMat[, subset_idx, drop = FALSE]),
      fold_labels
    )

    # Align the counts with the row order of `res`; drop = FALSE keeps a
    # one-label fold as a 1-row matrix so the division is shape-for-shape.
    cellTypes_n_mat <- cellTypes_n_mat[rownames(res), , drop = FALSE]
    res <- res / cellTypes_n_mat
    rownames(res) <- paste(rownames(res), i, sep = "_")
    res
  }

  exprsMat_pseudo <- BiocParallel::bplapply(
    seq_len(k_fold),
    pseudo_bulk_fold,
    BPPARAM = use_bpparam
  )
  do.call(rbind, exprsMat_pseudo)
}


#' Build k-fold pseudo-bulk profiles serially (no BiocParallel)
#'
#' The columns of `exprsMat` are split into folds with `cvTools::cvFolds()`.
#' Within each fold the columns are summed per label with
#' `aggregate.Matrix()` and divided by the number of columns carrying that
#' label, giving a per-label mean profile. Fold results are stacked row-wise.
#'
#' The fold assignment is random, so call `set.seed()` beforehand to make the
#' result reproducible.
#'
#' @param exprsMat Matrix-like object; its columns are the units being folded
#'   and its rows become the columns of the result.
#' @param cell_info Vector of labels, indexed in parallel with the columns of
#'   `exprsMat`.
#' @param k_fold Requested number of folds; capped at `ncol(exprsMat)`.
#' @return The row-bound fold results; row names are `<label>_<fold index>`.
create_pseudoBulk_no <- function(exprsMat, cell_info, k_fold = 30) {
  k_fold <- min(ncol(exprsMat), k_fold)
  cv <- cvTools::cvFolds(ncol(exprsMat), K = k_fold)

  # lapply() builds the list in one go instead of growing it inside a loop.
  exprsMat_pseudo <- lapply(seq_len(k_fold), function(i) {
    subset_idx <- cv$subsets[cv$which == i]
    fold_labels <- cell_info[subset_idx]

    # Number of columns per label, repeated once for every row of exprsMat so
    # that it lines up element-wise with the aggregated sums.
    cellType_tab <- table(droplevels(factor(fold_labels)))
    cellTypes_n_mat <- matrix(
      rep(cellType_tab, nrow(exprsMat)),
      nrow = length(cellType_tab),
      byrow = FALSE
    )
    rownames(cellTypes_n_mat) <- names(cellType_tab)

    # drop = FALSE keeps the matrix shape when exprsMat has a single row;
    # otherwise t() would produce a 1 x n_cells matrix and aggregation fails.
    res <- aggregate.Matrix(
      t(exprsMat[, subset_idx, drop = FALSE]),
      fold_labels
    )

    # Align the counts with the row order of `res`; drop = FALSE keeps a
    # one-label fold as a 1-row matrix so the division is shape-for-shape.
    cellTypes_n_mat <- cellTypes_n_mat[rownames(res), , drop = FALSE]
    res <- res / cellTypes_n_mat
    rownames(res) <- paste(rownames(res), i, sep = "_")
    res
  })

  do.call(rbind, exprsMat_pseudo)
}


# Run both implementations on the first pseudo-bulk sample. The seed is reset
# to the same value before each call so that both draw the same random folds.
i <- 1

set.seed(1)
res1 <- create_pseudoBulk_parallel(
  exprsMat[, pseudobulk_sample == pseudobulk_sample_list[i]],
  clust[[i]],
  k_fold = 30,
  use_bpparam = use_bpparam
)

set.seed(1)
res2 <- create_pseudoBulk_no(
  exprsMat[, pseudobulk_sample == pseudobulk_sample_list[i]],
  clust[[i]],
  k_fold = 30
)

print("done")


# for (i in seq_along(pseudobulk_sample_list)) {
#     res <- create_pseudoBulk_parallel(exprsMat[, pseudobulk_sample == 
#                                         pseudobulk_sample_list[i]], clust[[i]], k_fold = 30, 
#                              use_bpparam = use_bpparam)
# }


首先要注意随机种子的问题:cvTools::cvFolds() 对细胞的分组是随机的,所以两次调用前都要用 set.seed() 设定相同的种子,否则两个函数得到的结果就会不一样。

你可能感兴趣的:(学习,windows,linux)