admin管理员组

文章数量:1440221

R tips:解决monocle的plot

最近在使用monocle的plotgenesbranched_heatmap函数进行BEAM分支热图绘制时发生了一个报错,如下:

代码语言:txt复制
p <-  
  cds[BEAM_genes,] %>% 
  plot_genes_branched_heatmap( 
    branch_point = 1,  
    num_clusters = 4, # 3,  
    show_rownames = F,  
    return_heatmap = T 
 ) 
 
<simpleError in checkwz(wz, M = M, trace = trace, wzepsilon = control$wzepsilon): NAs found in the working weights variable 'wz'> 
Error in if (x@family@vfamily %in% c("negbinomial", "negbinomial.size")) { :  
  the condition has length > 1 

老办法,先使用traceback看一下报错的调用栈:

代码语言:txt复制
traceback() 
 
11: FUN(X[[i]], ...) 
10: lapply(X = X, FUN = FUN, ...) 
9: mclapply(models, function(x) { 
 if (is.null(x)) { 
           NA 
 } 
 else { 
 if (x@family@vfamily %in% c("negbinomial", "negbinomial.size")) { 
               predict(x, newdata = newdata, type = response_type) 
 } 
 else if (x@family@vfamily %in% c("uninormal")) { 
               predict(x, newdata = newdata, type = response_type) 
 } 
 else { 
 10^predict(x, newdata = newdata, type = response_type) 
 } 
 } 
 }, mc.cores = cores) 
8: responseMatrix(list(model_fits), new_data, response_type = response_type) 
7: as.data.frame(responseMatrix(list(model_fits), new_data, response_type = response_type)) 
6: FUN(newX[, i], ...) 
5: apply(exprs(X), MARGIN, FUN, ...) 
4: smartEsApply(cds, 1, function(x, trend_formula, expressionFamily,  
       relative_expr, new_data) { 
       environment(fit_model_helper) <- environment() 
       environment(responseMatrix) <- environment() 
       model_fits <- fit_model_helper(x, modelFormulaStr = trend_formula,  
           expressionFamily = expressionFamily, relative_expr = relative_expr,  
           disp_func = cds@dispFitInfo[["blind"]]$disp_func) 
 if (is.null(model_fits))  
           expression_curve <- as.data.frame(matrix(rep(NA, nrow(new_data)),  
               nrow = 1)) 
 else expression_curve <- as.data.frame(responseMatrix(list(model_fits),  
           new_data, response_type = response_type)) 
       colnames(expression_curve) <- row.names(new_data) 
       expression_curve 
 }, convert_to_dense = TRUE, trend_formula = trend_formula, expressionFamily = expressionFamily,  
       relative_expr = relative_expr, new_data = new_data) 
3: genSmoothCurves(new_cds[, ], cores = cores, trend_formula = trend_formula,  
       relative_expr = T, new_data = rbind(newdataA, newdataB)) 
2: plot_genes_branched_heatmap(., branch_point = 1, num_clusters = 4,  
       show_rownames = F, return_heatmap = T) 
1: cds[BEAM_genes, ] %>% plot_genes_branched_heatmap(branch_point = 1,  
       num_clusters = 4, show_rownames = F, return_heatmap = T) 

梳理调用栈,并结合各节点的函数源码可以发现报错传递是:

代码语言:txt复制
plot_genes_branched_heatmap -> genSmoothCurves -> responseMatrix 

先看一下responseMatrix的源码,如下所示,可以发现报错点的if语句那里是有问题的。因为结合最开始的报错提示表明x@family@vfamily的长度可能不为1。

代码语言:txt复制
monocle:::responseMatrix 
function (models, newdata = NULL, response_type = "response",  
    cores = 1)  
{ 
    res_list <- mclapply(models, function(x) { 
 if (is.null(x)) { 
            NA 
 } 
 else { 
 ### 报错点 ### 
 if (x@family@vfamily %in% c("negbinomial", "negbinomial.size")) { 
                predict(x, newdata = newdata, type = response_type) 
 } 
 else if (x@family@vfamily %in% c("uninormal")) { 
                predict(x, newdata = newdata, type = response_type) 
 } 
 else { 
 10^predict(x, newdata = newdata, type = response_type) 
 } 
 } 
 }, mc.cores = cores) 
    res_list_lengths <- lapply(res_list[is.na(res_list) == FALSE],  
        length) 
    stopifnot(length(unique(res_list_lengths)) == 1) 
    num_na_fits <- length(res_list[is.na(res_list)]) 
 if (num_na_fits > 0) { 
        na_matrix <- matrix(rep(rep(NA, res_list_lengths[[1]]),  
            num_na_fits), nrow = num_na_fits) 
        row.names(na_matrix) <- names(res_list[is.na(res_list)]) 
        non_na_matrix <- Matrix::t(do.call(cbind, lapply(res_list[is.na(res_list) ==  
            FALSE], unlist))) 
        row.names(non_na_matrix) <- names(res_list[is.na(res_list) ==  
            FALSE]) 
        res_matrix <- rbind(non_na_matrix, na_matrix) 
        res_matrix <- res_matrix[names(res_list), ] 
 } 
 else { 
        res_matrix <- Matrix::t(do.call(cbind, lapply(res_list,  
            unlist))) 
        row.names(res_matrix) <- names(res_list[is.na(res_list) ==  
            FALSE]) 
 } 
    res_matrix 
} 
可以再看一下genSmoothCurves函数源码:

genSmoothCurves 
function (cds, new_data, trend_formula = "~sm.ns(Pseudotime, df = 3)",  
    relative_expr = T, response_type = "response", cores = 1)  
{ 
    expressionFamily <- cds@expressionFamily 
 if (cores > 1) { 
        expression_curve_matrix <- mcesApply(cds, 1, function(x,  
            trend_formula, expressionFamily, relative_expr, new_data,  
            fit_model_helper, responseMatrix, calculate_NB_dispersion_hint,  
            calculate_QP_dispersion_hint) { 
            environment(fit_model_helper) <- environment() 
            environment(responseMatrix) <- environment() 
            model_fits <- fit_model_helper(x, modelFormulaStr = trend_formula,  
                expressionFamily = expressionFamily, relative_expr = relative_expr,  
                disp_func = cds@dispFitInfo[["blind"]]$disp_func) 
 if (is.null(model_fits))  
                expression_curve <- as.data.frame(matrix(rep(NA,  
                  nrow(new_data)), nrow = 1)) 
 else expression_curve <- as.data.frame(responseMatrix(list(model_fits),  
                newdata = new_data, response_type = response_type)) 
            colnames(expression_curve) <- row.names(new_data) 
            expression_curve 
 }, required_packages = c("BiocGenerics", "Biobase", "VGAM",  
 "plyr"), cores = cores, trend_formula = trend_formula,  
            expressionFamily = expressionFamily, relative_expr = relative_expr,  
            new_data = new_data, fit_model_helper = fit_model_helper,  
            responseMatrix = responseMatrix, calculate_NB_dispersion_hint = calculate_NB_dispersion_hint,  
            calculate_QP_dispersion_hint = calculate_QP_dispersion_hint) 
        expression_curve_matrix <- as.matrix(do.call(rbind, expression_curve_matrix)) 
 return(expression_curve_matrix) 
 } 
 else { 
        expression_curve_matrix <- smartEsApply(cds, 1, function(x,  
            trend_formula, expressionFamily, relative_expr, new_data) { 
            environment(fit_model_helper) <- environment() 
            environment(responseMatrix) <- environment() 
            model_fits <- fit_model_helper(x, modelFormulaStr = trend_formula,  
                expressionFamily = expressionFamily, relative_expr = relative_expr,  
                disp_func = cds@dispFitInfo[["blind"]]$disp_func) 
 
 if (is.null(model_fits))  
                expression_curve <- as.data.frame(matrix(rep(NA,  
                  nrow(new_data)), nrow = 1)) 
 ##### 下面的responseMatrix是traceback提示的报错点 ##### 
 else expression_curve <- as.data.frame(responseMatrix(list(model_fits),  
                new_data, response_type = response_type)) 
            colnames(expression_curve) <- row.names(new_data) 
            expression_curve 
 }, convert_to_dense = TRUE, trend_formula = trend_formula,  
            expressionFamily = expressionFamily, relative_expr = relative_expr,  
            new_data = new_data) 
        expression_curve_matrix <- as.matrix(do.call(rbind, expression_curve_matrix)) 
        row.names(expression_curve_matrix) <- row.names(fData(cds)) 
 return(expression_curve_matrix) 
 } 
} 
如果debug到genSmoothCurves函数,手动走一下fitmodelhelper,如下所示,我们直接让smartEsApply返回fitmodelhelper函数拟合的model,并保存到modelfitsls2,可以发现确实有些基因的model的vfamily有两个:

model_fits_ls2 <- smartEsApply(cds, 1, function(x,  
            trend_formula, expressionFamily, relative_expr, new_data) { 
            environment(fit_model_helper) <- environment() 
            environment(responseMatrix) <- environment() 
 # 下面的fit_model_helper是traceback提示的报错点 
            model_fits <- fit_model_helper(x, modelFormulaStr = trend_formula,  
                expressionFamily = expressionFamily, relative_expr = relative_expr,  
                disp_func = cds@dispFitInfo[["blind"]]$disp_func) 
 
            model_fits 
 
 }, convert_to_dense = TRUE, trend_formula = trend_formula,  
            expressionFamily = expressionFamily, relative_expr = relative_expr,  
            new_data = new_data) 
 
 
# 发现部分model的vfamily有两个 
m <- model_fits_ls2$EVX2 
m@family@vfamily  
[1] "negbinomial" "VGAMcategorical" 
既然知道了报错所在,那就可以解决它,把monocle的源码下载了,重新安装:

# 下载monocle的bioconductor的包 
.html 
 
wget .34.0.tar.gz 
tar -xvzf monocle_2.34.0.tar.gz 
 
# 在源码里面找到responseMatrix函数,可以看到responseMatrix函数的定义在expr_models.R里面 
grep responseMatrix monocle/R/* 
# R/clustering.R:#' expression_curve_matrix <- responseMatrix(full_model_fits) 
# R/expr_models.R:responseMatrix <- function(models, newdata = NULL, response_type="response", cores = 1) { 
# R/expr_models.R:#' the corresponding response matrix. This function is build on other functions (fit_models and responseMatrix) and used in calILRs and calABCs functions 
# R/expr_models.R:      expression_curve_matrix <- mcesApply(cds, 1, function(x, trend_formula, expressionFamily, relative_expr, new_data, fit_model_helper, responseMatrix, 
# R/expr_models.R:            environment(responseMatrix) <- environment() 
# R/expr_models.R:                expression_curve <- as.data.frame(responseMatrix(list(model_fits), newdata = new_data, response_type=response_type)) 
# R/expr_models.R:            fit_model_helper = fit_model_helper, responseMatrix = responseMatrix, calculate_NB_dispersion_hint = calculate_NB_dispersion_hint, 
# R/expr_models.R:            environment(responseMatrix) <- environment() 
# R/expr_models.R:                expression_curve <- as.data.frame(responseMatrix(list(model_fits), new_data, response_type=response_type)) 
# R/expr_models.R:      environment(responseMatrix) <- environment() 
# R/plotting.R:#' expression_curve_matrix <- responseMatrix(full_model_fits) 
 
# 修改编辑相应的文件: 
vi monocle/R/expr_models.R 
 
# 将下面代码 
if (x@family@vfamily %in% c("negbinomial", "negbinomial.size")) { 
# 改为下面代码即可 
if (any(x@family@vfamily %in% c("negbinomial", "negbinomial.size"))) { 
 
# 在R中使用devtools重新安装 
devtools::install('monocle') 
回到R中重新载入monocle包即可:

detach("package:monocle", unload = TRUE) 
library(monocle) 
 
# 不再报错 
p <-  
  cds[BEAM_genes,] %>% 
  plot_genes_branched_heatmap( 
    branch_point = 1,  
    num_clusters = 4, # 3,  
    show_rownames = F,  
    return_heatmap = T 
 ) 
本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。原始发表:2025-04-11,如有侵权请联系 cloudcommunity@tencent 删除函数源码dataheatmapplot

本文标签: R tips解决monocle的plot