From b9b7d376a4fc683741c2645904db5d18d06cdfed Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Tue, 10 Dec 2024 15:34:55 +1300 Subject: [PATCH] Fixed issues in genepal-report --- CHANGELOG.md | 4 +++- bin/genepal_report.Rmd | 29 +++++++++++++++++++++++++---- conf/base.config | 3 +++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93c3522..e4d0ca6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v0.6.0 - [6-Dec-2024] +## v0.6.0 - [10-Dec-2024] ### 'Added' @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 1. Fixed an issue where TSEBRA failed because LIFTOFF lifted non-protein coding genes [#121](https://github.com/Plant-Food-Research-Open/genepal/issues/121) 2. Switched branch name from `master` to `main` in the GHA CIs +3. Fixed an issue in `genepal_report.Rmd` which caused the pangene matrix plot to fail when the number of clusters exceeded 65536 [#124](https://github.com/Plant-Food-Research-Open/genepal/issues/124) +4. Fixed an issue where `GENEPALREPORT` process failed due to OOM kill signal from SLURM [#123](https://github.com/Plant-Food-Research-Open/genepal/issues/123) ### `Dependencies` diff --git a/bin/genepal_report.Rmd b/bin/genepal_report.Rmd index d85ee12..5d0ac38 100755 --- a/bin/genepal_report.Rmd +++ b/bin/genepal_report.Rmd @@ -190,22 +190,43 @@ cat("
") ```{r pheatmap, eval=(exists("n0_df") && !is.null(n0_df$heatmap)), results='hide', fig.align='center', fig.cap="Heatmap showing number of proteins present in each orthocluster (clusters where all individuals have 1 copy are excluded). Columns = Orthologue cluster, Row = Individual", fig.width=7, fig.height=7, dpi=150, warning=FALSE} -pheatmap(n0_df$heatmap, + +# The plot fails above 65536 columns, so at most 5000 are drawn +# https://github.com/Plant-Food-Research-Open/genepal/issues/124 + +n_cols <- ncol(n0_df$heatmap) +max_cols_allowed <- min(n_cols, 5000) + +# Approach 1: Random selection of columns +# selected_cols <- sample(n_cols, max_cols_allowed) + +# Approach 2: N largest clusters by column sum +selected_cols <- order(colSums(n0_df$heatmap), decreasing = TRUE)[seq_len(max_cols_allowed)] + +prefix_text <- "" + +if (n_cols > max_cols_allowed) { + prefix_text <- paste0("Top ", max_cols_allowed, " ") +} + +pheatmap(n0_df$heatmap[, selected_cols, drop = FALSE], show_colnames = FALSE, - main = "Orthologue clusters containing accessory proteins", + main = paste0(prefix_text, "Orthologue clusters"), legend = TRUE, legend_labels = TRUE, border_color = "white" ) -pheatmap(n0_df$heatmap, +pheatmap(n0_df$heatmap[, selected_cols, drop = FALSE], filename = file.path(outputs_folder, "pangene.matrix.heatmap.pdf"), show_colnames = FALSE, - main = "Orthologue clusters containing accessory proteins", + main = paste0(prefix_text, "Orthologue clusters"), legend = TRUE, legend_labels = TRUE, border_color = "white" ) + +write.csv(x = transform_hogs(n0o), file = file.path(outputs_folder, "pangenome.matrix.csv"), row.names = FALSE) ``` diff --git a/conf/base.config b/conf/base.config index d0173d1..408a69d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -74,4 +74,7 @@ process { cpus = { 8 * task.attempt } time = { 7.days * task.attempt } } + withName:GENEPALREPORT { + memory = { 20.GB * task.attempt } + } }