From bafa2ca3342f27845237b89fc4cd198a09acf144 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Mon, 21 Aug 2023 09:04:29 +0000 Subject: [PATCH 1/2] add parameter for specifying id column for heatmap --- CHANGELOG.md | 1 + bin/compute_gene_heatmap.R | 33 +++++++++++++++++----------- docs/usage.md | 4 ++++ modules/local/create_gene_heatmap.nf | 2 +- nextflow.config | 1 + nextflow_schema.json | 12 +++++----- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30c5f7e..f472ed5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#49](https://github.com/nf-core/nanostring/pull/49) - Allow users to specify id column for heatmap [#39](https://github.com/nf-core/nanostring/issues/39) - [#46](https://github.com/nf-core/nanostring/pull/46) - Update to nf-core template `2.9` - [#42](https://github.com/nf-core/nanostring/pull/42) - Allow users to specify normalization method: `GEO` (default) or `GLM` diff --git a/bin/compute_gene_heatmap.R b/bin/compute_gene_heatmap.R index a269d28..27bee1f 100755 --- a/bin/compute_gene_heatmap.R +++ b/bin/compute_gene_heatmap.R @@ -11,31 +11,26 @@ library(tidylog) ###Command line argument parsing### args = commandArgs(trailingOnly=TRUE) if (length(args) < 1) { - stop("Usage: compute_gene_heatmap.R or compute_gene_heatmap.R ", call.=FALSE) + stop("Usage: compute_gene_heatmap.R or compute_gene_heatmap.R ", call.=FALSE) } input_counts <- args[1] +id_col <- tail(args, 1) #Read annotated counts # HEADER is always RCC_FILE + GENES + SAMPLE_ID and additional metadata such as GROUP TREATMENT OTHER_METADATA counts <- read.table(input_counts, sep="\t", check.names = FALSE, header=TRUE, stringsAsFactors = FALSE) -if (length(args) == 2) { +if (length(args) == 3) { input_genes <- args[2] genes <- read_yaml(input_genes) } else { - gene_cols <- counts %>% dplyr::select(- any_of(c("RCC_FILE", "SAMPLE_ID", 
"TIME", "TREATMENT", "OTHER_METADATA"))) + gene_cols <- counts %>% dplyr::select(- any_of(c(unique(c("SAMPLE_ID", id_col)), "RCC_FILE", "TIME", "TREATMENT", "OTHER_METADATA"))) genes <- colnames(gene_cols) } #Select counts of interest counts_selected <- counts %>% dplyr::select(all_of(genes)) -#Add proper Rownames -rownames(counts_selected) <- counts$RCC_FILE_NAME - -#sort dataframe by rownames to make it easier comparable across heatmaps -counts_selected[order(row.names(counts_selected)), ] - #log2+1 counts_selected <- log2(counts_selected + 1) @@ -48,7 +43,6 @@ max_value <- max(colMax(counts_selected)) min_value <- min(colMin(counts_selected)) #Save as PDF - prefix <- "" if (grepl("wo_HKnorm",input_counts)) { prefix <- "wo_HKnorm_" @@ -56,8 +50,21 @@ if (grepl("wo_HKnorm",input_counts)) { agg_png(file = paste0(prefix, "gene_heatmap_mqc.png"), width = 1200, height = 2000, unit = "px") -Heatmap(counts_selected, name = "Gene-Count Heatmap", column_title = "Gene (log2 +1)", - row_title_rot = 90, row_title = "SampleID",row_dend_reorder = FALSE, show_row_dend = FALSE, row_names_side = "left", - show_column_dend = FALSE, col = colorRamp2(c(min_value, max_value), c("#f7f7f7", "#67a9cf"))) +#Add proper row names +counts_matrix <- as.matrix(counts_selected) +row.names(counts_matrix) <- counts[[id_col]] + +Heatmap(counts_matrix, + name = "Gene-Count Heatmap", + column_title = "Gene (log2 +1)", + row_order = order(row.names(counts_matrix)), + row_title_rot = 90, + row_title = "SampleID", + row_dend_reorder = FALSE, + show_row_dend = FALSE, + row_names_side = "left", + show_column_dend = FALSE, + col = colorRamp2(c(min_value, max_value), c("#f7f7f7", "#67a9cf")) + ) dev.off() diff --git a/docs/usage.md b/docs/usage.md index 78b6d08..f237868 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -105,6 +105,10 @@ The pipeline will generate one heatmap each, for the Housekeeping-normalized and > ⚠️ If you want to use other metadata in your samplesheet than the one shown in the 
section [Full samplesheet](#full-samplesheet), please make sure to specify the `yml` file with all endogenous genes or a subset of it. +By default, the `SAMPLE_ID` column is used to label the rows of the generated heatmap. Therefore, we expect these values to be unique. If this is not the case, or if you want to use different row names for the heatmap, you can select another column of the samplesheet with the parameter `--heatmap_id_column`. + +You can also skip the heatmap generation step entirely by specifying the parameter `--skip_heatmap`. + ### Normalization The normalization can be adjusted with the parameter `--normalization_method` and choosing either `GEO` or `GLM` as the method for normalization. The default is `GEO`. Future additions will incorporate possibilities to adjust further normalization parameters. diff --git a/modules/local/create_gene_heatmap.nf b/modules/local/create_gene_heatmap.nf index 047e89d..762318d 100644 --- a/modules/local/create_gene_heatmap.nf +++ b/modules/local/create_gene_heatmap.nf @@ -22,7 +22,7 @@ process CREATE_GENE_HEATMAP { def gene_filter = params.heatmap_genes_to_filter ?: "" """ - compute_gene_heatmap.R $annotated_counts $gene_filter + compute_gene_heatmap.R $annotated_counts $gene_filter $params.heatmap_id_column cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 29e9107..5e56a8d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { // Pipeline options heatmap_genes_to_filter = null + heatmap_id_column = "SAMPLE_ID" //Normalization options normalization_method = "GEO" diff --git a/nextflow_schema.json b/nextflow_schema.json index 81eda5e..5209a5f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -49,6 +49,11 @@ "default": "", "fa_icon": "fas fa-exchange-alt", "properties": { + "heatmap_id_column": { + "type": "string", + "default": "SAMPLE_ID", + "description": "The column used for heatmap generation, specifying 
the rows. The values in this column have to be unique." + }, "heatmap_genes_to_filter": { "type": "string", "description": "Path to yml file (list, one item per line) to specify which genes should be used for the gene-count heatmap." @@ -227,14 +232,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -258,7 +261,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -273,7 +275,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -312,7 +313,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -320,7 +320,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -328,7 +327,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
}, From 2f9025235885568cd801f648e583eefe6a1c4bf6 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Mon, 21 Aug 2023 09:11:02 +0000 Subject: [PATCH 2/2] use correct PR numbers --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f472ed5..9e4595a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#49](https://github.com/nf-core/nanostring/pull/49) - Allow users to specify id column for heatmap [#39](https://github.com/nf-core/nanostring/issues/39) +- [#48](https://github.com/nf-core/nanostring/pull/48) - Allow users to specify id column for heatmap [#39](https://github.com/nf-core/nanostring/issues/39) - [#46](https://github.com/nf-core/nanostring/pull/46) - Update to nf-core template `2.9` - [#42](https://github.com/nf-core/nanostring/pull/42) - Allow users to specify normalization method: `GEO` (default) or `GLM` @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#46](https://github.com/nf-core/nanostring/pull/46) - Publish `NACHO` QC reports [#44](https://github.com/nf-core/nanostring/issues/44) - [#47](https://github.com/nf-core/nanostring/pull/47) - Update `NACHO` R package including bug fix [#45](https://github.com/nf-core/nanostring/issues/45) -- [#48](https://github.com/nf-core/nanostring/pull/48) - Set correct `conda` environment for `COMPUTE_GENE_SCORES` process +- [#47](https://github.com/nf-core/nanostring/pull/47) - Set correct `conda` environment for `COMPUTE_GENE_SCORES` process ### `Dependencies`