From b1c74b1f720f2adb0a3c5c43358f4006a9636ba0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 6 Nov 2024 16:25:33 +0100 Subject: [PATCH 1/9] add to bwa* workflow --- conf/modules/align_bwa_bwamem2_bwameme.config | 5 +++++ nextflow.config | 1 + nextflow_schema.json | 12 ++++++++++++ subworkflows/local/align.nf | 2 +- .../local/alignment/align_bwa_bwamem2_bwameme.nf | 10 ++++++++++ 5 files changed, 29 insertions(+), 1 deletion(-) diff --git a/conf/modules/align_bwa_bwamem2_bwameme.config b/conf/modules/align_bwa_bwamem2_bwameme.config index f9037d3d..ce28d438 100644 --- a/conf/modules/align_bwa_bwamem2_bwameme.config +++ b/conf/modules/align_bwa_bwamem2_bwameme.config @@ -50,6 +50,11 @@ process { ext.prefix = { "${meta.id}_sorted_merged" } } + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:EXTRACT_ALIGNMENTS' { + ext.prefix = { "${meta.id}_sorted_merged_extracted" } + ext.args2 = { params.restrict_to_contigs } + } + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:MARKDUPLICATES' { ext.args = "--TMP_DIR ." ext.prefix = { "${meta.id}_sorted_md" } diff --git a/nextflow.config b/nextflow.config index 72717041..6a7ce4a9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -24,6 +24,7 @@ params { analysis_type = 'wgs' bwa_as_fallback = false bait_padding = 100 + extract_alignments = false run_mt_for_wes = false run_rtgvcfeval = false save_mapped_as_cram = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 74d58d05..7191f3e7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -503,6 +503,13 @@ "help_text": "errorStrategy needs to be set to ignore for the bwamem2 process for the fallback to work. 
Turned off by default.", "fa_icon": "fas fa-toggle-on" }, + "extract_alignments": { + "type": "boolean", + "default": "false", + "description": "After aligning the reads to a reference, extract alignments from specific regions/contigs and restrict the analysis to those regions/contigs.", + "help_text": "Set this to true, and specify the contig(s) using the `restrict_to_contigs` parameter", + "fa_icon": "fas fa-toggle-on" + }, "platform": { "type": "string", "default": "illumina", @@ -516,6 +523,11 @@ "fa_icon": "fas fa-align-center", "enum": ["xy", "hetx", "sry"] }, + "restrict_to_contigs": { + "type": "string", + "description": "Can be specified as RNAME[:STARTPOS[-ENDPOS]]. Multiple regions should be separated by spaces", + "fa_icon": "fas fa-align-center" + }, "run_mt_for_wes": { "type": "boolean", "description": "Specifies whether to run mitochondrial analysis for wes samples", diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 14d545ea..496648f4 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -70,7 +70,7 @@ workflow ALIGN { ch_bwamem2_bai = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bai ch_versions = ch_versions.mix(ALIGN_BWA_BWAMEM2_BWAMEME.out.versions) } else if (params.aligner.equals("sentieon")) { - ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon + ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon ch_reads, ch_genome_fasta, ch_genome_fai, diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index 15d3db9a..77620e75 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -7,9 +7,11 @@ include { BWA_MEM as BWAMEM_FALLBACK } from '../../../modules/nf-c include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' include { BWAMEME_MEM } from '../../../modules/nf-core/bwameme/mem/main' include { 
SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGN } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_EXTRACT } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MARKDUP } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_VIEW as EXTRACT_ALIGNMENTS } from '../../../modules/nf-core/samtools/view/main' include { PICARD_MARKDUPLICATES as MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' @@ -82,6 +84,14 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { SAMTOOLS_MERGE ( bams.multiple, ch_genome_fasta, ch_genome_fai ) prepared_bam = bams.single.mix(SAMTOOLS_MERGE.out.bam) + // GET ALIGNMENT FROM PRIMARY CONTIGS + if (params.extract_alignments) { + SAMTOOLS_INDEX_EXTRACT ( prepared_bam ) + extract_bam_sorted_indexed = prepared_bam.join(SAMTOOLS_INDEX_EXTRACT.out.bai, failOnMismatch:true, failOnDuplicate:true) + EXTRACT_ALIGNMENTS( extract_bam_sorted_indexed, ch_genome_fasta, []) + prepared_bam = EXTRACT_ALIGNMENTS.out.bam + } + // Marking duplicates MARKDUPLICATES ( prepared_bam , ch_genome_fasta, ch_genome_fai ) SAMTOOLS_INDEX_MARKDUP ( MARKDUPLICATES.out.bam ) From 4ada05bafae3451a152835a1420e77e6be6d6d9f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Nov 2024 11:16:42 +0100 Subject: [PATCH 2/9] add to sentieon --- conf/modules/align_sentieon.config | 5 +++++ .../local/alignment/align_sentieon.nf | 19 +++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/conf/modules/align_sentieon.config b/conf/modules/align_sentieon.config index 9ae4aeb7..07074ec1 100644 --- a/conf/modules/align_sentieon.config +++ b/conf/modules/align_sentieon.config @@ -30,6 +30,11 @@ process { ext.prefix = { "${meta.id}_merged.bam" } } 
+ withName: '.*ALIGN:ALIGN_SENTIEON:EXTRACT_ALIGNMENTS' { + ext.prefix = { "${meta.id}_merged_extracted" } + ext.args2 = { params.restrict_to_contigs } + } + withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DEDUP' { ext.args4 = { $params.rmdup ? "--rmdup" : '' } ext.prefix = { "${meta.id}_dedup.bam" } diff --git a/subworkflows/local/alignment/align_sentieon.nf b/subworkflows/local/alignment/align_sentieon.nf index dc5daa60..194f5651 100644 --- a/subworkflows/local/alignment/align_sentieon.nf +++ b/subworkflows/local/alignment/align_sentieon.nf @@ -2,10 +2,13 @@ // A subworkflow to annotate structural variants. // -include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main' -include { SENTIEON_DATAMETRICS } from '../../../modules/nf-core/sentieon/datametrics/main' -include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main' -include { SENTIEON_READWRITER } from '../../../modules/nf-core/sentieon/readwriter/main' +include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main' +include { SENTIEON_DATAMETRICS } from '../../../modules/nf-core/sentieon/datametrics/main' +include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main' +include { SENTIEON_READWRITER } from '../../../modules/nf-core/sentieon/readwriter/main' +include { SAMTOOLS_VIEW as EXTRACT_ALIGNMENTS } from '../../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_EXTRACT } from '../../../modules/nf-core/samtools/index/main' + workflow ALIGN_SENTIEON { take: ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ] @@ -36,6 +39,14 @@ workflow ALIGN_SENTIEON { SENTIEON_READWRITER ( merge_bams_in.multiple, ch_genome_fasta, ch_genome_fai ) ch_bam_bai = merge_bams_in.single.mix(SENTIEON_READWRITER.out.output_index) + // GET ALIGNMENT FROM PRIMARY CONTIGS + if (params.extract_alignments) { + EXTRACT_ALIGNMENTS( ch_bam_bai, ch_genome_fasta, []) + ch_bam_bai = EXTRACT_ALIGNMENTS.out.bam + 
SAMTOOLS_INDEX_EXTRACT ( EXTRACT_ALIGNMENTS.out.bam ) + ch_bam_bai = EXTRACT_ALIGNMENTS.out.bam.join(SAMTOOLS_INDEX_EXTRACT.out.bai, failOnMismatch:true, failOnDuplicate:true) + } + SENTIEON_DATAMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, false ) SENTIEON_DEDUP ( ch_bam_bai, ch_genome_fasta, ch_genome_fai ) From 019668f369033adaa35485dbcd200beadd887a33 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Nov 2024 11:22:50 +0100 Subject: [PATCH 3/9] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c74b3258..da2d2e58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - A new analysis option `mito` to call and annotate only mitochondrial variants [#608](https://github.com/nf-core/raredisease/pull/608) +- An option to restrict analysis to specific contigs [#644](https://github.com/nf-core/raredisease/pull/644) ### `Changed` From 1ff269b290dc3de23963767fb8f03ec43ad117b4 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Nov 2024 11:25:35 +0100 Subject: [PATCH 4/9] add parameter to changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index da2d2e58..206c4b37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Parameters +| Old parameter | New parameter | +| ------------- | ------------------- | +| | extract_alignments | +| | restrict_to_contigs | + ### Tool updates | Tool | Old version | New version | From d7dd85214732487c1a312da3003578fa2c16d052 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Nov 2024 11:45:46 +0100 Subject: [PATCH 5/9] fix lint --- 
nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 6a7ce4a9..f121d191 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,6 +25,7 @@ params { bwa_as_fallback = false bait_padding = 100 extract_alignments = false + restrict_to_contigs = null run_mt_for_wes = false run_rtgvcfeval = false save_mapped_as_cram = false From 22dc3518da2ecbbdca570d7e3f42249a172d5df3 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Nov 2024 13:55:28 +0100 Subject: [PATCH 6/9] update usage --- docs/usage.md | 58 +++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index bb2d3b99..7df705cc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. 
Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) @@ -168,15 +168,17 @@ The mandatory and optional parameters for each category are tabulated below. ##### 1. 
Alignment -| Mandatory | Optional | -| ------------------------------ | ------------------------------ | -| aligner1 | fasta_fai4 | -| fasta2 | bwamem24 | -| platform | bwa4 | -| mito_name/mt_fasta3 | bwameme4 | -| | known_dbsnp5 | -| | known_dbsnp_tbi5 | -| | min_trimmed_length6 | +| Mandatory | Optional | +| ------------------------------ | ------------------------------- | +| aligner1 | fasta_fai4 | +| fasta2 | bwamem24 | +| platform | bwa4 | +| mito_name/mt_fasta3 | bwameme4 | +| | known_dbsnp5 | +| | known_dbsnp_tbi5 | +| | min_trimmed_length6 | +| | extract_alignments | +| | restrict_to_contigs7 | 1Default value is bwamem2. Other alternatives are bwa, bwameme and sentieon (requires valid Sentieon license ).
2Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.
@@ -184,6 +186,8 @@ The mandatory and optional parameters for each category are tabulated below. 4fasta_fai, bwa, bwamem2 and bwameme, if not provided by the user, will be generated by the pipeline when necessary.
5Used only by Sentieon.
6Default value is 40. Used only by fastp.
+7Used to limit your analysis to specific contigs. Can be used to remove alignments to unplaced contigs to minimize potential errors. This parameter should be used in conjunction with the `extract_alignments` parameter.
+ ##### 2. QC stats from the alignment files From d2b1600560fa4a99c32cd4ddcd34126baf19f72b Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:40:26 +0100 Subject: [PATCH 7/9] fix lint --- docs/usage.md | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 7df705cc..028a064d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. 
Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) @@ -188,7 +188,6 @@ The mandatory and optional parameters for each category are tabulated below. 6Default value is 40. Used only by fastp.
7Used to limit your analysis to specific contigs. Can be used to remove alignments to unplaced contigs to minimize potential errors. This parameter should be used in conjunction with the `extract_alignments` parameter.
- ##### 2. QC stats from the alignment files | Mandatory | Optional | From bc875688994592aff94ebbc81d77cb8b4b704870 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Nov 2024 22:00:38 +0100 Subject: [PATCH 8/9] Update subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf Co-authored-by: Anders Jemt --- subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index 77620e75..90af20f2 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -84,7 +84,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { SAMTOOLS_MERGE ( bams.multiple, ch_genome_fasta, ch_genome_fai ) prepared_bam = bams.single.mix(SAMTOOLS_MERGE.out.bam) - // GET ALIGNMENT FROM PRIMARY CONTIGS + // GET ALIGNMENT FROM SELECTED CONTIGS if (params.extract_alignments) { SAMTOOLS_INDEX_EXTRACT ( prepared_bam ) extract_bam_sorted_indexed = prepared_bam.join(SAMTOOLS_INDEX_EXTRACT.out.bai, failOnMismatch:true, failOnDuplicate:true) From c98d0637e148d56d036dd876b088b08f1e8fcfcd Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Nov 2024 22:00:44 +0100 Subject: [PATCH 9/9] Update subworkflows/local/alignment/align_sentieon.nf Co-authored-by: Anders Jemt --- subworkflows/local/alignment/align_sentieon.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/alignment/align_sentieon.nf b/subworkflows/local/alignment/align_sentieon.nf index 194f5651..5b0cb708 100644 --- a/subworkflows/local/alignment/align_sentieon.nf +++ b/subworkflows/local/alignment/align_sentieon.nf @@ -39,7 +39,7 @@ workflow ALIGN_SENTIEON { SENTIEON_READWRITER ( merge_bams_in.multiple, ch_genome_fasta, ch_genome_fai ) ch_bam_bai = 
merge_bams_in.single.mix(SENTIEON_READWRITER.out.output_index) - // GET ALIGNMENT FROM PRIMARY CONTIGS + // GET ALIGNMENT FROM SELECTED CONTIGS if (params.extract_alignments) { EXTRACT_ALIGNMENTS( ch_bam_bai, ch_genome_fasta, []) ch_bam_bai = EXTRACT_ALIGNMENTS.out.bam