From 3abdb850836d56b80f4a2f4d71b998a7d438c7f2 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 23 Aug 2023 22:13:48 +0200 Subject: [PATCH 01/22] align --- subworkflows/local/align.nf | 93 ++++++++++++++----- subworkflows/local/alignment/align_MT.nf | 56 +++++++++++ subworkflows/local/convert_mt_bam_to_fastq.nf | 36 +++++++ workflows/raredisease.nf | 30 +++--- 4 files changed, 182 insertions(+), 33 deletions(-) create mode 100644 subworkflows/local/alignment/align_MT.nf create mode 100644 subworkflows/local/convert_mt_bam_to_fastq.nf diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 3455c3b9..f08ae7df 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -2,49 +2,100 @@ // Map to reference // -include { ALIGN_BWAMEM2 } from './alignment/align_bwamem2' -include { ALIGN_SENTIEON } from './alignment/align_sentieon' -include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' +include { ALIGN_BWAMEM2 } from './alignment/align_bwamem2' +include { ALIGN_SENTIEON } from './alignment/align_sentieon' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' +include { ALIGN_MT } from './alignment/align_MT' +include { ALIGN_MT as ALIGN_MT_SHIFT } from './alignment/align_MT' +include { CONVERT_MT_BAM_TO_FASTQ } from './convert_mt_bam_to_fastq' workflow ALIGN { take: - ch_reads_input // channel: [mandatory] [ val(meta), [path(reads)] ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_bwa_index // channel: [mandatory] [ val(meta), path(index) ] - ch_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ] - val_platform // string: [mandatory] illumina or a different technology + ch_reads // channel: [mandatory] [ val(meta), [path(reads)] ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: 
[mandatory] [ val(meta), path(fai) ] + ch_genome_bwaindex // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] + val_platform // string: [mandatory] illumina or a different technology main: ch_versions = Channel.empty() ALIGN_BWAMEM2 ( // Triggered when params.aligner is set as bwamem2 - ch_reads_input, - ch_bwamem2_index, + ch_reads, + ch_genome_bwamem2index, ch_genome_fasta, ch_genome_fai, val_platform ) ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon - ch_reads_input, + ch_reads, ch_genome_fasta, ch_genome_fai, - ch_bwa_index, + ch_genome_bwaindex, val_platform ) - ch_marked_bam = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bam, ALIGN_SENTIEON.out.marked_bam) - ch_marked_bai = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bai, ALIGN_SENTIEON.out.marked_bai) - ch_bam_bai = ch_marked_bam.join(ch_marked_bai, failOnMismatch:true, failOnDuplicate:true) + ch_genome_marked_bam = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bam, ALIGN_SENTIEON.out.marked_bam) + ch_genome_marked_bai = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bai, ALIGN_SENTIEON.out.marked_bai) + ch_genome_bam_bai = ch_genome_marked_bam.join(ch_genome_marked_bai, failOnMismatch:true, failOnDuplicate:true) - SAMTOOLS_VIEW( ch_bam_bai, ch_genome_fasta, [] ) + // PREPARING READS FOR MT ALIGNMENT + CONVERT_MT_BAM_TO_FASTQ ( + ch_genome_bam_bai, + ch_genome_fasta, + ch_genome_fai, + ch_genome_dictionary + ) + + ALIGN_MT ( + CONVERT_MT_BAM_TO_FASTQ.out.fastq, + 
CONVERT_MT_BAM_TO_FASTQ.out.bam, + ch_genome_bwaindex, + ch_genome_bwamem2index, + ch_genome_fasta, + ch_genome_dictionary, + ch_genome_fai + ) + + ALIGN_MT_SHIFT ( + CONVERT_MT_BAM_TO_FASTQ.out.fastq, + CONVERT_MT_BAM_TO_FASTQ.out.bam, + ch_mtshift_bwaindex, + ch_mtshift_bwamem2index, + ch_mtshift_fasta, + ch_mtshift_dictionary, + ch_mtshift_fai + ) + + ch_mt_marked_bam = Channel.empty().mix(ALIGN_MT.out.marked_bam, ALIGN_MT.out.marked_bam) + ch_mt_marked_bai = Channel.empty().mix(ALIGN_MT.out.marked_bai, ALIGN_MT.out.marked_bai) + ch_mt_bam_bai = ch_mt_marked_bam.join(ch_mt_marked_bai, failOnMismatch:true, failOnDuplicate:true) + + ch_mtshift_marked_bam = Channel.empty().mix(ALIGN_MT_SHIFT.out.marked_bam, ALIGN_MT_SHIFT.out.marked_bam) + ch_mtshift_marked_bai = Channel.empty().mix(ALIGN_MT_SHIFT.out.marked_bai, ALIGN_MT_SHIFT.out.marked_bai) + ch_mtshift_bam_bai = ch_mtshift_marked_bam.join(ch_mtshift_marked_bai, failOnMismatch:true, failOnDuplicate:true) + + SAMTOOLS_VIEW( ch_genome_bam_bai, ch_genome_fasta, [] ) ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions, ALIGN_SENTIEON.out.versions) emit: - marked_bam = ch_marked_bam // channel: [ val(meta), path(bam) ] - marked_bai = ch_marked_bai // channel: [ val(meta), path(bai) ] - bam_bai = ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] - versions = ch_versions // channel: [ path(versions.yml) ] + genome_marked_bam = ch_genome_marked_bam // channel: [ val(meta), path(bam) ] + genome_marked_bai = ch_genome_marked_bai // channel: [ val(meta), path(bai) ] + genome_bam_bai = ch_genome_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + mt_marked_bam = ch_mt_marked_bam // channel: [ val(meta), path(bam) ] + mt_marked_bai = ch_mt_marked_bai // channel: [ val(meta), path(bai) ] + mt_bam_bai = ch_mt_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + mtshift_marked_bam = ch_mtshift_marked_bam // channel: [ val(meta), path(bam) ] + mtshift_marked_bai = ch_mtshift_marked_bai // channel: [ 
val(meta), path(bai) ] + mtshift_bam_bai = ch_mtshift_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf new file mode 100644 index 00000000..5ae8ace7 --- /dev/null +++ b/subworkflows/local/alignment/align_MT.nf @@ -0,0 +1,56 @@ +// +// Align and call MT +// + +include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main' +include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main' +include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main' +include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' +include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MT } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } from '../../../modules/nf-core/samtools/sort/main' + +workflow ALIGN_MT { + take: + ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] + ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] + ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] + ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + + main: + ch_versions = Channel.empty() + + BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true) + + SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, ch_fai ) + + Channel.empty() + .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] }) + 
.join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) + .set {ch_bam_ubam} + + GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict) + + PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam) + + PICARD_MARKDUPLICATES_MT (PICARD_ADDORREPLACEREADGROUPS_MT.out.bam, ch_fasta, ch_fai) + + SAMTOOLS_SORT_MT (PICARD_MARKDUPLICATES_MT.out.bam) + + SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam) + + ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first()) + ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first()) + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first()) + + emit: + marked_bam = SAMTOOLS_SORT_MT.out.bam // channel: [ val(meta), path(bam) ] + marked_bai = SAMTOOLS_INDEX_MT.out.bai // channel: [ val(meta), path(bai) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/convert_mt_bam_to_fastq.nf b/subworkflows/local/convert_mt_bam_to_fastq.nf new file mode 100644 index 00000000..ca19ea82 --- /dev/null +++ b/subworkflows/local/convert_mt_bam_to_fastq.nf @@ -0,0 +1,36 @@ +// +// Prepare bam files for MT alignment +// + +include { GATK4_PRINTREADS as GATK4_PRINTREADS_MT } from '../../modules/nf-core/gatk4/printreads/main' +include { GATK4_REVERTSAM as GATK4_REVERTSAM_MT } from '../../modules/nf-core/gatk4/revertsam/main' +include { GATK4_SAMTOFASTQ as GATK4_SAMTOFASTQ_MT } from '../../modules/nf-core/gatk4/samtofastq/main' + +workflow CONVERT_MT_BAM_TO_FASTQ { + take: + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_dict // channel: [mandatory] [
val(meta), path(dict) ] + + main: + ch_versions = Channel.empty() + + // Outputs bam containing only MT + GATK4_PRINTREADS_MT ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict ) + + // Removes alignment information + GATK4_REVERTSAM_MT ( GATK4_PRINTREADS_MT.out.bam ) + + // Outputs fastq files + GATK4_SAMTOFASTQ_MT ( GATK4_REVERTSAM_MT.out.bam ) + + ch_versions = ch_versions.mix(GATK4_PRINTREADS_MT.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_REVERTSAM_MT.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_SAMTOFASTQ_MT.out.versions.first()) + + emit: + fastq = GATK4_SAMTOFASTQ_MT.out.fastq // channel: [ val(meta), [ path(fastq) ] ] + bam = GATK4_REVERTSAM_MT.out.bam // channel: [ val(meta), path(bam) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index a2966fe7..ce35c6bb 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -296,6 +296,12 @@ workflow RAREDISEASE { ch_genome_fai, ch_genome_bwaindex, ch_genome_bwamem2index, + ch_genome_dictionary, + ch_mtshift_bwaindex, + ch_mtshift_bwamem2index, + ch_mtshift_fasta, + ch_mtshift_dictionary, + ch_mtshift_fai, params.platform ) .set { ch_mapped } @@ -303,9 +309,9 @@ workflow RAREDISEASE { // BAM QUALITY CHECK QC_BAM ( - ch_mapped.marked_bam, - ch_mapped.marked_bai, - ch_mapped.bam_bai, + ch_mapped.genome_marked_bam, + ch_mapped.genome_marked_bai, + ch_mapped.genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_bait_intervals, @@ -318,7 +324,7 @@ workflow RAREDISEASE { // EXPANSIONHUNTER AND STRANGER CALL_REPEAT_EXPANSIONS ( - ch_mapped.bam_bai, + ch_mapped.genome_bam_bai, ch_variant_catalog, ch_case_info, ch_genome_fasta, @@ -327,12 +333,12 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) // STEP 1.7: SMNCOPYNUMBERCALLER - ch_mapped.bam_bai + ch_mapped.genome_bam_bai .collect{it[1]} .toList() .set { ch_bam_list } - ch_mapped.bam_bai + 
ch_mapped.genome_bam_bai .collect{it[2]} .toList() .set { ch_bai_list } @@ -349,7 +355,7 @@ workflow RAREDISEASE { // STEP 2: VARIANT CALLING CALL_SNV ( - ch_mapped.bam_bai, + ch_mapped.genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_dbsnp, @@ -361,9 +367,9 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(CALL_SNV.out.versions) CALL_STRUCTURAL_VARIANTS ( - ch_mapped.marked_bam, - ch_mapped.marked_bai, - ch_mapped.bam_bai, + ch_mapped.genome_marked_bam, + ch_mapped.genome_marked_bai, + ch_mapped.genome_bam_bai, ch_genome_bwaindex, ch_genome_fasta, ch_genome_fai, @@ -387,7 +393,7 @@ workflow RAREDISEASE { // GENS if (params.gens_switch) { GENS ( - ch_mapped.bam_bai, + ch_mapped.genome_bam_bai, CALL_SNV.out.vcf, ch_genome_fasta, ch_genome_fai, @@ -439,7 +445,7 @@ workflow RAREDISEASE { if (!params.skip_mt_analysis) { ANALYSE_MT ( - ch_mapped.bam_bai, + ch_mapped.genome_bam_bai, ch_cadd_header, ch_cadd_resources, ch_genome_bwaindex, From 1bdbbf68f4bbe23af14ab56e574c7530f1dd59c3 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 23 Aug 2023 22:17:05 +0200 Subject: [PATCH 02/22] comment --- subworkflows/local/alignment/align_MT.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf index 5ae8ace7..17ecc362 100644 --- a/subworkflows/local/alignment/align_MT.nf +++ b/subworkflows/local/alignment/align_MT.nf @@ -1,5 +1,5 @@ // -// Align and call MT +// Align MT // include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main' From 698e067bdbb2e28d8ee72973a342fcb65a0dd5c8 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 24 Aug 2023 00:37:05 +0200 Subject: [PATCH 03/22] update versions --- subworkflows/local/align.nf | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git 
a/subworkflows/local/align.nf b/subworkflows/local/align.nf index f08ae7df..a705f16e 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -75,17 +75,20 @@ workflow ALIGN { ch_mtshift_fai ) - ch_mt_marked_bam = Channel.empty().mix(ALIGN_MT.out.marked_bam, ALIGN_MT.out.marked_bam) - ch_mt_marked_bai = Channel.empty().mix(ALIGN_MT.out.marked_bai, ALIGN_MT.out.marked_bai) + ch_mt_marked_bam = ALIGN_MT.out.marked_bam + ch_mt_marked_bai = ALIGN_MT.out.marked_bai ch_mt_bam_bai = ch_mt_marked_bam.join(ch_mt_marked_bai, failOnMismatch:true, failOnDuplicate:true) - ch_mtshift_marked_bam = Channel.empty().mix(ALIGN_MT_SHIFT.out.marked_bam, ALIGN_MT_SHIFT.out.marked_bam) - ch_mtshift_marked_bai = Channel.empty().mix(ALIGN_MT_SHIFT.out.marked_bai, ALIGN_MT_SHIFT.out.marked_bai) + ch_mtshift_marked_bam = ALIGN_MT_SHIFT.out.marked_bam + ch_mtshift_marked_bai = ALIGN_MT_SHIFT.out.marked_bai ch_mtshift_bam_bai = ch_mtshift_marked_bam.join(ch_mtshift_marked_bai, failOnMismatch:true, failOnDuplicate:true) SAMTOOLS_VIEW( ch_genome_bam_bai, ch_genome_fasta, [] ) - ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions, ALIGN_SENTIEON.out.versions) + ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions, + ALIGN_SENTIEON.out.versions, + ALIGN_MT.out.versions, + ALIGN_MT_SHIFT.out.versions) emit: genome_marked_bam = ch_genome_marked_bam // channel: [ val(meta), path(bam) ] From 97796fa2cf2af16d314bf84f0b4f6c988970c9d8 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 24 Aug 2023 00:38:15 +0200 Subject: [PATCH 04/22] mt snv calling --- subworkflows/local/call_snv.nf | 84 ++++++++++++++----- .../local/variant_calling/call_snv_MT.nf | 50 +++++++++++ workflows/raredisease.nf | 13 ++- 3 files changed, 124 insertions(+), 23 deletions(-) create mode 100644 subworkflows/local/variant_calling/call_snv_MT.nf diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index 
7b63825e..75be4080 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -2,21 +2,31 @@ // call Single-nucleotide Varinats // -include { CALL_SNV_DEEPVARIANT } from './variant_calling/call_snv_deepvariant' -include { CALL_SNV_SENTIEON } from './variant_calling/call_snv_sentieon' -include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main' - +include { CALL_SNV_DEEPVARIANT } from './variant_calling/call_snv_deepvariant' +include { CALL_SNV_SENTIEON } from './variant_calling/call_snv_sentieon' +include { CALL_SNV_MT } from './variant_calling/call_snv_MT' +include { CALL_SNV_MT as CALL_SNV_MT_SHIFT } from './variant_calling/call_snv_MT' +include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main' workflow CALL_SNV { take: - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_known_dbsnp // channel: [optional] [ val(meta), path(vcf) ] - ch_known_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ] - ch_call_interval // channel: [mandatory] [ path(intervals) ] - ch_ml_model // channel: [mandatory] [ path(model) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_mtshift_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_mt_intervals // channel: [optional] [ path(interval_list) ] + ch_mtshift_fasta // channel: [optional] [ val(meta), path(fasta) ] + ch_mtshift_fai // channel: [optional] [ val(meta), path(fai) ] + ch_mtshift_dictionary // 
channel: [optional] [ val(meta), path(dict) ] + ch_mtshift_intervals // channel: [optional] [ path(interval_list) ] + ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ] + ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ] + ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ] + ch_call_interval // channel: [mandatory] [ path(intervals) ] + ch_ml_model // channel: [mandatory] [ path(model) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] main: ch_versions = Channel.empty() @@ -24,31 +34,63 @@ workflow CALL_SNV { ch_tabix = Channel.empty() CALL_SNV_DEEPVARIANT ( // triggered only when params.variant_caller is set as deepvariant - ch_bam_bai, + ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info ) CALL_SNV_SENTIEON( // triggered only when params.variant_caller is set as sentieon - ch_bam_bai, + ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, - ch_known_dbsnp, - ch_known_dbsnp_tbi, + ch_dbsnp, + ch_dbsnp_tbi, ch_call_interval, ch_ml_model, ch_case_info ) - ch_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf) - ch_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix) + CALL_SNV_MT( + ch_mt_bam_bai, + ch_genome_fasta, + ch_genome_fai, + ch_genome_dictionary, + ch_mt_intervals + ) + + CALL_SNV_MT_SHIFT( + ch_mtshift_bam_bai, + ch_mtshift_fasta, + ch_mtshift_fai, + ch_mtshift_dictionary, + ch_mtshift_intervals + ) + + // LIFTOVER VCF FROM SHIFTED MT TO REFERENCE MT + PICARD_LIFTOVERVCF ( + CALL_SNV_MT_SHIFT.out.vcf, + ch_genome_dictionary, + ch_genome_fasta, + ch_mtshift_backchain, + ) + + ch_genome_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf) + ch_genome_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix) + + ch_mt_vcf = CALL_SNV_MT.out.vcf + ch_mtshift_vcf = PICARD_LIFTOVERVCF.out.vcf_lifted ch_versions = ch_versions.mix(CALL_SNV_DEEPVARIANT.out.versions)
ch_versions = ch_versions.mix(CALL_SNV_SENTIEON.out.versions) + ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions) + ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions) + ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first()) emit: - vcf = ch_vcf // channel: [ val(meta), path(vcf) ] - tabix = ch_tabix // channel: [ val(meta), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + genome_vcf = ch_genome_vcf // channel: [ val(meta), path(vcf) ] + genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ] + mt_vcf = ch_mt_vcf // channel: [ val(meta), path(vcf) ] + mtshift_vcf = ch_mtshift_vcf // channel: [ val(meta), path(vcf) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/variant_calling/call_snv_MT.nf b/subworkflows/local/variant_calling/call_snv_MT.nf new file mode 100644 index 00000000..e91718ad --- /dev/null +++ b/subworkflows/local/variant_calling/call_snv_MT.nf @@ -0,0 +1,50 @@ +// +// Call SNV MT +// + +include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main' +include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main' +include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' +include { MT_DELETION } from '../../../modules/local/mt_deletion_script' +include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main' + +workflow CALL_SNV_MT { + take: + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_intervals // channel: [mandatory] [ path(interval_list) ] + + main: + ch_versions = Channel.empty() + + ch_bam_bai_int = ch_bam_bai.combine(ch_intervals) + + GATK4_MUTECT2_MT (ch_bam_bai_int, ch_fasta, ch_fai, 
ch_dict, [], [], [],[]) + + HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf) + + // Filter Mutect2 calls + ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) + ch_mutect_out = ch_mutect_vcf.join(GATK4_MUTECT2_MT.out.stats, failOnMismatch:true, failOnDuplicate:true) + ch_to_filt = ch_mutect_out.map { + meta, vcf, tbi, stats -> + return [meta, vcf, tbi, stats, [], [], [], []] + } + + GATK4_FILTERMUTECTCALLS_MT (ch_to_filt, ch_fasta, ch_fai, ch_dict) + + ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first()) + ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first()) + + emit: + vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ] + tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ] + stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ] + filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ] + txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ] + html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ce35c6bb..5112553b 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -356,8 +356,17 @@ workflow RAREDISEASE { // STEP 2: VARIANT CALLING CALL_SNV ( ch_mapped.genome_bam_bai, + ch_mapped.mt_bam_bai, + ch_mapped.mtshift_bam_bai, ch_genome_fasta, ch_genome_fai, + ch_genome_dictionary, + ch_mt_intervals, + ch_mtshift_fasta, + ch_mtshift_fai, + ch_mtshift_dictionary, + ch_mtshift_intervals, + ch_mtshift_backchain, ch_dbsnp, ch_dbsnp_tbi, ch_call_interval, @@ -385,7 +394,7 @@ workflow RAREDISEASE { // ped correspondence, sex check, ancestry check PEDDY_CHECK ( - CALL_SNV.out.vcf.join(CALL_SNV.out.tabix, failOnMismatch:true, 
failOnDuplicate:true), + CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true), ch_pedfile ) ch_versions = ch_versions.mix(PEDDY_CHECK.out.versions) @@ -477,7 +486,7 @@ workflow RAREDISEASE { if (!params.skip_snv_annotation) { - ch_vcf = CALL_SNV.out.vcf.join(CALL_SNV.out.tabix, failOnMismatch:true, failOnDuplicate:true) + ch_vcf = CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true) if (!params.skip_mt_analysis) { ch_vcf From a4320bb64381556ec9777a7290df409fb3defa2e Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 24 Aug 2023 15:37:10 +0200 Subject: [PATCH 05/22] SV config --- conf/modules/align_MT.config | 98 +++++++++++++++++++ conf/modules/call_snv_MT.config | 43 ++++++++ conf/modules/call_sv_MT.config | 51 ++++++++++ nextflow.config | 3 + .../local/call_structural_variants.nf | 31 +++--- .../local/variant_calling/call_sv_MT.nf | 29 ++++++ workflows/raredisease.nf | 3 + 7 files changed, 247 insertions(+), 11 deletions(-) create mode 100644 conf/modules/align_MT.config create mode 100644 conf/modules/call_snv_MT.config create mode 100644 conf/modules/call_sv_MT.config create mode 100644 subworkflows/local/variant_calling/call_sv_MT.nf diff --git a/conf/modules/align_MT.config b/conf/modules/align_MT.config new file mode 100644 index 00000000..a01bfc60 --- /dev/null +++ b/conf/modules/align_MT.config @@ -0,0 +1,98 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Mitochondria alignment options +// + +process { + + withName: '.*ALIGN_MT:BWAMEM2_MEM_MT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + } + + withName: '.*ALIGN_MT:SENTIEON_BWAMEM_MT' { + ext.args = { "-M -K 10000000 -R ${meta.read_group}" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } + ext.prefix = { "${meta.id}.sorted" } + } + + withName: '.*ALIGN_MT:GATK4_MERGEBAMALIGNMENT_MT' { + ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT' + ext.prefix = { "${meta.id}_merged" } + } + + withName: '.*ALIGN_MT:PICARD_ADDORREPLACEREADGROUPS_MT' { + ext.args = { [ + "--VALIDATION_STRINGENCY LENIENT", + "--RGLB lib", + "--RGPL ILLUMINA", + "--RGPU barcode", + "--RGSM ${meta.id}" + ].join(' ').trim() } + } + + withName: '.*ALIGN_MT:PICARD_MARKDUPLICATES_MT' { + ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' + ext.prefix = { "${meta.id}_markduplicates" } + } + + withName: '.*ALIGN_MT:SAMTOOLS_SORT_MT' { + ext.prefix = { "${meta.id}_sorted" } + } + +} + +// +// Shifted mitochondria alignment options +// + +process { + + withName: '.*ALIGN_MT_SHIFT:BWAMEM2_MEM_MT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + } + + withName: '.*ALIGN_MT_SHIFT:SENTIEON_BWAMEM_MT' { + ext.args = { "-M -K 10000000 -R ${meta.read_group}" } + ext.when = { !params.skip_mt_analysis && 
!(params.analysis_type == "wes") && params.aligner == "sentieon" } + ext.prefix = { "${meta.id}.sorted" } + } + + withName: '.*ALIGN_MT_SHIFT:GATK4_MERGEBAMALIGNMENT_MT' { + ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT' + ext.prefix = { "${meta.id}_merged_shifted" } + } + + withName: '.*ALIGN_MT_SHIFT:PICARD_ADDORREPLACEREADGROUPS_MT' { + ext.args = { [ + "--VALIDATION_STRINGENCY LENIENT", + "--RGLB lib", + "--RGPL ${params.platform}", + "--RGPU barcode", + "--RGSM ${meta.id}" + ].join(' ').trim() } + } + + withName: '.*ALIGN_MT_SHIFT:PICARD_MARKDUPLICATES_MT' { + ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' + ext.prefix = { "${meta.id}_markduplicates_shifted" } + } + + withName: '.*ALIGN_MT_SHIFT:SAMTOOLS_SORT_MT' { + ext.prefix = { "${meta.id}_sorted_shifted" } + } + +} diff --git a/conf/modules/call_snv_MT.config b/conf/modules/call_snv_MT.config new file mode 100644 index 00000000..420929e5 --- /dev/null +++ b/conf/modules/call_snv_MT.config @@ -0,0 +1,43 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Call SNVs in mitochondria +// + +process { + + withName: '.*CALL_SNV_MT:GATK4_MUTECT2_MT' { + ext.args = '--mitochondria-mode TRUE' + } + + withName: '.*CALL_SNV_MT:GATK4_FILTERMUTECTCALLS_MT' { + ext.prefix = { "${meta.id}_filtered" } + } +} + +// +// Call SNVs in shifted mitochondria +// + +process { + + withName: '.*CALL_SNV_MT_SHIFT:GATK4_MUTECT2_MT' { + ext.args = '--mitochondria-mode TRUE' + } + + withName: '.*CALL_SNV_MT_SHIFT:GATK4_FILTERMUTECTCALLS_MT' { + ext.prefix = { "${meta.id}_filtered_shifted" } + } + +} diff --git a/conf/modules/call_sv_MT.config b/conf/modules/call_sv_MT.config new file mode 100644 index 00000000..6b769e5c --- /dev/null +++ b/conf/modules/call_sv_MT.config @@ -0,0 +1,51 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Call SV in mitochondria +// + +process { + + withName: '.*CALL_SV_MT:MT_DELETION' { + ext.args = '-s --insert-size 16000' + publishDir = [ + path: { "${params.outdir}/mt_sv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*CALL_SV_MT:EKLIPSE' { + publishDir = [ + path: { "${params.outdir}/mt_sv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + +// +// Call SV in shifted mitochondria +// + +process { + + withName: '.*CALL_SV_MT_SHIFT:MT_DELETION' { + ext.when = false + } + + withName: '.*CALL_SV_MT_SHIFT:EKLIPSE_MT' { + ext.when = false + } +} diff --git a/nextflow.config b/nextflow.config index 04b1b670..1cd04d19 100644 --- a/nextflow.config +++ b/nextflow.config @@ -281,6 +281,9 @@ manifest { includeConfig 'conf/modules/raredisease.config' includeConfig 'conf/modules/align.config' includeConfig 'conf/modules/analyse_MT.config' +includeConfig 'conf/modules/align_MT.config' +includeConfig 'conf/modules/call_snv_MT.config' +includeConfig 'conf/modules/call_sv_MT.config' includeConfig 'conf/modules/call_snv.config' includeConfig 'conf/modules/call_structural_variants.config' includeConfig 'conf/modules/annotate_snvs.config' diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index 65016260..9e749add 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -2,21 +2,26 @@ // A nested subworkflow to call structural variants. 
// -include { CALL_SV_MANTA } from './variant_calling/call_sv_manta' -include { CALL_SV_TIDDIT } from './variant_calling/call_sv_tiddit' -include { SVDB_MERGE } from '../../modules/nf-core/svdb/merge/main' -include { CALL_SV_GERMLINECNVCALLER } from './variant_calling/call_sv_germlinecnvcaller' -include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' +include { CALL_SV_MANTA } from './variant_calling/call_sv_manta' +include { CALL_SV_MT } from './variant_calling/call_sv_MT' +include { CALL_SV_MT as CALL_SV_MT_SHIFT } from './variant_calling/call_sv_MT' +include { CALL_SV_TIDDIT } from './variant_calling/call_sv_tiddit' +include { SVDB_MERGE } from '../../modules/nf-core/svdb/merge/main' +include { CALL_SV_GERMLINECNVCALLER } from './variant_calling/call_sv_germlinecnvcaller' +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' workflow CALL_STRUCTURAL_VARIANTS { take: - ch_bam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bai // channel: [mandatory] [ val(meta), path(bai) ] - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_bam // channel: [mandatory] [ val(meta), path(bam) ] + ch_genome_bai // channel: [mandatory] [ val(meta), path(bai) ] + ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_mtshift_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_bwa_index // channel: [mandatory] [ val(meta), path(index)] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_target_bed // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ] ch_genome_dictionary // channel: [optional; used by mandatory for GATK's cnvcaller][ val(meta), path(dict) ] @@ -28,21 +33,25 @@ 
workflow CALL_STRUCTURAL_VARIANTS { main: ch_versions = Channel.empty() - CALL_SV_MANTA (ch_bam, ch_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed) + CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed) .diploid_sv_vcf .collect{it[1]} .set{ manta_vcf } - CALL_SV_TIDDIT (ch_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info) + CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info) .vcf .collect{it[1]} .set { tiddit_vcf } - CALL_SV_GERMLINECNVCALLER (ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model) + CALL_SV_GERMLINECNVCALLER (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model) .genotyped_intervals_vcf .collect{it[1]} .set { gcnvcaller_vcf } + CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta) + + CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta) + //merge if (params.skip_cnv_calling) { tiddit_vcf diff --git a/subworkflows/local/variant_calling/call_sv_MT.nf b/subworkflows/local/variant_calling/call_sv_MT.nf new file mode 100644 index 00000000..c8fdff45 --- /dev/null +++ b/subworkflows/local/variant_calling/call_sv_MT.nf @@ -0,0 +1,29 @@ +// +// Call SV MT +// + +include { MT_DELETION } from '../../../modules/local/mt_deletion_script' +include { EKLIPSE } from '../../../modules/nf-core/eklipse/main' + +workflow CALL_SV_MT { + take: + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + EKLIPSE(ch_bam_bai,[]) + + MT_DELETION(ch_bam_bai, ch_fasta) + + ch_versions = ch_versions.mix(EKLIPSE.out.versions.first()) + ch_versions = ch_versions.mix(MT_DELETION.out.versions.first()) + + emit: + eklipse_del = EKLIPSE.out.deletions // channel: [ val(meta), path(csv) ] + eklipse_genes = EKLIPSE.out.genes // 
channel: [ val(meta), path(csv) ] + eklipse_circos = EKLIPSE.out.circos // channel: [ val(meta), path(png) ] + mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 5112553b..e88686e5 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -379,9 +379,12 @@ workflow RAREDISEASE { ch_mapped.genome_marked_bam, ch_mapped.genome_marked_bai, ch_mapped.genome_bam_bai, + ch_mapped.mt_bam_bai, + ch_mapped.mtshift_bam_bai, ch_genome_bwaindex, ch_genome_fasta, ch_genome_fai, + ch_mtshift_fasta, ch_case_info, ch_target_bed, ch_genome_dictionary, From 58d70e067373f37e9ae9c17233da6a9225fcb446 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 24 Aug 2023 17:23:12 +0200 Subject: [PATCH 06/22] postprocess --- ...nvs.config => annotate_genome_snvs.config} | 28 ++--- conf/modules/postptocess_MT_calls.config | 43 +++++++ nextflow.config | 4 +- ...notate_snvs.nf => annotate_genome_snvs.nf} | 5 +- subworkflows/local/annotate_mt_snvs.nf | 112 ++++++++++++++++++ subworkflows/local/call_snv.nf | 20 ++-- .../variant_calling/postprocess_MT_calls.nf | 112 ++++++++++++++++++ 7 files changed, 295 insertions(+), 29 deletions(-) rename conf/modules/{annotate_snvs.config => annotate_genome_snvs.config} (85%) create mode 100644 conf/modules/postptocess_MT_calls.config rename subworkflows/local/{annotate_snvs.nf => annotate_genome_snvs.nf} (98%) create mode 100644 subworkflows/local/annotate_mt_snvs.nf create mode 100644 subworkflows/local/variant_calling/postprocess_MT_calls.nf diff --git a/conf/modules/annotate_snvs.config b/conf/modules/annotate_genome_snvs.config similarity index 85% rename from conf/modules/annotate_snvs.config rename to conf/modules/annotate_genome_snvs.config index 093e94a1..0cbf5496 100644 --- a/conf/modules/annotate_snvs.config +++ 
b/conf/modules/annotate_genome_snvs.config @@ -16,43 +16,43 @@ // process { - withName: '.*ANNOTATE_SNVS:.*' { + withName: '.*ANNOTATE_GENOME_SNVS:.*' { ext.when = !params.skip_snv_annotation } - withName: '.*ANNOTATE_SNVS:VCFANNO' { + withName: '.*ANNOTATE_GENOME_SNVS:VCFANNO' { ext.prefix = { "${meta.id}_vcfanno" } publishDir = [ enabled: false ] } - withName: '.*ANNOTATE_SNVS:BCFTOOLS_ROH' { + withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_ROH' { ext.args = { "--samples ${meta.probands.join(",")} --skip-indels " } ext.prefix = { "${meta.id}_roh" } } - withName: '.*ANNOTATE_SNVS:RHOCALL_ANNOTATE' { + withName: '.*ANNOTATE_GENOME_SNVS:RHOCALL_ANNOTATE' { ext.args = { "--v14 " } ext.prefix = { "${meta.id}_rohann" } } - withName: '.*ANNOTATE_SNVS:VCFANNO' { + withName: '.*ANNOTATE_GENOME_SNVS:VCFANNO' { ext.prefix = { "${meta.id}_rohann_vcfanno" } } - withName: '.*ANNOTATE_SNVS:UPD_SITES' { + withName: '.*ANNOTATE_GENOME_SNVS:UPD_SITES' { ext.prefix = { "${meta.id}_rohann_vcfanno_upd_sites" } ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} sites"} } - withName: '.*ANNOTATE_SNVS:UPD_REGIONS' { + withName: '.*ANNOTATE_GENOME_SNVS:UPD_REGIONS' { ext.prefix = { "${meta.id}_rohann_vcfanno_upd_regions" } ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} regions --min-size 5 --min-sites 1"} ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } } - withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_SITES' { + withName: '.*ANNOTATE_GENOME_SNVS:CHROMOGRAPH_SITES' { ext.prefix = { "${meta7.id}_rohann_vcfanno_upd_sites_chromograph" } ext.args = { "--euploid" } tag = {"${meta7.id}"} @@ -63,7 +63,7 @@ process { ] } - withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_REGIONS' { + withName: '.*ANNOTATE_GENOME_SNVS:CHROMOGRAPH_REGIONS' { ext.prefix = { "${meta6.id}_rohann_vcfanno_upd_regions_chromograph" } ext.args = { '--euploid' 
} ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } @@ -75,16 +75,16 @@ process { ] } - withName: '.*ANNOTATE_SNVS:BCFTOOLS_VIEW' { + withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_VIEW' { ext.prefix = { "${meta.id}_rohann_vcfanno_filter" } ext.args = { '--output-type z --exclude "INFO/GNOMADAF > 0.70 | INFO/GNOMADAF_popmax > 0.70" ' } } - withName: '.*ANNOTATE_SNVS:GATK4_SELECTVARIANTS' { + withName: '.*ANNOTATE_GENOME_SNVS:GATK4_SELECTVARIANTS' { ext.prefix = { "${meta.id}_${intervals.simpleName}" } } - withName: '.*ANNOTATE_SNVS:ENSEMBLVEP_SNV' { + withName: '.*ANNOTATE_GENOME_SNVS:ENSEMBLVEP_SNV' { ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_vep" } ext.args = [ '--dir_plugins vep_cache/Plugins', @@ -103,7 +103,7 @@ process { ].join(' ') } - withName: '.*ANNOTATE_SNVS:BCFTOOLS_CONCAT' { + withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_CONCAT' { ext.prefix = { "${meta.id}_rohann_vcfanno_filter_vep" } publishDir = [ path: { "${params.outdir}/annotate_snv" }, @@ -112,7 +112,7 @@ process { ] } - withName: '.*ANNOTATE_SNVS:TABIX_BCFTOOLS_CONCAT' { + withName: '.*ANNOTATE_GENOME_SNVS:TABIX_BCFTOOLS_CONCAT' { publishDir = [ path: { "${params.outdir}/annotate_snv" }, mode: params.publish_dir_mode, diff --git a/conf/modules/postptocess_MT_calls.config b/conf/modules/postptocess_MT_calls.config new file mode 100644 index 00000000..3119012e --- /dev/null +++ b/conf/modules/postptocess_MT_calls.config @@ -0,0 +1,43 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Mitochondrial annotation options +// + +process { + + withName: '.*POSTPROCESS_MT_CALLS:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' { + ext.prefix = { "${meta.id}_merged" } + } + + withName: '.*POSTPROCESS_MT_CALLS:GATK4_VARIANTFILTRATION_MT' { + ext.prefix = { "${meta.id}_filt" } + } + + withName: '.*POSTPROCESS_MT_CALLS:SPLIT_MULTIALLELICS_MT' { + ext.args = '--output-type z --multiallelics -both' + ext.prefix = { "${meta.id}_split" } + } + + withName: '.*POSTPROCESS_MT_CALLS:REMOVE_DUPLICATES_MT' { + ext.args = '--output-type z --rm-dup none' + ext.prefix = { "${meta.id}_split_rmdup" } + } + + withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_MERGE_MT' { + ext.args = '--output-type z' + ext.prefix = { "${meta.id}_merge_mt" } + } + +} diff --git a/nextflow.config b/nextflow.config index 1cd04d19..7c494c77 100644 --- a/nextflow.config +++ b/nextflow.config @@ -284,9 +284,11 @@ includeConfig 'conf/modules/analyse_MT.config' includeConfig 'conf/modules/align_MT.config' includeConfig 'conf/modules/call_snv_MT.config' includeConfig 'conf/modules/call_sv_MT.config' +includeConfig 'conf/modules/annotate_mt_snvs.config' includeConfig 'conf/modules/call_snv.config' +includeConfig 'conf/modules/postprocess_MT_calls.config' includeConfig 'conf/modules/call_structural_variants.config' -includeConfig 'conf/modules/annotate_snvs.config' +includeConfig 'conf/modules/annotate_genome_snvs.config' includeConfig 'conf/modules/annotate_structural_variants.config' includeConfig 'conf/modules/align_and_call_MT.config' includeConfig 'conf/modules/align_bwamem2.config' diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf similarity index 98% rename from subworkflows/local/annotate_snvs.nf rename 
to subworkflows/local/annotate_genome_snvs.nf index bd5fdfb8..06267872 100644 --- a/subworkflows/local/annotate_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -1,5 +1,5 @@ // -// A subworkflow to annotate snvs +// A subworkflow to annotate snvs in the genome // include { VCFANNO } from '../../modules/nf-core/vcfanno/main' @@ -20,7 +20,7 @@ include { TABIX_TABIX as TABIX_BCFTOOLS_VIEW } from '../../modules/nf-core/ta include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main' include { ANNOTATE_CADD } from './annotation/annotate_cadd' -workflow ANNOTATE_SNVS { +workflow ANNOTATE_GENOME_SNVS { take: ch_vcf // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] @@ -46,7 +46,6 @@ workflow ANNOTATE_SNVS { RHOCALL_ANNOTATE (ch_vcf, BCFTOOLS_ROH.out.roh, []) - ZIP_TABIX_ROHCALL (RHOCALL_ANNOTATE.out.vcf) ZIP_TABIX_ROHCALL.out.gz_tbi diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf new file mode 100644 index 00000000..400a58c1 --- /dev/null +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -0,0 +1,112 @@ +// +// Merge and annotate MT +// + +include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' +include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as 
TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main' +include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main' +include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main' +include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main' +include { ANNOTATE_CADD } from '../annotation/annotate_cadd' +include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main' + +workflow ANNOTATE_MT_SNVS { + take: + ch_vcf1 // channel: [mandatory] [ val(meta), path(vcf) ] + ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ] + ch_cadd_header // channel: [mandatory] [ path(txt) ] + ch_cadd_resources // channel: [mandatory] [ path(annotation) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] + ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] + val_vep_genome // string: [mandatory] GRCh37 or GRCh38 + val_vep_cache_version // string: [mandatory] 107 + ch_vep_cache // channel: [mandatory] [ path(cache) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + + main: + ch_versions = Channel.empty() + + // Annotating with CADD + ANNOTATE_CADD ( + ch_annotation_in, + TABIX_TABIX_MERGE.out.tbi, + ch_cadd_header, + ch_cadd_resources + ) + + // Pick input for vep + ch_annotation_in + .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) + .branch { it -> + merged: it[2].equals("null") + return [it[0], it[1]] + cadd: !(it[2].equals("null")) + return [it[2], it[3]] + } + .set { ch_for_mix } + ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd) + + // Annotating with ensembl Vep + ENSEMBLVEP_MT( + ch_vep_in, + ch_genome_fasta, + 
val_vep_genome, + "homo_sapiens", + val_vep_cache_version, + ch_vep_cache, + [] + ) + + // Running vcfanno + TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz) + ENSEMBLVEP_MT.out.vcf_gz + .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} + .set { ch_in_vcfanno } + + VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources) + + // HMTNOTE ANNOTATE + HMTNOTE_ANNOTATE(VCFANNO_MT.out.vcf) + HMTNOTE_ANNOTATE.out.vcf.map{meta, vcf -> + return [meta, WorkflowRaredisease.replaceSpacesInInfoColumn(vcf, vcf.parent.toString(), vcf.baseName)] + } + .set { ch_hmtnote_reformatted } + ZIP_TABIX_HMTNOTE(ch_hmtnote_reformatted) + + // Prepare output + ch_vcf_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf] } + ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] } + + // Running haplogrep2 + HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz") + + ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first()) + ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first()) + ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first()) + ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) + ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) + ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions) + ch_versions = ch_versions.mix(VCFANNO_MT.out.versions) + ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions) + + emit: + haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ] + vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ] + tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ] + report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff 
--git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index 75be4080..eb1d2ebf 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -6,7 +6,7 @@ include { CALL_SNV_DEEPVARIANT } from './variant_calling/call_snv_de include { CALL_SNV_SENTIEON } from './variant_calling/call_snv_sentieon' include { CALL_SNV_MT } from './variant_calling/call_snv_MT' include { CALL_SNV_MT as CALL_SNV_MT_SHIFT } from './variant_calling/call_snv_MT' -include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main' +include { POSTPROCESS_MT_CALLS } from './variant_calling/postprocess_MT_calls' workflow CALL_SNV { take: @@ -67,30 +67,28 @@ workflow CALL_SNV { ch_mtshift_intervals ) - // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT - PICARD_LIFTOVERVCF ( - CALL_SNV_MT_SHIFT.out.vcf, - ch_genome_dictionary, + POSTPROCESS_MT_CALLS( + CALL_SNV_MT.out.vcf, + PICARD_LIFTOVERVCF.out.vcf_lifted, ch_genome_fasta, - ch_mtshift_backchain, + ch_genome_dictionary, + ch_genome_fai, + ch_case_info ) ch_genome_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf) ch_genome_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix) - - ch_mt_vcf = CALL_SNV_MT.out.vcf - ch_mtshift_vcf = PICARD_LIFTOVERVCF.out.vcf_lifted + ch_mt_vcf = POSTPROCESS_MT_CALLS.out.vcf ch_versions = ch_versions.mix(CALL_SNV_DEEPVARIANT.out.versions) ch_versions = ch_versions.mix(CALL_SNV_SENTIEON.out.versions) ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions) ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions) - ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first()) + ch_versions = ch_versions.mix(POSTPROCESS_MT_CALLS.out.versions) emit: genome_vcf = ch_genome_vcf // channel: [ val(meta), path(vcf) ] genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ] mt_vcf = ch_mt_vcf // channel: [ val(meta), path(vcf) ] - mtshift_vcf = ch_mtshift_vcf // channel: [ 
val(meta), path(vcf) ] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf new file mode 100644 index 00000000..6c9b7628 --- /dev/null +++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf @@ -0,0 +1,112 @@ +// +// Merge and annotate MT +// + +include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' +include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' +include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main' + +workflow POSTPROCESS_MT_CALLS { + take: + ch_mt_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_mtshift_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + + main: + ch_versions = Channel.empty() + + // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT + PICARD_LIFTOVERVCF ( + ch_mtshift_vcf, + ch_genome_dictionary, + ch_genome_fasta, + ch_mtshift_backchain, + ) + + ch_vcfs = ch_vcf1 + .join(ch_vcf2, remainder: 
true) + .map{ meta, vcf1, vcf2 -> + [meta, [vcf1, vcf2]] + } + GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict) + + // Filtering Variants + GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf + .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .set { ch_filt_vcf } + GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict) + + // Spliting multiallelic calls + GATK4_VARIANTFILTRATION_MT.out.vcf + .join(GATK4_VARIANTFILTRATION_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .set { ch_in_split } + SPLIT_MULTIALLELICS_MT (ch_in_split, ch_genome_fasta) + TABIX_TABIX_MT(SPLIT_MULTIALLELICS_MT.out.vcf) + + // Removing duplicates and merging if there is more than one sample + SPLIT_MULTIALLELICS_MT.out.vcf + .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .set { ch_in_remdup } + REMOVE_DUPLICATES_MT(ch_in_remdup, ch_genome_fasta) + TABIX_TABIX_MT2(REMOVE_DUPLICATES_MT.out.vcf) + + REMOVE_DUPLICATES_MT.out.vcf + .collect{it[1]} + .ifEmpty([]) + .toList() + .set { file_list_vcf } + + TABIX_TABIX_MT2.out.tbi + .collect{it[1]} + .ifEmpty([]) + .toList() + .set { file_list_tbi } + + ch_case_info + .combine(file_list_vcf) + .combine(file_list_tbi) + .set { ch_rem_dup_vcf_tbi } + + ch_rem_dup_vcf_tbi.branch { + meta, vcf, tbi -> + single: vcf.size() == 1 + return [meta, vcf] + multiple: vcf.size() > 1 + return [meta, vcf, tbi] + }.set { ch_case_vcf } + + BCFTOOLS_MERGE_MT( ch_case_vcf.multiple, + ch_genome_fasta, + ch_genome_fai, + [] + ) + + BCFTOOLS_MERGE_MT.out.merged_variants + .mix(ch_case_vcf.single) + .set { ch_annotation_in } + + TABIX_TABIX_MERGE(ch_annotation_in) + + ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first()) + ch_versions = 
ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first()) + ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first()) + ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) + + emit: + haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ] + vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ] + tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ] + report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} From 6bafacf60986d32d03b55dd16301d1cbee2053f2 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 24 Aug 2023 17:42:41 +0200 Subject: [PATCH 07/22] postprocess --- ...lls.config => postprocess_MT_calls.config} | 0 subworkflows/local/call_snv.nf | 3 ++- .../variant_calling/postprocess_MT_calls.nf | 19 +++++++++---------- workflows/raredisease.nf | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) rename conf/modules/{postptocess_MT_calls.config => postprocess_MT_calls.config} (100%) diff --git a/conf/modules/postptocess_MT_calls.config b/conf/modules/postprocess_MT_calls.config similarity index 100% rename from conf/modules/postptocess_MT_calls.config rename to conf/modules/postprocess_MT_calls.config diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index eb1d2ebf..e11b598f 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -69,10 +69,11 @@ workflow CALL_SNV { POSTPROCESS_MT_CALLS( CALL_SNV_MT.out.vcf, - PICARD_LIFTOVERVCF.out.vcf_lifted, + CALL_SNV_MT_SHIFT.out.vcf, ch_genome_fasta, ch_genome_dictionary, ch_genome_fai, + ch_mtshift_backchain, ch_case_info ) diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf index 6c9b7628..81738085 100644 --- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf +++ 
b/subworkflows/local/variant_calling/postprocess_MT_calls.nf @@ -10,15 +10,16 @@ include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../.. include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main' include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' -include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main' +include { PICARD_LIFTOVERVCF } from '../../../modules/nf-core/picard/liftovervcf/main' workflow POSTPROCESS_MT_CALLS { take: ch_mt_vcf // channel: [mandatory] [ val(meta), path(vcf) ] ch_mtshift_vcf // channel: [mandatory] [ val(meta), path(vcf) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(backchain) ] ch_case_info // channel: [mandatory] [ val(case_info) ] main: @@ -32,18 +33,18 @@ workflow POSTPROCESS_MT_CALLS { ch_mtshift_backchain, ) - ch_vcfs = ch_vcf1 - .join(ch_vcf2, remainder: true) + ch_vcfs = ch_mt_vcf + .join(PICARD_LIFTOVERVCF.out.vcf_lifted, remainder: true) .map{ meta, vcf1, vcf2 -> [meta, [vcf1, vcf2]] } - GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict) + GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dictionary) // Filtering Variants GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) .set { ch_filt_vcf } - GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict) + GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dictionary) // Spliting multiallelic calls GATK4_VARIANTFILTRATION_MT.out.vcf @@ -104,9 
+105,7 @@ workflow POSTPROCESS_MT_CALLS { ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) emit: - haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ] - vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ] - tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ] - report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ] + vcf = ch_annotation_in // channel: [ val(meta), path(vcf) ] + tbi = TABIX_TABIX_MERGE.out.tbi // channel: [ val(meta), path(tbi) ] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index e88686e5..70c1422a 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -110,7 +110,7 @@ include { ALIGN } from '../subworkflows/local/al include { ANALYSE_MT } from '../subworkflows/local/analyse_MT' include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli' include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkflows/local/annotate_consequence_pli' -include { ANNOTATE_SNVS } from '../subworkflows/local/annotate_snvs' +include { ANNOTATE_GENOME_SNVS } from '../subworkflows/local/annotate_genome_snvs' include { ANNOTATE_STRUCTURAL_VARIANTS } from '../subworkflows/local/annotate_structural_variants' include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions' include { CALL_SNV } from '../subworkflows/local/call_snv' @@ -502,7 +502,7 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions) } - ANNOTATE_SNVS ( + ANNOTATE_GENOME_SNVS ( ch_vcf, params.analysis_type, ch_cadd_header, @@ -519,11 +519,11 @@ workflow RAREDISEASE { ).set {ch_snv_annotate} ch_versions = ch_versions.mix(ch_snv_annotate.versions) - ch_snv_annotate = ANNOTATE_SNVS.out.vcf_ann + ch_snv_annotate = ANNOTATE_GENOME_SNVS.out.vcf_ann if (!params.skip_mt_analysis) { - ANNOTATE_SNVS.out.vcf_ann + ANNOTATE_GENOME_SNVS.out.vcf_ann .concat(ANALYSE_MT.out.vcf) 
.groupTuple() .set { ch_merged_vcf } From 7c5909d63428aafe85e93d1bbf5289fd386bcae1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 24 Aug 2023 20:43:31 +0200 Subject: [PATCH 08/22] move annotate --- conf/modules/annotate_mt_snvs.config | 66 ++++++ conf/modules/call_sv_MT.config | 14 -- subworkflows/local/annotate_mt_snvs.nf | 50 ++-- subworkflows/local/call_snv.nf | 29 ++- .../local/call_structural_variants.nf | 2 +- workflows/raredisease.nf | 216 +++++++++--------- 6 files changed, 221 insertions(+), 156 deletions(-) create mode 100644 conf/modules/annotate_mt_snvs.config diff --git a/conf/modules/annotate_mt_snvs.config b/conf/modules/annotate_mt_snvs.config new file mode 100644 index 00000000..68e01af1 --- /dev/null +++ b/conf/modules/annotate_mt_snvs.config @@ -0,0 +1,66 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Mitochondrial annotation options +// + +process { + + + withName: '.*ANNOTATE_MT_SNVS:ENSEMBLVEP_MT' { + ext.args = [ + '--dir_plugins vep_cache/Plugins', + '--plugin LoFtool,vep_cache/LoFtool_scores.txt', + '--plugin pLI,vep_cache/pLI_values_107.txt', + '--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz', + '--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS', + '--distance 0', + '--buffer_size 20000', + '--format vcf --fork 4 --max_sv_size 248956422', + '--appris --biotype --cache --canonical --ccds --compress_output bgzip', + '--domains --exclude_predicted --force_overwrite', + '--hgvs --humdiv --no_progress --no_stats --numbers', + '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl --vcf', + '--uniprot' + ].join(' ') + } + + withName: '.*ANNOTATE_MT_SNVS:ZIP_TABIX_HMTNOTE' { + ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" } + publishDir = [ + path: { "${params.outdir}/annotate_mt" }, + mode: params.publish_dir_mode, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ANNOTATE_MT_SNVS:HMTNOTE_ANNOTATE' { + ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" } + ext.args = '--offline' + publishDir = [ + enabled: false + ] + } + + withName: '.*ANNOTATE_MT_SNVS:HAPLOGREP2_CLASSIFY_MT' { + ext.prefix = { "${meta.id}_haplogrep" } + publishDir = [ + path: { "${params.outdir}/annotate_mt" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + +} diff --git a/conf/modules/call_sv_MT.config b/conf/modules/call_sv_MT.config index 6b769e5c..288ca425 100644 --- a/conf/modules/call_sv_MT.config +++ b/conf/modules/call_sv_MT.config @@ -35,17 +35,3 @@ process { } } -// -// Call SV in shifted mitochondria -// - -process { - - withName: '.*CALL_SV_MT_SHIFT:MT_DELETION' { - ext.when = false - } - - withName: '.*CALL_SV_MT_SHIFT:EKLIPSE_MT' { - ext.when = false - } -} diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index 400a58c1..d6ce5659 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -2,51 +2,48 @@ // Merge and annotate MT // -include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main' -include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' -include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main' -include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main' -include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main' -include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main' -include { ANNOTATE_CADD } from '../annotation/annotate_cadd' -include { 
TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../../modules/nf-core/tabix/bgziptabix/main' -include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main' +include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../modules/nf-core/gatk4/variantfiltration/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_TABIX_MT } from '../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../modules/nf-core/bcftools/merge/main' +include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../modules/nf-core/tabix/tabix/main' +include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../modules/local/ensemblvep/main' +include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../modules/nf-core/haplogrep2/classify/main' +include { VCFANNO as VCFANNO_MT } from '../../modules/nf-core/vcfanno/main' +include { ANNOTATE_CADD } from './annotation/annotate_cadd' +include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../modules/nf-core/tabix/bgziptabix/main' +include { HMTNOTE_ANNOTATE } from '../../modules/nf-core/hmtnote/annotate/main' workflow ANNOTATE_MT_SNVS { take: - ch_vcf1 // channel: [mandatory] [ val(meta), path(vcf) ] - ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ] + ch_mt_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_mt_tbi // channel: [mandatory] [ val(meta), path(tbi) ] ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] ch_genome_fasta 
// channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 val_vep_cache_version // string: [mandatory] 107 ch_vep_cache // channel: [mandatory] [ path(cache) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] main: ch_versions = Channel.empty() // Annotating with CADD ANNOTATE_CADD ( - ch_annotation_in, - TABIX_TABIX_MERGE.out.tbi, + ch_mt_vcf, + ch_mt_tbi, ch_cadd_header, ch_cadd_resources ) // Pick input for vep - ch_annotation_in + ch_mt_vcf .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) .branch { it -> merged: it[2].equals("null") @@ -92,11 +89,6 @@ workflow ANNOTATE_MT_SNVS { // Running haplogrep2 HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz") - ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first()) - ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first()) - ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first()) - ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions) ch_versions = ch_versions.mix(VCFANNO_MT.out.versions) @@ -105,7 +97,7 @@ workflow ANNOTATE_MT_SNVS { emit: haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ] - vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ] + vcf_ann = ch_vcf_out // channel: [ val(meta), path(vcf) ] tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ] report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ] versions = ch_versions // channel: [ path(versions.yml) ] diff --git a/subworkflows/local/call_snv.nf 
b/subworkflows/local/call_snv.nf index e11b598f..d8cb9744 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -7,6 +7,7 @@ include { CALL_SNV_SENTIEON } from './variant_calling/call_snv_se include { CALL_SNV_MT } from './variant_calling/call_snv_MT' include { CALL_SNV_MT as CALL_SNV_MT_SHIFT } from './variant_calling/call_snv_MT' include { POSTPROCESS_MT_CALLS } from './variant_calling/postprocess_MT_calls' +include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main' workflow CALL_SNV { take: @@ -51,6 +52,19 @@ workflow CALL_SNV { ch_case_info ) + ch_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf) + ch_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix) + + ch_vcf + .join(ch_tabix, failOnMismatch:true, failOnDuplicate:true) + .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} + .set {ch_selvar_in} + GATK4_SELECTVARIANTS(ch_selvar_in) // remove mitochondrial variants + + ch_genome_vcf = GATK4_SELECTVARIANTS.out.vcf + ch_genome_tabix = GATK4_SELECTVARIANTS.out.tbi + ch_genome_vcf_tabix = ch_genome_vcf.join(ch_genome_tabix, failOnMismatch:true, failOnDuplicate:true) + CALL_SNV_MT( ch_mt_bam_bai, ch_genome_fasta, @@ -77,19 +91,18 @@ workflow CALL_SNV { ch_case_info ) - ch_genome_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf) - ch_genome_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix) - ch_mt_vcf = POSTPROCESS_MT_CALLS.out.vcf - ch_versions = ch_versions.mix(CALL_SNV_DEEPVARIANT.out.versions) ch_versions = ch_versions.mix(CALL_SNV_SENTIEON.out.versions) ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions) ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(POSTPROCESS_MT_CALLS.out.versions) + ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions) emit: - genome_vcf = ch_genome_vcf // channel: [ val(meta), 
path(vcf) ] - genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ] - mt_vcf = ch_mt_vcf // channel: [ val(meta), path(vcf) ] - versions = ch_versions // channel: [ path(versions.yml) ] + genome_vcf = ch_genome_vcf // channel: [ val(meta), path(vcf) ] + genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ] + genome_vcf_tabix = ch_genome_vcf_tabix // channel: [ val(meta), path(vcf), path(tbi) ] + mt_vcf = POSTPROCESS_MT_CALLS.out.vcf // channel: [ val(meta), path(vcf) ] + mt_tabix = POSTPROCESS_MT_CALLS.out.tbi // channel: [ val(meta), path(vcf) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index 9e749add..d614ef7c 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -50,7 +50,7 @@ workflow CALL_STRUCTURAL_VARIANTS { CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta) - CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta) +// CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta) //merge if (params.skip_cnv_calling) { diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 70c1422a..2a5fb036 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -94,11 +94,12 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil include { GATK4_MERGEVCFS } from '../modules/nf-core/gatk4/mergevcfs/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { FASTQC } from '../modules/nf-core/fastqc/main' -include { GATK4_SELECTVARIANTS } from '../modules/nf-core/gatk4/selectvariants/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { SMNCOPYNUMBERCALLER } from '../modules/nf-core/smncopynumbercaller/main' +include { ENSEMBLVEP_FILTERVEP as FILTERVEP_MT } from '../modules/nf-core/ensemblvep/filtervep' include { ENSEMBLVEP_FILTERVEP as FILTERVEP_SNV } from '../modules/nf-core/ensemblvep/filtervep' include { ENSEMBLVEP_FILTERVEP as FILTERVEP_SV } from '../modules/nf-core/ensemblvep/filtervep' +include { TABIX_BGZIPTABIX as BGZIPTABIX_MT } from '../modules/nf-core/tabix/bgziptabix' include { TABIX_BGZIPTABIX as BGZIPTABIX_SNV } from '../modules/nf-core/tabix/bgziptabix' include { TABIX_BGZIPTABIX as BGZIPTABIX_SV } from '../modules/nf-core/tabix/bgziptabix' @@ -108,9 +109,11 @@ include { TABIX_BGZIPTABIX as BGZIPTABIX_SV } from '../modules/nf-core/tabix include { ALIGN } from '../subworkflows/local/align' include { ANALYSE_MT } from '../subworkflows/local/analyse_MT' +include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_MT } from '../subworkflows/local/annotate_consequence_pli' include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli' include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkflows/local/annotate_consequence_pli' include { ANNOTATE_GENOME_SNVS } from '../subworkflows/local/annotate_genome_snvs' +include { ANNOTATE_MT_SNVS } from '../subworkflows/local/annotate_mt_snvs' include { ANNOTATE_STRUCTURAL_VARIANTS } from '../subworkflows/local/annotate_structural_variants' include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions' include { CALL_SNV } from '../subworkflows/local/call_snv' @@ -118,6 +121,7 @@ include { CALL_STRUCTURAL_VARIANTS 
} from '../subworkflows/local/ca include { GENS } from '../subworkflows/local/gens' include { PREPARE_REFERENCES } from '../subworkflows/local/prepare_references' include { QC_BAM } from '../subworkflows/local/qc_bam' +include { RANK_VARIANTS as RANK_VARIANTS_MT } from '../subworkflows/local/rank_variants' include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants' include { RANK_VARIANTS as RANK_VARIANTS_SV } from '../subworkflows/local/rank_variants' include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome' @@ -289,7 +293,9 @@ workflow RAREDISEASE { ch_scatter_split_intervals = ch_scatter.split_intervals ?: Channel.empty() + // // ALIGNING READS, FETCH STATS, AND MERGE. + // ALIGN ( ch_reads, ch_genome_fasta, @@ -307,7 +313,9 @@ workflow RAREDISEASE { .set { ch_mapped } ch_versions = ch_versions.mix(ALIGN.out.versions) + // // BAM QUALITY CHECK + // QC_BAM ( ch_mapped.genome_marked_bam, ch_mapped.genome_marked_bai, @@ -322,7 +330,9 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(QC_BAM.out.versions) + // // EXPANSIONHUNTER AND STRANGER + // CALL_REPEAT_EXPANSIONS ( ch_mapped.genome_bam_bai, ch_variant_catalog, @@ -332,28 +342,9 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) - // STEP 1.7: SMNCOPYNUMBERCALLER - ch_mapped.genome_bam_bai - .collect{it[1]} - .toList() - .set { ch_bam_list } - - ch_mapped.genome_bam_bai - .collect{it[2]} - .toList() - .set { ch_bai_list } - - ch_case_info - .combine(ch_bam_list) - .combine(ch_bai_list) - .set { ch_bams_bais } - - SMNCOPYNUMBERCALLER ( - ch_bams_bais - ) - ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions) - - // STEP 2: VARIANT CALLING + // + // SNV CALLING + // CALL_SNV ( ch_mapped.genome_bam_bai, ch_mapped.mt_bam_bai, @@ -375,6 +366,9 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(CALL_SNV.out.versions) + // + // SV CALLING + // CALL_STRUCTURAL_VARIANTS ( ch_mapped.genome_marked_bam, 
ch_mapped.genome_marked_bai, @@ -395,29 +389,9 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) - // ped correspondence, sex check, ancestry check - PEDDY_CHECK ( - CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true), - ch_pedfile - ) - ch_versions = ch_versions.mix(PEDDY_CHECK.out.versions) - - // GENS - if (params.gens_switch) { - GENS ( - ch_mapped.genome_bam_bai, - CALL_SNV.out.vcf, - ch_genome_fasta, - ch_genome_fai, - file(params.gens_interval_list), - file(params.gens_pon), - file(params.gens_gnomad_pos), - ch_case_info, - ch_genome_dictionary - ) - ch_versions = ch_versions.mix(GENS.out.versions) - } - + // + // ANNOTATE STRUCTURAL VARIANTS + // if (!params.skip_sv_annotation) { ANNOTATE_STRUCTURAL_VARIANTS ( CALL_STRUCTURAL_VARIANTS.out.vcf, @@ -455,55 +429,13 @@ workflow RAREDISEASE { } - if (!params.skip_mt_analysis) { - ANALYSE_MT ( - ch_mapped.genome_bam_bai, - ch_cadd_header, - ch_cadd_resources, - ch_genome_bwaindex, - ch_genome_bwamem2index, - ch_genome_fasta, - ch_genome_fai, - ch_genome_dictionary, - ch_mt_intervals, - ch_mtshift_bwaindex, - ch_mtshift_bwamem2index, - ch_mtshift_fasta, - ch_mtshift_dictionary, - ch_mtshift_fai, - ch_mtshift_intervals, - ch_mtshift_backchain, - ch_vcfanno_resources, - ch_vcfanno_toml, - params.genome, - params.vep_cache_version, - ch_vep_cache, - ch_case_info - ) - - ch_versions = ch_versions.mix(ANALYSE_MT.out.versions) - - } - - // VARIANT ANNOTATION - + // + // ANNOTATE GENOME SNVs + // if (!params.skip_snv_annotation) { - ch_vcf = CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true) - - if (!params.skip_mt_analysis) { - ch_vcf - .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} - .set { ch_selvar_in } - - GATK4_SELECTVARIANTS(ch_selvar_in) // remove mitochondrial variants - - ch_vcf = GATK4_SELECTVARIANTS.out.vcf.join(GATK4_SELECTVARIANTS.out.tbi, failOnMismatch:true, 
failOnDuplicate:true) - ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions) - } - ANNOTATE_GENOME_SNVS ( - ch_vcf, + CALL_SNV.out.genome_vcf_tabix, params.analysis_type, ch_cadd_header, ch_cadd_resources, @@ -521,18 +453,6 @@ workflow RAREDISEASE { ch_snv_annotate = ANNOTATE_GENOME_SNVS.out.vcf_ann - if (!params.skip_mt_analysis) { - - ANNOTATE_GENOME_SNVS.out.vcf_ann - .concat(ANALYSE_MT.out.vcf) - .groupTuple() - .set { ch_merged_vcf } - - GATK4_MERGEVCFS (ch_merged_vcf, ch_genome_dictionary) - ch_snv_annotate = GATK4_MERGEVCFS.out.vcf - ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions) - } - ANN_CSQ_PLI_SNV ( ch_snv_annotate, ch_variant_consequences @@ -558,6 +478,94 @@ workflow RAREDISEASE { } + // + // ANNOTATE MT SNVs + // + if (!params.skip_mt_annotation) { + + ANNOTATE_MT_SNVS ( + CALL_SNV.out.mt_vcf, + CALL_SNV.out.mt_tabix, + ch_cadd_header, + ch_cadd_resources, + ch_genome_fasta, + ch_vcfanno_resources, + ch_vcfanno_toml, + params.genome, + params.vep_cache_version, + ch_vep_cache, + ).set {ch_mt_annotate} + ch_versions = ch_versions.mix(ch_mt_annotate.versions) + + ANN_CSQ_PLI_MT ( + ch_mt_annotate.vcf_ann, + ch_variant_consequences + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions) + + RANK_VARIANTS_MT ( + ANN_CSQ_PLI_MT.out.vcf_ann, + ch_pedfile, + ch_reduced_penetrance, + ch_score_config_snv + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions) + + FILTERVEP_MT( + RANK_VARIANTS_MT.out.vcf, + ch_vep_filters + ) + ch_versions = ch_versions.mix(FILTERVEP_MT.out.versions) + + BGZIPTABIX_MT(FILTERVEP_MT.out.output) + ch_versions = ch_versions.mix(BGZIPTABIX_MT.out.versions) + + } + + // STEP 1.7: SMNCOPYNUMBERCALLER + ch_mapped.genome_bam_bai + .collect{it[1]} + .toList() + .set { ch_bam_list } + + ch_mapped.genome_bam_bai + .collect{it[2]} + .toList() + .set { ch_bai_list } + + ch_case_info + .combine(ch_bam_list) + .combine(ch_bai_list) + .set { ch_bams_bais } + + SMNCOPYNUMBERCALLER ( + ch_bams_bais + ) 
+ ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions) + + // ped correspondence, sex check, ancestry check + PEDDY_CHECK ( + CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true), + ch_pedfile + ) + ch_versions = ch_versions.mix(PEDDY_CHECK.out.versions) + + // GENS + if (params.gens_switch) { + GENS ( + ch_mapped.genome_bam_bai, + CALL_SNV.out.vcf, + ch_genome_fasta, + ch_genome_fai, + file(params.gens_interval_list), + file(params.gens_pon), + file(params.gens_gnomad_pos), + ch_case_info, + ch_genome_dictionary + ) + ch_versions = ch_versions.mix(GENS.out.versions) + } + // // MODULE: Pipeline reporting // From d711b05f7babeb25292fc9c4d2b890f2eb65b6d9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 26 Aug 2023 11:55:35 +0200 Subject: [PATCH 09/22] update configs --- conf/modules/align_MT.config | 8 +- conf/modules/align_and_call_MT.config | 126 ------------------- conf/modules/analyse_MT.config | 31 ----- conf/modules/annotate_consequence_pli.config | 32 ++++- conf/modules/annotate_genome_snvs.config | 8 +- conf/modules/annotate_mt_snvs.config | 4 +- conf/modules/call_snv.config | 10 ++ conf/modules/call_snv_deepvariant.config | 14 --- conf/modules/call_snv_sentieon.config | 14 --- conf/modules/call_structural_variants.config | 4 +- conf/modules/call_sv_MT.config | 4 +- conf/modules/convert_mt_bam_to_fastq.config | 6 +- conf/modules/merge_annotate_MT.config | 88 ------------- conf/modules/postprocess_MT_calls.config | 15 ++- conf/modules/prepare_references.config | 10 +- conf/modules/rank_variants.config | 52 ++++++++ conf/modules/raredisease.config | 33 +++-- conf/test.config | 1 + conf/test_full.config | 2 +- docs/output.md | 8 +- main.nf | 1 + nextflow.config | 5 +- nextflow_schema.json | 11 +- workflows/raredisease.nf | 6 +- 24 files changed, 162 insertions(+), 331 deletions(-) delete mode 100644 conf/modules/align_and_call_MT.config delete 
mode 100644 conf/modules/analyse_MT.config delete mode 100644 conf/modules/merge_annotate_MT.config diff --git a/conf/modules/align_MT.config b/conf/modules/align_MT.config index a01bfc60..35d91ee3 100644 --- a/conf/modules/align_MT.config +++ b/conf/modules/align_MT.config @@ -18,13 +18,13 @@ process { withName: '.*ALIGN_MT:BWAMEM2_MEM_MT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } + ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwamem2" } ext.args = { "-M -K 100000000 -R ${meta.read_group}" } } withName: '.*ALIGN_MT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } + ext.when = { !(params.analysis_type == "wes") && params.aligner == "sentieon" } ext.prefix = { "${meta.id}.sorted" } } @@ -61,13 +61,13 @@ process { process { withName: '.*ALIGN_MT_SHIFT:BWAMEM2_MEM_MT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } + ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwamem2" } ext.args = { "-M -K 100000000 -R ${meta.read_group}" } } withName: '.*ALIGN_MT_SHIFT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } + ext.when = { !(params.analysis_type == "wes") && params.aligner == "sentieon" } ext.prefix = { "${meta.id}.sorted" } } diff --git a/conf/modules/align_and_call_MT.config b/conf/modules/align_and_call_MT.config deleted file mode 100644 index 1a2993f5..00000000 --- a/conf/modules/align_and_call_MT.config +++ /dev/null @@ -1,126 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. - ext.when = Conditional clause ----------------------------------------------------------------------------------------- -*/ - -// -// ANALYSE_MT:ALIGN_AND_CALL_MT -// - -process { - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:BWAMEM2_MEM_MT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } - ext.args = { "-M -K 100000000 -R ${meta.read_group}" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SENTIEON_BWAMEM_MT' { - ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } - ext.prefix = { "${meta.id}.sorted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MERGEBAMALIGNMENT_MT' { - ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT' - ext.prefix = { "${meta.id}_merged" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:PICARD_ADDORREPLACEREADGROUPS_MT' { - ext.args = { [ - "--VALIDATION_STRINGENCY LENIENT", - "--RGLB lib", - "--RGPL ILLUMINA", - "--RGPU barcode", - "--RGSM ${meta.id}" - ].join(' ' ).trim() } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:PICARD_MARKDUPLICATES_MT' { - ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' - ext.prefix = { "${meta.id}_markduplicates" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SAMTOOLS_SORT_MT' { - ext.prefix = { "${meta.id}_sorted" } - } - - withName: 
'.*ANALYSE_MT:ALIGN_AND_CALL_MT:MT_DELETION' { - ext.args = '-s --insert-size 16000' - publishDir = [ - path: { "${params.outdir}/mt_sv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MUTECT2_MT' { - ext.args = '--mitochondria-mode TRUE' - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_FILTERMUTECTCALLS_MT' { - ext.prefix = { "${meta.id}_filtered" } - } -} - -// -// ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT -// - -process { - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:BWAMEM2_MEM_MT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } - ext.args = { "-M -K 100000000 -R ${meta.read_group}" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SENTIEON_BWAMEM_MT' { - ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } - ext.prefix = { "${meta.id}.sorted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_MERGEBAMALIGNMENT_MT' { - ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT' - ext.prefix = { "${meta.id}_merged_shifted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:PICARD_ADDORREPLACEREADGROUPS_MT' { - ext.args = { [ - "--VALIDATION_STRINGENCY LENIENT", - "--RGLB lib", - "--RGPL ${params.platform}", - "--RGPU barcode", - "--RGSM ${meta.id}" - ].join(' ' ).trim() } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:PICARD_MARKDUPLICATES_MT' { - ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' - ext.prefix = { "${meta.id}_markduplicates_shifted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SAMTOOLS_SORT_MT' { - ext.prefix = { "${meta.id}_sorted_shifted" } - } - - withName: 
'.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_MUTECT2_MT' { - ext.args = '--mitochondria-mode TRUE' - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:HAPLOCHECK_MT' { - ext.prefix = { "${meta.id}_shifted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_FILTERMUTECTCALLS_MT' { - ext.prefix = { "${meta.id}_filtered_shifted" } - } - -} diff --git a/conf/modules/analyse_MT.config b/conf/modules/analyse_MT.config deleted file mode 100644 index 4ee1b693..00000000 --- a/conf/modules/analyse_MT.config +++ /dev/null @@ -1,31 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. 
- ext.when = Conditional clause ----------------------------------------------------------------------------------------- -*/ - -// -// Liftover -// - -process { - withName: '.*ANALYSE_MT:.*' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") } - publishDir = [ - enabled: false - ] - } -} - -process { - withName: '.*ANALYSE_MT:PICARD_LIFTOVERVCF' { - ext.prefix = { "${meta.id}_liftover" } - } -} diff --git a/conf/modules/annotate_consequence_pli.config b/conf/modules/annotate_consequence_pli.config index ca4db826..eed623a6 100644 --- a/conf/modules/annotate_consequence_pli.config +++ b/conf/modules/annotate_consequence_pli.config @@ -21,15 +21,15 @@ process { } withName: '.*ANN_CSQ_PLI_SV:ADD_MOST_SEVERE_CSQ' { - ext.prefix = { "${meta.id}_vep_csq" } + ext.prefix = { "${meta.id}_genome_vep_csq" } } withName: '.*ANN_CSQ_PLI_SV:ADD_MOST_SEVERE_PLI' { - ext.prefix = { "${meta.id}_vep_csq_pli" } + ext.prefix = { "${meta.id}_genome_vep_csq_pli" } } withName: '.*ANN_CSQ_PLI_SV:TABIX_BGZIPTABIX' { - ext.prefix = { "${meta.id}_vep_csq_pli" } + ext.prefix = { "${meta.id}_genome_vep_csq_pli" } publishDir = [ path: { "${params.outdir}/annotate_sv" }, mode: params.publish_dir_mode, @@ -64,3 +64,29 @@ process { ] } } + +process { + withName: '.*ANN_CSQ_PLI_MT:.*' { + ext.when = !params.skip_mt_annotation + publishDir = [ + enabled: false + ] + } + + withName: '.*ANN_CSQ_PLI_MT:ADD_MOST_SEVERE_CSQ' { + ext.prefix = { "${meta.id}_mt_vep_csq" } + } + + withName: '.*ANN_CSQ_PLI_MT:ADD_MOST_SEVERE_PLI' { + ext.prefix = { "${meta.id}_mt_vep_csq_pli" } + } + + withName: '.*ANN_CSQ_PLI_MT:TABIX_BGZIPTABIX' { + ext.prefix = { "${meta.id}_mt_vep_csq_pli" } + publishDir = [ + path: { "${params.outdir}/annotate_snv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } +} diff --git a/conf/modules/annotate_genome_snvs.config b/conf/modules/annotate_genome_snvs.config index 0cbf5496..96503d82 100644 --- a/conf/modules/annotate_genome_snvs.config +++ b/conf/modules/annotate_genome_snvs.config @@ -57,7 +57,7 @@ process { ext.args = { "--euploid" } tag = {"${meta7.id}"} publishDir = [ - path: { "${params.outdir}/annotate_snv" }, + path: { "${params.outdir}/annotate_snv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -69,7 +69,7 @@ process { ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } tag = {"${meta6.id}"} publishDir = [ - path: { "${params.outdir}/annotate_snv" }, + path: { "${params.outdir}/annotate_snv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -106,7 +106,7 @@ process { withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_CONCAT' { ext.prefix = { "${meta.id}_rohann_vcfanno_filter_vep" } publishDir = [ - path: { "${params.outdir}/annotate_snv" }, + path: { "${params.outdir}/annotate_snv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -114,7 +114,7 @@ process { withName: '.*ANNOTATE_GENOME_SNVS:TABIX_BCFTOOLS_CONCAT' { publishDir = [ - path: { "${params.outdir}/annotate_snv" }, + path: { "${params.outdir}/annotate_snv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/conf/modules/annotate_mt_snvs.config b/conf/modules/annotate_mt_snvs.config index 68e01af1..40101033 100644 --- a/conf/modules/annotate_mt_snvs.config +++ b/conf/modules/annotate_mt_snvs.config @@ -39,7 +39,7 @@ process { withName: '.*ANNOTATE_MT_SNVS:ZIP_TABIX_HMTNOTE' { ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" } publishDir = [ - path: { "${params.outdir}/annotate_mt" }, + path: { "${params.outdir}/annotate_snv/mitochondria" }, mode: params.publish_dir_mode, pattern: "*{vcf.gz,vcf.gz.tbi}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -57,7 +57,7 @@ process { withName: '.*ANNOTATE_MT_SNVS:HAPLOGREP2_CLASSIFY_MT' { ext.prefix = { "${meta.id}_haplogrep" } publishDir = [ - path: { "${params.outdir}/annotate_mt" }, + path: { "${params.outdir}/annotate_snv/mitochondria" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/conf/modules/call_snv.config b/conf/modules/call_snv.config index 53cc78b4..17926228 100644 --- a/conf/modules/call_snv.config +++ b/conf/modules/call_snv.config @@ -19,4 +19,14 @@ process { ] } + withName: '.*CALL_SNV:GATK4_SELECTVARIANTS' { + ext.args = "--exclude-intervals ${params.mito_name}" + ext.prefix = { "${meta.id}_nomito" } + ext.when = { !params.skip_snv_annotation } + publishDir = [ + path: { "${params.outdir}/call_snv/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/conf/modules/call_snv_deepvariant.config b/conf/modules/call_snv_deepvariant.config index 0493534e..329bc15f 100644 --- a/conf/modules/call_snv_deepvariant.config +++ b/conf/modules/call_snv_deepvariant.config @@ -37,20 +37,6 @@ process { withName: '.*CALL_SNV_DEEPVARIANT:REMOVE_DUPLICATES_GL' { ext.args = '--output-type z --rm-dup none' ext.prefix = { "${meta.id}_split_rmdup" } - publishDir = [ - enabled: params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } - withName: '.*CALL_SNV_DEEPVARIANT:TABIX_GL' { - publishDir = [ - enabled: params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } } diff --git a/conf/modules/call_snv_sentieon.config b/conf/modules/call_snv_sentieon.config index ec4d4480..b4443546 100644 --- a/conf/modules/call_snv_sentieon.config +++ b/conf/modules/call_snv_sentieon.config @@ -51,20 +51,6 @@ process { withName: '.*CALL_SNV:CALL_SNV_SENTIEON:REMOVE_DUPLICATES_SEN' { ext.args = '--output-type z --rm-dup none' ext.prefix = { "${meta.id}_split_rmdup" } - publishDir = [ - enabled: params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } - withName: '.*CALL_SNV_SENTIEON:TABIX_SEN' { - publishDir = [ - enabled: params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } } diff --git a/conf/modules/call_structural_variants.config b/conf/modules/call_structural_variants.config index a8c10175..3a59868d 100644 --- a/conf/modules/call_structural_variants.config +++ b/conf/modules/call_structural_variants.config @@ -26,7 +26,7 @@ process { withName: '.*CALL_STRUCTURAL_VARIANTS:SVDB_MERGE' { ext.args = '--pass_only --same_order' publishDir = [ - path: { "${params.outdir}/call_sv" }, + path: { "${params.outdir}/call_sv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -34,7 +34,7 @@ process { withName: '.*CALL_STRUCTURAL_VARIANTS:TABIX_TABIX' { publishDir = [ - path: { "${params.outdir}/call_sv" }, + path: { "${params.outdir}/call_sv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/conf/modules/call_sv_MT.config b/conf/modules/call_sv_MT.config index 288ca425..44b0e581 100644 --- a/conf/modules/call_sv_MT.config +++ b/conf/modules/call_sv_MT.config @@ -20,7 +20,7 @@ process { withName: '.*CALL_SV_MT:MT_DELETION' { ext.args = '-s --insert-size 16000' publishDir = [ - path: { "${params.outdir}/mt_sv" }, + path: { "${params.outdir}/call_sv/mitochondria" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -28,7 +28,7 @@ process { withName: '.*CALL_SV_MT:EKLIPSE' { publishDir = [ - path: { "${params.outdir}/mt_sv" }, + path: { "${params.outdir}/call_sv/mitochondria" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/conf/modules/convert_mt_bam_to_fastq.config b/conf/modules/convert_mt_bam_to_fastq.config index 9a683b6e..0365c452 100644 --- a/conf/modules/convert_mt_bam_to_fastq.config +++ b/conf/modules/convert_mt_bam_to_fastq.config @@ -17,7 +17,7 @@ process { - withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' { + withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' { beforeScript = {"mkdir ./tmp"} ext.args = [ "-L ${params.mito_name}", @@ -26,11 +26,11 @@ process { ].join(" ").trim() } - withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_REVERTSAM_MT' { + withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_REVERTSAM_MT' { ext.args = '--OUTPUT_BY_READGROUP false --VALIDATION_STRINGENCY LENIENT --ATTRIBUTE_TO_CLEAR FT --ATTRIBUTE_TO_CLEAR CO --SORT_ORDER queryname --RESTORE_ORIGINAL_QUALITIES false' } - withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_SAMTOFASTQ_MT' { + withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_SAMTOFASTQ_MT' { ext.args = '--VALIDATION_STRINGENCY LENIENT' } } diff --git a/conf/modules/merge_annotate_MT.config b/conf/modules/merge_annotate_MT.config deleted file mode 100644 index ae2601b6..00000000 --- a/conf/modules/merge_annotate_MT.config +++ /dev/null @@ -1,88 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. 
- ext.when = Conditional clause ----------------------------------------------------------------------------------------- -*/ - -// -// Mitochondrial annotation options -// - -process { - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' { - ext.prefix = { "${meta.id}_merged" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_VARIANTFILTRATION_MT' { - ext.prefix = { "${meta.id}_filt" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:SPLIT_MULTIALLELICS_MT' { - ext.args = '--output-type z --multiallelics -both' - ext.prefix = { "${meta.id}_split" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:REMOVE_DUPLICATES_MT' { - ext.args = '--output-type z --rm-dup none' - ext.prefix = { "${meta.id}_split_rmdup" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:BCFTOOLS_MERGE_MT' { - ext.args = '--output-type z' - ext.prefix = { "${meta.id}_merge_mt" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:ENSEMBLVEP_MT' { - ext.args = [ - '--dir_plugins vep_cache/Plugins', - '--plugin LoFtool,vep_cache/LoFtool_scores.txt', - '--plugin pLI,vep_cache/pLI_values_107.txt', - '--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz', - '--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS', - '--distance 0', - '--buffer_size 20000', - '--format vcf --fork 4 --max_sv_size 248956422', - '--appris --biotype --cache --canonical --ccds --compress_output bgzip', - '--domains --exclude_predicted --force_overwrite', - '--hgvs --humdiv --no_progress --no_stats --numbers', - '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl --vcf', - '--uniprot' - ].join(' ') - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:ZIP_TABIX_HMTNOTE' { - ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" } - publishDir = [ - path: { "${params.outdir}/annotate_mt" }, - mode: params.publish_dir_mode, - pattern: "*{vcf.gz,vcf.gz.tbi}", - saveAs: { filename -> 
filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:HMTNOTE_ANNOTATE' { - ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" } - ext.args = '--offline' - publishDir = [ - enabled: false - ] - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:HAPLOGREP2_CLASSIFY_MT' { - ext.prefix = { "${meta.id}_haplogrep" } - publishDir = [ - path: { "${params.outdir}/annotate_mt" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - -} diff --git a/conf/modules/postprocess_MT_calls.config b/conf/modules/postprocess_MT_calls.config index 3119012e..c97ea43d 100644 --- a/conf/modules/postprocess_MT_calls.config +++ b/conf/modules/postprocess_MT_calls.config @@ -37,7 +37,20 @@ process { withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_MERGE_MT' { ext.args = '--output-type z' - ext.prefix = { "${meta.id}_merge_mt" } + ext.prefix = { "${meta.id}_mitochondria" } + publishDir = [ + path: { "${params.outdir}/call_snv/mitochondria" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*POSTPROCESS_MT_CALLS:TABIX_TABIX_MERGE' { + publishDir = [ + path: { "${params.outdir}/call_snv/mitochondria" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] } } diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 2532052d..101824ef 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -30,7 +30,7 @@ process { } withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2"} + ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwamem2"} } withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' { @@ -38,7 +38,7 @@ process { } withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon"} + ext.when = { !(params.analysis_type == "wes") && params.aligner == "sentieon"} } withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' { @@ -51,11 +51,11 @@ process { withName: '.*PREPARE_REFERENCES:SAMTOOLS_EXTRACT_MT' { ext.args = { " ${params.mito_name} -o ${meta.id}_mt.fa" } - ext.when = {!params.mt_fasta && !params.skip_mt_analysis} + ext.when = {!params.mt_fasta} } withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")} + ext.when = { !(params.analysis_type == "wes")} } withName: '.*PREPARE_REFERENCES:GATK_SD' { @@ -67,7 +67,7 @@ process { } withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")} + ext.when = { !(params.analysis_type == "wes")} } withName: '.*PREPARE_REFERENCES:GET_CHROM_SIZES' { diff --git a/conf/modules/rank_variants.config b/conf/modules/rank_variants.config index 64b7a53b..f5882da1 100644 --- a/conf/modules/rank_variants.config +++ b/conf/modules/rank_variants.config @@ -111,3 +111,55 @@ process { ] } } + +// +// Score and rank MT SNVs +// + +process { + withName: '.*RANK_VARIANTS_MT:.*' { + ext.when = 
!params.skip_snv_annotation + publishDir = [ + enabled: false + ] + } + + withName: '.*RANK_VARIANTS_MT:GENMOD_ANNOTATE' { + ext.args = { + (params.genome == 'GRCh37') ? '--annotate_regions --genome-build 37' : '--annotate_regions --genome-build 38' + } + } + + withName: '.*RANK_VARIANTS_MT:GENMOD_MODELS' { + ext.args = " --whole_gene " + } + + withName: '.*RANK_VARIANTS_MT:GENMOD_SCORE' { + ext.args = " --rank_results " + } + + withName: '.*RANK_VARIANTS_MT:GENMOD_COMPOUND' { + ext.prefix = { "${meta.id}_ranked_mt" } + } + + withName: '.*RANK_VARIANTS_MT:BCFTOOLS_SORT' { + ext.when = false + } + + withName: '.*RANK_VARIANTS_MT:TABIX_BGZIP' { + ext.prefix = { "${meta.id}_ranked_mt" } + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*RANK_VARIANTS_MT:TABIX_TABIX' { + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config index c4bab3d1..f77e8a66 100644 --- a/conf/modules/raredisease.config +++ b/conf/modules/raredisease.config @@ -40,23 +40,6 @@ process { } } -// -// Remove mitochondrial variants -// - -process { - withName: '.*RAREDISEASE:GATK4_SELECTVARIANTS' { - ext.args = "--exclude-intervals ${params.mito_name}" - ext.prefix = { "${meta.id}_nomito" } - ext.when = { !params.skip_snv_annotation } - publishDir = [ - enabled: !params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } -} // // Merge mitochondrial and genomic vcfs @@ -134,6 +117,22 @@ process { } } +process { + withName: '.*FILTERVEP_MT' { + ext.prefix = { "${meta.id}_clinical_snv" } + ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } + } + + withName: '.*BGZIPTABIX_MT' { + ext.prefix = { "${meta.id}_clinical_snv" } + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + // // Multiqc options // diff --git a/conf/test.config b/conf/test.config index 9a13845b..83edbe24 100644 --- a/conf/test.config +++ b/conf/test.config @@ -38,6 +38,7 @@ params { known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" + score_config_mt = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" svdb_query_dbs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" diff --git a/conf/test_full.config b/conf/test_full.config index dcf02d95..587b8600 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -25,7 +25,7 @@ params { genome = 'GRCh38' // Skip annotation - skip_mt_analysis = true + skip_mt_annotation = true skip_snv_annotation = true skip_sv_annotation = true } diff --git a/docs/output.md b/docs/output.md index 6d07a3bd..6cfac5e6 100644 --- a/docs/output.md +++ b/docs/output.md @@ -98,7 +98,7 @@ The pipeline is built using 
[Nextflow](https://www.nextflow.io/) and processes d - `{outputdir}/alignment/` - `*.bam|*.cram`: Alignment file in bam/cram format. - `*.bai|*.crai`: Index of the corresponding bam/cram file. - - `*.txt`: Text file containing the dedup metrics. + - `*.metrics`: Text file containing the dedup metrics. ### Quality control and reporting @@ -222,8 +222,6 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ Output files - `call_snv/` - - `_split_rmdup.vcf.gz`: normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set. - - `_split_rmdup.vcf.gz.tbi`: index of the normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set. - `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants. - `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants. @@ -237,8 +235,6 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support. Output files - `call_snv/` - - `_split_rmdup.vcf.gz`: normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set. - - `_split_rmdup.vcf.gz.tbi`: index of the normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set. - `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants. - `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants. @@ -326,7 +322,7 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files Based on VEP annotations, custom scripts used by the pipeline further annotate each record with the most severe consequence, and pli scores. -> **NB**: Output files described below include mitochondrial annotations only if --skip_mt_analysis is set to true. +> **NB**: Output files described below include mitochondrial annotations only if --skip_mt_annotation is set to true.
Output files diff --git a/main.nf b/main.nf index fcce4cc7..12cdadc0 100644 --- a/main.nf +++ b/main.nf @@ -39,6 +39,7 @@ params.ploidy_model = WorkflowMain.getGenomeAttribute(params, params.reduced_penetrance = WorkflowMain.getGenomeAttribute(params, 'reduced_penetrance') params.readcount_intervals = WorkflowMain.getGenomeAttribute(params, 'readcount_intervals') params.sequence_dictionary = WorkflowMain.getGenomeAttribute(params, 'sequence_dictionary') +params.score_config_mt = WorkflowMain.getGenomeAttribute(params, 'score_config_mt') params.score_config_snv = WorkflowMain.getGenomeAttribute(params, 'score_config_snv') params.score_config_sv = WorkflowMain.getGenomeAttribute(params, 'score_config_sv') params.svdb_query_dbs = WorkflowMain.getGenomeAttribute(params, 'svdb_query_dbs') diff --git a/nextflow.config b/nextflow.config index 7c494c77..cbcdd6a6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,7 +27,7 @@ params { skip_cnv_calling = false skip_snv_annotation = false skip_sv_annotation = false - skip_mt_analysis = false + skip_mt_annotation = false gens_switch = false cadd_resources = null platform = 'illumina' @@ -280,7 +280,6 @@ manifest { includeConfig 'conf/modules/raredisease.config' includeConfig 'conf/modules/align.config' -includeConfig 'conf/modules/analyse_MT.config' includeConfig 'conf/modules/align_MT.config' includeConfig 'conf/modules/call_snv_MT.config' includeConfig 'conf/modules/call_sv_MT.config' @@ -290,7 +289,6 @@ includeConfig 'conf/modules/postprocess_MT_calls.config' includeConfig 'conf/modules/call_structural_variants.config' includeConfig 'conf/modules/annotate_genome_snvs.config' includeConfig 'conf/modules/annotate_structural_variants.config' -includeConfig 'conf/modules/align_and_call_MT.config' includeConfig 'conf/modules/align_bwamem2.config' includeConfig 'conf/modules/align_sentieon.config' includeConfig 'conf/modules/annotate_consequence_pli.config' @@ -301,7 +299,6 @@ includeConfig 
'conf/modules/call_sv_manta.config' includeConfig 'conf/modules/call_sv_tiddit.config' includeConfig 'conf/modules/convert_mt_bam_to_fastq.config' includeConfig 'conf/modules/gens.config' -includeConfig 'conf/modules/merge_annotate_MT.config' includeConfig 'conf/modules/prepare_references.config' includeConfig 'conf/modules/qc_bam.config' includeConfig 'conf/modules/rank_variants.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index f5216d76..27a3365e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -266,6 +266,13 @@ "help_text": "The saved references can be used for future pipeline runs, reducing processing times.", "fa_icon": "fas fa-save" }, + "score_config_mt": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "description": "MT rank model config file for genmod." + }, "score_config_snv": { "type": "string", "exists": true, @@ -388,9 +395,9 @@ "description": "Specifies whether or not to skip CNV calling.", "fa_icon": "fas fa-book" }, - "skip_mt_analysis": { + "skip_mt_annotation": { "type": "boolean", - "description": "Specifies whether or not to skip the subworkflow that analyses mitochondrial genome separate from the nuclear genome.", + "description": "Specifies whether or not to skip annotation of mitochondrial variants.", "fa_icon": "fas fa-toggle-on" }, "skip_snv_annotation": { diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 2a5fb036..327aced4 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -42,7 +42,7 @@ if (!params.skip_sv_annotation) { mandatoryParams += ["genome", "svdb_query_dbs", "vep_cache", "vep_cache_version", "score_config_sv"] } -if (!params.skip_mt_analysis) { +if (!params.skip_mt_annotation) { mandatoryParams += ["genome", "mito_name", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"] } @@ -250,6 +250,8 @@ workflow RAREDISEASE { : ( ch_references.readcount_intervals ?: Channel.empty() ) 
ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() : Channel.value([]) + ch_score_config_mt = params.score_config_mt ? Channel.fromPath(params.score_config_mt).collect() + : Channel.value([]) ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect() : Channel.value([]) ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect() @@ -507,7 +509,7 @@ workflow RAREDISEASE { ANN_CSQ_PLI_MT.out.vcf_ann, ch_pedfile, ch_reduced_penetrance, - ch_score_config_snv + ch_score_config_mt ) ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions) From 0c8c8709ad59678b6166b78944fb2a540843c5f4 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 26 Aug 2023 11:59:03 +0200 Subject: [PATCH 10/22] update raredisease config --- conf/modules/raredisease.config | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config index e2300b5c..4531294c 100644 --- a/conf/modules/raredisease.config +++ b/conf/modules/raredisease.config @@ -51,17 +51,6 @@ process { } } -// -// SENTIEON_TNSCOPE_MT_CALL -// - -process { - withName: '.*SENTIEON_TNSCOPE' { - ext.args = { (params.genome == "GRCh37") ? 
" --interval MT " : "--interval chrM" } - ext.args2 = " --min_init_normal_lod 0,5 --min_normal_lod 2,0 --min_init_tumor_lod 1,0 --min_tumor_lod 2,8 --trim_soft_clip " - ext.when = params.variant_caller.equals("sentieon") - } -} // // Smncopynumbercaller options @@ -141,12 +130,14 @@ process { process { withName: '.*FILTERVEP_MT' { - ext.prefix = { "${meta.id}_clinical_snv" } + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_clinical_mt" } ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } } withName: '.*BGZIPTABIX_MT' { - ext.prefix = { "${meta.id}_clinical_snv" } + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_clinical_mt" } publishDir = [ path: { "${params.outdir}/rank_and_filter" }, mode: params.publish_dir_mode, From 37be3a76a87f11a960ceede7f8ba98ae13fd4879 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 26 Aug 2023 12:11:54 +0200 Subject: [PATCH 11/22] remove skip_mt_analysis --- nextflow.config | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index b428710e..294fdbe8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -29,7 +29,6 @@ params { skip_sv_annotation = false skip_mt_annotation = false skip_vep_filter = false - skip_mt_analysis = false gens_switch = false cadd_resources = null platform = 'illumina' From 637692ef5ff88025da858a63d218bbb53972eeae Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 26 Aug 2023 17:02:49 +0200 Subject: [PATCH 12/22] update output --- docs/output.md | 44 +++++++----- nextflow.config | 30 ++++---- subworkflows/local/analyse_MT.nf | 118 ------------------------------- workflows/raredisease.nf | 1 - 4 files changed, 42 insertions(+), 151 deletions(-) delete mode 100644 subworkflows/local/analyse_MT.nf diff --git a/docs/output.md b/docs/output.md index 8cc6e4ab..2a7949dc 100644 --- a/docs/output.md +++ b/docs/output.md @@ -221,7 +221,7 
@@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files -- `call_snv/` +- `call_snv/genome` - `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants. - `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants. @@ -234,7 +234,7 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
Output files -- `call_snv/` +- `call_snv/genome` - `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants. - `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants. @@ -261,7 +261,7 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
Output files -- `call_sv/` +- `call_sv/genome` - `_sv_merge.vcf.gz`: file containing the merged variant calls. - `_sv_merge.vcf.gz.tbi`: index of the file containing the merged variant calls. @@ -322,12 +322,12 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files Based on VEP annotations, custom scripts used by the pipeline further annotate each record with the most severe consequence, and pli scores. -> **NB**: Output files described below include mitochondrial annotations only if --skip_mt_annotation is set to true. +> **NB**: Output files described below do not include mitochondrial annotations only if --skip_mt_annotation is set to true.
Output files -- `annotate_snv/` +- `annotate_snv/genome` - `_rohann_vcfanno_filter_vep.vcf.gz`: file containing bcftools roh, vcfanno, and vep annotations. - `_rohann_vcfanno_filter_vep.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, and vep annotations. - `_vep_csq_pli.vcf.gz`: file containing bcftools roh, vcfanno, vep, consequence and pli annotations. @@ -346,9 +346,9 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e
Output files -- `annotate_snv/*sites_chromograph` +- `annotate_snv/genome/*sites_chromograph` - `_rohann_vcfanno_upd_sites_.png`: file containing a plot showing upd sites across chromosomes. -- `annotate_snv/*regions_chromograph` +- `annotate_snv/genome/*regions_chromograph` - `_rohann_vcfanno_upd_regions_.png`: file containing a plot showing upd regions across chromosomes.
@@ -376,7 +376,7 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e ### Mitochondrial analysis -Mitochondrial analysis is run by default, to turn it off set `--skip_mt_analysis` to true. +Mitochondrial analysis is run by default. If you want to turn off annotations set `--skip_mt_annotation` to true. #### Alignment and variant calling @@ -388,6 +388,10 @@ The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sen [MT deletion script](https://github.com/dnil/mitosign/blob/master/run_mt_del_check.sh) lists the fraction of mitochondrially aligning read pairs (per 1000) that appear discordant, as defined by an insert size of more than 1.2 kb (and less than 15 kb due to the circular nature of the genome) using samtools. +- `call_sv/mitochondria` + - `_svdbquery_vep.vcf.gz`: file containing svdb query, and vep annotations. + - `_svdbquery_vep.vcf.gz.tbi`: index of the file containing svdb query, and vep annotations. + #### Annotation: ##### HaploGrep2 @@ -397,7 +401,7 @@ The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sen
Output files -- `annotate_mt/` +- `annotate_snv/mitochondria` - `_haplogrep.txt`: file containing haplogroup information.
@@ -423,9 +427,9 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files
Output files -- `annotate_mt/` - - `_vep_vcfanno_mt.vcf.gz`: file containing mitochondrial annotations. - - `_vep_vcfanno_mt.vcf.gz.tbi`: index of the file containing mitochondrial annotations. +- `annotate_snv/mitochondria` + - `_vep_vcfanno_hmtnote_mt.vcf.gz`: file containing mitochondrial annotations. + - `_vep_vcfanno_hmtnote_mt.vcf.gz.tbi`: index of the file containing mitochondrial annotations.
@@ -439,12 +443,18 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files Output files - `rank_and_filter/` - - `_clinical_snv.ann_filter.vcf.gz`: file containing clinically relevant SNVs. - - `_clinical_sv.ann_filter.vcf.gz`: file containing clinically relevant SVs. + - `_clinical_mt.vcf.gz`: file containing clinically relevant mitochondrial SNVs. + - `_clinical_mt.vcf.gz.tbi`: index of the file containing clinically relevant mitochondrial SNVs. + - `_clinical_snv.vcf.gz`: file containing clinically relevant SNVs. + - `_clinical_snv.vcf.gz.tbi`: index of the file containing clinically relevant SNVs. + - `_clinical_sv.vcf.gz`: file containing clinically relevant SVs. + - `_clinical_sv.vcf.gz.tbi`: index of the file containing clinically relevant SVs. + - `_ranked_mt.vcf.gz`: file containing mitochondrial SNV annotations with their rank scores. + - `_ranked_mt.vcf.gz.tbi`: index of the file containing mitochondrial SNV annotations with their rank scores. - `_ranked_snv.vcf.gz`: file containing SNV annotations with their rank scores. - - `_ranked_snv.vcf.gz.tbi`: file containing SNV annotations with their rank scores. - - `_ranked_sv.ann_filter.vcf.gz`: file containing SV annotations with their rank scores. - - `_ranked_sv.ann_filter.vcf.gz.tbi`: file containing SV annotations with their rank scores. + - `_ranked_snv.vcf.gz.tbi`: index of the file containing SNV annotations with their rank scores. + - `_ranked_sv.vcf.gz`: file containing SV annotations with their rank scores. + - `_ranked_sv.vcf.gz.tbi`: index of the file containing SV annotations with their rank scores.
diff --git a/nextflow.config b/nextflow.config index 294fdbe8..e4554ce7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -281,32 +281,32 @@ manifest { includeConfig 'conf/modules/raredisease.config' includeConfig 'conf/modules/align.config' -includeConfig 'conf/modules/align_MT.config' -includeConfig 'conf/modules/call_snv_MT.config' -includeConfig 'conf/modules/call_sv_MT.config' -includeConfig 'conf/modules/annotate_mt_snvs.config' -includeConfig 'conf/modules/call_snv.config' -includeConfig 'conf/modules/postprocess_MT_calls.config' -includeConfig 'conf/modules/call_structural_variants.config' +includeConfig 'conf/modules/annotate_consequence_pli.config' includeConfig 'conf/modules/annotate_genome_snvs.config' +includeConfig 'conf/modules/annotate_mt_snvs.config' includeConfig 'conf/modules/annotate_structural_variants.config' -includeConfig 'conf/modules/align_bwamem2.config' -includeConfig 'conf/modules/align_sentieon.config' -includeConfig 'conf/modules/annotate_consequence_pli.config' includeConfig 'conf/modules/call_repeat_expansions.config' -includeConfig 'conf/modules/call_snv_deepvariant.config' -includeConfig 'conf/modules/call_snv_sentieon.config' -includeConfig 'conf/modules/call_sv_manta.config' -includeConfig 'conf/modules/call_sv_tiddit.config' +includeConfig 'conf/modules/call_snv.config' +includeConfig 'conf/modules/call_structural_variants.config' includeConfig 'conf/modules/convert_mt_bam_to_fastq.config' includeConfig 'conf/modules/gens.config' +includeConfig 'conf/modules/peddy_check.config' includeConfig 'conf/modules/prepare_references.config' includeConfig 'conf/modules/qc_bam.config' includeConfig 'conf/modules/rank_variants.config' includeConfig 'conf/modules/scatter_genome.config' +includeConfig 'conf/modules/align_MT.config' +includeConfig 'conf/modules/align_bwamem2.config' +includeConfig 'conf/modules/align_sentieon.config' includeConfig 'conf/modules/annotate_cadd.config' -includeConfig 'conf/modules/peddy_check.config' 
+includeConfig 'conf/modules/call_snv_MT.config' +includeConfig 'conf/modules/call_snv_deepvariant.config' +includeConfig 'conf/modules/call_snv_sentieon.config' +includeConfig 'conf/modules/call_sv_MT.config' includeConfig 'conf/modules/call_sv_germlinecnvcaller.config' +includeConfig 'conf/modules/call_sv_manta.config' +includeConfig 'conf/modules/call_sv_tiddit.config' +includeConfig 'conf/modules/postprocess_MT_calls.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf deleted file mode 100644 index af5cebff..00000000 --- a/subworkflows/local/analyse_MT.nf +++ /dev/null @@ -1,118 +0,0 @@ -// -// Analyse MT -// -include { CONVERT_MT_BAM_TO_FASTQ } from './mitochondria/convert_mt_bam_to_fastq' -include { ALIGN_AND_CALL_MT } from './mitochondria/align_and_call_MT' -include { ALIGN_AND_CALL_MT as ALIGN_AND_CALL_MT_SHIFT } from './mitochondria/align_and_call_MT' -include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main' -include { MERGE_ANNOTATE_MT } from './mitochondria/merge_annotate_MT' - -workflow ANALYSE_MT { - take: - ch_bam_bai // channel: [mandatory] [ val(meta), file(bam), file(bai) ] - ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_genome_bwa_index // channel: [mandatory] [ val(meta), path(index) ] - ch_genome_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_mt_intervals // channel: [mandatory] [ path(interval_list) ] - ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] - ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] - ch_mtshift_fasta // channel: [mandatory] [ val(meta), 
path(fasta) ] - ch_mtshift_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_mtshift_intervals // channel: [mandatory] [ path(interval_list) ] - ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] - ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] - val_vep_genome // string: [mandatory] GRCh37 or GRCh38 - val_vep_cache_version // string: [mandatory] 107 - ch_vep_cache // channel: [mandatory] [ path(cache) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - - main: - ch_versions = Channel.empty() - - // PREPARING READS FOR MT ALIGNMENT - CONVERT_MT_BAM_TO_FASTQ ( - ch_bam_bai, - ch_genome_fasta, - ch_genome_fai, - ch_genome_dict - ) - - // MT ALIGNMENT AND VARIANT CALLING - ALIGN_AND_CALL_MT ( - CONVERT_MT_BAM_TO_FASTQ.out.fastq, - CONVERT_MT_BAM_TO_FASTQ.out.bam, - ch_genome_bwa_index, - ch_genome_bwamem2_index, - ch_genome_fasta, - ch_genome_dict, - ch_genome_fai, - ch_mt_intervals - ) - - ALIGN_AND_CALL_MT_SHIFT ( - CONVERT_MT_BAM_TO_FASTQ.out.fastq, - CONVERT_MT_BAM_TO_FASTQ.out.bam, - ch_mtshift_bwaindex, - ch_mtshift_bwamem2index, - ch_mtshift_fasta, - ch_mtshift_dict, - ch_mtshift_fai, - ch_mtshift_intervals - ) - - // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT - PICARD_LIFTOVERVCF ( - ALIGN_AND_CALL_MT_SHIFT.out.vcf, - ch_genome_dict, - ch_genome_fasta, - ch_mtshift_backchain, - ) - - // MT MERGE AND ANNOTATE VARIANTS - MERGE_ANNOTATE_MT( - ALIGN_AND_CALL_MT.out.vcf, - PICARD_LIFTOVERVCF.out.vcf_lifted, - ch_cadd_header, - ch_cadd_resources, - ch_genome_fasta, - ch_genome_dict, - ch_genome_fai, - ch_vcfanno_resources, - ch_vcfanno_toml, - val_vep_genome, - val_vep_cache_version, - ch_vep_cache, - ch_case_info - ) - - ch_versions = ch_versions.mix(CONVERT_MT_BAM_TO_FASTQ.out.versions) - ch_versions = ch_versions.mix(ALIGN_AND_CALL_MT.out.versions) - ch_versions = 
ch_versions.mix(ALIGN_AND_CALL_MT_SHIFT.out.versions) - ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first()) - ch_versions = ch_versions.mix(MERGE_ANNOTATE_MT.out.versions) - - emit: - vcf = MERGE_ANNOTATE_MT.out.vcf // channel: [ val(meta), path(vcf) ] - tbi = MERGE_ANNOTATE_MT.out.tbi // channel: [ val(meta), path(tbi) ] - stats = ALIGN_AND_CALL_MT.out.stats // channel: [ val(meta), path(stats) ] - filt_stats = ALIGN_AND_CALL_MT.out.filt_stats // channel: [ val(meta), path(tsv) ] - mt_del_result = ALIGN_AND_CALL_MT.out.mt_del_result // channel: [ val(meta), path(txt) ] - stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.stats // channel: [ val(meta), path(stats) ] - filt_stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.filt_stats // channel: [ val(meta), path(tsv) ] - eklipse_del = ALIGN_AND_CALL_MT.out.eklipse_del // channel: [ val(meta), path(csv) ] - eklipse_genes = ALIGN_AND_CALL_MT.out.eklipse_genes // channel: [ val(meta), path(csv) ] - eklipse_circos = ALIGN_AND_CALL_MT.out.eklipse_circos // channel: [ val(meta), path(png) ] - haplog = MERGE_ANNOTATE_MT.out.haplog // channel: [ val(meta), path(txt) ] - report = MERGE_ANNOTATE_MT.out.report // channel: [ path(html) ] - txt = ALIGN_AND_CALL_MT.out.txt // channel: [ val(meta), path(txt) ] - html = ALIGN_AND_CALL_MT.out.html // channel: [ val(meta), path(html) ] - txt_sh = ALIGN_AND_CALL_MT_SHIFT.out.txt // channel: [ val(meta), path(txt) ] - html_sh = ALIGN_AND_CALL_MT_SHIFT.out.html // channel: [ val(meta), path(html) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index cdd26dba..b1b6afb7 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -112,7 +112,6 @@ include { TABIX_BGZIPTABIX as BGZIPTABIX_SV } from '../modules/nf-core/tabix // include { ALIGN } from '../subworkflows/local/align' -include { ANALYSE_MT } from '../subworkflows/local/analyse_MT' include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_MT } from 
'../subworkflows/local/annotate_consequence_pli' include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli' include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkflows/local/annotate_consequence_pli' From d71250c0352a29c8a32fd14bc1420fedc417341a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sun, 27 Aug 2023 01:02:57 +0200 Subject: [PATCH 13/22] warn --- conf/modules/raredisease.config | 26 -------------------------- workflows/raredisease.nf | 1 - 2 files changed, 27 deletions(-) diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config index 4531294c..5fef3537 100644 --- a/conf/modules/raredisease.config +++ b/conf/modules/raredisease.config @@ -41,16 +41,6 @@ process { } -// -// Merge mitochondrial and genomic vcfs -// - -process { - withName: '.*RAREDISEASE:GATK4_MERGEVCFS' { - ext.prefix = { "${meta.id}_mito_genome_merged" } - } -} - // // Smncopynumbercaller options @@ -75,14 +65,6 @@ process { // process { - withName: '.*RANK_VARIANTS_SNV' { - publishDir = [ - enabled: params.skip_vep_filter, - path: { "${params.outdir}/rank_and_filter" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } withName: '.*FILTERVEP_SNV' { ext.when = !params.skip_vep_filter @@ -102,14 +84,6 @@ process { } process { - withName: '.*RANK_VARIANTS_SV' { - publishDir = [ - enabled: params.skip_vep_filter, - path: { "${params.outdir}/rank_and_filter" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } withName: '.*FILTERVEP_SV' { ext.when = !params.skip_vep_filter diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index b1b6afb7..fce42b35 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -95,7 +95,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil // MODULE: Installed directly from nf-core/modules // -include { GATK4_MERGEVCFS } from '../modules/nf-core/gatk4/mergevcfs/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' From 12bb6ae725e46b0583db295b595c69fe12c00793 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sun, 27 Aug 2023 01:21:28 +0200 Subject: [PATCH 14/22] update output --- conf/modules/annotate_consequence_pli.config | 4 ++-- docs/output.md | 25 +++++++++++++++----- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/conf/modules/annotate_consequence_pli.config b/conf/modules/annotate_consequence_pli.config index eed623a6..68b11951 100644 --- a/conf/modules/annotate_consequence_pli.config +++ b/conf/modules/annotate_consequence_pli.config @@ -58,7 +58,7 @@ process { withName: '.*ANN_CSQ_PLI_SNV:TABIX_BGZIPTABIX' { ext.prefix = { "${meta.id}_vep_csq_pli" } publishDir = [ - path: { "${params.outdir}/annotate_snv" }, + path: { "${params.outdir}/annotate_snv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -84,7 +84,7 @@ process { withName: '.*ANN_CSQ_PLI_MT:TABIX_BGZIPTABIX' { ext.prefix = { "${meta.id}_mt_vep_csq_pli" } publishDir = [ - path: { "${params.outdir}/annotate_snv" }, + path: { "${params.outdir}/annotate_snv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/docs/output.md b/docs/output.md index 2a7949dc..c977f0db 100644 --- a/docs/output.md +++ b/docs/output.md @@ -51,6 +51,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Mitochondrial analysis](#mitochondrial-analysis) - [Alignment and variant calling](#alignment-and-variant-calling) - [MT deletion script](#mt-deletion-script) + - [eKLIPse](#eklipse) - [Annotation:](#annotation-) - [HaploGrep2](#haplogrep2) - [vcfanno](#vcfanno-1) @@ -384,13 +385,25 @@ Mitochondrial analysis is run by default. If you want to turn off annotations se The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sensitivity to low AF and separate alignments using opposite genome breakpoints to allow for the tracing of lineages of rare mitochondrial variants. +- `call_snv/mitochondria` + - `_mitochondria.vcf.gz`: normalized vcf file containing MT variants. + - `_mitochondria.vcf.gz.tbi`: index of the vcf file containing MT variants. + ##### MT deletion script [MT deletion script](https://github.com/dnil/mitosign/blob/master/run_mt_del_check.sh) lists the fraction of mitochondrially aligning read pairs (per 1000) that appear discordant, as defined by an insert size of more than 1.2 kb (and less than 15 kb due to the circular nature of the genome) using samtools. - `call_sv/mitochondria` - - `_svdbquery_vep.vcf.gz`: file containing svdb query, and vep annotations. - - `_svdbquery_vep.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, and vep annotations. + - `.txt`: file containing deletions. + +##### eKLIPse + +[eKLIPse](https://github.com/dooguypapua/eKLIPse) allows the detection and quantification of large mtDNA rearrangements. + +- `call_sv/mitochondria` + - `eKLIPse_deletions.csv`: file containing all predicted deletions. + - `eKLIPse_genes.csv`: file summarizing cumulated deletions per mtDNA gene. + - `eKLIPse_.png`: circos plot. 
#### Annotation: @@ -445,15 +458,15 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files - `rank_and_filter/` - `_clinical_mt.vcf.gz`: file containing clinically relevant mitochondrial SNVs. - `_clinical_mt.vcf.gz.tbi`: index of the file containing clinically relevant mitochondrial SNVs. - - `_clinical_snv.vcf.gz`: file containing clinically relevant SNVs. + - `_clinical_snv.vcf.gz`: file containing clinically relevant SNVs (does not include mitochondrial variants). - `_clinical_snv.vcf.gz.tbi`: index of the file containing clinically relevant SNVs. - - `_clinical_sv.vcf.gz`: file containing clinically relevant SVs. + - `_clinical_sv.vcf.gz`: file containing clinically relevant SVs (includes mitochondrial variants). - `_clinical_sv.vcf.gz.tbi`: index of the file containing clinically relevant SVs. - `_ranked_mt.vcf.gz`: file containing mitochondrial SNV annotations with their rank scores. - `_ranked_mt.vcf.gz.tbi`: index of the file containing mitochondrial SNV annotations with their rank scores. - - `_ranked_snv.vcf.gz`: file containing SNV annotations with their rank scores. + - `_ranked_snv.vcf.gz`: file containing SNV annotations with their rank scores (does not include mitochondrial variants). - `_ranked_snv.vcf.gz.tbi`: index of the file containing SNV annotations with their rank scores. - - `_ranked_sv.vcf.gz`: file containing SV annotations with their rank scores. + - `_ranked_sv.vcf.gz`: file containing SV annotations with their rank scores (includes mitochondrial variants). - `_ranked_sv.vcf.gz.tbi`: index of the file containing SV annotations with their rank scores.
From d24123734ebe929e5347a178dbbc9e5980a16fba Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sun, 27 Aug 2023 02:00:01 +0200 Subject: [PATCH 15/22] update usage --- docs/usage.md | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 24da77cd..7d1e8f12 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -20,7 +20,7 @@ Table of contents: - [6. Copy number variant calling](#6-copy-number-variant-calling) - [7. SNV annotation & Ranking](#7-snv-annotation--ranking) - [8. SV annotation & Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial analysis](#9-mitochondrial-analysis) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - [Run the pipeline](#run-the-pipeline) - [Direct input in CLI](#direct-input-in-cli) - [Import from a config file (recommended)](#import-from-a-config-file-recommended) @@ -133,7 +133,7 @@ nf-core/raredisease consists of several tools used for various purposes. For con 5. Variant calling - Structural variants (SV) (Tiddit & Manta) 6. SNV annotation & ranking (rohcall, vcfanno, ensembl VEP, GENMOD) 7. SV annotation & ranking (SVDB query, ensembl VEP, GENMOD) -8. Mitochondrial analysis +8. Mitochondrial annotation > We have only listed the groups that require at least one input from the user. For example, the pipeline also runs SMNCopyNumberCaller, but it does not require any input other than the bam files passed by the pipeline. Hence, it is not mentioned in the list above. To know more about the tools used in the pipeline check the [README](../README.md). @@ -143,16 +143,17 @@ The mandatory and optional parameters for each category are tabulated below. ##### 1. 
Alignment -| Mandatory | Optional | -| ------------------- | --------------------------- | -| aligner1 | fasta_fai2 | -| fasta | bwamem22 | -| platform | known_dbsnp3 | -| | known_dbsnp_tbi3 | +| Mandatory | Optional | +| ------------------------------ | --------------------------- | +| aligner1 | fasta_fai3 | +| fasta | bwamem23 | +| platform | known_dbsnp4 | +| mito_name/mt_fasta2 | known_dbsnp_tbi4 | 1Default value is bwamem2, but if you have a valid license for Sentieon, you have the option to use Sentieon as well.
-2fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
-3Used only by Sentieon.
2If mito_name is provided, mt_fasta can be generated by the pipeline.
+3fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
+4Used only by Sentieon.
##### 2. QC stats from the alignment files @@ -241,16 +242,17 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl 1 A CSV file that describes the databases (VCFs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query). -##### 9. Mitochondrial analysis - -| Mandatory | Optional | -| ----------------- | -------- | -| genome | | -| mito_name | | -| vcfanno_resources | | -| vcfanno_toml | | -| vep_cache_version | | -| vep_cache | | +##### 9. Mitochondrial annotation + +| Mandatory | Optional | +| ----------------- | ----------- | +| genome | vep_filters | +| mito_name | | +| vcfanno_resources | | +| vcfanno_toml | | +| vep_cache_version | | +| vep_cache | | +| score_config_mt | | #### Run the pipeline From c1640fc151dbde9bbd7f1785899c9c3e1fa816dc Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 5 Sep 2023 09:01:47 +0200 Subject: [PATCH 16/22] update headers --- subworkflows/local/align.nf | 4 +++- subworkflows/local/alignment/align_MT.nf | 1 + subworkflows/local/annotate_mt_snvs.nf | 15 ++++----------- subworkflows/local/call_structural_variants.nf | 1 + subworkflows/local/variant_calling/call_snv_MT.nf | 2 -- 5 files changed, 9 insertions(+), 14 deletions(-) diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index a705f16e..e7314f2a 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -88,7 +88,9 @@ workflow ALIGN { ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions, ALIGN_SENTIEON.out.versions, ALIGN_MT.out.versions, - ALIGN_MT_SHIFT.out.versions) + ALIGN_MT_SHIFT.out.versions, + CONVERT_MT_BAM_TO_FASTQ.out.versions, + SAMTOOLS_VIEW.out.versions) emit: genome_marked_bam = ch_genome_marked_bam // channel: [ val(meta), 
path(bam) ] diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf index 17ecc362..dec73e67 100644 --- a/subworkflows/local/alignment/align_MT.nf +++ b/subworkflows/local/alignment/align_MT.nf @@ -43,6 +43,7 @@ workflow ALIGN_MT { SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam) ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first()) + ch_versions = ch_versions.mix(SENTIEON_BWAMEM_MT.out.versions.first()) ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first()) ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first()) ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first()) diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index d6ce5659..5a6b65b5 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -1,16 +1,8 @@ // -// Merge and annotate MT +// Annotate MT // -include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../modules/nf-core/gatk4/variantfiltration/main' -include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../modules/nf-core/bcftools/norm/main' include { TABIX_TABIX as TABIX_TABIX_MT } from '../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../modules/nf-core/bcftools/merge/main' -include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../modules/nf-core/tabix/tabix/main' include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../modules/local/ensemblvep/main' include { HAPLOGREP2_CLASSIFY as 
HAPLOGREP2_CLASSIFY_MT } from '../../modules/nf-core/haplogrep2/classify/main' include { VCFANNO as VCFANNO_MT } from '../../modules/nf-core/vcfanno/main' @@ -66,9 +58,9 @@ workflow ANNOTATE_MT_SNVS { ) // Running vcfanno - TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz) + TABIX_TABIX_MT(ENSEMBLVEP_MT.out.vcf_gz) ENSEMBLVEP_MT.out.vcf_gz - .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} .set { ch_in_vcfanno } @@ -91,6 +83,7 @@ workflow ANNOTATE_MT_SNVS { ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX_MT.out.versions) ch_versions = ch_versions.mix(VCFANNO_MT.out.versions) ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions) ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions) diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index d614ef7c..d0846e6e 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -75,6 +75,7 @@ workflow CALL_STRUCTURAL_VARIANTS { TABIX_TABIX (SVDB_MERGE.out.vcf) ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions) + ch_versions = ch_versions.mix(CALL_SV_MT.out.versions) ch_versions = ch_versions.mix(CALL_SV_TIDDIT.out.versions) ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions) ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) diff --git a/subworkflows/local/variant_calling/call_snv_MT.nf b/subworkflows/local/variant_calling/call_snv_MT.nf index e91718ad..d55d1965 100644 --- a/subworkflows/local/variant_calling/call_snv_MT.nf +++ b/subworkflows/local/variant_calling/call_snv_MT.nf @@ -5,8 +5,6 @@ include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main' include { GATK4_MUTECT2 as 
GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main' include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' -include { MT_DELETION } from '../../../modules/local/mt_deletion_script' -include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main' workflow CALL_SNV_MT { take: From fd9c26fadfc9075b25c5ef73b62c7c0188c13153 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 5 Sep 2023 09:10:23 +0200 Subject: [PATCH 17/22] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01f4cca1..6ef40309 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Installed the nf-core version of the sentieon/dnamodelapply module [#403](https://github.com/nf-core/raredisease/pull/403) - Installed the nf-core version of the sentieon/wgsmetricsalgo module [#404](https://github.com/nf-core/raredisease/pull/404) - Installed the nf-core version of the sentieon/dnascope module [#406](https://github.com/nf-core/raredisease/pull/406) +- Breaks down mitochondrial analysis workflow into smaller subworkflows that are more modular [#419](https://github.com/nf-core/raredisease/pull/419) +- Replaced the parameter skip_mt_analysis which was used to turn on/off the mitochondrial workflow [#419](https://github.com/nf-core/raredisease/pull/419) +- Adds a new parameter skip_mt_annotation which can be used to turn on/off annotation and ranking for mitochondrial SNVs [#419](https://github.com/nf-core/raredisease/pull/419) ### `Fixed` From 8ca4f8d83e3ccdcec392eebd57b45af5e8faaa42 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 13 Sep 2023 13:21:03 +0200 Subject: [PATCH 18/22] Lucia's comment --- docs/usage.md | 7 ++++--- 1 file 
changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 7d1e8f12..6ce21435 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,9 +131,10 @@ nf-core/raredisease consists of several tools used for various purposes. For con 3. Repeat expansions (ExpansionsHunter & Stranger) 4. Variant calling - SNV (DeepVariant/Sentieon DNAscope) 5. Variant calling - Structural variants (SV) (Tiddit & Manta) -6. SNV annotation & ranking (rohcall, vcfanno, ensembl VEP, GENMOD) -7. SV annotation & ranking (SVDB query, ensembl VEP, GENMOD) -8. Mitochondrial annotation +6. Copy number variant calling (GATK's GermlineCNVCaller) +7. SNV annotation & ranking (rohcall, vcfanno, ensembl VEP, GENMOD) +8. SV annotation & ranking (SVDB query, ensembl VEP, GENMOD) +9. Mitochondrial annotation > We have only listed the groups that require at least one input from the user. For example, the pipeline also runs SMNCopyNumberCaller, but it does not require any input other than the bam files passed by the pipeline. Hence, it is not mentioned in the list above. To know more about the tools used in the pipeline check the [README](../README.md). 
From ee105a8b60869ad588a9a212a5193b85c25c0881 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 13 Sep 2023 13:21:22 +0200 Subject: [PATCH 19/22] Update subworkflows/local/call_snv.nf [skip ci] Co-authored-by: Anders Jemt --- subworkflows/local/call_snv.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index d8cb9744..96320986 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -103,6 +103,6 @@ workflow CALL_SNV { genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ] genome_vcf_tabix = ch_genome_vcf_tabix // channel: [ val(meta), path(vcf), path(tbi) ] mt_vcf = POSTPROCESS_MT_CALLS.out.vcf // channel: [ val(meta), path(vcf) ] - mt_tabix = POSTPROCESS_MT_CALLS.out.tbi // channel: [ val(meta), path(vcf) ] + mt_tabix = POSTPROCESS_MT_CALLS.out.tbi // channel: [ val(meta), path(tbi) ] versions = ch_versions // channel: [ path(versions.yml) ] } From 4eeab25d49c56b29dc79ed82de7247cfc2661ad0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 13 Sep 2023 13:23:01 +0200 Subject: [PATCH 20/22] Update subworkflows/local/variant_calling/postprocess_MT_calls.nf [skip ci] Co-authored-by: Anders Jemt --- subworkflows/local/variant_calling/postprocess_MT_calls.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf index 81738085..2d10cfaa 100644 --- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf +++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf @@ -25,7 +25,7 @@ workflow POSTPROCESS_MT_CALLS { main: ch_versions = Channel.empty() - // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT + // LIFTOVER SHIFTED VCF TO REFERENCE MT POSITIONS PICARD_LIFTOVERVCF ( ch_mtshift_vcf, 
ch_genome_dictionary, From 9c1f1c0e0e4aea525e73e7e47fe4bfc7449ac1df Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 13 Sep 2023 14:48:52 +0200 Subject: [PATCH 21/22] comments --- assets/schema_input.json | 2 +- subworkflows/local/align.nf | 2 +- subworkflows/local/annotate_mt_snvs.nf | 10 +- .../local/call_structural_variants.nf | 2 - subworkflows/local/convert_mt_bam_to_fastq.nf | 36 ---- .../local/mitochondria/align_and_call_MT.nf | 96 ---------- .../local/mitochondria/merge_annotate_MT.nf | 176 ------------------ .../variant_calling/postprocess_MT_calls.nf | 4 +- 8 files changed, 9 insertions(+), 319 deletions(-) delete mode 100644 subworkflows/local/convert_mt_bam_to_fastq.nf delete mode 100644 subworkflows/local/mitochondria/align_and_call_MT.nf delete mode 100644 subworkflows/local/mitochondria/merge_annotate_MT.nf diff --git a/assets/schema_input.json b/assets/schema_input.json index 555f2c0a..191f1394 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,7 +10,7 @@ "sample": { "type": "string", "exists": true, - "meta": ["id"], + "meta": ["id", "sample"], "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces" }, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index e7314f2a..17a6252b 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -7,7 +7,7 @@ include { ALIGN_SENTIEON } from './alignment/align_sentieon' include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' include { ALIGN_MT } from './alignment/align_MT' include { ALIGN_MT as ALIGN_MT_SHIFT } from './alignment/align_MT' -include { CONVERT_MT_BAM_TO_FASTQ } from './convert_mt_bam_to_fastq' +include { CONVERT_MT_BAM_TO_FASTQ } from './mitochondria/convert_mt_bam_to_fastq' workflow ALIGN { take: diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index 5a6b65b5..8f449e7b 100644 
--- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -36,12 +36,12 @@ workflow ANNOTATE_MT_SNVS { // Pick input for vep ch_mt_vcf - .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) - .branch { it -> - merged: it[2].equals("null") + .join(ANNOTATE_CADD.out.vcf, remainder: true) // If CADD is not run then the third element in this channel will be `null` + .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,cadd.vcf], else [[meta],selvar.vcf,null] + merged: it[2].equals(null) return [it[0], it[1]] - cadd: !(it[2].equals("null")) - return [it[2], it[3]] + cadd: !(it[2].equals(null)) + return [it[0], it[2]] } .set { ch_for_mix } ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd) diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index d0846e6e..28f4e8db 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -50,8 +50,6 @@ workflow CALL_STRUCTURAL_VARIANTS { CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta) -// CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta) - //merge if (params.skip_cnv_calling) { tiddit_vcf diff --git a/subworkflows/local/convert_mt_bam_to_fastq.nf b/subworkflows/local/convert_mt_bam_to_fastq.nf deleted file mode 100644 index ca19ea82..00000000 --- a/subworkflows/local/convert_mt_bam_to_fastq.nf +++ /dev/null @@ -1,36 +0,0 @@ -// -// Prepare bam files for MT allignment -// - -include { GATK4_PRINTREADS as GATK4_PRINTREADS_MT } from '../../modules/nf-core/gatk4/printreads/main' -include { GATK4_REVERTSAM as GATK4_REVERTSAM_MT } from '../../modules/nf-core/gatk4/revertsam/main' -include { GATK4_SAMTOFASTQ as GATK4_SAMTOFASTQ_MT } from '../../modules/nf-core/gatk4/samtofastq/main' - -workflow CONVERT_MT_BAM_TO_FASTQ { - take: - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - 
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - - main: - ch_versions = Channel.empty() - - // Outputs bam containing only MT - GATK4_PRINTREADS_MT ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict ) - - // Removes alignment information - GATK4_REVERTSAM_MT ( GATK4_PRINTREADS_MT.out.bam ) - - // Outputs fastq files - GATK4_SAMTOFASTQ_MT ( GATK4_REVERTSAM_MT.out.bam ) - - ch_versions = ch_versions.mix(GATK4_PRINTREADS_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_REVERTSAM_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_SAMTOFASTQ_MT.out.versions.first()) - - emit: - fastq = GATK4_SAMTOFASTQ_MT.out.fastq // channel: [ val(meta), [ path(fastq) ] ] - bam = GATK4_REVERTSAM_MT.out.bam // channel: [ val(meta), path(bam) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf deleted file mode 100644 index d857b11a..00000000 --- a/subworkflows/local/mitochondria/align_and_call_MT.nf +++ /dev/null @@ -1,96 +0,0 @@ -// -// Align and call MT -// - -include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main' -include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main' -include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main' -include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' -include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MT } from '../../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } from '../../../modules/nf-core/samtools/sort/main' 
-include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main' -include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main' -include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' -include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' -include { MT_DELETION } from '../../../modules/local/mt_deletion_script' -include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main' - -workflow ALIGN_AND_CALL_MT { - take: - ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] - ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] - ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_intervals // channel: [mandatory] [ path(interval_list) ] - - main: - ch_versions = Channel.empty() - - BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true) - - SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, ch_fai ) - - Channel.empty() - .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] }) - .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) - .set {ch_bam_ubam} - - GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict) - - PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam) - - PICARD_MARKDUPLICATES_MT (PICARD_ADDORREPLACEREADGROUPS_MT.out.bam, ch_fasta, ch_fai) - - SAMTOOLS_SORT_MT (PICARD_MARKDUPLICATES_MT.out.bam) - - SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam) - ch_sort_index_bam = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true) - ch_sort_index_bam_int_mt = 
ch_sort_index_bam.combine(ch_intervals) - - EKLIPSE_MT(ch_sort_index_bam,[]) - - MT_DELETION(ch_sort_index_bam, ch_fasta) - - GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[]) - - HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf) - - // Filter Mutect2 calls - ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - ch_mutect_out = ch_mutect_vcf.join(GATK4_MUTECT2_MT.out.stats, failOnMismatch:true, failOnDuplicate:true) - ch_to_filt = ch_mutect_out.map { - meta, vcf, tbi, stats -> - return [meta, vcf, tbi, stats, [], [], [], []] - } - - GATK4_FILTERMUTECTCALLS_MT (ch_to_filt, ch_fasta, ch_fai, ch_dict) - - ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first()) - ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first()) - ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first()) - ch_versions = ch_versions.mix(EKLIPSE_MT.out.versions.first()) - ch_versions = ch_versions.mix(MT_DELETION.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first()) - ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first()) - - emit: - vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ] - tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ] - stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ] - filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ] - eklipse_del = EKLIPSE_MT.out.deletions // channel: [ val(meta), path(csv) ] - eklipse_genes = EKLIPSE_MT.out.genes // channel: [ val(meta), path(csv) ] - 
eklipse_circos = EKLIPSE_MT.out.circos // channel: [ val(meta), path(png) ] - txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ] - html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ] - mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf deleted file mode 100644 index 43edd93b..00000000 --- a/subworkflows/local/mitochondria/merge_annotate_MT.nf +++ /dev/null @@ -1,176 +0,0 @@ -// -// Merge and annotate MT -// - -include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main' -include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' -include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main' -include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main' -include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main' -include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main' -include { ANNOTATE_CADD } from '../annotation/annotate_cadd' -include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from 
'../../../modules/nf-core/tabix/bgziptabix/main' -include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main' - -workflow MERGE_ANNOTATE_MT { - take: - ch_vcf1 // channel: [mandatory] [ val(meta), path(vcf) ] - ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ] - ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] - ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] - val_vep_genome // string: [mandatory] GRCh37 or GRCh38 - val_vep_cache_version // string: [mandatory] 107 - ch_vep_cache // channel: [mandatory] [ path(cache) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - - main: - ch_versions = Channel.empty() - - ch_vcfs = ch_vcf1 - .join(ch_vcf2, remainder: true) - .map{ meta, vcf1, vcf2 -> - [meta, [vcf1, vcf2]] - } - GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict) - - // Filtering Variants - GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf - .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_filt_vcf } - GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict) - - // Spliting multiallelic calls - GATK4_VARIANTFILTRATION_MT.out.vcf - .join(GATK4_VARIANTFILTRATION_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_in_split } - SPLIT_MULTIALLELICS_MT (ch_in_split, ch_genome_fasta) - TABIX_TABIX_MT(SPLIT_MULTIALLELICS_MT.out.vcf) - - // Removing duplicates and merging if there is more than one sample - SPLIT_MULTIALLELICS_MT.out.vcf - .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_in_remdup } - REMOVE_DUPLICATES_MT(ch_in_remdup, ch_genome_fasta) - 
TABIX_TABIX_MT2(REMOVE_DUPLICATES_MT.out.vcf) - - REMOVE_DUPLICATES_MT.out.vcf - .collect{it[1]} - .ifEmpty([]) - .toList() - .set { file_list_vcf } - - TABIX_TABIX_MT2.out.tbi - .collect{it[1]} - .ifEmpty([]) - .toList() - .set { file_list_tbi } - - ch_case_info - .combine(file_list_vcf) - .combine(file_list_tbi) - .set { ch_rem_dup_vcf_tbi } - - ch_rem_dup_vcf_tbi.branch { - meta, vcf, tbi -> - single: vcf.size() == 1 - return [meta, vcf] - multiple: vcf.size() > 1 - return [meta, vcf, tbi] - }.set { ch_case_vcf } - - BCFTOOLS_MERGE_MT( ch_case_vcf.multiple, - ch_genome_fasta, - ch_genome_fai, - [] - ) - - BCFTOOLS_MERGE_MT.out.merged_variants - .mix(ch_case_vcf.single) - .set { ch_annotation_in } - - TABIX_TABIX_MERGE(ch_annotation_in) - - // Annotating with CADD - ANNOTATE_CADD ( - ch_annotation_in, - TABIX_TABIX_MERGE.out.tbi, - ch_cadd_header, - ch_cadd_resources - ) - - // Pick input for vep - ch_annotation_in - .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) - .branch { it -> - merged: it[2].equals("null") - return [it[0], it[1]] - cadd: !(it[2].equals("null")) - return [it[2], it[3]] - } - .set { ch_for_mix } - ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd) - - // Annotating with ensembl Vep - ENSEMBLVEP_MT( - ch_vep_in, - ch_genome_fasta, - val_vep_genome, - "homo_sapiens", - val_vep_cache_version, - ch_vep_cache, - [] - ) - - // Running vcfanno - TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz) - ENSEMBLVEP_MT.out.vcf_gz - .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} - .set { ch_in_vcfanno } - - VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources) - - // HMTNOTE ANNOTATE - HMTNOTE_ANNOTATE(VCFANNO_MT.out.vcf) - HMTNOTE_ANNOTATE.out.vcf.map{meta, vcf -> - return [meta, WorkflowRaredisease.replaceSpacesInInfoColumn(vcf, vcf.parent.toString(), vcf.baseName)] - } - .set { ch_hmtnote_reformatted } - ZIP_TABIX_HMTNOTE(ch_hmtnote_reformatted) - - // Prepare output 
- ch_vcf_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf] } - ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] } - - // Running haplogrep2 - HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz") - - ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first()) - ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first()) - ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first()) - ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) - ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) - ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions) - ch_versions = ch_versions.mix(VCFANNO_MT.out.versions) - ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions) - ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions) - - emit: - haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ] - vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ] - tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ] - report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf index 81738085..cb14c9b7 100644 --- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf +++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf @@ -1,5 +1,5 @@ // -// Merge and annotate MT +// Merge and normalize MT variants // include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' @@ -25,7 +25,7 @@ workflow POSTPROCESS_MT_CALLS { main: ch_versions = Channel.empty() - // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT + // LIFTOVER SHIFTED VCF TO REFERENCE MT POSITIONS PICARD_LIFTOVERVCF ( ch_mtshift_vcf, 
ch_genome_dictionary, From 29c9c85ea9daf0b83fe6437c1ae9b23055d80eb1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 15 Sep 2023 23:36:59 +0200 Subject: [PATCH 22/22] update test config --- conf/test.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index 10e89c43..c7593f76 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,8 +24,8 @@ params { mito_name = 'MT' // analysis params - skip_cnv_calling = true - skip_mt_analysis = true + skip_cnv_calling = true + skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv'