From 3abdb850836d56b80f4a2f4d71b998a7d438c7f2 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 23 Aug 2023 22:13:48 +0200
Subject: [PATCH 01/22] align
---
subworkflows/local/align.nf | 93 ++++++++++++++-----
subworkflows/local/alignment/align_MT.nf | 56 +++++++++++
subworkflows/local/convert_mt_bam_to_fastq.nf | 36 +++++++
workflows/raredisease.nf | 30 +++---
4 files changed, 182 insertions(+), 33 deletions(-)
create mode 100644 subworkflows/local/alignment/align_MT.nf
create mode 100644 subworkflows/local/convert_mt_bam_to_fastq.nf
diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
index 3455c3b9..f08ae7df 100644
--- a/subworkflows/local/align.nf
+++ b/subworkflows/local/align.nf
@@ -2,49 +2,100 @@
// Map to reference
//
-include { ALIGN_BWAMEM2 } from './alignment/align_bwamem2'
-include { ALIGN_SENTIEON } from './alignment/align_sentieon'
-include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
+include { ALIGN_BWAMEM2 } from './alignment/align_bwamem2'
+include { ALIGN_SENTIEON } from './alignment/align_sentieon'
+include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
+include { ALIGN_MT } from './alignment/align_MT'
+include { ALIGN_MT as ALIGN_MT_SHIFT } from './alignment/align_MT'
+include { CONVERT_MT_BAM_TO_FASTQ } from './convert_mt_bam_to_fastq'
workflow ALIGN {
take:
- ch_reads_input // channel: [mandatory] [ val(meta), [path(reads)] ]
- ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_bwa_index // channel: [mandatory] [ val(meta), path(index) ]
- ch_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ]
- val_platform // string: [mandatory] illumina or a different technology
+ ch_reads // channel: [mandatory] [ val(meta), [path(reads)] ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_genome_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
+ ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
+ ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
+ ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
+ ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ val_platform // string: [mandatory] illumina or a different technology
main:
ch_versions = Channel.empty()
ALIGN_BWAMEM2 ( // Triggered when params.aligner is set as bwamem2
- ch_reads_input,
- ch_bwamem2_index,
+ ch_reads,
+ ch_genome_bwamem2index,
ch_genome_fasta,
ch_genome_fai,
val_platform
)
ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon
- ch_reads_input,
+ ch_reads,
ch_genome_fasta,
ch_genome_fai,
- ch_bwa_index,
+ ch_genome_bwaindex,
val_platform
)
- ch_marked_bam = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bam, ALIGN_SENTIEON.out.marked_bam)
- ch_marked_bai = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bai, ALIGN_SENTIEON.out.marked_bai)
- ch_bam_bai = ch_marked_bam.join(ch_marked_bai, failOnMismatch:true, failOnDuplicate:true)
+ ch_genome_marked_bam = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bam, ALIGN_SENTIEON.out.marked_bam)
+ ch_genome_marked_bai = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bai, ALIGN_SENTIEON.out.marked_bai)
+ ch_genome_bam_bai = ch_genome_marked_bam.join(ch_genome_marked_bai, failOnMismatch:true, failOnDuplicate:true)
- SAMTOOLS_VIEW( ch_bam_bai, ch_genome_fasta, [] )
+ // PREPARING READS FOR MT ALIGNMENT
+ CONVERT_MT_BAM_TO_FASTQ (
+ ch_genome_bam_bai,
+ ch_genome_fasta,
+ ch_genome_fai,
+ ch_genome_dictionary
+ )
+
+ ALIGN_MT (
+ CONVERT_MT_BAM_TO_FASTQ.out.fastq,
+ CONVERT_MT_BAM_TO_FASTQ.out.bam,
+ ch_genome_bwaindex,
+ ch_genome_bwamem2index,
+ ch_genome_fasta,
+ ch_genome_dictionary,
+ ch_genome_fai
+ )
+
+ ALIGN_MT_SHIFT (
+ CONVERT_MT_BAM_TO_FASTQ.out.fastq,
+ CONVERT_MT_BAM_TO_FASTQ.out.bam,
+ ch_mtshift_bwaindex,
+ ch_mtshift_bwamem2index,
+ ch_mtshift_fasta,
+ ch_mtshift_dictionary,
+ ch_mtshift_fai
+ )
+
+ ch_mt_marked_bam = Channel.empty().mix(ALIGN_MT.out.marked_bam, ALIGN_MT.out.marked_bam)
+ ch_mt_marked_bai = Channel.empty().mix(ALIGN_MT.out.marked_bai, ALIGN_MT.out.marked_bai)
+ ch_mt_bam_bai = ch_mt_marked_bam.join(ch_mt_marked_bai, failOnMismatch:true, failOnDuplicate:true)
+
+ ch_mtshift_marked_bam = Channel.empty().mix(ALIGN_MT_SHIFT.out.marked_bam, ALIGN_MT_SHIFT.out.marked_bam)
+ ch_mtshift_marked_bai = Channel.empty().mix(ALIGN_MT_SHIFT.out.marked_bai, ALIGN_MT_SHIFT.out.marked_bai)
+ ch_mtshift_bam_bai = ch_mtshift_marked_bam.join(ch_mtshift_marked_bai, failOnMismatch:true, failOnDuplicate:true)
+
+ SAMTOOLS_VIEW( ch_genome_bam_bai, ch_genome_fasta, [] )
ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions, ALIGN_SENTIEON.out.versions)
emit:
- marked_bam = ch_marked_bam // channel: [ val(meta), path(bam) ]
- marked_bai = ch_marked_bai // channel: [ val(meta), path(bai) ]
- bam_bai = ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
+ genome_marked_bam = ch_genome_marked_bam // channel: [ val(meta), path(bam) ]
+ genome_marked_bai = ch_genome_marked_bai // channel: [ val(meta), path(bai) ]
+ genome_bam_bai = ch_genome_bam_bai // channel: [ val(meta), path(bam), path(bai) ]
+ mt_marked_bam = ch_mt_marked_bam // channel: [ val(meta), path(bam) ]
+ mt_marked_bai = ch_mt_marked_bai // channel: [ val(meta), path(bai) ]
+ mt_bam_bai = ch_mt_bam_bai // channel: [ val(meta), path(bam), path(bai) ]
+ mtshift_marked_bam = ch_mtshift_marked_bam // channel: [ val(meta), path(bam) ]
+ mtshift_marked_bai = ch_mtshift_marked_bai // channel: [ val(meta), path(bai) ]
+ mtshift_bam_bai = ch_mtshift_bam_bai // channel: [ val(meta), path(bam), path(bai) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
}
diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf
new file mode 100644
index 00000000..5ae8ace7
--- /dev/null
+++ b/subworkflows/local/alignment/align_MT.nf
@@ -0,0 +1,56 @@
+//
+// Align and call MT
+//
+
+include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main'
+include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main'
+include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main'
+include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main'
+include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MT } from '../../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } from '../../../modules/nf-core/samtools/sort/main'
+
+workflow ALIGN_MT {
+ take:
+ ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ]
+ ch_ubam // channel: [mandatory] [ val(meta), path(bam) ]
+ ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ]
+ ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true)
+
+ SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, ch_fai )
+
+ Channel.empty()
+ .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] })
+ .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true)
+ .set {ch_bam_ubam}
+
+ GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict)
+
+ PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam)
+
+ PICARD_MARKDUPLICATES_MT (PICARD_ADDORREPLACEREADGROUPS_MT.out.bam, ch_fasta, ch_fai)
+
+ SAMTOOLS_SORT_MT (PICARD_MARKDUPLICATES_MT.out.bam)
+
+ SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam)
+
+ ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first())
+ ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first())
+ ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first())
+ ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first())
+ ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first())
+ ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first())
+
+ emit:
+ marked_bam = SAMTOOLS_SORT_MT.out.bam // channel: [ val(meta), path(bam) ]
+ marked_bai = SAMTOOLS_INDEX_MT.out.bai // channel: [ val(meta), path(bai) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/convert_mt_bam_to_fastq.nf b/subworkflows/local/convert_mt_bam_to_fastq.nf
new file mode 100644
index 00000000..ca19ea82
--- /dev/null
+++ b/subworkflows/local/convert_mt_bam_to_fastq.nf
@@ -0,0 +1,36 @@
+//
+// Prepare bam files for MT alignment
+//
+
+include { GATK4_PRINTREADS as GATK4_PRINTREADS_MT } from '../../modules/nf-core/gatk4/printreads/main'
+include { GATK4_REVERTSAM as GATK4_REVERTSAM_MT } from '../../modules/nf-core/gatk4/revertsam/main'
+include { GATK4_SAMTOFASTQ as GATK4_SAMTOFASTQ_MT } from '../../modules/nf-core/gatk4/samtofastq/main'
+
+workflow CONVERT_MT_BAM_TO_FASTQ {
+ take:
+ ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // Outputs bam containing only MT
+ GATK4_PRINTREADS_MT ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict )
+
+ // Removes alignment information
+ GATK4_REVERTSAM_MT ( GATK4_PRINTREADS_MT.out.bam )
+
+ // Outputs fastq files
+ GATK4_SAMTOFASTQ_MT ( GATK4_REVERTSAM_MT.out.bam )
+
+ ch_versions = ch_versions.mix(GATK4_PRINTREADS_MT.out.versions.first())
+ ch_versions = ch_versions.mix(GATK4_REVERTSAM_MT.out.versions.first())
+ ch_versions = ch_versions.mix(GATK4_SAMTOFASTQ_MT.out.versions.first())
+
+ emit:
+ fastq = GATK4_SAMTOFASTQ_MT.out.fastq // channel: [ val(meta), [ path(fastq) ] ]
+ bam = GATK4_REVERTSAM_MT.out.bam // channel: [ val(meta), path(bam) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index a2966fe7..ce35c6bb 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -296,6 +296,12 @@ workflow RAREDISEASE {
ch_genome_fai,
ch_genome_bwaindex,
ch_genome_bwamem2index,
+ ch_genome_dictionary,
+ ch_mtshift_bwaindex,
+ ch_mtshift_bwamem2index,
+ ch_mtshift_fasta,
+ ch_mtshift_dictionary,
+ ch_mtshift_fai,
params.platform
)
.set { ch_mapped }
@@ -303,9 +309,9 @@ workflow RAREDISEASE {
// BAM QUALITY CHECK
QC_BAM (
- ch_mapped.marked_bam,
- ch_mapped.marked_bai,
- ch_mapped.bam_bai,
+ ch_mapped.genome_marked_bam,
+ ch_mapped.genome_marked_bai,
+ ch_mapped.genome_bam_bai,
ch_genome_fasta,
ch_genome_fai,
ch_bait_intervals,
@@ -318,7 +324,7 @@ workflow RAREDISEASE {
// EXPANSIONHUNTER AND STRANGER
CALL_REPEAT_EXPANSIONS (
- ch_mapped.bam_bai,
+ ch_mapped.genome_bam_bai,
ch_variant_catalog,
ch_case_info,
ch_genome_fasta,
@@ -327,12 +333,12 @@ workflow RAREDISEASE {
ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
// STEP 1.7: SMNCOPYNUMBERCALLER
- ch_mapped.bam_bai
+ ch_mapped.genome_bam_bai
.collect{it[1]}
.toList()
.set { ch_bam_list }
- ch_mapped.bam_bai
+ ch_mapped.genome_bam_bai
.collect{it[2]}
.toList()
.set { ch_bai_list }
@@ -349,7 +355,7 @@ workflow RAREDISEASE {
// STEP 2: VARIANT CALLING
CALL_SNV (
- ch_mapped.bam_bai,
+ ch_mapped.genome_bam_bai,
ch_genome_fasta,
ch_genome_fai,
ch_dbsnp,
@@ -361,9 +367,9 @@ workflow RAREDISEASE {
ch_versions = ch_versions.mix(CALL_SNV.out.versions)
CALL_STRUCTURAL_VARIANTS (
- ch_mapped.marked_bam,
- ch_mapped.marked_bai,
- ch_mapped.bam_bai,
+ ch_mapped.genome_marked_bam,
+ ch_mapped.genome_marked_bai,
+ ch_mapped.genome_bam_bai,
ch_genome_bwaindex,
ch_genome_fasta,
ch_genome_fai,
@@ -387,7 +393,7 @@ workflow RAREDISEASE {
// GENS
if (params.gens_switch) {
GENS (
- ch_mapped.bam_bai,
+ ch_mapped.genome_bam_bai,
CALL_SNV.out.vcf,
ch_genome_fasta,
ch_genome_fai,
@@ -439,7 +445,7 @@ workflow RAREDISEASE {
if (!params.skip_mt_analysis) {
ANALYSE_MT (
- ch_mapped.bam_bai,
+ ch_mapped.genome_bam_bai,
ch_cadd_header,
ch_cadd_resources,
ch_genome_bwaindex,
From 1bdbbf68f4bbe23af14ab56e574c7530f1dd59c3 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 23 Aug 2023 22:17:05 +0200
Subject: [PATCH 02/22] comment
---
subworkflows/local/alignment/align_MT.nf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf
index 5ae8ace7..17ecc362 100644
--- a/subworkflows/local/alignment/align_MT.nf
+++ b/subworkflows/local/alignment/align_MT.nf
@@ -1,5 +1,5 @@
//
-// Align and call MT
+// Align MT
//
include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main'
From 698e067bdbb2e28d8ee72973a342fcb65a0dd5c8 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Thu, 24 Aug 2023 00:37:05 +0200
Subject: [PATCH 03/22] update versions
---
subworkflows/local/align.nf | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
index f08ae7df..a705f16e 100644
--- a/subworkflows/local/align.nf
+++ b/subworkflows/local/align.nf
@@ -75,17 +75,20 @@ workflow ALIGN {
ch_mtshift_fai
)
- ch_mt_marked_bam = Channel.empty().mix(ALIGN_MT.out.marked_bam, ALIGN_MT.out.marked_bam)
- ch_mt_marked_bai = Channel.empty().mix(ALIGN_MT.out.marked_bai, ALIGN_MT.out.marked_bai)
+ ch_mt_marked_bam = ALIGN_MT.out.marked_bam
+ ch_mt_marked_bai = ALIGN_MT.out.marked_bai
ch_mt_bam_bai = ch_mt_marked_bam.join(ch_mt_marked_bai, failOnMismatch:true, failOnDuplicate:true)
- ch_mtshift_marked_bam = Channel.empty().mix(ALIGN_MT_SHIFT.out.marked_bam, ALIGN_MT_SHIFT.out.marked_bam)
- ch_mtshift_marked_bai = Channel.empty().mix(ALIGN_MT_SHIFT.out.marked_bai, ALIGN_MT_SHIFT.out.marked_bai)
+ ch_mtshift_marked_bam = ALIGN_MT_SHIFT.out.marked_bam
+ ch_mtshift_marked_bai = ALIGN_MT_SHIFT.out.marked_bai
ch_mtshift_bam_bai = ch_mtshift_marked_bam.join(ch_mtshift_marked_bai, failOnMismatch:true, failOnDuplicate:true)
SAMTOOLS_VIEW( ch_genome_bam_bai, ch_genome_fasta, [] )
- ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions, ALIGN_SENTIEON.out.versions)
+ ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions,
+ ALIGN_SENTIEON.out.versions,
+ ALIGN_MT.out.versions,
+ ALIGN_MT_SHIFT.out.versions)
emit:
genome_marked_bam = ch_genome_marked_bam // channel: [ val(meta), path(bam) ]
From 97796fa2cf2af16d314bf84f0b4f6c988970c9d8 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Thu, 24 Aug 2023 00:38:15 +0200
Subject: [PATCH 04/22] mt snv calling
---
subworkflows/local/call_snv.nf | 84 ++++++++++++++-----
.../local/variant_calling/call_snv_MT.nf | 50 +++++++++++
workflows/raredisease.nf | 13 ++-
3 files changed, 124 insertions(+), 23 deletions(-)
create mode 100644 subworkflows/local/variant_calling/call_snv_MT.nf
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index 7b63825e..75be4080 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -2,21 +2,31 @@
// call Single-nucleotide Varinats
//
-include { CALL_SNV_DEEPVARIANT } from './variant_calling/call_snv_deepvariant'
-include { CALL_SNV_SENTIEON } from './variant_calling/call_snv_sentieon'
-include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main'
-
+include { CALL_SNV_DEEPVARIANT } from './variant_calling/call_snv_deepvariant'
+include { CALL_SNV_SENTIEON } from './variant_calling/call_snv_sentieon'
+include { CALL_SNV_MT } from './variant_calling/call_snv_MT'
+include { CALL_SNV_MT as CALL_SNV_MT_SHIFT } from './variant_calling/call_snv_MT'
+include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main'
workflow CALL_SNV {
take:
- ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
- ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_known_dbsnp // channel: [optional] [ val(meta), path(vcf) ]
- ch_known_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ]
- ch_call_interval // channel: [mandatory] [ path(intervals) ]
- ch_ml_model // channel: [mandatory] [ path(model) ]
- ch_case_info // channel: [mandatory] [ val(case_info) ]
+ ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_mtshift_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_mt_intervals // channel: [optional] [ path(interval_list) ]
+ ch_mtshift_fasta // channel: [optional] [ val(meta), path(fasta) ]
+ ch_mtshift_fai // channel: [optional] [ val(meta), path(fai) ]
+ ch_mtshift_dictionary // channel: [optional] [ val(meta), path(dict) ]
+ ch_mtshift_intervals // channel: [optional] [ path(interval_list) ]
+ ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ]
+ ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ]
+ ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ]
+ ch_call_interval // channel: [mandatory] [ path(intervals) ]
+ ch_ml_model // channel: [mandatory] [ path(model) ]
+ ch_case_info // channel: [mandatory] [ val(case_info) ]
main:
ch_versions = Channel.empty()
@@ -24,31 +34,63 @@ workflow CALL_SNV {
ch_tabix = Channel.empty()
CALL_SNV_DEEPVARIANT ( // triggered only when params.variant_caller is set as deepvariant
- ch_bam_bai,
+ ch_genome_bam_bai,
ch_genome_fasta,
ch_genome_fai,
ch_case_info
)
CALL_SNV_SENTIEON( // triggered only when params.variant_caller is set as sentieon
- ch_bam_bai,
+ ch_genome_bam_bai,
ch_genome_fasta,
ch_genome_fai,
- ch_known_dbsnp,
- ch_known_dbsnp_tbi,
+ ch_dbsnp,
+ ch_dbsnp_tbi,
ch_call_interval,
ch_ml_model,
ch_case_info
)
- ch_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf)
- ch_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix)
+ CALL_SNV_MT(
+ ch_mt_bam_bai,
+ ch_genome_fasta,
+ ch_genome_fai,
+ ch_genome_dictionary,
+ ch_mt_intervals
+ )
+
+ CALL_SNV_MT_SHIFT(
+ ch_mtshift_bam_bai,
+ ch_mtshift_fasta,
+ ch_mtshift_fai,
+ ch_mtshift_dictionary,
+ ch_mtshift_intervals
+ )
+
+ // LIFTOVER VCF FROM SHIFTED MT TO REFERENCE MT
+ PICARD_LIFTOVERVCF (
+ CALL_SNV_MT_SHIFT.out.vcf,
+ ch_genome_dictionary,
+ ch_genome_fasta,
+ ch_mtshift_backchain,
+ )
+
+ ch_genome_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf)
+ ch_genome_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix)
+
+ ch_mt_vcf = CALL_SNV_MT.out.vcf
+ ch_mtshift_vcf = PICARD_LIFTOVERVCF.out.vcf_lifted
ch_versions = ch_versions.mix(CALL_SNV_DEEPVARIANT.out.versions)
ch_versions = ch_versions.mix(CALL_SNV_SENTIEON.out.versions)
+ ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions)
+ ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions)
+ ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first())
emit:
- vcf = ch_vcf // channel: [ val(meta), path(vcf) ]
- tabix = ch_tabix // channel: [ val(meta), path(tbi) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
+ genome_vcf = ch_genome_vcf // channel: [ val(meta), path(vcf) ]
+ genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ]
+ mt_vcf = ch_mt_vcf // channel: [ val(meta), path(vcf) ]
+ mtshift_vcf = ch_mtshift_vcf // channel: [ val(meta), path(vcf) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
}
diff --git a/subworkflows/local/variant_calling/call_snv_MT.nf b/subworkflows/local/variant_calling/call_snv_MT.nf
new file mode 100644
index 00000000..e91718ad
--- /dev/null
+++ b/subworkflows/local/variant_calling/call_snv_MT.nf
@@ -0,0 +1,50 @@
+//
+// Call SNV MT
+//
+
+include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main'
+include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main'
+include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main'
+include { MT_DELETION } from '../../../modules/local/mt_deletion_script'
+include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main'
+
+workflow CALL_SNV_MT {
+ take:
+ ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_intervals // channel: [mandatory] [ path(interval_list) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ ch_bam_bai_int = ch_bam_bai.combine(ch_intervals)
+
+ GATK4_MUTECT2_MT (ch_bam_bai_int, ch_fasta, ch_fai, ch_dict, [], [], [],[])
+
+ HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf)
+
+ // Filter Mutect2 calls
+ ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+ ch_mutect_out = ch_mutect_vcf.join(GATK4_MUTECT2_MT.out.stats, failOnMismatch:true, failOnDuplicate:true)
+ ch_to_filt = ch_mutect_out.map {
+ meta, vcf, tbi, stats ->
+ return [meta, vcf, tbi, stats, [], [], [], []]
+ }
+
+ GATK4_FILTERMUTECTCALLS_MT (ch_to_filt, ch_fasta, ch_fai, ch_dict)
+
+ ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first())
+ ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first())
+ ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first())
+
+ emit:
+ vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ]
+ tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ]
+ stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ]
+ filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ]
+ txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ]
+ html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index ce35c6bb..5112553b 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -356,8 +356,17 @@ workflow RAREDISEASE {
// STEP 2: VARIANT CALLING
CALL_SNV (
ch_mapped.genome_bam_bai,
+ ch_mapped.mt_bam_bai,
+ ch_mapped.mtshift_bam_bai,
ch_genome_fasta,
ch_genome_fai,
+ ch_genome_dictionary,
+ ch_mt_intervals,
+ ch_mtshift_fasta,
+ ch_mtshift_fai,
+ ch_mtshift_dictionary,
+ ch_mtshift_intervals,
+ ch_mtshift_backchain,
ch_dbsnp,
ch_dbsnp_tbi,
ch_call_interval,
@@ -385,7 +394,7 @@ workflow RAREDISEASE {
// ped correspondence, sex check, ancestry check
PEDDY_CHECK (
- CALL_SNV.out.vcf.join(CALL_SNV.out.tabix, failOnMismatch:true, failOnDuplicate:true),
+ CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true),
ch_pedfile
)
ch_versions = ch_versions.mix(PEDDY_CHECK.out.versions)
@@ -477,7 +486,7 @@ workflow RAREDISEASE {
if (!params.skip_snv_annotation) {
- ch_vcf = CALL_SNV.out.vcf.join(CALL_SNV.out.tabix, failOnMismatch:true, failOnDuplicate:true)
+ ch_vcf = CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true)
if (!params.skip_mt_analysis) {
ch_vcf
From a4320bb64381556ec9777a7290df409fb3defa2e Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Thu, 24 Aug 2023 15:37:10 +0200
Subject: [PATCH 05/22] SV config
---
conf/modules/align_MT.config | 98 +++++++++++++++++++
conf/modules/call_snv_MT.config | 43 ++++++++
conf/modules/call_sv_MT.config | 51 ++++++++++
nextflow.config | 3 +
.../local/call_structural_variants.nf | 31 +++---
.../local/variant_calling/call_sv_MT.nf | 29 ++++++
workflows/raredisease.nf | 3 +
7 files changed, 247 insertions(+), 11 deletions(-)
create mode 100644 conf/modules/align_MT.config
create mode 100644 conf/modules/call_snv_MT.config
create mode 100644 conf/modules/call_sv_MT.config
create mode 100644 subworkflows/local/variant_calling/call_sv_MT.nf
diff --git a/conf/modules/align_MT.config b/conf/modules/align_MT.config
new file mode 100644
index 00000000..a01bfc60
--- /dev/null
+++ b/conf/modules/align_MT.config
@@ -0,0 +1,98 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// Mitochondria alignment options
+//
+
+process {
+
+ withName: '.*ALIGN_MT:BWAMEM2_MEM_MT' {
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
+ ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
+ }
+
+ withName: '.*ALIGN_MT:SENTIEON_BWAMEM_MT' {
+ ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
+ ext.prefix = { "${meta.id}.sorted" }
+ }
+
+ withName: '.*ALIGN_MT:GATK4_MERGEBAMALIGNMENT_MT' {
+ ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT'
+ ext.prefix = { "${meta.id}_merged" }
+ }
+
+ withName: '.*ALIGN_MT:PICARD_ADDORREPLACEREADGROUPS_MT' {
+ ext.args = { [
+ "--VALIDATION_STRINGENCY LENIENT",
+ "--RGLB lib",
+ "--RGPL ILLUMINA",
+ "--RGPU barcode",
+ "--RGSM ${meta.id}"
+ ].join(' ').trim() }
+ }
+
+ withName: '.*ALIGN_MT:PICARD_MARKDUPLICATES_MT' {
+ ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true'
+ ext.prefix = { "${meta.id}_markduplicates" }
+ }
+
+ withName: '.*ALIGN_MT:SAMTOOLS_SORT_MT' {
+ ext.prefix = { "${meta.id}_sorted" }
+ }
+
+}
+
+//
+// Shifted mitochondria alignment options
+//
+
+process {
+
+ withName: '.*ALIGN_MT_SHIFT:BWAMEM2_MEM_MT' {
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
+ ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
+ }
+
+ withName: '.*ALIGN_MT_SHIFT:SENTIEON_BWAMEM_MT' {
+ ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
+ ext.prefix = { "${meta.id}.sorted" }
+ }
+
+ withName: '.*ALIGN_MT_SHIFT:GATK4_MERGEBAMALIGNMENT_MT' {
+ ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT'
+ ext.prefix = { "${meta.id}_merged_shifted" }
+ }
+
+ withName: '.*ALIGN_MT_SHIFT:PICARD_ADDORREPLACEREADGROUPS_MT' {
+ ext.args = { [
+ "--VALIDATION_STRINGENCY LENIENT",
+ "--RGLB lib",
+ "--RGPL ${params.platform}",
+ "--RGPU barcode",
+ "--RGSM ${meta.id}"
+ ].join(' ').trim() }
+ }
+
+ withName: '.*ALIGN_MT_SHIFT:PICARD_MARKDUPLICATES_MT' {
+ ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true'
+ ext.prefix = { "${meta.id}_markduplicates_shifted" }
+ }
+
+ withName: '.*ALIGN_MT_SHIFT:SAMTOOLS_SORT_MT' {
+ ext.prefix = { "${meta.id}_sorted_shifted" }
+ }
+
+}
diff --git a/conf/modules/call_snv_MT.config b/conf/modules/call_snv_MT.config
new file mode 100644
index 00000000..420929e5
--- /dev/null
+++ b/conf/modules/call_snv_MT.config
@@ -0,0 +1,43 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// Call SNVs in mitochondria
+//
+
+process {
+
+ withName: '.*CALL_SNV_MT:GATK4_MUTECT2_MT' {
+ ext.args = '--mitochondria-mode TRUE'
+ }
+
+ withName: '.*CALL_SNV_MT:GATK4_FILTERMUTECTCALLS_MT' {
+ ext.prefix = { "${meta.id}_filtered" }
+ }
+}
+
+//
+// Call SNVs in shifted mitochondria
+//
+
+process {
+
+ withName: '.*CALL_SNV_MT_SHIFT:GATK4_MUTECT2_MT' {
+ ext.args = '--mitochondria-mode TRUE'
+ }
+
+ withName: '.*CALL_SNV_MT_SHIFT:GATK4_FILTERMUTECTCALLS_MT' {
+ ext.prefix = { "${meta.id}_filtered_shifted" }
+ }
+
+}
diff --git a/conf/modules/call_sv_MT.config b/conf/modules/call_sv_MT.config
new file mode 100644
index 00000000..6b769e5c
--- /dev/null
+++ b/conf/modules/call_sv_MT.config
@@ -0,0 +1,51 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// Call SV in mitochondria
+//
+
+process {
+
+ withName: '.*CALL_SV_MT:MT_DELETION' {
+ ext.args = '-s --insert-size 16000'
+ publishDir = [
+ path: { "${params.outdir}/mt_sv" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*CALL_SV_MT:EKLIPSE' {
+ publishDir = [
+ path: { "${params.outdir}/mt_sv" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+}
+
+//
+// Call SV in shifted mitochondria
+//
+
+process {
+
+ withName: '.*CALL_SV_MT_SHIFT:MT_DELETION' {
+ ext.when = false
+ }
+
+ withName: '.*CALL_SV_MT_SHIFT:EKLIPSE_MT' {
+ ext.when = false
+ }
+}
diff --git a/nextflow.config b/nextflow.config
index 04b1b670..1cd04d19 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -281,6 +281,9 @@ manifest {
includeConfig 'conf/modules/raredisease.config'
includeConfig 'conf/modules/align.config'
includeConfig 'conf/modules/analyse_MT.config'
+includeConfig 'conf/modules/align_MT.config'
+includeConfig 'conf/modules/call_snv_MT.config'
+includeConfig 'conf/modules/call_sv_MT.config'
includeConfig 'conf/modules/call_snv.config'
includeConfig 'conf/modules/call_structural_variants.config'
includeConfig 'conf/modules/annotate_snvs.config'
diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
index 65016260..9e749add 100644
--- a/subworkflows/local/call_structural_variants.nf
+++ b/subworkflows/local/call_structural_variants.nf
@@ -2,21 +2,26 @@
// A nested subworkflow to call structural variants.
//
-include { CALL_SV_MANTA } from './variant_calling/call_sv_manta'
-include { CALL_SV_TIDDIT } from './variant_calling/call_sv_tiddit'
-include { SVDB_MERGE } from '../../modules/nf-core/svdb/merge/main'
-include { CALL_SV_GERMLINECNVCALLER } from './variant_calling/call_sv_germlinecnvcaller'
-include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main'
+include { CALL_SV_MANTA } from './variant_calling/call_sv_manta'
+include { CALL_SV_MT } from './variant_calling/call_sv_MT'
+include { CALL_SV_MT as CALL_SV_MT_SHIFT } from './variant_calling/call_sv_MT'
+include { CALL_SV_TIDDIT } from './variant_calling/call_sv_tiddit'
+include { SVDB_MERGE } from '../../modules/nf-core/svdb/merge/main'
+include { CALL_SV_GERMLINECNVCALLER } from './variant_calling/call_sv_germlinecnvcaller'
+include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main'
workflow CALL_STRUCTURAL_VARIANTS {
take:
- ch_bam // channel: [mandatory] [ val(meta), path(bam) ]
- ch_bai // channel: [mandatory] [ val(meta), path(bai) ]
- ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_genome_bam // channel: [mandatory] [ val(meta), path(bam) ]
+ ch_genome_bai // channel: [mandatory] [ val(meta), path(bai) ]
+ ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_mtshift_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
ch_bwa_index // channel: [mandatory] [ val(meta), path(index)]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_case_info // channel: [mandatory] [ val(case_info) ]
ch_target_bed // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ]
ch_genome_dictionary // channel: [optional; used by mandatory for GATK's cnvcaller][ val(meta), path(dict) ]
@@ -28,21 +33,25 @@ workflow CALL_STRUCTURAL_VARIANTS {
main:
ch_versions = Channel.empty()
- CALL_SV_MANTA (ch_bam, ch_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
+ CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
.diploid_sv_vcf
.collect{it[1]}
.set{ manta_vcf }
- CALL_SV_TIDDIT (ch_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
+ CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
.vcf
.collect{it[1]}
.set { tiddit_vcf }
- CALL_SV_GERMLINECNVCALLER (ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model)
+ CALL_SV_GERMLINECNVCALLER (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model)
.genotyped_intervals_vcf
.collect{it[1]}
.set { gcnvcaller_vcf }
+ CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
+
+ CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta)
+
//merge
if (params.skip_cnv_calling) {
tiddit_vcf
diff --git a/subworkflows/local/variant_calling/call_sv_MT.nf b/subworkflows/local/variant_calling/call_sv_MT.nf
new file mode 100644
index 00000000..c8fdff45
--- /dev/null
+++ b/subworkflows/local/variant_calling/call_sv_MT.nf
@@ -0,0 +1,29 @@
+//
+// Call SV MT
+//
+
+include { MT_DELETION } from '../../../modules/local/mt_deletion_script'
+include { EKLIPSE } from '../../../modules/nf-core/eklipse/main'
+
+workflow CALL_SV_MT {
+ take:
+ ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+
+ main:
+ ch_versions = Channel.empty()
+ // Both SV callers below consume the same ch_bam_bai channel.
+ EKLIPSE(ch_bam_bai,[])
+
+ MT_DELETION(ch_bam_bai, ch_fasta)
+
+ ch_versions = ch_versions.mix(EKLIPSE.out.versions.first())
+ ch_versions = ch_versions.mix(MT_DELETION.out.versions.first())
+
+ emit:
+ eklipse_del = EKLIPSE.out.deletions // channel: [ val(meta), path(csv) ]
+ eklipse_genes = EKLIPSE.out.genes // channel: [ val(meta), path(csv) ]
+ eklipse_circos = EKLIPSE.out.circos // channel: [ val(meta), path(png) ]
+ mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 5112553b..e88686e5 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -379,9 +379,12 @@ workflow RAREDISEASE {
ch_mapped.genome_marked_bam,
ch_mapped.genome_marked_bai,
ch_mapped.genome_bam_bai,
+ ch_mapped.mt_bam_bai,
+ ch_mapped.mtshift_bam_bai,
ch_genome_bwaindex,
ch_genome_fasta,
ch_genome_fai,
+ ch_mtshift_fasta,
ch_case_info,
ch_target_bed,
ch_genome_dictionary,
From 58d70e067373f37e9ae9c17233da6a9225fcb446 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Thu, 24 Aug 2023 17:23:12 +0200
Subject: [PATCH 06/22] postprocess
---
...nvs.config => annotate_genome_snvs.config} | 28 ++---
conf/modules/postptocess_MT_calls.config | 43 +++++++
nextflow.config | 4 +-
...notate_snvs.nf => annotate_genome_snvs.nf} | 5 +-
subworkflows/local/annotate_mt_snvs.nf | 112 ++++++++++++++++++
subworkflows/local/call_snv.nf | 20 ++--
.../variant_calling/postprocess_MT_calls.nf | 112 ++++++++++++++++++
7 files changed, 295 insertions(+), 29 deletions(-)
rename conf/modules/{annotate_snvs.config => annotate_genome_snvs.config} (85%)
create mode 100644 conf/modules/postptocess_MT_calls.config
rename subworkflows/local/{annotate_snvs.nf => annotate_genome_snvs.nf} (98%)
create mode 100644 subworkflows/local/annotate_mt_snvs.nf
create mode 100644 subworkflows/local/variant_calling/postprocess_MT_calls.nf
diff --git a/conf/modules/annotate_snvs.config b/conf/modules/annotate_genome_snvs.config
similarity index 85%
rename from conf/modules/annotate_snvs.config
rename to conf/modules/annotate_genome_snvs.config
index 093e94a1..0cbf5496 100644
--- a/conf/modules/annotate_snvs.config
+++ b/conf/modules/annotate_genome_snvs.config
@@ -16,43 +16,43 @@
//
process {
- withName: '.*ANNOTATE_SNVS:.*' {
+ withName: '.*ANNOTATE_GENOME_SNVS:.*' {
ext.when = !params.skip_snv_annotation
}
- withName: '.*ANNOTATE_SNVS:VCFANNO' {
+ withName: '.*ANNOTATE_GENOME_SNVS:VCFANNO' {
ext.prefix = { "${meta.id}_vcfanno" }
publishDir = [
enabled: false
]
}
- withName: '.*ANNOTATE_SNVS:BCFTOOLS_ROH' {
+ withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_ROH' {
ext.args = { "--samples ${meta.probands.join(",")} --skip-indels " }
ext.prefix = { "${meta.id}_roh" }
}
- withName: '.*ANNOTATE_SNVS:RHOCALL_ANNOTATE' {
+ withName: '.*ANNOTATE_GENOME_SNVS:RHOCALL_ANNOTATE' {
ext.args = { "--v14 " }
ext.prefix = { "${meta.id}_rohann" }
}
- withName: '.*ANNOTATE_SNVS:VCFANNO' {
+ withName: '.*ANNOTATE_GENOME_SNVS:VCFANNO' {
ext.prefix = { "${meta.id}_rohann_vcfanno" }
}
- withName: '.*ANNOTATE_SNVS:UPD_SITES' {
+ withName: '.*ANNOTATE_GENOME_SNVS:UPD_SITES' {
ext.prefix = { "${meta.id}_rohann_vcfanno_upd_sites" }
ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} sites"}
}
- withName: '.*ANNOTATE_SNVS:UPD_REGIONS' {
+ withName: '.*ANNOTATE_GENOME_SNVS:UPD_REGIONS' {
ext.prefix = { "${meta.id}_rohann_vcfanno_upd_regions" }
ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} regions --min-size 5 --min-sites 1"}
ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun }
}
- withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_SITES' {
+ withName: '.*ANNOTATE_GENOME_SNVS:CHROMOGRAPH_SITES' {
ext.prefix = { "${meta7.id}_rohann_vcfanno_upd_sites_chromograph" }
ext.args = { "--euploid" }
tag = {"${meta7.id}"}
@@ -63,7 +63,7 @@ process {
]
}
- withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_REGIONS' {
+ withName: '.*ANNOTATE_GENOME_SNVS:CHROMOGRAPH_REGIONS' {
ext.prefix = { "${meta6.id}_rohann_vcfanno_upd_regions_chromograph" }
ext.args = { '--euploid' }
ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun }
@@ -75,16 +75,16 @@ process {
]
}
- withName: '.*ANNOTATE_SNVS:BCFTOOLS_VIEW' {
+ withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_VIEW' {
ext.prefix = { "${meta.id}_rohann_vcfanno_filter" }
ext.args = { '--output-type z --exclude "INFO/GNOMADAF > 0.70 | INFO/GNOMADAF_popmax > 0.70" ' }
}
- withName: '.*ANNOTATE_SNVS:GATK4_SELECTVARIANTS' {
+ withName: '.*ANNOTATE_GENOME_SNVS:GATK4_SELECTVARIANTS' {
ext.prefix = { "${meta.id}_${intervals.simpleName}" }
}
- withName: '.*ANNOTATE_SNVS:ENSEMBLVEP_SNV' {
+ withName: '.*ANNOTATE_GENOME_SNVS:ENSEMBLVEP_SNV' {
ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_vep" }
ext.args = [
'--dir_plugins vep_cache/Plugins',
@@ -103,7 +103,7 @@ process {
].join(' ')
}
- withName: '.*ANNOTATE_SNVS:BCFTOOLS_CONCAT' {
+ withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_CONCAT' {
ext.prefix = { "${meta.id}_rohann_vcfanno_filter_vep" }
publishDir = [
path: { "${params.outdir}/annotate_snv" },
@@ -112,7 +112,7 @@ process {
]
}
- withName: '.*ANNOTATE_SNVS:TABIX_BCFTOOLS_CONCAT' {
+ withName: '.*ANNOTATE_GENOME_SNVS:TABIX_BCFTOOLS_CONCAT' {
publishDir = [
path: { "${params.outdir}/annotate_snv" },
mode: params.publish_dir_mode,
diff --git a/conf/modules/postptocess_MT_calls.config b/conf/modules/postptocess_MT_calls.config
new file mode 100644
index 00000000..3119012e
--- /dev/null
+++ b/conf/modules/postptocess_MT_calls.config
@@ -0,0 +1,43 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// Mitochondrial call post-processing options
+//
+
+process {
+ // Every selector below is scoped to processes inside the POSTPROCESS_MT_CALLS subworkflow.
+ withName: '.*POSTPROCESS_MT_CALLS:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' {
+ ext.prefix = { "${meta.id}_merged" }
+ }
+
+ withName: '.*POSTPROCESS_MT_CALLS:GATK4_VARIANTFILTRATION_MT' {
+ ext.prefix = { "${meta.id}_filt" }
+ }
+
+ withName: '.*POSTPROCESS_MT_CALLS:SPLIT_MULTIALLELICS_MT' {
+ ext.args = '--output-type z --multiallelics -both' // bcftools norm: split multiallelic sites into separate records
+ ext.prefix = { "${meta.id}_split" }
+ }
+
+ withName: '.*POSTPROCESS_MT_CALLS:REMOVE_DUPLICATES_MT' {
+ ext.args = '--output-type z --rm-dup none' // bcftools norm duplicate-removal step (aliased REMOVE_DUPLICATES_MT)
+ ext.prefix = { "${meta.id}_split_rmdup" }
+ }
+
+ withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_MERGE_MT' {
+ ext.args = '--output-type z'
+ ext.prefix = { "${meta.id}_merge_mt" }
+ }
+
+}
diff --git a/nextflow.config b/nextflow.config
index 1cd04d19..7c494c77 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -284,9 +284,11 @@ includeConfig 'conf/modules/analyse_MT.config'
includeConfig 'conf/modules/align_MT.config'
includeConfig 'conf/modules/call_snv_MT.config'
includeConfig 'conf/modules/call_sv_MT.config'
+includeConfig 'conf/modules/annotate_mt_snvs.config'
includeConfig 'conf/modules/call_snv.config'
+includeConfig 'conf/modules/postprocess_MT_calls.config'
includeConfig 'conf/modules/call_structural_variants.config'
-includeConfig 'conf/modules/annotate_snvs.config'
+includeConfig 'conf/modules/annotate_genome_snvs.config'
includeConfig 'conf/modules/annotate_structural_variants.config'
includeConfig 'conf/modules/align_and_call_MT.config'
includeConfig 'conf/modules/align_bwamem2.config'
diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf
similarity index 98%
rename from subworkflows/local/annotate_snvs.nf
rename to subworkflows/local/annotate_genome_snvs.nf
index bd5fdfb8..06267872 100644
--- a/subworkflows/local/annotate_snvs.nf
+++ b/subworkflows/local/annotate_genome_snvs.nf
@@ -1,5 +1,5 @@
//
-// A subworkflow to annotate snvs
+// A subworkflow to annotate snvs in the genome
//
include { VCFANNO } from '../../modules/nf-core/vcfanno/main'
@@ -20,7 +20,7 @@ include { TABIX_TABIX as TABIX_BCFTOOLS_VIEW } from '../../modules/nf-core/ta
include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main'
include { ANNOTATE_CADD } from './annotation/annotate_cadd'
-workflow ANNOTATE_SNVS {
+workflow ANNOTATE_GENOME_SNVS {
take:
ch_vcf // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
@@ -46,7 +46,6 @@ workflow ANNOTATE_SNVS {
RHOCALL_ANNOTATE (ch_vcf, BCFTOOLS_ROH.out.roh, [])
-
ZIP_TABIX_ROHCALL (RHOCALL_ANNOTATE.out.vcf)
ZIP_TABIX_ROHCALL.out.gz_tbi
diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf
new file mode 100644
index 00000000..400a58c1
--- /dev/null
+++ b/subworkflows/local/annotate_mt_snvs.nf
@@ -0,0 +1,112 @@
+//
+// Merge and annotate MT
+//
+
+include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main'
+include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main'
+include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main'
+include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main'
+include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main'
+include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main'
+include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main'
+include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main'
+include { ANNOTATE_CADD } from '../annotation/annotate_cadd'
+include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../../modules/nf-core/tabix/bgziptabix/main'
+include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main'
+
+workflow ANNOTATE_MT_SNVS {
+ take:
+ ch_vcf1 // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_cadd_header // channel: [mandatory] [ path(txt) ]
+ ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
+ ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
+ val_vep_genome // string: [mandatory] GRCh37 or GRCh38
+ val_vep_cache_version // string: [mandatory] 107
+ ch_vep_cache // channel: [mandatory] [ path(cache) ]
+ ch_case_info // channel: [mandatory] [ val(case_info) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // Annotating with CADD
+ ANNOTATE_CADD (
+ ch_annotation_in,
+ TABIX_TABIX_MERGE.out.tbi,
+ ch_cadd_header,
+ ch_cadd_resources
+ )
+
+ // Pick input for vep
+ ch_annotation_in
+ .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null"))
+ .branch { it ->
+ merged: it[2].equals("null")
+ return [it[0], it[1]]
+ cadd: !(it[2].equals("null"))
+ return [it[2], it[3]]
+ }
+ .set { ch_for_mix }
+ ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd)
+
+ // Annotating with ensembl Vep
+ ENSEMBLVEP_MT(
+ ch_vep_in,
+ ch_genome_fasta,
+ val_vep_genome,
+ "homo_sapiens",
+ val_vep_cache_version,
+ ch_vep_cache,
+ []
+ )
+
+ // Running vcfanno
+ TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz)
+ ENSEMBLVEP_MT.out.vcf_gz
+ .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+ .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
+ .set { ch_in_vcfanno }
+
+ VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources)
+
+ // HMTNOTE ANNOTATE
+ HMTNOTE_ANNOTATE(VCFANNO_MT.out.vcf)
+ HMTNOTE_ANNOTATE.out.vcf.map{meta, vcf ->
+ return [meta, WorkflowRaredisease.replaceSpacesInInfoColumn(vcf, vcf.parent.toString(), vcf.baseName)]
+ }
+ .set { ch_hmtnote_reformatted }
+ ZIP_TABIX_HMTNOTE(ch_hmtnote_reformatted)
+
+ // Prepare output
+ ch_vcf_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf] }
+ ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] }
+
+ // Running haplogrep2
+ HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz")
+
+ ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first())
+ ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first())
+ ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first())
+ ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first())
+ ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions)
+ ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
+ ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions)
+ ch_versions = ch_versions.mix(VCFANNO_MT.out.versions)
+ ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions)
+ ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions)
+
+ emit:
+ haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ]
+ vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ]
+ tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ]
+ report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index 75be4080..eb1d2ebf 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -6,7 +6,7 @@ include { CALL_SNV_DEEPVARIANT } from './variant_calling/call_snv_de
include { CALL_SNV_SENTIEON } from './variant_calling/call_snv_sentieon'
include { CALL_SNV_MT } from './variant_calling/call_snv_MT'
include { CALL_SNV_MT as CALL_SNV_MT_SHIFT } from './variant_calling/call_snv_MT'
-include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main'
+include { POSTPROCESS_MT_CALLS } from './variant_calling/postprocess_MT_calls'
workflow CALL_SNV {
take:
@@ -67,30 +67,28 @@ workflow CALL_SNV {
ch_mtshift_intervals
)
- // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT
- PICARD_LIFTOVERVCF (
- CALL_SNV_MT_SHIFT.out.vcf,
- ch_genome_dictionary,
+ POSTPROCESS_MT_CALLS(
+ CALL_SNV_MT.out.vcf,
+ PICARD_LIFTOVERVCF.out.vcf_lifted,
ch_genome_fasta,
- ch_mtshift_backchain,
+ ch_genome_dictionary,
+ ch_genome_fai,
+ ch_case_info
)
ch_genome_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf)
ch_genome_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix)
-
- ch_mt_vcf = CALL_SNV_MT.out.vcf
- ch_mtshift_vcf = PICARD_LIFTOVERVCF.out.vcf_lifted
+ ch_mt_vcf = POSTPROCESS_MT_CALLS.out.vcf
ch_versions = ch_versions.mix(CALL_SNV_DEEPVARIANT.out.versions)
ch_versions = ch_versions.mix(CALL_SNV_SENTIEON.out.versions)
ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions)
ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions)
- ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first())
+ ch_versions = ch_versions.mix(POSTPROCESS_MT_CALLS.out.versions)
emit:
genome_vcf = ch_genome_vcf // channel: [ val(meta), path(vcf) ]
genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ]
mt_vcf = ch_mt_vcf // channel: [ val(meta), path(vcf) ]
- mtshift_vcf = ch_mtshift_vcf // channel: [ val(meta), path(vcf) ]
versions = ch_versions // channel: [ path(versions.yml) ]
}
diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf
new file mode 100644
index 00000000..6c9b7628
--- /dev/null
+++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf
@@ -0,0 +1,112 @@
+//
+// Lift over, merge and post-process MT calls
+//
+
+include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main'
+include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main'
+include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main'
+include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main'
+include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main'
+include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main'
+
+workflow POSTPROCESS_MT_CALLS {
+ take:
+ ch_mt_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_mtshift_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_case_info // channel: [mandatory] [ val(case_info) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // LIFTOVER VCF FROM SHIFTED MT TO REFERENCE MT
+ PICARD_LIFTOVERVCF (
+ ch_mtshift_vcf,
+ ch_genome_dictionary,
+ ch_genome_fasta,
+ ch_mtshift_backchain,
+ )
+
+ ch_vcfs = ch_vcf1
+ .join(ch_vcf2, remainder: true)
+ .map{ meta, vcf1, vcf2 ->
+ [meta, [vcf1, vcf2]]
+ }
+ GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict)
+
+ // Filtering Variants
+ GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf
+ .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+ .set { ch_filt_vcf }
+ GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict)
+
+ // Spliting multiallelic calls
+ GATK4_VARIANTFILTRATION_MT.out.vcf
+ .join(GATK4_VARIANTFILTRATION_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+ .set { ch_in_split }
+ SPLIT_MULTIALLELICS_MT (ch_in_split, ch_genome_fasta)
+ TABIX_TABIX_MT(SPLIT_MULTIALLELICS_MT.out.vcf)
+
+ // Removing duplicates and merging if there is more than one sample
+ SPLIT_MULTIALLELICS_MT.out.vcf
+ .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+ .set { ch_in_remdup }
+ REMOVE_DUPLICATES_MT(ch_in_remdup, ch_genome_fasta)
+ TABIX_TABIX_MT2(REMOVE_DUPLICATES_MT.out.vcf)
+ // Gather the de-duplicated VCF paths (element 1 of each tuple) into one list
+ REMOVE_DUPLICATES_MT.out.vcf
+ .collect{it[1]}
+ .ifEmpty([])
+ .toList()
+ .set { file_list_vcf }
+ // Same for the matching tabix indices
+ TABIX_TABIX_MT2.out.tbi
+ .collect{it[1]}
+ .ifEmpty([])
+ .toList()
+ .set { file_list_tbi }
+ // Attach both lists to the case-level info
+ ch_case_info
+ .combine(file_list_vcf)
+ .combine(file_list_tbi)
+ .set { ch_rem_dup_vcf_tbi }
+ // One VCF: bypass the merge (index dropped); several VCFs: route to BCFTOOLS_MERGE_MT
+ ch_rem_dup_vcf_tbi.branch {
+ meta, vcf, tbi ->
+ single: vcf.size() == 1
+ return [meta, vcf]
+ multiple: vcf.size() > 1
+ return [meta, vcf, tbi]
+ }.set { ch_case_vcf }
+
+ BCFTOOLS_MERGE_MT( ch_case_vcf.multiple,
+ ch_genome_fasta,
+ ch_genome_fai,
+ []
+ )
+
+ BCFTOOLS_MERGE_MT.out.merged_variants
+ .mix(ch_case_vcf.single)
+ .set { ch_annotation_in }
+
+ TABIX_TABIX_MERGE(ch_annotation_in)
+
+ ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first())
+ ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first())
+ ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first())
+ ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first())
+ ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first())
+ ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions)
+
+ emit:
+ haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ]
+ vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ]
+ tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ]
+ report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
From 6bafacf60986d32d03b55dd16301d1cbee2053f2 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Thu, 24 Aug 2023 17:42:41 +0200
Subject: [PATCH 07/22] postprocess
---
...lls.config => postprocess_MT_calls.config} | 0
subworkflows/local/call_snv.nf | 3 ++-
.../variant_calling/postprocess_MT_calls.nf | 19 +++++++++----------
workflows/raredisease.nf | 8 ++++----
4 files changed, 15 insertions(+), 15 deletions(-)
rename conf/modules/{postptocess_MT_calls.config => postprocess_MT_calls.config} (100%)
diff --git a/conf/modules/postptocess_MT_calls.config b/conf/modules/postprocess_MT_calls.config
similarity index 100%
rename from conf/modules/postptocess_MT_calls.config
rename to conf/modules/postprocess_MT_calls.config
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index eb1d2ebf..e11b598f 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -69,10 +69,11 @@ workflow CALL_SNV {
POSTPROCESS_MT_CALLS(
CALL_SNV_MT.out.vcf,
- PICARD_LIFTOVERVCF.out.vcf_lifted,
+ CALL_SNV_MT_SHIFT.out.vcf,
ch_genome_fasta,
ch_genome_dictionary,
ch_genome_fai,
+ ch_mtshift_backchain,
ch_case_info
)
diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf
index 6c9b7628..81738085 100644
--- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf
+++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf
@@ -10,15 +10,16 @@ include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../..
include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main'
include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main'
include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main'
-include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main'
+include { PICARD_LIFTOVERVCF } from '../../../modules/nf-core/picard/liftovervcf/main'
workflow POSTPROCESS_MT_CALLS {
take:
ch_mt_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_mtshift_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(backchain) ]
ch_case_info // channel: [mandatory] [ val(case_info) ]
main:
@@ -32,18 +33,18 @@ workflow POSTPROCESS_MT_CALLS {
ch_mtshift_backchain,
)
- ch_vcfs = ch_vcf1
- .join(ch_vcf2, remainder: true)
+ ch_vcfs = ch_mt_vcf
+ .join(PICARD_LIFTOVERVCF.out.vcf_lifted, remainder: true)
.map{ meta, vcf1, vcf2 ->
[meta, [vcf1, vcf2]]
}
- GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict)
+ GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dictionary)
// Filtering Variants
GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf
.join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
.set { ch_filt_vcf }
- GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict)
+ GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dictionary)
// Spliting multiallelic calls
GATK4_VARIANTFILTRATION_MT.out.vcf
@@ -104,9 +105,7 @@ workflow POSTPROCESS_MT_CALLS {
ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions)
emit:
- haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ]
- vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ]
- tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ]
- report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ]
+ vcf = ch_annotation_in // channel: [ val(meta), path(vcf) ]
+ tbi = TABIX_TABIX_MERGE.out.tbi // channel: [ val(meta), path(tbi) ]
versions = ch_versions // channel: [ path(versions.yml) ]
}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index e88686e5..70c1422a 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -110,7 +110,7 @@ include { ALIGN } from '../subworkflows/local/al
include { ANALYSE_MT } from '../subworkflows/local/analyse_MT'
include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli'
include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkflows/local/annotate_consequence_pli'
-include { ANNOTATE_SNVS } from '../subworkflows/local/annotate_snvs'
+include { ANNOTATE_GENOME_SNVS } from '../subworkflows/local/annotate_genome_snvs'
include { ANNOTATE_STRUCTURAL_VARIANTS } from '../subworkflows/local/annotate_structural_variants'
include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions'
include { CALL_SNV } from '../subworkflows/local/call_snv'
@@ -502,7 +502,7 @@ workflow RAREDISEASE {
ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions)
}
- ANNOTATE_SNVS (
+ ANNOTATE_GENOME_SNVS (
ch_vcf,
params.analysis_type,
ch_cadd_header,
@@ -519,11 +519,11 @@ workflow RAREDISEASE {
).set {ch_snv_annotate}
ch_versions = ch_versions.mix(ch_snv_annotate.versions)
- ch_snv_annotate = ANNOTATE_SNVS.out.vcf_ann
+ ch_snv_annotate = ANNOTATE_GENOME_SNVS.out.vcf_ann
if (!params.skip_mt_analysis) {
- ANNOTATE_SNVS.out.vcf_ann
+ ANNOTATE_GENOME_SNVS.out.vcf_ann
.concat(ANALYSE_MT.out.vcf)
.groupTuple()
.set { ch_merged_vcf }
From 7c5909d63428aafe85e93d1bbf5289fd386bcae1 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Thu, 24 Aug 2023 20:43:31 +0200
Subject: [PATCH 08/22] move annotate
---
conf/modules/annotate_mt_snvs.config | 66 ++++++
conf/modules/call_sv_MT.config | 14 --
subworkflows/local/annotate_mt_snvs.nf | 50 ++--
subworkflows/local/call_snv.nf | 29 ++-
.../local/call_structural_variants.nf | 2 +-
workflows/raredisease.nf | 216 +++++++++---------
6 files changed, 221 insertions(+), 156 deletions(-)
create mode 100644 conf/modules/annotate_mt_snvs.config
diff --git a/conf/modules/annotate_mt_snvs.config b/conf/modules/annotate_mt_snvs.config
new file mode 100644
index 00000000..68e01af1
--- /dev/null
+++ b/conf/modules/annotate_mt_snvs.config
@@ -0,0 +1,66 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// Mitochondrial annotation options
+//
+
+process {
+
+
+ withName: '.*ANNOTATE_MT_SNVS:ENSEMBLVEP_MT' {
+ ext.args = [
+ '--dir_plugins vep_cache/Plugins',
+ '--plugin LoFtool,vep_cache/LoFtool_scores.txt',
+ '--plugin pLI,vep_cache/pLI_values_107.txt',
+ '--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz',
+ '--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS',
+ '--distance 0',
+ '--buffer_size 20000',
+ '--format vcf --fork 4 --max_sv_size 248956422',
+ '--appris --biotype --cache --canonical --ccds --compress_output bgzip',
+ '--domains --exclude_predicted --force_overwrite',
+ '--hgvs --humdiv --no_progress --no_stats --numbers',
+ '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl --vcf',
+ '--uniprot'
+ ].join(' ')
+ }
+
+ withName: '.*ANNOTATE_MT_SNVS:ZIP_TABIX_HMTNOTE' {
+ ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" }
+ publishDir = [
+ path: { "${params.outdir}/annotate_mt" },
+ mode: params.publish_dir_mode,
+ pattern: "*{vcf.gz,vcf.gz.tbi}",
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*ANNOTATE_MT_SNVS:HMTNOTE_ANNOTATE' {
+ ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" }
+ ext.args = '--offline'
+ publishDir = [
+ enabled: false
+ ]
+ }
+
+ withName: '.*ANNOTATE_MT_SNVS:HAPLOGREP2_CLASSIFY_MT' {
+ ext.prefix = { "${meta.id}_haplogrep" }
+ publishDir = [
+ path: { "${params.outdir}/annotate_mt" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+}
diff --git a/conf/modules/call_sv_MT.config b/conf/modules/call_sv_MT.config
index 6b769e5c..288ca425 100644
--- a/conf/modules/call_sv_MT.config
+++ b/conf/modules/call_sv_MT.config
@@ -35,17 +35,3 @@ process {
}
}
-//
-// Call SV in shifted mitochondria
-//
-
-process {
-
- withName: '.*CALL_SV_MT_SHIFT:MT_DELETION' {
- ext.when = false
- }
-
- withName: '.*CALL_SV_MT_SHIFT:EKLIPSE_MT' {
- ext.when = false
- }
-}
diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf
index 400a58c1..d6ce5659 100644
--- a/subworkflows/local/annotate_mt_snvs.nf
+++ b/subworkflows/local/annotate_mt_snvs.nf
@@ -2,51 +2,48 @@
// Merge and annotate MT
//
-include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main'
-include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main'
-include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main'
-include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main'
-include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main'
-include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main'
-include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main'
-include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main'
-include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main'
-include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main'
-include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main'
-include { ANNOTATE_CADD } from '../annotation/annotate_cadd'
-include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../../modules/nf-core/tabix/bgziptabix/main'
-include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main'
+include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../modules/nf-core/gatk4/mergevcfs/main'
+include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../modules/nf-core/gatk4/variantfiltration/main'
+include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../modules/nf-core/bcftools/norm/main'
+include { TABIX_TABIX as TABIX_TABIX_MT } from '../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../modules/nf-core/bcftools/norm/main'
+include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../modules/nf-core/bcftools/merge/main'
+include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../modules/nf-core/tabix/tabix/main'
+include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../modules/local/ensemblvep/main'
+include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../modules/nf-core/haplogrep2/classify/main'
+include { VCFANNO as VCFANNO_MT } from '../../modules/nf-core/vcfanno/main'
+include { ANNOTATE_CADD } from './annotation/annotate_cadd'
+include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../modules/nf-core/tabix/bgziptabix/main'
+include { HMTNOTE_ANNOTATE } from '../../modules/nf-core/hmtnote/annotate/main'
workflow ANNOTATE_MT_SNVS {
take:
- ch_vcf1 // channel: [mandatory] [ val(meta), path(vcf) ]
- ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_mt_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_mt_tbi // channel: [mandatory] [ val(meta), path(tbi) ]
ch_cadd_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
val_vep_genome // string: [mandatory] GRCh37 or GRCh38
val_vep_cache_version // string: [mandatory] 107
ch_vep_cache // channel: [mandatory] [ path(cache) ]
- ch_case_info // channel: [mandatory] [ val(case_info) ]
main:
ch_versions = Channel.empty()
// Annotating with CADD
ANNOTATE_CADD (
- ch_annotation_in,
- TABIX_TABIX_MERGE.out.tbi,
+ ch_mt_vcf,
+ ch_mt_tbi,
ch_cadd_header,
ch_cadd_resources
)
// Pick input for vep
- ch_annotation_in
+ ch_mt_vcf
.combine(ANNOTATE_CADD.out.vcf.ifEmpty("null"))
.branch { it ->
merged: it[2].equals("null")
@@ -92,11 +89,6 @@ workflow ANNOTATE_MT_SNVS {
// Running haplogrep2
HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz")
- ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first())
- ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first())
- ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first())
- ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first())
- ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions)
ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions)
ch_versions = ch_versions.mix(VCFANNO_MT.out.versions)
@@ -105,7 +97,7 @@ workflow ANNOTATE_MT_SNVS {
emit:
haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ]
- vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ]
+ vcf_ann = ch_vcf_out // channel: [ val(meta), path(vcf) ]
tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ]
report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ]
versions = ch_versions // channel: [ path(versions.yml) ]
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index e11b598f..d8cb9744 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -7,6 +7,7 @@ include { CALL_SNV_SENTIEON } from './variant_calling/call_snv_se
include { CALL_SNV_MT } from './variant_calling/call_snv_MT'
include { CALL_SNV_MT as CALL_SNV_MT_SHIFT } from './variant_calling/call_snv_MT'
include { POSTPROCESS_MT_CALLS } from './variant_calling/postprocess_MT_calls'
+include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main'
workflow CALL_SNV {
take:
@@ -51,6 +52,19 @@ workflow CALL_SNV {
ch_case_info
)
+ ch_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf)
+ ch_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix)
+
+ ch_vcf
+ .join(ch_tabix, failOnMismatch:true, failOnDuplicate:true)
+ .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
+ .set {ch_selvar_in}
+ GATK4_SELECTVARIANTS(ch_selvar_in) // remove mitochondrial variants
+
+ ch_genome_vcf = GATK4_SELECTVARIANTS.out.vcf
+ ch_genome_tabix = GATK4_SELECTVARIANTS.out.tbi
+ ch_genome_vcf_tabix = ch_genome_vcf.join(ch_genome_tabix, failOnMismatch:true, failOnDuplicate:true)
+
CALL_SNV_MT(
ch_mt_bam_bai,
ch_genome_fasta,
@@ -77,19 +91,18 @@ workflow CALL_SNV {
ch_case_info
)
- ch_genome_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf)
- ch_genome_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix)
- ch_mt_vcf = POSTPROCESS_MT_CALLS.out.vcf
-
ch_versions = ch_versions.mix(CALL_SNV_DEEPVARIANT.out.versions)
ch_versions = ch_versions.mix(CALL_SNV_SENTIEON.out.versions)
ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions)
ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(POSTPROCESS_MT_CALLS.out.versions)
+ ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions)
emit:
- genome_vcf = ch_genome_vcf // channel: [ val(meta), path(vcf) ]
- genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ]
- mt_vcf = ch_mt_vcf // channel: [ val(meta), path(vcf) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
+ genome_vcf = ch_genome_vcf // channel: [ val(meta), path(vcf) ]
+ genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ]
+ genome_vcf_tabix = ch_genome_vcf_tabix // channel: [ val(meta), path(vcf), path(tbi) ]
+ mt_vcf = POSTPROCESS_MT_CALLS.out.vcf // channel: [ val(meta), path(vcf) ]
+ mt_tabix = POSTPROCESS_MT_CALLS.out.tbi // channel: [ val(meta), path(tbi) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
}
diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
index 9e749add..d614ef7c 100644
--- a/subworkflows/local/call_structural_variants.nf
+++ b/subworkflows/local/call_structural_variants.nf
@@ -50,7 +50,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
- CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta)
+// CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta)
//merge
if (params.skip_cnv_calling) {
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 70c1422a..2a5fb036 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -94,11 +94,12 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
include { GATK4_MERGEVCFS } from '../modules/nf-core/gatk4/mergevcfs/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
include { FASTQC } from '../modules/nf-core/fastqc/main'
-include { GATK4_SELECTVARIANTS } from '../modules/nf-core/gatk4/selectvariants/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { SMNCOPYNUMBERCALLER } from '../modules/nf-core/smncopynumbercaller/main'
+include { ENSEMBLVEP_FILTERVEP as FILTERVEP_MT } from '../modules/nf-core/ensemblvep/filtervep'
include { ENSEMBLVEP_FILTERVEP as FILTERVEP_SNV } from '../modules/nf-core/ensemblvep/filtervep'
include { ENSEMBLVEP_FILTERVEP as FILTERVEP_SV } from '../modules/nf-core/ensemblvep/filtervep'
+include { TABIX_BGZIPTABIX as BGZIPTABIX_MT } from '../modules/nf-core/tabix/bgziptabix'
include { TABIX_BGZIPTABIX as BGZIPTABIX_SNV } from '../modules/nf-core/tabix/bgziptabix'
include { TABIX_BGZIPTABIX as BGZIPTABIX_SV } from '../modules/nf-core/tabix/bgziptabix'
@@ -108,9 +109,11 @@ include { TABIX_BGZIPTABIX as BGZIPTABIX_SV } from '../modules/nf-core/tabix
include { ALIGN } from '../subworkflows/local/align'
include { ANALYSE_MT } from '../subworkflows/local/analyse_MT'
+include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_MT } from '../subworkflows/local/annotate_consequence_pli'
include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli'
include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkflows/local/annotate_consequence_pli'
include { ANNOTATE_GENOME_SNVS } from '../subworkflows/local/annotate_genome_snvs'
+include { ANNOTATE_MT_SNVS } from '../subworkflows/local/annotate_mt_snvs'
include { ANNOTATE_STRUCTURAL_VARIANTS } from '../subworkflows/local/annotate_structural_variants'
include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions'
include { CALL_SNV } from '../subworkflows/local/call_snv'
@@ -118,6 +121,7 @@ include { CALL_STRUCTURAL_VARIANTS } from '../subworkflows/local/ca
include { GENS } from '../subworkflows/local/gens'
include { PREPARE_REFERENCES } from '../subworkflows/local/prepare_references'
include { QC_BAM } from '../subworkflows/local/qc_bam'
+include { RANK_VARIANTS as RANK_VARIANTS_MT } from '../subworkflows/local/rank_variants'
include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants'
include { RANK_VARIANTS as RANK_VARIANTS_SV } from '../subworkflows/local/rank_variants'
include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome'
@@ -289,7 +293,9 @@ workflow RAREDISEASE {
ch_scatter_split_intervals = ch_scatter.split_intervals ?: Channel.empty()
+ //
// ALIGNING READS, FETCH STATS, AND MERGE.
+ //
ALIGN (
ch_reads,
ch_genome_fasta,
@@ -307,7 +313,9 @@ workflow RAREDISEASE {
.set { ch_mapped }
ch_versions = ch_versions.mix(ALIGN.out.versions)
+ //
// BAM QUALITY CHECK
+ //
QC_BAM (
ch_mapped.genome_marked_bam,
ch_mapped.genome_marked_bai,
@@ -322,7 +330,9 @@ workflow RAREDISEASE {
)
ch_versions = ch_versions.mix(QC_BAM.out.versions)
+ //
// EXPANSIONHUNTER AND STRANGER
+ //
CALL_REPEAT_EXPANSIONS (
ch_mapped.genome_bam_bai,
ch_variant_catalog,
@@ -332,28 +342,9 @@ workflow RAREDISEASE {
)
ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
- // STEP 1.7: SMNCOPYNUMBERCALLER
- ch_mapped.genome_bam_bai
- .collect{it[1]}
- .toList()
- .set { ch_bam_list }
-
- ch_mapped.genome_bam_bai
- .collect{it[2]}
- .toList()
- .set { ch_bai_list }
-
- ch_case_info
- .combine(ch_bam_list)
- .combine(ch_bai_list)
- .set { ch_bams_bais }
-
- SMNCOPYNUMBERCALLER (
- ch_bams_bais
- )
- ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions)
-
- // STEP 2: VARIANT CALLING
+ //
+ // SNV CALLING
+ //
CALL_SNV (
ch_mapped.genome_bam_bai,
ch_mapped.mt_bam_bai,
@@ -375,6 +366,9 @@ workflow RAREDISEASE {
)
ch_versions = ch_versions.mix(CALL_SNV.out.versions)
+ //
+ // SV CALLING
+ //
CALL_STRUCTURAL_VARIANTS (
ch_mapped.genome_marked_bam,
ch_mapped.genome_marked_bai,
@@ -395,29 +389,9 @@ workflow RAREDISEASE {
)
ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions)
- // ped correspondence, sex check, ancestry check
- PEDDY_CHECK (
- CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true),
- ch_pedfile
- )
- ch_versions = ch_versions.mix(PEDDY_CHECK.out.versions)
-
- // GENS
- if (params.gens_switch) {
- GENS (
- ch_mapped.genome_bam_bai,
- CALL_SNV.out.vcf,
- ch_genome_fasta,
- ch_genome_fai,
- file(params.gens_interval_list),
- file(params.gens_pon),
- file(params.gens_gnomad_pos),
- ch_case_info,
- ch_genome_dictionary
- )
- ch_versions = ch_versions.mix(GENS.out.versions)
- }
-
+ //
+ // ANNOTATE STRUCTURAL VARIANTS
+ //
if (!params.skip_sv_annotation) {
ANNOTATE_STRUCTURAL_VARIANTS (
CALL_STRUCTURAL_VARIANTS.out.vcf,
@@ -455,55 +429,13 @@ workflow RAREDISEASE {
}
- if (!params.skip_mt_analysis) {
- ANALYSE_MT (
- ch_mapped.genome_bam_bai,
- ch_cadd_header,
- ch_cadd_resources,
- ch_genome_bwaindex,
- ch_genome_bwamem2index,
- ch_genome_fasta,
- ch_genome_fai,
- ch_genome_dictionary,
- ch_mt_intervals,
- ch_mtshift_bwaindex,
- ch_mtshift_bwamem2index,
- ch_mtshift_fasta,
- ch_mtshift_dictionary,
- ch_mtshift_fai,
- ch_mtshift_intervals,
- ch_mtshift_backchain,
- ch_vcfanno_resources,
- ch_vcfanno_toml,
- params.genome,
- params.vep_cache_version,
- ch_vep_cache,
- ch_case_info
- )
-
- ch_versions = ch_versions.mix(ANALYSE_MT.out.versions)
-
- }
-
- // VARIANT ANNOTATION
-
+ //
+ // ANNOTATE GENOME SNVs
+ //
if (!params.skip_snv_annotation) {
- ch_vcf = CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true)
-
- if (!params.skip_mt_analysis) {
- ch_vcf
- .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
- .set { ch_selvar_in }
-
- GATK4_SELECTVARIANTS(ch_selvar_in) // remove mitochondrial variants
-
- ch_vcf = GATK4_SELECTVARIANTS.out.vcf.join(GATK4_SELECTVARIANTS.out.tbi, failOnMismatch:true, failOnDuplicate:true)
- ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions)
- }
-
ANNOTATE_GENOME_SNVS (
- ch_vcf,
+ CALL_SNV.out.genome_vcf_tabix,
params.analysis_type,
ch_cadd_header,
ch_cadd_resources,
@@ -521,18 +453,6 @@ workflow RAREDISEASE {
ch_snv_annotate = ANNOTATE_GENOME_SNVS.out.vcf_ann
- if (!params.skip_mt_analysis) {
-
- ANNOTATE_GENOME_SNVS.out.vcf_ann
- .concat(ANALYSE_MT.out.vcf)
- .groupTuple()
- .set { ch_merged_vcf }
-
- GATK4_MERGEVCFS (ch_merged_vcf, ch_genome_dictionary)
- ch_snv_annotate = GATK4_MERGEVCFS.out.vcf
- ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions)
- }
-
ANN_CSQ_PLI_SNV (
ch_snv_annotate,
ch_variant_consequences
@@ -558,6 +478,94 @@ workflow RAREDISEASE {
}
+ //
+ // ANNOTATE MT SNVs
+ //
+ if (!params.skip_mt_annotation) {
+
+ ANNOTATE_MT_SNVS (
+ CALL_SNV.out.mt_vcf,
+ CALL_SNV.out.mt_tabix,
+ ch_cadd_header,
+ ch_cadd_resources,
+ ch_genome_fasta,
+ ch_vcfanno_resources,
+ ch_vcfanno_toml,
+ params.genome,
+ params.vep_cache_version,
+ ch_vep_cache,
+ ).set {ch_mt_annotate}
+ ch_versions = ch_versions.mix(ch_mt_annotate.versions)
+
+ ANN_CSQ_PLI_MT (
+ ch_mt_annotate.vcf_ann,
+ ch_variant_consequences
+ )
+ ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions)
+
+ RANK_VARIANTS_MT (
+ ANN_CSQ_PLI_MT.out.vcf_ann,
+ ch_pedfile,
+ ch_reduced_penetrance,
+ ch_score_config_snv
+ )
+ ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions)
+
+ FILTERVEP_MT(
+ RANK_VARIANTS_MT.out.vcf,
+ ch_vep_filters
+ )
+ ch_versions = ch_versions.mix(FILTERVEP_MT.out.versions)
+
+ BGZIPTABIX_MT(FILTERVEP_MT.out.output)
+ ch_versions = ch_versions.mix(BGZIPTABIX_MT.out.versions)
+
+ }
+
+ // SMNCOPYNUMBERCALLER
+ ch_mapped.genome_bam_bai
+ .collect{it[1]}
+ .toList()
+ .set { ch_bam_list }
+
+ ch_mapped.genome_bam_bai
+ .collect{it[2]}
+ .toList()
+ .set { ch_bai_list }
+
+ ch_case_info
+ .combine(ch_bam_list)
+ .combine(ch_bai_list)
+ .set { ch_bams_bais }
+
+ SMNCOPYNUMBERCALLER (
+ ch_bams_bais
+ )
+ ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions)
+
+ // ped correspondence, sex check, ancestry check
+ PEDDY_CHECK (
+ CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true),
+ ch_pedfile
+ )
+ ch_versions = ch_versions.mix(PEDDY_CHECK.out.versions)
+
+ // GENS: runs only when params.gens_switch is set; takes the genome VCF emitted by CALL_SNV as `genome_vcf` (CALL_SNV has no `vcf` output)
+ if (params.gens_switch) {
+ GENS (
+ ch_mapped.genome_bam_bai,
+ CALL_SNV.out.genome_vcf,
+ ch_genome_fasta,
+ ch_genome_fai,
+ file(params.gens_interval_list),
+ file(params.gens_pon),
+ file(params.gens_gnomad_pos),
+ ch_case_info,
+ ch_genome_dictionary
+ )
+ ch_versions = ch_versions.mix(GENS.out.versions)
+ }
+
//
// MODULE: Pipeline reporting
//
From d711b05f7babeb25292fc9c4d2b890f2eb65b6d9 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sat, 26 Aug 2023 11:55:35 +0200
Subject: [PATCH 09/22] update configs
---
conf/modules/align_MT.config | 8 +-
conf/modules/align_and_call_MT.config | 126 -------------------
conf/modules/analyse_MT.config | 31 -----
conf/modules/annotate_consequence_pli.config | 32 ++++-
conf/modules/annotate_genome_snvs.config | 8 +-
conf/modules/annotate_mt_snvs.config | 4 +-
conf/modules/call_snv.config | 10 ++
conf/modules/call_snv_deepvariant.config | 14 ---
conf/modules/call_snv_sentieon.config | 14 ---
conf/modules/call_structural_variants.config | 4 +-
conf/modules/call_sv_MT.config | 4 +-
conf/modules/convert_mt_bam_to_fastq.config | 6 +-
conf/modules/merge_annotate_MT.config | 88 -------------
conf/modules/postprocess_MT_calls.config | 15 ++-
conf/modules/prepare_references.config | 10 +-
conf/modules/rank_variants.config | 52 ++++++++
conf/modules/raredisease.config | 33 +++--
conf/test.config | 1 +
conf/test_full.config | 2 +-
docs/output.md | 8 +-
main.nf | 1 +
nextflow.config | 5 +-
nextflow_schema.json | 11 +-
workflows/raredisease.nf | 6 +-
24 files changed, 162 insertions(+), 331 deletions(-)
delete mode 100644 conf/modules/align_and_call_MT.config
delete mode 100644 conf/modules/analyse_MT.config
delete mode 100644 conf/modules/merge_annotate_MT.config
diff --git a/conf/modules/align_MT.config b/conf/modules/align_MT.config
index a01bfc60..35d91ee3 100644
--- a/conf/modules/align_MT.config
+++ b/conf/modules/align_MT.config
@@ -18,13 +18,13 @@
process {
withName: '.*ALIGN_MT:BWAMEM2_MEM_MT' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
+ ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
}
withName: '.*ALIGN_MT:SENTIEON_BWAMEM_MT' {
ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
+ ext.when = { !(params.analysis_type == "wes") && params.aligner == "sentieon" }
ext.prefix = { "${meta.id}.sorted" }
}
@@ -61,13 +61,13 @@ process {
process {
withName: '.*ALIGN_MT_SHIFT:BWAMEM2_MEM_MT' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
+ ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
}
withName: '.*ALIGN_MT_SHIFT:SENTIEON_BWAMEM_MT' {
ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
+ ext.when = { !(params.analysis_type == "wes") && params.aligner == "sentieon" }
ext.prefix = { "${meta.id}.sorted" }
}
diff --git a/conf/modules/align_and_call_MT.config b/conf/modules/align_and_call_MT.config
deleted file mode 100644
index 1a2993f5..00000000
--- a/conf/modules/align_and_call_MT.config
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Config file for defining DSL2 per module options and publishing paths
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Available keys to override module options:
- ext.args = Additional arguments appended to command in module.
- ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
- ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
- ext.prefix = File name prefix for output files.
- ext.when = Conditional clause
-----------------------------------------------------------------------------------------
-*/
-
-//
-// ANALYSE_MT:ALIGN_AND_CALL_MT
-//
-
-process {
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:BWAMEM2_MEM_MT' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
- ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SENTIEON_BWAMEM_MT' {
- ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
- ext.prefix = { "${meta.id}.sorted" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MERGEBAMALIGNMENT_MT' {
- ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT'
- ext.prefix = { "${meta.id}_merged" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:PICARD_ADDORREPLACEREADGROUPS_MT' {
- ext.args = { [
- "--VALIDATION_STRINGENCY LENIENT",
- "--RGLB lib",
- "--RGPL ILLUMINA",
- "--RGPU barcode",
- "--RGSM ${meta.id}"
- ].join(' ' ).trim() }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:PICARD_MARKDUPLICATES_MT' {
- ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true'
- ext.prefix = { "${meta.id}_markduplicates" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SAMTOOLS_SORT_MT' {
- ext.prefix = { "${meta.id}_sorted" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:MT_DELETION' {
- ext.args = '-s --insert-size 16000'
- publishDir = [
- path: { "${params.outdir}/mt_sv" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MUTECT2_MT' {
- ext.args = '--mitochondria-mode TRUE'
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_FILTERMUTECTCALLS_MT' {
- ext.prefix = { "${meta.id}_filtered" }
- }
-}
-
-//
-// ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT
-//
-
-process {
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:BWAMEM2_MEM_MT' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
- ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SENTIEON_BWAMEM_MT' {
- ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
- ext.prefix = { "${meta.id}.sorted" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_MERGEBAMALIGNMENT_MT' {
- ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT'
- ext.prefix = { "${meta.id}_merged_shifted" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:PICARD_ADDORREPLACEREADGROUPS_MT' {
- ext.args = { [
- "--VALIDATION_STRINGENCY LENIENT",
- "--RGLB lib",
- "--RGPL ${params.platform}",
- "--RGPU barcode",
- "--RGSM ${meta.id}"
- ].join(' ' ).trim() }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:PICARD_MARKDUPLICATES_MT' {
- ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true'
- ext.prefix = { "${meta.id}_markduplicates_shifted" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SAMTOOLS_SORT_MT' {
- ext.prefix = { "${meta.id}_sorted_shifted" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_MUTECT2_MT' {
- ext.args = '--mitochondria-mode TRUE'
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:HAPLOCHECK_MT' {
- ext.prefix = { "${meta.id}_shifted" }
- }
-
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_FILTERMUTECTCALLS_MT' {
- ext.prefix = { "${meta.id}_filtered_shifted" }
- }
-
-}
diff --git a/conf/modules/analyse_MT.config b/conf/modules/analyse_MT.config
deleted file mode 100644
index 4ee1b693..00000000
--- a/conf/modules/analyse_MT.config
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Config file for defining DSL2 per module options and publishing paths
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Available keys to override module options:
- ext.args = Additional arguments appended to command in module.
- ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
- ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
- ext.prefix = File name prefix for output files.
- ext.when = Conditional clause
-----------------------------------------------------------------------------------------
-*/
-
-//
-// Liftover
-//
-
-process {
- withName: '.*ANALYSE_MT:.*' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") }
- publishDir = [
- enabled: false
- ]
- }
-}
-
-process {
- withName: '.*ANALYSE_MT:PICARD_LIFTOVERVCF' {
- ext.prefix = { "${meta.id}_liftover" }
- }
-}
diff --git a/conf/modules/annotate_consequence_pli.config b/conf/modules/annotate_consequence_pli.config
index ca4db826..eed623a6 100644
--- a/conf/modules/annotate_consequence_pli.config
+++ b/conf/modules/annotate_consequence_pli.config
@@ -21,15 +21,15 @@ process {
}
withName: '.*ANN_CSQ_PLI_SV:ADD_MOST_SEVERE_CSQ' {
- ext.prefix = { "${meta.id}_vep_csq" }
+ ext.prefix = { "${meta.id}_genome_vep_csq" }
}
withName: '.*ANN_CSQ_PLI_SV:ADD_MOST_SEVERE_PLI' {
- ext.prefix = { "${meta.id}_vep_csq_pli" }
+ ext.prefix = { "${meta.id}_genome_vep_csq_pli" }
}
withName: '.*ANN_CSQ_PLI_SV:TABIX_BGZIPTABIX' {
- ext.prefix = { "${meta.id}_vep_csq_pli" }
+ ext.prefix = { "${meta.id}_genome_vep_csq_pli" }
publishDir = [
path: { "${params.outdir}/annotate_sv" },
mode: params.publish_dir_mode,
@@ -64,3 +64,29 @@ process {
]
}
}
+
+process {
+ withName: '.*ANN_CSQ_PLI_MT:.*' {
+ ext.when = !params.skip_mt_annotation
+ publishDir = [
+ enabled: false
+ ]
+ }
+
+ withName: '.*ANN_CSQ_PLI_MT:ADD_MOST_SEVERE_CSQ' {
+ ext.prefix = { "${meta.id}_mt_vep_csq" }
+ }
+
+ withName: '.*ANN_CSQ_PLI_MT:ADD_MOST_SEVERE_PLI' {
+ ext.prefix = { "${meta.id}_mt_vep_csq_pli" }
+ }
+
+ withName: '.*ANN_CSQ_PLI_MT:TABIX_BGZIPTABIX' {
+ ext.prefix = { "${meta.id}_mt_vep_csq_pli" }
+ publishDir = [
+ path: { "${params.outdir}/annotate_snv" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+}
diff --git a/conf/modules/annotate_genome_snvs.config b/conf/modules/annotate_genome_snvs.config
index 0cbf5496..96503d82 100644
--- a/conf/modules/annotate_genome_snvs.config
+++ b/conf/modules/annotate_genome_snvs.config
@@ -57,7 +57,7 @@ process {
ext.args = { "--euploid" }
tag = {"${meta7.id}"}
publishDir = [
- path: { "${params.outdir}/annotate_snv" },
+ path: { "${params.outdir}/annotate_snv/genome" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
@@ -69,7 +69,7 @@ process {
ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun }
tag = {"${meta6.id}"}
publishDir = [
- path: { "${params.outdir}/annotate_snv" },
+ path: { "${params.outdir}/annotate_snv/genome" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
@@ -106,7 +106,7 @@ process {
withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_CONCAT' {
ext.prefix = { "${meta.id}_rohann_vcfanno_filter_vep" }
publishDir = [
- path: { "${params.outdir}/annotate_snv" },
+ path: { "${params.outdir}/annotate_snv/genome" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
@@ -114,7 +114,7 @@ process {
withName: '.*ANNOTATE_GENOME_SNVS:TABIX_BCFTOOLS_CONCAT' {
publishDir = [
- path: { "${params.outdir}/annotate_snv" },
+ path: { "${params.outdir}/annotate_snv/genome" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
diff --git a/conf/modules/annotate_mt_snvs.config b/conf/modules/annotate_mt_snvs.config
index 68e01af1..40101033 100644
--- a/conf/modules/annotate_mt_snvs.config
+++ b/conf/modules/annotate_mt_snvs.config
@@ -39,7 +39,7 @@ process {
withName: '.*ANNOTATE_MT_SNVS:ZIP_TABIX_HMTNOTE' {
ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" }
publishDir = [
- path: { "${params.outdir}/annotate_mt" },
+ path: { "${params.outdir}/annotate_snv/mitochondria" },
mode: params.publish_dir_mode,
pattern: "*{vcf.gz,vcf.gz.tbi}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
@@ -57,7 +57,7 @@ process {
withName: '.*ANNOTATE_MT_SNVS:HAPLOGREP2_CLASSIFY_MT' {
ext.prefix = { "${meta.id}_haplogrep" }
publishDir = [
- path: { "${params.outdir}/annotate_mt" },
+ path: { "${params.outdir}/annotate_snv/mitochondria" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
diff --git a/conf/modules/call_snv.config b/conf/modules/call_snv.config
index 53cc78b4..17926228 100644
--- a/conf/modules/call_snv.config
+++ b/conf/modules/call_snv.config
@@ -19,4 +19,14 @@ process {
]
}
+ withName: '.*CALL_SNV:GATK4_SELECTVARIANTS' {
+ ext.args = "--exclude-intervals ${params.mito_name}"
+ ext.prefix = { "${meta.id}_nomito" }
+ ext.when = { !params.skip_snv_annotation }
+ publishDir = [
+ path: { "${params.outdir}/call_snv/genome" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
}
diff --git a/conf/modules/call_snv_deepvariant.config b/conf/modules/call_snv_deepvariant.config
index 0493534e..329bc15f 100644
--- a/conf/modules/call_snv_deepvariant.config
+++ b/conf/modules/call_snv_deepvariant.config
@@ -37,20 +37,6 @@ process {
withName: '.*CALL_SNV_DEEPVARIANT:REMOVE_DUPLICATES_GL' {
ext.args = '--output-type z --rm-dup none'
ext.prefix = { "${meta.id}_split_rmdup" }
- publishDir = [
- enabled: params.skip_mt_analysis,
- path: { "${params.outdir}/call_snv" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
}
- withName: '.*CALL_SNV_DEEPVARIANT:TABIX_GL' {
- publishDir = [
- enabled: params.skip_mt_analysis,
- path: { "${params.outdir}/call_snv" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
}
diff --git a/conf/modules/call_snv_sentieon.config b/conf/modules/call_snv_sentieon.config
index ec4d4480..b4443546 100644
--- a/conf/modules/call_snv_sentieon.config
+++ b/conf/modules/call_snv_sentieon.config
@@ -51,20 +51,6 @@ process {
withName: '.*CALL_SNV:CALL_SNV_SENTIEON:REMOVE_DUPLICATES_SEN' {
ext.args = '--output-type z --rm-dup none'
ext.prefix = { "${meta.id}_split_rmdup" }
- publishDir = [
- enabled: params.skip_mt_analysis,
- path: { "${params.outdir}/call_snv" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
}
- withName: '.*CALL_SNV_SENTIEON:TABIX_SEN' {
- publishDir = [
- enabled: params.skip_mt_analysis,
- path: { "${params.outdir}/call_snv" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
}
diff --git a/conf/modules/call_structural_variants.config b/conf/modules/call_structural_variants.config
index a8c10175..3a59868d 100644
--- a/conf/modules/call_structural_variants.config
+++ b/conf/modules/call_structural_variants.config
@@ -26,7 +26,7 @@ process {
withName: '.*CALL_STRUCTURAL_VARIANTS:SVDB_MERGE' {
ext.args = '--pass_only --same_order'
publishDir = [
- path: { "${params.outdir}/call_sv" },
+ path: { "${params.outdir}/call_sv/genome" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
@@ -34,7 +34,7 @@ process {
withName: '.*CALL_STRUCTURAL_VARIANTS:TABIX_TABIX' {
publishDir = [
- path: { "${params.outdir}/call_sv" },
+ path: { "${params.outdir}/call_sv/genome" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
diff --git a/conf/modules/call_sv_MT.config b/conf/modules/call_sv_MT.config
index 288ca425..44b0e581 100644
--- a/conf/modules/call_sv_MT.config
+++ b/conf/modules/call_sv_MT.config
@@ -20,7 +20,7 @@ process {
withName: '.*CALL_SV_MT:MT_DELETION' {
ext.args = '-s --insert-size 16000'
publishDir = [
- path: { "${params.outdir}/mt_sv" },
+ path: { "${params.outdir}/call_sv/mitochondria" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
@@ -28,7 +28,7 @@ process {
withName: '.*CALL_SV_MT:EKLIPSE' {
publishDir = [
- path: { "${params.outdir}/mt_sv" },
+ path: { "${params.outdir}/call_sv/mitochondria" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
diff --git a/conf/modules/convert_mt_bam_to_fastq.config b/conf/modules/convert_mt_bam_to_fastq.config
index 9a683b6e..0365c452 100644
--- a/conf/modules/convert_mt_bam_to_fastq.config
+++ b/conf/modules/convert_mt_bam_to_fastq.config
@@ -17,7 +17,7 @@
process {
- withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' {
+ withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' {
beforeScript = {"mkdir ./tmp"}
ext.args = [
"-L ${params.mito_name}",
@@ -26,11 +26,11 @@ process {
].join(" ").trim()
}
- withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_REVERTSAM_MT' {
+ withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_REVERTSAM_MT' {
ext.args = '--OUTPUT_BY_READGROUP false --VALIDATION_STRINGENCY LENIENT --ATTRIBUTE_TO_CLEAR FT --ATTRIBUTE_TO_CLEAR CO --SORT_ORDER queryname --RESTORE_ORIGINAL_QUALITIES false'
}
- withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_SAMTOFASTQ_MT' {
+ withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_SAMTOFASTQ_MT' {
ext.args = '--VALIDATION_STRINGENCY LENIENT'
}
}
diff --git a/conf/modules/merge_annotate_MT.config b/conf/modules/merge_annotate_MT.config
deleted file mode 100644
index ae2601b6..00000000
--- a/conf/modules/merge_annotate_MT.config
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Config file for defining DSL2 per module options and publishing paths
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Available keys to override module options:
- ext.args = Additional arguments appended to command in module.
- ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
- ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
- ext.prefix = File name prefix for output files.
- ext.when = Conditional clause
-----------------------------------------------------------------------------------------
-*/
-
-//
-// Mitochondrial annotation options
-//
-
-process {
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' {
- ext.prefix = { "${meta.id}_merged" }
- }
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_VARIANTFILTRATION_MT' {
- ext.prefix = { "${meta.id}_filt" }
- }
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:SPLIT_MULTIALLELICS_MT' {
- ext.args = '--output-type z --multiallelics -both'
- ext.prefix = { "${meta.id}_split" }
- }
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:REMOVE_DUPLICATES_MT' {
- ext.args = '--output-type z --rm-dup none'
- ext.prefix = { "${meta.id}_split_rmdup" }
- }
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:BCFTOOLS_MERGE_MT' {
- ext.args = '--output-type z'
- ext.prefix = { "${meta.id}_merge_mt" }
- }
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:ENSEMBLVEP_MT' {
- ext.args = [
- '--dir_plugins vep_cache/Plugins',
- '--plugin LoFtool,vep_cache/LoFtool_scores.txt',
- '--plugin pLI,vep_cache/pLI_values_107.txt',
- '--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz',
- '--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS',
- '--distance 0',
- '--buffer_size 20000',
- '--format vcf --fork 4 --max_sv_size 248956422',
- '--appris --biotype --cache --canonical --ccds --compress_output bgzip',
- '--domains --exclude_predicted --force_overwrite',
- '--hgvs --humdiv --no_progress --no_stats --numbers',
- '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl --vcf',
- '--uniprot'
- ].join(' ')
- }
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:ZIP_TABIX_HMTNOTE' {
- ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" }
- publishDir = [
- path: { "${params.outdir}/annotate_mt" },
- mode: params.publish_dir_mode,
- pattern: "*{vcf.gz,vcf.gz.tbi}",
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:HMTNOTE_ANNOTATE' {
- ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" }
- ext.args = '--offline'
- publishDir = [
- enabled: false
- ]
- }
-
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:HAPLOGREP2_CLASSIFY_MT' {
- ext.prefix = { "${meta.id}_haplogrep" }
- publishDir = [
- path: { "${params.outdir}/annotate_mt" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-
-}
diff --git a/conf/modules/postprocess_MT_calls.config b/conf/modules/postprocess_MT_calls.config
index 3119012e..c97ea43d 100644
--- a/conf/modules/postprocess_MT_calls.config
+++ b/conf/modules/postprocess_MT_calls.config
@@ -37,7 +37,20 @@ process {
withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_MERGE_MT' {
ext.args = '--output-type z'
- ext.prefix = { "${meta.id}_merge_mt" }
+ ext.prefix = { "${meta.id}_mitochondria" }
+ publishDir = [
+ path: { "${params.outdir}/call_snv/mitochondria" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*POSTPROCESS_MT_CALLS:TABIX_TABIX_MERGE' {
+ publishDir = [
+ path: { "${params.outdir}/call_snv/mitochondria" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
}
}
diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config
index 2532052d..101824ef 100644
--- a/conf/modules/prepare_references.config
+++ b/conf/modules/prepare_references.config
@@ -30,7 +30,7 @@ process {
}
withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2"}
+ ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwamem2"}
}
withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' {
@@ -38,7 +38,7 @@ process {
}
withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon"}
+ ext.when = { !(params.analysis_type == "wes") && params.aligner == "sentieon"}
}
withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' {
@@ -51,11 +51,11 @@ process {
withName: '.*PREPARE_REFERENCES:SAMTOOLS_EXTRACT_MT' {
ext.args = { " ${params.mito_name} -o ${meta.id}_mt.fa" }
- ext.when = {!params.mt_fasta && !params.skip_mt_analysis}
+ ext.when = {!params.mt_fasta}
}
withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")}
+ ext.when = { !(params.analysis_type == "wes")}
}
withName: '.*PREPARE_REFERENCES:GATK_SD' {
@@ -67,7 +67,7 @@ process {
}
withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' {
- ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")}
+ ext.when = { !(params.analysis_type == "wes")}
}
withName: '.*PREPARE_REFERENCES:GET_CHROM_SIZES' {
diff --git a/conf/modules/rank_variants.config b/conf/modules/rank_variants.config
index 64b7a53b..f5882da1 100644
--- a/conf/modules/rank_variants.config
+++ b/conf/modules/rank_variants.config
@@ -111,3 +111,55 @@ process {
]
}
}
+
+//
+// Score and rank MT SNVs
+//
+
+process {
+ withName: '.*RANK_VARIANTS_MT:.*' {
+ ext.when = !params.skip_snv_annotation
+ publishDir = [
+ enabled: false
+ ]
+ }
+
+ withName: '.*RANK_VARIANTS_MT:GENMOD_ANNOTATE' {
+ ext.args = {
+ (params.genome == 'GRCh37') ? '--annotate_regions --genome-build 37' : '--annotate_regions --genome-build 38'
+ }
+ }
+
+ withName: '.*RANK_VARIANTS_MT:GENMOD_MODELS' {
+ ext.args = " --whole_gene "
+ }
+
+ withName: '.*RANK_VARIANTS_MT:GENMOD_SCORE' {
+ ext.args = " --rank_results "
+ }
+
+ withName: '.*RANK_VARIANTS_MT:GENMOD_COMPOUND' {
+ ext.prefix = { "${meta.id}_ranked_mt" }
+ }
+
+ withName: '.*RANK_VARIANTS_MT:BCFTOOLS_SORT' {
+ ext.when = false
+ }
+
+ withName: '.*RANK_VARIANTS_MT:TABIX_BGZIP' {
+ ext.prefix = { "${meta.id}_ranked_mt" }
+ publishDir = [
+ path: { "${params.outdir}/rank_and_filter" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*RANK_VARIANTS_MT:TABIX_TABIX' {
+ publishDir = [
+ path: { "${params.outdir}/rank_and_filter" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+}
diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config
index c4bab3d1..f77e8a66 100644
--- a/conf/modules/raredisease.config
+++ b/conf/modules/raredisease.config
@@ -40,23 +40,6 @@ process {
}
}
-//
-// Remove mitochondrial variants
-//
-
-process {
- withName: '.*RAREDISEASE:GATK4_SELECTVARIANTS' {
- ext.args = "--exclude-intervals ${params.mito_name}"
- ext.prefix = { "${meta.id}_nomito" }
- ext.when = { !params.skip_snv_annotation }
- publishDir = [
- enabled: !params.skip_mt_analysis,
- path: { "${params.outdir}/call_snv" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-}
//
// Merge mitochondrial and genomic vcfs
@@ -134,6 +117,22 @@ process {
}
}
+process {
+ withName: '.*FILTERVEP_MT' {
+ ext.prefix = { "${meta.id}_clinical_snv" }
+ ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
+ }
+
+ withName: '.*BGZIPTABIX_MT' {
+ ext.prefix = { "${meta.id}_clinical_snv" }
+ publishDir = [
+ path: { "${params.outdir}/rank_and_filter" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+}
+
//
// Multiqc options
//
diff --git a/conf/test.config b/conf/test.config
index 9a13845b..83edbe24 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -38,6 +38,7 @@ params {
known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
+ score_config_mt = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini"
svdb_query_dbs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv"
diff --git a/conf/test_full.config b/conf/test_full.config
index dcf02d95..587b8600 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -25,7 +25,7 @@ params {
genome = 'GRCh38'
// Skip annotation
- skip_mt_analysis = true
+ skip_mt_annotation = true
skip_snv_annotation = true
skip_sv_annotation = true
}
diff --git a/docs/output.md b/docs/output.md
index 6d07a3bd..6cfac5e6 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -98,7 +98,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- `{outputdir}/alignment/`
- `*.bam|*.cram`: Alignment file in bam/cram format.
- `*.bai|*.crai`: Index of the corresponding bam/cram file.
- - `*.txt`: Text file containing the dedup metrics.
+ - `*.metrics`: Text file containing the dedup metrics.
### Quality control and reporting
@@ -222,8 +222,6 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files
- `call_snv/`
- - `_split_rmdup.vcf.gz`: normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set.
- - `_split_rmdup.vcf.gz.tbi`: index of the normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set.
- `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants.
- `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants.
@@ -237,8 +235,6 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
Output files
- `call_snv/`
- - `_split_rmdup.vcf.gz`: normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set.
- - `_split_rmdup.vcf.gz.tbi`: index of the normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set.
- `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants.
- `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants.
@@ -326,7 +322,7 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files
Based on VEP annotations, custom scripts used by the pipeline further annotate each record with the most severe consequence, and pli scores.
-> **NB**: Output files described below include mitochondrial annotations only if --skip_mt_analysis is set to true.
+> **NB**: Output files described below include mitochondrial annotations only if --skip_mt_annotation is set to true.
Output files
diff --git a/main.nf b/main.nf
index fcce4cc7..12cdadc0 100644
--- a/main.nf
+++ b/main.nf
@@ -39,6 +39,7 @@ params.ploidy_model = WorkflowMain.getGenomeAttribute(params,
params.reduced_penetrance = WorkflowMain.getGenomeAttribute(params, 'reduced_penetrance')
params.readcount_intervals = WorkflowMain.getGenomeAttribute(params, 'readcount_intervals')
params.sequence_dictionary = WorkflowMain.getGenomeAttribute(params, 'sequence_dictionary')
+params.score_config_mt = WorkflowMain.getGenomeAttribute(params, 'score_config_mt')
params.score_config_snv = WorkflowMain.getGenomeAttribute(params, 'score_config_snv')
params.score_config_sv = WorkflowMain.getGenomeAttribute(params, 'score_config_sv')
params.svdb_query_dbs = WorkflowMain.getGenomeAttribute(params, 'svdb_query_dbs')
diff --git a/nextflow.config b/nextflow.config
index 7c494c77..cbcdd6a6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -27,7 +27,7 @@ params {
skip_cnv_calling = false
skip_snv_annotation = false
skip_sv_annotation = false
- skip_mt_analysis = false
+ skip_mt_annotation = false
gens_switch = false
cadd_resources = null
platform = 'illumina'
@@ -280,7 +280,6 @@ manifest {
includeConfig 'conf/modules/raredisease.config'
includeConfig 'conf/modules/align.config'
-includeConfig 'conf/modules/analyse_MT.config'
includeConfig 'conf/modules/align_MT.config'
includeConfig 'conf/modules/call_snv_MT.config'
includeConfig 'conf/modules/call_sv_MT.config'
@@ -290,7 +289,6 @@ includeConfig 'conf/modules/postprocess_MT_calls.config'
includeConfig 'conf/modules/call_structural_variants.config'
includeConfig 'conf/modules/annotate_genome_snvs.config'
includeConfig 'conf/modules/annotate_structural_variants.config'
-includeConfig 'conf/modules/align_and_call_MT.config'
includeConfig 'conf/modules/align_bwamem2.config'
includeConfig 'conf/modules/align_sentieon.config'
includeConfig 'conf/modules/annotate_consequence_pli.config'
@@ -301,7 +299,6 @@ includeConfig 'conf/modules/call_sv_manta.config'
includeConfig 'conf/modules/call_sv_tiddit.config'
includeConfig 'conf/modules/convert_mt_bam_to_fastq.config'
includeConfig 'conf/modules/gens.config'
-includeConfig 'conf/modules/merge_annotate_MT.config'
includeConfig 'conf/modules/prepare_references.config'
includeConfig 'conf/modules/qc_bam.config'
includeConfig 'conf/modules/rank_variants.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f5216d76..27a3365e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -266,6 +266,13 @@
"help_text": "The saved references can be used for future pipeline runs, reducing processing times.",
"fa_icon": "fas fa-save"
},
+ "score_config_mt": {
+ "type": "string",
+ "exists": true,
+ "format": "path",
+ "fa_icon": "fas fa-file",
+ "description": "MT rank model config file for genmod."
+ },
"score_config_snv": {
"type": "string",
"exists": true,
@@ -388,9 +395,9 @@
"description": "Specifies whether or not to skip CNV calling.",
"fa_icon": "fas fa-book"
},
- "skip_mt_analysis": {
+ "skip_mt_annotation": {
"type": "boolean",
- "description": "Specifies whether or not to skip the subworkflow that analyses mitochondrial genome separate from the nuclear genome.",
+ "description": "Specifies whether or not to skip annotation of mitochondrial variants.",
"fa_icon": "fas fa-toggle-on"
},
"skip_snv_annotation": {
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 2a5fb036..327aced4 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -42,7 +42,7 @@ if (!params.skip_sv_annotation) {
mandatoryParams += ["genome", "svdb_query_dbs", "vep_cache", "vep_cache_version", "score_config_sv"]
}
-if (!params.skip_mt_analysis) {
+if (!params.skip_mt_annotation) {
mandatoryParams += ["genome", "mito_name", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"]
}
@@ -250,6 +250,8 @@ workflow RAREDISEASE {
: ( ch_references.readcount_intervals ?: Channel.empty() )
ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect()
: Channel.value([])
+ ch_score_config_mt = params.score_config_mt ? Channel.fromPath(params.score_config_mt).collect()
+ : Channel.value([])
ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect()
: Channel.value([])
ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect()
@@ -507,7 +509,7 @@ workflow RAREDISEASE {
ANN_CSQ_PLI_MT.out.vcf_ann,
ch_pedfile,
ch_reduced_penetrance,
- ch_score_config_snv
+ ch_score_config_mt
)
ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions)
From 0c8c8709ad59678b6166b78944fb2a540843c5f4 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sat, 26 Aug 2023 11:59:03 +0200
Subject: [PATCH 10/22] update raredisease config
---
conf/modules/raredisease.config | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config
index e2300b5c..4531294c 100644
--- a/conf/modules/raredisease.config
+++ b/conf/modules/raredisease.config
@@ -51,17 +51,6 @@ process {
}
}
-//
-// SENTIEON_TNSCOPE_MT_CALL
-//
-
-process {
- withName: '.*SENTIEON_TNSCOPE' {
- ext.args = { (params.genome == "GRCh37") ? " --interval MT " : "--interval chrM" }
- ext.args2 = " --min_init_normal_lod 0,5 --min_normal_lod 2,0 --min_init_tumor_lod 1,0 --min_tumor_lod 2,8 --trim_soft_clip "
- ext.when = params.variant_caller.equals("sentieon")
- }
-}
//
// Smncopynumbercaller options
@@ -141,12 +130,14 @@ process {
process {
withName: '.*FILTERVEP_MT' {
- ext.prefix = { "${meta.id}_clinical_snv" }
+ ext.when = !params.skip_vep_filter
+ ext.prefix = { "${meta.id}_clinical_mt" }
ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
}
withName: '.*BGZIPTABIX_MT' {
- ext.prefix = { "${meta.id}_clinical_snv" }
+ ext.when = !params.skip_vep_filter
+ ext.prefix = { "${meta.id}_clinical_mt" }
publishDir = [
path: { "${params.outdir}/rank_and_filter" },
mode: params.publish_dir_mode,
From 37be3a76a87f11a960ceede7f8ba98ae13fd4879 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sat, 26 Aug 2023 12:11:54 +0200
Subject: [PATCH 11/22] remove skip_mt_analysis
---
nextflow.config | 1 -
1 file changed, 1 deletion(-)
diff --git a/nextflow.config b/nextflow.config
index b428710e..294fdbe8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -29,7 +29,6 @@ params {
skip_sv_annotation = false
skip_mt_annotation = false
skip_vep_filter = false
- skip_mt_analysis = false
gens_switch = false
cadd_resources = null
platform = 'illumina'
From 637692ef5ff88025da858a63d218bbb53972eeae Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sat, 26 Aug 2023 17:02:49 +0200
Subject: [PATCH 12/22] update output
---
docs/output.md | 44 +++++++-----
nextflow.config | 30 ++++----
subworkflows/local/analyse_MT.nf | 118 -------------------------------
workflows/raredisease.nf | 1 -
4 files changed, 42 insertions(+), 151 deletions(-)
delete mode 100644 subworkflows/local/analyse_MT.nf
diff --git a/docs/output.md b/docs/output.md
index 8cc6e4ab..2a7949dc 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -221,7 +221,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files
-- `call_snv/`
+- `call_snv/genome`
- `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants.
- `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants.
@@ -234,7 +234,7 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
Output files
-- `call_snv/`
+- `call_snv/genome`
- `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants.
- `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants.
@@ -261,7 +261,7 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
Output files
-- `call_sv/`
+- `call_sv/genome`
- `_sv_merge.vcf.gz`: file containing the merged variant calls.
- `_sv_merge.vcf.gz.tbi`: index of the file containing the merged variant calls.
@@ -322,12 +322,12 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files
Based on VEP annotations, custom scripts used by the pipeline further annotate each record with the most severe consequence, and pli scores.
-> **NB**: Output files described below include mitochondrial annotations only if --skip_mt_annotation is set to true.
+> **NB**: Output files described below do not include mitochondrial annotations if `--skip_mt_annotation` is set to true.
Output files
-- `annotate_snv/`
+- `annotate_snv/genome`
- `_rohann_vcfanno_filter_vep.vcf.gz`: file containing bcftools roh, vcfanno, and vep annotations.
- `_rohann_vcfanno_filter_vep.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, and vep annotations.
- `_vep_csq_pli.vcf.gz`: file containing bcftools roh, vcfanno, vep, consequence and pli annotations.
@@ -346,9 +346,9 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e
Output files
-- `annotate_snv/*sites_chromograph`
+- `annotate_snv/genome/*sites_chromograph`
- `_rohann_vcfanno_upd_sites_.png`: file containing a plot showing upd sites across chromosomes.
-- `annotate_snv/*regions_chromograph`
+- `annotate_snv/genome/*regions_chromograph`
- `_rohann_vcfanno_upd_regions_.png`: file containing a plot showing upd regions across chromosomes.
@@ -376,7 +376,7 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e
### Mitochondrial analysis
-Mitochondrial analysis is run by default, to turn it off set `--skip_mt_analysis` to true.
+Mitochondrial analysis is run by default. To turn off the annotation of mitochondrial variants, set `--skip_mt_annotation` to true.
#### Alignment and variant calling
@@ -388,6 +388,10 @@ The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sen
[MT deletion script](https://github.com/dnil/mitosign/blob/master/run_mt_del_check.sh) lists the fraction of mitochondrially aligning read pairs (per 1000) that appear discordant, as defined by an insert size of more than 1.2 kb (and less than 15 kb due to the circular nature of the genome) using samtools.
+- `call_sv/mitochondria`
+ - `_svdbquery_vep.vcf.gz`: file containing svdb query, and vep annotations.
+ - `_svdbquery_vep.vcf.gz.tbi`: index of the file containing svdb query, and vep annotations.
+
#### Annotation:
##### HaploGrep2
@@ -397,7 +401,7 @@ The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sen
Output files
-- `annotate_mt/`
+- `annotate_snv/mitochondria`
- `_haplogrep.txt`: file containing haplogroup information.
@@ -423,9 +427,9 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files
Output files
-- `annotate_mt/`
- - `_vep_vcfanno_mt.vcf.gz`: file containing mitochondrial annotations.
- - `_vep_vcfanno_mt.vcf.gz.tbi`: index of the file containing mitochondrial annotations.
+- `annotate_snv/mitochondria`
+ - `_vep_vcfanno_hmtnote_mt.vcf.gz`: file containing mitochondrial annotations.
+ - `_vep_vcfanno_hmtnote_mt.vcf.gz.tbi`: index of the file containing mitochondrial annotations.
@@ -439,12 +443,18 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files
Output files
- `rank_and_filter/`
- - `_clinical_snv.ann_filter.vcf.gz`: file containing clinically relevant SNVs.
- - `_clinical_sv.ann_filter.vcf.gz`: file containing clinically relevant SVs.
+ - `_clinical_mt.vcf.gz`: file containing clinically relevant mitochondrial SNVs.
+ - `_clinical_mt.vcf.gz.tbi`: index of the file containing clinically relevant mitochondrial SNVs.
+ - `_clinical_snv.vcf.gz`: file containing clinically relevant SNVs.
+ - `_clinical_snv.vcf.gz.tbi`: index of the file containing clinically relevant SNVs.
+ - `_clinical_sv.vcf.gz`: file containing clinically relevant SVs.
+ - `_clinical_sv.vcf.gz.tbi`: index of the file containing clinically relevant SVs.
+ - `_ranked_mt.vcf.gz`: file containing mitochondrial SNV annotations with their rank scores.
+ - `_ranked_mt.vcf.gz.tbi`: index of the file containing mitochondrial SNV annotations with their rank scores.
- `_ranked_snv.vcf.gz`: file containing SNV annotations with their rank scores.
- - `_ranked_snv.vcf.gz.tbi`: file containing SNV annotations with their rank scores.
- - `_ranked_sv.ann_filter.vcf.gz`: file containing SV annotations with their rank scores.
- - `_ranked_sv.ann_filter.vcf.gz.tbi`: file containing SV annotations with their rank scores.
+ - `_ranked_snv.vcf.gz.tbi`: index of the file containing SNV annotations with their rank scores.
+ - `_ranked_sv.vcf.gz`: file containing SV annotations with their rank scores.
+ - `_ranked_sv.vcf.gz.tbi`: index of the file containing SV annotations with their rank scores.
diff --git a/nextflow.config b/nextflow.config
index 294fdbe8..e4554ce7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -281,32 +281,32 @@ manifest {
includeConfig 'conf/modules/raredisease.config'
includeConfig 'conf/modules/align.config'
-includeConfig 'conf/modules/align_MT.config'
-includeConfig 'conf/modules/call_snv_MT.config'
-includeConfig 'conf/modules/call_sv_MT.config'
-includeConfig 'conf/modules/annotate_mt_snvs.config'
-includeConfig 'conf/modules/call_snv.config'
-includeConfig 'conf/modules/postprocess_MT_calls.config'
-includeConfig 'conf/modules/call_structural_variants.config'
+includeConfig 'conf/modules/annotate_consequence_pli.config'
includeConfig 'conf/modules/annotate_genome_snvs.config'
+includeConfig 'conf/modules/annotate_mt_snvs.config'
includeConfig 'conf/modules/annotate_structural_variants.config'
-includeConfig 'conf/modules/align_bwamem2.config'
-includeConfig 'conf/modules/align_sentieon.config'
-includeConfig 'conf/modules/annotate_consequence_pli.config'
includeConfig 'conf/modules/call_repeat_expansions.config'
-includeConfig 'conf/modules/call_snv_deepvariant.config'
-includeConfig 'conf/modules/call_snv_sentieon.config'
-includeConfig 'conf/modules/call_sv_manta.config'
-includeConfig 'conf/modules/call_sv_tiddit.config'
+includeConfig 'conf/modules/call_snv.config'
+includeConfig 'conf/modules/call_structural_variants.config'
includeConfig 'conf/modules/convert_mt_bam_to_fastq.config'
includeConfig 'conf/modules/gens.config'
+includeConfig 'conf/modules/peddy_check.config'
includeConfig 'conf/modules/prepare_references.config'
includeConfig 'conf/modules/qc_bam.config'
includeConfig 'conf/modules/rank_variants.config'
includeConfig 'conf/modules/scatter_genome.config'
+includeConfig 'conf/modules/align_MT.config'
+includeConfig 'conf/modules/align_bwamem2.config'
+includeConfig 'conf/modules/align_sentieon.config'
includeConfig 'conf/modules/annotate_cadd.config'
-includeConfig 'conf/modules/peddy_check.config'
+includeConfig 'conf/modules/call_snv_MT.config'
+includeConfig 'conf/modules/call_snv_deepvariant.config'
+includeConfig 'conf/modules/call_snv_sentieon.config'
+includeConfig 'conf/modules/call_sv_MT.config'
includeConfig 'conf/modules/call_sv_germlinecnvcaller.config'
+includeConfig 'conf/modules/call_sv_manta.config'
+includeConfig 'conf/modules/call_sv_tiddit.config'
+includeConfig 'conf/modules/postprocess_MT_calls.config'
// Function to ensure that resource requirements don't go beyond
// a maximum limit
diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf
deleted file mode 100644
index af5cebff..00000000
--- a/subworkflows/local/analyse_MT.nf
+++ /dev/null
@@ -1,118 +0,0 @@
-//
-// Analyse MT
-//
-include { CONVERT_MT_BAM_TO_FASTQ } from './mitochondria/convert_mt_bam_to_fastq'
-include { ALIGN_AND_CALL_MT } from './mitochondria/align_and_call_MT'
-include { ALIGN_AND_CALL_MT as ALIGN_AND_CALL_MT_SHIFT } from './mitochondria/align_and_call_MT'
-include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main'
-include { MERGE_ANNOTATE_MT } from './mitochondria/merge_annotate_MT'
-
-workflow ANALYSE_MT {
- take:
- ch_bam_bai // channel: [mandatory] [ val(meta), file(bam), file(bai) ]
- ch_cadd_header // channel: [mandatory] [ path(txt) ]
- ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
- ch_genome_bwa_index // channel: [mandatory] [ val(meta), path(index) ]
- ch_genome_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ]
- ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_mt_intervals // channel: [mandatory] [ path(interval_list) ]
- ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
- ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
- ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_mtshift_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_mtshift_intervals // channel: [mandatory] [ path(interval_list) ]
- ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ]
- ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
- ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
- val_vep_genome // string: [mandatory] GRCh37 or GRCh38
- val_vep_cache_version // string: [mandatory] 107
- ch_vep_cache // channel: [mandatory] [ path(cache) ]
- ch_case_info // channel: [mandatory] [ val(case_info) ]
-
- main:
- ch_versions = Channel.empty()
-
- // PREPARING READS FOR MT ALIGNMENT
- CONVERT_MT_BAM_TO_FASTQ (
- ch_bam_bai,
- ch_genome_fasta,
- ch_genome_fai,
- ch_genome_dict
- )
-
- // MT ALIGNMENT AND VARIANT CALLING
- ALIGN_AND_CALL_MT (
- CONVERT_MT_BAM_TO_FASTQ.out.fastq,
- CONVERT_MT_BAM_TO_FASTQ.out.bam,
- ch_genome_bwa_index,
- ch_genome_bwamem2_index,
- ch_genome_fasta,
- ch_genome_dict,
- ch_genome_fai,
- ch_mt_intervals
- )
-
- ALIGN_AND_CALL_MT_SHIFT (
- CONVERT_MT_BAM_TO_FASTQ.out.fastq,
- CONVERT_MT_BAM_TO_FASTQ.out.bam,
- ch_mtshift_bwaindex,
- ch_mtshift_bwamem2index,
- ch_mtshift_fasta,
- ch_mtshift_dict,
- ch_mtshift_fai,
- ch_mtshift_intervals
- )
-
- // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT
- PICARD_LIFTOVERVCF (
- ALIGN_AND_CALL_MT_SHIFT.out.vcf,
- ch_genome_dict,
- ch_genome_fasta,
- ch_mtshift_backchain,
- )
-
- // MT MERGE AND ANNOTATE VARIANTS
- MERGE_ANNOTATE_MT(
- ALIGN_AND_CALL_MT.out.vcf,
- PICARD_LIFTOVERVCF.out.vcf_lifted,
- ch_cadd_header,
- ch_cadd_resources,
- ch_genome_fasta,
- ch_genome_dict,
- ch_genome_fai,
- ch_vcfanno_resources,
- ch_vcfanno_toml,
- val_vep_genome,
- val_vep_cache_version,
- ch_vep_cache,
- ch_case_info
- )
-
- ch_versions = ch_versions.mix(CONVERT_MT_BAM_TO_FASTQ.out.versions)
- ch_versions = ch_versions.mix(ALIGN_AND_CALL_MT.out.versions)
- ch_versions = ch_versions.mix(ALIGN_AND_CALL_MT_SHIFT.out.versions)
- ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first())
- ch_versions = ch_versions.mix(MERGE_ANNOTATE_MT.out.versions)
-
- emit:
- vcf = MERGE_ANNOTATE_MT.out.vcf // channel: [ val(meta), path(vcf) ]
- tbi = MERGE_ANNOTATE_MT.out.tbi // channel: [ val(meta), path(tbi) ]
- stats = ALIGN_AND_CALL_MT.out.stats // channel: [ val(meta), path(stats) ]
- filt_stats = ALIGN_AND_CALL_MT.out.filt_stats // channel: [ val(meta), path(tsv) ]
- mt_del_result = ALIGN_AND_CALL_MT.out.mt_del_result // channel: [ val(meta), path(txt) ]
- stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.stats // channel: [ val(meta), path(stats) ]
- filt_stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.filt_stats // channel: [ val(meta), path(tsv) ]
- eklipse_del = ALIGN_AND_CALL_MT.out.eklipse_del // channel: [ val(meta), path(csv) ]
- eklipse_genes = ALIGN_AND_CALL_MT.out.eklipse_genes // channel: [ val(meta), path(csv) ]
- eklipse_circos = ALIGN_AND_CALL_MT.out.eklipse_circos // channel: [ val(meta), path(png) ]
- haplog = MERGE_ANNOTATE_MT.out.haplog // channel: [ val(meta), path(txt) ]
- report = MERGE_ANNOTATE_MT.out.report // channel: [ path(html) ]
- txt = ALIGN_AND_CALL_MT.out.txt // channel: [ val(meta), path(txt) ]
- html = ALIGN_AND_CALL_MT.out.html // channel: [ val(meta), path(html) ]
- txt_sh = ALIGN_AND_CALL_MT_SHIFT.out.txt // channel: [ val(meta), path(txt) ]
- html_sh = ALIGN_AND_CALL_MT_SHIFT.out.html // channel: [ val(meta), path(html) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
-}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index cdd26dba..b1b6afb7 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -112,7 +112,6 @@ include { TABIX_BGZIPTABIX as BGZIPTABIX_SV } from '../modules/nf-core/tabix
//
include { ALIGN } from '../subworkflows/local/align'
-include { ANALYSE_MT } from '../subworkflows/local/analyse_MT'
include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_MT } from '../subworkflows/local/annotate_consequence_pli'
include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli'
include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkflows/local/annotate_consequence_pli'
From d71250c0352a29c8a32fd14bc1420fedc417341a Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sun, 27 Aug 2023 01:02:57 +0200
Subject: [PATCH 13/22] warn
---
conf/modules/raredisease.config | 26 --------------------------
workflows/raredisease.nf | 1 -
2 files changed, 27 deletions(-)
diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config
index 4531294c..5fef3537 100644
--- a/conf/modules/raredisease.config
+++ b/conf/modules/raredisease.config
@@ -41,16 +41,6 @@ process {
}
-//
-// Merge mitochondrial and genomic vcfs
-//
-
-process {
- withName: '.*RAREDISEASE:GATK4_MERGEVCFS' {
- ext.prefix = { "${meta.id}_mito_genome_merged" }
- }
-}
-
//
// Smncopynumbercaller options
@@ -75,14 +65,6 @@ process {
//
process {
- withName: '.*RANK_VARIANTS_SNV' {
- publishDir = [
- enabled: params.skip_vep_filter,
- path: { "${params.outdir}/rank_and_filter" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
withName: '.*FILTERVEP_SNV' {
ext.when = !params.skip_vep_filter
@@ -102,14 +84,6 @@ process {
}
process {
- withName: '.*RANK_VARIANTS_SV' {
- publishDir = [
- enabled: params.skip_vep_filter,
- path: { "${params.outdir}/rank_and_filter" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
withName: '.*FILTERVEP_SV' {
ext.when = !params.skip_vep_filter
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index b1b6afb7..fce42b35 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -95,7 +95,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
// MODULE: Installed directly from nf-core/modules
//
-include { GATK4_MERGEVCFS } from '../modules/nf-core/gatk4/mergevcfs/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
include { FASTQC } from '../modules/nf-core/fastqc/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
From 12bb6ae725e46b0583db295b595c69fe12c00793 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sun, 27 Aug 2023 01:21:28 +0200
Subject: [PATCH 14/22] update output
---
conf/modules/annotate_consequence_pli.config | 4 ++--
docs/output.md | 25 +++++++++++++++-----
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/conf/modules/annotate_consequence_pli.config b/conf/modules/annotate_consequence_pli.config
index eed623a6..68b11951 100644
--- a/conf/modules/annotate_consequence_pli.config
+++ b/conf/modules/annotate_consequence_pli.config
@@ -58,7 +58,7 @@ process {
withName: '.*ANN_CSQ_PLI_SNV:TABIX_BGZIPTABIX' {
ext.prefix = { "${meta.id}_vep_csq_pli" }
publishDir = [
- path: { "${params.outdir}/annotate_snv" },
+ path: { "${params.outdir}/annotate_snv/genome" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
@@ -84,7 +84,7 @@ process {
withName: '.*ANN_CSQ_PLI_MT:TABIX_BGZIPTABIX' {
ext.prefix = { "${meta.id}_mt_vep_csq_pli" }
publishDir = [
- path: { "${params.outdir}/annotate_snv" },
+ path: { "${params.outdir}/annotate_snv/genome" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
diff --git a/docs/output.md b/docs/output.md
index 2a7949dc..c977f0db 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -51,6 +51,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Mitochondrial analysis](#mitochondrial-analysis)
- [Alignment and variant calling](#alignment-and-variant-calling)
- [MT deletion script](#mt-deletion-script)
+ - [eKLIPse](#eklipse)
- [Annotation:](#annotation-)
- [HaploGrep2](#haplogrep2)
- [vcfanno](#vcfanno-1)
@@ -384,13 +385,25 @@ Mitochondrial analysis is run by default. If you want to turn off annotations se
The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sensitivity to low AF and separate alignments using opposite genome breakpoints to allow for the tracing of lineages of rare mitochondrial variants.
+- `call_snv/mitochondria`
+ - `_mitochondria.vcf.gz`: normalized vcf file containing MT variants.
+ - `_mitochondria.vcf.gz.tbi`: index of the vcf file containing MT variants.
+
##### MT deletion script
[MT deletion script](https://github.com/dnil/mitosign/blob/master/run_mt_del_check.sh) lists the fraction of mitochondrially aligning read pairs (per 1000) that appear discordant, as defined by an insert size of more than 1.2 kb (and less than 15 kb due to the circular nature of the genome) using samtools.
- `call_sv/mitochondria`
- - `_svdbquery_vep.vcf.gz`: file containing svdb query, and vep annotations.
- - `_svdbquery_vep.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, and vep annotations.
+ - `.txt`: file containing deletions.
+
+##### eKLIPse
+
+[eKLIPse](https://github.com/dooguypapua/eKLIPse) allows the detection and quantification of large mtDNA rearrangements.
+
+- `call_sv/mitochondria`
+ - `eKLIPse_deletions.csv`: file containing all predicted deletions.
+ - `eKLIPse_genes.csv`: file summarizing cumulated deletions per mtDNA gene.
+ - `eKLIPse_.png`: circos plot.
#### Annotation:
@@ -445,15 +458,15 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files
- `rank_and_filter/`
- `_clinical_mt.vcf.gz`: file containing clinically relevant mitochondrial SNVs.
- `_clinical_mt.vcf.gz.tbi`: index of the file containing clinically relevant mitochondrial SNVs.
- - `_clinical_snv.vcf.gz`: file containing clinically relevant SNVs.
+ - `_clinical_snv.vcf.gz`: file containing clinically relevant SNVs (does not include mitochondrial variants).
- `_clinical_snv.vcf.gz.tbi`: index of the file containing clinically relevant SNVs.
- - `_clinical_sv.vcf.gz`: file containing clinically relevant SVs.
+ - `_clinical_sv.vcf.gz`: file containing clinically relevant SVs (includes mitochondrial variants).
- `_clinical_sv.vcf.gz.tbi`: index of the file containing clinically relevant SVs.
- `_ranked_mt.vcf.gz`: file containing mitochondrial SNV annotations with their rank scores.
- `_ranked_mt.vcf.gz.tbi`: index of the file containing mitochondrial SNV annotations with their rank scores.
- - `_ranked_snv.vcf.gz`: file containing SNV annotations with their rank scores.
+ - `_ranked_snv.vcf.gz`: file containing SNV annotations with their rank scores (does not include mitochondrial variants).
- `_ranked_snv.vcf.gz.tbi`: index of the file containing SNV annotations with their rank scores.
- - `_ranked_sv.vcf.gz`: file containing SV annotations with their rank scores.
+ - `_ranked_sv.vcf.gz`: file containing SV annotations with their rank scores (includes mitochondrial variants).
- `_ranked_sv.vcf.gz.tbi`: index of the file containing SV annotations with their rank scores.
From d24123734ebe929e5347a178dbbc9e5980a16fba Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sun, 27 Aug 2023 02:00:01 +0200
Subject: [PATCH 15/22] update usage
---
docs/usage.md | 42 ++++++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 20 deletions(-)
diff --git a/docs/usage.md b/docs/usage.md
index 24da77cd..7d1e8f12 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -20,7 +20,7 @@ Table of contents:
- [6. Copy number variant calling](#6-copy-number-variant-calling)
- [7. SNV annotation & Ranking](#7-snv-annotation--ranking)
- [8. SV annotation & Ranking](#8-sv-annotation--ranking)
- - [9. Mitochondrial analysis](#9-mitochondrial-analysis)
+ - [9. Mitochondrial annotation](#9-mitochondrial-annotation)
- [Run the pipeline](#run-the-pipeline)
- [Direct input in CLI](#direct-input-in-cli)
- [Import from a config file (recommended)](#import-from-a-config-file-recommended)
@@ -133,7 +133,7 @@ nf-core/raredisease consists of several tools used for various purposes. For con
5. Variant calling - Structural variants (SV) (Tiddit & Manta)
6. SNV annotation & ranking (rohcall, vcfanno, ensembl VEP, GENMOD)
7. SV annotation & ranking (SVDB query, ensembl VEP, GENMOD)
-8. Mitochondrial analysis
+8. Mitochondrial annotation
> We have only listed the groups that require at least one input from the user. For example, the pipeline also runs SMNCopyNumberCaller, but it does not require any input other than the bam files passed by the pipeline. Hence, it is not mentioned in the list above. To know more about the tools used in the pipeline check the [README](../README.md).
@@ -143,16 +143,17 @@ The mandatory and optional parameters for each category are tabulated below.
##### 1. Alignment
-| Mandatory | Optional |
-| ------------------- | --------------------------- |
-| aligner1 | fasta_fai2 |
-| fasta | bwamem22 |
-| platform | known_dbsnp3 |
-| | known_dbsnp_tbi3 |
+| Mandatory | Optional |
+| ------------------------------ | --------------------------- |
+| aligner1 | fasta_fai3 |
+| fasta | bwamem23 |
+| platform | known_dbsnp4 |
+| mito_name/mt_fasta2 | known_dbsnp_tbi4 |
1Default value is bwamem2, but if you have a valid license for Sentieon, you have the option to use Sentieon as well.
-2fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
-3Used only by Sentieon.
+2If mito_name is provided, mt_fasta can be generated by the pipeline.
+3fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
+4Used only by Sentieon.
##### 2. QC stats from the alignment files
@@ -241,16 +242,17 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl
1 A CSV file that describes the databases (VCFs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query).
-##### 9. Mitochondrial analysis
-
-| Mandatory | Optional |
-| ----------------- | -------- |
-| genome | |
-| mito_name | |
-| vcfanno_resources | |
-| vcfanno_toml | |
-| vep_cache_version | |
-| vep_cache | |
+##### 9. Mitochondrial annotation
+
+| Mandatory | Optional |
+| ----------------- | ----------- |
+| genome | vep_filters |
+| mito_name | |
+| vcfanno_resources | |
+| vcfanno_toml | |
+| vep_cache_version | |
+| vep_cache | |
+| score_config_mt | |
#### Run the pipeline
From c1640fc151dbde9bbd7f1785899c9c3e1fa816dc Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 5 Sep 2023 09:01:47 +0200
Subject: [PATCH 16/22] update headers
---
subworkflows/local/align.nf | 4 +++-
subworkflows/local/alignment/align_MT.nf | 1 +
subworkflows/local/annotate_mt_snvs.nf | 15 ++++-----------
subworkflows/local/call_structural_variants.nf | 1 +
subworkflows/local/variant_calling/call_snv_MT.nf | 2 --
5 files changed, 9 insertions(+), 14 deletions(-)
diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
index a705f16e..e7314f2a 100644
--- a/subworkflows/local/align.nf
+++ b/subworkflows/local/align.nf
@@ -88,7 +88,9 @@ workflow ALIGN {
ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions,
ALIGN_SENTIEON.out.versions,
ALIGN_MT.out.versions,
- ALIGN_MT_SHIFT.out.versions)
+ ALIGN_MT_SHIFT.out.versions,
+ CONVERT_MT_BAM_TO_FASTQ.out.versions,
+ SAMTOOLS_VIEW.out.versions)
emit:
genome_marked_bam = ch_genome_marked_bam // channel: [ val(meta), path(bam) ]
diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf
index 17ecc362..dec73e67 100644
--- a/subworkflows/local/alignment/align_MT.nf
+++ b/subworkflows/local/alignment/align_MT.nf
@@ -43,6 +43,7 @@ workflow ALIGN_MT {
SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam)
ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first())
+ ch_versions = ch_versions.mix(SENTIEON_BWAMEM_MT.out.versions.first())
ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first())
ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first())
ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first())
diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf
index d6ce5659..5a6b65b5 100644
--- a/subworkflows/local/annotate_mt_snvs.nf
+++ b/subworkflows/local/annotate_mt_snvs.nf
@@ -1,16 +1,8 @@
//
-// Merge and annotate MT
+// Annotate MT
//
-include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../modules/nf-core/gatk4/mergevcfs/main'
-include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../modules/nf-core/gatk4/variantfiltration/main'
-include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../modules/nf-core/bcftools/norm/main'
include { TABIX_TABIX as TABIX_TABIX_MT } from '../../modules/nf-core/tabix/tabix/main'
-include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../modules/nf-core/bcftools/norm/main'
-include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../modules/nf-core/tabix/tabix/main'
-include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../modules/nf-core/bcftools/merge/main'
-include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../modules/nf-core/tabix/tabix/main'
include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../modules/local/ensemblvep/main'
include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../modules/nf-core/haplogrep2/classify/main'
include { VCFANNO as VCFANNO_MT } from '../../modules/nf-core/vcfanno/main'
@@ -66,9 +58,9 @@ workflow ANNOTATE_MT_SNVS {
)
// Running vcfanno
- TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz)
+ TABIX_TABIX_MT(ENSEMBLVEP_MT.out.vcf_gz)
ENSEMBLVEP_MT.out.vcf_gz
- .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+ .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
.map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
.set { ch_in_vcfanno }
@@ -91,6 +83,7 @@ workflow ANNOTATE_MT_SNVS {
ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions)
+ ch_versions = ch_versions.mix(TABIX_TABIX_MT.out.versions)
ch_versions = ch_versions.mix(VCFANNO_MT.out.versions)
ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions)
ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions)
diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
index d614ef7c..d0846e6e 100644
--- a/subworkflows/local/call_structural_variants.nf
+++ b/subworkflows/local/call_structural_variants.nf
@@ -75,6 +75,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
TABIX_TABIX (SVDB_MERGE.out.vcf)
ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
+ ch_versions = ch_versions.mix(CALL_SV_MT.out.versions)
ch_versions = ch_versions.mix(CALL_SV_TIDDIT.out.versions)
ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
diff --git a/subworkflows/local/variant_calling/call_snv_MT.nf b/subworkflows/local/variant_calling/call_snv_MT.nf
index e91718ad..d55d1965 100644
--- a/subworkflows/local/variant_calling/call_snv_MT.nf
+++ b/subworkflows/local/variant_calling/call_snv_MT.nf
@@ -5,8 +5,6 @@
include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main'
include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main'
include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main'
-include { MT_DELETION } from '../../../modules/local/mt_deletion_script'
-include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main'
workflow CALL_SNV_MT {
take:
From fd9c26fadfc9075b25c5ef73b62c7c0188c13153 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 5 Sep 2023 09:10:23 +0200
Subject: [PATCH 17/22] update changelog
---
CHANGELOG.md | 3 +++
1 file changed, 3 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 01f4cca1..6ef40309 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Installed the nf-core version of the sentieon/dnamodelapply module [#403](https://github.com/nf-core/raredisease/pull/403)
- Installed the nf-core version of the sentieon/wgsmetricsalgo module [#404](https://github.com/nf-core/raredisease/pull/404)
- Installed the nf-core version of the sentieon/dnascope module [#406](https://github.com/nf-core/raredisease/pull/406)
+- Broke down the mitochondrial analysis workflow into smaller, more modular subworkflows [#419](https://github.com/nf-core/raredisease/pull/419)
+- Replaced the parameter skip_mt_analysis, which was used to turn on/off the mitochondrial workflow [#419](https://github.com/nf-core/raredisease/pull/419)
+- Added a new parameter skip_mt_annotation, which can be used to turn on/off annotation and ranking for mitochondrial SNVs [#419](https://github.com/nf-core/raredisease/pull/419)
### `Fixed`
From 8ca4f8d83e3ccdcec392eebd57b45af5e8faaa42 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 13 Sep 2023 13:21:03 +0200
Subject: [PATCH 18/22] Lucia's comment
---
docs/usage.md | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/docs/usage.md b/docs/usage.md
index 7d1e8f12..6ce21435 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -131,9 +131,10 @@ nf-core/raredisease consists of several tools used for various purposes. For con
3. Repeat expansions (ExpansionsHunter & Stranger)
4. Variant calling - SNV (DeepVariant/Sentieon DNAscope)
5. Variant calling - Structural variants (SV) (Tiddit & Manta)
-6. SNV annotation & ranking (rohcall, vcfanno, ensembl VEP, GENMOD)
-7. SV annotation & ranking (SVDB query, ensembl VEP, GENMOD)
-8. Mitochondrial annotation
+6. Copy number variant calling (GATK's GermlineCNVCaller)
+7. SNV annotation & ranking (rohcall, vcfanno, ensembl VEP, GENMOD)
+8. SV annotation & ranking (SVDB query, ensembl VEP, GENMOD)
+9. Mitochondrial annotation
> We have only listed the groups that require at least one input from the user. For example, the pipeline also runs SMNCopyNumberCaller, but it does not require any input other than the bam files passed by the pipeline. Hence, it is not mentioned in the list above. To know more about the tools used in the pipeline check the [README](../README.md).
From ee105a8b60869ad588a9a212a5193b85c25c0881 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 13 Sep 2023 13:21:22 +0200
Subject: [PATCH 19/22] Update subworkflows/local/call_snv.nf [skip ci]
Co-authored-by: Anders Jemt
---
subworkflows/local/call_snv.nf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index d8cb9744..96320986 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -103,6 +103,6 @@ workflow CALL_SNV {
genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ]
genome_vcf_tabix = ch_genome_vcf_tabix // channel: [ val(meta), path(vcf), path(tbi) ]
mt_vcf = POSTPROCESS_MT_CALLS.out.vcf // channel: [ val(meta), path(vcf) ]
- mt_tabix = POSTPROCESS_MT_CALLS.out.tbi // channel: [ val(meta), path(vcf) ]
+ mt_tabix = POSTPROCESS_MT_CALLS.out.tbi // channel: [ val(meta), path(tbi) ]
versions = ch_versions // channel: [ path(versions.yml) ]
}
From 4eeab25d49c56b29dc79ed82de7247cfc2661ad0 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 13 Sep 2023 13:23:01 +0200
Subject: [PATCH 20/22] Update
subworkflows/local/variant_calling/postprocess_MT_calls.nf [skip ci]
Co-authored-by: Anders Jemt
---
subworkflows/local/variant_calling/postprocess_MT_calls.nf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf
index 81738085..2d10cfaa 100644
--- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf
+++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf
@@ -25,7 +25,7 @@ workflow POSTPROCESS_MT_CALLS {
main:
ch_versions = Channel.empty()
- // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT
+ // LIFTOVER SHIFTED VCF TO REFERENCE MT POSITIONS
PICARD_LIFTOVERVCF (
ch_mtshift_vcf,
ch_genome_dictionary,
From 9c1f1c0e0e4aea525e73e7e47fe4bfc7449ac1df Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 13 Sep 2023 14:48:52 +0200
Subject: [PATCH 21/22] comments
---
assets/schema_input.json | 2 +-
subworkflows/local/align.nf | 2 +-
subworkflows/local/annotate_mt_snvs.nf | 10 +-
.../local/call_structural_variants.nf | 2 -
subworkflows/local/convert_mt_bam_to_fastq.nf | 36 ----
.../local/mitochondria/align_and_call_MT.nf | 96 ----------
.../local/mitochondria/merge_annotate_MT.nf | 176 ------------------
.../variant_calling/postprocess_MT_calls.nf | 4 +-
8 files changed, 9 insertions(+), 319 deletions(-)
delete mode 100644 subworkflows/local/convert_mt_bam_to_fastq.nf
delete mode 100644 subworkflows/local/mitochondria/align_and_call_MT.nf
delete mode 100644 subworkflows/local/mitochondria/merge_annotate_MT.nf
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 555f2c0a..191f1394 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -10,7 +10,7 @@
"sample": {
"type": "string",
"exists": true,
- "meta": ["id"],
+ "meta": ["id", "sample"],
"pattern": "^\\S+$",
"errorMessage": "Sample name must be provided and cannot contain spaces"
},
diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
index e7314f2a..17a6252b 100644
--- a/subworkflows/local/align.nf
+++ b/subworkflows/local/align.nf
@@ -7,7 +7,7 @@ include { ALIGN_SENTIEON } from './alignment/align_sentieon'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
include { ALIGN_MT } from './alignment/align_MT'
include { ALIGN_MT as ALIGN_MT_SHIFT } from './alignment/align_MT'
-include { CONVERT_MT_BAM_TO_FASTQ } from './convert_mt_bam_to_fastq'
+include { CONVERT_MT_BAM_TO_FASTQ } from './mitochondria/convert_mt_bam_to_fastq'
workflow ALIGN {
take:
diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf
index 5a6b65b5..8f449e7b 100644
--- a/subworkflows/local/annotate_mt_snvs.nf
+++ b/subworkflows/local/annotate_mt_snvs.nf
@@ -36,12 +36,12 @@ workflow ANNOTATE_MT_SNVS {
// Pick input for vep
ch_mt_vcf
- .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null"))
- .branch { it ->
- merged: it[2].equals("null")
+ .join(ANNOTATE_CADD.out.vcf, remainder: true) // If CADD is not run then the third element in this channel will be `null`
+ .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,cadd.vcf], else [[meta],selvar.vcf,null]
+ merged: it[2].equals(null)
return [it[0], it[1]]
- cadd: !(it[2].equals("null"))
- return [it[2], it[3]]
+ cadd: !(it[2].equals(null))
+ return [it[0], it[2]]
}
.set { ch_for_mix }
ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd)
diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
index d0846e6e..28f4e8db 100644
--- a/subworkflows/local/call_structural_variants.nf
+++ b/subworkflows/local/call_structural_variants.nf
@@ -50,8 +50,6 @@ workflow CALL_STRUCTURAL_VARIANTS {
CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
-// CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta)
-
//merge
if (params.skip_cnv_calling) {
tiddit_vcf
diff --git a/subworkflows/local/convert_mt_bam_to_fastq.nf b/subworkflows/local/convert_mt_bam_to_fastq.nf
deleted file mode 100644
index ca19ea82..00000000
--- a/subworkflows/local/convert_mt_bam_to_fastq.nf
+++ /dev/null
@@ -1,36 +0,0 @@
-//
-// Prepare bam files for MT allignment
-//
-
-include { GATK4_PRINTREADS as GATK4_PRINTREADS_MT } from '../../modules/nf-core/gatk4/printreads/main'
-include { GATK4_REVERTSAM as GATK4_REVERTSAM_MT } from '../../modules/nf-core/gatk4/revertsam/main'
-include { GATK4_SAMTOFASTQ as GATK4_SAMTOFASTQ_MT } from '../../modules/nf-core/gatk4/samtofastq/main'
-
-workflow CONVERT_MT_BAM_TO_FASTQ {
- take:
- ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
- ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
-
- main:
- ch_versions = Channel.empty()
-
- // Outputs bam containing only MT
- GATK4_PRINTREADS_MT ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict )
-
- // Removes alignment information
- GATK4_REVERTSAM_MT ( GATK4_PRINTREADS_MT.out.bam )
-
- // Outputs fastq files
- GATK4_SAMTOFASTQ_MT ( GATK4_REVERTSAM_MT.out.bam )
-
- ch_versions = ch_versions.mix(GATK4_PRINTREADS_MT.out.versions.first())
- ch_versions = ch_versions.mix(GATK4_REVERTSAM_MT.out.versions.first())
- ch_versions = ch_versions.mix(GATK4_SAMTOFASTQ_MT.out.versions.first())
-
- emit:
- fastq = GATK4_SAMTOFASTQ_MT.out.fastq // channel: [ val(meta), [ path(fastq) ] ]
- bam = GATK4_REVERTSAM_MT.out.bam // channel: [ val(meta), path(bam) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
-}
diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf
deleted file mode 100644
index d857b11a..00000000
--- a/subworkflows/local/mitochondria/align_and_call_MT.nf
+++ /dev/null
@@ -1,96 +0,0 @@
-//
-// Align and call MT
-//
-
-include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main'
-include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main'
-include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main'
-include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main'
-include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main'
-include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MT } from '../../../modules/nf-core/samtools/index/main'
-include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } from '../../../modules/nf-core/samtools/sort/main'
-include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main'
-include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main'
-include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main'
-include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main'
-include { MT_DELETION } from '../../../modules/local/mt_deletion_script'
-include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main'
-
-workflow ALIGN_AND_CALL_MT {
- take:
- ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ]
- ch_ubam // channel: [mandatory] [ val(meta), path(bam) ]
- ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ]
- ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ]
- ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_intervals // channel: [mandatory] [ path(interval_list) ]
-
- main:
- ch_versions = Channel.empty()
-
- BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true)
-
- SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, ch_fai )
-
- Channel.empty()
- .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] })
- .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true)
- .set {ch_bam_ubam}
-
- GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict)
-
- PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam)
-
- PICARD_MARKDUPLICATES_MT (PICARD_ADDORREPLACEREADGROUPS_MT.out.bam, ch_fasta, ch_fai)
-
- SAMTOOLS_SORT_MT (PICARD_MARKDUPLICATES_MT.out.bam)
-
- SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam)
- ch_sort_index_bam = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true)
- ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals)
-
- EKLIPSE_MT(ch_sort_index_bam,[])
-
- MT_DELETION(ch_sort_index_bam, ch_fasta)
-
- GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[])
-
- HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf)
-
- // Filter Mutect2 calls
- ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
- ch_mutect_out = ch_mutect_vcf.join(GATK4_MUTECT2_MT.out.stats, failOnMismatch:true, failOnDuplicate:true)
- ch_to_filt = ch_mutect_out.map {
- meta, vcf, tbi, stats ->
- return [meta, vcf, tbi, stats, [], [], [], []]
- }
-
- GATK4_FILTERMUTECTCALLS_MT (ch_to_filt, ch_fasta, ch_fai, ch_dict)
-
- ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first())
- ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first())
- ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first())
- ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first())
- ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first())
- ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first())
- ch_versions = ch_versions.mix(EKLIPSE_MT.out.versions.first())
- ch_versions = ch_versions.mix(MT_DELETION.out.versions.first())
- ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first())
- ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first())
- ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first())
-
- emit:
- vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ]
- tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ]
- stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ]
- filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ]
- eklipse_del = EKLIPSE_MT.out.deletions // channel: [ val(meta), path(csv) ]
- eklipse_genes = EKLIPSE_MT.out.genes // channel: [ val(meta), path(csv) ]
- eklipse_circos = EKLIPSE_MT.out.circos // channel: [ val(meta), path(png) ]
- txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ]
- html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ]
- mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
-}
diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf
deleted file mode 100644
index 43edd93b..00000000
--- a/subworkflows/local/mitochondria/merge_annotate_MT.nf
+++ /dev/null
@@ -1,176 +0,0 @@
-//
-// Merge and annotate MT
-//
-
-include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main'
-include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main'
-include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main'
-include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main'
-include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main'
-include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main'
-include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main'
-include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main'
-include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main'
-include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main'
-include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main'
-include { ANNOTATE_CADD } from '../annotation/annotate_cadd'
-include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../../modules/nf-core/tabix/bgziptabix/main'
-include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main'
-
-workflow MERGE_ANNOTATE_MT {
- take:
- ch_vcf1 // channel: [mandatory] [ val(meta), path(vcf) ]
- ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ]
- ch_cadd_header // channel: [mandatory] [ path(txt) ]
- ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
- ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
- ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
- val_vep_genome // string: [mandatory] GRCh37 or GRCh38
- val_vep_cache_version // string: [mandatory] 107
- ch_vep_cache // channel: [mandatory] [ path(cache) ]
- ch_case_info // channel: [mandatory] [ val(case_info) ]
-
- main:
- ch_versions = Channel.empty()
-
- ch_vcfs = ch_vcf1
- .join(ch_vcf2, remainder: true)
- .map{ meta, vcf1, vcf2 ->
- [meta, [vcf1, vcf2]]
- }
- GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict)
-
- // Filtering Variants
- GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf
- .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
- .set { ch_filt_vcf }
- GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict)
-
- // Spliting multiallelic calls
- GATK4_VARIANTFILTRATION_MT.out.vcf
- .join(GATK4_VARIANTFILTRATION_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
- .set { ch_in_split }
- SPLIT_MULTIALLELICS_MT (ch_in_split, ch_genome_fasta)
- TABIX_TABIX_MT(SPLIT_MULTIALLELICS_MT.out.vcf)
-
- // Removing duplicates and merging if there is more than one sample
- SPLIT_MULTIALLELICS_MT.out.vcf
- .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
- .set { ch_in_remdup }
- REMOVE_DUPLICATES_MT(ch_in_remdup, ch_genome_fasta)
- TABIX_TABIX_MT2(REMOVE_DUPLICATES_MT.out.vcf)
-
- REMOVE_DUPLICATES_MT.out.vcf
- .collect{it[1]}
- .ifEmpty([])
- .toList()
- .set { file_list_vcf }
-
- TABIX_TABIX_MT2.out.tbi
- .collect{it[1]}
- .ifEmpty([])
- .toList()
- .set { file_list_tbi }
-
- ch_case_info
- .combine(file_list_vcf)
- .combine(file_list_tbi)
- .set { ch_rem_dup_vcf_tbi }
-
- ch_rem_dup_vcf_tbi.branch {
- meta, vcf, tbi ->
- single: vcf.size() == 1
- return [meta, vcf]
- multiple: vcf.size() > 1
- return [meta, vcf, tbi]
- }.set { ch_case_vcf }
-
- BCFTOOLS_MERGE_MT( ch_case_vcf.multiple,
- ch_genome_fasta,
- ch_genome_fai,
- []
- )
-
- BCFTOOLS_MERGE_MT.out.merged_variants
- .mix(ch_case_vcf.single)
- .set { ch_annotation_in }
-
- TABIX_TABIX_MERGE(ch_annotation_in)
-
- // Annotating with CADD
- ANNOTATE_CADD (
- ch_annotation_in,
- TABIX_TABIX_MERGE.out.tbi,
- ch_cadd_header,
- ch_cadd_resources
- )
-
- // Pick input for vep
- ch_annotation_in
- .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null"))
- .branch { it ->
- merged: it[2].equals("null")
- return [it[0], it[1]]
- cadd: !(it[2].equals("null"))
- return [it[2], it[3]]
- }
- .set { ch_for_mix }
- ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd)
-
- // Annotating with ensembl Vep
- ENSEMBLVEP_MT(
- ch_vep_in,
- ch_genome_fasta,
- val_vep_genome,
- "homo_sapiens",
- val_vep_cache_version,
- ch_vep_cache,
- []
- )
-
- // Running vcfanno
- TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz)
- ENSEMBLVEP_MT.out.vcf_gz
- .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true)
- .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
- .set { ch_in_vcfanno }
-
- VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources)
-
- // HMTNOTE ANNOTATE
- HMTNOTE_ANNOTATE(VCFANNO_MT.out.vcf)
- HMTNOTE_ANNOTATE.out.vcf.map{meta, vcf ->
- return [meta, WorkflowRaredisease.replaceSpacesInInfoColumn(vcf, vcf.parent.toString(), vcf.baseName)]
- }
- .set { ch_hmtnote_reformatted }
- ZIP_TABIX_HMTNOTE(ch_hmtnote_reformatted)
-
- // Prepare output
- ch_vcf_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf] }
- ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] }
-
- // Running haplogrep2
- HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz")
-
- ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first())
- ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first())
- ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first())
- ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first())
- ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions)
- ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
- ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions)
- ch_versions = ch_versions.mix(VCFANNO_MT.out.versions)
- ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions)
- ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions)
-
- emit:
- haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ]
- vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ]
- tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ]
- report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
-}
diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf
index 81738085..cb14c9b7 100644
--- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf
+++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf
@@ -1,5 +1,5 @@
//
-// Merge and annotate MT
+// Merge and normalize MT variants
//
include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main'
@@ -25,7 +25,7 @@ workflow POSTPROCESS_MT_CALLS {
main:
ch_versions = Channel.empty()
- // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT
+ // LIFTOVER SHIFTED VCF TO REFERENCE MT POSITIONS
PICARD_LIFTOVERVCF (
ch_mtshift_vcf,
ch_genome_dictionary,
From 29c9c85ea9daf0b83fe6437c1ae9b23055d80eb1 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 15 Sep 2023 23:36:59 +0200
Subject: [PATCH 22/22] update test config
---
conf/test.config | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/conf/test.config b/conf/test.config
index 10e89c43..c7593f76 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -24,8 +24,8 @@ params {
mito_name = 'MT'
// analysis params
- skip_cnv_calling = true
- skip_mt_analysis = true
+ skip_cnv_calling = true
+ skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip MT annotation when running on GitHub Actions CI (GITHUB_ACTIONS env var is set)
// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv'