From 9ac0723773726d06bc87a288bcdeb5cdbf5caae8 Mon Sep 17 00:00:00 2001 From: Bekir Erguner Date: Fri, 11 Aug 2023 16:47:38 +0200 Subject: [PATCH 01/10] inital tests working --- conf/test.config | 15 +++-------- conf/test/cache.config | 2 +- .../main.nf | 7 +++++ .../main.nf | 26 ++++++++++--------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/conf/test.config b/conf/test.config index 81653565c7..876e0de0b1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -70,18 +70,9 @@ process { } } - if (params.joint_mutect2) { - withName: 'MUTECT2_PAIRED' { - ext.args = { params.ignore_soft_clipped_bases ? - "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" : - "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" } - } - } - else { - withName: 'MUTECT2_PAIRED'{ - //sample name from when the test data was generated - ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " } - } + withName: '.*:MUTECT2_PAIRED'{ + //sample name from when the test data was generated + ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " } } withName: 'FILTERVARIANTTRANCHES'{ diff --git a/conf/test/cache.config b/conf/test/cache.config index 4e4119d3a7..88ae85503f 100644 --- a/conf/test/cache.config +++ b/conf/test/cache.config @@ -87,7 +87,7 @@ process { } } - withName: 'MUTECT2_PAIRED'{ + withName: '.*:MUTECT2_PAIRED'{ //sample name from when the test data was generated ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " } } diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf index 991f484d5c..00e7d4b55a 100644 --- a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf @@ -161,6 +161,13 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { ch_cont_to_filtermutectcalls = CALCULATECONTAMINATION.out.contamination } + // vcf.view(it -> ["vcfs", it]) + // tbi.view(it -> ["tbis", it]) + // stats.view(it -> ["stats", it]) + // LEARNREADORIENTATIONMODEL.out.artifactprior.view(it -> ["priors", it]) + // ch_seg_to_filtermutectcalls.view(it -> ["segs", it]) + // ch_cont_to_filtermutectcalls.view(it -> ["conts", it]) + // Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true) .join(stats, failOnDuplicate: true, failOnMismatch: true) diff --git a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf index d776d89878..de94f836fd 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf @@ -89,9 +89,9 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { MERGEMUTECTSTATS(stats_to_merge) // Mix intervals and no_intervals channels together - vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals) - tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals) - stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals) + vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), vcf ] } + tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), tbi ] } + stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), stats ] } f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals) // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2 @@ -120,21 +120,23 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { if (joint_mutect2) { // Remove sample names and retain patient name as the main identifier - calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample') + [id:meta.patient], seg ] }.groupTuple() - calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('sample') + [id:meta.patient], cont ] }.groupTuple() + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type') + [id:meta.patient], seg ] }.groupTuple() + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type') + [id:meta.patient], cont ] }.groupTuple() } else { // Regular single sample mode - calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation - calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), seg ] } + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), cont ] } } + prior_to_join = LEARNREADORIENTATIONMODEL.out.artifactprior.map{ meta, prior -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), prior ] } + // Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables - vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true) - .join(stats, failOnDuplicate: true, failOnMismatch: true) - .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true) - .join(calculatecontamination_out_seg, failOnDuplicate: true, failOnMismatch: true) - .join(calculatecontamination_out_cont, failOnDuplicate: true, failOnMismatch: true) + vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true, by: 0) + .join(stats, failOnDuplicate: true, failOnMismatch: true, by: 0) + .join(prior_to_join, failOnDuplicate: true, failOnMismatch: true, by: 0) + .join(calculatecontamination_out_seg, failOnDuplicate: true, failOnMismatch: true, by: 0) + .join(calculatecontamination_out_cont, failOnDuplicate: true, failOnMismatch: true, by: 0) .map{ meta, vcf, tbi, stats, artifactprior, seg, cont -> [ meta, vcf, tbi, stats, artifactprior, seg, cont, [] ] } FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict) From 1330e1cd0c5c01b1f869775c023bacffb47ac2d1 Mon Sep 17 00:00:00 2001 From: Bekir Erguner Date: Wed, 16 Aug 2023 09:08:03 +0200 Subject: [PATCH 02/10] fixed tumor_only --- .../main.nf | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf index de94f836fd..3cd6ed1954 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf @@ -89,11 +89,22 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { MERGEMUTECTSTATS(stats_to_merge) // Mix intervals and no_intervals channels together - vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), vcf ] } - tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), tbi ] } - stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), stats ] } - f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals) + // Remove unnecessary metadata + if (joint_mutect2) { + // Remove sample and status metadata as they become irrelevant for joint calling + vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), vcf ] } + tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), tbi ] } + stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), stats ] } + f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2 -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), f1r2 ] } + } + else { + // Keep sample and status metadata + vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals', 'data_type'), vcf ] } + tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('num_intervals', 'data_type'), tbi ] } + stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('num_intervals', 'data_type'), stats ] } + f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2 -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), f1r2 ] } + } // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2 LEARNREADORIENTATIONMODEL(f1r2) @@ -125,16 +136,14 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { } else { // Regular single sample mode - calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), seg ] } - calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), cont ] } + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('num_intervals', 'data_type'), seg ] } + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('num_intervals', 'data_type'), cont ] } } - prior_to_join = LEARNREADORIENTATIONMODEL.out.artifactprior.map{ meta, prior -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), prior ] } - // Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true, by: 0) .join(stats, failOnDuplicate: true, failOnMismatch: true, by: 0) - .join(prior_to_join, failOnDuplicate: true, failOnMismatch: true, by: 0) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true, by: 0) .join(calculatecontamination_out_seg, failOnDuplicate: true, failOnMismatch: true, by: 0) .join(calculatecontamination_out_cont, failOnDuplicate: true, failOnMismatch: true, by: 0) .map{ meta, vcf, tbi, stats, artifactprior, seg, cont -> [ meta, vcf, tbi, stats, artifactprior, seg, cont, [] ] } @@ -143,7 +152,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { vcf_filtered = FILTERMUTECTCALLS.out.vcf // add variantcaller to meta map and remove no longer necessary field: num_intervals - .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'mutect2' ], vcf ] } + .map{ meta, vcf -> [ meta + [ variantcaller:'mutect2' ], vcf ] } versions = versions.mix(MERGE_MUTECT2.out.versions) versions = versions.mix(CALCULATECONTAMINATION.out.versions) From cca3144e3872fed10031efa5ce04430fff2f6190 Mon Sep 17 00:00:00 2001 From: Bekir Erguner Date: Wed, 16 Aug 2023 10:21:05 +0200 Subject: [PATCH 03/10] clean somatic mutect2 --- .../local/bam_variant_calling_somatic_mutect2/main.nf | 7 ------- 1 file changed, 7 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf index 00e7d4b55a..991f484d5c 100644 --- a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf @@ -161,13 +161,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { ch_cont_to_filtermutectcalls = CALCULATECONTAMINATION.out.contamination } - // vcf.view(it -> ["vcfs", it]) - // tbi.view(it -> ["tbis", it]) - // stats.view(it -> ["stats", it]) - // LEARNREADORIENTATIONMODEL.out.artifactprior.view(it -> ["priors", it]) - // ch_seg_to_filtermutectcalls.view(it -> ["segs", it]) - // ch_cont_to_filtermutectcalls.view(it -> ["conts", it]) - // Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true) .join(stats, failOnDuplicate: true, failOnMismatch: true) From 372f2ee357fd6704bc691564a6222db92304ab4b Mon Sep 17 00:00:00 2001 From: Bekir Erguner Date: Wed, 16 Aug 2023 10:47:48 +0200 Subject: [PATCH 04/10] remove by:0 --- .../bam_variant_calling_tumor_only_mutect2/main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf index 3cd6ed1954..72be15e9d4 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf @@ -141,11 +141,11 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { } // Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables - vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true, by: 0) - .join(stats, failOnDuplicate: true, failOnMismatch: true, by: 0) - .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true, by: 0) - .join(calculatecontamination_out_seg, failOnDuplicate: true, failOnMismatch: true, by: 0) - .join(calculatecontamination_out_cont, failOnDuplicate: true, failOnMismatch: true, by: 0) + vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true) + .join(stats, failOnDuplicate: true, failOnMismatch: true) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true) + .join(calculatecontamination_out_seg, failOnDuplicate: true, failOnMismatch: true) + .join(calculatecontamination_out_cont, failOnDuplicate: true, failOnMismatch: true) .map{ meta, vcf, tbi, stats, artifactprior, seg, cont -> [ meta, vcf, tbi, stats, artifactprior, seg, cont, [] ] } FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict) From d231b26bd429b5ba563666ebd54a3340cc66c44c Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Thu, 31 Aug 2023 10:14:44 +0200 Subject: [PATCH 05/10] Update CHANGELOG.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a4222039a..30ca9548dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,11 +47,14 @@ Rapaselet is a delta formed by the RapaƤtno river between the Bielloriehppe mas - [#1173](https://github.com/nf-core/sarek/pull/1173) - Fixed duplicated entries in joint germline recalibrated VCF ([#966](https://github.com/nf-core/sarek/pull/966), [#1102](https://github.com/nf-core/sarek/pull/1102)), fixed grouping joint germline recalibrated VCF ([#1137](https://github.com/nf-core/sarek/pull/1137)) - [#1177](https://github.com/nf-core/sarek/pull/1177) - Fix status inference when using nf-validation plugin +- [#1181](https://github.com/nf-core/sarek/pull/1181) - Fix join mismatch error in Mutect2 tumor only subworkflow - [#1183](https://github.com/nf-core/sarek/pull/1183) - Add docs for concatentated germline variants - [#1184](https://github.com/nf-core/sarek/pull/1184) - Fix issue with duplicated variants in VCF from Sentieon-based joint-germline variant-calling with VQSR. (Corresponding to [#966](https://github.com/nf-core/sarek/issues/966) for GATK.) - [#1192](https://github.com/nf-core/sarek/pull/1192) - Add `ASCATprofile.png` to ASCAT output docs - [#1197](https://github.com/nf-core/sarek/pull/1197) - Improve `tower.yml` file to display reports in `Tower` ([#1190](https://github.com/nf-core/sarek/issues/1190)) + + ### Dependencies | Dependency | Old version | New version | From f32e7f78eea6fc103451cdf33790b8a74b10ee5d Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Thu, 31 Aug 2023 10:15:28 +0200 Subject: [PATCH 06/10] Update CHANGELOG.md --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30ca9548dd..4206411923 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,8 +53,6 @@ Rapaselet is a delta formed by the RapaƤtno river between the Bielloriehppe mas - [#1192](https://github.com/nf-core/sarek/pull/1192) - Add `ASCATprofile.png` to ASCAT output docs - [#1197](https://github.com/nf-core/sarek/pull/1197) - Improve `tower.yml` file to display reports in `Tower` ([#1190](https://github.com/nf-core/sarek/issues/1190)) - - ### Dependencies | Dependency | Old version | New version | From 8bc44fc5b9a56706602158c05fb387c2628c56b8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 31 Aug 2023 12:33:45 +0200 Subject: [PATCH 07/10] simplify meta data handling --- .../main.nf | 6 +++- .../main.nf | 33 +++++++------------ 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf index 0362d8750a..6e90af5974 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf @@ -110,7 +110,11 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { // MUTECT2 if (tools.split(',').contains('mutect2')) { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2( - cram, + cram.map{ meta, cram, crai -> + joint_mutect2 ? + [ meta + [ id:meta.patient ] - meta.subMap('sample', 'status', 'num_intervals', 'data_type', 'patient') , cram, crai ] : + [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), cram, crai ] + }, // Remap channel to match module/subworkflow fasta.map{ it -> [ [ id:'fasta' ], it ] }, // Remap channel to match module/subworkflow diff --git a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf index 161002e8e0..9e2c6d0bc5 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf @@ -41,7 +41,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { if (joint_mutect2) { // Perform variant calling using mutect2 module in tumor single mode // Group cram files by patient - patient_crams = input.map{ meta, t_cram, t_crai -> [ meta - meta.subMap('sample') + [id:meta.patient], t_cram, t_crai ] }.groupTuple() + patient_crams = input.groupTuple() // Add intervals for scatter-gather scaling patient_cram_intervals = patient_crams.combine(intervals) // Move num_intervals to meta map and reorganize channel for MUTECT2 module @@ -91,21 +91,11 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { // Mix intervals and no_intervals channels together // Remove unnecessary metadata - if (joint_mutect2) { - // Remove sample and status metadata as they become irrelevant for joint calling - vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), vcf ] } - tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), tbi ] } - stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), stats ] } - f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2 -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), f1r2 ] } - } - else { - // Keep sample and status metadata - vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals', 'data_type'), vcf ] } - tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('num_intervals', 'data_type'), tbi ] } - stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('num_intervals', 'data_type'), stats ] } - f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2 -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type'), f1r2 ] } + vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] } + tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] } + stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('num_intervals'), stats ] } + f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2 -> [ meta - meta.subMap('num_intervals'), f1r2 ] } - } // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2 LEARNREADORIENTATIONMODEL(f1r2) @@ -135,14 +125,13 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { calculatecontamination_out_cont = Channel.empty() if (joint_mutect2) { - // Remove sample names and retain patient name as the main identifier - calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type') + [id:meta.patient], seg ] }.groupTuple() - calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('sample', 'status', 'num_intervals', 'data_type') + [id:meta.patient], cont ] }.groupTuple() - } - else { + // Group tables by samples + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('num_intervals'), seg ] }.groupTuple() + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('num_intervals'), cont ] }.groupTuple() + } else { // Regular single sample mode - calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('num_intervals', 'data_type'), seg ] } - calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('num_intervals', 'data_type'), cont ] } + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('num_intervals'), seg ] } + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('num_intervals'), cont ] } } // Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables From e1e573ec9246edf1b97a88c02fce0de6fdbaff20 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 31 Aug 2023 12:45:15 +0200 Subject: [PATCH 08/10] simplify meta data handling --- .../local/bam_variant_calling_somatic_all/main.nf | 10 +++++++--- .../local/bam_variant_calling_somatic_mutect2/main.nf | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index 280c2277e5..0f201dfc91 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -70,8 +70,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { // CONTROLFREEC if (tools.split(',').contains('controlfreec')) { // Remap channels to match module/subworkflow - cram_normal = cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, normal_cram, normal_crai ] } - cram_tumor = cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, tumor_cram, tumor_crai ] } + cram_normal = cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, normal_cram, normal_crai ] } + cram_tumor = cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, tumor_cram, tumor_crai ] } MPILEUP_NORMAL( cram_normal, @@ -183,7 +183,11 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { if (tools.split(',').contains('mutect2')) { BAM_VARIANT_CALLING_SOMATIC_MUTECT2( // Remap channel to match module/subworkflow - cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] }, + cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> + joint_mutect2 ? + [ meta + [ id:meta.patient ] - meta.subMap('patient', 'tumor_id'), [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] : + [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] + }, // Remap channel to match module/subworkflow fasta.map{ it -> [ [ id:'fasta' ], it ] }, // Remap channel to match module/subworkflow diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf index 1e73885360..7e2a74d0e8 100644 --- a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf @@ -41,9 +41,9 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { if (joint_mutect2) { // Separate normal cram files and remove duplicates - ch_normal_cram = input.map{ meta, cram, crai -> [ meta - meta.subMap('tumor_id') + [id:meta.patient], cram[0], crai[0] ] }.unique() + ch_normal_cram = input.map{ meta, cram, crai -> [ meta, cram[0], crai[0] ] }.unique() // Extract tumor cram files - ch_tumor_cram = input.map{ meta, cram, crai -> [ meta - meta.subMap('tumor_id') + [id:meta.patient], cram[1], crai[1] ] } + ch_tumor_cram = input.map{ meta, cram, crai -> [ meta, cram[1], crai[1] ] } // Merge normal and tumor crams by patient ch_tn_cram = ch_normal_cram.mix(ch_tumor_cram).groupTuple() // Combine input and intervals for scatter and gather strategy From e68e19f3eb7455e83bd41fc348f09edb52b17e3f Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 31 Aug 2023 13:09:23 +0200 Subject: [PATCH 09/10] add comments --- subworkflows/local/bam_variant_calling_somatic_all/main.nf | 2 ++ subworkflows/local/bam_variant_calling_tumor_only_all/main.nf | 2 ++ 2 files changed, 4 insertions(+) diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index 0f201dfc91..bb98e6c6a1 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -183,6 +183,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { if (tools.split(',').contains('mutect2')) { BAM_VARIANT_CALLING_SOMATIC_MUTECT2( // Remap channel to match module/subworkflow + // Adjust meta.map to simplify joining channels + // joint_mutect2 mode needs different meta.map than regular mode cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> joint_mutect2 ? [ meta + [ id:meta.patient ] - meta.subMap('patient', 'tumor_id'), [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] : diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf index 6e90af5974..738350713e 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf @@ -110,6 +110,8 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { // MUTECT2 if (tools.split(',').contains('mutect2')) { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2( + // Adjust meta.map to simplify joining channels + // joint_mutect2 mode needs different meta.map than regular mode cram.map{ meta, cram, crai -> joint_mutect2 ? [ meta + [ id:meta.patient ] - meta.subMap('sample', 'status', 'num_intervals', 'data_type', 'patient') , cram, crai ] : From 4c1eced91b6050894d7a93035e83f51a1047c22e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 31 Aug 2023 14:03:42 +0200 Subject: [PATCH 10/10] change channels name --- .../main.nf | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf index 7e2a74d0e8..8de17d6a4d 100644 --- a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf @@ -153,27 +153,27 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { CALCULATECONTAMINATION(ch_calculatecontamination_in_tables) // Initialize empty channel: Contamination calculation is run on pileup table, pileup is not run if germline resource is not provided - ch_seg_to_filtermutectcalls = Channel.empty() - ch_cont_to_filtermutectcalls = Channel.empty() + calculatecontamination_out_seg = Channel.empty() + calculatecontamination_out_cont = Channel.empty() if (joint_mutect2) { // Reduce the meta to only patient name - ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], seg]}.groupTuple() - ch_cont_to_filtermutectcalls = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], cont]}.groupTuple() + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], seg]}.groupTuple() + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], cont]}.groupTuple() } else { // Keep tumor_vs_normal ID - ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation - ch_cont_to_filtermutectcalls = CALCULATECONTAMINATION.out.contamination + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination } // Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true) - .join(stats, failOnDuplicate: true, failOnMismatch: true) - .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true) - .join(ch_seg_to_filtermutectcalls) - .join(ch_cont_to_filtermutectcalls) - .map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] } + .join(stats, failOnDuplicate: true, failOnMismatch: true) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true) + .join(calculatecontamination_out_seg) + .join(calculatecontamination_out_cont) + .map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] } FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict) @@ -205,8 +205,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { pileup_table_normal // channel: [ meta, table_normal ] pileup_table_tumor // channel: [ meta, table_tumor ] - contamination_table = ch_cont_to_filtermutectcalls // channel: [ meta, contamination ] - segmentation_table = ch_seg_to_filtermutectcalls // channel: [ meta, segmentation ] + contamination_table = calculatecontamination_out_cont // channel: [ meta, contamination ] + segmentation_table = calculatecontamination_out_seg // channel: [ meta, segmentation ] versions // channel: [ versions.yml ] }