diff --git a/CHANGELOG.md b/CHANGELOG.md index a1974ca388..8c1729ac11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#982](https://github.com/nf-core/sarek/pull/982) - Remove usage of exit statements, using `Nextflow.error` instead - [#985](https://github.com/nf-core/sarek/pull/985) - Cache correctly identifies when it needs to be updated - [#988](https://github.com/nf-core/sarek/pull/988) - Updated ascat module to fix seed for reproducibility +- [#998](https://github.com/nf-core/sarek/pull/998) - Remove parallelization within a sample for `Manta` ### Deprecated diff --git a/conf/modules/manta.config b/conf/modules/manta.config index 944025af07..eea92d8083 100644 --- a/conf/modules/manta.config +++ b/conf/modules/manta.config @@ -16,42 +16,14 @@ process { withName: 'MANTA.*' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.manta" : "${meta.id}.manta.${target_bed.simpleName}" } ext.args = { params.wes ? "--exome" : "" } + ext.prefix = { "${meta.id}.manta" } ext.when = { params.tools && params.tools.split(',').contains('manta') } publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}", - saveAs: { meta.num_intervals > 1 ? 
null : "manta/${meta.id}/${it}" } - ] - } - - withName: 'MERGE_MANTA.*' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/manta/${meta.id}/" }, + path: { "${params.outdir}/variant_calling/manta/${meta.id}" }, pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" ] } - withName: 'MERGE_MANTA_DIPLOID' { - ext.prefix = {"${meta.id}.manta.diploid_sv"} - } - - withName: 'MERGE_MANTA_SMALL_INDELS' { - ext.prefix = {"${meta.id}.manta.candidate_small_indels"} - } - - withName: 'MERGE_MANTA_SV' { - ext.prefix = {"${meta.id}.manta.candidate_sv"} - } - - withName: 'MERGE_MANTA_TUMOR' { - ext.prefix = {"${meta.id}.manta.tumor_sv"} - } - - withName: 'MERGE_MANTA_SOMATIC' { - ext.prefix = {"${meta.id}.manta.somatic_sv"} - } } diff --git a/modules.json b/modules.json index fc38461b19..2cfe2fc364 100644 --- a/modules.json +++ b/modules.json @@ -295,12 +295,12 @@ }, "manta/somatic": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "f68f379dcdba4e8c132c6daa6cd8794527b81a04", "installed_by": ["modules"] }, "manta/tumoronly": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "f68f379dcdba4e8c132c6daa6cd8794527b81a04", "installed_by": ["modules"] }, "mosdepth": { diff --git a/modules/nf-core/manta/somatic/main.nf b/modules/nf-core/manta/somatic/main.nf index c559c8851b..64f296b288 100644 --- a/modules/nf-core/manta/somatic/main.nf +++ b/modules/nf-core/manta/somatic/main.nf @@ -29,7 +29,7 @@ process MANTA_SOMATIC { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" + def options_manta = target_bed ? 
"--callRegions $target_bed" : "" """ configManta.py \ diff --git a/modules/nf-core/manta/tumoronly/main.nf b/modules/nf-core/manta/tumoronly/main.nf index 3fea008fc4..4600565cbf 100644 --- a/modules/nf-core/manta/tumoronly/main.nf +++ b/modules/nf-core/manta/tumoronly/main.nf @@ -27,7 +27,7 @@ process MANTA_TUMORONLY { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" + def options_manta = target_bed ? "--callRegions $target_bed" : "" """ configManta.py \ --tumorBam $input \ diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 3ec320c06b..46b027a8f9 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -26,6 +26,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { fasta_fai // channel: [mandatory] fasta_fai intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped intervals_bed_combined_haplotypec // channel: [mandatory] intervals/target regions in one file unzipped, no_intervals.bed if no_intervals intervals_bed_gz_tbi // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals known_indels_vqsr @@ -159,7 +160,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { dict.map{ it -> [ [ id:'dict' ], it ] }, fasta, fasta_fai, - intervals_bed_gz_tbi + intervals_bed_gz_tbi_combined ) vcf_manta = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.vcf diff --git a/subworkflows/local/bam_variant_calling_germline_manta/main.nf b/subworkflows/local/bam_variant_calling_germline_manta/main.nf index 422b387883..0827052628 100644 --- 
a/subworkflows/local/bam_variant_calling_germline_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_manta/main.nf @@ -1,6 +1,3 @@ -include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { MANTA_GERMLINE } from '../../../modules/nf-core/manta/germline/main' // Seems to be the consensus on upstream modules implementation too @@ -10,57 +7,29 @@ workflow BAM_VARIANT_CALLING_GERMLINE_MANTA { dict // channel: [optional] [ meta, dict ] fasta // channel: [mandatory] [ fasta ] fasta_fai // channel: [mandatory] [ fasta_fai ] - intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi ] or [] if no intervals; the bed.gz holds all intervals in one file main: versions = Channel.empty() - // Combine cram and intervals for spread and gather strategy - cram_intervals = cram.combine(intervals) - // Move num_intervals to meta map - .map{ meta, cram, crai, intervals, intervals_index, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, intervals, intervals_index ] } + // Combine cram and intervals, account for 0 intervals + cram_intervals = cram.combine(intervals).map{ it -> + bed_gz = it.size() > 3 ? it[3] : [] + bed_tbi = it.size() > 3 ? 
it[4] : [] - MANTA_GERMLINE(cram_intervals, fasta, fasta_fai) - - // Figuring out if there is one or more vcf(s) from the same sample - small_indels_vcf = MANTA_GERMLINE.out.candidate_small_indels_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } - - // Figuring out if there is one or more vcf(s) from the same sample - sv_vcf = MANTA_GERMLINE.out.candidate_sv_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 + [it[0], it[1], it[2], bed_gz, bed_tbi] } - // Figuring out if there is one or more vcf(s) from the same sample - diploid_sv_vcf = MANTA_GERMLINE.out.diploid_sv_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } - - // Only when using intervals - diploid_sv_vcf_to_merge = diploid_sv_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - small_indels_vcf_to_merge = small_indels_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - sv_vcf_to_merge = sv_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + MANTA_GERMLINE(cram_intervals, fasta, fasta_fai) - MERGE_MANTA_DIPLOID(diploid_sv_vcf_to_merge, dict) - MERGE_MANTA_SMALL_INDELS(small_indels_vcf_to_merge, dict) - MERGE_MANTA_SV(sv_vcf_to_merge, dict) + small_indels_vcf = MANTA_GERMLINE.out.candidate_small_indels_vcf + sv_vcf = MANTA_GERMLINE.out.candidate_sv_vcf + diploid_sv_vcf = MANTA_GERMLINE.out.diploid_sv_vcf - // Mix intervals and no_intervals channels together // Only diploid SV should get annotated - vcf = Channel.empty().mix(MERGE_MANTA_DIPLOID.out.vcf, diploid_sv_vcf.no_intervals) - // add variantcaller to meta map and remove no longer necessary field: num_intervals - .map{ meta, vcf -> [ meta - 
meta.subMap('num_intervals') + [ variantcaller:'manta' ], vcf ] } + // add variantcaller to meta map + vcf = diploid_sv_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } - versions = versions.mix(MERGE_MANTA_DIPLOID.out.versions) - versions = versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) - versions = versions.mix(MERGE_MANTA_SV.out.versions) versions = versions.mix(MANTA_GERMLINE.out.versions) emit: diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index cc29e4b3a5..65ec705fd4 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -31,6 +31,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped mappability msisensorpro_scan // channel: [optional] msisensorpro_scan panel_of_normals // channel: [optional] panel_of_normals @@ -147,7 +148,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { dict.map{ it -> [ [ id:'dict' ], it ] }, fasta, fasta_fai, - intervals_bed_gz_tbi + intervals_bed_gz_tbi_combined ) vcf_manta = BAM_VARIANT_CALLING_SOMATIC_MANTA.out.vcf diff --git a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf index 82710b8629..1670276d36 100644 --- a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf @@ -1,7 +1,3 @@ -include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as 
MERGE_MANTA_SMALL_INDELS } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SOMATIC } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { MANTA_SOMATIC } from '../../../modules/nf-core/manta/somatic/main' workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { @@ -10,86 +6,31 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { dict // channel: [optional] [ meta, dict ] fasta // channel: [mandatory] [ fasta ] fasta_fai // channel: [mandatory] [ fasta_fai ] - intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi ] or [] if no intervals; the bed.gz holds all intervals in one file main: versions = Channel.empty() - // Combine cram and intervals for spread and gather strategy - cram_intervals = cram.combine(intervals) - // Move num_intervals to meta map - .map{ meta, cram1, crai1, cram2, crai2, intervals, intervals_index, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram1, crai1, cram2, crai2, intervals, intervals_index ] } + // Combine cram and intervals, account for 0 intervals + cram_intervals = cram.combine(intervals).map{ it -> + bed_gz = it.size() > 5 ? it[5] : [] + bed_tbi = it.size() > 5 ? 
it[6] : [] - MANTA_SOMATIC(cram_intervals, fasta, fasta_fai) - - // Figuring out if there is one or more vcf(s) from the same sample - candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } - - // Figuring out if there is one or more vcf(s) from the same sample - candidate_small_indels_vcf_tbi = MANTA_SOMATIC.out.candidate_small_indels_vcf_tbi.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 + [it[0], it[1], it[2], it[3], it[4], bed_gz, bed_tbi] } - // Figuring out if there is one or more vcf(s) from the same sample - candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } - - // Figuring out if there is one or more vcf(s) from the same sample - diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } - - // Figuring out if there is one or more vcf(s) from the same sample - somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } - - // Only when using intervals - candidate_small_indels_vcf_to_merge = candidate_small_indels_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - candidate_sv_vcf_to_merge = candidate_sv_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - diploid_sv_vcf_to_merge = diploid_sv_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - 
somatic_sv_vcf_to_merge = somatic_sv_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + MANTA_SOMATIC(cram_intervals, fasta, fasta_fai) - MERGE_MANTA_SMALL_INDELS(candidate_small_indels_vcf_to_merge, dict) - MERGE_MANTA_SV(candidate_sv_vcf_to_merge, dict) - MERGE_MANTA_DIPLOID(diploid_sv_vcf_to_merge, dict) - MERGE_MANTA_SOMATIC(somatic_sv_vcf_to_merge, dict) + candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf + candidate_small_indels_vcf_tbi = MANTA_SOMATIC.out.candidate_small_indels_vcf_tbi + candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf + diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf + somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf - // Mix intervals and no_intervals channels together // Only diploid and somatic SV should get annotated - vcf = Channel.empty().mix(MERGE_MANTA_DIPLOID.out.vcf, MERGE_MANTA_SOMATIC.out.vcf, diploid_sv_vcf.no_intervals, somatic_sv_vcf.no_intervals) - // add variantcaller to meta map and remove no longer necessary field: num_intervals - .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'manta' ], vcf ] } - - // Mix intervals and no_intervals channels together - // Only joining reads for StrelkaBP - candidate_small_indels_vcf = Channel.empty().mix(MERGE_MANTA_SMALL_INDELS.out.vcf, candidate_small_indels_vcf.no_intervals) - // remove no longer necessary field: num_intervals - .map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] } - - // Mix intervals and no_intervals channels together - // Only joining reads for StrelkaBP - candidate_small_indels_vcf_tbi = Channel.empty().mix(MERGE_MANTA_SMALL_INDELS.out.tbi, candidate_small_indels_vcf_tbi.no_intervals) - // remove no longer necessary field: num_intervals - .map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] } + // add variantcaller to meta map + vcf = Channel.empty().mix(diploid_sv_vcf, somatic_sv_vcf).map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf 
] } - versions = versions.mix(MERGE_MANTA_SV.out.versions) - versions = versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) - versions = versions.mix(MERGE_MANTA_DIPLOID.out.versions) - versions = versions.mix(MERGE_MANTA_SOMATIC.out.versions) versions = versions.mix(MANTA_SOMATIC.out.versions) emit: diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf index 9e5956e2d9..c34aa6fca6 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf @@ -30,6 +30,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals intervals_bed_gz_tbi // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped mappability panel_of_normals // channel: [optional] panel_of_normals panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi @@ -132,7 +133,8 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { dict.map{ it -> [ [ id:'dict' ], it ] }, fasta, fasta_fai, - intervals_bed_gz_tbi + intervals_bed_gz_tbi_combined + ) vcf_manta = BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA.out.vcf diff --git a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf index 796cd47495..8ee472aa80 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf @@ -1,6 +1,3 @@ -include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from 
'../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_TUMOR } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { MANTA_TUMORONLY } from '../../../modules/nf-core/manta/tumoronly/main' // Seems to be the consensus on upstream modules implementation too @@ -10,57 +7,29 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA { dict // channel: [optional] [ meta, dict ] fasta // channel: [mandatory] [ fasta ] fasta_fai // channel: [mandatory] [ fasta_fai ] - intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi ] or [] if no intervals; the bed.gz holds all intervals in one file main: versions = Channel.empty() - // Combine cram and intervals for spread and gather strategy - cram_intervals = cram.combine(intervals) - // Move num_intervals to meta map - .map{ meta, cram, crai, intervals, intervals_index, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, intervals, intervals_index ] } + // Combine cram and intervals, account for 0 intervals + cram_intervals = cram.combine(intervals).map{ it -> + bed_gz = it.size() > 3 ? it[3] : [] + bed_tbi = it.size() > 3 ? 
it[4] : [] - MANTA_TUMORONLY(cram_intervals, fasta, fasta_fai) - - // Figuring out if there is one or more vcf(s) from the same sample - small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } - - // Figuring out if there is one or more vcf(s) from the same sample - candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 + [it[0], it[1], it[2], bed_gz, bed_tbi] } - // Figuring out if there is one or more vcf(s) from the same sample - tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf.branch{ - // Use meta.num_intervals to asses number of intervals - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } - - // Only when using intervals - candidate_sv_vcf_to_merge = candidate_sv_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - small_indels_vcf_to_merge = small_indels_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - tumor_sv_vcf_to_merge = tumor_sv_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + MANTA_TUMORONLY(cram_intervals, fasta, fasta_fai) - MERGE_MANTA_SV(candidate_sv_vcf_to_merge, dict) - MERGE_MANTA_SMALL_INDELS(small_indels_vcf_to_merge, dict) - MERGE_MANTA_TUMOR(tumor_sv_vcf_to_merge, dict) + small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf + candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf + tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf - // Mix intervals and no_intervals channels together // Only tumor sv should get annotated - vcf = Channel.empty().mix(MERGE_MANTA_TUMOR.out.vcf, tumor_sv_vcf.no_intervals) - // add variantcaller to meta map and remove no longer necessary field: 
num_intervals - .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'manta' ], vcf ] } + // add variantcaller to meta map + vcf = tumor_sv_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } - versions = versions.mix(MERGE_MANTA_SV.out.versions) - versions = versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) - versions = versions.mix(MERGE_MANTA_TUMOR.out.versions) versions = versions.mix(MANTA_TUMORONLY.out.versions) emit: diff --git a/subworkflows/local/prepare_intervals/main.nf b/subworkflows/local/prepare_intervals/main.nf index a0091bde21..65457d4c48 100644 --- a/subworkflows/local/prepare_intervals/main.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -6,10 +6,11 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { BUILD_INTERVALS } from '../../../modules/local/build_intervals/main' -include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed/main' -include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { BUILD_INTERVALS } from '../../../modules/local/build_intervals/main' +include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed/main' +include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed/main' +include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_COMBINED } from '../../../modules/nf-core/tabix/bgziptabix/main' workflow PREPARE_INTERVALS { take: @@ -94,14 +95,19 @@ workflow PREPARE_INTERVALS { versions = versions.mix(TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.versions) } - intervals_bed_combined = intervals_combined.map{meta, bed -> bed }.collect() + 
TABIX_BGZIPTABIX_INTERVAL_COMBINED(intervals_combined) + versions = versions.mix(TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.versions) + + intervals_bed_combined = intervals_combined.map{meta, bed -> bed }.collect() + intervals_bed_gz_tbi_combined = TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.gz_tbi.map{meta, gz, tbi -> [gz, tbi] }.collect() emit: // Intervals split for parallel execution - intervals_bed // [ intervals.bed, num_intervals ] - intervals_bed_gz_tbi // [ target.bed.gz, target.bed.gz.tbi, num_intervals ] + intervals_bed // [ intervals.bed, num_intervals ] + intervals_bed_gz_tbi // [ target.bed.gz, target.bed.gz.tbi, num_intervals ] // All intervals in one file - intervals_bed_combined // [ intervals.bed ] + intervals_bed_combined // [ intervals.bed ] + intervals_bed_gz_tbi_combined //[intervals.bed.gz, intervals.bed.gz.tbi] versions // [ versions.yml ] } diff --git a/tests/test_manta.yml b/tests/test_manta.yml index 97fc3de928..635ccef01e 100644 --- a/tests/test_manta.yml +++ b/tests/test_manta.yml @@ -74,7 +74,7 @@ - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz # binary changes md5sums on reruns - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/manta should_exist: false - name: Run variant calling on tumor_only sample with manta without intervals @@ -145,15 +145,15 @@ - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz # binary changes md5sums on reruns - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz # binary changes md5sums on reruns - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: 
e7ca7e9fe76ce12198fd54ec9a64fad4 + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz # binary changes md5sums on reruns - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/manta should_exist: false - name: Run variant calling on somatic sample with manta without intervals diff --git a/tests/test_strelka_bp.yml b/tests/test_strelka_bp.yml index 1cf2f6478a..66c502267f 100644 --- a/tests/test_strelka_bp.yml +++ b/tests/test_strelka_bp.yml @@ -59,15 +59,15 @@ - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz # binary changes md5sums on reruns - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz # binary changes md5sums on reruns - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz # binary changes md5sums on reruns - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz # binary changes md5sums on reruns - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi diff --git a/workflows/sarek.nf b/workflows/sarek.nf index f3b025909b..ba008b7d1a 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -370,7 +370,9 @@ workflow 
SAREK { // Intervals for speed up preprocessing/variant calling by spread/gather // [interval.bed] all intervals in one file - intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined + // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) intervals_for_preprocessing = params.wes ? intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() : @@ -920,7 +922,8 @@ workflow SAREK { fasta, fasta_fai, intervals_and_num_intervals, - intervals_bed_combined, // [] if no_intervals, else interval_bed_combined.bed + intervals_bed_combined, // [] if no_intervals, else interval_bed_combined.bed, + intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi PREPARE_INTERVALS.out.intervals_bed_combined, // no_intervals.bed if no intervals, else interval_bed_combined.bed; Channel operations possible intervals_bed_gz_tbi_and_num_intervals, known_indels_vqsr, @@ -949,6 +952,7 @@ workflow SAREK { intervals_and_num_intervals, intervals_bed_gz_tbi_and_num_intervals, intervals_bed_combined, + intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi mappability, pon, pon_tbi @@ -971,6 +975,7 @@ workflow SAREK { intervals_and_num_intervals, intervals_bed_gz_tbi_and_num_intervals, intervals_bed_combined, + intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi mappability, msisensorpro_scan, pon,