From f6afe61dbe88945c381a0788d1a6fb61cb356718 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 25 May 2023 16:35:31 +0200 Subject: [PATCH 1/8] add module and remove redundant parameters --- conf/modules/prepare_references.config | 9 + main.nf | 9 +- modules.json | 283 ++++++++++++++++------ modules/nf-core/gatk4/shiftfasta/main.nf | 66 +++++ modules/nf-core/gatk4/shiftfasta/meta.yml | 81 +++++++ modules/nf-core/samtools/faidx/main.nf | 12 +- modules/nf-core/samtools/faidx/meta.yml | 14 +- nextflow_schema.json | 58 +---- subworkflows/local/prepare_references.nf | 30 ++- workflows/raredisease.nf | 36 ++- 10 files changed, 426 insertions(+), 172 deletions(-) create mode 100644 modules/nf-core/gatk4/shiftfasta/main.nf create mode 100644 modules/nf-core/gatk4/shiftfasta/meta.yml diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 8504a34c..af73549a 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -49,6 +49,11 @@ process { ext.when = {!params.fai} } + withName: '.*PREPARE_REFERENCES:SAMTOOLS_EXTRACT_MT' { + ext.args = { " ${params.mito_name} -o ${meta.id}_mt.fa" } + ext.when = {!params.mt_fasta && !params.skip_mt_analysis} + } + withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_SHIFT_MT' { ext.when = {!params.mt_fai_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")} } @@ -57,6 +62,10 @@ process { ext.when = {!params.sequence_dictionary} } + withName: '.*PREPARE_REFERENCES:GATK_SHIFTFASTA' { + ext.args = { "--interval-file-name ${meta.id}_mt" } + } + withName: '.*PREPARE_REFERENCES:GATK_SD_SHIFT_MT' { ext.when = {!params.mt_sequence_dictionary_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")} } diff --git a/main.nf b/main.nf index 7e0ceb8a..fe93d397 100644 --- a/main.nf +++ b/main.nf @@ -32,14 +32,7 @@ params.known_dbsnp_tbi = WorkflowMain.getGenomeAttribute(params, params.known_indels 
= WorkflowMain.getGenomeAttribute(params, 'known_indels') params.known_mills = WorkflowMain.getGenomeAttribute(params, 'known_mills') params.ml_model = WorkflowMain.getGenomeAttribute(params, 'ml_model') -params.mt_backchain_shift = WorkflowMain.getGenomeAttribute(params, 'mt_backchain_shift') -params.mt_bwa_index_shift = WorkflowMain.getGenomeAttribute(params, 'mt_bwa_index_shift') -params.mt_bwamem2_index_shift = WorkflowMain.getGenomeAttribute(params, 'mt_bwamem2_index_shift') -params.mt_fasta_shift = WorkflowMain.getGenomeAttribute(params, 'mt_fasta_shift') -params.mt_fai_shift = WorkflowMain.getGenomeAttribute(params, 'mt_fai_shift') -params.mt_intervals = WorkflowMain.getGenomeAttribute(params, 'mt_intervals') -params.mt_intervals_shift = WorkflowMain.getGenomeAttribute(params, 'mt_intervals_shift') -params.mt_sequence_dictionary_shift = WorkflowMain.getGenomeAttribute(params, 'mt_sequence_dictionary_shift') +params.mt_fasta = WorkflowMain.getGenomeAttribute(params, 'mt_fasta') params.reduced_penetrance = WorkflowMain.getGenomeAttribute(params, 'reduced_penetrance') params.sequence_dictionary = WorkflowMain.getGenomeAttribute(params, 'sequence_dictionary') params.score_config_snv = WorkflowMain.getGenomeAttribute(params, 'score_config_snv') diff --git a/modules.json b/modules.json index 6b0ebcbc..0622ed2e 100644 --- a/modules.json +++ b/modules.json @@ -8,345 +8,488 @@ "bcftools/annotate": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/concat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/filter": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/merge": { "branch": "master", "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", - 
"installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/norm": { "branch": "master", "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/reheader": { "branch": "master", "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/roh": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cadd": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deepvariant": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "expansionhunter": { "branch": "master", "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053", - "installed_by": ["modules"] + "installed_by": [ 
+ "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/bedtointervallist": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/filtermutectcalls": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/intervallisttools": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mergebamalignment": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mutect2": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/printreads": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/revertsam": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/samtofastq": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/selectvariants": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": 
["modules"] + "installed_by": [ + "modules" + ] + }, + "gatk4/shiftfasta": { + "branch": "master", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "installed_by": [ + "modules" + ] }, "gatk4/splitintervals": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/variantfiltration": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genmod/annotate": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genmod/compound": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genmod/models": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genmod/score": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "glnexus": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "haplocheck": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "haplogrep2/classify": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "manta/germline": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mosdepth": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] 
}, "multiqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "peddy": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/addorreplacereadgroups": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collecthsmetrics": { "branch": "master", "git_sha": "0ce3ab0ac301f160225b22254fa238478b4389f2", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectwgsmetrics": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/liftovervcf": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/markduplicates": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/renamesampleinvcf": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/sortvcf": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "qualimap/bamqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "rhocall/annotate": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "bf8ff98531167f8245ba5c44ce7d781503ddf936", + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "smncopynumbercaller": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "stranger": { "branch": "master", "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "svdb/merge": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "svdb/query": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "01b3b2509d76625b6d6cd613b349fb4777712a15", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/tabix": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - 
"installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tiddit/cov": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tiddit/sv": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ucsc/wigtobigwig": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "vcfanno": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/shiftfasta/main.nf b/modules/nf-core/gatk4/shiftfasta/main.nf new file mode 100644 index 00000000..cf984e8b --- /dev/null +++ b/modules/nf-core/gatk4/shiftfasta/main.nf @@ -0,0 +1,66 @@ +process GATK4_SHIFTFASTA { + tag "$meta.id" + label 'process_single' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fasta_fai) + tuple val(meta3), path(dict) + + output: + tuple val(meta), path("*_shift.fasta") , emit: shift_fa + tuple val(meta), path("*_shift.fasta.fai") , emit: shift_fai + tuple val(meta), path("*_shift.back_chain") , emit: shift_back_chain + tuple val(meta), path("*_shift.dict") , emit: dict , optional: true + tuple val(meta), path("*.intervals") , emit: intervals , optional: true + tuple val(meta), path("*.shifted.intervals") , emit: shift_intervals , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def seq_dict = dict ? "--sequence-dictionary ${dict}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK ShiftFasta] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" ShiftFasta \\ + --reference $fasta \\ + --output ${prefix}_shift.fasta \\ + --shift-back-output ${prefix}_shift.back_chain \\ + $args \\ + $seq_dict \\ + --tmp-dir . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch test.intervals + touch test_shift.back_chain + touch test_shift.dict + touch test.shifted.intervals + touch test_shift.fasta + touch test_shift.fasta.fai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/shiftfasta/meta.yml b/modules/nf-core/gatk4/shiftfasta/meta.yml new file mode 100644 index 00000000..6d563ded --- /dev/null +++ b/modules/nf-core/gatk4/shiftfasta/meta.yml @@ -0,0 +1,81 @@ +name: "gatk4_shiftfasta" +description: Create a fasta with the bases shifted by offset +keywords: + - mitochondria + - shiftfasta + - shiftchain + - shiftintervals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: "https://github.com/broadinstitute/gatk" + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: fasta file + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta_fai: + type: file + description: index for fasta file + pattern: "*.{fai}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - dict: + type: file + description: sequence dictionary file + pattern: "*.{dict}" + +output: + - meta: + type: map + description: | + Groovy Map containing fasta information + e.g. [ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - dict: + type: file + description: sequence dictionary file + pattern: "*.{dict}" + - intervals: + type: file + description: Intervals file for the fasta file + pattern: "*.{intervals}" + - shift_back_chain: + type: file + description: The shiftback chain file to use when lifting over + pattern: "*.{back_chain}" + - shift_fa: + type: file + description: Shifted fasta file + pattern: "*.{fa,fasta}" + - shift_intervals: + type: file + description: Intervals file for the shifted fasta file + pattern: "*.{shifted.intervals}" + +authors: + - "@ramprasadn" diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 4dd0e5b0..c1e8ef3a 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -9,11 +9,13 @@ process SAMTOOLS_FAIDX { input: tuple val(meta), path(fasta) + tuple val(meta2), path(fai) output: - tuple val(meta), path ("*.fai"), emit: fai - tuple val(meta), path ("*.gzi"), emit: gzi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,8 +25,8 @@ process SAMTOOLS_FAIDX { """ samtools \\ faidx \\ - $args \\ - $fasta + $fasta \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index fe2fe9a1..957b25e5 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ 
b/modules/nf-core/samtools/faidx/meta.yml @@ -3,6 +3,7 @@ description: Index FASTA file keywords: - index - fasta + - faidx tools: - samtools: description: | @@ -17,12 +18,21 @@ input: - meta: type: map description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: FASTA file pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" output: - meta: type: map diff --git a/nextflow_schema.json b/nextflow_schema.json index 842d6375..44810f84 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -156,22 +156,6 @@ "description": "Name of the mitochondrial contig in the reference fasta file", "help_text": "Used to extract relevant information from the references to analyse mitochondria" }, - "mt_intervals": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-file", - "pattern": "^\\S+\\.intervals?(_list)?$", - "description": "Path to the interval list of the non control mitochondral region.", - "help_text": "Path to the interval list of the non control mitochondral regions for Mutect2" - }, - "mt_intervals_shift": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-file", - "pattern": "^\\S+\\.intervals?(\\_list)?$", - "description": "Path to the interval list of the non control mitochondral region in shifted fasta.", - "help_text": "Path to the interval list of the non control mitochondral regions in shifted fasta for Mutect2" - }, "known_dbsnp": { "type": "string", "format": "path", @@ -211,7 +195,7 @@ "description": "Path to sentieon machine learning model file.", "hidden": true }, - "mt_fasta_shift": { + "mt_fasta": { "type": "string", "format": "file-path", "mimetype": "text/plain", @@ -219,38 +203,6 @@ "description": "Path to mitochondrial FASTA genome 
file.", "fa_icon": "far fa-file-code" }, - "mt_fai_shift": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?\\.fai?$", - "description": "Path to mitochondrial FASTA genome index file.", - "fa_icon": "far fa-file-code" - }, - "mt_bwa_index_shift": { - "type": "string", - "format": "directory-path", - "description": "Directory for pre-built bwa index for shifted mitochondrial fasta (used for alignment with sentioen)", - "help_text": "If none provided, will be generated automatically from the FASTA reference.", - "fa_icon": "fas fa-folder-open", - "hidden": true - }, - "mt_bwamem2_index_shift": { - "type": "string", - "format": "directory-path", - "description": "Directory for pre-built bwamem2 index for shifted mitochondrial fasta.", - "help_text": "If none provided, will be generated automatically from the FASTA reference.", - "fa_icon": "fas fa-folder-open", - "hidden": true - }, - "mt_backchain_shift": { - "type": "string", - "format": "directory-path", - "description": "Chain file describing the alignment between the mitochondrial shifted fasta and typical mitochondrial fasta", - "help_text": "For more information, check https://genome.ucsc.edu/goldenPath/help/chain.html", - "fa_icon": "fas fa-folder-open", - "hidden": true - }, "reduced_penetrance": { "type": "string", "format": "path", @@ -280,14 +232,6 @@ "description": "Genome dictionary file", "hidden": true }, - "mt_sequence_dictionary_shift": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-chart-bar", - "pattern": "^\\S+\\.dict$", - "description": "Shifted mitochondrial genome dictionary file", - "hidden": true - }, "vep_filters": { "type": "string", "format": "path", diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 0138cbaa..0ac46211 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -10,9 +10,12 @@ include { 
GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modul include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main' include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_SHIFT_MT } from '../../modules/nf-core/gatk4/createsequencedictionary/main' include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main' +include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main' include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes' +include { SAMTOOLS_FAIDX as SAMTOOLS_EXTRACT_MT } from '../../modules/nf-core/samtools/faidx/main' include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main' -include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_SHIFT_MT } from '../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT } from '../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modules/nf-core/samtools/faidx/main' include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/local/sentieon/bwamemindex' include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_SHIFT_MT } from '../../modules/local/sentieon/bwamemindex' include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main' @@ -24,7 +27,8 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modul workflow PREPARE_REFERENCES { take: ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_fasta_mt // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_fasta_mt // channel: [optional for dedicated mt analysis] [ val(meta), path(fasta) ] ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] 
ch_target_bed // channel: [mandatory for WES] [ path(bed) ] @@ -40,15 +44,21 @@ workflow PREPARE_REFERENCES { // Genome indices BWA_INDEX_GENOME(ch_fasta).index.set{ch_bwa} BWAMEM2_INDEX_GENOME(ch_fasta) - BWAMEM2_INDEX_SHIFT_MT(ch_fasta_mt) SENTIEON_BWAINDEX_GENOME(ch_fasta).index.set{ch_sentieonbwa} - SENTIEON_BWAINDEX_SHIFT_MT(ch_fasta_mt) - SAMTOOLS_FAIDX_GENOME(ch_fasta) - SAMTOOLS_FAIDX_SHIFT_MT(ch_fasta_mt) + SAMTOOLS_FAIDX_GENOME(ch_fasta, [[],[]]) GATK_SD(ch_fasta) - GATK_SD_SHIFT_MT(ch_fasta_mt) GET_CHROM_SIZES( SAMTOOLS_FAIDX_GENOME.out.fai ) + // MT indices + BWAMEM2_INDEX_SHIFT_MT(ch_fasta_mt) + SENTIEON_BWAINDEX_SHIFT_MT(ch_fasta_mt) + ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect() + SAMTOOLS_EXTRACT_MT(ch_fasta, ch_fai) + ch_mt_fasta = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect() + GATK_SD_SHIFT_MT(SAMTOOLS_EXTRACT_MT.out.fa) + SAMTOOLS_FAIDX_MT_SHIFT(SAMTOOLS_EXTRACT_MT.out.fa, [[],[]]) + GATK_SHIFTFASTA(SAMTOOLS_EXTRACT_MT.out.fa,SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_SHIFT_MT.out.dict) + // Vcf, tab and bed indices TABIX_DBSNP(ch_known_dbsnp) TABIX_GNOMAD_AF(ch_gnomad_af_tab) @@ -75,7 +85,7 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_GENOME.out.versions) ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_SHIFT_MT.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_GENOME.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_SHIFT_MT.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(GATK_SD.out.versions) ch_versions = ch_versions.mix(GATK_SD_SHIFT_MT.out.versions) ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions) @@ -94,8 +104,8 @@ workflow PREPARE_REFERENCES { bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] bwamem2_index_mt_shift = BWAMEM2_INDEX_SHIFT_MT.out.index.collect() // channel: [ val(meta), path(index) 
] chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] - fai = SAMTOOLS_FAIDX_GENOME.out.fai.collect() // channel: [ val(meta), path(fai) ] - fai_mt_shift = SAMTOOLS_FAIDX_SHIFT_MT.out.fai.collect() // channel: [ val(meta), path(fai) ] + fai = ch_fai // channel: [ val(meta), path(fai) ] + fai_mt_shift = SAMTOOLS_FAIDX_MT_SHIFT.out.fai.collect() // channel: [ val(meta), path(fai) ] gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] sequence_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 8555e1f4..61aa8a29 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -30,14 +30,7 @@ def checkPathParamList = [ params.known_indels, params.known_mills, params.ml_model, - params.mt_backchain_shift, - params.mt_bwa_index_shift, - params.mt_bwamem2_index_shift, - params.mt_fasta_shift, - params.mt_fai_shift, - params.mt_intervals, - params.mt_intervals_shift, - params.mt_sequence_dictionary_shift, + params.mt_fasta, params.multiqc_config, params.reduced_penetrance, params.score_config_snv, @@ -82,8 +75,7 @@ if (!params.skip_sv_annotation) { } if (!params.skip_mt_analysis) { - mandatoryParams += ["genome", "mt_backchain_shift", "mito_name", "mt_fasta_shift", "mt_intervals", - "mt_intervals_shift", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"] + mandatoryParams += ["genome", "mito_name", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"] } if (params.analysis_type.equals("wes")) { @@ -190,6 +182,8 @@ workflow RAREDISEASE { : Channel.value([]) ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).collect() : Channel.value([]) + ch_genome_fai = params.fai ? 
Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.value([]) ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.value([[],[]]) ch_intervals_wgs = params.intervals_wgs ? Channel.fromPath(params.intervals_wgs).collect() @@ -200,14 +194,8 @@ workflow RAREDISEASE { : Channel.value([[],[]]) ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).collect() : Channel.value([]) - ch_mt_backchain_shift = params.mt_backchain_shift ? Channel.fromPath(params.mt_backchain_shift).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([]) - ch_mt_fasta_shift = params.mt_fasta_shift ? Channel.fromPath(params.mt_fasta_shift).map { it -> [[id:it[0].simpleName], it] }.collect() + ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() : Channel.empty() - ch_mt_intervals = params.mt_intervals ? Channel.fromPath(params.mt_intervals).collect() - : Channel.value([]) - ch_mt_intervals_shift = params.mt_intervals_shift ? Channel.fromPath(params.mt_intervals_shift).collect() - : Channel.value([]) ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() : Channel.value([]) ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect() @@ -239,7 +227,8 @@ workflow RAREDISEASE { // Prepare references and indices. PREPARE_REFERENCES ( ch_genome_fasta, - ch_mt_fasta_shift, + ch_genome_fai, + ch_mt_fasta, ch_gnomad_af_tab, ch_known_dbsnp, ch_target_bed_unprocessed, @@ -258,10 +247,17 @@ workflow RAREDISEASE { ch_bwamem2_index_mt_shift = params.mt_bwamem2_index_shift ? Channel.fromPath(params.mt_bwamem2_index_shift).map {it -> [[id:it[0].simpleName], it]}.collect() : ch_references.bwamem2_index_mt_shift ch_chrom_sizes = ch_references.chrom_sizes - ch_genome_fai = params.fai ? 
Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.fai + ch_mt_backchain_shift = params.mt_backchain_shift ? Channel.fromPath(params.mt_backchain_shift).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([]) + ch_mt_fasta_shift = params.mt_fasta_shift ? Channel.fromPath(params.mt_fasta_shift).map { it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_mt_intervals = params.mt_intervals ? Channel.fromPath(params.mt_intervals).collect() + : Channel.value([]) + ch_mt_intervals_shift = params.mt_intervals_shift ? Channel.fromPath(params.mt_intervals_shift).collect() + : Channel.value([]) ch_mt_shift_fai = params.mt_fai_shift ? Channel.fromPath(params.mt_fai_shift).map {it -> [[id:it[0].simpleName], it]}.collect() : ch_references.fai_mt_shift + ch_genome_fai = ch_references.fai ch_gnomad_af_idx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect() : ch_references.gnomad_af_idx ch_gnomad_af = params.gnomad_af ? 
ch_gnomad_af_tab.join(ch_gnomad_af_idx).map {meta, tab, idx -> [tab,idx]}.collect() From 49dd613f6c2861a839ab9f0c51ad9cd7a838bfce Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 26 May 2023 10:36:38 +0200 Subject: [PATCH 2/8] update configs --- conf/modules/align_and_call_MT.config | 14 +- conf/modules/analyse_MT.config | 2 +- conf/modules/convert_mt_bam_to_fastq.config | 3 - conf/modules/merge_annotate_MT.config | 3 - conf/test.config | 4 - conf/test_one_sample.config | 4 - subworkflows/local/analyse_MT.nf | 62 ++++---- subworkflows/local/prepare_references.nf | 40 ++--- workflows/raredisease.nf | 157 ++++++++++---------- 9 files changed, 131 insertions(+), 158 deletions(-) diff --git a/conf/modules/align_and_call_MT.config b/conf/modules/align_and_call_MT.config index c4915c11..32e9ed0c 100644 --- a/conf/modules/align_and_call_MT.config +++ b/conf/modules/align_and_call_MT.config @@ -16,18 +16,15 @@ // process { - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:.*' { - ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") } - } withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:BWAMEM2_MEM_MT' { - ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } ext.args = { "-M -K 100000000 -R ${meta.read_group}" } } withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") && params.aligner == "sentieon" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } ext.prefix = { "${meta.id}.sorted" } } @@ -69,18 +66,15 @@ process { // process { - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:.*' { - 
ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") } - } withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:BWAMEM2_MEM_MT' { - ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } ext.args = { "-M -K 100000000 -R ${meta.read_group}" } } withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") && params.aligner == "sentieon" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } ext.prefix = { "${meta.id}.sorted" } } diff --git a/conf/modules/analyse_MT.config b/conf/modules/analyse_MT.config index 85568e3e..4ee1b693 100644 --- a/conf/modules/analyse_MT.config +++ b/conf/modules/analyse_MT.config @@ -17,6 +17,7 @@ process { withName: '.*ANALYSE_MT:.*' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") } publishDir = [ enabled: false ] @@ -25,7 +26,6 @@ process { process { withName: '.*ANALYSE_MT:PICARD_LIFTOVERVCF' { - ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") } ext.prefix = { "${meta.id}_liftover" } } } diff --git a/conf/modules/convert_mt_bam_to_fastq.config b/conf/modules/convert_mt_bam_to_fastq.config index 5cba99b5..9a683b6e 100644 --- a/conf/modules/convert_mt_bam_to_fastq.config +++ b/conf/modules/convert_mt_bam_to_fastq.config @@ -16,9 +16,6 @@ // process { - withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:.*' { - ext.when = { params.mt_fasta_shift && !(params.analysis_type == "wes") } - } withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' { beforeScript = {"mkdir ./tmp"} diff --git 
a/conf/modules/merge_annotate_MT.config b/conf/modules/merge_annotate_MT.config index d62cf73c..a1b41a90 100644 --- a/conf/modules/merge_annotate_MT.config +++ b/conf/modules/merge_annotate_MT.config @@ -16,9 +16,6 @@ // process { - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:.*' { - ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") } - } withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' { ext.prefix = { "${meta.id}_merged" } diff --git a/conf/test.config b/conf/test.config index 805f84d6..c3ed9011 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,10 +34,6 @@ params { intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" - mt_fasta_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.fa" - mt_intervals = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt.intervals" - mt_intervals_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.intervals" - mt_backchain_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.back_chain" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index e4f73a33..85efab52 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -34,10 +34,6 @@ 
params { intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" - mt_fasta_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.fa" - mt_intervals = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt.intervals" - mt_intervals_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.intervals" - mt_backchain_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.back_chain" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf index 60db58ec..1dbe2911 100644 --- a/subworkflows/local/analyse_MT.nf +++ b/subworkflows/local/analyse_MT.nf @@ -9,35 +9,35 @@ include { MERGE_ANNOTATE_MT } from './mitochondria/me workflow ANALYSE_MT { take: - ch_bam // channel: [mandatory] [ val(meta), file(bam), file(bai) ] - ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_genome_bwa_index // channel: [mandatory] [ path(index) ] - ch_genome_bwamem2_index // channel: [mandatory] [ path(index) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ path(fai) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_mt_intervals // channel: 
[mandatory] [ path(interval_list) ] - ch_shift_mt_bwa_index // channel: [mandatory] [ path(index) ] - ch_shift_mt_bwamem2_index // channel: [mandatory] [ path(index) ] - ch_shift_mt_fasta // channel: [mandatory] [ path(fasta) ] - ch_shift_mt_dict // channel: [mandatory] [ path(dict) ] - ch_shift_mt_fai // channel: [mandatory] [ path(fai) ] - ch_shift_mt_intervals // channel: [mandatory] [ path(interval_list) ] - ch_shift_mt_backchain // channel: [mandatory] [ path(back_chain) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] - ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] - val_vep_genome // string: [mandatory] GRCh37 or GRCh38 - val_vep_cache_version // string: [mandatory] 107 - ch_vep_cache // channel: [mandatory] [ path(cache) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_bam_bai // channel: [mandatory] [ val(meta), file(bam), file(bai) ] + ch_cadd_header // channel: [mandatory] [ path(txt) ] + ch_cadd_resources // channel: [mandatory] [ path(annotation) ] + ch_genome_bwa_index // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_mt_intervals // channel: [mandatory] [ path(interval_list) ] + ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_mtshift_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_mtshift_intervals // channel: [mandatory] [ path(interval_list) ] + ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ] + ch_vcfanno_resources // channel: [mandatory] [ 
path(resources) ] + ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] + val_vep_genome // string: [mandatory] GRCh37 or GRCh38 + val_vep_cache_version // string: [mandatory] 107 + ch_vep_cache // channel: [mandatory] [ path(cache) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] main: ch_versions = Channel.empty() // PREPARING READS FOR MT ALIGNMENT CONVERT_MT_BAM_TO_FASTQ ( - ch_bam, + ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict @@ -58,20 +58,20 @@ workflow ANALYSE_MT { ALIGN_AND_CALL_MT_SHIFT ( CONVERT_MT_BAM_TO_FASTQ.out.fastq, CONVERT_MT_BAM_TO_FASTQ.out.bam, - ch_shift_mt_bwa_index, - ch_shift_mt_bwamem2_index, - ch_shift_mt_fasta, - ch_shift_mt_dict, - ch_shift_mt_fai, - ch_shift_mt_intervals + ch_mtshift_bwaindex, + ch_mtshift_bwamem2index, + ch_mtshift_fasta, + ch_mtshift_dict, + ch_mtshift_fai, + ch_mtshift_intervals ) // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT PICARD_LIFTOVERVCF ( ALIGN_AND_CALL_MT_SHIFT.out.vcf, ch_genome_dict, - ch_shift_mt_backchain, - ch_genome_fasta + ch_genome_fasta, + ch_mtshift_backchain, ) // MT MERGE AND ANNOTATE VARIANTS diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index d59baba5..f9a8fb47 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -101,28 +101,28 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) emit: - genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ] - genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] - genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] - genome_fai = ch_fai // channel: [ val(meta), path(fai) ] - genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ] + genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) 
] + genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] + genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] + genome_fai = ch_fai // channel: [ val(meta), path(fai) ] + genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ] - mt_intervals = GATK_SHIFTFASTA.out.intervals.collect() - mtshift_intervals = GATK_SHIFTFASTA.out.shift_intervals.collect() - mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() - mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ] - mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fai) ] - mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ] - mtshift_bwa_index = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] - mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] + mt_intervals = GATK_SHIFTFASTA.out.intervals.map{ meta, it -> it}.collect() // channel: [ path(intervals) ] + mtshift_intervals = GATK_SHIFTFASTA.out.shift_intervals.map{ meta, it -> it}.collect() // channel: [ path(intervals) ] + mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() + mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ] + mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fai) ] + mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ] + mtshift_bwa_index = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] + mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] - gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] - known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] - target_bed = 
Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] - bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] - target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] - vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] - versions = ch_versions // channel: [ path(versions.yml) ] + gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] + known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] + target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] + bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] + target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] + vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 45e8491b..1e1b70e5 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -173,49 +173,92 @@ workflow RAREDISEASE { CHECK_INPUT (ch_input) ch_versions = ch_versions.mix(CHECK_INPUT.out.versions) - // Initialize all file channels including unprocessed vcf, bed and tab files - ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() + // Initialize file channels for PREPARE_REFERENCES subworkflow ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - ch_variant_consequences = Channel.fromPath("$projectDir/assets/variant_consequences_v1.txt", checkIfExists: true).collect() + ch_genome_fai = 
params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() + : Channel.value([[],[]]) + + // Prepare references and indices. + PREPARE_REFERENCES ( + ch_genome_fasta, + ch_genome_fai, + ch_mt_fasta, + ch_gnomad_af_tab, + ch_dbsnp, + ch_target_bed_unprocessed, + ch_vep_cache_unprocessed + ) + .set { ch_references } + + // Gather built indices or get them from the params + ch_bait_intervals = ch_references.bait_intervals + ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() : Channel.value([]) ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).collect() : Channel.value([]) - ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.value([]) - ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) + ch_dbsnp_tbi = params.known_dbsnp_tbi ? 
Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.known_dbsnp_tbi.ifEmpty([[],[]]) + ch_genome_bwaindex = params.bwa ? Channel.fromPath(params.bwa).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_bwa_index + ch_genome_bwamem2index = params.bwamem2 ? Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_bwamem2_index + ch_genome_chrsizes = ch_references.genome_chrom_sizes + ch_genome_fai = ch_references.genome_fai + ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_dict + ch_gnomad_afidx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect() + : ch_references.gnomad_af_idx + ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_afidx).map {meta, tab, idx -> [tab,idx]}.collect() + : Channel.empty() ch_intervals_wgs = params.intervals_wgs ? Channel.fromPath(params.intervals_wgs).collect() : Channel.empty() ch_intervals_y = params.intervals_y ? Channel.fromPath(params.intervals_y).collect() : Channel.empty() - ch_known_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).collect() : Channel.value([]) - ch_mt_fasta = params.mt_fasta ? 
Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - : Channel.empty() + ch_mt_intervals = ch_references.mt_intervals + ch_mtshift_backchain = ch_references.mtshift_backchain + ch_mtshift_bwaindex = ch_references.mtshift_bwa_index + ch_mtshift_bwamem2index = ch_references.mtshift_bwamem2_index + ch_mtshift_dictionary = ch_references.mtshift_dict + ch_mtshift_fai = ch_references.mtshift_fai + ch_mtshift_fasta = ch_references.mtshift_fasta + ch_mtshift_intervals = ch_references.mtshift_intervals ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() : Channel.value([]) ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect() : Channel.value([]) ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect() : Channel.value([]) - ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) + ch_target_bed = ch_references.target_bed + ch_target_intervals = ch_references.target_intervals ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [[id:it[0].simpleName],it]}.collect() : Channel.value([[],[]]) + ch_variant_consequences = Channel.fromPath("$projectDir/assets/variant_consequences_v1.txt", checkIfExists: true).collect() ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect() : Channel.value([]) ch_vcfanno_lua = params.vcfanno_lua ? Channel.fromPath(params.vcfanno_lua).collect() : Channel.value([]) ch_vcfanno_toml = params.vcfanno_toml ? Channel.fromPath(params.vcfanno_toml).collect() : Channel.value([]) - ch_vep_cache_unprocessed = params.vep_cache ? 
Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() - : Channel.value([[],[]]) + ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? ch_references.vep_resources + : ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) ) ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect() : Channel.value([]) + ch_versions = ch_versions.mix(ch_references.versions) + // Generate pedigree file ch_pedfile = CHECK_INPUT.out.samples.toList().map { makePed(it) } @@ -224,56 +267,6 @@ workflow RAREDISEASE { FASTQC (CHECK_INPUT.out.reads) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - // Prepare references and indices. - PREPARE_REFERENCES ( - ch_genome_fasta, - ch_genome_fai, - ch_mt_fasta, - ch_gnomad_af_tab, - ch_known_dbsnp, - ch_target_bed_unprocessed, - ch_vep_cache_unprocessed - ) - .set { ch_references } - - // Gather built indices or get them from the params - ch_bait_intervals = ch_references.bait_intervals - ch_bwa_index = params.bwa ? Channel.fromPath(params.bwa).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.bwa_index - ch_bwa_index_mt_shift = params.mt_bwa_index_shift ? Channel.fromPath(params.mt_bwa_index_shift).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.bwa_index_mt_shift - ch_bwamem2_index = params.bwamem2 ? Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.bwamem2_index - ch_bwamem2_index_mt_shift = params.mt_bwamem2_index_shift ? Channel.fromPath(params.mt_bwamem2_index_shift).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.bwamem2_index_mt_shift - ch_chrom_sizes = ch_references.chrom_sizes - ch_mt_backchain_shift = params.mt_backchain_shift ? Channel.fromPath(params.mt_backchain_shift).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([]) - ch_mt_fasta_shift = params.mt_fasta_shift ? 
Channel.fromPath(params.mt_fasta_shift).map { it -> [[id:it[0].simpleName], it] }.collect() - : Channel.empty() - ch_mt_intervals = params.mt_intervals ? Channel.fromPath(params.mt_intervals).collect() - : Channel.value([]) - ch_mt_intervals_shift = params.mt_intervals_shift ? Channel.fromPath(params.mt_intervals_shift).collect() - : Channel.value([]) - ch_mt_shift_fai = params.mt_fai_shift ? Channel.fromPath(params.mt_fai_shift).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.fai_mt_shift - ch_genome_fai = ch_references.fai - ch_gnomad_af_idx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect() - : ch_references.gnomad_af_idx - ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_af_idx).map {meta, tab, idx -> [tab,idx]}.collect() - : Channel.empty() - ch_known_dbsnp_tbi = params.known_dbsnp_tbi ? Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.known_dbsnp_tbi.ifEmpty([[],[]]) - ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.sequence_dict - ch_sequence_dictionary_mt_shift = params.mt_sequence_dictionary_shift ? Channel.fromPath(params.mt_sequence_dictionary_shift).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.sequence_dict_mt_shift - ch_target_bed = ch_references.target_bed - ch_target_intervals = ch_references.target_intervals - ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? ch_references.vep_resources - : ( params.vep_cache ? 
Channel.fromPath(params.vep_cache).collect() : Channel.value([]) ) - ch_versions = ch_versions.mix(ch_references.versions) - // CREATE CHROMOSOME BED AND INTERVALS SCATTER_GENOME ( ch_genome_dictionary, @@ -289,10 +282,10 @@ workflow RAREDISEASE { CHECK_INPUT.out.reads, ch_genome_fasta, ch_genome_fai, - ch_bwa_index, - ch_bwamem2_index, - ch_known_dbsnp, - ch_known_dbsnp_tbi, + ch_genome_bwaindex, + ch_genome_bwamem2index, + ch_dbsnp, + ch_dbsnp_tbi, params.platform ) .set { ch_mapped } @@ -307,7 +300,7 @@ workflow RAREDISEASE { ch_genome_fai, ch_bait_intervals, ch_target_intervals, - ch_chrom_sizes, + ch_genome_chrsizes, ch_intervals_wgs, ch_intervals_y ) @@ -349,8 +342,8 @@ workflow RAREDISEASE { ch_mapped.bam_bai, ch_genome_fasta, ch_genome_fai, - ch_known_dbsnp, - ch_known_dbsnp_tbi, + ch_dbsnp, + ch_dbsnp_tbi, ch_call_interval, ch_ml_model, CHECK_INPUT.out.case_info @@ -361,7 +354,7 @@ workflow RAREDISEASE { ch_mapped.marked_bam, ch_mapped.marked_bai, ch_mapped.bam_bai, - ch_bwa_index, + ch_genome_bwaindex, ch_genome_fasta, ch_genome_fai, CHECK_INPUT.out.case_info, @@ -431,19 +424,19 @@ workflow RAREDISEASE { ch_mapped.bam_bai, ch_cadd_header, ch_cadd_resources, - ch_bwa_index, - ch_bwamem2_index, + ch_genome_bwaindex, + ch_genome_bwamem2index, ch_genome_fasta, ch_genome_fai, ch_genome_dictionary, ch_mt_intervals, - ch_bwa_index_mt_shift, - ch_bwamem2_index_mt_shift, - ch_mt_fasta_shift, - ch_sequence_dictionary_mt_shift, - ch_mt_shift_fai, - ch_mt_intervals_shift, - ch_mt_backchain_shift, + ch_mtshift_bwaindex, + ch_mtshift_bwamem2index, + ch_mtshift_fasta, + ch_mtshift_dictionary, + ch_mtshift_fai, + ch_mtshift_intervals, + ch_mtshift_backchain, ch_vcfanno_resources, ch_vcfanno_toml, params.genome, From fd79ab76593604549ba4cf7e754d079b7dbbfdf7 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 26 May 2023 10:49:42 +0200 Subject: [PATCH 3/8] update input channel definitions --- 
.../local/mitochondria/align_and_call_MT.nf | 22 +++++++++---------- .../mitochondria/convert_mt_bam_to_fastq.nf | 10 ++++----- .../local/mitochondria/merge_annotate_MT.nf | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf index 3d992322..7121168c 100644 --- a/subworkflows/local/mitochondria/align_and_call_MT.nf +++ b/subworkflows/local/mitochondria/align_and_call_MT.nf @@ -16,21 +16,21 @@ include { TABIX_TABIX as TABIX_TABIX_MT } fr workflow ALIGN_AND_CALL_MT { take: - ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] - ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] - ch_index_bwa // channel: [mandatory for sentieon] [ val(meta), path(index) ] - ch_index_bwamem2 // channel: [mandatory for bwamem2] [ val(meta), path(index) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_intervals_mt // channel: [mandatory] [ path(interval_list) ] + ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] + ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] + ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] + ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_intervals // channel: [mandatory] [ path(interval_list) ] main: ch_versions = Channel.empty() - BWAMEM2_MEM_MT (ch_fastq , ch_index_bwamem2, true) + BWAMEM2_MEM_MT (ch_fastq , ch_bwamem2index, true) - SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_index_bwa ) + SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_bwaindex ) ch_mt_bam = Channel.empty().mix(BWAMEM2_MEM_MT.out.bam, 
SENTIEON_BWAMEM_MT.out.bam) ch_fastq_ubam = ch_mt_bam.join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) @@ -45,7 +45,7 @@ workflow ALIGN_AND_CALL_MT { SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam) ch_sort_index_bam = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true) - ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals_mt) + ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals) GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[]) diff --git a/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf b/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf index 2df4406f..156f71bb 100644 --- a/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf +++ b/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf @@ -8,16 +8,16 @@ include { GATK4_SAMTOFASTQ as GATK4_SAMTOFASTQ_MT } from '../../../modules/nf-co workflow CONVERT_MT_BAM_TO_FASTQ { take: - ch_bam // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta_meta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] main: ch_versions = Channel.empty() // Outputs bam containing only MT - GATK4_PRINTREADS_MT ( ch_bam, ch_genome_fasta_meta, ch_genome_fai, ch_genome_dict ) + GATK4_PRINTREADS_MT ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict ) // Removes alignment information GATK4_REVERTSAM_MT ( GATK4_PRINTREADS_MT.out.bam ) diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf index 
8d4f3da0..46e334f0 100644 --- a/subworkflows/local/mitochondria/merge_annotate_MT.nf +++ b/subworkflows/local/mitochondria/merge_annotate_MT.nf @@ -23,9 +23,9 @@ workflow MERGE_ANNOTATE_MT { ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ] ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_genome_fasta // channel: [mandatory] [ path(fasta) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_genome_fai // channel: [mandatory] [ path(fai) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 From 743c35bb24f801cd2573235e1784401403bf40f4 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 26 May 2023 10:56:08 +0200 Subject: [PATCH 4/8] update docs --- CHANGELOG.md | 7 +++++++ docs/usage.md | 22 ++++++++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23a719c8..cf89d03d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.0.0 - [] + +### `Added` + +- GATK's ShiftFasta to generate all the files required for mitochondrial variant calling +- Feature to calculate CADD scores for indels + ## v1.0.0 - [2023-03-31] Initial release of nf-core/raredisease, created with the [nf-core](https://nf-co.re/) template. diff --git a/docs/usage.md b/docs/usage.md index 482ed179..5dd5761a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -219,20 +219,14 @@ The mandatory and optional parameters for each category are tabulated below. ##### 8. 
Mitochondrial analysis -| Mandatory | Optional | -| ------------------------------ | -------- | -| genome | | -| mt_backchain_shift1 | | -| mito_name | | -| mt_fasta_shift | | -| mt_intervals | | -| mt_intervals_shift | | -| vcfanno_resources | | -| vcfanno_toml | | -| vep_cache_version | | -| vep_cache | | - -1Can be generated by GATK's [ShiftFasta](https://gatk.broadinstitute.org/hc/en-us/articles/9570501436827-ShiftFasta-BETA-). Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/mt_shift8000.back_chain). +| Mandatory | Optional | +| ----------------- | -------- | +| genome | | +| mito_name | | +| vcfanno_resources | | +| vcfanno_toml | | +| vep_cache_version | | +| vep_cache | | #### Run the pipeline From f0f866d0d8d10f29e92b947fb85996cd55520dc3 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 26 May 2023 10:58:53 +0200 Subject: [PATCH 5/8] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf89d03d..55291bfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- GATK's ShiftFasta to generate all the files required for mitochondrial variant calling +- GATK's ShiftFasta to generate all the files required for mitochondrial analysis - Feature to calculate CADD scores for indels ## v1.0.0 - [2023-03-31] From c0a869e9b629935d9cc492864053ac0f210301ab Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 26 May 2023 11:27:13 +0200 Subject: [PATCH 6/8] fix lint --- modules.json | 278 +++++++++++++-------------------------------------- 1 file changed, 70 insertions(+), 208 deletions(-) diff --git a/modules.json b/modules.json index 0622ed2e..5b778895 100644 --- a/modules.json +++ b/modules.json @@ -8,488 +8,350 @@ "bcftools/annotate": { 
"branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/concat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/filter": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/merge": { "branch": "master", "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/reheader": { "branch": "master", "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/roh": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwa/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cadd": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": 
"911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deepvariant": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "expansionhunter": { "branch": "master", "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/filtermutectcalls": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/mergebamalignment": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/mutect2": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/printreads": { 
"branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/revertsam": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/samtofastq": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/selectvariants": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/shiftfasta": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/splitintervals": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/variantfiltration": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genmod/annotate": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genmod/compound": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genmod/models": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genmod/score": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "glnexus": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "haplocheck": { "branch": 
"master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "haplogrep2/classify": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "manta/germline": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mosdepth": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "peddy": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/addorreplacereadgroups": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/collecthsmetrics": { "branch": "master", "git_sha": "0ce3ab0ac301f160225b22254fa238478b4389f2", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/collectwgsmetrics": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/liftovervcf": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/markduplicates": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, 
"picard/renamesampleinvcf": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/sortvcf": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "qualimap/bamqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "rhocall/annotate": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "bf8ff98531167f8245ba5c44ce7d781503ddf936", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/view": { "branch": "master", "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "smncopynumbercaller": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "stranger": { "branch": "master", "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "svdb/merge": { 
"branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "svdb/query": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "01b3b2509d76625b6d6cd613b349fb4777712a15", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/tabix": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tiddit/cov": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tiddit/sv": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "ucsc/wigtobigwig": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "vcfanno": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } } } -} \ No newline at end of file +} From 4b6b9eac055a2d37250e12c477e8996cea4aa099 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 27 May 2023 23:06:31 +0200 Subject: [PATCH 7/8] update conf --- conf/modules/prepare_references.config | 16 +++--- .../local/mitochondria/align_and_call_MT.nf | 10 ++-- subworkflows/local/prepare_references.nf | 51 +++++++++++-------- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/conf/modules/prepare_references.config 
b/conf/modules/prepare_references.config index af73549a..a825250c 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -29,16 +29,16 @@ process { ext.when = {!params.bwamem2 && params.aligner == "bwamem2"} } - withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_SHIFT_MT' { - ext.when = {!params.mt_bwamem2_index_shift && params.mt_fasta_shift && !(params.analysis_type == "wes") && params.aligner == "bwamem2"} + withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2"} } withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' { ext.when = {!params.bwa && params.aligner == "sentieon"} } - withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_SHIFT_MT' { - ext.when = {!params.mt_bwa_index_shift && params.mt_fasta_shift && !(params.analysis_type == "wes") && params.aligner == "sentieon"} + withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon"} } withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' { @@ -54,8 +54,8 @@ process { ext.when = {!params.mt_fasta && !params.skip_mt_analysis} } - withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_SHIFT_MT' { - ext.when = {!params.mt_fai_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")} + withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")} } withName: '.*PREPARE_REFERENCES:GATK_SD' { @@ -66,8 +66,8 @@ process { ext.args = { "--interval-file-name ${meta.id}_mt" } } - withName: '.*PREPARE_REFERENCES:GATK_SD_SHIFT_MT' { - ext.when = {!params.mt_sequence_dictionary_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")} + withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")} } withName: 
'.*PREPARE_REFERENCES:GET_CHROM_SIZES' { diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf index 7121168c..dc629d84 100644 --- a/subworkflows/local/mitochondria/align_and_call_MT.nf +++ b/subworkflows/local/mitochondria/align_and_call_MT.nf @@ -28,14 +28,16 @@ workflow ALIGN_AND_CALL_MT { main: ch_versions = Channel.empty() - BWAMEM2_MEM_MT (ch_fastq , ch_bwamem2index, true) + BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true) SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_bwaindex ) - ch_mt_bam = Channel.empty().mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam) - ch_fastq_ubam = ch_mt_bam.join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) + Channel.empty() + .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam) + .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) + .set {ch_bam_ubam} - GATK4_MERGEBAMALIGNMENT_MT (ch_fastq_ubam, ch_fasta, ch_dict) + GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict) PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index f9a8fb47..54160fdc 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -57,11 +57,20 @@ workflow PREPARE_REFERENCES { GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_MT_SHIFT.out.dict) BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) + GATK_SHIFTFASTA.out.intervals + .multiMap{ meta, files -> + shift_intervals: + ind = files.findIndexValues {it.toString().endsWith("shifted.intervals")} + files[ind] + intervals: + ind = files.findIndexValues {!(it.toString().endsWith("shifted.intervals"))} + files[ind] + } + .set {ch_shiftfasta_mtintervals} // Vcf, tab and bed indices TABIX_DBSNP(ch_known_dbsnp) ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) - 
TABIX_GNOMAD_AF(ch_gnomad_af_tab) TABIX_PT(ch_target_bed).tbi.set { ch_tbi } TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi } @@ -101,28 +110,28 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) emit: - genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ] - genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] - genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] - genome_fai = ch_fai // channel: [ val(meta), path(fai) ] - genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ] + genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ] + genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] + genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] + genome_fai = ch_fai // channel: [ val(meta), path(fai) ] + genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ] - mt_intervals = GATK_SHIFTFASTA.out.intervals.map{ meta, it -> it}.collect() // channel: [ path(intervals) ] - mtshift_intervals = GATK_SHIFTFASTA.out.shift_intervals.map{ meta, it -> it}.collect() // channel: [ path(intervals) ] - mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() - mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ] - mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fai) ] - mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ] - mtshift_bwa_index = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] - mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] + mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ] + 
mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ] + mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ] + mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ] + mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ] + mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ] + mtshift_bwa_index = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] + mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] - gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] - known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] - target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] - bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] - target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] - vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] - versions = ch_versions // channel: [ path(versions.yml) ] + gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(tbi) ] + known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(tbi) ] + target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] + bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] + target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] + vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> 
[files]}.collect() // channel: [ path(cache) ] + versions = ch_versions // channel: [ path(versions.yml) ] } From 48af7d5167ac0ab8b632a59df50673e9cf167c1a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 29 May 2023 10:39:51 +0200 Subject: [PATCH 8/8] update faidx --- modules.json | 2 +- modules/nf-core/samtools/faidx/main.nf | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 5b778895..a061424c 100644 --- a/modules.json +++ b/modules.json @@ -267,7 +267,7 @@ }, "samtools/faidx": { "branch": "master", - "git_sha": "bf8ff98531167f8245ba5c44ce7d781503ddf936", + "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, "samtools/index": { diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index c1e8ef3a..59ed3088 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -35,8 +35,12 @@ process SAMTOOLS_FAIDX { """ stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' """ + ${fastacmd} touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml "${task.process}":