From f6afe61dbe88945c381a0788d1a6fb61cb356718 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Thu, 25 May 2023 16:35:31 +0200
Subject: [PATCH 1/8] add module and remove redundant parameters
---
conf/modules/prepare_references.config | 9 +
main.nf | 9 +-
modules.json | 283 ++++++++++++++++------
modules/nf-core/gatk4/shiftfasta/main.nf | 66 +++++
modules/nf-core/gatk4/shiftfasta/meta.yml | 81 +++++++
modules/nf-core/samtools/faidx/main.nf | 12 +-
modules/nf-core/samtools/faidx/meta.yml | 14 +-
nextflow_schema.json | 58 +----
subworkflows/local/prepare_references.nf | 30 ++-
workflows/raredisease.nf | 36 ++-
10 files changed, 426 insertions(+), 172 deletions(-)
create mode 100644 modules/nf-core/gatk4/shiftfasta/main.nf
create mode 100644 modules/nf-core/gatk4/shiftfasta/meta.yml
diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config
index 8504a34c..af73549a 100644
--- a/conf/modules/prepare_references.config
+++ b/conf/modules/prepare_references.config
@@ -49,6 +49,11 @@ process {
ext.when = {!params.fai}
}
+ withName: '.*PREPARE_REFERENCES:SAMTOOLS_EXTRACT_MT' {
+ ext.args = { " ${params.mito_name} -o ${meta.id}_mt.fa" }
+ ext.when = {!params.mt_fasta && !params.skip_mt_analysis}
+ }
+
withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_SHIFT_MT' {
ext.when = {!params.mt_fai_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")}
}
@@ -57,6 +62,10 @@ process {
ext.when = {!params.sequence_dictionary}
}
+ withName: '.*PREPARE_REFERENCES:GATK_SHIFTFASTA' {
+ ext.args = { "--interval-file-name ${meta.id}_mt" }
+ }
+
withName: '.*PREPARE_REFERENCES:GATK_SD_SHIFT_MT' {
ext.when = {!params.mt_sequence_dictionary_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")}
}
diff --git a/main.nf b/main.nf
index 7e0ceb8a..fe93d397 100644
--- a/main.nf
+++ b/main.nf
@@ -32,14 +32,7 @@ params.known_dbsnp_tbi = WorkflowMain.getGenomeAttribute(params,
params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_indels')
params.known_mills = WorkflowMain.getGenomeAttribute(params, 'known_mills')
params.ml_model = WorkflowMain.getGenomeAttribute(params, 'ml_model')
-params.mt_backchain_shift = WorkflowMain.getGenomeAttribute(params, 'mt_backchain_shift')
-params.mt_bwa_index_shift = WorkflowMain.getGenomeAttribute(params, 'mt_bwa_index_shift')
-params.mt_bwamem2_index_shift = WorkflowMain.getGenomeAttribute(params, 'mt_bwamem2_index_shift')
-params.mt_fasta_shift = WorkflowMain.getGenomeAttribute(params, 'mt_fasta_shift')
-params.mt_fai_shift = WorkflowMain.getGenomeAttribute(params, 'mt_fai_shift')
-params.mt_intervals = WorkflowMain.getGenomeAttribute(params, 'mt_intervals')
-params.mt_intervals_shift = WorkflowMain.getGenomeAttribute(params, 'mt_intervals_shift')
-params.mt_sequence_dictionary_shift = WorkflowMain.getGenomeAttribute(params, 'mt_sequence_dictionary_shift')
+params.mt_fasta = WorkflowMain.getGenomeAttribute(params, 'mt_fasta')
params.reduced_penetrance = WorkflowMain.getGenomeAttribute(params, 'reduced_penetrance')
params.sequence_dictionary = WorkflowMain.getGenomeAttribute(params, 'sequence_dictionary')
params.score_config_snv = WorkflowMain.getGenomeAttribute(params, 'score_config_snv')
diff --git a/modules.json b/modules.json
index 6b0ebcbc..0622ed2e 100644
--- a/modules.json
+++ b/modules.json
@@ -8,345 +8,488 @@
"bcftools/annotate": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bcftools/concat": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bcftools/filter": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bcftools/merge": {
"branch": "master",
"git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bcftools/norm": {
"branch": "master",
"git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bcftools/reheader": {
"branch": "master",
"git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bcftools/roh": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bcftools/view": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bwa/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bwamem2/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"bwamem2/mem": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"cadd": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"cat/cat": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"deepvariant": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"expansionhunter": {
"branch": "master",
"git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"fastqc": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/bedtointervallist": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/createsequencedictionary": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/filtermutectcalls": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/intervallisttools": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/mergebamalignment": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/mergevcfs": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/mutect2": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/printreads": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/revertsam": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/samtofastq": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/selectvariants": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
+ },
+ "gatk4/shiftfasta": {
+ "branch": "master",
+ "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/splitintervals": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"gatk4/variantfiltration": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"genmod/annotate": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"genmod/compound": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"genmod/models": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"genmod/score": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"glnexus": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"haplocheck": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"haplogrep2/classify": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"manta/germline": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"mosdepth": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"multiqc": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"peddy": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"picard/addorreplacereadgroups": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"picard/collecthsmetrics": {
"branch": "master",
"git_sha": "0ce3ab0ac301f160225b22254fa238478b4389f2",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"picard/collectmultiplemetrics": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"picard/collectwgsmetrics": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"picard/liftovervcf": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"picard/markduplicates": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"picard/renamesampleinvcf": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"picard/sortvcf": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"qualimap/bamqc": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"rhocall/annotate": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"samtools/faidx": {
"branch": "master",
- "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "git_sha": "bf8ff98531167f8245ba5c44ce7d781503ddf936",
+ "installed_by": [
+ "modules"
+ ]
},
"samtools/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"samtools/merge": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"samtools/sort": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"samtools/stats": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"samtools/view": {
"branch": "master",
"git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"smncopynumbercaller": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"stranger": {
"branch": "master",
"git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"svdb/merge": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"svdb/query": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"tabix/bgziptabix": {
"branch": "master",
"git_sha": "01b3b2509d76625b6d6cd613b349fb4777712a15",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"tabix/tabix": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"tiddit/cov": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"tiddit/sv": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"ucsc/wigtobigwig": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"untar": {
"branch": "master",
"git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
},
"vcfanno": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": ["modules"]
+ "installed_by": [
+ "modules"
+ ]
}
}
}
}
}
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/gatk4/shiftfasta/main.nf b/modules/nf-core/gatk4/shiftfasta/main.nf
new file mode 100644
index 00000000..cf984e8b
--- /dev/null
+++ b/modules/nf-core/gatk4/shiftfasta/main.nf
@@ -0,0 +1,66 @@
+process GATK4_SHIFTFASTA {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "bioconda::gatk4=4.4.0.0"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0':
+ 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ tuple val(meta2), path(fasta_fai)
+ tuple val(meta3), path(dict)
+
+ output:
+ tuple val(meta), path("*_shift.fasta") , emit: shift_fa
+ tuple val(meta), path("*_shift.fasta.fai") , emit: shift_fai
+ tuple val(meta), path("*_shift.back_chain") , emit: shift_back_chain
+ tuple val(meta), path("*_shift.dict") , emit: dict , optional: true
+ tuple val(meta), path("*.intervals") , emit: intervals , optional: true
+ tuple val(meta), path("*.shifted.intervals") , emit: shift_intervals , optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def seq_dict = dict ? "--sequence-dictionary ${dict}" : ""
+ def avail_mem = 3072
+ if (!task.memory) {
+ log.info '[GATK ShiftFasta] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+ } else {
+ avail_mem = (task.memory.mega*0.8).intValue()
+ }
+ """
+ gatk --java-options "-Xmx${avail_mem}M" ShiftFasta \\
+ --reference $fasta \\
+ --output ${prefix}_shift.fasta \\
+ --shift-back-output ${prefix}_shift.back_chain \\
+ $args \\
+ $seq_dict \\
+ --tmp-dir .
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch test.intervals
+ touch test_shift.back_chain
+ touch test_shift.dict
+ touch test.shifted.intervals
+ touch test_shift.fasta
+ touch test_shift.fasta.fai
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/gatk4/shiftfasta/meta.yml b/modules/nf-core/gatk4/shiftfasta/meta.yml
new file mode 100644
index 00000000..6d563ded
--- /dev/null
+++ b/modules/nf-core/gatk4/shiftfasta/meta.yml
@@ -0,0 +1,81 @@
+name: "gatk4_shiftfasta"
+description: Create a fasta with the bases shifted by offset
+keywords:
+ - mitochondria
+ - shiftfasta
+ - shiftchain
+ - shiftintervals
+tools:
+ - gatk4:
+ description: |
+ Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+ with a primary focus on variant discovery and genotyping. Its powerful processing engine
+ and high-performance computing features make it capable of taking on projects of any size.
+ homepage: https://gatk.broadinstitute.org/hc/en-us
+ documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
+ tool_dev_url: "https://github.com/broadinstitute/gatk"
+ doi: 10.1158/1538-7445.AM2017-3590
+ licence: ["Apache-2.0"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - fasta:
+ type: file
+ description: fasta file
+ pattern: "*.{fa,fasta}"
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - fasta_fai:
+ type: file
+ description: index for fasta file
+ pattern: "*.{fai}"
+ - meta3:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - dict:
+ type: file
+ description: sequence dictionary file
+ pattern: "*.{dict}"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing fasta information
+ e.g. [ id:'test' ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - dict:
+ type: file
+ description: sequence dictionary file
+ pattern: "*.{dict}"
+ - intervals:
+ type: file
+ description: Intervals file for the fasta file
+ pattern: "*.{intervals}"
+ - shift_back_chain:
+ type: file
+ description: The shiftback chain file to use when lifting over
+ pattern: "*.{back_chain}"
+ - shift_fa:
+ type: file
+ description: Shifted fasta file
+ pattern: "*.{fa,fasta}"
+ - shift_intervals:
+ type: file
+ description: Intervals file for the shifted fasta file
+ pattern: "*.{shifted.intervals}"
+
+authors:
+ - "@ramprasadn"
diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf
index 4dd0e5b0..c1e8ef3a 100644
--- a/modules/nf-core/samtools/faidx/main.nf
+++ b/modules/nf-core/samtools/faidx/main.nf
@@ -9,11 +9,13 @@ process SAMTOOLS_FAIDX {
input:
tuple val(meta), path(fasta)
+ tuple val(meta2), path(fai)
output:
- tuple val(meta), path ("*.fai"), emit: fai
- tuple val(meta), path ("*.gzi"), emit: gzi, optional: true
- path "versions.yml" , emit: versions
+ tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true
+ tuple val(meta), path ("*.fai") , emit: fai, optional: true
+ tuple val(meta), path ("*.gzi") , emit: gzi, optional: true
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@@ -23,8 +25,8 @@ process SAMTOOLS_FAIDX {
"""
samtools \\
faidx \\
- $args \\
- $fasta
+ $fasta \\
+ $args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml
index fe2fe9a1..957b25e5 100644
--- a/modules/nf-core/samtools/faidx/meta.yml
+++ b/modules/nf-core/samtools/faidx/meta.yml
@@ -3,6 +3,7 @@ description: Index FASTA file
keywords:
- index
- fasta
+ - faidx
tools:
- samtools:
description: |
@@ -17,12 +18,21 @@ input:
- meta:
type: map
description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
- fasta:
type: file
description: FASTA file
pattern: "*.{fa,fasta}"
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - fai:
+ type: file
+ description: FASTA index file
+ pattern: "*.{fai}"
output:
- meta:
type: map
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 842d6375..44810f84 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -156,22 +156,6 @@
"description": "Name of the mitochondrial contig in the reference fasta file",
"help_text": "Used to extract relevant information from the references to analyse mitochondria"
},
- "mt_intervals": {
- "type": "string",
- "format": "path",
- "fa_icon": "fas fa-file",
- "pattern": "^\\S+\\.intervals?(_list)?$",
- "description": "Path to the interval list of the non control mitochondral region.",
- "help_text": "Path to the interval list of the non control mitochondral regions for Mutect2"
- },
- "mt_intervals_shift": {
- "type": "string",
- "format": "path",
- "fa_icon": "fas fa-file",
- "pattern": "^\\S+\\.intervals?(\\_list)?$",
- "description": "Path to the interval list of the non control mitochondral region in shifted fasta.",
- "help_text": "Path to the interval list of the non control mitochondral regions in shifted fasta for Mutect2"
- },
"known_dbsnp": {
"type": "string",
"format": "path",
@@ -211,7 +195,7 @@
"description": "Path to sentieon machine learning model file.",
"hidden": true
},
- "mt_fasta_shift": {
+ "mt_fasta": {
"type": "string",
"format": "file-path",
"mimetype": "text/plain",
@@ -219,38 +203,6 @@
"description": "Path to mitochondrial FASTA genome file.",
"fa_icon": "far fa-file-code"
},
- "mt_fai_shift": {
- "type": "string",
- "format": "file-path",
- "mimetype": "text/plain",
- "pattern": "^\\S+\\.fn?a(sta)?\\.fai?$",
- "description": "Path to mitochondrial FASTA genome index file.",
- "fa_icon": "far fa-file-code"
- },
- "mt_bwa_index_shift": {
- "type": "string",
- "format": "directory-path",
- "description": "Directory for pre-built bwa index for shifted mitochondrial fasta (used for alignment with sentioen)",
- "help_text": "If none provided, will be generated automatically from the FASTA reference.",
- "fa_icon": "fas fa-folder-open",
- "hidden": true
- },
- "mt_bwamem2_index_shift": {
- "type": "string",
- "format": "directory-path",
- "description": "Directory for pre-built bwamem2 index for shifted mitochondrial fasta.",
- "help_text": "If none provided, will be generated automatically from the FASTA reference.",
- "fa_icon": "fas fa-folder-open",
- "hidden": true
- },
- "mt_backchain_shift": {
- "type": "string",
- "format": "directory-path",
- "description": "Chain file describing the alignment between the mitochondrial shifted fasta and typical mitochondrial fasta",
- "help_text": "For more information, check https://genome.ucsc.edu/goldenPath/help/chain.html",
- "fa_icon": "fas fa-folder-open",
- "hidden": true
- },
"reduced_penetrance": {
"type": "string",
"format": "path",
@@ -280,14 +232,6 @@
"description": "Genome dictionary file",
"hidden": true
},
- "mt_sequence_dictionary_shift": {
- "type": "string",
- "format": "path",
- "fa_icon": "fas fa-chart-bar",
- "pattern": "^\\S+\\.dict$",
- "description": "Shifted mitochondrial genome dictionary file",
- "hidden": true
- },
"vep_filters": {
"type": "string",
"format": "path",
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index 0138cbaa..0ac46211 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -10,9 +10,12 @@ include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modul
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_SHIFT_MT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main'
+include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main'
include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes'
+include { SAMTOOLS_FAIDX as SAMTOOLS_EXTRACT_MT } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main'
-include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_SHIFT_MT } from '../../modules/nf-core/samtools/faidx/main'
+include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT } from '../../modules/nf-core/samtools/faidx/main'
+include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modules/nf-core/samtools/faidx/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/local/sentieon/bwamemindex'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_SHIFT_MT } from '../../modules/local/sentieon/bwamemindex'
include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main'
@@ -24,7 +27,8 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modul
workflow PREPARE_REFERENCES {
take:
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_fasta_mt // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_fasta_mt // channel: [optional for dedicated mt analysis] [ val(meta), path(fasta) ]
ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ]
ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ]
ch_target_bed // channel: [mandatory for WES] [ path(bed) ]
@@ -40,15 +44,21 @@ workflow PREPARE_REFERENCES {
// Genome indices
BWA_INDEX_GENOME(ch_fasta).index.set{ch_bwa}
BWAMEM2_INDEX_GENOME(ch_fasta)
- BWAMEM2_INDEX_SHIFT_MT(ch_fasta_mt)
SENTIEON_BWAINDEX_GENOME(ch_fasta).index.set{ch_sentieonbwa}
- SENTIEON_BWAINDEX_SHIFT_MT(ch_fasta_mt)
- SAMTOOLS_FAIDX_GENOME(ch_fasta)
- SAMTOOLS_FAIDX_SHIFT_MT(ch_fasta_mt)
+ SAMTOOLS_FAIDX_GENOME(ch_fasta, [[],[]])
GATK_SD(ch_fasta)
- GATK_SD_SHIFT_MT(ch_fasta_mt)
GET_CHROM_SIZES( SAMTOOLS_FAIDX_GENOME.out.fai )
+ // MT indices
+ BWAMEM2_INDEX_SHIFT_MT(ch_fasta_mt)
+ SENTIEON_BWAINDEX_SHIFT_MT(ch_fasta_mt)
+ ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect() // genome .fai: caller-supplied channel mixed with the SAMTOOLS_FAIDX_GENOME output
+ SAMTOOLS_EXTRACT_MT(ch_fasta, ch_fai)
+ ch_mt_fasta = Channel.empty().mix(ch_fasta_mt, SAMTOOLS_EXTRACT_MT.out.fa).collect() // MT fasta: caller-supplied ch_fasta_mt mixed with the SAMTOOLS_EXTRACT_MT output (was a copy of ch_fai)
+ GATK_SD_SHIFT_MT(ch_mt_fasta)
+ SAMTOOLS_FAIDX_MT_SHIFT(ch_mt_fasta, [[],[]]) // second input is the empty [meta2, fai] placeholder, as for SAMTOOLS_FAIDX_GENOME
+ GATK_SHIFTFASTA(ch_mt_fasta, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_SHIFT_MT.out.dict)
+
// Vcf, tab and bed indices
TABIX_DBSNP(ch_known_dbsnp)
TABIX_GNOMAD_AF(ch_gnomad_af_tab)
@@ -75,7 +85,7 @@ workflow PREPARE_REFERENCES {
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_GENOME.out.versions)
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_SHIFT_MT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_GENOME.out.versions)
- ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_SHIFT_MT.out.versions)
+ ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(GATK_SD.out.versions)
ch_versions = ch_versions.mix(GATK_SD_SHIFT_MT.out.versions)
ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions)
@@ -94,8 +104,8 @@ workflow PREPARE_REFERENCES {
bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
bwamem2_index_mt_shift = BWAMEM2_INDEX_SHIFT_MT.out.index.collect() // channel: [ val(meta), path(index) ]
chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ]
- fai = SAMTOOLS_FAIDX_GENOME.out.fai.collect() // channel: [ val(meta), path(fai) ]
- fai_mt_shift = SAMTOOLS_FAIDX_SHIFT_MT.out.fai.collect() // channel: [ val(meta), path(fai) ]
+ fai = ch_fai // channel: [ val(meta), path(fai) ]
+ fai_mt_shift = SAMTOOLS_FAIDX_MT_SHIFT.out.fai.collect() // channel: [ val(meta), path(fai) ]
gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
sequence_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ]
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 8555e1f4..61aa8a29 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -30,14 +30,7 @@ def checkPathParamList = [
params.known_indels,
params.known_mills,
params.ml_model,
- params.mt_backchain_shift,
- params.mt_bwa_index_shift,
- params.mt_bwamem2_index_shift,
- params.mt_fasta_shift,
- params.mt_fai_shift,
- params.mt_intervals,
- params.mt_intervals_shift,
- params.mt_sequence_dictionary_shift,
+ params.mt_fasta,
params.multiqc_config,
params.reduced_penetrance,
params.score_config_snv,
@@ -82,8 +75,7 @@ if (!params.skip_sv_annotation) {
}
if (!params.skip_mt_analysis) {
- mandatoryParams += ["genome", "mt_backchain_shift", "mito_name", "mt_fasta_shift", "mt_intervals",
- "mt_intervals_shift", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"]
+ mandatoryParams += ["genome", "mito_name", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"]
}
if (params.analysis_type.equals("wes")) {
@@ -190,6 +182,8 @@ workflow RAREDISEASE {
: Channel.value([])
ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).collect()
: Channel.value([])
+ ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect()
+ : Channel.value([])
ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect()
: Channel.value([[],[]])
ch_intervals_wgs = params.intervals_wgs ? Channel.fromPath(params.intervals_wgs).collect()
@@ -200,14 +194,8 @@ workflow RAREDISEASE {
: Channel.value([[],[]])
ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).collect()
: Channel.value([])
- ch_mt_backchain_shift = params.mt_backchain_shift ? Channel.fromPath(params.mt_backchain_shift).map{ it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.value([])
- ch_mt_fasta_shift = params.mt_fasta_shift ? Channel.fromPath(params.mt_fasta_shift).map { it -> [[id:it[0].simpleName], it] }.collect()
+ ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect()
: Channel.empty()
- ch_mt_intervals = params.mt_intervals ? Channel.fromPath(params.mt_intervals).collect()
- : Channel.value([])
- ch_mt_intervals_shift = params.mt_intervals_shift ? Channel.fromPath(params.mt_intervals_shift).collect()
- : Channel.value([])
ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect()
: Channel.value([])
ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect()
@@ -239,7 +227,8 @@ workflow RAREDISEASE {
// Prepare references and indices.
PREPARE_REFERENCES (
ch_genome_fasta,
- ch_mt_fasta_shift,
+ ch_genome_fai,
+ ch_mt_fasta,
ch_gnomad_af_tab,
ch_known_dbsnp,
ch_target_bed_unprocessed,
@@ -258,10 +247,17 @@ workflow RAREDISEASE {
ch_bwamem2_index_mt_shift = params.mt_bwamem2_index_shift ? Channel.fromPath(params.mt_bwamem2_index_shift).map {it -> [[id:it[0].simpleName], it]}.collect()
: ch_references.bwamem2_index_mt_shift
ch_chrom_sizes = ch_references.chrom_sizes
- ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.fai
+ ch_mt_backchain_shift = params.mt_backchain_shift ? Channel.fromPath(params.mt_backchain_shift).map{ it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.value([])
+ ch_mt_fasta_shift = params.mt_fasta_shift ? Channel.fromPath(params.mt_fasta_shift).map { it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.empty()
+ ch_mt_intervals = params.mt_intervals ? Channel.fromPath(params.mt_intervals).collect()
+ : Channel.value([])
+ ch_mt_intervals_shift = params.mt_intervals_shift ? Channel.fromPath(params.mt_intervals_shift).collect()
+ : Channel.value([])
ch_mt_shift_fai = params.mt_fai_shift ? Channel.fromPath(params.mt_fai_shift).map {it -> [[id:it[0].simpleName], it]}.collect()
: ch_references.fai_mt_shift
+ ch_genome_fai = ch_references.fai
ch_gnomad_af_idx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect()
: ch_references.gnomad_af_idx
ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_af_idx).map {meta, tab, idx -> [tab,idx]}.collect()
From 49dd613f6c2861a839ab9f0c51ad9cd7a838bfce Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 26 May 2023 10:36:38 +0200
Subject: [PATCH 2/8] update configs
---
conf/modules/align_and_call_MT.config | 14 +-
conf/modules/analyse_MT.config | 2 +-
conf/modules/convert_mt_bam_to_fastq.config | 3 -
conf/modules/merge_annotate_MT.config | 3 -
conf/test.config | 4 -
conf/test_one_sample.config | 4 -
subworkflows/local/analyse_MT.nf | 62 ++++----
subworkflows/local/prepare_references.nf | 40 ++---
workflows/raredisease.nf | 157 ++++++++++----------
9 files changed, 131 insertions(+), 158 deletions(-)
diff --git a/conf/modules/align_and_call_MT.config b/conf/modules/align_and_call_MT.config
index c4915c11..32e9ed0c 100644
--- a/conf/modules/align_and_call_MT.config
+++ b/conf/modules/align_and_call_MT.config
@@ -16,18 +16,15 @@
//
process {
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:.*' {
- ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") }
- }
withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:BWAMEM2_MEM_MT' {
- ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
}
withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SENTIEON_BWAMEM_MT' {
ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
- ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
ext.prefix = { "${meta.id}.sorted" }
}
@@ -69,18 +66,15 @@ process {
//
process {
- withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:.*' {
- ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") }
- }
withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:BWAMEM2_MEM_MT' {
- ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" }
ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
}
withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SENTIEON_BWAMEM_MT' {
ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
- ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" }
ext.prefix = { "${meta.id}.sorted" }
}
diff --git a/conf/modules/analyse_MT.config b/conf/modules/analyse_MT.config
index 85568e3e..4ee1b693 100644
--- a/conf/modules/analyse_MT.config
+++ b/conf/modules/analyse_MT.config
@@ -17,6 +17,7 @@
process {
withName: '.*ANALYSE_MT:.*' {
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") }
publishDir = [
enabled: false
]
@@ -25,7 +26,6 @@ process {
process {
withName: '.*ANALYSE_MT:PICARD_LIFTOVERVCF' {
- ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") }
ext.prefix = { "${meta.id}_liftover" }
}
}
diff --git a/conf/modules/convert_mt_bam_to_fastq.config b/conf/modules/convert_mt_bam_to_fastq.config
index 5cba99b5..9a683b6e 100644
--- a/conf/modules/convert_mt_bam_to_fastq.config
+++ b/conf/modules/convert_mt_bam_to_fastq.config
@@ -16,9 +16,6 @@
//
process {
- withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:.*' {
- ext.when = { params.mt_fasta_shift && !(params.analysis_type == "wes") }
- }
withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' {
beforeScript = {"mkdir ./tmp"}
diff --git a/conf/modules/merge_annotate_MT.config b/conf/modules/merge_annotate_MT.config
index d62cf73c..a1b41a90 100644
--- a/conf/modules/merge_annotate_MT.config
+++ b/conf/modules/merge_annotate_MT.config
@@ -16,9 +16,6 @@
//
process {
- withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:.*' {
- ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") }
- }
withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' {
ext.prefix = { "${meta.id}_merged" }
diff --git a/conf/test.config b/conf/test.config
index 805f84d6..c3ed9011 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -34,10 +34,6 @@ params {
intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
- mt_fasta_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.fa"
- mt_intervals = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt.intervals"
- mt_intervals_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.intervals"
- mt_backchain_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.back_chain"
reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini"
diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config
index e4f73a33..85efab52 100644
--- a/conf/test_one_sample.config
+++ b/conf/test_one_sample.config
@@ -34,10 +34,6 @@ params {
intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
- mt_fasta_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.fa"
- mt_intervals = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt.intervals"
- mt_intervals_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.intervals"
- mt_backchain_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.back_chain"
reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini"
diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf
index 60db58ec..1dbe2911 100644
--- a/subworkflows/local/analyse_MT.nf
+++ b/subworkflows/local/analyse_MT.nf
@@ -9,35 +9,35 @@ include { MERGE_ANNOTATE_MT } from './mitochondria/me
workflow ANALYSE_MT {
take:
- ch_bam // channel: [mandatory] [ val(meta), file(bam), file(bai) ]
- ch_cadd_header // channel: [mandatory] [ path(txt) ]
- ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
- ch_genome_bwa_index // channel: [mandatory] [ path(index) ]
- ch_genome_bwamem2_index // channel: [mandatory] [ path(index) ]
- ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_fai // channel: [mandatory] [ path(fai) ]
- ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_mt_intervals // channel: [mandatory] [ path(interval_list) ]
- ch_shift_mt_bwa_index // channel: [mandatory] [ path(index) ]
- ch_shift_mt_bwamem2_index // channel: [mandatory] [ path(index) ]
- ch_shift_mt_fasta // channel: [mandatory] [ path(fasta) ]
- ch_shift_mt_dict // channel: [mandatory] [ path(dict) ]
- ch_shift_mt_fai // channel: [mandatory] [ path(fai) ]
- ch_shift_mt_intervals // channel: [mandatory] [ path(interval_list) ]
- ch_shift_mt_backchain // channel: [mandatory] [ path(back_chain) ]
- ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
- ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
- val_vep_genome // string: [mandatory] GRCh37 or GRCh38
- val_vep_cache_version // string: [mandatory] 107
- ch_vep_cache // channel: [mandatory] [ path(cache) ]
- ch_case_info // channel: [mandatory] [ val(case_info) ]
+ ch_bam_bai // channel: [mandatory] [ val(meta), file(bam), file(bai) ]
+ ch_cadd_header // channel: [mandatory] [ path(txt) ]
+ ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
+ ch_genome_bwa_index // channel: [mandatory] [ val(meta), path(index) ]
+ ch_genome_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_mt_intervals // channel: [mandatory] [ path(interval_list) ]
+ ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
+ ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
+ ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_mtshift_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_mtshift_intervals // channel: [mandatory] [ path(interval_list) ]
+ ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ]
+ ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
+ ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
+ val_vep_genome // string: [mandatory] GRCh37 or GRCh38
+ val_vep_cache_version // string: [mandatory] 107
+ ch_vep_cache // channel: [mandatory] [ path(cache) ]
+ ch_case_info // channel: [mandatory] [ val(case_info) ]
main:
ch_versions = Channel.empty()
// PREPARING READS FOR MT ALIGNMENT
CONVERT_MT_BAM_TO_FASTQ (
- ch_bam,
+ ch_bam_bai,
ch_genome_fasta,
ch_genome_fai,
ch_genome_dict
@@ -58,20 +58,20 @@ workflow ANALYSE_MT {
ALIGN_AND_CALL_MT_SHIFT (
CONVERT_MT_BAM_TO_FASTQ.out.fastq,
CONVERT_MT_BAM_TO_FASTQ.out.bam,
- ch_shift_mt_bwa_index,
- ch_shift_mt_bwamem2_index,
- ch_shift_mt_fasta,
- ch_shift_mt_dict,
- ch_shift_mt_fai,
- ch_shift_mt_intervals
+ ch_mtshift_bwaindex,
+ ch_mtshift_bwamem2index,
+ ch_mtshift_fasta,
+ ch_mtshift_dict,
+ ch_mtshift_fai,
+ ch_mtshift_intervals
)
// LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT
PICARD_LIFTOVERVCF (
ALIGN_AND_CALL_MT_SHIFT.out.vcf,
ch_genome_dict,
- ch_shift_mt_backchain,
- ch_genome_fasta
+ ch_genome_fasta,
+ ch_mtshift_backchain,
)
// MT MERGE AND ANNOTATE VARIANTS
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index d59baba5..f9a8fb47 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -101,28 +101,28 @@ workflow PREPARE_REFERENCES {
ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)
emit:
- genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ]
- genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
- genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ]
- genome_fai = ch_fai // channel: [ val(meta), path(fai) ]
- genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ]
+ genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ]
+ genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
+ genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ]
+ genome_fai = ch_fai // channel: [ val(meta), path(fai) ]
+ genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ]
- mt_intervals = GATK_SHIFTFASTA.out.intervals.collect()
- mtshift_intervals = GATK_SHIFTFASTA.out.shift_intervals.collect()
- mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect()
- mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
- mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fai) ]
- mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ]
- mtshift_bwa_index = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
- mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
+ mt_intervals = GATK_SHIFTFASTA.out.intervals.map{ meta, it -> it}.collect() // channel: [ path(intervals) ]
+ mtshift_intervals = GATK_SHIFTFASTA.out.shift_intervals.map{ meta, it -> it}.collect() // channel: [ path(intervals) ]
+ mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(back_chain) ]
+ mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
+ mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ]
+ mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ]
+ mtshift_bwa_index = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
+ mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
- gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
- known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
- target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ]
- bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ]
- target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ]
- vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
+ gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(tbi) ]
+ known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(tbi) ]
+ target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ]
+ bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ]
+ target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ]
+ vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 45e8491b..1e1b70e5 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -173,49 +173,92 @@ workflow RAREDISEASE {
CHECK_INPUT (ch_input)
ch_versions = ch_versions.mix(CHECK_INPUT.out.versions)
- // Initialize all file channels including unprocessed vcf, bed and tab files
- ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
+ // Initialize file channels for PREPARE_REFERENCES subworkflow
ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect()
- ch_variant_consequences = Channel.fromPath("$projectDir/assets/variant_consequences_v1.txt", checkIfExists: true).collect()
+ ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect()
+ : Channel.empty()
+ ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.value([[],[]])
+ ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.value([[],[]])
+ ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.empty()
+ ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.value([[],[]])
+ ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect()
+ : Channel.value([[],[]])
+
+ // Prepare references and indices.
+ PREPARE_REFERENCES (
+ ch_genome_fasta,
+ ch_genome_fai,
+ ch_mt_fasta,
+ ch_gnomad_af_tab,
+ ch_dbsnp,
+ ch_target_bed_unprocessed,
+ ch_vep_cache_unprocessed
+ )
+ .set { ch_references }
+
+ // Gather built indices or get them from the params
+ ch_bait_intervals = ch_references.bait_intervals
+ ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect()
: Channel.value([])
ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).collect()
: Channel.value([])
- ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect()
- : Channel.value([])
- ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.value([[],[]])
+ ch_dbsnp_tbi = params.known_dbsnp_tbi ? Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect()
+ : ch_references.known_dbsnp_tbi.ifEmpty([[],[]])
+ ch_genome_bwaindex = params.bwa ? Channel.fromPath(params.bwa).map {it -> [[id:it[0].simpleName], it]}.collect()
+ : ch_references.genome_bwa_index
+ ch_genome_bwamem2index = params.bwamem2 ? Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect()
+ : ch_references.genome_bwamem2_index
+ ch_genome_chrsizes = ch_references.genome_chrom_sizes
+ ch_genome_fai = ch_references.genome_fai
+ ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect()
+ : ch_references.genome_dict
+ ch_gnomad_afidx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect()
+ : ch_references.gnomad_af_idx
+ ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_afidx).map {meta, tab, idx -> [tab,idx]}.collect()
+ : Channel.empty()
ch_intervals_wgs = params.intervals_wgs ? Channel.fromPath(params.intervals_wgs).collect()
: Channel.empty()
ch_intervals_y = params.intervals_y ? Channel.fromPath(params.intervals_y).collect()
: Channel.empty()
- ch_known_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.value([[],[]])
ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).collect()
: Channel.value([])
- ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.empty()
+ ch_mt_intervals = ch_references.mt_intervals
+ ch_mtshift_backchain = ch_references.mtshift_backchain
+ ch_mtshift_bwaindex = ch_references.mtshift_bwa_index
+ ch_mtshift_bwamem2index = ch_references.mtshift_bwamem2_index
+ ch_mtshift_dictionary = ch_references.mtshift_dict
+ ch_mtshift_fai = ch_references.mtshift_fai
+ ch_mtshift_fasta = ch_references.mtshift_fasta
+ ch_mtshift_intervals = ch_references.mtshift_intervals
ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect()
: Channel.value([])
ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect()
: Channel.value([])
ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect()
: Channel.value([])
- ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.value([[],[]])
+ ch_target_bed = ch_references.target_bed
+ ch_target_intervals = ch_references.target_intervals
ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [[id:it[0].simpleName],it]}.collect()
: Channel.value([[],[]])
+ ch_variant_consequences = Channel.fromPath("$projectDir/assets/variant_consequences_v1.txt", checkIfExists: true).collect()
ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect()
: Channel.value([])
ch_vcfanno_lua = params.vcfanno_lua ? Channel.fromPath(params.vcfanno_lua).collect()
: Channel.value([])
ch_vcfanno_toml = params.vcfanno_toml ? Channel.fromPath(params.vcfanno_toml).collect()
: Channel.value([])
- ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect()
- : Channel.value([[],[]])
+ ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? ch_references.vep_resources
+ : ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) )
ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect()
: Channel.value([])
+ ch_versions = ch_versions.mix(ch_references.versions)
+
// Generate pedigree file
ch_pedfile = CHECK_INPUT.out.samples.toList().map { makePed(it) }
@@ -224,56 +267,6 @@ workflow RAREDISEASE {
FASTQC (CHECK_INPUT.out.reads)
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
- // Prepare references and indices.
- PREPARE_REFERENCES (
- ch_genome_fasta,
- ch_genome_fai,
- ch_mt_fasta,
- ch_gnomad_af_tab,
- ch_known_dbsnp,
- ch_target_bed_unprocessed,
- ch_vep_cache_unprocessed
- )
- .set { ch_references }
-
- // Gather built indices or get them from the params
- ch_bait_intervals = ch_references.bait_intervals
- ch_bwa_index = params.bwa ? Channel.fromPath(params.bwa).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.bwa_index
- ch_bwa_index_mt_shift = params.mt_bwa_index_shift ? Channel.fromPath(params.mt_bwa_index_shift).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.bwa_index_mt_shift
- ch_bwamem2_index = params.bwamem2 ? Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.bwamem2_index
- ch_bwamem2_index_mt_shift = params.mt_bwamem2_index_shift ? Channel.fromPath(params.mt_bwamem2_index_shift).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.bwamem2_index_mt_shift
- ch_chrom_sizes = ch_references.chrom_sizes
- ch_mt_backchain_shift = params.mt_backchain_shift ? Channel.fromPath(params.mt_backchain_shift).map{ it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.value([])
- ch_mt_fasta_shift = params.mt_fasta_shift ? Channel.fromPath(params.mt_fasta_shift).map { it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.empty()
- ch_mt_intervals = params.mt_intervals ? Channel.fromPath(params.mt_intervals).collect()
- : Channel.value([])
- ch_mt_intervals_shift = params.mt_intervals_shift ? Channel.fromPath(params.mt_intervals_shift).collect()
- : Channel.value([])
- ch_mt_shift_fai = params.mt_fai_shift ? Channel.fromPath(params.mt_fai_shift).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.fai_mt_shift
- ch_genome_fai = ch_references.fai
- ch_gnomad_af_idx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect()
- : ch_references.gnomad_af_idx
- ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_af_idx).map {meta, tab, idx -> [tab,idx]}.collect()
- : Channel.empty()
- ch_known_dbsnp_tbi = params.known_dbsnp_tbi ? Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.known_dbsnp_tbi.ifEmpty([[],[]])
- ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.sequence_dict
- ch_sequence_dictionary_mt_shift = params.mt_sequence_dictionary_shift ? Channel.fromPath(params.mt_sequence_dictionary_shift).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.sequence_dict_mt_shift
- ch_target_bed = ch_references.target_bed
- ch_target_intervals = ch_references.target_intervals
- ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? ch_references.vep_resources
- : ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) )
- ch_versions = ch_versions.mix(ch_references.versions)
-
// CREATE CHROMOSOME BED AND INTERVALS
SCATTER_GENOME (
ch_genome_dictionary,
@@ -289,10 +282,10 @@ workflow RAREDISEASE {
CHECK_INPUT.out.reads,
ch_genome_fasta,
ch_genome_fai,
- ch_bwa_index,
- ch_bwamem2_index,
- ch_known_dbsnp,
- ch_known_dbsnp_tbi,
+ ch_genome_bwaindex,
+ ch_genome_bwamem2index,
+ ch_dbsnp,
+ ch_dbsnp_tbi,
params.platform
)
.set { ch_mapped }
@@ -307,7 +300,7 @@ workflow RAREDISEASE {
ch_genome_fai,
ch_bait_intervals,
ch_target_intervals,
- ch_chrom_sizes,
+ ch_genome_chrsizes,
ch_intervals_wgs,
ch_intervals_y
)
@@ -349,8 +342,8 @@ workflow RAREDISEASE {
ch_mapped.bam_bai,
ch_genome_fasta,
ch_genome_fai,
- ch_known_dbsnp,
- ch_known_dbsnp_tbi,
+ ch_dbsnp,
+ ch_dbsnp_tbi,
ch_call_interval,
ch_ml_model,
CHECK_INPUT.out.case_info
@@ -361,7 +354,7 @@ workflow RAREDISEASE {
ch_mapped.marked_bam,
ch_mapped.marked_bai,
ch_mapped.bam_bai,
- ch_bwa_index,
+ ch_genome_bwaindex,
ch_genome_fasta,
ch_genome_fai,
CHECK_INPUT.out.case_info,
@@ -431,19 +424,19 @@ workflow RAREDISEASE {
ch_mapped.bam_bai,
ch_cadd_header,
ch_cadd_resources,
- ch_bwa_index,
- ch_bwamem2_index,
+ ch_genome_bwaindex,
+ ch_genome_bwamem2index,
ch_genome_fasta,
ch_genome_fai,
ch_genome_dictionary,
ch_mt_intervals,
- ch_bwa_index_mt_shift,
- ch_bwamem2_index_mt_shift,
- ch_mt_fasta_shift,
- ch_sequence_dictionary_mt_shift,
- ch_mt_shift_fai,
- ch_mt_intervals_shift,
- ch_mt_backchain_shift,
+ ch_mtshift_bwaindex,
+ ch_mtshift_bwamem2index,
+ ch_mtshift_fasta,
+ ch_mtshift_dictionary,
+ ch_mtshift_fai,
+ ch_mtshift_intervals,
+ ch_mtshift_backchain,
ch_vcfanno_resources,
ch_vcfanno_toml,
params.genome,
From fd79ab76593604549ba4cf7e754d079b7dbbfdf7 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 26 May 2023 10:49:42 +0200
Subject: [PATCH 3/8] update input channel definitions
---
.../local/mitochondria/align_and_call_MT.nf | 22 +++++++++----------
.../mitochondria/convert_mt_bam_to_fastq.nf | 10 ++++-----
.../local/mitochondria/merge_annotate_MT.nf | 4 ++--
3 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf
index 3d992322..7121168c 100644
--- a/subworkflows/local/mitochondria/align_and_call_MT.nf
+++ b/subworkflows/local/mitochondria/align_and_call_MT.nf
@@ -16,21 +16,21 @@ include { TABIX_TABIX as TABIX_TABIX_MT } fr
workflow ALIGN_AND_CALL_MT {
take:
- ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ]
- ch_ubam // channel: [mandatory] [ val(meta), path(bam) ]
- ch_index_bwa // channel: [mandatory for sentieon] [ val(meta), path(index) ]
- ch_index_bwamem2 // channel: [mandatory for bwamem2] [ val(meta), path(index) ]
- ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_intervals_mt // channel: [mandatory] [ path(interval_list) ]
+ ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ]
+ ch_ubam // channel: [mandatory] [ val(meta), path(bam) ]
+ ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ]
+ ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_intervals // channel: [mandatory] [ path(interval_list) ]
main:
ch_versions = Channel.empty()
- BWAMEM2_MEM_MT (ch_fastq , ch_index_bwamem2, true)
+ BWAMEM2_MEM_MT (ch_fastq , ch_bwamem2index, true)
- SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_index_bwa )
+ SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_bwaindex )
ch_mt_bam = Channel.empty().mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam)
ch_fastq_ubam = ch_mt_bam.join(ch_ubam, failOnMismatch:true, failOnDuplicate:true)
@@ -45,7 +45,7 @@ workflow ALIGN_AND_CALL_MT {
SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam)
ch_sort_index_bam = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true)
- ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals_mt)
+ ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals)
GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[])
diff --git a/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf b/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf
index 2df4406f..156f71bb 100644
--- a/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf
+++ b/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf
@@ -8,16 +8,16 @@ include { GATK4_SAMTOFASTQ as GATK4_SAMTOFASTQ_MT } from '../../../modules/nf-co
workflow CONVERT_MT_BAM_TO_FASTQ {
take:
- ch_bam // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
- ch_genome_fasta_meta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
main:
ch_versions = Channel.empty()
// Outputs bam containing only MT
- GATK4_PRINTREADS_MT ( ch_bam, ch_genome_fasta_meta, ch_genome_fai, ch_genome_dict )
+ GATK4_PRINTREADS_MT ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict )
// Removes alignment information
GATK4_REVERTSAM_MT ( GATK4_PRINTREADS_MT.out.bam )
diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf
index 8d4f3da0..46e334f0 100644
--- a/subworkflows/local/mitochondria/merge_annotate_MT.nf
+++ b/subworkflows/local/mitochondria/merge_annotate_MT.nf
@@ -23,9 +23,9 @@ workflow MERGE_ANNOTATE_MT {
ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ]
ch_cadd_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
- ch_genome_fasta // channel: [mandatory] [ path(fasta) ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_genome_fai // channel: [mandatory] [ path(fai) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
val_vep_genome // string: [mandatory] GRCh37 or GRCh38
From 743c35bb24f801cd2573235e1784401403bf40f4 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 26 May 2023 10:56:08 +0200
Subject: [PATCH 4/8] update docs
---
CHANGELOG.md | 7 +++++++
docs/usage.md | 22 ++++++++--------------
2 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 23a719c8..cf89d03d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,13 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## v2.0.0 - []
+
+### `Added`
+
+- GATK's ShiftFasta to generate all the files required for mitochondrial variant calling
+- Feature to calculate CADD scores for indels
+
## v1.0.0 - [2023-03-31]
Initial release of nf-core/raredisease, created with the [nf-core](https://nf-co.re/) template.
diff --git a/docs/usage.md b/docs/usage.md
index 482ed179..5dd5761a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -219,20 +219,14 @@ The mandatory and optional parameters for each category are tabulated below.
##### 8. Mitochondrial analysis
-| Mandatory | Optional |
-| ------------------------------ | -------- |
-| genome | |
-| mt_backchain_shift1 | |
-| mito_name | |
-| mt_fasta_shift | |
-| mt_intervals | |
-| mt_intervals_shift | |
-| vcfanno_resources | |
-| vcfanno_toml | |
-| vep_cache_version | |
-| vep_cache | |
-
-1Can be generated by GATK's [ShiftFasta](https://gatk.broadinstitute.org/hc/en-us/articles/9570501436827-ShiftFasta-BETA-). Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/mt_shift8000.back_chain).
+| Mandatory | Optional |
+| ----------------- | -------- |
+| genome | |
+| mito_name | |
+| vcfanno_resources | |
+| vcfanno_toml | |
+| vep_cache_version | |
+| vep_cache | |
#### Run the pipeline
From f0f866d0d8d10f29e92b947fb85996cd55520dc3 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 26 May 2023 10:58:53 +0200
Subject: [PATCH 5/8] update changelog
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf89d03d..55291bfa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`
-- GATK's ShiftFasta to generate all the files required for mitochondrial variant calling
+- GATK's ShiftFasta to generate all the files required for mitochondrial analysis
- Feature to calculate CADD scores for indels
## v1.0.0 - [2023-03-31]
From c0a869e9b629935d9cc492864053ac0f210301ab Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 26 May 2023 11:27:13 +0200
Subject: [PATCH 6/8] fix lint
---
modules.json | 278 +++++++++++++--------------------------------------
1 file changed, 70 insertions(+), 208 deletions(-)
diff --git a/modules.json b/modules.json
index 0622ed2e..5b778895 100644
--- a/modules.json
+++ b/modules.json
@@ -8,488 +8,350 @@
"bcftools/annotate": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bcftools/concat": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bcftools/filter": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bcftools/merge": {
"branch": "master",
"git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bcftools/norm": {
"branch": "master",
"git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bcftools/reheader": {
"branch": "master",
"git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bcftools/roh": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bcftools/view": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bwa/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bwamem2/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"bwamem2/mem": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"cadd": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"cat/cat": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"deepvariant": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"expansionhunter": {
"branch": "master",
"git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"fastqc": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/bedtointervallist": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/createsequencedictionary": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/filtermutectcalls": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/intervallisttools": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/mergebamalignment": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/mergevcfs": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/mutect2": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/printreads": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/revertsam": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/samtofastq": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/selectvariants": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/shiftfasta": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/splitintervals": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"gatk4/variantfiltration": {
"branch": "master",
"git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"genmod/annotate": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"genmod/compound": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"genmod/models": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"genmod/score": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"glnexus": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"haplocheck": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"haplogrep2/classify": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"manta/germline": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"mosdepth": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"peddy": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"picard/addorreplacereadgroups": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"picard/collecthsmetrics": {
"branch": "master",
"git_sha": "0ce3ab0ac301f160225b22254fa238478b4389f2",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"picard/collectmultiplemetrics": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"picard/collectwgsmetrics": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"picard/liftovervcf": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"picard/markduplicates": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"picard/renamesampleinvcf": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"picard/sortvcf": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"qualimap/bamqc": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"rhocall/annotate": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"samtools/faidx": {
"branch": "master",
"git_sha": "bf8ff98531167f8245ba5c44ce7d781503ddf936",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"samtools/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"samtools/merge": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"samtools/sort": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"samtools/stats": {
"branch": "master",
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"samtools/view": {
"branch": "master",
"git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"smncopynumbercaller": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"stranger": {
"branch": "master",
"git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"svdb/merge": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"svdb/query": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"tabix/bgziptabix": {
"branch": "master",
"git_sha": "01b3b2509d76625b6d6cd613b349fb4777712a15",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"tabix/tabix": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"tiddit/cov": {
"branch": "master",
"git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"tiddit/sv": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"ucsc/wigtobigwig": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"untar": {
"branch": "master",
"git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
},
"vcfanno": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
- "installed_by": [
- "modules"
- ]
+ "installed_by": ["modules"]
}
}
}
}
}
-}
\ No newline at end of file
+}
From 4b6b9eac055a2d37250e12c477e8996cea4aa099 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sat, 27 May 2023 23:06:31 +0200
Subject: [PATCH 7/8] update conf
---
conf/modules/prepare_references.config | 16 +++---
.../local/mitochondria/align_and_call_MT.nf | 10 ++--
subworkflows/local/prepare_references.nf | 51 +++++++++++--------
3 files changed, 44 insertions(+), 33 deletions(-)
diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config
index af73549a..a825250c 100644
--- a/conf/modules/prepare_references.config
+++ b/conf/modules/prepare_references.config
@@ -29,16 +29,16 @@ process {
ext.when = {!params.bwamem2 && params.aligner == "bwamem2"}
}
- withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_SHIFT_MT' {
- ext.when = {!params.mt_bwamem2_index_shift && params.mt_fasta_shift && !(params.analysis_type == "wes") && params.aligner == "bwamem2"}
+ withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2"}
}
withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' {
ext.when = {!params.bwa && params.aligner == "sentieon"}
}
- withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_SHIFT_MT' {
- ext.when = {!params.mt_bwa_index_shift && params.mt_fasta_shift && !(params.analysis_type == "wes") && params.aligner == "sentieon"}
+ withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' {
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon"}
}
withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' {
@@ -54,8 +54,8 @@ process {
ext.when = {!params.mt_fasta && !params.skip_mt_analysis}
}
- withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_SHIFT_MT' {
- ext.when = {!params.mt_fai_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")}
+ withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' {
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")}
}
withName: '.*PREPARE_REFERENCES:GATK_SD' {
@@ -66,8 +66,8 @@ process {
ext.args = { "--interval-file-name ${meta.id}_mt" }
}
- withName: '.*PREPARE_REFERENCES:GATK_SD_SHIFT_MT' {
- ext.when = {!params.mt_sequence_dictionary_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")}
+ withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' {
+ ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")}
}
withName: '.*PREPARE_REFERENCES:GET_CHROM_SIZES' {
diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf
index 7121168c..dc629d84 100644
--- a/subworkflows/local/mitochondria/align_and_call_MT.nf
+++ b/subworkflows/local/mitochondria/align_and_call_MT.nf
@@ -28,14 +28,16 @@ workflow ALIGN_AND_CALL_MT {
main:
ch_versions = Channel.empty()
- BWAMEM2_MEM_MT (ch_fastq , ch_bwamem2index, true)
+ BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true)
SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_bwaindex )
- ch_mt_bam = Channel.empty().mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam)
- ch_fastq_ubam = ch_mt_bam.join(ch_ubam, failOnMismatch:true, failOnDuplicate:true)
+ Channel.empty()
+ .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam)
+ .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true)
+ .set {ch_bam_ubam}
- GATK4_MERGEBAMALIGNMENT_MT (ch_fastq_ubam, ch_fasta, ch_dict)
+ GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict)
PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam)
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index f9a8fb47..54160fdc 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -57,11 +57,20 @@ workflow PREPARE_REFERENCES {
GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_MT_SHIFT.out.dict)
BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
+ GATK_SHIFTFASTA.out.intervals
+     .multiMap{ meta, files ->
+         shift_intervals:
+             // interval files for the shifted MT reference (names end in "shifted.intervals")
+             files.findAll {it.toString().endsWith("shifted.intervals")}
+         intervals:
+             // remaining interval files; findAll avoids the undeclared (script-global) index variable
+             files.findAll {!(it.toString().endsWith("shifted.intervals"))}
+     }
+     .set {ch_shiftfasta_mtintervals}
// Vcf, tab and bed indices
TABIX_DBSNP(ch_known_dbsnp)
ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions)
-
TABIX_GNOMAD_AF(ch_gnomad_af_tab)
TABIX_PT(ch_target_bed).tbi.set { ch_tbi }
TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi }
@@ -101,28 +110,28 @@ workflow PREPARE_REFERENCES {
ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)
emit:
- genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ]
- genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
- genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ]
- genome_fai = ch_fai // channel: [ val(meta), path(fai) ]
- genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ]
+ genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ]
+ genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
+ genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ]
+ genome_fai = ch_fai // channel: [ val(meta), path(fai) ]
+ genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ]
- mt_intervals = GATK_SHIFTFASTA.out.intervals.map{ meta, it -> it}.collect() // channel: [ path(intervals) ]
- mtshift_intervals = GATK_SHIFTFASTA.out.shift_intervals.map{ meta, it -> it}.collect() // channel: [ path(intervals) ]
- mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect()
- mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
- mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fai) ]
- mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ]
- mtshift_bwa_index = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
- mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
+ mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ]
+ mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ]
+ mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ]
+ mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
+ mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ]
+ mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ]
+ mtshift_bwa_index = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
+ mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
- gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
- known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
- target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ]
- bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ]
- target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ]
- vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
+ gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(tbi) ]
+ known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(tbi) ]
+ target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ]
+ bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ]
+ target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ]
+ vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
}
From 48af7d5167ac0ab8b632a59df50673e9cf167c1a Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 29 May 2023 10:39:51 +0200
Subject: [PATCH 8/8] update faidx
---
modules.json | 2 +-
modules/nf-core/samtools/faidx/main.nf | 4 ++++
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/modules.json b/modules.json
index 5b778895..a061424c 100644
--- a/modules.json
+++ b/modules.json
@@ -267,7 +267,7 @@
},
"samtools/faidx": {
"branch": "master",
- "git_sha": "bf8ff98531167f8245ba5c44ce7d781503ddf936",
+ "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe",
"installed_by": ["modules"]
},
"samtools/index": {
diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf
index c1e8ef3a..59ed3088 100644
--- a/modules/nf-core/samtools/faidx/main.nf
+++ b/modules/nf-core/samtools/faidx/main.nf
@@ -35,8 +35,12 @@ process SAMTOOLS_FAIDX {
"""
stub:
+ def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll()
+ def fastacmd = match[0] ? "touch ${match[0][1]}" : ''
"""
+ ${fastacmd}
touch ${fasta}.fai
+
cat <<-END_VERSIONS > versions.yml
"${task.process}":