diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72edfc62..75f0eee0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`
+- A new parameter `par_bed` to pass a PAR BED file to deepvariant [#598](https://github.com/nf-core/raredisease/pull/598)
- A new functionality to pass gzipped resources to vcfanno_extra_resources [#589](https://github.com/nf-core/raredisease/pull/589)
- A new parameter `vcfanno_extra_resources` to pass an extra resource to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588)
- A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585)
@@ -19,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Changed`
+- Males' X and Y chromosomes will be treated as haploid during variant calling by deepvariant [#598](https://github.com/nf-core/raredisease/pull/598)
- Acceptable type for lane field in the samplesheet from number to string [#597](https://github.com/nf-core/raredisease/pull/597)
- Allow `0` as a valid value for `sex` in the samplesheet [#595](https://github.com/nf-core/raredisease/pull/595)
- Updated deepvariant to version 1.6.1 [#587](https://github.com/nf-core/raredisease/pull/587)
@@ -57,6 +59,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| | scatter_count |
| | vcfanno_extra_resources |
+### Tool updates
+
+| Tool | Old version | New version |
+| ----------- | ----------- | ----------- |
+| Deepvariant | 1.5.0 | 1.6.1 |
+
## 2.1.0 - Obelix [2024-05-29]
### `Added`
diff --git a/conf/modules/call_snv_deepvariant.config b/conf/modules/call_snv_deepvariant.config
index 02c2b384..348e95cc 100644
--- a/conf/modules/call_snv_deepvariant.config
+++ b/conf/modules/call_snv_deepvariant.config
@@ -22,7 +22,10 @@ process {
}
withName: '.*CALL_SNV_DEEPVARIANT:DEEPVARIANT' {
- ext.args = { "--model_type=${params.analysis_type.toUpperCase()}" }
+        ext.args = { [ // DeepVariant CLI options; X/Y are called as haploid for male samples only
+            "--model_type=${params.analysis_type.toUpperCase()}",
+            meta.sex.toString() == "1" ? (params.genome == 'GRCh37' ? '--haploid_contigs="X,Y"' : '--haploid_contigs="chrX,chrY"') : '' // toString(): match sex given as Integer 1 or String "1"; GRCh37 contigs carry no "chr" prefix
+        ].join(' ') }
ext.prefix = { "${meta.id}_deepvar" }
}
diff --git a/docs/usage.md b/docs/usage.md
index 550b0e6e..4bbff0b5 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -201,10 +201,12 @@ The mandatory and optional parameters for each category are tabulated below.
| ml_model2 | known_dbsnp_tbi2 |
| analysis_type3 | call_interval2 |
| | known_dbsnp_tbi2 |
+| | par_bed4 |
1Default variant caller is DeepVariant, but you have the option to use Sentieon as well.
2These parameters are only used by Sentieon.
3Default is WGS, but you have the option to choose WES as well.
+4This parameter is only used by Deepvariant.
##### 5. Variant calling - Structural variants
diff --git a/main.nf b/main.nf
index 96fced9c..1e80e917 100644
--- a/main.nf
+++ b/main.nf
@@ -39,6 +39,7 @@ params.mobile_element_references = getGenomeAttribute('mobile_element_refe
params.mobile_element_svdb_annotations = getGenomeAttribute('mobile_element_svdb_annotations')
params.ml_model = getGenomeAttribute('ml_model')
params.mt_fasta = getGenomeAttribute('mt_fasta')
+params.par_bed = getGenomeAttribute('par_bed')
params.ploidy_model = getGenomeAttribute('ploidy_model')
params.reduced_penetrance = getGenomeAttribute('reduced_penetrance')
params.readcount_intervals = getGenomeAttribute('readcount_intervals')
diff --git a/modules.json b/modules.json
index 9fe5ae84..64ceca8d 100644
--- a/modules.json
+++ b/modules.json
@@ -113,7 +113,7 @@
},
"deepvariant": {
"branch": "master",
- "git_sha": "ccf06a896339d2a4ed8594daa9f9adb827cb8189",
+ "git_sha": "a7e8b8afd4fa82f20d745fa778bfdbf39c1f7efb",
"installed_by": ["modules"]
},
"eklipse": {
diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf
index a560cbe9..8d3d0911 100644
--- a/modules/nf-core/deepvariant/main.nf
+++ b/modules/nf-core/deepvariant/main.nf
@@ -12,6 +12,7 @@ process DEEPVARIANT {
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(gzi)
+ tuple val(meta5), path(par_bed)
output:
tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf
@@ -31,6 +32,7 @@ process DEEPVARIANT {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def regions = intervals ? "--regions=${intervals}" : ""
+ def par_regions = par_bed ? "--par_regions_bed=${par_bed}" : ""
// WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755
// FIXME Revert this on next version bump
def VERSION = '1.6.1'
@@ -43,6 +45,7 @@ process DEEPVARIANT {
--output_gvcf=${prefix}.g.vcf.gz \\
${args} \\
${regions} \\
+ ${par_regions} \\
--intermediate_results_dir=tmp \\
--num_shards=${task.cpus}
diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml
index a50dc57d..2327dd5f 100644
--- a/modules/nf-core/deepvariant/meta.yml
+++ b/modules/nf-core/deepvariant/meta.yml
@@ -57,6 +57,15 @@ input:
type: file
description: GZI index of reference fasta file
pattern: "*.gzi"
+ - meta5:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - par_bed:
+ type: file
+ description: BED file containing PAR regions
+ pattern: "*.bed"
output:
- meta:
type: map
diff --git a/modules/nf-core/deepvariant/tests/main.nf.test b/modules/nf-core/deepvariant/tests/main.nf.test
index 91612c1e..17765233 100644
--- a/modules/nf-core/deepvariant/tests/main.nf.test
+++ b/modules/nf-core/deepvariant/tests/main.nf.test
@@ -31,6 +31,9 @@ nextflow_process {
input[3] = [
[],[]
]
+ input[4] = [
+ [],[]
+ ]
"""
}
}
@@ -66,6 +69,48 @@ nextflow_process {
input[3] = [
[],[]
]
+ input[4] = [
+ [],[]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed") {
+ config "./nextflow-non-autosomal-calling.config"
+ tag "test"
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [],[]
+ ]
+ input[4] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ ]
"""
}
}
@@ -102,6 +147,9 @@ nextflow_process {
[ id:'genome'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true)
]
+ input[4] = [
+ [],[]
+ ]
"""
}
}
diff --git a/modules/nf-core/deepvariant/tests/main.nf.test.snap b/modules/nf-core/deepvariant/tests/main.nf.test.snap
index c49f7e4d..04f87774 100644
--- a/modules/nf-core/deepvariant/tests/main.nf.test.snap
+++ b/modules/nf-core/deepvariant/tests/main.nf.test.snap
@@ -265,5 +265,94 @@
"nextflow": "24.04.2"
},
"timestamp": "2024-07-01T12:09:13.952808655"
+ },
+ "homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-07-23T14:29:24.939680679"
}
-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config
new file mode 100644
index 00000000..4be8986b
--- /dev/null
+++ b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config
@@ -0,0 +1,8 @@
+process {
+
+ withName: DEEPVARIANT {
+ ext.args = '--model_type=WGS --haploid_contigs chr22'
+ ext.prefix = { "${meta.id}_out" }
+ }
+
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f2c79a4f..dc91e7a4 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -267,6 +267,14 @@
"description": "Path to mitochondrial FASTA genome file.",
"fa_icon": "fas fa-file"
},
+ "par_bed": {
+ "type": "string",
+ "exists": true,
+ "format": "path",
+ "fa_icon": "fas fa-file",
+ "pattern": "^\\S+\\.bed(\\.gz)?$",
+ "description": "Path to a BED file containing PAR regions (used by deepvariant)."
+ },
"ploidy_model": {
"type": "string",
"exists": true,
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index 48bc500a..5d0b2ce9 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -28,6 +28,7 @@ workflow CALL_SNV {
ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ]
ch_call_interval // channel: [mandatory] [ path(intervals) ]
ch_ml_model // channel: [mandatory] [ path(model) ]
+ ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
ch_case_info // channel: [mandatory] [ val(case_info) ]
ch_foundin_header // channel: [mandatory] [ path(header) ]
ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ]
@@ -50,6 +51,7 @@ workflow CALL_SNV {
ch_genome_bam_bai,
ch_genome_fasta,
ch_genome_fai,
+ ch_par_bed,
ch_case_info,
ch_foundin_header,
ch_genome_chrsizes
diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf
index c3477d67..3104ee67 100644
--- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf
+++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf
@@ -16,6 +16,7 @@ workflow CALL_SNV_DEEPVARIANT {
ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
ch_case_info // channel: [mandatory] [ val(case_info) ]
ch_foundin_header // channel: [mandatory] [ path(header) ]
ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ]
@@ -28,7 +29,7 @@ workflow CALL_SNV_DEEPVARIANT {
}
.set { ch_deepvar_in }
- DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]] )
+ DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]], ch_par_bed )
DEEPVARIANT.out.gvcf
.collect{it[1]}
.toList()
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 864eaf0c..0234272a 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -279,6 +279,8 @@ workflow RAREDISEASE {
ch_mtshift_fai = ch_references.mtshift_fai
ch_mtshift_fasta = ch_references.mtshift_fasta
ch_mtshift_intervals = ch_references.mtshift_intervals
+ ch_par_bed = params.par_bed ? Channel.fromPath(params.par_bed).map{ it -> [[id:'par_bed'], it] }.collect()
+ : Channel.value([[],[]])
ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect()
: Channel.empty()
ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect()
@@ -502,6 +504,7 @@ workflow RAREDISEASE {
ch_dbsnp_tbi,
ch_call_interval,
ch_ml_model,
+ ch_par_bed,
ch_case_info,
ch_foundin_header,
Channel.value(params.sentieon_dnascope_pcr_indel_model)