diff --git a/CHANGELOG.md b/CHANGELOG.md index 72edfc62..75f0eee0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `par_bed` to pass a PAR bed file to deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) - A new functionality to pass gzipped resources to vcfanno_extra_resources [#589](https://github.com/nf-core/raredisease/pull/589) - A new parameter `vcfanno_extra_resources` to pass an extra resource to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588) - A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585) @@ -19,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Males' X and Y chromosomes will be treated as haploids during variant calling by deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) - Acceptable type for lane field in the samplesheet from number to string [#597](https://github.com/nf-core/raredisease/pull/597) - Allow `0` as a valid value for `sex` in the samplesheet [#595](https://github.com/nf-core/raredisease/pull/595) - Updated deepvariant to version 1.6.1 [#587](https://github.com/nf-core/raredisease/pull/587) @@ -57,6 +59,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | scatter_count | | | vcfanno_extra_resources | +### Tool updates + +| Tool | Old version | New version | +| ----------- | ----------- | ----------- | +| Deepvariant | 1.5.0 | 1.6.1 | + ## 2.1.0 - Obelix [2024-05-29] ### `Added` diff --git a/conf/modules/call_snv_deepvariant.config b/conf/modules/call_snv_deepvariant.config index 02c2b384..348e95cc 100644 --- a/conf/modules/call_snv_deepvariant.config +++ b/conf/modules/call_snv_deepvariant.config @@ -22,7 +22,10 @@ process { } withName: 
'.*CALL_SNV_DEEPVARIANT:DEEPVARIANT' { - ext.args = { "--model_type=${params.analysis_type.toUpperCase()}" } + ext.args = { [ + "--model_type=${params.analysis_type.toUpperCase()}", + meta.sex == "1" ? params.genome == 'GRCh37' ? '--haploid_contigs="X,Y"' : '--haploid_contigs="chrX,chrY"' : '' + ].join(' ') } ext.prefix = { "${meta.id}_deepvar" } } diff --git a/docs/usage.md b/docs/usage.md index 550b0e6e..4bbff0b5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -201,10 +201,12 @@ The mandatory and optional parameters for each category are tabulated below. | ml_model2 | known_dbsnp_tbi2 | | analysis_type3 | call_interval2 | | | known_dbsnp_tbi2 | +| | par_bed4 | 1Default variant caller is DeepVariant, but you have the option to use Sentieon as well.
2These parameters are only used by Sentieon.
3Default is WGS, but you have the option to choose WES as well.
+4This parameter is only used by DeepVariant.
##### 5. Variant calling - Structural variants diff --git a/main.nf b/main.nf index 96fced9c..1e80e917 100644 --- a/main.nf +++ b/main.nf @@ -39,6 +39,7 @@ params.mobile_element_references = getGenomeAttribute('mobile_element_refe params.mobile_element_svdb_annotations = getGenomeAttribute('mobile_element_svdb_annotations') params.ml_model = getGenomeAttribute('ml_model') params.mt_fasta = getGenomeAttribute('mt_fasta') +params.par_bed = getGenomeAttribute('par_bed') params.ploidy_model = getGenomeAttribute('ploidy_model') params.reduced_penetrance = getGenomeAttribute('reduced_penetrance') params.readcount_intervals = getGenomeAttribute('readcount_intervals') diff --git a/modules.json b/modules.json index 9fe5ae84..64ceca8d 100644 --- a/modules.json +++ b/modules.json @@ -113,7 +113,7 @@ }, "deepvariant": { "branch": "master", - "git_sha": "ccf06a896339d2a4ed8594daa9f9adb827cb8189", + "git_sha": "a7e8b8afd4fa82f20d745fa778bfdbf39c1f7efb", "installed_by": ["modules"] }, "eklipse": { diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index a560cbe9..8d3d0911 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -12,6 +12,7 @@ process DEEPVARIANT { tuple val(meta2), path(fasta) tuple val(meta3), path(fai) tuple val(meta4), path(gzi) + tuple val(meta5), path(par_bed) output: tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf @@ -31,6 +32,7 @@ process DEEPVARIANT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def regions = intervals ? "--regions=${intervals}" : "" + def par_regions = par_bed ? 
"--par_regions_bed=${par_bed}" : "" // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755 // FIXME Revert this on next version bump def VERSION = '1.6.1' @@ -43,6 +45,7 @@ process DEEPVARIANT { --output_gvcf=${prefix}.g.vcf.gz \\ ${args} \\ ${regions} \\ + ${par_regions} \\ --intermediate_results_dir=tmp \\ --num_shards=${task.cpus} diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml index a50dc57d..2327dd5f 100644 --- a/modules/nf-core/deepvariant/meta.yml +++ b/modules/nf-core/deepvariant/meta.yml @@ -57,6 +57,15 @@ input: type: file description: GZI index of reference fasta file pattern: "*.gzi" + - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - par_bed: + type: file + description: BED file containing PAR regions + pattern: "*.bed" output: - meta: type: map diff --git a/modules/nf-core/deepvariant/tests/main.nf.test b/modules/nf-core/deepvariant/tests/main.nf.test index 91612c1e..17765233 100644 --- a/modules/nf-core/deepvariant/tests/main.nf.test +++ b/modules/nf-core/deepvariant/tests/main.nf.test @@ -31,6 +31,9 @@ nextflow_process { input[3] = [ [],[] ] + input[4] = [ + [],[] + ] """ } } @@ -66,6 +69,48 @@ nextflow_process { input[3] = [ [],[] ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed") { + config "./nextflow-non-autosomal-calling.config" + tag "test" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true) + ] """ } } @@ -102,6 +147,9 @@ nextflow_process { [ id:'genome'], file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) ] + input[4] = [ + [],[] + ] """ } } diff --git a/modules/nf-core/deepvariant/tests/main.nf.test.snap b/modules/nf-core/deepvariant/tests/main.nf.test.snap index c49f7e4d..04f87774 100644 --- a/modules/nf-core/deepvariant/tests/main.nf.test.snap +++ b/modules/nf-core/deepvariant/tests/main.nf.test.snap @@ -265,5 +265,94 @@ "nextflow": "24.04.2" }, "timestamp": "2024-07-01T12:09:13.952808655" + }, + "homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2" + ] + ], + "4": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": "test", + "single_end": 
false + }, + "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3" + ] + ], + "versions": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T14:29:24.939680679" } -} \ No newline at end of file +} diff --git a/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config new file mode 100644 index 00000000..4be8986b --- /dev/null +++ b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config @@ -0,0 +1,8 @@ +process { + + withName: DEEPVARIANT { + ext.args = '--model_type=WGS --haploid_contigs chr22' + ext.prefix = { "${meta.id}_out" } + } + +} diff --git a/nextflow_schema.json b/nextflow_schema.json index f2c79a4f..dc91e7a4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -267,6 +267,14 @@ "description": "Path to mitochondrial FASTA genome file.", "fa_icon": "fas fa-file" }, + "par_bed": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "description": "Path to a BED file containing PAR regions (used by deepvariant)." 
+ }, "ploidy_model": { "type": "string", "exists": true, diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index 48bc500a..5d0b2ce9 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -28,6 +28,7 @@ workflow CALL_SNV { ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ] ch_call_interval // channel: [mandatory] [ path(intervals) ] ch_ml_model // channel: [mandatory] [ path(model) ] + ch_par_bed // channel: [optional] [ val(meta), path(bed) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_foundin_header // channel: [mandatory] [ path(header) ] ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ] @@ -50,6 +51,7 @@ workflow CALL_SNV { ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, + ch_par_bed, ch_case_info, ch_foundin_header, ch_genome_chrsizes diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf index c3477d67..3104ee67 100644 --- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf +++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf @@ -16,6 +16,7 @@ workflow CALL_SNV_DEEPVARIANT { ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_par_bed // channel: [optional] [ val(meta), path(bed) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_foundin_header // channel: [mandatory] [ path(header) ] ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ] @@ -28,7 +29,7 @@ workflow CALL_SNV_DEEPVARIANT { } .set { ch_deepvar_in } - DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]] ) + DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]], ch_par_bed ) DEEPVARIANT.out.gvcf .collect{it[1]} .toList() diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf 
index 864eaf0c..0234272a 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -279,6 +279,8 @@ workflow RAREDISEASE { ch_mtshift_fai = ch_references.mtshift_fai ch_mtshift_fasta = ch_references.mtshift_fasta ch_mtshift_intervals = ch_references.mtshift_intervals + ch_par_bed = params.par_bed ? Channel.fromPath(params.par_bed).map{ it -> [[id:'par_bed'], it] }.collect() + : Channel.value([[],[]]) ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.empty() ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect() @@ -502,6 +504,7 @@ workflow RAREDISEASE { ch_dbsnp_tbi, ch_call_interval, ch_ml_model, + ch_par_bed, ch_case_info, ch_foundin_header, Channel.value(params.sentieon_dnascope_pcr_indel_model)