diff --git a/CHANGELOG.md b/CHANGELOG.md index c7e97c9e..140d2a28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `vcfanno_extra_resources` to supply an additional VCF file of annotations to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588) - A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585) - Print warning messages if user intends to perform ranking when there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) - Two new parameters `skip_repeat_annotation` and `skip_repeat_calling` to skip calling and annotation of repeat expansions [#574](https://github.com/nf-core/raredisease/pull/574) diff --git a/main.nf b/main.nf index 838d75d7..96fced9c 100644 --- a/main.nf +++ b/main.nf @@ -58,6 +58,7 @@ params.variant_consequences_sv = getGenomeAttribute('variant_consequence params.vep_filters = getGenomeAttribute('vep_filters') params.vep_filters_scout_fmt = getGenomeAttribute('vep_filters_scout_fmt') params.vcf2cytosure_blacklist = getGenomeAttribute('vcf2cytosure_blacklist') +params.vcfanno_extra_resources = getGenomeAttribute('vcfanno_extra_resources') params.vcfanno_resources = getGenomeAttribute('vcfanno_resources') params.vcfanno_toml = getGenomeAttribute('vcfanno_toml') params.vcfanno_lua = getGenomeAttribute('vcfanno_lua') diff --git a/nextflow_schema.json b/nextflow_schema.json index 81ef4683..f2c79a4f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -402,6 +402,14 @@ "fa_icon": "fas fa-file", "description": "Path to vcf2cytosure blacklist file" }, + "vcfanno_extra_resources": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "Path to a VCF file containing annotations.", + "help_text": "Can be used to supply case-specific annotations in 
addition to those provided using --vcfanno_resources. The file is bgzip-compressed and tabix-indexed by the pipeline before it is passed to vcfanno.", + "fa_icon": "fas fa-file" + }, "vcfanno_resources": { "type": "string", "exists": true, diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf index 8ae123f7..51d5a64c 100644 --- a/subworkflows/local/annotate_genome_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -28,7 +28,8 @@ workflow ANNOTATE_GENOME_SNVS { analysis_type // string: [mandatory] 'wgs' or 'wes' ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] + ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] ] + ch_vcfanno_resources // channel: [mandatory] [ [path(vcf),path(index),...] ] ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 @@ -69,7 +70,7 @@ workflow ANNOTATE_GENOME_SNVS { GATK4_SELECTVARIANTS.out.vcf .join(GATK4_SELECTVARIANTS.out.tbi) - .map { meta, vcf, tbi -> return [meta, vcf, tbi, []] } + .combine(ch_vcfanno_extra) .set { ch_vcfanno_in } VCFANNO (ch_vcfanno_in, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index aab01ef6..bb5dca6b 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -19,8 +19,9 @@ workflow ANNOTATE_MT_SNVS { ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] + ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] 
] + ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] + ch_vcfanno_resources // channel: [mandatory] [ [path(vcf),path(index),...] ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 val_vep_cache_version // string: [mandatory] 107 @@ -45,7 +46,8 @@ workflow ANNOTATE_MT_SNVS { // Vcfanno ZIP_TABIX_HMTNOTE_MT.out.gz_tbi - .map { meta, vcf, tbi -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, []]} + .combine(ch_vcfanno_extra) + .map { meta, vcf, tbi, resources -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, resources]} .set { ch_in_vcfanno } VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 7c7726b4..dd502db2 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -22,6 +22,7 @@ include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modul include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/nf-core/sentieon/bwaindex/main' include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main' include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/bgziptabix/main' include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_GNOMAD_AF } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_PT } from '../../modules/nf-core/tabix/tabix/main' @@ -36,14 +37,16 @@ workflow PREPARE_REFERENCES { ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] ch_target_bed // channel: [mandatory for WES] [ path(bed) ] 
+ ch_vcfanno_extra_unprocessed // channel: [optional; used for snv annotation] [ val(meta), path(vcf) ] ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ] main: - ch_versions = Channel.empty() - ch_tbi = Channel.empty() - ch_bgzip_tbi = Channel.empty() - ch_bwa = Channel.empty() - ch_sentieonbwa = Channel.empty() + ch_versions = Channel.empty() + ch_tbi = Channel.empty() + ch_bgzip_tbi = Channel.empty() + ch_bwa = Channel.empty() + ch_sentieonbwa = Channel.empty() + ch_vcfanno_extra = Channel.empty() // Genome indices SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]]) @@ -87,6 +90,11 @@ workflow PREPARE_REFERENCES { TABIX_GNOMAD_AF(ch_gnomad_af_tab) TABIX_PT(ch_target_bed).tbi.set { ch_tbi } TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi } + TABIX_VCFANNOEXTRA(ch_vcfanno_extra_unprocessed) + .gz_tbi + .map { meta, vcf, tbi -> return [[vcf,tbi]] } + .collect() + .set {ch_vcfanno_extra} // Generate bait and target intervals GATK_BILT(ch_target_bed, ch_dict).interval_list @@ -125,6 +133,7 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(TABIX_GNOMAD_AF.out.versions) ch_versions = ch_versions.mix(TABIX_PT.out.versions) ch_versions = ch_versions.mix(TABIX_PBT.out.versions) + ch_versions = ch_versions.mix(TABIX_VCFANNOEXTRA.out.versions) ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) ch_versions = ch_versions.mix(GATK_BILT.out.versions) ch_versions = ch_versions.mix(GATK_ILT.out.versions) @@ -153,6 +162,7 @@ workflow PREPARE_REFERENCES { gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] + vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ [path(vcf), path(tbi)] ] bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] target_intervals = 
GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 082dcc26..864eaf0c 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -188,21 +188,23 @@ workflow RAREDISEASE { // // Initialize file channels for PREPARE_REFERENCES subworkflow // - ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.empty() - ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.empty() - ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - : Channel.empty() - ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() - : Channel.value([[],[]]) + ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_genome_dictionary = params.sequence_dictionary ? 
Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_vcfanno_extra_unprocessed = params.vcfanno_extra_resources ? Channel.fromPath(params.vcfanno_extra_resources).map { it -> [[id:it.baseName], it] }.collect() + : Channel.empty() + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() + : Channel.value([[],[]]) // // Prepare references and indices. @@ -215,6 +217,7 @@ workflow RAREDISEASE { ch_gnomad_af_tab, ch_dbsnp, ch_target_bed_unprocessed, + ch_vcfanno_extra_unprocessed, ch_vep_cache_unprocessed ) .set { ch_references } @@ -306,6 +309,7 @@ workflow RAREDISEASE { : Channel.value([]) ch_variant_consequences_sv = params.variant_consequences_sv ? Channel.fromPath(params.variant_consequences_sv).collect() : Channel.value([]) + ch_vcfanno_extra = ch_references.vcfanno_extra ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect() : Channel.value([]) ch_vcf2cytosure_blacklist = params.vcf2cytosure_blacklist ? 
Channel.fromPath(params.vcf2cytosure_blacklist).collect() @@ -514,6 +518,7 @@ workflow RAREDISEASE { params.analysis_type, ch_cadd_header, ch_cadd_resources, + ch_vcfanno_extra, ch_vcfanno_resources, ch_vcfanno_lua, ch_vcfanno_toml, @@ -570,6 +575,7 @@ workflow RAREDISEASE { ch_cadd_header, ch_cadd_resources, ch_genome_fasta, + ch_vcfanno_extra, ch_vcfanno_lua, ch_vcfanno_resources, ch_vcfanno_toml,