From 9359c3854c6a8645f5b650e0135cc922390da822 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:25:25 +0100 Subject: [PATCH 1/5] update configs --- conf/modules/qc_bam.config | 1 - conf/modules/raredisease.config | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/modules/qc_bam.config b/conf/modules/qc_bam.config index a2a17364..5aa0d7f3 100644 --- a/conf/modules/qc_bam.config +++ b/conf/modules/qc_bam.config @@ -55,7 +55,6 @@ process { } withName: '.*QC_BAM:MOSDEPTH' { - ext.args = '--d4' ext.prefix = { "${meta.id}_mosdepth" } } diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config index 5298800d..f4163456 100644 --- a/conf/modules/raredisease.config +++ b/conf/modules/raredisease.config @@ -36,7 +36,7 @@ process { ext.args = { (params.genome == 'GRCh37') ? '--genome 37' : '--genome 38' } - ext.prefix = { "${meta.id}" } + ext.prefix = { "${meta.id}_smncopynumbercaller" } publishDir = [ path: { "${params.outdir}/smncopynumbercaller" }, mode: params.publish_dir_mode, From 9db24ed31e99dc8d2104a45198f5dec42c0f3e0b Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:52:49 +0100 Subject: [PATCH 2/5] vep --- modules.json | 2 +- .../nf-core/ensemblvep/vep/environment.yml | 5 - modules/nf-core/ensemblvep/vep/main.nf | 19 ++- modules/nf-core/ensemblvep/vep/meta.yml | 154 ++++++++---------- .../nf-core/ensemblvep/vep/tests/main.nf.test | 114 ------------- .../ensemblvep/vep/tests/main.nf.test.snap | 26 --- .../ensemblvep/vep/tests/nextflow.config | 12 -- .../ensemblvep/vep/tests/tab.gz.config | 5 - modules/nf-core/ensemblvep/vep/tests/tags.yml | 2 - .../nf-core/ensemblvep/vep/tests/vcf.config | 5 - 10 files changed, 75 insertions(+), 269 deletions(-) delete mode 100644 modules/nf-core/ensemblvep/vep/environment.yml delete mode 100644 modules/nf-core/ensemblvep/vep/tests/main.nf.test delete mode 100644 modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap delete mode 100644 modules/nf-core/ensemblvep/vep/tests/nextflow.config delete mode 100644 modules/nf-core/ensemblvep/vep/tests/tab.gz.config delete mode 100644 modules/nf-core/ensemblvep/vep/tests/tags.yml delete mode 100644 modules/nf-core/ensemblvep/vep/tests/vcf.config diff --git a/modules.json b/modules.json index b99a590e..1a2e1206 100644 --- a/modules.json +++ b/modules.json @@ -128,7 +128,7 @@ }, "ensemblvep/vep": { "branch": "master", - "git_sha": "6e3585d9ad20b41adc7d271009f8cb5e191ecab4", + "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae", "installed_by": ["modules"] }, "expansionhunter": { diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml deleted file mode 100644 index 3d36eb17..00000000 --- a/modules/nf-core/ensemblvep/vep/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - bioconda::ensembl-vep=113.0 diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf index 7d2c82ff..da0e3646 100644 --- a/modules/nf-core/ensemblvep/vep/main.nf +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -2,10 +2,10 @@ process ENSEMBLVEP_VEP { tag "$meta.id" label 'process_medium' - conda "${moduleDir}/environment.yml" + conda "bioconda::ensembl-vep=110.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ensembl-vep:113.0--pl5321h2a3209d_0' : - 'biocontainers/ensembl-vep:113.0--pl5321h2a3209d_0' }" + 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" input: tuple val(meta), path(vcf), path(custom_extra_files) @@ -20,7 +20,7 @@ process ENSEMBLVEP_VEP { tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf tuple val(meta), path("*.tab.gz") , optional:true, emit: tab tuple val(meta), path("*.json.gz") , optional:true, emit: json - path "*.html" , optional:true, emit: report + path "*.summary.html" , emit: report path "versions.yml" , emit: versions when: @@ -45,7 +45,8 @@ process ENSEMBLVEP_VEP { --cache \\ --cache_version $cache_version \\ --dir_cache $dir_cache \\ - --fork $task.cpus + --fork $task.cpus \\ + --stats_file ${prefix}.summary.html \\ cat <<-END_VERSIONS > versions.yml @@ -57,10 +58,10 @@ process ENSEMBLVEP_VEP { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - echo "" | gzip > ${prefix}.vcf.gz - echo "" | gzip > ${prefix}.tab.gz - echo "" | gzip > ${prefix}.json.gz - touch ${prefix}_summary.html + touch ${prefix}.vcf.gz + touch ${prefix}.tab.gz + touch ${prefix}.json.gz + touch ${prefix}.summary.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml index 9288a938..7783847d 100644 --- a/modules/nf-core/ensemblvep/vep/meta.yml +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -1,6 +1,5 @@ -name: ensemblvep_vep -description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled - through `task.ext.args`. +name: ENSEMBLVEP_VEP +description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. keywords: - annotation - vcf @@ -14,101 +13,76 @@ tools: homepage: https://www.ensembl.org/info/docs/tools/vep/index.html documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html licence: ["Apache-2.0"] - identifier: "" input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: | - vcf to annotate - - custom_extra_files: - type: file - description: | - extra sample-specific files to be used with the `--custom` flag to be configured with ext.args - (optional) - - - genome: - type: string - description: | - which genome to annotate with - - - species: - type: string - description: | - which species to annotate with - - - cache_version: - type: integer - description: | - which version of the cache to annotate with - - - cache: - type: file - description: | - path to VEP cache (optional) - - - meta2: - type: map - description: | - Groovy Map containing fasta reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: | - reference FASTA file (optional) - pattern: "*.{fasta,fa}" - - - extra_files: - type: file - description: | - path to file(s) needed for plugins (optional) + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + - custom_extra_files: + type: file + description: | + extra sample-specific files to be used with the `--custom` flag to be configured with ext.args + (optional) + - genome: + type: string + description: | + which genome to annotate with + - species: + type: string + description: | + which species to annotate with + - cache_version: + type: integer + description: | + which version of the cache to annotate with + - cache: + type: file + description: | + path to VEP cache (optional) + - meta2: + type: map + description: | + Groovy Map containing fasta reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" + - extra_files: + type: file + description: | + path to file(s) needed for plugins (optional) output: - vcf: - - meta: - type: file - description: | - annotated vcf (optional) - pattern: "*.ann.vcf.gz" - - "*.vcf.gz": - type: file - description: | - annotated vcf (optional) - pattern: "*.ann.vcf.gz" + type: file + description: | + annotated vcf (optional) + pattern: "*.ann.vcf.gz" - tab: - - meta: - type: file - description: | - tab file with annotated variants (optional) - pattern: "*.ann.tab.gz" - - "*.tab.gz": - type: file - description: | - tab file with annotated variants (optional) - pattern: "*.ann.tab.gz" + type: file + description: | + tab file with annotated variants (optional) + pattern: "*.ann.tab.gz" - json: - - meta: - type: file - description: | - json file with annotated variants (optional) - pattern: "*.ann.json.gz" - - "*.json.gz": - type: file - description: | - json file with annotated variants (optional) - pattern: "*.ann.json.gz" + type: file + description: | + json file with annotated variants (optional) + pattern: "*.ann.json.gz" - report: - - "*.html": - type: file - description: VEP report file - pattern: "*.html" + type: file + description: VEP report file + pattern: "*.html" - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@maxulysse" - "@matthdsm" - "@nvnieuwk" -maintainers: - - "@maxulysse" - - "@matthdsm" - - "@nvnieuwk" diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test deleted file mode 100644 index 3e8c0b53..00000000 --- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test +++ /dev/null @@ -1,114 +0,0 @@ -nextflow_process { - - name "Test Process ENSEMBLVEP_VEP" - script "../main.nf" - process "ENSEMBLVEP_VEP" - config "./nextflow.config" - - tag "modules" - tag "modules_nfcore" - tag "ensemblvep" - tag "ensemblvep/vep" - tag "ensemblvep/download" - - test("test_ensemblvep_vep_fasta_vcf") { - config "./vcf.config" - - setup { - run("ENSEMBLVEP_DOWNLOAD") { - script "../../download/main.nf" - - process { - """ - input[0] = Channel.of([ - [id:"113_WBcel235"], - params.vep_genome, - params.vep_species, - params.vep_cache_version - ]) - """ - } - } - } - - when { - process { - """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), - [] - ]) - input[1] = params.vep_genome - input[2] = params.vep_species - input[3] = params.vep_cache_version - input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } - input[5] = Channel.value([ - [id:"fasta"], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[6] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.versions).match() }, - { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") } - ) - } - - } - - test("test_ensemblvep_vep_fasta_tab_gz") { - config "./tab.gz.config" - - setup { - run("ENSEMBLVEP_DOWNLOAD") { - script "../../download/main.nf" - - process { - """ - input[0] = Channel.of([ - [id:"113_WBcel235"], - params.vep_genome, - params.vep_species, - params.vep_cache_version - ]) - """ - } - } - } - - when { - process { - """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), - [] - ]) - input[1] = params.vep_genome - input[2] = params.vep_species - input[3] = params.vep_cache_version - input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } - input[5] = Channel.value([ - [id:"fasta"], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[6] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.versions).match() }, - { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v113.0") } - ) - } - } -} diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap deleted file mode 100644 index 1df94276..00000000 --- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap +++ /dev/null @@ -1,26 +0,0 @@ -{ - "test_ensemblvep_vep_fasta_tab_gz": { - "content": [ - [ - "versions.yml:md5,4fbfeb73f0d4b4aa039f17be8ba9e1f2" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-21T09:12:23.474703494" - }, - "test_ensemblvep_vep_fasta_vcf": { - "content": [ - [ - "versions.yml:md5,4fbfeb73f0d4b4aa039f17be8ba9e1f2" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-21T09:11:54.343590485" - } -} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config deleted file mode 100644 index 0a4ae1a6..00000000 --- a/modules/nf-core/ensemblvep/vep/tests/nextflow.config +++ /dev/null @@ -1,12 +0,0 @@ -params { - vep_cache_version = "113" - vep_genome = "WBcel235" - vep_species = "caenorhabditis_elegans" -} - -process { - withName: ENSEMBLVEP_DOWNLOAD { - ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' - ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" } - } -} diff --git a/modules/nf-core/ensemblvep/vep/tests/tab.gz.config b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config deleted file mode 100644 index 40eb03e5..00000000 --- a/modules/nf-core/ensemblvep/vep/tests/tab.gz.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: ENSEMBLVEP_VEP { - ext.args = '--tab --compress_output bgzip' - } -} diff --git a/modules/nf-core/ensemblvep/vep/tests/tags.yml b/modules/nf-core/ensemblvep/vep/tests/tags.yml deleted file mode 100644 index 4aa4aa45..00000000 --- a/modules/nf-core/ensemblvep/vep/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -ensemblvep/vep: - - "modules/nf-core/ensemblvep/vep/**" diff --git a/modules/nf-core/ensemblvep/vep/tests/vcf.config b/modules/nf-core/ensemblvep/vep/tests/vcf.config deleted file mode 100644 index ad8955a3..00000000 --- a/modules/nf-core/ensemblvep/vep/tests/vcf.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: ENSEMBLVEP_VEP { - ext.args = '--vcf' - } -} From 057c439a409be2bcb4ee0945ffe4c35ede0592d0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:14:53 +0100 Subject: [PATCH 3/5] update docs --- CHANGELOG.md | 3 +-- docs/output.md | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6d0c355..d6e10d8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Restrict deepvariant analysis of WES samples to bait regions [#633](https://github.com/nf-core/raredisease/pull/633) - bcftools annotate declaration in annotate CADD subworkflow [#624](https://github.com/nf-core/raredisease/pull/624) - Rhocallviz subworkflow will only be invocated once per sample [#621](https://github.com/nf-core/raredisease/pull/621) -- Allow for VEP version 112 to be used and set it to default [#617](https://github.com/nf-core/raredisease/pull/617) - Updated createCaseChannel function to include a check for maternal and paternal ids being set to a numeric 0 [#643](https://github.com/nf-core/raredisease/pull/643) ### Parameters @@ -40,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Tool | Old version | New version | | ---------- | ----------- | ----------- | | bcftools | 1.18 | 1.20 | -| ensemblvep | 112 | 113 | +| ensemblvep | 112 | 110 | | genmod | 3.8.2 | 3.9 | | mosdepth | 0.3.6 | 0.3.8 | | multiqc | 1.21 | 1.25.1 | diff --git a/docs/output.md b/docs/output.md index 4640f514..1112bcea 100644 --- a/docs/output.md +++ b/docs/output.md @@ -68,7 +68,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Hmtnote](#hmtnote) - [VEP](#vep-2) - [Filtering and ranking](#filtering-and-ranking) - - [Filter_vep](#filter_vep) + - [Filter\_vep](#filter_vep) - [GENMOD](#genmod) - [Mobile element analysis](#mobile-element-analysis) - [Calling mobile elements](#calling-mobile-elements) @@ -171,7 +171,6 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `{outputdir}/qc_bam/` - `_mosdepth.global.dist.txt`: This file contains a cumulative distribution indicating the proportion of total bases that were covered for at least a given coverage value across each chromosome and the whole genome. - - `_mosdepth.per-base.d4`: This file contains a coverage for each base in the genome in d4 format. - `_mosdepth.summary.txt`: This file contains summary statistics, such as mean, minimum and maximum coverage per genomic contig. From 3898e146f49b2d06e45a2936c484c531c63b92b0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:17:31 +0100 Subject: [PATCH 4/5] fix lint --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 1112bcea..b4755352 100644 --- a/docs/output.md +++ b/docs/output.md @@ -68,7 +68,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Hmtnote](#hmtnote) - [VEP](#vep-2) - [Filtering and ranking](#filtering-and-ranking) - - [Filter\_vep](#filter_vep) + - [Filter_vep](#filter_vep) - [GENMOD](#genmod) - [Mobile element analysis](#mobile-element-analysis) - [Calling mobile elements](#calling-mobile-elements) From 74d88aac7909d0b11fc43a951b9865b29c2afade Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 22 Nov 2024 09:26:29 +0100 Subject: [PATCH 5/5] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6e10d8f..d68b0072 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- d4 files are not generated by default anymore [#648](https://github.com/nf-core/raredisease/pull/648) - Suffix used to identify unique fastq pairs from "\_T" to "\_LNUMBER" [#638](https://github.com/nf-core/raredisease/pull/638) - Merge output from germlinecnvcaller [#635](https://github.com/nf-core/raredisease/pull/635) - Update tools [#623](https://github.com/nf-core/raredisease/pull/623)