Skip to content

Commit

Permalink
New ensemblvep/download module (#2847)
Browse files Browse the repository at this point in the history
* ensemblvep -> ensemblvep/vep + add ensemblvep/download

* update tags

* lint

* fix subworkflow tag

* update exclude matrix

* proper stub
  • Loading branch information
maxulysse authored Feb 7, 2023
1 parent ce166d3 commit bea3ca9
Show file tree
Hide file tree
Showing 15 changed files with 229 additions and 104 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/pytest-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
- profile: "conda"
tags: deepvariant
- profile: "conda"
tags: ensemblvep
tags: ensemblvep/vep
- profile: "conda"
tags: fastk/fastk
- profile: "conda"
Expand All @@ -81,6 +81,8 @@ jobs:
tags: merquryfk/merquryfk
- profile: "conda"
tags: merquryfk/ploidyplot
- profile: "conda"
tags: subworkflows/vcf_annotate_ensemblvep
env:
NXF_ANSI_LOG: false
steps:
Expand Down
45 changes: 45 additions & 0 deletions modules/nf-core/ensemblvep/download/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//
// Download an Ensembl VEP annotation cache with `vep_install`.
//
// Input  : tuple [ meta, assembly, species, cache_version ] (all passed as values)
// Output : cache    - tuple [ meta, <cache directory> ]
//          versions - versions.yml recording the ensembl-vep version
//
// Additional `vep_install` options are read from `task.ext.args`.
// The cache directory name can be overridden with `task.ext.prefix`;
// it defaults to 'vep_cache', so existing callers see the same output path.
//
process ENSEMBLVEP_DOWNLOAD {
    tag "$meta.id"
    label 'process_medium'

    conda "bioconda::ensembl-vep=108.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ensembl-vep:108.2--pl5321h4a94de4_0' :
        'quay.io/biocontainers/ensembl-vep:108.2--pl5321h4a94de4_0' }"

    input:
    tuple val(meta), val(assembly), val(species), val(cache_version)

    output:
    // `prefix` is resolved from the task context once the script/stub block has been evaluated
    tuple val(meta), path(prefix), emit: cache
    path "versions.yml"          , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Deliberately declared without `def` so that it is visible to the output block
    prefix = task.ext.prefix ?: 'vep_cache'
    """
    vep_install \\
        --CACHEDIR $prefix \\
        --SPECIES $species \\
        --ASSEMBLY $assembly \\
        --CACHE_VERSION $cache_version \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // Same default as the script block so the stub emits the same directory name
    prefix = task.ext.prefix ?: 'vep_cache'
    """
    mkdir $prefix

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//')
    END_VERSIONS
    """
}
41 changes: 41 additions & 0 deletions modules/nf-core/ensemblvep/download/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: ENSEMBLVEP_DOWNLOAD
description: Ensembl Variant Effect Predictor (VEP). The cache downloading options are controlled through `task.ext.args`.
keywords:
- annotation
tools:
- ensemblvep:
description: |
VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs
or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions.
homepage: https://www.ensembl.org/info/docs/tools/vep/index.html
documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html
licence: ["Apache-2.0"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- assembly:
type: value
description: |
Genome assembly
- species:
type: value
description: |
Species name (e.g. caenorhabditis_elegans)
- cache_version:
type: value
description: |
Version of the VEP cache to download (e.g. 108)
output:
- cache:
type: file
description: Directory containing the downloaded VEP cache
pattern: "*"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@maxulysse"
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process ENSEMBLVEP {
process ENSEMBLVEP_VEP {
tag "$meta.id"
label 'process_medium'

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: ENSEMBLVEP
name: ENSEMBLVEP_VEP
description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`.
keywords:
- annotation
Expand Down
20 changes: 10 additions & 10 deletions subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// Run VEP to annotate VCF files
//

include { ENSEMBLVEP } from '../../../modules/nf-core/ensemblvep/main'
include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main'
include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main'
include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main'

workflow VCF_ANNOTATE_ENSEMBLVEP {
take:
Expand All @@ -18,19 +18,19 @@ workflow VCF_ANNOTATE_ENSEMBLVEP {
main:
ch_versions = Channel.empty()

ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, fasta, vep_extra_files)
TABIX_TABIX(ENSEMBLVEP.out.vcf)
ENSEMBLVEP_VEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, fasta, vep_extra_files)
TABIX_TABIX(ENSEMBLVEP_VEP.out.vcf)

ch_vcf_tbi = ENSEMBLVEP.out.vcf.join(TABIX_TABIX.out.tbi)
ch_vcf_tbi = ENSEMBLVEP_VEP.out.vcf.join(TABIX_TABIX.out.tbi)

// Gather versions of all tools used
ch_versions = ch_versions.mix(ENSEMBLVEP.out.versions)
ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions)
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)

emit:
vcf_tbi = ch_vcf_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ]
json = ENSEMBLVEP.out.json // channel: [ val(meta), json ]
tab = ENSEMBLVEP.out.tab // channel: [ val(meta), tab ]
reports = ENSEMBLVEP.out.report // path: *.html
versions = ch_versions // path: versions.yml
json = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), json ]
tab = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), tab ]
reports = ENSEMBLVEP_VEP.out.report // channel: [ *.html ]
versions = ch_versions // channel: [ versions.yml ]
}
16 changes: 10 additions & 6 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -931,9 +931,13 @@ endorspy:
- modules/nf-core/endorspy/**
- tests/modules/nf-core/endorspy/**

ensemblvep:
- modules/nf-core/ensemblvep/**
- tests/modules/nf-core/ensemblvep/**
ensemblvep/download:
- modules/nf-core/ensemblvep/download/**
- tests/modules/nf-core/ensemblvep/download/**

ensemblvep/vep:
- modules/nf-core/ensemblvep/vep/**
- tests/modules/nf-core/ensemblvep/vep/**

entrezdirect/esearch:
- modules/nf-core/entrezdirect/esearch/**
Expand Down Expand Up @@ -3059,9 +3063,9 @@ subworkflows/homer/groseq:
- subworkflows/nf-core/homer/groseq/**
- tests/subworkflows/nf-core/homer/groseq/**

subworkflows/vcf_annotate_ensembl_vep:
- subworkflows/nf-core/vcf_annotate_ensembl_vep/**
- tests/subworkflows/nf-core/vcf_annotate_ensembl_vep/**
subworkflows/vcf_annotate_ensemblvep:
- subworkflows/nf-core/vcf_annotate_ensemblvep/**
- tests/subworkflows/nf-core/vcf_annotate_ensemblvep/**

subworkflows/vcf_annotate_snpeff:
- subworkflows/nf-core/vcf_annotate_snpeff/**
Expand Down
11 changes: 11 additions & 0 deletions tests/modules/nf-core/ensemblvep/download/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { ENSEMBLVEP_DOWNLOAD } from '../../../../../modules/nf-core/ensemblvep/download/main.nf'

// Test entry point: runs ENSEMBLVEP_DOWNLOAD once with a fixed
// [ meta, assembly, species, cache_version ] tuple.
workflow test_ensemblvep_download {
    download_request = [
        [id:"test"],               // meta map
        "WBcel235",                // assembly
        "caenorhabditis_elegans",  // species
        "108"                      // cache version
    ]

    ENSEMBLVEP_DOWNLOAD(download_request)
}
9 changes: 9 additions & 0 deletions tests/modules/nf-core/ensemblvep/download/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Process-scope configuration used by the ENSEMBLVEP_DOWNLOAD module test.
process {

// Publish results under <params.outdir>/<name>, where <name> is derived from the
// process name: take the last ':'-separated component, keep the part before the
// first '_' and lower-case it.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

// Extra command-line options handed to the module through task.ext.args.
// NOTE(review): flag meanings come from the VEP installer, not from this file;
// presumably '--AUTO c' limits the run to the cache download and the '--NO_*'
// flags skip optional install/test/update steps. Confirm against the VEP docs.
withName: ENSEMBLVEP_DOWNLOAD {
ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE'
}

}
7 changes: 7 additions & 0 deletions tests/modules/nf-core/ensemblvep/download/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- name: ensemblvep test_ensemblvep_download
command: nextflow run ./tests/modules/nf-core/ensemblvep/download -entry test_ensemblvep_download -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/ensemblvep/download/nextflow.config
tags:
- ensemblvep
- ensemblvep/download
files:
- path: output/ensemblvep/vep_cache/caenorhabditis_elegans/108_WBcel235/
56 changes: 0 additions & 56 deletions tests/modules/nf-core/ensemblvep/test.yml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,85 +2,84 @@

nextflow.enable.dsl = 2

include { ENSEMBLVEP } from '../../../../modules/nf-core/ensemblvep/main.nf'

include { ENSEMBLVEP as ENSEMBLVEP_JSON } from '../../../../modules/nf-core/ensemblvep/main.nf'
include { ENSEMBLVEP as ENSEMBLVEP_TAB } from '../../../../modules/nf-core/ensemblvep/main.nf'
include { ENSEMBLVEP as ENSEMBLVEP_VCF } from '../../../../modules/nf-core/ensemblvep/main.nf'
include { ENSEMBLVEP as ENSEMBLVEP_VCF_BGZIP } from '../../../../modules/nf-core/ensemblvep/main.nf'
include { ENSEMBLVEP as ENSEMBLVEP_VCF_GZIP } from '../../../../modules/nf-core/ensemblvep/main.nf'

workflow test_ensemblvep_fasta_json {
include { ENSEMBLVEP_VEP as ENSEMBLVEP_VEP_DEFAULT } from '../../../../../modules/nf-core/ensemblvep/vep/main.nf'
include { ENSEMBLVEP_VEP as ENSEMBLVEP_VEP_JSON } from '../../../../../modules/nf-core/ensemblvep/vep/main.nf'
include { ENSEMBLVEP_VEP as ENSEMBLVEP_VEP_TAB } from '../../../../../modules/nf-core/ensemblvep/vep/main.nf'
include { ENSEMBLVEP_VEP as ENSEMBLVEP_VEP_VCF } from '../../../../../modules/nf-core/ensemblvep/vep/main.nf'
include { ENSEMBLVEP_VEP as ENSEMBLVEP_VEP_VCF_BGZIP } from '../../../../../modules/nf-core/ensemblvep/vep/main.nf'
include { ENSEMBLVEP_VEP as ENSEMBLVEP_VEP_VCF_GZIP } from '../../../../../modules/nf-core/ensemblvep/vep/main.nf'

workflow test_ensemblvep_vep_fasta_json {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]

fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

ENSEMBLVEP_JSON ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
ENSEMBLVEP_VEP_JSON ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
}

workflow test_ensemblvep_fasta_tab {
workflow test_ensemblvep_vep_fasta_tab {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]

fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

ENSEMBLVEP_TAB ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
ENSEMBLVEP_VEP_TAB ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
}

workflow test_ensemblvep_fasta_vcf {
workflow test_ensemblvep_vep_fasta_vcf {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]

fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

ENSEMBLVEP_VCF ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
ENSEMBLVEP_VEP_VCF ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
}

workflow test_ensemblvep_fasta_vcf_bgzip {
workflow test_ensemblvep_vep_fasta_vcf_bgzip {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]

fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

ENSEMBLVEP_VCF_BGZIP ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
ENSEMBLVEP_VEP_VCF_BGZIP ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
}

workflow test_ensemblvep_fasta_vcf_gzip {
workflow test_ensemblvep_vep_fasta_vcf_gzip {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]

fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

ENSEMBLVEP_VCF_GZIP ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
ENSEMBLVEP_VEP_VCF_GZIP ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
}

workflow test_ensemblvep_fasta {
workflow test_ensemblvep_vep_fasta {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]

fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
ENSEMBLVEP_VEP_DEFAULT ( input, "WBcel235", "caenorhabditis_elegans", "108", [], fasta, [] )
}

workflow test_ensemblvep_no_fasta {
workflow test_ensemblvep_vep_no_fasta {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]

ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "108", [], [], [] )
ENSEMBLVEP_VEP_DEFAULT ( input, "WBcel235", "caenorhabditis_elegans", "108", [], [], [] )
}
Loading

0 comments on commit bea3ca9

Please sign in to comment.