From 088a94ded0c78f955c8bb4c79f5d71f1d07c2bcd Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 16 Jan 2024 19:46:27 +0100
Subject: [PATCH 1/9] update vep

---
 ...ces_v1.txt => variant_consequences_v2.txt} |  8 +-
 conf/modules/annotate_genome_snvs.config      |  9 +-
 conf/modules/annotate_mt_snvs.config          |  9 +-
 .../annotate_structural_variants.config       |  4 +-
 conf/test.config                              |  9 +-
 conf/test_one_sample.config                   |  9 +-
 modules.json                                  |  5 +
 .../nf-core/ensemblvep/vep/environment.yml    |  7 ++
 modules/nf-core/ensemblvep/vep/main.nf        | 71 ++++++++++++++
 modules/nf-core/ensemblvep/vep/meta.yml       | 92 +++++++++++++++++++
 nextflow_schema.json                          | 13 ++-
 subworkflows/local/annotate_genome_snvs.nf    | 14 ++-
 subworkflows/local/annotate_mt_snvs.nf        | 33 ++++---
 .../local/annotate_structural_variants.nf     | 30 +++---
 workflows/raredisease.nf                      | 25 ++++-
 15 files changed, 273 insertions(+), 65 deletions(-)
 rename assets/{variant_consequences_v1.txt => variant_consequences_v2.txt} (95%)
 create mode 100644 modules/nf-core/ensemblvep/vep/environment.yml
 create mode 100644 modules/nf-core/ensemblvep/vep/main.nf
 create mode 100644 modules/nf-core/ensemblvep/vep/meta.yml

diff --git a/assets/variant_consequences_v1.txt b/assets/variant_consequences_v2.txt
similarity index 95%
rename from assets/variant_consequences_v1.txt
rename to assets/variant_consequences_v2.txt
index 0893a8b9..effe32b1 100644
--- a/assets/variant_consequences_v1.txt
+++ b/assets/variant_consequences_v2.txt
@@ -6,12 +6,14 @@ frameshift_variant
 stop_lost
 start_lost
 transcript_amplification
+feature_elongation
+feature_truncation
 inframe_insertion
 inframe_deletion
 missense_variant
 protein_altering_variant
-splice_region_variant
 splice_donor_5th_base_variant
+splice_region_variant
 splice_donor_region_variant
 splice_polypyrimidine_tract_variant
 incomplete_terminal_codon_variant
@@ -26,6 +28,7 @@ non_coding_transcript_exon_variant
 intron_variant
 NMD_transcript_variant
 non_coding_transcript_variant
+coding_transcript_variant
 upstream_gene_variant
 downstream_gene_variant
 TFBS_ablation
@@ -33,7 +36,6 @@ TFBS_amplification
 TF_binding_site_variant
 regulatory_region_ablation
 regulatory_region_amplification
-feature_elongation
 regulatory_region_variant
-feature_truncation
 intergenic_variant
+sequence_variant
diff --git a/conf/modules/annotate_genome_snvs.config b/conf/modules/annotate_genome_snvs.config
index 6697c498..5b1e9928 100644
--- a/conf/modules/annotate_genome_snvs.config
+++ b/conf/modules/annotate_genome_snvs.config
@@ -79,16 +79,15 @@ process {
             ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_vep" }
             ext.args   = [
                 '--dir_plugins vep_cache/Plugins',
-                '--plugin LoFtool,vep_cache/LoFtool_scores.txt',
-                '--plugin pLI,vep_cache/pLI_values_107.txt',
-                '--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz',
-                '--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS',
+                '--plugin LoFtool,LoFtool_scores.txt',
+                '--plugin pLI,pLI_values_107.txt',
+                '--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz',
                 '--distance 5000',
                 '--buffer_size 20000',
                 '--format vcf --max_sv_size 248956422',
                 '--appris --biotype --cache --canonical --ccds --compress_output bgzip',
                 '--domains --exclude_predicted --force_overwrite',
-                '--hgvs --humdiv --no_progress --no_stats --numbers',
+                '--hgvs --humdiv --no_progress --numbers',
                 '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl',
                 '--uniprot --vcf'
             ].join(' ')
diff --git a/conf/modules/annotate_mt_snvs.config b/conf/modules/annotate_mt_snvs.config
index 391a3e71..f0e46836 100644
--- a/conf/modules/annotate_mt_snvs.config
+++ b/conf/modules/annotate_mt_snvs.config
@@ -20,16 +20,15 @@ process {
         withName: '.*ANNOTATE_MT_SNVS:ENSEMBLVEP_MT' {
             ext.args   = [
                 '--dir_plugins vep_cache/Plugins',
-                '--plugin LoFtool,vep_cache/LoFtool_scores.txt',
-                '--plugin pLI,vep_cache/pLI_values_107.txt',
-                '--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz',
-                '--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS',
+                '--plugin LoFtool,LoFtool_scores.txt',
+                '--plugin pLI,pLI_values_107.txt',
+                '--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz',
                 '--distance 0',
                 '--buffer_size 20000',
                 '--format vcf --fork 4 --max_sv_size 248956422',
                 '--appris --biotype --cache --canonical --ccds --compress_output bgzip',
                 '--domains --exclude_predicted --force_overwrite',
-                '--hgvs --humdiv --no_progress --no_stats --numbers',
+                '--hgvs --humdiv --no_progress --numbers',
                 '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl --vcf',
                 '--uniprot'
             ].join(' ')
diff --git a/conf/modules/annotate_structural_variants.config b/conf/modules/annotate_structural_variants.config
index b2ee6218..9f8f5f19 100644
--- a/conf/modules/annotate_structural_variants.config
+++ b/conf/modules/annotate_structural_variants.config
@@ -46,12 +46,12 @@ process {
             ext.args   = [
                 '--dir_cache vep_cache',
                 '--dir_plugins vep_cache/Plugins',
-                '--plugin pLI,vep_cache/pLI_values_107.txt',
+                '--plugin pLI,pLI_values_107.txt',
                 '--appris --biotype --buffer_size 100 --canonical --cache --ccds',
                 '--compress_output bgzip --distance 5000 --domains',
                 '--exclude_predicted --force_overwrite --format vcf',
                 '--fork 4 --hgvs --humdiv --max_sv_size 248956422 --merged',
-                '--no_progress --no_stats --numbers --per_gene --polyphen p',
+                '--no_progress --numbers --per_gene --polyphen p',
                 '--protein --offline --regulatory --sift p',
                 '--symbol --tsl --uniprot --vcf'
             ].join(' ')
diff --git a/conf/test.config b/conf/test.config
index d43f319b..5a5fdc39 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -56,11 +56,6 @@ params {
     vcfanno_toml         = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_config.toml"
     vep_cache            = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz"
     vep_filters          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt"
-    vep_cache_version    = 107
-}
-
-process {
-    withName: '.*FILTERVEP.*' {
-        container  = "docker.io/ensemblorg/ensembl-vep:release_107.0"
-    }
+    vep_cache_version    = 110
+    vep_plugin_files     = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv"
 }
diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config
index de8436c8..82795453 100644
--- a/conf/test_one_sample.config
+++ b/conf/test_one_sample.config
@@ -56,11 +56,6 @@ params {
     vcfanno_toml         = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_config.toml"
     vep_cache            = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz"
     vep_filters          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt"
-    vep_cache_version    = 107
-}
-
-process {
-    withName: '.*FILTERVEP.*' {
-        container  = "docker.io/ensemblorg/ensembl-vep:release_107.0"
-    }
+    vep_cache_version    = 110
+    vep_plugin_files     = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv"
 }
diff --git a/modules.json b/modules.json
index 9a1324fd..eac809df 100644
--- a/modules.json
+++ b/modules.json
@@ -115,6 +115,11 @@
                         "git_sha": "29984d70aea47d06f0062a1785d76c357dd40ea9",
                         "installed_by": ["modules"]
                     },
+                    "ensemblvep/vep": {
+                        "branch": "master",
+                        "git_sha": "214d575774c172062924ad3564b4f66655600730",
+                        "installed_by": ["modules"]
+                    },
                     "expansionhunter": {
                         "branch": "master",
                         "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053",
diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml
new file mode 100644
index 00000000..7a127746
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/environment.yml
@@ -0,0 +1,7 @@
+name: ensemblvep_vep
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::ensembl-vep=110.0
diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf
new file mode 100644
index 00000000..3a2b7423
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/main.nf
@@ -0,0 +1,71 @@
+process ENSEMBLVEP_VEP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' :
+        'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }"
+
+    input:
+    tuple val(meta), path(vcf), path(custom_extra_files)
+    val   genome
+    val   species
+    val   cache_version
+    path  cache
+    tuple val(meta2), path(fasta)
+    path  extra_files
+
+    output:
+    tuple val(meta), path("*.vcf.gz")  , optional:true, emit: vcf
+    tuple val(meta), path("*.tab.gz")  , optional:true, emit: tab
+    tuple val(meta), path("*.json.gz") , optional:true, emit: json
+    path "*.summary.html"              , emit: report
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf'
+    def compress_cmd = args.contains("--compress_output") ? '' : '--compress_output bgzip'
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
+    def reference = fasta ? "--fasta $fasta" : ""
+    """
+    vep \\
+        -i $vcf \\
+        -o ${prefix}.${file_extension}.gz \\
+        $args \\
+        $compress_cmd \\
+        $reference \\
+        --assembly $genome \\
+        --species $species \\
+        --cache \\
+        --cache_version $cache_version \\
+        --dir_cache $dir_cache \\
+        --fork $task.cpus \\
+        --stats_file ${prefix}.summary.html \\
+
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.vcf.gz
+    touch ${prefix}.tab.gz
+    touch ${prefix}.json.gz
+    touch ${prefix}.summary.html
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml
new file mode 100644
index 00000000..d8ff8d14
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/meta.yml
@@ -0,0 +1,92 @@
+name: ensemblvep_vep
+description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`.
+keywords:
+  - annotation
+  - vcf
+  - json
+  - tab
+tools:
+  - ensemblvep:
+      description: |
+        VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs
+        or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions.
+      homepage: https://www.ensembl.org/info/docs/tools/vep/index.html
+      documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html
+      licence: ["Apache-2.0"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - vcf:
+      type: file
+      description: |
+        vcf to annotate
+  - custom_extra_files:
+      type: file
+      description: |
+        extra sample-specific files to be used with the `--custom` flag to be configured with ext.args
+        (optional)
+  - genome:
+      type: string
+      description: |
+        which genome to annotate with
+  - species:
+      type: string
+      description: |
+        which species to annotate with
+  - cache_version:
+      type: integer
+      description: |
+        which version of the cache to annotate with
+  - cache:
+      type: file
+      description: |
+        path to VEP cache (optional)
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing fasta reference information
+        e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: |
+        reference FASTA file (optional)
+      pattern: "*.{fasta,fa}"
+  - extra_files:
+      type: file
+      description: |
+        path to file(s) needed for plugins  (optional)
+output:
+  - vcf:
+      type: file
+      description: |
+        annotated vcf (optional)
+      pattern: "*.ann.vcf.gz"
+  - tab:
+      type: file
+      description: |
+        tab file with annotated variants (optional)
+      pattern: "*.ann.tab.gz"
+  - json:
+      type: file
+      description: |
+        json file with annotated variants (optional)
+      pattern: "*.ann.json.gz"
+  - report:
+      type: file
+      description: VEP report file
+      pattern: "*.html"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@maxulysse"
+  - "@matthdsm"
+  - "@nvnieuwk"
+maintainers:
+  - "@maxulysse"
+  - "@matthdsm"
+  - "@nvnieuwk"
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1b1b7641..d29a9320 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -374,6 +374,15 @@
                     "help_text": "If no directory path is passed, vcf files will not be annotated by vep.",
                     "fa_icon": "fas fa-folder-open"
                 },
+                "vep_plugin_files": {
+                    "type": "string",
+                    "exists": true,
+                    "format": "file-path",
+                    "description": "Databases used by both named and custom plugins to annotate variants.",
+                    "fa_icon": "fas fa-file-csv",
+                    "help_text": "Path to a CSV file with a `vep_files` header that lists the absolute paths to the databases and their indices used by VEP's named and custom plugins. One line per resource.",
+                    "mimetype": "text/csv"
+                },
                 "vep_filters": {
                     "type": "string",
                     "exists": true,
@@ -557,10 +566,10 @@
             "properties": {
                 "vep_cache_version": {
                     "type": "integer",
-                    "default": 107,
+                    "default": 110,
                     "description": "Specify the version of the VEP cache provided to the `--vep_cache` option.",
                     "fa_icon": "fas fa-align-center",
-                    "enum": [107]
+                    "enum": [107, 110]
                 }
             }
         },
diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf
index 334b7d4b..291d3acd 100644
--- a/subworkflows/local/annotate_genome_snvs.nf
+++ b/subworkflows/local/annotate_genome_snvs.nf
@@ -11,7 +11,7 @@ include { UPD as UPD_SITES                      } from '../../modules/nf-core/up
 include { UPD as UPD_REGIONS                    } from '../../modules/nf-core/upd/main'
 include { CHROMOGRAPH as CHROMOGRAPH_SITES      } from '../../modules/nf-core/chromograph/main'
 include { CHROMOGRAPH as CHROMOGRAPH_REGIONS    } from '../../modules/nf-core/chromograph/main'
-include { ENSEMBLVEP as ENSEMBLVEP_SNV          } from '../../modules/local/ensemblvep/main'
+include { ENSEMBLVEP_VEP as ENSEMBLVEP_SNV      } from '../../modules/nf-core/ensemblvep/vep/main'
 include { TABIX_BGZIPTABIX as ZIP_TABIX_ROHCALL } from '../../modules/nf-core/tabix/bgziptabix/main'
 include { TABIX_BGZIPTABIX as ZIP_TABIX_VCFANNO } from '../../modules/nf-core/tabix/bgziptabix/main'
 include { TABIX_TABIX as TABIX_VEP              } from '../../modules/nf-core/tabix/tabix/main'
@@ -36,6 +36,7 @@ workflow ANNOTATE_GENOME_SNVS {
         ch_genome_fasta       // channel: [mandatory] [ val(meta), path(fasta) ]
         ch_gnomad_af          // channel: [optional] [ path(tab), path(tbi) ]
         ch_split_intervals    // channel: [mandatory] [ path(intervals) ]
+        ch_vep_extra_files    // channel: [mandatory] [ path(files) ]
 
     main:
         ch_cadd_vcf       = Channel.empty()
@@ -115,20 +116,23 @@ workflow ANNOTATE_GENOME_SNVS {
             }
             .set { ch_for_mix }
 
-        ch_vep_in = ch_for_mix.selvar.mix(ch_for_mix.cadd)
+        ch_for_mix.selvar.mix(ch_for_mix.cadd)
+            .map { meta, vcf -> return [meta, vcf, []]}
+            .set { ch_vep_in }
+
 
         // Annotating with ensembl Vep
         ENSEMBLVEP_SNV(
             ch_vep_in,
-            ch_genome_fasta,
             val_vep_genome,
             "homo_sapiens",
             val_vep_cache_version,
             ch_vep_cache,
-            []
+            ch_genome_fasta,
+            ch_vep_extra_files
         )
 
-        ENSEMBLVEP_SNV.out.vcf_gz
+        ENSEMBLVEP_SNV.out.vcf
             .map { meta, vcf -> [meta - meta.subMap('scatterid'), vcf] }
             .set { ch_vep_out }
 
diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf
index 8f7c24eb..e1ed903a 100644
--- a/subworkflows/local/annotate_mt_snvs.nf
+++ b/subworkflows/local/annotate_mt_snvs.nf
@@ -2,13 +2,13 @@
 // Annotate MT
 //
 
-include { TABIX_TABIX as TABIX_TABIX_MT                         } from '../../modules/nf-core/tabix/tabix/main'
-include { ENSEMBLVEP as ENSEMBLVEP_MT                           } from '../../modules/local/ensemblvep/main'
-include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT         } from '../../modules/nf-core/haplogrep2/classify/main'
-include { VCFANNO as VCFANNO_MT                                 } from '../../modules/nf-core/vcfanno/main'
-include { ANNOTATE_CADD                                         } from './annotation/annotate_cadd'
-include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE                 } from '../../modules/nf-core/tabix/bgziptabix/main'
-include { HMTNOTE_ANNOTATE                                      } from '../../modules/nf-core/hmtnote/annotate/main'
+include { TABIX_TABIX as TABIX_TABIX_MT                  } from '../../modules/nf-core/tabix/tabix/main'
+include { ENSEMBLVEP_VEP as ENSEMBLVEP_MT                } from '../../modules/nf-core/ensemblvep/vep/main'
+include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT  } from '../../modules/nf-core/haplogrep2/classify/main'
+include { VCFANNO as VCFANNO_MT                          } from '../../modules/nf-core/vcfanno/main'
+include { ANNOTATE_CADD                                  } from './annotation/annotate_cadd'
+include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE          } from '../../modules/nf-core/tabix/bgziptabix/main'
+include { HMTNOTE_ANNOTATE                               } from '../../modules/nf-core/hmtnote/annotate/main'
 
 workflow ANNOTATE_MT_SNVS {
     take:
@@ -22,6 +22,7 @@ workflow ANNOTATE_MT_SNVS {
         val_vep_genome         // string:  [mandatory] GRCh37 or GRCh38
         val_vep_cache_version  // string:  [mandatory] 107
         ch_vep_cache           // channel: [mandatory] [ path(cache) ]
+        ch_vep_extra_files     // channel: [mandatory] [ path(files) ]
 
     main:
         ch_cadd_vcf = Channel.empty()
@@ -49,22 +51,27 @@ workflow ANNOTATE_MT_SNVS {
                     return [it[0], it[2]]
             }
             .set { ch_for_mix }
-        ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd)
+
+        ch_for_mix.merged.mix(ch_for_mix.cadd)
+            .tap { ch_haplogrep_in }
+            .map { meta, vcf -> return [meta, vcf, []]}
+            .set { ch_vep_in }
+
 
         // Annotating with ensembl Vep
         ENSEMBLVEP_MT(
             ch_vep_in,
-            ch_genome_fasta,
             val_vep_genome,
             "homo_sapiens",
             val_vep_cache_version,
             ch_vep_cache,
-            []
+            ch_genome_fasta,
+            ch_vep_extra_files
         )
 
         // Running vcfanno
-        TABIX_TABIX_MT(ENSEMBLVEP_MT.out.vcf_gz)
-        ENSEMBLVEP_MT.out.vcf_gz
+        TABIX_TABIX_MT(ENSEMBLVEP_MT.out.vcf)
+        ENSEMBLVEP_MT.out.vcf
             .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
             .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
             .set { ch_in_vcfanno }
@@ -84,7 +91,7 @@ workflow ANNOTATE_MT_SNVS {
         ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] }
 
         // Running haplogrep2
-        HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz")
+        HAPLOGREP2_CLASSIFY_MT(ch_haplogrep_in, "vcf.gz")
 
         ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions)
         ch_versions = ch_versions.mix(TABIX_TABIX_MT.out.versions)
diff --git a/subworkflows/local/annotate_structural_variants.nf b/subworkflows/local/annotate_structural_variants.nf
index 6766a73c..d2d42027 100644
--- a/subworkflows/local/annotate_structural_variants.nf
+++ b/subworkflows/local/annotate_structural_variants.nf
@@ -2,12 +2,12 @@
 // A subworkflow to annotate structural variants.
 //
 
-include { SVDB_QUERY as SVDB_QUERY_DB    } from '../../modules/nf-core/svdb/query/main'
-include { SVDB_QUERY as SVDB_QUERY_BEDPE } from '../../modules/nf-core/svdb/query/main'
-include { PICARD_SORTVCF                 } from '../../modules/nf-core/picard/sortvcf/main'
-include { BCFTOOLS_VIEW                  } from '../../modules/nf-core/bcftools/view/main'
-include { ENSEMBLVEP as ENSEMBLVEP_SV    } from '../../modules/local/ensemblvep/main'
-include { TABIX_TABIX as TABIX_VEP       } from '../../modules/nf-core/tabix/tabix/main'
+include { SVDB_QUERY as SVDB_QUERY_DB     } from '../../modules/nf-core/svdb/query/main'
+include { SVDB_QUERY as SVDB_QUERY_BEDPE  } from '../../modules/nf-core/svdb/query/main'
+include { PICARD_SORTVCF                  } from '../../modules/nf-core/picard/sortvcf/main'
+include { BCFTOOLS_VIEW                   } from '../../modules/nf-core/bcftools/view/main'
+include { ENSEMBLVEP_VEP as ENSEMBLVEP_SV } from '../../modules/nf-core/ensemblvep/vep/main'
+include { TABIX_TABIX as TABIX_VEP        } from '../../modules/nf-core/tabix/tabix/main'
 
 workflow ANNOTATE_STRUCTURAL_VARIANTS {
 
@@ -20,6 +20,7 @@ workflow ANNOTATE_STRUCTURAL_VARIANTS {
         ch_vep_cache          // channel: [mandatory] [ path(cache) ]
         ch_genome_fasta       // channel: [mandatory] [ val(meta), path(fasta) ]
         ch_genome_dictionary  // channel: [mandatory] [ val(meta), path(dict) ]
+        ch_vep_extra_files    // channel: [mandatory] [ path(files) ]
 
     main:
         ch_versions      = Channel.empty()
@@ -97,18 +98,21 @@ workflow ANNOTATE_STRUCTURAL_VARIANTS {
         PICARD_SORTVCF.out.vcf.map { meta, vcf -> return [meta,vcf,[]] }.set { ch_sortvcf }
 
         BCFTOOLS_VIEW(ch_sortvcf, [], [], [])
+            .vcf
+            .map { meta, vcf -> return [meta, vcf, []]}
+            .set { ch_vep_in }
 
         ENSEMBLVEP_SV(
-            BCFTOOLS_VIEW.out.vcf,
-            ch_genome_fasta,
+            ch_vep_in,
             val_vep_genome,
             "homo_sapiens",
             val_vep_cache_version,
             ch_vep_cache,
-            []
+            ch_genome_fasta,
+            ch_vep_extra_files
         )
 
-        TABIX_VEP (ENSEMBLVEP_SV.out.vcf_gz)
+        TABIX_VEP (ENSEMBLVEP_SV.out.vcf)
 
         ch_versions = ch_versions.mix(SVDB_QUERY_DB.out.versions)
         ch_versions = ch_versions.mix(SVDB_QUERY_BEDPE.out.versions)
@@ -118,7 +122,7 @@ workflow ANNOTATE_STRUCTURAL_VARIANTS {
         ch_versions = ch_versions.mix(TABIX_VEP.out.versions)
 
     emit:
-        vcf_ann  = ENSEMBLVEP_SV.out.vcf_gz // channel: [ val(meta), path(vcf) ]
-        tbi      = TABIX_VEP.out.tbi        // channel: [ val(meta), path(tbi) ]
-        versions = ch_versions              // channel: [ path(versions.yml) ]
+        vcf_ann  = ENSEMBLVEP_SV.out.vcf // channel: [ val(meta), path(vcf) ]
+        tbi      = TABIX_VEP.out.tbi     // channel: [ val(meta), path(tbi) ]
+        versions = ch_versions           // channel: [ path(versions.yml) ]
 }
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index dac92ee9..2227303c 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -275,7 +275,7 @@ workflow RAREDISEASE {
     ch_target_intervals         = ch_references.target_intervals
     ch_variant_catalog          = params.variant_catalog                   ? Channel.fromPath(params.variant_catalog).map { it -> [[id:it[0].simpleName],it]}.collect()
                                                                            : Channel.value([[],[]])
-    ch_variant_consequences     = Channel.fromPath("$projectDir/assets/variant_consequences_v1.txt", checkIfExists: true).collect()
+    ch_variant_consequences     = Channel.fromPath("$projectDir/assets/variant_consequences_v2.txt", checkIfExists: true).collect()
     ch_vcfanno_resources        = params.vcfanno_resources                 ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect()
                                                                            : Channel.value([])
     ch_vcf2cytosure_blacklist   = params.vcf2cytosure_blacklist            ? Channel.fromPath(params.vcf2cytosure_blacklist).collect()
@@ -286,6 +286,8 @@ workflow RAREDISEASE {
                                                                            : Channel.value([])
     ch_vep_cache                = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") )  ? ch_references.vep_resources
                                                                            : ( params.vep_cache    ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) )
+    ch_vep_extra_files_unsplit  = params.vep_plugin_files                  ? Channel.fromPath(params.vep_plugin_files).collect()
+                                                                           : Channel.value([])
     ch_vep_filters              = params.vep_filters                       ? Channel.fromPath(params.vep_filters).collect()
                                                                            : Channel.value([])
     ch_versions                 = ch_versions.mix(ch_references.versions)
@@ -298,11 +300,25 @@ workflow RAREDISEASE {
         ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"])
     }
 
+    // Read the vep_plugin_files CSV (header column `vep_files`) and collect the listed paths
+    ch_vep_extra_files_unsplit.splitCsv ( header:true )
+        .map { row ->
+            def f = file(row.vep_files[0]) // `def` keeps f local to this closure invocation
+            if(f.isFile() || f.isDirectory()){
+                return [f]
+            } else {
+                error("\nVep database file ${f} does not exist.")
+            }
+        }
+        .collect()
+        .set {ch_vep_extra_files}
+
     // Input QC
     if (!params.skip_fastqc) {
         FASTQC (ch_reads)
         ch_versions = ch_versions.mix(FASTQC.out.versions.first())
     }
+
     // CREATE CHROMOSOME BED AND INTERVALS
     SCATTER_GENOME (
         ch_genome_dictionary,
@@ -425,7 +441,8 @@ workflow RAREDISEASE {
             params.vep_cache_version,
             ch_vep_cache,
             ch_genome_fasta,
-            ch_genome_dictionary
+            ch_genome_dictionary,
+            ch_vep_extra_files
         ).set {ch_sv_annotate}
         ch_versions = ch_versions.mix(ch_sv_annotate.versions)
 
@@ -472,7 +489,8 @@ workflow RAREDISEASE {
             ch_vep_cache,
             ch_genome_fasta,
             ch_gnomad_af,
-            ch_scatter_split_intervals
+            ch_scatter_split_intervals,
+            ch_vep_extra_files
         ).set {ch_snv_annotate}
         ch_versions = ch_versions.mix(ch_snv_annotate.versions)
 
@@ -519,6 +537,7 @@ workflow RAREDISEASE {
             params.genome,
             params.vep_cache_version,
             ch_vep_cache,
+            ch_vep_extra_files
         ).set {ch_mt_annotate}
         ch_versions = ch_versions.mix(ch_mt_annotate.versions)
 

From 9434630d71cdb4dac710e7c03935cef0faccd2b7 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 16 Jan 2024 20:05:41 +0100
Subject: [PATCH 2/9] update usage

---
 docs/usage.md | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 34776196..6735eb07 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -225,7 +225,7 @@ The mandatory and optional parameters for each category are tabulated below.
 | vcfanno_resources<sup>2</sup> | vcfanno_lua                    |
 | vcfanno_toml<sup>3</sup>      | vep_filters<sup>8</sup>        |
 | vep_cache_version             | cadd_resources<sup>9</sup>     |
-| vep_cache<sup>4</sup>         |                                |
+| vep_cache<sup>4</sup>         | vep_plugin_files<sup>10</sup>  |
 | gnomad_af<sup>5</sup>         |                                |
 | score_config_snv<sup>6</sup>  |                                |
 
@@ -233,7 +233,7 @@ The mandatory and optional parameters for each category are tabulated below.
 <sup>2</sup>Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).<br />
 <sup>3</sup>Path to a vcfanno configuration file. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_config.toml).<br />
 <sup>4</sup> VEP caches can be downloaded [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html#cache).
-VEP plugins and associated files may be installed in the cache directory, and the plugin pLI is mandatory to install.
+VEP plugins may be installed in the cache directory, and the plugin pLI is mandatory to install. To supply files required by VEP plugins, use the `vep_plugin_files` parameter.
 See example cache [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz).<br />
 <sup>5</sup> GnomAD VCF files can be downloaded from [here](https://gnomad.broadinstitute.org/downloads). The option `gnomad_af` expects a tab-delimited file with
 no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/gnomad_reformated.tab.gz).<br />
@@ -241,6 +241,7 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl
 <sup>7</sup>Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).<br />
 <sup>8</sup> This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into canditate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt). Not required if --skip_vep_filter is set to true.<br />
 <sup>9</sup>Path to a folder containing cadd annotations. Equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels. <br />
+<sup>10</sup>A CSV file that describes the files used by VEP's named and custom plugins. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vep_files.csv). <br />
 
 > NB: We use CADD only to annotate small indels. To annotate SNVs with precomputed CADD scores, pass the file containing CADD scores as a resource to vcfanno instead. Files containing the precomputed CADD scores for SNVs can be downloaded from [here](https://cadd.gs.washington.edu/download) (description: "All possible SNVs of GRCh3<7/8>/hg3<7/8>")
 
@@ -251,22 +252,22 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl
 | genome                                         | reduced_penetrance |
 | svdb_query_dbs/svdb_query_bedpedbs<sup>1</sup> |                    |
 | vep_cache_version                              | vep_filters        |
-| vep_cache                                      |                    |
+| vep_cache                                      | vep_plugin_files   |
 | score_config_sv                                |                    |
 
 <sup>1</sup> A CSV file that describes the databases (VCFs or BEDPEs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query).
 
 ##### 9. Mitochondrial annotation
 
-| Mandatory         | Optional    |
-| ----------------- | ----------- |
-| genome            | vep_filters |
-| mito_name         |             |
-| vcfanno_resources |             |
-| vcfanno_toml      |             |
-| vep_cache_version |             |
-| vep_cache         |             |
-| score_config_mt   |             |
+| Mandatory         | Optional         |
+| ----------------- | ---------------- |
+| genome            | vep_filters      |
+| mito_name         | vep_plugin_files |
+| vcfanno_resources |                  |
+| vcfanno_toml      |                  |
+| vep_cache_version |                  |
+| vep_cache         |                  |
+| score_config_mt   |                  |
 
 #### Run the pipeline
 

From dc4e176116c288a09b735ea7f1416d7711d71793 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 16 Jan 2024 20:21:44 +0100
Subject: [PATCH 3/9] update changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0878e5c8..dda4ef6f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - GATK CNVCaller uses segments instead of intervals, filters out "reference" segments between the calls, and fixes a bug with how `ch_readcount_intervals` was handled [#472](https://github.com/nf-core/raredisease/pull/472)
 - bwa aligner [#474](https://github.com/nf-core/raredisease/pull/474)
 - Add FOUND_IN tag, which mentions the variant caller that found the mutation, in the INFO column of the vcf files [#471](https://github.com/nf-core/raredisease/pull/471)
+- A new parameter `vep_plugin_files` to supply files required by VEP plugins [#482](https://github.com/nf-core/raredisease/pull/482)
 
 ### `Changed`
 
@@ -42,6 +43,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Changed the name of the parameter from `skip_cnv_calling` to `skip_germlinecnvcaller` [#435](https://github.com/nf-core/raredisease/pull/435)
 - Check SVDB query input files for existence and correct format [#476](https://github.com/nf-core/raredisease/pull/476)
 - Change hardcoded platform value to params.platform in align_MT.config [#475](https://github.com/nf-core/raredisease/pull/475)
+- Installed the nf-core version of the ensemblvep/vep module [#482](https://github.com/nf-core/raredisease/pull/482)
 
 ### `Fixed`
 

From de9ff795de76bedf89cec51310dff3124b43cdf9 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 16 Jan 2024 20:30:50 +0100
Subject: [PATCH 4/9] fix lint error

---
 nextflow.config | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nextflow.config b/nextflow.config
index a6551e77..0ff60379 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -39,6 +39,9 @@ params {
     cadd_resources             = null
     platform                   = 'illumina'
 
+    // Annotation
+    vep_cache_version          = 110
+
     // Bam_qc
     ngsbits_samplegender_method = 'xy'
 

From 3ee9fed33c5c14af78c88b7e30f76d51f70d9e61 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 16 Jan 2024 20:36:42 +0100
Subject: [PATCH 5/9] fix lint error

---
 main.nf         | 2 +-
 nextflow.config | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/main.nf b/main.nf
index 0ebf4e51..d87f9e02 100644
--- a/main.nf
+++ b/main.nf
@@ -54,7 +54,7 @@ params.vcfanno_toml                   = WorkflowMain.getGenomeAttribute(params,
 params.vcfanno_lua                    = WorkflowMain.getGenomeAttribute(params, 'vcfanno_lua')
 params.vep_cache                      = WorkflowMain.getGenomeAttribute(params, 'vep_cache')
 params.vep_cache_version              = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version')
-
+params.vep_plugin_files               = WorkflowMain.getGenomeAttribute(params, 'vep_plugin_files')
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     VALIDATE & PRINT PARAMETER SUMMARY
diff --git a/nextflow.config b/nextflow.config
index 0ff60379..a6551e77 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -39,9 +39,6 @@ params {
     cadd_resources             = null
     platform                   = 'illumina'
 
-    // Annotation
-    vep_cache_version          = 110
-
     // Bam_qc
     ngsbits_samplegender_method = 'xy'
 

From 76fb0d438d800c9be00d721d08fea322709ff2e0 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sat, 20 Jan 2024 10:36:53 +0100
Subject: [PATCH 6/9] merge me

---
 conf/modules/annotate_mobile_elements.config  |  4 +-
 conf/test.config                              |  2 +-
 conf/test_one_sample.config                   |  2 +-
 modules/local/ensemblvep/main.nf              | 80 -------------------
 modules/local/ensemblvep/meta.yml             | 73 -----------------
 .../local/annotate_mobile_elements.nf         | 27 ++++---
 workflows/raredisease.nf                      |  3 +-
 7 files changed, 21 insertions(+), 170 deletions(-)
 delete mode 100644 modules/local/ensemblvep/main.nf
 delete mode 100644 modules/local/ensemblvep/meta.yml

diff --git a/conf/modules/annotate_mobile_elements.config b/conf/modules/annotate_mobile_elements.config
index 0e04095c..442652a3 100644
--- a/conf/modules/annotate_mobile_elements.config
+++ b/conf/modules/annotate_mobile_elements.config
@@ -40,12 +40,12 @@ process {
         ext.args   = { [
             '--dir_cache vep_cache',
             '--dir_plugins vep_cache/Plugins',
-            '--plugin pLI,vep_cache/pLI_values_107.txt',
+            '--plugin pLI,pLI_values_107.txt',
             '--appris --biotype --buffer_size 100 --canonical --cache --ccds',
             '--compress_output bgzip --distance 5000 --domains',
             '--exclude_predicted --force_overwrite --format vcf',
             '--fork 4 --hgvs --humdiv --max_sv_size 248956422 --merged',
-            '--no_progress --no_stats --numbers --per_gene --polyphen p',
+            '--no_progress --numbers --per_gene --polyphen p',
             '--protein --offline --regulatory --sift p',
             '--symbol --tsl --uniprot --vcf'
         ].join(' ') }
diff --git a/conf/test.config b/conf/test.config
index 1347680d..fd2f873e 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -57,6 +57,6 @@ params {
     vcfanno_toml         = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_config.toml"
     vep_cache            = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz"
     vep_filters          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt"
-    vep_cache_version    = 110
+    vep_cache_version    = 107
     vep_plugin_files     = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv"
 }
diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config
index 12eb9f39..d521a8a3 100644
--- a/conf/test_one_sample.config
+++ b/conf/test_one_sample.config
@@ -57,6 +57,6 @@ params {
     vcfanno_toml         = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_config.toml"
     vep_cache            = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz"
     vep_filters          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt"
-    vep_cache_version    = 110
+    vep_cache_version    = 107
     vep_plugin_files     = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv"
 }
diff --git a/modules/local/ensemblvep/main.nf b/modules/local/ensemblvep/main.nf
deleted file mode 100644
index 81d4191f..00000000
--- a/modules/local/ensemblvep/main.nf
+++ /dev/null
@@ -1,80 +0,0 @@
-process ENSEMBLVEP {
-    tag "$meta.id"
-    label 'process_medium'
-
-    // Exit if running this module with -profile conda / -profile mamba
-    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
-        error("Local VEP module does not support Conda. Please use Docker / Singularity / Podman instead.")
-    }
-
-    container "docker.io/ensemblorg/ensembl-vep:release_107.0"
-
-    input:
-    tuple val(meta), path(vcf)
-    tuple val(meta2), path(fasta)
-    val   genome
-    val   species
-    val   cache_version
-    path  cache
-    path  extra_files
-
-    output:
-    tuple val(meta), path("*.vcf")    , optional:true, emit: vcf
-    tuple val(meta), path("*.tab")    , optional:true, emit: tab
-    tuple val(meta), path("*.json")   , optional:true, emit: json
-    tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf_gz
-    tuple val(meta), path("*.tab.gz") , optional:true, emit: tab_gz
-    tuple val(meta), path("*.json.gz"), optional:true, emit: json_gz
-    path "*.summary.html"             , optional:true, emit: report
-    path "versions.yml"               , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf'
-    def compress_out = args.contains("--compress_output") ? '.gz' : ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def stats_file     = args.contains("--no_stats") ? '' : "--stats_file ${prefix}.summary.html"
-    def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
-    def reference = fasta ? "--fasta $fasta" : ""
-
-    """
-    vep \\
-        -i $vcf \\
-        -o ${prefix}.${file_extension}${compress_out} \\
-        $args \\
-        $reference \\
-        --assembly $genome \\
-        --species $species \\
-        --cache \\
-        --cache_version $cache_version \\
-        --dir_cache $dir_cache \\
-        --fork $task.cpus \\
-        ${stats_file}
-
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//')
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.vcf
-    touch ${prefix}.tab
-    touch ${prefix}.json
-    touch ${prefix}.vcf.gz
-    touch ${prefix}.tab.gz
-    touch ${prefix}.json.gz
-    touch ${prefix}.summary.html
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/local/ensemblvep/meta.yml b/modules/local/ensemblvep/meta.yml
deleted file mode 100644
index a4dde8a6..00000000
--- a/modules/local/ensemblvep/meta.yml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: ENSEMBLVEP
-description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`.
-keywords:
-  - annotation
-tools:
-  - ensemblvep:
-      description: |
-        VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs
-        or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions.
-      homepage: https://www.ensembl.org/info/docs/tools/vep/index.html
-      documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html
-      licence: ["Apache-2.0"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - vcf:
-      type: file
-      description: |
-        vcf to annotate
-  - genome:
-      type: value
-      description: |
-        which genome to annotate with
-  - species:
-      type: value
-      description: |
-        which species to annotate with
-  - cache_version:
-      type: value
-      description: |
-        which version of the cache to annotate with
-  - cache:
-      type: file
-      description: |
-        path to VEP cache (optional)
-  - fasta:
-      type: file
-      description: |
-        reference FASTA file (optional)
-      pattern: "*.{fasta,fa}"
-  - extra_files:
-      type: tuple
-      description: |
-        path to file(s) needed for plugins  (optional)
-output:
-  - vcf:
-      type: file
-      description: |
-        annotated vcf (optional)
-      pattern: "*.ann.vcf"
-  - tab:
-      type: file
-      description: |
-        tab file with annotated variants (optional)
-      pattern: "*.ann.tab"
-  - json:
-      type: file
-      description: |
-        json file with annotated variants (optional)
-      pattern: "*.ann.json"
-  - report:
-      type: file
-      description: VEP report file
-      pattern: "*.html"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@maxulysse"
diff --git a/subworkflows/local/annotate_mobile_elements.nf b/subworkflows/local/annotate_mobile_elements.nf
index 04b15ad2..dc6247bf 100644
--- a/subworkflows/local/annotate_mobile_elements.nf
+++ b/subworkflows/local/annotate_mobile_elements.nf
@@ -2,13 +2,12 @@
 // A subworkflow to annotate structural variants.
 //
 
-include { SVDB_QUERY as SVDB_QUERY_DB    } from '../../modules/nf-core/svdb/query/main'
-include { PICARD_SORTVCF                 } from '../../modules/nf-core/picard/sortvcf/main'
-include { ENSEMBLVEP as ENSEMBLVEP_ME    } from '../../modules/local/ensemblvep/main'
-include { ENSEMBLVEP_FILTERVEP as FILTERVEP_ME  } from '../../modules/nf-core/ensemblvep/filtervep'
-include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_FILTER } from '../../modules/nf-core/bcftools/view/main'
-include { TABIX_BGZIPTABIX as BGZIP_TABIX_ME    } from '../../modules/nf-core/tabix/bgziptabix/main'
-
+include { SVDB_QUERY as SVDB_QUERY_DB             } from '../../modules/nf-core/svdb/query/main'
+include { PICARD_SORTVCF                          } from '../../modules/nf-core/picard/sortvcf/main'
+include { ENSEMBLVEP_VEP as ENSEMBLVEP_ME         } from '../../modules/nf-core/ensemblvep/vep/main'
+include { ENSEMBLVEP_FILTERVEP as FILTERVEP_ME    } from '../../modules/nf-core/ensemblvep/filtervep'
+include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_FILTER   } from '../../modules/nf-core/bcftools/view/main'
+include { TABIX_BGZIPTABIX as BGZIP_TABIX_ME      } from '../../modules/nf-core/tabix/bgziptabix/main'
 include { ANNOTATE_CSQ_PLI as ANNOTATE_CSQ_PLI_ME } from '../../subworkflows/local/annotate_consequence_pli.nf'
 
 workflow ANNOTATE_MOBILE_ELEMENTS {
@@ -23,6 +22,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS {
         ch_vep_filters          // channel: [mandatory] [ path(vep_filter) ]
         val_vep_genome          // string: [mandatory] GRCh37 or GRCh38
         val_vep_cache_version   // string: [mandatory] default: 107
+        ch_vep_extra_files      // channel: [mandatory] [ path(files) ]
 
     main:
         ch_versions = Channel.empty()
@@ -54,18 +54,21 @@ workflow ANNOTATE_MOBILE_ELEMENTS {
             ch_genome_fasta,
             ch_genome_dictionary
         )
+        .vcf
+        .map { meta, vcf -> return [meta, vcf, []]}
+        .set { ch_vep_in }
 
         ENSEMBLVEP_ME(
-            PICARD_SORTVCF.out.vcf,
-            ch_genome_fasta,
+            ch_vep_in,
             val_vep_genome,
             "homo_sapiens",
             val_vep_cache_version,
             ch_vep_cache,
-            []
+            ch_genome_fasta,
+            ch_vep_extra_files
         )
 
-        ENSEMBLVEP_ME.out.vcf_gz
+        ENSEMBLVEP_ME.out.vcf
             .map { meta, vcf ->
                 [ meta, vcf, [] ]
             }
@@ -76,7 +79,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS {
         BCFTOOLS_VIEW_FILTER.out.vcf
             .multiMap { meta, vcf ->
                 clinical: [ meta + [ set: "clinical" ], vcf ]
-                research: [ meta + [ set: "research" ], vcf ]   
+                research: [ meta + [ set: "research" ], vcf ]
             }
             .set { ch_clin_research_vcf }
 
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index e5781c17..0f2a4fe1 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -647,7 +647,8 @@ workflow RAREDISEASE {
             ch_variant_consequences,
             ch_vep_filters,
             params.genome,
-            params.vep_cache_version
+            params.vep_cache_version,
+            ch_vep_extra_files
         )
         ch_versions = ch_versions.mix(ANNOTATE_MOBILE_ELEMENTS.out.versions)
     }

From 4c6851e0b85cf9ee58e43a1767f47a935da4d243 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 22 Jan 2024 10:01:31 +0100
Subject: [PATCH 7/9] update module

---
 modules.json                                  |   2 +-
 modules/nf-core/ensemblvep/vep/main.nf        |   5 +-
 .../nf-core/ensemblvep/vep/tests/main.nf.test | 102 ++++++++++++++++++
 .../ensemblvep/vep/tests/nextflow.config      |  13 +++
 .../ensemblvep/vep/tests/tab.gz.config        |   5 +
 modules/nf-core/ensemblvep/vep/tests/tags.yml |   2 +
 .../nf-core/ensemblvep/vep/tests/vcf.config   |   5 +
 7 files changed, 130 insertions(+), 4 deletions(-)
 create mode 100644 modules/nf-core/ensemblvep/vep/tests/main.nf.test
 create mode 100644 modules/nf-core/ensemblvep/vep/tests/nextflow.config
 create mode 100644 modules/nf-core/ensemblvep/vep/tests/tab.gz.config
 create mode 100644 modules/nf-core/ensemblvep/vep/tests/tags.yml
 create mode 100644 modules/nf-core/ensemblvep/vep/tests/vcf.config

diff --git a/modules.json b/modules.json
index eac809df..03000a35 100644
--- a/modules.json
+++ b/modules.json
@@ -117,7 +117,7 @@
                     },
                     "ensemblvep/vep": {
                         "branch": "master",
-                        "git_sha": "214d575774c172062924ad3564b4f66655600730",
+                        "git_sha": "76a0696a60c41c57fc5f6040ac31b11ce5d4d8dd",
                         "installed_by": ["modules"]
                     },
                     "expansionhunter": {
diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf
index 3a2b7423..a7fc5ad1 100644
--- a/modules/nf-core/ensemblvep/vep/main.nf
+++ b/modules/nf-core/ensemblvep/vep/main.nf
@@ -20,7 +20,7 @@ process ENSEMBLVEP_VEP {
     tuple val(meta), path("*.vcf.gz")  , optional:true, emit: vcf
     tuple val(meta), path("*.tab.gz")  , optional:true, emit: tab
     tuple val(meta), path("*.json.gz") , optional:true, emit: json
-    path "*.summary.html"              , emit: report
+    path "*.summary.html"              , optional:true, emit: report
     path "versions.yml"                , emit: versions
 
     when:
@@ -45,8 +45,7 @@ process ENSEMBLVEP_VEP {
         --cache \\
         --cache_version $cache_version \\
         --dir_cache $dir_cache \\
-        --fork $task.cpus \\
-        --stats_file ${prefix}.summary.html \\
+        --fork $task.cpus
 
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test
new file mode 100644
index 00000000..f072dcab
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test
@@ -0,0 +1,102 @@
+nextflow_process {
+
+    name "Test Process ENSEMBLVEP_VEP"
+    script "modules/nf-core/ensemblvep/vep/main.nf"
+    process "ENSEMBLVEP_VEP"
+    config "./nextflow.config"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "ensemblvep"
+    tag "ensemblvep/vep"
+    tag "ensemblvep/download"
+
+
+    test("test_ensemblvep_vep_fasta_vcf") {
+
+        config "./vcf.config"
+
+        setup {
+            run("ENSEMBLVEP_DOWNLOAD") {
+                script "../../download/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version])
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
+                    []
+                ])
+                input[1] = params.vep_genome
+                input[2] = params.vep_species
+                input[3] = params.vep_cache_version
+                input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] }
+                input[5] = Channel.value([
+                    [id:"fasta"],
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                ])
+                input[6] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+            {assert process.success},
+            {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")}
+            )
+        }
+
+    }
+
+    test("test_ensemblvep_vep_fasta_tab_gz") {
+
+        config "./tab.gz.config"
+
+        setup {
+            run("ENSEMBLVEP_DOWNLOAD") {
+                script "../../download/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version])
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
+                    []
+                ])
+                input[1] = params.vep_genome
+                input[2] = params.vep_species
+                input[3] = params.vep_cache_version
+                input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] }
+                input[5] = Channel.value([
+                    [id:"fasta"],
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                ])
+                input[6] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+            {assert process.success},
+            {assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v110.0")}
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config
new file mode 100644
index 00000000..cfaef733
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config
@@ -0,0 +1,13 @@
+params {
+    vep_cache_version = "110"
+    vep_genome = "WBcel235"
+    vep_species = "caenorhabditis_elegans"
+}
+
+process {
+
+    withName: ENSEMBLVEP_DOWNLOAD {
+        ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE'
+    }
+
+}
diff --git a/modules/nf-core/ensemblvep/vep/tests/tab.gz.config b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config
new file mode 100644
index 00000000..40eb03e5
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config
@@ -0,0 +1,5 @@
+process {
+        withName: ENSEMBLVEP_VEP {
+        ext.args = '--tab --compress_output bgzip'
+    }
+}
diff --git a/modules/nf-core/ensemblvep/vep/tests/tags.yml b/modules/nf-core/ensemblvep/vep/tests/tags.yml
new file mode 100644
index 00000000..4aa4aa45
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/tags.yml
@@ -0,0 +1,2 @@
+ensemblvep/vep:
+  - "modules/nf-core/ensemblvep/vep/**"
diff --git a/modules/nf-core/ensemblvep/vep/tests/vcf.config b/modules/nf-core/ensemblvep/vep/tests/vcf.config
new file mode 100644
index 00000000..ad8955a3
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/vcf.config
@@ -0,0 +1,5 @@
+process {
+        withName: ENSEMBLVEP_VEP {
+        ext.args = '--vcf'
+    }
+}

From 1b1d6a125bef3a2179f35c5fb93494f54cfc43c4 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 22 Jan 2024 15:04:12 +0100
Subject: [PATCH 8/9] review suggestions

---
 assets/vep_plugin_files_schema.json | 19 +++++++++++++++++++
 nextflow_schema.json                |  4 +++-
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 assets/vep_plugin_files_schema.json

diff --git a/assets/vep_plugin_files_schema.json b/assets/vep_plugin_files_schema.json
new file mode 100644
index 00000000..6f728a7b
--- /dev/null
+++ b/assets/vep_plugin_files_schema.json
@@ -0,0 +1,19 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/vep_plugin_files_schema.json",
+    "title": "Schema for VEP plugin files and their indices",
+    "description": "Schema for VEP plugin files and their indices",
+    "type": "array",
+    "items": {
+        "type": "object",
+        "properties": {
+            "vep_files": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "errorMessage": "Path to vep plugin files and their indices"
+            }
+        },
+        "required": ["vep_files"]
+    }
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 64056370..e1d952f1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -390,7 +390,9 @@
                     "description": "Databases used by both named and custom plugins to annotate variants.",
                     "fa_icon": "fas fa-file-csv",
                     "help_text": "Path to a file containing the absolute paths to databases and their indices used by VEP's custom and named plugins resources defined within the vcfanno toml file. One line per resource.",
-                    "mimetype": "text/csv"
+                    "pattern": "^\\S+\\.csv$",
+                    "mimetype": "text/csv",
+                    "schema": "assets/vep_plugin_files_schema.json"
                 },
                 "vep_filters": {
                     "type": "string",

From f1b75d4f8cceaae9968907946f968a721cf18191 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 22 Jan 2024 19:36:54 +0530
Subject: [PATCH 9/9] Apply suggestions from code review

Co-authored-by: Anders Jemt <jemten@users.noreply.github.com>
---
 subworkflows/local/annotate_genome_snvs.nf     | 2 +-
 subworkflows/local/annotate_mobile_elements.nf | 2 +-
 subworkflows/local/annotate_mt_snvs.nf         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf
index 291d3acd..9e3d74d6 100644
--- a/subworkflows/local/annotate_genome_snvs.nf
+++ b/subworkflows/local/annotate_genome_snvs.nf
@@ -117,7 +117,7 @@ workflow ANNOTATE_GENOME_SNVS {
             .set { ch_for_mix }
 
         ch_for_mix.selvar.mix(ch_for_mix.cadd)
-            .map { meta, vcf -> return [meta, vcf, []]}
+            .map { meta, vcf -> return [meta, vcf, []] }
             .set { ch_vep_in }
 
 
diff --git a/subworkflows/local/annotate_mobile_elements.nf b/subworkflows/local/annotate_mobile_elements.nf
index dc6247bf..265ccce0 100644
--- a/subworkflows/local/annotate_mobile_elements.nf
+++ b/subworkflows/local/annotate_mobile_elements.nf
@@ -55,7 +55,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS {
             ch_genome_dictionary
         )
         .vcf
-        .map { meta, vcf -> return [meta, vcf, []]}
+        .map { meta, vcf -> return [meta, vcf, []] }
         .set { ch_vep_in }
 
         ENSEMBLVEP_ME(
diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf
index e1ed903a..e7b8ae6a 100644
--- a/subworkflows/local/annotate_mt_snvs.nf
+++ b/subworkflows/local/annotate_mt_snvs.nf
@@ -54,7 +54,7 @@ workflow ANNOTATE_MT_SNVS {
 
         ch_for_mix.merged.mix(ch_for_mix.cadd)
             .tap { ch_haplogrep_in }
-            .map { meta, vcf -> return [meta, vcf, []]}
+            .map { meta, vcf -> return [meta, vcf, []] }
             .set { ch_vep_in }