From efddf0b8f9593f535d41d533320bc4db1a30c7bb Mon Sep 17 00:00:00 2001
From: peterpru <peter.pruisscher@scilifelab.se>
Date: Mon, 6 Nov 2023 13:03:25 +0100
Subject: [PATCH 01/22] initial commit to log progress

---
 modules/local/retroseq/call/placeholder.txt     | 1 +
 modules/local/retroseq/discover/placeholder.txt | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 modules/local/retroseq/call/placeholder.txt
 create mode 100644 modules/local/retroseq/discover/placeholder.txt

diff --git a/modules/local/retroseq/call/placeholder.txt b/modules/local/retroseq/call/placeholder.txt
new file mode 100644
index 00000000..b3a42524
--- /dev/null
+++ b/modules/local/retroseq/call/placeholder.txt
@@ -0,0 +1 @@
+placeholder
\ No newline at end of file
diff --git a/modules/local/retroseq/discover/placeholder.txt b/modules/local/retroseq/discover/placeholder.txt
new file mode 100644
index 00000000..b3a42524
--- /dev/null
+++ b/modules/local/retroseq/discover/placeholder.txt
@@ -0,0 +1 @@
+placeholder
\ No newline at end of file

From 46041f37b849ac5a86c647a96109786f8ddc3c0b Mon Sep 17 00:00:00 2001
From: peterpru <peter.pruisscher@scilifelab.se>
Date: Tue, 7 Nov 2023 10:43:00 +0100
Subject: [PATCH 02/22] add modules retroseq call and discover

---
 modules/local/retroseq/call/main.nf           | 51 ++++++++++++++++++
 modules/local/retroseq/call/meta.yml          | 51 ++++++++++++++++++
 modules/local/retroseq/call/placeholder.txt   |  1 -
 modules/local/retroseq/discover/main.nf       | 52 ++++++++++++++++++
 modules/local/retroseq/discover/meta.yml      | 54 +++++++++++++++++++
 .../local/retroseq/discover/placeholder.txt   |  1 -
 6 files changed, 208 insertions(+), 2 deletions(-)
 create mode 100644 modules/local/retroseq/call/main.nf
 create mode 100644 modules/local/retroseq/call/meta.yml
 delete mode 100644 modules/local/retroseq/call/placeholder.txt
 create mode 100644 modules/local/retroseq/discover/main.nf
 create mode 100644 modules/local/retroseq/discover/meta.yml
 delete mode 100644 modules/local/retroseq/discover/placeholder.txt

diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf
new file mode 100644
index 00000000..9e38e66c
--- /dev/null
+++ b/modules/local/retroseq/call/main.nf
@@ -0,0 +1,51 @@
+process RETROSEQ_CALL {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' }"
+
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(fasta) 
+
+    output:
+    tuple val(meta), path("*.vcf"), emit: vcf
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "1.5"
+
+    """
+    retroseq.pl \\
+        -call \\
+        -bam $bam \\
+        -ref $fasta \\
+        -output ${prefix}.tab
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        retroseq_call: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "1.5"
+    """
+    touch ${prefix}.tab
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        retroseq_call: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml
new file mode 100644
index 00000000..7427491f
--- /dev/null
+++ b/modules/local/retroseq/call/meta.yml
@@ -0,0 +1,51 @@
+name: "retroseq_call"
+description: RetroSeq is a tool for discovery and genotyping of transposable element variants (TEVs) from next-gen sequencing reads aligned to a reference genome in BAM format.
+keywords:
+  - retroseq
+  - transposable elements
+  - genomics
+tools:
+  - "retroseq":
+      description: "RetroSeq: discovery and genotyping of TEVs from reads in BAM format."
+      homepage: "https://github.com/tk2/RetroSeq"
+      documentation: "https://github.com/tk2/RetroSeq"
+      tool_dev_url: "https://github.com/tk2/RetroSeq"
+      doi: "10.1093/bioinformatics/bts697"
+      licence: "['GPL']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - bam:
+      type: file
+      description: Sorted BAM file
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: Index of the sorted BAM file
+      pattern: "*.{bam}"
+  - fasta:
+      type: file
+      description: reference genome in fasta format
+      pattern: "*.{fasta}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - vcf:
+      type: file
+      description: Output file containing TEVs and their location in the genome.
+      pattern: "*.{vcf}"
+
+authors:
+  - "@peterpru"
diff --git a/modules/local/retroseq/call/placeholder.txt b/modules/local/retroseq/call/placeholder.txt
deleted file mode 100644
index b3a42524..00000000
--- a/modules/local/retroseq/call/placeholder.txt
+++ /dev/null
@@ -1 +0,0 @@
-placeholder
\ No newline at end of file
diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf
new file mode 100644
index 00000000..d71fd3fa
--- /dev/null
+++ b/modules/local/retroseq/discover/main.nf
@@ -0,0 +1,52 @@
+process RETROSEQ_DISCOVER {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' }"
+
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    path(tab)
+    path(extra_files)
+
+    output:
+    tuple val(meta), path("*.tab"), emit: tab
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "1.5" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+
+    """
+    retroseq.pl \\
+        -discover \\
+        -bam $bam \\
+        -refTEs $tab\\
+        -output ${prefix}.tab
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        retroseq_discover: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "1.5"
+    """
+    touch ${prefix}.tab
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        retroseq_discover: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml
new file mode 100644
index 00000000..a61f64c9
--- /dev/null
+++ b/modules/local/retroseq/discover/meta.yml
@@ -0,0 +1,54 @@
+name: "retroseq_discover"
+description: RetroSeq is a tool for discovery and genotyping of transposable element variants (TEVs) from next-gen sequencing reads aligned to a reference genome in BAM format.
+keywords:
+  - retroseq
+  - transposable elements
+  - genomics
+tools:
+  - "retroseq":
+      description: "RetroSeq: discovery and genotyping of TEVs from reads in BAM format."
+      homepage: "https://github.com/tk2/RetroSeq"
+      documentation: "https://github.com/tk2/RetroSeq"
+      tool_dev_url: "https://github.com/tk2/RetroSeq"
+      doi: "10.1093/bioinformatics/bts697"
+      licence: "['GPL']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - bam:
+      type: file
+      description: Sorted BAM file
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: Index of the sorted BAM file
+      pattern: "*.{bam}"
+  - tab:
+      type: file
+      description: Tab file containing a list of files, where each line is a TE type and the path to a bed file containing its coordinates in the genome.
+      pattern: "*.{tab}"
+  - extra_files:
+      type: file
+      description: Each file contains the coordinates of transposable elements of a particular type.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - tab:
+      type: file
+      description: Output file containing lists of read pair names per TE type
+      pattern: "*.{tab}"
+
+authors:
+  - "@peterpru"
diff --git a/modules/local/retroseq/discover/placeholder.txt b/modules/local/retroseq/discover/placeholder.txt
deleted file mode 100644
index b3a42524..00000000
--- a/modules/local/retroseq/discover/placeholder.txt
+++ /dev/null
@@ -1 +0,0 @@
-placeholder
\ No newline at end of file

From 0bcf05c7a9aadf925585f65e690983a641ff0097 Mon Sep 17 00:00:00 2001
From: peterpru <peter.pruisscher@scilifelab.se>
Date: Tue, 7 Nov 2023 10:51:46 +0100
Subject: [PATCH 03/22] fix linting

---
 modules/local/retroseq/call/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf
index 9e38e66c..0fb36553 100644
--- a/modules/local/retroseq/call/main.nf
+++ b/modules/local/retroseq/call/main.nf
@@ -9,7 +9,7 @@ process RETROSEQ_CALL {
 
     input:
     tuple val(meta), path(bam), path(bai)
-    tuple val(meta), path(fasta) 
+    tuple val(meta), path(fasta)
 
     output:
     tuple val(meta), path("*.vcf"), emit: vcf

From d803ed8b9749b2b182651d5e44e208ea84455db1 Mon Sep 17 00:00:00 2001
From: peterpru <peter.pruisscher@scilifelab.se>
Date: Tue, 7 Nov 2023 13:26:06 +0100
Subject: [PATCH 04/22] add subworkflow

---
 modules/local/retroseq/call/main.nf           |  1 +
 modules/local/retroseq/discover/meta.yml      |  2 +-
 .../local/annotate_mobile_elements.nf         | 46 +++++++++++++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 subworkflows/local/annotate_mobile_elements.nf

diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf
index 0fb36553..569d84a2 100644
--- a/modules/local/retroseq/call/main.nf
+++ b/modules/local/retroseq/call/main.nf
@@ -27,6 +27,7 @@ process RETROSEQ_CALL {
     retroseq.pl \\
         -call \\
         -bam $bam \\
+        -input $input
         -ref $fasta \\
         -output ${prefix}.tab
 
diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml
index a61f64c9..80c6af6c 100644
--- a/modules/local/retroseq/discover/meta.yml
+++ b/modules/local/retroseq/discover/meta.yml
@@ -33,7 +33,7 @@ input:
       pattern: "*.{tab}"
   - extra_files:
       type: file
-      description: Each file contains the coordinates of transposable elements of a particular type.
+      description: List of files. Each file contains the coordinates of transposable elements of a particular type.
 
 output:
   - meta:
diff --git a/subworkflows/local/annotate_mobile_elements.nf b/subworkflows/local/annotate_mobile_elements.nf
new file mode 100644
index 00000000..26648434
--- /dev/null
+++ b/subworkflows/local/annotate_mobile_elements.nf
@@ -0,0 +1,46 @@
+//
+// A subworkflow to annotate mobile elements in the genome
+//
+
+include { RETROSEQ_CALL as RETROSEQ_CALL } from '../../modules/local/retroseq/call/main'
+include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER } from '../../modules/local/retroseq/discover/main'
+
+workflow ANNOTATE_STRUCTURAL_VARIANTS {
+
+    take:
+        ch_bam              // channel: [mandatory] [ val(meta), path(bam) ]
+        ch_bai              // channel: [mandatory] [ val(meta), path(bai) ]
+        ch_bam_bai          // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+        ch_genome_fasta     // channel: [mandatory] [ val(meta), path(fasta) ]
+        ch_genome_fai       // channel: [mandatory] [ val(meta), path(fai) ]
+
+    main:
+        ch_versions = Channel.empty()
+
+        // Running retroseq discover: identify discordant read pairs that might support a TE insertion
+        RETROSEQ_DISCOVER (
+            ch_bam_bai,
+            "tab",
+            "extrafiles"
+        )
+
+        // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present
+        RETROSEQ_CALL (
+            ch_bam_bai,
+            ch_genome_fasta,
+        )
+
+        // Run vep to annotate
+
+        // Run svdb to query against database
+
+        // Filter and rank as done in findtroll? E.g. protein coding
+
+        ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions)
+        ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions)
+
+    emit:
+        vcf_ann  = RETROSEQ_CALL.out.vcf // channel: [ val(meta), path(vcf) ]
+        versions = ch_versions              // channel: [ path(versions.yml) ]
+}
+

From c38762323363186c63959fa54c4e6181c37418c4 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Thu, 4 Jan 2024 11:49:25 +0100
Subject: [PATCH 05/22] retroseq call subworkflow

---
 assets/grch37_g1k_alu.bed                     |  0
 assets/grch37_g1k_herv.bed                    |  0
 assets/grch37_g1k_l1.bed                      |  0
 assets/grch37_g1k_sva.bed                     |  0
 assets/mobile_element_references.tsv          |  5 ++
 assets/mobile_element_references_schema.json  | 26 ++++++
 conf/modules/call_mobile_elements.config      | 40 +++++++++
 conf/test.config                              |  2 +
 main.nf                                       |  3 +-
 modules/local/retroseq/call/main.nf           |  8 +-
 modules/local/retroseq/call/meta.yml          |  6 +-
 modules/local/retroseq/discover/main.nf       |  9 +-
 modules/local/retroseq/discover/meta.yml      | 14 +--
 nextflow.config                               |  2 +
 nextflow_schema.json                          |  9 ++
 .../local/annotate_mobile_elements.nf         | 46 ----------
 subworkflows/local/call_mobile_elements.nf    | 86 +++++++++++++++++++
 workflows/raredisease.nf                      | 11 +++
 18 files changed, 206 insertions(+), 61 deletions(-)
 create mode 100644 assets/grch37_g1k_alu.bed
 create mode 100644 assets/grch37_g1k_herv.bed
 create mode 100644 assets/grch37_g1k_l1.bed
 create mode 100644 assets/grch37_g1k_sva.bed
 create mode 100644 assets/mobile_element_references.tsv
 create mode 100644 assets/mobile_element_references_schema.json
 create mode 100644 conf/modules/call_mobile_elements.config
 delete mode 100644 subworkflows/local/annotate_mobile_elements.nf
 create mode 100644 subworkflows/local/call_mobile_elements.nf

diff --git a/assets/grch37_g1k_alu.bed b/assets/grch37_g1k_alu.bed
new file mode 100644
index 00000000..e69de29b
diff --git a/assets/grch37_g1k_herv.bed b/assets/grch37_g1k_herv.bed
new file mode 100644
index 00000000..e69de29b
diff --git a/assets/grch37_g1k_l1.bed b/assets/grch37_g1k_l1.bed
new file mode 100644
index 00000000..e69de29b
diff --git a/assets/grch37_g1k_sva.bed b/assets/grch37_g1k_sva.bed
new file mode 100644
index 00000000..e69de29b
diff --git a/assets/mobile_element_references.tsv b/assets/mobile_element_references.tsv
new file mode 100644
index 00000000..328e5083
--- /dev/null
+++ b/assets/mobile_element_references.tsv
@@ -0,0 +1,5 @@
+type	path
+L1	grch37_g1k_l1.bed
+SVA	grch37_g1k_sva.bed
+ALU	grch37_g1k_alu.bed
+HERV	grch37_g1k_herv.bed
diff --git a/assets/mobile_element_references_schema.json b/assets/mobile_element_references_schema.json
new file mode 100644
index 00000000..40e79825
--- /dev/null
+++ b/assets/mobile_element_references_schema.json
@@ -0,0 +1,26 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/mobile_element_references_schema.json",
+    "title": "Schema for mobile_element_references",
+    "description": "Schema for the file provided with params.mobile_element_references",
+    "type": "array",
+    "items": {
+        "type": "object",
+        "properties": {
+            "type": {
+                "type": "string",
+                "exists": true,
+                "pattern": "^\\S+$",
+                "errorMessage": "Mobile element type must be provided and cannot contain spaces"
+            },
+            "path": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.bed$",
+                "errorMessage": "Bed file, cannot contain spaces and must have extension '.bed'"
+            }
+        },
+        "required": ["type", "path"]
+    }
+}
diff --git a/conf/modules/call_mobile_elements.config b/conf/modules/call_mobile_elements.config
new file mode 100644
index 00000000..b7a0b81f
--- /dev/null
+++ b/conf/modules/call_mobile_elements.config
@@ -0,0 +1,40 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args            = Additional arguments appended to command in module.
+        ext.args2           = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3           = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix          = File name prefix for output files.
+        ext.when            = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+process {
+
+    withName: '.*CALL_MOBILE_ELEMENTS:.*' {
+        publishDir = [
+            enabled: false
+        ]
+    }
+
+    withName: '.*CALL_MOBILE_ELEMENTS:ME_SPLIT_ALIGNMENT' {
+        ext.args   = { [
+            '--output-fmt bam',
+            '--fetch-pairs'
+            ].join(' ') }
+        ext.args2  = { "${meta.interval}" }
+        ext.prefix = { "${meta.id}_${meta.interval}" }
+    }
+
+    withName: '.*CALL_MOBILE_ELEMENTS:RETROSEQ_DISCOVER' {
+        ext.prefix = { "${meta.id}_${meta.interval}_retroseq_discover" }
+    }
+
+    withName: '.*CALL_MOBILE_ELEMENTS:RETROSEQ_CALL' {
+        ext.args = { '--soft' }
+        ext.prefix = { "${meta.id}_${meta.interval}_retroseq_call" }
+    }
+
+}
diff --git a/conf/test.config b/conf/test.config
index d398d542..e23102f8 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -41,6 +41,8 @@ params {
     intervals_wgs        = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
     intervals_y          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
     known_dbsnp          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
+    // TODO: Update path for mobile element refs
+    mobile_element_references = "${projectDir}//mobile_element_references.tsv"
     ml_model             = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
     reduced_penetrance   = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
     score_config_mt      = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
diff --git a/main.nf b/main.nf
index 9d24f1ac..5b0bb182 100644
--- a/main.nf
+++ b/main.nf
@@ -16,7 +16,7 @@ nextflow.enable.dsl = 2
     GENOME PARAMETER VALUES
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
-
+// TODO: Are the genome config used?
 params.fasta                          = WorkflowMain.getGenomeAttribute(params, 'fasta')
 params.fai                            = WorkflowMain.getGenomeAttribute(params, 'fai')
 params.bwa                            = WorkflowMain.getGenomeAttribute(params, 'bwa')
@@ -33,6 +33,7 @@ params.intervals_wgs                  = WorkflowMain.getGenomeAttribute(params,
 params.intervals_y                    = WorkflowMain.getGenomeAttribute(params, 'intervals_y')
 params.known_dbsnp                    = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp')
 params.known_dbsnp_tbi                = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp_tbi')
+params.mobile_element_references      = WorkflowMain.getGenomeAttribute(params, 'mobile_element_references')
 params.ml_model                       = WorkflowMain.getGenomeAttribute(params, 'ml_model')
 params.mt_fasta                       = WorkflowMain.getGenomeAttribute(params, 'mt_fasta')
 params.ngsbits_samplegender_method    = WorkflowMain.getGenomeAttribute(params, 'ngsbits_samplegender_method')
diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf
index 569d84a2..21207f9b 100644
--- a/modules/local/retroseq/call/main.nf
+++ b/modules/local/retroseq/call/main.nf
@@ -9,6 +9,7 @@ process RETROSEQ_CALL {
 
     input:
     tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input)
     tuple val(meta), path(fasta)
 
     output:
@@ -26,10 +27,11 @@ process RETROSEQ_CALL {
     """
     retroseq.pl \\
         -call \\
+        $args \\
         -bam $bam \\
-        -input $input
+        -input $input \\
         -ref $fasta \\
-        -output ${prefix}.tab
+        -output ${prefix}.vcf
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -42,7 +44,7 @@ process RETROSEQ_CALL {
     def prefix = task.ext.prefix ?: "${meta.id}"
     def VERSION = "1.5"
     """
-    touch ${prefix}.tab
+    touch ${prefix}.vcf
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml
index 7427491f..8147d20c 100644
--- a/modules/local/retroseq/call/meta.yml
+++ b/modules/local/retroseq/call/meta.yml
@@ -29,8 +29,12 @@ input:
       pattern: "*.{bam}"
   - fasta:
       type: file
-      description: reference genome in fasta format
+      description: Reference genome in fasta format
       pattern: "*.{fasta}"
+  - tab:
+      type: file
+      description: Output file from running retroseq -call
+      pattern: "*.{tab}"
 
 output:
   - meta:
diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf
index d71fd3fa..b72f10c3 100644
--- a/modules/local/retroseq/discover/main.nf
+++ b/modules/local/retroseq/discover/main.nf
@@ -9,8 +9,8 @@ process RETROSEQ_DISCOVER {
 
     input:
     tuple val(meta), path(bam), path(bai)
-    path(tab)
-    path(extra_files)
+    path(me_references)
+    val(me_types)
 
     output:
     tuple val(meta), path("*.tab"), emit: tab
@@ -25,10 +25,12 @@ process RETROSEQ_DISCOVER {
     def VERSION = "1.5" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
 
     """
+    paste <(printf "%s\\n" $me_types | tr -d '[],') <(printf "%s\\n" $me_references) > me_reference_manifest.tsv
     retroseq.pl \\
         -discover \\
+        $args \\
         -bam $bam \\
-        -refTEs $tab\\
+        -refTEs me_reference_manifest.tsv\\
         -output ${prefix}.tab
 
     cat <<-END_VERSIONS > versions.yml
@@ -42,6 +44,7 @@ process RETROSEQ_DISCOVER {
     def prefix = task.ext.prefix ?: "${meta.id}"
     def VERSION = "1.5"
     """
+    paste <(printf "%s\\n" $me_types | tr -d '[],') <(printf "%s\\n" $me_references) > me_reference_manifest.tsv
     touch ${prefix}.tab
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml
index 80c6af6c..55d6de5d 100644
--- a/modules/local/retroseq/discover/meta.yml
+++ b/modules/local/retroseq/discover/meta.yml
@@ -26,14 +26,14 @@ input:
   - bai:
       type: file
       description: Index of the sorted BAM file
-      pattern: "*.{bam}"
-  - tab:
-      type: file
-      description: Tab file containing a list of files, where each line is a TE type and the path to a bed file containing its coordinates in the genome.
-      pattern: "*.{tab}"
-  - extra_files:
+      pattern: "*.{bai}"
+  - me_references:
       type: file
-      description: List of files. Each file contains the coordinates of transposable elements of a particular type.
+      description: Paths to bed files containing transposable element coordinates in the genome.
+      pattern: "*.{bed}"
+  - me_types:
+      type: list
+      description: List of transposable element types to discover. Needs to be in sync with me_references.
 
 output:
   - meta:
diff --git a/nextflow.config b/nextflow.config
index e65b343d..4c4a4be8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -45,6 +45,7 @@ params {
     // File params
     svdb_query_bedpedbs        = null
     svdb_query_dbs             = null
+    mobile_element_references  = null
 
     // Alignment
     aligner                    = 'bwamem2'
@@ -329,6 +330,7 @@ includeConfig 'conf/modules/call_sv_germlinecnvcaller.config'
 includeConfig 'conf/modules/call_sv_manta.config'
 includeConfig 'conf/modules/call_sv_tiddit.config'
 includeConfig 'conf/modules/postprocess_MT_calls.config'
+includeConfig 'conf/modules/call_mobile_elements.config'
 
 // Function to ensure that resource requirements don't go beyond
 // a maximum limit
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 07988ec3..76f6a336 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -214,6 +214,15 @@
                     "help_text": "Used to extract relevant information from the references to analyse mitochondria",
                     "fa_icon": "fas fa-align-center"
                 },
+                "mobile_element_references": {
+                    "type": "string",
+                    "fa_icon": "fas fa-file",
+                    "description": "File with mobile element references",
+                    "help_text": "Path to tsv file listing mobile element references. \nFormat: <mobile element type>\\t<path to bed file>",
+                    "pattern": "^\\S+\\.tsv$",
+                    "format": "file-path",
+                    "schema": "assets/mobile_element_references_schema.json"
+                },
                 "ml_model": {
                     "type": "string",
                     "exists": true,
diff --git a/subworkflows/local/annotate_mobile_elements.nf b/subworkflows/local/annotate_mobile_elements.nf
deleted file mode 100644
index 26648434..00000000
--- a/subworkflows/local/annotate_mobile_elements.nf
+++ /dev/null
@@ -1,46 +0,0 @@
-//
-// A subworkflow to annotate mobile elements in the genome
-//
-
-include { RETROSEQ_CALL as RETROSEQ_CALL } from '../../modules/local/retroseq/call/main'
-include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER } from '../../modules/local/retroseq/discover/main'
-
-workflow ANNOTATE_STRUCTURAL_VARIANTS {
-
-    take:
-        ch_bam              // channel: [mandatory] [ val(meta), path(bam) ]
-        ch_bai              // channel: [mandatory] [ val(meta), path(bai) ]
-        ch_bam_bai          // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-        ch_genome_fasta     // channel: [mandatory] [ val(meta), path(fasta) ]
-        ch_genome_fai       // channel: [mandatory] [ val(meta), path(fai) ]
-
-    main:
-        ch_versions = Channel.empty()
-
-        // Running retroseq discover: identify discordant read pairs that might support a TE insertion
-        RETROSEQ_DISCOVER (
-            ch_bam_bai,
-            "tab",
-            "extrafiles"
-        )
-
-        // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present
-        RETROSEQ_CALL (
-            ch_bam_bai,
-            ch_genome_fasta,
-        )
-
-        // Run vep to annotate
-
-        // Run svdb to query against database
-
-        // Filter and rank as done in findtroll? E.g. protein coding
-
-        ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions)
-        ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions)
-
-    emit:
-        vcf_ann  = RETROSEQ_CALL.out.vcf // channel: [ val(meta), path(vcf) ]
-        versions = ch_versions              // channel: [ path(versions.yml) ]
-}
-
diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf
new file mode 100644
index 00000000..32b62bac
--- /dev/null
+++ b/subworkflows/local/call_mobile_elements.nf
@@ -0,0 +1,86 @@
+//
+// A subworkflow to call mobile elements in the genome
+//
+
+include { RETROSEQ_CALL as RETROSEQ_CALL             } from '../../modules/local/retroseq/call/main'
+include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER     } from '../../modules/local/retroseq/discover/main'
+include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT        } from '../../modules/nf-core/samtools/view/main'
+include { SAMTOOLS_INDEX as ME_INDEX_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/index/main'
+
+workflow CALL_MOBILE_ELEMENTS {
+
+    take:
+        ch_genome_bam_bai   // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+        ch_genome_fasta     // channel: [mandatory] [ val(meta), path(fasta) ]
+        ch_me_references    // channel: [mandatory] [path(tsv)]
+        val_genome_build    // string: [mandatory] GRCh37 or GRCh38
+
+    main:
+        ch_versions = Channel.empty()
+
+        // Building chromosome channel depending on genome version
+        // TODO: Check how retroseq behaves when running chrY on female samples
+        Channel.of(1..22, 'X', 'Y')
+            .branch { it ->
+                grch37: val_genome_build.equals('GRCh37')
+                    return [it.toString()]
+                grch38: val_genome_build.equals('GRCh38')
+                    return ['chr' + it.toString()]
+            }.set{ ch_chr_genome }
+        ch_chr = ch_chr_genome.grch37.mix(ch_chr_genome.grch38)
+
+        // Building one bam channel per chromosome and adding interval
+        ch_genome_bam_bai
+            .combine(ch_chr)
+            .map {
+                meta, bam, bai, chr ->
+                return [ meta + [interval:chr], bam, bai ]
+            }
+            .set { ch_genome_bam_bai_interval }
+
+        ME_SPLIT_ALIGNMENT(
+            ch_genome_bam_bai_interval,
+            [[:], []],
+            []
+        )
+
+        ME_INDEX_SPLIT_ALIGNMENT( ME_SPLIT_ALIGNMENT.out.bam )
+        
+        ME_SPLIT_ALIGNMENT.out.bam
+            .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true)
+            .set { ch_retroseq_input }
+
+        ch_me_references
+            .multiMap { type, path ->
+                type: type
+                path: path
+            }
+            .set { ch_me_reference_split }
+
+        RETROSEQ_DISCOVER (
+            ch_retroseq_input,
+            ch_me_reference_split.path.collect(),
+            ch_me_reference_split.type.collect()
+        )
+
+        // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present
+        RETROSEQ_CALL (
+            ch_retroseq_input,
+            RETROSEQ_DISCOVER.out.tab,
+            ch_genome_fasta
+        )
+
+        // Run vep to annotate
+
+        // Run svdb to query against database
+
+        // Filter and rank as done in findtroll? E.g. protein coding
+
+        ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions)
+        ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions)
+        ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions)
+        ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions)
+
+    emit:
+        versions = ch_versions              // channel: [ path(versions.yml) ]
+}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index c4183c93..f572a651 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -133,6 +133,7 @@ include { RANK_VARIANTS as RANK_VARIANTS_MT     } from '../subworkflows/local/ra
 include { RANK_VARIANTS as RANK_VARIANTS_SNV    } from '../subworkflows/local/rank_variants'
 include { RANK_VARIANTS as RANK_VARIANTS_SV     } from '../subworkflows/local/rank_variants'
 include { SCATTER_GENOME                        } from '../subworkflows/local/scatter_genome'
+include { CALL_MOBILE_ELEMENTS                  } from '../subworkflows/local/call_mobile_elements'
 
 
 /*
@@ -241,6 +242,9 @@ workflow RAREDISEASE {
                                                                            : Channel.empty()
     ch_intervals_y              = params.intervals_y                       ? Channel.fromPath(params.intervals_y).collect()
                                                                            : Channel.empty()
+    //ch_me_references            = params.mobile_element_references         ? Channel.fromPath(params.mobile_element_references)
+    ch_me_references            = params.mobile_element_references         ? Channel.fromSamplesheet("mobile_element_references")
+                                                                           : Channel.empty()
     ch_ml_model                 = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).map {it -> [[id:it[0].simpleName], it]}.collect()
                                                                            : Channel.value([[:],[]])
     ch_mt_intervals             = ch_references.mt_intervals
@@ -595,6 +599,13 @@ workflow RAREDISEASE {
         ch_versions = ch_versions.mix(GENS.out.versions)
     }
 
+    CALL_MOBILE_ELEMENTS(
+        ch_mapped.genome_bam_bai,
+        ch_genome_fasta,
+        ch_me_references,
+        params.genome
+    )
+
     //
     // MODULE: Pipeline reporting
     //

From cad46c183c24594f7e2e2e50c739660022335a8f Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Thu, 4 Jan 2024 11:57:46 +0100
Subject: [PATCH 06/22] updating temporary ref_me paths

---
 assets/mobile_element_references.tsv | 8 ++++----
 conf/test.config                     | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/assets/mobile_element_references.tsv b/assets/mobile_element_references.tsv
index 328e5083..e5a804ff 100644
--- a/assets/mobile_element_references.tsv
+++ b/assets/mobile_element_references.tsv
@@ -1,5 +1,5 @@
 type	path
-L1	grch37_g1k_l1.bed
-SVA	grch37_g1k_sva.bed
-ALU	grch37_g1k_alu.bed
-HERV	grch37_g1k_herv.bed
+L1	./assets/grch37_g1k_l1.bed
+SVA	./assets/grch37_g1k_sva.bed
+ALU	./assets/grch37_g1k_alu.bed
+HERV	./assets/grch37_g1k_herv.bed
diff --git a/conf/test.config b/conf/test.config
index e23102f8..e691a70d 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -42,7 +42,7 @@ params {
     intervals_y          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
     known_dbsnp          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
     // TODO: Update path for mobile element refs
-    mobile_element_references = "${projectDir}//mobile_element_references.tsv"
+    mobile_element_references = "${projectDir}/assets/mobile_element_references.tsv"
     ml_model             = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
     reduced_penetrance   = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
     score_config_mt      = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"

From 0f6edbdfccd6ae89140c29f7ecd666e356f074ea Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Thu, 4 Jan 2024 12:57:05 +0100
Subject: [PATCH 07/22] fixing path to retroseq docker

---
 modules/local/retroseq/call/main.nf     | 2 +-
 modules/local/retroseq/discover/main.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf
index 21207f9b..33ddb1af 100644
--- a/modules/local/retroseq/call/main.nf
+++ b/modules/local/retroseq/call/main.nf
@@ -4,7 +4,7 @@ process RETROSEQ_CALL {
 
     conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' }"
+    'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' }"
 
 
     input:
diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf
index b72f10c3..cf4b5fa8 100644
--- a/modules/local/retroseq/discover/main.nf
+++ b/modules/local/retroseq/discover/main.nf
@@ -4,7 +4,7 @@ process RETROSEQ_DISCOVER {
 
     conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' }"
+    'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' }"
 
 
     input:

From bddd2fc7376cb972900855141c91190cbd3c8d11 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Thu, 4 Jan 2024 13:02:49 +0100
Subject: [PATCH 08/22] propagating versions

---
 subworkflows/local/call_mobile_elements.nf | 10 +++++-----
 workflows/raredisease.nf                   |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf
index 32b62bac..9929fb7f 100644
--- a/subworkflows/local/call_mobile_elements.nf
+++ b/subworkflows/local/call_mobile_elements.nf
@@ -45,7 +45,7 @@ workflow CALL_MOBILE_ELEMENTS {
         )
 
         ME_INDEX_SPLIT_ALIGNMENT( ME_SPLIT_ALIGNMENT.out.bam )
-        
+
         ME_SPLIT_ALIGNMENT.out.bam
             .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true)
             .set { ch_retroseq_input }
@@ -76,10 +76,10 @@ workflow CALL_MOBILE_ELEMENTS {
 
         // Filter and rank as done in findtroll? E.g. protein coding
 
-        ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions)
-        ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions)
-        ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions)
-        ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions)
+        ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions).first()
+        ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions).first()
+        ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions).first()
+        ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions).first()
 
     emit:
         versions = ch_versions              // channel: [ path(versions.yml) ]
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index f572a651..6d833d08 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -605,6 +605,7 @@ workflow RAREDISEASE {
         ch_me_references,
         params.genome
     )
+    ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions)
 
     //
     // MODULE: Pipeline reporting

From 0c0fd91447508533530a6df090d050a7dad2c676 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Thu, 4 Jan 2024 15:58:56 +0100
Subject: [PATCH 09/22] joining channels to keep them in sync

---
 modules/local/retroseq/call/main.nf            |  8 ++++----
 modules/local/retroseq/call/meta.yml           | 18 +++++++++++-------
 modules/local/retroseq/discover/main.nf        |  2 +-
 modules/local/retroseq/discover/meta.yml       |  8 ++++----
 subworkflows/local/call_mobile_elements.nf     | 12 +++++++++---
 subworkflows/local/call_structural_variants.nf | 11 ++++++-----
 workflows/raredisease.nf                       |  5 ++++-
 7 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf
index 33ddb1af..150af4ba 100644
--- a/modules/local/retroseq/call/main.nf
+++ b/modules/local/retroseq/call/main.nf
@@ -4,13 +4,13 @@ process RETROSEQ_CALL {
 
     conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' }"
+    'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' }"
 
 
     input:
-    tuple val(meta), path(bam), path(bai)
-    tuple val(meta), path(input)
-    tuple val(meta), path(fasta)
+    tuple val(meta), path(tab), path(bam), path(bai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
 
     output:
     tuple val(meta), path("*.vcf"), emit: vcf
diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml
index 8147d20c..98aa0c04 100644
--- a/modules/local/retroseq/call/meta.yml
+++ b/modules/local/retroseq/call/meta.yml
@@ -19,22 +19,26 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. `[ id:'test', single_end:false ]`
+  - tab:
+      type: file
+      description: Output file from running retroseq -call
+      pattern: "*.tab"
   - bam:
       type: file
       description: Sorted BAM file
-      pattern: "*.{bam}"
+      pattern: "*.bam"
   - bai:
       type: file
       description: Index of the sorted BAM file
-      pattern: "*.{bam}"
+      pattern: "*.bam"
   - fasta:
       type: file
       description: Reference genome in fasta format
-      pattern: "*.{fasta}"
-  - tab:
+      pattern: "*.fasta"
+  - fai:
       type: file
-      description: Output file from running retroseq -call
-      pattern: "*.{tab}"
+      description: Reference FASTA index
+      pattern: "*.fai"
 
 output:
   - meta:
@@ -49,7 +53,7 @@ output:
   - vcf:
       type: file
       description: Output file containing TEVs and their location in the genome.
-      pattern: "*.{vcf}"
+      pattern: "*.vcf"
 
 authors:
   - "@peterpru"
diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf
index cf4b5fa8..2ea51344 100644
--- a/modules/local/retroseq/discover/main.nf
+++ b/modules/local/retroseq/discover/main.nf
@@ -4,7 +4,7 @@ process RETROSEQ_DISCOVER {
 
     conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' }"
+    'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' }"
 
 
     input:
diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml
index 55d6de5d..9ea47408 100644
--- a/modules/local/retroseq/discover/meta.yml
+++ b/modules/local/retroseq/discover/meta.yml
@@ -22,15 +22,15 @@ input:
   - bam:
       type: file
       description: Sorted BAM file
-      pattern: "*.{bam}"
+      pattern: "*.bam"
   - bai:
       type: file
       description: Index of the sorted BAM file
-      pattern: "*.{bai}"
+      pattern: "*.bai"
   - me_references:
       type: file
       description: Paths to bed files containing transposable element coordinates in the genome.
-      pattern: "*.{bed}"
+      pattern: "*.bed"
   - me_types:
       type: list
       description: List of transposable element types to discover. Needs to be in sync with me_references.
@@ -48,7 +48,7 @@ output:
   - tab:
       type: file
       description: Output file containing lists of read pair names per TE type
-      pattern: "*.{tab}"
+      pattern: "*.tab"
 
 authors:
   - "@peterpru"
diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf
index 9929fb7f..e2d34752 100644
--- a/subworkflows/local/call_mobile_elements.nf
+++ b/subworkflows/local/call_mobile_elements.nf
@@ -12,6 +12,7 @@ workflow CALL_MOBILE_ELEMENTS {
     take:
         ch_genome_bam_bai   // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
         ch_genome_fasta     // channel: [mandatory] [ val(meta), path(fasta) ]
+        ch_genome_fai       // channel: [mandatory] [ val(meta), path(fai) ]
         ch_me_references    // channel: [mandatory] [path(tsv)]
         val_genome_build    // string: [mandatory] GRCh37 or GRCh38
 
@@ -63,13 +64,18 @@ workflow CALL_MOBILE_ELEMENTS {
             ch_me_reference_split.type.collect()
         )
 
+        RETROSEQ_DISCOVER.out.tab
+            .join(ch_retroseq_input, failOnMismatch: true)
+            .set { ch_retroseq_call_input }
+
         // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present
         RETROSEQ_CALL (
-            ch_retroseq_input,
-            RETROSEQ_DISCOVER.out.tab,
-            ch_genome_fasta
+            ch_retroseq_call_input,
+            ch_genome_fasta,
+            ch_genome_fai
         )
 
+
         // Run vep to annotate
 
         // Run svdb to query against database
diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
index 13827f61..ee5c2cbc 100644
--- a/subworkflows/local/call_structural_variants.nf
+++ b/subworkflows/local/call_structural_variants.nf
@@ -53,10 +53,11 @@ workflow CALL_STRUCTURAL_VARIANTS {
             ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
         }
 
-        CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info)
-            .vcf
-            .collect{it[1]}
-            .set { cnvnator_vcf }
+        // Uncomment
+        //CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info)
+        //    .vcf
+        //    .collect{it[1]}
+        //    .set { cnvnator_vcf }
 
         CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
 
@@ -64,7 +65,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
         if (params.skip_germlinecnvcaller) {
             tiddit_vcf
                 .combine(manta_vcf)
-                .combine(cnvnator_vcf)
+               // .combine(cnvnator_vcf)
                 .toList()
                 .set { vcf_list }
         } else {
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 6d833d08..f416ec4d 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -293,7 +293,9 @@ workflow RAREDISEASE {
 
     // SV caller priority
     if (params.skip_germlinecnvcaller) {
-        ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"])
+        // TODO: Uncomment and remove
+        ch_svcaller_priority = Channel.value(["tiddit", "manta"])
+        //ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"])
     } else {
         ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"])
     }
@@ -602,6 +604,7 @@ workflow RAREDISEASE {
     CALL_MOBILE_ELEMENTS(
         ch_mapped.genome_bam_bai,
         ch_genome_fasta,
+        ch_genome_fai,
         ch_me_references,
         params.genome
     )

From ad9051841d39aafd4c93be0dd93c2d0a5cb24e53 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Thu, 4 Jan 2024 17:52:23 +0100
Subject: [PATCH 10/22] fix input name

---
 modules/local/retroseq/call/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf
index 150af4ba..5b13c630 100644
--- a/modules/local/retroseq/call/main.nf
+++ b/modules/local/retroseq/call/main.nf
@@ -29,7 +29,7 @@ process RETROSEQ_CALL {
         -call \\
         $args \\
         -bam $bam \\
-        -input $input \\
+        -input $tab \\
         -ref $fasta \\
         -output ${prefix}.vcf
 

From caa0029cc35357f3096c6218fd0c00dd6dbf231c Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:37:28 +0100
Subject: [PATCH 11/22] concatenate and merge vcfs

---
 conf/modules/call_mobile_elements.config   | 33 ++++++++++
 subworkflows/local/call_mobile_elements.nf | 73 +++++++++++++++++-----
 workflows/raredisease.nf                   |  1 +
 3 files changed, 93 insertions(+), 14 deletions(-)

diff --git a/conf/modules/call_mobile_elements.config b/conf/modules/call_mobile_elements.config
index b7a0b81f..788404a3 100644
--- a/conf/modules/call_mobile_elements.config
+++ b/conf/modules/call_mobile_elements.config
@@ -37,4 +37,37 @@ process {
         ext.prefix = { "${meta.id}_${meta.interval}_retroseq_call" }
     }
 
+    withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_REHEADER_ME' {
+        ext.args2 = { '--output-type v' }
+        ext.prefix = { "${meta.id}_${meta.interval}_retroseq_reheader" }
+    }
+
+    withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_SORT_ME' {
+        ext.args = { '--output-type z' }
+        ext.prefix = { "${meta.id}_${meta.interval}_retroseq_sort" }
+    }
+
+    withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_CONCAT_ME' {
+        ext.args = { '--output-type z --allow-overlaps' }
+        ext.prefix = { "${meta.id}_mobile_elements" }
+    }
+
+    withName: '.*CALL_MOBILE_ELEMENTS:SVDB_MERGE_ME' {
+        ext.args = { '--bnd_distance 150 --overlap 0.5' }
+        ext.prefix = { "${meta.id}_mobile_elements" }
+        publishDir = [
+            path: { "${params.outdir}/call_mobile_elements" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+     withName: '.*CALL_MOBILE_ELEMENTS:TABIX_ME' {
+        publishDir = [
+            path: { "${params.outdir}/call_mobile_elements" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
 }
diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf
index e2d34752..7db198be 100644
--- a/subworkflows/local/call_mobile_elements.nf
+++ b/subworkflows/local/call_mobile_elements.nf
@@ -2,10 +2,16 @@
 // A subworkflow to call mobile elements in the genome
 //
 
+include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_ME  } from '../../modules/nf-core/bcftools/reheader/main'
+include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_ME  } from '../../modules/nf-core/bcftools/concat/main'
+include { BCFTOOLS_SORT as BCFTOOLS_SORT_ME  } from '../../modules/nf-core/bcftools/sort/main'
 include { RETROSEQ_CALL as RETROSEQ_CALL             } from '../../modules/local/retroseq/call/main'
 include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER     } from '../../modules/local/retroseq/discover/main'
-include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT        } from '../../modules/nf-core/samtools/view/main'
 include { SAMTOOLS_INDEX as ME_INDEX_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT        } from '../../modules/nf-core/samtools/view/main'
+include { TABIX_TABIX as TABIX_ME              } from '../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_ME_SPLIT              } from '../../modules/nf-core/tabix/tabix/main'
+include { SVDB_MERGE as SVDB_MERGE_ME                } from '../../modules/nf-core/svdb/merge/main'
 
 workflow CALL_MOBILE_ELEMENTS {
 
@@ -14,6 +20,7 @@ workflow CALL_MOBILE_ELEMENTS {
         ch_genome_fasta     // channel: [mandatory] [ val(meta), path(fasta) ]
         ch_genome_fai       // channel: [mandatory] [ val(meta), path(fai) ]
         ch_me_references    // channel: [mandatory] [path(tsv)]
+        ch_case_info        // channel: [mandatory] [ val(case_info) ]
         val_genome_build    // string: [mandatory] GRCh37 or GRCh38
 
     main:
@@ -28,7 +35,8 @@ workflow CALL_MOBILE_ELEMENTS {
                 grch38: val_genome_build.equals('GRCh38')
                     return ['chr' + it.toString()]
             }.set{ ch_chr_genome }
-        ch_chr = ch_chr_genome.grch37.mix(ch_chr_genome.grch38)
+        ch_chr_genome.grch37.mix(ch_chr_genome.grch38)
+            .set { ch_chr }
 
         // Building one bam channel per chromosome and adding interval
         ch_genome_bam_bai
@@ -39,13 +47,9 @@ workflow CALL_MOBILE_ELEMENTS {
             }
             .set { ch_genome_bam_bai_interval }
 
-        ME_SPLIT_ALIGNMENT(
-            ch_genome_bam_bai_interval,
-            [[:], []],
-            []
-        )
-
-        ME_INDEX_SPLIT_ALIGNMENT( ME_SPLIT_ALIGNMENT.out.bam )
+        // Split bam file on chromosome and index
+        ME_SPLIT_ALIGNMENT ( ch_genome_bam_bai_interval, [[:], []], [] )
+        ME_INDEX_SPLIT_ALIGNMENT ( ME_SPLIT_ALIGNMENT.out.bam )
 
         ME_SPLIT_ALIGNMENT.out.bam
             .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true)
@@ -68,25 +72,66 @@ workflow CALL_MOBILE_ELEMENTS {
             .join(ch_retroseq_input, failOnMismatch: true)
             .set { ch_retroseq_call_input }
 
-        // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present
         RETROSEQ_CALL (
             ch_retroseq_call_input,
             ch_genome_fasta,
             ch_genome_fai
         )
 
+        // Fix the vcf by adding header, sorting and indexing
+        BCFTOOLS_REHEADER_ME (
+            RETROSEQ_CALL.out.vcf.map{ meta, vcf -> [ meta, vcf, [] ] },
+            ch_genome_fai
+        )
+        BCFTOOLS_SORT_ME ( BCFTOOLS_REHEADER_ME.out.vcf )
+        TABIX_ME_SPLIT ( BCFTOOLS_SORT_ME.out.vcf )
+
+        // Concatenate the chromosme vcfs per sample
+        BCFTOOLS_SORT_ME.out.vcf
+            .map { meta, vcf -> [ meta.findAll { !(it.key in ['interval']) }, vcf ] }
+            .groupTuple(size: 24)
+            .set { ch_vcfs }
+
+        TABIX_ME_SPLIT.out.tbi
+            .map { meta, tbi -> [ meta.findAll { !(it.key in ['interval']) }, tbi ] }
+            .groupTuple(size: 24)
+            .set { ch_tbis }
+
+        ch_vcfs.join(ch_tbis)
+            .set { ch_vcfs_tbis}
+
+        BCFTOOLS_CONCAT_ME ( ch_vcfs_tbis )
+
+        // Merge sample vcfs to a case vcf
+        BCFTOOLS_CONCAT_ME.out.vcf
+            .collect{it[1]}
+            .toList()
+            .collect()
+            .set { ch_vcf_list }
 
-        // Run vep to annotate
+        ch_case_info
+            .combine(ch_vcf_list)
+            .set { ch_svdb_merge_me_input }
 
-        // Run svdb to query against database
+        SVDB_MERGE_ME ( ch_svdb_merge_me_input, [] )
+        TABIX_ME ( SVDB_MERGE_ME.out.vcf )
 
-        // Filter and rank as done in findtroll? E.g. protein coding
+        SVDB_MERGE_ME.out.vcf
+            .join(TABIX_ME.out.tbi)
+            .set { ch_me_vcf }
 
         ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions).first()
         ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions).first()
         ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions).first()
         ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions).first()
+        ch_versions = ch_versions.mix(BCFTOOLS_REHEADER_ME.out.versions).first()
+        ch_versions = ch_versions.mix(BCFTOOLS_SORT_ME.out.versions).first()
+        ch_versions = ch_versions.mix(TABIX_ME_SPLIT.out.versions).first()
+        ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_ME.out.versions).first()
+        ch_versions = ch_versions.mix(SVDB_MERGE_ME.out.versions)
+        ch_versions = ch_versions.mix(TABIX_ME.out.versions)
 
     emit:
-        versions = ch_versions              // channel: [ path(versions.yml) ]
+        me_vcf   = ch_me_vcf     // channel: [ val(meta), path(vcf), path(tbi) ]
+        versions = ch_versions   // channel: [ path(versions.yml) ]
 }
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index f416ec4d..0016a154 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -606,6 +606,7 @@ workflow RAREDISEASE {
         ch_genome_fasta,
         ch_genome_fai,
         ch_me_references,
+        ch_case_info,
         params.genome
     )
     ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions)

From c6c089039f28fc1ed9b108a55f8d2ba6cadb5d39 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Mon, 8 Jan 2024 15:45:16 +0100
Subject: [PATCH 12/22] looking into joining error

---
 lib/CustomChannelOperators.groovy          | 156 +++++++++++++++++++++
 subworkflows/local/call_mobile_elements.nf |  19 ++-
 2 files changed, 172 insertions(+), 3 deletions(-)
 create mode 100644 lib/CustomChannelOperators.groovy

diff --git a/lib/CustomChannelOperators.groovy b/lib/CustomChannelOperators.groovy
new file mode 100644
index 00000000..f1317a46
--- /dev/null
+++ b/lib/CustomChannelOperators.groovy
@@ -0,0 +1,156 @@
+//
+// https://github.com/Midnighter/nextflow-utility-services/blob/1f84278c965255fb2bbe290b8d6c9f33f71da26f/lib/CustomChannelOperators.groovy
+//
+/**
+ * MIT License
+ *
+ * Copyright (c) 2022 Moritz E. Beber
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+import groovyx.gpars.dataflow.DataflowBroadcast
+
+/**
+ * Provide a collection of custom channel operators that go beyond the nextflow default.
+ *
+ * @author Moritz E. Beber <https://github.com/Midnighter>
+ * @author Mahesh Binzer-Panchal <https://github.com/mahesh-panchal>
+ */
+class CustomChannelOperators {
+
+    /**
+     * Join two channels by one or more keys from a map contained in each channel.
+     *
+     * The channel elements are assumed to be tuples whose size is at least two.
+     * Typically, the maps to join by are in the first position of the tuples.
+     * Please read https://www.nextflow.io/docs/latest/operator.html#join carefully.
+     *
+     * @param left The left-hand side channel in the join.
+     * @param right The right-hand side channel in the join.
+     * @param keys A list of strings providing the map keys to compare.
+     * @param leftBy The position of the map in the left channel.
+     * @param rightBy The position of the map in the right channel.
+     * @param joinArgs A map of keyword arguments that is passed on to the nextflow join call.
+     * @return The joined channels with the map in the original position of the left channel,
+     *      followed by all elements of the right channel except for the map.
+     */
+    static DataflowBroadcast joinOnKeys(
+            DataflowBroadcast left,
+            DataflowBroadcast right,
+            List<String> keys,
+            Integer leftBy,
+            Integer rightBy,
+            Map joinArgs
+    ) {
+        // Extract desired keys from the left map, located at `leftBy`, and prepend them.
+        DataflowBroadcast newLeft = left.map { tuple ->
+            extractKeys(tuple, keys, leftBy) + tuple
+        }
+
+        // Extract desired keys from the right map, located at `rightBy`, and prepend them.
+        // Also drop the map itself from the right.
+        DataflowBroadcast newRight = right.map { tuple ->
+            extractKeys(tuple, keys, rightBy) + removeMap(tuple, rightBy)
+        }
+
+        // Set the positions to join on explicitly.
+        joinArgs.by = 0..<keys.size()
+
+        // Apply the join channel operator to the channels and finally drop the keys used for joining tuples.
+        return newLeft.join(joinArgs, newRight).map { tuple -> dropKeys(tuple, keys) }
+    }
+
+    static DataflowBroadcast joinOnKeys(
+            Map joinArgs,
+            DataflowBroadcast left,
+            DataflowBroadcast right,
+            String key,
+            Integer leftBy = 0,
+            Integer rightBy = 0
+    ) {
+        return joinOnKeys(left, right, [key], leftBy, rightBy, joinArgs)
+    }
+
+    static DataflowBroadcast joinOnKeys(
+            DataflowBroadcast left,
+            DataflowBroadcast right,
+            String key,
+            Integer leftBy = 0,
+            Integer rightBy = 0
+    ) {
+        return joinOnKeys(left, right, [key], leftBy, rightBy, [:])
+    }
+
+    static DataflowBroadcast joinOnKeys(
+            Map joinArgs,
+            DataflowBroadcast left,
+            DataflowBroadcast right,
+            List<String> keys,
+            Integer leftBy = 0,
+            Integer rightBy = 0
+    ) {
+        return joinOnKeys(left, right, keys, leftBy, rightBy, joinArgs)
+    }
+
+    static DataflowBroadcast joinOnKeys(
+            DataflowBroadcast left,
+            DataflowBroadcast right,
+            List<String> keys,
+            Integer leftBy = 0,
+            Integer rightBy = 0
+    ) {
+        return joinOnKeys(left, right, keys, leftBy, rightBy, [:])
+    }
+
+    /**
+     * Extract values from a map at given position with given keys.
+     *
+     * @param tuple A tuple (`List`) channel element.
+     * @param keys A list of strings denoting keys in the map.
+     * @param index The position of the map in the tuple.
+     * @return The list of values extracted from the map.
+     */
+    private static List extractKeys(List tuple, List<String> keys, Integer index) {
+        return tuple[index].subMap(keys).values().toList()
+    }
+
+    /**
+     * Return a new tuple without the map in it.
+     *
+     * @param tuple A tuple (`List`) channel element.
+     * @param index The position of the map in the tuple.
+     * @return A copy of the list without the map.
+     */
+    private static List removeMap(List tuple, Integer index) {
+        return tuple[0..<index] + tuple[(index + 1)..<tuple.size()]
+    }
+
+    /**
+     * Drop elements corresponding to the number of keys from the head of the list.
+     *
+     * @param tuple A tuple (`List`) channel element.
+     * @param keys A list of strings denoting keys in the map.
+     * @return The given list but without the prepended values.
+     */
+    private static List dropKeys(List tuple, List<String> keys) {
+        return tuple.drop(keys.size())
+    }
+
+}
diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf
index 7db198be..12e4085e 100644
--- a/subworkflows/local/call_mobile_elements.nf
+++ b/subworkflows/local/call_mobile_elements.nf
@@ -26,8 +26,11 @@ workflow CALL_MOBILE_ELEMENTS {
     main:
         ch_versions = Channel.empty()
 
+        // TODO: Fix one sample join error
+
         // Building chromosome channel depending on genome version
         // TODO: Check how retroseq behaves when running chrY on female samples
+        // TODO: Would be nicer to read in the fai or dict
         Channel.of(1..22, 'X', 'Y')
             .branch { it ->
                 grch37: val_genome_build.equals('GRCh37')
@@ -51,8 +54,18 @@ workflow CALL_MOBILE_ELEMENTS {
         ME_SPLIT_ALIGNMENT ( ch_genome_bam_bai_interval, [[:], []], [] )
         ME_INDEX_SPLIT_ALIGNMENT ( ME_SPLIT_ALIGNMENT.out.bam )
 
+        ME_SPLIT_ALIGNMENT.out.bam.toList().dump(tag:'bam')
+        ME_INDEX_SPLIT_ALIGNMENT.out.bai.toList().dump(tag:'bai')
+
+        //ch_retroseq_input = CustomChannelOperators.joinOnKeys(
+        //    ME_SPLIT_ALIGNMENT.out.bam,
+        //    ME_SPLIT_ALIGNMENT.out.bai,
+        //    ["id", "interval"], 0,0, [failOnMismatch:true, failOnDuplicate:true]
+        //)
+        //ch_retroseq_input.dump(tag: 'join', pretty: true)
+
         ME_SPLIT_ALIGNMENT.out.bam
-            .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true)
+            .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true, failOnDuplicate: true)
             .set { ch_retroseq_input }
 
         ch_me_references
@@ -88,12 +101,12 @@ workflow CALL_MOBILE_ELEMENTS {
 
         // Concatenate the chromosme vcfs per sample
         BCFTOOLS_SORT_ME.out.vcf
-            .map { meta, vcf -> [ meta.findAll { !(it.key in ['interval']) }, vcf ] }
+            .map { meta, vcf -> [ meta - meta.subMap('interval'), vcf ] }
             .groupTuple(size: 24)
             .set { ch_vcfs }
 
         TABIX_ME_SPLIT.out.tbi
-            .map { meta, tbi -> [ meta.findAll { !(it.key in ['interval']) }, tbi ] }
+            .map { meta, tbi -> [ meta - meta.subMap('interval'), tbi ] }
             .groupTuple(size: 24)
             .set { ch_tbis }
 

From 45cc753e086ded70fa73fda6a192f7e5861d6b62 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Tue, 9 Jan 2024 11:00:28 +0100
Subject: [PATCH 13/22] adding missing test file

---
 conf/test_one_sample.config                |   2 +
 lib/CustomChannelOperators.groovy          | 156 ---------------------
 subworkflows/local/call_mobile_elements.nf |  23 +--
 3 files changed, 6 insertions(+), 175 deletions(-)
 delete mode 100644 lib/CustomChannelOperators.groovy

diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config
index f39cb40c..81a3dd12 100644
--- a/conf/test_one_sample.config
+++ b/conf/test_one_sample.config
@@ -41,6 +41,8 @@ params {
     intervals_wgs        = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
     intervals_y          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
     known_dbsnp          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
+    // TODO: Update path for mobile element refs
+    mobile_element_references = "${projectDir}/assets/mobile_element_references.tsv"
     ml_model             = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
     reduced_penetrance   = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
     score_config_snv     = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
diff --git a/lib/CustomChannelOperators.groovy b/lib/CustomChannelOperators.groovy
deleted file mode 100644
index f1317a46..00000000
--- a/lib/CustomChannelOperators.groovy
+++ /dev/null
@@ -1,156 +0,0 @@
-//
-// https://github.com/Midnighter/nextflow-utility-services/blob/1f84278c965255fb2bbe290b8d6c9f33f71da26f/lib/CustomChannelOperators.groovy
-//
-/**
- * MIT License
- *
- * Copyright (c) 2022 Moritz E. Beber
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-import groovyx.gpars.dataflow.DataflowBroadcast
-
-/**
- * Provide a collection of custom channel operators that go beyond the nextflow default.
- *
- * @author Moritz E. Beber <https://github.com/Midnighter>
- * @author Mahesh Binzer-Panchal <https://github.com/mahesh-panchal>
- */
-class CustomChannelOperators {
-
-    /**
-     * Join two channels by one or more keys from a map contained in each channel.
-     *
-     * The channel elements are assumed to be tuples whose size is at least two.
-     * Typically, the maps to join by are in the first position of the tuples.
-     * Please read https://www.nextflow.io/docs/latest/operator.html#join carefully.
-     *
-     * @param left The left-hand side channel in the join.
-     * @param right The right-hand side channel in the join.
-     * @param keys A list of strings providing the map keys to compare.
-     * @param leftBy The position of the map in the left channel.
-     * @param rightBy The position of the map in the right channel.
-     * @param joinArgs A map of keyword arguments that is passed on to the nextflow join call.
-     * @return The joined channels with the map in the original position of the left channel,
-     *      followed by all elements of the right channel except for the map.
-     */
-    static DataflowBroadcast joinOnKeys(
-            DataflowBroadcast left,
-            DataflowBroadcast right,
-            List<String> keys,
-            Integer leftBy,
-            Integer rightBy,
-            Map joinArgs
-    ) {
-        // Extract desired keys from the left map, located at `leftBy`, and prepend them.
-        DataflowBroadcast newLeft = left.map { tuple ->
-            extractKeys(tuple, keys, leftBy) + tuple
-        }
-
-        // Extract desired keys from the right map, located at `rightBy`, and prepend them.
-        // Also drop the map itself from the right.
-        DataflowBroadcast newRight = right.map { tuple ->
-            extractKeys(tuple, keys, rightBy) + removeMap(tuple, rightBy)
-        }
-
-        // Set the positions to join on explicitly.
-        joinArgs.by = 0..<keys.size()
-
-        // Apply the join channel operator to the channels and finally drop the keys used for joining tuples.
-        return newLeft.join(joinArgs, newRight).map { tuple -> dropKeys(tuple, keys) }
-    }
-
-    static DataflowBroadcast joinOnKeys(
-            Map joinArgs,
-            DataflowBroadcast left,
-            DataflowBroadcast right,
-            String key,
-            Integer leftBy = 0,
-            Integer rightBy = 0
-    ) {
-        return joinOnKeys(left, right, [key], leftBy, rightBy, joinArgs)
-    }
-
-    static DataflowBroadcast joinOnKeys(
-            DataflowBroadcast left,
-            DataflowBroadcast right,
-            String key,
-            Integer leftBy = 0,
-            Integer rightBy = 0
-    ) {
-        return joinOnKeys(left, right, [key], leftBy, rightBy, [:])
-    }
-
-    static DataflowBroadcast joinOnKeys(
-            Map joinArgs,
-            DataflowBroadcast left,
-            DataflowBroadcast right,
-            List<String> keys,
-            Integer leftBy = 0,
-            Integer rightBy = 0
-    ) {
-        return joinOnKeys(left, right, keys, leftBy, rightBy, joinArgs)
-    }
-
-    static DataflowBroadcast joinOnKeys(
-            DataflowBroadcast left,
-            DataflowBroadcast right,
-            List<String> keys,
-            Integer leftBy = 0,
-            Integer rightBy = 0
-    ) {
-        return joinOnKeys(left, right, keys, leftBy, rightBy, [:])
-    }
-
-    /**
-     * Extract values from a map at given position with given keys.
-     *
-     * @param tuple A tuple (`List`) channel element.
-     * @param keys A list of strings denoting keys in the map.
-     * @param index The position of the map in the tuple.
-     * @return The list of values extracted from the map.
-     */
-    private static List extractKeys(List tuple, List<String> keys, Integer index) {
-        return tuple[index].subMap(keys).values().toList()
-    }
-
-    /**
-     * Return a new tuple without the map in it.
-     *
-     * @param tuple A tuple (`List`) channel element.
-     * @param index The position of the map in the tuple.
-     * @return A copy of the list without the map.
-     */
-    private static List removeMap(List tuple, Integer index) {
-        return tuple[0..<index] + tuple[(index + 1)..<tuple.size()]
-    }
-
-    /**
-     * Drop elements corresponding to the number of keys from the head of the list.
-     *
-     * @param tuple A tuple (`List`) channel element.
-     * @param keys A list of strings denoting keys in the map.
-     * @return The given list but without the prepended values.
-     */
-    private static List dropKeys(List tuple, List<String> keys) {
-        return tuple.drop(keys.size())
-    }
-
-}
diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf
index 12e4085e..45432ae6 100644
--- a/subworkflows/local/call_mobile_elements.nf
+++ b/subworkflows/local/call_mobile_elements.nf
@@ -26,8 +26,6 @@ workflow CALL_MOBILE_ELEMENTS {
     main:
         ch_versions = Channel.empty()
 
-        // TODO: Fix one sample join error
-
         // Building chromosome channel depending on genome version
         // TODO: Check how retroseq behaves when running chrY on female samples
         // TODO: Would be nicer to read in the fai or dict
@@ -54,16 +52,6 @@ workflow CALL_MOBILE_ELEMENTS {
         ME_SPLIT_ALIGNMENT ( ch_genome_bam_bai_interval, [[:], []], [] )
         ME_INDEX_SPLIT_ALIGNMENT ( ME_SPLIT_ALIGNMENT.out.bam )
 
-        ME_SPLIT_ALIGNMENT.out.bam.toList().dump(tag:'bam')
-        ME_INDEX_SPLIT_ALIGNMENT.out.bai.toList().dump(tag:'bai')
-
-        //ch_retroseq_input = CustomChannelOperators.joinOnKeys(
-        //    ME_SPLIT_ALIGNMENT.out.bam,
-        //    ME_SPLIT_ALIGNMENT.out.bai,
-        //    ["id", "interval"], 0,0, [failOnMismatch:true, failOnDuplicate:true]
-        //)
-        //ch_retroseq_input.dump(tag: 'join', pretty: true)
-
         ME_SPLIT_ALIGNMENT.out.bam
             .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true, failOnDuplicate: true)
             .set { ch_retroseq_input }
@@ -99,7 +87,7 @@ workflow CALL_MOBILE_ELEMENTS {
         BCFTOOLS_SORT_ME ( BCFTOOLS_REHEADER_ME.out.vcf )
         TABIX_ME_SPLIT ( BCFTOOLS_SORT_ME.out.vcf )
 
-        // Concatenate the chromosme vcfs per sample
+        // Concatenate the chromosome vcfs to sample vcfs
         BCFTOOLS_SORT_ME.out.vcf
             .map { meta, vcf -> [ meta - meta.subMap('interval'), vcf ] }
             .groupTuple(size: 24)
@@ -129,10 +117,6 @@ workflow CALL_MOBILE_ELEMENTS {
         SVDB_MERGE_ME ( ch_svdb_merge_me_input, [] )
         TABIX_ME ( SVDB_MERGE_ME.out.vcf )
 
-        SVDB_MERGE_ME.out.vcf
-            .join(TABIX_ME.out.tbi)
-            .set { ch_me_vcf }
-
         ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions).first()
         ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions).first()
         ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions).first()
@@ -145,6 +129,7 @@ workflow CALL_MOBILE_ELEMENTS {
         ch_versions = ch_versions.mix(TABIX_ME.out.versions)
 
     emit:
-        me_vcf   = ch_me_vcf     // channel: [ val(meta), path(vcf), path(tbi) ]
-        versions = ch_versions   // channel: [ path(versions.yml) ]
+        vcf      = SVDB_MERGE_ME.out.vcf // channel: [ val(meta), path(vcf) ]
+        tbi      = TABIX_ME.out.tbi      // channel: [ val(meta), path(tbi) ]
+        versions = ch_versions           // channel: [ path(versions.yml) ]
 }

From 08b7203e31d9b81833d38fdbaf5bbac89c173f04 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Tue, 9 Jan 2024 11:06:28 +0100
Subject: [PATCH 14/22]  fix config indentation

---
 conf/modules/call_mobile_elements.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules/call_mobile_elements.config b/conf/modules/call_mobile_elements.config
index 788404a3..fabc6b94 100644
--- a/conf/modules/call_mobile_elements.config
+++ b/conf/modules/call_mobile_elements.config
@@ -62,7 +62,7 @@ process {
         ]
     }
 
-     withName: '.*CALL_MOBILE_ELEMENTS:TABIX_ME' {
+    withName: '.*CALL_MOBILE_ELEMENTS:TABIX_ME' {
         publishDir = [
             path: { "${params.outdir}/call_mobile_elements" },
             mode: params.publish_dir_mode,

From 3be46420032c2018ee47842db4035e2797e908fe Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Tue, 9 Jan 2024 16:23:01 +0100
Subject: [PATCH 15/22] fixing around with test data

---
 CITATIONS.md                               |  4 ++++
 README.md                                  |  4 ++++
 assets/grch37_g1k_alu.bed                  |  0
 assets/grch37_g1k_herv.bed                 |  0
 assets/grch37_g1k_l1.bed                   |  0
 assets/grch37_g1k_sva.bed                  |  0
 assets/mobile_element_references.tsv       |  5 -----
 conf/test.config                           |  4 ++--
 conf/test_one_sample.config                |  4 ++--
 conf/test_sentieon.config                  |  2 ++
 main.nf                                    |  1 -
 subworkflows/local/call_mobile_elements.nf | 21 +++++++--------------
 workflows/raredisease.nf                   | 20 +++++++++-----------
 13 files changed, 30 insertions(+), 35 deletions(-)
 delete mode 100644 assets/grch37_g1k_alu.bed
 delete mode 100644 assets/grch37_g1k_herv.bed
 delete mode 100644 assets/grch37_g1k_l1.bed
 delete mode 100644 assets/grch37_g1k_sva.bed
 delete mode 100644 assets/mobile_element_references.tsv

diff --git a/CITATIONS.md b/CITATIONS.md
index b20f58ac..f9973ae3 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -100,6 +100,10 @@
 
   > Okonechnikov K, Conesa A, García-Alcalde F. Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics. 2016;32(2):292-294. doi:10.1093/bioinformatics/btv566
 
+- [RetroSeq](https://academic.oup.com/bioinformatics/article/29/3/389/257479)
+
+  > Thomas M. Keane, Kim Wong, David J. Adams, RetroSeq: transposable element discovery from next-generation sequencing data. Bioinformatics.2013 Feb 1;29(3):389-90. doi: 10.1093/bioinformatics/bts697
+
 - [rhocall](https://github.com/dnil/rhocall)
 
 - [Sentieon DNAscope](https://www.biorxiv.org/content/10.1101/2022.05.20.492556v1.abstract)
diff --git a/README.md b/README.md
index 3c36427a..ff227da8 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,10 @@ On release, automated continuous integration tests run the pipeline on a full-si
 - [Expansion Hunter](https://github.com/Illumina/ExpansionHunter)
 - [Stranger](https://github.com/Clinical-Genomics/stranger)
 
+**9. Variant calling - mobile elements:**
+
+- [RetroSeq](https://github.com/tk2/RetroSeq)
+
 **9. Rank variants - SV and SNV:**
 
 - [GENMOD](https://github.com/Clinical-Genomics/genmod)
diff --git a/assets/grch37_g1k_alu.bed b/assets/grch37_g1k_alu.bed
deleted file mode 100644
index e69de29b..00000000
diff --git a/assets/grch37_g1k_herv.bed b/assets/grch37_g1k_herv.bed
deleted file mode 100644
index e69de29b..00000000
diff --git a/assets/grch37_g1k_l1.bed b/assets/grch37_g1k_l1.bed
deleted file mode 100644
index e69de29b..00000000
diff --git a/assets/grch37_g1k_sva.bed b/assets/grch37_g1k_sva.bed
deleted file mode 100644
index e69de29b..00000000
diff --git a/assets/mobile_element_references.tsv b/assets/mobile_element_references.tsv
deleted file mode 100644
index e5a804ff..00000000
--- a/assets/mobile_element_references.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-type	path
-L1	./assets/grch37_g1k_l1.bed
-SVA	./assets/grch37_g1k_sva.bed
-ALU	./assets/grch37_g1k_alu.bed
-HERV	./assets/grch37_g1k_herv.bed
diff --git a/conf/test.config b/conf/test.config
index e691a70d..d43f319b 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -36,13 +36,13 @@ params {
 
     // Genome references
     fasta                = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta"
+    fai                  = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai"
     genome               = 'GRCh37'
     gnomad_af            = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz"
     intervals_wgs        = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
     intervals_y          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
     known_dbsnp          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
-    // TODO: Update path for mobile element refs
-    mobile_element_references = "${projectDir}/assets/mobile_element_references.tsv"
+    mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
     ml_model             = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
     reduced_penetrance   = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
     score_config_mt      = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config
index 81a3dd12..dc99e145 100644
--- a/conf/test_one_sample.config
+++ b/conf/test_one_sample.config
@@ -36,13 +36,13 @@ params {
 
     // Genome references
     fasta                = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta"
+    fai                  = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai"
     genome               = 'GRCh37'
     gnomad_af            = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz"
     intervals_wgs        = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
     intervals_y          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
     known_dbsnp          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
-    // TODO: Update path for mobile element refs
-    mobile_element_references = "${projectDir}/assets/mobile_element_references.tsv"
+    mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
     ml_model             = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
     reduced_penetrance   = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
     score_config_snv     = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
diff --git a/conf/test_sentieon.config b/conf/test_sentieon.config
index 64618d12..aca4b755 100644
--- a/conf/test_sentieon.config
+++ b/conf/test_sentieon.config
@@ -31,11 +31,13 @@ params {
 
     // Genome references
     fasta                = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta"
+    fai                  = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai"
     genome               = 'GRCh37'
     gnomad_af            = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz"
     intervals_wgs        = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
     intervals_y          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
     known_dbsnp          = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
+    mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
     ml_model             = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
     reduced_penetrance   = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
     score_config_snv     = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
diff --git a/main.nf b/main.nf
index 5b0bb182..0ebf4e51 100644
--- a/main.nf
+++ b/main.nf
@@ -16,7 +16,6 @@ nextflow.enable.dsl = 2
     GENOME PARAMETER VALUES
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
-// TODO: Are the genome config used?
 params.fasta                          = WorkflowMain.getGenomeAttribute(params, 'fasta')
 params.fai                            = WorkflowMain.getGenomeAttribute(params, 'fai')
 params.bwa                            = WorkflowMain.getGenomeAttribute(params, 'bwa')
diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf
index 45432ae6..c77a11d9 100644
--- a/subworkflows/local/call_mobile_elements.nf
+++ b/subworkflows/local/call_mobile_elements.nf
@@ -3,13 +3,13 @@
 //
 
 include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_ME  } from '../../modules/nf-core/bcftools/reheader/main'
-include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_ME  } from '../../modules/nf-core/bcftools/concat/main'
-include { BCFTOOLS_SORT as BCFTOOLS_SORT_ME  } from '../../modules/nf-core/bcftools/sort/main'
+include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_ME      } from '../../modules/nf-core/bcftools/concat/main'
+include { BCFTOOLS_SORT as BCFTOOLS_SORT_ME          } from '../../modules/nf-core/bcftools/sort/main'
 include { RETROSEQ_CALL as RETROSEQ_CALL             } from '../../modules/local/retroseq/call/main'
 include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER     } from '../../modules/local/retroseq/discover/main'
 include { SAMTOOLS_INDEX as ME_INDEX_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/index/main'
 include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT        } from '../../modules/nf-core/samtools/view/main'
-include { TABIX_TABIX as TABIX_ME              } from '../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_ME                    } from '../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_ME_SPLIT              } from '../../modules/nf-core/tabix/tabix/main'
 include { SVDB_MERGE as SVDB_MERGE_ME                } from '../../modules/nf-core/svdb/merge/main'
 
@@ -26,17 +26,10 @@ workflow CALL_MOBILE_ELEMENTS {
     main:
         ch_versions = Channel.empty()
 
-        // Building chromosome channel depending on genome version
-        // TODO: Check how retroseq behaves when running chrY on female samples
-        // TODO: Would be nicer to read in the fai or dict
-        Channel.of(1..22, 'X', 'Y')
-            .branch { it ->
-                grch37: val_genome_build.equals('GRCh37')
-                    return [it.toString()]
-                grch38: val_genome_build.equals('GRCh38')
-                    return ['chr' + it.toString()]
-            }.set{ ch_chr_genome }
-        ch_chr_genome.grch37.mix(ch_chr_genome.grch38)
+        // Building chromosome channels based on fasta index
+        ch_genome_fai
+            .splitCsv(sep: "\t", elem: 1, limit: 25)
+            .map { meta, fai -> [ fai.first() ] }
             .set { ch_chr }
 
         // Building one bam channel per chromosome and adding interval
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 0016a154..5fafd6a7 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -293,9 +293,7 @@ workflow RAREDISEASE {
 
     // SV caller priority
     if (params.skip_germlinecnvcaller) {
-        // TODO: Uncomment and remove
-        ch_svcaller_priority = Channel.value(["tiddit", "manta"])
-        //ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"])
+        ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"])
     } else {
         ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"])
     }
@@ -356,14 +354,14 @@ workflow RAREDISEASE {
     //
     // EXPANSIONHUNTER AND STRANGER
     //
-    CALL_REPEAT_EXPANSIONS (
-        ch_mapped.genome_bam_bai,
-        ch_variant_catalog,
-        ch_case_info,
-        ch_genome_fasta,
-        ch_genome_fai
-    )
-    ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
+    //CALL_REPEAT_EXPANSIONS (
+    //    ch_mapped.genome_bam_bai,
+    //    ch_variant_catalog,
+    //    ch_case_info,
+    //    ch_genome_fasta,
+    //    ch_genome_fai
+    //)
+    //ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
 
     //
     // SNV CALLING

From 4bbb604114b628aa1239fc41f5236b0b1fd4e5a8 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Tue, 9 Jan 2024 16:27:43 +0100
Subject: [PATCH 16/22] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ff227da8..ebb4b95a 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
 - [RetroSeq](https://github.com/tk2/RetroSeq)
 
-**9. Rank variants - SV and SNV:**
+**10. Rank variants - SV and SNV:**
 
 - [GENMOD](https://github.com/Clinical-Genomics/genmod)
 

From e50756fde73afd0ffdb7a84b2efd5c1cac2d21bd Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Tue, 9 Jan 2024 16:29:39 +0100
Subject: [PATCH 17/22] uncommenting lines

---
 workflows/raredisease.nf | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 5fafd6a7..cb23fe7d 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -354,14 +354,14 @@ workflow RAREDISEASE {
     //
     // EXPANSIONHUNTER AND STRANGER
     //
-    //CALL_REPEAT_EXPANSIONS (
-    //    ch_mapped.genome_bam_bai,
-    //    ch_variant_catalog,
-    //    ch_case_info,
-    //    ch_genome_fasta,
-    //    ch_genome_fai
-    //)
-    //ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
+    CALL_REPEAT_EXPANSIONS (
+        ch_mapped.genome_bam_bai,
+        ch_variant_catalog,
+        ch_case_info,
+        ch_genome_fasta,
+        ch_genome_fai
+    )
+    ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
 
     //
     // SNV CALLING

From 8cacd8f99960acd6040ad0b6246a81083539ade5 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Tue, 9 Jan 2024 16:54:40 +0100
Subject: [PATCH 18/22] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce1a867b..6a0e09ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - ngsbits samplegender to check sex [#453](https://github.com/nf-core/raredisease/pull/453)
 - New workflow for generating cgh files from SV vcfs for interpretation in the CytosSure interpretation software. Turned off by default [#456](https://github.com/nf-core/raredisease/pull/456/)
 - Fastp to do adapter trimming. It can be skipped using `--skip_fastp` [#457](https://github.com/nf-core/raredisease/pull/457)
+- New workflow for calling insertion of mobile elements [#440](https://github.com/nf-core/raredisease/pull/440)
 
 ### `Changed`
 

From 9c178a1a51cf5046a503fdb60f560f5371d8c79c Mon Sep 17 00:00:00 2001
From: Anders Jemt <jemten@users.noreply.github.com>
Date: Wed, 10 Jan 2024 16:51:24 +0100
Subject: [PATCH 19/22] bring back a few lines that was commented out

---
 subworkflows/local/call_structural_variants.nf | 11 +++++------
 workflows/raredisease.nf                       |  1 -
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
index 0007e251..ad45dc29 100644
--- a/subworkflows/local/call_structural_variants.nf
+++ b/subworkflows/local/call_structural_variants.nf
@@ -53,11 +53,10 @@ workflow CALL_STRUCTURAL_VARIANTS {
             ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
         }
 
-        // Uncomment
-        //CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info)
-        //    .vcf
-        //    .collect{it[1]}
-        //    .set { cnvnator_vcf }
+        CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info)
+            .vcf
+            .collect{it[1]}
+            .set { cnvnator_vcf }
 
         CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
 
@@ -65,7 +64,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
         if (params.skip_germlinecnvcaller) {
             tiddit_vcf
                 .combine(manta_vcf)
-               // .combine(cnvnator_vcf)
+                .combine(cnvnator_vcf)
                 .toList()
                 .set { vcf_list }
         } else {
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index cb23fe7d..76e37e47 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -242,7 +242,6 @@ workflow RAREDISEASE {
                                                                            : Channel.empty()
     ch_intervals_y              = params.intervals_y                       ? Channel.fromPath(params.intervals_y).collect()
                                                                            : Channel.empty()
-    //ch_me_references            = params.mobile_element_references         ? Channel.fromPath(params.mobile_element_references)
     ch_me_references            = params.mobile_element_references         ? Channel.fromSamplesheet("mobile_element_references")
                                                                            : Channel.empty()
     ch_ml_model                 = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).map {it -> [[id:it[0].simpleName], it]}.collect()

From 9564c642706b9260fa86ee5f8fbae963ee0e130d Mon Sep 17 00:00:00 2001
From: Anders Jemt <jemten@users.noreply.github.com>
Date: Thu, 11 Jan 2024 14:03:46 +0100
Subject: [PATCH 20/22] add meta2 and meta3 to retroseq_call meta.yml

---
 modules/local/retroseq/call/meta.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml
index 98aa0c04..4c1e5b6c 100644
--- a/modules/local/retroseq/call/meta.yml
+++ b/modules/local/retroseq/call/meta.yml
@@ -31,10 +31,20 @@ input:
       type: file
       description: Index of the sorted BAM file
       pattern: "*.bam"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
   - fasta:
       type: file
       description: Reference genome in fasta format
       pattern: "*.fasta"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
   - fai:
       type: file
       description: Reference FASTA index

From 01a83137ff61268e64b85138f444361f5e9f2fc3 Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Thu, 11 Jan 2024 15:35:02 +0100
Subject: [PATCH 21/22] Document order of chr in refrence genome

---
 docs/usage.md | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index b4b500ce..8dfd0cc4 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -153,17 +153,18 @@ The mandatory and optional parameters for each category are tabulated below.
 
 | Mandatory                      | Optional                       |
 | ------------------------------ | ------------------------------ |
-| aligner<sup>1</sup>            | fasta_fai<sup>3</sup>          |
-| fasta                          | bwamem2<sup>3</sup>            |
-| platform                       | known_dbsnp<sup>4</sup>        |
-| mito_name/mt_fasta<sup>2</sup> | known_dbsnp_tbi<sup>4</sup>    |
-|                                | min_trimmed_length<sup>5</sup> |
+| aligner<sup>1</sup>            | fasta_fai<sup>4</sup>          |
+| fasta<sup>2</sup>              | bwamem2<sup>4</sup>            |
+| platform                       | known_dbsnp<sup>5</sup>        |
+| mito_name/mt_fasta<sup>3</sup> | known_dbsnp_tbi<sup>5</sup>    |
+|                                | min_trimmed_length<sup>6</sup> |
 
 <sup>1</sup>Default value is bwamem2, but if you have a valid license for Sentieon, you have the option to use Sentieon as well.<br />
-<sup>2</sup>f If mito_name is provided, mt_fasta can be generated by the pipeline.<br />
-<sup>3</sup>fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.<br />
-<sup>4</sup>Used only by Sentieon.<br />
-<sup>5</sup>Default value is 40. Used only by fastp.<br />
+<sup>2</sup>Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.<br />
+<sup>3</sup>f If mito_name is provided, mt_fasta can be generated by the pipeline.<br />
+<sup>4</sup>fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.<br />
+<sup>5</sup>Used only by Sentieon.<br />
+<sup>6</sup>Default value is 40. Used only by fastp.<br />
 
 ##### 2. QC stats from the alignment files
 

From 425863d72bc9879137e72206914faccdb7a334a9 Mon Sep 17 00:00:00 2001
From: Anders Jemt <jemten@users.noreply.github.com>
Date: Mon, 15 Jan 2024 09:19:03 +0100
Subject: [PATCH 22/22] updating usage

---
 docs/usage.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 8dfd0cc4..80130c19 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -155,14 +155,15 @@ The mandatory and optional parameters for each category are tabulated below.
 | ------------------------------ | ------------------------------ |
 | aligner<sup>1</sup>            | fasta_fai<sup>4</sup>          |
 | fasta<sup>2</sup>              | bwamem2<sup>4</sup>            |
-| platform                       | known_dbsnp<sup>5</sup>        |
-| mito_name/mt_fasta<sup>3</sup> | known_dbsnp_tbi<sup>5</sup>    |
+| platform                       | bwa<sup>4</sup>                |
+| mito_name/mt_fasta<sup>3</sup> | known_dbsnp<sup>5</sup>        |
+|                                | known_dbsnp_tbi<sup>5</sup>    |
 |                                | min_trimmed_length<sup>6</sup> |
 
-<sup>1</sup>Default value is bwamem2, but if you have a valid license for Sentieon, you have the option to use Sentieon as well.<br />
+<sup>1</sup>Default value is bwamem2. Other alternatives are bwa and sentieon (requires valid Sentieon license ).<br />
 <sup>2</sup>Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.<br />
 <sup>3</sup>f If mito_name is provided, mt_fasta can be generated by the pipeline.<br />
-<sup>4</sup>fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.<br />
+<sup>4</sup>fasta_fai, bwa and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.<br />
 <sup>5</sup>Used only by Sentieon.<br />
 <sup>6</sup>Default value is 40. Used only by fastp.<br />