From efddf0b8f9593f535d41d533320bc4db1a30c7bb Mon Sep 17 00:00:00 2001 From: peterpru Date: Mon, 6 Nov 2023 13:03:25 +0100 Subject: [PATCH 01/22] initial commit to log progress --- modules/local/retroseq/call/placeholder.txt | 1 + modules/local/retroseq/discover/placeholder.txt | 1 + 2 files changed, 2 insertions(+) create mode 100644 modules/local/retroseq/call/placeholder.txt create mode 100644 modules/local/retroseq/discover/placeholder.txt diff --git a/modules/local/retroseq/call/placeholder.txt b/modules/local/retroseq/call/placeholder.txt new file mode 100644 index 00000000..b3a42524 --- /dev/null +++ b/modules/local/retroseq/call/placeholder.txt @@ -0,0 +1 @@ +placeholder \ No newline at end of file diff --git a/modules/local/retroseq/discover/placeholder.txt b/modules/local/retroseq/discover/placeholder.txt new file mode 100644 index 00000000..b3a42524 --- /dev/null +++ b/modules/local/retroseq/discover/placeholder.txt @@ -0,0 +1 @@ +placeholder \ No newline at end of file From 46041f37b849ac5a86c647a96109786f8ddc3c0b Mon Sep 17 00:00:00 2001 From: peterpru Date: Tue, 7 Nov 2023 10:43:00 +0100 Subject: [PATCH 02/22] add modules retroseq call and discover --- modules/local/retroseq/call/main.nf | 51 ++++++++++++++++++ modules/local/retroseq/call/meta.yml | 51 ++++++++++++++++++ modules/local/retroseq/call/placeholder.txt | 1 - modules/local/retroseq/discover/main.nf | 52 ++++++++++++++++++ modules/local/retroseq/discover/meta.yml | 54 +++++++++++++++++++ .../local/retroseq/discover/placeholder.txt | 1 - 6 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 modules/local/retroseq/call/main.nf create mode 100644 modules/local/retroseq/call/meta.yml delete mode 100644 modules/local/retroseq/call/placeholder.txt create mode 100644 modules/local/retroseq/discover/main.nf create mode 100644 modules/local/retroseq/discover/meta.yml delete mode 100644 modules/local/retroseq/discover/placeholder.txt diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf new file mode 100644 index 00000000..9e38e66c --- /dev/null +++ b/modules/local/retroseq/call/main.nf @@ -0,0 +1,51 @@ +process RETROSEQ_CALL { + tag "$meta.id" + label 'process_low' + + conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' }" + + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.5" + + """ + retroseq.pl \\ + -call \\ + -bam $bam \\ + -ref $fasta \\ + -output ${prefix}.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + retroseq_call: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.5" + """ + touch ${prefix}.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + retroseq_call: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml new file mode 100644 index 00000000..7427491f --- /dev/null +++ b/modules/local/retroseq/call/meta.yml @@ -0,0 +1,51 @@ +name: "retroseq_call" +description: RetroSeq is a tool for discovery and genotyping of transposable element variants (TEVs) from next-gen sequencing reads aligned to a reference genome in BAM format. +keywords: + - retroseq + - transposable elements + - genomics +tools: + - "retroseq": + description: "RetroSeq: discovery and genotyping of TEVs from reads in BAM format." + homepage: "https://github.com/tk2/RetroSeq" + documentation: "https://github.com/tk2/RetroSeq" + tool_dev_url: "https://github.com/tk2/RetroSeq" + doi: "10.1093/bioinformatics/bts697" + licence: "['GPL']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - bai: + type: file + description: Index of the sorted BAM file + pattern: "*.{bam}" + - fasta: + type: file + description: reference genome in fasta format + pattern: "*.{fasta}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Output file containing TEVs and their location in the genome. + pattern: "*.{vcf}" + +authors: + - "@peterpru" diff --git a/modules/local/retroseq/call/placeholder.txt b/modules/local/retroseq/call/placeholder.txt deleted file mode 100644 index b3a42524..00000000 --- a/modules/local/retroseq/call/placeholder.txt +++ /dev/null @@ -1 +0,0 @@ -placeholder \ No newline at end of file diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf new file mode 100644 index 00000000..d71fd3fa --- /dev/null +++ b/modules/local/retroseq/discover/main.nf @@ -0,0 +1,52 @@ +process RETROSEQ_DISCOVER { + tag "$meta.id" + label 'process_low' + + conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' }" + + + input: + tuple val(meta), path(bam), path(bai) + path(tab) + path(extra_files) + + output: + tuple val(meta), path("*.tab"), emit: tab + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.5" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + + """ + retroseq.pl \\ + -discover \\ + -bam $bam \\ + -refTEs $tab\\ + -output ${prefix}.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + retroseq_discover: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.5" + """ + touch ${prefix}.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + retroseq_discover: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml new file mode 100644 index 00000000..a61f64c9 --- /dev/null +++ b/modules/local/retroseq/discover/meta.yml @@ -0,0 +1,54 @@ +name: "retroseq_discover" +description: RetroSeq is a tool for discovery and genotyping of transposable element variants (TEVs) from next-gen sequencing reads aligned to a reference genome in BAM format. +keywords: + - retroseq + - transposable elements + - genomics +tools: + - "retroseq": + description: "RetroSeq: discovery and genotyping of TEVs from reads in BAM format." + homepage: "https://github.com/tk2/RetroSeq" + documentation: "https://github.com/tk2/RetroSeq" + tool_dev_url: "https://github.com/tk2/RetroSeq" + doi: "10.1093/bioinformatics/bts697" + licence: "['GPL']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - bai: + type: file + description: Index of the sorted BAM file + pattern: "*.{bam}" + - tab: + type: file + description: Tab file containing a list of files, where each line is a TE type and the path to a bed file containing its coordinates in the genome. + pattern: "*.{tab}" + - extra_files: + type: file + description: Each file contains the coordinates of transposable elements of a particular type. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tab: + type: file + description: Output file containing lists of read pair names per TE type + pattern: "*.{tab}" + +authors: + - "@peterpru" diff --git a/modules/local/retroseq/discover/placeholder.txt b/modules/local/retroseq/discover/placeholder.txt deleted file mode 100644 index b3a42524..00000000 --- a/modules/local/retroseq/discover/placeholder.txt +++ /dev/null @@ -1 +0,0 @@ -placeholder \ No newline at end of file From 0bcf05c7a9aadf925585f65e690983a641ff0097 Mon Sep 17 00:00:00 2001 From: peterpru Date: Tue, 7 Nov 2023 10:51:46 +0100 Subject: [PATCH 03/22] fix linting --- modules/local/retroseq/call/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf index 9e38e66c..0fb36553 100644 --- a/modules/local/retroseq/call/main.nf +++ b/modules/local/retroseq/call/main.nf @@ -9,7 +9,7 @@ process RETROSEQ_CALL { input: tuple val(meta), path(bam), path(bai) - tuple val(meta), path(fasta) + tuple val(meta), path(fasta) output: tuple val(meta), path("*.vcf"), emit: vcf From d803ed8b9749b2b182651d5e44e208ea84455db1 Mon Sep 17 00:00:00 2001 From: peterpru Date: Tue, 7 Nov 2023 13:26:06 +0100 Subject: [PATCH 04/22] add subworkflow --- modules/local/retroseq/call/main.nf | 1 + modules/local/retroseq/discover/meta.yml | 2 +- .../local/annotate_mobile_elements.nf | 46 +++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 subworkflows/local/annotate_mobile_elements.nf diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf index 0fb36553..569d84a2 100644 --- a/modules/local/retroseq/call/main.nf +++ b/modules/local/retroseq/call/main.nf @@ -27,6 +27,7 @@ process RETROSEQ_CALL { retroseq.pl \\ -call \\ -bam $bam \\ + -input $input -ref $fasta \\ -output ${prefix}.tab diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml index a61f64c9..80c6af6c 100644 --- a/modules/local/retroseq/discover/meta.yml +++ b/modules/local/retroseq/discover/meta.yml @@ -33,7 +33,7 @@ input: pattern: "*.{tab}" - extra_files: type: file - description: Each file contains the coordinates of transposable elements of a particular type. + description: List of files. Each file contains the coordinates of transposable elements of a particular type. output: - meta: diff --git a/subworkflows/local/annotate_mobile_elements.nf b/subworkflows/local/annotate_mobile_elements.nf new file mode 100644 index 00000000..26648434 --- /dev/null +++ b/subworkflows/local/annotate_mobile_elements.nf @@ -0,0 +1,46 @@ +// +// A subworkflow to annotate mobile elements in the genome +// + +include { RETROSEQ_CALL as RETROSEQ_CALL } from '../../modules/local/retroseq/call/main' +include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER } from '../../modules/local/retroseq/discover/main' + +workflow ANNOTATE_STRUCTURAL_VARIANTS { + + take: + ch_bam // channel: [mandatory] [ val(meta), path(bam) ] + ch_bai // channel: [mandatory] [ val(meta), path(bai) ] + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + + main: + ch_versions = Channel.empty() + + // Running retroseq discover: identify discordant read pairs that might support a TE insertion + RETROSEQ_DISCOVER ( + ch_bam_bai, + "tab", + "extrafiles" + ) + + // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present + RETROSEQ_CALL ( + ch_bam_bai, + ch_genome_fasta, + ) + + // Run vep to annotate + + // Run svdb to query against database + + // Filter and rank as done in findtroll? E.g. protein coding + + ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions) + ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions) + + emit: + vcf_ann = RETROSEQ_CALL.out.vcf // channel: [ val(meta), path(vcf) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} + From c38762323363186c63959fa54c4e6181c37418c4 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 4 Jan 2024 11:49:25 +0100 Subject: [PATCH 05/22] retroseq call subworkflow --- assets/grch37_g1k_alu.bed | 0 assets/grch37_g1k_herv.bed | 0 assets/grch37_g1k_l1.bed | 0 assets/grch37_g1k_sva.bed | 0 assets/mobile_element_references.tsv | 5 ++ assets/mobile_element_references_schema.json | 26 ++++++ conf/modules/call_mobile_elements.config | 40 +++++++++ conf/test.config | 2 + main.nf | 3 +- modules/local/retroseq/call/main.nf | 8 +- modules/local/retroseq/call/meta.yml | 6 +- modules/local/retroseq/discover/main.nf | 9 +- modules/local/retroseq/discover/meta.yml | 14 +-- nextflow.config | 2 + nextflow_schema.json | 9 ++ .../local/annotate_mobile_elements.nf | 46 ---------- subworkflows/local/call_mobile_elements.nf | 86 +++++++++++++++++++ workflows/raredisease.nf | 11 +++ 18 files changed, 206 insertions(+), 61 deletions(-) create mode 100644 assets/grch37_g1k_alu.bed create mode 100644 assets/grch37_g1k_herv.bed create mode 100644 assets/grch37_g1k_l1.bed create mode 100644 assets/grch37_g1k_sva.bed create mode 100644 assets/mobile_element_references.tsv create mode 100644 assets/mobile_element_references_schema.json create mode 100644 conf/modules/call_mobile_elements.config delete mode 100644 subworkflows/local/annotate_mobile_elements.nf create mode 100644 subworkflows/local/call_mobile_elements.nf diff --git a/assets/grch37_g1k_alu.bed b/assets/grch37_g1k_alu.bed new file mode 100644 index 00000000..e69de29b diff --git a/assets/grch37_g1k_herv.bed b/assets/grch37_g1k_herv.bed new file mode 100644 index 00000000..e69de29b diff --git a/assets/grch37_g1k_l1.bed b/assets/grch37_g1k_l1.bed new file mode 100644 index 00000000..e69de29b diff --git a/assets/grch37_g1k_sva.bed b/assets/grch37_g1k_sva.bed new file mode 100644 index 00000000..e69de29b diff --git a/assets/mobile_element_references.tsv b/assets/mobile_element_references.tsv new file mode 100644 index 00000000..328e5083 --- /dev/null +++ b/assets/mobile_element_references.tsv @@ -0,0 +1,5 @@ +type path +L1 grch37_g1k_l1.bed +SVA grch37_g1k_sva.bed +ALU grch37_g1k_alu.bed +HERV grch37_g1k_herv.bed diff --git a/assets/mobile_element_references_schema.json b/assets/mobile_element_references_schema.json new file mode 100644 index 00000000..40e79825 --- /dev/null +++ b/assets/mobile_element_references_schema.json @@ -0,0 +1,26 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/mobile_element_references_schema.json", + "title": "Schema for mobile_element_references", + "description": "Schema for the file provided with params.mobile_element_references", + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "exists": true, + "pattern": "^\\S+$", + "errorMessage": "Mobile element type must be provided and cannot contain spaces" + }, + "path": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bed$", + "errorMessage": "Bed file, cannot contain spaces and must have extension '.bed'" + } + }, + "required": ["type", "path"] + } +} diff --git a/conf/modules/call_mobile_elements.config b/conf/modules/call_mobile_elements.config new file mode 100644 index 00000000..b7a0b81f --- /dev/null +++ b/conf/modules/call_mobile_elements.config @@ -0,0 +1,40 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +process { + + withName: '.*CALL_MOBILE_ELEMENTS:.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*CALL_MOBILE_ELEMENTS:ME_SPLIT_ALIGNMENT' { + ext.args = { [ + '--output-fmt bam', + '--fetch-pairs' + ].join(' ') } + ext.args2 = { "${meta.interval}" } + ext.prefix = { "${meta.id}_${meta.interval}" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:RETROSEQ_DISCOVER' { + ext.prefix = { "${meta.id}_${meta.interval}_retroseq_discover" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:RETROSEQ_CALL' { + ext.args = { '--soft' } + ext.prefix = { "${meta.id}_${meta.interval}_retroseq_call" } + } + +} diff --git a/conf/test.config b/conf/test.config index d398d542..e23102f8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -41,6 +41,8 @@ params { intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" + // TODO: Update path for mobile element refs + mobile_element_references = "${projectDir}//mobile_element_references.tsv" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_mt = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" diff --git a/main.nf b/main.nf index 9d24f1ac..5b0bb182 100644 --- a/main.nf +++ b/main.nf @@ -16,7 +16,7 @@ nextflow.enable.dsl = 2 GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - +// TODO: Are the genome config used? params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') @@ -33,6 +33,7 @@ params.intervals_wgs = WorkflowMain.getGenomeAttribute(params, params.intervals_y = WorkflowMain.getGenomeAttribute(params, 'intervals_y') params.known_dbsnp = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp') params.known_dbsnp_tbi = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp_tbi') +params.mobile_element_references = WorkflowMain.getGenomeAttribute(params, 'mobile_element_references') params.ml_model = WorkflowMain.getGenomeAttribute(params, 'ml_model') params.mt_fasta = WorkflowMain.getGenomeAttribute(params, 'mt_fasta') params.ngsbits_samplegender_method = WorkflowMain.getGenomeAttribute(params, 'ngsbits_samplegender_method') diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf index 569d84a2..21207f9b 100644 --- a/modules/local/retroseq/call/main.nf +++ b/modules/local/retroseq/call/main.nf @@ -9,6 +9,7 @@ process RETROSEQ_CALL { input: tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input) tuple val(meta), path(fasta) output: @@ -26,10 +27,11 @@ process RETROSEQ_CALL { """ retroseq.pl \\ -call \\ + $args \\ -bam $bam \\ - -input $input + -input $input \\ -ref $fasta \\ - -output ${prefix}.tab + -output ${prefix}.vcf cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -42,7 +44,7 @@ process RETROSEQ_CALL { def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "1.5" """ - touch ${prefix}.tab + touch ${prefix}.vcf cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml index 7427491f..8147d20c 100644 --- a/modules/local/retroseq/call/meta.yml +++ b/modules/local/retroseq/call/meta.yml @@ -29,8 +29,12 @@ input: pattern: "*.{bam}" - fasta: type: file - description: reference genome in fasta format + description: Reference genome in fasta format pattern: "*.{fasta}" + - tab: + type: file + description: Output file from running retroseq -call + pattern: "*.{tab}" output: - meta: diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf index d71fd3fa..b72f10c3 100644 --- a/modules/local/retroseq/discover/main.nf +++ b/modules/local/retroseq/discover/main.nf @@ -9,8 +9,8 @@ process RETROSEQ_DISCOVER { input: tuple val(meta), path(bam), path(bai) - path(tab) - path(extra_files) + path(me_references) + val(me_types) output: tuple val(meta), path("*.tab"), emit: tab @@ -25,10 +25,12 @@ process RETROSEQ_DISCOVER { def VERSION = "1.5" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. """ + paste <(printf "%s\\n" $me_types | tr -d '[],') <(printf "%s\\n" $me_references) > me_reference_manifest.tsv retroseq.pl \\ -discover \\ + $args \\ -bam $bam \\ - -refTEs $tab\\ + -refTEs me_reference_manifest.tsv\\ -output ${prefix}.tab cat <<-END_VERSIONS > versions.yml @@ -42,6 +44,7 @@ process RETROSEQ_DISCOVER { def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "1.5" """ + paste <(printf "%s\\n" $me_types | tr -d '[],') <(printf "%s\\n" $me_references) > me_reference_manifest.tsv touch ${prefix}.tab cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml index 80c6af6c..55d6de5d 100644 --- a/modules/local/retroseq/discover/meta.yml +++ b/modules/local/retroseq/discover/meta.yml @@ -26,14 +26,14 @@ input: - bai: type: file description: Index of the sorted BAM file - pattern: "*.{bam}" - - tab: - type: file - description: Tab file containing a list of files, where each line is a TE type and the path to a bed file containing its coordinates in the genome. - pattern: "*.{tab}" - - extra_files: + pattern: "*.{bai}" + - me_references: type: file - description: List of files. Each file contains the coordinates of transposable elements of a particular type. + description: Paths to bed files containing transposable element coordinates in the genome. + pattern: "*.{bed}" + - me_types: + type: list + description: List of transposable element types to discover. Needs to be in sync with me_references. output: - meta: diff --git a/nextflow.config b/nextflow.config index e65b343d..4c4a4be8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -45,6 +45,7 @@ params { // File params svdb_query_bedpedbs = null svdb_query_dbs = null + mobile_element_references = null // Alignment aligner = 'bwamem2' @@ -329,6 +330,7 @@ includeConfig 'conf/modules/call_sv_germlinecnvcaller.config' includeConfig 'conf/modules/call_sv_manta.config' includeConfig 'conf/modules/call_sv_tiddit.config' includeConfig 'conf/modules/postprocess_MT_calls.config' +includeConfig 'conf/modules/call_mobile_elements.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/nextflow_schema.json b/nextflow_schema.json index 07988ec3..76f6a336 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -214,6 +214,15 @@ "help_text": "Used to extract relevant information from the references to analyse mitochondria", "fa_icon": "fas fa-align-center" }, + "mobile_element_references": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "File with mobile element references", + "help_text": "Path to tsv file listing mobile element references. \nFormat: \\t", + "pattern": "^\\S+\\.tsv$", + "format": "file-path", + "schema": "assets/mobile_element_references_schema.json" + }, "ml_model": { "type": "string", "exists": true, diff --git a/subworkflows/local/annotate_mobile_elements.nf b/subworkflows/local/annotate_mobile_elements.nf deleted file mode 100644 index 26648434..00000000 --- a/subworkflows/local/annotate_mobile_elements.nf +++ /dev/null @@ -1,46 +0,0 @@ -// -// A subworkflow to annotate mobile elements in the genome -// - -include { RETROSEQ_CALL as RETROSEQ_CALL } from '../../modules/local/retroseq/call/main' -include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER } from '../../modules/local/retroseq/discover/main' - -workflow ANNOTATE_STRUCTURAL_VARIANTS { - - take: - ch_bam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bai // channel: [mandatory] [ val(meta), path(bai) ] - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - - main: - ch_versions = Channel.empty() - - // Running retroseq discover: identify discordant read pairs that might support a TE insertion - RETROSEQ_DISCOVER ( - ch_bam_bai, - "tab", - "extrafiles" - ) - - // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present - RETROSEQ_CALL ( - ch_bam_bai, - ch_genome_fasta, - ) - - // Run vep to annotate - - // Run svdb to query against database - - // Filter and rank as done in findtroll? E.g. protein coding - - ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions) - ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions) - - emit: - vcf_ann = RETROSEQ_CALL.out.vcf // channel: [ val(meta), path(vcf) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} - diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf new file mode 100644 index 00000000..32b62bac --- /dev/null +++ b/subworkflows/local/call_mobile_elements.nf @@ -0,0 +1,86 @@ +// +// A subworkflow to call mobile elements in the genome +// + +include { RETROSEQ_CALL as RETROSEQ_CALL } from '../../modules/local/retroseq/call/main' +include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER } from '../../modules/local/retroseq/discover/main' +include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_INDEX as ME_INDEX_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/index/main' + +workflow CALL_MOBILE_ELEMENTS { + + take: + ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_me_references // channel: [mandatory] [path(tsv)] + val_genome_build // string: [mandatory] GRCh37 or GRCh38 + + main: + ch_versions = Channel.empty() + + // Building chromosome channel depending on genome version + // TODO: Check how retroseq behaves when running chrY on female samples + Channel.of(1..22, 'X', 'Y') + .branch { it -> + grch37: val_genome_build.equals('GRCh37') + return [it.toString()] + grch38: val_genome_build.equals('GRCh38') + return ['chr' + it.toString()] + }.set{ ch_chr_genome } + ch_chr = ch_chr_genome.grch37.mix(ch_chr_genome.grch38) + + // Building one bam channel per chromosome and adding interval + ch_genome_bam_bai + .combine(ch_chr) + .map { + meta, bam, bai, chr -> + return [ meta + [interval:chr], bam, bai ] + } + .set { ch_genome_bam_bai_interval } + + ME_SPLIT_ALIGNMENT( + ch_genome_bam_bai_interval, + [[:], []], + [] + ) + + ME_INDEX_SPLIT_ALIGNMENT( ME_SPLIT_ALIGNMENT.out.bam ) + + ME_SPLIT_ALIGNMENT.out.bam + .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true) + .set { ch_retroseq_input } + + ch_me_references + .multiMap { type, path -> + type: type + path: path + } + .set { ch_me_reference_split } + + RETROSEQ_DISCOVER ( + ch_retroseq_input, + ch_me_reference_split.path.collect(), + ch_me_reference_split.type.collect() + ) + + // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present + RETROSEQ_CALL ( + ch_retroseq_input, + RETROSEQ_DISCOVER.out.tab, + ch_genome_fasta + ) + + // Run vep to annotate + + // Run svdb to query against database + + // Filter and rank as done in findtroll? E.g. protein coding + + ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions) + ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions) + ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions) + ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions) + + emit: + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index c4183c93..f572a651 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -133,6 +133,7 @@ include { RANK_VARIANTS as RANK_VARIANTS_MT } from '../subworkflows/local/ra include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants' include { RANK_VARIANTS as RANK_VARIANTS_SV } from '../subworkflows/local/rank_variants' include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome' +include { CALL_MOBILE_ELEMENTS } from '../subworkflows/local/call_mobile_elements' /* @@ -241,6 +242,9 @@ workflow RAREDISEASE { : Channel.empty() ch_intervals_y = params.intervals_y ? Channel.fromPath(params.intervals_y).collect() : Channel.empty() + //ch_me_references = params.mobile_element_references ? Channel.fromPath(params.mobile_element_references) + ch_me_references = params.mobile_element_references ? Channel.fromSamplesheet("mobile_element_references") + : Channel.empty() ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).map {it -> [[id:it[0].simpleName], it]}.collect() : Channel.value([[:],[]]) ch_mt_intervals = ch_references.mt_intervals @@ -595,6 +599,13 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(GENS.out.versions) } + CALL_MOBILE_ELEMENTS( + ch_mapped.genome_bam_bai, + ch_genome_fasta, + ch_me_references, + params.genome + ) + // // MODULE: Pipeline reporting // From cad46c183c24594f7e2e2e50c739660022335a8f Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 4 Jan 2024 11:57:46 +0100 Subject: [PATCH 06/22] updating temporary ref_me paths --- assets/mobile_element_references.tsv | 8 ++++---- conf/test.config | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assets/mobile_element_references.tsv b/assets/mobile_element_references.tsv index 328e5083..e5a804ff 100644 --- a/assets/mobile_element_references.tsv +++ b/assets/mobile_element_references.tsv @@ -1,5 +1,5 @@ type path -L1 grch37_g1k_l1.bed -SVA grch37_g1k_sva.bed -ALU grch37_g1k_alu.bed -HERV grch37_g1k_herv.bed +L1 ./assets/grch37_g1k_l1.bed +SVA ./assets/grch37_g1k_sva.bed +ALU ./assets/grch37_g1k_alu.bed +HERV ./assets/grch37_g1k_herv.bed diff --git a/conf/test.config b/conf/test.config index e23102f8..e691a70d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -42,7 +42,7 @@ params { intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" // TODO: Update path for mobile element refs - mobile_element_references = "${projectDir}//mobile_element_references.tsv" + mobile_element_references = "${projectDir}/assets/mobile_element_references.tsv" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_mt = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" From 0f6edbdfccd6ae89140c29f7ecd666e356f074ea Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 4 Jan 2024 12:57:05 +0100 Subject: [PATCH 07/22] fixing path to retroseq docker --- modules/local/retroseq/call/main.nf | 2 +- modules/local/retroseq/discover/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf index 21207f9b..33ddb1af 100644 --- a/modules/local/retroseq/call/main.nf +++ b/modules/local/retroseq/call/main.nf @@ -4,7 +4,7 @@ process RETROSEQ_CALL { conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' }" + 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' }" input: diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf index b72f10c3..cf4b5fa8 100644 --- a/modules/local/retroseq/discover/main.nf +++ b/modules/local/retroseq/discover/main.nf @@ -4,7 +4,7 @@ process RETROSEQ_DISCOVER { conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker://clinicalgenomics/retroseq:1.5_9d4f3b5' }" + 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' }" input: From bddd2fc7376cb972900855141c91190cbd3c8d11 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 4 Jan 2024 13:02:49 +0100 Subject: [PATCH 08/22] propagating versions --- subworkflows/local/call_mobile_elements.nf | 10 +++++----- workflows/raredisease.nf | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf index 32b62bac..9929fb7f 100644 --- a/subworkflows/local/call_mobile_elements.nf +++ b/subworkflows/local/call_mobile_elements.nf @@ -45,7 +45,7 @@ workflow CALL_MOBILE_ELEMENTS { ) ME_INDEX_SPLIT_ALIGNMENT( ME_SPLIT_ALIGNMENT.out.bam ) - + ME_SPLIT_ALIGNMENT.out.bam .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true) .set { ch_retroseq_input } @@ -76,10 +76,10 @@ workflow CALL_MOBILE_ELEMENTS { // Filter and rank as done in findtroll? E.g. protein coding - ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions) - ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions) - ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions) - ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions) + ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions).first() + ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions).first() + ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions).first() + ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions).first() emit: versions = ch_versions // channel: [ path(versions.yml) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index f572a651..6d833d08 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -605,6 +605,7 @@ workflow RAREDISEASE { ch_me_references, params.genome ) + ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions) // // MODULE: Pipeline reporting From 0c0fd91447508533530a6df090d050a7dad2c676 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 4 Jan 2024 15:58:56 +0100 Subject: [PATCH 09/22] joining channels to keep them in sync --- modules/local/retroseq/call/main.nf | 8 ++++---- modules/local/retroseq/call/meta.yml | 18 +++++++++++------- modules/local/retroseq/discover/main.nf | 2 +- modules/local/retroseq/discover/meta.yml | 8 ++++---- subworkflows/local/call_mobile_elements.nf | 12 +++++++++--- subworkflows/local/call_structural_variants.nf | 11 ++++++----- workflows/raredisease.nf | 5 ++++- 7 files changed, 39 insertions(+), 25 deletions(-) diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf index 33ddb1af..150af4ba 100644 --- a/modules/local/retroseq/call/main.nf +++ b/modules/local/retroseq/call/main.nf @@ -4,13 +4,13 @@ process RETROSEQ_CALL { conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' }" + 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' }" input: - tuple val(meta), path(bam), path(bai) - tuple val(meta), path(input) - tuple val(meta), path(fasta) + tuple val(meta), path(tab), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*.vcf"), emit: vcf diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml index 8147d20c..98aa0c04 100644 --- a/modules/local/retroseq/call/meta.yml +++ b/modules/local/retroseq/call/meta.yml @@ -19,22 +19,26 @@ input: description: | Groovy Map containing sample information e.g. `[ id:'test', single_end:false ]` + - tab: + type: file + description: Output file from running retroseq -call + pattern: "*.tab" - bam: type: file description: Sorted BAM file - pattern: "*.{bam}" + pattern: "*.bam" - bai: type: file description: Index of the sorted BAM file - pattern: "*.{bam}" + pattern: "*.bam" - fasta: type: file description: Reference genome in fasta format - pattern: "*.{fasta}" - - tab: + pattern: "*.fasta" + - fai: type: file - description: Output file from running retroseq -call - pattern: "*.{tab}" + description: Reference FASTA index + pattern: "*.fai" output: - meta: @@ -49,7 +53,7 @@ output: - vcf: type: file description: Output file containing TEVs and their location in the genome. - pattern: "*.{vcf}" + pattern: "*.vcf" authors: - "@peterpru" diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf index cf4b5fa8..2ea51344 100644 --- a/modules/local/retroseq/discover/main.nf +++ b/modules/local/retroseq/discover/main.nf @@ -4,7 +4,7 @@ process RETROSEQ_DISCOVER { conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5' }" + 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' }" input: diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml index 55d6de5d..9ea47408 100644 --- a/modules/local/retroseq/discover/meta.yml +++ b/modules/local/retroseq/discover/meta.yml @@ -22,15 +22,15 @@ input: - bam: type: file description: Sorted BAM file - pattern: "*.{bam}" + pattern: "*.bam" - bai: type: file description: Index of the sorted BAM file - pattern: "*.{bai}" + pattern: "*.bai" - me_references: type: file description: Paths to bed files containing transposable element coordinates in the genome. - pattern: "*.{bed}" + pattern: "*.bed" - me_types: type: list description: List of transposable element types to discover. Needs to be in sync with me_references. @@ -48,7 +48,7 @@ output: - tab: type: file description: Output file containing lists of read pair names per TE type - pattern: "*.{tab}" + pattern: "*.tab" authors: - "@peterpru" diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf index 9929fb7f..e2d34752 100644 --- a/subworkflows/local/call_mobile_elements.nf +++ b/subworkflows/local/call_mobile_elements.nf @@ -12,6 +12,7 @@ workflow CALL_MOBILE_ELEMENTS { take: ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_me_references // channel: [mandatory] [path(tsv)] val_genome_build // string: [mandatory] GRCh37 or GRCh38 @@ -63,13 +64,18 @@ workflow CALL_MOBILE_ELEMENTS { ch_me_reference_split.type.collect() ) + RETROSEQ_DISCOVER.out.tab + .join(ch_retroseq_input, failOnMismatch: true) + .set { ch_retroseq_call_input } + // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present RETROSEQ_CALL ( - ch_retroseq_input, - RETROSEQ_DISCOVER.out.tab, - ch_genome_fasta + ch_retroseq_call_input, + ch_genome_fasta, + ch_genome_fai ) + // Run vep to annotate // Run svdb to query against database diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index 13827f61..ee5c2cbc 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -53,10 +53,11 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions) } - CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info) - .vcf - .collect{it[1]} - .set { cnvnator_vcf } + // Uncomment + //CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info) + // .vcf + // .collect{it[1]} + // .set { cnvnator_vcf } CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta) @@ -64,7 +65,7 @@ workflow CALL_STRUCTURAL_VARIANTS { if (params.skip_germlinecnvcaller) { tiddit_vcf .combine(manta_vcf) - .combine(cnvnator_vcf) + // .combine(cnvnator_vcf) .toList() .set { vcf_list } } else { diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 6d833d08..f416ec4d 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -293,7 +293,9 @@ workflow RAREDISEASE { // SV caller priority if (params.skip_germlinecnvcaller) { - ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) + // TODO: Uncomment and remove + ch_svcaller_priority = Channel.value(["tiddit", "manta"]) + //ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) } else { ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) } @@ -602,6 +604,7 @@ workflow RAREDISEASE { CALL_MOBILE_ELEMENTS( ch_mapped.genome_bam_bai, ch_genome_fasta, + ch_genome_fai, ch_me_references, params.genome ) From ad9051841d39aafd4c93be0dd93c2d0a5cb24e53 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 4 Jan 2024 17:52:23 +0100 Subject: [PATCH 10/22] fix input name --- modules/local/retroseq/call/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf index 150af4ba..5b13c630 100644 --- a/modules/local/retroseq/call/main.nf +++ b/modules/local/retroseq/call/main.nf @@ -29,7 +29,7 @@ process RETROSEQ_CALL { -call \\ $args \\ -bam $bam \\ - -input $input \\ + -input $tab \\ -ref $fasta \\ -output ${prefix}.vcf From caa0029cc35357f3096c6218fd0c00dd6dbf231c Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 5 Jan 2024 14:37:28 +0100 Subject: [PATCH 11/22] concatenate and merge vcfs --- conf/modules/call_mobile_elements.config | 33 ++++++++++ subworkflows/local/call_mobile_elements.nf | 73 +++++++++++++++++----- workflows/raredisease.nf | 1 + 3 files changed, 93 insertions(+), 14 deletions(-) diff --git a/conf/modules/call_mobile_elements.config b/conf/modules/call_mobile_elements.config index b7a0b81f..788404a3 100644 --- a/conf/modules/call_mobile_elements.config +++ b/conf/modules/call_mobile_elements.config @@ -37,4 +37,37 @@ process { ext.prefix = { "${meta.id}_${meta.interval}_retroseq_call" } } + withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_REHEADER_ME' { + ext.args2 = { '--output-type v' } + ext.prefix = { "${meta.id}_${meta.interval}_retroseq_reheader" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_SORT_ME' { + ext.args = { '--output-type z' } + ext.prefix = { "${meta.id}_${meta.interval}_retroseq_sort" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_CONCAT_ME' { + ext.args = { '--output-type z --allow-overlaps' } + ext.prefix = { "${meta.id}_mobile_elements" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:SVDB_MERGE_ME' { + ext.args = { '--bnd_distance 150 --overlap 0.5' } + ext.prefix = { "${meta.id}_mobile_elements" } + publishDir = [ + path: { "${params.outdir}/call_mobile_elements" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*CALL_MOBILE_ELEMENTS:TABIX_ME' { + publishDir = [ + path: { "${params.outdir}/call_mobile_elements" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf index e2d34752..7db198be 100644 --- a/subworkflows/local/call_mobile_elements.nf +++ b/subworkflows/local/call_mobile_elements.nf @@ -2,10 +2,16 @@ // A subworkflow to call mobile elements in the genome // +include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_ME } from '../../modules/nf-core/bcftools/reheader/main' +include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_ME } from '../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT as BCFTOOLS_SORT_ME } from '../../modules/nf-core/bcftools/sort/main' include { RETROSEQ_CALL as RETROSEQ_CALL } from '../../modules/local/retroseq/call/main' include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER } from '../../modules/local/retroseq/discover/main' -include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/view/main' include { SAMTOOLS_INDEX as ME_INDEX_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/view/main' +include { TABIX_TABIX as TABIX_ME } from '../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_ME_SPLIT } from '../../modules/nf-core/tabix/tabix/main' +include { SVDB_MERGE as SVDB_MERGE_ME } from '../../modules/nf-core/svdb/merge/main' workflow CALL_MOBILE_ELEMENTS { @@ -14,6 +20,7 @@ workflow CALL_MOBILE_ELEMENTS { ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_me_references // channel: [mandatory] [path(tsv)] + ch_case_info // channel: [mandatory] [ val(case_info) ] val_genome_build // string: [mandatory] GRCh37 or GRCh38 main: @@ -28,7 +35,8 @@ workflow CALL_MOBILE_ELEMENTS { grch38: val_genome_build.equals('GRCh38') return ['chr' + it.toString()] }.set{ ch_chr_genome } - ch_chr = ch_chr_genome.grch37.mix(ch_chr_genome.grch38) + ch_chr_genome.grch37.mix(ch_chr_genome.grch38) + .set { ch_chr } // Building one bam channel per chromosome and adding interval ch_genome_bam_bai @@ -39,13 +47,9 @@ workflow CALL_MOBILE_ELEMENTS { } .set { ch_genome_bam_bai_interval } - ME_SPLIT_ALIGNMENT( - ch_genome_bam_bai_interval, - [[:], []], - [] - ) - - ME_INDEX_SPLIT_ALIGNMENT( ME_SPLIT_ALIGNMENT.out.bam ) + // Split bam file on chromosome and index + ME_SPLIT_ALIGNMENT ( ch_genome_bam_bai_interval, [[:], []], [] ) + ME_INDEX_SPLIT_ALIGNMENT ( ME_SPLIT_ALIGNMENT.out.bam ) ME_SPLIT_ALIGNMENT.out.bam .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true) @@ -68,25 +72,66 @@ workflow CALL_MOBILE_ELEMENTS { .join(ch_retroseq_input, failOnMismatch: true) .set { ch_retroseq_call_input } - // Running retroseq call: clusters reads and checks on the breakpoints to decide whether a TEV is present RETROSEQ_CALL ( ch_retroseq_call_input, ch_genome_fasta, ch_genome_fai ) + // Fix the vcf by adding header, sorting and indexing + BCFTOOLS_REHEADER_ME ( + RETROSEQ_CALL.out.vcf.map{ meta, vcf -> [ meta, vcf, [] ] }, + ch_genome_fai + ) + BCFTOOLS_SORT_ME ( BCFTOOLS_REHEADER_ME.out.vcf ) + TABIX_ME_SPLIT ( BCFTOOLS_SORT_ME.out.vcf ) + + // Concatenate the chromosme vcfs per sample + BCFTOOLS_SORT_ME.out.vcf + .map { meta, vcf -> [ meta.findAll { !(it.key in ['interval']) }, vcf ] } + .groupTuple(size: 24) + .set { ch_vcfs } + + TABIX_ME_SPLIT.out.tbi + .map { meta, tbi -> [ meta.findAll { !(it.key in ['interval']) }, tbi ] } + .groupTuple(size: 24) + .set { ch_tbis } + + ch_vcfs.join(ch_tbis) + .set { ch_vcfs_tbis} + + BCFTOOLS_CONCAT_ME ( ch_vcfs_tbis ) + + // Merge sample vcfs to a case vcf + BCFTOOLS_CONCAT_ME.out.vcf + .collect{it[1]} + .toList() + .collect() + .set { ch_vcf_list } - // Run vep to annotate + ch_case_info + .combine(ch_vcf_list) + .set { ch_svdb_merge_me_input } - // Run svdb to query against database + SVDB_MERGE_ME ( ch_svdb_merge_me_input, [] ) + TABIX_ME ( SVDB_MERGE_ME.out.vcf ) - // Filter and rank as done in findtroll? E.g. protein coding + SVDB_MERGE_ME.out.vcf + .join(TABIX_ME.out.tbi) + .set { ch_me_vcf } ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions).first() ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions).first() ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions).first() ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions).first() + ch_versions = ch_versions.mix(BCFTOOLS_REHEADER_ME.out.versions).first() + ch_versions = ch_versions.mix(BCFTOOLS_SORT_ME.out.versions).first() + ch_versions = ch_versions.mix(TABIX_ME_SPLIT.out.versions).first() + ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_ME.out.versions).first() + ch_versions = ch_versions.mix(SVDB_MERGE_ME.out.versions) + ch_versions = ch_versions.mix(TABIX_ME.out.versions) emit: - versions = ch_versions // channel: [ path(versions.yml) ] + me_vcf = ch_me_vcf // channel: [ val(meta), path(vcf), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index f416ec4d..0016a154 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -606,6 +606,7 @@ workflow RAREDISEASE { ch_genome_fasta, ch_genome_fai, ch_me_references, + ch_case_info, params.genome ) ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions) From c6c089039f28fc1ed9b108a55f8d2ba6cadb5d39 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 8 Jan 2024 15:45:16 +0100 Subject: [PATCH 12/22] looking into joining error --- lib/CustomChannelOperators.groovy | 156 +++++++++++++++++++++ subworkflows/local/call_mobile_elements.nf | 19 ++- 2 files changed, 172 insertions(+), 3 deletions(-) create mode 100644 lib/CustomChannelOperators.groovy diff --git a/lib/CustomChannelOperators.groovy b/lib/CustomChannelOperators.groovy new file mode 100644 index 00000000..f1317a46 --- /dev/null +++ b/lib/CustomChannelOperators.groovy @@ -0,0 +1,156 @@ +// +// https://github.com/Midnighter/nextflow-utility-services/blob/1f84278c965255fb2bbe290b8d6c9f33f71da26f/lib/CustomChannelOperators.groovy +// +/** + * MIT License + * + * Copyright (c) 2022 Moritz E. Beber + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import groovyx.gpars.dataflow.DataflowBroadcast + +/** + * Provide a collection of custom channel operators that go beyond the nextflow default. + * + * @author Moritz E. Beber + * @author Mahesh Binzer-Panchal + */ +class CustomChannelOperators { + + /** + * Join two channels by one or more keys from a map contained in each channel. + * + * The channel elements are assumed to be tuples whose size is at least two. + * Typically, the maps to join by are in the first position of the tuples. + * Please read https://www.nextflow.io/docs/latest/operator.html#join carefully. + * + * @param left The left-hand side channel in the join. + * @param right The right-hand side channel in the join. + * @param keys A list of strings providing the map keys to compare. + * @param leftBy The position of the map in the left channel. + * @param rightBy The position of the map in the right channel. + * @param joinArgs A map of keyword arguments that is passed on to the nextflow join call. + * @return The joined channels with the map in the original position of the left channel, + * followed by all elements of the right channel except for the map. + */ + static DataflowBroadcast joinOnKeys( + DataflowBroadcast left, + DataflowBroadcast right, + List keys, + Integer leftBy, + Integer rightBy, + Map joinArgs + ) { + // Extract desired keys from the left map, located at `leftBy`, and prepend them. + DataflowBroadcast newLeft = left.map { tuple -> + extractKeys(tuple, keys, leftBy) + tuple + } + + // Extract desired keys from the right map, located at `rightBy`, and prepend them. + // Also drop the map itself from the right. + DataflowBroadcast newRight = right.map { tuple -> + extractKeys(tuple, keys, rightBy) + removeMap(tuple, rightBy) + } + + // Set the positions to join on explicitly. + joinArgs.by = 0.. dropKeys(tuple, keys) } + } + + static DataflowBroadcast joinOnKeys( + Map joinArgs, + DataflowBroadcast left, + DataflowBroadcast right, + String key, + Integer leftBy = 0, + Integer rightBy = 0 + ) { + return joinOnKeys(left, right, [key], leftBy, rightBy, joinArgs) + } + + static DataflowBroadcast joinOnKeys( + DataflowBroadcast left, + DataflowBroadcast right, + String key, + Integer leftBy = 0, + Integer rightBy = 0 + ) { + return joinOnKeys(left, right, [key], leftBy, rightBy, [:]) + } + + static DataflowBroadcast joinOnKeys( + Map joinArgs, + DataflowBroadcast left, + DataflowBroadcast right, + List keys, + Integer leftBy = 0, + Integer rightBy = 0 + ) { + return joinOnKeys(left, right, keys, leftBy, rightBy, joinArgs) + } + + static DataflowBroadcast joinOnKeys( + DataflowBroadcast left, + DataflowBroadcast right, + List keys, + Integer leftBy = 0, + Integer rightBy = 0 + ) { + return joinOnKeys(left, right, keys, leftBy, rightBy, [:]) + } + + /** + * Extract values from a map at given position with given keys. + * + * @param tuple A tuple (`List`) channel element. + * @param keys A list of strings denoting keys in the map. + * @param index The position of the map in the tuple. + * @return The list of values extracted from the map. + */ + private static List extractKeys(List tuple, List keys, Integer index) { + return tuple[index].subMap(keys).values().toList() + } + + /** + * Return a new tuple without the map in it. + * + * @param tuple A tuple (`List`) channel element. + * @param index The position of the map in the tuple. + * @return A copy of the list without the map. + */ + private static List removeMap(List tuple, Integer index) { + return tuple[0.. keys) { + return tuple.drop(keys.size()) + } + +} diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf index 7db198be..12e4085e 100644 --- a/subworkflows/local/call_mobile_elements.nf +++ b/subworkflows/local/call_mobile_elements.nf @@ -26,8 +26,11 @@ workflow CALL_MOBILE_ELEMENTS { main: ch_versions = Channel.empty() + // TODO: Fix one sample join error + // Building chromosome channel depending on genome version // TODO: Check how retroseq behaves when running chrY on female samples + // TODO: Would be nicer to read in the fai or dict Channel.of(1..22, 'X', 'Y') .branch { it -> grch37: val_genome_build.equals('GRCh37') @@ -51,8 +54,18 @@ workflow CALL_MOBILE_ELEMENTS { ME_SPLIT_ALIGNMENT ( ch_genome_bam_bai_interval, [[:], []], [] ) ME_INDEX_SPLIT_ALIGNMENT ( ME_SPLIT_ALIGNMENT.out.bam ) + ME_SPLIT_ALIGNMENT.out.bam.toList().dump(tag:'bam') + ME_INDEX_SPLIT_ALIGNMENT.out.bai.toList().dump(tag:'bai') + + //ch_retroseq_input = CustomChannelOperators.joinOnKeys( + // ME_SPLIT_ALIGNMENT.out.bam, + // ME_SPLIT_ALIGNMENT.out.bai, + // ["id", "interval"], 0,0, [failOnMismatch:true, failOnDuplicate:true] + //) + //ch_retroseq_input.dump(tag: 'join', pretty: true) + ME_SPLIT_ALIGNMENT.out.bam - .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true) + .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true, failOnDuplicate: true) .set { ch_retroseq_input } ch_me_references @@ -88,12 +101,12 @@ workflow CALL_MOBILE_ELEMENTS { // Concatenate the chromosme vcfs per sample BCFTOOLS_SORT_ME.out.vcf - .map { meta, vcf -> [ meta.findAll { !(it.key in ['interval']) }, vcf ] } + .map { meta, vcf -> [ meta - meta.subMap('interval'), vcf ] } .groupTuple(size: 24) .set { ch_vcfs } TABIX_ME_SPLIT.out.tbi - .map { meta, tbi -> [ meta.findAll { !(it.key in ['interval']) }, tbi ] } + .map { meta, tbi -> [ meta - meta.subMap('interval'), tbi ] } .groupTuple(size: 24) .set { ch_tbis } From 45cc753e086ded70fa73fda6a192f7e5861d6b62 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 9 Jan 2024 11:00:28 +0100 Subject: [PATCH 13/22] adding missing test file --- conf/test_one_sample.config | 2 + lib/CustomChannelOperators.groovy | 156 --------------------- subworkflows/local/call_mobile_elements.nf | 23 +-- 3 files changed, 6 insertions(+), 175 deletions(-) delete mode 100644 lib/CustomChannelOperators.groovy diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index f39cb40c..81a3dd12 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -41,6 +41,8 @@ params { intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" + // TODO: Update path for mobile element refs + mobile_element_references = "${projectDir}/assets/mobile_element_references.tsv" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" diff --git a/lib/CustomChannelOperators.groovy b/lib/CustomChannelOperators.groovy deleted file mode 100644 index f1317a46..00000000 --- a/lib/CustomChannelOperators.groovy +++ /dev/null @@ -1,156 +0,0 @@ -// -// https://github.com/Midnighter/nextflow-utility-services/blob/1f84278c965255fb2bbe290b8d6c9f33f71da26f/lib/CustomChannelOperators.groovy -// -/** - * MIT License - * - * Copyright (c) 2022 Moritz E. Beber - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -import groovyx.gpars.dataflow.DataflowBroadcast - -/** - * Provide a collection of custom channel operators that go beyond the nextflow default. - * - * @author Moritz E. Beber - * @author Mahesh Binzer-Panchal - */ -class CustomChannelOperators { - - /** - * Join two channels by one or more keys from a map contained in each channel. - * - * The channel elements are assumed to be tuples whose size is at least two. - * Typically, the maps to join by are in the first position of the tuples. - * Please read https://www.nextflow.io/docs/latest/operator.html#join carefully. - * - * @param left The left-hand side channel in the join. - * @param right The right-hand side channel in the join. - * @param keys A list of strings providing the map keys to compare. - * @param leftBy The position of the map in the left channel. - * @param rightBy The position of the map in the right channel. - * @param joinArgs A map of keyword arguments that is passed on to the nextflow join call. - * @return The joined channels with the map in the original position of the left channel, - * followed by all elements of the right channel except for the map. - */ - static DataflowBroadcast joinOnKeys( - DataflowBroadcast left, - DataflowBroadcast right, - List keys, - Integer leftBy, - Integer rightBy, - Map joinArgs - ) { - // Extract desired keys from the left map, located at `leftBy`, and prepend them. - DataflowBroadcast newLeft = left.map { tuple -> - extractKeys(tuple, keys, leftBy) + tuple - } - - // Extract desired keys from the right map, located at `rightBy`, and prepend them. - // Also drop the map itself from the right. - DataflowBroadcast newRight = right.map { tuple -> - extractKeys(tuple, keys, rightBy) + removeMap(tuple, rightBy) - } - - // Set the positions to join on explicitly. - joinArgs.by = 0.. dropKeys(tuple, keys) } - } - - static DataflowBroadcast joinOnKeys( - Map joinArgs, - DataflowBroadcast left, - DataflowBroadcast right, - String key, - Integer leftBy = 0, - Integer rightBy = 0 - ) { - return joinOnKeys(left, right, [key], leftBy, rightBy, joinArgs) - } - - static DataflowBroadcast joinOnKeys( - DataflowBroadcast left, - DataflowBroadcast right, - String key, - Integer leftBy = 0, - Integer rightBy = 0 - ) { - return joinOnKeys(left, right, [key], leftBy, rightBy, [:]) - } - - static DataflowBroadcast joinOnKeys( - Map joinArgs, - DataflowBroadcast left, - DataflowBroadcast right, - List keys, - Integer leftBy = 0, - Integer rightBy = 0 - ) { - return joinOnKeys(left, right, keys, leftBy, rightBy, joinArgs) - } - - static DataflowBroadcast joinOnKeys( - DataflowBroadcast left, - DataflowBroadcast right, - List keys, - Integer leftBy = 0, - Integer rightBy = 0 - ) { - return joinOnKeys(left, right, keys, leftBy, rightBy, [:]) - } - - /** - * Extract values from a map at given position with given keys. - * - * @param tuple A tuple (`List`) channel element. - * @param keys A list of strings denoting keys in the map. - * @param index The position of the map in the tuple. - * @return The list of values extracted from the map. - */ - private static List extractKeys(List tuple, List keys, Integer index) { - return tuple[index].subMap(keys).values().toList() - } - - /** - * Return a new tuple without the map in it. - * - * @param tuple A tuple (`List`) channel element. - * @param index The position of the map in the tuple. - * @return A copy of the list without the map. - */ - private static List removeMap(List tuple, Integer index) { - return tuple[0.. keys) { - return tuple.drop(keys.size()) - } - -} diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf index 12e4085e..45432ae6 100644 --- a/subworkflows/local/call_mobile_elements.nf +++ b/subworkflows/local/call_mobile_elements.nf @@ -26,8 +26,6 @@ workflow CALL_MOBILE_ELEMENTS { main: ch_versions = Channel.empty() - // TODO: Fix one sample join error - // Building chromosome channel depending on genome version // TODO: Check how retroseq behaves when running chrY on female samples // TODO: Would be nicer to read in the fai or dict @@ -54,16 +52,6 @@ workflow CALL_MOBILE_ELEMENTS { ME_SPLIT_ALIGNMENT ( ch_genome_bam_bai_interval, [[:], []], [] ) ME_INDEX_SPLIT_ALIGNMENT ( ME_SPLIT_ALIGNMENT.out.bam ) - ME_SPLIT_ALIGNMENT.out.bam.toList().dump(tag:'bam') - ME_INDEX_SPLIT_ALIGNMENT.out.bai.toList().dump(tag:'bai') - - //ch_retroseq_input = CustomChannelOperators.joinOnKeys( - // ME_SPLIT_ALIGNMENT.out.bam, - // ME_SPLIT_ALIGNMENT.out.bai, - // ["id", "interval"], 0,0, [failOnMismatch:true, failOnDuplicate:true] - //) - //ch_retroseq_input.dump(tag: 'join', pretty: true) - ME_SPLIT_ALIGNMENT.out.bam .join(ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch:true, failOnDuplicate: true) .set { ch_retroseq_input } @@ -99,7 +87,7 @@ workflow CALL_MOBILE_ELEMENTS { BCFTOOLS_SORT_ME ( BCFTOOLS_REHEADER_ME.out.vcf ) TABIX_ME_SPLIT ( BCFTOOLS_SORT_ME.out.vcf ) - // Concatenate the chromosme vcfs per sample + // Concatenate the chromosome vcfs to sample vcfs BCFTOOLS_SORT_ME.out.vcf .map { meta, vcf -> [ meta - meta.subMap('interval'), vcf ] } .groupTuple(size: 24) @@ -129,10 +117,6 @@ workflow CALL_MOBILE_ELEMENTS { SVDB_MERGE_ME ( ch_svdb_merge_me_input, [] ) TABIX_ME ( SVDB_MERGE_ME.out.vcf ) - SVDB_MERGE_ME.out.vcf - .join(TABIX_ME.out.tbi) - .set { ch_me_vcf } - ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions).first() ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions).first() ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions).first() @@ -145,6 +129,7 @@ workflow CALL_MOBILE_ELEMENTS { ch_versions = ch_versions.mix(TABIX_ME.out.versions) emit: - me_vcf = ch_me_vcf // channel: [ val(meta), path(vcf), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + vcf = SVDB_MERGE_ME.out.vcf // channel: [ val(meta), path(vcf) ] + tbi = TABIX_ME.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] } From 08b7203e31d9b81833d38fdbaf5bbac89c173f04 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 9 Jan 2024 11:06:28 +0100 Subject: [PATCH 14/22] fix config indentation --- conf/modules/call_mobile_elements.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules/call_mobile_elements.config b/conf/modules/call_mobile_elements.config index 788404a3..fabc6b94 100644 --- a/conf/modules/call_mobile_elements.config +++ b/conf/modules/call_mobile_elements.config @@ -62,7 +62,7 @@ process { ] } - withName: '.*CALL_MOBILE_ELEMENTS:TABIX_ME' { + withName: '.*CALL_MOBILE_ELEMENTS:TABIX_ME' { publishDir = [ path: { "${params.outdir}/call_mobile_elements" }, mode: params.publish_dir_mode, From 3be46420032c2018ee47842db4035e2797e908fe Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 9 Jan 2024 16:23:01 +0100 Subject: [PATCH 15/22] fixing around with test data --- CITATIONS.md | 4 ++++ README.md | 4 ++++ assets/grch37_g1k_alu.bed | 0 assets/grch37_g1k_herv.bed | 0 assets/grch37_g1k_l1.bed | 0 assets/grch37_g1k_sva.bed | 0 assets/mobile_element_references.tsv | 5 ----- conf/test.config | 4 ++-- conf/test_one_sample.config | 4 ++-- conf/test_sentieon.config | 2 ++ main.nf | 1 - subworkflows/local/call_mobile_elements.nf | 21 +++++++-------------- workflows/raredisease.nf | 20 +++++++++----------- 13 files changed, 30 insertions(+), 35 deletions(-) delete mode 100644 assets/grch37_g1k_alu.bed delete mode 100644 assets/grch37_g1k_herv.bed delete mode 100644 assets/grch37_g1k_l1.bed delete mode 100644 assets/grch37_g1k_sva.bed delete mode 100644 assets/mobile_element_references.tsv diff --git a/CITATIONS.md b/CITATIONS.md index b20f58ac..f9973ae3 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -100,6 +100,10 @@ > Okonechnikov K, Conesa A, GarcĂ­a-Alcalde F. Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics. 2016;32(2):292-294. doi:10.1093/bioinformatics/btv566 +- [RetroSeq](https://academic.oup.com/bioinformatics/article/29/3/389/257479) + + > Thomas M. Keane, Kim Wong, David J. Adams, RetroSeq: transposable element discovery from next-generation sequencing data. Bioinformatics.2013 Feb 1;29(3):389-90. doi: 10.1093/bioinformatics/bts697 + - [rhocall](https://github.com/dnil/rhocall) - [Sentieon DNAscope](https://www.biorxiv.org/content/10.1101/2022.05.20.492556v1.abstract) diff --git a/README.md b/README.md index 3c36427a..ff227da8 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,10 @@ On release, automated continuous integration tests run the pipeline on a full-si - [Expansion Hunter](https://github.com/Illumina/ExpansionHunter) - [Stranger](https://github.com/Clinical-Genomics/stranger) +**9. Variant calling - mobile elements:** + +- [RetroSeq](https://github.com/tk2/RetroSeq) + **9. Rank variants - SV and SNV:** - [GENMOD](https://github.com/Clinical-Genomics/genmod) diff --git a/assets/grch37_g1k_alu.bed b/assets/grch37_g1k_alu.bed deleted file mode 100644 index e69de29b..00000000 diff --git a/assets/grch37_g1k_herv.bed b/assets/grch37_g1k_herv.bed deleted file mode 100644 index e69de29b..00000000 diff --git a/assets/grch37_g1k_l1.bed b/assets/grch37_g1k_l1.bed deleted file mode 100644 index e69de29b..00000000 diff --git a/assets/grch37_g1k_sva.bed b/assets/grch37_g1k_sva.bed deleted file mode 100644 index e69de29b..00000000 diff --git a/assets/mobile_element_references.tsv b/assets/mobile_element_references.tsv deleted file mode 100644 index e5a804ff..00000000 --- a/assets/mobile_element_references.tsv +++ /dev/null @@ -1,5 +0,0 @@ -type path -L1 ./assets/grch37_g1k_l1.bed -SVA ./assets/grch37_g1k_sva.bed -ALU ./assets/grch37_g1k_alu.bed -HERV ./assets/grch37_g1k_herv.bed diff --git a/conf/test.config b/conf/test.config index e691a70d..d43f319b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -36,13 +36,13 @@ params { // Genome references fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai" genome = 'GRCh37' gnomad_af = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz" intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" - // TODO: Update path for mobile element refs - mobile_element_references = "${projectDir}/assets/mobile_element_references.tsv" + mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_mt = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index 81a3dd12..dc99e145 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -36,13 +36,13 @@ params { // Genome references fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai" genome = 'GRCh37' gnomad_af = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz" intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" - // TODO: Update path for mobile element refs - mobile_element_references = "${projectDir}/assets/mobile_element_references.tsv" + mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" diff --git a/conf/test_sentieon.config b/conf/test_sentieon.config index 64618d12..aca4b755 100644 --- a/conf/test_sentieon.config +++ b/conf/test_sentieon.config @@ -31,11 +31,13 @@ params { // Genome references fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai" genome = 'GRCh37' gnomad_af = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz" intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" + mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" diff --git a/main.nf b/main.nf index 5b0bb182..0ebf4e51 100644 --- a/main.nf +++ b/main.nf @@ -16,7 +16,6 @@ nextflow.enable.dsl = 2 GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO: Are the genome config used? params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf index 45432ae6..c77a11d9 100644 --- a/subworkflows/local/call_mobile_elements.nf +++ b/subworkflows/local/call_mobile_elements.nf @@ -3,13 +3,13 @@ // include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_ME } from '../../modules/nf-core/bcftools/reheader/main' -include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_ME } from '../../modules/nf-core/bcftools/concat/main' -include { BCFTOOLS_SORT as BCFTOOLS_SORT_ME } from '../../modules/nf-core/bcftools/sort/main' +include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_ME } from '../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT as BCFTOOLS_SORT_ME } from '../../modules/nf-core/bcftools/sort/main' include { RETROSEQ_CALL as RETROSEQ_CALL } from '../../modules/local/retroseq/call/main' include { RETROSEQ_DISCOVER as RETROSEQ_DISCOVER } from '../../modules/local/retroseq/discover/main' include { SAMTOOLS_INDEX as ME_INDEX_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/index/main' include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/view/main' -include { TABIX_TABIX as TABIX_ME } from '../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_ME } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_ME_SPLIT } from '../../modules/nf-core/tabix/tabix/main' include { SVDB_MERGE as SVDB_MERGE_ME } from '../../modules/nf-core/svdb/merge/main' @@ -26,17 +26,10 @@ workflow CALL_MOBILE_ELEMENTS { main: ch_versions = Channel.empty() - // Building chromosome channel depending on genome version - // TODO: Check how retroseq behaves when running chrY on female samples - // TODO: Would be nicer to read in the fai or dict - Channel.of(1..22, 'X', 'Y') - .branch { it -> - grch37: val_genome_build.equals('GRCh37') - return [it.toString()] - grch38: val_genome_build.equals('GRCh38') - return ['chr' + it.toString()] - }.set{ ch_chr_genome } - ch_chr_genome.grch37.mix(ch_chr_genome.grch38) + // Building chromosome channels based on fasta index + ch_genome_fai + .splitCsv(sep: "\t", elem: 1, limit: 25) + .map { meta, fai -> [ fai.first() ] } .set { ch_chr } // Building one bam channel per chromosome and adding interval diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 0016a154..5fafd6a7 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -293,9 +293,7 @@ workflow RAREDISEASE { // SV caller priority if (params.skip_germlinecnvcaller) { - // TODO: Uncomment and remove - ch_svcaller_priority = Channel.value(["tiddit", "manta"]) - //ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) + ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) } else { ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) } @@ -356,14 +354,14 @@ workflow RAREDISEASE { // // EXPANSIONHUNTER AND STRANGER // - CALL_REPEAT_EXPANSIONS ( - ch_mapped.genome_bam_bai, - ch_variant_catalog, - ch_case_info, - ch_genome_fasta, - ch_genome_fai - ) - ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) + //CALL_REPEAT_EXPANSIONS ( + // ch_mapped.genome_bam_bai, + // ch_variant_catalog, + // ch_case_info, + // ch_genome_fasta, + // ch_genome_fai + //) + //ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) // // SNV CALLING From 4bbb604114b628aa1239fc41f5236b0b1fd4e5a8 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 9 Jan 2024 16:27:43 +0100 Subject: [PATCH 16/22] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ff227da8..ebb4b95a 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - [RetroSeq](https://github.com/tk2/RetroSeq) -**9. Rank variants - SV and SNV:** +**10. Rank variants - SV and SNV:** - [GENMOD](https://github.com/Clinical-Genomics/genmod) From e50756fde73afd0ffdb7a84b2efd5c1cac2d21bd Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 9 Jan 2024 16:29:39 +0100 Subject: [PATCH 17/22] uncommenting lines --- workflows/raredisease.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 5fafd6a7..cb23fe7d 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -354,14 +354,14 @@ workflow RAREDISEASE { // // EXPANSIONHUNTER AND STRANGER // - //CALL_REPEAT_EXPANSIONS ( - // ch_mapped.genome_bam_bai, - // ch_variant_catalog, - // ch_case_info, - // ch_genome_fasta, - // ch_genome_fai - //) - //ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) + CALL_REPEAT_EXPANSIONS ( + ch_mapped.genome_bam_bai, + ch_variant_catalog, + ch_case_info, + ch_genome_fasta, + ch_genome_fai + ) + ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) // // SNV CALLING From 8cacd8f99960acd6040ad0b6246a81083539ade5 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 9 Jan 2024 16:54:40 +0100 Subject: [PATCH 18/22] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce1a867b..6a0e09ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ngsbits samplegender to check sex [#453](https://github.com/nf-core/raredisease/pull/453) - New workflow for generating cgh files from SV vcfs for interpretation in the CytosSure interpretation software. Turned off by default [#456](https://github.com/nf-core/raredisease/pull/456/) - Fastp to do adapter trimming. It can be skipped using `--skip_fastp` [#457](https://github.com/nf-core/raredisease/pull/457) +- New workflow for calling insertion of mobile elements [#440](https://github.com/nf-core/raredisease/pull/440) ### `Changed` From 9c178a1a51cf5046a503fdb60f560f5371d8c79c Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Wed, 10 Jan 2024 16:51:24 +0100 Subject: [PATCH 19/22] bring back a few lines that was commented out --- subworkflows/local/call_structural_variants.nf | 11 +++++------ workflows/raredisease.nf | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index 0007e251..ad45dc29 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -53,11 +53,10 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions) } - // Uncomment - //CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info) - // .vcf - // .collect{it[1]} - // .set { cnvnator_vcf } + CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info) + .vcf + .collect{it[1]} + .set { cnvnator_vcf } CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta) @@ -65,7 +64,7 @@ workflow CALL_STRUCTURAL_VARIANTS { if (params.skip_germlinecnvcaller) { tiddit_vcf .combine(manta_vcf) - // .combine(cnvnator_vcf) + .combine(cnvnator_vcf) .toList() .set { vcf_list } } else { diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index cb23fe7d..76e37e47 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -242,7 +242,6 @@ workflow RAREDISEASE { : Channel.empty() ch_intervals_y = params.intervals_y ? Channel.fromPath(params.intervals_y).collect() : Channel.empty() - //ch_me_references = params.mobile_element_references ? Channel.fromPath(params.mobile_element_references) ch_me_references = params.mobile_element_references ? Channel.fromSamplesheet("mobile_element_references") : Channel.empty() ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).map {it -> [[id:it[0].simpleName], it]}.collect() From 9564c642706b9260fa86ee5f8fbae963ee0e130d Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Thu, 11 Jan 2024 14:03:46 +0100 Subject: [PATCH 20/22] add meta2 and meta3 to retroseq_call meta.yml --- modules/local/retroseq/call/meta.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml index 98aa0c04..4c1e5b6c 100644 --- a/modules/local/retroseq/call/meta.yml +++ b/modules/local/retroseq/call/meta.yml @@ -31,10 +31,20 @@ input: type: file description: Index of the sorted BAM file pattern: "*.bam" + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` - fasta: type: file description: Reference genome in fasta format pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` - fai: type: file description: Reference FASTA index From 01a83137ff61268e64b85138f444361f5e9f2fc3 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 11 Jan 2024 15:35:02 +0100 Subject: [PATCH 21/22] Document order of chr in refrence genome --- docs/usage.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index b4b500ce..8dfd0cc4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -153,17 +153,18 @@ The mandatory and optional parameters for each category are tabulated below. | Mandatory | Optional | | ------------------------------ | ------------------------------ | -| aligner1 | fasta_fai3 | -| fasta | bwamem23 | -| platform | known_dbsnp4 | -| mito_name/mt_fasta2 | known_dbsnp_tbi4 | -| | min_trimmed_length5 | +| aligner1 | fasta_fai4 | +| fasta2 | bwamem24 | +| platform | known_dbsnp5 | +| mito_name/mt_fasta3 | known_dbsnp_tbi5 | +| | min_trimmed_length6 | 1Default value is bwamem2, but if you have a valid license for Sentieon, you have the option to use Sentieon as well.
-2f If mito_name is provided, mt_fasta can be generated by the pipeline.
-3fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
-4Used only by Sentieon.
-5Default value is 40. Used only by fastp.
+2Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.
+3f If mito_name is provided, mt_fasta can be generated by the pipeline.
+4fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
+5Used only by Sentieon.
+6Default value is 40. Used only by fastp.
##### 2. QC stats from the alignment files From 425863d72bc9879137e72206914faccdb7a334a9 Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Mon, 15 Jan 2024 09:19:03 +0100 Subject: [PATCH 22/22] updating usage --- docs/usage.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 8dfd0cc4..80130c19 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -155,14 +155,15 @@ The mandatory and optional parameters for each category are tabulated below. | ------------------------------ | ------------------------------ | | aligner1 | fasta_fai4 | | fasta2 | bwamem24 | -| platform | known_dbsnp5 | -| mito_name/mt_fasta3 | known_dbsnp_tbi5 | +| platform | bwa4 | +| mito_name/mt_fasta3 | known_dbsnp5 | +| | known_dbsnp_tbi5 | | | min_trimmed_length6 | -1Default value is bwamem2, but if you have a valid license for Sentieon, you have the option to use Sentieon as well.
+1Default value is bwamem2. Other alternatives are bwa and sentieon (requires valid Sentieon license ).
2Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.
3f If mito_name is provided, mt_fasta can be generated by the pipeline.
-4fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
+4fasta_fai, bwa and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
5Used only by Sentieon.
6Default value is 40. Used only by fastp.