Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions modules/nf-core/beagle5/beagle/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ process BEAGLE5_BEAGLE {
'biocontainers/beagle:5.5_27Feb25.75f--hdfd78af_0' }"

input:
tuple val(meta), path(vcf), path(vcf_index), path(refpanel), path(refpanel_index), path(genmap), path(exclsamples), path(exclmarkers)
// Including `val(region)` to prevent errors with multi-chromosome VCFs and single-chromosome reference panels.
// This enhances clarity and simplifies implementation in the subworkflow.
tuple val(meta), path(vcf), path(vcf_index), path(refpanel), path(refpanel_index), path(genmap), path(exclsamples), path(exclmarkers), val(region)

output:
tuple val(meta), path("*.vcf.gz"), emit: vcf
Expand All @@ -22,7 +24,8 @@ process BEAGLE5_BEAGLE {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}.bglout"
def ref_command = refpanel ? "ref=$refpanel" : ""
def map_command = genmap ? "map=$genmap" : ""
def map_command = genmap ? "map=$genmap" : ""
def region_cmd = region ? "chrom=$region" : ""
def excludesamples_command = exclsamples ? "excludesamples=$exclsamples" : ""
def excludemarkers_command = exclmarkers ? "excludemarkers=$exclmarkers" : ""

Expand All @@ -40,8 +43,9 @@ process BEAGLE5_BEAGLE {
$args \\
${ref_command} \\
${map_command} \\
${region_cmd} \\
${excludesamples_command} \\
${excludemarkers_command} \\
${excludemarkers_command}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
4 changes: 4 additions & 0 deletions modules/nf-core/beagle5/beagle/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ input:
the analysis
pattern: "*.*"
ontologies: []
- region:
type: string
description: Region to impute, passed to Beagle as chrom=<region>; either a whole chromosome (for example chr22) or chrom:start-end
pattern: "(chr)?\\d+(:\\d+-\\d+)?"
output:
vcf:
- - meta:
Expand Down
22 changes: 4 additions & 18 deletions modules/nf-core/beagle5/beagle/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ nextflow_process {
script "../main.nf"
process "BEAGLE5_BEAGLE"

config "./nextflow.config"

tag "modules"
tag "modules_nfcore"
tag "beagle5"
Expand All @@ -15,16 +13,13 @@ nextflow_process {
test("test-beagle5-beagle") {

when {
params {
module_args = ""
}
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true),
[], [], [], [], []
[], [], [], [], [], []
]
"""
}
Expand All @@ -46,9 +41,6 @@ nextflow_process {
test("test-beagle5-beagle-ref") {

when {
params {
module_args = "chrom=chr22"
}
process {
"""
input[0] = [
Expand All @@ -57,7 +49,7 @@ nextflow_process {
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
[], [], []
[], [], [], "chr22"
]
"""
}
Expand All @@ -78,9 +70,6 @@ nextflow_process {

test("test-beagle5-beagle-ref-map") {
when {
params {
module_args = "chrom=chr22"
}
process {
"""
input[0] = [
Expand All @@ -90,7 +79,7 @@ nextflow_process {
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.plink.map"),
[], []
[], [], "chr22"
]
"""
}
Expand All @@ -112,16 +101,13 @@ nextflow_process {
test("test-beagle5-beagle-ref-map - stub") {
options '-stub'
when {
params {
module_args = ""
}
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true),
[], [], [], [], []
[], [], [], [], [], []
]
"""
}
Expand Down
5 changes: 0 additions & 5 deletions modules/nf-core/beagle5/beagle/tests/nextflow.config

This file was deleted.

116 changes: 116 additions & 0 deletions subworkflows/nf-core/vcf_impute_beagle5/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
include { BEAGLE5_BEAGLE } from '../../../modules/nf-core/beagle5/beagle'
include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view'
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate'
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_BEAGLE } from '../../../modules/nf-core/bcftools/index'
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index'

//
// VCF_IMPUTE_BEAGLE5
//
// Runs BEAGLE5 on every (input, panel, chunk) combination, indexes each
// per-chunk result, ligates the chunks back together with GLIMPSE2_LIGATE and
// indexes the ligated file.
//
// The keys "regionout" and "regionsize" are added to the meta map while the
// chunks are processed and are stripped again before ligation.
//
workflow VCF_IMPUTE_BEAGLE5 {

    take:
    ch_input  // channel (mandatory): [ [id], vcf, tbi ]
    ch_panel  // channel (mandatory): [ [panel, chr], vcf, tbi ]
    ch_chunks // channel (optional) : [ [panel, chr], regionout ]
    ch_map    // channel (optional) : [ [chr], map]
    // NOTE(review): ch_chunks and ch_map are combined with ch_panel on the
    // meta map (by: 0) below and an empty result raises an error, so
    // "optional" should be confirmed against the callers.

    main:
    ch_versions = channel.empty()

    // Route each input according to the extension found in its file name
    ch_by_format = ch_input.branch { _meta, variants, _index ->
        bcf: variants.name.contains('.bcf')
        vcf: variants.name.contains('.vcf')
        other: true
    }

    // Anything that is neither VCF nor BCF aborts the run
    ch_by_format.other.map { _meta, variants, _index ->
        error "ERROR: ${variants.name} in ch_input channel must be in VCF or BCF format."
    }

    // BCF inputs go through BCFTOOLS_VIEW
    // (the output format is presumably set by the module configuration)
    BCFTOOLS_VIEW(ch_by_format.bcf, [], [], [])
    ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first())

    // Pair each BCFTOOLS_VIEW output with whichever index it produced,
    // then pool it with the inputs that were already VCF
    ch_converted_index = BCFTOOLS_VIEW.out.csi.mix(BCFTOOLS_VIEW.out.tbi)
    ch_converted       = BCFTOOLS_VIEW.out.vcf.join(ch_converted_index)
    ch_target_vcf      = ch_by_format.vcf.mix(ch_converted)

    // Number of chunks per panel meta; used later as the expected group size
    ch_n_chunks = ch_chunks
        .groupTuple()
        .map { panel_meta, regions -> [panel_meta, regions.size()] }

    // Attach map, chunk region and chunk count to every panel entry,
    // matching on the meta map in first position
    ch_panel_resources = ch_panel
        .combine(ch_map, by: 0)
        .combine(ch_chunks, by: 0)
        .combine(ch_n_chunks, by: 0)

    ch_panel_resources.ifEmpty {
        error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_panel and ch_map channel as input."
    }

    // Cross every target VCF with every panel/chunk entry and lay the tuple
    // out in the order expected by BEAGLE5_BEAGLE
    ch_beagle_input = ch_target_vcf
        .combine(ch_panel_resources)
        .map { sample_meta, target_vcf, target_index, panel_meta, ref_vcf, ref_index, genetic_map, region, n_regions ->
            def job_meta = sample_meta + panel_meta + ["regionout": region, "regionsize": n_regions]
            [job_meta, target_vcf, target_index, ref_vcf, ref_index, genetic_map, [], [], region]
        }

    // Imputation
    BEAGLE5_BEAGLE(ch_beagle_input)
    ch_versions = ch_versions.mix(BEAGLE5_BEAGLE.out.versions.first())

    // Index every per-chunk result
    BCFTOOLS_INDEX_BEAGLE(BEAGLE5_BEAGLE.out.vcf)
    ch_versions = ch_versions.mix(BCFTOOLS_INDEX_BEAGLE.out.versions.first())

    ch_imputed_index = BCFTOOLS_INDEX_BEAGLE.out.tbi.mix(BCFTOOLS_INDEX_BEAGLE.out.csi)

    // Regroup the chunks of one sample/panel: drop the per-chunk keys from
    // the meta map and use the chunk count as the group size for groupTuple
    ch_ligate_input = BEAGLE5_BEAGLE.out.vcf
        .join(ch_imputed_index)
        .map { job_meta, imputed_vcf, imputed_index ->
            def kept_keys = job_meta.keySet() - ['regionout', 'regionsize']
            [groupKey(job_meta.subMap(kept_keys), job_meta.regionsize), imputed_vcf, imputed_index]
        }
        .groupTuple()
        .map { grouped_key, vcfs, indexes ->
            // Unwrap the plain meta map held by the group key
            [grouped_key.getGroupTarget(), vcfs, indexes]
        }

    // Ligate the chunks and index the ligated file
    GLIMPSE2_LIGATE(ch_ligate_input)
    ch_versions = ch_versions.mix(GLIMPSE2_LIGATE.out.versions.first())

    BCFTOOLS_INDEX_LIGATE(GLIMPSE2_LIGATE.out.merged_variants)
    ch_versions = ch_versions.mix(BCFTOOLS_INDEX_LIGATE.out.versions.first())

    // Pair the ligated file with whichever index was produced
    ch_ligated_index     = BCFTOOLS_INDEX_LIGATE.out.tbi.mix(BCFTOOLS_INDEX_LIGATE.out.csi)
    ch_ligated_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join(ch_ligated_index)

    emit:
    vcf_index = ch_ligated_vcf_index // channel: [ meta, vcf, index ] (meta = input meta merged with panel meta)
    versions  = ch_versions          // channel: [ versions.yml ]
}
100 changes: 100 additions & 0 deletions subworkflows/nf-core/vcf_impute_beagle5/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: VCF_IMPUTE_BEAGLE5
description: |
Subworkflow to impute VCF files using BEAGLE5 software. The subworkflow
takes VCF files, phased reference panel, genetic maps and chunks region to perform imputation
and outputs phased and imputed VCF files.
The meta maps of all channels, except ch_input, are used as keys to perform join operations.
The "regionout" and "regionsize" keys are added to the meta map to distinguish the different
files before ligation and therefore should not be used in the input meta maps.
keywords:
- VCF
- imputation
- beagle5
- phasing
components:
- beagle5/beagle
- bcftools/index
- bcftools/view
- glimpse2/ligate
input:
- ch_input:
description: Channel with input data
structure:
- meta:
type: map
description: |
Metadata map containing sample information
- vcf:
type: file
description: Input VCF files
pattern: "*.{vcf,bcf}{.gz}?"
- index:
type: file
description: Input index file
pattern: "*.{tbi,csi}"
- ch_panel:
description: Channel with phased reference panel data
structure:
- meta:
type: map
description: |
Metadata map that will be combined with the input data map
- vcf:
type: file
description: Reference panel VCF files by chromosomes
pattern: "*.{vcf,bcf,vcf.gz}"
- index:
type: file
description: Reference panel VCF index files
pattern: "*.{tbi,csi}"
- ch_chunks:
description: Channel containing the region to impute
structure:
- meta:
type: map
description: |
Metadata map containing chromosome information
- regionout:
type: string
description: Region to perform the phasing on
pattern: "(chr)?[0-9]+:[0-9]+-[0-9]+"
- ch_map:
description: Channel with genetic map data
structure:
- meta:
type: map
description: |
Metadata map containing chromosome information
- map:
type: file
description: Plink format genetic map files
pattern: "*.map"
output:
- vcf_index:
description: Channel with imputed and phased VCF files
structure:
- meta:
type: map
description: |
Metadata map of the target input file combined with the reference panel map.
- vcf:
type: file
description: VCF imputed and phased file by sample
pattern: "*.{vcf,bcf,vcf.gz}"
- index:
type: file
description: VCF index file
pattern: "*.{tbi,csi}"
- versions:
description: Channel containing software versions file
structure:
- versions.yml:
type: file
description: File containing versions of the software used
authors:
- "@LouisLeNezet"
- "@gichas"
maintainers:
- "@LouisLeNezet"
- "@gichas"
Loading
Loading