diff --git a/modules/nf-core/shapeit5/ligate/main.nf b/modules/nf-core/shapeit5/ligate/main.nf index a45179493fd5..66c13225f207 100644 --- a/modules/nf-core/shapeit5/ligate/main.nf +++ b/modules/nf-core/shapeit5/ligate/main.nf @@ -22,7 +22,7 @@ process SHAPEIT5_LIGATE { def prefix = task.ext.prefix ?: "${meta.id}" def suffix = task.ext.suffix ?: "vcf.gz" """ - printf "%s\\n" $input_list | tr -d '[],' > all_files.txt + printf "%s\\n" $input_list | tr -d '[],' | sort -V > all_files.txt SHAPEIT5_ligate \\ $args \\ diff --git a/modules/nf-core/shapeit5/ligate/tests/main.nf.test b/modules/nf-core/shapeit5/ligate/tests/main.nf.test index 4e3391c5631f..a462a8222be7 100644 --- a/modules/nf-core/shapeit5/ligate/tests/main.nf.test +++ b/modules/nf-core/shapeit5/ligate/tests/main.nf.test @@ -4,6 +4,8 @@ nextflow_process { script "../main.nf" process "SHAPEIT5_LIGATE" + config "./nextflow.config" + tag "modules" tag "modules_nfcore" tag "shapeit5" @@ -13,36 +15,17 @@ nextflow_process { test("homo sapiens - map, [vcf], [tbi]") { config "./nextflow.config" setup { - run("BCFTOOLS_VIEW", alias: "BCFTOOLS_VIEW_1") { - script "../../../bcftools/view" - params { - bcftools_args = "--regions chr22:16570000-16600000 -Ob --write-index=csi -e 'GT=\"./.\"||GT=\".\"'" - } - process { - """ - input[0] = [ - [ id:'NA12878_1' ], // meta map - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi", checkIfExists: true), - ] - input[1] = [] - input[2] = [] - input[3] = [] - """ - } - } - run("BCFTOOLS_VIEW", alias: "BCFTOOLS_VIEW_2") { + run("BCFTOOLS_VIEW") { script "../../../bcftools/view" - params { - bcftools_args = "--regions chr22:16580000-16610000 -Ob --write-index=csi -e 'GT=\"./.\"||GT=\".\"'" - } process { """ - input[0] = [ - [ id:'NA12878_2' ], // meta map - file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi", checkIfExists: true), - ] + input[0] = channel.of( + [ id:'NA12878', region: "chr22:16570000-16600000" ], + [ id:'NA12878', region: "chr22:16580000-16610000" ] + ).combine(channel.of([ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi", checkIfExists: true), + ])) input[1] = [] input[2] = [] input[3] = [] @@ -54,9 +37,8 @@ nextflow_process { when { process { """ - input[0] = BCFTOOLS_VIEW_1.out.vcf.join(BCFTOOLS_VIEW_1.out.csi) - .mix(BCFTOOLS_VIEW_2.out.vcf.join(BCFTOOLS_VIEW_2.out.csi)) - .map { meta, vcf, csi -> [ [id : "NA12878"], vcf, csi ] } + input[0] = BCFTOOLS_VIEW.out.vcf.join(BCFTOOLS_VIEW.out.csi) + .map{ meta, vcf, index -> [meta.subMap("id"), vcf, index]} .groupTuple() """ } @@ -66,8 +48,15 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.merged_variants.collect {meta, vcf -> [ meta, path(vcf).vcf.summary ]}, - process.out.versions + process.out.merged_variants.collect { + meta, vcf -> [ + meta, + file(vcf).name, + path(vcf).vcf.summary, + path(vcf).vcf.variantsMD5 + ] + }, + ["versions": process.out.versions] ).match() } ) } @@ -81,8 +70,8 @@ nextflow_process { """ input[0] = [ [id: 'NA12878'], - [file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz", checkIfExists: true)], - [file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi", checkIfExists: true)] + [file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz", checkIfExists: true)], + 
[file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi", checkIfExists: true)] ] """ } diff --git a/modules/nf-core/shapeit5/ligate/tests/main.nf.test.snap b/modules/nf-core/shapeit5/ligate/tests/main.nf.test.snap index 3caae978dd64..b727f648bdb7 100644 --- a/modules/nf-core/shapeit5/ligate/tests/main.nf.test.snap +++ b/modules/nf-core/shapeit5/ligate/tests/main.nf.test.snap @@ -27,10 +27,10 @@ } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-11-22T17:22:34.235879155" + "timestamp": "2025-11-26T17:58:05.130390005" }, "homo sapiens - map, [vcf], [tbi]": { "content": [ @@ -39,17 +39,21 @@ { "id": "NA12878" }, - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=60, phased=false, phasedAutodetect=false]" + "NA12878.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=81, phased=false, phasedAutodetect=false]", + "e0d8b01df09f70f7ccd010f42a7e07c5" ] ], - [ - "versions.yml:md5,4b66c655a4fd210ca7bc47b6fe35230b" - ] + { + "versions": [ + "versions.yml:md5,4b66c655a4fd210ca7bc47b6fe35230b" + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-12-04T10:07:53.185018802" + "timestamp": "2025-11-26T18:01:39.348167832" } } \ No newline at end of file diff --git a/modules/nf-core/shapeit5/ligate/tests/nextflow.config b/modules/nf-core/shapeit5/ligate/tests/nextflow.config index 0dee91a2a0c9..c72ccf4ef3a0 100644 --- a/modules/nf-core/shapeit5/ligate/tests/nextflow.config +++ b/modules/nf-core/shapeit5/ligate/tests/nextflow.config @@ -1,6 +1,6 @@ process { - withName: "BCFTOOLS_VIEW_*" { - ext.args = params.bcftools_args - ext.prefix = { "${meta.id}" } + withName: "BCFTOOLS_VIEW" { + ext.args = { "--regions ${meta.region} -Ob --write-index=csi -e 'GT=\"./.\"||GT=\".\"'" } + ext.prefix = { "${meta.id}_${meta.region}" } } } diff --git 
a/modules/nf-core/shapeit5/phasecommon/main.nf b/modules/nf-core/shapeit5/phasecommon/main.nf index 89239aeffebc..df22b868617f 100644 --- a/modules/nf-core/shapeit5/phasecommon/main.nf +++ b/modules/nf-core/shapeit5/phasecommon/main.nf @@ -8,14 +8,11 @@ process SHAPEIT5_PHASECOMMON { 'biocontainers/shapeit5:5.1.1--hb60d31d_0'}" input: - tuple val(meta) , path(input), path(input_index), path(pedigree), val(region) - tuple val(meta2), path(reference), path(reference_index) - tuple val(meta3), path(scaffold), path(scaffold_index) - tuple val(meta4), path(map) + tuple val(meta), path(input), path(input_index), path(pedigree), val(region), path(reference), path(reference_index), path(scaffold), path(scaffold_index), path(map) output: - tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: phased_variant - path "versions.yml" , emit: versions + tuple val(meta), path("*.{bcf,graph,bh}"), emit: phased_variant + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,9 +20,13 @@ process SHAPEIT5_PHASECOMMON { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "bcf" - if ("$input" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + def extension = args.contains("--output-format bcf") ? "bcf" : + args.contains("--output-format graph") ? "graph" : + args.contains("--output-format bh") ? "bh" : + "bcf" + + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" def map_command = map ? "--map $map" : "" def reference_command = reference ? 
"--reference $reference" : "" @@ -42,24 +43,27 @@ process SHAPEIT5_PHASECOMMON { $pedigree_command \\ --region $region \\ --thread $task.cpus \\ - --output ${prefix}.${suffix} + --output ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": - shapeit5: "\$(SHAPEIT5_phase_common | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + shapeit5: "\$(SHAPEIT5_phase_common | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -n 1)" END_VERSIONS """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "bcf" - def create_cmd = suffix.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-format bcf") ? "bcf" : + args.contains("--output-format graph") ? "graph" : + args.contains("--output-format bh") ? "bh" : + "bcf" """ - ${create_cmd} ${prefix}.${suffix} + touch ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": - shapeit5: "\$(SHAPEIT5_phase_common | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + shapeit5: "\$(SHAPEIT5_phase_common | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -n 1)" END_VERSIONS """ } diff --git a/modules/nf-core/shapeit5/phasecommon/meta.yml b/modules/nf-core/shapeit5/phasecommon/meta.yml index 62a9e9245bd3..8315815d6248 100644 --- a/modules/nf-core/shapeit5/phasecommon/meta.yml +++ b/modules/nf-core/shapeit5/phasecommon/meta.yml @@ -45,11 +45,6 @@ input: Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). For chrX, please treat PAR and non-PAR regions as different choromosome in order to avoid mixing ploidy. pattern: "chrXX:leftBufferPosition-rightBufferPosition" - - - meta2: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - reference: type: file description: Reference panel of haplotypes in VCF/BCF format. @@ -60,11 +55,6 @@ input: description: Index file of the Reference panel file. pattern: "*.{vcf.gz.csi,bcf.gz.csi}" ontologies: [] - - - meta3: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - scaffold: type: file description: Scaffold of haplotypes in VCF/BCF format. @@ -75,14 +65,9 @@ input: description: Index file of the scaffold file. pattern: "*.{vcf.gz.csi,bcf.gz.csi}" ontologies: [] - - - meta4: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - map: type: file - description: File containing the genetic map. + description: File containing the genetic map in Glimpse format. pattern: "*.gmap" ontologies: [] output: @@ -92,10 +77,10 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.{vcf,bcf,vcf.gz,bcf.gz}": + - "*.{bcf,graph,bh}": type: file - description: Phased variant dataset in VCF/BCF format. - pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + description: Phased variant dataset in BCF, GRAPH or XCF binary format. 
+ pattern: "*.{bcf,graph,bh}" ontologies: [] versions: - versions.yml: diff --git a/modules/nf-core/shapeit5/phasecommon/tests/main.nf.test b/modules/nf-core/shapeit5/phasecommon/tests/main.nf.test index b73fb5aa1309..a194855cdcdb 100644 --- a/modules/nf-core/shapeit5/phasecommon/tests/main.nf.test +++ b/modules/nf-core/shapeit5/phasecommon/tests/main.nf.test @@ -4,13 +4,18 @@ nextflow_process { script "../main.nf" process "SHAPEIT5_PHASECOMMON" + config "./nextflow.config" + tag "modules" tag "modules_nfcore" tag "shapeit5" tag "shapeit5/phasecommon" - test("homo sapiens - vcf, [], [], []") { + test("homo sapiens - vcf, no reference, no scaffold, no map") { when { + params { + shapeit5_phasecommon_args = "--seed 1" + } process { """ input[0] = Channel.of([ @@ -18,11 +23,11 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true), [], - "chr22" + "chr22", + [],[], // reference + [],[], // scaffold + [] // map ]) - input[1] = Channel.of([[],[],[]]) - input[2] = Channel.of([[],[],[]]) - input[3] = Channel.of([[],[]]) """ } } @@ -31,17 +36,21 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.phased_variant.collect{ meta, vcf -> [meta, file(vcf).name]}, - process.out.versions + process.out.phased_variant.collect{ + meta, vcf -> [ meta, file(vcf).name ] // Only file name tested due to bcf format + }, + ["versions": process.out.versions] ).match() } ) } } - test("homo sapiens - vcf, [], [], [] - stub") { - options '-stub' + test("homo sapiens - vcf, no reference, no scaffold, with map") { when { + params { + shapeit5_phasecommon_args = "--seed 1" + } process { """ input[0] = Channel.of([ @@ -49,12 +58,43 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: 
true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true), [], - "chr22" + "chr22", + [],[], // reference + [],[], // scaffold + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists: true) ]) + """ + } + } - input[1] = Channel.of([[],[],[]]) - input[2] = Channel.of([[],[],[]]) - input[3] = Channel.of([[],[]]) + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.phased_variant.collect{ + meta, vcf -> [ meta, file(vcf).name ] // Only file name tested due to bcf format + }, + ["versions": process.out.versions] + ).match() } + ) + } + + } + + test("homo sapiens - vcf, [], [], [] - stub") { + options '-stub' + when { + params { + shapeit5_phasecommon_args = "--seed 1 --output-format graph" + } + process { + """ + input[0] = Channel.of([ + [ id:'input', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true), + [], "chr22", [], [], [], [], [] + ]) """ } } diff --git a/modules/nf-core/shapeit5/phasecommon/tests/main.nf.test.snap b/modules/nf-core/shapeit5/phasecommon/tests/main.nf.test.snap index a339a8796d5a..2159550c051f 100644 --- a/modules/nf-core/shapeit5/phasecommon/tests/main.nf.test.snap +++ b/modules/nf-core/shapeit5/phasecommon/tests/main.nf.test.snap @@ -8,7 +8,7 @@ "id": "input", "single_end": false }, - "input.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "input.graph:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -20,7 +20,7 @@ "id": "input", "single_end": false }, - "input.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "input.graph:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -29,12 +29,12 @@ } ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.1" + 
"nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-12-03T17:01:18.387684498" + "timestamp": "2025-11-26T17:58:34.335209352" }, - "homo sapiens - vcf, [], [], []": { + "homo sapiens - vcf, no reference, no scaffold, no map": { "content": [ [ [ @@ -45,14 +45,39 @@ "input.bcf" ] ], + { + "versions": [ + "versions.yml:md5,135745eaa06583f1c0402134626f9082" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-26T18:01:57.860901774" + }, + "homo sapiens - vcf, no reference, no scaffold, with map": { + "content": [ [ - "versions.yml:md5,135745eaa06583f1c0402134626f9082" - ] + [ + { + "id": "input", + "single_end": false + }, + "input.bcf" + ] + ], + { + "versions": [ + "versions.yml:md5,135745eaa06583f1c0402134626f9082" + ] + } ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-12-03T16:56:47.720234317" + "timestamp": "2025-11-26T17:58:27.665343119" } } \ No newline at end of file diff --git a/modules/nf-core/shapeit5/phasecommon/tests/nextflow.config b/modules/nf-core/shapeit5/phasecommon/tests/nextflow.config new file mode 100644 index 000000000000..28775613ce50 --- /dev/null +++ b/modules/nf-core/shapeit5/phasecommon/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: SHAPEIT5_PHASECOMMON { + cpus = 1 // needed for deterministic output + ext.args = params.shapeit5_phasecommon_args + } +} diff --git a/modules/nf-core/shapeit5/phaserare/main.nf b/modules/nf-core/shapeit5/phaserare/main.nf index 01254f925c1b..313d5be475b7 100644 --- a/modules/nf-core/shapeit5/phaserare/main.nf +++ b/modules/nf-core/shapeit5/phaserare/main.nf @@ -18,9 +18,7 @@ process SHAPEIT5_PHASERARE { 'biocontainers/shapeit5:5.1.1--hb60d31d_0' }" input: - tuple val(meta) , path(input) , path(input_index) , path(pedigree), val(input_region) - tuple val(meta2), path(scaffold) , path(scaffold_index) , val(scaffold_region) - tuple val(meta3), path(map) + 
tuple val(meta), path(input), path(input_index), path(pedigree), val(input_region), path(scaffold), path(scaffold_index), val(scaffold_region), path(map) output: tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: phased_variant @@ -54,7 +52,7 @@ process SHAPEIT5_PHASERARE { cat <<-END_VERSIONS > versions.yml "${task.process}": - shapeit5: "\$(SHAPEIT5_phase_rare | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + shapeit5: "\$(SHAPEIT5_phase_rare | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -n 1)" END_VERSIONS """ @@ -67,7 +65,7 @@ process SHAPEIT5_PHASERARE { cat <<-END_VERSIONS > versions.yml "${task.process}": - shapeit5: "\$(SHAPEIT5_phase_rare | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + shapeit5: "\$(SHAPEIT5_phase_rare | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -n 1)" END_VERSIONS """ } diff --git a/modules/nf-core/shapeit5/phaserare/meta.yml b/modules/nf-core/shapeit5/phaserare/meta.yml index e3088c1d824f..91dd01ac5c2e 100644 --- a/modules/nf-core/shapeit5/phaserare/meta.yml +++ b/modules/nf-core/shapeit5/phaserare/meta.yml @@ -47,11 +47,6 @@ input: Region to be considered in --input-plain (e.g. chr20:1000000-2000000 or chr20). For chrX, please treat PAR and non-PAR regions as different choromosome in order to avoid mixing ploidy. pattern: "chrXX:leftBufferPosition-rightBufferPosition" - - - meta2: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - scaffold: type: file description: Scaffold of haplotypes in VCF/BCF format. @@ -67,14 +62,9 @@ input: description: | Region to be considered in --scaffold (e.g. chr20:1000000-2000000 or chr20). pattern: "chrXX:leftBufferPosition-rightBufferPosition" - - - meta3: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - map: type: file - description: File containing the genetic map. 
+ description: File containing the genetic map in Glimpse format. pattern: "*.gmap" ontologies: [] output: diff --git a/modules/nf-core/shapeit5/phaserare/tests/main.nf.test b/modules/nf-core/shapeit5/phaserare/tests/main.nf.test index 337699e1b1f9..5cc5565cf796 100644 --- a/modules/nf-core/shapeit5/phaserare/tests/main.nf.test +++ b/modules/nf-core/shapeit5/phaserare/tests/main.nf.test @@ -4,6 +4,8 @@ nextflow_process { script "../main.nf" process "SHAPEIT5_PHASERARE" + config "./nextflow.config" + tag "modules" tag "modules_nfcore" tag "shapeit5" @@ -11,26 +13,18 @@ nextflow_process { tag "shapeit5/phasecommon" tag "bcftools/index" - test("homo sapiens - vcf, scaffold, []") { - config "./nextflow.config" + test("homo sapiens - vcf, scaffold, map") { setup { run("SHAPEIT5_PHASECOMMON") { script "../../../shapeit5/phasecommon" - params { - shapeit5_phasecommon_args = "--filter-maf 0.001" - } process { """ input[0] = Channel.of([ [ id:'scaffold', single_end:false ], // meta map file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true), - [], - "chr22", + [], "chr22", [], [], [], [], [] ]) - input[1] = Channel.of([[],[],[]]) - input[2] = Channel.of([[],[],[]]) - input[3] = Channel.of([[],[]]) """ } } @@ -45,6 +39,10 @@ nextflow_process { } when { + params { + shapeit5_phasecommon_args = "--seed 1 --filter-maf 0.001" + shapeit5_phaserare_args = "--seed 1" + } process { """ input[0] = Channel.of([ @@ -53,11 +51,15 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true), [], "chr22:16580000-16600000", - ]) - input[1] = SHAPEIT5_PHASECOMMON.out.phased_variant - .join(BCFTOOLS_INDEX.out.csi) - .combine(Channel.of("chr22:16570000-16610000")) - input[2] = Channel.of([[],[]]) + ]).combine( + 
SHAPEIT5_PHASECOMMON.out.phased_variant + .join(BCFTOOLS_INDEX.out.csi) + .map{ meta, vcf, index -> [vcf, index] } + ) + .combine(channel.of([ + "chr22:16570000-16610000", + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists: true) + ])) """ } } @@ -66,8 +68,13 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.phased_variant.collect { meta, vcf -> [ meta, file(vcf).name ] }, - process.out.versions + process.out.phased_variant.collect { meta, vcf -> [ + meta, + file(vcf).name, + path(vcf).vcf.summary, + // path(vcf).vcf.variantsMD5 Conda give another md5 sum + ] }, + ["versions": process.out.versions] ).match() } ) } @@ -78,17 +85,18 @@ nextflow_process { options "-stub" when { + params { + shapeit5_phasecommon_args = "--seed 1 --filter-maf 0.001" + shapeit5_phaserare_args = "--seed 1" + } process { """ input[0] = Channel.of([ [ id:'input', single_end:false ], // meta map file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true), - [], - "chr22:16580000-16600000", + [], "chr22:16580000-16600000", [], [], [], [] ]) - input[1] = Channel.of([[],[],[],[]]) - input[2] = Channel.of([[],[]]) """ } } diff --git a/modules/nf-core/shapeit5/phaserare/tests/main.nf.test.snap b/modules/nf-core/shapeit5/phaserare/tests/main.nf.test.snap index 9c797738da52..b08abaaa3704 100644 --- a/modules/nf-core/shapeit5/phaserare/tests/main.nf.test.snap +++ b/modules/nf-core/shapeit5/phaserare/tests/main.nf.test.snap @@ -29,12 +29,12 @@ } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-11-22T18:11:56.868657182" + "timestamp": "2025-11-26T17:58:56.414979229" }, - "homo sapiens - vcf, scaffold, []": { + "homo sapiens - 
vcf, scaffold, map": { "content": [ [ [ @@ -42,17 +42,20 @@ "id": "input", "single_end": false }, - "input.vcf.gz" + "input.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=3202, variantCount=447, phased=true, phasedAutodetect=true]" ] ], - [ - "versions.yml:md5,c8092f58dd64199400ee9b9bfd96032e" - ] + { + "versions": [ + "versions.yml:md5,c8092f58dd64199400ee9b9bfd96032e" + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-11-22T18:35:35.056441054" + "timestamp": "2025-11-26T18:23:40.748303905" } } \ No newline at end of file diff --git a/modules/nf-core/shapeit5/phaserare/tests/nextflow.config b/modules/nf-core/shapeit5/phaserare/tests/nextflow.config index 5f84c07d3bdd..9e5e8dbe4115 100644 --- a/modules/nf-core/shapeit5/phaserare/tests/nextflow.config +++ b/modules/nf-core/shapeit5/phaserare/tests/nextflow.config @@ -1,5 +1,11 @@ process { withName: SHAPEIT5_PHASECOMMON { + cpus = 1 // Needed for deterministic output ext.args = params.shapeit5_phasecommon_args } + + withName: SHAPEIT5_PHASERARE { + cpus = 1 // Needed for deterministic output + ext.args = params.shapeit5_phaserare_args + } } diff --git a/modules/nf-core/shapeit5/switch/main.nf b/modules/nf-core/shapeit5/switch/main.nf index a34fed1c805c..2c26271a9e7a 100644 --- a/modules/nf-core/shapeit5/switch/main.nf +++ b/modules/nf-core/shapeit5/switch/main.nf @@ -8,9 +8,7 @@ process SHAPEIT5_SWITCH { 'biocontainers/shapeit5:5.1.1--hb60d31d_0'}" input: - tuple val(meta) , path(estimate), path(estimate_index), val(region), path(pedigree) - tuple val(meta2), path(truth) , path(truth_index) - tuple val(meta3), path(freq) , path(freq_index) + tuple val(meta), path(estimate), path(estimate_index), val(region), path(pedigree), path(truth), path(truth_index), path(freq) , path(freq_index) output: tuple val(meta), path("*.txt.gz"), emit: errors @@ -38,7 +36,7 @@ process SHAPEIT5_SWITCH { cat <<-END_VERSIONS > versions.yml 
"${task.process}": - shapeit5: "\$(SHAPEIT5_switch | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + shapeit5: "\$(SHAPEIT5_switch | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -n 1)" END_VERSIONS """ @@ -58,7 +56,7 @@ process SHAPEIT5_SWITCH { cat <<-END_VERSIONS > versions.yml "${task.process}": - shapeit5: "\$(SHAPEIT5_switch | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + shapeit5: "\$(SHAPEIT5_switch | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -n 1)" END_VERSIONS """ } diff --git a/modules/nf-core/shapeit5/switch/meta.yml b/modules/nf-core/shapeit5/switch/meta.yml index a11dabfe4f66..10fad6d6bc87 100644 --- a/modules/nf-core/shapeit5/switch/meta.yml +++ b/modules/nf-core/shapeit5/switch/meta.yml @@ -42,11 +42,6 @@ input: Pedigree information in the following format: offspring father mother. pattern: "*.{txt, tsv}" ontologies: [] - - - meta2: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - truth: type: file description: Validation dataset called at the same positions as the imputed @@ -58,11 +53,6 @@ input: description: Index file of the truth VCF/BCF file. pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" ontologies: [] - - - meta3: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - freq: type: file description: File containing allele frequencies at each site. 
diff --git a/modules/nf-core/shapeit5/switch/tests/main.nf.test b/modules/nf-core/shapeit5/switch/tests/main.nf.test index 8ebad3eaa903..da867d2efb07 100644 --- a/modules/nf-core/shapeit5/switch/tests/main.nf.test +++ b/modules/nf-core/shapeit5/switch/tests/main.nf.test @@ -4,6 +4,8 @@ nextflow_process { script "../main.nf" process "SHAPEIT5_SWITCH" + config "./nextflow.config" + tag "modules" tag "modules_nfcore" tag "shapeit5" @@ -12,25 +14,17 @@ nextflow_process { tag "bcftools/index" test("homo sapiens - vcf, scaffold, []") { - config "./nextflow.config" setup { run("SHAPEIT5_PHASECOMMON") { script "../../../shapeit5/phasecommon" - params { - shapeit5_phasecommon_args = "--filter-maf 0.001" - } process { """ input[0] = Channel.of([ [ id:'scaffold', single_end:false ], // meta map file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true), - [], - "chr22", + [], "chr22", [], [], [], [], [] ]) - input[1] = Channel.of([[],[],[]]) - input[2] = Channel.of([[],[],[]]) - input[3] = Channel.of([[],[]]) """ } } @@ -45,20 +39,20 @@ nextflow_process { } when { + params { + shapeit5_phasecommon_args = "--filter-maf 0.001 --seed 1" + } process { """ input[0] = SHAPEIT5_PHASECOMMON.out.phased_variant .join(BCFTOOLS_INDEX.out.csi) - .combine(Channel.of("chr22")) - .combine(Channel.of([[]])) - input[1] = Channel.of([[ id:'truth_panel'], + .combine(channel.of([ + "chr22", [], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz",checkIfExists:true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi",checkIfExists:true) - ]) - input[2] = Channel.of([[ id:'freq_file'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi",checkIfExists:true), file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz",checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz.csi",checkIfExists:true) - ]) + ])) """ } } @@ -66,10 +60,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - process.out.errors.collect { meta, txt -> [ meta, txt.collect{ file(it).name } ] }, - process.out.versions - ).match() } + { assert snapshot(process.out).match() } ) } @@ -78,17 +69,17 @@ nextflow_process { test("homo sapiens - vcf, scaffold, [] - stub") { options "-stub" when { + params { + shapeit5_phasecommon_args = "--filter-maf 0.001 --seed 1" + } process { """ input[0] = Channel.of([ [id:'truth_panel'], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz",checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi",checkIfExists:true), - "chr21", - [] + "chr21", [], [], [], [], [] ]) - input[1] = [[], [], []] - input[2] = [[], [], []] """ } } diff --git a/modules/nf-core/shapeit5/switch/tests/main.nf.test.snap b/modules/nf-core/shapeit5/switch/tests/main.nf.test.snap index a568047c3552..32f45bb2a847 100644 --- a/modules/nf-core/shapeit5/switch/tests/main.nf.test.snap +++ b/modules/nf-core/shapeit5/switch/tests/main.nf.test.snap @@ -47,40 +47,64 @@ } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-11-22T19:04:42.303883153" + "timestamp": "2025-11-26T17:59:17.11538368" }, "homo sapiens - vcf, scaffold, []": { "content": [ - [ - [ - { - "id": "scaffold", - "single_end": false - }, + { + "0": [ [ - "scaffold.block.switch.txt.gz", - "scaffold.calibration.switch.txt.gz", - "scaffold.flipsAndSwitches.txt.gz", - "scaffold.frequency.switch.txt.gz", - "scaffold.sample.switch.txt.gz", - "scaffold.sample.typing.txt.gz", - "scaffold.type.switch.txt.gz", - 
"scaffold.variant.switch.txt.gz", - "scaffold.variant.typing.txt.gz" + { + "id": "scaffold", + "single_end": false + }, + [ + "scaffold.block.switch.txt.gz:md5,64c8c2f5bbcb0ad0d8ccae753a11091c", + "scaffold.calibration.switch.txt.gz:md5,2881ea7e450a54b3717b204bc67f569b", + "scaffold.flipsAndSwitches.txt.gz:md5,a5312a42910984f914653d8d520c3711", + "scaffold.frequency.switch.txt.gz:md5,776aec0a106e2c3d4dd130a4e6e425a9", + "scaffold.sample.switch.txt.gz:md5,76879e436b6baf115cffa5cb1f73d2a8", + "scaffold.sample.typing.txt.gz:md5,725898bf9871c0a40cc44ef778cc0763", + "scaffold.type.switch.txt.gz:md5,22afc8680b98e7d205163642b2c0159a", + "scaffold.variant.switch.txt.gz:md5,074ee134a47bf41f55c947efb2a46392", + "scaffold.variant.typing.txt.gz:md5,8b3e30c7a19afc9d27194f1852c2a510" + ] ] + ], + "1": [ + "versions.yml:md5,95238962a141557f464b8e22c8057211" + ], + "errors": [ + [ + { + "id": "scaffold", + "single_end": false + }, + [ + "scaffold.block.switch.txt.gz:md5,64c8c2f5bbcb0ad0d8ccae753a11091c", + "scaffold.calibration.switch.txt.gz:md5,2881ea7e450a54b3717b204bc67f569b", + "scaffold.flipsAndSwitches.txt.gz:md5,a5312a42910984f914653d8d520c3711", + "scaffold.frequency.switch.txt.gz:md5,776aec0a106e2c3d4dd130a4e6e425a9", + "scaffold.sample.switch.txt.gz:md5,76879e436b6baf115cffa5cb1f73d2a8", + "scaffold.sample.typing.txt.gz:md5,725898bf9871c0a40cc44ef778cc0763", + "scaffold.type.switch.txt.gz:md5,22afc8680b98e7d205163642b2c0159a", + "scaffold.variant.switch.txt.gz:md5,074ee134a47bf41f55c947efb2a46392", + "scaffold.variant.typing.txt.gz:md5,8b3e30c7a19afc9d27194f1852c2a510" + ] + ] + ], + "versions": [ + "versions.yml:md5,95238962a141557f464b8e22c8057211" ] - ], - [ - "versions.yml:md5,95238962a141557f464b8e22c8057211" - ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-11-22T19:13:32.038870261" + "timestamp": "2025-11-26T17:59:10.721267924" } } \ No newline at end of file diff --git 
a/modules/nf-core/shapeit5/switch/tests/nextflow.config b/modules/nf-core/shapeit5/switch/tests/nextflow.config index 5f84c07d3bdd..28775613ce50 100644 --- a/modules/nf-core/shapeit5/switch/tests/nextflow.config +++ b/modules/nf-core/shapeit5/switch/tests/nextflow.config @@ -1,5 +1,6 @@ process { withName: SHAPEIT5_PHASECOMMON { + cpus = 1 // needed for deterministic output ext.args = params.shapeit5_phasecommon_args } } diff --git a/subworkflows/nf-core/vcf_phase_shapeit5/main.nf b/subworkflows/nf-core/vcf_phase_shapeit5/main.nf index e10b44a10098..c1303d99667c 100644 --- a/subworkflows/nf-core/vcf_phase_shapeit5/main.nf +++ b/subworkflows/nf-core/vcf_phase_shapeit5/main.nf @@ -1,101 +1,110 @@ -include { BEDTOOLS_MAKEWINDOWS } from '../../../modules/nf-core/bedtools/makewindows/main.nf' -include { SHAPEIT5_PHASECOMMON } from '../../../modules/nf-core/shapeit5/phasecommon/main.nf' -include { SHAPEIT5_LIGATE } from '../../../modules/nf-core/shapeit5/ligate/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX1 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX2 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk' +include { SHAPEIT5_PHASECOMMON } from '../../../modules/nf-core/shapeit5/phasecommon' +include { SHAPEIT5_LIGATE } from '../../../modules/nf-core/shapeit5/ligate' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' workflow VCF_PHASE_SHAPEIT5 { take: - ch_vcf // channel (mandatory): [ val(meta), path(vcf), path(csi), path(pedigree), val(region) ] - ch_ref // channel (optional) : [ val(meta), path(ref), path(csi) ] - ch_scaffold // channel (optional) : [ val(meta), path(scaffold), path(csi) ] - ch_map // channel (optional) : [ val(meta), path(map)] + ch_vcf // channel (mandatory) : [ [id, chr], vcf, index, 
pedigree, region ] + ch_chunks // channel (optional) : [ [id, chr], regionout ] + ch_ref // channel (optional) : [ [id, chr], vcf, index ] + ch_scaffold // channel (optional) : [ [id, chr], vcf, index ] + ch_map // channel (optional) : [ [id, chr], map] + chunk // val (mandatory) : boolean to activate/deactivate chunking step + chunk_model // channel (mandatory) : [ model ] main: - ch_versions = Channel.empty() - - // It is needed to generate a file containing the region to phase in a Chr \tab Start \tab End format - // The meta map needing to be conserved the following steps a required + ch_versions = channel.empty() + + if ( chunk == true ){ + // Error if pre-defined chunks are provided when chunking is activated + ch_chunks + .filter { _meta, regionout -> regionout.size() > 0 } + .subscribe { + error "ERROR: Cannot provide pre-defined chunks (regionin) when chunk=true. Please either set chunk=false to use provided chunks, or remove input chunks to enable automatic chunking." + } + + // Chunk reference panel + ch_vcf_map = ch_vcf + .combine(ch_map, by: 0) + .map{ + meta, vcf, index, _pedigree, region, gmap -> [ + meta, vcf, index, region, gmap + ] + } + + GLIMPSE2_CHUNK ( ch_vcf_map, chunk_model ) + ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() ) + + ch_chunks = GLIMPSE2_CHUNK.out.chunk_chr + .splitCsv(header: [ + 'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm', + 'WindowMb', 'NbTotVariants', 'NbComVariants' + ], sep: "\t", skip: 0) + .map { meta, rows -> [meta, rows["RegionBuf"]]} + } + + ch_chunks + .filter { _meta, regionout -> regionout.size() == 0 } + .subscribe { + error "ERROR: ch_chunks channel is empty. Please provide a valid channel or set chunk parameter to true." 
+ } - // Keep the meta map and the region in two separated channel but keed id field to link them back - ch_region = ch_vcf - .multiMap { meta, _vcf, _csi, _pedigree, region -> - metadata: [ meta.id, meta] - region : [ meta.id, region] + // Make channel with all parameters + ch_parameters = ch_vcf + .combine(ch_map, by: 0) + .combine(ch_ref, by: 0) + .combine(ch_scaffold, by: 0) + .combine(ch_chunks, by: 0) + + ch_parameters.ifEmpty{ + error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_map, ch_ref, ch_scaffold and ch_chunks channel as input (same meta map)." + } + + // Rearrange channel for phasing + ch_phase_input = ch_parameters + .map{ + meta, vcf, index, pedigree, _region, gmap, ref_vcf, ref_index, scaffold_vcf, scaffold_index, regionbuf -> [ + meta + ["regionout": regionbuf], vcf, index, pedigree, regionbuf, + ref_vcf, ref_index, scaffold_vcf, scaffold_index, gmap + ] } - // Create the File in bed format and use the meta id for the file name - ch_merged_region = ch_region.region - .collectFile { metaid, region -> ["${metaid}.bed", region.replace(":","\t").replace("-","\t")] } - .map { file -> [file.baseName, file] } - - // Link back the meta map with the file - ch_region_file = ch_region.metadata - .join(ch_merged_region, failOnMismatch:true, failOnDuplicate:true) - .map { _mid, meta, region_file -> [meta, region_file]} - - BEDTOOLS_MAKEWINDOWS(ch_region_file) - ch_versions = ch_versions.mix(BEDTOOLS_MAKEWINDOWS.out.versions.first()) - - ch_chunk_output = BEDTOOLS_MAKEWINDOWS.out.bed - .splitCsv(header: ['Chr', 'Start', 'End'], sep: "\t", skip: 0) - .map { meta, it -> [meta, it["Chr"]+":"+it["Start"]+"-"+it["End"]]} - - // Count the number of chunks - ch_chunks_number = BEDTOOLS_MAKEWINDOWS.out.bed - .map { meta, bed -> [meta, bed.countLines().intValue()]} - - ch_phase_input = ch_vcf - .map { meta, vcf, index, pedigree, _region -> - [meta, vcf, index, pedigree] } - .combine(ch_chunk_output, by:0) - .map { meta, vcf, index, 
pedigree, chunk -> - [meta + [id: "${meta.id}_${chunk.replace(":","-")}"], // The meta.id field need to be modified to be unique for each chunk - vcf, index, pedigree, chunk]} - - SHAPEIT5_PHASECOMMON ( - ch_phase_input, - ch_ref, - ch_scaffold, - ch_map - ) - ch_versions = ch_versions.mix(SHAPEIT5_PHASECOMMON.out.versions) - - VCF_INDEX1(SHAPEIT5_PHASECOMMON.out.phased_variant) - ch_versions = ch_versions.mix(VCF_INDEX1.out.versions) + SHAPEIT5_PHASECOMMON (ch_phase_input) + ch_versions = ch_versions.mix(SHAPEIT5_PHASECOMMON.out.versions.first()) + + BCFTOOLS_INDEX_1(SHAPEIT5_PHASECOMMON.out.phased_variant) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_1.out.versions.first()) ch_ligate_input = SHAPEIT5_PHASECOMMON.out.phased_variant - .join(VCF_INDEX1.out.csi, failOnMismatch:true, failOnDuplicate:true) - .map{ meta, vcf, csi -> - def newmeta = meta + [id: meta.id.split("_")[0..-2].join("_")] - [newmeta, vcf, csi]} - .combine(ch_chunks_number, by:0) - .map{meta, vcf, csi, chunks_num -> - [groupKey(meta, chunks_num), vcf, csi]} - .groupTuple() - .map{ meta, vcf, csi -> - [ - meta.target, - vcf.sort { a, b -> - def aStart = a.getName().split('-')[-2].toInteger() - def bStart = b.getName().split('-')[-2].toInteger() - aStart <=> bStart - }, - csi - ] + .join( + BCFTOOLS_INDEX_1.out.csi + .mix(BCFTOOLS_INDEX_1.out.tbi), + failOnMismatch:true, failOnDuplicate:true + ) + .map{ meta, vcf, index -> + def keysToKeep = meta.keySet() - ['regionout'] + [ meta.subMap(keysToKeep), vcf, index ] } + .groupTuple() SHAPEIT5_LIGATE(ch_ligate_input) - ch_versions = ch_versions.mix(SHAPEIT5_LIGATE.out.versions) + ch_versions = ch_versions.mix(SHAPEIT5_LIGATE.out.versions.first()) + + BCFTOOLS_INDEX_2(SHAPEIT5_LIGATE.out.merged_variants) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_2.out.versions.first()) - VCF_INDEX2(SHAPEIT5_LIGATE.out.merged_variants) - ch_versions = ch_versions.mix(VCF_INDEX2.out.versions) + ch_vcf_index = SHAPEIT5_LIGATE.out.merged_variants + .join( + 
BCFTOOLS_INDEX_2.out.csi.mix(BCFTOOLS_INDEX_2.out.tbi), + failOnMismatch:true, failOnDuplicate:true + ) emit: - bed = BEDTOOLS_MAKEWINDOWS.out.bed // channel: [ val(meta), bed ] - variants_phased = SHAPEIT5_LIGATE.out.merged_variants // channel: [ val(meta), vcf ] - variants_index = VCF_INDEX2.out.csi // channel: [ val(meta), csi ] - versions = ch_versions // channel: [ versions.yml ] + chunks = ch_chunks // channel: [ [id, chr], regionout] + vcf_index = ch_vcf_index // channel: [ [id, chr], vcf, csi ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/vcf_phase_shapeit5/meta.yml b/subworkflows/nf-core/vcf_phase_shapeit5/meta.yml index 54c8cd01acb9..d5bbd90fae06 100644 --- a/subworkflows/nf-core/vcf_phase_shapeit5/meta.yml +++ b/subworkflows/nf-core/vcf_phase_shapeit5/meta.yml @@ -1,71 +1,136 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "vcf_phase_shapeit5" -description: Phase vcf panel with Shapeit5 tools +name: "VCF_PHASE_SHAPEIT5" +description: | + Subworkflow to phase a reference panel VCF file using SHAPEIT5. + The panel is first chunked by chromosome by glimpse2/chunk, + then genotypes are phased with shapeit5/phasecommon and + finally the chunks are merged back together by shapeit5/ligate by chromosomes. + Meta map of all channels will be used to perform joint operations. + "regionout" key will be added to the meta map to distinguish the different file + before ligation and therefore should not be used. keywords: - - chunk + - VCF - phase - - ligate - - index - - vcf + - shapeit5 + - haplotype components: - - bedtools/makewindows + - glimpse2/chunk - shapeit5/phasecommon - shapeit5/ligate - bcftools/index input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - ch_vcf: - type: file - description: | - Target dataset in VCF/BCF format defined at all variable positions. 
- Index file of the input VCF/BCF file containing genotype likelihoods. - Pedigree information in the following format: offspring father mother. - Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). - The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). - Structure: [ val(meta), path(vcf), path(csi), path(pedigree), val(region) ] + description: Channel with target VCF files + structure: + - meta: + type: map + description: Metadata map + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,bcf,vcf.gz}" + - index: + type: file + description: VCF index file + pattern: "*.{tbi,csi}" + - pedigree: + type: file + description: + Pedigree information in the following format: offspring father mother. + pattern: "*.{txt,tsv}" + - region: + type: string + description: Region to perform the chunking on for GLIMPSE2_chunk + pattern: "[chr]+[0-9]+:[0-9]+-[0-9]+" + - ch_chunks: + description: Channel with region data + structure: + - meta: + type: map + description: Metadata map that will be combined with the input data map + - regionout: + type: string + description: Region to perform the phasing on + pattern: "[chr]+[0-9]+:[0-9]+-[0-9]+" - ch_ref: - type: file - description: | - Reference panel of haplotypes in VCF/BCF format. - Index file of the Reference panel file. - Structure: [ val(meta), path(ref), path(csi) ] + description: Channel with reference phased VCF files + structure: + - meta: + type: map + description: Metadata map + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,bcf,vcf.gz}" + - index: + type: file + description: VCF index file + pattern: "*.{tbi,csi}" - ch_scaffold: - type: file - description: | - Scaffold of haplotypes in VCF/BCF format. - Index file of the Scaffold of haplotypes file.
- Structure: [ val(meta), path(scaffold), path(csi) ] + description: Channel with reference scaffold VCF files + structure: + - meta: + type: map + description: Metadata map + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,bcf,vcf.gz}" + - index: + type: file + description: VCF index file + pattern: "*.{tbi,csi}" - ch_map: - type: file - description: File containing the genetic map. - Structure: [val(meta), path(map)] + description: Channel with genetic map data (optional) + structure: + - meta: + type: map + description: Metadata map + - map: + type: file + description: Map file in GLIMPSE format + pattern: "*.map" + - chunk: + type: boolean + description: Whether to perform chunking of the input data before phasing. + - chunk_model: + description: Chunk model for GLIMPSE2_chunk + type: string + enum: + - recursive + - sequential output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - bed: - type: file - description: BED file containing the windows - pattern: "*.bed" - - variants_phased: - type: file - description: Phased haplotypes in VCF/BCF format.
- pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" - - variants_index: - type: file - description: CSI bcftools index - pattern: "*.csi" + - chunks: + description: Channel with chunks regions + structure: + - meta: + type: map + description: Metadata map + - regionout: + type: string + description: Region to perform the phasing on + pattern: "[chr]+[0-9]+:[0-9]+-[0-9]+" + - vcf_index: + description: Channel with phased VCF files + structure: + - meta: + type: map + description: Metadata map + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,bcf,vcf.gz}" + - index: + type: file + description: VCF index file + pattern: "*.{tbi,csi}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + description: Channel containing software versions file + structure: + - versions.yml: + type: file + description: File containing versions of the software used authors: - - "@LouisLeNezet" + - "@louislenezet" maintainers: - - "@LouisLeNezet" + - "@louislenezet" diff --git a/subworkflows/nf-core/vcf_phase_shapeit5/tests/main.nf.test b/subworkflows/nf-core/vcf_phase_shapeit5/tests/main.nf.test index 92912da95c5e..f0811c9ed5d7 100644 --- a/subworkflows/nf-core/vcf_phase_shapeit5/tests/main.nf.test +++ b/subworkflows/nf-core/vcf_phase_shapeit5/tests/main.nf.test @@ -10,8 +10,8 @@ nextflow_workflow { tag "subworkflows/vcf_phase_shapeit5" tag "vcf_phase_shapeit5" - tag "bedtools/makewindows" - tag "bedtools" + tag "glimpse2/chunk" + tag "glimpse2" tag "shapeit5/phasecommon" tag "shapeit5/ligate" tag "shapeit5" @@ -21,26 +21,29 @@ nextflow_workflow { tag "bcftools/view" tag "bcftools/pluginfilltags" - test("homo_sapiens - panel - [] - [] - []") { + test("homo_sapiens - target - no chunks, no ref, no scaffold, no map - one chromosome") { when { params { - bcftools_pluginfilltags_args = "--write-index" - bcftools_pluginfilltags_args2 = "-t AC,AN" + glimpse2_chunk_args = "--window-mb 0.1 --window-cm 0.1 --window-count 100 --buffer-mb 0.01 
--buffer-cm 0.01 --buffer-count 10" + bcftools_pluginfilltags_args = "" + bcftools_pluginfilltags_args2 = "" bcftools_view_args = "" - bedtools_makewindows_args = "-w 60000 -s 40000" } workflow { """ input[0] = Channel.of([ - [id:'ref_panel', region: "chr22:16570065-16609999"], + [id:'test', chr: "22"], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), - [], - "chr22:16570065-16609999" + [], // empty pedigree + "chr22:16570000-16610000" // Region to chunk ]) - input[1] = Channel.of([[],[],[]]).collect() - input[2] = Channel.of([[],[],[]]).collect() - input[3] = Channel.of([[],[]]).collect() + input[1] = Channel.of([[id:'test', chr: "22"], []]) // Chunks + input[2] = Channel.of([[id:'test', chr: "22"], [], []]) // Ref + input[3] = Channel.of([[id:'test', chr: "22"], [], []]) // Scaffold + input[4] = Channel.of([[id:'test', chr: "22"], []]) // Genetic map + input[5] = true // Perform chunks + input[6] = "recursive" // chunk_model """ } } @@ -48,25 +51,35 @@ nextflow_workflow { assertAll( { assert workflow.success }, { assert snapshot( - path(workflow.out.variants_phased[0][1]).vcf.summary, - file(workflow.out.variants_index[0][1]).name, - workflow.out.bed, + workflow.out.vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.chunks, workflow.out.versions ).match() } ) } } - test("homo_sapiens - target - panel - [] - []") { + test("homo_sapiens - target, ref, chunks - no scaffold - two chromosomes") { setup { run("BCFTOOLS_VIEW") { script "../../../../modules/nf-core/bcftools/view/main.nf" process { """ - input[0] = Channel.of([ - [id:'NA12878', region:"chr22:16570065-16609999"], - file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi", checkIfExists: true), + input[0] = Channel.of( + [ + [id:'NA12878', chr:"chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true), + ],[ + [id:'NA12878', chr:"chr21"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true) ]) input[1] = [] input[2] = [] @@ -90,23 +103,46 @@ nextflow_workflow { } when { params { + glimpse2_chunk_args = "" bcftools_pluginfilltags_args = "--write-index" bcftools_pluginfilltags_args2 = "-t AC,AN" bcftools_view_args = "-Oz -e 'GT=\"./.\"||GT=\".\"' --write-index" - bedtools_makewindows_args = "-w 60000 -s 40000" } workflow { """ input[0] = BCFTOOLS_PLUGINFILLTAGS.out.vcf .join(BCFTOOLS_PLUGINFILLTAGS.out.csi) - .combine(Channel.of([[],"chr22:16570065-16609999"])) - input[1] = Channel.of([ - [panel:'1000GP'], + .combine(Channel.of([[], []])) // No regions needed as chunks are provided + input[1] = channel.of( + [[id:'NA12878', chr:"chr22"], "chr22:16570065-16597215"], + [[id:'NA12878', chr:"chr22"], "chr22:16587172-16609999"], + [[id:'NA12878', chr:"chr21"], "chr21:16570065-16597215"], + [[id:'NA12878', chr:"chr21"], "chr21:16587172-16609999"] + ) + input[2] = channel.of( + [ + [id:'NA12878', chr:"chr22"], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), - file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), - ]).collect() - input[2] = Channel.of([[],[],[]]).collect() - input[3] = Channel.of([[],[]]).collect() // Map can't be used on really small region (error: "Haploid underflow impossible to recover for [NA12878]") + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true) + ],[ + [id:'NA12878', chr:"chr21"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz.csi", checkIfExists:true) + ]) + input[3] = Channel.of( + [[id:'NA12878', chr:"chr22"], [], []], + [[id:'NA12878', chr:"chr21"], [], []] + ) + input[4] = Channel.of( + [ + [id:'NA12878', chr:"chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists:true) + ],[ + [id:'NA12878', chr:"chr21"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr21.glimpse.map", checkIfExists:true) + ]) + input[5] = false // Perform chunks + input[6] = "recursive" // chunk_model """ } } @@ -114,36 +150,45 @@ nextflow_workflow { assertAll( { assert workflow.success }, { assert snapshot( - path(workflow.out.variants_phased[0][1]).vcf.variantsMD5, - file(workflow.out.variants_index[0][1]).name, - workflow.out.bed, + workflow.out.vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.header.getGenotypeSamples().sort(), + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.chunks, workflow.out.versions ).match() } ) } } - test("homo_sapiens - panel - [] - [] - [] -- stub") { + test("homo_sapiens - target - no error -- stub") { options "-stub" when { params { + glimpse2_chunk_args = "" bcftools_pluginfilltags_args = 
"--write-index" bcftools_pluginfilltags_args2 = "-t AC,AN" bcftools_view_args = "" - bedtools_makewindows_args = "-w 60000 -s 40000" } workflow { """ input[0] = Channel.of([ - [id:'ref_panel', region: "chr22:16570065-16609999"], + [id:'test', chr: "22"], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), [], "chr22:16570065-16609999" ]) - input[1] = Channel.of([[],[],[]]).collect() - input[2] = Channel.of([[],[],[]]).collect() - input[3] = Channel.of([[],[]]).collect() + input[1] = Channel.of([[id:'test', chr: "22"], []]) // Chunks + input[2] = Channel.of([[id:'test', chr: "22"], [], []]) // Ref + input[3] = Channel.of([[id:'test', chr: "22"], [], []]) // Scaffold + input[4] = Channel.of([[id:'test', chr: "22"], []]) // Genetic map + input[5] = true // Perform chunks + input[6] = "recursive" // chunk_model """ } } @@ -152,9 +197,114 @@ nextflow_workflow { { assert workflow.success }, { assert snapshot( workflow.out, - workflow.out.versions.collect{ path(it).yaml } + workflow.out.versions ).match() } ) } } + + test("homo_sapiens - target - error: empty channel -- stub") { + options "-stub" + when { + params { + glimpse2_chunk_args = "" + bcftools_pluginfilltags_args = "--write-index" + bcftools_pluginfilltags_args2 = "-t AC,AN" + bcftools_view_args = "" + } + workflow { + """ + input[0] = Channel.of([ + [id:'test', chr: "22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), + [], + "chr22:16570065-16609999" + ]) + input[1] = Channel.of([[id:'test', chr: "22"], []]) // Chunks + input[2] = Channel.of([[id:'test', chr: "21"], [], []]) // Ref wrong chr result in empty channel + input[3] = 
Channel.of([[id:'test', chr: "22"], [], []]) // Scaffold + input[4] = Channel.of([[id:'test', chr: "22"], []]) // Genetic map + input[5] = true // Perform chunks + input[6] = "recursive" // chunk_model + """ + } + } + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorMessage.contains("ERROR: join operation resulted in an empty channel.") } + ) + } + } + + test("homo_sapiens - target - error: chunks provided and chunk is true -- stub") { + options "-stub" + when { + params { + glimpse2_chunk_args = "" + bcftools_pluginfilltags_args = "--write-index" + bcftools_pluginfilltags_args2 = "-t AC,AN" + bcftools_view_args = "" + } + workflow { + """ + input[0] = Channel.of([ + [id:'test', chr: "22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), + [], + "chr22:16570065-16609999" + ]) + input[1] = Channel.of([[id:'test', chr: "22"], "chr22:16570065-16597215"]) // Chunks provided + input[2] = Channel.of([[id:'test', chr: "22"], [], []]) // Ref + input[3] = Channel.of([[id:'test', chr: "22"], [], []]) // Scaffold + input[4] = Channel.of([[id:'test', chr: "22"], []]) // Genetic map + input[5] = true // Perform chunks + input[6] = "recursive" // chunk_model + """ + } + } + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorMessage.contains("ERROR: Cannot provide pre-defined chunks (regionin) when chunk=true.") } + ) + } + } + + test("homo_sapiens - target - error: chunks not provided and chunk is false -- stub") { + options "-stub" + when { + params { + glimpse2_chunk_args = "" + bcftools_pluginfilltags_args = "--write-index" + bcftools_pluginfilltags_args2 = "-t AC,AN" + bcftools_view_args = "" + } + workflow { + """ + input[0] = Channel.of([ + [id:'test', chr: "22"], + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), + [], + "chr22:16570065-16609999" + ]) + input[1] = Channel.of([[id:'test', chr: "22"], []]) // Chunks provided + input[2] = Channel.of([[id:'test', chr: "22"], [], []]) // Ref + input[3] = Channel.of([[id:'test', chr: "22"], [], []]) // Scaffold + input[4] = Channel.of([[id:'test', chr: "22"], []]) // Genetic map + input[5] = false // Perform chunks + input[6] = "recursive" // chunk_model + """ + } + } + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorMessage.contains("ERROR: ch_chunks channel is empty. Please provide a valid channel or set chunk parameter to true.") } + ) + } + } } diff --git a/subworkflows/nf-core/vcf_phase_shapeit5/tests/main.nf.test.snap b/subworkflows/nf-core/vcf_phase_shapeit5/tests/main.nf.test.snap index 9e1a24d25bac..a385cb7ecd1d 100644 --- a/subworkflows/nf-core/vcf_phase_shapeit5/tests/main.nf.test.snap +++ b/subworkflows/nf-core/vcf_phase_shapeit5/tests/main.nf.test.snap @@ -1,162 +1,182 @@ { - "homo_sapiens - panel - [] - [] - [] -- stub": { + "homo_sapiens - target, ref, chunks - no scaffold - two chromosomes": { + "content": [ + [ + [ + { + "id": "NA12878", + "chr": "chr21" + }, + "NA12878_chr21.ligate.vcf.gz", + "NA12878_chr21.ligate.vcf.gz.csi", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=836, phased=true, phasedAutodetect=true]", + [ + "NA12878" + ], + "844efd081d7714b3ca7213bc40de8e08" + ], + [ + { + "id": "NA12878", + "chr": "chr22" + }, + "NA12878_chr22.ligate.vcf.gz", + "NA12878_chr22.ligate.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]", + [ + "NA12878" + ], + "4865103995e99b948f11a963056c37c6" + ] + ], + [ + [ + { + "id": "NA12878", + "chr": "chr21" + }, + "chr21:16570065-16597215" + ], + [ + { + "id": "NA12878", + "chr": 
"chr21" + }, + "chr21:16587172-16609999" + ], + [ + { + "id": "NA12878", + "chr": "chr22" + }, + "chr22:16570065-16597215" + ], + [ + { + "id": "NA12878", + "chr": "chr22" + }, + "chr22:16587172-16609999" + ] + ], + [ + "versions.yml:md5,254449d2f1f8b59fd027d7dd4a79a986", + "versions.yml:md5,7b8268b08a3d5105126bc590d3348379", + "versions.yml:md5,9109283c8ca50c507c4b77329cc0ba79", + "versions.yml:md5,e5ace4193f44f483d16d945b9eeae318" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-04T14:44:18.920849936" + }, + "homo_sapiens - target - no error -- stub": { "content": [ { "0": [ [ { - "id": "ref_panel", - "region": "chr22:16570065-16609999" + "id": "test", + "chr": "22" }, - "ref_panel_chr22_16570065-16609999.bed:md5,5fe28e8d463b69936c780a072e5293f4" + "0" ] ], "1": [ [ { - "id": "ref_panel", - "region": "chr22:16570065-16609999" + "id": "test", + "chr": "22" }, - "ref_panel.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "test_22.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_22.ligate.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "2": [ - [ - { - "id": "ref_panel", - "region": "chr22:16570065-16609999" - }, - "ref_panel.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ "versions.yml:md5,254449d2f1f8b59fd027d7dd4a79a986", - "versions.yml:md5,4d5ed4cc7484687ea3931bb1339adb13", - "versions.yml:md5,72e5f16c9a9a5f341b1177ecb9df9e4c", - "versions.yml:md5,7a1a629b544fc13d94254b275a762854", - "versions.yml:md5,7b8268b08a3d5105126bc590d3348379" - ], - "bed": [ - [ - { - "id": "ref_panel", - "region": "chr22:16570065-16609999" - }, - "ref_panel_chr22_16570065-16609999.bed:md5,5fe28e8d463b69936c780a072e5293f4" - ] + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,7b8268b08a3d5105126bc590d3348379", + "versions.yml:md5,9109283c8ca50c507c4b77329cc0ba79", + "versions.yml:md5,e5ace4193f44f483d16d945b9eeae318" ], - "variants_index": [ + "chunks": [ [ { - "id": "ref_panel", 
- "region": "chr22:16570065-16609999" + "id": "test", + "chr": "22" }, - "ref_panel.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + "0" ] ], - "variants_phased": [ + "vcf_index": [ [ { - "id": "ref_panel", - "region": "chr22:16570065-16609999" + "id": "test", + "chr": "22" }, - "ref_panel.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "test_22.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_22.ligate.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ "versions.yml:md5,254449d2f1f8b59fd027d7dd4a79a986", - "versions.yml:md5,4d5ed4cc7484687ea3931bb1339adb13", - "versions.yml:md5,72e5f16c9a9a5f341b1177ecb9df9e4c", - "versions.yml:md5,7a1a629b544fc13d94254b275a762854", - "versions.yml:md5,7b8268b08a3d5105126bc590d3348379" + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,7b8268b08a3d5105126bc590d3348379", + "versions.yml:md5,9109283c8ca50c507c4b77329cc0ba79", + "versions.yml:md5,e5ace4193f44f483d16d945b9eeae318" ] }, [ - { - "VCF_PHASE_SHAPEIT5:SHAPEIT5_PHASECOMMON": { - "shapeit5": "5.1.1" - } - }, - { - "VCF_PHASE_SHAPEIT5:VCF_INDEX1": { - "bcftools": 1.22 - } - }, - { - "VCF_PHASE_SHAPEIT5:BEDTOOLS_MAKEWINDOWS": { - "bedtools": "2.31.1" - } - }, - { - "VCF_PHASE_SHAPEIT5:VCF_INDEX2": { - "bcftools": 1.22 - } - }, - { - "VCF_PHASE_SHAPEIT5:SHAPEIT5_LIGATE": { - "shapeit5": "5.1.1" - } - } + "versions.yml:md5,254449d2f1f8b59fd027d7dd4a79a986", + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,7b8268b08a3d5105126bc590d3348379", + "versions.yml:md5,9109283c8ca50c507c4b77329cc0ba79", + "versions.yml:md5,e5ace4193f44f483d16d945b9eeae318" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2025-09-15T09:06:23.433542" + "timestamp": "2025-12-04T14:44:31.05363112" }, - "homo_sapiens - target - panel - [] - []": { + "homo_sapiens - target - no chunks, no ref, no scaffold, no map - one chromosome": { "content": [ - 
"4865103995e99b948f11a963056c37c6", - "NA12878.vcf.gz.csi", [ [ { - "id": "NA12878", - "region": "chr22:16570065-16609999" + "id": "test", + "chr": "22" }, - "NA12878_chr22_16570065-16609999.bed:md5,d98a30331ba4743339f79d26c73c7402" + "test_22.ligate.vcf.gz", + "test_22.ligate.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=3202, variantCount=903, phased=true, phasedAutodetect=true]", + "affeabaf0b686b35f66536890fa35c4b" ] ], - [ - "versions.yml:md5,254449d2f1f8b59fd027d7dd4a79a986", - "versions.yml:md5,4d5ed4cc7484687ea3931bb1339adb13", - "versions.yml:md5,72e5f16c9a9a5f341b1177ecb9df9e4c", - "versions.yml:md5,7a1a629b544fc13d94254b275a762854", - "versions.yml:md5,7b8268b08a3d5105126bc590d3348379" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-09-15T09:06:12.444987" - }, - "homo_sapiens - panel - [] - [] - []": { - "content": [ - "VcfFile [chromosomes=[chr22], sampleCount=3202, variantCount=903, phased=true, phasedAutodetect=true]", - "ref_panel.vcf.gz.csi", [ [ { - "id": "ref_panel", - "region": "chr22:16570065-16609999" + "id": "test", + "chr": "22" }, - "ref_panel_chr22_16570065-16609999.bed:md5,d98a30331ba4743339f79d26c73c7402" + "chr22:16570065-16609999" ] ], [ "versions.yml:md5,254449d2f1f8b59fd027d7dd4a79a986", - "versions.yml:md5,4d5ed4cc7484687ea3931bb1339adb13", - "versions.yml:md5,72e5f16c9a9a5f341b1177ecb9df9e4c", - "versions.yml:md5,7a1a629b544fc13d94254b275a762854", - "versions.yml:md5,7b8268b08a3d5105126bc590d3348379" + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,7b8268b08a3d5105126bc590d3348379", + "versions.yml:md5,9109283c8ca50c507c4b77329cc0ba79", + "versions.yml:md5,e5ace4193f44f483d16d945b9eeae318" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2025-09-15T09:05:47.693311" + "timestamp": "2025-12-04T14:43:58.500525154" } } \ No newline at end of file diff --git 
a/subworkflows/nf-core/vcf_phase_shapeit5/tests/nextflow.config b/subworkflows/nf-core/vcf_phase_shapeit5/tests/nextflow.config index ebb0827f5123..e578757ec5c1 100644 --- a/subworkflows/nf-core/vcf_phase_shapeit5/tests/nextflow.config +++ b/subworkflows/nf-core/vcf_phase_shapeit5/tests/nextflow.config @@ -1,7 +1,25 @@ process { - withName: BEDTOOLS_MAKEWINDOWS { - ext.args = params.bedtools_makewindows_args - ext.prefix = { "${meta.id}_${meta.region.replace(":","_")}" } + withName: GLIMPSE2_CHUNK { + ext.args = { params.glimpse2_chunk_args } + ext.prefix = { "${meta.id}_${meta.chr}" } + } + + withName: SHAPEIT5_PHASECOMMON { + cpus = 1 // Needed to have deterministic output for testing + ext.args = "--seed 1" + ext.prefix = { "${meta.id}_${meta.chr}_${meta.regionout.replace(':', '-')}.phased" } + } + + withName: BCFTOOLS_INDEX_1 { + ext.args = '--csi' + } + + withName: SHAPEIT5_LIGATE { + ext.prefix = { "${meta.id}_${meta.chr}.ligate" } + } + + withName: BCFTOOLS_INDEX_2 { + ext.args = '--csi' } withName: BCFTOOLS_VIEW {