From 66e2a6b3063fa116c9920e37424544ec9b0600f0 Mon Sep 17 00:00:00 2001 From: rhassaine Date: Tue, 9 Dec 2025 14:09:47 +0000 Subject: [PATCH 1/5] RSeQC split_bam.py module implementation Syntax edit Refactored version channel to topic Cleanup --- .../nf-core/rseqc/splitbam/environment.yml | 7 + modules/nf-core/rseqc/splitbam/main.nf | 47 +++++ modules/nf-core/rseqc/splitbam/meta.yml | 111 ++++++++++++ .../nf-core/rseqc/splitbam/tests/main.nf.test | 74 ++++++++ .../rseqc/splitbam/tests/main.nf.test.snap | 166 ++++++++++++++++++ 5 files changed, 405 insertions(+) create mode 100644 modules/nf-core/rseqc/splitbam/environment.yml create mode 100644 modules/nf-core/rseqc/splitbam/main.nf create mode 100644 modules/nf-core/rseqc/splitbam/meta.yml create mode 100644 modules/nf-core/rseqc/splitbam/tests/main.nf.test create mode 100644 modules/nf-core/rseqc/splitbam/tests/main.nf.test.snap diff --git a/modules/nf-core/rseqc/splitbam/environment.yml b/modules/nf-core/rseqc/splitbam/environment.yml new file mode 100644 index 000000000000..6cdb911dd007 --- /dev/null +++ b/modules/nf-core/rseqc/splitbam/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::rseqc=5.0.4" diff --git a/modules/nf-core/rseqc/splitbam/main.nf b/modules/nf-core/rseqc/splitbam/main.nf new file mode 100644 index 000000000000..1eb8eaecc6fc --- /dev/null +++ b/modules/nf-core/rseqc/splitbam/main.nf @@ -0,0 +1,47 @@ +process RSEQC_SPLITBAM { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/rseqc:5.0.4--pyhdfd78af_1' : + 'biocontainers/rseqc:5.0.4--pyhdfd78af_1' }" + + input: + tuple val(meta) , path(bam), path(bai) + tuple val(meta2), path(bed) + + output: + tuple val(meta), path("*.in.bam") , emit: in_bam + tuple val(meta), path("*.ex.bam") , emit: ex_bam + tuple val(meta), path("*.junk.bam"), emit: junk_bam + tuple val("${task.process}"), val('rseqc'), eval('split_bam.py --version | sed "s/split_bam.py //"'), emit: versions_rseqc, topic: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + split_bam.py \\ + -i $bam \\ + -r $bed \\ + -o $prefix \\ + $args + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.in.bam + touch ${prefix}.ex.bam + touch ${prefix}.junk.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(split_bam.py --version 2>&1 | sed 's/split_bam.py //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/splitbam/meta.yml b/modules/nf-core/rseqc/splitbam/meta.yml new file mode 100644 index 000000000000..e907d09609a8 --- /dev/null +++ b/modules/nf-core/rseqc/splitbam/meta.yml @@ -0,0 +1,111 @@ +name: rseqc_splitbam +description: Split BAM file based on gene list in BED format +keywords: + - bam + - split + - rnaseq + - quality control +tools: + - "rseqc": + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: "http://rseqc.sourceforge.net/" + documentation: "http://rseqc.sourceforge.net/" + tool_dev_url: "https://github.com/MonashBioinformaticsPlatform/RSeQC" + doi: "10.1093/bioinformatics/bts356" + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - bai: + type: file + description: BAM index file + pattern: "*.{bai}" + ontologies: + - edam: "http://edamontology.org/format_3327" # BAI + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'reference' ]` + - bed: + type: file + description: Gene list in BED format to split the BAM by + pattern: "*.{bed}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + +output: + in_bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.in.bam": + type: file + description: BAM file containing reads that mapped to the gene list + pattern: "*.in.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + ex_bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.ex.bam": + type: file + description: BAM file containing reads that did not map to the gene list + pattern: "*.ex.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + junk_bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.junk.bam": + type: file + description: BAM file containing QC failed or unmapped reads + pattern: "*.junk.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + versions_rseqc: + - - ${task.process}: + type: string + description: The process the versions were collected from + - rseqc: + type: string + description: The tool name + - split_bam.py --version | sed "s/split_bam.py //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - rseqc: + type: string + description: The tool name + - split_bam.py --version | sed "s/split_bam.py //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@rhassaine" +maintainers: + - "@rhassaine" diff --git a/modules/nf-core/rseqc/splitbam/tests/main.nf.test b/modules/nf-core/rseqc/splitbam/tests/main.nf.test new file mode 100644 index 000000000000..b715a1a26222 --- /dev/null +++ b/modules/nf-core/rseqc/splitbam/tests/main.nf.test @@ -0,0 +1,74 @@ +nextflow_process { + + name "Test Process RSEQC_SPLITBAM" + script "../main.nf" + process "RSEQC_SPLITBAM" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/splitbam" + + test("sarscov2 - [[meta] - bam - bai] - [[meta2] - bed]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'test_bed' ], // meta2 map + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + 
process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - [[meta] - bam - bai] - [[meta2] - bed] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'test_bed' ], // meta2 map + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/rseqc/splitbam/tests/main.nf.test.snap b/modules/nf-core/rseqc/splitbam/tests/main.nf.test.snap new file mode 100644 index 000000000000..b83ad079edda --- /dev/null +++ b/modules/nf-core/rseqc/splitbam/tests/main.nf.test.snap @@ -0,0 +1,166 @@ +{ + "sarscov2 - [[meta] - bam - bai] - [[meta2] - bed] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.in.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.ex.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.junk.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "RSEQC_SPLITBAM", + "rseqc", + "5.0.4" + ] + ], + "ex_bam": [ + [ + { + "id": "test" + }, + "test.ex.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "in_bam": [ + [ + { + "id": "test" + }, + "test.in.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junk_bam": [ + [ + { + "id": "test" + }, + "test.junk.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_rseqc": [ + [ + "RSEQC_SPLITBAM", + "rseqc", + 
"5.0.4" + ] + ] + }, + { + "versions_rseqc": [ + [ + "RSEQC_SPLITBAM", + "rseqc", + "5.0.4" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-09T16:18:46.071286373" + }, + "sarscov2 - [[meta] - bam - bai] - [[meta2] - bed]": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.in.bam:md5,1b683a36569410a655b2c2cd5312a9de" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.ex.bam:md5,de1a667ed02b677907c441792a9fd651" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.junk.bam:md5,db5d68a1eef108c6bef2090beb0941bd" + ] + ], + "3": [ + [ + "RSEQC_SPLITBAM", + "rseqc", + "5.0.4" + ] + ], + "ex_bam": [ + [ + { + "id": "test" + }, + "test.ex.bam:md5,de1a667ed02b677907c441792a9fd651" + ] + ], + "in_bam": [ + [ + { + "id": "test" + }, + "test.in.bam:md5,1b683a36569410a655b2c2cd5312a9de" + ] + ], + "junk_bam": [ + [ + { + "id": "test" + }, + "test.junk.bam:md5,db5d68a1eef108c6bef2090beb0941bd" + ] + ], + "versions_rseqc": [ + [ + "RSEQC_SPLITBAM", + "rseqc", + "5.0.4" + ] + ] + }, + { + "versions_rseqc": [ + [ + "RSEQC_SPLITBAM", + "rseqc", + "5.0.4" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-09T16:18:40.168278662" + } +} \ No newline at end of file From 97b903baaae5fc7a43997839de300edb1ef5e0cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rayan=20Hassa=C3=AFne?= Date: Wed, 10 Dec 2025 09:36:21 +0100 Subject: [PATCH 2/5] Update modules/nf-core/rseqc/splitbam/main.nf Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- modules/nf-core/rseqc/splitbam/main.nf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modules/nf-core/rseqc/splitbam/main.nf b/modules/nf-core/rseqc/splitbam/main.nf index 1eb8eaecc6fc..0b6c3f0d5187 100644 --- a/modules/nf-core/rseqc/splitbam/main.nf +++ b/modules/nf-core/rseqc/splitbam/main.nf @@ -38,10 +38,5 @@ process RSEQC_SPLITBAM { touch ${prefix}.in.bam touch ${prefix}.ex.bam touch 
${prefix}.junk.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - rseqc: \$(split_bam.py --version 2>&1 | sed 's/split_bam.py //; s/ .*\$//') - END_VERSIONS """ } From 1ea8976e63439d82cdaee6d2aade94c518ea6dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rayan=20Hassa=C3=AFne?= Date: Wed, 10 Dec 2025 09:36:30 +0100 Subject: [PATCH 3/5] Update modules/nf-core/rseqc/splitbam/tests/main.nf.test Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- modules/nf-core/rseqc/splitbam/tests/main.nf.test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/nf-core/rseqc/splitbam/tests/main.nf.test b/modules/nf-core/rseqc/splitbam/tests/main.nf.test index b715a1a26222..8e32925fef78 100644 --- a/modules/nf-core/rseqc/splitbam/tests/main.nf.test +++ b/modules/nf-core/rseqc/splitbam/tests/main.nf.test @@ -31,8 +31,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out, - process.out.findAll { key, val -> key.startsWith("versions")} + process.out ).match() } ) } From b63568830a85b7c965464e2ae3cf584c603a1ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rayan=20Hassa=C3=AFne?= Date: Wed, 10 Dec 2025 09:36:38 +0100 Subject: [PATCH 4/5] Update modules/nf-core/rseqc/splitbam/tests/main.nf.test Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- modules/nf-core/rseqc/splitbam/tests/main.nf.test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/nf-core/rseqc/splitbam/tests/main.nf.test b/modules/nf-core/rseqc/splitbam/tests/main.nf.test index 8e32925fef78..87b80f2943f6 100644 --- a/modules/nf-core/rseqc/splitbam/tests/main.nf.test +++ b/modules/nf-core/rseqc/splitbam/tests/main.nf.test @@ -62,8 +62,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out, - process.out.findAll { key, val -> key.startsWith("versions")} + process.out ).match() } ) } From a0aab2d0d019bd8e5669aac1478b660046db52ef Mon Sep 17 
00:00:00 2001 From: rhassaine Date: Wed, 10 Dec 2025 08:53:29 +0000 Subject: [PATCH 5/5] Updated snapshot --- .../rseqc/splitbam/tests/main.nf.test.snap | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/modules/nf-core/rseqc/splitbam/tests/main.nf.test.snap b/modules/nf-core/rseqc/splitbam/tests/main.nf.test.snap index b83ad079edda..4d83f7131288 100644 --- a/modules/nf-core/rseqc/splitbam/tests/main.nf.test.snap +++ b/modules/nf-core/rseqc/splitbam/tests/main.nf.test.snap @@ -64,22 +64,13 @@ "5.0.4" ] ] - }, - { - "versions_rseqc": [ - [ - "RSEQC_SPLITBAM", - "rseqc", - "5.0.4" - ] - ] } ], "meta": { "nf-test": "0.9.2", "nextflow": "25.10.2" }, - "timestamp": "2025-12-09T16:18:46.071286373" + "timestamp": "2025-12-10T08:52:06.92323219" }, "sarscov2 - [[meta] - bam - bai] - [[meta2] - bed]": { "content": [ @@ -146,21 +137,12 @@ "5.0.4" ] ] - }, - { - "versions_rseqc": [ - [ - "RSEQC_SPLITBAM", - "rseqc", - "5.0.4" - ] - ] } ], "meta": { "nf-test": "0.9.2", "nextflow": "25.10.2" }, - "timestamp": "2025-12-09T16:18:40.168278662" + "timestamp": "2025-12-10T08:52:00.227785889" } } \ No newline at end of file