From da8fa10752b2e6c7a11b6174ba0668683f327a90 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Wed, 30 Oct 2024 15:26:01 +0100 Subject: [PATCH 1/4] refactor!: expect input of prefixes Breaking Change: Require a new input. Place sequences in sub-directory to prevent trying to compress them later. --- .../krakenuniq/preloadedkrakenuniq/main.nf | 103 ++++++-------- .../krakenuniq/preloadedkrakenuniq/meta.yml | 5 + .../preloadedkrakenuniq/tests/main.nf.test | 73 +++++++--- .../tests/main.nf.test.snap | 134 ++++++++++-------- 4 files changed, 184 insertions(+), 131 deletions(-) diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index d24f75d2932..4c33eece20c 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -8,7 +8,8 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { 'biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_2' }" input: - tuple val(meta), path(sequences) + // We stage sequencing files in a sub-directory so we don't accidentally gzip them later. + tuple val(meta), path(sequences, name: 'sequences/*'), val(prefixes) val sequence_type path db val ram_chunk_size @@ -38,9 +39,18 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { unclassified_option = save_output_reads ? "--unclassified-out \"${unclassified}\"" : '' def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' - compress_reads_command = save_output_reads ? "find . -name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : '' + compress_reads_command = save_output_reads ? "find . -maxdepth 0 -name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : '' + def command_inputs_file = '.inputs.txt' + if (meta.single_end) { + assert sequences.size() == prefixes.size() + command_inputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq} ${prefix}" } + """ + cat <<-END_INPUTS > ${command_inputs_file} + ${command_inputs.join('\n ')} + END_INPUTS + krakenuniq \\ $args \\ --db $db \\ @@ -48,15 +58,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload-size $ram_chunk_size \\ --threads $task.cpus - strip_suffix() { - local result=\$1 - # Strip any file extensions. - echo "\${result%%.*}" - } - - printf "%s\\n" ${sequences} | while read FASTQ; do \\ - PREFIX="\$(strip_suffix "\${FASTQ}")" - + while IFS=' ' read -r SEQ PREFIX; do krakenuniq \\ --db $db \\ --threads $task.cpus \\ @@ -65,8 +67,8 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { $unclassified_option \\ $classified_option \\ $args2 \\ - "\${FASTQ}" - done + "\${SEQ}" + done < ${command_inputs_file} $compress_reads_command @@ -76,7 +78,14 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { END_VERSIONS """ } else { + assert sequences.size() / 2 == prefixes.size() + command_inputs = [sequences.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]} ${pair[1]} ${prefix}" } + """ + cat <<-END_INPUTS > ${command_inputs_file} + ${command_inputs.join('\n ')} + END_INPUTS + krakenuniq \\ $args \\ --db $db \\ @@ -84,18 +93,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload-size $ram_chunk_size \\ --threads $task.cpus - strip_suffix() { - local result - read result - # Strip any trailing dot or underscore. 
- result="\${result%_}" - echo "\${result%.}" - } - - printf "%s %s\\n" ${sequences} | while read FASTQ; do \\ - read -r -a FASTQ <<< "\${FASTQ}" - PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)" - + while IFS=' ' read -r FIRST_SEQ SECOND_SEQ PREFIX; do krakenuniq \\ --db $db \\ --threads $task.cpus \\ @@ -105,8 +103,8 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { $classified_option \\ --paired \\ $args2 \\ - "\${FASTQ[@]}" - done + "\${FIRST_SEQ}" "\${SECOND_SEQ}" + done < ${command_inputs_file} $compress_reads_command @@ -130,8 +128,17 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' compress_reads_command = save_output_reads ? "find . -name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : '' + def command_inputs_file = '.inputs.txt' + if (meta.single_end) { + assert sequences.size() == prefixes.size() + command_inputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq} ${prefix}" } + """ + cat <<-END_INPUTS > ${command_inputs_file} + ${command_inputs.join('\n ')} + END_INPUTS + echo krakenuniq \\ $args \\ --db $db \\ @@ -139,12 +146,6 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload-size $ram_chunk_size \\ --threads $task.cpus - strip_suffix() { - local result=\$1 - # Strip any file extensions. - echo "\${result%%.*}" - } - create_file() { echo '<3 nf-core' > "\$1" } @@ -153,11 +154,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } - printf "%s\\n" ${sequences} | while read FASTQ; do \\ - echo "\${FASTQ}" - PREFIX="\$(strip_suffix "\${FASTQ}")" - echo "\${PREFIX}" - + while IFS=' ' read -r SEQ PREFIX; do echo krakenuniq \\ --db $db \\ --threads $task.cpus \\ @@ -166,13 +163,13 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { $unclassified_option \\ $classified_option \\ $args2 \\ - "\${FASTQ}" + "\${SEQ}" create_file "\${PREFIX}.krakenuniq.classified.txt" create_file "\${PREFIX}.krakenuniq.report.txt" create_gzip_file "\${PREFIX}.classified.${sequence_type}.gz" create_gzip_file "\${PREFIX}.unclassified.${sequence_type}.gz" - done + done < ${command_inputs_file} echo "$compress_reads_command" @@ -182,7 +179,14 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { END_VERSIONS """ } else { + assert sequences.size() / 2 == prefixes.size() + command_inputs = [sequences.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]} ${pair[1]} ${prefix}" } + """ + cat <<-END_INPUTS > ${command_inputs_file} + ${command_inputs.join('\n ')} + END_INPUTS + echo krakenuniq \\ $args \\ --db $db \\ @@ -190,14 +194,6 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload-size $ram_chunk_size \\ --threads $task.cpus - strip_suffix() { - local result - read result - # Strip any trailing dot or underscore. 
- result="\${result%_}" - echo "\${result%.}" - } - create_file() { echo '<3 nf-core' > "\$1" } @@ -206,12 +202,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } - printf "%s %s\\n" ${sequences} | while read FASTQ; do \\ - read -r -a FASTQ <<< "\${FASTQ}" - echo "\${FASTQ[@]}" - PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)" - echo "\${PREFIX}" - + while IFS=' ' read -r FIRST_SEQ SECOND_SEQ PREFIX; do echo krakenuniq \\ --db $db \\ --threads $task.cpus \\ @@ -221,13 +212,13 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { $classified_option \\ --paired \\ $args2 \\ - "\${FASTQ[@]}" + "\${FIRST_SEQ}" "\${SECOND_SEQ}" create_file "\${PREFIX}.krakenuniq.classified.txt" create_file "\${PREFIX}.krakenuniq.report.txt" create_gzip_file "\${PREFIX}.merged.classified.${sequence_type}.gz" create_gzip_file "\${PREFIX}.merged.unclassified.${sequence_type}.gz" - done + done < ${command_inputs_file} echo "$compress_reads_command" diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml index 1af2350d69b..8e674504d19 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml @@ -25,6 +25,11 @@ input: type: file description: List of input files containing sequences. All of them must be either in FASTA or FASTQ format. + - prefixes: + type: string + description: > + List of sample identifiers or filename prefixes. Must correspond in order and + length to the 'sequences', or to the number of sequencing pairs. - - sequence_type: type: string description: Format of all given sequencing files as literal string, either diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test index 9e1d6700805..16da8e4544a 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test @@ -32,7 +32,11 @@ nextflow_process { """ input[0] = [ [id:'test', single_end:true], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + ], + ['sample_1', 'sample.2'] ] input[1] = 'fasta' input[2] = UNTAR.out.untar.map { it[1] } @@ -45,13 +49,16 @@ nextflow_process { } then { + def reports = process.out.report.get(0).get(1).collect { report -> file(report).name } + def expected = ['sample_1.krakenuniq.report.txt', 'sample.2.krakenuniq.report.txt'] + assertAll ( { assert process.success }, // Report contains a timestamp. 
- { assert file(process.out.report.get(0).get(1)).name == 'genome.krakenuniq.report.txt' }, - { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'genome.unclassified.fasta.gz' }, + { assertContainsInAnyOrder(reports, expected) }, { assert snapshot( process.out.classified_reads, + process.out.unclassified_reads, process.out.classified_assignment, process.out.versions ).match('fasta') }, @@ -69,7 +76,11 @@ nextflow_process { """ input[0] = [ [id:'test', single_end:true], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + ], + ['sample_1', 'sample.2'] ] input[1] = 'fastq' input[2] = UNTAR.out.untar.map { it[1] } @@ -82,10 +93,13 @@ nextflow_process { } then { + def reports = process.out.report.get(0).get(1).collect { report -> file(report).name } + def expected = ['sample_1.krakenuniq.report.txt', 'sample.2.krakenuniq.report.txt'] + assertAll ( { assert process.success }, // Report contains a timestamp. - { assert file(process.out.report.get(0).get(1)).name == 'test_interleaved.krakenuniq.report.txt' }, + { assertContainsInAnyOrder(reports, expected) }, { assert snapshot( process.out.classified_reads, process.out.unclassified_reads, @@ -108,8 +122,11 @@ nextflow_process { [id:'test', single_end:false], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true), + ], + ['sample_1', 'sample.2'] ] input[1] = 'fastq' input[2] = UNTAR.out.untar.map { it[1] } @@ -122,13 +139,16 @@ nextflow_process { } then { + def reports = process.out.report.get(0).get(1).collect { report -> file(report).name } + def expected = ['sample_1.krakenuniq.report.txt', 'sample.2.krakenuniq.report.txt'] + assertAll ( { assert process.success }, // Report contains a timestamp. 
- { assert file(process.out.report.get(0).get(1)).name == 'test.krakenuniq.report.txt' }, - { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'test.merged.unclassified.fastq.gz' }, + { assertContainsInAnyOrder(reports, expected) }, { assert snapshot( process.out.classified_reads, + process.out.unclassified_reads, process.out.classified_assignment, process.out.versions ).match('fastq-paired') }, @@ -148,7 +168,11 @@ nextflow_process { """ input[0] = [ [id:'test', single_end:true], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + ], + ['sample_1', 'sample.2'] ] input[1] = 'fasta' input[2] = UNTAR.out.untar.map { it[1] } @@ -161,11 +185,13 @@ nextflow_process { } then { + def reports = process.out.report.get(0).get(1).collect { report -> file(report).name } + def expected = ['sample_1.krakenuniq.report.txt', 'sample.2.krakenuniq.report.txt'] + assertAll ( { assert process.success }, // Report contains a timestamp. - { assert file(process.out.report.get(0).get(1)).name == 'genome.krakenuniq.report.txt' }, - { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'genome.unclassified.fasta.gz' }, + { assertContainsInAnyOrder(reports, expected) }, { assert snapshot( process.out.classified_reads, process.out.unclassified_reads, @@ -188,7 +214,11 @@ nextflow_process { """ input[0] = [ [id:'test', single_end:true], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + ], + ['sample_1', 'sample.2'] ] input[1] = 'fastq' input[2] = UNTAR.out.untar.map { it[1] } @@ -201,10 +231,13 @@ nextflow_process { } then { + def reports = process.out.report.get(0).get(1).collect { report -> file(report).name } + def expected = ['sample_1.krakenuniq.report.txt', 'sample.2.krakenuniq.report.txt'] + assertAll ( { assert process.success }, // Report contains a timestamp. 
- { assert file(process.out.report.get(0).get(1)).name == 'test_interleaved.krakenuniq.report.txt' }, + { assertContainsInAnyOrder(reports, expected) }, { assert snapshot( process.out.classified_reads, process.out.unclassified_reads, @@ -229,8 +262,11 @@ nextflow_process { [id:'test', single_end:false], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true), + ], + ['sample_1', 'sample.2'] ] input[1] = 'fastq' input[2] = UNTAR.out.untar.map { it[1] } @@ -243,10 +279,13 @@ nextflow_process { } then { + def reports = process.out.report.get(0).get(1).collect { report -> file(report).name } + def expected = ['sample_1.krakenuniq.report.txt', 'sample.2.krakenuniq.report.txt'] + assertAll ( { assert process.success }, // Report contains a timestamp. - { assert file(process.out.report.get(0).get(1)).name == 'test.krakenuniq.report.txt' }, + { assertContainsInAnyOrder(reports, expected) }, { assert snapshot( process.out.classified_reads, process.out.unclassified_reads, diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap index 2a431be8868..ca29cf7d725 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap @@ -7,7 +7,10 @@ "id": "test", "single_end": true }, - "test_interleaved.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -16,7 +19,10 @@ "id": "test", "single_end": true }, - "test_interleaved.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -25,7 +31,10 @@ "id": "test", "single_end": true }, - "test_interleaved.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -33,30 +42,18 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-06T11:21:36.338887437" + "timestamp": "2024-10-30T14:25:43.618168582" }, "fastq-single": { "content": [ [ - [ - { - "id": "test", - "single_end": true - }, - "test_interleaved.classified.fastq.gz:md5,3bd95021a8fbced1be8039b990b28176" - ] + ], [ - [ - { - "id": "test", - "single_end": true - }, - "test_interleaved.unclassified.fastq.gz:md5,143c7eb70ca93cc2d5ea98767c370424" - ] + ], [ [ @@ -64,7 +61,10 @@ "id": "test", "single_end": true }, - "test_interleaved.krakenuniq.classified.txt:md5,88a734a9a9216cb0770a77f36c9f4e78" + [ + "sample.2.krakenuniq.classified.txt:md5,f885fa1cdbfc5460af0772219991bf6d", + 
"sample_1.krakenuniq.classified.txt:md5,88a734a9a9216cb0770a77f36c9f4e78" + ] ] ], [ @@ -72,21 +72,18 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-06T11:17:43.586414914" + "timestamp": "2024-10-30T15:21:33.941412985" }, "fastq-paired": { "content": [ [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.classified.fastq.gz:md5,dd7651837cce63e6108e28f4f019aedb" - ] + + ], + [ + ], [ [ @@ -94,7 +91,10 @@ "id": "test", "single_end": false }, - "test.krakenuniq.classified.txt:md5,ed5e19c7a88312cc04e483ac5f2579cd" + [ + "sample.2.krakenuniq.classified.txt:md5,ed5e19c7a88312cc04e483ac5f2579cd", + "sample_1.krakenuniq.classified.txt:md5,ed5e19c7a88312cc04e483ac5f2579cd" + ] ] ], [ @@ -102,10 +102,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-06T11:37:46.718293365" + "timestamp": "2024-10-30T15:21:43.622100223" }, "fasta-stub": { "content": [ @@ -115,7 +115,10 @@ "id": "test", "single_end": true }, - "genome.classified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.classified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.classified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -124,7 +127,10 @@ "id": "test", "single_end": true }, - "genome.unclassified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.unclassified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.unclassified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -133,7 +139,10 @@ "id": "test", "single_end": true }, - "genome.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -141,10 +150,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-06T11:28:27.729550991" + "timestamp": "2024-10-30T14:25:33.871634213" }, "fastq-paired-stub": { "content": [ @@ -154,7 +163,10 @@ "id": "test", "single_end": false }, - "test.merged.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.merged.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.merged.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -163,7 +175,10 @@ "id": "test", "single_end": false }, - "test.merged.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.merged.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.merged.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -172,7 +187,10 @@ "id": "test", "single_end": false }, - "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + [ + "sample.2.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975", + "sample_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] ] ], [ @@ -180,21 +198,18 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-05T20:06:20.262529457" + "timestamp": "2024-10-30T14:25:54.663232573" }, "fasta": { "content": [ [ - [ - { - "id": "test", - "single_end": true - }, - "genome.classified.fasta.gz:md5,e73599798195a519ba2565c3f0275b93" - ] + + ], + [ + ], [ [ @@ -202,7 +217,10 @@ "id": "test", "single_end": true }, - 
"genome.krakenuniq.classified.txt:md5,8aafacd89a6aac98aaf512df0a7493d1" + [ + "sample.2.krakenuniq.classified.txt:md5,8aafacd89a6aac98aaf512df0a7493d1", + "sample_1.krakenuniq.classified.txt:md5,2bea6c2195c400a909a2d4cca2e3045e" + ] ] ], [ @@ -210,9 +228,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-06T11:36:00.24752418" + "timestamp": "2024-10-30T14:45:02.199077563" } } \ No newline at end of file From e02d2c69721838a5776c5df00d405bf98508c352 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Mon, 4 Nov 2024 11:49:21 +0100 Subject: [PATCH 2/4] fix: always use list of files --- modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index 4c33eece20c..5311e4f3c8c 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -29,6 +29,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { script: assert sequence_type in ['fasta', 'fastq'] + sequences = sequences instanceof List ? sequences : [sequences] def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' @@ -117,6 +118,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { stub: assert sequence_type in ['fasta', 'fastq'] + sequences = sequences instanceof List ? sequences : [sequences] def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' From 2b2ce4258a3f4cba83b963046de22c040730c8e9 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Tue, 5 Nov 2024 15:29:17 +0100 Subject: [PATCH 3/4] refactor: change separator from space to tab --- .../krakenuniq/preloadedkrakenuniq/main.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index 5311e4f3c8c..7062028f40c 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -45,7 +45,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { if (meta.single_end) { assert sequences.size() == prefixes.size() - command_inputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq} ${prefix}" } + command_inputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq}\t${prefix}" } """ cat <<-END_INPUTS > ${command_inputs_file} @@ -59,7 +59,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload-size $ram_chunk_size \\ --threads $task.cpus - while IFS=' ' read -r SEQ PREFIX; do + while IFS='\t' read -r SEQ PREFIX; do krakenuniq \\ --db $db \\ --threads $task.cpus \\ @@ -80,7 +80,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { """ } else { assert sequences.size() / 2 == prefixes.size() - command_inputs = [sequences.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]} ${pair[1]} ${prefix}" } + command_inputs = [sequences.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]}\t${pair[1]}\t${prefix}" } """ cat <<-END_INPUTS > ${command_inputs_file} @@ -94,7 +94,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload-size $ram_chunk_size \\ --threads $task.cpus - while IFS=' ' read -r FIRST_SEQ SECOND_SEQ PREFIX; do + while IFS='\t' read -r FIRST_SEQ SECOND_SEQ PREFIX; do krakenuniq \\ --db $db \\ --threads $task.cpus \\ @@ -134,7 +134,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { if (meta.single_end) { assert 
sequences.size() == prefixes.size() - command_inputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq} ${prefix}" } + command_inputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq}\t${prefix}" } """ cat <<-END_INPUTS > ${command_inputs_file} @@ -156,7 +156,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } - while IFS=' ' read -r SEQ PREFIX; do + while IFS='\t' read -r SEQ PREFIX; do echo krakenuniq \\ --db $db \\ --threads $task.cpus \\ @@ -182,7 +182,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { """ } else { assert sequences.size() / 2 == prefixes.size() - command_inputs = [sequences.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]} ${pair[1]} ${prefix}" } + command_inputs = [sequences.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]}\t${pair[1]}\t${prefix}" } """ cat <<-END_INPUTS > ${command_inputs_file} @@ -204,7 +204,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } - while IFS=' ' read -r FIRST_SEQ SECOND_SEQ PREFIX; do + while IFS='\t' read -r FIRST_SEQ SECOND_SEQ PREFIX; do echo krakenuniq \\ --db $db \\ --threads $task.cpus \\ From 6ef7b396cef14db63975964b6fb7b75c428d28c1 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Tue, 5 Nov 2024 15:33:13 +0100 Subject: [PATCH 4/4] docs: add descriptive comments to commands --- .../nf-core/krakenuniq/preloadedkrakenuniq/main.nf | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index 7062028f40c..3a31fcb40fe 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -48,10 +48,12 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { command_inputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq}\t${prefix}" } """ + # Store the batch of samples for later command input. cat <<-END_INPUTS > ${command_inputs_file} ${command_inputs.join('\n ')} END_INPUTS + # Preload the KrakenUniq database into memory. krakenuniq \\ $args \\ --db $db \\ @@ -59,6 +61,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload-size $ram_chunk_size \\ --threads $task.cpus + # Run the KrakenUniq classification on each sample in the batch. while IFS='\t' read -r SEQ PREFIX; do krakenuniq \\ --db $db \\ @@ -83,10 +86,12 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { command_inputs = [sequences.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]}\t${pair[1]}\t${prefix}" } """ + # Store the batch of samples for later command input. cat <<-END_INPUTS > ${command_inputs_file} ${command_inputs.join('\n ')} END_INPUTS + # Preload the KrakenUniq database into memory. krakenuniq \\ $args \\ --db $db \\ @@ -94,6 +99,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload-size $ram_chunk_size \\ --threads $task.cpus + # Run the KrakenUniq classification on each sample in the batch. while IFS='\t' read -r FIRST_SEQ SECOND_SEQ PREFIX; do krakenuniq \\ --db $db \\ @@ -137,10 +143,12 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { command_inputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq}\t${prefix}" } """ + # Store the batch of samples for later command input. cat <<-END_INPUTS > ${command_inputs_file} ${command_inputs.join('\n ')} END_INPUTS + # Preload the KrakenUniq database into memory. 
echo krakenuniq \\ $args \\ --db $db \\ @@ -156,6 +164,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } + # Run the KrakenUniq classification on each sample in the batch. while IFS='\t' read -r SEQ PREFIX; do echo krakenuniq \\ --db $db \\ @@ -185,10 +194,12 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { command_inputs = [sequences.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]}\t${pair[1]}\t${prefix}" } """ + # Store the batch of samples for later command input. cat <<-END_INPUTS > ${command_inputs_file} ${command_inputs.join('\n ')} END_INPUTS + # Preload the KrakenUniq database into memory. echo krakenuniq \\ $args \\ --db $db \\ @@ -204,6 +215,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } + # Run the KrakenUniq classification on each sample in the batch. while IFS='\t' read -r FIRST_SEQ SECOND_SEQ PREFIX; do echo krakenuniq \\ --db $db \\
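
The prefix pairing introduced in patch 1 relies on Groovy's transpose() and collate(). A minimal sketch of the behaviour, using hypothetical file names in place of the staged paths:

    // Single-end: one prefix per sequence file.
    def sequences = ['sequences/a.fastq.gz', 'sequences/b.fastq.gz']
    def prefixes = ['sample_1', 'sample.2']
    assert sequences.size() == prefixes.size()
    // transpose() zips the two lists; the closure destructures each [seq, prefix] pair.
    def commandInputs = [sequences, prefixes].transpose().collect { seq, prefix -> "${seq}\t${prefix}" }
    assert commandInputs == ['sequences/a.fastq.gz\tsample_1', 'sequences/b.fastq.gz\tsample.2']

    // Paired-end: collate(2) groups consecutive R1/R2 files, so one prefix per pair.
    def pairs = ['sequences/a_1.fq.gz', 'sequences/a_2.fq.gz', 'sequences/b_1.fq.gz', 'sequences/b_2.fq.gz']
    assert pairs.size() / 2 == prefixes.size()
    def pairedInputs = [pairs.collate(2), prefixes].transpose().collect { pair, prefix -> "${pair[0]}\t${pair[1]}\t${prefix}" }
    assert pairedInputs == ['sequences/a_1.fq.gz\tsequences/a_2.fq.gz\tsample_1', 'sequences/b_1.fq.gz\tsequences/b_2.fq.gz\tsample.2']

Replacing the old strip_suffix/sed prefix inference with caller-supplied prefixes is what makes the input breaking: callers now control output names such as 'sample_1.krakenuniq.report.txt' directly.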
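Patch 2's guard matters because a channel element with only one staged file reaches the script as a single path object rather than a list, so the size assertions above it would not compare element counts. A sketch of the normalisation, with hypothetical values:

    // Wrap a lone input so size(), transpose(), and collate() always see a list.
    def normalize(seq) { seq instanceof List ? seq : [seq] }
    assert normalize('only.fastq.gz') == ['only.fastq.gz']
    assert normalize(['a.fastq.gz', 'b.fastq.gz']).size() == 2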
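Patch 3's switch from space- to tab-separated fields presumably protects prefixes and staged paths that contain spaces. One detail worth noting: inside the Groovy """ script block, \t is interpolated to a literal tab before bash parses the line, so IFS ends up holding a real tab; a standalone bash script has to spell it $'\t' instead. A sketch under that assumption, with a hypothetical prefix containing a space:

    #!/usr/bin/env bash
    # Build a tab-separated batch file, as the module's heredoc does.
    printf 'sequences/a.fastq.gz\tsample 1\n' > .inputs.txt

    # Split on tabs only, so the prefix 'sample 1' stays a single field.
    while IFS=$'\t' read -r SEQ PREFIX; do
        echo "seq=${SEQ} prefix=${PREFIX}"
    done < .inputs.txt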