From b2e7be55427d0bc063373d45e505d0c6b5509ed2 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 14:56:37 +0000 Subject: [PATCH 01/42] Fran's draft --- modules.json | 5 ++ .../fastqscreen/fastqscreen/environment.yml | 9 ++ .../nf-core/fastqscreen/fastqscreen/main.nf | 54 ++++++++++++ .../nf-core/fastqscreen/fastqscreen/meta.yml | 44 ++++++++++ .../fastqscreen/tests/main.nf.test | 87 +++++++++++++++++++ .../fastqscreen/tests/main.nf.test.snap | 81 +++++++++++++++++ .../fastqscreen/fastqscreen/tests/tags.yml | 2 + nextflow.config | 1 + nextflow_schema.json | 5 ++ workflows/seqinspector.nf | 12 +++ 10 files changed, 300 insertions(+) create mode 100644 modules/nf-core/fastqscreen/fastqscreen/environment.yml create mode 100644 modules/nf-core/fastqscreen/fastqscreen/main.nf create mode 100644 modules/nf-core/fastqscreen/fastqscreen/meta.yml create mode 100644 modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test create mode 100644 modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap create mode 100644 modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml diff --git a/modules.json b/modules.json index 8e632d5..cb726e0 100644 --- a/modules.json +++ b/modules.json @@ -10,6 +10,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "fastqscreen/fastqscreen": { + "branch": "master", + "git_sha": "e1316cdcbef318b9cdfd35586423f8337c3d45f0", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", diff --git a/modules/nf-core/fastqscreen/fastqscreen/environment.yml b/modules/nf-core/fastqscreen/fastqscreen/environment.yml new file mode 100644 index 0000000..5097f09 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "fastqscreen_fastqscreen" +channels: + - 
conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::fastq-screen=0.15.3" diff --git a/modules/nf-core/fastqscreen/fastqscreen/main.nf b/modules/nf-core/fastqscreen/fastqscreen/main.nf new file mode 100644 index 0000000..8686f20 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/main.nf @@ -0,0 +1,54 @@ +process FASTQSCREEN_FASTQSCREEN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastq-screen:0.15.3--pl5321hdfd78af_0': + 'biocontainers/fastq-screen:0.15.3--pl5321hdfd78af_0'}" + + input: + tuple val(meta), path(reads) // .fastq files + path database + + output: + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.png") , emit: png + tuple val(meta), path("*.html"), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + + """ + fastq_screen --threads ${task.cpus} \\ + --aligner bowtie2 \\ + --conf ${database}/fastq_screen.conf \\ + $reads \\ + $args \\ + --outdir . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqscreen: \$(echo \$(fastq_screen --version 2>&1) | sed 's/^.*FastQ Screen v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch test_1_screen.html + touch test_1_screen.png + touch test_1_screen.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqscreen: \$(echo \$(fastq_screen --version 2>&1) | sed 's/^.*FastQ Screen v//; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/fastqscreen/fastqscreen/meta.yml b/modules/nf-core/fastqscreen/fastqscreen/meta.yml new file mode 100644 index 0000000..623dacf --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/meta.yml @@ -0,0 +1,44 @@ +name: fastqscreen_fastqscreen +description: Align reads to multiple reference genomes using fastq-screen +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - "fastqscreen": + description: "FastQ Screen allows you to screen a library of sequences in FastQ format against a set of sequence databases so you can see if the composition of the library matches with what you expect." + homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/" + documentation: "https://stevenwingett.github.io/FastQ-Screen/" + tool_dev_url: "https://github.com/StevenWingett/FastQ-Screen/archive/refs/tags/v0.15.3.zip" + doi: "10.5281/zenodo.5838377" + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - database: + type: directory + description: fastq screen database folder containing config file and index folders + pattern: "FastQ_Screen_Genomes" +output: + - fastq_screen: + type: directory + description: Output fastq_screen file containing alignment statistics + pattern: "*.{_fq_screen}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@snesic" + - "@JPejovicApis" diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test new file mode 100644 index 0000000..6d858a4 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process FASTQSCREEN_FASTQSCREEN" + script "../main.nf" + process "FASTQSCREEN_FASTQSCREEN" + + tag "modules" + tag "modules_nfcore" + tag "bowtie2/build" + tag "fastqscreen" + tag "fastqscreen/buildfromindex" + tag "fastqscreen/fastqscreen" + + setup { + + run("BOWTIE2_BUILD") { + script "../../../bowtie2/build/main.nf" + process { + """ + input[0] = Channel.from([ + [[id: "sarscov2"], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)], + [[id: "human"] , file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)] + ]) + """ + } + } + + run("FASTQSCREEN_BUILDFROMINDEX") { + script "../../../fastqscreen/buildfromindex/main.nf" + process { + """ + input[0] = BOWTIE2_BUILD.out.index.map{meta, index -> meta.id}.collect() + input[1] = BOWTIE2_BUILD.out.index.map{meta, index -> index}.collect() + """ + } + } + } + + test("sarscov2 - human") { + + when { + process { + """ + input[0] = [[ id:'test', single_end:true ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.version).match("version") }, + { assert file(process.out.txt.get(0).get(1)).exists() }, + { assert file(process.out.png.get(0).get(1)).exists() }, + { assert file(process.out.html.get(0).get(1)).exists() } + ) + } + + } + + test("sarscov2 - human - stub") { + + options "-stub" + when { + process { + """ + input[0] = [[ id:'test', single_end:true ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap new file mode 100644 index 0000000..b245019 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap @@ -0,0 +1,81 @@ +{ + "version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-30T14:22:56.541922683" + }, + "sarscov2 - human - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,8ac0239b5103352958d9a9e562b23103" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "png": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,8ac0239b5103352958d9a9e562b23103" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-30T14:23:12.70922619" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml b/modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml new file mode 100644 index 0000000..b03bfb4 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml @@ -0,0 +1,2 @@ +fastqscreen/fastqscreen: + - "modules/nf-core/fastqscreen/fastqscreen/**" diff --git a/nextflow.config b/nextflow.config index 50c1ecb..210f03e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { genome = null fasta = null igenomes_base = 's3://ngi-igenomes/igenomes/' + config_fastq_screen = "${projectDir}/modules/nf-core/fastqscreen/references" igenomes_ignore = false // MultiQC options diff --git a/nextflow_schema.json b/nextflow_schema.json index 88fd607..f18a5f1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -65,6 +65,11 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, + "config_fastq_screen": { + "type": "string", + "description": "path to directory with fastq_screen config (fastq_screen.conf)", + "fa_icon": "fas fa-braille" + }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index ea62811..6ec9ca9 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -4,6 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ include { FASTQC } from '../modules/nf-core/fastqc/main' +include { FASTQSCREEN_FASTQSCREEN } from '../modules/nf-core/fastqscreen/fastqscreen/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' @@ -39,6 +40,17 @@ workflow SEQINSPECTOR { ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + // + // MODULE: Run FastQ Screen + // + + FASTQSCREEN_FASTQSCREEN ( + ch_samplesheet, + Channel.fromPath(params.config_fastq_screen).first() // value channel: the database directory is reused for every sample + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQSCREEN_FASTQSCREEN.out.txt) + ch_versions = ch_versions.mix(FASTQSCREEN_FASTQSCREEN.out.versions.first()) + // // Collate and save software versions // From d833d913c5f0da94eeee1c9eaf44fd14c5fdb2fa Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 15:02:08 +0000 Subject: [PATCH 02/42] nf-core modules update --> fastqscreen --- modules.json | 2 +- .../fastqscreen/fastqscreen/environment.yml | 3 +- .../nf-core/fastqscreen/fastqscreen/main.nf | 10 +-- .../nf-core/fastqscreen/fastqscreen/meta.yml | 78 +++++++++++++------ .../fastqscreen/tests/main.nf.test | 32 +++++++- .../fastqscreen/tests/main.nf.test.snap | 65 ++++++++++++++-- .../fastqscreen/tests/nextflow.config | 5 ++ 7 files 
changed, 157 insertions(+), 38 deletions(-) create mode 100644 modules/nf-core/fastqscreen/fastqscreen/tests/nextflow.config diff --git a/modules.json b/modules.json index cb726e0..f8b4855 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "fastqscreen/fastqscreen": { "branch": "master", - "git_sha": "e1316cdcbef318b9cdfd35586423f8337c3d45f0", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "multiqc": { diff --git a/modules/nf-core/fastqscreen/fastqscreen/environment.yml b/modules/nf-core/fastqscreen/fastqscreen/environment.yml index 5097f09..c63c61e 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/environment.yml +++ b/modules/nf-core/fastqscreen/fastqscreen/environment.yml @@ -1,9 +1,8 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "fastqscreen_fastqscreen" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::fastq-screen=0.15.3" + - bioconda::perl-gdgraph=1.54 diff --git a/modules/nf-core/fastqscreen/fastqscreen/main.nf b/modules/nf-core/fastqscreen/fastqscreen/main.nf index 8686f20..88c4e5c 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/main.nf +++ b/modules/nf-core/fastqscreen/fastqscreen/main.nf @@ -12,10 +12,11 @@ process FASTQSCREEN_FASTQSCREEN { path database output: - tuple val(meta), path("*.txt") , emit: txt - tuple val(meta), path("*.png") , emit: png - tuple val(meta), path("*.html"), emit: html - path "versions.yml" , emit: versions + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.png") , emit: png , optional: true + tuple val(meta), path("*.html") , emit: html + tuple val(meta), path("*.fastq.gz"), emit: fastq, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -30,7 +31,6 @@ process FASTQSCREEN_FASTQSCREEN { --conf ${database}/fastq_screen.conf \\ $reads \\ $args \\ - --outdir . 
cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fastqscreen/fastqscreen/meta.yml b/modules/nf-core/fastqscreen/fastqscreen/meta.yml index 623dacf..39c86b4 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/meta.yml +++ b/modules/nf-core/fastqscreen/fastqscreen/meta.yml @@ -9,36 +9,70 @@ keywords: - reference tools: - "fastqscreen": - description: "FastQ Screen allows you to screen a library of sequences in FastQ format against a set of sequence databases so you can see if the composition of the library matches with what you expect." + description: "FastQ Screen allows you to screen a library of sequences in FastQ + format against a set of sequence databases so you can see if the composition + of the library matches with what you expect." homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/" documentation: "https://stevenwingett.github.io/FastQ-Screen/" tool_dev_url: "https://github.com/StevenWingett/FastQ-Screen/archive/refs/tags/v0.15.3.zip" doi: "10.5281/zenodo.5838377" licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - database: - type: directory - description: fastq screen database folder containing config file and index folders - pattern: "FastQ_Screen_Genomes" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - - database: + type: directory + description: fastq screen database folder containing config file and index folders + pattern: "FastQ_Screen_Genomes" output: - - fastq_screen: - type: directory - description: Output fastq_screen file containing alignment statistics - pattern: "*.{_fq_screen}" + - txt: + - meta: + type: map + description: Groovy Map containing sample information + - "*.txt": + type: file + description: TXT file containing alignment statistics + pattern: "*.txt" + - png: + - meta: + type: map + description: Groovy Map containing sample information + - "*.png": + type: file + description: PNG file with graphical representation of alignments + pattern: "*.png" + - html: + - meta: + type: map + description: Groovy Map containing sample information + - "*.html": + type: file + description: HTML file containing mapping results as a table and graphical representation + pattern: "*.html" + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: FastQ file containing reads that did not align to any database (optional) + pattern: "*.fastq.gz" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@snesic" - "@JPejovicApis" diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test index 6d858a4..71230a2 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test @@ -10,6 +10,8 @@ nextflow_process { tag "fastqscreen" tag "fastqscreen/buildfromindex" tag "fastqscreen/fastqscreen" + tag "buildfromindex" + tag "modules_fastqscreen" setup { @@ -52,7 +54,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match("version") }, + { assert snapshot(process.out.version).match() }, { assert file(process.out.txt.get(0).get(1)).exists() }, { assert file(process.out.png.get(0).get(1)).exists() }, { assert file(process.out.html.get(0).get(1)).exists() } @@ -61,6 +63,34 @@ nextflow_process { } + test("sarscov2 - human - tags") { + config './nextflow.config' + when { + process { + """ + input[0] = [[ id:'test', single_end:false ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.version, + process.out.txt, + process.out.fastq, + path(process.out.html.get(0).get(1)).readLines()[0..10], + path(process.out.png.get(0).get(1)).exists() + ).match() } + ) + } + + } + test("sarscov2 - human - stub") { options "-stub" diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap 
b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap index b245019..2afffde 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap @@ -1,11 +1,56 @@ { - "version": { + "sarscov2 - human": { "content": null, "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-04-30T14:22:56.541922683" + "timestamp": "2024-08-31T05:42:29.972454812" + }, + "sarscov2 - human - tags": { + "content": [ + null, + [ + [ + { + "id": "test", + "single_end": false + }, + "test_1_screen.txt:md5,b0b0ea58bc26ebaa4d573a85e7898f25" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.tagged.fastq.gz:md5,f742b162c43ce28f80b89608d5c47f3d", + "test_1.tagged_filter.fastq.gz:md5,28527a76bb0bb3fce0ee76afe01e90aa" + ] + ] + ], + [ + "", + "", + "", + "", + "", + "", + "\t", + "\tFastQ Screen Processing Report - test_1.fastq.gz", + "\t