diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61d8dbf51..c069c585e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@
 - Update GitHub Actions ([#3237](https://github.com/nf-core/tools/pull/3237))
 - add `--dir/-d` option to schema commands ([#3247](https://github.com/nf-core/tools/pull/3247))
 - Update pre-commit hook astral-sh/ruff-pre-commit to v0.7.1 ([#3250](https://github.com/nf-core/tools/pull/3250))
+- Add `downstream_samplesheet` to skip_features and GENERATE_DOWNSTREAM_SAMPLESHEETS to template ([#3261](https://github.com/nf-core/tools/pull/3261))
 
 ## [v3.0.2 - Titanium Tapir Patch](https://github.com/nf-core/tools/releases/tag/3.0.2) - [2024-10-11]
 
diff --git a/nf_core/pipeline-template/conf/test.config b/nf_core/pipeline-template/conf/test.config
index bea6f670d..016d5639c 100644
--- a/nf_core/pipeline-template/conf/test.config
+++ b/nf_core/pipeline-template/conf/test.config
@@ -31,4 +31,10 @@ params {
     // Genome references
     genome = 'R64-1-1'
     {%- endif %}
+
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheets
+    generate_downstream_samplesheets = true
+    generate_pipeline_samplesheets = 'rnaseq,sarek'
+    {%- endif %}
 }
diff --git a/nf_core/pipeline-template/conf/test_full.config b/nf_core/pipeline-template/conf/test_full.config
index aa3b70c1e..f525e636a 100644
--- a/nf_core/pipeline-template/conf/test_full.config
+++ b/nf_core/pipeline-template/conf/test_full.config
@@ -26,4 +26,10 @@ params {
     // Fasta references
     fasta = params.pipelines_testdata_base_path + 'viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz'
     {%- endif %}
+
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheets
+    generate_downstream_samplesheets = true
+    generate_pipeline_samplesheets = 'sarek,rnaseq'
+    {%- endif %}
 }
diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config
index 599998f38..664df3702 100644
--- a/nf_core/pipeline-template/nextflow.config
+++ 
b/nf_core/pipeline-template/nextflow.config
@@ -29,6 +29,13 @@ params {
     {% if citations %}multiqc_methods_description = null{% endif %}
     {%- endif %}
 
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheet generation (off by default; output format defaults to 'csv')
+    generate_downstream_samplesheets = false
+    generate_pipeline_samplesheets = null
+    generate_pipeline_samplesheets_format = 'csv'
+    {%- endif %}
+
     // Boilerplate options
     outdir = null
     {% if modules %}publish_dir_mode = 'copy'{% endif %}
diff --git a/nf_core/pipeline-template/nextflow_schema.json b/nf_core/pipeline-template/nextflow_schema.json
index 4136a0b49..47a6624e6 100644
--- a/nf_core/pipeline-template/nextflow_schema.json
+++ b/nf_core/pipeline-template/nextflow_schema.json
@@ -86,6 +86,30 @@
             }
         },
         {%- endif %}
+        {% if downstream_samplesheet %}
+        "generate_samplesheet_options": {
+            "title": "Downstream pipeline samplesheet generation options",
+            "type": "object",
+            "fa_icon": "fas fa-university",
+            "description": "Options for generating input samplesheets for complementary downstream pipelines.",
+            "properties": {
+                "generate_downstream_samplesheets": {
+                    "type": "boolean",
+                    "description": "Turn on generation of samplesheets for downstream pipelines."
+                },
+                "generate_pipeline_samplesheets": {
+                    "type": "string",
+                    "description": "Specify a comma separated string in quotes to specify which pipeline to generate a samplesheet for.",
+                    "pattern": "^(rnaseq|sarek)(?:,(sarek|rnaseq)){0,1}$"
+                },
+                "generate_pipeline_samplesheets_format": {
+                    "type": "string",
+                    "description": "Specify the output format of the samplesheet.",
+                    "enum": ["txt", "tsv", "csv"]
+                }
+            }
+        },
+        {%- endif %}
         {%- if nf_core_configs %}
         "institutional_config_options": {
             "title": "Institutional config options",
diff --git a/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf
new file mode 100644
index 000000000..ddcd1309c
--- /dev/null
+++ b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf
@@ -0,0 +1,105 @@
+
+
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW SPECIFIC FOR RNASEQ
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// TODO nf-core: Update the following workflow to a specific pipeline
+workflow SAMPLESHEET_RNASEQ {
+    take:
+    ch_reads
+    format
+
+    main:
+
+    //TODO nf-core: customise to your needs
+    ch_list_for_samplesheet = ch_reads.map { meta, reads ->
+        //TODO nf-core: Update the path to the published output directory of the reads
+        def out_path = file(params.outdir).toString() + '/relative/custom/path/'
+        def sample = meta.id
+        def fastq_1 = meta.single_end ? out_path + reads.getName() : out_path + reads[0].getName()
+        def fastq_2 = !meta.single_end ? 
out_path + reads[1].getName() : ""
+        def strandedness = "auto"
+        [sample: sample, fastq_1: fastq_1, fastq_2: fastq_2, strandedness: strandedness]
+    }
+
+    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/rnaseq", format)
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW SPECIFIC FOR SAREK
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// TODO nf-core: Update the following workflow to a specific pipeline
+workflow SAMPLESHEET_SAREK {
+    take:
+    ch_reads
+    format
+
+    main:
+
+    //TODO nf-core: customise to your needs (each key of the map built below becomes a column of the generated samplesheet)
+    ch_list_for_samplesheet = ch_reads.map { meta, reads ->
+        //TODO nf-core: Update the path to the published output directory of the reads
+        def out_path = file(params.outdir).toString() + '/relative/custom/path/'
+        def patient = meta.id
+        def sample = meta.id
+        def lane = ""
+        def fastq_1 = meta.single_end ? out_path + reads.getName() : out_path + reads[0].getName()
+        def fastq_2 = !meta.single_end ? 
out_path + reads[1].getName() : ""
+        [ patient: patient, sample: sample, lane: lane, fastq_1: fastq_1, fastq_2: fastq_2 ]
+    }
+
+    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/sarek", format)
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW CALLING PIPELINE SPECIFIC SAMPLESHEET GENERATION
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
+    take:
+    input
+
+    main:
+    def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",")
+
+    // TODO nf-core: Add more pipelines here (one `if` block per name accepted by generate_pipeline_samplesheets)
+    if (downstreampipeline_names.contains('rnaseq')) {
+        SAMPLESHEET_RNASEQ(
+            input,
+            params.generate_pipeline_samplesheets_format
+        )
+    }
+
+    if (downstreampipeline_names.contains('sarek')) {
+        SAMPLESHEET_SAREK(
+            input,
+            params.generate_pipeline_samplesheets_format
+        )
+    }
+}
+
+// Writes a channel of maps to "<path>.<format>": keys of the first map form the header row, values of each map form one row (pass `path` without extension)
+def channelToSamplesheet(ch_list_for_samplesheet, path, format) {
+    def format_sep = [csv: ",", tsv: "\t", txt: "\t"][format]
+
+    def ch_header = ch_list_for_samplesheet
+
+    ch_header
+        .first()
+        .map { it.keySet().join(format_sep) }
+        .concat(ch_list_for_samplesheet.map { it.values().join(format_sep) })
+        .collectFile(
+            name: "${path}.${format}",
+            newLine: true,
+            sort: false
+        )
+}
diff --git a/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test
new file mode 100644
index 000000000..8c79968af
--- /dev/null
+++ b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test
@@ -0,0 +1,79 @@
+
+nextflow_function {
+
+    name "Test Functions"
+    script "../main.nf"
+    tag 'subworkflows'
+    tag 'generate_downstream_samplesheets'
+    tag 
'subworkflows/generate_downstream_samplesheets'
+
+    test("Test Function channelToSamplesheet - csv") {
+
+        function "channelToSamplesheet"
+
+        when {
+            function {
+                """
+                // define inputs of the function here (input[1] is given without extension: the function appends ".<format>"). Example:
+                input[0] = Channel.of(
+                    [
+                        sample: 'test-pe',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz',
+                        strandedness: 'auto'
+                    ],
+                    [
+                        sample: 'test-se',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: '',
+                        strandedness: 'auto'
+                    ])
+                input[1] = "$outputDir/test"
+                input[2] = "csv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+    }
+
+    test("Test Function channelToSamplesheet - tsv") {
+
+        function "channelToSamplesheet"
+
+        when {
+            function {
+                """
+                // define inputs of the function here. 
Example:
+                input[0] = Channel.of(
+                    [
+                        sample: 'test-pe',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz',
+                        strandedness: 'auto'
+                    ],
+                    [
+                        sample: 'test-se',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: '',
+                        strandedness: 'auto'
+                    ])
+                input[1] = "$outputDir/test"
+                input[2] = "tsv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+    }
+}
diff --git a/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test.snap b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test.snap
new file mode 100644
index 000000000..7256fc36f
--- /dev/null
+++ b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test.snap
@@ -0,0 +1,26 @@
+{
+    "Test Function channelToSamplesheet - tsv": {
+        "content": [
+            {
+
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-30T10:06:13.403158303"
+    },
+    "Test Function channelToSamplesheet - csv": {
+        "content": [
+            {
+
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-30T10:06:02.487840724"
+    }
+}
\ No newline at end of file
diff --git a/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test
new file mode 100644
index 000000000..f258ffeb4
--- /dev/null
+++ b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test
@@ -0,0 
+1,46 @@
+nextflow_workflow {
+
+    name "Test Workflow GENERATE_DOWNSTREAM_SAMPLESHEETS"
+    script "../main.nf"
+    workflow "GENERATE_DOWNSTREAM_SAMPLESHEETS"
+    tag 'subworkflows'
+    tag 'generate_downstream_samplesheets'
+    tag 'subworkflows/generate_downstream_samplesheets'
+
+    test("Test workflow rnaseq,sarek") {
+        when {
+            params {
+                outdir = "."
+                generate_pipeline_samplesheets = 'rnaseq,sarek'
+                generate_pipeline_samplesheets_format = 'csv'
+            }
+            workflow {
+                """
+                input[0] = Channel.of(
+                    [
+                        [ id:'test', single_end:false ],
+                        [file('test_1.fastq.gz', checkIfExists: false), file('test_2.fastq.gz', checkIfExists: false)]
+                    ],
+                    [
+                        [id: 'test-se', single_end: true],
+                        file('test_1.fastq.gz', checkIfExists: false)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    [
+                        "${params.outdir}/downstream_samplesheets/rnaseq.csv",
+                        "${params.outdir}/downstream_samplesheets/sarek.csv"
+                    ]).match()
+                }
+            )
+        }
+    }
+
+}
diff --git a/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test.snap b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test.snap
new file mode 100644
index 000000000..8caf3bc95
--- /dev/null
+++ b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test.snap
@@ -0,0 +1,15 @@
+{
+    "Test workflow rnaseq,sarek": {
+        "content": [
+            [
+                "./downstream_samplesheets/rnaseq.csv",
+                "./downstream_samplesheets/sarek.csv"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-31T15:44:42.743679838"
+    }
+}
diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf
index 4dd8674c1..fd7a29b90 100644
--- a/nf_core/pipeline-template/workflows/pipeline.nf
+++ b/nf_core/pipeline-template/workflows/pipeline.nf
@@ -11,6 +11,7 @@
 {% if multiqc %}include { paramsSummaryMultiqc } from 
'../subworkflows/nf-core/utils_nfcore_pipeline'{% endif %}
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 {% if citations or multiqc %}include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_{{ short_name }}_pipeline'{% endif %}
+{% if downstream_samplesheet %}include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets'{% endif %}
 {%- endif %}
 
 /*
@@ -41,6 +42,18 @@ workflow {{ short_name|upper }} {
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
     {%- endif %}
 
+
+    {% if downstream_samplesheet %}
+    //
+    // SUBWORKFLOW: Generate downstream samplesheets (runs only when params.generate_downstream_samplesheets is set)
+    //
+    if (params.generate_downstream_samplesheets) {
+        GENERATE_DOWNSTREAM_SAMPLESHEETS(
+            ch_samplesheet
+        )
+    }
+    {% endif %}
+
     //
     // Collate and save software versions
     //
diff --git a/nf_core/pipelines/create/template_features.yml b/nf_core/pipelines/create/template_features.yml
index 0a3180286..f3be4b688 100644
--- a/nf_core/pipelines/create/template_features.yml
+++ b/nf_core/pipelines/create/template_features.yml
@@ -279,6 +279,17 @@ modules:
     - "modules.json"
   nfcore_pipelines: False
   custom_pipelines: True
+downstream_samplesheet:
+  skippable_paths:
+    - "subworkflows/local/generate_downstream_samplesheets"
+  short_description: "Generate downstream samplesheets"
+  description: "The pipeline will include the generate_downstream_samplesheets subworkflow for the generation of a samplesheet for other downstream pipelines."
+  help_text: |
+    The pipeline will include the `GENERATE_DOWNSTREAM_SAMPLESHEETS` subworkflow.
+
+    The subworkflow `GENERATE_DOWNSTREAM_SAMPLESHEETS` provides a base template for generating samplesheets by taking a specified input channel, *eg:* reads or fasta, and extracts its metadata for generating samplesheets.
+  nfcore_pipelines: True
+  custom_pipelines: True
 changelog:
   skippable_paths:
     - "CHANGELOG.md"