diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 4ecfbfe33..4a9bc5c79 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -18,11 +18,11 @@
"python.linting.flake8Path": "/opt/conda/bin/flake8",
"python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
"python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
- "python.linting.pylintPath": "/opt/conda/bin/pylint"
+ "python.linting.pylintPath": "/opt/conda/bin/pylint",
},
// Add the IDs of extensions you want installed when the container is created.
- "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
- }
- }
+ "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"],
+ },
+ },
}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 66bd47cf9..c80d78b97 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -35,38 +35,9 @@ jobs:
with:
version: "${{ matrix.NXF_VER }}"
- - name: Hash Github Workspace
- id: hash_workspace
- run: |
- echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT
-
- - name: Cache test data
- id: cache-testdata
- uses: actions/cache@v3
- with:
- path: test-datasets/
- key: ${{ steps.hash_workspace.outputs.digest }}
-
- - name: Check out test data
- if: steps.cache-testdata.outputs.cache-hit != 'true'
- uses: actions/checkout@v3
- with:
- repository: nf-core/test-datasets
- ref: rnaseq3
- path: test-datasets/
-
- - name: Replace remote paths in samplesheets
- run: |
- for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do
- sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f
- echo "========== $f ============"
- cat $f
- echo "========================================"
- done;
-
- name: Run pipeline with test data
run: |
- nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/
+ nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
star_salmon:
name: Test STAR Salmon with workflow parameters
@@ -92,35 +63,6 @@ jobs:
- name: Check out pipeline code
uses: actions/checkout@v2
- - name: Hash Github Workspace
- id: hash_workspace
- run: |
- echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT
-
- - name: Cache test data
- id: cache-testdata
- uses: actions/cache@v3
- with:
- path: test-datasets/
- key: ${{ steps.hash_workspace.outputs.digest }}
-
- - name: Check out test data
- if: steps.cache-testdata.outputs.cache-hit != 'true'
- uses: actions/checkout@v3
- with:
- repository: nf-core/test-datasets
- ref: rnaseq3
- path: test-datasets/
-
- - name: Replace remote paths in samplesheets
- run: |
- for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do
- sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f
- echo "========== $f ============"
- cat $f
- echo "========================================"
- done;
-
- name: Install Nextflow
run: |
wget -qO- get.nextflow.io | bash
@@ -128,7 +70,7 @@ jobs:
- name: Run pipeline with STAR and various parameters
run: |
- nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --aligner star_salmon ${{ matrix.parameters }} --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/
+ nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner star_salmon ${{ matrix.parameters }} --outdir ./results
star_rsem:
name: Test STAR RSEM with workflow parameters
@@ -143,35 +85,6 @@ jobs:
- name: Check out pipeline code
uses: actions/checkout@v2
- - name: Hash Github Workspace
- id: hash_workspace
- run: |
- echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT
-
- - name: Cache test data
- id: cache-testdata
- uses: actions/cache@v3
- with:
- path: test-datasets/
- key: ${{ steps.hash_workspace.outputs.digest }}
-
- - name: Check out test data
- if: steps.cache-testdata.outputs.cache-hit != 'true'
- uses: actions/checkout@v3
- with:
- repository: nf-core/test-datasets
- ref: rnaseq3
- path: test-datasets/
-
- - name: Replace remote paths in samplesheets
- run: |
- for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do
- sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f
- echo "========== $f ============"
- cat $f
- echo "========================================"
- done;
-
- name: Install Nextflow
run: |
wget -qO- get.nextflow.io | bash
@@ -179,7 +92,7 @@ jobs:
- name: Run pipeline with RSEM STAR and various parameters
run: |
- nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --aligner star_rsem ${{ matrix.parameters }} --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/
+ nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner star_rsem ${{ matrix.parameters }} --outdir ./results
hisat2:
name: Test HISAT2 with workflow parameters
@@ -194,35 +107,6 @@ jobs:
- name: Check out pipeline code
uses: actions/checkout@v2
- - name: Hash Github Workspace
- id: hash_workspace
- run: |
- echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT
-
- - name: Cache test data
- id: cache-testdata
- uses: actions/cache@v3
- with:
- path: test-datasets/
- key: ${{ steps.hash_workspace.outputs.digest }}
-
- - name: Check out test data
- if: steps.cache-testdata.outputs.cache-hit != 'true'
- uses: actions/checkout@v3
- with:
- repository: nf-core/test-datasets
- ref: rnaseq3
- path: test-datasets/
-
- - name: Replace remote paths in samplesheets
- run: |
- for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do
- sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f
- echo "========== $f ============"
- cat $f
- echo "========================================"
- done;
-
- name: Install Nextflow
run: |
wget -qO- get.nextflow.io | bash
@@ -230,7 +114,7 @@ jobs:
- name: Run pipeline with HISAT2 and various parameters
run: |
- nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --aligner hisat2 ${{ matrix.parameters }} --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/
+ nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner hisat2 ${{ matrix.parameters }} --outdir ./results
pseudo:
name: Test Pseudoaligners with workflow parameters
@@ -249,35 +133,6 @@ jobs:
- name: Check out pipeline code
uses: actions/checkout@v2
- - name: Hash Github Workspace
- id: hash_workspace
- run: |
- echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT
-
- - name: Cache test data
- id: cache-testdata
- uses: actions/cache@v3
- with:
- path: test-datasets/
- key: ${{ steps.hash_workspace.outputs.digest }}
-
- - name: Check out test data
- if: steps.cache-testdata.outputs.cache-hit != 'true'
- uses: actions/checkout@v3
- with:
- repository: nf-core/test-datasets
- ref: rnaseq3
- path: test-datasets/
-
- - name: Replace remote paths in samplesheets
- run: |
- for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do
- sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f
- echo "========== $f ============"
- cat $f
- echo "========================================"
- done;
-
- name: Install Nextflow
run: |
wget -qO- get.nextflow.io | bash
@@ -285,4 +140,4 @@ jobs:
- name: Run pipeline with Salmon or Kallisto and various parameters
run: |
- nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker ${{ matrix.parameters }} --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/
+ nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} --outdir ./results
diff --git a/.nf-core.yml b/.nf-core.yml
index 094459361..fdcea61c0 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -3,6 +3,8 @@ lint:
files_unchanged:
- assets/email_template.html
- assets/email_template.txt
+ files_exist:
+ - lib/nfcore_external_java_deps.jar
- lib/NfcoreTemplate.groovy
- - pyproject.toml
- multiqc_config: false
+ - lib/Utils.groovy
+ - lib/WorkflowMain.groovy
diff --git a/assets/dummy_file.txt b/assets/dummy_file.txt
deleted file mode 100644
index 8b1378917..000000000
--- a/assets/dummy_file.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/assets/email_template.html b/assets/email_template.html
index 0ef39b222..dc46d39ee 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -14,7 +14,7 @@

-<h1>nf-core/rnaseq v${version}</h1>
+<h1>nf-core/rnaseq ${version}</h1>
<h2>Run Name: $runName</h2>
<% if (!success){ out << """
diff --git a/assets/email_template.txt b/assets/email_template.txt
index 5440f887a..8789fd8ae 100644
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -4,7 +4,7 @@
  |\\ | |__  __ /  ` /  \\ |__) |__         }  {
  | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
                                        `._,._,'
-  nf-core/rnaseq v${version}
+  nf-core/rnaseq ${version}
----------------------------------------------------
Run Name: $runName
diff --git a/conf/base.config b/conf/base.config
index 337564e5c..93ad25b28 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -57,7 +57,4 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
- withName:CUSTOM_DUMPSOFTWAREVERSIONS {
- cache = false
- }
}
diff --git a/conf/modules.config b/conf/modules.config
index 16d635cdb..e6210ea0e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -22,14 +22,6 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
-
- withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' {
- publishDir = [
- path: { "${params.outdir}/pipeline_info" },
- mode: params.publish_dir_mode,
- pattern: '*_versions.yml'
- ]
- }
}
//
@@ -148,7 +140,7 @@ process {
if (!params.skip_bbsplit && params.bbsplit_fasta_list) {
process {
- withName: '.*:PREPARE_GENOME:BBMAP_BBSPLIT' {
+ withName: 'PREPARE_GENOME:BBMAP_BBSPLIT' {
ext.args = 'build=1'
publishDir = [
path: { params.save_reference ? "${params.outdir}/genome/index" : params.outdir },
@@ -348,7 +340,7 @@ if (params.remove_ribo_rna) {
if (!params.skip_alignment) {
process {
- withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
+ withName: 'NFCORE_RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
ext.prefix = { "${meta.id}.sorted.bam" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/samtools_stats" },
@@ -357,7 +349,7 @@ if (!params.skip_alignment) {
]
}
- withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
+ withName: 'NFCORE_RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}.sorted" }
publishDir = [
path: { ( ['star_salmon','hisat2'].contains(params.aligner) &&
@@ -371,7 +363,7 @@ if (!params.skip_alignment) {
]
}
- withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
+ withName: 'NFCORE_RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
ext.args = { params.bam_csi_index ? '-c' : '' }
publishDir = [
path: { ( ['star_salmon','hisat2'].contains(params.aligner) &&
@@ -611,7 +603,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
if (params.with_umi) {
process {
- withName: 'NFCORE_RNASEQ:RNASEQ:SAMTOOLS_SORT' {
+ withName: 'NFCORE_RNASEQ:SAMTOOLS_SORT' {
ext.args = '-n'
ext.prefix = { "${meta.id}.umi_dedup.transcriptome" }
publishDir = [
@@ -622,7 +614,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
]
}
- withName: 'NFCORE_RNASEQ:RNASEQ:UMITOOLS_PREPAREFORSALMON' {
+ withName: 'NFCORE_RNASEQ:UMITOOLS_PREPAREFORSALMON' {
ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" }
publishDir = [
[
@@ -639,7 +631,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
]
}
- withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
+ withName: 'NFCORE_RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}.transcriptome.sorted" }
publishDir = [
path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir },
@@ -649,7 +641,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
]
}
- withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
+ withName: 'NFCORE_RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
publishDir = [
path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir },
mode: params.publish_dir_mode,
@@ -658,7 +650,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
]
}
- withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
+ withName: 'NFCORE_RNASEQ:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
ext.prefix = { "${meta.id}.transcriptome.sorted.bam" }
publishDir = [
path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}/samtools_stats" : params.outdir },
diff --git a/conf/test_cache.config b/conf/test_cache.config
deleted file mode 100644
index 51aad577e..000000000
--- a/conf/test_cache.config
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Nextflow config file for running minimal tests using cached / offline test data
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Defines input files and everything required to run a fast and simple pipeline test.
-
- Use as follows:
- nextflow run nf-core/rnaseq -profile test,<docker/singularity> --outdir <OUTDIR>
-
-----------------------------------------------------------------------------------------
-*/
-
-params {
- config_profile_name = 'Test profile'
- config_profile_description = 'Minimal test dataset to check pipeline function'
-
- // Limit resources so that this can run on GitHub Actions
- max_cpus = 2
- max_memory = '6.GB'
- max_time = '6.h'
-
-
- // Input data
- // params.test_data_base (default) = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/'
- input = "${params.test_data_base}/samplesheet/v3.10/samplesheet_test.csv"
-
- // Genome references
- fasta = "${params.test_data_base}/reference/genome.fasta"
- gtf = "${params.test_data_base}/reference/genes.gtf.gz"
- gff = "${params.test_data_base}/reference/genes.gff.gz"
- transcript_fasta = "${params.test_data_base}/reference/transcriptome.fasta"
- additional_fasta = "${params.test_data_base}/reference/gfp.fa.gz"
-
- bbsplit_fasta_list = "${params.test_data_base}/reference/bbsplit_fasta_list.txt"
- hisat2_index = "${params.test_data_base}/reference/hisat2.tar.gz"
- salmon_index = "${params.test_data_base}/reference/salmon.tar.gz"
- rsem_index = "${params.test_data_base}/reference/rsem.tar.gz"
-
- // Other parameters
- skip_bbsplit = false
- pseudo_aligner = 'salmon'
- umitools_bc_pattern = 'NNNN'
-}
-
-// When using RSEM, remove warning from STAR whilst building tiny indices
-process {
- withName: 'RSEM_PREPAREREFERENCE_GENOME' {
- ext.args2 = "--genomeSAindexNbases 7"
- }
-}
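With conf/test_cache.config removed, CI falls back to the standard 'test' profile, which streams the same files from the remote test-datasets repository instead of a local checkout. A minimal sketch of the equivalent remote configuration, with URLs assembled from the deleted profile's default test_data_base above (illustrative; the actual conf/test.config may differ):

params {
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset to check pipeline function'

    // Remote test data, resolved against the old default base URL
    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/samplesheet/v3.10/samplesheet_test.csv'
    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/genome.fasta'
    gtf   = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/genes.gtf.gz'
}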
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
deleted file mode 100755
index 5cdc9a73c..000000000
--- a/lib/NfcoreTemplate.groovy
+++ /dev/null
@@ -1,398 +0,0 @@
-//
-// This file holds several functions used within the nf-core pipeline template.
-//
-
-import org.yaml.snakeyaml.Yaml
-import groovy.json.JsonOutput
-import nextflow.extension.FilesEx
-
-class NfcoreTemplate {
-
- //
- // Check AWS Batch related parameters have been specified correctly
- //
- public static void awsBatch(workflow, params) {
- if (workflow.profile.contains('awsbatch')) {
- // Check params.awsqueue and params.awsregion have been set if running on AWSBatch
- assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
- // Check outdir paths to be S3 buckets if running on AWSBatch
- assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
- }
- }
-
- //
- // Warn if a -profile or Nextflow config has not been provided to run the pipeline
- //
- public static void checkConfigProvided(workflow, log) {
- if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) {
- log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" +
- "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" +
- " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" +
- " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" +
- " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" +
- "Please refer to the quick start section and usage docs for the pipeline.\n "
- }
- }
-
- //
- // Warn if using custom configs to provide pipeline parameters
- //
- public static void warnParamsProvidedInConfig(workflow, log) {
- if (workflow.configFiles.size() > 1) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " Multiple config files detected!\n" +
- " Please provide pipeline parameters via the CLI or Nextflow '-params-file' option.\n" +
- " Custom config files including those provided by the '-c' Nextflow option can be\n" +
- " used to provide any configuration except for parameters.\n\n" +
- " Docs: https://nf-co.re/usage/configuration#custom-configuration-files\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
- }
-
- //
- // Generate version string
- //
- public static String version(workflow) {
- String version_string = ""
-
- if (workflow.manifest.version) {
- def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : ''
- version_string += "${prefix_v}${workflow.manifest.version}"
- }
-
- if (workflow.commitId) {
- def git_shortsha = workflow.commitId.substring(0, 7)
- version_string += "-g${git_shortsha}"
- }
-
- return version_string
- }
-
- //
- // Construct and send completion email
- //
- public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[], pass_mapped_reads=[:], pass_trimmed_reads=[:], pass_strand_check=[:]) {
-
- // Set up the e-mail variables
- def fail_mapped_count = pass_mapped_reads.count { key, value -> value == false }
- def fail_trimmed_count = pass_trimmed_reads.count { key, value -> value == false }
- def fail_strand_count = pass_strand_check.count { key, value -> value == false }
-
- def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
- if (fail_mapped_count + fail_trimmed_count + fail_strand_count > 0) {
- subject = "[$workflow.manifest.name] Partially successful - samples skipped: $workflow.runName"
- }
- if (!workflow.success) {
- subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
- }
-
- def summary = [:]
- for (group in summary_params.keySet()) {
- summary << summary_params[group]
- }
-
- def misc_fields = [:]
- misc_fields['Date Started'] = workflow.start
- misc_fields['Date Completed'] = workflow.complete
- misc_fields['Pipeline script file path'] = workflow.scriptFile
- misc_fields['Pipeline script hash ID'] = workflow.scriptId
- if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
- if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
- if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
- misc_fields['Nextflow Version'] = workflow.nextflow.version
- misc_fields['Nextflow Build'] = workflow.nextflow.build
- misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
-
- def email_fields = [:]
- email_fields['version'] = NfcoreTemplate.version(workflow)
- email_fields['runName'] = workflow.runName
- email_fields['success'] = workflow.success
- email_fields['dateComplete'] = workflow.complete
- email_fields['duration'] = workflow.duration
- email_fields['exitStatus'] = workflow.exitStatus
- email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
- email_fields['errorReport'] = (workflow.errorReport ?: 'None')
- email_fields['commandLine'] = workflow.commandLine
- email_fields['projectDir'] = workflow.projectDir
- email_fields['summary'] = summary << misc_fields
- email_fields['skip_sample_count'] = fail_mapped_count + fail_trimmed_count + fail_strand_count
-
- // On success try attach the multiqc report
- def mqc_report = null
- try {
- if (workflow.success && !params.skip_multiqc) {
- mqc_report = multiqc_report.getVal()
- if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
- if (mqc_report.size() > 1) {
- log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
- }
- mqc_report = mqc_report[0]
- }
- }
- } catch (all) {
- if (multiqc_report) {
- log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
- }
- }
-
- // Check if we are only sending emails on failure
- def email_address = params.email
- if (!params.email && params.email_on_fail && !workflow.success) {
- email_address = params.email_on_fail
- }
-
- // Render the TXT template
- def engine = new groovy.text.GStringTemplateEngine()
- def tf = new File("$projectDir/assets/email_template.txt")
- def txt_template = engine.createTemplate(tf).make(email_fields)
- def email_txt = txt_template.toString()
-
- // Render the HTML template
- def hf = new File("$projectDir/assets/email_template.html")
- def html_template = engine.createTemplate(hf).make(email_fields)
- def email_html = html_template.toString()
-
- // Render the sendmail template
- def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit
- def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
- def sf = new File("$projectDir/assets/sendmail_template.txt")
- def sendmail_template = engine.createTemplate(sf).make(smail_fields)
- def sendmail_html = sendmail_template.toString()
-
- // Send the HTML e-mail
- Map colors = logColours(params.monochrome_logs)
- if (email_address) {
- try {
- if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') }
- // Try to send HTML e-mail using sendmail
- def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
- sendmail_tf.withWriter { w -> w << sendmail_html }
- [ 'sendmail', '-t' ].execute() << sendmail_html
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
- } catch (all) {
- // Catch failures and try with plaintext
- def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
- if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
- mail_cmd += [ '-A', mqc_report ]
- }
- mail_cmd.execute() << email_html
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
- }
- }
-
- // Write summary e-mail HTML to a file
- def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
- output_hf.withWriter { w -> w << email_html }
- FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html");
- output_hf.delete()
-
- // Write summary e-mail TXT to a file
- def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
- output_tf.withWriter { w -> w << email_txt }
- FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt");
- output_tf.delete()
- }
-
- //
- // Construct and send a notification to a web server as JSON
- // e.g. Microsoft Teams and Slack
- //
- public static void IM_notification(workflow, params, summary_params, projectDir, log) {
- def hook_url = params.hook_url
-
- def summary = [:]
- for (group in summary_params.keySet()) {
- summary << summary_params[group]
- }
-
- def misc_fields = [:]
- misc_fields['start'] = workflow.start
- misc_fields['complete'] = workflow.complete
- misc_fields['scriptfile'] = workflow.scriptFile
- misc_fields['scriptid'] = workflow.scriptId
- if (workflow.repository) misc_fields['repository'] = workflow.repository
- if (workflow.commitId) misc_fields['commitid'] = workflow.commitId
- if (workflow.revision) misc_fields['revision'] = workflow.revision
- misc_fields['nxf_version'] = workflow.nextflow.version
- misc_fields['nxf_build'] = workflow.nextflow.build
- misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
-
- def msg_fields = [:]
- msg_fields['version'] = NfcoreTemplate.version(workflow)
- msg_fields['runName'] = workflow.runName
- msg_fields['success'] = workflow.success
- msg_fields['dateComplete'] = workflow.complete
- msg_fields['duration'] = workflow.duration
- msg_fields['exitStatus'] = workflow.exitStatus
- msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
- msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
- msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
- msg_fields['projectDir'] = workflow.projectDir
- msg_fields['summary'] = summary << misc_fields
-
- // Render the JSON template
- def engine = new groovy.text.GStringTemplateEngine()
- // Different JSON depending on the service provider
- // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
- def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
- def hf = new File("$projectDir/assets/${json_path}")
- def json_template = engine.createTemplate(hf).make(msg_fields)
- def json_message = json_template.toString()
-
- // POST
- def post = new URL(hook_url).openConnection();
- post.setRequestMethod("POST")
- post.setDoOutput(true)
- post.setRequestProperty("Content-Type", "application/json")
- post.getOutputStream().write(json_message.getBytes("UTF-8"));
- def postRC = post.getResponseCode();
- if (! postRC.equals(200)) {
- log.warn(post.getErrorStream().getText());
- }
- }
-
- //
- // Dump pipeline parameters in a json file
- //
- public static void dump_parameters(workflow, params) {
- def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
- def filename = "params_${timestamp}.json"
- def temp_pf = new File(workflow.launchDir.toString(), ".${filename}")
- def jsonStr = JsonOutput.toJson(params)
- temp_pf.text = JsonOutput.prettyPrint(jsonStr)
-
- FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json")
- temp_pf.delete()
- }
-
- //
- // Print pipeline summary on completion
- //
- public static void summary(workflow, params, log, pass_mapped_reads=[:], pass_trimmed_reads=[:], pass_strand_check=[:]) {
- Map colors = logColours(params.monochrome_logs)
-
- def fail_mapped_count = pass_mapped_reads.count { key, value -> value == false }
- def fail_trimmed_count = pass_trimmed_reads.count { key, value -> value == false }
- def fail_strand_count = pass_strand_check.count { key, value -> value == false }
- if (workflow.success) {
- def color = colors.green
- def status = []
- if (workflow.stats.ignoredCount != 0) {
- color = colors.yellow
- status += ['with errored process(es)']
- }
- if (fail_mapped_count > 0 || fail_trimmed_count > 0 || fail_strand_count > 0) {
- color = colors.yellow
- status += ['with skipped sampl(es)']
- }
- log.info "-${colors.purple}[$workflow.manifest.name]${color} Pipeline completed successfully ${status.join(', ')}${colors.reset}-"
- if (fail_trimmed_count > 0) {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Please check MultiQC report: ${fail_trimmed_count}/${pass_trimmed_reads.size()} samples skipped since they failed ${params.min_trimmed_reads} trimmed read threshold.${colors.reset}-"
- }
- if (fail_mapped_count > 0) {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Please check MultiQC report: ${fail_mapped_count}/${pass_mapped_reads.size()} samples skipped since they failed STAR ${params.min_mapped_reads}% mapped threshold.${colors.reset}-"
- }
- if (fail_strand_count > 0) {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Please check MultiQC report: ${fail_strand_count}/${pass_strand_check.size()} samples failed strandedness check.${colors.reset}-"
- }
- } else {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
- }
- }
-
- //
- // ANSII Colours used for terminal logging
- //
- public static Map logColours(Boolean monochrome_logs) {
- Map colorcodes = [:]
-
- // Reset / Meta
- colorcodes['reset'] = monochrome_logs ? '' : "\033[0m"
- colorcodes['bold'] = monochrome_logs ? '' : "\033[1m"
- colorcodes['dim'] = monochrome_logs ? '' : "\033[2m"
- colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m"
- colorcodes['blink'] = monochrome_logs ? '' : "\033[5m"
- colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m"
- colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m"
-
- // Regular Colors
- colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m"
- colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m"
- colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m"
- colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m"
- colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m"
- colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m"
- colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m"
- colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m"
-
- // Bold
- colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m"
- colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m"
- colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m"
- colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m"
- colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m"
- colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m"
- colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m"
- colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m"
-
- // Underline
- colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m"
- colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m"
- colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m"
- colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m"
- colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m"
- colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m"
- colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m"
- colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m"
-
- // High Intensity
- colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m"
- colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m"
- colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m"
- colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m"
- colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m"
- colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m"
- colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m"
- colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m"
-
- // Bold High Intensity
- colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m"
- colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m"
- colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m"
- colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m"
- colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m"
- colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m"
- colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m"
- colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m"
-
- return colorcodes
- }
-
- //
- // Does what is says on the tin
- //
- public static String dashedLine(monochrome_logs) {
- Map colors = logColours(monochrome_logs)
- return "-${colors.dim}----------------------------------------------------${colors.reset}-"
- }
-
- //
- // nf-core logo
- //
- public static String logo(workflow, monochrome_logs) {
- Map colors = logColours(monochrome_logs)
- String workflow_version = NfcoreTemplate.version(workflow)
- String.format(
- """\n
- ${dashedLine(monochrome_logs)}
-                                            ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset}
-            ${colors.blue}        ___     __   __   __   ___     ${colors.green}/,-._.--~\'${colors.reset}
-            ${colors.blue}  |\\ | |__  __ /  ` /  \\ |__) |__         ${colors.yellow}}  {${colors.reset}
-            ${colors.blue}  | \\| |       \\__, \\__/ |  \\ |___     ${colors.green}\\`-._,-`-,${colors.reset}
-                                            ${colors.green}`._,._,\'${colors.reset}
- ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset}
- ${dashedLine(monochrome_logs)}
- """.stripIndent()
- )
- }
-}
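The version() helper deleted above builds strings such as 'v3.14.0-g1a2b3c4' (these template helpers move into the new utils subworkflows imported in main.nf below). A standalone Groovy check of that logic, using illustrative values rather than a real run:

def version = { manifest_version, commit_id ->
    String version_string = ''
    if (manifest_version) {
        def prefix_v = manifest_version[0] != 'v' ? 'v' : ''
        version_string += "${prefix_v}${manifest_version}"
    }
    if (commit_id) {
        version_string += "-g${commit_id.substring(0, 7)}"   // short SHA suffix
    }
    return version_string
}
assert version('3.14.0', '1a2b3c4d5e6f') == 'v3.14.0-g1a2b3c4'
assert version('v3.14.0', null) == 'v3.14.0'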
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
deleted file mode 100644
index 8d030f4e8..000000000
--- a/lib/Utils.groovy
+++ /dev/null
@@ -1,47 +0,0 @@
-//
-// This file holds several Groovy functions that could be useful for any Nextflow pipeline
-//
-
-import org.yaml.snakeyaml.Yaml
-
-class Utils {
-
- //
- // When running with -profile conda, warn if channels have not been set-up appropriately
- //
- public static void checkCondaChannels(log) {
- Yaml parser = new Yaml()
- def channels = []
- try {
- def config = parser.load("conda config --show channels".execute().text)
- channels = config.channels
- } catch(NullPointerException | IOException e) {
- log.warn "Could not verify conda channel configuration."
- return
- }
-
- // Check that all channels are present
- // This channel list is ordered by required channel priority.
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
- def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean
-
- // Check that they are in the right order
- def channel_priority_violation = false
- def n = required_channels_in_order.size()
- for (int i = 0; i < n - 1; i++) {
- channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
- }
-
- if (channels_missing | channel_priority_violation) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " There is a problem with your Conda configuration!\n\n" +
- " You will need to set-up the conda-forge and bioconda channels correctly.\n" +
- " Please refer to https://bioconda.github.io/\n" +
- " The observed channel order is \n" +
- " ${channels}\n" +
- " but the following channel order is required:\n" +
- " ${required_channels_in_order}\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
- }
-}
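The channel-priority loop in checkCondaChannels() above is easier to see with concrete values. A standalone Groovy sketch with illustrative channel lists (not real conda output):

def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
def channels = ['defaults', 'bioconda', 'conda-forge']   // reversed priority

// A violation occurs when a required channel appears after its successor
def channel_priority_violation = false
def n = required_channels_in_order.size()
for (int i = 0; i < n - 1; i++) {
    channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i + 1]))
}
assert channel_priority_violation   // conda-forge is listed after bioconda here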
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
deleted file mode 100755
index 4d94dfe08..000000000
--- a/lib/WorkflowMain.groovy
+++ /dev/null
@@ -1,60 +0,0 @@
-//
-// This file holds several functions specific to the main.nf workflow in the nf-core/rnaseq pipeline
-//
-
-import nextflow.Nextflow
-
-class WorkflowMain {
-
- //
- // Citation string for pipeline
- //
- public static String citation(workflow) {
- return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
- "* The pipeline\n" +
- " https://doi.org/10.5281/zenodo.1400710\n\n" +
- "* The nf-core framework\n" +
- " https://doi.org/10.1038/s41587-020-0439-x\n\n" +
- "* Software dependencies\n" +
- " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
- }
-
- //
- // Validate parameters and print summary to screen
- //
- public static void initialise(workflow, params, log) {
-
- // Print workflow version and exit on --version
- if (params.version) {
- String workflow_version = NfcoreTemplate.version(workflow)
- log.info "${workflow.manifest.name} ${workflow_version}"
- System.exit(0)
- }
-
- // Warn about using custom configs to provide pipeline parameters
- NfcoreTemplate.warnParamsProvidedInConfig(workflow, log)
-
- // Check that a -profile or Nextflow config has been provided to run the pipeline
- NfcoreTemplate.checkConfigProvided(workflow, log)
-
- // Check that conda channels are set-up correctly
- if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
- Utils.checkCondaChannels(log)
- }
-
- // Check AWS batch settings
- NfcoreTemplate.awsBatch(workflow, params)
- }
-
- //
- // Get attribute from genome config file e.g. fasta
- //
- public static Object getGenomeAttribute(params, attribute) {
- if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
- if (params.genomes[ params.genome ].containsKey(attribute)) {
- return params.genomes[ params.genome ][ attribute ]
- }
- }
- return null
- }
-}
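Of the helpers deleted above, getGenomeAttribute() is the one main.nf still uses; it is now imported from subworkflows/local/utils_nfcore_rnaseq_pipeline (see the main.nf diff below). A minimal sketch of the relocated function, assuming it keeps the deleted implementation (the subworkflow file itself is not part of this excerpt):

//
// Get attribute from genome config file e.g. fasta
//
def getGenomeAttribute(attribute) {
    if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
        if (params.genomes[ params.genome ].containsKey(attribute)) {
            return params.genomes[ params.genome ][ attribute ]
        }
    }
    return null
}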
diff --git a/lib/WorkflowRnaseq.groovy b/lib/WorkflowRnaseq.groovy
deleted file mode 100755
index 29a1724e3..000000000
--- a/lib/WorkflowRnaseq.groovy
+++ /dev/null
@@ -1,474 +0,0 @@
-//
-// This file holds several functions specific to the workflow/rnaseq.nf in the nf-core/rnaseq pipeline
-//
-
-import nextflow.Nextflow
-import groovy.json.JsonSlurper
-import groovy.text.SimpleTemplateEngine
-
-class WorkflowRnaseq {
-
- //
- // Check and validate parameters
- //
- public static void initialise(params, log) {
- genomeExistsError(params, log)
-
- if (!params.fasta) {
- Nextflow.error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.")
- }
-
- if (!params.gtf && !params.gff) {
- Nextflow.error("No GTF or GFF3 annotation specified! The pipeline requires at least one of these files.")
- }
-
- if (params.gtf) {
- if (params.gff) {
- gtfGffWarn(log)
- }
- if (params.genome == 'GRCh38' && params.gtf.contains('Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf')) {
- ncbiGenomeWarn(log)
- }
- if (params.gtf.contains('/UCSC/') && params.gtf.contains('Annotation/Genes/genes.gtf')) {
- ucscGenomeWarn(log)
- }
- }
-
- if (params.transcript_fasta) {
- transcriptsFastaWarn(log)
- }
-
- if (!params.skip_bbsplit && !params.bbsplit_index && !params.bbsplit_fasta_list) {
- Nextflow.error("Please provide either --bbsplit_fasta_list / --bbsplit_index to run BBSplit.")
- }
-
- if (params.remove_ribo_rna && !params.ribo_database_manifest) {
- Nextflow.error("Please provide --ribo_database_manifest to remove ribosomal RNA with SortMeRNA.")
- }
-
-
- if (params.with_umi && !params.skip_umi_extract) {
- if (!params.umitools_bc_pattern && !params.umitools_bc_pattern2) {
- Nextflow.error("UMI-tools requires a barcode pattern to extract barcodes from the reads.")
- }
- }
-
- if (params.skip_alignment) {
- skipAlignmentWarn(log)
- }
-
- if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
- if (!(params.salmon_index || params.transcript_fasta || (params.fasta && (params.gtf || params.gff)))) {
- Nextflow.error("To use `--pseudo_aligner 'salmon'`, you must provide either --salmon_index or --transcript_fasta or both --fasta and --gtf / --gff.")
- }
- }
-
- // Checks when running --aligner star_rsem
- if (!params.skip_alignment && params.aligner == 'star_rsem') {
- if (params.with_umi) {
- rsemUmiError(log)
- }
- if (params.rsem_index && params.star_index) {
- rsemStarIndexWarn(log)
- }
- if (params.aligner == 'star_rsem' && params.extra_star_align_args) {
- rsemStarExtraArgumentsWarn(log)
- }
- }
-
- // Warn if --additional_fasta provided with aligner index
- if (!params.skip_alignment && params.additional_fasta) {
- def index = ''
- if (params.aligner == 'star_salmon' && params.star_index) {
- index = 'star'
- }
- if (params.aligner == 'star_rsem' && params.rsem_index) {
- index = 'rsem'
- }
- if (params.aligner == 'hisat2' && params.hisat2_index) {
- index = 'hisat2'
- }
- if (index) {
- additionaFastaIndexWarn(index, log)
- }
- }
-
- // Check which RSeQC modules we are running
- def valid_rseqc_modules = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin']
- def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : []
- if ((valid_rseqc_modules + rseqc_modules).unique().size() != valid_rseqc_modules.size()) {
- Nextflow.error("Invalid option: ${params.rseqc_modules}. Valid options for '--rseqc_modules': ${valid_rseqc_modules.join(', ')}")
- }
- }
-
- //
- // Function to validate channels from input samplesheet
- //
- public static ArrayList validateInput(input) {
- def (metas, fastqs) = input[1..2]
-
- // Check that multiple runs of the same sample are of the same strandedness
- def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1
- if (!strandedness_ok) {
- Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}")
- }
-
- // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
- def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
- if (!endedness_ok) {
- Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
- }
-
- return [ metas[0], fastqs ]
- }
-
- //
- // Function to check whether biotype field exists in GTF file
- //
- public static Boolean biotypeInGtf(gtf_file, biotype, log) {
- def hits = 0
- gtf_file.eachLine { line ->
- def attributes = line.split('\t')[-1].split()
- if (attributes.contains(biotype)) {
- hits += 1
- }
- }
- if (hits) {
- return true
- } else {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " Biotype attribute '${biotype}' not found in the last column of the GTF file!\n\n" +
- " Biotype QC will be skipped to circumvent the issue below:\n" +
- " https://github.com/nf-core/rnaseq/issues/460\n\n" +
- " Amend '--featurecounts_group_type' to change this behaviour.\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- return false
- }
- }
-
- //
- // Function to generate an error if contigs in genome fasta file > 512 Mbp
- //
- public static void checkMaxContigSize(fai_file, log) {
- def max_size = 512000000
- fai_file.eachLine { line ->
- def lspl = line.split('\t')
- def chrom = lspl[0]
- def size = lspl[1]
- if (size.toInteger() > max_size) {
- def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " Contig longer than ${max_size}bp found in reference genome!\n\n" +
- " ${chrom}: ${size}\n\n" +
- " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" +
- " Please see:\n" +
- " https://github.com/nf-core/rnaseq/issues/744\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- Nextflow.error(error_string)
- }
- }
- }
-
- //
- // Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness
- //
- public static String getSalmonInferredStrandedness(json_file) {
- def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0]
- def strandedness = 'reverse'
- if (lib_type) {
- if (lib_type in ['U', 'IU']) {
- strandedness = 'unstranded'
- } else if (lib_type in ['SF', 'ISF']) {
- strandedness = 'forward'
- } else if (lib_type in ['SR', 'ISR']) {
- strandedness = 'reverse'
- }
- }
- return strandedness
- }
-
- //
- // Function that parses TrimGalore log output file to get total number of reads after trimming
- //
- public static Integer getTrimGaloreReadsAfterFiltering(log_file) {
- def total_reads = 0
- def filtered_reads = 0
- log_file.eachLine { line ->
- def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/
- def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/
- if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat()
- if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat()
- }
- return total_reads - filtered_reads
- }
-
- //
- // Function that parses and returns the alignment rate from the STAR log output
- //
- public static ArrayList getStarPercentMapped(params, align_log) {
- def percent_aligned = 0
- def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/
- align_log.eachLine { line ->
- def matcher = line =~ pattern
- if (matcher) {
- percent_aligned = matcher[0][1].toFloat()
- }
- }
-
- def pass = false
- if (percent_aligned >= params.min_mapped_reads.toFloat()) {
- pass = true
- }
- return [ percent_aligned, pass ]
- }
-
- //
- // Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output
- //
- public static ArrayList getInferexperimentStrandedness(inferexperiment_file, cutoff=30) {
- def sense = 0
- def antisense = 0
- def undetermined = 0
- inferexperiment_file.eachLine { line ->
- def undetermined_matcher = line =~ /Fraction of reads failed to determine:\s([\d\.]+)/
- def se_sense_matcher = line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/
- def se_antisense_matcher = line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/
- def pe_sense_matcher = line =~ /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/
- def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/
- if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100
- if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100
- if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100
- if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100
- if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100
- }
- def strandedness = 'unstranded'
- if (sense >= 100-cutoff) {
- strandedness = 'forward'
- } else if (antisense >= 100-cutoff) {
- strandedness = 'reverse'
- }
- return [ strandedness, sense, antisense, undetermined ]
- }
-
- //
- // Get workflow summary for MultiQC
- //
- public static String paramsSummaryMultiqc(workflow, summary) {
- String summary_section = ''
- for (group in summary.keySet()) {
- def group_params = summary.get(group) // This gets the parameters of that particular group
- if (group_params) {
- summary_section += " $group
\n"
- summary_section += " \n"
- for (param in group_params.keySet()) {
- summary_section += " - $param
- ${group_params.get(param) ?: 'N/A'}
\n"
- }
- summary_section += "
\n"
- }
- }
-
- String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
- yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
- yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
- yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
- yaml_file_text += "plot_type: 'html'\n"
- yaml_file_text += "data: |\n"
- yaml_file_text += "${summary_section}"
- return yaml_file_text
- }
-
- //
- // Generate methods description for MultiQC
- //
- public static String toolCitationText(params) {
- // TODO nf-core: Optionally add in-text citation tools to this list.
- // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report
- def citation_text = [
- "Tools used in the workflow included:",
- "FastQC (Andrews 2010),",
- "MultiQC (Ewels et al. 2016)",
- "."
- ].join(' ').trim()
-
- return citation_text
- }
-
- public static String toolBibliographyText(params) {
- // TODO Optionally add bibliographic entries to this list.
- // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Author (2023) Pub name, Journal, DOI" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report
- def reference_text = [
- "Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).",
- "Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354"
- ].join(' ').trim()
-
- return reference_text
- }
-
- public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
- // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
- def meta = [:]
- meta.workflow = run_workflow.toMap()
- meta["manifest_map"] = run_workflow.manifest.toMap()
-
- // Pipeline DOI
- meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
- meta["nodoi_text"] = meta.manifest_map.doi ? "": "If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. "
-
- // Tool references
- meta["tool_citations"] = ""
- meta["tool_bibliography"] = ""
-
- // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
- //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
- //meta["tool_bibliography"] = toolBibliographyText(params)
-
- def methods_text = mqc_methods_yaml.text
-
- def engine = new SimpleTemplateEngine()
- def description_html = engine.createTemplate(methods_text).make(meta)
-
- return description_html
- }
-
- //
- // Create MultiQC tsv custom content from a list of values
- //
- public static String multiqcTsvFromList(tsv_data, header) {
- def tsv_string = ""
- if (tsv_data.size() > 0) {
- tsv_string += "${header.join('\t')}\n"
- tsv_string += tsv_data.join('\n')
- }
- return tsv_string
- }
-
- //
- // Exit pipeline if incorrect --genome key provided
- //
- private static void genomeExistsError(params, log) {
- if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
- def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
- " Currently, the available genome keys are:\n" +
- " ${params.genomes.keySet().join(", ")}\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- Nextflow.error(error_string)
- }
- }
-
- //
- // Print a warning if using GRCh38 assembly from igenomes.config
- //
- private static void ncbiGenomeWarn(log) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " When using '--genome GRCh38' the assembly is from the NCBI and NOT Ensembl.\n" +
- " Biotype QC will be skipped to circumvent the issue below:\n" +
- " https://github.com/nf-core/rnaseq/issues/460\n\n" +
- " If you would like to use the soft-masked Ensembl assembly instead please see:\n" +
- " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
-
- //
- // Print a warning if using a UCSC assembly from igenomes.config
- //
- private static void ucscGenomeWarn(log) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " When using UCSC assemblies the 'gene_biotype' field is absent from the GTF file.\n" +
- " Biotype QC will be skipped to circumvent the issue below:\n" +
- " https://github.com/nf-core/rnaseq/issues/460\n\n" +
- " If you would like to use the soft-masked Ensembl assembly instead please see:\n" +
- " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
-
- //
- // Print a warning if both GTF and GFF have been provided
- //
- private static void gtfGffWarn(log) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " Both '--gtf' and '--gff' parameters have been provided.\n" +
- " Using GTF file as priority.\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
-
- //
- // Print a warning if using '--transcript_fasta'
- //
- private static void transcriptsFastaWarn(log) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " '--transcript_fasta' parameter has been provided.\n" +
- " Make sure transcript names in this file match those in the GFF/GTF file.\n\n" +
- " Please see:\n" +
- " https://github.com/nf-core/rnaseq/issues/753\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
-
- //
- // Print a warning if --skip_alignment has been provided
- //
- private static void skipAlignmentWarn(log) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " '--skip_alignment' parameter has been provided.\n" +
- " Skipping alignment, genome-based quantification and all downstream QC processes.\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
-
- //
- // Print a warning if using '--aligner star_rsem' and '--with_umi'
- //
- private static void rsemUmiError(log) {
- def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " When using '--aligner star_rsem', STAR is run by RSEM itself and so it is\n" +
- " not possible to remove UMIs before the quantification.\n\n" +
- " If you would like to remove UMI barcodes using the '--with_umi' option\n" +
- " please use either '--aligner star_salmon' or '--aligner hisat2'.\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- Nextflow.error(error_string)
- }
-
- //
- // Print a warning if using '--aligner star_rsem' and providing both '--rsem_index' and '--star_index'
- //
- private static void rsemStarIndexWarn(log) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " When using '--aligner star_rsem', both the STAR and RSEM indices should\n" +
- " be present in the path specified by '--rsem_index'.\n\n" +
- " This warning has been generated because you have provided both\n" +
- " '--rsem_index' and '--star_index'. The pipeline will ignore the latter.\n\n" +
- " Please see:\n" +
- " https://github.com/nf-core/rnaseq/issues/568\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
-
- //
- // Print a warning if using '--aligner star_rsem' and providing '--star_extra_alignment_args'
- //
- private static void rsemStarExtraArgumentsWarn(log) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " No additional arguments can be passed to STAR when using RSEM.\n" +
- " Because RSEM enforces its own parameters for STAR, any extra arguments\n" +
- " to STAR will be ignored. Alternatively, choose the STAR+Salmon route.\n\n" +
- " This warning has been generated because you have provided both\n" +
- " '--aligner star_rsem' and '--extra_star_align_args'.\n\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
-
- //
- // Print a warning if using '--additional_fasta' and '--<ALIGNER>_index'
- //
- private static void additionaFastaIndexWarn(index, log) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " When using '--additional_fasta ' the aligner index will not\n" +
- " be re-built with the transgenes incorporated by default since you have \n" +
- " already provided an index via '--${index}_index '.\n\n" +
- " Set '--additional_fasta --${index}_index false --gene_bed false --save_reference'\n" +
- " to re-build the index with transgenes included and the index and gene BED file will be saved in\n" +
- " 'results/genome/index/${index}/' for re-use with '--${index}_index'.\n\n" +
- " Ignore this warning if you know that the index already contains transgenes.\n\n" +
- " Please see:\n" +
- " https://github.com/nf-core/rnaseq/issues/556\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- }
-}
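Among the logic deleted above, the strandedness cutoff in getInferexperimentStrandedness() merits a worked example. A standalone Groovy sketch with illustrative percentages (with the default cutoff of 30, a library is called 'forward' or 'reverse' only when the sense or antisense fraction reaches 70%):

def classify = { sense, antisense, cutoff = 30 ->
    if (sense >= 100 - cutoff) { return 'forward' }
    if (antisense >= 100 - cutoff) { return 'reverse' }
    return 'unstranded'
}
assert classify(95, 2)  == 'forward'      // sense >= 70
assert classify(3, 92)  == 'reverse'      // antisense >= 70
assert classify(45, 48) == 'unstranded'   // neither fraction dominates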
diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar
deleted file mode 100644
index 805c8bb5e..000000000
Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ
diff --git a/main.nf b/main.nf
index 69ae6c3b3..607d02e73 100755
--- a/main.nf
+++ b/main.nf
@@ -13,74 +13,122 @@ nextflow.enable.dsl = 2
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- GENOME PARAMETER VALUES
+ IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
-params.transcript_fasta = WorkflowMain.getGenomeAttribute(params, 'transcript_fasta')
-params.additional_fasta = WorkflowMain.getGenomeAttribute(params, 'additional_fasta')
-params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf')
-params.gff = WorkflowMain.getGenomeAttribute(params, 'gff')
-params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'bed12')
-params.bbsplit_index = WorkflowMain.getGenomeAttribute(params, 'bbsplit')
-params.star_index = WorkflowMain.getGenomeAttribute(params, 'star')
-params.hisat2_index = WorkflowMain.getGenomeAttribute(params, 'hisat2')
-params.rsem_index = WorkflowMain.getGenomeAttribute(params, 'rsem')
-params.salmon_index = WorkflowMain.getGenomeAttribute(params, 'salmon')
-params.kallisto_index = WorkflowMain.getGenomeAttribute(params, 'kallisto')
+include { PREPARE_GENOME } from './subworkflows/local/prepare_genome'
+include { NFCORE_RNASEQ } from './workflows/rnaseq'
+include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { checkMaxContigSize } from './subworkflows/local/utils_nfcore_rnaseq_pipeline'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- VALIDATE & PRINT PARAMETER SUMMARY
+ GENOME PARAMETER VALUES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { validateParameters; paramsHelp } from 'plugin/nf-validation'
-
-// Print help message if needed
-if (params.help) {
- def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
- def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
- def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker"
- log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs)
- System.exit(0)
-}
-
-// Validate input parameters
-if (params.validate_params) {
- validateParameters()
-}
-
-WorkflowMain.initialise(workflow, params, log)
+params.fasta = getGenomeAttribute('fasta')
+params.transcript_fasta = getGenomeAttribute('transcript_fasta')
+params.additional_fasta = getGenomeAttribute('additional_fasta')
+params.gtf = getGenomeAttribute('gtf')
+params.gff = getGenomeAttribute('gff')
+params.gene_bed = getGenomeAttribute('bed12')
+params.bbsplit_index = getGenomeAttribute('bbsplit')
+params.star_index = getGenomeAttribute('star')
+params.hisat2_index = getGenomeAttribute('hisat2')
+params.rsem_index = getGenomeAttribute('rsem')
+params.salmon_index = getGenomeAttribute('salmon')
+params.kallisto_index = getGenomeAttribute('kallisto')
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- NAMED WORKFLOW FOR PIPELINE
+ RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { RNASEQ } from './workflows/rnaseq'
+workflow {
-//
-// WORKFLOW: Run main nf-core/rnaseq analysis pipeline
-//
-workflow NFCORE_RNASEQ {
- RNASEQ ()
-}
+ main:
+ ch_versions = Channel.empty()
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- RUN ALL WORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+ //
+ // SUBWORKFLOW: Run initialisation tasks
+ //
+ PIPELINE_INITIALISATION ()
-//
-// WORKFLOW: Execute a single named workflow for the pipeline
-// See: https://github.com/nf-core/rnaseq/issues/619
-//
-workflow {
- NFCORE_RNASEQ ()
+ //
+ // SUBWORKFLOW: Prepare reference genome files
+ //
+ PREPARE_GENOME (
+ params.fasta,
+ params.gtf,
+ params.gff,
+ params.additional_fasta,
+ params.transcript_fasta,
+ params.gene_bed,
+ params.splicesites,
+ params.bbsplit_fasta_list,
+ params.star_index,
+ params.rsem_index,
+ params.salmon_index,
+ params.kallisto_index,
+ params.hisat2_index,
+ params.bbsplit_index,
+ params.gencode,
+ params.featurecounts_group_type,
+ params.aligner,
+ params.pseudo_aligner,
+ params.skip_gtf_filter,
+ params.skip_bbsplit,
+ params.skip_alignment,
+ params.skip_pseudo_alignment
+ )
+ ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
+
+ // Check if contigs in genome fasta file > 512 Mbp
+ if (!params.skip_alignment && !params.bam_csi_index) {
+ PREPARE_GENOME
+ .out
+ .fai
+ .map { checkMaxContigSize(it) }
+ }
+
+ //
+ // WORKFLOW: Run nf-core/rnaseq workflow
+ //
+ NFCORE_RNASEQ (
+ PIPELINE_INITIALISATION.out.samplesheet,
+ ch_versions,
+ PREPARE_GENOME.out.fasta,
+ PREPARE_GENOME.out.gtf,
+ PREPARE_GENOME.out.fai,
+ PREPARE_GENOME.out.chrom_sizes,
+ PREPARE_GENOME.out.gene_bed,
+ PREPARE_GENOME.out.transcript_fasta,
+ PREPARE_GENOME.out.star_index,
+ PREPARE_GENOME.out.rsem_index,
+ PREPARE_GENOME.out.hisat2_index,
+ PREPARE_GENOME.out.salmon_index,
+ PREPARE_GENOME.out.kallisto_index,
+ PREPARE_GENOME.out.bbsplit_index,
+ PREPARE_GENOME.out.splicesites
+ )
+ ch_versions = ch_versions.mix(NFCORE_RNASEQ.out.versions)
+
+ //
+ // SUBWORKFLOW: Run completion tasks
+ //
+ PIPELINE_COMPLETION (
+ params.email,
+ params.email_on_fail,
+ params.plaintext_email,
+ params.outdir,
+ params.monochrome_logs,
+ params.hook_url
+ )
}
/*
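With `WorkflowMain` gone, genome defaults are now resolved by the local `getGenomeAttribute` function imported above. Since Nextflow gives command-line and config values precedence over assignments made in the script body, these lines only fill in parameters the user left unset. A minimal sketch of that precedence in plain Groovy (the path is illustrative, not a guaranteed iGenomes location):

    // the CLI value wins; the iGenomes lookup is only a fallback default
    def cli_fasta     = null    // user passed no --fasta on the command line
    def igenome_fasta = 's3://ngi-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa'
    assert (cli_fasta ?: igenome_fasta) == igenome_fasta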
diff --git a/modules.json b/modules.json
index 67970abf1..537a5e175 100644
--- a/modules.json
+++ b/modules.json
@@ -15,12 +15,6 @@
"git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e",
"installed_by": ["modules"]
},
- "custom/dumpsoftwareversions": {
- "branch": "master",
- "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
- "installed_by": ["modules"],
- "patch": "modules/nf-core/custom/dumpsoftwareversions/custom-dumpsoftwareversions.diff"
- },
"custom/getchromsizes": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
@@ -78,6 +72,11 @@
"git_sha": "bdc2a97ced7adc423acfa390742db83cab98c1ad",
"installed_by": ["modules"]
},
+ "multiqc": {
+ "branch": "master",
+ "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
+ "installed_by": ["modules"]
+ },
"picard/markduplicates": {
"branch": "master",
"git_sha": "2ee934606f1fdf7fc1cb05d6e8abc13bec8ab448",
@@ -304,6 +303,21 @@
"branch": "master",
"git_sha": "dedc0e31087f3306101c38835d051bf49789445a",
"installed_by": ["subworkflows"]
+ },
+ "utils_nextflow_pipeline": {
+ "branch": "master",
+ "git_sha": "cd08c91373cd00a73255081340e4914485846ba1",
+ "installed_by": ["subworkflows"]
+ },
+ "utils_nfcore_pipeline": {
+ "branch": "master",
+ "git_sha": "cd08c91373cd00a73255081340e4914485846ba1",
+ "installed_by": ["subworkflows"]
+ },
+ "utils_nfvalidation_plugin": {
+ "branch": "master",
+ "git_sha": "cd08c91373cd00a73255081340e4914485846ba1",
+ "installed_by": ["subworkflows"]
}
}
}
diff --git a/modules/local/multiqc/main.nf b/modules/local/multiqc/main.nf
deleted file mode 100644
index be0dfe2c3..000000000
--- a/modules/local/multiqc/main.nf
+++ /dev/null
@@ -1,74 +0,0 @@
-process MULTIQC {
- label 'process_medium'
-
- conda "bioconda::multiqc=1.19"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
-
- input:
- path multiqc_config
- path multiqc_custom_config
- path software_versions
- path workflow_summary
- path methods_description
- path logo
- path fail_trimming_summary
- path fail_mapping_summary
- path fail_strand_check
- path ('fastqc/raw/*')
- path ('fastqc/trim/*')
- path ('trim_log/*')
- path ('sortmerna/*')
- path ('star/*')
- path ('hisat2/*')
- path ('rsem/*')
- path ('pseudoalignment/*')
- path ('samtools/stats/*')
- path ('samtools/flagstat/*')
- path ('samtools/idxstats/*')
- path ('picard/markduplicates/*')
- path ('featurecounts/*')
- path ('deseq2/aligner/*')
- path ('deseq2/aligner/*')
- path ('deseq2/pseudoaligner/*')
- path ('deseq2/pseudoaligner/*')
- path ('preseq/*')
- path ('qualimap/*')
- path ('dupradar/*')
- path ('rseqc/bam_stat/*')
- path ('rseqc/infer_experiment/*')
- path ('rseqc/inner_distance/*')
- path ('rseqc/junction_annotation/*')
- path ('rseqc/junction_saturation/*')
- path ('rseqc/read_distribution/*')
- path ('rseqc/read_duplication/*')
- path ('rseqc/tin/*')
-
- output:
- path "*multiqc_report.html", emit: report
- path "*_data" , emit: data
- path "*_plots" , optional:true, emit: plots
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- def custom_config = params.multiqc_config ? "--config $multiqc_custom_config" : ''
- prefix = task.ext.prefix ?: "multiqc_report"
- """
- multiqc \\
- -n ${prefix}.html \\
- -f \\
- $args \\
- $custom_config \\
- .
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
- END_VERSIONS
- """
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/custom-dumpsoftwareversions.diff b/modules/nf-core/custom/dumpsoftwareversions/custom-dumpsoftwareversions.diff
deleted file mode 100644
index 2ec89807d..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/custom-dumpsoftwareversions.diff
+++ /dev/null
@@ -1,19 +0,0 @@
-Changes in module 'nf-core/custom/dumpsoftwareversions'
---- modules/nf-core/custom/dumpsoftwareversions/main.nf
-+++ modules/nf-core/custom/dumpsoftwareversions/main.nf
-@@ -2,10 +2,10 @@
- label 'process_single'
-
- // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-- conda "${moduleDir}/environment.yml"
-+ conda "bioconda::multiqc=1.19"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-- 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' :
-- 'biocontainers/multiqc:1.17--pyhdfd78af_0' }"
-+ 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
-+ 'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
-
- input:
- path versions
-
-************************************************************
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
deleted file mode 100644
index b2dcf480e..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ /dev/null
@@ -1,24 +0,0 @@
-process CUSTOM_DUMPSOFTWAREVERSIONS {
- label 'process_single'
-
- // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
- conda "bioconda::multiqc=1.19"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
-
- input:
- path versions
-
- output:
- path "software_versions.yml" , emit: yml
- path "software_versions_mqc.yml", emit: mqc_yml
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- template 'dumpsoftwareversions.py'
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
deleted file mode 100644
index 5f15a5fde..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-name: custom_dumpsoftwareversions
-description: Custom module used to dump software versions within the nf-core pipeline template
-keywords:
- - custom
- - dump
- - version
-tools:
- - custom:
- description: Custom module used to dump software versions within the nf-core pipeline template
- homepage: https://github.com/nf-core/tools
- documentation: https://github.com/nf-core/tools
- licence: ["MIT"]
-input:
- - versions:
- type: file
- description: YML file containing software versions
- pattern: "*.yml"
-output:
- - yml:
- type: file
- description: Standard YML file containing software versions
- pattern: "software_versions.yml"
- - mqc_yml:
- type: file
- description: MultiQC custom content YML file containing software versions
- pattern: "software_versions_mqc.yml"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
-authors:
- - "@drpatelh"
- - "@grst"
-maintainers:
- - "@drpatelh"
- - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
deleted file mode 100755
index da0334085..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Provide functions to merge multiple versions.yml files."""
-
-
-import yaml
-import platform
-from textwrap import dedent
-
-
-def _make_versions_html(versions):
-    """Generate a tabular HTML output of all versions for MultiQC."""
-    html = [
-        dedent(
-            """\\
-            <style>
-            #nf-core-versions tbody:nth-child(even) {
-                background-color: #f2f2f2;
-            }
-            </style>
-            <table class="table" style="width:100%" id="nf-core-versions">
-                <thead>
-                    <tr>
-                        <th> Process Name </th>
-                        <th> Software </th>
-                        <th> Version  </th>
-                    </tr>
-                </thead>
-            """
-        )
-    ]
-    for process, tmp_versions in sorted(versions.items()):
-        html.append("<tbody>")
-        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
-            html.append(
-                dedent(
-                    f"""\\
-                    <tr>
-                        <td><samp>{process if (i == 0) else ''}</samp></td>
-                        <td><samp>{tool}</samp></td>
-                        <td><samp>{version}</samp></td>
-                    </tr>
-                    """
-                )
-            )
-        html.append("</tbody>")
-    html.append("</table>")
-    return "\\n".join(html)
-
-
-def main():
-    """Load all version files and generate merged output."""
-    versions_this_module = {}
-    versions_this_module["${task.process}"] = {
-        "python": platform.python_version(),
-        "yaml": yaml.__version__,
-    }
-
-    with open("$versions") as f:
-        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
-
-    # aggregate versions by the module name (derived from fully-qualified process name)
-    versions_by_module = {}
-    for process, process_versions in versions_by_process.items():
-        module = process.split(":")[-1]
-        try:
-            if versions_by_module[module] != process_versions:
-                raise AssertionError(
-                    "We assume that software versions are the same between all modules. "
-                    "If you see this error-message it means you discovered an edge-case "
-                    "and should open an issue in nf-core/tools. "
-                )
-        except KeyError:
-            versions_by_module[module] = process_versions
-
-    versions_by_module["Workflow"] = {
-        "Nextflow": "$workflow.nextflow.version",
-        "$workflow.manifest.name": "$workflow.manifest.version",
-    }
-
-    versions_mqc = {
-        "id": "software_versions",
-        "section_name": "${workflow.manifest.name} Software Versions",
-        "section_href": "https://github.com/${workflow.manifest.name}",
-        "plot_type": "html",
-        "description": "are collected at run time from the software output.",
-        "data": _make_versions_html(versions_by_module),
-    }
-
-    with open("software_versions.yml", "w") as f:
-        yaml.dump(versions_by_module, f, default_flow_style=False)
-    with open("software_versions_mqc.yml", "w") as f:
-        yaml.dump(versions_mqc, f, default_flow_style=False)
-
-    with open("versions.yml", "w") as f:
-        yaml.dump(versions_this_module, f, default_flow_style=False)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
deleted file mode 100644
index eec1db10a..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
+++ /dev/null
@@ -1,38 +0,0 @@
-nextflow_process {
-
- name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS"
- script "../main.nf"
- process "CUSTOM_DUMPSOFTWAREVERSIONS"
- tag "modules"
- tag "modules_nfcore"
- tag "custom"
- tag "dumpsoftwareversions"
- tag "custom/dumpsoftwareversions"
-
- test("Should run without failures") {
- when {
- process {
- """
- def tool1_version = '''
- TOOL1:
- tool1: 0.11.9
- '''.stripIndent()
-
- def tool2_version = '''
- TOOL2:
- tool2: 1.9
- '''.stripIndent()
-
- input[0] = Channel.of(tool1_version, tool2_version).collectFile()
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- { assert snapshot(process.out).match() }
- )
- }
- }
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
deleted file mode 100644
index 4274ed57a..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
+++ /dev/null
@@ -1,27 +0,0 @@
-{
- "Should run without failures": {
- "content": [
- {
- "0": [
- "software_versions.yml:md5,1c851188476409cda5752ce971b20b58"
- ],
- "1": [
- "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d"
- ],
- "2": [
- "versions.yml:md5,3843ac526e762117eedf8825b40683df"
- ],
- "mqc_yml": [
- "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d"
- ],
- "versions": [
- "versions.yml:md5,3843ac526e762117eedf8825b40683df"
- ],
- "yml": [
- "software_versions.yml:md5,1c851188476409cda5752ce971b20b58"
- ]
- }
- ],
- "timestamp": "2023-11-03T14:43:22.157011"
- }
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
deleted file mode 100644
index 405aa24ae..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-custom/dumpsoftwareversions:
- - modules/nf-core/custom/dumpsoftwareversions/**
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/multiqc/environment.yml
similarity index 51%
rename from modules/nf-core/custom/dumpsoftwareversions/environment.yml
rename to modules/nf-core/multiqc/environment.yml
index f0c63f698..7625b7520 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -1,7 +1,7 @@
-name: custom_dumpsoftwareversions
+name: multiqc
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - bioconda::multiqc=1.17
+ - bioconda::multiqc=1.19
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
new file mode 100644
index 000000000..1b9f7c431
--- /dev/null
+++ b/modules/nf-core/multiqc/main.nf
@@ -0,0 +1,55 @@
+process MULTIQC {
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
+ 'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
+
+ input:
+ path multiqc_files, stageAs: "?/*"
+ path(multiqc_config)
+ path(extra_multiqc_config)
+ path(multiqc_logo)
+
+ output:
+ path "*multiqc_report.html", emit: report
+ path "*_data" , emit: data
+ path "*_plots" , optional:true, emit: plots
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def config = multiqc_config ? "--config $multiqc_config" : ''
+ def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
+ def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : ''
+ """
+ multiqc \\
+ --force \\
+ $args \\
+ $config \\
+ $extra_config \\
+ $logo \\
+ .
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ mkdir multiqc_data
+ touch multiqc_plots
+ touch multiqc_report.html
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+ END_VERSIONS
+ """
+}
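The nf-core module takes a single collected channel of report files plus three optional config/logo inputs, in contrast to the deleted local module's forty-odd positional paths. A sketch of how a workflow might wire it up, mirroring the nf-core template (the channel names here are illustrative, not defined in this diff):

    MULTIQC (
        ch_multiqc_files.collect(),          // every QC report gathered upstream
        ch_multiqc_config.toList(),          // optional: base config
        ch_multiqc_custom_config.toList(),   // optional: user overrides
        ch_multiqc_logo.toList()             // optional: custom logo
    )
    multiqc_report = MULTIQC.out.report.toList()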
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
new file mode 100644
index 000000000..45a9bc35e
--- /dev/null
+++ b/modules/nf-core/multiqc/meta.yml
@@ -0,0 +1,58 @@
+name: multiqc
+description: Aggregate results from bioinformatics analyses across many samples into a single report
+keywords:
+ - QC
+ - bioinformatics tools
+ - Beautiful stand-alone HTML report
+tools:
+ - multiqc:
+ description: |
+ MultiQC searches a given directory for analysis logs and compiles an HTML report.
+ It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.
+ homepage: https://multiqc.info/
+ documentation: https://multiqc.info/docs/
+ licence: ["GPL-3.0-or-later"]
+input:
+ - multiqc_files:
+ type: file
+ description: |
+ List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
+ - multiqc_config:
+ type: file
+ description: Optional config yml for MultiQC
+ pattern: "*.{yml,yaml}"
+ - extra_multiqc_config:
+ type: file
+ description: Second optional config yml for MultiQC. Will override common sections in multiqc_config.
+ pattern: "*.{yml,yaml}"
+ - multiqc_logo:
+ type: file
+ description: Optional logo file for MultiQC
+ pattern: "*.{png}"
+output:
+ - report:
+ type: file
+ description: MultiQC report file
+ pattern: "multiqc_report.html"
+ - data:
+ type: directory
+ description: MultiQC data dir
+ pattern: "multiqc_data"
+ - plots:
+ type: file
+ description: Plots created by MultiQC
+ pattern: "*_data"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@abhi18av"
+ - "@bunop"
+ - "@drpatelh"
+ - "@jfy133"
+maintainers:
+ - "@abhi18av"
+ - "@bunop"
+ - "@drpatelh"
+ - "@jfy133"
diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test
new file mode 100644
index 000000000..d0438eda6
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/main.nf.test
@@ -0,0 +1,83 @@
+nextflow_process {
+
+ name "Test Process MULTIQC"
+ script "../main.nf"
+ process "MULTIQC"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "multiqc"
+
+ test("sarscov2 single-end [fastqc]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+ input[1] = []
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
+ { assert process.out.data[0] ==~ ".*/multiqc_data" },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("sarscov2 single-end [fastqc] [config]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+ input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true))
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
+ { assert process.out.data[0] ==~ ".*/multiqc_data" },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastqc] - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+ input[1] = []
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.report.collect { file(it).getName() } +
+ process.out.data.collect { file(it).getName() } +
+ process.out.plots.collect { file(it).getName() } +
+ process.out.versions ).match() }
+ )
+ }
+
+ }
+}
diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap
new file mode 100644
index 000000000..d37e73040
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/main.nf.test.snap
@@ -0,0 +1,21 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d"
+ ]
+ ],
+ "timestamp": "2024-01-09T23:02:49.911994"
+ },
+ "sarscov2 single-end [fastqc] - stub": {
+ "content": [
+ [
+ "multiqc_report.html",
+ "multiqc_data",
+ "multiqc_plots",
+ "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d"
+ ]
+ ],
+ "timestamp": "2024-01-09T23:03:14.524346"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml
new file mode 100644
index 000000000..bea6c0d37
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/tags.yml
@@ -0,0 +1,2 @@
+multiqc:
+ - modules/nf-core/multiqc/**
diff --git a/nextflow.config b/nextflow.config
index c9c9cb08d..c4e5a8a30 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -54,7 +54,7 @@ params {
// Ribosomal RNA removal
remove_ribo_rna = false
save_non_ribo_reads = false
- ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt"
+ ribo_database_manifest = "${projectDir}/workflows/rnaseq/assets/rrna-db-defaults.txt"
// Alignment
aligner = 'star_salmon'
@@ -119,7 +119,6 @@ params {
custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
config_profile_contact = null
config_profile_url = null
- test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3'
// Max resource options
// Defaults only, expecting to be overwritten
@@ -245,7 +244,6 @@ profiles {
executor.memory = 8.GB
}
test { includeConfig 'conf/test.config' }
- test_cache { includeConfig 'conf/test_cache.config' }
test_full { includeConfig 'conf/test_full.config' }
test_full_aws {
includeConfig 'conf/test_full.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 42e4e8967..0cd5df6e0 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -278,7 +278,7 @@
"format": "file-path",
"exists": true,
"mimetype": "text/plain",
- "default": "${projectDir}/assets/rrna-db-defaults.txt",
+ "default": "${projectDir}/workflows/rnaseq/assets/rrna-db-defaults.txt",
"fa_icon": "fas fa-database",
"description": "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.",
"help_text": "By default, [rRNA databases](https://github.com/biocore/sortmerna/tree/master/data/rRNA_databases) defined in the SortMeRNA GitHub repo are used. You can see an example in the pipeline Github repository in `assets/rrna-default-dbs.txt`.\nPlease note that commercial/non-academic entities require [`licensing for SILVA`](https://www.arb-silva.de/silva-license-information) for these default databases."
@@ -672,13 +672,6 @@
"description": "Institutional config URL link.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
- },
- "test_data_base": {
- "type": "string",
- "default": "https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3",
- "description": "Base path / URL for data used in the test profiles",
- "help_text": "Warning: The `-profile test` samplesheet file itself contains remote paths. Setting this parameter does not alter the contents of that file.",
- "hidden": true
}
}
},
diff --git a/pyproject.toml b/pyproject.toml
index bc01239b3..0d62beb6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,4 +1,4 @@
-# Config file for Python. Mostly used to configure linting of bin/*.py with Black.
+# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black.
# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
[tool.black]
line-length = 120
diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf
index 0840c7734..ace271641 100644
--- a/subworkflows/local/prepare_genome/main.nf
+++ b/subworkflows/local/prepare_genome/main.nf
@@ -35,28 +35,30 @@ include { STAR_GENOMEGENERATE_IGENOMES } from '../../../modules/local/st
workflow PREPARE_GENOME {
take:
- fasta // file: /path/to/genome.fasta
- gtf // file: /path/to/genome.gtf
- gff // file: /path/to/genome.gff
- additional_fasta // file: /path/to/additional.fasta
- transcript_fasta // file: /path/to/transcript.fasta
- gene_bed // file: /path/to/gene.bed
- splicesites // file: /path/to/splicesites.txt
- bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt
- star_index // directory: /path/to/star/index/
- rsem_index // directory: /path/to/rsem/index/
- salmon_index // directory: /path/to/salmon/index/
- kallisto_index // directory: /path/to/kallisto/index/
- hisat2_index // directory: /path/to/hisat2/index/
- bbsplit_index // directory: /path/to/rsem/index/
- gencode // boolean: whether the genome is from GENCODE
- is_aws_igenome // boolean: whether the genome files are from AWS iGenomes
- biotype // string: if additional fasta file is provided biotype value to use when appending entries to GTF file
- prepare_tool_indices // list: tools to prepare indices for
- filter_gtf // boolean: whether to filter GTF file
+ fasta // file: /path/to/genome.fasta
+ gtf // file: /path/to/genome.gtf
+ gff // file: /path/to/genome.gff
+ additional_fasta // file: /path/to/additional.fasta
+ transcript_fasta // file: /path/to/transcript.fasta
+ gene_bed // file: /path/to/gene.bed
+ splicesites // file: /path/to/splicesites.txt
+ bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt
+ star_index // directory: /path/to/star/index/
+ rsem_index // directory: /path/to/rsem/index/
+ salmon_index // directory: /path/to/salmon/index/
+ kallisto_index // directory: /path/to/kallisto/index/
+ hisat2_index // directory: /path/to/hisat2/index/
+ bbsplit_index // directory: /path/to/bbsplit/index/
+ gencode // boolean: whether the genome is from GENCODE
+ featurecounts_group_type // string: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts
+ aligner // string: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'
+ pseudo_aligner // string: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'
+ skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs
+ skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads
+ skip_alignment // boolean: Skip all of the alignment-based processes within the pipeline
+ skip_pseudo_alignment // boolean: Skip all of the pseudoalignment-based processes within the pipeline
main:
-
ch_versions = Channel.empty()
//
@@ -91,6 +93,24 @@ workflow PREPARE_GENOME {
ch_versions = ch_versions.mix(GFFREAD.out.versions)
}
+ // Determine whether to filter the GTF or not
+ def filter_gtf =
+ ((
+ // Condition 1: Alignment is required and aligner is set
+ !skip_alignment && aligner
+ ) ||
+ (
+ // Condition 2: Pseudoalignment is required and pseudoaligner is set
+ !skip_pseudo_alignment && pseudo_aligner
+ ) ||
+ (
+ // Condition 3: Transcript FASTA file is not provided
+ !transcript_fasta
+ )) &&
+ (
+ // Condition 4: --skip_gtf_filter is not provided
+ !skip_gtf_filter
+ )
if (filter_gtf) {
GTF_FILTER ( ch_fasta, ch_gtf )
ch_gtf = GTF_FILTER.out.genome_gtf
@@ -101,6 +121,7 @@ workflow PREPARE_GENOME {
//
// Uncompress additional fasta file and concatenate with reference fasta and gtf files
//
+ def biotype = gencode ? "gene_type" : featurecounts_group_type
if (additional_fasta) {
if (additional_fasta.endsWith('.gz')) {
ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] }
@@ -157,6 +178,14 @@ workflow PREPARE_GENOME {
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
+ //
+ // Get list of indices that need to be created
+ //
+ def prepare_tool_indices = []
+ if (!skip_bbsplit) { prepare_tool_indices << 'bbsplit' }
+ if (!skip_alignment) { prepare_tool_indices << aligner }
+ if (!skip_pseudo_alignment && pseudo_aligner) { prepare_tool_indices << pseudo_aligner }
+
//
// Uncompress BBSplit index or generate from scratch if required
//
@@ -197,6 +226,13 @@ workflow PREPARE_GENOME {
ch_star_index = Channel.value(file(star_index))
}
} else {
+ // Check if an AWS iGenome has been provided to use the appropriate version of STAR
+ def is_aws_igenome = false
+ if (fasta && gtf) {
+ if ((file(fasta).getName() - '.gz' == 'genome.fa') && (file(gtf).getName() - '.gz' == 'genes.gtf')) {
+ is_aws_igenome = true
+ }
+ }
if (is_aws_igenome) {
ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions)
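The subworkflow now derives `prepare_tool_indices` internally from the skip flags instead of receiving it as an input. A worked example of that logic in plain Groovy, using what this diff sets as the pipeline defaults (BBSplit and alignment enabled, no pseudo-aligner):

    def skip_bbsplit = false, skip_alignment = false, skip_pseudo_alignment = false
    def aligner = 'star_salmon', pseudo_aligner = null
    def prepare_tool_indices = []
    if (!skip_bbsplit)                            prepare_tool_indices << 'bbsplit'
    if (!skip_alignment)                          prepare_tool_indices << aligner
    if (!skip_pseudo_alignment && pseudo_aligner) prepare_tool_indices << pseudo_aligner
    assert prepare_tool_indices == ['bbsplit', 'star_salmon']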
diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
new file mode 100644
index 000000000..684325f6e
--- /dev/null
+++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
@@ -0,0 +1,528 @@
+//
+// Subworkflow with functionality specific to the nf-core/rnaseq pipeline
+//
+
+import groovy.json.JsonSlurper
+
+/*
+========================================================================================
+ IMPORT MODULES/SUBWORKFLOWS
+========================================================================================
+*/
+
+include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin'
+include { fromSamplesheet } from 'plugin/nf-validation'
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline'
+include { completionEmail } from '../../nf-core/utils_nfcore_pipeline'
+include { completionSummary } from '../../nf-core/utils_nfcore_pipeline'
+include { dashedLine } from '../../nf-core/utils_nfcore_pipeline'
+include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline'
+include { imNotification } from '../../nf-core/utils_nfcore_pipeline'
+include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline'
+include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline'
+
+/*
+========================================================================================
+ SUBWORKFLOW TO INITIALISE PIPELINE
+========================================================================================
+*/
+
+workflow PIPELINE_INITIALISATION {
+
+ main:
+
+ //
+ // Print version and exit if required and dump pipeline parameters to JSON file
+ //
+ UTILS_NEXTFLOW_PIPELINE (
+ params.version,
+ true,
+ params.outdir,
+ workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1
+ )
+
+ //
+ // Validate parameters and generate parameter summary to stdout
+ //
+ def pre_help_text = nfCoreLogo(params.monochrome_logs)
+ def post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(params.monochrome_logs)
+ def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --genome GRCh37 --outdir <OUTDIR>"
+ UTILS_NFVALIDATION_PLUGIN (
+ params.help,
+ workflow_command,
+ pre_help_text,
+ post_help_text,
+ params.validate_params,
+ "nextflow_schema.json"
+ )
+
+ //
+ // Check config provided to the pipeline
+ //
+ UTILS_NFCORE_PIPELINE ()
+
+ //
+ // Custom validation for pipeline parameters
+ //
+ validateInputParameters()
+
+ //
+ // Create channel from input file provided through params.input
+ //
+ Channel
+ .fromSamplesheet("input")
+ .map {
+ meta, fastq_1, fastq_2 ->
+ if (!fastq_2) {
+ return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
+ } else {
+ return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
+ }
+ }
+ .groupTuple()
+ .map {
+ validateInputSamplesheet(it)
+ }
+ .set { ch_samplesheet }
+
+ emit:
+ samplesheet = ch_samplesheet
+}
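The samplesheet channel keys each row by `meta.id`, and `groupTuple` collapses technical replicates of a sample into one element before validation. A toy illustration of that collapse in plain Groovy (sample names and file names invented):

    def rows = [
        [ 'S1', [id:'S1', single_end:false], ['S1_L001_R1.fq.gz', 'S1_L001_R2.fq.gz'] ],
        [ 'S1', [id:'S1', single_end:false], ['S1_L002_R1.fq.gz', 'S1_L002_R2.fq.gz'] ],
    ]
    def grouped = rows.groupBy { it[0] }.collect { id, runs ->
        [ id, runs.collect { it[1] }, runs.collect { it[2] } ]
    }
    // two runs of S1 arrive as a single grouped element, whose meta maps are
    // then checked for consistent strandedness/endedness downstream by
    // validateInputSamplesheet()
    assert grouped.size() == 1 && grouped[0][1].size() == 2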
+
+/*
+========================================================================================
+ SUBWORKFLOW FOR PIPELINE COMPLETION
+========================================================================================
+*/
+
+workflow PIPELINE_COMPLETION {
+
+ take:
+ email // string: email address
+ email_on_fail // string: email address sent on pipeline failure
+ plaintext_email // boolean: Send plain-text email instead of HTML
+ outdir // path: Path to output directory where results will be published
+ monochrome_logs // boolean: Disable ANSI colour codes in log output
+ hook_url // string: hook URL for notifications
+
+ main:
+
+ summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+
+ //
+ // Completion email and summary
+ //
+ workflow.onComplete {
+ if (email || email_on_fail) {
+ completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs)
+ }
+
+ completionSummary(monochrome_logs)
+
+ if (hook_url) {
+ imNotification(summary_params, hook_url)
+ }
+ }
+}
+
+/*
+========================================================================================
+ FUNCTIONS
+========================================================================================
+*/
+
+//
+// Function to validate channels from input samplesheet
+//
+def validateInputSamplesheet(input) {
+ def (metas, fastqs) = input[1..2]
+
+ // Check that multiple runs of the same sample are of the same strandedness
+ def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1
+ if (!strandedness_ok) {
+ error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}")
+ }
+
+ // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
+ def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
+ if (!endedness_ok) {
+ error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
+ }
+
+ return [ metas[0], fastqs ]
+}
+
+//
+// Check and validate pipeline parameters
+//
+def validateInputParameters() {
+
+ genomeExistsError()
+
+ if (!params.fasta) {
+ error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.")
+ }
+
+ if (!params.gtf && !params.gff) {
+ error("No GTF or GFF3 annotation specified! The pipeline requires at least one of these files.")
+ }
+
+ if (params.gtf) {
+ if (params.gff) {
+ gtfGffWarn()
+ }
+ if (params.genome == 'GRCh38' && params.gtf.contains('Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf')) {
+ ncbiGenomeWarn()
+ }
+ if (params.gtf.contains('/UCSC/') && params.gtf.contains('Annotation/Genes/genes.gtf')) {
+ ucscGenomeWarn()
+ }
+ }
+
+ if (params.transcript_fasta) {
+ transcriptsFastaWarn()
+ }
+
+ if (!params.skip_bbsplit && !params.bbsplit_index && !params.bbsplit_fasta_list) {
+ error("Please provide either --bbsplit_fasta_list / --bbsplit_index to run BBSplit.")
+ }
+
+ if (params.remove_ribo_rna && !params.ribo_database_manifest) {
+ error("Please provide --ribo_database_manifest to remove ribosomal RNA with SortMeRNA.")
+ }
+
+ if (params.with_umi && !params.skip_umi_extract) {
+ if (!params.umitools_bc_pattern && !params.umitools_bc_pattern2) {
+ error("UMI-tools requires a barcode pattern to extract barcodes from the reads.")
+ }
+ }
+
+ if (params.skip_alignment) {
+ skipAlignmentWarn()
+ }
+
+ if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
+ if (!(params.salmon_index || params.transcript_fasta || (params.fasta && (params.gtf || params.gff)))) {
+ error("To use `--pseudo_aligner 'salmon'`, you must provide either --salmon_index or --transcript_fasta or both --fasta and --gtf / --gff.")
+ }
+ }
+
+ // Checks when running --aligner star_rsem
+ if (!params.skip_alignment && params.aligner == 'star_rsem') {
+ if (params.with_umi) {
+ rsemUmiError()
+ }
+ if (params.rsem_index && params.star_index) {
+ rsemStarIndexWarn()
+ }
+ if (params.aligner == 'star_rsem' && params.extra_star_align_args) {
+ rsemStarExtraArgumentsWarn()
+ }
+ }
+
+ // Warn if --additional_fasta provided with aligner index
+ if (!params.skip_alignment && params.additional_fasta) {
+ def index = ''
+ if (params.aligner == 'star_salmon' && params.star_index) {
+ index = 'star'
+ }
+ if (params.aligner == 'star_rsem' && params.rsem_index) {
+ index = 'rsem'
+ }
+ if (params.aligner == 'hisat2' && params.hisat2_index) {
+ index = 'hisat2'
+ }
+ if (index) {
+ additionaFastaIndexWarn(index)
+ }
+ }
+
+ // Check which RSeQC modules we are running
+ def valid_rseqc_modules = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin']
+ def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : []
+ if ((valid_rseqc_modules + rseqc_modules).unique().size() != valid_rseqc_modules.size()) {
+ error("Invalid option: ${params.rseqc_modules}. Valid options for '--rseqc_modules': ${valid_rseqc_modules.join(', ')}")
+ }
+
+ // Check rRNA databases for sortmerna
+ if (params.remove_ribo_rna) {
+ ch_ribo_db = file(params.ribo_database_manifest)
+ if (ch_ribo_db.isEmpty()) {
+ error("File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!")
+ }
+ }
+
+ // Check if file with list of fastas is provided when running BBSplit
+ if (!params.skip_bbsplit && !params.bbsplit_index && params.bbsplit_fasta_list) {
+ ch_bbsplit_fasta_list = file(params.bbsplit_fasta_list)
+ if (ch_bbsplit_fasta_list.isEmpty()) {
+ error("File provided with --bbsplit_fasta_list is empty: ${ch_bbsplit_fasta_list.getName()}!")
+ }
+ }
+}
+
+//
+// Get attribute from genome config file e.g. fasta
+//
+def getGenomeAttribute(attribute) {
+ if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
+ if (params.genomes[ params.genome ].containsKey(attribute)) {
+ return params.genomes[ params.genome ][ attribute ]
+ }
+ }
+ return null
+}
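The lookup returns `null` for any missing key, so optional indices simply stay unset rather than failing. In miniature, with a hypothetical igenomes-style map standing in for `params.genomes`:

    def genomes = [ GRCh37: [ fasta: '/refs/GRCh37/genome.fa' ] ]   // illustrative
    def genome  = 'GRCh37'
    def attr    = { a -> genomes?.get(genome)?.get(a) }
    assert attr('fasta')  == '/refs/GRCh37/genome.fa'
    assert attr('salmon') == null   // key absent, so the param stays unset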
+
+//
+// Exit pipeline if incorrect --genome key provided
+//
+def genomeExistsError() {
+ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
+ def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
+ " Currently, the available genome keys are:\n" +
+ " ${params.genomes.keySet().join(", ")}\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ error(error_string)
+ }
+}
+
+//
+// Print a warning if both GTF and GFF have been provided
+//
+def gtfGffWarn() {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " Both '--gtf' and '--gff' parameters have been provided.\n" +
+ " Using GTF file as priority.\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Print a warning if using GRCh38 assembly from igenomes.config
+//
+def ncbiGenomeWarn() {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " When using '--genome GRCh38' the assembly is from the NCBI and NOT Ensembl.\n" +
+ " Biotype QC will be skipped to circumvent the issue below:\n" +
+ " https://github.com/nf-core/rnaseq/issues/460\n\n" +
+ " If you would like to use the soft-masked Ensembl assembly instead please see:\n" +
+ " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Print a warning if using a UCSC assembly from igenomes.config
+//
+def ucscGenomeWarn() {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " When using UCSC assemblies the 'gene_biotype' field is absent from the GTF file.\n" +
+ " Biotype QC will be skipped to circumvent the issue below:\n" +
+ " https://github.com/nf-core/rnaseq/issues/460\n\n" +
+ " If you would like to use the soft-masked Ensembl assembly instead please see:\n" +
+ " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Print a warning if using '--transcript_fasta'
+//
+def transcriptsFastaWarn() {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " '--transcript_fasta' parameter has been provided.\n" +
+ " Make sure transcript names in this file match those in the GFF/GTF file.\n\n" +
+ " Please see:\n" +
+ " https://github.com/nf-core/rnaseq/issues/753\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Print a warning if --skip_alignment has been provided
+//
+def skipAlignmentWarn() {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " '--skip_alignment' parameter has been provided.\n" +
+ " Skipping alignment, genome-based quantification and all downstream QC processes.\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Print a warning if using '--aligner star_rsem' and '--with_umi'
+//
+def rsemUmiError() {
+ def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " When using '--aligner star_rsem', STAR is run by RSEM itself and so it is\n" +
+ " not possible to remove UMIs before the quantification.\n\n" +
+ " If you would like to remove UMI barcodes using the '--with_umi' option\n" +
+ " please use either '--aligner star_salmon' or '--aligner hisat2'.\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ error(error_string)
+}
+
+//
+// Print a warning if using '--aligner star_rsem' and providing both '--rsem_index' and '--star_index'
+//
+def rsemStarIndexWarn() {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " When using '--aligner star_rsem', both the STAR and RSEM indices should\n" +
+ " be present in the path specified by '--rsem_index'.\n\n" +
+ " This warning has been generated because you have provided both\n" +
+ " '--rsem_index' and '--star_index'. The pipeline will ignore the latter.\n\n" +
+ " Please see:\n" +
+ " https://github.com/nf-core/rnaseq/issues/568\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Print a warning if using '--aligner star_rsem' and providing '--extra_star_align_args'
+//
+def rsemStarExtraArgumentsWarn() {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " No additional arguments can be passed to STAR when using RSEM.\n" +
+ " Because RSEM enforces its own parameters for STAR, any extra arguments\n" +
+ " to STAR will be ignored. Alternatively, choose the STAR+Salmon route.\n\n" +
+ " This warning has been generated because you have provided both\n" +
+ " '--aligner star_rsem' and '--extra_star_align_args'.\n\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Print a warning if using '--additional_fasta' and '--<ALIGNER>_index'
+//
+def additionaFastaIndexWarn(index) {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " When using '--additional_fasta ' the aligner index will not\n" +
+ " be re-built with the transgenes incorporated by default since you have \n" +
+ " already provided an index via '--${index}_index '.\n\n" +
+ " Set '--additional_fasta --${index}_index false --gene_bed false --save_reference'\n" +
+ " to re-build the index with transgenes included and the index and gene BED file will be saved in\n" +
+ " 'results/genome/index/${index}/' for re-use with '--${index}_index'.\n\n" +
+ " Ignore this warning if you know that the index already contains transgenes.\n\n" +
+ " Please see:\n" +
+ " https://github.com/nf-core/rnaseq/issues/556\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Function to generate an error if contigs in genome fasta file > 512 Mbp
+//
+def checkMaxContigSize(fai_file) {
+ def max_size = 512000000
+ fai_file.eachLine { line ->
+ def lspl = line.split('\t')
+ def chrom = lspl[0]
+ def size = lspl[1]
+ if (size.toInteger() > max_size) {
+ def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " Contig longer than ${max_size}bp found in reference genome!\n\n" +
+ " ${chrom}: ${size}\n\n" +
+ " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" +
+ " Please see:\n" +
+ " https://github.com/nf-core/rnaseq/issues/744\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ error(error_string)
+ }
+ }
+}
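The 512 Mbp guard exists because the BAI index format cannot address reference sequences longer than 2^29-1 bases; CSI indices do not have that limit, hence the `--bam_csi_index` suggestion. The parse itself is a plain tab split over `.fai` lines, e.g. on an invented oversized contig:

    def lspl = 'chr1\t600000000\t112\t70\t71'.split('\t')   // hypothetical .fai line
    assert lspl[0] == 'chr1'
    assert lspl[1].toInteger() > 512000000                  // would trigger error() above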
+
+//
+// Create MultiQC tsv custom content from a list of values
+//
+def multiqcTsvFromList(tsv_data, header) {
+ def tsv_string = ""
+ if (tsv_data.size() > 0) {
+ tsv_string += "${header.join('\t')}\n"
+ tsv_string += tsv_data.join('\n')
+ }
+ return tsv_string
+}
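Given a header list and pre-joined rows, this returns a TSV string, or an empty string when there is no data. A quick check, assuming the function above is pasted into the same Groovy script (sample values invented):

    def header   = ['Sample', 'STAR uniquely mapped reads (%)']
    def tsv_data = ['SAMPLE_1\t91.23', 'SAMPLE_2\t3.51']
    assert multiqcTsvFromList(tsv_data, header) ==
        'Sample\tSTAR uniquely mapped reads (%)\nSAMPLE_1\t91.23\nSAMPLE_2\t3.51'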
+
+//
+// Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness
+//
+def getSalmonInferredStrandedness(json_file) {
+ def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0]
+ def strandedness = 'reverse'
+ if (lib_type) {
+ if (lib_type in ['U', 'IU']) {
+ strandedness = 'unstranded'
+ } else if (lib_type in ['SF', 'ISF']) {
+ strandedness = 'forward'
+ } else if (lib_type in ['SR', 'ISR']) {
+ strandedness = 'reverse'
+ }
+ }
+ return strandedness
+}
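Salmon encodes strandedness in its library-type codes: `U`/`IU` unstranded, `SF`/`ISF` forward, `SR`/`ISR` reverse. With the function above in scope, a minimal round-trip on an invented `meta_info.json`:

    def f = File.createTempFile('meta_info', '.json')
    f.text = '{"library_types": ["ISR"]}'   // reverse-stranded paired-end library
    assert getSalmonInferredStrandedness(f) == 'reverse'
    f.delete()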
+
+//
+// Function that parses and returns the alignment rate from the STAR log output
+//
+def getStarPercentMapped(params, align_log) {
+ def percent_aligned = 0
+ def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/
+ align_log.eachLine { line ->
+ def matcher = line =~ pattern
+ if (matcher) {
+ percent_aligned = matcher[0][1].toFloat()
+ }
+ }
+
+ def pass = false
+ if (percent_aligned >= params.min_mapped_reads.toFloat()) {
+ pass = true
+ }
+ return [ percent_aligned, pass ]
+}
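This scans STAR's `Log.final.out` for the uniquely-mapped percentage and compares it against `--min_mapped_reads`. A toy run, assuming the function above is in scope (the line follows STAR's log format, the numbers are invented):

    def log_file = File.createTempFile('star', '.log')
    log_file.text = 'Uniquely mapped reads % |\t91.23%'
    def (percent, passed) = getStarPercentMapped([min_mapped_reads: 5], log_file)
    assert percent == 91.23f && passed
    log_file.delete()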
+
+//
+// Function to check whether biotype field exists in GTF file
+//
+def biotypeInGtf(gtf_file, biotype) {
+ def hits = 0
+ gtf_file.eachLine { line ->
+ def attributes = line.split('\t')[-1].split()
+ if (attributes.contains(biotype)) {
+ hits += 1
+ }
+ }
+ if (hits) {
+ return true
+ } else {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " Biotype attribute '${biotype}' not found in the last column of the GTF file!\n\n" +
+ " Biotype QC will be skipped to circumvent the issue below:\n" +
+ " https://github.com/nf-core/rnaseq/issues/460\n\n" +
+ " Amend '--featurecounts_group_type' to change this behaviour.\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ return false
+ }
+}
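The check is deliberately loose: it tokenises the attribute column by whitespace and looks for the bare attribute name. For a hypothetical Ensembl-style annotation line:

    def line = 'chr1\thavana\tgene\t11869\t14409\t.\t+\t.\t' +
        'gene_id "ENSG00000223972"; gene_biotype "transcribed_unprocessed_pseudogene";'
    assert line.split('\t')[-1].split().contains('gene_biotype')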
+
+//
+// Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output
+//
+def getInferexperimentStrandedness(inferexperiment_file, cutoff=30) {
+ def sense = 0
+ def antisense = 0
+ def undetermined = 0
+ inferexperiment_file.eachLine { line ->
+ def undetermined_matcher = line =~ /Fraction of reads failed to determine:\s([\d\.]+)/
+ def se_sense_matcher = line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/
+ def se_antisense_matcher = line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/
+ def pe_sense_matcher = line =~ /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/
+ def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/
+ if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100
+ if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100
+ if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100
+ if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100
+ if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100
+ }
+ def strandedness = 'unstranded'
+ if (sense >= 100-cutoff) {
+ strandedness = 'forward'
+ } else if (antisense >= 100-cutoff) {
+ strandedness = 'reverse'
+ }
+ return [ strandedness, sense, antisense, undetermined ]
+}
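With the default cutoff of 30, a library is called `forward` or `reverse` when at least 70% of assignable reads support that orientation, and `unstranded` otherwise. On an invented paired-end `infer_experiment.py` report, the function above would return roughly `['reverse', 0.57, 97.73, 1.70]`:

    def f = File.createTempFile('infer_experiment', '.txt')
    f.text = '''This is PairEnd Data
    Fraction of reads failed to determine: 0.0170
    Fraction of reads explained by "1++,1--,2+-,2-+": 0.0057
    Fraction of reads explained by "1+-,1-+,2++,2--": 0.9773
    '''
    assert getInferexperimentStrandedness(f)[0] == 'reverse'
    f.delete()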
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
new file mode 100644
index 000000000..ac31f28f6
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
@@ -0,0 +1,126 @@
+//
+// Subworkflow with functionality that may be useful for any Nextflow pipeline
+//
+
+import org.yaml.snakeyaml.Yaml
+import groovy.json.JsonOutput
+import nextflow.extension.FilesEx
+
+/*
+========================================================================================
+ SUBWORKFLOW DEFINITION
+========================================================================================
+*/
+
+workflow UTILS_NEXTFLOW_PIPELINE {
+
+ take:
+ print_version // boolean: print version
+ dump_parameters // boolean: dump parameters
+ outdir // path: base directory used to publish pipeline results
+ check_conda_channels // boolean: check conda channels
+
+ main:
+
+ //
+ // Print workflow version and exit on --version
+ //
+ if (print_version) {
+ log.info "${workflow.manifest.name} ${getWorkflowVersion()}"
+ System.exit(0)
+ }
+
+ //
+ // Dump pipeline parameters to a JSON file
+ //
+ if (dump_parameters && outdir) {
+ dumpParametersToJSON(outdir)
+ }
+
+ //
+ // When running with Conda, warn if channels have not been set-up appropriately
+ //
+ if (check_conda_channels) {
+ checkCondaChannels()
+ }
+
+ emit:
+ dummy_emit = true
+}
+
+/*
+========================================================================================
+ FUNCTIONS
+========================================================================================
+*/
+
+//
+// Generate version string
+//
+def getWorkflowVersion() {
+ String version_string = ""
+ if (workflow.manifest.version) {
+ def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : ''
+ version_string += "${prefix_v}${workflow.manifest.version}"
+ }
+
+ if (workflow.commitId) {
+ def git_shortsha = workflow.commitId.substring(0, 7)
+ version_string += "-g${git_shortsha}"
+ }
+
+ return version_string
+}
+
+//
+// Dump pipeline parameters to a JSON file
+//
+def dumpParametersToJSON(outdir) {
+ def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
+ def filename = "params_${timestamp}.json"
+ def temp_pf = new File(workflow.launchDir.toString(), ".${filename}")
+ def jsonStr = JsonOutput.toJson(params)
+ temp_pf.text = JsonOutput.prettyPrint(jsonStr)
+
+ FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json")
+ temp_pf.delete()
+}
+
+//
+// When running with -profile conda, warn if channels have not been set-up appropriately
+//
+def checkCondaChannels() {
+ Yaml parser = new Yaml()
+ def channels = []
+ try {
+ def config = parser.load("conda config --show channels".execute().text)
+ channels = config.channels
+ } catch(NullPointerException | IOException e) {
+ log.warn "Could not verify conda channel configuration."
+ return
+ }
+
+ // Check that all channels are present
+ // This channel list is ordered by required channel priority.
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
+ def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean
+
+ // Check that they are in the right order
+ def channel_priority_violation = false
+ def n = required_channels_in_order.size()
+ for (int i = 0; i < n - 1; i++) {
+ channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
+ }
+
+ if (channels_missing | channel_priority_violation) {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " There is a problem with your Conda configuration!\n\n" +
+ " You will need to set-up the conda-forge and bioconda channels correctly.\n" +
+ " Please refer to https://bioconda.github.io/\n" +
+ " The observed channel order is \n" +
+ " ${channels}\n" +
+ " but the following channel order is required:\n" +
+ " ${required_channels_in_order}\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ }
+}
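The priority test walks consecutive pairs of the required channel list and demands strictly increasing positions in the observed list. The same check in miniature (observed order invented, reversed on purpose):

    def required = ['conda-forge', 'bioconda', 'defaults']
    def observed = ['defaults', 'bioconda', 'conda-forge']
    def ok = (0..<required.size() - 1).every {
        observed.indexOf(required[it]) < observed.indexOf(required[it + 1])
    }
    assert !ok   // priority violated, so the warning above would fire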
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml
new file mode 100644
index 000000000..e5c3a0a82
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml
@@ -0,0 +1,38 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NEXTFLOW_PIPELINE"
+description: Subworkflow with functionality that may be useful for any Nextflow pipeline
+keywords:
+ - utility
+ - pipeline
+ - initialise
+ - version
+components: []
+input:
+ - print_version:
+ type: boolean
+ description: |
+ Print the version of the pipeline and exit
+ - dump_parameters:
+ type: boolean
+ description: |
+ Dump the parameters of the pipeline to a JSON file
+ - output_directory:
+ type: directory
+ description: Path to output dir to write JSON file to.
+ pattern: "results/"
+ - check_conda_channel:
+ type: boolean
+ description: |
+ Check if the conda channel priority is correct.
+output:
+ - dummy_emit:
+ type: boolean
+ description: |
+ Dummy emit to make nf-core subworkflows lint happy
+authors:
+ - "@adamrtalbot"
+ - "@drpatelh"
+maintainers:
+ - "@adamrtalbot"
+ - "@drpatelh"
+ - "@maxulysse"
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test
new file mode 100644
index 000000000..8ed4310ca
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test
@@ -0,0 +1,54 @@
+
+nextflow_function {
+
+ name "Test Functions"
+ script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf"
+ config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config"
+ tag 'subworkflows'
+ tag 'utils_nextflow_pipeline'
+ tag 'subworkflows/utils_nextflow_pipeline'
+
+ test("Test Function getWorkflowVersion") {
+
+ function "getWorkflowVersion"
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function dumpParametersToJSON") {
+
+ function "dumpParametersToJSON"
+
+ when {
+ function {
+ """
+ input[0] = "$outputDir"
+ """.stripIndent()
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success }
+ )
+ }
+ }
+
+ test("Test Function checkCondaChannels") {
+
+ function "checkCondaChannels"
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap
new file mode 100644
index 000000000..db2030f8b
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap
@@ -0,0 +1,12 @@
+{
+ "Test Function getWorkflowVersion": {
+ "content": [
+ "v9.9.9"
+ ],
+ "timestamp": "2024-01-19T11:32:36.031083"
+ },
+ "Test Function checkCondaChannels": {
+ "content": null,
+ "timestamp": "2024-01-19T11:32:50.456"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test
new file mode 100644
index 000000000..f7c54bc68
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test
@@ -0,0 +1,123 @@
+nextflow_workflow {
+
+ name "Test Workflow UTILS_NEXTFLOW_PIPELINE"
+ script "../main.nf"
+ config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config"
+ workflow "UTILS_NEXTFLOW_PIPELINE"
+ tag 'subworkflows'
+ tag 'utils_nextflow_pipeline'
+ tag 'subworkflows/utils_nextflow_pipeline'
+
+ test("Should run no inputs") {
+
+ when {
+ params {
+ outdir = "tests/results"
+ }
+ workflow {
+ """
+ print_version = false
+ dump_parameters = false
+ outdir = null
+ check_conda_channels = false
+
+ input[0] = print_version
+ input[1] = dump_parameters
+ input[2] = outdir
+ input[3] = check_conda_channels
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success }
+ )
+ }
+ }
+
+ test("Should print version") {
+
+ when {
+ params {
+ outdir = "tests/results"
+ }
+ workflow {
+ """
+ print_version = true
+ dump_parameters = false
+ outdir = null
+ check_conda_channels = false
+
+ input[0] = print_version
+ input[1] = dump_parameters
+ input[2] = outdir
+ input[3] = check_conda_channels
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert workflow.stdout.contains("nextflow_workflow v9.9.9") }
+ )
+ }
+ }
+
+ test("Should dump params") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ workflow {
+ """
+ print_version = false
+ dump_parameters = true
+ outdir = params.outdir
+ check_conda_channels = false
+
+ input[0] = false
+ input[1] = true
+ input[2] = params.outdir
+ input[3] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success }
+ )
+ }
+ }
+
+ test("Should not create params JSON if no output directory") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ workflow {
+ """
+ print_version = false
+ dump_parameters = true
+ outdir = params.outdir
+ check_conda_channels = false
+
+ input[0] = false
+ input[1] = true
+ input[2] = null
+ input[3] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config
new file mode 100644
index 000000000..53574ffec
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config
@@ -0,0 +1,9 @@
+manifest {
+ name = 'nextflow_workflow'
+ author = """nf-core"""
+ homePage = 'https://127.0.0.1'
+ description = """Dummy pipeline"""
+ nextflowVersion = '!>=23.04.0'
+ version = '9.9.9'
+ doi = 'https://doi.org/10.5281/zenodo.5070524'
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml
new file mode 100644
index 000000000..f84761125
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/utils_nextflow_pipeline:
+ - subworkflows/nf-core/utils_nextflow_pipeline/**
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
new file mode 100644
index 000000000..6d805bb3f
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -0,0 +1,395 @@
+//
+// Subworkflow with utility functions specific to the nf-core pipeline template
+//
+
+import org.yaml.snakeyaml.Yaml
+import nextflow.extension.FilesEx
+
+/*
+========================================================================================
+ SUBWORKFLOW DEFINITION
+========================================================================================
+*/
+
+workflow UTILS_NFCORE_PIPELINE {
+
+ main:
+ valid_config = checkConfigProvided()
+
+ emit:
+ valid_config
+}
+
+/*
+========================================================================================
+ FUNCTIONS
+========================================================================================
+*/
+
+//
+// Warn if a -profile or Nextflow config has not been provided to run the pipeline
+//
+def checkConfigProvided() {
+    def valid_config = true
+ if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) {
+ log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" +
+ "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" +
+ " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" +
+ " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" +
+ " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" +
+ "Please refer to the quick start section and usage docs for the pipeline.\n "
+ valid_config = false
+ }
+ return valid_config
+}
+
+//
+// Citation string for pipeline
+//
+def workflowCitation() {
+ return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
+ "* The pipeline\n" +
+ " ${workflow.manifest.doi}\n\n" +
+ "* The nf-core framework\n" +
+ " https://doi.org/10.1038/s41587-020-0439-x\n\n" +
+ "* Software dependencies\n" +
+ " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
+}
+
+//
+// Generate workflow version string
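+// Returns e.g. "v9.9.9" from the manifest alone, or "v9.9.9-g1a2b3c4" when a Git commit ID is available (illustrative values).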
+//
+def getWorkflowVersion() {
+ String version_string = ""
+ if (workflow.manifest.version) {
+ def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : ''
+ version_string += "${prefix_v}${workflow.manifest.version}"
+ }
+
+ if (workflow.commitId) {
+ def git_shortsha = workflow.commitId.substring(0, 7)
+ version_string += "-g${git_shortsha}"
+ }
+
+ return version_string
+}
+
+//
+// Get software versions for pipeline
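+// Shortens fully qualified process names, e.g. a key of "NFCORE_PIPELINE:PIPELINE:FASTQC" becomes "FASTQC" (illustrative key).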
+//
+def processVersionsFromYAML(yaml_file) {
+ Yaml yaml = new Yaml()
+    def versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] }
+ return yaml.dumpAsMap(versions).trim()
+}
+
+//
+// Get workflow version for pipeline
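+// Produces, for example (illustrative values):
+//   Workflow:
+//       nextflow_workflow: v9.9.9
+//       Nextflow: 23.10.0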
+//
+def workflowVersionToYAML() {
+ return """
+ Workflow:
+ $workflow.manifest.name: ${getWorkflowVersion()}
+ Nextflow: $workflow.nextflow.version
+ """.stripIndent().trim()
+}
+
+//
+// Get channel of software versions used in pipeline in YAML format
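+// Deduplicates the per-tool version entries and mixes in the workflow/Nextflow versions from workflowVersionToYAML().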
+//
+def softwareVersionsToYAML(ch_versions) {
+ return ch_versions
+ .unique()
+ .map { processVersionsFromYAML(it) }
+ .unique()
+ .mix(Channel.of(workflowVersionToYAML()))
+}
+
+//
+// Get workflow summary for MultiQC
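+// Returns a MultiQC custom-content YAML string whose 'data' block is an HTML definition list of the parameter summary.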
+//
+def paramsSummaryMultiqc(summary_params) {
+ def summary_section = ''
+ for (group in summary_params.keySet()) {
+ def group_params = summary_params.get(group) // This gets the parameters of that particular group
+ if (group_params) {
+            summary_section += "    <p style=\"font-size:110%\"><b>$group</b></p>\n"
+            summary_section += "    <dl class=\"dl-horizontal\">\n"
+            for (param in group_params.keySet()) {
+                summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n"
+            }
+            summary_section += "    </dl>\n"
+ }
+ }
+
+ String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
+ yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
+ yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
+ yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
+ yaml_file_text += "plot_type: 'html'\n"
+ yaml_file_text += "data: |\n"
+ yaml_file_text += "${summary_section}"
+
+ return yaml_file_text
+}
+
+//
+// nf-core logo
+//
+def nfCoreLogo(monochrome_logs=true) {
+ Map colors = logColours(monochrome_logs)
+ String.format(
+ """\n
+ ${dashedLine(monochrome_logs)}
+ ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset}
+ ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset}
+ ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset}
+ ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset}
+ ${colors.green}`._,._,\'${colors.reset}
+ ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset}
+ ${dashedLine(monochrome_logs)}
+ """.stripIndent()
+ )
+}
+
+//
+// Return dashed line
+//
+def dashedLine(monochrome_logs=true) {
+ Map colors = logColours(monochrome_logs)
+ return "-${colors.dim}----------------------------------------------------${colors.reset}-"
+}
+
+//
+// ANSI colours used for terminal logging
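+// Usage (illustrative): def colors = logColours(false); log.info "${colors.green}done${colors.reset}"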
+//
+def logColours(monochrome_logs=true) {
+ Map colorcodes = [:]
+
+ // Reset / Meta
+ colorcodes['reset'] = monochrome_logs ? '' : "\033[0m"
+ colorcodes['bold'] = monochrome_logs ? '' : "\033[1m"
+ colorcodes['dim'] = monochrome_logs ? '' : "\033[2m"
+ colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m"
+ colorcodes['blink'] = monochrome_logs ? '' : "\033[5m"
+ colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m"
+ colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m"
+
+ // Regular Colors
+ colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m"
+ colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m"
+ colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m"
+ colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m"
+ colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m"
+ colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m"
+ colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m"
+ colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m"
+
+ // Bold
+ colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m"
+ colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m"
+ colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m"
+ colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m"
+ colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m"
+ colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m"
+ colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m"
+ colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m"
+
+ // Underline
+ colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m"
+ colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m"
+ colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m"
+ colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m"
+ colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m"
+ colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m"
+ colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m"
+ colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m"
+
+ // High Intensity
+ colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m"
+ colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m"
+ colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m"
+ colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m"
+ colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m"
+ colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m"
+ colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m"
+ colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m"
+
+ // Bold High Intensity
+ colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m"
+ colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m"
+ colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m"
+ colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m"
+ colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m"
+ colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m"
+ colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m"
+ colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m"
+
+ return colorcodes
+}
+
+//
+// Construct and send completion email
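+// Tries sendmail first, falls back to 'mail', and always writes the report to <outdir>/pipeline_info/ regardless of delivery.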
+//
+def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true) {
+
+ // Set up the e-mail variables
+ def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
+ if (!workflow.success) {
+ subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
+ }
+
+ def summary = [:]
+ for (group in summary_params.keySet()) {
+ summary << summary_params[group]
+ }
+
+ def misc_fields = [:]
+ misc_fields['Date Started'] = workflow.start
+ misc_fields['Date Completed'] = workflow.complete
+ misc_fields['Pipeline script file path'] = workflow.scriptFile
+ misc_fields['Pipeline script hash ID'] = workflow.scriptId
+ if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
+ if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
+ if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
+ misc_fields['Nextflow Version'] = workflow.nextflow.version
+ misc_fields['Nextflow Build'] = workflow.nextflow.build
+ misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+
+ def email_fields = [:]
+ email_fields['version'] = getWorkflowVersion()
+ email_fields['runName'] = workflow.runName
+ email_fields['success'] = workflow.success
+ email_fields['dateComplete'] = workflow.complete
+ email_fields['duration'] = workflow.duration
+ email_fields['exitStatus'] = workflow.exitStatus
+ email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+ email_fields['errorReport'] = (workflow.errorReport ?: 'None')
+ email_fields['commandLine'] = workflow.commandLine
+ email_fields['projectDir'] = workflow.projectDir
+ email_fields['summary'] = summary << misc_fields
+
+ // Check if we are only sending emails on failure
+ def email_address = email
+ if (!email && email_on_fail && !workflow.success) {
+ email_address = email_on_fail
+ }
+
+ // Render the TXT template
+ def engine = new groovy.text.GStringTemplateEngine()
+ def tf = new File("${workflow.projectDir}/assets/email_template.txt")
+ def txt_template = engine.createTemplate(tf).make(email_fields)
+ def email_txt = txt_template.toString()
+
+ // Render the HTML template
+ def hf = new File("${workflow.projectDir}/assets/email_template.html")
+ def html_template = engine.createTemplate(hf).make(email_fields)
+ def email_html = html_template.toString()
+
+ // Render the sendmail template
+ def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}" ]
+ def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt")
+ def sendmail_template = engine.createTemplate(sf).make(smail_fields)
+ def sendmail_html = sendmail_template.toString()
+
+ // Send the HTML e-mail
+ Map colors = logColours(monochrome_logs)
+ if (email_address) {
+ try {
+            if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') }
+ // Try to send HTML e-mail using sendmail
+ def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
+ sendmail_tf.withWriter { w -> w << sendmail_html }
+ [ 'sendmail', '-t' ].execute() << sendmail_html
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
+ } catch (all) {
+ // Catch failures and try with plaintext
+ def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
+ mail_cmd.execute() << email_html
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
+ }
+ }
+
+ // Write summary e-mail HTML to a file
+ def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
+ output_hf.withWriter { w -> w << email_html }
+ FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html");
+ output_hf.delete()
+
+ // Write summary e-mail TXT to a file
+ def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
+ output_tf.withWriter { w -> w << email_txt }
+ FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt");
+ output_tf.delete()
+}
+
+//
+// Print pipeline summary on completion
+//
+def completionSummary(monochrome_logs=true) {
+ Map colors = logColours(monochrome_logs)
+ if (workflow.success) {
+ if (workflow.stats.ignoredCount == 0) {
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
+ } else {
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+ }
+ } else {
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
+ }
+}
+
+//
+// Construct and send a notification to a web server as JSON, e.g. to Microsoft Teams or Slack
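+// Renders assets/slackreport.json for Slack webhook URLs and assets/adaptivecard.json otherwise; non-200 responses are logged as warnings.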
+//
+def imNotification(summary_params, hook_url) {
+ def summary = [:]
+ for (group in summary_params.keySet()) {
+ summary << summary_params[group]
+ }
+
+ def misc_fields = [:]
+ misc_fields['start'] = workflow.start
+ misc_fields['complete'] = workflow.complete
+ misc_fields['scriptfile'] = workflow.scriptFile
+ misc_fields['scriptid'] = workflow.scriptId
+ if (workflow.repository) misc_fields['repository'] = workflow.repository
+ if (workflow.commitId) misc_fields['commitid'] = workflow.commitId
+ if (workflow.revision) misc_fields['revision'] = workflow.revision
+ misc_fields['nxf_version'] = workflow.nextflow.version
+ misc_fields['nxf_build'] = workflow.nextflow.build
+ misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
+
+ def msg_fields = [:]
+ msg_fields['version'] = getWorkflowVersion()
+ msg_fields['runName'] = workflow.runName
+ msg_fields['success'] = workflow.success
+ msg_fields['dateComplete'] = workflow.complete
+ msg_fields['duration'] = workflow.duration
+ msg_fields['exitStatus'] = workflow.exitStatus
+ msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+ msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
+ msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
+ msg_fields['projectDir'] = workflow.projectDir
+ msg_fields['summary'] = summary << misc_fields
+
+ // Render the JSON template
+ def engine = new groovy.text.GStringTemplateEngine()
+ // Different JSON depending on the service provider
+ // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
+ def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
+ def hf = new File("${workflow.projectDir}/assets/${json_path}")
+ def json_template = engine.createTemplate(hf).make(msg_fields)
+ def json_message = json_template.toString()
+
+ // POST
+ def post = new URL(hook_url).openConnection();
+ post.setRequestMethod("POST")
+ post.setDoOutput(true)
+ post.setRequestProperty("Content-Type", "application/json")
+ post.getOutputStream().write(json_message.getBytes("UTF-8"));
+ def postRC = post.getResponseCode();
+ if (! postRC.equals(200)) {
+ log.warn(post.getErrorStream().getText());
+ }
+}
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml
new file mode 100644
index 000000000..dd1462b23
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml
@@ -0,0 +1,20 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NFCORE_PIPELINE"
+description: Subworkflow with utility functions specific to the nf-core pipeline template
+keywords:
+ - utility
+ - pipeline
+ - initialise
+ - version
+components: []
+input: []
+output:
+ - success:
+ type: boolean
+ description: |
+ Dummy output to indicate success
+authors:
+ - "@adamrtalbot"
+maintainers:
+ - "@adamrtalbot"
+ - "@maxulysse"
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test
new file mode 100644
index 000000000..c176295d6
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test
@@ -0,0 +1,114 @@
+
+nextflow_function {
+
+ name "Test Functions"
+ script "../main.nf"
+ config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "utils_nfcore_pipeline"
+ tag "subworkflows/utils_nfcore_pipeline"
+
+ test("Test Function checkConfigProvided") {
+
+ function "checkConfigProvided"
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function workflowCitation") {
+
+ function "workflowCitation"
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function nfCoreLogo") {
+
+ function "nfCoreLogo"
+
+ when {
+ function {
+ """
+ input[0] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function dashedLine") {
+
+ function "dashedLine"
+
+ when {
+ function {
+ """
+ input[0] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function without logColours") {
+
+ function "logColours"
+
+ when {
+ function {
+ """
+ input[0] = true
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function with logColours") {
+ function "logColours"
+
+ when {
+ function {
+ """
+ input[0] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap
new file mode 100644
index 000000000..afb9ab4dc
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap
@@ -0,0 +1,134 @@
+{
+ "Test Function checkConfigProvided": {
+ "content": [
+ true
+ ],
+ "timestamp": "2024-01-19T11:34:13.548431224"
+ },
+ "Test Function nfCoreLogo": {
+ "content": [
+ "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n"
+ ],
+ "timestamp": "2024-01-19T11:34:38.840454873"
+ },
+ "Test Function workflowCitation": {
+ "content": [
+ "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md"
+ ],
+ "timestamp": "2024-01-19T11:34:22.24352016"
+ },
+ "Test Function without logColours": {
+ "content": [
+ {
+ "reset": "",
+ "bold": "",
+ "dim": "",
+ "underlined": "",
+ "blink": "",
+ "reverse": "",
+ "hidden": "",
+ "black": "",
+ "red": "",
+ "green": "",
+ "yellow": "",
+ "blue": "",
+ "purple": "",
+ "cyan": "",
+ "white": "",
+ "bblack": "",
+ "bred": "",
+ "bgreen": "",
+ "byellow": "",
+ "bblue": "",
+ "bpurple": "",
+ "bcyan": "",
+ "bwhite": "",
+ "ublack": "",
+ "ured": "",
+ "ugreen": "",
+ "uyellow": "",
+ "ublue": "",
+ "upurple": "",
+ "ucyan": "",
+ "uwhite": "",
+ "iblack": "",
+ "ired": "",
+ "igreen": "",
+ "iyellow": "",
+ "iblue": "",
+ "ipurple": "",
+ "icyan": "",
+ "iwhite": "",
+ "biblack": "",
+ "bired": "",
+ "bigreen": "",
+ "biyellow": "",
+ "biblue": "",
+ "bipurple": "",
+ "bicyan": "",
+ "biwhite": ""
+ }
+ ],
+ "timestamp": "2024-01-19T11:35:04.418416984"
+ },
+ "Test Function dashedLine": {
+ "content": [
+ "-\u001b[2m----------------------------------------------------\u001b[0m-"
+ ],
+ "timestamp": "2024-01-19T11:34:55.420000755"
+ },
+ "Test Function with logColours": {
+ "content": [
+ {
+ "reset": "\u001b[0m",
+ "bold": "\u001b[1m",
+ "dim": "\u001b[2m",
+ "underlined": "\u001b[4m",
+ "blink": "\u001b[5m",
+ "reverse": "\u001b[7m",
+ "hidden": "\u001b[8m",
+ "black": "\u001b[0;30m",
+ "red": "\u001b[0;31m",
+ "green": "\u001b[0;32m",
+ "yellow": "\u001b[0;33m",
+ "blue": "\u001b[0;34m",
+ "purple": "\u001b[0;35m",
+ "cyan": "\u001b[0;36m",
+ "white": "\u001b[0;37m",
+ "bblack": "\u001b[1;30m",
+ "bred": "\u001b[1;31m",
+ "bgreen": "\u001b[1;32m",
+ "byellow": "\u001b[1;33m",
+ "bblue": "\u001b[1;34m",
+ "bpurple": "\u001b[1;35m",
+ "bcyan": "\u001b[1;36m",
+ "bwhite": "\u001b[1;37m",
+ "ublack": "\u001b[4;30m",
+ "ured": "\u001b[4;31m",
+ "ugreen": "\u001b[4;32m",
+ "uyellow": "\u001b[4;33m",
+ "ublue": "\u001b[4;34m",
+ "upurple": "\u001b[4;35m",
+ "ucyan": "\u001b[4;36m",
+ "uwhite": "\u001b[4;37m",
+ "iblack": "\u001b[0;90m",
+ "ired": "\u001b[0;91m",
+ "igreen": "\u001b[0;92m",
+ "iyellow": "\u001b[0;93m",
+ "iblue": "\u001b[0;94m",
+ "ipurple": "\u001b[0;95m",
+ "icyan": "\u001b[0;96m",
+ "iwhite": "\u001b[0;97m",
+ "biblack": "\u001b[1;90m",
+ "bired": "\u001b[1;91m",
+ "bigreen": "\u001b[1;92m",
+ "biyellow": "\u001b[1;93m",
+ "biblue": "\u001b[1;94m",
+ "bipurple": "\u001b[1;95m",
+ "bicyan": "\u001b[1;96m",
+ "biwhite": "\u001b[1;97m"
+ }
+ ],
+ "timestamp": "2024-01-19T11:35:13.436366565"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test
new file mode 100644
index 000000000..c5f7776a6
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test
@@ -0,0 +1,21 @@
+nextflow_workflow {
+
+ name "Test Workflow UTILS_NFCORE_PIPELINE"
+ script "../main.nf"
+ config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config"
+ workflow "UTILS_NFCORE_PIPELINE"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "utils_nfcore_pipeline"
+ tag "subworkflows/utils_nfcore_pipeline"
+
+ test("Should run without failures") {
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap
new file mode 100644
index 000000000..d07ce54c5
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap
@@ -0,0 +1,15 @@
+{
+ "Should run without failures": {
+ "content": [
+ {
+ "0": [
+ true
+ ],
+ "valid_config": [
+ true
+ ]
+ }
+ ],
+ "timestamp": "2024-01-19T11:35:22.538940073"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config
new file mode 100644
index 000000000..53574ffec
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config
@@ -0,0 +1,9 @@
+manifest {
+ name = 'nextflow_workflow'
+ author = """nf-core"""
+ homePage = 'https://127.0.0.1'
+ description = """Dummy pipeline"""
+ nextflowVersion = '!>=23.04.0'
+ version = '9.9.9'
+ doi = 'https://doi.org/10.5281/zenodo.5070524'
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml
new file mode 100644
index 000000000..ac8523c9a
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/utils_nfcore_pipeline:
+ - subworkflows/nf-core/utils_nfcore_pipeline/**
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf
new file mode 100644
index 000000000..2585b65d1
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf
@@ -0,0 +1,62 @@
+//
+// Subworkflow that uses the nf-validation plugin to render help text and parameter summary
+//
+
+/*
+========================================================================================
+ IMPORT NF-VALIDATION PLUGIN
+========================================================================================
+*/
+
+include { paramsHelp } from 'plugin/nf-validation'
+include { paramsSummaryLog } from 'plugin/nf-validation'
+include { validateParameters } from 'plugin/nf-validation'
+
+/*
+========================================================================================
+ SUBWORKFLOW DEFINITION
+========================================================================================
+*/
+
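+// Typical invocation (illustrative):
+//   UTILS_NFVALIDATION_PLUGIN ( params.help, "nextflow run nf-core/rnaseq", null, null, params.validate_params, "nextflow_schema.json" )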
+workflow UTILS_NFVALIDATION_PLUGIN {
+
+ take:
+ print_help // boolean: print help
+    workflow_command // string: default command used to run pipeline
+ pre_help_text // string: string to be printed before help text and summary log
+ post_help_text // string: string to be printed after help text and summary log
+ validate_params // boolean: validate parameters
+ schema_filename // path: JSON schema file, null to use default value
+
+ main:
+
+ log.debug "Using schema file: ${schema_filename}"
+
+ // Default values for strings
+ pre_help_text = pre_help_text ?: ''
+ post_help_text = post_help_text ?: ''
+ workflow_command = workflow_command ?: ''
+
+ //
+ // Print help message if needed
+ //
+ if (print_help) {
+ log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text
+ System.exit(0)
+ }
+
+ //
+ // Print parameter summary to stdout
+ //
+ log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text
+
+ //
+ // Validate parameters relative to the parameter JSON schema
+ //
+    if (validate_params) {
+ validateParameters(parameters_schema: schema_filename)
+ }
+
+ emit:
+ dummy_emit = true
+}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
new file mode 100644
index 000000000..3d4a6b04f
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
@@ -0,0 +1,44 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NFVALIDATION_PLUGIN"
+description: Use nf-validation to initiate and validate a pipeline
+keywords:
+ - utility
+ - pipeline
+ - initialise
+ - validation
+components: []
+input:
+ - print_help:
+ type: boolean
+ description: |
+ Print help message and exit
+ - workflow_command:
+ type: string
+ description: |
+ The command to run the workflow e.g. "nextflow run main.nf"
+ - pre_help_text:
+ type: string
+ description: |
+ Text to print before the help message
+ - post_help_text:
+ type: string
+ description: |
+ Text to print after the help message
+ - validate_params:
+ type: boolean
+ description: |
+ Validate the parameters and error if invalid.
+ - schema_filename:
+ type: string
+ description: |
+ The filename of the schema to validate against.
+output:
+ - dummy_emit:
+ type: boolean
+ description: |
+ Dummy emit to make nf-core subworkflows lint happy
+authors:
+ - "@adamrtalbot"
+maintainers:
+ - "@adamrtalbot"
+ - "@maxulysse"
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test
new file mode 100644
index 000000000..517ee54e4
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test
@@ -0,0 +1,200 @@
+nextflow_workflow {
+
+ name "Test Workflow UTILS_NFVALIDATION_PLUGIN"
+ script "../main.nf"
+ workflow "UTILS_NFVALIDATION_PLUGIN"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "plugin/nf-validation"
+ tag "'plugin/nf-validation'"
+ tag "utils_nfvalidation_plugin"
+ tag "subworkflows/utils_nfvalidation_plugin"
+
+ test("Should run nothing") {
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ }
+
+ workflow {
+ """
+ help = false
+ workflow_command = null
+ pre_help_text = null
+ post_help_text = null
+ validate_params = false
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success }
+ )
+ }
+ }
+
+ test("Should run help") {
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ }
+ workflow {
+ """
+ help = true
+ workflow_command = null
+ pre_help_text = null
+ post_help_text = null
+ validate_params = false
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert workflow.exitStatus == 0 },
+ { assert workflow.stdout.any { it.contains('Input/output options') } },
+ { assert workflow.stdout.any { it.contains('--outdir') } }
+ )
+ }
+ }
+
+ test("Should run help with command") {
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ }
+ workflow {
+ """
+ help = true
+ workflow_command = "nextflow run noorg/doesntexist"
+ pre_help_text = null
+ post_help_text = null
+ validate_params = false
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert workflow.exitStatus == 0 },
+ { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } },
+ { assert workflow.stdout.any { it.contains('Input/output options') } },
+ { assert workflow.stdout.any { it.contains('--outdir') } }
+ )
+ }
+ }
+
+ test("Should run help with extra text") {
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ }
+ workflow {
+ """
+ help = true
+ workflow_command = "nextflow run noorg/doesntexist"
+ pre_help_text = "pre-help-text"
+ post_help_text = "post-help-text"
+ validate_params = false
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert workflow.exitStatus == 0 },
+ { assert workflow.stdout.any { it.contains('pre-help-text') } },
+ { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } },
+ { assert workflow.stdout.any { it.contains('Input/output options') } },
+ { assert workflow.stdout.any { it.contains('--outdir') } },
+ { assert workflow.stdout.any { it.contains('post-help-text') } }
+ )
+ }
+ }
+
+ test("Should validate params") {
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ outdir = 1
+ }
+ workflow {
+ """
+ help = false
+ workflow_command = null
+ pre_help_text = null
+ post_help_text = null
+ validate_params = true
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.failed },
+ { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } }
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json
new file mode 100644
index 000000000..7626c1c93
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json
@@ -0,0 +1,96 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema",
+ "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
+ "title": ". pipeline parameters",
+ "description": "",
+ "type": "object",
+ "definitions": {
+ "input_output_options": {
+ "title": "Input/output options",
+ "type": "object",
+ "fa_icon": "fas fa-terminal",
+ "description": "Define where the pipeline should find input data and save output data.",
+ "required": ["outdir"],
+ "properties": {
+ "validate_params": {
+ "type": "boolean",
+ "description": "Validate parameters?",
+ "default": true,
+ "hidden": true
+ },
+ "outdir": {
+ "type": "string",
+ "format": "directory-path",
+ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
+ "fa_icon": "fas fa-folder-open"
+ },
+ "test_data_base": {
+ "type": "string",
+ "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules",
+ "description": "Base for test data directory",
+ "hidden": true
+ },
+ "test_data": {
+ "type": "string",
+ "description": "Fake test data param",
+ "hidden": true
+ }
+ }
+ },
+ "generic_options": {
+ "title": "Generic options",
+ "type": "object",
+ "fa_icon": "fas fa-file-import",
+ "description": "Less common options for the pipeline, typically set in a config file.",
+ "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
+ "properties": {
+ "help": {
+ "type": "boolean",
+ "description": "Display help text.",
+ "fa_icon": "fas fa-question-circle",
+ "hidden": true
+ },
+ "version": {
+ "type": "boolean",
+ "description": "Display version and exit.",
+ "fa_icon": "fas fa-question-circle",
+ "hidden": true
+ },
+ "logo": {
+ "type": "boolean",
+ "default": true,
+ "description": "Display nf-core logo in console output.",
+ "fa_icon": "fas fa-image",
+ "hidden": true
+ },
+ "singularity_pull_docker_container": {
+ "type": "boolean",
+ "description": "Pull Singularity container from Docker?",
+ "hidden": true
+ },
+ "publish_dir_mode": {
+ "type": "string",
+ "default": "copy",
+ "description": "Method used to save pipeline results to output directory.",
+ "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
+ "fa_icon": "fas fa-copy",
+ "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+ "hidden": true
+ },
+ "monochrome_logs": {
+ "type": "boolean",
+ "description": "Use monochrome_logs",
+ "hidden": true
+ }
+ }
+ }
+ },
+ "allOf": [
+ {
+ "$ref": "#/definitions/input_output_options"
+ },
+ {
+ "$ref": "#/definitions/generic_options"
+ }
+ ]
+}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
new file mode 100644
index 000000000..60b1cfff4
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/utils_nfvalidation_plugin:
+ - subworkflows/nf-core/utils_nfvalidation_plugin/**
diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf
deleted file mode 100755
index 9250125dc..000000000
--- a/workflows/rnaseq.nf
+++ /dev/null
@@ -1,941 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- PRINT PARAMS SUMMARY
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
-
-def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
-def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
-def summary_params = paramsSummaryMap(workflow)
-
-// Print parameter summary log to screen
-log.info logo + paramsSummaryLog(workflow) + citation
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- VALIDATE INPUTS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-WorkflowRnaseq.initialise(params, log)
-
-// Check rRNA databases for sortmerna
-if (params.remove_ribo_rna) {
- ch_ribo_db = file(params.ribo_database_manifest)
- if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}
-}
-
-// Check if file with list of fastas is provided when running BBSplit
-if (!params.skip_bbsplit && !params.bbsplit_index && params.bbsplit_fasta_list) {
- ch_bbsplit_fasta_list = file(params.bbsplit_fasta_list)
- if (ch_bbsplit_fasta_list.isEmpty()) {exit 1, "File provided with --bbsplit_fasta_list is empty: ${ch_bbsplit_fasta_list.getName()}!"}
-}
-
-// Check alignment parameters
-def prepareToolIndices = []
-if (!params.skip_bbsplit) { prepareToolIndices << 'bbsplit' }
-if (!params.skip_alignment) { prepareToolIndices << params.aligner }
-if (!params.skip_pseudo_alignment && params.pseudo_aligner) { prepareToolIndices << params.pseudo_aligner }
-
-// Determine whether to filter the GTF or not
-def filterGtf =
- ((
- // Condition 1: Alignment is required and aligner is set
- !params.skip_alignment && params.aligner
- ) ||
- (
- // Condition 2: Pseudoalignment is required and pseudoaligner is set
- !params.skip_pseudo_alignment && params.pseudo_aligner
- ) ||
- (
- // Condition 3: Transcript FASTA file is not provided
- !params.transcript_fasta
- )) &&
- (
- // Condition 4: --skip_gtf_filter is not provided
- !params.skip_gtf_filter
- )
-
-// Get RSeqC modules to run
-def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : []
-if (params.bam_csi_index) {
- for (rseqc_module in ['read_distribution', 'inner_distance', 'tin']) {
- if (rseqc_modules.contains(rseqc_module)) {
- rseqc_modules.remove(rseqc_module)
- }
- }
-}
-
-// Stage dummy file to be used as an optional input where required
-ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true)
-
-// Check if an AWS iGenome has been provided to use the appropriate version of STAR
-def is_aws_igenome = false
-if (params.fasta && params.gtf) {
- if ((file(params.fasta).getName() - '.gz' == 'genome.fa') && (file(params.gtf).getName() - '.gz' == 'genes.gtf')) {
- is_aws_igenome = true
- }
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- CONFIG FILES
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty()
-ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo) : Channel.empty()
-ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-
-// Header files for MultiQC
-ch_pca_header_multiqc = file("$projectDir/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true)
-ch_clustering_header_multiqc = file("$projectDir/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true)
-ch_biotypes_header_multiqc = file("$projectDir/assets/multiqc/biotypes_header.txt", checkIfExists: true)
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- IMPORT LOCAL MODULES/SUBWORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-//
-// MODULE: Loaded from modules/local/
-//
-include { BEDTOOLS_GENOMECOV } from '../modules/local/bedtools_genomecov'
-include { DESEQ2_QC as DESEQ2_QC_STAR_SALMON } from '../modules/local/deseq2_qc'
-include { DESEQ2_QC as DESEQ2_QC_RSEM } from '../modules/local/deseq2_qc'
-include { DESEQ2_QC as DESEQ2_QC_PSEUDO } from '../modules/local/deseq2_qc'
-include { DUPRADAR } from '../modules/local/dupradar'
-include { MULTIQC } from '../modules/local/multiqc'
-include { MULTIQC_CUSTOM_BIOTYPE } from '../modules/local/multiqc_custom_biotype'
-include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../modules/local/umitools_prepareforrsem'
-
-//
-// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
-//
-include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
-include { ALIGN_STAR } from '../subworkflows/local/align_star'
-include { QUANTIFY_RSEM } from '../subworkflows/local/quantify_rsem'
-include { QUANTIFY_PSEUDO_ALIGNMENT as QUANTIFY_STAR_SALMON } from '../subworkflows/local/quantify_pseudo'
-include { QUANTIFY_PSEUDO_ALIGNMENT } from '../subworkflows/local/quantify_pseudo'
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- IMPORT NF-CORE MODULES/SUBWORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-//
-// MODULE: Installed directly from nf-core/modules
-//
-include { CAT_FASTQ } from '../modules/nf-core/cat/fastq'
-include { BBMAP_BBSPLIT } from '../modules/nf-core/bbmap/bbsplit'
-include { SAMTOOLS_SORT } from '../modules/nf-core/samtools/sort'
-include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap'
-include { QUALIMAP_RNASEQ } from '../modules/nf-core/qualimap/rnaseq'
-include { SORTMERNA } from '../modules/nf-core/sortmerna'
-include { STRINGTIE_STRINGTIE } from '../modules/nf-core/stringtie/stringtie'
-include { SUBREAD_FEATURECOUNTS } from '../modules/nf-core/subread/featurecounts'
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions'
-
-//
-// SUBWORKFLOW: Consisting entirely of nf-core/modules
-//
-include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../subworkflows/nf-core/fastq_subsample_fq_salmon'
-include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore'
-include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
-include { FASTQ_ALIGN_HISAT2 } from '../subworkflows/nf-core/fastq_align_hisat2'
-include { BAM_SORT_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_sort_stats_samtools'
-include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard'
-include { BAM_RSEQC } from '../subworkflows/nf-core/bam_rseqc'
-include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
-include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
-include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD } from '../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig'
-include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE } from '../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig'
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- RUN MAIN WORKFLOW
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-// Info required for completion email and summary
-def multiqc_report = []
-def pass_mapped_reads = [:]
-def pass_trimmed_reads = [:]
-def pass_strand_check = [:]
-
-workflow RNASEQ {
-
- ch_versions = Channel.empty()
-
- //
- // SUBWORKFLOW: Uncompress and prepare reference genome files
- //
- def biotype = params.gencode ? "gene_type" : params.featurecounts_group_type
- PREPARE_GENOME (
- params.fasta,
- params.gtf,
- params.gff,
- params.additional_fasta,
- params.transcript_fasta,
- params.gene_bed,
- params.splicesites,
- params.bbsplit_fasta_list,
- params.star_index,
- params.rsem_index,
- params.salmon_index,
- params.kallisto_index,
- params.hisat2_index,
- params.bbsplit_index,
- params.gencode,
- is_aws_igenome,
- biotype,
- prepareToolIndices,
- filterGtf
- )
- ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
-
- // Check if contigs in genome fasta file > 512 Mbp
- if (!params.skip_alignment && !params.bam_csi_index) {
- PREPARE_GENOME
- .out
- .fai
- .map { WorkflowRnaseq.checkMaxContigSize(it, log) }
- }
-
- //
- // Create input channel from input file provided through params.input
- //
- Channel
- .fromSamplesheet("input")
- .map {
- meta, fastq_1, fastq_2 ->
- if (!fastq_2) {
- return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
- } else {
- return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
- }
- }
- .groupTuple()
- .map {
- WorkflowRnaseq.validateInput(it)
- }
- .branch {
- meta, fastqs ->
- single : fastqs.size() == 1
- return [ meta, fastqs.flatten() ]
- multiple: fastqs.size() > 1
- return [ meta, fastqs.flatten() ]
- }
- .set { ch_fastq }
-
- //
- // MODULE: Concatenate FastQ files from same sample if required
- //
- CAT_FASTQ (
- ch_fastq.multiple
- )
- .reads
- .mix(ch_fastq.single)
- .set { ch_cat_fastq }
- ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))
-
- //
- // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore!
- //
- ch_filtered_reads = Channel.empty()
- ch_fastqc_raw_multiqc = Channel.empty()
- ch_fastqc_trim_multiqc = Channel.empty()
- ch_trim_log_multiqc = Channel.empty()
- ch_trim_read_count = Channel.empty()
- if (params.trimmer == 'trimgalore') {
- FASTQ_FASTQC_UMITOOLS_TRIMGALORE (
- ch_cat_fastq,
- params.skip_fastqc || params.skip_qc,
- params.with_umi,
- params.skip_umi_extract,
- params.skip_trimming,
- params.umi_discard_read,
- params.min_trimmed_reads
- )
- ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads
- ch_fastqc_raw_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip
- ch_fastqc_trim_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip
- ch_trim_log_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log
- ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count
- ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions)
- }
-
- //
- // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp
- //
- if (params.trimmer == 'fastp') {
- FASTQ_FASTQC_UMITOOLS_FASTP (
- ch_cat_fastq,
- params.skip_fastqc || params.skip_qc,
- params.with_umi,
- params.skip_umi_extract,
- params.umi_discard_read,
- params.skip_trimming,
- [],
- params.save_trimmed,
- params.save_trimmed,
- params.min_trimmed_reads
- )
- ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
- ch_fastqc_raw_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip
- ch_fastqc_trim_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip
- ch_trim_log_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json
- ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count
- ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
- }
-
- //
- // Get list of samples that failed trimming threshold for MultiQC report
- //
- ch_trim_read_count
- .map {
- meta, num_reads ->
- pass_trimmed_reads[meta.id] = true
- if (num_reads <= params.min_trimmed_reads.toFloat()) {
- pass_trimmed_reads[meta.id] = false
- return [ "$meta.id\t$num_reads" ]
- }
- }
- .collect()
- .map {
- tsv_data ->
- def header = ["Sample", "Reads after trimming"]
- WorkflowRnaseq.multiqcTsvFromList(tsv_data, header)
- }
- .set { ch_fail_trimming_multiqc }
-
- //
- // MODULE: Remove genome contaminant reads
- //
- if (!params.skip_bbsplit) {
- BBMAP_BBSPLIT (
- ch_filtered_reads,
- PREPARE_GENOME.out.bbsplit_index,
- [],
- [ [], [] ],
- false
- )
- .primary_fastq
- .set { ch_filtered_reads }
- ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
- }
-
- //
- // MODULE: Remove ribosomal RNA reads
- //
- ch_sortmerna_multiqc = Channel.empty()
- if (params.remove_ribo_rna) {
- ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect()
-
- SORTMERNA (
- ch_filtered_reads,
- ch_sortmerna_fastas
- )
- .reads
- .set { ch_filtered_reads }
-
- ch_sortmerna_multiqc = SORTMERNA.out.log
- ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
- }
-
- //
- // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness
- //
-
- // Branch FastQ channels if 'auto' specified to infer strandedness
- ch_filtered_reads
- .branch {
- meta, fastq ->
- auto_strand : meta.strandedness == 'auto'
- return [ meta, fastq ]
- known_strand: meta.strandedness != 'auto'
- return [ meta, fastq ]
- }
- .set { ch_strand_fastq }
-
- // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created
- PREPARE_GENOME
- .out
- .fasta
- .combine(ch_strand_fastq.auto_strand)
- .map { it.first() }
- .first()
- .set { ch_genome_fasta }
-
- FASTQ_SUBSAMPLE_FQ_SALMON (
- ch_strand_fastq.auto_strand,
- ch_genome_fasta,
- PREPARE_GENOME.out.transcript_fasta,
- PREPARE_GENOME.out.gtf,
- PREPARE_GENOME.out.salmon_index,
- !params.salmon_index && !('salmon' in prepareToolIndices)
- )
- ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions)
-
- FASTQ_SUBSAMPLE_FQ_SALMON
- .out
- .json_info
- .join(ch_strand_fastq.auto_strand)
- .map { meta, json, reads ->
- return [ meta + [ strandedness: WorkflowRnaseq.getSalmonInferredStrandedness(json) ], reads ]
- }
- .mix(ch_strand_fastq.known_strand)
- .set { ch_strand_inferred_filtered_fastq }
-
- //
- // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with Salmon
- //
- ch_genome_bam = Channel.empty()
- ch_genome_bam_index = Channel.empty()
- ch_samtools_stats = Channel.empty()
- ch_samtools_flagstat = Channel.empty()
- ch_samtools_idxstats = Channel.empty()
- ch_star_multiqc = Channel.empty()
- ch_aligner_pca_multiqc = Channel.empty()
- ch_aligner_clustering_multiqc = Channel.empty()
- if (!params.skip_alignment && params.aligner == 'star_salmon') {
- ALIGN_STAR (
- ch_strand_inferred_filtered_fastq,
- PREPARE_GENOME.out.star_index.map { [ [:], it ] },
- PREPARE_GENOME.out.gtf.map { [ [:], it ] },
- params.star_ignore_sjdbgtf,
- '',
- params.seq_center ?: '',
- is_aws_igenome,
- PREPARE_GENOME.out.fasta.map { [ [:], it ] }
- )
- ch_genome_bam = ALIGN_STAR.out.bam
- ch_genome_bam_index = ALIGN_STAR.out.bai
- ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript
- ch_samtools_stats = ALIGN_STAR.out.stats
- ch_samtools_flagstat = ALIGN_STAR.out.flagstat
- ch_samtools_idxstats = ALIGN_STAR.out.idxstats
- ch_star_multiqc = ALIGN_STAR.out.log_final
- if (params.bam_csi_index) {
- ch_genome_bam_index = ALIGN_STAR.out.csi
- }
- ch_versions = ch_versions.mix(ALIGN_STAR.out.versions)
-
- //
- // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
- //
- if (params.with_umi) {
- // Deduplicate genome BAM file before downstream analysis
- BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
- ch_genome_bam.join(ch_genome_bam_index, by: [0]),
- params.umitools_dedup_stats
- )
- ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam
- ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai
- ch_samtools_stats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats
- ch_samtools_flagstat = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat
- ch_samtools_idxstats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats
- if (params.bam_csi_index) {
- ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi
- }
- ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions)
-
- // Co-ordinate sort, index and run stats on transcriptome BAM
- BAM_SORT_STATS_SAMTOOLS (
- ch_transcriptome_bam,
- PREPARE_GENOME.out.fasta.map { [ [:], it ] }
- )
- ch_transcriptome_sorted_bam = BAM_SORT_STATS_SAMTOOLS.out.bam
- ch_transcriptome_sorted_bai = BAM_SORT_STATS_SAMTOOLS.out.bai
-
- // Deduplicate transcriptome BAM file before read counting with Salmon
- BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME (
- ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]),
- params.umitools_dedup_stats
- )
-
- // Name sort BAM before passing to Salmon
- SAMTOOLS_SORT (
- BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam
- )
-
- // Only run prepare_for_rsem.py on paired-end BAM files
- SAMTOOLS_SORT
- .out
- .bam
- .branch {
- meta, bam ->
- single_end: meta.single_end
- return [ meta, bam ]
- paired_end: !meta.single_end
- return [ meta, bam ]
- }
- .set { ch_umitools_dedup_bam }
-
- // Fix paired-end reads in name sorted BAM file
- // See: https://github.com/nf-core/rnaseq/issues/828
- UMITOOLS_PREPAREFORSALMON (
- ch_umitools_dedup_bam.paired_end
- )
- ch_versions = ch_versions.mix(UMITOOLS_PREPAREFORSALMON.out.versions.first())
-
- ch_umitools_dedup_bam
- .single_end
- .mix(UMITOOLS_PREPAREFORSALMON.out.bam)
- .set { ch_transcriptome_bam }
- }
-
- //
- // SUBWORKFLOW: Count reads from BAM alignments using Salmon
- //
- QUANTIFY_STAR_SALMON (
- ch_transcriptome_bam,
- ch_dummy_file,
- PREPARE_GENOME.out.transcript_fasta,
- PREPARE_GENOME.out.gtf,
- 'salmon',
- true,
- params.salmon_quant_libtype ?: '',
- params.kallisto_quant_fraglen,
- params.kallisto_quant_fraglen_sd
- )
- ch_versions = ch_versions.mix(QUANTIFY_STAR_SALMON.out.versions)
-
- if (!params.skip_qc & !params.skip_deseq2_qc) {
- DESEQ2_QC_STAR_SALMON (
- QUANTIFY_STAR_SALMON.out.counts_gene_length_scaled,
- ch_pca_header_multiqc,
- ch_clustering_header_multiqc
- )
- ch_aligner_pca_multiqc = DESEQ2_QC_STAR_SALMON.out.pca_multiqc
- ch_aligner_clustering_multiqc = DESEQ2_QC_STAR_SALMON.out.dists_multiqc
- ch_versions = ch_versions.mix(DESEQ2_QC_STAR_SALMON.out.versions)
- }
- }
-
- //
- // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with RSEM
- //
- ch_rsem_multiqc = Channel.empty()
- if (!params.skip_alignment && params.aligner == 'star_rsem') {
- QUANTIFY_RSEM (
- ch_strand_inferred_filtered_fastq,
- PREPARE_GENOME.out.rsem_index,
- PREPARE_GENOME.out.fasta.map { [ [:], it ] }
- )
- ch_genome_bam = QUANTIFY_RSEM.out.bam
- ch_genome_bam_index = QUANTIFY_RSEM.out.bai
- ch_samtools_stats = QUANTIFY_RSEM.out.stats
- ch_samtools_flagstat = QUANTIFY_RSEM.out.flagstat
- ch_samtools_idxstats = QUANTIFY_RSEM.out.idxstats
- ch_star_multiqc = QUANTIFY_RSEM.out.logs
- ch_rsem_multiqc = QUANTIFY_RSEM.out.stat
- if (params.bam_csi_index) {
- ch_genome_bam_index = QUANTIFY_RSEM.out.csi
- }
- ch_versions = ch_versions.mix(QUANTIFY_RSEM.out.versions)
-
- if (!params.skip_qc & !params.skip_deseq2_qc) {
- DESEQ2_QC_RSEM (
- QUANTIFY_RSEM.out.merged_counts_gene,
- ch_pca_header_multiqc,
- ch_clustering_header_multiqc
- )
- ch_aligner_pca_multiqc = DESEQ2_QC_RSEM.out.pca_multiqc
- ch_aligner_clustering_multiqc = DESEQ2_QC_RSEM.out.dists_multiqc
- ch_versions = ch_versions.mix(DESEQ2_QC_RSEM.out.versions)
- }
- }
-
- //
- // SUBWORKFLOW: Alignment with HISAT2
- //
- ch_hisat2_multiqc = Channel.empty()
- if (!params.skip_alignment && params.aligner == 'hisat2') {
- FASTQ_ALIGN_HISAT2 (
- ch_strand_inferred_filtered_fastq,
- PREPARE_GENOME.out.hisat2_index.map { [ [:], it ] },
- PREPARE_GENOME.out.splicesites.map { [ [:], it ] },
- PREPARE_GENOME.out.fasta.map { [ [:], it ] }
- )
- ch_genome_bam = FASTQ_ALIGN_HISAT2.out.bam
- ch_genome_bam_index = FASTQ_ALIGN_HISAT2.out.bai
- ch_samtools_stats = FASTQ_ALIGN_HISAT2.out.stats
- ch_samtools_flagstat = FASTQ_ALIGN_HISAT2.out.flagstat
- ch_samtools_idxstats = FASTQ_ALIGN_HISAT2.out.idxstats
- ch_hisat2_multiqc = FASTQ_ALIGN_HISAT2.out.summary
- if (params.bam_csi_index) {
- ch_genome_bam_index = FASTQ_ALIGN_HISAT2.out.csi
- }
- ch_versions = ch_versions.mix(FASTQ_ALIGN_HISAT2.out.versions)
-
- //
- // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
- //
- if (params.with_umi) {
- BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
- ch_genome_bam.join(ch_genome_bam_index, by: [0]),
- params.umitools_dedup_stats
- )
- ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam
- ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai
- ch_samtools_stats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats
- ch_samtools_flagstat = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat
- ch_samtools_idxstats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats
- if (params.bam_csi_index) {
- ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi
- }
- ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions)
- }
- }
-
- //
- // Filter channels to get samples that passed STAR minimum mapping percentage
- //
- ch_fail_mapping_multiqc = Channel.empty()
- if (!params.skip_alignment && params.aligner.contains('star')) {
- ch_star_multiqc
- .map { meta, align_log -> [ meta ] + WorkflowRnaseq.getStarPercentMapped(params, align_log) }
- .set { ch_percent_mapped }
-
- ch_genome_bam
- .join(ch_percent_mapped, by: [0])
- .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] }
- .set { ch_genome_bam }
-
- ch_genome_bam_index
- .join(ch_percent_mapped, by: [0])
- .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] }
- .set { ch_genome_bam_index }
-
- ch_percent_mapped
- .branch { meta, mapped, pass ->
- pass: pass
- pass_mapped_reads[meta.id] = true
- return [ "$meta.id\t$mapped" ]
- fail: !pass
- pass_mapped_reads[meta.id] = false
- return [ "$meta.id\t$mapped" ]
- }
- .set { ch_pass_fail_mapped }
-
- ch_pass_fail_mapped
- .fail
- .collect()
- .map {
- tsv_data ->
- def header = ["Sample", "STAR uniquely mapped reads (%)"]
- WorkflowRnaseq.multiqcTsvFromList(tsv_data, header)
- }
- .set { ch_fail_mapping_multiqc }
- }
-
- //
- // MODULE: Run Preseq
- //
- ch_preseq_multiqc = Channel.empty()
- if (!params.skip_alignment && !params.skip_qc && !params.skip_preseq) {
- PRESEQ_LCEXTRAP (
- ch_genome_bam
- )
- ch_preseq_multiqc = PRESEQ_LCEXTRAP.out.lc_extrap
- ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first())
- }
-
- //
- // SUBWORKFLOW: Mark duplicate reads
- //
- ch_markduplicates_multiqc = Channel.empty()
- if (!params.skip_alignment && !params.skip_markduplicates && !params.with_umi) {
- BAM_MARKDUPLICATES_PICARD (
- ch_genome_bam,
- PREPARE_GENOME.out.fasta.map { [ [:], it ] },
- PREPARE_GENOME.out.fai.map { [ [:], it ] }
- )
- ch_genome_bam = BAM_MARKDUPLICATES_PICARD.out.bam
- ch_genome_bam_index = BAM_MARKDUPLICATES_PICARD.out.bai
- ch_samtools_stats = BAM_MARKDUPLICATES_PICARD.out.stats
- ch_samtools_flagstat = BAM_MARKDUPLICATES_PICARD.out.flagstat
- ch_samtools_idxstats = BAM_MARKDUPLICATES_PICARD.out.idxstats
- ch_markduplicates_multiqc = BAM_MARKDUPLICATES_PICARD.out.metrics
- if (params.bam_csi_index) {
- ch_genome_bam_index = BAM_MARKDUPLICATES_PICARD.out.csi
- }
- ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions)
- }
-
- //
- // MODULE: STRINGTIE
- //
- if (!params.skip_alignment && !params.skip_stringtie) {
- STRINGTIE_STRINGTIE (
- ch_genome_bam,
- PREPARE_GENOME.out.gtf
- )
- ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions.first())
- }
-
- //
- // MODULE: Feature biotype QC using featureCounts
- //
- ch_featurecounts_multiqc = Channel.empty()
- if (!params.skip_alignment && !params.skip_qc && !params.skip_biotype_qc && biotype) {
-
- PREPARE_GENOME
- .out
- .gtf
- .map { WorkflowRnaseq.biotypeInGtf(it, biotype, log) }
- .set { biotype_in_gtf }
-
- // Prevent any samples from running if GTF file doesn't have a valid biotype
- ch_genome_bam
- .combine(PREPARE_GENOME.out.gtf)
- .combine(biotype_in_gtf)
- .filter { it[-1] }
- .map { it[0..<it.size()-1] }
- .set { ch_featurecounts }
-
- SUBREAD_FEATURECOUNTS (
- ch_featurecounts
- )
- ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first())
-
- MULTIQC_CUSTOM_BIOTYPE (
- SUBREAD_FEATURECOUNTS.out.counts,
- ch_biotypes_header_multiqc
- )
- ch_featurecounts_multiqc = MULTIQC_CUSTOM_BIOTYPE.out.tsv
- ch_versions = ch_versions.mix(MULTIQC_CUSTOM_BIOTYPE.out.versions.first())
- }
-
- //
- // MODULE: Genome-wide coverage with BEDTools
- //
- if (!params.skip_alignment && !params.skip_bigwig) {
-
- BEDTOOLS_GENOMECOV (
- ch_genome_bam
- )
- ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first())
-
- //
- // SUBWORKFLOW: Clip bedGraph file and convert to bigWig
- //
- BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD (
- BEDTOOLS_GENOMECOV.out.bedgraph_forward,
- PREPARE_GENOME.out.chrom_sizes
- )
- ch_versions = ch_versions.mix(BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD.out.versions)
-
- BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE (
- BEDTOOLS_GENOMECOV.out.bedgraph_reverse,
- PREPARE_GENOME.out.chrom_sizes
- )
- ch_versions = ch_versions.mix(BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE.out.versions)
- }
-
- //
- // MODULE: Downstream QC steps
- //
- ch_qualimap_multiqc = Channel.empty()
- ch_dupradar_multiqc = Channel.empty()
- ch_bamstat_multiqc = Channel.empty()
- ch_inferexperiment_multiqc = Channel.empty()
- ch_innerdistance_multiqc = Channel.empty()
- ch_junctionannotation_multiqc = Channel.empty()
- ch_junctionsaturation_multiqc = Channel.empty()
- ch_readdistribution_multiqc = Channel.empty()
- ch_readduplication_multiqc = Channel.empty()
- ch_tin_multiqc = Channel.empty()
- ch_fail_strand_multiqc = Channel.empty()
- if (!params.skip_alignment && !params.skip_qc) {
- if (!params.skip_qualimap) {
- QUALIMAP_RNASEQ (
- ch_genome_bam,
- PREPARE_GENOME.out.gtf.map { [ [:], it ] }
- )
- ch_qualimap_multiqc = QUALIMAP_RNASEQ.out.results
- ch_versions = ch_versions.mix(QUALIMAP_RNASEQ.out.versions.first())
- }
-
- if (!params.skip_dupradar) {
- DUPRADAR (
- ch_genome_bam,
- PREPARE_GENOME.out.gtf
- )
- ch_dupradar_multiqc = DUPRADAR.out.multiqc
- ch_versions = ch_versions.mix(DUPRADAR.out.versions.first())
- }
-
- if (!params.skip_rseqc && rseqc_modules.size() > 0) {
- BAM_RSEQC (
- ch_genome_bam.join(ch_genome_bam_index, by: [0]),
- PREPARE_GENOME.out.gene_bed,
- rseqc_modules
- )
- ch_bamstat_multiqc = BAM_RSEQC.out.bamstat_txt
- ch_inferexperiment_multiqc = BAM_RSEQC.out.inferexperiment_txt
- ch_innerdistance_multiqc = BAM_RSEQC.out.innerdistance_freq
- ch_junctionannotation_multiqc = BAM_RSEQC.out.junctionannotation_log
- ch_junctionsaturation_multiqc = BAM_RSEQC.out.junctionsaturation_rscript
- ch_readdistribution_multiqc = BAM_RSEQC.out.readdistribution_txt
- ch_readduplication_multiqc = BAM_RSEQC.out.readduplication_pos_xls
- ch_tin_multiqc = BAM_RSEQC.out.tin_txt
- ch_versions = ch_versions.mix(BAM_RSEQC.out.versions)
-
- ch_inferexperiment_multiqc
- .map {
- meta, strand_log ->
- def inferred_strand = WorkflowRnaseq.getInferexperimentStrandedness(strand_log, 30)
- pass_strand_check[meta.id] = true
- if (meta.strandedness != inferred_strand[0]) {
- pass_strand_check[meta.id] = false
- return [ "$meta.id\t$meta.strandedness\t${inferred_strand.join('\t')}" ]
- }
- }
- .collect()
- .map {
- tsv_data ->
- def header = [
- "Sample",
- "Provided strandedness",
- "Inferred strandedness",
- "Sense (%)",
- "Antisense (%)",
- "Undetermined (%)"
- ]
- WorkflowRnaseq.multiqcTsvFromList(tsv_data, header)
- }
- .set { ch_fail_strand_multiqc }
- }
- }
-
- //
- // SUBWORKFLOW: Pseudoalignment and quantification with Salmon
- //
- ch_pseudo_multiqc = Channel.empty()
- ch_pseudoaligner_pca_multiqc = Channel.empty()
- ch_pseudoaligner_clustering_multiqc = Channel.empty()
- if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
-
- if (params.pseudo_aligner == 'salmon') {
- ch_pseudo_index = PREPARE_GENOME.out.salmon_index
- } else {
- ch_pseudo_index = PREPARE_GENOME.out.kallisto_index
- }
-
- QUANTIFY_PSEUDO_ALIGNMENT (
- ch_strand_inferred_filtered_fastq,
- ch_pseudo_index,
- ch_dummy_file,
- PREPARE_GENOME.out.gtf,
- params.pseudo_aligner,
- false,
- params.salmon_quant_libtype ?: '',
- params.kallisto_quant_fraglen,
- params.kallisto_quant_fraglen_sd
- )
- ch_pseudo_multiqc = QUANTIFY_PSEUDO_ALIGNMENT.out.multiqc
- ch_counts_gene_length_scaled = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_gene_length_scaled
- ch_versions = ch_versions.mix(QUANTIFY_PSEUDO_ALIGNMENT.out.versions)
-
- if (!params.skip_qc & !params.skip_deseq2_qc) {
- DESEQ2_QC_PSEUDO (
- ch_counts_gene_length_scaled,
- ch_pca_header_multiqc,
- ch_clustering_header_multiqc
- )
- ch_pseudoaligner_pca_multiqc = DESEQ2_QC_PSEUDO.out.pca_multiqc
- ch_pseudoaligner_clustering_multiqc = DESEQ2_QC_PSEUDO.out.dists_multiqc
- ch_versions = ch_versions.mix(DESEQ2_QC_PSEUDO.out.versions)
- }
- }
-
- //
- // MODULE: Pipeline reporting
- //
- CUSTOM_DUMPSOFTWAREVERSIONS (
- ch_versions.unique().collectFile(name: 'collated_versions.yml')
- )
-
- //
- // MODULE: MultiQC
- //
- if (!params.skip_multiqc) {
- workflow_summary = WorkflowRnaseq.paramsSummaryMultiqc(workflow, summary_params)
- ch_workflow_summary = Channel.value(workflow_summary)
-
- methods_description = WorkflowRnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
- ch_methods_description = Channel.value(methods_description)
-
- MULTIQC (
- ch_multiqc_config,
- ch_multiqc_custom_config.collect().ifEmpty([]),
- CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(),
- ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'),
- ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'),
- ch_multiqc_logo.collect().ifEmpty([]),
- ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv').ifEmpty([]),
- ch_fail_mapping_multiqc.collectFile(name: 'fail_mapped_samples_mqc.tsv').ifEmpty([]),
- ch_fail_strand_multiqc.collectFile(name: 'fail_strand_check_mqc.tsv').ifEmpty([]),
- ch_fastqc_raw_multiqc.collect{it[1]}.ifEmpty([]),
- ch_fastqc_trim_multiqc.collect{it[1]}.ifEmpty([]),
- ch_trim_log_multiqc.collect{it[1]}.ifEmpty([]),
- ch_sortmerna_multiqc.collect{it[1]}.ifEmpty([]),
- ch_star_multiqc.collect{it[1]}.ifEmpty([]),
- ch_hisat2_multiqc.collect{it[1]}.ifEmpty([]),
- ch_rsem_multiqc.collect{it[1]}.ifEmpty([]),
- ch_pseudo_multiqc.collect{it[1]}.ifEmpty([]),
- ch_samtools_stats.collect{it[1]}.ifEmpty([]),
- ch_samtools_flagstat.collect{it[1]}.ifEmpty([]),
- ch_samtools_idxstats.collect{it[1]}.ifEmpty([]),
- ch_markduplicates_multiqc.collect{it[1]}.ifEmpty([]),
- ch_featurecounts_multiqc.collect{it[1]}.ifEmpty([]),
- ch_aligner_pca_multiqc.collect().ifEmpty([]),
- ch_aligner_clustering_multiqc.collect().ifEmpty([]),
- ch_pseudoaligner_pca_multiqc.collect().ifEmpty([]),
- ch_pseudoaligner_clustering_multiqc.collect().ifEmpty([]),
- ch_preseq_multiqc.collect{it[1]}.ifEmpty([]),
- ch_qualimap_multiqc.collect{it[1]}.ifEmpty([]),
- ch_dupradar_multiqc.collect{it[1]}.ifEmpty([]),
- ch_bamstat_multiqc.collect{it[1]}.ifEmpty([]),
- ch_inferexperiment_multiqc.collect{it[1]}.ifEmpty([]),
- ch_innerdistance_multiqc.collect{it[1]}.ifEmpty([]),
- ch_junctionannotation_multiqc.collect{it[1]}.ifEmpty([]),
- ch_junctionsaturation_multiqc.collect{it[1]}.ifEmpty([]),
- ch_readdistribution_multiqc.collect{it[1]}.ifEmpty([]),
- ch_readduplication_multiqc.collect{it[1]}.ifEmpty([]),
- ch_tin_multiqc.collect{it[1]}.ifEmpty([])
- )
- multiqc_report = MULTIQC.out.report.toList()
- }
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- COMPLETION EMAIL AND SUMMARY
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-workflow.onComplete {
- if (params.email || params.email_on_fail) {
- NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report, pass_mapped_reads, pass_trimmed_reads, pass_strand_check)
- }
-
- NfcoreTemplate.dump_parameters(workflow, params)
- NfcoreTemplate.summary(workflow, params, log, pass_mapped_reads, pass_trimmed_reads, pass_strand_check)
-
- if (params.hook_url) {
- NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
- }
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- THE END
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
diff --git a/assets/multiqc/biotypes_header.txt b/workflows/rnaseq/assets/multiqc/biotypes_header.txt
similarity index 100%
rename from assets/multiqc/biotypes_header.txt
rename to workflows/rnaseq/assets/multiqc/biotypes_header.txt
diff --git a/assets/multiqc/deseq2_clustering_header.txt b/workflows/rnaseq/assets/multiqc/deseq2_clustering_header.txt
similarity index 100%
rename from assets/multiqc/deseq2_clustering_header.txt
rename to workflows/rnaseq/assets/multiqc/deseq2_clustering_header.txt
diff --git a/assets/multiqc/deseq2_pca_header.txt b/workflows/rnaseq/assets/multiqc/deseq2_pca_header.txt
similarity index 100%
rename from assets/multiqc/deseq2_pca_header.txt
rename to workflows/rnaseq/assets/multiqc/deseq2_pca_header.txt
diff --git a/assets/methods_description_template.yml b/workflows/rnaseq/assets/multiqc/methods_description_template.yml
similarity index 100%
rename from assets/methods_description_template.yml
rename to workflows/rnaseq/assets/multiqc/methods_description_template.yml
diff --git a/assets/multiqc_config.yml b/workflows/rnaseq/assets/multiqc/multiqc_config.yml
similarity index 100%
rename from assets/multiqc_config.yml
rename to workflows/rnaseq/assets/multiqc/multiqc_config.yml
diff --git a/assets/rrna-db-defaults.txt b/workflows/rnaseq/assets/rrna-db-defaults.txt
similarity index 100%
rename from assets/rrna-db-defaults.txt
rename to workflows/rnaseq/assets/rrna-db-defaults.txt
diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf
new file mode 100755
index 000000000..e19ccbbf0
--- /dev/null
+++ b/workflows/rnaseq/main.nf
@@ -0,0 +1,773 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ IMPORT LOCAL MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// MODULE: Loaded from modules/local/
+//
+include { BEDTOOLS_GENOMECOV } from '../../modules/local/bedtools_genomecov'
+include { DESEQ2_QC as DESEQ2_QC_STAR_SALMON } from '../../modules/local/deseq2_qc'
+include { DESEQ2_QC as DESEQ2_QC_RSEM } from '../../modules/local/deseq2_qc'
+include { DESEQ2_QC as DESEQ2_QC_PSEUDO } from '../../modules/local/deseq2_qc'
+include { DUPRADAR } from '../../modules/local/dupradar'
+include { MULTIQC_CUSTOM_BIOTYPE } from '../../modules/local/multiqc_custom_biotype'
+include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../../modules/local/umitools_prepareforrsem'
+
+//
+// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
+//
+include { ALIGN_STAR } from '../../subworkflows/local/align_star'
+include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem'
+include { QUANTIFY_PSEUDO_ALIGNMENT as QUANTIFY_STAR_SALMON } from '../../subworkflows/local/quantify_pseudo'
+include { QUANTIFY_PSEUDO_ALIGNMENT } from '../../subworkflows/local/quantify_pseudo'
+
+include { multiqcTsvFromList } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { getSalmonInferredStrandedness } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { getStarPercentMapped } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { biotypeInGtf } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { getInferexperimentStrandedness } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ IMPORT NF-CORE MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// MODULE: Installed directly from nf-core/modules
+//
+include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq'
+include { BBMAP_BBSPLIT } from '../../modules/nf-core/bbmap/bbsplit'
+include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort'
+include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap'
+include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq'
+include { SORTMERNA } from '../../modules/nf-core/sortmerna'
+include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie'
+include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts'
+include { MULTIQC } from '../../modules/nf-core/multiqc'
+
+//
+// SUBWORKFLOW: Consisting entirely of nf-core/modules
+//
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon'
+include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore'
+include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
+include { FASTQ_ALIGN_HISAT2 } from '../../subworkflows/nf-core/fastq_align_hisat2'
+include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools'
+include { BAM_MARKDUPLICATES_PICARD } from '../../subworkflows/nf-core/bam_markduplicates_picard'
+include { BAM_RSEQC } from '../../subworkflows/nf-core/bam_rseqc'
+include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
+include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
+include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig'
+include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ RUN MAIN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Header files for MultiQC
+ch_pca_header_multiqc = file("$projectDir/workflows/rnaseq/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true)
+ch_clustering_header_multiqc = file("$projectDir/workflows/rnaseq/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true)
+ch_biotypes_header_multiqc = file("$projectDir/workflows/rnaseq/assets/multiqc/biotypes_header.txt", checkIfExists: true)
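+// ch_dummy_file is passed wherever a quantification input is unused in a given
+// mode (e.g. the index when quantifying from transcriptome BAM); any file that
+// is guaranteed to exist will do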
+ch_dummy_file = ch_pca_header_multiqc
+
+workflow NFCORE_RNASEQ {
+
+ take:
+ ch_samplesheet // channel: samplesheet read in from --input
+ ch_versions // channel: [ path(versions.yml) ]
+ ch_fasta // channel: path(genome.fasta)
+ ch_gtf // channel: path(genome.gtf)
+ ch_fai // channel: path(genome.fai)
+ ch_chrom_sizes // channel: path(genome.sizes)
+ ch_gene_bed // channel: path(gene.bed)
+ ch_transcript_fasta // channel: path(transcript.fasta)
+ ch_star_index // channel: path(star/index/)
+ ch_rsem_index // channel: path(rsem/index/)
+ ch_hisat2_index // channel: path(hisat2/index/)
+ ch_salmon_index // channel: path(salmon/index/)
+ ch_kallisto_index // channel: [ meta, path(kallisto/index/) ]
+ ch_bbsplit_index // channel: path(bbsplit/index/)
+ ch_splicesites // channel: path(genome.splicesites.txt)
+
+ main:
+
+ ch_multiqc_files = Channel.empty()
+
+ //
+ // Create separate channels for samples that have single/multiple FastQ files to merge
+ //
+ ch_samplesheet
+ .branch {
+ meta, fastqs ->
+ single : fastqs.size() == 1
+ return [ meta, fastqs.flatten() ]
+ multiple: fastqs.size() > 1
+ return [ meta, fastqs.flatten() ]
+ }
+ .set { ch_fastq }
+
+ //
+ // MODULE: Concatenate FastQ files from same sample if required
+ //
+ CAT_FASTQ (
+ ch_fastq.multiple
+ )
+ .reads
+ .mix(ch_fastq.single)
+ .set { ch_cat_fastq }
+ ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))
+
+ //
+ // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore!
+ //
+ ch_filtered_reads = Channel.empty()
+ ch_trim_read_count = Channel.empty()
+ if (params.trimmer == 'trimgalore') {
+ FASTQ_FASTQC_UMITOOLS_TRIMGALORE (
+ ch_cat_fastq,
+ params.skip_fastqc || params.skip_qc,
+ params.with_umi,
+ params.skip_umi_extract,
+ params.skip_trimming,
+ params.umi_discard_read,
+ params.min_trimmed_reads
+ )
+ ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads
+ ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log.collect{it[1]})
+ ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions)
+ }
+
+ //
+ // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp
+ //
+ if (params.trimmer == 'fastp') {
+ FASTQ_FASTQC_UMITOOLS_FASTP (
+ ch_cat_fastq,
+ params.skip_fastqc || params.skip_qc,
+ params.with_umi,
+ params.skip_umi_extract,
+ params.umi_discard_read,
+ params.skip_trimming,
+ [],
+ params.save_trimmed,
+ params.save_trimmed,
+ params.min_trimmed_reads
+ )
+ ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
+ ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.collect{it[1]})
+ ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
+ }
+
+ //
+ // Get list of samples that failed trimming threshold for MultiQC report
+ //
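+ // Only samples at or below --min_trimmed_reads emit a row from the map below;
+ // passing samples return nothing and are dropped from the table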
+ ch_trim_read_count
+ .map {
+ meta, num_reads ->
+ if (num_reads <= params.min_trimmed_reads.toFloat()) {
+ return [ "$meta.id\t$num_reads" ]
+ }
+ }
+ .collect()
+ .map {
+ tsv_data ->
+ def header = ["Sample", "Reads after trimming"]
+ multiqcTsvFromList(tsv_data, header)
+ }
+ .set { ch_fail_trimming_multiqc }
+ ch_multiqc_files = ch_multiqc_files.mix(ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv'))
+
+ //
+ // MODULE: Remove genome contaminant reads
+ //
+ if (!params.skip_bbsplit) {
+ BBMAP_BBSPLIT (
+ ch_filtered_reads,
+ ch_bbsplit_index,
+ [],
+ [ [], [] ],
+ false
+ )
+ .primary_fastq
+ .set { ch_filtered_reads }
+ ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
+ }
+
+ //
+ // MODULE: Remove ribosomal RNA reads
+ //
+ if (params.remove_ribo_rna) {
+ ch_ribo_db = file(params.ribo_database_manifest)
+ ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect()
+
+ SORTMERNA (
+ ch_filtered_reads,
+ ch_sortmerna_fastas
+ )
+ .reads
+ .set { ch_filtered_reads }
+
+ ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log.collect{it[1]})
+ ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+ }
+
+ //
+ // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness
+ //
+
+ // Branch FastQ channels if 'auto' specified to infer strandedness
+ ch_filtered_reads
+ .branch {
+ meta, fastq ->
+ auto_strand : meta.strandedness == 'auto'
+ return [ meta, fastq ]
+ known_strand: meta.strandedness != 'auto'
+ return [ meta, fastq ]
+ }
+ .set { ch_strand_fastq }
+
+ // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created
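+ // (combine() with an empty channel emits nothing, so first() yields an empty
+ // channel here rather than the genome FASTA)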
+ ch_fasta
+ .combine(ch_strand_fastq.auto_strand)
+ .map { it.first() }
+ .first()
+ .set { ch_genome_fasta }
+
+ def prepare_tool_indices = []
+ if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
+ prepare_tool_indices << params.pseudo_aligner
+ }
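+ // Last argument: build a Salmon index on the fly only if one was not supplied
+ // via --salmon_index and 'salmon' is not already in prepare_tool_indices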
+ FASTQ_SUBSAMPLE_FQ_SALMON (
+ ch_strand_fastq.auto_strand,
+ ch_genome_fasta,
+ ch_transcript_fasta,
+ ch_gtf,
+ ch_salmon_index,
+ !params.salmon_index && !('salmon' in prepare_tool_indices)
+ )
+ ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions)
+
+ FASTQ_SUBSAMPLE_FQ_SALMON
+ .out
+ .json_info
+ .join(ch_strand_fastq.auto_strand)
+ .map { meta, json, reads ->
+ return [ meta + [ strandedness: getSalmonInferredStrandedness(json) ], reads ]
+ }
+ .mix(ch_strand_fastq.known_strand)
+ .set { ch_strand_inferred_filtered_fastq }
+
+ //
+ // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with Salmon
+ //
+ ch_genome_bam = Channel.empty()
+ ch_genome_bam_index = Channel.empty()
+ ch_star_log = Channel.empty()
+ if (!params.skip_alignment && params.aligner == 'star_salmon') {
+ // Check if an AWS iGenome has been provided to use the appropriate version of STAR
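+ // (STAR indices hosted with AWS iGenomes were generated with an older STAR
+ // release, which newer STAR versions cannot read)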
+ def is_aws_igenome = false
+ if (params.fasta && params.gtf) {
+ if ((file(params.fasta).getName() - '.gz' == 'genome.fa') && (file(params.gtf).getName() - '.gz' == 'genes.gtf')) {
+ is_aws_igenome = true
+ }
+ }
+
+ ALIGN_STAR (
+ ch_strand_inferred_filtered_fastq,
+ ch_star_index.map { [ [:], it ] },
+ ch_gtf.map { [ [:], it ] },
+ params.star_ignore_sjdbgtf,
+ '',
+ params.seq_center ?: '',
+ is_aws_igenome,
+ ch_fasta.map { [ [:], it ] }
+ )
+ ch_genome_bam = ALIGN_STAR.out.bam
+ ch_genome_bam_index = ALIGN_STAR.out.bai
+ ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript
+ ch_star_log = ALIGN_STAR.out.log_final
+ ch_multiqc_files = ch_multiqc_files.mix(ALIGN_STAR.out.stats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(ALIGN_STAR.out.flagstat.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(ALIGN_STAR.out.idxstats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(ch_star_log.collect{it[1]})
+
+ if (params.bam_csi_index) {
+ ch_genome_bam_index = ALIGN_STAR.out.csi
+ }
+ ch_versions = ch_versions.mix(ALIGN_STAR.out.versions)
+
+ //
+ // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
+ //
+ if (params.with_umi) {
+ // Deduplicate genome BAM file before downstream analysis
+ BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
+ ch_genome_bam.join(ch_genome_bam_index, by: [0]),
+ params.umitools_dedup_stats
+ )
+ ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam
+ ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats.collect{it[1]})
+
+ if (params.bam_csi_index) {
+ ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi
+ }
+ ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions)
+
+ // Co-ordinate sort, index and run stats on transcriptome BAM
+ BAM_SORT_STATS_SAMTOOLS (
+ ch_transcriptome_bam,
+ ch_fasta.map { [ [:], it ] }
+ )
+ ch_transcriptome_sorted_bam = BAM_SORT_STATS_SAMTOOLS.out.bam
+ ch_transcriptome_sorted_bai = BAM_SORT_STATS_SAMTOOLS.out.bai
+
+ // Deduplicate transcriptome BAM file before read counting with Salmon
+ BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME (
+ ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]),
+ params.umitools_dedup_stats
+ )
+
+ // Name sort BAM before passing to Salmon
+ SAMTOOLS_SORT (
+ BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam
+ )
+
+ // Only run prepare_for_rsem.py on paired-end BAM files
+ SAMTOOLS_SORT
+ .out
+ .bam
+ .branch {
+ meta, bam ->
+ single_end: meta.single_end
+ return [ meta, bam ]
+ paired_end: !meta.single_end
+ return [ meta, bam ]
+ }
+ .set { ch_umitools_dedup_bam }
+
+ // Fix paired-end reads in name sorted BAM file
+ // See: https://github.com/nf-core/rnaseq/issues/828
+ UMITOOLS_PREPAREFORSALMON (
+ ch_umitools_dedup_bam.paired_end
+ )
+ ch_versions = ch_versions.mix(UMITOOLS_PREPAREFORSALMON.out.versions.first())
+
+ ch_umitools_dedup_bam
+ .single_end
+ .mix(UMITOOLS_PREPAREFORSALMON.out.bam)
+ .set { ch_transcriptome_bam }
+ }
+
+ //
+ // SUBWORKFLOW: Count reads from BAM alignments using Salmon
+ //
+ QUANTIFY_STAR_SALMON (
+ ch_transcriptome_bam,
+ ch_dummy_file,
+ ch_transcript_fasta,
+ ch_gtf,
+ 'salmon',
+ true,
+ params.salmon_quant_libtype ?: '',
+ params.kallisto_quant_fraglen,
+ params.kallisto_quant_fraglen_sd
+ )
+ ch_versions = ch_versions.mix(QUANTIFY_STAR_SALMON.out.versions)
+
+ if (!params.skip_qc && !params.skip_deseq2_qc) {
+ DESEQ2_QC_STAR_SALMON (
+ QUANTIFY_STAR_SALMON.out.counts_gene_length_scaled,
+ ch_pca_header_multiqc,
+ ch_clustering_header_multiqc
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_STAR_SALMON.out.pca_multiqc.collect())
+ ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_STAR_SALMON.out.dists_multiqc.collect())
+ ch_versions = ch_versions.mix(DESEQ2_QC_STAR_SALMON.out.versions)
+ }
+ }
+
+ //
+ // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with RSEM
+ //
+ if (!params.skip_alignment && params.aligner == 'star_rsem') {
+ QUANTIFY_RSEM (
+ ch_strand_inferred_filtered_fastq,
+ ch_rsem_index,
+ ch_fasta.map { [ [:], it ] }
+ )
+ ch_genome_bam = QUANTIFY_RSEM.out.bam
+ ch_genome_bam_index = QUANTIFY_RSEM.out.bai
+ ch_star_log = QUANTIFY_RSEM.out.logs
+ ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.stats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.flagstat.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.idxstats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(ch_star_log.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.stat.collect{it[1]})
+
+ if (params.bam_csi_index) {
+ ch_genome_bam_index = QUANTIFY_RSEM.out.csi
+ }
+ ch_versions = ch_versions.mix(QUANTIFY_RSEM.out.versions)
+
+ if (!params.skip_qc && !params.skip_deseq2_qc) {
+ DESEQ2_QC_RSEM (
+ QUANTIFY_RSEM.out.merged_counts_gene,
+ ch_pca_header_multiqc,
+ ch_clustering_header_multiqc
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_RSEM.out.pca_multiqc.collect())
+ ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_RSEM.out.dists_multiqc.collect())
+ ch_versions = ch_versions.mix(DESEQ2_QC_RSEM.out.versions)
+ }
+ }
+
+ //
+ // SUBWORKFLOW: Alignment with HISAT2
+ //
+ if (!params.skip_alignment && params.aligner == 'hisat2') {
+ FASTQ_ALIGN_HISAT2 (
+ ch_strand_inferred_filtered_fastq,
+ ch_hisat2_index.map { [ [:], it ] },
+ ch_splicesites.map { [ [:], it ] },
+ ch_fasta.map { [ [:], it ] }
+ )
+ ch_genome_bam = FASTQ_ALIGN_HISAT2.out.bam
+ ch_genome_bam_index = FASTQ_ALIGN_HISAT2.out.bai
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_HISAT2.out.stats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_HISAT2.out.flagstat.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_HISAT2.out.idxstats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_HISAT2.out.summary.collect{it[1]})
+
+ if (params.bam_csi_index) {
+ ch_genome_bam_index = FASTQ_ALIGN_HISAT2.out.csi
+ }
+ ch_versions = ch_versions.mix(FASTQ_ALIGN_HISAT2.out.versions)
+
+ //
+ // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
+ //
+ if (params.with_umi) {
+ BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
+ ch_genome_bam.join(ch_genome_bam_index, by: [0]),
+ params.umitools_dedup_stats
+ )
+ ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam
+ ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats.collect{it[1]})
+ if (params.bam_csi_index) {
+ ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi
+ }
+ ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions)
+ }
+ }
+
+ //
+ // Filter channels to get samples that passed STAR minimum mapping percentage
+ //
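+ // A sample that fails the minimum mapped-read check maps to nothing in the
+ // joins below, silently dropping its BAM/BAI from downstream analysis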
+ if (!params.skip_alignment && params.aligner.contains('star')) {
+ ch_star_log
+ .map { meta, align_log -> [ meta ] + getStarPercentMapped(params, align_log) }
+ .set { ch_percent_mapped }
+
+ ch_genome_bam
+ .join(ch_percent_mapped, by: [0])
+ .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] }
+ .set { ch_genome_bam }
+
+ ch_genome_bam_index
+ .join(ch_percent_mapped, by: [0])
+ .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] }
+ .set { ch_genome_bam_index }
+
+ ch_percent_mapped
+ .branch { meta, mapped, pass ->
+ pass: pass
+ return [ "$meta.id\t$mapped" ]
+ fail: !pass
+ return [ "$meta.id\t$mapped" ]
+ }
+ .set { ch_pass_fail_mapped }
+
+ ch_pass_fail_mapped
+ .fail
+ .collect()
+ .map {
+ tsv_data ->
+ def header = ["Sample", "STAR uniquely mapped reads (%)"]
+ multiqcTsvFromList(tsv_data, header)
+ }
+ .set { ch_fail_mapping_multiqc }
+ ch_multiqc_files = ch_multiqc_files.mix(ch_fail_mapping_multiqc.collectFile(name: 'fail_mapped_samples_mqc.tsv'))
+ }
+
+ //
+ // MODULE: Run Preseq
+ //
+ if (!params.skip_alignment && !params.skip_qc && !params.skip_preseq) {
+ PRESEQ_LCEXTRAP (
+ ch_genome_bam
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.lc_extrap.collect{it[1]})
+ ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first())
+ }
+
+ //
+ // SUBWORKFLOW: Mark duplicate reads
+ //
+ if (!params.skip_alignment && !params.skip_markduplicates && !params.with_umi) {
+ BAM_MARKDUPLICATES_PICARD (
+ ch_genome_bam,
+ ch_fasta.map { [ [:], it ] },
+ ch_fai.map { [ [:], it ] }
+ )
+ ch_genome_bam = BAM_MARKDUPLICATES_PICARD.out.bam
+ ch_genome_bam_index = BAM_MARKDUPLICATES_PICARD.out.bai
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.stats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.flagstat.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.idxstats.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.metrics.collect{it[1]})
+
+ if (params.bam_csi_index) {
+ ch_genome_bam_index = BAM_MARKDUPLICATES_PICARD.out.csi
+ }
+ ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions)
+ }
+
+ //
+ // MODULE: STRINGTIE
+ //
+ if (!params.skip_alignment && !params.skip_stringtie) {
+ STRINGTIE_STRINGTIE (
+ ch_genome_bam,
+ ch_gtf
+ )
+ ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions.first())
+ }
+
+ //
+ // MODULE: Feature biotype QC using featureCounts
+ //
+ def biotype = params.gencode ? "gene_type" : params.featurecounts_group_type
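+ // (GENCODE GTFs label biotypes as 'gene_type'; Ensembl GTFs use 'gene_biotype')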
+ if (!params.skip_alignment && !params.skip_qc && !params.skip_biotype_qc && biotype) {
+
+ ch_gtf
+ .map { biotypeInGtf(it, biotype) }
+ .set { biotype_in_gtf }
+
+ // Prevent any samples from running if GTF file doesn't have a valid biotype
+ ch_genome_bam
+ .combine(ch_gtf)
+ .combine(biotype_in_gtf)
+ .filter { it[-1] }
+ .map { it[0..<it.size()-1] }
+ .set { ch_featurecounts }
+
+ SUBREAD_FEATURECOUNTS (
+ ch_featurecounts
+ )
+ ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first())
+
+ MULTIQC_CUSTOM_BIOTYPE (
+ SUBREAD_FEATURECOUNTS.out.counts,
+ ch_biotypes_header_multiqc
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(MULTIQC_CUSTOM_BIOTYPE.out.tsv.collect{it[1]})
+ ch_versions = ch_versions.mix(MULTIQC_CUSTOM_BIOTYPE.out.versions.first())
+ }
+
+ //
+ // MODULE: Genome-wide coverage with BEDTools
+ //
+ if (!params.skip_alignment && !params.skip_bigwig) {
+
+ BEDTOOLS_GENOMECOV (
+ ch_genome_bam
+ )
+ ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first())
+
+ //
+ // SUBWORKFLOW: Clip bedGraph file and convert to bigWig
+ //
+ BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD (
+ BEDTOOLS_GENOMECOV.out.bedgraph_forward,
+ ch_chrom_sizes
+ )
+ ch_versions = ch_versions.mix(BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD.out.versions)
+
+ BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE (
+ BEDTOOLS_GENOMECOV.out.bedgraph_reverse,
+ ch_chrom_sizes
+ )
+ ch_versions = ch_versions.mix(BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE.out.versions)
+ }
+
+ //
+ // MODULE: Downstream QC steps
+ //
+ def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : []
+ if (params.bam_csi_index) {
+ for (rseqc_module in ['read_distribution', 'inner_distance', 'tin']) {
+ if (rseqc_modules.contains(rseqc_module)) {
+ rseqc_modules.remove(rseqc_module)
+ }
+ }
+ }
+ if (!params.skip_alignment && !params.skip_qc) {
+ if (!params.skip_qualimap) {
+ QUALIMAP_RNASEQ (
+ ch_genome_bam,
+ ch_gtf.map { [ [:], it ] }
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_RNASEQ.out.results.collect{it[1]})
+ ch_versions = ch_versions.mix(QUALIMAP_RNASEQ.out.versions.first())
+ }
+
+ if (!params.skip_dupradar) {
+ DUPRADAR (
+ ch_genome_bam,
+ ch_gtf
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(DUPRADAR.out.multiqc.collect{it[1]})
+ ch_versions = ch_versions.mix(DUPRADAR.out.versions.first())
+ }
+
+ if (!params.skip_rseqc && rseqc_modules.size() > 0) {
+ BAM_RSEQC (
+ ch_genome_bam.join(ch_genome_bam_index, by: [0]),
+ ch_gene_bed,
+ rseqc_modules
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.bamstat_txt.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.inferexperiment_txt.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.innerdistance_freq.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.junctionannotation_log.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.junctionsaturation_rscript.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.readdistribution_txt.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.readduplication_pos_xls.collect{it[1]})
+ ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.tin_txt.collect{it[1]})
+ ch_versions = ch_versions.mix(BAM_RSEQC.out.versions)
+
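+ // Compare each sample's declared strandedness against the RSeQC inference;
+ // 30 is the percentage cutoff the helper uses when calling a strandedness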
+ BAM_RSEQC
+ .out
+ .inferexperiment_txt
+ .map {
+ meta, strand_log ->
+ def inferred_strand = getInferexperimentStrandedness(strand_log, 30)
+ if (meta.strandedness != inferred_strand[0]) {
+ return [ "$meta.id\t$meta.strandedness\t${inferred_strand.join('\t')}" ]
+ }
+ }
+ .collect()
+ .map {
+ tsv_data ->
+ def header = [
+ "Sample",
+ "Provided strandedness",
+ "Inferred strandedness",
+ "Sense (%)",
+ "Antisense (%)",
+ "Undetermined (%)"
+ ]
+ multiqcTsvFromList(tsv_data, header)
+ }
+ .set { ch_fail_strand_multiqc }
+ ch_multiqc_files = ch_multiqc_files.mix(ch_fail_strand_multiqc.collectFile(name: 'fail_strand_check_mqc.tsv'))
+ }
+ }
+
+ //
+ // SUBWORKFLOW: Pseudoalignment and quantification with Salmon
+ //
+ if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
+
+ if (params.pseudo_aligner == 'salmon') {
+ ch_pseudo_index = ch_salmon_index
+ } else {
+ ch_pseudo_index = ch_kallisto_index
+ }
+
+ QUANTIFY_PSEUDO_ALIGNMENT (
+ ch_strand_inferred_filtered_fastq,
+ ch_pseudo_index,
+ ch_dummy_file,
+ ch_gtf,
+ params.pseudo_aligner,
+ false,
+ params.salmon_quant_libtype ?: '',
+ params.kallisto_quant_fraglen,
+ params.kallisto_quant_fraglen_sd
+ )
+ ch_counts_gene_length_scaled = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_gene_length_scaled
+ ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_PSEUDO_ALIGNMENT.out.multiqc.collect{it[1]})
+ ch_versions = ch_versions.mix(QUANTIFY_PSEUDO_ALIGNMENT.out.versions)
+
+ if (!params.skip_qc && !params.skip_deseq2_qc) {
+ DESEQ2_QC_PSEUDO (
+ ch_counts_gene_length_scaled,
+ ch_pca_header_multiqc,
+ ch_clustering_header_multiqc
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_PSEUDO.out.pca_multiqc.collect())
+ ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_PSEUDO.out.dists_multiqc.collect())
+ ch_versions = ch_versions.mix(DESEQ2_QC_PSEUDO.out.versions)
+ }
+ }
+
+ //
+ // Collate and save software versions
+ //
+ softwareVersionsToYAML(ch_versions)
+ .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_rnaseq_software_mqc_versions.yml', sort: true, newLine: true)
+ .set { ch_collated_versions }
+
+ //
+ // MODULE: MultiQC
+ //
+ ch_multiqc_report = Channel.empty()
+ if (!params.skip_multiqc) {
+ ch_multiqc_config = Channel.fromPath("$projectDir/workflows/rnaseq/assets/multiqc/multiqc_config.yml", checkIfExists: true)
+ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty()
+ ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo) : Channel.empty()
+ summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+ ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
+ ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+ ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
+
+ MULTIQC (
+ ch_multiqc_files.collect(),
+ ch_multiqc_config.toList(),
+ ch_multiqc_custom_config.toList(),
+ ch_multiqc_logo.toList()
+ )
+ ch_multiqc_report = MULTIQC.out.report
+ }
+
+ emit:
+ multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ THE END
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/