diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe33..4a9bc5c79 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 66bd47cf9..c80d78b97 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,38 +35,9 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Hash Github Workspace - id: hash_workspace - run: | - echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT - - - name: Cache test data - id: cache-testdata - uses: actions/cache@v3 - with: - path: test-datasets/ - key: ${{ steps.hash_workspace.outputs.digest }} - - - name: Check out test data - if: steps.cache-testdata.outputs.cache-hit != 'true' - uses: actions/checkout@v3 - with: - repository: nf-core/test-datasets - ref: rnaseq3 - path: test-datasets/ - - - name: Replace remote paths in samplesheets - run: | - for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do - sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f - echo "========== $f ============" - cat $f - echo "========================================" - done; - - name: Run pipeline with test data run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/ + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results star_salmon: name: Test STAR Salmon with workflow parameters @@ -92,35 +63,6 @@ jobs: - name: Check out pipeline code uses: actions/checkout@v2 - - name: Hash Github Workspace - id: hash_workspace - run: | - echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT - - - name: Cache test data - id: cache-testdata - uses: actions/cache@v3 - with: - path: test-datasets/ - key: ${{ steps.hash_workspace.outputs.digest }} - - - name: Check out test data - if: steps.cache-testdata.outputs.cache-hit != 'true' - uses: actions/checkout@v3 - with: - repository: nf-core/test-datasets - ref: rnaseq3 - path: test-datasets/ - - - name: Replace remote paths in samplesheets - run: | - for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do - sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f - echo "========== $f ============" - cat $f - echo "========================================" - done; - - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -128,7 +70,7 @@ jobs: - name: Run pipeline with STAR and various parameters run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --aligner star_salmon ${{ matrix.parameters }} --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/ + nextflow run ${GITHUB_WORKSPACE} -profile 
test,docker --aligner star_salmon ${{ matrix.parameters }} --outdir ./results star_rsem: name: Test STAR RSEM with workflow parameters @@ -143,35 +85,6 @@ jobs: - name: Check out pipeline code uses: actions/checkout@v2 - - name: Hash Github Workspace - id: hash_workspace - run: | - echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT - - - name: Cache test data - id: cache-testdata - uses: actions/cache@v3 - with: - path: test-datasets/ - key: ${{ steps.hash_workspace.outputs.digest }} - - - name: Check out test data - if: steps.cache-testdata.outputs.cache-hit != 'true' - uses: actions/checkout@v3 - with: - repository: nf-core/test-datasets - ref: rnaseq3 - path: test-datasets/ - - - name: Replace remote paths in samplesheets - run: | - for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do - sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f - echo "========== $f ============" - cat $f - echo "========================================" - done; - - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -179,7 +92,7 @@ jobs: - name: Run pipeline with RSEM STAR and various parameters run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --aligner star_rsem ${{ matrix.parameters }} --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/ + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner star_rsem ${{ matrix.parameters }} --outdir ./results hisat2: name: Test HISAT2 with workflow parameters @@ -194,35 +107,6 @@ jobs: - name: Check out pipeline code uses: actions/checkout@v2 - - name: Hash Github Workspace - id: hash_workspace - run: | - echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT - - - name: Cache test data - id: cache-testdata - uses: actions/cache@v3 - with: - path: test-datasets/ - key: ${{ steps.hash_workspace.outputs.digest }} - - - name: Check out test data - if: steps.cache-testdata.outputs.cache-hit != 'true' - uses: actions/checkout@v3 - with: - repository: nf-core/test-datasets - ref: rnaseq3 - path: test-datasets/ - - - name: Replace remote paths in samplesheets - run: | - for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do - sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f - echo "========== $f ============" - cat $f - echo "========================================" - done; - - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -230,7 +114,7 @@ jobs: - name: Run pipeline with HISAT2 and various parameters run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --aligner hisat2 ${{ matrix.parameters }} --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/ + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner hisat2 ${{ matrix.parameters }} --outdir ./results pseudo: name: Test Pseudoaligners with workflow parameters @@ -249,35 +133,6 @@ jobs: - name: Check out pipeline code uses: actions/checkout@v2 - - name: Hash Github Workspace - id: hash_workspace - run: | - echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT - - - name: Cache test data - id: cache-testdata - uses: actions/cache@v3 - with: - path: test-datasets/ - key: ${{ steps.hash_workspace.outputs.digest }} - - - name: Check out test data - if: steps.cache-testdata.outputs.cache-hit != 
'true' - uses: actions/checkout@v3 - with: - repository: nf-core/test-datasets - ref: rnaseq3 - path: test-datasets/ - - - name: Replace remote paths in samplesheets - run: | - for f in ${{ github.workspace }}/test-datasets/samplesheet/v3.10/*.csv; do - sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/=${{ github.workspace }}/test-datasets/=g" $f - echo "========== $f ============" - cat $f - echo "========================================" - done; - - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -285,4 +140,4 @@ jobs: - name: Run pipeline with Salmon or Kallisto and various parameters run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker ${{ matrix.parameters }} --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets/ + nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} --outdir ./results diff --git a/.nf-core.yml b/.nf-core.yml index 094459361..fdcea61c0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -3,6 +3,8 @@ lint: files_unchanged: - assets/email_template.html - assets/email_template.txt + files_exist: + - lib/nfcore_external_java_deps.jar - lib/NfcoreTemplate.groovy - - pyproject.toml - multiqc_config: false + - lib/Utils.groovy + - lib/WorkflowMain.groovy diff --git a/assets/dummy_file.txt b/assets/dummy_file.txt deleted file mode 100644 index 8b1378917..000000000 --- a/assets/dummy_file.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/assets/email_template.html b/assets/email_template.html index 0ef39b222..dc46d39ee 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -14,7 +14,7 @@
-<h1>nf-core/rnaseq v${version}</h1>
+<h1>nf-core/rnaseq ${version}</h1>
 <h2>Run Name: $runName</h2>

<% if (!success){ out << """ diff --git a/assets/email_template.txt b/assets/email_template.txt index 5440f887a..8789fd8ae 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/rnaseq v${version} + nf-core/rnaseq ${version} ---------------------------------------------------- Run Name: $runName diff --git a/conf/base.config b/conf/base.config index 337564e5c..93ad25b28 100644 --- a/conf/base.config +++ b/conf/base.config @@ -57,7 +57,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/modules.config b/conf/modules.config index 16d635cdb..e6210ea0e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,14 +22,6 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } } // @@ -148,7 +140,7 @@ process { if (!params.skip_bbsplit && params.bbsplit_fasta_list) { process { - withName: '.*:PREPARE_GENOME:BBMAP_BBSPLIT' { + withName: 'PREPARE_GENOME:BBMAP_BBSPLIT' { ext.args = 'build=1' publishDir = [ path: { params.save_reference ? "${params.outdir}/genome/index" : params.outdir }, @@ -348,7 +340,7 @@ if (params.remove_ribo_rna) { if (!params.skip_alignment) { process { - withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/samtools_stats" }, @@ -357,7 +349,7 @@ if (!params.skip_alignment) { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + withName: 'NFCORE_RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { ( ['star_salmon','hisat2'].contains(params.aligner) && @@ -371,7 +363,7 @@ if (!params.skip_alignment) { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 'NFCORE_RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { ext.args = { params.bam_csi_index ? '-c' : '' } publishDir = [ path: { ( ['star_salmon','hisat2'].contains(params.aligner) && @@ -611,7 +603,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { if (params.with_umi) { process { - withName: 'NFCORE_RNASEQ:RNASEQ:SAMTOOLS_SORT' { + withName: 'NFCORE_RNASEQ:SAMTOOLS_SORT' { ext.args = '-n' ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } publishDir = [ @@ -622,7 +614,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:UMITOOLS_PREPAREFORSALMON' { + withName: 'NFCORE_RNASEQ:UMITOOLS_PREPAREFORSALMON' { ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" } publishDir = [ [ @@ -639,7 +631,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + withName: 'NFCORE_RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.transcriptome.sorted" } publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? 
"${params.outdir}/${params.aligner}" : params.outdir }, @@ -649,7 +641,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 'NFCORE_RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, @@ -658,7 +650,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_RNASEQ:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.transcriptome.sorted.bam" } publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}/samtools_stats" : params.outdir }, diff --git a/conf/test_cache.config b/conf/test_cache.config deleted file mode 100644 index 51aad577e..000000000 --- a/conf/test_cache.config +++ /dev/null @@ -1,50 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests using cached / offline test data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/rnaseq -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - - // Input data - // params.test_data_base (default) = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/' - input = "${params.test_data_base}/samplesheet/v3.10/samplesheet_test.csv" - - // Genome references - fasta = "${params.test_data_base}/reference/genome.fasta" - gtf = "${params.test_data_base}/reference/genes.gtf.gz" - gff = "${params.test_data_base}/reference/genes.gff.gz" - transcript_fasta = "${params.test_data_base}/reference/transcriptome.fasta" - additional_fasta = "${params.test_data_base}/reference/gfp.fa.gz" - - bbsplit_fasta_list = "${params.test_data_base}/reference/bbsplit_fasta_list.txt" - hisat2_index = "${params.test_data_base}/reference/hisat2.tar.gz" - salmon_index = "${params.test_data_base}/reference/salmon.tar.gz" - rsem_index = "${params.test_data_base}/reference/rsem.tar.gz" - - // Other parameters - skip_bbsplit = false - pseudo_aligner = 'salmon' - umitools_bc_pattern = 'NNNN' -} - -// When using RSEM, remove warning from STAR whilst building tiny indices -process { - withName: 'RSEM_PREPAREREFERENCE_GENOME' { - ext.args2 = "--genomeSAindexNbases 7" - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 5cdc9a73c..000000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,398 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. 
-// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Warn if using custom configs to provide pipeline parameters - // - public static void warnParamsProvidedInConfig(workflow, log) { - if (workflow.configFiles.size() > 1) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Multiple config files detected!\n" + - " Please provide pipeline parameters via the CLI or Nextflow '-params-file' option.\n" + - " Custom config files including those provided by the '-c' Nextflow option can be\n" + - " used to provide any configuration except for parameters.\n\n" + - " Docs: https://nf-co.re/usage/configuration#custom-configuration-files\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[], pass_mapped_reads=[:], pass_trimmed_reads=[:], pass_strand_check=[:]) { - - // Set up the e-mail variables - def fail_mapped_count = pass_mapped_reads.count { key, value -> value == false } - def fail_trimmed_count = pass_trimmed_reads.count { key, value -> value == false } - def fail_strand_count = pass_strand_check.count { key, value -> value == false } - - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (fail_mapped_count + fail_trimmed_count + fail_strand_count > 0) { - subject = "[$workflow.manifest.name] Partially successful - samples skipped: $workflow.runName" - } - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - email_fields['skip_sample_count'] = fail_mapped_count + fail_trimmed_count + fail_strand_count - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success && !params.skip_multiqc) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = 
engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") - sendmail_tf.withWriter { w -> w << sendmail_html } - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); - output_hf.delete() - - // Write summary e-mail TXT to a file - def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); - output_tf.delete() - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) - - FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") - temp_pf.delete() - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log, pass_mapped_reads=[:], pass_trimmed_reads=[:], pass_strand_check=[:]) { - Map colors = logColours(params.monochrome_logs) - - def fail_mapped_count = pass_mapped_reads.count { key, value -> value == false } - def fail_trimmed_count = pass_trimmed_reads.count { key, value -> value == false } - def fail_strand_count = pass_strand_check.count { key, value -> value == false } - if (workflow.success) { - def color = colors.green - def status = [] - if (workflow.stats.ignoredCount != 0) { - color = colors.yellow - status += ['with errored process(es)'] - } - if (fail_mapped_count > 0 || fail_trimmed_count > 0 || fail_strand_count > 0) { - color = colors.yellow - status += ['with skipped sampl(es)'] - } - log.info "-${colors.purple}[$workflow.manifest.name]${color} Pipeline completed successfully ${status.join(', ')}${colors.reset}-" - if (fail_trimmed_count > 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Please check MultiQC report: ${fail_trimmed_count}/${pass_trimmed_reads.size()} samples skipped since they failed ${params.min_trimmed_reads} trimmed read threshold.${colors.reset}-" - } - if (fail_mapped_count > 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Please check MultiQC report: ${fail_mapped_count}/${pass_mapped_reads.size()} samples skipped since they failed STAR ${params.min_mapped_reads}% mapped threshold.${colors.reset}-" - } - if (fail_strand_count > 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Please check MultiQC report: ${fail_strand_count}/${pass_strand_check.size()} samples failed strandedness check.${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? 
'' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e8..000000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 4d94dfe08..000000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,60 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/rnaseq pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.1400710\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Warn about using custom configs to provide pipeline parameters - NfcoreTemplate.warnParamsProvidedInConfig(workflow, log) - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - } - - // - // Get attribute from genome config file e.g. 
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/WorkflowRnaseq.groovy b/lib/WorkflowRnaseq.groovy deleted file mode 100755 index 29a1724e3..000000000 --- a/lib/WorkflowRnaseq.groovy +++ /dev/null @@ -1,474 +0,0 @@ -// -// This file holds several functions specific to the workflow/rnaseq.nf in the nf-core/rnaseq pipeline -// - -import nextflow.Nextflow -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine - -class WorkflowRnaseq { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - genomeExistsError(params, log) - - if (!params.fasta) { - Nextflow.error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.") - } - - if (!params.gtf && !params.gff) { - Nextflow.error("No GTF or GFF3 annotation specified! The pipeline requires at least one of these files.") - } - - if (params.gtf) { - if (params.gff) { - gtfGffWarn(log) - } - if (params.genome == 'GRCh38' && params.gtf.contains('Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf')) { - ncbiGenomeWarn(log) - } - if (params.gtf.contains('/UCSC/') && params.gtf.contains('Annotation/Genes/genes.gtf')) { - ucscGenomeWarn(log) - } - } - - if (params.transcript_fasta) { - transcriptsFastaWarn(log) - } - - if (!params.skip_bbsplit && !params.bbsplit_index && !params.bbsplit_fasta_list) { - Nextflow.error("Please provide either --bbsplit_fasta_list / --bbsplit_index to run BBSplit.") - } - - if (params.remove_ribo_rna && !params.ribo_database_manifest) { - Nextflow.error("Please provide --ribo_database_manifest to remove ribosomal RNA with SortMeRNA.") - } - - - if (params.with_umi && !params.skip_umi_extract) { - if (!params.umitools_bc_pattern && !params.umitools_bc_pattern2) { - Nextflow.error("UMI-tools requires a barcode pattern to extract barcodes from the reads.") - } - } - - if (params.skip_alignment) { - skipAlignmentWarn(log) - } - - if (!params.skip_pseudo_alignment && params.pseudo_aligner) { - if (!(params.salmon_index || params.transcript_fasta || (params.fasta && (params.gtf || params.gff)))) { - Nextflow.error("To use `--pseudo_aligner 'salmon'`, you must provide either --salmon_index or --transcript_fasta or both --fasta and --gtf / --gff.") - } - } - - // Checks when running --aligner star_rsem - if (!params.skip_alignment && params.aligner == 'star_rsem') { - if (params.with_umi) { - rsemUmiError(log) - } - if (params.rsem_index && params.star_index) { - rsemStarIndexWarn(log) - } - if (params.aligner == 'star_rsem' && params.extra_star_align_args) { - rsemStarExtraArgumentsWarn(log) - } - } - - // Warn if --additional_fasta provided with aligner index - if (!params.skip_alignment && params.additional_fasta) { - def index = '' - if (params.aligner == 'star_salmon' && params.star_index) { - index = 'star' - } - if (params.aligner == 'star_rsem' && params.rsem_index) { - index = 'rsem' - } - if (params.aligner == 'hisat2' && params.hisat2_index) { - index = 'hisat2' - } - if (index) { - additionaFastaIndexWarn(index, log) - } - } - - // Check which RSeQC modules we are running - def valid_rseqc_modules = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin'] - def 
rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : [] - if ((valid_rseqc_modules + rseqc_modules).unique().size() != valid_rseqc_modules.size()) { - Nextflow.error("Invalid option: ${params.rseqc_modules}. Valid options for '--rseqc_modules': ${valid_rseqc_modules.join(', ')}") - } - } - - // - // Function to validate channels from input samplesheet - // - public static ArrayList validateInput(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same strandedness - def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1 - if (!strandedness_ok) { - Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}") - } - - // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 - if (!endedness_ok) { - Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") - } - - return [ metas[0], fastqs ] - } - - // - // Function to check whether biotype field exists in GTF file - // - public static Boolean biotypeInGtf(gtf_file, biotype, log) { - def hits = 0 - gtf_file.eachLine { line -> - def attributes = line.split('\t')[-1].split() - if (attributes.contains(biotype)) { - hits += 1 - } - } - if (hits) { - return true - } else { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Biotype attribute '${biotype}' not found in the last column of the GTF file!\n\n" + - " Biotype QC will be skipped to circumvent the issue below:\n" + - " https://github.com/nf-core/rnaseq/issues/460\n\n" + - " Amend '--featurecounts_group_type' to change this behaviour.\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - return false - } - } - - // - // Function to generate an error if contigs in genome fasta file > 512 Mbp - // - public static void checkMaxContigSize(fai_file, log) { - def max_size = 512000000 - fai_file.eachLine { line -> - def lspl = line.split('\t') - def chrom = lspl[0] - def size = lspl[1] - if (size.toInteger() > max_size) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Contig longer than ${max_size}bp found in reference genome!\n\n" + - " ${chrom}: ${size}\n\n" + - " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" + - " Please see:\n" + - " https://github.com/nf-core/rnaseq/issues/744\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } - } - - // - // Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness - // - public static String getSalmonInferredStrandedness(json_file) { - def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0] - def strandedness = 'reverse' - if (lib_type) { - if (lib_type in ['U', 'IU']) { - strandedness = 'unstranded' - } else if (lib_type in ['SF', 'ISF']) { - strandedness = 'forward' - } else if (lib_type in ['SR', 'ISR']) { - strandedness = 'reverse' - } - } - return strandedness - } - - // - // Function that parses TrimGalore log output file to get total number of reads after trimming - // - public static Integer getTrimGaloreReadsAfterFiltering(log_file) { - def total_reads = 0 - 
def filtered_reads = 0 - log_file.eachLine { line -> - def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/ - def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/ - if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() - if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat() - } - return total_reads - filtered_reads - } - - // - // Function that parses and returns the alignment rate from the STAR log output - // - public static ArrayList getStarPercentMapped(params, align_log) { - def percent_aligned = 0 - def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/ - align_log.eachLine { line -> - def matcher = line =~ pattern - if (matcher) { - percent_aligned = matcher[0][1].toFloat() - } - } - - def pass = false - if (percent_aligned >= params.min_mapped_reads.toFloat()) { - pass = true - } - return [ percent_aligned, pass ] - } - - // - // Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output - // - public static ArrayList getInferexperimentStrandedness(inferexperiment_file, cutoff=30) { - def sense = 0 - def antisense = 0 - def undetermined = 0 - inferexperiment_file.eachLine { line -> - def undetermined_matcher = line =~ /Fraction of reads failed to determine:\s([\d\.]+)/ - def se_sense_matcher = line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/ - def se_antisense_matcher = line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/ - def pe_sense_matcher = line =~ /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/ - def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/ - if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100 - if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100 - if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100 - if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100 - if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100 - } - def strandedness = 'unstranded' - if (sense >= 100-cutoff) { - strandedness = 'forward' - } else if (antisense >= 100-cutoff) { - strandedness = 'reverse' - } - return [ strandedness, sense, antisense, undetermined ] - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n"
-                summary_section += "    <dl class=\"dl-horizontal\">\n"
-                for (param in group_params.keySet()) {
-                    summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n"
-                }
-                summary_section += "    </dl>
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - public static String toolCitationText(params) { - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "",
-        // Uncomment function in methodsDescriptionText to render in MultiQC report
-        def reference_text = [
-                "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
-                "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
-            ].join(' ').trim()
-
-        return reference_text
-    }
-
-    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
-        // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
-        def meta = [:]
-        meta.workflow = run_workflow.toMap()
-        meta["manifest_map"] = run_workflow.manifest.toMap()
-
-        // Pipeline DOI
-        meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
-        meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>"
-
-        // Tool references
-        meta["tool_citations"] = ""
-        meta["tool_bibliography"] = ""
-
-        // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
-        //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
-        //meta["tool_bibliography"] = toolBibliographyText(params)
-
-        def methods_text = mqc_methods_yaml.text
-
-        def engine = new SimpleTemplateEngine()
-        def description_html = engine.createTemplate(methods_text).make(meta)
-
-        return description_html
-    }
-
-    //
-    // Create MultiQC tsv custom content from a list of values
-    //
-    public static String multiqcTsvFromList(tsv_data, header) {
-        def tsv_string = ""
-        if (tsv_data.size() > 0) {
-            tsv_string += "${header.join('\t')}\n"
-            tsv_string += tsv_data.join('\n')
-        }
-        return tsv_string
-    }
-
-    //
-    // Exit pipeline if incorrect --genome key provided
-    //
-    private static void genomeExistsError(params, log) {
-        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
-            def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
-                "  Currently, the available genome keys are:\n" +
-                "  ${params.genomes.keySet().join(", ")}\n" +
-                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-            Nextflow.error(error_string)
-        }
-    }
-
-    //
-    // Print a warning if using GRCh38 assembly from igenomes.config
-    //
-    private static void ncbiGenomeWarn(log) {
-        log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-            "  When using '--genome GRCh38' the assembly is from the NCBI and NOT Ensembl.\n" +
-            "  Biotype QC will be skipped to circumvent the issue below:\n" +
-            "  https://github.com/nf-core/rnaseq/issues/460\n\n" +
-            "  If you would like to use the soft-masked Ensembl assembly instead please see:\n" +
-            "  https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" +
-            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-    }
-
-    //
-    // Print a warning if using a UCSC assembly from igenomes.config
-    //
-    private static void ucscGenomeWarn(log) {
-        log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-            "  When using UCSC assemblies the 'gene_biotype' field is absent from the GTF file.\n" +
-            "  Biotype QC will be skipped to circumvent the issue below:\n" +
-            "  https://github.com/nf-core/rnaseq/issues/460\n\n" +
-            "  If you would like to use the soft-masked Ensembl assembly instead please see:\n" +
-            "  https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" +
-            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-    }
-
-    //
-    // Print a warning if both GTF and GFF have been provided
-    //
-    private static void gtfGffWarn(log) {
-        log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-            "  Both '--gtf' and '--gff' parameters have been provided.\n" +
-            "  Using GTF file as priority.\n" +
-            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-    }
-
-    //
-    // Print a warning if using '--transcript_fasta'
-    //
-    private static void transcriptsFastaWarn(log) {
-        log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-            "  '--transcript_fasta' parameter has been provided.\n" +
-            "  Make sure transcript names in this file match those in the
GFF/GTF file.\n\n" + - " Please see:\n" + - " https://github.com/nf-core/rnaseq/issues/753\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - - // - // Print a warning if --skip_alignment has been provided - // - private static void skipAlignmentWarn(log) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " '--skip_alignment' parameter has been provided.\n" + - " Skipping alignment, genome-based quantification and all downstream QC processes.\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - - // - // Print a warning if using '--aligner star_rsem' and '--with_umi' - // - private static void rsemUmiError(log) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " When using '--aligner star_rsem', STAR is run by RSEM itself and so it is\n" + - " not possible to remove UMIs before the quantification.\n\n" + - " If you would like to remove UMI barcodes using the '--with_umi' option\n" + - " please use either '--aligner star_salmon' or '--aligner hisat2'.\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - - // - // Print a warning if using '--aligner star_rsem' and providing both '--rsem_index' and '--star_index' - // - private static void rsemStarIndexWarn(log) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " When using '--aligner star_rsem', both the STAR and RSEM indices should\n" + - " be present in the path specified by '--rsem_index'.\n\n" + - " This warning has been generated because you have provided both\n" + - " '--rsem_index' and '--star_index'. The pipeline will ignore the latter.\n\n" + - " Please see:\n" + - " https://github.com/nf-core/rnaseq/issues/568\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - - // - // Print a warning if using '--aligner star_rsem' and providing '--star_extra_alignment_args' - // - private static void rsemStarExtraArgumentsWarn(log) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " No additional arguments can be passed to STAR when using RSEM.\n" + - " Because RSEM enforces its own parameters for STAR, any extra arguments\n" + - " to STAR will be ignored. 
Alternatively, choose the STAR+Salmon route.\n\n" + - " This warning has been generated because you have provided both\n" + - " '--aligner star_rsem' and '--extra_star_align_args'.\n\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - - // - // Print a warning if using '--additional_fasta' and '--_index' - // - private static void additionaFastaIndexWarn(index, log) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " When using '--additional_fasta ' the aligner index will not\n" + - " be re-built with the transgenes incorporated by default since you have \n" + - " already provided an index via '--${index}_index '.\n\n" + - " Set '--additional_fasta --${index}_index false --gene_bed false --save_reference'\n" + - " to re-build the index with transgenes included and the index and gene BED file will be saved in\n" + - " 'results/genome/index/${index}/' for re-use with '--${index}_index'.\n\n" + - " Ignore this warning if you know that the index already contains transgenes.\n\n" + - " Please see:\n" + - " https://github.com/nf-core/rnaseq/issues/556\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5e..000000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 69ae6c3b3..607d02e73 100755 --- a/main.nf +++ b/main.nf @@ -13,74 +13,122 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.transcript_fasta = WorkflowMain.getGenomeAttribute(params, 'transcript_fasta') -params.additional_fasta = WorkflowMain.getGenomeAttribute(params, 'additional_fasta') -params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') -params.gff = WorkflowMain.getGenomeAttribute(params, 'gff') -params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'bed12') -params.bbsplit_index = WorkflowMain.getGenomeAttribute(params, 'bbsplit') -params.star_index = WorkflowMain.getGenomeAttribute(params, 'star') -params.hisat2_index = WorkflowMain.getGenomeAttribute(params, 'hisat2') -params.rsem_index = WorkflowMain.getGenomeAttribute(params, 'rsem') -params.salmon_index = WorkflowMain.getGenomeAttribute(params, 'salmon') -params.kallisto_index = WorkflowMain.getGenomeAttribute(params, 'kallisto') +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' +include { NFCORE_RNASEQ } from './workflows/rnaseq' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_rnaseq_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_rnaseq_pipeline' +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_rnaseq_pipeline' +include { checkMaxContigSize } from './subworkflows/local/utils_nfcore_rnaseq_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if 
(params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) +params.fasta = getGenomeAttribute('fasta') +params.transcript_fasta = getGenomeAttribute('transcript_fasta') +params.additional_fasta = getGenomeAttribute('additional_fasta') +params.gtf = getGenomeAttribute('gtf') +params.gff = getGenomeAttribute('gff') +params.gene_bed = getGenomeAttribute('bed12') +params.bbsplit_index = getGenomeAttribute('bbsplit') +params.star_index = getGenomeAttribute('star') +params.hisat2_index = getGenomeAttribute('hisat2') +params.rsem_index = getGenomeAttribute('rsem') +params.salmon_index = getGenomeAttribute('salmon') +params.kallisto_index = getGenomeAttribute('kallisto') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { RNASEQ } from './workflows/rnaseq' +workflow { -// -// WORKFLOW: Run main nf-core/rnaseq analysis pipeline -// -workflow NFCORE_RNASEQ { - RNASEQ () -} + main: + ch_versions = Channel.empty() -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION () -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// -workflow { - NFCORE_RNASEQ () + // + // SUBWORKFLOW: Prepare reference genome files + // + PREPARE_GENOME ( + params.fasta, + params.gtf, + params.gff, + params.additional_fasta, + params.transcript_fasta, + params.gene_bed, + params.splicesites, + params.bbsplit_fasta_list, + params.star_index, + params.rsem_index, + params.salmon_index, + params.kallisto_index, + params.hisat2_index, + params.bbsplit_index, + params.gencode, + params.featurecounts_group_type, + params.aligner, + params.pseudo_aligner, + params.skip_gtf_filter, + params.skip_bbsplit, + params.skip_alignment, + params.skip_pseudo_alignment + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + + // Check if contigs in genome fasta file > 512 Mbp + if (!params.skip_alignment && !params.bam_csi_index) { + PREPARE_GENOME + .out + .fai + .map { checkMaxContigSize(it) } + } + + // + // WORKFLOW: Run nf-core/rnaseq workflow + // + NFCORE_RNASEQ ( + PIPELINE_INITIALISATION.out.samplesheet, + ch_versions, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf, + PREPARE_GENOME.out.fai, + PREPARE_GENOME.out.chrom_sizes, + PREPARE_GENOME.out.gene_bed, + PREPARE_GENOME.out.transcript_fasta, + PREPARE_GENOME.out.star_index, + PREPARE_GENOME.out.rsem_index, + PREPARE_GENOME.out.hisat2_index, + PREPARE_GENOME.out.salmon_index, + PREPARE_GENOME.out.kallisto_index, + PREPARE_GENOME.out.bbsplit_index, + PREPARE_GENOME.out.splicesites + ) + ch_versions = ch_versions.mix(NFCORE_RNASEQ.out.versions) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + 
params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url + ) } /* diff --git a/modules.json b/modules.json index 67970abf1..537a5e175 100644 --- a/modules.json +++ b/modules.json @@ -15,12 +15,6 @@ "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", - "installed_by": ["modules"], - "patch": "modules/nf-core/custom/dumpsoftwareversions/custom-dumpsoftwareversions.diff" - }, "custom/getchromsizes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", @@ -78,6 +72,11 @@ "git_sha": "bdc2a97ced7adc423acfa390742db83cab98c1ad", "installed_by": ["modules"] }, + "multiqc": { + "branch": "master", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", + "installed_by": ["modules"] + }, "picard/markduplicates": { "branch": "master", "git_sha": "2ee934606f1fdf7fc1cb05d6e8abc13bec8ab448", @@ -304,6 +303,21 @@ "branch": "master", "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", "installed_by": ["subworkflows"] + }, + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/multiqc/main.nf b/modules/local/multiqc/main.nf deleted file mode 100644 index be0dfe2c3..000000000 --- a/modules/local/multiqc/main.nf +++ /dev/null @@ -1,74 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda "bioconda::multiqc=1.19" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : - 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" - - input: - path multiqc_config - path multiqc_custom_config - path software_versions - path workflow_summary - path methods_description - path logo - path fail_trimming_summary - path fail_mapping_summary - path fail_strand_check - path ('fastqc/raw/*') - path ('fastqc/trim/*') - path ('trim_log/*') - path ('sortmerna/*') - path ('star/*') - path ('hisat2/*') - path ('rsem/*') - path ('pseudoalignment/*') - path ('samtools/stats/*') - path ('samtools/flagstat/*') - path ('samtools/idxstats/*') - path ('picard/markduplicates/*') - path ('featurecounts/*') - path ('deseq2/aligner/*') - path ('deseq2/aligner/*') - path ('deseq2/pseudoaligner/*') - path ('deseq2/pseudoaligner/*') - path ('preseq/*') - path ('qualimap/*') - path ('dupradar/*') - path ('rseqc/bam_stat/*') - path ('rseqc/infer_experiment/*') - path ('rseqc/inner_distance/*') - path ('rseqc/junction_annotation/*') - path ('rseqc/junction_saturation/*') - path ('rseqc/read_distribution/*') - path ('rseqc/read_duplication/*') - path ('rseqc/tin/*') - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def custom_config = params.multiqc_config ? 
"--config $multiqc_custom_config" : '' - prefix = task.ext.prefix ?: "multiqc_report" - """ - multiqc \\ - -n ${prefix}.html \\ - -f \\ - $args \\ - $custom_config \\ - . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/custom-dumpsoftwareversions.diff b/modules/nf-core/custom/dumpsoftwareversions/custom-dumpsoftwareversions.diff deleted file mode 100644 index 2ec89807d..000000000 --- a/modules/nf-core/custom/dumpsoftwareversions/custom-dumpsoftwareversions.diff +++ /dev/null @@ -1,19 +0,0 @@ -Changes in module 'nf-core/custom/dumpsoftwareversions' ---- modules/nf-core/custom/dumpsoftwareversions/main.nf -+++ modules/nf-core/custom/dumpsoftwareversions/main.nf -@@ -2,10 +2,10 @@ - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container -- conda "${moduleDir}/environment.yml" -+ conda "bioconda::multiqc=1.19" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -- 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : -- 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" -+ 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : -+ 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" - - input: - path versions - -************************************************************ diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index b2dcf480e..000000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.19" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-        'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
-
-    input:
-    path versions
-
-    output:
-    path "software_versions.yml"    , emit: yml
-    path "software_versions_mqc.yml", emit: mqc_yml
-    path "versions.yml"             , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    template 'dumpsoftwareversions.py'
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
deleted file mode 100644
index 5f15a5fde..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-name: custom_dumpsoftwareversions
-description: Custom module used to dump software versions within the nf-core pipeline template
-keywords:
-  - custom
-  - dump
-  - version
-tools:
-  - custom:
-      description: Custom module used to dump software versions within the nf-core pipeline template
-      homepage: https://github.com/nf-core/tools
-      documentation: https://github.com/nf-core/tools
-      licence: ["MIT"]
-input:
-  - versions:
-      type: file
-      description: YML file containing software versions
-      pattern: "*.yml"
-output:
-  - yml:
-      type: file
-      description: Standard YML file containing software versions
-      pattern: "software_versions.yml"
-  - mqc_yml:
-      type: file
-      description: MultiQC custom content YML file containing software versions
-      pattern: "software_versions_mqc.yml"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@drpatelh"
-  - "@grst"
-maintainers:
-  - "@drpatelh"
-  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
deleted file mode 100755
index da0334085..000000000
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Provide functions to merge multiple versions.yml files."""
-
-
-import yaml
-import platform
-from textwrap import dedent
-
-
-def _make_versions_html(versions):
-    """Generate a tabular HTML output of all versions for MultiQC."""
-    html = [
-        dedent(
-            """\\
-            <style>
-            #nf-core-versions tbody:nth-child(even) {
-                background-color: #f2f2f2;
-            }
-            </style>
-            <table class="table" style="white-space:pre" id="nf-core-versions">
-                <thead>
-                    <tr>
-                        <th> Process Name </th>
-                        <th> Software </th>
-                        <th> Version  </th>
-                    </tr>
-                </thead>
-            """
-        )
-    ]
-    for process, tmp_versions in sorted(versions.items()):
-        html.append("<tbody>")
-        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
-            html.append(
-                dedent(
-                    f"""\\
-                    <tr>
-                        <td><samp>{process if (i == 0) else ''}</samp></td>
-                        <td><samp>{tool}</samp></td>
-                        <td><samp>{version}</samp></td>
-                    </tr>
-                    """
-                )
-            )
-        html.append("</tbody>")
-    html.append("</table
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test deleted file mode 100644 index eec1db10a..000000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ /dev/null @@ -1,38 +0,0 @@ -nextflow_process { - - name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" - script "../main.nf" - process "CUSTOM_DUMPSOFTWAREVERSIONS" - tag "modules" - tag "modules_nfcore" - tag "custom" - tag "dumpsoftwareversions" - tag "custom/dumpsoftwareversions" - - test("Should run without failures") { - when { - process { - """ - def tool1_version = ''' - TOOL1: - tool1: 0.11.9 - '''.stripIndent() - - def tool2_version = ''' - TOOL2: - tool2: 1.9 - '''.stripIndent() - - input[0] = Channel.of(tool1_version, tool2_version).collectFile() - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap deleted file mode 100644 index 4274ed57a..000000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ /dev/null @@ -1,27 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ], - "1": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "2": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "versions": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "yml": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ] - 
} - ], - "timestamp": "2023-11-03T14:43:22.157011" - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml deleted file mode 100644 index 405aa24ae..000000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -custom/dumpsoftwareversions: - - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/multiqc/environment.yml similarity index 51% rename from modules/nf-core/custom/dumpsoftwareversions/environment.yml rename to modules/nf-core/multiqc/environment.yml index f0c63f698..7625b7520 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,7 @@ -name: custom_dumpsoftwareversions +name: multiqc channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 000000000..1b9f7c431 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,55 @@ +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + $logo \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + mkdir multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 000000000..45a9bc35e --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,58 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. 
+ homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" +output: + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 000000000..d0438eda6 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 000000000..d37e73040 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "versions": { + "content": [ + [ + 
"versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:02:49.911994" + }, + "sarscov2 single-end [fastqc] - stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:03:14.524346" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 000000000..bea6c0d37 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index c9c9cb08d..c4e5a8a30 100644 --- a/nextflow.config +++ b/nextflow.config @@ -54,7 +54,7 @@ params { // Ribosomal RNA removal remove_ribo_rna = false save_non_ribo_reads = false - ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt" + ribo_database_manifest = "${projectDir}/workflows/rnaseq/assets/rrna-db-defaults.txt" // Alignment aligner = 'star_salmon' @@ -119,7 +119,6 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3' // Max resource options // Defaults only, expecting to be overwritten @@ -245,7 +244,6 @@ profiles { executor.memory = 8.GB } test { includeConfig 'conf/test.config' } - test_cache { includeConfig 'conf/test_cache.config' } test_full { includeConfig 'conf/test_full.config' } test_full_aws { includeConfig 'conf/test_full.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 42e4e8967..0cd5df6e0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -278,7 +278,7 @@ "format": "file-path", "exists": true, "mimetype": "text/plain", - "default": "${projectDir}/assets/rrna-db-defaults.txt", + "default": "${projectDir}/workflows/rnaseq/assets/rrna-db-defaults.txt", "fa_icon": "fas fa-database", "description": "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", "help_text": "By default, [rRNA databases](https://github.com/biocore/sortmerna/tree/master/data/rRNA_databases) defined in the SortMeRNA GitHub repo are used. You can see an example in the pipeline Github repository in `assets/rrna-default-dbs.txt`.\nPlease note that commercial/non-academic entities require [`licensing for SILVA`](https://www.arb-silva.de/silva-license-information) for these default databases." @@ -672,13 +672,6 @@ "description": "Institutional config URL link.", "hidden": true, "fa_icon": "fas fa-users-cog" - }, - "test_data_base": { - "type": "string", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3", - "description": "Base path / URL for data used in the test profiles", - "help_text": "Warning: The `-profile test` samplesheet file itself contains remote paths. Setting this parameter does not alter the contents of that file.", - "hidden": true } } }, diff --git a/pyproject.toml b/pyproject.toml index bc01239b3..0d62beb6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Config file for Python. Mostly used to configure linting of bin/*.py with Black. +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
[tool.black] line-length = 120 diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 0840c7734..ace271641 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -35,28 +35,30 @@ include { STAR_GENOMEGENERATE_IGENOMES } from '../../../modules/local/st workflow PREPARE_GENOME { take: - fasta // file: /path/to/genome.fasta - gtf // file: /path/to/genome.gtf - gff // file: /path/to/genome.gff - additional_fasta // file: /path/to/additional.fasta - transcript_fasta // file: /path/to/transcript.fasta - gene_bed // file: /path/to/gene.bed - splicesites // file: /path/to/splicesites.txt - bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt - star_index // directory: /path/to/star/index/ - rsem_index // directory: /path/to/rsem/index/ - salmon_index // directory: /path/to/salmon/index/ - kallisto_index // directory: /path/to/kallisto/index/ - hisat2_index // directory: /path/to/hisat2/index/ - bbsplit_index // directory: /path/to/rsem/index/ - gencode // boolean: whether the genome is from GENCODE - is_aws_igenome // boolean: whether the genome files are from AWS iGenomes - biotype // string: if additional fasta file is provided biotype value to use when appending entries to GTF file - prepare_tool_indices // list: tools to prepare indices for - filter_gtf // boolean: whether to filter GTF file + fasta // file: /path/to/genome.fasta + gtf // file: /path/to/genome.gtf + gff // file: /path/to/genome.gff + additional_fasta // file: /path/to/additional.fasta + transcript_fasta // file: /path/to/transcript.fasta + gene_bed // file: /path/to/gene.bed + splicesites // file: /path/to/splicesites.txt + bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt + star_index // directory: /path/to/star/index/ + rsem_index // directory: /path/to/rsem/index/ + salmon_index // directory: /path/to/salmon/index/ + kallisto_index // directory: /path/to/kallisto/index/ + hisat2_index // directory: /path/to/hisat2/index/ + bbsplit_index // directory: /path/to/rsem/index/ + gencode // boolean: whether the genome is from GENCODE + featurecounts_group_type // string: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts + aligner // string: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2' + pseudo_aligner // string: Specifies the pseudo aligner to use - available options are 'salmon'. 
Runs in addition to '--aligner' + skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs + skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads + skip_alignment // boolean: Skip all of the alignment-based processes within the pipeline + skip_pseudo_alignment // boolean: Skip all of the pseudoalignment-based processes within the pipeline main: - ch_versions = Channel.empty() // @@ -91,6 +93,24 @@ workflow PREPARE_GENOME { ch_versions = ch_versions.mix(GFFREAD.out.versions) } + // Determine whether to filter the GTF or not + def filter_gtf = + (( + // Condition 1: Alignment is required and aligner is set + !skip_alignment && aligner + ) || + ( + // Condition 2: Pseudoalignment is required and pseudoaligner is set + !skip_pseudo_alignment && pseudo_aligner + ) || + ( + // Condition 3: Transcript FASTA file is not provided + !transcript_fasta + )) && + ( + // Condition 4: --skip_gtf_filter is not provided + !skip_gtf_filter + ) if (filter_gtf) { GTF_FILTER ( ch_fasta, ch_gtf ) ch_gtf = GTF_FILTER.out.genome_gtf @@ -101,6 +121,7 @@ workflow PREPARE_GENOME { // // Uncompress additional fasta file and concatenate with reference fasta and gtf files // + def biotype = gencode ? "gene_type" : featurecounts_group_type if (additional_fasta) { if (additional_fasta.endsWith('.gz')) { ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] } @@ -157,6 +178,14 @@ workflow PREPARE_GENOME { ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + // + // Get list of indices that need to be created + // + def prepare_tool_indices = [] + if (!skip_bbsplit) { prepare_tool_indices << 'bbsplit' } + if (!skip_alignment) { prepare_tool_indices << aligner } + if (!skip_pseudo_alignment && pseudo_aligner) { prepare_tool_indices << pseudo_aligner } + // // Uncompress BBSplit index or generate from scratch if required // @@ -197,6 +226,13 @@ workflow PREPARE_GENOME { ch_star_index = Channel.value(file(star_index)) } } else { + // Check if an AWS iGenome has been provided to use the appropriate version of STAR + def is_aws_igenome = false + if (fasta && gtf) { + if ((file(fasta).getName() - '.gz' == 'genome.fa') && (file(gtf).getName() - '.gz' == 'genes.gtf')) { + is_aws_igenome = true + } + } if (is_aws_igenome) { ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions) diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf new file mode 100644 index 000000000..684325f6e --- /dev/null +++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf @@ -0,0 +1,528 @@ +// +// Subworkflow with functionality specific to the nf-core/rnaseq pipeline +// + +import groovy.json.JsonSlurper + +/* +======================================================================================== + IMPORT MODULES/SUBWORKFLOWS +======================================================================================== +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { fromSamplesheet } from 'plugin/nf-validation' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from 
'../../nf-core/utils_nfcore_pipeline'
+include { dashedLine              } from '../../nf-core/utils_nfcore_pipeline'
+include { nfCoreLogo              } from '../../nf-core/utils_nfcore_pipeline'
+include { imNotification          } from '../../nf-core/utils_nfcore_pipeline'
+include { UTILS_NFCORE_PIPELINE   } from '../../nf-core/utils_nfcore_pipeline'
+include { workflowCitation        } from '../../nf-core/utils_nfcore_pipeline'
+
+/*
+========================================================================================
+    SUBWORKFLOW TO INITIALISE PIPELINE
+========================================================================================
+*/
+
+workflow PIPELINE_INITIALISATION {
+
+    main:
+
+    //
+    // Print version and exit if required and dump pipeline parameters to JSON file
+    //
+    UTILS_NEXTFLOW_PIPELINE (
+        params.version,
+        true,
+        params.outdir,
+        workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1
+    )
+
+    //
+    // Validate parameters and generate parameter summary to stdout
+    //
+    def pre_help_text = nfCoreLogo(params.monochrome_logs)
+    def post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(params.monochrome_logs)
+    def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --genome GRCh37 --outdir <OUTDIR>"
+    UTILS_NFVALIDATION_PLUGIN (
+        params.help,
+        workflow_command,
+        pre_help_text,
+        post_help_text,
+        params.validate_params,
+        "nextflow_schema.json"
+    )
+
+    //
+    // Check config provided to the pipeline
+    //
+    UTILS_NFCORE_PIPELINE ()
+
+    //
+    // Custom validation for pipeline parameters
+    //
+    validateInputParameters()
+
+    //
+    // Create channel from input file provided through params.input
+    //
+    Channel
+        .fromSamplesheet("input")
+        .map {
+            meta, fastq_1, fastq_2 ->
+                if (!fastq_2) {
+                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
+                } else {
+                    return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
+                }
+        }
+        .groupTuple()
+        .map {
+            validateInputSamplesheet(it)
+        }
+        .set { ch_samplesheet }
+
+    emit:
+    samplesheet = ch_samplesheet
+}
+
+/*
+========================================================================================
+    SUBWORKFLOW FOR PIPELINE COMPLETION
+========================================================================================
+*/
+
+workflow PIPELINE_COMPLETION {
+
+    take:
+    email           //  string: email address
+    email_on_fail   //  string: email address sent on pipeline failure
+    plaintext_email // boolean: Send plain-text email instead of HTML
+    outdir          //    path: Path to output directory where results will be published
+    monochrome_logs // boolean: Disable ANSI colour codes in log output
+    hook_url        //  string: hook URL for notifications
+
+    main:
+
+    summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+
+    //
+    // Completion email and summary
+    //
+    workflow.onComplete {
+        if (email || email_on_fail) {
+            completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs)
+        }
+
+        completionSummary(monochrome_logs)
+
+        if (hook_url) {
+            imNotification(summary_params, hook_url)
+        }
+    }
+}
+
+/*
+========================================================================================
+    FUNCTIONS
+========================================================================================
+*/
+
+//
+// Function to validate channels from input samplesheet
+//
+def validateInputSamplesheet(input) {
+    def (metas, fastqs) = input[1..2]
+
+    // Check that multiple runs of the same sample are of the same strandedness
+    def strandedness_ok =
metas.collect{ it.strandedness }.unique().size == 1 + if (!strandedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}") + } + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} + +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + + genomeExistsError() + + if (!params.fasta) { + error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.") + } + + if (!params.gtf && !params.gff) { + error("No GTF or GFF3 annotation specified! The pipeline requires at least one of these files.") + } + + if (params.gtf) { + if (params.gff) { + gtfGffWarn() + } + if (params.genome == 'GRCh38' && params.gtf.contains('Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf')) { + ncbiGenomeWarn() + } + if (params.gtf.contains('/UCSC/') && params.gtf.contains('Annotation/Genes/genes.gtf')) { + ucscGenomeWarn() + } + } + + if (params.transcript_fasta) { + transcriptsFastaWarn() + } + + if (!params.skip_bbsplit && !params.bbsplit_index && !params.bbsplit_fasta_list) { + error("Please provide either --bbsplit_fasta_list / --bbsplit_index to run BBSplit.") + } + + if (params.remove_ribo_rna && !params.ribo_database_manifest) { + error("Please provide --ribo_database_manifest to remove ribosomal RNA with SortMeRNA.") + } + + if (params.with_umi && !params.skip_umi_extract) { + if (!params.umitools_bc_pattern && !params.umitools_bc_pattern2) { + error("UMI-tools requires a barcode pattern to extract barcodes from the reads.") + } + } + + if (params.skip_alignment) { + skipAlignmentWarn() + } + + if (!params.skip_pseudo_alignment && params.pseudo_aligner) { + if (!(params.salmon_index || params.transcript_fasta || (params.fasta && (params.gtf || params.gff)))) { + error("To use `--pseudo_aligner 'salmon'`, you must provide either --salmon_index or --transcript_fasta or both --fasta and --gtf / --gff.") + } + } + + // Checks when running --aligner star_rsem + if (!params.skip_alignment && params.aligner == 'star_rsem') { + if (params.with_umi) { + rsemUmiError() + } + if (params.rsem_index && params.star_index) { + rsemStarIndexWarn() + } + if (params.aligner == 'star_rsem' && params.extra_star_align_args) { + rsemStarExtraArgumentsWarn() + } + } + + // Warn if --additional_fasta provided with aligner index + if (!params.skip_alignment && params.additional_fasta) { + def index = '' + if (params.aligner == 'star_salmon' && params.star_index) { + index = 'star' + } + if (params.aligner == 'star_rsem' && params.rsem_index) { + index = 'rsem' + } + if (params.aligner == 'hisat2' && params.hisat2_index) { + index = 'hisat2' + } + if (index) { + additionaFastaIndexWarn(index) + } + } + + // Check which RSeQC modules we are running + def valid_rseqc_modules = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin'] + def rseqc_modules = params.rseqc_modules ? 
params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : [] + if ((valid_rseqc_modules + rseqc_modules).unique().size() != valid_rseqc_modules.size()) { + error("Invalid option: ${params.rseqc_modules}. Valid options for '--rseqc_modules': ${valid_rseqc_modules.join(', ')}") + } + + // Check rRNA databases for sortmerna + if (params.remove_ribo_rna) { + ch_ribo_db = file(params.ribo_database_manifest) + if (ch_ribo_db.isEmpty()) { + error("File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!") + } + } + + // Check if file with list of fastas is provided when running BBSplit + if (!params.skip_bbsplit && !params.bbsplit_index && params.bbsplit_fasta_list) { + ch_bbsplit_fasta_list = file(params.bbsplit_fasta_list) + if (ch_bbsplit_fasta_list.isEmpty()) { + error("File provided with --bbsplit_fasta_list is empty: ${ch_bbsplit_fasta_list.getName()}!") + } + } +} + +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Print a warning if both GTF and GFF have been provided +// +def gtfGffWarn() { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Both '--gtf' and '--gff' parameters have been provided.\n" + + " Using GTF file as priority.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +} + +// +// Print a warning if using GRCh38 assembly from igenomes.config +// +def ncbiGenomeWarn() { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using '--genome GRCh38' the assembly is from the NCBI and NOT Ensembl.\n" + + " Biotype QC will be skipped to circumvent the issue below:\n" + + " https://github.com/nf-core/rnaseq/issues/460\n\n" + + " If you would like to use the soft-masked Ensembl assembly instead please see:\n" + + " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +} + +// +// Print a warning if using a UCSC assembly from igenomes.config +// +def ucscGenomeWarn() { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using UCSC assemblies the 'gene_biotype' field is absent from the GTF file.\n" + + " Biotype QC will be skipped to circumvent the issue below:\n" + + " https://github.com/nf-core/rnaseq/issues/460\n\n" + + " If you would like to use the soft-masked Ensembl assembly instead please see:\n" + + " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +} + +// +// Print a warning if using '--transcript_fasta' +// +def 
transcriptsFastaWarn() { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " '--transcript_fasta' parameter has been provided.\n" + + " Make sure transcript names in this file match those in the GFF/GTF file.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/753\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +} + +// +// Print a warning if --skip_alignment has been provided +// +def skipAlignmentWarn() { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " '--skip_alignment' parameter has been provided.\n" + + " Skipping alignment, genome-based quantification and all downstream QC processes.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +} + +// +// Print a warning if using '--aligner star_rsem' and '--with_umi' +// +def rsemUmiError() { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using '--aligner star_rsem', STAR is run by RSEM itself and so it is\n" + + " not possible to remove UMIs before the quantification.\n\n" + + " If you would like to remove UMI barcodes using the '--with_umi' option\n" + + " please use either '--aligner star_salmon' or '--aligner hisat2'.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) +} + +// +// Print a warning if using '--aligner star_rsem' and providing both '--rsem_index' and '--star_index' +// +def rsemStarIndexWarn() { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using '--aligner star_rsem', both the STAR and RSEM indices should\n" + + " be present in the path specified by '--rsem_index'.\n\n" + + " This warning has been generated because you have provided both\n" + + " '--rsem_index' and '--star_index'. The pipeline will ignore the latter.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/568\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +} + +// +// Print a warning if using '--aligner star_rsem' and providing '--star_extra_alignment_args' +// +def rsemStarExtraArgumentsWarn() { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " No additional arguments can be passed to STAR when using RSEM.\n" + + " Because RSEM enforces its own parameters for STAR, any extra arguments\n" + + " to STAR will be ignored. 
Alternatively, choose the STAR+Salmon route.\n\n" +
+        "  This warning has been generated because you have provided both\n" +
+        "  '--aligner star_rsem' and '--extra_star_align_args'.\n\n" +
+        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Print a warning if using '--additional_fasta' and '--<ALIGNER>_index'
+//
+def additionaFastaIndexWarn(index) {
+    log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+        "  When using '--additional_fasta <FASTA_FILE>' the aligner index will not\n" +
+        "  be re-built with the transgenes incorporated by default since you have \n" +
+        "  already provided an index via '--${index}_index <INDEX>'.\n\n" +
+        "  Set '--additional_fasta <FASTA_FILE> --${index}_index false --gene_bed false --save_reference'\n" +
+        "  to re-build the index with transgenes included and the index and gene BED file will be saved in\n" +
+        "  'results/genome/index/${index}/' for re-use with '--${index}_index'.\n\n" +
+        "  Ignore this warning if you know that the index already contains transgenes.\n\n" +
+        "  Please see:\n" +
+        "  https://github.com/nf-core/rnaseq/issues/556\n" +
+        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+}
+
+//
+// Function to generate an error if contigs in genome fasta file > 512 Mbp
+//
+def checkMaxContigSize(fai_file) {
+    def max_size = 512000000
+    fai_file.eachLine { line ->
+        def lspl  = line.split('\t')
+        def chrom = lspl[0]
+        def size  = lspl[1]
+        if (size.toInteger() > max_size) {
+            def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+                "  Contig longer than ${max_size}bp found in reference genome!\n\n" +
+                "  ${chrom}: ${size}\n\n" +
+                "  Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" +
+                "  Please see:\n" +
+                "  https://github.com/nf-core/rnaseq/issues/744\n" +
+                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+            error(error_string)
+        }
+    }
+}
+
+//
+// Create MultiQC tsv custom content from a list of values
+//
+def multiqcTsvFromList(tsv_data, header) {
+    def tsv_string = ""
+    if (tsv_data.size() > 0) {
+        tsv_string += "${header.join('\t')}\n"
+        tsv_string += tsv_data.join('\n')
+    }
+    return tsv_string
+}
+
+//
+// Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness
+//
+def getSalmonInferredStrandedness(json_file) {
+    def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0]
+    def strandedness = 'reverse'
+    if (lib_type) {
+        if (lib_type in ['U', 'IU']) {
+            strandedness = 'unstranded'
+        } else if (lib_type in ['SF', 'ISF']) {
+            strandedness = 'forward'
+        } else if (lib_type in ['SR', 'ISR']) {
+            strandedness = 'reverse'
+        }
+    }
+    return strandedness
+}
+
+//
+// Function that parses and returns the alignment rate from the STAR log output
+//
+def getStarPercentMapped(params, align_log) {
+    def percent_aligned = 0
+    def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/
+    align_log.eachLine { line ->
+        def matcher = line =~ pattern
+        if (matcher) {
+            percent_aligned = matcher[0][1].toFloat()
+        }
+    }
+
+    def pass = false
+    if (percent_aligned >= params.min_mapped_reads.toFloat()) {
+        pass = true
+    }
+    return [ percent_aligned, pass ]
+}
+
+//
+// Function to check whether biotype field exists in GTF file
+//
+def biotypeInGtf(gtf_file, biotype) {
+    def hits = 0
+    gtf_file.eachLine { line ->
+        def attributes = line.split('\t')[-1].split()
+        if (attributes.contains(biotype)) {
+            hits += 1
+        }
+    }
+    if (hits) {
+        return true
+    } else
{ + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Biotype attribute '${biotype}' not found in the last column of the GTF file!\n\n" + + " Biotype QC will be skipped to circumvent the issue below:\n" + + " https://github.com/nf-core/rnaseq/issues/460\n\n" + + " Amend '--featurecounts_group_type' to change this behaviour.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + return false + } +} + +// +// Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output +// +def getInferexperimentStrandedness(inferexperiment_file, cutoff=30) { + def sense = 0 + def antisense = 0 + def undetermined = 0 + inferexperiment_file.eachLine { line -> + def undetermined_matcher = line =~ /Fraction of reads failed to determine:\s([\d\.]+)/ + def se_sense_matcher = line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/ + def se_antisense_matcher = line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/ + def pe_sense_matcher = line =~ /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/ + def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/ + if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100 + if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100 + if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100 + if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100 + if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100 + } + def strandedness = 'unstranded' + if (sense >= 100-cutoff) { + strandedness = 'forward' + } else if (antisense >= 100-cutoff) { + strandedness = 'reverse' + } + return [ strandedness, sense, antisense, undetermined ] +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 000000000..ac31f28f6 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String 
version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 000000000..e5c3a0a82 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 000000000..8ed4310ca --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 000000000..db2030f8b --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,12 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "timestamp": "2024-01-19T11:32:36.031083" + }, + "Test Function checkCondaChannels": { + "content": null, + "timestamp": "2024-01-19T11:32:50.456" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 000000000..f7c54bc68 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,123 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + params { + outdir = "tests/results" + } + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + params { + outdir = "tests/results" + } + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + print_version = false + dump_parameters = true + outdir = params.outdir + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = params.outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + print_version = false + dump_parameters = true + outdir = params.outdir + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = null + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 000000000..53574ffec --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 000000000..f84761125 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 000000000..6d805bb3f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,395 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + main: + valid_config = checkConfigProvided() + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Citation string for pipeline +// +def workflowCitation() { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " ${workflow.manifest.doi}\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
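/* The escape sequences assigned here are standard ANSI SGR codes:
   "\033[0;90m".."\033[0;97m" select high-intensity foreground colours,
   and "\033[0m" (colors.reset) restores the terminal default. Typical
   use, mirroring completionSummary() further down:
     log.info "${colors.purple}[pipeline]${colors.green} done${colors.reset}"
*/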
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, 
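/* Note on the map being built here: GStringTemplateEngine (used just
   below) binds each key of smail_fields to a ${...} placeholder in
   assets/sendmail_template.txt, so an illustrative (assumed) template
   fragment would be:
     To: ${email}
     Subject: ${subject}
     ${email_txt}
*/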
email_html: email_html, projectDir: "${workflow.projectDir}" ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g.
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
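/* The check completed below treats any non-200 response as a failure and
   logs the provider's error body. A sketch of a typical call site (names
   assumed, not taken from this diff):
     workflow.onComplete {
         if (params.hook_url) imNotification(summary_params, params.hook_url)
     }
*/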
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 000000000..dd1462b23 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,20 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: [] +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 000000000..c176295d6 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,114 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 000000000..afb9ab4dc --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Test Function checkConfigProvided": { + "content": [ + true + ], + "timestamp": "2024-01-19T11:34:13.548431224" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ 
\u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "timestamp": "2024-01-19T11:34:38.840454873" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "timestamp": "2024-01-19T11:34:22.24352016" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "timestamp": "2024-01-19T11:35:04.418416984" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "timestamp": "2024-01-19T11:34:55.420000755" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "timestamp": "2024-01-19T11:35:13.436366565" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 000000000..c5f7776a6 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,21 @@ +nextflow_workflow { + + name "Test Workflow 
UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 000000000..d07ce54c5 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,15 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "timestamp": "2024-01-19T11:35:22.538940073" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 000000000..53574ffec --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 000000000..ac8523c9a --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 000000000..2585b65d1 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default command used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if
needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 000000000..3d4a6b04f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 000000000..517ee54e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { 
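/* The exit-status assertion completed below holds because the help branch
   of UTILS_NFVALIDATION_PLUGIN (see main.nf above) prints paramsHelp(...)
   and then calls System.exit(0), so a help-only run ends with code 0. */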
assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 000000000..7626c1c93 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 000000000..60b1cfff4 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf deleted file mode 100755 index 9250125dc..000000000 --- a/workflows/rnaseq.nf +++ /dev/null @@ -1,941 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -WorkflowRnaseq.initialise(params, log) - -// Check rRNA databases for sortmerna -if (params.remove_ribo_rna) { - ch_ribo_db = file(params.ribo_database_manifest) - if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"} -} - -// Check if file with list of fastas is provided when running BBSplit -if (!params.skip_bbsplit && !params.bbsplit_index && params.bbsplit_fasta_list) { - ch_bbsplit_fasta_list = file(params.bbsplit_fasta_list) - if (ch_bbsplit_fasta_list.isEmpty()) {exit 1, "File provided with --bbsplit_fasta_list is empty: ${ch_bbsplit_fasta_list.getName()}!"} -} - -// Check alignment parameters -def prepareToolIndices = [] -if (!params.skip_bbsplit) { prepareToolIndices << 'bbsplit' } -if (!params.skip_alignment) { prepareToolIndices << params.aligner } -if (!params.skip_pseudo_alignment && params.pseudo_aligner) { prepareToolIndices << params.pseudo_aligner } - -// Determine whether to filter the GTF or not -def filterGtf = - (( - // Condition 1: Alignment is required and aligner is set - !params.skip_alignment && params.aligner - ) || - ( - // Condition 2: Pseudoalignment is required and pseudoaligner is set - !params.skip_pseudo_alignment && params.pseudo_aligner - ) || - ( - // Condition 3: Transcript FASTA file is not provided - !params.transcript_fasta - )) && - ( - // Condition 4: --skip_gtf_filter is not provided - !params.skip_gtf_filter - ) - -// Get RSeqC modules to run -def rseqc_modules = params.rseqc_modules ? 
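/* Worked example for the expression continued below: with
   --rseqc_modules 'bam_stat, Inner_Distance' the split/trim/toLowerCase
   chain yields ['bam_stat', 'inner_distance']; if --bam_csi_index is also
   set, the loop that follows removes 'inner_distance' (incompatible with
   CSI indices), leaving ['bam_stat']. */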
params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : [] -if (params.bam_csi_index) { - for (rseqc_module in ['read_distribution', 'inner_distance', 'tin']) { - if (rseqc_modules.contains(rseqc_module)) { - rseqc_modules.remove(rseqc_module) - } - } -} - -// Stage dummy file to be used as an optional input where required -ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true) - -// Check if an AWS iGenome has been provided to use the appropriate version of STAR -def is_aws_igenome = false -if (params.fasta && params.gtf) { - if ((file(params.fasta).getName() - '.gz' == 'genome.fa') && (file(params.gtf).getName() - '.gz' == 'genes.gtf')) { - is_aws_igenome = true - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -// Header files for MultiQC -ch_pca_header_multiqc = file("$projectDir/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true) -ch_clustering_header_multiqc = file("$projectDir/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true) -ch_biotypes_header_multiqc = file("$projectDir/assets/multiqc/biotypes_header.txt", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Loaded from modules/local/ -// -include { BEDTOOLS_GENOMECOV } from '../modules/local/bedtools_genomecov' -include { DESEQ2_QC as DESEQ2_QC_STAR_SALMON } from '../modules/local/deseq2_qc' -include { DESEQ2_QC as DESEQ2_QC_RSEM } from '../modules/local/deseq2_qc' -include { DESEQ2_QC as DESEQ2_QC_PSEUDO } from '../modules/local/deseq2_qc' -include { DUPRADAR } from '../modules/local/dupradar' -include { MULTIQC } from '../modules/local/multiqc' -include { MULTIQC_CUSTOM_BIOTYPE } from '../modules/local/multiqc_custom_biotype' -include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../modules/local/umitools_prepareforrsem' - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { ALIGN_STAR } from '../subworkflows/local/align_star' -include { QUANTIFY_RSEM } from '../subworkflows/local/quantify_rsem' -include { QUANTIFY_PSEUDO_ALIGNMENT as QUANTIFY_STAR_SALMON } from '../subworkflows/local/quantify_pseudo' -include { QUANTIFY_PSEUDO_ALIGNMENT } from '../subworkflows/local/quantify_pseudo' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq' -include { BBMAP_BBSPLIT } from 
'../modules/nf-core/bbmap/bbsplit' -include { SAMTOOLS_SORT } from '../modules/nf-core/samtools/sort' -include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap' -include { QUALIMAP_RNASEQ } from '../modules/nf-core/qualimap/rnaseq' -include { SORTMERNA } from '../modules/nf-core/sortmerna' -include { STRINGTIE_STRINGTIE } from '../modules/nf-core/stringtie/stringtie' -include { SUBREAD_FEATURECOUNTS } from '../modules/nf-core/subread/featurecounts' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions' - -// -// SUBWORKFLOW: Consisting entirely of nf-core/modules -// -include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../subworkflows/nf-core/fastq_subsample_fq_salmon' -include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore' -include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp' -include { FASTQ_ALIGN_HISAT2 } from '../subworkflows/nf-core/fastq_align_hisat2' -include { BAM_SORT_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_sort_stats_samtools' -include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard' -include { BAM_RSEQC } from '../subworkflows/nf-core/bam_rseqc' -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' -include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD } from '../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig' -include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE } from '../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Info required for completion email and summary -def multiqc_report = [] -def pass_mapped_reads = [:] -def pass_trimmed_reads = [:] -def pass_strand_check = [:] - -workflow RNASEQ { - - ch_versions = Channel.empty() - - // - // SUBWORKFLOW: Uncompress and prepare reference genome files - // - def biotype = params.gencode ? 
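/* Rationale for the ternary continued below: GENCODE GTFs record biotype
   in the 'gene_type' attribute, while Ensembl-style annotation uses
   'gene_biotype' (the typical default for --featurecounts_group_type),
   so --gencode switches which attribute drives the biotype QC. */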
"gene_type" : params.featurecounts_group_type - PREPARE_GENOME ( - params.fasta, - params.gtf, - params.gff, - params.additional_fasta, - params.transcript_fasta, - params.gene_bed, - params.splicesites, - params.bbsplit_fasta_list, - params.star_index, - params.rsem_index, - params.salmon_index, - params.kallisto_index, - params.hisat2_index, - params.bbsplit_index, - params.gencode, - is_aws_igenome, - biotype, - prepareToolIndices, - filterGtf - ) - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - - // Check if contigs in genome fasta file > 512 Mbp - if (!params.skip_alignment && !params.bam_csi_index) { - PREPARE_GENOME - .out - .fai - .map { WorkflowRnaseq.checkMaxContigSize(it, log) } - } - - // - // Create input channel from input file provided through params.input - // - Channel - .fromSamplesheet("input") - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { - WorkflowRnaseq.validateInput(it) - } - .branch { - meta, fastqs -> - single : fastqs.size() == 1 - return [ meta, fastqs.flatten() ] - multiple: fastqs.size() > 1 - return [ meta, fastqs.flatten() ] - } - .set { ch_fastq } - - // - // MODULE: Concatenate FastQ files from same sample if required - // - CAT_FASTQ ( - ch_fastq.multiple - ) - .reads - .mix(ch_fastq.single) - .set { ch_cat_fastq } - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) - - // - // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore! - // - ch_filtered_reads = Channel.empty() - ch_fastqc_raw_multiqc = Channel.empty() - ch_fastqc_trim_multiqc = Channel.empty() - ch_trim_log_multiqc = Channel.empty() - ch_trim_read_count = Channel.empty() - if (params.trimmer == 'trimgalore') { - FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( - ch_cat_fastq, - params.skip_fastqc || params.skip_qc, - params.with_umi, - params.skip_umi_extract, - params.skip_trimming, - params.umi_discard_read, - params.min_trimmed_reads - ) - ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads - ch_fastqc_raw_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip - ch_fastqc_trim_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip - ch_trim_log_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log - ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count - ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) - } - - // - // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp - // - if (params.trimmer == 'fastp') { - FASTQ_FASTQC_UMITOOLS_FASTP ( - ch_cat_fastq, - params.skip_fastqc || params.skip_qc, - params.with_umi, - params.skip_umi_extract, - params.umi_discard_read, - params.skip_trimming, - [], - params.save_trimmed, - params.save_trimmed, - params.min_trimmed_reads - ) - ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads - ch_fastqc_raw_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip - ch_fastqc_trim_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip - ch_trim_log_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json - ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count - ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - } - - // - // Get list of samples that failed trimming threshold for MultiQC report - // - ch_trim_read_count - .map { - meta, num_reads -> - pass_trimmed_reads[meta.id] = true - if (num_reads <= 
params.min_trimmed_reads.toFloat()) { - pass_trimmed_reads[meta.id] = false - return [ "$meta.id\t$num_reads" ] - } - } - .collect() - .map { - tsv_data -> - def header = ["Sample", "Reads after trimming"] - WorkflowRnaseq.multiqcTsvFromList(tsv_data, header) - } - .set { ch_fail_trimming_multiqc } - - // - // MODULE: Remove genome contaminant reads - // - if (!params.skip_bbsplit) { - BBMAP_BBSPLIT ( - ch_filtered_reads, - PREPARE_GENOME.out.bbsplit_index, - [], - [ [], [] ], - false - ) - .primary_fastq - .set { ch_filtered_reads } - ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first()) - } - - // - // MODULE: Remove ribosomal RNA reads - // - ch_sortmerna_multiqc = Channel.empty() - if (params.remove_ribo_rna) { - ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect() - - SORTMERNA ( - ch_filtered_reads, - ch_sortmerna_fastas - ) - .reads - .set { ch_filtered_reads } - - ch_sortmerna_multiqc = SORTMERNA.out.log - ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) - } - - // - // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness - // - - // Branch FastQ channels if 'auto' specified to infer strandedness - ch_filtered_reads - .branch { - meta, fastq -> - auto_strand : meta.strandedness == 'auto' - return [ meta, fastq ] - known_strand: meta.strandedness != 'auto' - return [ meta, fastq ] - } - .set { ch_strand_fastq } - - // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created - PREPARE_GENOME - .out - .fasta - .combine(ch_strand_fastq.auto_strand) - .map { it.first() } - .first() - .set { ch_genome_fasta } - - FASTQ_SUBSAMPLE_FQ_SALMON ( - ch_strand_fastq.auto_strand, - ch_genome_fasta, - PREPARE_GENOME.out.transcript_fasta, - PREPARE_GENOME.out.gtf, - PREPARE_GENOME.out.salmon_index, - !params.salmon_index && !('salmon' in prepareToolIndices) - ) - ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions) - - FASTQ_SUBSAMPLE_FQ_SALMON - .out - .json_info - .join(ch_strand_fastq.auto_strand) - .map { meta, json, reads -> - return [ meta + [ strandedness: WorkflowRnaseq.getSalmonInferredStrandedness(json) ], reads ] - } - .mix(ch_strand_fastq.known_strand) - .set { ch_strand_inferred_filtered_fastq } - - // - // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with Salmon - // - ch_genome_bam = Channel.empty() - ch_genome_bam_index = Channel.empty() - ch_samtools_stats = Channel.empty() - ch_samtools_flagstat = Channel.empty() - ch_samtools_idxstats = Channel.empty() - ch_star_multiqc = Channel.empty() - ch_aligner_pca_multiqc = Channel.empty() - ch_aligner_clustering_multiqc = Channel.empty() - if (!params.skip_alignment && params.aligner == 'star_salmon') { - ALIGN_STAR ( - ch_strand_inferred_filtered_fastq, - PREPARE_GENOME.out.star_index.map { [ [:], it ] }, - PREPARE_GENOME.out.gtf.map { [ [:], it ] }, - params.star_ignore_sjdbgtf, - '', - params.seq_center ?: '', - is_aws_igenome, - PREPARE_GENOME.out.fasta.map { [ [:], it ] } - ) - ch_genome_bam = ALIGN_STAR.out.bam - ch_genome_bam_index = ALIGN_STAR.out.bai - ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript - ch_samtools_stats = ALIGN_STAR.out.stats - ch_samtools_flagstat = ALIGN_STAR.out.flagstat - ch_samtools_idxstats = ALIGN_STAR.out.idxstats - ch_star_multiqc = ALIGN_STAR.out.log_final - if (params.bam_csi_index) { - ch_genome_bam_index = ALIGN_STAR.out.csi - } - ch_versions = ch_versions.mix(ALIGN_STAR.out.versions) - - // - // 
SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs - // - if (params.with_umi) { - // Deduplicate genome BAM file before downstream analysis - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( - ch_genome_bam.join(ch_genome_bam_index, by: [0]), - params.umitools_dedup_stats - ) - ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam - ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai - ch_samtools_stats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats - ch_samtools_flagstat = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat - ch_samtools_idxstats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats - if (params.bam_csi_index) { - ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi - } - ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions) - - // Co-ordinate sort, index and run stats on transcriptome BAM - BAM_SORT_STATS_SAMTOOLS ( - ch_transcriptome_bam, - PREPARE_GENOME.out.fasta.map { [ [:], it ] } - ) - ch_transcriptome_sorted_bam = BAM_SORT_STATS_SAMTOOLS.out.bam - ch_transcriptome_sorted_bai = BAM_SORT_STATS_SAMTOOLS.out.bai - - // Deduplicate transcriptome BAM file before read counting with Salmon - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( - ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]), - params.umitools_dedup_stats - ) - - // Name sort BAM before passing to Salmon - SAMTOOLS_SORT ( - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam - ) - - // Only run prepare_for_rsem.py on paired-end BAM files - SAMTOOLS_SORT - .out - .bam - .branch { - meta, bam -> - single_end: meta.single_end - return [ meta, bam ] - paired_end: !meta.single_end - return [ meta, bam ] - } - .set { ch_umitools_dedup_bam } - - // Fix paired-end reads in name sorted BAM file - // See: https://github.com/nf-core/rnaseq/issues/828 - UMITOOLS_PREPAREFORSALMON ( - ch_umitools_dedup_bam.paired_end - ) - ch_versions = ch_versions.mix(UMITOOLS_PREPAREFORSALMON.out.versions.first()) - - ch_umitools_dedup_bam - .single_end - .mix(UMITOOLS_PREPAREFORSALMON.out.bam) - .set { ch_transcriptome_bam } - } - - // - // SUBWORKFLOW: Count reads from BAM alignments using Salmon - // - QUANTIFY_STAR_SALMON ( - ch_transcriptome_bam, - ch_dummy_file, - PREPARE_GENOME.out.transcript_fasta, - PREPARE_GENOME.out.gtf, - 'salmon', - true, - params.salmon_quant_libtype ?: '', - params.kallisto_quant_fraglen, - params.kallisto_quant_fraglen_sd - ) - ch_versions = ch_versions.mix(QUANTIFY_STAR_SALMON.out.versions) - - if (!params.skip_qc & !params.skip_deseq2_qc) { - DESEQ2_QC_STAR_SALMON ( - QUANTIFY_STAR_SALMON.out.counts_gene_length_scaled, - ch_pca_header_multiqc, - ch_clustering_header_multiqc - ) - ch_aligner_pca_multiqc = DESEQ2_QC_STAR_SALMON.out.pca_multiqc - ch_aligner_clustering_multiqc = DESEQ2_QC_STAR_SALMON.out.dists_multiqc - ch_versions = ch_versions.mix(DESEQ2_QC_STAR_SALMON.out.versions) - } - } - - // - // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with RSEM - // - ch_rsem_multiqc = Channel.empty() - if (!params.skip_alignment && params.aligner == 'star_rsem') { - QUANTIFY_RSEM ( - ch_strand_inferred_filtered_fastq, - PREPARE_GENOME.out.rsem_index, - PREPARE_GENOME.out.fasta.map { [ [:], it ] } - ) - ch_genome_bam = QUANTIFY_RSEM.out.bam - ch_genome_bam_index = QUANTIFY_RSEM.out.bai - ch_samtools_stats = QUANTIFY_RSEM.out.stats - ch_samtools_flagstat = QUANTIFY_RSEM.out.flagstat - ch_samtools_idxstats = QUANTIFY_RSEM.out.idxstats - ch_star_multiqc = 
-    //
-    // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with RSEM
-    //
-    ch_rsem_multiqc = Channel.empty()
-    if (!params.skip_alignment && params.aligner == 'star_rsem') {
-        QUANTIFY_RSEM (
-            ch_strand_inferred_filtered_fastq,
-            PREPARE_GENOME.out.rsem_index,
-            PREPARE_GENOME.out.fasta.map { [ [:], it ] }
-        )
-        ch_genome_bam        = QUANTIFY_RSEM.out.bam
-        ch_genome_bam_index  = QUANTIFY_RSEM.out.bai
-        ch_samtools_stats    = QUANTIFY_RSEM.out.stats
-        ch_samtools_flagstat = QUANTIFY_RSEM.out.flagstat
-        ch_samtools_idxstats = QUANTIFY_RSEM.out.idxstats
-        ch_star_multiqc      = QUANTIFY_RSEM.out.logs
-        ch_rsem_multiqc      = QUANTIFY_RSEM.out.stat
-        if (params.bam_csi_index) {
-            ch_genome_bam_index = QUANTIFY_RSEM.out.csi
-        }
-        ch_versions = ch_versions.mix(QUANTIFY_RSEM.out.versions)
-
-        if (!params.skip_qc & !params.skip_deseq2_qc) {
-            DESEQ2_QC_RSEM (
-                QUANTIFY_RSEM.out.merged_counts_gene,
-                ch_pca_header_multiqc,
-                ch_clustering_header_multiqc
-            )
-            ch_aligner_pca_multiqc        = DESEQ2_QC_RSEM.out.pca_multiqc
-            ch_aligner_clustering_multiqc = DESEQ2_QC_RSEM.out.dists_multiqc
-            ch_versions = ch_versions.mix(DESEQ2_QC_RSEM.out.versions)
-        }
-    }
-
-    //
-    // SUBWORKFLOW: Alignment with HISAT2
-    //
-    ch_hisat2_multiqc = Channel.empty()
-    if (!params.skip_alignment && params.aligner == 'hisat2') {
-        FASTQ_ALIGN_HISAT2 (
-            ch_strand_inferred_filtered_fastq,
-            PREPARE_GENOME.out.hisat2_index.map { [ [:], it ] },
-            PREPARE_GENOME.out.splicesites.map { [ [:], it ] },
-            PREPARE_GENOME.out.fasta.map { [ [:], it ] }
-        )
-        ch_genome_bam        = FASTQ_ALIGN_HISAT2.out.bam
-        ch_genome_bam_index  = FASTQ_ALIGN_HISAT2.out.bai
-        ch_samtools_stats    = FASTQ_ALIGN_HISAT2.out.stats
-        ch_samtools_flagstat = FASTQ_ALIGN_HISAT2.out.flagstat
-        ch_samtools_idxstats = FASTQ_ALIGN_HISAT2.out.idxstats
-        ch_hisat2_multiqc    = FASTQ_ALIGN_HISAT2.out.summary
-        if (params.bam_csi_index) {
-            ch_genome_bam_index = FASTQ_ALIGN_HISAT2.out.csi
-        }
-        ch_versions = ch_versions.mix(FASTQ_ALIGN_HISAT2.out.versions)
-
-        //
-        // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
-        //
-        if (params.with_umi) {
-            BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
-                ch_genome_bam.join(ch_genome_bam_index, by: [0]),
-                params.umitools_dedup_stats
-            )
-            ch_genome_bam        = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam
-            ch_genome_bam_index  = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai
-            ch_samtools_stats    = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats
-            ch_samtools_flagstat = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat
-            ch_samtools_idxstats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats
-            if (params.bam_csi_index) {
-                ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi
-            }
-            ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions)
-        }
-    }
-
-    //
-    // Filter channels to get samples that passed STAR minimum mapping percentage
-    //
-    ch_fail_mapping_multiqc = Channel.empty()
-    if (!params.skip_alignment && params.aligner.contains('star')) {
-        ch_star_multiqc
-            .map { meta, align_log -> [ meta ] + WorkflowRnaseq.getStarPercentMapped(params, align_log) }
-            .set { ch_percent_mapped }
-
-        ch_genome_bam
-            .join(ch_percent_mapped, by: [0])
-            .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] }
-            .set { ch_genome_bam }
-
-        ch_genome_bam_index
-            .join(ch_percent_mapped, by: [0])
-            .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] }
-            .set { ch_genome_bam_index }
-
-        ch_percent_mapped
-            .branch { meta, mapped, pass ->
-                pass: pass
-                    pass_mapped_reads[meta.id] = true
-                    return [ "$meta.id\t$mapped" ]
-                fail: !pass
-                    pass_mapped_reads[meta.id] = false
-                    return [ "$meta.id\t$mapped" ]
-            }
-            .set { ch_pass_fail_mapped }
-
-        ch_pass_fail_mapped
-            .fail
-            .collect()
-            .map {
-                tsv_data ->
-                    def header = ["Sample", "STAR uniquely mapped reads (%)"]
-                    WorkflowRnaseq.multiqcTsvFromList(tsv_data, header)
-            }
-            .set { ch_fail_mapping_multiqc }
-    }
-
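The join/map pairs above drop samples whose STAR uniquely-mapped percentage falls below the configured minimum: `join` lines each BAM up with its mapping statistics via the shared meta map, and the map closure only returns a tuple for passing samples. An equivalent, slightly more explicit formulation using `filter`, with hypothetical sample data:

    workflow {
        ch_bam     = Channel.of( [ [ id: 'S1' ], 'S1.bam' ], [ [ id: 'S2' ], 'S2.bam' ] )
        ch_percent = Channel.of( [ [ id: 'S1' ], 91.5, true ], [ [ id: 'S2' ], 3.2, false ] )

        ch_bam
            .join(ch_percent, by: [0])                   // pair each BAM with its stats via meta
            .filter { meta, bam, mapped, pass -> pass }  // keep passing samples only
            .map { meta, bam, mapped, pass -> [ meta, bam ] }
            .view()                                      // -> [[id:S1], S1.bam]
    }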
-    //
-    // MODULE: Run Preseq
-    //
-    ch_preseq_multiqc = Channel.empty()
-    if (!params.skip_alignment && !params.skip_qc && !params.skip_preseq) {
-        PRESEQ_LCEXTRAP (
-            ch_genome_bam
-        )
-        ch_preseq_multiqc = PRESEQ_LCEXTRAP.out.lc_extrap
-        ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first())
-    }
-
-    //
-    // SUBWORKFLOW: Mark duplicate reads
-    //
-    ch_markduplicates_multiqc = Channel.empty()
-    if (!params.skip_alignment && !params.skip_markduplicates && !params.with_umi) {
-        BAM_MARKDUPLICATES_PICARD (
-            ch_genome_bam,
-            PREPARE_GENOME.out.fasta.map { [ [:], it ] },
-            PREPARE_GENOME.out.fai.map { [ [:], it ] }
-        )
-        ch_genome_bam             = BAM_MARKDUPLICATES_PICARD.out.bam
-        ch_genome_bam_index       = BAM_MARKDUPLICATES_PICARD.out.bai
-        ch_samtools_stats         = BAM_MARKDUPLICATES_PICARD.out.stats
-        ch_samtools_flagstat      = BAM_MARKDUPLICATES_PICARD.out.flagstat
-        ch_samtools_idxstats      = BAM_MARKDUPLICATES_PICARD.out.idxstats
-        ch_markduplicates_multiqc = BAM_MARKDUPLICATES_PICARD.out.metrics
-        if (params.bam_csi_index) {
-            ch_genome_bam_index = BAM_MARKDUPLICATES_PICARD.out.csi
-        }
-        ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions)
-    }
-
-    //
-    // MODULE: STRINGTIE
-    //
-    if (!params.skip_alignment && !params.skip_stringtie) {
-        STRINGTIE_STRINGTIE (
-            ch_genome_bam,
-            PREPARE_GENOME.out.gtf
-        )
-        ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions.first())
-    }
-
-    //
-    // MODULE: Feature biotype QC using featureCounts
-    //
-    ch_featurecounts_multiqc = Channel.empty()
-    if (!params.skip_alignment && !params.skip_qc && !params.skip_biotype_qc && biotype) {
-
-        PREPARE_GENOME
-            .out
-            .gtf
-            .map { WorkflowRnaseq.biotypeInGtf(it, biotype, log) }
-            .set { biotype_in_gtf }
-
-        // Prevent any samples from running if GTF file doesn't have a valid biotype
-        ch_genome_bam
-            .combine(PREPARE_GENOME.out.gtf)
-            .combine(biotype_in_gtf)
-            .filter { it[-1] }
-            .map { it[0.. 0) {
-            BAM_RSEQC (
-                ch_genome_bam.join(ch_genome_bam_index, by: [0]),
-                PREPARE_GENOME.out.gene_bed,
-                rseqc_modules
-            )
-            ch_bamstat_multiqc            = BAM_RSEQC.out.bamstat_txt
-            ch_inferexperiment_multiqc    = BAM_RSEQC.out.inferexperiment_txt
-            ch_innerdistance_multiqc      = BAM_RSEQC.out.innerdistance_freq
-            ch_junctionannotation_multiqc = BAM_RSEQC.out.junctionannotation_log
-            ch_junctionsaturation_multiqc = BAM_RSEQC.out.junctionsaturation_rscript
-            ch_readdistribution_multiqc   = BAM_RSEQC.out.readdistribution_txt
-            ch_readduplication_multiqc    = BAM_RSEQC.out.readduplication_pos_xls
-            ch_tin_multiqc                = BAM_RSEQC.out.tin_txt
-            ch_versions = ch_versions.mix(BAM_RSEQC.out.versions)
-
-            ch_inferexperiment_multiqc
-                .map {
-                    meta, strand_log ->
-                        def inferred_strand = WorkflowRnaseq.getInferexperimentStrandedness(strand_log, 30)
-                        pass_strand_check[meta.id] = true
-                        if (meta.strandedness != inferred_strand[0]) {
-                            pass_strand_check[meta.id] = false
-                            return [ "$meta.id\t$meta.strandedness\t${inferred_strand.join('\t')}" ]
-                        }
-                }
-                .collect()
-                .map {
-                    tsv_data ->
-                        def header = [
-                            "Sample",
-                            "Provided strandedness",
-                            "Inferred strandedness",
-                            "Sense (%)",
-                            "Antisense (%)",
-                            "Undetermined (%)"
-                        ]
-                        WorkflowRnaseq.multiqcTsvFromList(tsv_data, header)
-                }
-                .set { ch_fail_strand_multiqc }
-        }
-    }
-
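`WorkflowRnaseq.multiqcTsvFromList` (imported as plain `multiqcTsvFromList` in the new code below) renders a header plus pre-formatted rows into a single TSV string that MultiQC picks up as a custom-content table. Its definition lives elsewhere in the repo; a plausible stand-in, for illustration only:

    // hypothetical re-implementation; the real helper ships with the pipeline
    def multiqcTsvFromList(tsv_data, header) {
        def tsv_string = ""
        if (tsv_data.size() > 0) {
            tsv_string += "${header.join('\t')}\n"
            tsv_string += tsv_data.join('\n')
        }
        return tsv_string
    }

    assert multiqcTsvFromList([ "S1\t12" ], [ "Sample", "Reads after trimming" ]) ==
        "Sample\tReads after trimming\nS1\t12"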
-    //
-    // SUBWORKFLOW: Pseudoalignment and quantification with Salmon
-    //
-    ch_pseudo_multiqc                   = Channel.empty()
-    ch_pseudoaligner_pca_multiqc        = Channel.empty()
-    ch_pseudoaligner_clustering_multiqc = Channel.empty()
-    if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
-
-        if (params.pseudo_aligner == 'salmon') {
-            ch_pseudo_index = PREPARE_GENOME.out.salmon_index
-        } else {
-            ch_pseudo_index = PREPARE_GENOME.out.kallisto_index
-        }
-
-        QUANTIFY_PSEUDO_ALIGNMENT (
-            ch_strand_inferred_filtered_fastq,
-            ch_pseudo_index,
-            ch_dummy_file,
-            PREPARE_GENOME.out.gtf,
-            params.pseudo_aligner,
-            false,
-            params.salmon_quant_libtype ?: '',
-            params.kallisto_quant_fraglen,
-            params.kallisto_quant_fraglen_sd
-        )
-        ch_pseudo_multiqc            = QUANTIFY_PSEUDO_ALIGNMENT.out.multiqc
-        ch_counts_gene_length_scaled = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_gene_length_scaled
-        ch_versions = ch_versions.mix(QUANTIFY_PSEUDO_ALIGNMENT.out.versions)
-
-        if (!params.skip_qc & !params.skip_deseq2_qc) {
-            DESEQ2_QC_PSEUDO (
-                ch_counts_gene_length_scaled,
-                ch_pca_header_multiqc,
-                ch_clustering_header_multiqc
-            )
-            ch_pseudoaligner_pca_multiqc        = DESEQ2_QC_PSEUDO.out.pca_multiqc
-            ch_pseudoaligner_clustering_multiqc = DESEQ2_QC_PSEUDO.out.dists_multiqc
-            ch_versions = ch_versions.mix(DESEQ2_QC_PSEUDO.out.versions)
-        }
-    }
-
-    //
-    // MODULE: Pipeline reporting
-    //
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
-
-    //
-    // MODULE: MultiQC
-    //
-    if (!params.skip_multiqc) {
-        workflow_summary    = WorkflowRnaseq.paramsSummaryMultiqc(workflow, summary_params)
-        ch_workflow_summary = Channel.value(workflow_summary)
-
-        methods_description    = WorkflowRnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
-        ch_methods_description = Channel.value(methods_description)
-
-        MULTIQC (
-            ch_multiqc_config,
-            ch_multiqc_custom_config.collect().ifEmpty([]),
-            CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(),
-            ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'),
-            ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'),
-            ch_multiqc_logo.collect().ifEmpty([]),
-            ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv').ifEmpty([]),
-            ch_fail_mapping_multiqc.collectFile(name: 'fail_mapped_samples_mqc.tsv').ifEmpty([]),
-            ch_fail_strand_multiqc.collectFile(name: 'fail_strand_check_mqc.tsv').ifEmpty([]),
-            ch_fastqc_raw_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_fastqc_trim_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_trim_log_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_sortmerna_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_star_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_hisat2_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_rsem_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_pseudo_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_samtools_stats.collect{it[1]}.ifEmpty([]),
-            ch_samtools_flagstat.collect{it[1]}.ifEmpty([]),
-            ch_samtools_idxstats.collect{it[1]}.ifEmpty([]),
-            ch_markduplicates_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_featurecounts_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_aligner_pca_multiqc.collect().ifEmpty([]),
-            ch_aligner_clustering_multiqc.collect().ifEmpty([]),
-            ch_pseudoaligner_pca_multiqc.collect().ifEmpty([]),
-            ch_pseudoaligner_clustering_multiqc.collect().ifEmpty([]),
-            ch_preseq_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_qualimap_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_dupradar_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_bamstat_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_inferexperiment_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_innerdistance_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_junctionannotation_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_junctionsaturation_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_readdistribution_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_readduplication_multiqc.collect{it[1]}.ifEmpty([]),
-            ch_tin_multiqc.collect{it[1]}.ifEmpty([])
-        )
-        multiqc_report = MULTIQC.out.report.toList()
-    }
-}
-
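For contrast with the positional MULTIQC call deleted above: the replacement main.nf added below threads one `ch_multiqc_files` accumulator through the whole workflow, and each step mixes its reports in, so MULTIQC receives a single collected channel instead of thirty-odd positional inputs. The pattern in isolation, with hypothetical log channels:

    workflow {
        ch_multiqc_files = Channel.empty()

        ch_tool_a = Channel.of( [ [ id: 'S1' ], 'a.log' ] )
        ch_tool_b = Channel.of( [ [ id: 'S1' ], 'b.log' ] )

        // each step contributes just its report files, dropping the meta map
        ch_multiqc_files = ch_multiqc_files.mix(ch_tool_a.collect { it[1] })
        ch_multiqc_files = ch_multiqc_files.mix(ch_tool_b.collect { it[1] })

        ch_multiqc_files.collect().view()  // one flat list, as handed to MULTIQC
    }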
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    COMPLETION EMAIL AND SUMMARY
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-workflow.onComplete {
-    if (params.email || params.email_on_fail) {
-        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report, pass_mapped_reads, pass_trimmed_reads, pass_strand_check)
-    }
-
-    NfcoreTemplate.dump_parameters(workflow, params)
-    NfcoreTemplate.summary(workflow, params, log, pass_mapped_reads, pass_trimmed_reads, pass_strand_check)
-
-    if (params.hook_url) {
-        NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
-    }
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    THE END
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
diff --git a/assets/multiqc/biotypes_header.txt b/workflows/rnaseq/assets/multiqc/biotypes_header.txt
similarity index 100%
rename from assets/multiqc/biotypes_header.txt
rename to workflows/rnaseq/assets/multiqc/biotypes_header.txt
diff --git a/assets/multiqc/deseq2_clustering_header.txt b/workflows/rnaseq/assets/multiqc/deseq2_clustering_header.txt
similarity index 100%
rename from assets/multiqc/deseq2_clustering_header.txt
rename to workflows/rnaseq/assets/multiqc/deseq2_clustering_header.txt
diff --git a/assets/multiqc/deseq2_pca_header.txt b/workflows/rnaseq/assets/multiqc/deseq2_pca_header.txt
similarity index 100%
rename from assets/multiqc/deseq2_pca_header.txt
rename to workflows/rnaseq/assets/multiqc/deseq2_pca_header.txt
diff --git a/assets/methods_description_template.yml b/workflows/rnaseq/assets/multiqc/methods_description_template.yml
similarity index 100%
rename from assets/methods_description_template.yml
rename to workflows/rnaseq/assets/multiqc/methods_description_template.yml
diff --git a/assets/multiqc_config.yml b/workflows/rnaseq/assets/multiqc/multiqc_config.yml
similarity index 100%
rename from assets/multiqc_config.yml
rename to workflows/rnaseq/assets/multiqc/multiqc_config.yml
diff --git a/assets/rrna-db-defaults.txt b/workflows/rnaseq/assets/rrna-db-defaults.txt
similarity index 100%
rename from assets/rrna-db-defaults.txt
rename to workflows/rnaseq/assets/rrna-db-defaults.txt
diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf
new file mode 100755
index 000000000..e19ccbbf0
--- /dev/null
+++ b/workflows/rnaseq/main.nf
@@ -0,0 +1,773 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT LOCAL MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// MODULE: Loaded from modules/local/
+//
+include { BEDTOOLS_GENOMECOV                 } from '../../modules/local/bedtools_genomecov'
+include { DESEQ2_QC as DESEQ2_QC_STAR_SALMON } from '../../modules/local/deseq2_qc'
+include { DESEQ2_QC as DESEQ2_QC_RSEM        } from '../../modules/local/deseq2_qc'
+include { DESEQ2_QC as DESEQ2_QC_PSEUDO      } from '../../modules/local/deseq2_qc'
+include { DUPRADAR                           } from '../../modules/local/dupradar'
+include { MULTIQC_CUSTOM_BIOTYPE             } from '../../modules/local/multiqc_custom_biotype'
+include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../../modules/local/umitools_prepareforrsem'
+
+//
+// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
+//
+include { ALIGN_STAR                                         } from '../../subworkflows/local/align_star'
+include { QUANTIFY_RSEM                                      } from '../../subworkflows/local/quantify_rsem'
+include { QUANTIFY_PSEUDO_ALIGNMENT as QUANTIFY_STAR_SALMON  } from '../../subworkflows/local/quantify_pseudo'
+include { QUANTIFY_PSEUDO_ALIGNMENT                          } from '../../subworkflows/local/quantify_pseudo'
+
+include { multiqcTsvFromList             } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { getSalmonInferredStrandedness  } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { getStarPercentMapped           } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { biotypeInGtf                   } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+include { getInferexperimentStrandedness } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
+
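Note the `as` aliasing in these includes: QUANTIFY_PSEUDO_ALIGNMENT is imported twice, once under its own name and once as QUANTIFY_STAR_SALMON, because a DSL2 process or subworkflow can only be invoked once per workflow body under a given name. A self-contained sketch, assuming a trivial module file at ./modules/echo.nf:

    // ./modules/echo.nf (assumed to exist for this sketch)
    process ECHO {
        input:
        val x

        output:
        stdout

        script:
        "echo '$x'"
    }

    // main.nf
    include { ECHO as ECHO_FIRST  } from './modules/echo'
    include { ECHO as ECHO_SECOND } from './modules/echo'

    workflow {
        ECHO_FIRST ( Channel.of('first call') )
        ECHO_SECOND ( Channel.of('second call') )
    }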
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT NF-CORE MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// MODULE: Installed directly from nf-core/modules
+//
+include { CAT_FASTQ             } from '../../modules/nf-core/cat/fastq'
+include { BBMAP_BBSPLIT         } from '../../modules/nf-core/bbmap/bbsplit'
+include { SAMTOOLS_SORT         } from '../../modules/nf-core/samtools/sort'
+include { PRESEQ_LCEXTRAP       } from '../../modules/nf-core/preseq/lcextrap'
+include { QUALIMAP_RNASEQ       } from '../../modules/nf-core/qualimap/rnaseq'
+include { SORTMERNA             } from '../../modules/nf-core/sortmerna'
+include { STRINGTIE_STRINGTIE   } from '../../modules/nf-core/stringtie/stringtie'
+include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts'
+include { MULTIQC               } from '../../modules/nf-core/multiqc'
+
+//
+// SUBWORKFLOW: Consisting entirely of nf-core/modules
+//
+include { paramsSummaryMap                 } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc             } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML           } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { FASTQ_SUBSAMPLE_FQ_SALMON        } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon'
+include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore'
+include { FASTQ_FASTQC_UMITOOLS_FASTP      } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
+include { FASTQ_ALIGN_HISAT2               } from '../../subworkflows/nf-core/fastq_align_hisat2'
+include { BAM_SORT_STATS_SAMTOOLS          } from '../../subworkflows/nf-core/bam_sort_stats_samtools'
+include { BAM_MARKDUPLICATES_PICARD        } from '../../subworkflows/nf-core/bam_markduplicates_picard'
+include { BAM_RSEQC                        } from '../../subworkflows/nf-core/bam_rseqc'
+include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME        } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
+include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
+include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD       } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig'
+include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE       } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN MAIN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Header files for MultiQC
+ch_pca_header_multiqc        = file("$projectDir/workflows/rnaseq/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true)
+ch_clustering_header_multiqc = file("$projectDir/workflows/rnaseq/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true)
+ch_biotypes_header_multiqc   = file("$projectDir/workflows/rnaseq/assets/multiqc/biotypes_header.txt", checkIfExists: true)
+ch_dummy_file                = ch_pca_header_multiqc
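`ch_dummy_file = ch_pca_header_multiqc` is a small pragmatic trick: some module signatures have mandatory file slots that go unused on a given code path (for example, the index slot of the Salmon quantification step when it runs on transcriptome BAMs below), and any tiny file that is already guaranteed to exist can stand in. Reusing a bundled asset avoids wiring up a dedicated placeholder:

    // sketch: any small bundled asset works as a stand-in for an unused file input
    ch_dummy_file = file("$projectDir/workflows/rnaseq/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true)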
file("$projectDir/workflows/rnaseq/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true) +ch_biotypes_header_multiqc = file("$projectDir/workflows/rnaseq/assets/multiqc/biotypes_header.txt", checkIfExists: true) +ch_dummy_file = ch_pca_header_multiqc + +workflow NFCORE_RNASEQ { + + take: + ch_samplesheet // channel: samplesheet read in from --input + ch_versions // channel: [ path(versions.yml) ] + ch_fasta // channel: path(genome.fasta) + ch_gtf // channel: path(genome.gtf) + ch_fai // channel: path(genome.fai) + ch_chrom_sizes // channel: path(genome.sizes) + ch_gene_bed // channel: path(gene.bed) + ch_transcript_fasta // channel: path(transcript.fasta) + ch_star_index // channel: path(star/index/) + ch_rsem_index // channel: path(rsem/index/) + ch_hisat2_index // channel: path(hisat2/index/) + ch_salmon_index // channel: path(salmon/index/) + ch_kallisto_index // channel: [ meta, path(kallisto/index/) ] + ch_bbsplit_index // channel: path(bbsplit/index/) + ch_splicesites // channel: path(genome.splicesites.txt) + + main: + + ch_multiqc_files = Channel.empty() + + // + // Create separate channels for samples that have single/multiple FastQ files to merge + // + ch_samplesheet + .branch { + meta, fastqs -> + single : fastqs.size() == 1 + return [ meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [ meta, fastqs.flatten() ] + } + .set { ch_fastq } + + // + // MODULE: Concatenate FastQ files from same sample if required + // + CAT_FASTQ ( + ch_fastq.multiple + ) + .reads + .mix(ch_fastq.single) + .set { ch_cat_fastq } + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) + + // + // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore! + // + ch_filtered_reads = Channel.empty() + ch_trim_read_count = Channel.empty() + if (params.trimmer == 'trimgalore') { + FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( + ch_cat_fastq, + params.skip_fastqc || params.skip_qc, + params.with_umi, + params.skip_umi_extract, + params.skip_trimming, + params.umi_discard_read, + params.min_trimmed_reads + ) + ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads + ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log.collect{it[1]}) + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) + } + + // + // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp + // + if (params.trimmer == 'fastp') { + FASTQ_FASTQC_UMITOOLS_FASTP ( + ch_cat_fastq, + params.skip_fastqc || params.skip_qc, + params.with_umi, + params.skip_umi_extract, + params.umi_discard_read, + params.skip_trimming, + [], + params.save_trimmed, + params.save_trimmed, + params.min_trimmed_reads + ) + ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.collect{it[1]}) + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) + } + + // + // Get list of samples that 
+    //
+    // Get list of samples that failed trimming threshold for MultiQC report
+    //
+    ch_trim_read_count
+        .map {
+            meta, num_reads ->
+                if (num_reads <= params.min_trimmed_reads.toFloat()) {
+                    return [ "$meta.id\t$num_reads" ]
+                }
+        }
+        .collect()
+        .map {
+            tsv_data ->
+                def header = ["Sample", "Reads after trimming"]
+                multiqcTsvFromList(tsv_data, header)
+        }
+        .set { ch_fail_trimming_multiqc }
+    ch_multiqc_files = ch_multiqc_files.mix(ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv'))
+
+    //
+    // MODULE: Remove genome contaminant reads
+    //
+    if (!params.skip_bbsplit) {
+        BBMAP_BBSPLIT (
+            ch_filtered_reads,
+            ch_bbsplit_index,
+            [],
+            [ [], [] ],
+            false
+        )
+        .primary_fastq
+        .set { ch_filtered_reads }
+        ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
+    }
+
+    //
+    // MODULE: Remove ribosomal RNA reads
+    //
+    if (params.remove_ribo_rna) {
+        ch_ribo_db = file(params.ribo_database_manifest)
+        ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect()
+
+        SORTMERNA (
+            ch_filtered_reads,
+            ch_sortmerna_fastas
+        )
+        .reads
+        .set { ch_filtered_reads }
+
+        ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log.collect{it[1]})
+        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+    }
+
+    //
+    // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness
+    //
+
+    // Branch FastQ channels if 'auto' specified to infer strandedness
+    ch_filtered_reads
+        .branch {
+            meta, fastq ->
+                auto_strand : meta.strandedness == 'auto'
+                    return [ meta, fastq ]
+                known_strand: meta.strandedness != 'auto'
+                    return [ meta, fastq ]
+        }
+        .set { ch_strand_fastq }
+
+    // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created
+    ch_fasta
+        .combine(ch_strand_fastq.auto_strand)
+        .map { it.first() }
+        .first()
+        .set { ch_genome_fasta }
+
+    def prepare_tool_indices = []
+    if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
+        prepare_tool_indices << params.pseudo_aligner
+    }
+    FASTQ_SUBSAMPLE_FQ_SALMON (
+        ch_strand_fastq.auto_strand,
+        ch_genome_fasta,
+        ch_transcript_fasta,
+        ch_gtf,
+        ch_salmon_index,
+        !params.salmon_index && !('salmon' in prepare_tool_indices)
+    )
+    ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions)
+
+    FASTQ_SUBSAMPLE_FQ_SALMON
+        .out
+        .json_info
+        .join(ch_strand_fastq.auto_strand)
+        .map { meta, json, reads ->
+            return [ meta + [ strandedness: getSalmonInferredStrandedness(json) ], reads ]
+        }
+        .mix(ch_strand_fastq.known_strand)
+        .set { ch_strand_inferred_filtered_fastq }
+
+    //
+    // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with Salmon
+    //
+    ch_genome_bam       = Channel.empty()
+    ch_genome_bam_index = Channel.empty()
+    ch_star_log         = Channel.empty()
+    if (!params.skip_alignment && params.aligner == 'star_salmon') {
+        // Check if an AWS iGenome has been provided to use the appropriate version of STAR
+        def is_aws_igenome = false
+        if (params.fasta && params.gtf) {
+            if ((file(params.fasta).getName() - '.gz' == 'genome.fa') && (file(params.gtf).getName() - '.gz' == 'genes.gtf')) {
+                is_aws_igenome = true
+            }
+        }
+
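The iGenomes check above leans on Groovy's String minus operator: `a - b` removes the first occurrence of `b` from `a` and is a no-op when `b` is absent, so both plain and gzipped reference names satisfy the comparison. In isolation:

    // Groovy string arithmetic as used by the AWS iGenomes heuristic
    def fasta = 'genome.fa.gz'
    def gtf   = 'genes.gtf'

    def is_aws_igenome = (fasta - '.gz' == 'genome.fa') && (gtf - '.gz' == 'genes.gtf')
    assert is_aws_igenome  // holds for gzipped and uncompressed names alike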
+        ALIGN_STAR (
+            ch_strand_inferred_filtered_fastq,
+            ch_star_index.map { [ [:], it ] },
+            ch_gtf.map { [ [:], it ] },
+            params.star_ignore_sjdbgtf,
+            '',
+            params.seq_center ?: '',
+            is_aws_igenome,
+            ch_fasta.map { [ [:], it ] }
+        )
+        ch_genome_bam        = ALIGN_STAR.out.bam
+        ch_genome_bam_index  = ALIGN_STAR.out.bai
+        ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript
+        ch_star_log          = ALIGN_STAR.out.log_final
+        ch_multiqc_files = ch_multiqc_files.mix(ALIGN_STAR.out.stats.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(ALIGN_STAR.out.flagstat.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(ALIGN_STAR.out.idxstats.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(ch_star_log.collect{it[1]})
+
+        if (params.bam_csi_index) {
+            ch_genome_bam_index = ALIGN_STAR.out.csi
+        }
+        ch_versions = ch_versions.mix(ALIGN_STAR.out.versions)
+
+        //
+        // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
+        //
+        if (params.with_umi) {
+            // Deduplicate genome BAM file before downstream analysis
+            BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
+                ch_genome_bam.join(ch_genome_bam_index, by: [0]),
+                params.umitools_dedup_stats
+            )
+            ch_genome_bam       = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam
+            ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats.collect{it[1]})
+
+            if (params.bam_csi_index) {
+                ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi
+            }
+            ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions)
+
+            // Co-ordinate sort, index and run stats on transcriptome BAM
+            BAM_SORT_STATS_SAMTOOLS (
+                ch_transcriptome_bam,
+                ch_fasta.map { [ [:], it ] }
+            )
+            ch_transcriptome_sorted_bam = BAM_SORT_STATS_SAMTOOLS.out.bam
+            ch_transcriptome_sorted_bai = BAM_SORT_STATS_SAMTOOLS.out.bai
+
+            // Deduplicate transcriptome BAM file before read counting with Salmon
+            BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME (
+                ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]),
+                params.umitools_dedup_stats
+            )
+
+            // Name sort BAM before passing to Salmon
+            SAMTOOLS_SORT (
+                BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam
+            )
+
+            // Only run prepare_for_rsem.py on paired-end BAM files
+            SAMTOOLS_SORT
+                .out
+                .bam
+                .branch {
+                    meta, bam ->
+                        single_end: meta.single_end
+                            return [ meta, bam ]
+                        paired_end: !meta.single_end
+                            return [ meta, bam ]
+                }
+                .set { ch_umitools_dedup_bam }
+
+            // Fix paired-end reads in name sorted BAM file
+            // See: https://github.com/nf-core/rnaseq/issues/828
+            UMITOOLS_PREPAREFORSALMON (
+                ch_umitools_dedup_bam.paired_end
+            )
+            ch_versions = ch_versions.mix(UMITOOLS_PREPAREFORSALMON.out.versions.first())
+
+            ch_umitools_dedup_bam
+                .single_end
+                .mix(UMITOOLS_PREPAREFORSALMON.out.bam)
+                .set { ch_transcriptome_bam }
+        }
+
+        //
+        // SUBWORKFLOW: Count reads from BAM alignments using Salmon
+        //
+        QUANTIFY_STAR_SALMON (
+            ch_transcriptome_bam,
+            ch_dummy_file,
+            ch_transcript_fasta,
+            ch_gtf,
+            'salmon',
+            true,
+            params.salmon_quant_libtype ?: '',
+            params.kallisto_quant_fraglen,
+            params.kallisto_quant_fraglen_sd
+        )
+        ch_versions = ch_versions.mix(QUANTIFY_STAR_SALMON.out.versions)
+
+        if (!params.skip_qc && !params.skip_deseq2_qc) {
+            DESEQ2_QC_STAR_SALMON (
+                QUANTIFY_STAR_SALMON.out.counts_gene_length_scaled,
+                ch_pca_header_multiqc,
+                ch_clustering_header_multiqc
+            )
+            ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_STAR_SALMON.out.pca_multiqc.collect())
+            ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_STAR_SALMON.out.dists_multiqc.collect())
+            ch_versions = ch_versions.mix(DESEQ2_QC_STAR_SALMON.out.versions)
+        }
+    }
+
+    //
+    // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with RSEM
+    //
+    if (!params.skip_alignment && params.aligner == 'star_rsem') {
+        QUANTIFY_RSEM (
+            ch_strand_inferred_filtered_fastq,
+            ch_rsem_index,
+            ch_fasta.map { [ [:], it ] }
+        )
+        ch_genome_bam       = QUANTIFY_RSEM.out.bam
+        ch_genome_bam_index = QUANTIFY_RSEM.out.bai
+        ch_star_log         = QUANTIFY_RSEM.out.logs
+        ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.stats.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.flagstat.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.idxstats.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(ch_star_log.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_RSEM.out.stat.collect{it[1]})
+
+        if (params.bam_csi_index) {
+            ch_genome_bam_index = QUANTIFY_RSEM.out.csi
+        }
+        ch_versions = ch_versions.mix(QUANTIFY_RSEM.out.versions)
+
+        if (!params.skip_qc && !params.skip_deseq2_qc) {
+            DESEQ2_QC_RSEM (
+                QUANTIFY_RSEM.out.merged_counts_gene,
+                ch_pca_header_multiqc,
+                ch_clustering_header_multiqc
+            )
+            ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_RSEM.out.pca_multiqc.collect())
+            ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_RSEM.out.dists_multiqc.collect())
+            ch_versions = ch_versions.mix(DESEQ2_QC_RSEM.out.versions)
+        }
+    }
+
+    //
+    // SUBWORKFLOW: Alignment with HISAT2
+    //
+    if (!params.skip_alignment && params.aligner == 'hisat2') {
+        FASTQ_ALIGN_HISAT2 (
+            ch_strand_inferred_filtered_fastq,
+            ch_hisat2_index.map { [ [:], it ] },
+            ch_splicesites.map { [ [:], it ] },
+            ch_fasta.map { [ [:], it ] }
+        )
+        ch_genome_bam       = FASTQ_ALIGN_HISAT2.out.bam
+        ch_genome_bam_index = FASTQ_ALIGN_HISAT2.out.bai
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_HISAT2.out.stats.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_HISAT2.out.flagstat.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_HISAT2.out.idxstats.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_HISAT2.out.summary.collect{it[1]})
+
+        if (params.bam_csi_index) {
+            ch_genome_bam_index = FASTQ_ALIGN_HISAT2.out.csi
+        }
+        ch_versions = ch_versions.mix(FASTQ_ALIGN_HISAT2.out.versions)
+
+        //
+        // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
+        //
+        if (params.with_umi) {
+            BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
+                ch_genome_bam.join(ch_genome_bam_index, by: [0]),
+                params.umitools_dedup_stats
+            )
+            ch_genome_bam       = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam
+            ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats.collect{it[1]})
+            if (params.bam_csi_index) {
+                ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi
+            }
+            ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions)
+        }
+    }
+
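Throughout the workflow, a BAM channel is re-united with its index via `join(..., by: [0])`, which matches tuples on their first element, here the meta map, so a sample's files stay together regardless of the order in which parallel tasks finish. A minimal sketch with hypothetical samples:

    workflow {
        ch_bam = Channel.of( [ [ id: 'S1' ], 'S1.bam' ], [ [ id: 'S2' ], 'S2.bam' ] )
        ch_bai = Channel.of( [ [ id: 'S2' ], 'S2.bam.bai' ], [ [ id: 'S1' ], 'S1.bam.bai' ] )

        ch_bam
            .join(ch_bai, by: [0])  // key = the meta map in position 0
            .view { meta, bam, bai -> "${meta.id}: ${bam} + ${bai}" }
    }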
+    //
+    // Filter channels to get samples that passed STAR minimum mapping percentage
+    //
+    if (!params.skip_alignment && params.aligner.contains('star')) {
+        ch_star_log
+            .map { meta, align_log -> [ meta ] + getStarPercentMapped(params, align_log) }
+            .set { ch_percent_mapped }
+
+        ch_genome_bam
+            .join(ch_percent_mapped, by: [0])
+            .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] }
+            .set { ch_genome_bam }
+
+        ch_genome_bam_index
+            .join(ch_percent_mapped, by: [0])
+            .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] }
+            .set { ch_genome_bam_index }
+
+        ch_percent_mapped
+            .branch { meta, mapped, pass ->
+                pass: pass
+                    return [ "$meta.id\t$mapped" ]
+                fail: !pass
+                    return [ "$meta.id\t$mapped" ]
+            }
+            .set { ch_pass_fail_mapped }
+
+        ch_pass_fail_mapped
+            .fail
+            .collect()
+            .map {
+                tsv_data ->
+                    def header = ["Sample", "STAR uniquely mapped reads (%)"]
+                    multiqcTsvFromList(tsv_data, header)
+            }
+            .set { ch_fail_mapping_multiqc }
+        ch_multiqc_files = ch_multiqc_files.mix(ch_fail_mapping_multiqc.collectFile(name: 'fail_mapped_samples_mqc.tsv'))
+    }
+
+    //
+    // MODULE: Run Preseq
+    //
+    if (!params.skip_alignment && !params.skip_qc && !params.skip_preseq) {
+        PRESEQ_LCEXTRAP (
+            ch_genome_bam
+        )
+        ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.lc_extrap.collect{it[1]})
+        ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first())
+    }
+
+    //
+    // SUBWORKFLOW: Mark duplicate reads
+    //
+    if (!params.skip_alignment && !params.skip_markduplicates && !params.with_umi) {
+        BAM_MARKDUPLICATES_PICARD (
+            ch_genome_bam,
+            ch_fasta.map { [ [:], it ] },
+            ch_fai.map { [ [:], it ] }
+        )
+        ch_genome_bam       = BAM_MARKDUPLICATES_PICARD.out.bam
+        ch_genome_bam_index = BAM_MARKDUPLICATES_PICARD.out.bai
+        ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.stats.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.flagstat.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.idxstats.collect{it[1]})
+        ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.metrics.collect{it[1]})
+
+        if (params.bam_csi_index) {
+            ch_genome_bam_index = BAM_MARKDUPLICATES_PICARD.out.csi
+        }
+        ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions)
+    }
+
+    //
+    // MODULE: STRINGTIE
+    //
+    if (!params.skip_alignment && !params.skip_stringtie) {
+        STRINGTIE_STRINGTIE (
+            ch_genome_bam,
+            ch_gtf
+        )
+        ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions.first())
+    }
+
+    //
+    // MODULE: Feature biotype QC using featureCounts
+    //
+    def biotype = params.gencode ? "gene_type" : params.featurecounts_group_type
+    if (!params.skip_alignment && !params.skip_qc && !params.skip_biotype_qc && biotype) {
+
+        ch_gtf
+            .map { biotypeInGtf(it, biotype) }
+            .set { biotype_in_gtf }
+
+        // Prevent any samples from running if GTF file doesn't have a valid biotype
+        ch_genome_bam
+            .combine(ch_gtf)
+            .combine(biotype_in_gtf)
+            .filter { it[-1] }
+            .map { it[0..
0) {
+            BAM_RSEQC (
+                ch_genome_bam.join(ch_genome_bam_index, by: [0]),
+                ch_gene_bed,
+                rseqc_modules
+            )
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.bamstat_txt.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.inferexperiment_txt.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.innerdistance_freq.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.junctionannotation_log.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.junctionsaturation_rscript.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.readdistribution_txt.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.readduplication_pos_xls.collect{it[1]})
+            ch_multiqc_files = ch_multiqc_files.mix(BAM_RSEQC.out.tin_txt.collect{it[1]})
+            ch_versions = ch_versions.mix(BAM_RSEQC.out.versions)
+
+            BAM_RSEQC
+                .out
+                .inferexperiment_txt
+                .map {
+                    meta, strand_log ->
+                        def inferred_strand = getInferexperimentStrandedness(strand_log, 30)
+                        if (meta.strandedness != inferred_strand[0]) {
+                            return [ "$meta.id\t$meta.strandedness\t${inferred_strand.join('\t')}" ]
+                        }
+                }
+                .collect()
+                .map {
+                    tsv_data ->
+                        def header = [
+                            "Sample",
+                            "Provided strandedness",
+                            "Inferred strandedness",
+                            "Sense (%)",
+                            "Antisense (%)",
+                            "Undetermined (%)"
+                        ]
+                        multiqcTsvFromList(tsv_data, header)
+                }
+                .set { ch_fail_strand_multiqc }
+            ch_multiqc_files = ch_multiqc_files.mix(ch_fail_strand_multiqc.collectFile(name: 'fail_strand_check_mqc.tsv'))
+        }
+    }
+
+    //
+    // SUBWORKFLOW: Pseudoalignment and quantification with Salmon
+    //
+    if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
+
+        if (params.pseudo_aligner == 'salmon') {
+            ch_pseudo_index = ch_salmon_index
+        } else {
+            ch_pseudo_index = ch_kallisto_index
+        }
+
+        QUANTIFY_PSEUDO_ALIGNMENT (
+            ch_strand_inferred_filtered_fastq,
+            ch_pseudo_index,
+            ch_dummy_file,
+            ch_gtf,
+            params.pseudo_aligner,
+            false,
+            params.salmon_quant_libtype ?: '',
+            params.kallisto_quant_fraglen,
+            params.kallisto_quant_fraglen_sd
+        )
+        ch_counts_gene_length_scaled = QUANTIFY_PSEUDO_ALIGNMENT.out.counts_gene_length_scaled
+        ch_multiqc_files = ch_multiqc_files.mix(QUANTIFY_PSEUDO_ALIGNMENT.out.multiqc.collect{it[1]})
+        ch_versions = ch_versions.mix(QUANTIFY_PSEUDO_ALIGNMENT.out.versions)
+
+        if (!params.skip_qc && !params.skip_deseq2_qc) {
+            DESEQ2_QC_PSEUDO (
+                ch_counts_gene_length_scaled,
+                ch_pca_header_multiqc,
+                ch_clustering_header_multiqc
+            )
+            ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_PSEUDO.out.pca_multiqc.collect())
+            ch_multiqc_files = ch_multiqc_files.mix(DESEQ2_QC_PSEUDO.out.dists_multiqc.collect())
+            ch_versions = ch_versions.mix(DESEQ2_QC_PSEUDO.out.versions)
+        }
+    }
+
+    //
+    // Collate and save software versions
+    //
+    softwareVersionsToYAML(ch_versions)
+        .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_rnaseq_software_mqc_versions.yml', sort: true, newLine: true)
+        .set { ch_collated_versions }
+
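`collectFile` above does double duty: it concatenates every YAML fragment emitted on the versions channel into one file and, via `storeDir`, persists that file under ${params.outdir}/pipeline_info rather than leaving it in the work directory. The core behaviour in isolation:

    workflow {
        Channel
            .of( 'TOOL_B: "2.1"', 'TOOL_A: "1.0"' )
            .collectFile(name: 'versions_demo.yml', sort: true, newLine: true)
            .view { it.text }  // both entries, sorted, one per line
    }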
+    //
+    // MODULE: MultiQC
+    //
+    ch_multiqc_report = Channel.empty()
+    if (!params.skip_multiqc) {
+        ch_multiqc_config        = Channel.fromPath("$projectDir/workflows/rnaseq/assets/multiqc/multiqc_config.yml", checkIfExists: true)
+        ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty()
+        ch_multiqc_logo          = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo) : Channel.empty()
+        summary_params           = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+        ch_workflow_summary      = Channel.value(paramsSummaryMultiqc(summary_params))
+        ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+        ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
+
+        MULTIQC (
+            ch_multiqc_files.collect(),
+            ch_multiqc_config.toList(),
+            ch_multiqc_custom_config.toList(),
+            ch_multiqc_logo.toList()
+        )
+        ch_multiqc_report = MULTIQC.out.report
+    }
+
+    emit:
+    multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html
+    versions       = ch_versions       // channel: [ path(versions.yml) ]
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    THE END
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
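A closing note on the new MULTIQC invocation: `.toList()` gathers a channel into a single list emission and, crucially, emits an empty list when the channel is empty, so optional inputs such as the custom config and logo never block the process. This plays the same role `.collect().ifEmpty([])` did in the deleted call. Sketch:

    workflow {
        ch_logo = Channel.empty()  // e.g. --multiqc_logo was not supplied
        ch_logo.toList().view()    // prints [] instead of emitting nothing
    }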