diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index acac15a769..53d55183c0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,6 +61,8 @@ jobs: - tags: "sentieon/haplotyper" - tags: "sentieon/haplotyper_joint_germline" - tags: "sentieon/haplotyper_skip_filter" + - NXF_VER: "latest-everything" + tags: "joint_germline" env: NXF_ANSI_LOG: false TEST_DATA_BASE: "${{ github.workspace }}/test-datasets" @@ -217,7 +219,6 @@ jobs: - tags: "bwamem2/mem" - tags: "cat/cat" - tags: "cat/fastq" - - tags: "custom/dumpsoftwareversions" - tags: "dragmap/align" - tags: "fastp" - tags: "fastqc" @@ -230,8 +231,8 @@ jobs: - tags: "multiqc" - tags: "samtools/mpileup" - tags: "samtools/stats" - - tags: "untar" - tags: "subworkflows/utils_nfvalidation_plugin" + - tags: "untar" env: NXF_ANSI_LOG: false TEST_DATA_BASE: "${{ github.workspace }}/test-datasets" diff --git a/CHANGELOG.md b/CHANGELOG.md index c3cab8fd7f..92b8e51ce3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,9 +25,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1360](https://github.com/nf-core/sarek/pull/1360) - Sync `TEMPLATE` with `tools` `2.11` - [#1408](https://github.com/nf-core/sarek/pull/1408), [#1412](https://github.com/nf-core/sarek/pull/1412) - Updating samtools to v1.19.2 - except in GATK/markduplicates. (Temporarily disabled nf-test for bwamem2/mem.) - [#1411](https://github.com/nf-core/sarek/pull/1411) - Temporarily disable sentieon related tests +- [#1414](https://github.com/nf-core/sarek/pull/1414) - Sync `TEMPLATE` with `tools` `2.13` - [#1419](https://github.com/nf-core/sarek/pull/1419) - Updating GATK to v4.5, and updating samtools to v1.19.2 in GATK/markduplicates. - [#1426](https://github.com/nf-core/sarek/pull/1426) - Updating certain modules in order to fix the testdata-path in the nf-tests of those modules. Setting Docker runOptions for params.use_gatk_spark. - [#1428](https://github.com/nf-core/sarek/pull/1428) - Sync `TEMPLATE` with `tools` `2.13.1` +- [#1422](https://github.com/nf-core/sarek/pull/1422) - Refactoring following `TEMPLATE` sync with `tools` `2.13` - [#1431](https://github.com/nf-core/sarek/pull/1431) - Using docker.containerOptions instead of docker.runOptions. Clearing containerOptions for SPARK modules for any kind of supported container engine. ### Fixed @@ -43,6 +45,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1390](https://github.com/nf-core/sarek/pull/1390) - Fix badges in README - [#1400](https://github.com/nf-core/sarek/pull/1400) - Fixed input channel for ASSESS_SIGNIFICANCE module, updated makegraph to makegraph2. - [#1403](https://github.com/nf-core/sarek/pull/1403) - Fix intervals usage with dot in chromosome names +- [#1407](https://github.com/nf-core/sarek/pull/1407) - Fix CI tests name +- [#1420](https://github.com/nf-core/sarek/pull/1420) - Make `-a` a default argument for `bcftools` concat +- [#1422](https://github.com/nf-core/sarek/pull/1422) - Fix `Cannot serialize context map` warning ### Removed diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 755ee64d44..0000000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,364 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. 
-// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - 
email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") - sendmail_tf.withWriter { w -> w << sendmail_html } - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); - output_hf.delete() - - // Write summary e-mail TXT to a file - def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); - output_tf.delete() - } - - // - // 
Construct and send a notification to a web server as JSON - // e.g. Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) - - FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") - temp_pf.delete() - } - - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? 
'' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.white} ____${colors.reset} - ${colors.white} .´ _ `.${colors.reset} - ${colors.white} / ${colors.green}|\\${colors.reset}`-_ \\${colors.reset} ${colors.blue} __ __ ___ ${colors.reset} - ${colors.white} | ${colors.green}| \\${colors.reset} `-|${colors.reset} ${colors.blue}|__` /\\ |__) |__ |__/${colors.reset} - ${colors.white} \\ ${colors.green}| \\${colors.reset} /${colors.reset} ${colors.blue}.__| /¯¯\\ | \\ |___ | \\${colors.reset} - ${colors.white} `${colors.green}|${colors.reset}____${colors.green}\\${colors.reset}´${colors.reset} - - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 847986c3da..0000000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,71 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/sarek pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.12688/f1000research.16665.2\n" + - " https://doi.org/10.5281/zenodo.3476425\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log, args) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check 
that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - // Check that the profile doesn't contain spaces and doesn't end with a trailing comma - checkProfile(workflow.profile, args, log) - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Warn that no input was provided - if (!params.input && !params.build_only_index) { - log.warn "No samplesheet specified, attempting to restart from csv files present in ${params.outdir}" - } - } - // - // Get attribute from genome config file e.g. fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } - - // - // Exit pipeline if --profile contains spaces - // - private static void checkProfile(profile, args, log) { - if (profile.endsWith(',')) { - Nextflow.error "Profile cannot end with a trailing comma. Please remove the comma from the end of the profile string.\nHint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`." - } - if (args[0]) { - log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${args[0]}` has been detected.\n Hint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`." - } - } -} diff --git a/lib/WorkflowSarek.groovy b/lib/WorkflowSarek.groovy deleted file mode 100755 index 12f001c251..0000000000 --- a/lib/WorkflowSarek.groovy +++ /dev/null @@ -1,149 +0,0 @@ -// -// This file holds several functions specific to the workflow/sarek.nf in the nf-core/sarek pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowSarek { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - genomeExistsError(params, log) - - if (!params.fasta && params.step == 'annotate') { - Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "
<p style=\"font-size:110%\"><b>$group</b></p>\n" - summary_section += "    <dl class=\"dl-horizontal\">\n" - for (param in group_params.keySet()) { - summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: 'N/A'}</samp></dd>\n" - } - summary_section += "    </dl>
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", - "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } - - public static String retrieveInput(params, log){ - def input = null - if (!params.input && !params.build_only_index) { - switch (params.step) { - case 'mapping': Nextflow.error("Can't start with step $params.step without samplesheet") - break - case 'markduplicates': log.warn("Using file ${params.outdir}/csv/mapped.csv"); - input = params.outdir + "/csv/mapped.csv" - break - case 'prepare_recalibration': log.warn("Using file ${params.outdir}/csv/markduplicates_no_table.csv"); - input = params.outdir + "/csv/markduplicates_no_table.csv" - break - case 'recalibrate': log.warn("Using file ${params.outdir}/csv/markduplicates.csv"); - input = params.outdir + "/csv/markduplicates.csv" - break - case 'variant_calling': log.warn("Using file ${params.outdir}/csv/recalibrated.csv"); - input = params.outdir + "/csv/recalibrated.csv" - break - // case 'controlfreec': csv_file = file("${params.outdir}/variant_calling/csv/control-freec_mpileup.csv", checkIfExists: true); break - case 'annotate': log.warn("Using file ${params.outdir}/csv/variantcalled.csv"); - input = params.outdir + "/csv/variantcalled.csv" - break - default: log.warn("Please provide an input samplesheet to the pipeline e.g.
'--input samplesheet.csv'") - Nextflow.error("Unknown step $params.step") - } - } - return input - } -} diff --git a/main.nf b/main.nf old mode 100644 new mode 100755 index d1f17e3e95..f09af42f58 --- a/main.nf +++ b/main.nf @@ -20,20 +20,6 @@ */ nextflow.enable.dsl = 2 - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { SAREK } from './workflows/sarek' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_sarek_pipeline' -include { paramsHelp } from 'plugin/nf-validation' -include { validateParameters } from 'plugin/nf-validation' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER VALUES @@ -75,36 +61,62 @@ params.vep_cache_version = getGenomeAttribute('vep_cache_version') params.vep_genome = getGenomeAttribute('vep_genome') params.vep_species = getGenomeAttribute('vep_species') +aligner = params.aligner + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ALTERNATIVE INPUT FILE ON RESTART + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.input_restart = WorkflowSarek.retrieveInput(params, log) +include { SAREK } from './workflows/sarek' +include { ANNOTATION_CACHE_INITIALISATION } from './subworkflows/local/annotation_cache_initialisation' +include { DOWNLOAD_CACHE_SNPEFF_VEP } from './subworkflows/local/download_cache_snpeff_vep' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' +include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals' +include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ +// Initialize file channels based on params, defined in the params.genomes[params.genome] scope +bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.empty() +bcftools_header_lines = params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty() +cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len) : [] +dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) +fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : Channel.empty() +fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() +germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource) : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input +known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) +known_snps = params.known_snps ? 
Channel.fromPath(params.known_snps).collect() : Channel.value([]) +mappability = params.mappability ? Channel.fromPath(params.mappability) : Channel.value([]) +pon = params.pon ? Channel.fromPath(params.pon) : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) +sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model) : Channel.value([]) +// Initialize value channels based on params, defined in the params.genomes[params.genome] scope +ascat_genome = params.ascat_genome ?: Channel.empty() +dbsnp_vqsr = params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty() +known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty() +known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty() +ngscheckmate_bed = params.ngscheckmate_bed ? Channel.value(params.ngscheckmate_bed) : Channel.empty() +snpeff_db = params.snpeff_db ?: Channel.empty() +vep_cache_version = params.vep_cache_version ?: Channel.empty() +vep_genome = params.vep_genome ?: Channel.empty() +vep_species = params.vep_species ?: Channel.empty() -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GATK.GRCh38 -profile docker --outdir results" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} -// Validate input parameters -if (params.validate_params) { - validateParameters() +vep_extra_files = [] + +if (params.dbnsfp && params.dbnsfp_tbi) { + vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) + vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) } -WorkflowMain.initialise(workflow, params, log, args) +if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { + vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -114,12 +126,188 @@ WorkflowMain.initialise(workflow, params, log, args) // WORKFLOW: Run main nf-core/sarek analysis pipeline workflow NFCORE_SAREK { + take: + samplesheet + main: + versions = Channel.empty() + + // build indexes if needed + PREPARE_GENOME( + params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_gc, + params.ascat_loci_rt, + bcftools_annotations, + params.chr_dir, + dbsnp, + fasta, + fasta_fai, + germline_resource, + known_indels, + known_snps, + pon) + + // Gather built indices or get them from the params + // Built from the fasta file: + dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect() + : PREPARE_GENOME.out.dict + fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() + : PREPARE_GENOME.out.fasta_fai + bwa = params.bwa ? Channel.fromPath(params.bwa).collect() + : PREPARE_GENOME.out.bwa + bwamem2 = params.bwamem2 ? Channel.fromPath(params.bwamem2).collect() + : PREPARE_GENOME.out.bwamem2 + dragmap = params.dragmap ? 
Channel.fromPath(params.dragmap).collect() + : PREPARE_GENOME.out.hashtable + + // Gather index for mapping given the chosen aligner + index_alignement = (aligner == "bwa-mem" || aligner == "sentieon-bwamem") ? bwa : + aligner == "bwa-mem2" ? bwamem2 : + dragmap + + // TODO: add a params for msisensorpro_scan + msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan + + // For ASCAT, extracted from zip or tar.gz files + allele_files = PREPARE_GENOME.out.allele_files + chr_files = PREPARE_GENOME.out.chr_files + gc_file = PREPARE_GENOME.out.gc_file + loci_files = PREPARE_GENOME.out.loci_files + rt_file = PREPARE_GENOME.out.rt_file + + // Tabix indexed vcf files + bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi) : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.empty([]) + dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi) : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) + germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi) : PREPARE_GENOME.out.germline_resource_tbi : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries + known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) + known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi) : PREPARE_GENOME.out.known_snps_tbi : Channel.value([]) + pon_tbi = params.pon ? params.pon_tbi ? Channel.fromPath(params.pon_tbi) : PREPARE_GENOME.out.pon_tbi : Channel.value([]) + + // known_sites is made by grouping both the dbsnp and the known snps/indels resources + // Which can either or both be optional + known_sites_indels = dbsnp.concat(known_indels).collect() + known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() + known_sites_snps = dbsnp.concat(known_snps).collect() + known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() + + // Build intervals if needed + PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) + + // Intervals for speed up preprocessing/variant calling by spread/gather + // [interval.bed] all intervals in one file + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined + intervals_bed_combined_for_variant_calling = PREPARE_INTERVALS.out.intervals_bed_combined + + // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) + intervals_for_preprocessing = params.wes ? 
+ intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() : + Channel.value([ [ id:'null' ], [] ]) + intervals = PREPARE_INTERVALS.out.intervals_bed // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + intervals_and_num_intervals = intervals.map{ interval, num_intervals -> + if ( num_intervals < 1 ) [ [], num_intervals ] + else [ interval, num_intervals ] + } + intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals -> + if ( num_intervals < 1 ) [ [], [], num_intervals ] + else [ intervals[0], intervals[1], num_intervals ] + } + if (params.tools && params.tools.split(',').contains('cnvkit')) { + if (params.cnvkit_reference) { + cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect() + } else { + PREPARE_REFERENCE_CNVKIT(fasta, intervals_bed_combined) + cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference + versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions) + } + } else { + cnvkit_reference = Channel.value([]) + } + // Gather used softwares versions + versions = versions.mix(PREPARE_GENOME.out.versions) + versions = versions.mix(PREPARE_INTERVALS.out.versions) + + vep_fasta = (params.vep_include_fasta) ? fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] + + // Download cache + if (params.download_cache) { + // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache + ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) + snpeff_info = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ]) + DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info) + snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache + vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] } + + versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions) + } else { + // Looks for cache information either locally or on the cloud + ANNOTATION_CACHE_INITIALISATION( + (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), + params.snpeff_cache, + params.snpeff_genome, + params.snpeff_db, + (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), + params.vep_cache, + params.vep_species, + params.vep_cache_version, + params.vep_genome, + "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") + + snpeff_cache = ANNOTATION_CACHE_INITIALISATION.out.snpeff_cache + vep_cache = ANNOTATION_CACHE_INITIALISATION.out.ensemblvep_cache + } // // WORKFLOW: Run pipeline // - SAREK() + SAREK(samplesheet, + allele_files, + bcftools_annotations, + bcftools_annotations_tbi, + bcftools_header_lines, + cf_chrom_len, + chr_files, + cnvkit_reference, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + dict, + fasta, + fasta_fai, + gc_file, + germline_resource, + germline_resource_tbi, + index_alignement, + intervals_and_num_intervals, + intervals_bed_combined, + intervals_bed_combined_for_variant_calling, + intervals_bed_gz_tbi_and_num_intervals, + 
intervals_bed_gz_tbi_combined, + intervals_for_preprocessing, + known_indels_vqsr, + known_sites_indels, + known_sites_indels_tbi, + known_sites_snps, + known_sites_snps_tbi, + known_snps_vqsr, + loci_files, + mappability, + msisensorpro_scan, + ngscheckmate_bed, + pon, + pon_tbi, + rt_file, + sentieon_dnascope_model, + snpeff_cache, + vep_cache, + vep_cache_version, + vep_extra_files, + vep_fasta, + vep_genome, + vep_species + ) emit: multiqc_report = SAREK.out.multiqc_report // channel: /path/to/multiqc_report.html @@ -150,7 +338,7 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_SAREK() + NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet) // // SUBWORKFLOW: Run completion tasks @@ -166,6 +354,25 @@ workflow { ) } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Get attribute from genome config file e.g. fasta +// + +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/nextflow.config b/nextflow.config index e9dabd1136..28dad4b826 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,8 +10,9 @@ params { // Workflow flags: // Mandatory arguments - input = null // No default input - step = 'mapping' // Starts with mapping + input = null // No default input + input_restart = null // No default automatic input + step = 'mapping' // Starts with mapping // References genome = 'GATK.GRCh38' @@ -124,6 +125,7 @@ params { config_profile_contact = null config_profile_url = null test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek3' + modules_testdata_base_path = null // Max resource options // Defaults only, expecting to be overwritten diff --git a/nextflow_schema.json b/nextflow_schema.json index abcc232840..9e6e2a14f0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -893,6 +893,11 @@ "help_text": "Warning: The `-profile test` samplesheet file itself contains remote paths. 
Setting this parameter does not alter the contents of that file.", "hidden": true }, + "modules_testdata_base_path": { + "type": "string", + "description": "Base path / URL for data used in the modules", + "hidden": true + }, "seq_center": { "type": "string", "fa_icon": "fas fa-university", diff --git a/subworkflows/local/initialize_annotation_cache/main.nf b/subworkflows/local/annotation_cache_initialisation/main.nf similarity index 94% rename from subworkflows/local/initialize_annotation_cache/main.nf rename to subworkflows/local/annotation_cache_initialisation/main.nf index d2c6fcb7d6..6e35a68d50 100644 --- a/subworkflows/local/initialize_annotation_cache/main.nf +++ b/subworkflows/local/annotation_cache_initialisation/main.nf @@ -1,14 +1,14 @@ // -// INITIALIZE ANNOTATION CACHE +// ANNOTATION CACHE INITIALISATION // -// Initialize channels based on params or indices that were just built +// Initialise channels based on params or indices that were just built // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run // Condition is based on params.step and params.tools // If and extra condition exists, it's specified in comments -workflow INITIALIZE_ANNOTATION_CACHE { +workflow ANNOTATION_CACHE_INITIALISATION { take: snpeff_enabled snpeff_cache diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 46a2fa5985..5f70a22432 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -53,7 +53,7 @@ workflow PREPARE_GENOME { GATK4_CREATESEQUENCEDICTIONARY(fasta) MSISENSORPRO_SCAN(fasta) - SAMTOOLS_FAIDX(fasta, [ [ id:fasta.baseName ], [] ] ) + SAMTOOLS_FAIDX(fasta, [ [ id:'fasta' ], [] ] ) // the following are flattened and mapped in case the user supplies more than one value for the param // written for KNOWN_INDELS, but preemptively applied to the rest diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf index 985924efa8..37daa1322a 100644 --- a/subworkflows/local/samplesheet_to_channel/main.nf +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -5,6 +5,7 @@ workflow SAMPLESHEET_TO_CHANNEL{ aligner // ascat_alleles // ascat_loci // + ascat_loci_gc // ascat_loci_rt // bcftools_annotations // bcftools_annotations_tbi // @@ -306,5 +307,3 @@ def flowcellLaneFromFastq(path) { } return fcid } - - diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf index e9a6c40288..afd58a8cc2 100644 --- a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf @@ -8,17 +8,19 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' include { paramsSummaryMap } from 'plugin/nf-validation' include { fromSamplesheet } from 'plugin/nf-validation' include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' -include { nfCoreLogo } from 
'../../nf-core/utils_nfcore_pipeline' +include { getWorkflowVersion } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { logColours } from '../../nf-core/utils_nfcore_pipeline' include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' +include { SAMPLESHEET_TO_CHANNEL } from '../samplesheet_to_channel' /* ======================================================================================== @@ -39,7 +41,7 @@ workflow PIPELINE_INITIALISATION { main: - ch_versions = Channel.empty() + versions = Channel.empty() // // Print version and exit if required and dump pipeline parameters to JSON file @@ -69,40 +71,99 @@ workflow PIPELINE_INITIALISATION { // // Check config provided to the pipeline // - UTILS_NFCORE_PIPELINE ( - nextflow_cli_args - ) + UTILS_NFCORE_PIPELINE(nextflow_cli_args) + // // Custom validation for pipeline parameters // validateInputParameters() - // - // Create channel from input file provided through params.input - // - // Channel - // .fromSamplesheet("input") - // .map { - // meta, fastq_1, fastq_2 -> - // if (!fastq_2) { - // return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - // } else { - // return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - // } - // } - // .groupTuple() - // .map { - // validateInputSamplesheet(it) - // } - // .map { - // meta, fastqs -> - // return [ meta, fastqs.flatten() ] - // } - // .set { ch_samplesheet } + // Check input path parameters to see if they exist + def checkPathParamList = [ + params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_gc, + params.ascat_loci_rt, + params.bwa, + params.bwamem2, + params.bcftools_annotations, + params.bcftools_annotations_tbi, + params.bcftools_header_lines, + params.cf_chrom_len, + params.chr_dir, + params.cnvkit_reference, + params.dbnsfp, + params.dbnsfp_tbi, + params.dbsnp, + params.dbsnp_tbi, + params.dict, + params.dragmap, + params.fasta, + params.fasta_fai, + params.germline_resource, + params.germline_resource_tbi, + params.input, + params.intervals, + params.known_indels, + params.known_indels_tbi, + params.known_snps, + params.known_snps_tbi, + params.mappability, + params.multiqc_config, + params.ngscheckmate_bed, + params.pon, + params.pon_tbi, + params.sentieon_dnascope_model, + params.spliceai_indel, + params.spliceai_indel_tbi, + params.spliceai_snv, + params.spliceai_snv_tbi + ] + +// only check if we are using the tools +if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache) +if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache) + + // def retrieveInput(need_input, step, outdir) { + + params.input_restart = retrieveInput((!params.build_only_index && !params.input), params.step, params.outdir) + + ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? 
Channel.fromSamplesheet("input") : Channel.fromSamplesheet("input_restart") + + SAMPLESHEET_TO_CHANNEL( + ch_from_samplesheet, + params.aligner, + params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_gc, + params.ascat_loci_rt, + params.bcftools_annotations, + params.bcftools_annotations_tbi, + params.bcftools_header_lines, + params.build_only_index, + params.dbsnp, + params.fasta, + params.germline_resource, + params.intervals, + params.joint_germline, + params.joint_mutect2, + params.known_indels, + params.known_snps, + params.no_intervals, + params.pon, + params.sentieon_dnascope_emit_mode, + params.sentieon_haplotyper_emit_mode, + params.seq_center, + params.seq_platform, + params.skip_tools, + params.step, + params.tools, + params.umi_read_structure, + params.wes) emit: - // samplesheet = ch_samplesheet - versions = ch_versions + samplesheet = SAMPLESHEET_TO_CHANNEL.out.input_sample + versions } /* @@ -168,17 +229,6 @@ def validateInputSamplesheet(input) { return [ metas[0], fastqs ] } -// -// Get attribute from genome config file e.g. fasta -// -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null -} // // Exit pipeline if incorrect --genome key provided @@ -249,3 +299,61 @@ def methodsDescriptionText(mqc_methods_yaml) { return description_html.toString() } + +// +// nf-core/sarek logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.white} ____${colors.reset} + ${colors.white} .´ _ `.${colors.reset} + ${colors.white} / ${colors.green}|\\${colors.reset}`-_ \\${colors.reset} ${colors.blue} __ __ ___ ${colors.reset} + ${colors.white} | ${colors.green}| \\${colors.reset} `-|${colors.reset} ${colors.blue}|__` /\\ |__) |__ |__/${colors.reset} + ${colors.white} \\ ${colors.green}| \\${colors.reset} /${colors.reset} ${colors.blue}.__| /¯¯\\ | \\ |___ | \\${colors.reset} + ${colors.white} `${colors.green}|${colors.reset}____${colors.green}\\${colors.reset}´${colors.reset} + + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// retrieveInput +// +def retrieveInput(need_input, step, outdir) { + def input = null + if (!params.input && !params.build_only_index) { + switch (step) { + case 'mapping': Nextflow.error("Can't start with step $step without samplesheet") + break + case 'markduplicates': log.warn("Using file ${outdir}/csv/mapped.csv"); + input = outdir + "/csv/mapped.csv" + break + case 'prepare_recalibration': log.warn("Using file ${outdir}/csv/markduplicates_no_table.csv"); + input = outdir + "/csv/markduplicates_no_table.csv" + break + case 'recalibrate': log.warn("Using file ${outdir}/csv/markduplicates.csv"); + input = outdir + "/csv/markduplicates.csv" + break + case 'variant_calling': log.warn("Using file ${outdir}/csv/recalibrated.csv"); + input = outdir + "/csv/recalibrated.csv" + break + // case 
'controlfreec': csv_file = file("${outdir}/variant_calling/csv/control-freec_mpileup.csv", checkIfExists: true); break + case 'annotate': log.warn("Using file ${outdir}/csv/variantcalled.csv"); + input = outdir + "/csv/variantcalled.csv" + break + default: log.warn("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") + Nextflow.error("Unknown step $step") + } + } + return input +} diff --git a/workflows/sarek.nf b/workflows/sarek/main.nf similarity index 66% rename from workflows/sarek.nf rename to workflows/sarek/main.nf index c94ee48918..39e9d87217 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek/main.nf @@ -4,226 +4,85 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryLog } from 'plugin/nf-validation' -include { fromSamplesheet } from 'plugin/nf-validation' - -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_sarek_pipeline' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, - params.bwa, - params.bwamem2, - params.bcftools_annotations, - params.bcftools_annotations_tbi, - params.bcftools_header_lines, - params.cf_chrom_len, - params.chr_dir, - params.cnvkit_reference, - params.dbnsfp, - params.dbnsfp_tbi, - params.dbsnp, - params.dbsnp_tbi, - params.dict, - params.dragmap, - params.fasta, - params.fasta_fai, - params.germline_resource, - params.germline_resource_tbi, - params.input, - params.intervals, - params.known_indels, - params.known_indels_tbi, - params.known_snps, - params.known_snps_tbi, - params.mappability, - params.multiqc_config, - params.ngscheckmate_bed, - params.pon, - params.pon_tbi, - params.sentieon_dnascope_model, - params.spliceai_indel, - params.spliceai_indel_tbi, - params.spliceai_snv, - params.spliceai_snv_tbi -] - -// only check if we are using the tools -if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache) -if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache) - -// Validate input parameters -WorkflowSarek.initialise(params, log) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Check mandatory parameters -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -for (param in checkPathParamList) if (param) file(param, checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
-bcftools_annotations    = params.bcftools_annotations    ? Channel.fromPath(params.bcftools_annotations).collect()    : Channel.empty()
-bcftools_header_lines   = params.bcftools_header_lines   ? Channel.fromPath(params.bcftools_header_lines).collect()   : Channel.empty()
-cf_chrom_len            = params.cf_chrom_len            ? Channel.fromPath(params.cf_chrom_len).collect()            : []
-dbsnp                   = params.dbsnp                   ? Channel.fromPath(params.dbsnp).collect()                   : Channel.value([])
-fasta                   = params.fasta                   ? Channel.fromPath(params.fasta).first()                     : Channel.empty()
-fasta_fai               = params.fasta_fai               ? Channel.fromPath(params.fasta_fai).collect()               : Channel.empty()
-germline_resource       = params.germline_resource       ? Channel.fromPath(params.germline_resource).collect()       : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input
-known_indels            = params.known_indels            ? Channel.fromPath(params.known_indels).collect()            : Channel.value([])
-known_snps              = params.known_snps              ? Channel.fromPath(params.known_snps).collect()              : Channel.value([])
-mappability             = params.mappability             ? Channel.fromPath(params.mappability).collect()             : Channel.value([])
-pon                     = params.pon                     ? Channel.fromPath(params.pon).collect()                     : Channel.value([]) // PON is optional for Mutect2 (but highly recommended)
-sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([])
-
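(Illustrative aside, not part of the patch: the Channel.value([]) fallbacks above are the usual Nextflow optional-input idiom; the process below is hypothetical and only sketches why an empty list is used instead of an empty channel.)

    // With Channel.value([]) an absent file still satisfies the `path` input,
    // so the process runs and can branch on the empty list; Channel.empty()
    // would instead mean the process is never invoked at all.
    process USES_OPTIONAL_PON {
        input:
        path pon  // receives [] when --pon is not set

        script:
        def pon_arg = pon ? "--panel-of-normals ${pon}" : ''
        """
        echo Mutect2 ${pon_arg}
        """
    }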
-// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
-ascat_genome      = params.ascat_genome      ?: Channel.empty()
-dbsnp_vqsr        = params.dbsnp_vqsr        ? Channel.value(params.dbsnp_vqsr)        : Channel.empty()
-known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty()
-known_snps_vqsr   = params.known_snps_vqsr   ? Channel.value(params.known_snps_vqsr)   : Channel.empty()
-ngscheckmate_bed  = params.ngscheckmate_bed  ? Channel.value(params.ngscheckmate_bed)  : Channel.empty()
-snpeff_db         = params.snpeff_db         ?: Channel.empty()
-vep_cache_version = params.vep_cache_version ?: Channel.empty()
-vep_genome        = params.vep_genome        ?: Channel.empty()
-vep_species       = params.vep_species       ?: Channel.empty()
-
-
-vep_extra_files = []
-
-if (params.dbnsfp && params.dbnsfp_tbi) {
-    vep_extra_files.add(file(params.dbnsfp, checkIfExists: true))
-    vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true))
-}
-
-if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) {
-    vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true))
-    vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true))
-    vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true))
-    vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true))
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    IMPORT LOCAL/NF-CORE MODULES/SUBWORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+include { paramsSummaryMap       } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc   } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_sarek_pipeline'

 // Create samplesheets to restart from different steps
-include { SAMPLESHEET_TO_CHANNEL              } from '../subworkflows/local/samplesheet_to_channel/main'
-include { CHANNEL_ALIGN_CREATE_CSV            } from '../subworkflows/local/channel_align_create_csv/main'
-include { CHANNEL_MARKDUPLICATES_CREATE_CSV   } from '../subworkflows/local/channel_markduplicates_create_csv/main'
-include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main'
-include { CHANNEL_APPLYBQSR_CREATE_CSV        } from '../subworkflows/local/channel_applybqsr_create_csv/main'
-include { CHANNEL_VARIANT_CALLING_CREATE_CSV  } from '../subworkflows/local/channel_variant_calling_create_csv/main'
-
-// Download cache for SnpEff/VEP if needed
-include { DOWNLOAD_CACHE_SNPEFF_VEP } from '../subworkflows/local/download_cache_snpeff_vep/main'
-
-// Initialize annotation cache
-include { INITIALIZE_ANNOTATION_CACHE } from '../subworkflows/local/initialize_annotation_cache/main'
-
-// Build indices if needed
-include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main'
-
-// Build intervals if needed
-include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals/main'
-
-// Build CNVkit reference if needed
-include { PREPARE_REFERENCE_CNVKIT } from '../subworkflows/local/prepare_reference_cnvkit/main'
+include { CHANNEL_ALIGN_CREATE_CSV            } from '../../subworkflows/local/channel_align_create_csv/main'
+include { CHANNEL_MARKDUPLICATES_CREATE_CSV   } from '../../subworkflows/local/channel_markduplicates_create_csv/main'
+include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../../subworkflows/local/channel_baserecalibrator_create_csv/main'
+include { CHANNEL_APPLYBQSR_CREATE_CSV        } from '../../subworkflows/local/channel_applybqsr_create_csv/main'
+include { CHANNEL_VARIANT_CALLING_CREATE_CSV  } from '../../subworkflows/local/channel_variant_calling_create_csv/main'

 // Convert BAM files to FASTQ files
-include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../subworkflows/local/bam_convert_samtools/main'
-include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI   } from '../subworkflows/local/bam_convert_samtools/main'
+include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../../subworkflows/local/bam_convert_samtools/main'
+include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI   } from '../../subworkflows/local/bam_convert_samtools/main'

 // Run FASTQC
-include { FASTQC } from '../modules/nf-core/fastqc/main'
+include { FASTQC } from '../../modules/nf-core/fastqc/main'

 // TRIM/SPLIT FASTQ Files
-include { FASTP } from '../modules/nf-core/fastp/main'
+include { FASTP } from '../../modules/nf-core/fastp/main'

 // Create umi consensus bams from fastq
-include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../subworkflows/local/fastq_create_umi_consensus_fgbio/main'
+include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../../subworkflows/local/fastq_create_umi_consensus_fgbio/main'

 // Map input reads to reference genome
-include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main'
+include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main'

 // Merge and index BAM files (optional)
-include { BAM_MERGE_INDEX_SAMTOOLS } from '../subworkflows/local/bam_merge_index_samtools/main'
+include { BAM_MERGE_INDEX_SAMTOOLS } from '../../subworkflows/local/bam_merge_index_samtools/main'

 // Convert BAM files
-include { SAMTOOLS_CONVERT as BAM_TO_CRAM         } from '../modules/nf-core/samtools/convert/main'
-include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as BAM_TO_CRAM         } from '../../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../../modules/nf-core/samtools/convert/main'

 // Convert CRAM files (optional)
-include { SAMTOOLS_CONVERT as CRAM_TO_BAM       } from '../modules/nf-core/samtools/convert/main'
-include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as CRAM_TO_BAM       } from '../../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../../modules/nf-core/samtools/convert/main'

 // Mark Duplicates (+QC)
-include { BAM_MARKDUPLICATES       } from '../subworkflows/local/bam_markduplicates/main'
-include { BAM_MARKDUPLICATES_SPARK } from '../subworkflows/local/bam_markduplicates_spark/main'
-include { BAM_SENTIEON_DEDUP       } from '../subworkflows/local/bam_sentieon_dedup/main'
+include { BAM_MARKDUPLICATES       } from '../../subworkflows/local/bam_markduplicates/main'
+include { BAM_MARKDUPLICATES_SPARK } from '../../subworkflows/local/bam_markduplicates_spark/main'
+include { BAM_SENTIEON_DEDUP       } from '../../subworkflows/local/bam_sentieon_dedup/main'

 // QC on CRAM
-include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../subworkflows/local/cram_qc_mosdepth_samtools/main'
-include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main'
+include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../../subworkflows/local/cram_qc_mosdepth_samtools/main'
+include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../../subworkflows/local/cram_qc_mosdepth_samtools/main'

 // Create recalibration tables
-include { BAM_BASERECALIBRATOR       } from '../subworkflows/local/bam_baserecalibrator/main'
-include { BAM_BASERECALIBRATOR_SPARK } from '../subworkflows/local/bam_baserecalibrator_spark/main'
+include { BAM_BASERECALIBRATOR       } from '../../subworkflows/local/bam_baserecalibrator/main'
+include { BAM_BASERECALIBRATOR_SPARK } from '../../subworkflows/local/bam_baserecalibrator_spark/main'

 // Create recalibrated cram files to use for variant calling (+QC)
-include { BAM_APPLYBQSR       } from '../subworkflows/local/bam_applybqsr/main'
-include { BAM_APPLYBQSR_SPARK } from '../subworkflows/local/bam_applybqsr_spark/main'
+include { BAM_APPLYBQSR       } from '../../subworkflows/local/bam_applybqsr/main'
+include { BAM_APPLYBQSR_SPARK } from '../../subworkflows/local/bam_applybqsr_spark/main'

 // Variant calling on a single normal sample
-include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../subworkflows/local/bam_variant_calling_germline_all/main'
+include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../../subworkflows/local/bam_variant_calling_germline_all/main'

 // Variant calling on a single tumor sample
-include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows/local/bam_variant_calling_tumor_only_all/main'
+include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../../subworkflows/local/bam_variant_calling_tumor_only_all/main'

 // Variant calling on tumor/normal pair
-include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main'
+include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../../subworkflows/local/bam_variant_calling_somatic_all/main'

 // POST VARIANTCALLING: e.g. merging
-include { POST_VARIANTCALLING } from '../subworkflows/local/post_variantcalling/main'
+include { POST_VARIANTCALLING } from '../../subworkflows/local/post_variantcalling/main'

 // QC on VCF files
-include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main'
+include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../../subworkflows/local/vcf_qc_bcftools_vcftools/main'

 // Sample QC on CRAM files
-include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main'
+include { CRAM_SAMPLEQC } from '../../subworkflows/local/cram_sampleqc/main'

 // Annotation
-include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main'
+include { VCF_ANNOTATE_ALL } from '../../subworkflows/local/vcf_annotate_all/main'

 // MULTIQC
-include { MULTIQC } from '../modules/nf-core/multiqc/main'
+include { MULTIQC } from '../../modules/nf-core/multiqc/main'
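(Illustrative aside, not part of the patch: the include-path churn above is purely mechanical. The workflow file moved one directory deeper, from workflows/sarek.nf to workflows/sarek/main.nf, so every relative include gains one '../'. Schematically:)

    // resolved from workflows/sarek.nf (old location):
    include { MULTIQC } from '../modules/nf-core/multiqc/main'
    // resolved from workflows/sarek/main.nf (new location, one level deeper):
    include { MULTIQC } from '../../modules/nf-core/multiqc/main'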

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -232,188 +91,60 @@
 */
 workflow SAREK {
+    take:
+    input_sample
+    allele_files
+    bcftools_annotations
+    bcftools_annotations_tbi
+    bcftools_header_lines
+    cf_chrom_len
+    chr_files
+    cnvkit_reference
+    dbsnp
+    dbsnp_tbi
+    dbsnp_vqsr
+    dict
+    fasta
+    fasta_fai
+    gc_file
+    germline_resource
+    germline_resource_tbi
+    index_alignement
+    intervals_and_num_intervals
+    intervals_bed_combined
+    intervals_bed_combined_for_variant_calling
+    intervals_bed_gz_tbi_and_num_intervals
+    intervals_bed_gz_tbi_combined
+    intervals_for_preprocessing
+    known_indels_vqsr
+    known_sites_indels
+    known_sites_indels_tbi
+    known_sites_snps
+    known_sites_snps_tbi
+    known_snps_vqsr
+    loci_files
+    mappability
+    msisensorpro_scan
+    ngscheckmate_bed
+    pon
+    pon_tbi
+    rt_file
+    sentieon_dnascope_model
+    snpeff_cache
+    vep_cache
+    vep_cache_version
+    vep_extra_files
+    vep_fasta
+    vep_genome
+    vep_species
+
+    main:

     // To gather all QC reports for MultiQC
-    reports          = Channel.empty()
     ch_multiqc_files = Channel.empty()
-    multiqc_report   = Channel.empty()
-
-    // To gather used softwares versions for MultiQC
-    versions = Channel.empty()
-
-    // Parse samplesheet
-    // Set input, can either be from --input or from automatic retrieval in WorkflowSarek.groovy
-    ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? Channel.fromSamplesheet("input") : Channel.fromSamplesheet("input_restart")
-
-    SAMPLESHEET_TO_CHANNEL(
-        ch_from_samplesheet,
-        params.aligner,
-        params.ascat_alleles,
-        params.ascat_loci,
-        params.ascat_loci_rt,
-        params.bcftools_annotations,
-        params.bcftools_annotations_tbi,
-        params.bcftools_header_lines,
-        params.build_only_index,
-        params.dbsnp,
-        params.fasta,
-        params.germline_resource,
-        params.intervals,
-        params.joint_germline,
-        params.joint_mutect2,
-        params.known_indels,
-        params.known_snps,
-        params.no_intervals,
-        params.pon,
-        params.sentieon_dnascope_emit_mode,
-        params.sentieon_haplotyper_emit_mode,
-        params.seq_center,
-        params.seq_platform,
-        params.skip_tools,
-        params.step,
-        params.tools,
-        params.umi_read_structure,
-        params.wes)
-
-    input_sample = SAMPLESHEET_TO_CHANNEL.out.input_sample
-
-    // MULTIQC
-    ch_multiqc_config                     = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-    ch_multiqc_custom_config              = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
-    ch_multiqc_logo                       = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
-    ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-
-
-    // Download cache
-    if (params.download_cache) {
-        // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache
-        ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ])
-        snpeff_info     = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ])
-        DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info)
-        snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache
-        vep_cache    = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] }
-
-        versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions)
-    } else {
-        // Looks for cache information either locally or on the cloud
-        INITIALIZE_ANNOTATION_CACHE(
-            (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))),
-            params.snpeff_cache,
-            params.snpeff_genome,
-            params.snpeff_db,
-            (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))),
-            params.vep_cache,
-            params.vep_species,
-            params.vep_cache_version,
-            params.vep_genome,
-            "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.")
-
-        snpeff_cache = INITIALIZE_ANNOTATION_CACHE.out.snpeff_cache
-        vep_cache    = INITIALIZE_ANNOTATION_CACHE.out.ensemblvep_cache
-    }
-
-    // Build indices if needed
-    PREPARE_GENOME(
-        params.ascat_alleles,
-        params.ascat_loci,
-        params.ascat_loci_gc,
-        params.ascat_loci_rt,
-        bcftools_annotations,
-        params.chr_dir,
-        dbsnp,
-        fasta,
-        fasta_fai,
-        germline_resource,
-        known_indels,
-        known_snps,
-        pon)
-
-    // Gather built indices or get them from the params
-    // Built from the fasta file:
-    dict        = params.dict      ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect()
-                                   : PREPARE_GENOME.out.dict
-    fasta_fai   = params.fasta_fai ? Channel.fromPath(params.fasta_fai).first()
-                                   : PREPARE_GENOME.out.fasta_fai
-    bwa         = params.bwa       ? Channel.fromPath(params.bwa).collect()
-                                   : PREPARE_GENOME.out.bwa
-    bwamem2     = params.bwamem2   ? Channel.fromPath(params.bwamem2).collect()
-                                   : PREPARE_GENOME.out.bwamem2
-    dragmap     = params.dragmap   ? Channel.fromPath(params.dragmap).collect()
-                                   : PREPARE_GENOME.out.hashtable
-
-    // Gather index for mapping given the chosen aligner
-    index_alignement = (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem") ? bwa :
-        params.aligner == "bwa-mem2" ? bwamem2 :
-        dragmap
-
-    // TODO: add a params for msisensorpro_scan
-    msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan
-
-    // For ASCAT, extracted from zip or tar.gz files:
-    allele_files = PREPARE_GENOME.out.allele_files
-    chr_files    = PREPARE_GENOME.out.chr_files
-    gc_file      = PREPARE_GENOME.out.gc_file
-    loci_files   = PREPARE_GENOME.out.loci_files
-    rt_file      = PREPARE_GENOME.out.rt_file
-
-    // Tabix indexed vcf files:
-    bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.empty([])
-    dbsnp_tbi                = params.dbsnp                ? params.dbsnp_tbi                ? Channel.fromPath(params.dbsnp_tbi).collect()                : PREPARE_GENOME.out.dbsnp_tbi                : Channel.value([])
-    germline_resource_tbi    = params.germline_resource    ? params.germline_resource_tbi    ? Channel.fromPath(params.germline_resource_tbi).collect()    : PREPARE_GENOME.out.germline_resource_tbi    : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries
-    known_indels_tbi         = params.known_indels         ? params.known_indels_tbi         ? Channel.fromPath(params.known_indels_tbi).collect()         : PREPARE_GENOME.out.known_indels_tbi         : Channel.value([])
-    known_snps_tbi           = params.known_snps           ? params.known_snps_tbi           ? Channel.fromPath(params.known_snps_tbi).collect()           : PREPARE_GENOME.out.known_snps_tbi           : Channel.value([])
-    pon_tbi                  = params.pon                  ? params.pon_tbi                  ? Channel.fromPath(params.pon_tbi).collect()                  : PREPARE_GENOME.out.pon_tbi                  : Channel.value([])
-
-    // known_sites is made by grouping both the dbsnp and the known snps/indels resources
-    // Which can either or both be optional
-    known_sites_indels     = dbsnp.concat(known_indels).collect()
-    known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect()
-
-    known_sites_snps     = dbsnp.concat(known_snps).collect()
-    known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect()
-
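(Illustrative aside, not part of the patch: a sketch of what the concat/collect grouping above yields, using hypothetical file names.)

    // Each optional resource channel emits one (possibly empty) list;
    // collect() then flattens whatever was emitted into a single list:
    //   dbsnp              -> [ dbsnp.vcf.gz ]
    //   known_indels       -> [ mills.vcf.gz ]
    //   known_sites_indels -> [ dbsnp.vcf.gz, mills.vcf.gz ]
    // If either resource is unset, its Channel.value([]) contributes nothing.
    known_sites_indels = dbsnp.concat(known_indels).collect()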
-    // Build intervals if needed
-    PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step)
-
-    // Intervals for speed up preprocessing/variant calling by spread/gather
-    // [interval.bed] all intervals in one file
-    intervals_bed_combined        = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined
-    intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined
-
-    // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS)
-    intervals_for_preprocessing = params.wes ?
-        intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() :
-        Channel.value([ [ id:'null' ], [] ])
-
-    intervals            = PREPARE_INTERVALS.out.intervals_bed        // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather
-    intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather
-
-    intervals_and_num_intervals = intervals.map{ interval, num_intervals ->
-        if ( num_intervals < 1 ) [ [], num_intervals ]
-        else [ interval, num_intervals ]
-    }
-
-    intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals ->
-        if ( num_intervals < 1 ) [ [], [], num_intervals ]
-        else [ intervals[0], intervals[1], num_intervals ]
-    }
-
-    if (params.tools && params.tools.split(',').contains('cnvkit')) {
-        if (params.cnvkit_reference) {
-            cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect()
-        } else {
-            PREPARE_REFERENCE_CNVKIT(fasta, intervals_bed_combined)
-            cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference
-
-            versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions)
-        }
-    } else {
-        cnvkit_reference = Channel.value([])
-    }
-
-    // Gather used softwares versions
-    versions = versions.mix(PREPARE_GENOME.out.versions)
-    versions = versions.mix(PREPARE_INTERVALS.out.versions)
+    multiqc_report   = Channel.empty()
+    reports          = Channel.empty()
+    versions         = Channel.empty()

     // PREPROCESSING

@@ -903,12 +634,12 @@ workflow SAREK {

     if (params.step == 'annotate') cram_variant_calling = Channel.empty()

-        // RUN CRAM QC on the recalibrated CRAM files or when starting from step variant calling. NGSCheckmate should be run also on non-recalibrated CRAM files
-        CRAM_SAMPLEQC(cram_variant_calling,
-                    ngscheckmate_bed,
-                    fasta,
-                    params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'),
-                    intervals_for_preprocessing)
+    // RUN CRAM QC on the recalibrated CRAM files or when starting from step variant calling. NGSCheckmate should be run also on non-recalibrated CRAM files
+    CRAM_SAMPLEQC(cram_variant_calling,
+                ngscheckmate_bed,
+                fasta,
+                params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'),
+                intervals_for_preprocessing)

     if (params.tools) {

@@ -988,7 +719,7 @@ workflow SAREK {
             intervals_and_num_intervals,
             intervals_bed_combined, // [] if no_intervals, else interval_bed_combined.bed,
             intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi
-            PREPARE_INTERVALS.out.intervals_bed_combined, // no_intervals.bed if no intervals, else interval_bed_combined.bed; Channel operations possible
+            intervals_bed_combined_for_variant_calling, // no_intervals.bed if no intervals, else interval_bed_combined.bed; Channel operations possible
             intervals_bed_gz_tbi_and_num_intervals,
             known_indels_vqsr,
             known_sites_indels,