diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index acac15a769..53d55183c0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -61,6 +61,8 @@ jobs:
- tags: "sentieon/haplotyper"
- tags: "sentieon/haplotyper_joint_germline"
- tags: "sentieon/haplotyper_skip_filter"
+ - NXF_VER: "latest-everything"
+ tags: "joint_germline"
env:
NXF_ANSI_LOG: false
TEST_DATA_BASE: "${{ github.workspace }}/test-datasets"
@@ -217,7 +219,6 @@ jobs:
- tags: "bwamem2/mem"
- tags: "cat/cat"
- tags: "cat/fastq"
- - tags: "custom/dumpsoftwareversions"
- tags: "dragmap/align"
- tags: "fastp"
- tags: "fastqc"
@@ -230,8 +231,8 @@ jobs:
- tags: "multiqc"
- tags: "samtools/mpileup"
- tags: "samtools/stats"
- - tags: "untar"
- tags: "subworkflows/utils_nfvalidation_plugin"
+ - tags: "untar"
env:
NXF_ANSI_LOG: false
TEST_DATA_BASE: "${{ github.workspace }}/test-datasets"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3cab8fd7f..92b8e51ce3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,9 +25,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1360](https://github.com/nf-core/sarek/pull/1360) - Sync `TEMPLATE` with `tools` `2.11`
- [#1408](https://github.com/nf-core/sarek/pull/1408), [#1412](https://github.com/nf-core/sarek/pull/1412) - Updating samtools to v1.19.2 - except in GATK/markduplicates. (Temporarily disabled nf-test for bwamem2/mem.)
- [#1411](https://github.com/nf-core/sarek/pull/1411) - Temporarily disable sentieon related tests
+- [#1414](https://github.com/nf-core/sarek/pull/1414) - Sync `TEMPLATE` with `tools` `2.13`
- [#1419](https://github.com/nf-core/sarek/pull/1419) - Updating GATK to v4.5, and updating samtools to v1.19.2 in GATK/markduplicates.
- [#1426](https://github.com/nf-core/sarek/pull/1426) - Updating certain modules in order to fix the testdata-path in the nf-tests of those modules. Setting Docker runOptions for params.use_gatk_spark.
- [#1428](https://github.com/nf-core/sarek/pull/1428) - Sync `TEMPLATE` with `tools` `2.13.1`
+- [#1422](https://github.com/nf-core/sarek/pull/1422) - Refactoring following `TEMPLATE` sync with `tools` `2.13`
- [#1431](https://github.com/nf-core/sarek/pull/1431) - Using docker.containerOptions instead of docker.runOptions. Clearing containerOptions for SPARK modules for any kind of supported container engine.
### Fixed
@@ -43,6 +45,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1390](https://github.com/nf-core/sarek/pull/1390) - Fix badges in README
- [#1400](https://github.com/nf-core/sarek/pull/1400) - Fixed input channel for ASSESS_SIGNIFICANCE module, updated makegraph to makegraph2.
- [#1403](https://github.com/nf-core/sarek/pull/1403) - Fix intervals usage with dot in chromosome names
+- [#1407](https://github.com/nf-core/sarek/pull/1407) - Fix CI test names
+- [#1420](https://github.com/nf-core/sarek/pull/1420) - Make `-a` a default argument for `bcftools` concat
+- [#1422](https://github.com/nf-core/sarek/pull/1422) - Fix `Cannot serialize context map` warning
### Removed
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
deleted file mode 100755
index 755ee64d44..0000000000
--- a/lib/NfcoreTemplate.groovy
+++ /dev/null
@@ -1,364 +0,0 @@
-//
-// This file holds several functions used within the nf-core pipeline template.
-//
-
-import org.yaml.snakeyaml.Yaml
-import groovy.json.JsonOutput
-import nextflow.extension.FilesEx
-
-class NfcoreTemplate {
-
- //
- // Check AWS Batch related parameters have been specified correctly
- //
- public static void awsBatch(workflow, params) {
- if (workflow.profile.contains('awsbatch')) {
- // Check params.awsqueue and params.awsregion have been set if running on AWSBatch
- assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
- // Check outdir paths to be S3 buckets if running on AWSBatch
- assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
- }
- }
-
- //
- // Warn if a -profile or Nextflow config has not been provided to run the pipeline
- //
- public static void checkConfigProvided(workflow, log) {
- if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) {
- log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" +
- "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" +
- " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" +
- " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" +
- " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" +
- "Please refer to the quick start section and usage docs for the pipeline.\n "
- }
- }
-
- //
- // Generate version string
- //
- public static String version(workflow) {
- String version_string = ""
-
- if (workflow.manifest.version) {
- def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : ''
- version_string += "${prefix_v}${workflow.manifest.version}"
- }
-
- if (workflow.commitId) {
- def git_shortsha = workflow.commitId.substring(0, 7)
- version_string += "-g${git_shortsha}"
- }
-
- return version_string
- }
-
- //
- // Construct and send completion email
- //
- public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) {
-
- // Set up the e-mail variables
- def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
- if (!workflow.success) {
- subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
- }
-
- def summary = [:]
- for (group in summary_params.keySet()) {
- summary << summary_params[group]
- }
-
- def misc_fields = [:]
- misc_fields['Date Started'] = workflow.start
- misc_fields['Date Completed'] = workflow.complete
- misc_fields['Pipeline script file path'] = workflow.scriptFile
- misc_fields['Pipeline script hash ID'] = workflow.scriptId
- if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
- if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
- if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
- misc_fields['Nextflow Version'] = workflow.nextflow.version
- misc_fields['Nextflow Build'] = workflow.nextflow.build
- misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
-
- def email_fields = [:]
- email_fields['version'] = NfcoreTemplate.version(workflow)
- email_fields['runName'] = workflow.runName
- email_fields['success'] = workflow.success
- email_fields['dateComplete'] = workflow.complete
- email_fields['duration'] = workflow.duration
- email_fields['exitStatus'] = workflow.exitStatus
- email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
- email_fields['errorReport'] = (workflow.errorReport ?: 'None')
- email_fields['commandLine'] = workflow.commandLine
- email_fields['projectDir'] = workflow.projectDir
- email_fields['summary'] = summary << misc_fields
-
- // On success try attach the multiqc report
- def mqc_report = null
- try {
- if (workflow.success) {
- mqc_report = multiqc_report.getVal()
- if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
- if (mqc_report.size() > 1) {
- log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
- }
- mqc_report = mqc_report[0]
- }
- }
- } catch (all) {
- if (multiqc_report) {
- log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
- }
- }
-
- // Check if we are only sending emails on failure
- def email_address = params.email
- if (!params.email && params.email_on_fail && !workflow.success) {
- email_address = params.email_on_fail
- }
-
- // Render the TXT template
- def engine = new groovy.text.GStringTemplateEngine()
- def tf = new File("$projectDir/assets/email_template.txt")
- def txt_template = engine.createTemplate(tf).make(email_fields)
- def email_txt = txt_template.toString()
-
- // Render the HTML template
- def hf = new File("$projectDir/assets/email_template.html")
- def html_template = engine.createTemplate(hf).make(email_fields)
- def email_html = html_template.toString()
-
- // Render the sendmail template
- def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit
- def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
- def sf = new File("$projectDir/assets/sendmail_template.txt")
- def sendmail_template = engine.createTemplate(sf).make(smail_fields)
- def sendmail_html = sendmail_template.toString()
-
- // Send the HTML e-mail
- Map colors = logColours(params.monochrome_logs)
- if (email_address) {
- try {
- if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') }
- // Try to send HTML e-mail using sendmail
- def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
- sendmail_tf.withWriter { w -> w << sendmail_html }
- [ 'sendmail', '-t' ].execute() << sendmail_html
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
- } catch (all) {
- // Catch failures and try with plaintext
- def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
- if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
- mail_cmd += [ '-A', mqc_report ]
- }
- mail_cmd.execute() << email_html
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
- }
- }
-
- // Write summary e-mail HTML to a file
- def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
- output_hf.withWriter { w -> w << email_html }
- FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html");
- output_hf.delete()
-
- // Write summary e-mail TXT to a file
- def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
- output_tf.withWriter { w -> w << email_txt }
- FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt");
- output_tf.delete()
- }
-
- //
- // Construct and send a notification to a web server as JSON
- // e.g. Microsoft Teams and Slack
- //
- public static void IM_notification(workflow, params, summary_params, projectDir, log) {
- def hook_url = params.hook_url
-
- def summary = [:]
- for (group in summary_params.keySet()) {
- summary << summary_params[group]
- }
-
- def misc_fields = [:]
- misc_fields['start'] = workflow.start
- misc_fields['complete'] = workflow.complete
- misc_fields['scriptfile'] = workflow.scriptFile
- misc_fields['scriptid'] = workflow.scriptId
- if (workflow.repository) misc_fields['repository'] = workflow.repository
- if (workflow.commitId) misc_fields['commitid'] = workflow.commitId
- if (workflow.revision) misc_fields['revision'] = workflow.revision
- misc_fields['nxf_version'] = workflow.nextflow.version
- misc_fields['nxf_build'] = workflow.nextflow.build
- misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
-
- def msg_fields = [:]
- msg_fields['version'] = NfcoreTemplate.version(workflow)
- msg_fields['runName'] = workflow.runName
- msg_fields['success'] = workflow.success
- msg_fields['dateComplete'] = workflow.complete
- msg_fields['duration'] = workflow.duration
- msg_fields['exitStatus'] = workflow.exitStatus
- msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
- msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
- msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
- msg_fields['projectDir'] = workflow.projectDir
- msg_fields['summary'] = summary << misc_fields
-
- // Render the JSON template
- def engine = new groovy.text.GStringTemplateEngine()
- // Different JSON depending on the service provider
- // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
- def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
- def hf = new File("$projectDir/assets/${json_path}")
- def json_template = engine.createTemplate(hf).make(msg_fields)
- def json_message = json_template.toString()
-
- // POST
- def post = new URL(hook_url).openConnection();
- post.setRequestMethod("POST")
- post.setDoOutput(true)
- post.setRequestProperty("Content-Type", "application/json")
- post.getOutputStream().write(json_message.getBytes("UTF-8"));
- def postRC = post.getResponseCode();
- if (! postRC.equals(200)) {
- log.warn(post.getErrorStream().getText());
- }
- }
-
- //
- // Dump pipeline parameters in a json file
- //
- public static void dump_parameters(workflow, params) {
- def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
- def filename = "params_${timestamp}.json"
- def temp_pf = new File(workflow.launchDir.toString(), ".${filename}")
- def jsonStr = JsonOutput.toJson(params)
- temp_pf.text = JsonOutput.prettyPrint(jsonStr)
-
- FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json")
- temp_pf.delete()
- }
-
-
- //
- // Print pipeline summary on completion
- //
- public static void summary(workflow, params, log) {
- Map colors = logColours(params.monochrome_logs)
- if (workflow.success) {
- if (workflow.stats.ignoredCount == 0) {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
- } else {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
- }
- } else {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
- }
- }
-
- //
- // ANSII Colours used for terminal logging
- //
- public static Map logColours(Boolean monochrome_logs) {
- Map colorcodes = [:]
-
- // Reset / Meta
- colorcodes['reset'] = monochrome_logs ? '' : "\033[0m"
- colorcodes['bold'] = monochrome_logs ? '' : "\033[1m"
- colorcodes['dim'] = monochrome_logs ? '' : "\033[2m"
- colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m"
- colorcodes['blink'] = monochrome_logs ? '' : "\033[5m"
- colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m"
- colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m"
-
- // Regular Colors
- colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m"
- colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m"
- colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m"
- colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m"
- colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m"
- colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m"
- colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m"
- colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m"
-
- // Bold
- colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m"
- colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m"
- colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m"
- colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m"
- colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m"
- colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m"
- colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m"
- colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m"
-
- // Underline
- colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m"
- colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m"
- colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m"
- colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m"
- colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m"
- colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m"
- colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m"
- colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m"
-
- // High Intensity
- colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m"
- colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m"
- colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m"
- colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m"
- colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m"
- colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m"
- colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m"
- colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m"
-
- // Bold High Intensity
- colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m"
- colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m"
- colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m"
- colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m"
- colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m"
- colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m"
- colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m"
- colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m"
-
- return colorcodes
- }
-
- //
- // Does what is says on the tin
- //
- public static String dashedLine(monochrome_logs) {
- Map colors = logColours(monochrome_logs)
- return "-${colors.dim}----------------------------------------------------${colors.reset}-"
- }
-
- //
- // nf-core logo
- //
- public static String logo(workflow, monochrome_logs) {
- Map colors = logColours(monochrome_logs)
- String workflow_version = NfcoreTemplate.version(workflow)
- String.format(
- """\n
- ${dashedLine(monochrome_logs)}
- ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset}
- ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset}
- ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset}
- ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset}
- ${colors.green}`._,._,\'${colors.reset}
- ${colors.white} ____${colors.reset}
- ${colors.white} .´ _ `.${colors.reset}
- ${colors.white} / ${colors.green}|\\${colors.reset}`-_ \\${colors.reset} ${colors.blue} __ __ ___ ${colors.reset}
- ${colors.white} | ${colors.green}| \\${colors.reset} `-|${colors.reset} ${colors.blue}|__` /\\ |__) |__ |__/${colors.reset}
- ${colors.white} \\ ${colors.green}| \\${colors.reset} /${colors.reset} ${colors.blue}.__| /¯¯\\ | \\ |___ | \\${colors.reset}
- ${colors.white} `${colors.green}|${colors.reset}____${colors.green}\\${colors.reset}´${colors.reset}
-
- ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset}
- ${dashedLine(monochrome_logs)}
- """.stripIndent()
- )
- }
-}
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
deleted file mode 100755
index 847986c3da..0000000000
--- a/lib/WorkflowMain.groovy
+++ /dev/null
@@ -1,71 +0,0 @@
-//
-// This file holds several functions specific to the main.nf workflow in the nf-core/sarek pipeline
-//
-
-import nextflow.Nextflow
-
-class WorkflowMain {
-
- //
- // Citation string for pipeline
- //
- public static String citation(workflow) {
- return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
- "* The pipeline\n" +
- " https://doi.org/10.12688/f1000research.16665.2\n" +
- " https://doi.org/10.5281/zenodo.3476425\n\n" +
- "* The nf-core framework\n" +
- " https://doi.org/10.1038/s41587-020-0439-x\n\n" +
- "* Software dependencies\n" +
- " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
- }
-
- //
- // Validate parameters and print summary to screen
- //
- public static void initialise(workflow, params, log, args) {
-
- // Print workflow version and exit on --version
- if (params.version) {
- String workflow_version = NfcoreTemplate.version(workflow)
- log.info "${workflow.manifest.name} ${workflow_version}"
- System.exit(0)
- }
-
- // Check that a -profile or Nextflow config has been provided to run the pipeline
- NfcoreTemplate.checkConfigProvided(workflow, log)
- // Check that the profile doesn't contain spaces and doesn't end with a trailing comma
- checkProfile(workflow.profile, args, log)
-
- // Check AWS batch settings
- NfcoreTemplate.awsBatch(workflow, params)
-
- // Warn that no input was provided
- if (!params.input && !params.build_only_index) {
- log.warn "No samplesheet specified, attempting to restart from csv files present in ${params.outdir}"
- }
- }
- //
- // Get attribute from genome config file e.g. fasta
- //
- public static Object getGenomeAttribute(params, attribute) {
- if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
- if (params.genomes[ params.genome ].containsKey(attribute)) {
- return params.genomes[ params.genome ][ attribute ]
- }
- }
- return null
- }
-
- //
- // Exit pipeline if --profile contains spaces
- //
- private static void checkProfile(profile, args, log) {
- if (profile.endsWith(',')) {
- Nextflow.error "Profile cannot end with a trailing comma. Please remove the comma from the end of the profile string.\nHint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`."
- }
- if (args[0]) {
- log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${args[0]}` has been detected.\n Hint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`."
- }
- }
-}
diff --git a/lib/WorkflowSarek.groovy b/lib/WorkflowSarek.groovy
deleted file mode 100755
index 12f001c251..0000000000
--- a/lib/WorkflowSarek.groovy
+++ /dev/null
@@ -1,149 +0,0 @@
-//
-// This file holds several functions specific to the workflow/sarek.nf in the nf-core/sarek pipeline
-//
-
-import nextflow.Nextflow
-import groovy.text.SimpleTemplateEngine
-
-class WorkflowSarek {
-
- //
- // Check and validate parameters
- //
- public static void initialise(params, log) {
-
- genomeExistsError(params, log)
-
- if (!params.fasta && params.step == 'annotate') {
- Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
- }
- }
-
- //
- // Get workflow summary for MultiQC
- //
- public static String paramsSummaryMultiqc(workflow, summary) {
- String summary_section = ''
- for (group in summary.keySet()) {
- def group_params = summary.get(group) // This gets the parameters of that particular group
- if (group_params) {
- summary_section += "
$group
\n"
- summary_section += " \n"
- for (param in group_params.keySet()) {
- summary_section += " - $param
- ${group_params.get(param) ?: 'N/A'}
\n"
- }
- summary_section += "
\n"
- }
- }
-
- String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
- yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
- yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
- yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
- yaml_file_text += "plot_type: 'html'\n"
- yaml_file_text += "data: |\n"
- yaml_file_text += "${summary_section}"
- return yaml_file_text
- }
-
- //
- // Generate methods description for MultiQC
- //
-
- public static String toolCitationText(params) {
-
- // TODO nf-core: Optionally add in-text citation tools to this list.
- // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report
- def citation_text = [
- "Tools used in the workflow included:",
- "FastQC (Andrews 2010),",
- "MultiQC (Ewels et al. 2016)",
- "."
- ].join(' ').trim()
-
- return citation_text
- }
-
- public static String toolBibliographyText(params) {
-
- // TODO Optionally add bibliographic entries to this list.
- // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Author (2023) Pub name, Journal, DOI" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report
- def reference_text = [
- "Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).",
- "Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354"
- ].join(' ').trim()
-
- return reference_text
- }
-
- public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
- // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
- def meta = [:]
- meta.workflow = run_workflow.toMap()
- meta["manifest_map"] = run_workflow.manifest.toMap()
-
- // Pipeline DOI
- meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
- meta["nodoi_text"] = meta.manifest_map.doi ? "": "If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. "
-
- // Tool references
- meta["tool_citations"] = ""
- meta["tool_bibliography"] = ""
-
- // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
- //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
- //meta["tool_bibliography"] = toolBibliographyText(params)
-
- def methods_text = mqc_methods_yaml.text
-
- def engine = new SimpleTemplateEngine()
- def description_html = engine.createTemplate(methods_text).make(meta)
-
- return description_html
- }
-
- //
- // Exit pipeline if incorrect --genome key provided
- //
- private static void genomeExistsError(params, log) {
- if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
- def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
- " Currently, the available genome keys are:\n" +
- " ${params.genomes.keySet().join(", ")}\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- Nextflow.error(error_string)
- }
- }
-
- public static String retrieveInput(params, log){
- def input = null
- if (!params.input && !params.build_only_index) {
- switch (params.step) {
- case 'mapping': Nextflow.error("Can't start with step $params.step without samplesheet")
- break
- case 'markduplicates': log.warn("Using file ${params.outdir}/csv/mapped.csv");
- input = params.outdir + "/csv/mapped.csv"
- break
- case 'prepare_recalibration': log.warn("Using file ${params.outdir}/csv/markduplicates_no_table.csv");
- input = params.outdir + "/csv/markduplicates_no_table.csv"
- break
- case 'recalibrate': log.warn("Using file ${params.outdir}/csv/markduplicates.csv");
- input = params.outdir + "/csv/markduplicates.csv"
- break
- case 'variant_calling': log.warn("Using file ${params.outdir}/csv/recalibrated.csv");
- input = params.outdir + "/csv/recalibrated.csv"
- break
- // case 'controlfreec': csv_file = file("${params.outdir}/variant_calling/csv/control-freec_mpileup.csv", checkIfExists: true); break
- case 'annotate': log.warn("Using file ${params.outdir}/csv/variantcalled.csv");
- input = params.outdir + "/csv/variantcalled.csv"
- break
- default: log.warn("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'")
- Nextflow.error("Unknown step $params.step")
- }
- }
- return input
- }
-}
diff --git a/main.nf b/main.nf
old mode 100644
new mode 100755
index d1f17e3e95..f09af42f58
--- a/main.nf
+++ b/main.nf
@@ -20,20 +20,6 @@
*/
nextflow.enable.dsl = 2
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-include { SAREK } from './workflows/sarek'
-include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline'
-include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline'
-include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_sarek_pipeline'
-include { paramsHelp } from 'plugin/nf-validation'
-include { validateParameters } from 'plugin/nf-validation'
-
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GENOME PARAMETER VALUES
@@ -75,36 +61,62 @@ params.vep_cache_version = getGenomeAttribute('vep_cache_version')
params.vep_genome = getGenomeAttribute('vep_genome')
params.vep_species = getGenomeAttribute('vep_species')
+aligner = params.aligner
+
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ALTERNATIVE INPUT FILE ON RESTART
+ IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-params.input_restart = WorkflowSarek.retrieveInput(params, log)
+include { SAREK } from './workflows/sarek'
+include { ANNOTATION_CACHE_INITIALISATION } from './subworkflows/local/annotation_cache_initialisation'
+include { DOWNLOAD_CACHE_SNPEFF_VEP } from './subworkflows/local/download_cache_snpeff_vep'
+include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline'
+include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline'
+include { PREPARE_GENOME } from './subworkflows/local/prepare_genome'
+include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals'
+include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit'
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- NAMED WORKFLOWS FOR PIPELINE
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+// Initialize file channels based on params, defined in the params.genomes[params.genome] scope: each is Channel.fromPath(<param>) when the param is set (some are .collect()-ed); unset params fall back to Channel.empty() or Channel.value([])
+bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.empty()
+bcftools_header_lines = params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty()
+cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len) : [] // NOTE(review): the only entry here whose fallback is a plain empty list rather than a channel - confirm consumers expect that
+dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([])
+fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : Channel.empty()
+fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty()
+germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource) : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input
+known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([])
+known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([])
+mappability = params.mappability ? Channel.fromPath(params.mappability) : Channel.value([])
+pon = params.pon ? Channel.fromPath(params.pon) : Channel.value([]) // PON is optional for Mutect2 (but highly recommended)
+sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model) : Channel.value([])
+// Initialize values based on params, defined in the params.genomes[params.genome] scope: the `?:` lines keep the raw param value when it is set, the others wrap it in Channel.value(); every line falls back to Channel.empty()
+ascat_genome = params.ascat_genome ?: Channel.empty() // raw param value (not a channel) when set
+dbsnp_vqsr = params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty()
+known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty()
+known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty()
+ngscheckmate_bed = params.ngscheckmate_bed ? Channel.value(params.ngscheckmate_bed) : Channel.empty()
+snpeff_db = params.snpeff_db ?: Channel.empty() // raw param value (not a channel) when set
+vep_cache_version = params.vep_cache_version ?: Channel.empty() // raw param value (not a channel) when set
+vep_genome = params.vep_genome ?: Channel.empty() // raw param value (not a channel) when set
+vep_species = params.vep_species ?: Channel.empty() // raw param value (not a channel) when set
-// Print help message if needed
-if (params.help) {
- def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
- def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
- def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GATK.GRCh38 -profile docker --outdir results"
- log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs)
- System.exit(0)
-}
-// Validate input parameters
-if (params.validate_params) {
- validateParameters()
+vep_extra_files = [] // Plain list of optional annotation resource files; presumably handed to the VEP step (usage not visible here - confirm)
+
+if (params.dbnsfp && params.dbnsfp_tbi) { // dbNSFP: added only when both the database and its index param are set
+ vep_extra_files.add(file(params.dbnsfp, checkIfExists: true))
+ vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true))
}
-WorkflowMain.initialise(workflow, params, log, args)
+if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { // SpliceAI: added only when all four params (snv, indel and both indexes) are set
+ vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true))
+ vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true))
+ vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true))
+ vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true))
+}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -114,12 +126,188 @@ WorkflowMain.initialise(workflow, params, log, args)
// WORKFLOW: Run main nf-core/sarek analysis pipeline
workflow NFCORE_SAREK {
+ take:
+ samplesheet
+
main:
+ versions = Channel.empty()
+
+ // build indexes if needed
+ PREPARE_GENOME(
+ params.ascat_alleles,
+ params.ascat_loci,
+ params.ascat_loci_gc,
+ params.ascat_loci_rt,
+ bcftools_annotations,
+ params.chr_dir,
+ dbsnp,
+ fasta,
+ fasta_fai,
+ germline_resource,
+ known_indels,
+ known_snps,
+ pon)
+
+ // Gather built indices or get them from the params
+ // Built from the fasta file:
+ dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect()
+ : PREPARE_GENOME.out.dict
+ fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect()
+ : PREPARE_GENOME.out.fasta_fai
+ bwa = params.bwa ? Channel.fromPath(params.bwa).collect()
+ : PREPARE_GENOME.out.bwa
+ bwamem2 = params.bwamem2 ? Channel.fromPath(params.bwamem2).collect()
+ : PREPARE_GENOME.out.bwamem2
+ dragmap = params.dragmap ? Channel.fromPath(params.dragmap).collect()
+ : PREPARE_GENOME.out.hashtable
+
+ // Gather index for mapping given the chosen aligner
+ index_alignement = (aligner == "bwa-mem" || aligner == "sentieon-bwamem") ? bwa :
+ aligner == "bwa-mem2" ? bwamem2 :
+ dragmap
+
+ // TODO: add a params for msisensorpro_scan
+ msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan
+
+ // For ASCAT, extracted from zip or tar.gz files
+ allele_files = PREPARE_GENOME.out.allele_files
+ chr_files = PREPARE_GENOME.out.chr_files
+ gc_file = PREPARE_GENOME.out.gc_file
+ loci_files = PREPARE_GENOME.out.loci_files
+ rt_file = PREPARE_GENOME.out.rt_file
+
+ // Tabix indexed vcf files
+ // Index for the bcftools annotations: user-supplied .tbi if given, otherwise the one built by PREPARE_GENOME.
+ // With no annotations file at all, fall back to an empty channel (Channel.empty is declared without parameters).
+ bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi) : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.empty()
+ dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi) : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([])
+ germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi) : PREPARE_GENOME.out.germline_resource_tbi : [] // do not change to Channel.value([]): the check for its existence then fails for GetPileupSummaries
+ known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([])
+ known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi) : PREPARE_GENOME.out.known_snps_tbi : Channel.value([])
+ pon_tbi = params.pon ? params.pon_tbi ? Channel.fromPath(params.pon_tbi) : PREPARE_GENOME.out.pon_tbi : Channel.value([])
+
+ // known_sites is made by grouping both the dbsnp and the known snps/indels resources
+ // Which can either or both be optional
+ known_sites_indels = dbsnp.concat(known_indels).collect()
+ known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect()
+ known_sites_snps = dbsnp.concat(known_snps).collect()
+ known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect()
+
+ // Build intervals if needed
+ PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step)
+
+ // Intervals for speed up preprocessing/variant calling by spread/gather
+ // [interval.bed] all intervals in one file
+ intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined
+ intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined
+ intervals_bed_combined_for_variant_calling = PREPARE_INTERVALS.out.intervals_bed_combined
+
+ // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS)
+ intervals_for_preprocessing = params.wes ?
+ intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() :
+ Channel.value([ [ id:'null' ], [] ])
+ intervals = PREPARE_INTERVALS.out.intervals_bed // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather
+ intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather
+ intervals_and_num_intervals = intervals.map{ interval, num_intervals ->
+ if ( num_intervals < 1 ) [ [], num_intervals ]
+ else [ interval, num_intervals ]
+ }
+ intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals ->
+ if ( num_intervals < 1 ) [ [], [], num_intervals ]
+ else [ intervals[0], intervals[1], num_intervals ]
+ }
+ if (params.tools && params.tools.split(',').contains('cnvkit')) {
+ if (params.cnvkit_reference) {
+ cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect()
+ } else {
+ PREPARE_REFERENCE_CNVKIT(fasta, intervals_bed_combined)
+ cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference
+ versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions)
+ }
+ } else {
+ cnvkit_reference = Channel.value([])
+ }
+ // Gather the versions of all software used
+ versions = versions.mix(PREPARE_GENOME.out.versions)
+ versions = versions.mix(PREPARE_INTERVALS.out.versions)
+
+ vep_fasta = (params.vep_include_fasta) ? fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []]
+
+ // Download cache
+ if (params.download_cache) {
+ // Even if a cache is provided, when the user specifies download_cache, sarek will download the cache
+ ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ])
+ snpeff_info = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ])
+ DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info)
+ snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache
+ vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] }
+
+ versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions)
+ } else {
+ // Looks for cache information either locally or on the cloud
+ ANNOTATION_CACHE_INITIALISATION(
+ (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))),
+ params.snpeff_cache,
+ params.snpeff_genome,
+ params.snpeff_db,
+ (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))),
+ params.vep_cache,
+ params.vep_species,
+ params.vep_cache_version,
+ params.vep_genome,
+ "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.")
+
+ snpeff_cache = ANNOTATION_CACHE_INITIALISATION.out.snpeff_cache
+ vep_cache = ANNOTATION_CACHE_INITIALISATION.out.ensemblvep_cache
+ }
//
// WORKFLOW: Run pipeline
//
- SAREK()
+ SAREK(samplesheet,
+ allele_files,
+ bcftools_annotations,
+ bcftools_annotations_tbi,
+ bcftools_header_lines,
+ cf_chrom_len,
+ chr_files,
+ cnvkit_reference,
+ dbsnp,
+ dbsnp_tbi,
+ dbsnp_vqsr,
+ dict,
+ fasta,
+ fasta_fai,
+ gc_file,
+ germline_resource,
+ germline_resource_tbi,
+ index_alignement,
+ intervals_and_num_intervals,
+ intervals_bed_combined,
+ intervals_bed_combined_for_variant_calling,
+ intervals_bed_gz_tbi_and_num_intervals,
+ intervals_bed_gz_tbi_combined,
+ intervals_for_preprocessing,
+ known_indels_vqsr,
+ known_sites_indels,
+ known_sites_indels_tbi,
+ known_sites_snps,
+ known_sites_snps_tbi,
+ known_snps_vqsr,
+ loci_files,
+ mappability,
+ msisensorpro_scan,
+ ngscheckmate_bed,
+ pon,
+ pon_tbi,
+ rt_file,
+ sentieon_dnascope_model,
+ snpeff_cache,
+ vep_cache,
+ vep_cache_version,
+ vep_extra_files,
+ vep_fasta,
+ vep_genome,
+ vep_species
+ )
emit:
multiqc_report = SAREK.out.multiqc_report // channel: /path/to/multiqc_report.html
@@ -150,7 +338,7 @@ workflow {
//
// WORKFLOW: Run main workflow
//
- NFCORE_SAREK()
+ NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet)
//
// SUBWORKFLOW: Run completion tasks
@@ -166,6 +354,25 @@ workflow {
)
}
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ FUNCTIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// Look up one attribute (e.g. fasta) of the selected genome in params.genomes
+// Returns null when the genomes map, the genome key, or the attribute is missing
+//
+
+def getGenomeAttribute(attribute) {
+    // Guard: a genomes map, a genome name, and an entry for that name must all be present
+    def genome_known = params.genomes && params.genome && params.genomes.containsKey(params.genome)
+    if (!genome_known) {
+        return null
+    }
+    def genome_entry = params.genomes[ params.genome ]
+    return genome_entry.containsKey(attribute) ? genome_entry[ attribute ] : null
+}
+
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
diff --git a/nextflow.config b/nextflow.config
index e9dabd1136..28dad4b826 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -10,8 +10,9 @@ params {
// Workflow flags:
// Mandatory arguments
- input = null // No default input
- step = 'mapping' // Starts with mapping
+ input = null // No default input
+ input_restart = null // No default automatic input
+ step = 'mapping' // Starts with mapping
// References
genome = 'GATK.GRCh38'
@@ -124,6 +125,7 @@ params {
config_profile_contact = null
config_profile_url = null
test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek3'
+ modules_testdata_base_path = null
// Max resource options
// Defaults only, expecting to be overwritten
diff --git a/nextflow_schema.json b/nextflow_schema.json
index abcc232840..9e6e2a14f0 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -893,6 +893,11 @@
"help_text": "Warning: The `-profile test` samplesheet file itself contains remote paths. Setting this parameter does not alter the contents of that file.",
"hidden": true
},
+ "modules_testdata_base_path": {
+ "type": "string",
+ "description": "Base path / URL for data used in the modules",
+ "hidden": true
+ },
"seq_center": {
"type": "string",
"fa_icon": "fas fa-university",
diff --git a/subworkflows/local/initialize_annotation_cache/main.nf b/subworkflows/local/annotation_cache_initialisation/main.nf
similarity index 94%
rename from subworkflows/local/initialize_annotation_cache/main.nf
rename to subworkflows/local/annotation_cache_initialisation/main.nf
index d2c6fcb7d6..6e35a68d50 100644
--- a/subworkflows/local/initialize_annotation_cache/main.nf
+++ b/subworkflows/local/annotation_cache_initialisation/main.nf
@@ -1,14 +1,14 @@
//
-// INITIALIZE ANNOTATION CACHE
+// ANNOTATION CACHE INITIALISATION
//
-// Initialize channels based on params or indices that were just built
+// Initialise channels based on params or indices that were just built
// For all modules here:
// A when clause condition is defined in the conf/modules.config to determine if the module should be run
// Condition is based on params.step and params.tools
// If and extra condition exists, it's specified in comments
-workflow INITIALIZE_ANNOTATION_CACHE {
+workflow ANNOTATION_CACHE_INITIALISATION {
take:
snpeff_enabled
snpeff_cache
diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf
index 46a2fa5985..5f70a22432 100644
--- a/subworkflows/local/prepare_genome/main.nf
+++ b/subworkflows/local/prepare_genome/main.nf
@@ -53,7 +53,7 @@ workflow PREPARE_GENOME {
GATK4_CREATESEQUENCEDICTIONARY(fasta)
MSISENSORPRO_SCAN(fasta)
- SAMTOOLS_FAIDX(fasta, [ [ id:fasta.baseName ], [] ] )
+ SAMTOOLS_FAIDX(fasta, [ [ id:'fasta' ], [] ] )
// the following are flattened and mapped in case the user supplies more than one value for the param
// written for KNOWN_INDELS, but preemptively applied to the rest
diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf
index 985924efa8..37daa1322a 100644
--- a/subworkflows/local/samplesheet_to_channel/main.nf
+++ b/subworkflows/local/samplesheet_to_channel/main.nf
@@ -5,6 +5,7 @@ workflow SAMPLESHEET_TO_CHANNEL{
aligner //
ascat_alleles //
ascat_loci //
+ ascat_loci_gc //
ascat_loci_rt //
bcftools_annotations //
bcftools_annotations_tbi //
@@ -306,5 +307,3 @@ def flowcellLaneFromFastq(path) {
}
return fcid
}
-
-
diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf
index e9a6c40288..afd58a8cc2 100644
--- a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf
@@ -8,17 +8,19 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { fromSamplesheet } from 'plugin/nf-validation'
include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline'
+include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin'
+include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline'
include { completionEmail } from '../../nf-core/utils_nfcore_pipeline'
include { completionSummary } from '../../nf-core/utils_nfcore_pipeline'
include { dashedLine } from '../../nf-core/utils_nfcore_pipeline'
-include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline'
+include { getWorkflowVersion } from '../../nf-core/utils_nfcore_pipeline'
include { imNotification } from '../../nf-core/utils_nfcore_pipeline'
-include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline'
+include { logColours } from '../../nf-core/utils_nfcore_pipeline'
include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline'
+include { SAMPLESHEET_TO_CHANNEL } from '../samplesheet_to_channel'
/*
========================================================================================
@@ -39,7 +41,7 @@ workflow PIPELINE_INITIALISATION {
main:
- ch_versions = Channel.empty()
+ versions = Channel.empty()
//
// Print version and exit if required and dump pipeline parameters to JSON file
@@ -69,40 +71,99 @@ workflow PIPELINE_INITIALISATION {
//
// Check config provided to the pipeline
//
- UTILS_NFCORE_PIPELINE (
- nextflow_cli_args
- )
+ UTILS_NFCORE_PIPELINE(nextflow_cli_args)
+
//
// Custom validation for pipeline parameters
//
validateInputParameters()
- //
- // Create channel from input file provided through params.input
- //
- // Channel
- // .fromSamplesheet("input")
- // .map {
- // meta, fastq_1, fastq_2 ->
- // if (!fastq_2) {
- // return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
- // } else {
- // return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
- // }
- // }
- // .groupTuple()
- // .map {
- // validateInputSamplesheet(it)
- // }
- // .map {
- // meta, fastqs ->
- // return [ meta, fastqs.flatten() ]
- // }
- // .set { ch_samplesheet }
+ // Check input path parameters to see if they exist
+ def checkPathParamList = [
+ params.ascat_alleles,
+ params.ascat_loci,
+ params.ascat_loci_gc,
+ params.ascat_loci_rt,
+ params.bwa,
+ params.bwamem2,
+ params.bcftools_annotations,
+ params.bcftools_annotations_tbi,
+ params.bcftools_header_lines,
+ params.cf_chrom_len,
+ params.chr_dir,
+ params.cnvkit_reference,
+ params.dbnsfp,
+ params.dbnsfp_tbi,
+ params.dbsnp,
+ params.dbsnp_tbi,
+ params.dict,
+ params.dragmap,
+ params.fasta,
+ params.fasta_fai,
+ params.germline_resource,
+ params.germline_resource_tbi,
+ params.input,
+ params.intervals,
+ params.known_indels,
+ params.known_indels_tbi,
+ params.known_snps,
+ params.known_snps_tbi,
+ params.mappability,
+ params.multiqc_config,
+ params.ngscheckmate_bed,
+ params.pon,
+ params.pon_tbi,
+ params.sentieon_dnascope_model,
+ params.spliceai_indel,
+ params.spliceai_indel_tbi,
+ params.spliceai_snv,
+ params.spliceai_snv_tbi
+ ]
+
+// Only check the annotation caches when the corresponding tools are requested
+if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache)
+if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache)
+
+// Fail early if any provided path does not exist; without this loop the list above is never used
+for (param in checkPathParamList) if (param) file(param, checkIfExists: true)
+
+ // Without --input, fall back to the samplesheet chosen by retrieveInput for the requested step
+
+ params.input_restart = retrieveInput((!params.build_only_index && !params.input), params.step, params.outdir)
+
+ ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? Channel.fromSamplesheet("input") : Channel.fromSamplesheet("input_restart")
+
+ SAMPLESHEET_TO_CHANNEL(
+ ch_from_samplesheet,
+ params.aligner,
+ params.ascat_alleles,
+ params.ascat_loci,
+ params.ascat_loci_gc,
+ params.ascat_loci_rt,
+ params.bcftools_annotations,
+ params.bcftools_annotations_tbi,
+ params.bcftools_header_lines,
+ params.build_only_index,
+ params.dbsnp,
+ params.fasta,
+ params.germline_resource,
+ params.intervals,
+ params.joint_germline,
+ params.joint_mutect2,
+ params.known_indels,
+ params.known_snps,
+ params.no_intervals,
+ params.pon,
+ params.sentieon_dnascope_emit_mode,
+ params.sentieon_haplotyper_emit_mode,
+ params.seq_center,
+ params.seq_platform,
+ params.skip_tools,
+ params.step,
+ params.tools,
+ params.umi_read_structure,
+ params.wes)
emit:
- // samplesheet = ch_samplesheet
- versions = ch_versions
+ samplesheet = SAMPLESHEET_TO_CHANNEL.out.input_sample
+ versions
}
/*
@@ -168,17 +229,6 @@ def validateInputSamplesheet(input) {
return [ metas[0], fastqs ]
}
-//
-// Get attribute from genome config file e.g. fasta
-//
-def getGenomeAttribute(attribute) {
- if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
- if (params.genomes[ params.genome ].containsKey(attribute)) {
- return params.genomes[ params.genome ][ attribute ]
- }
- }
- return null
-}
//
// Exit pipeline if incorrect --genome key provided
@@ -249,3 +299,61 @@ def methodsDescriptionText(mqc_methods_yaml) {
return description_html.toString()
}
+
+//
+// nf-core/sarek logo
+//
+// Builds the multi-line ASCII-art banner as a string.
+// monochrome_logs is forwarded to logColours() (colour map used in the template) and to dashedLine().
+// The banner also embeds workflow.manifest.name and getWorkflowVersion().
+// The String.format(...) call is the last expression of the function, so its result is the return value.
+// NOTE(review): String.format receives only the template and no arguments, so a literal '%' in any
+// interpolated value would be parsed as a format specifier - confirm this cannot happen.
+//
+def nfCoreLogo(monochrome_logs=true) {
+ Map colors = logColours(monochrome_logs)
+ String.format(
+ """\n
+ ${dashedLine(monochrome_logs)}
+ ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset}
+ ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset}
+ ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset}
+ ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset}
+ ${colors.green}`._,._,\'${colors.reset}
+ ${colors.white} ____${colors.reset}
+ ${colors.white} .´ _ `.${colors.reset}
+ ${colors.white} / ${colors.green}|\\${colors.reset}`-_ \\${colors.reset} ${colors.blue} __ __ ___ ${colors.reset}
+ ${colors.white} | ${colors.green}| \\${colors.reset} `-|${colors.reset} ${colors.blue}|__` /\\ |__) |__ |__/${colors.reset}
+ ${colors.white} \\ ${colors.green}| \\${colors.reset} /${colors.reset} ${colors.blue}.__| /¯¯\\ | \\ |___ | \\${colors.reset}
+ ${colors.white} `${colors.green}|${colors.reset}____${colors.green}\\${colors.reset}´${colors.reset}
+
+ ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset}
+ ${dashedLine(monochrome_logs)}
+ """.stripIndent()
+ )
+}
+
+//
+// retrieveInput
+//
+// Pick the samplesheet to restart from when the pipeline has to find its own input.
+//
+// need_input: true when no samplesheet was supplied and one must be located
+// step:       the step the pipeline starts from
+// outdir:     directory whose csv/ subfolder holds the restart samplesheets
+//
+// Returns the path of the restart CSV, or null when need_input is false.
+// Calls Nextflow.error for step 'mapping' (no CSV can exist yet) and for any unrecognised step.
+//
+def retrieveInput(need_input, step, outdir) {
+    // Honour the caller's decision instead of re-deriving it from global params
+    if (!need_input) {
+        return null
+    }
+
+    // Restart step -> samplesheet name under <outdir>/csv
+    // A 'controlfreec' entry (variant_calling/csv/control-freec_mpileup.csv) is currently disabled
+    def restart_csv = [
+        'markduplicates'       : 'mapped.csv',
+        'prepare_recalibration': 'markduplicates_no_table.csv',
+        'recalibrate'          : 'markduplicates.csv',
+        'variant_calling'      : 'recalibrated.csv',
+        'annotate'             : 'variantcalled.csv'
+    ]
+
+    if (step == 'mapping') {
+        Nextflow.error("Can't start with step $step without samplesheet")
+    }
+
+    if (!restart_csv.containsKey(step)) {
+        log.warn("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'")
+        Nextflow.error("Unknown step $step")
+    }
+
+    def input = outdir + "/csv/" + restart_csv[step]
+    log.warn("Using file ${input}")
+    return input
+}
diff --git a/workflows/sarek.nf b/workflows/sarek/main.nf
similarity index 66%
rename from workflows/sarek.nf
rename to workflows/sarek/main.nf
index c94ee48918..39e9d87217 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek/main.nf
@@ -4,226 +4,85 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { paramsSummaryMap } from 'plugin/nf-validation'
-include { paramsSummaryLog } from 'plugin/nf-validation'
-include { fromSamplesheet } from 'plugin/nf-validation'
-
-include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_sarek_pipeline'
-
-def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
-def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
-def summary_params = paramsSummaryMap(workflow)
-
-// Print parameter summary log to screen
-log.info logo + paramsSummaryLog(workflow) + citation
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- VALIDATE INPUTS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-// Check input path parameters to see if they exist
-def checkPathParamList = [
- params.ascat_alleles,
- params.ascat_loci,
- params.ascat_loci_gc,
- params.ascat_loci_rt,
- params.bwa,
- params.bwamem2,
- params.bcftools_annotations,
- params.bcftools_annotations_tbi,
- params.bcftools_header_lines,
- params.cf_chrom_len,
- params.chr_dir,
- params.cnvkit_reference,
- params.dbnsfp,
- params.dbnsfp_tbi,
- params.dbsnp,
- params.dbsnp_tbi,
- params.dict,
- params.dragmap,
- params.fasta,
- params.fasta_fai,
- params.germline_resource,
- params.germline_resource_tbi,
- params.input,
- params.intervals,
- params.known_indels,
- params.known_indels_tbi,
- params.known_snps,
- params.known_snps_tbi,
- params.mappability,
- params.multiqc_config,
- params.ngscheckmate_bed,
- params.pon,
- params.pon_tbi,
- params.sentieon_dnascope_model,
- params.spliceai_indel,
- params.spliceai_indel_tbi,
- params.spliceai_snv,
- params.spliceai_snv_tbi
-]
-
-// only check if we are using the tools
-if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache)
-if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache)
-
-// Validate input parameters
-WorkflowSarek.initialise(params, log)
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Check mandatory parameters
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-for (param in checkPathParamList) if (param) file(param, checkIfExists: true)
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- IMPORT LOCAL MODULES/SUBWORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
-bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.empty()
-bcftools_header_lines = params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty()
-cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : []
-dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([])
-fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty()
-fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty()
-germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input
-known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([])
-known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([])
-mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([])
-pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended)
-sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([])
-
-// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
-ascat_genome = params.ascat_genome ?: Channel.empty()
-dbsnp_vqsr = params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty()
-known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty()
-known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty()
-ngscheckmate_bed = params.ngscheckmate_bed ? Channel.value(params.ngscheckmate_bed) : Channel.empty()
-snpeff_db = params.snpeff_db ?: Channel.empty()
-vep_cache_version = params.vep_cache_version ?: Channel.empty()
-vep_genome = params.vep_genome ?: Channel.empty()
-vep_species = params.vep_species ?: Channel.empty()
-
-
-vep_extra_files = []
-
-if (params.dbnsfp && params.dbnsfp_tbi) {
- vep_extra_files.add(file(params.dbnsfp, checkIfExists: true))
- vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true))
-}
-
-if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) {
- vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true))
- vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true))
- vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true))
- vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true))
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- IMPORT LOCAL/NF-CORE MODULES/SUBWORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_sarek_pipeline'
// Create samplesheets to restart from different steps
-include { SAMPLESHEET_TO_CHANNEL } from '../subworkflows/local/samplesheet_to_channel/main'
-include { CHANNEL_ALIGN_CREATE_CSV } from '../subworkflows/local/channel_align_create_csv/main'
-include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../subworkflows/local/channel_markduplicates_create_csv/main'
-include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main'
-include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../subworkflows/local/channel_applybqsr_create_csv/main'
-include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../subworkflows/local/channel_variant_calling_create_csv/main'
-
-// Download cache for SnpEff/VEP if needed
-include { DOWNLOAD_CACHE_SNPEFF_VEP } from '../subworkflows/local/download_cache_snpeff_vep/main'
-
-// Initialize annotation cache
-include { INITIALIZE_ANNOTATION_CACHE } from '../subworkflows/local/initialize_annotation_cache/main'
-
-// Build indices if needed
-include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main'
-
-// Build intervals if needed
-include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals/main'
-
-// Build CNVkit reference if needed
-include { PREPARE_REFERENCE_CNVKIT } from '../subworkflows/local/prepare_reference_cnvkit/main'
+include { CHANNEL_ALIGN_CREATE_CSV } from '../../subworkflows/local/channel_align_create_csv/main'
+include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../../subworkflows/local/channel_markduplicates_create_csv/main'
+include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../../subworkflows/local/channel_baserecalibrator_create_csv/main'
+include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../../subworkflows/local/channel_applybqsr_create_csv/main'
+include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../../subworkflows/local/channel_variant_calling_create_csv/main'
// Convert BAM files to FASTQ files
-include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../subworkflows/local/bam_convert_samtools/main'
-include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../subworkflows/local/bam_convert_samtools/main'
+include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../../subworkflows/local/bam_convert_samtools/main'
+include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../../subworkflows/local/bam_convert_samtools/main'
// Run FASTQC
-include { FASTQC } from '../modules/nf-core/fastqc/main'
+include { FASTQC } from '../../modules/nf-core/fastqc/main'
// TRIM/SPLIT FASTQ Files
-include { FASTP } from '../modules/nf-core/fastp/main'
+include { FASTP } from '../../modules/nf-core/fastp/main'
// Create umi consensus bams from fastq
-include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../subworkflows/local/fastq_create_umi_consensus_fgbio/main'
+include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../../subworkflows/local/fastq_create_umi_consensus_fgbio/main'
// Map input reads to reference genome
-include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main'
+include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main'
// Merge and index BAM files (optional)
-include { BAM_MERGE_INDEX_SAMTOOLS } from '../subworkflows/local/bam_merge_index_samtools/main'
+include { BAM_MERGE_INDEX_SAMTOOLS } from '../../subworkflows/local/bam_merge_index_samtools/main'
// Convert BAM files
-include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../modules/nf-core/samtools/convert/main'
-include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../../modules/nf-core/samtools/convert/main'
// Convert CRAM files (optional)
-include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../modules/nf-core/samtools/convert/main'
-include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../../modules/nf-core/samtools/convert/main'
// Mark Duplicates (+QC)
-include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main'
-include { BAM_MARKDUPLICATES_SPARK } from '../subworkflows/local/bam_markduplicates_spark/main'
-include { BAM_SENTIEON_DEDUP } from '../subworkflows/local/bam_sentieon_dedup/main'
+include { BAM_MARKDUPLICATES } from '../../subworkflows/local/bam_markduplicates/main'
+include { BAM_MARKDUPLICATES_SPARK } from '../../subworkflows/local/bam_markduplicates_spark/main'
+include { BAM_SENTIEON_DEDUP } from '../../subworkflows/local/bam_sentieon_dedup/main'
// QC on CRAM
-include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../subworkflows/local/cram_qc_mosdepth_samtools/main'
-include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main'
+include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../../subworkflows/local/cram_qc_mosdepth_samtools/main'
+include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../../subworkflows/local/cram_qc_mosdepth_samtools/main'
// Create recalibration tables
-include { BAM_BASERECALIBRATOR } from '../subworkflows/local/bam_baserecalibrator/main'
-include { BAM_BASERECALIBRATOR_SPARK } from '../subworkflows/local/bam_baserecalibrator_spark/main'
+include { BAM_BASERECALIBRATOR } from '../../subworkflows/local/bam_baserecalibrator/main'
+include { BAM_BASERECALIBRATOR_SPARK } from '../../subworkflows/local/bam_baserecalibrator_spark/main'
// Create recalibrated cram files to use for variant calling (+QC)
-include { BAM_APPLYBQSR } from '../subworkflows/local/bam_applybqsr/main'
-include { BAM_APPLYBQSR_SPARK } from '../subworkflows/local/bam_applybqsr_spark/main'
+include { BAM_APPLYBQSR } from '../../subworkflows/local/bam_applybqsr/main'
+include { BAM_APPLYBQSR_SPARK } from '../../subworkflows/local/bam_applybqsr_spark/main'
// Variant calling on a single normal sample
-include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../subworkflows/local/bam_variant_calling_germline_all/main'
+include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../../subworkflows/local/bam_variant_calling_germline_all/main'
// Variant calling on a single tumor sample
-include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows/local/bam_variant_calling_tumor_only_all/main'
+include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../../subworkflows/local/bam_variant_calling_tumor_only_all/main'
// Variant calling on tumor/normal pair
-include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main'
+include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../../subworkflows/local/bam_variant_calling_somatic_all/main'
// POST VARIANTCALLING: e.g. merging
-include { POST_VARIANTCALLING } from '../subworkflows/local/post_variantcalling/main'
+include { POST_VARIANTCALLING } from '../../subworkflows/local/post_variantcalling/main'
// QC on VCF files
-include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main'
+include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../../subworkflows/local/vcf_qc_bcftools_vcftools/main'
// Sample QC on CRAM files
-include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main'
+include { CRAM_SAMPLEQC } from '../../subworkflows/local/cram_sampleqc/main'
// Annotation
-include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main'
+include { VCF_ANNOTATE_ALL } from '../../subworkflows/local/vcf_annotate_all/main'
// MULTIQC
-include { MULTIQC } from '../modules/nf-core/multiqc/main'
+include { MULTIQC } from '../../modules/nf-core/multiqc/main'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -232,188 +91,60 @@ include { MULTIQC } from '../modules/nf-core
*/
workflow SAREK {
+ take:
+ input_sample
+ allele_files
+ bcftools_annotations
+ bcftools_annotations_tbi
+ bcftools_header_lines
+ cf_chrom_len
+ chr_files
+ cnvkit_reference
+ dbsnp
+ dbsnp_tbi
+ dbsnp_vqsr
+ dict
+ fasta
+ fasta_fai
+ gc_file
+ germline_resource
+ germline_resource_tbi
+ index_alignement
+ intervals_and_num_intervals
+ intervals_bed_combined
+ intervals_bed_combined_for_variant_calling
+ intervals_bed_gz_tbi_and_num_intervals
+ intervals_bed_gz_tbi_combined
+ intervals_for_preprocessing
+ known_indels_vqsr
+ known_sites_indels
+ known_sites_indels_tbi
+ known_sites_snps
+ known_sites_snps_tbi
+ known_snps_vqsr
+ loci_files
+ mappability
+ msisensorpro_scan
+ ngscheckmate_bed
+ pon
+ pon_tbi
+ rt_file
+ sentieon_dnascope_model
+ snpeff_cache
+ vep_cache
+ vep_cache_version
+ vep_extra_files
+ vep_fasta
+ vep_genome
+ vep_species
+
+ main:
// To gather all QC reports for MultiQC
- reports = Channel.empty()
ch_multiqc_files = Channel.empty()
- multiqc_report = Channel.empty()
-
- // To gather used softwares versions for MultiQC
- versions = Channel.empty()
-
- // Parse samplesheet
- // Set input, can either be from --input or from automatic retrieval in WorkflowSarek.groovy
- ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? Channel.fromSamplesheet("input") : Channel.fromSamplesheet("input_restart")
-
- SAMPLESHEET_TO_CHANNEL(
- ch_from_samplesheet,
- params.aligner,
- params.ascat_alleles,
- params.ascat_loci,
- params.ascat_loci_rt,
- params.bcftools_annotations,
- params.bcftools_annotations_tbi,
- params.bcftools_header_lines,
- params.build_only_index,
- params.dbsnp,
- params.fasta,
- params.germline_resource,
- params.intervals,
- params.joint_germline,
- params.joint_mutect2,
- params.known_indels,
- params.known_snps,
- params.no_intervals,
- params.pon,
- params.sentieon_dnascope_emit_mode,
- params.sentieon_haplotyper_emit_mode,
- params.seq_center,
- params.seq_platform,
- params.skip_tools,
- params.step,
- params.tools,
- params.umi_read_structure,
- params.wes)
-
- input_sample = SAMPLESHEET_TO_CHANNEL.out.input_sample
-
- // MULTIQC
- ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
- ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
- ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
- ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-
-
- // Download cache
- if (params.download_cache) {
- // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache
- ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ])
- snpeff_info = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ])
- DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info)
- snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache
- vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] }
-
- versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions)
- } else {
- // Looks for cache information either locally or on the cloud
- INITIALIZE_ANNOTATION_CACHE(
- (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))),
- params.snpeff_cache,
- params.snpeff_genome,
- params.snpeff_db,
- (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))),
- params.vep_cache,
- params.vep_species,
- params.vep_cache_version,
- params.vep_genome,
- "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.")
-
- snpeff_cache = INITIALIZE_ANNOTATION_CACHE.out.snpeff_cache
- vep_cache = INITIALIZE_ANNOTATION_CACHE.out.ensemblvep_cache
- }
-
- // Build indices if needed
- PREPARE_GENOME(
- params.ascat_alleles,
- params.ascat_loci,
- params.ascat_loci_gc,
- params.ascat_loci_rt,
- bcftools_annotations,
- params.chr_dir,
- dbsnp,
- fasta,
- fasta_fai,
- germline_resource,
- known_indels,
- known_snps,
- pon)
-
- // Gather built indices or get them from the params
- // Built from the fasta file:
- dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect()
- : PREPARE_GENOME.out.dict
- fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).first()
- : PREPARE_GENOME.out.fasta_fai
- bwa = params.bwa ? Channel.fromPath(params.bwa).collect()
- : PREPARE_GENOME.out.bwa
- bwamem2 = params.bwamem2 ? Channel.fromPath(params.bwamem2).collect()
- : PREPARE_GENOME.out.bwamem2
- dragmap = params.dragmap ? Channel.fromPath(params.dragmap).collect()
- : PREPARE_GENOME.out.hashtable
-
- // Gather index for mapping given the chosen aligner
- index_alignement = (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem") ? bwa :
- params.aligner == "bwa-mem2" ? bwamem2 :
- dragmap
-
- // TODO: add a params for msisensorpro_scan
- msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan
-
- // For ASCAT, extracted from zip or tar.gz files:
- allele_files = PREPARE_GENOME.out.allele_files
- chr_files = PREPARE_GENOME.out.chr_files
- gc_file = PREPARE_GENOME.out.gc_file
- loci_files = PREPARE_GENOME.out.loci_files
- rt_file = PREPARE_GENOME.out.rt_file
-
- // Tabix indexed vcf files:
- bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.empty([])
- dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([])
- germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries
- known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([])
- known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi).collect() : PREPARE_GENOME.out.known_snps_tbi : Channel.value([])
- pon_tbi = params.pon ? params.pon_tbi ? Channel.fromPath(params.pon_tbi).collect() : PREPARE_GENOME.out.pon_tbi : Channel.value([])
-
- // known_sites is made by grouping both the dbsnp and the known snps/indels resources
- // Which can either or both be optional
- known_sites_indels = dbsnp.concat(known_indels).collect()
- known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect()
-
- known_sites_snps = dbsnp.concat(known_snps).collect()
- known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect()
-
- // Build intervals if needed
- PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step)
-
- // Intervals for speed up preprocessing/variant calling by spread/gather
- // [interval.bed] all intervals in one file
- intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined
- intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined
-
- // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS)
- intervals_for_preprocessing = params.wes ?
- intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() :
- Channel.value([ [ id:'null' ], [] ])
-
- intervals = PREPARE_INTERVALS.out.intervals_bed // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather
- intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather
-
- intervals_and_num_intervals = intervals.map{ interval, num_intervals ->
- if ( num_intervals < 1 ) [ [], num_intervals ]
- else [ interval, num_intervals ]
- }
-
- intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals ->
- if ( num_intervals < 1 ) [ [], [], num_intervals ]
- else [ intervals[0], intervals[1], num_intervals ]
- }
-
- if (params.tools && params.tools.split(',').contains('cnvkit')) {
- if (params.cnvkit_reference) {
- cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect()
- } else {
- PREPARE_REFERENCE_CNVKIT(fasta, intervals_bed_combined)
- cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference
-
- versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions)
- }
- } else {
- cnvkit_reference = Channel.value([])
- }
-
- // Gather used softwares versions
- versions = versions.mix(PREPARE_GENOME.out.versions)
- versions = versions.mix(PREPARE_INTERVALS.out.versions)
+ multiqc_report = Channel.empty()
+ reports = Channel.empty()
+ versions = Channel.empty()
// PREPROCESSING
@@ -903,12 +634,12 @@ workflow SAREK {
if (params.step == 'annotate') cram_variant_calling = Channel.empty()
- // RUN CRAM QC on the recalibrated CRAM files or when starting from step variant calling. NGSCheckmate should be run also on non-recalibrated CRAM files
- CRAM_SAMPLEQC(cram_variant_calling,
- ngscheckmate_bed,
- fasta,
- params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'),
- intervals_for_preprocessing)
+ // RUN CRAM QC on the recalibrated CRAM files or when starting from step variant calling. NGSCheckmate should also be run on non-recalibrated CRAM files
+ CRAM_SAMPLEQC(cram_variant_calling,
+ ngscheckmate_bed,
+ fasta,
+ params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'),
+ intervals_for_preprocessing)
if (params.tools) {
@@ -988,7 +719,7 @@ workflow SAREK {
intervals_and_num_intervals,
intervals_bed_combined, // [] if no_intervals, else interval_bed_combined.bed,
intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi
- PREPARE_INTERVALS.out.intervals_bed_combined, // no_intervals.bed if no intervals, else interval_bed_combined.bed; Channel operations possible
+ intervals_bed_combined_for_variant_calling, // no_intervals.bed if no intervals, else interval_bed_combined.bed; Channel operations possible
intervals_bed_gz_tbi_and_num_intervals,
known_indels_vqsr,
known_sites_indels,