nf-core · ewels · Feb 16, 2021 · Feb 12, 2021 · Feb 12, 2021 · Feb 12, 2021
diff --git a/.gitignore b/.gitignore
@@ -21,7 +21,6 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
 lib64/
 parts/
 sdist/

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,8 +4,17 @@
 
 ### Template
 
+* **Major new feature** - Validation of pipeline parameters [[#426]](https://github.com/nf-core/tools/issues/426)
+  * The validation runs as soon as the pipeline launches and checks the pipeline input parameters for two main things:
+    * No parameters are supplied that share a name with core Nextflow options (eg. `--resume` instead of `-resume`)
+    * Supplied parameters validate against the pipeline JSON schema (eg. correct variable types, required values)
+  * If either parameter validation fails or the pipeline has errors, a warning is given about any unexpected parameters found which are not described in the pipeline schema.
+  * This behaviour can be disabled by using `--validate_params false`
 * Added profiles to support the [Charliecloud](https://hpc.github.io/charliecloud/) and [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) container engines [[#824](https://github.com/nf-core/tools/issues/824)]
 * Fixed typo in nf-core-lint CI that prevented the markdown summary from being automatically posted on PRs as a comment.
+* Changed default for `--input` from `data/*{1,2}.fastq.gz` to `null`, as this is now validated by the schema as a required value.
+* Removed support for `--name` parameter for custom run names.
+  * The same functionality for MultiQC still exists with the core Nextflow `-name` option.
 * Added to template docs about how to identify process name for resource customisation
 
 ### Modules

diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/NfcoreSchema.groovy b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/NfcoreSchema.groovy
@@ -0,0 +1,208 @@
+/*
+ * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
+ */
+
+import org.everit.json.schema.Schema
+import org.everit.json.schema.loader.SchemaLoader
+import org.everit.json.schema.ValidationException
+import org.json.JSONObject
+import org.json.JSONTokener
+import org.json.JSONArray
+import groovy.json.JsonSlurper
+import groovy.json.JsonBuilder
+
+class NfcoreSchema {
+    /*
+    * Validate the supplied pipeline parameters against the pipeline JSON schema.
+    *
+    * Two checks are made:
+    *   1. No supplied parameter shares a name with a core Nextflow option
+    *      (eg. `--resume` given instead of `-resume`).
+    *   2. The cleaned parameters (see cleanParameters) validate against the schema.
+    * If either check fails, the problems are logged and the JVM exits with status 1.
+    *
+    * params     - map of pipeline parameters; `schema_ignore_params` may hold a
+    *              comma-separated list of names that are never reported as unexpected
+    * jsonSchema - path to the pipeline JSON schema file
+    * log        - logger used for error and warning output
+    *
+    * Returns the names of supplied parameters that are neither described in the
+    * schema nor listed in `schema_ignore_params`.
+    */
+    /* groovylint-disable-next-line UnusedPrivateMethodParameter */
+    private static ArrayList validateParameters(params, jsonSchema, log) {
+        def has_error = false
+        //=====================================================================//
+        // Check for nextflow core params and unexpected params
+        // The schema file is read once; this text feeds both checks below
+        def json = new File(jsonSchema).text
+        def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions')
+        def specifiedParamKeys = params.keySet()
+        def nf_params = [
+            // Options for base `nextflow` command
+            'bg',
+            'c',
+            'C',
+            'config',
+            'd',
+            'D',
+            'dockerize',
+            'h',
+            'log',
+            'q',
+            'quiet',
+            'syslog',
+            'v',
+            'version',
+
+            // Options for `nextflow run` command
+            'ansi',
+            'ansi-log',
+            'bg',
+            'bucket-dir',
+            'c',
+            'cache',
+            'config',
+            'dsl2',
+            'dump-channels',
+            'dump-hashes',
+            'E',
+            'entry',
+            'latest',
+            'lib',
+            'main-script',
+            'N',
+            'name',
+            'offline',
+            'params-file',
+            'pi',
+            'plugins',
+            'poll-interval',
+            'pool-size',
+            'profile',
+            'ps',
+            'qs',
+            'queue-size',
+            'r',
+            'resume',
+            'revision',
+            'stdin',
+            'stub',
+            'stub-run',
+            'test',
+            'w',
+            'with-charliecloud',
+            'with-conda',
+            'with-dag',
+            'with-docker',
+            'with-mpi',
+            'with-notification',
+            'with-podman',
+            'with-report',
+            'with-singularity',
+            'with-timeline',
+            'with-tower',
+            'with-trace',
+            'with-weblog',
+            'without-docker',
+            'without-podman',
+            'work-dir'
+        ]
+        def unexpectedParams = []
+
+        // Collect expected parameters from the schema
+        def expectedParams = []
+        for (group in schemaParams) {
+            for (p in group.value['properties']) {
+                expectedParams.push(p.key)
+            }
+        }
+
+        // Names that are never reported as unexpected. Built once, outside the loop;
+        // an unset `schema_ignore_params` is treated as an empty list.
+        def params_ignore = ((params.schema_ignore_params ?: '').toString().split(',') as List) + 'schema_ignore_params'
+
+        for (specifiedParam in specifiedParamKeys) {
+            // nextflow params
+            if (nf_params.contains(specifiedParam)) {
+                log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'"
+                has_error = true
+            }
+            // unexpected params
+            if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam)) {
+                unexpectedParams.push(specifiedParam)
+            }
+        }
+
+        //=====================================================================//
+        // Validate parameters against the schema
+        // Parse the schema text already in memory rather than opening a second
+        // stream on the file that would never be closed
+        JSONObject rawSchema = new JSONObject(new JSONTokener(json))
+        Schema schema = SchemaLoader.load(rawSchema)
+
+        // Clean the parameters
+        def cleanedParams = cleanParameters(params)
+
+        // Convert to JSONObject
+        def jsonParams = new JsonBuilder(cleanedParams)
+        JSONObject paramsJSON = new JSONObject(jsonParams.toString())
+
+        // Validate
+        try {
+            schema.validate(paramsJSON)
+        } catch (ValidationException e) {
+            println ""
+            log.error 'ERROR: Validation of pipeline parameters failed!'
+            JSONObject exceptionJSON = e.toJSON()
+            printExceptions(exceptionJSON, paramsJSON, log)
+            if (unexpectedParams.size() > 0){
+                println ""
+                def warn_msg = 'Found unexpected parameters:'
+                for (unexpectedParam in unexpectedParams){
+                    // cleanParameters drops values that evaluate to false, so the key may be
+                    // absent from paramsJSON; fall back to the raw value in that case
+                    def shownValue = paramsJSON.has(unexpectedParam) ? paramsJSON.get(unexpectedParam) : params[unexpectedParam]
+                    warn_msg = warn_msg + "\n* --${unexpectedParam}: ${shownValue.toString()}"
+                }
+                log.warn warn_msg
+            }
+            println ""
+            has_error = true
+        }
+
+        if(has_error){
+            System.exit(1)
+        }
+
+        return unexpectedParams
+    }
+
+    // Walk a validation exception (in JSON form) recursively and log one line for
+    // every leaf, ie. every exception that has no causingExceptions of its own
+    private static void printExceptions(exJSON, paramsJSON, log) {
+        def nested = exJSON['causingExceptions']
+
+        // Not a leaf: report each child instead of this exception
+        if (nested.length() != 0) {
+            for (child in nested) {
+                printExceptions(child, paramsJSON, log)
+            }
+            return
+        }
+
+        def message = exJSON['message']
+        def requiredKey = message =~ /required key \[([^\]]+)\] not found/
+        if (requiredKey.matches()) {
+            // Missing required param
+            log.error "* Missing required parameter: --${requiredKey[0][1]}"
+            return
+        }
+        if (exJSON['pointerToViolation'] == '#') {
+            // Other base-level error
+            log.error "* ${message}"
+            return
+        }
+        // Error with specific param: the name is the pointer minus its leading '#/'
+        def param = exJSON['pointerToViolation'] - ~/^#\//
+        def param_val = paramsJSON[param].toString()
+        log.error "* --${param}: ${message} (${param_val})"
+    }
+
+    // Build a copy of params for schema validation: entries whose value evaluates
+    // to false are dropped, and MemoryUnit, Duration and LinkedHashMap values are
+    // replaced by their toString() form
+    private static Map cleanParameters(params) {
+        def stringifiedTypes = [nextflow.util.MemoryUnit, nextflow.util.Duration, LinkedHashMap]
+        def cleaned = params.getClass().newInstance(params)
+        for (entry in params) {
+            def value = entry['value']
+            // remove anything evaluating to false
+            if (!value) {
+                cleaned.remove(entry.key)
+            }
+            // Cast to String on an exact class match only
+            if (stringifiedTypes.contains(value.getClass())) {
+                cleaned.replace(entry.key, value.toString())
+            }
+        }
+        return cleaned
+    }
+
+}
diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/nfcore_external_java_deps.jar b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/nfcore_external_java_deps.jar
diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/main.nf b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/main.nf
@@ -9,9 +9,10 @@
 ----------------------------------------------------------------------------------------
 */
 
+log.info nfcoreHeader()
+
 def helpMessage() {
     // TODO nf-core: Add to this help message with new command line parameters
-    log.info nfcoreHeader()
     log.info"""
 
     Usage:
@@ -53,6 +54,16 @@ if (params.help) {
     exit 0
 }
 
+////////////////////////////////////////////////////
+/* --         VALIDATE PARAMETERS              -- */
+////////////////////////////////////////////////////
+def json_schema = "$baseDir/nextflow_schema.json"
+def unexpectedParams = []
+if (params.validate_params) {
+    unexpectedParams = NfcoreSchema.validateParameters(params, json_schema, log)
+}
+////////////////////////////////////////////////////
+
 /*
  * SET UP CONFIGURATION VARIABLES
  */
@@ -73,13 +84,6 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome
 params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
 if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) }
 
-// Has the run name been specified by the user?
-// this has the bonus effect of catching both -name and --name
-custom_runName = params.name
-if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
-    custom_runName = workflow.runName
-}
-
 // Check AWS batch settings
 if (workflow.profile.contains('awsbatch')) {
     // AWSBatch sanity checking
@@ -122,10 +126,9 @@ if (params.input_paths) {
 }
 
 // Header log info
-log.info nfcoreHeader()
 def summary = [:]
 if (workflow.revision) summary['Pipeline Release'] = workflow.revision
-summary['Run Name']         = custom_runName ?: workflow.runName
+summary['Run Name']         = workflow.runName
 // TODO nf-core: Report custom parameters here
 summary['Input']            = params.input
 summary['Fasta Ref']        = params.fasta
@@ -242,8 +245,12 @@ process multiqc {
     file "multiqc_plots"
 
     script:
-    rtitle = custom_runName ? "--title \"$custom_runName\"" : ''
-    rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
+    rtitle = ''
+    rfilename = ''
+    if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
+        rtitle = "--title \"${workflow.runName}\""
+        rfilename = "--filename " + workflow.runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report"
+    }
     custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : ''
     // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time
     """
@@ -282,7 +289,7 @@ workflow.onComplete {
     }
     def email_fields = [:]
     email_fields['version'] = workflow.manifest.version
-    email_fields['runName'] = custom_runName ?: workflow.runName
+    email_fields['runName'] = workflow.runName
     email_fields['success'] = workflow.success
     email_fields['dateComplete'] = workflow.complete
     email_fields['duration'] = workflow.duration
@@ -389,6 +396,12 @@ workflow.onComplete {
 
 }
 
+workflow.onError {
+    // Print unexpected parameters
+    unexpectedParams.each { unexpected ->
+        log.warn "Unexpected parameter: ${unexpected}"
+    }
+}
 
 def nfcoreHeader() {
     // Log colors ANSI codes

diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow.config b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow.config
@@ -11,13 +11,12 @@ params {
   // Workflow flags
   // TODO nf-core: Specify your pipeline's command line flags
   genome = false
-  input = "data/*{1,2}.fastq.gz"
+  input = null
   single_end = false
   outdir = './results'
   publish_dir_mode = 'copy'
 
   // Boilerplate options
-  name = false
   multiqc_config = false
   email = false
   email_on_fail = false
@@ -34,6 +33,8 @@ params {
   config_profile_description = false
   config_profile_contact = false
   config_profile_url = false
+  validate_params = true
+  schema_ignore_params = 'genomes'
 
   // Defaults only, expecting to be overwritten
   max_memory = 128.GB

diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json
@@ -104,6 +104,11 @@
                         "move"
                     ]
                 },
+                "validate_params": {
+                    "type": "boolean",
+                    "description": "Boolean whether to validate parameters against the schema at runtime",
+                    "default": true
+                },
                 "name": {
                     "type": "string",
                     "description": "Workflow name.",
@@ -256,4 +261,4 @@
             "$ref": "#/definitions/institutional_config_options"
         }
     ]
-}
+}
diff --git a/nf_core/schema.py b/nf_core/schema.py
@@ -459,9 +459,11 @@ def add_schema_found_configs(self):
         Add anything that's found in the Nextflow params that's missing in the pipeline schema
         """
         params_added = []
+        params_ignore = self.pipeline_params.get("schema_ignore_params", "").strip("\"'").split(",")
+        params_ignore.append("schema_ignore_params")
         for p_key, p_val in self.pipeline_params.items():
             # Check if key is in schema parameters
-            if not p_key in self.schema_params:
+            if p_key not in self.schema_params and p_key not in params_ignore:
                 if (
                     self.no_prompts
                     or self.schema_from_scratch
-Original file line number
+Diff line change
@@ Expand Up / @@ -21,7 +21,6 @@ dist/ @@
     downloads/
     eggs/
     .eggs/
-    lib/
     lib64/
     parts/
     sdist/
@@ Expand Down @@