Merge pull request #58 from subwaystation/issue12

This fixes #12
nf-core · Mar 22, 2021 · 4e59b8f · 4e59b8f
2 parents 9229210 + 6ead1c4
commit 4e59b8f
Show file tree

Hide file tree

Showing 3 changed files with 180 additions and 57 deletions.
diff --git a/main.nf b/main.nf
@@ -18,9 +18,9 @@ if (params.help){
 }
 
 // We can't change global parameters inside this scope, so we build the ones we need locally
-def alignment_merge_cmd = params.alignment_merge_segments ? "-M" : params.alignment_merge_cmd
-def alignment_exclude_cmd = params.alignment_exclude_delim ? "-Y${params.alignment_exclude_delim}" : params.alignment_exclude_cmd
-def alignment_split_cmd = params.alignment_no_splits ? "-N" : params.alignment_split_cmd
+def alignment_merge_cmd = params.alignment_merge_segments ? "-M" : ""
+def alignment_exclude_cmd = params.alignment_exclude_delim ? "-Y${params.alignment_exclude_delim}" : "-X"
+def alignment_split_cmd = params.alignment_no_splits ? "-N" : ""
 def aligner = params.wfmash ? "W" : "E"
 def edyeet_align_pct_id_display = params.wfmash ? "" : "a${params.edyeet_align_pct_id}-"
 def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_")
@@ -403,7 +403,7 @@ def helpMessage() {
       --email [email]                 Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
       --email_on_fail [email]         Same as --email, except only send mail if the workflow is not successful
       --max_multiqc_email_size [str]  Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
-      -name [str]                     Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic
+      -name [str]                     Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
       --file_name_prefix [str]        Prefix for the output file names. If 'pggb', the file names will be very verbose and contain all parameters for each process. [default: --input]
 
     AWSBatch options:
@@ -415,10 +415,14 @@ def helpMessage() {
 
 // Has the run name been specified by the user?
 // this has the bonus effect of catching both -name and --name
+// TODO: Re-enable this block once it is clear how to add a name to the run.
+// TODO: Nextflow currently reports: "ERROR: You used a core Nextflow option with two hyphens: '--name'. Please resubmit with '-name'"
+/*
 custom_runName = params.name
 if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
     custom_runName = workflow.runName
 }
+*/
 
 log.info Headers.nf_core(workflow, params.monochrome_logs)
 
@@ -468,8 +472,6 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome
 //   input:
 //   file fasta from ch_fasta
 //
-params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
-if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) }
 
 // Check AWS batch settings
 if (workflow.profile.contains('awsbatch')) {

diff --git a/nextflow.config b/nextflow.config
@@ -42,9 +42,6 @@ params {
   alignment_merge_segments = false
   alignment_no_splits = false
   alignment_exclude_delim = false
-  alignment_merge_cmd = ""
-  alignment_exclude_cmd = "-X"
-  alignment_split_cmd = ""
 
   // Seqwish options
   seqwish_min_match_length = 19
@@ -61,22 +58,18 @@ params {
   // poa param suggestions from minimap2
   // - asm5, --poa-params 1,19,39,3,81,1, ~0.1% divergence
   // - asm10, --poa-params 1,9,16,2,41,1, ~1% divergence
- // - asm20, --poa-params 1,4,6,2,26,1, ~5% divergence
+  // - asm20, --poa-params 1,4,6,2,26,1, ~5% divergence
   smoothxg_poa_params = "1,4,6,2,26,1"
 
   // Boilerplate options
-  genome = ""
-  name = false
   multiqc_config = false
   email = false
   email_on_fail = false
   max_multiqc_email_size = 25.MB
   plaintext_email = false
   monochrome_logs = false
   help = false
-  igenomes_base = 's3://ngi-igenomes/igenomes/'
   tracedir = "${params.outdir}/pipeline_info"
-  igenomes_ignore = false
 
   // Config options
   custom_config_version = 'master'
@@ -165,11 +158,6 @@ profiles {
   test_full { includeConfig 'conf/test_full.config' }
 }
 
-// Load igenomes.config if required
-if (!params.igenomes_ignore) {
-  includeConfig 'conf/igenomes.config'
-}
-
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 env {
   PYTHONNOUSERSITE = 1

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -17,14 +17,13 @@
                 "input": {
                     "type": "string",
                     "fa_icon": "fas fa-dna",
-                    "description": "Input FastQ files.",
-                    "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`"
+                    "description": "Input FASTA file.",
+                    "help_text": "Use this to specify the location of your input FASTA file. For example:\n\n```bash\n--input 'path/to/data/input.fa.gz'\n```"
                 },
-                "single_end": {
-                    "type": "boolean",
-                    "description": "Specifies that the input is single-end reads.",
-                    "fa_icon": "fas fa-align-center",
-                    "help_text": "By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--single_end` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--input`. For example:\n\n```bash\n--single_end --input '*.fastq'\n```\n\nIt is not possible to run a mixture of single-end and paired-end files in one run."
+                "file_name_prefix": {
+                    "type": "string",
+                    "description": "Prefix for the output file names. If 'pggb', the file names will be very verbose and contain all parameters for each process.",
+                    "fa_icon": "fab fa-autoprefixer"
                 },
                 "outdir": {
                     "type": "string",
@@ -38,42 +37,173 @@
                     "fa_icon": "fas fa-envelope",
                     "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
                     "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
+                },
+                "do_stats": {
+                    "type": "boolean",
+                    "default": true,
+                    "hidden": true,
+                    "fa_icon": "fas fa-file-csv"
                 }
             }
         },
-        "reference_genome_options": {
-            "title": "Reference genome options",
+        "alignment_options": {
+            "title": "Alignment options",
             "type": "object",
             "fa_icon": "fas fa-dna",
-            "description": "Options for the reference genome indices used to align reads.",
+            "description": "Options for the all versus all alignment phase.",
             "properties": {
-                "genome": {
-                    "type": "string",
-                    "description": "Name of iGenomes reference.",
-                    "fa_icon": "fas fa-book",
-                    "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+                "wfmash": {
+                    "type": "boolean",
+                    "description": "Use wfmash instead of edyeet for the alignment phase.",
+                    "fa_icon": "fas fa-ban"
+                },
+                "edyeet_align_pct_id": {
+                    "type": "number",
+                    "default": 90,
+                    "description": "Percent identity in the edyeet edlib alignment step.",
+                    "fa_icon": "fas fa-percentage"
+                },
+                "alignment_map_pct_id": {
+                    "type": "number",
+                    "default": 90,
+                    "description": "Percent identity in the wfmash or edyeet mashmap.",
+                    "fa_icon": "fas fa-percentage"
+                },
+                "alignment_n_secondary": {
+                    "type": "integer",
+                    "default": 10,
+                    "description": "Number of secondary mappings to retain in 'map' filter mode.",
+                    "fa_icon": "fab fa-draft2digital"
+                },
+                "alignment_segment_length": {
+                    "type": "integer",
+                    "default": 10000,
+                    "description": "Segment length for mapping.",
+                    "fa_icon": "fab fa-draft2digital"
+                },
+                "alignment_block_length": {
+                    "type": "integer",
+                    "default": 30000,
+                    "description": "Minimum block length filter for mapping.",
+                    "fa_icon": "fab fa-draft2digital"
+                },
+                "alignment_mash_kmer": {
+                    "type": "integer",
+                    "default": 16,
+                    "description": "Kmer size for mashmap.",
+                    "fa_icon": "fab fa-draft2digital"
+                },
+                "alignment_merge_segments": {
+                    "type": "boolean",
+                    "description": "Merge successive mappings.",
+                    "fa_icon": "fas fa-ban"
+                },
+                "alignment_no_splits": {
+                    "type": "boolean",
+                    "description": "Disable splitting of input sequences during mapping.",
+                    "fa_icon": "fas fa-ban"
+                },
+                "alignment_exclude_delim": {
+                    "type": "boolean",
+                    "description": "Skip mappings between sequences with the same name prefix before the given delimiter character. [DEFAULT: all-vs-all and !self].",
+                    "fa_icon": "fas fa-ban"
+                }
+            }
+        },
+        "seqwish_options": {
+            "title": "Seqwish options",
+            "type": "object",
+            "description": "Options for the graph induction phase.",
+            "default": "",
+            "properties": {
+                "seqwish_min_match_length": {
+                    "type": "integer",
+                    "default": 19,
+                    "description": "Ignore exact matches below this length.",
+                    "fa_icon": "fab fa-draft2digital"
+                },
+                "seqwish_transclose_batch": {
+                    "type": "integer",
+                    "default": 1000000,
+                    "description": "Number of bp to use for transitive closure batch.",
+                    "fa_icon": "fab fa-draft2digital"
+                }
+            },
+            "fa_icon": "fas fa-dna"
+        },
+        "smoothxg_options": {
+            "title": "Smoothxg options",
+            "type": "object",
+            "description": "Options for the graph smoothing phase.",
+            "default": "",
+            "properties": {
+                "smoothxg_max_block_weight": {
+                    "type": "integer",
+                    "default": 10000,
+                    "description": "Maximum seed sequence in block.",
+                    "fa_icon": "fab fa-draft2digital"
+                },
+                "smoothxg_max_path_jump": {
+                    "type": "integer",
+                    "default": 5000,
+                    "description": "Maximum path jump to include in block.",
+                    "fa_icon": "fab fa-draft2digital"
                 },
-                "fasta": {
+                "smoothxg_max_edge_jump": {
+                    "type": "integer",
+                    "default": 5000,
+                    "description": "Maximum edge jump before breaking.",
+                    "fa_icon": "fab fa-draft2digital"
+                },
+                "smoothxg_max_poa_length": {
+                    "type": "integer",
+                    "default": 10000,
+                    "description": "Maximum sequence length to put into POA.",
+                    "fa_icon": "fab fa-draft2digital"
+                },
+                "smoothxg_consensus_spec": {
                     "type": "string",
-                    "fa_icon": "fas fa-font",
-                    "description": "Path to FASTA genome file.",
-                    "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible."
+                    "default": "10,100,1000,10000",
+                    "description": "Consensus graph specification: write the consensus graph to BASENAME.cons_[spec].gfa; where each spec contains at least a min_len parameter (which defines the length of divergences from consensus paths to preserve in the output), optionally a file containing reference paths to preserve in the output, a flag (y/n) indicating whether we should also use the POA consensus paths, a minimum coverage of consensus paths to retain (min_cov), and a maximum allele length (max_len, defaults to 1e6); implies -a; example: cons,100,1000:refs1.txt:n,1000:refs2.txt:y:2.3:1000000,10000.",
+                    "fa_icon": "fab fa-superpowers"
                 },
-                "igenomes_base": {
+                "smoothxg_block_id_min": {
+                    "type": "number",
+                    "description": "Split blocks into groups connected by this identity threshold.",
+                    "fa_icon": "fas fa-percentage"
+                },
+                "smoothxg_ratio_contain": {
+                    "type": "number",
+                    "description": "Minimum short length / long length ratio to compare sequences for the containment metric in the clustering.",
+                    "fa_icon": "fas fa-percentage"
+                },
+                "smoothxg_poa_params": {
                     "type": "string",
-                    "description": "Directory / URL base for iGenomes references.",
-                    "default": "s3://ngi-igenomes/igenomes/",
-                    "fa_icon": "fas fa-cloud-download-alt",
-                    "hidden": true
+                    "default": "1,4,6,2,26,1",
+                    "description": "Score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2.",
+                    "fa_icon": "fab fa-superpowers"
+                }
+            },
+            "fa_icon": "fas fa-project-diagram"
+        },
+        "visualization_options": {
+            "title": "Visualization options",
+            "type": "object",
+            "description": "Options for diagnostic visualizations of the built graphs.",
+            "default": "",
+            "properties": {
+                "do_viz": {
+                    "type": "boolean",
+                    "description": "Generate 1D visualisations of the built graphs.",
+                    "fa_icon": "fas fa-ban"
                 },
-                "igenomes_ignore": {
+                "do_layout": {
                     "type": "boolean",
-                    "description": "Do not load the iGenomes reference config.",
-                    "fa_icon": "fas fa-ban",
-                    "hidden": true,
-                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
+                    "description": "Generate 2D visualisations of the built graphs.",
+                    "fa_icon": "fas fa-ban"
                 }
-            }
+            },
+            "fa_icon": "fas fa-project-diagram"
         },
         "generic_options": {
             "title": "Generic options",
@@ -227,12 +357,6 @@
                     "hidden": true,
                     "fa_icon": "fas fa-users-cog"
                 },
-                "config_profile_name": {
-                    "type": "string",
-                    "description": "Institutional config name.",
-                    "hidden": true,
-                    "fa_icon": "fas fa-users-cog"
-                },
                 "config_profile_description": {
                     "type": "string",
                     "description": "Institutional config description.",
@@ -259,7 +383,16 @@
             "$ref": "#/definitions/input_output_options"
         },
         {
-            "$ref": "#/definitions/reference_genome_options"
+            "$ref": "#/definitions/alignment_options"
+        },
+        {
+            "$ref": "#/definitions/seqwish_options"
+        },
+        {
+            "$ref": "#/definitions/smoothxg_options"
+        },
+        {
+            "$ref": "#/definitions/visualization_options"
         },
         {
             "$ref": "#/definitions/generic_options"
@@ -271,4 +404,4 @@
             "$ref": "#/definitions/institutional_config_options"
         }
     ]
-}
+}