Skip to content

Commit

Permalink
Merge pull request #58 from subwaystation/issue12
Browse files Browse the repository at this point in the history
This fixes #12
  • Loading branch information
Zethson authored Mar 22, 2021
2 parents 9229210 + 6ead1c4 commit 4e59b8f
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 57 deletions.
14 changes: 8 additions & 6 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ if (params.help){
}

// We can't change global parameters inside this scope, so we build the ones we need locally
def alignment_merge_cmd = params.alignment_merge_segments ? "-M" : params.alignment_merge_cmd
def alignment_exclude_cmd = params.alignment_exclude_delim ? "-Y${params.alignment_exclude_delim}" : params.alignment_exclude_cmd
def alignment_split_cmd = params.alignment_no_splits ? "-N" : params.alignment_split_cmd
def alignment_merge_cmd = params.alignment_merge_segments ? "-M" : ""
def alignment_exclude_cmd = params.alignment_exclude_delim ? "-Y${params.alignment_exclude_delim}" : "-X"
def alignment_split_cmd = params.alignment_no_splits ? "-N" : ""
def aligner = params.wfmash ? "W" : "E"
def edyeet_align_pct_id_display = params.wfmash ? "" : "a${params.edyeet_align_pct_id}-"
def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_")
Expand Down Expand Up @@ -403,7 +403,7 @@ def helpMessage() {
--email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
--email_on_fail [email] Same as --email, except only send mail if the workflow is not successful
--max_multiqc_email_size [str] Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
-name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic
-name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
--file_name_prefix [str] Prefix for the output file names. If 'pggb', the file names will be very verbose and contain all parameters for each process. [default: --input]
AWSBatch options:
Expand All @@ -415,10 +415,14 @@ def helpMessage() {

// Has the run name been specified by the user?
// this has the bonus effect of catching both -name and --name
// TODO INVOKE THIS AGAIN ONCE IT IS CLEAR HOW TO ADD A NAME TO THE RUN
// TODO ERROR: You used a core Nextflow option with two hyphens: '--name'. Please resubmit with '-name'
/*
custom_runName = params.name
if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
custom_runName = workflow.runName
}
*/

log.info Headers.nf_core(workflow, params.monochrome_logs)

Expand Down Expand Up @@ -468,8 +472,6 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome
// input:
// file fasta from ch_fasta
//
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) }

// Check AWS batch settings
if (workflow.profile.contains('awsbatch')) {
Expand Down
14 changes: 1 addition & 13 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,6 @@ params {
alignment_merge_segments = false
alignment_no_splits = false
alignment_exclude_delim = false
alignment_merge_cmd = ""
alignment_exclude_cmd = "-X"
alignment_split_cmd = ""

// Seqwish options
seqwish_min_match_length = 19
Expand All @@ -61,22 +58,18 @@ params {
// poa param suggestions from minimap2
// - asm5, --poa-params 1,19,39,3,81,1, ~0.1% divergence
// - asm10, --poa-params 1,9,16,2,41,1, ~1% divergence
// - asm20, --poa-params 1,4,6,2,26,1, ~5% divergence
// - asm20, --poa-params 1,4,6,2,26,1, ~5% divergence
smoothxg_poa_params = "1,4,6,2,26,1"

// Boilerplate options
genome = ""
name = false
multiqc_config = false
email = false
email_on_fail = false
max_multiqc_email_size = 25.MB
plaintext_email = false
monochrome_logs = false
help = false
igenomes_base = 's3://ngi-igenomes/igenomes/'
tracedir = "${params.outdir}/pipeline_info"
igenomes_ignore = false

// Config options
custom_config_version = 'master'
Expand Down Expand Up @@ -165,11 +158,6 @@ profiles {
test_full { includeConfig 'conf/test_full.config' }
}

// Load igenomes.config if required
if (!params.igenomes_ignore) {
includeConfig 'conf/igenomes.config'
}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
env {
PYTHONNOUSERSITE = 1
Expand Down
209 changes: 171 additions & 38 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@
"input": {
"type": "string",
"fa_icon": "fas fa-dna",
"description": "Input FastQ files.",
"help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`"
"description": "Input FASTA file.",
"help_text": "Use this to specify the location of your input FASTA file. For example:\n\n```bash\n--input 'path/to/data/input.fa.gz'\n```"
},
"single_end": {
"type": "boolean",
"description": "Specifies that the input is single-end reads.",
"fa_icon": "fas fa-align-center",
"help_text": "By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--single_end` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--input`. For example:\n\n```bash\n--single_end --input '*.fastq'\n```\n\nIt is not possible to run a mixture of single-end and paired-end files in one run."
"file_name_prefix": {
"type": "string",
"description": "Prefix for the output file names. If 'pggb', the file names will be very verbose and contain all parameters for each process.",
"fa_icon": "fab fa-autoprefixer"
},
"outdir": {
"type": "string",
Expand All @@ -38,42 +37,173 @@
"fa_icon": "fas fa-envelope",
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
},
"do_stats": {
"type": "boolean",
"default": true,
"hidden": true,
"fa_icon": "fas fa-file-csv"
}
}
},
"reference_genome_options": {
"title": "Reference genome options",
"alignment_options": {
"title": "Alignment options",
"type": "object",
"fa_icon": "fas fa-dna",
"description": "Options for the reference genome indices used to align reads.",
"description": "Options for the all-versus-all alignment phase.",
"properties": {
"genome": {
"type": "string",
"description": "Name of iGenomes reference.",
"fa_icon": "fas fa-book",
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
"wfmash": {
"type": "boolean",
"description": "Use wfmash instead of edyeet for the alignment phase.",
"fa_icon": "fas fa-ban"
},
"edyeet_align_pct_id": {
"type": "number",
"default": 90,
"description": "Percent identity in the edyeet edlib alignment step.",
"fa_icon": "fas fa-percentage"
},
"alignment_map_pct_id": {
"type": "number",
"default": 90,
"description": "Percent identity in the wfmash or edyeet mashmap.",
"fa_icon": "fas fa-percentage"
},
"alignment_n_secondary": {
"type": "integer",
"default": 10,
"description": "Number of secondary mappings to retain in 'map' filter mode.",
"fa_icon": "fab fa-draft2digital"
},
"alignment_segment_length": {
"type": "integer",
"default": 10000,
"description": "Segment length for mapping.",
"fa_icon": "fab fa-draft2digital"
},
"alignment_block_length": {
"type": "integer",
"default": 30000,
"description": "Minimum block length filter for mapping.",
"fa_icon": "fab fa-draft2digital"
},
"alignment_mash_kmer": {
"type": "integer",
"default": 16,
"description": "Kmer size for mashmap.",
"fa_icon": "fab fa-draft2digital"
},
"alignment_merge_segments": {
"type": "boolean",
"description": "Merge successive mappings.",
"fa_icon": "fas fa-ban"
},
"alignment_no_splits": {
"type": "boolean",
"description": "Disable splitting of input sequences during mapping.",
"fa_icon": "fas fa-ban"
},
"alignment_exclude_delim": {
"type": "boolean",
"description": "Skip mappings between sequences with the same name prefix before the given delimiter character. [DEFAULT: all-vs-all and !self].",
"fa_icon": "fas fa-ban"
}
}
},
"seqwish_options": {
"title": "Seqwish options",
"type": "object",
"description": "Options for the graph induction phase.",
"default": "",
"properties": {
"seqwish_min_match_length": {
"type": "integer",
"default": 19,
"description": "Ignore exact matches below this length.",
"fa_icon": "fab fa-draft2digital"
},
"seqwish_transclose_batch": {
"type": "integer",
"default": 1000000,
"description": "Number of bp to use for transitive closure batch.",
"fa_icon": "fab fa-draft2digital"
}
},
"fa_icon": "fas fa-dna"
},
"smoothxg_options": {
"title": "Smoothxg options",
"type": "object",
"description": "Options for graph smoothing phase.",
"default": "",
"properties": {
"smoothxg_max_block_weight": {
"type": "integer",
"default": 10000,
"description": "Maximum seed sequence in block.",
"fa_icon": "fab fa-draft2digital"
},
"smoothxg_max_path_jump": {
"type": "integer",
"default": 5000,
"description": "Maximum path jump to include in block.",
"fa_icon": "fab fa-draft2digital"
},
"fasta": {
"smoothxg_max_edge_jump": {
"type": "integer",
"default": 5000,
"description": "Maximum edge jump before breaking.",
"fa_icon": "fab fa-draft2digital"
},
"smoothxg_max_poa_length": {
"type": "integer",
"default": 10000,
"description": "Maximum sequence length to put into POA.",
"fa_icon": "fab fa-draft2digital"
},
"smoothxg_consensus_spec": {
"type": "string",
"fa_icon": "fas fa-font",
"description": "Path to FASTA genome file.",
"help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible."
"default": "10,100,1000,10000",
"description": "Consensus graph specification: write the consensus graph to BASENAME.cons_[spec].gfa; where each spec contains at least a min_len parameter (which defines the length of divergences from consensus paths to preserve in the output), optionally a file containing reference paths to preserve in the output, a flag (y/n) indicating whether we should also use the POA consensus paths, a minimum coverage of consensus paths to retain (min_cov), and a maximum allele length (max_len, defaults to 1e6); implies -a; example: cons,100,1000:refs1.txt:n,1000:refs2.txt:y:2.3:1000000,10000.",
"fa_icon": "fab fa-superpowers"
},
"igenomes_base": {
"smoothxg_block_id_min": {
"type": "number",
"description": "Split blocks into groups connected by this identity threshold.",
"fa_icon": "fas fa-percentage"
},
"smoothxg_ratio_contain": {
"type": "number",
"description": "Minimum short length / long length ratio to compare sequences for the containment metric in the clustering.",
"fa_icon": "fas fa-percentage"
},
"smoothxg_poa_params": {
"type": "string",
"description": "Directory / URL base for iGenomes references.",
"default": "s3://ngi-igenomes/igenomes/",
"fa_icon": "fas fa-cloud-download-alt",
"hidden": true
"default": "1,4,6,2,26,1",
"description": "Score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2.",
"fa_icon": "fab fa-superpowers"
}
},
"fa_icon": "fas fa-project-diagram"
},
"visualization_options": {
"title": "Visualization options",
"type": "object",
"description": "Options for diagnostic visualizations of the built graphs.",
"default": "",
"properties": {
"do_viz": {
"type": "boolean",
"description": "Generate 1D visualisations of the built graphs.",
"fa_icon": "fas fa-ban"
},
"igenomes_ignore": {
"do_layout": {
"type": "boolean",
"description": "Do not load the iGenomes reference config.",
"fa_icon": "fas fa-ban",
"hidden": true,
"help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
"description": "Generate 2D visualisations of the built graphs.",
"fa_icon": "fas fa-ban"
}
}
},
"fa_icon": "fas fa-project-diagram"
},
"generic_options": {
"title": "Generic options",
Expand Down Expand Up @@ -227,12 +357,6 @@
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_name": {
"type": "string",
"description": "Institutional config name.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_description": {
"type": "string",
"description": "Institutional config description.",
Expand All @@ -259,7 +383,16 @@
"$ref": "#/definitions/input_output_options"
},
{
"$ref": "#/definitions/reference_genome_options"
"$ref": "#/definitions/alignment_options"
},
{
"$ref": "#/definitions/seqwish_options"
},
{
"$ref": "#/definitions/smoothxg_options"
},
{
"$ref": "#/definitions/visualization_options"
},
{
"$ref": "#/definitions/generic_options"
Expand All @@ -271,4 +404,4 @@
"$ref": "#/definitions/institutional_config_options"
}
]
}
}

0 comments on commit 4e59b8f

Please sign in to comment.