-
Notifications
You must be signed in to change notification settings - Fork 102
Options Available
Here we attempt to list all available FALCON options, with defaults where applicable.
input_fofn = REQUIRED
# (string) filename for the file-of-filenames
# Each line is fasta filename.
# Any relative paths are relative to the location of the input_fofn.
genome_size = REQUIRED
# (integer) estimated number of base-pairs in haplotype
seed_coverage = 30
# (integer) requested coverage for auto-calculated cutoff
length_cutoff = -1
# (integer) minimum length of seed-reads used for pre-assembly stage
# If '-1', then auto-calculate the cutoff based on genome_size and seed_coverage.
length_cutoff_pr = 50
# (integer) minimum length of seed-reads used after pre-assembly, for the "overlap" stage
target = assembly
# (string) "assembly" or "preads"
# If "preads", then pre-assembly stage is skipped and input is assumed to be preads.
default_concurrent_jobs = 8
# (int) maximum concurrency
# This applies even to "local" (non-distributed) jobs.
# Use these to override the default concurrency for any section.
pa_concurrent_jobs = %(default_concurrent_jobs)s
cns_concurrent_jobs = %(default_concurrent_jobs)s
ovlp_concurrent_jobs = %(default_concurrent_jobs)s
job_type = local
# (string) grid submission system, or "local"
# case-insensitive
# Supported types include: "sge", "lsf", "pbs", "torque", "slurm", "local"
job_queue = default
# (string) grid job-queue name
# Can be overridden with section-specific sge_option_*
# Grid job distribution options
# No matter what the job_type, we call these "sge_option_*".
# These are ignored for the "local" job_type.
# Typical values:
sge_option_da = -pe smp 4
sge_option_la = -pe smp 4
sge_option_cns = -pe smp 4
sge_option_pda = -pe smp 4
sge_option_pla = -pe smp 4
sge_option_fc = -pe smp 4
# da: daligner (stage-0)
# la: las-merging (stage-0)
# cns: consensus (stage-0)
# pda: daligner on preads (stage-1)
# pla: las-merging on preads (stage-1)
# fc: falcon (stage-2)
# sub-tool options
# For specific sub-tool options, try `--help`, search for docs here, or see
# https://dazzlerblog.wordpress.com/command-guides/
pa_DBdust_option = ...
# Passed to `DBdust`. Used only if `dust = true`.
pa_DBsplit_option = -x250 -s500 -a
# Passed to `DBsplit` during pre-assembly stage.
pa_HPCdaligner_option = -v -k16 -h35 -w7 -e.70 -l40 -s100 -M16
# Passed to `HPC.daligner` during pre-assembly stage.
# We will add `-H` based on "length_cutoff".
pa_dazcon_option = ...
# Passed to `dazcon`. Used only if `dazcon = true`.
falcon_sense_option = ...
# Passed to `fc_consensus`.
# Ignored if `dazcon = true`.
falcon_sense_skip_contained = false
# Causes '-s' to be passed to `LA4Falcon`. Rarely needed.
ovlp_DBsplit_option = -s50 -a
# Passed to `DBsplit` during overlap stage.
ovlp_HPCdaligner_option = -v -k15 -h60 -w6 -e.95 -l40 -s100 -M16
# Passed to `HPC.daligner` during overlap stage.
overlap_filtering_setting = ...
# Passed to `fc_ovlp_filter` during assembly stage.
fc_ovlp_to_graph_option = ...
# Passed to `fc_ovlp_to_graph`.
# Others
skip_checks = false
# (boolean string)
# If "true", then skip `LAcheck` during LAmerge/LAsort.
# (Actually, `LAcheck` is run, but failures are ignored.)
# When *daligner* bugs are finally fixed, this will be unnecessary.
dust = false
# (boolean string)
# If true, then run `DBdust` before pre-assembly.
dazcon = false
# (boolean string)
# If true, then use `dazcon` (from pbdagcon repo).
stop_all_jobs_on_failure = false
# (boolean string) DEPRECATED
# This was used for the old pypeFLOW refresh-loop, used by `run0.py`.
# (This is *not* the option that lets jobs currently in SGE (etc.) keep running; that is still TODO.)
use_tmpdir = false
# (boolean string) whether to run each job in TMPDIR and copy results back to nfs
# If "true", use TMPDIR. (Actually, `tempfile.tempdir`. See standard Python docs: https://docs.python.org/2/library/tempfile.html )
# If the value looks like a path, then it is used instead of TMPDIR.
Many sites have problems with filesystem latency. One way around that is to use "blocking" qsub calls, so the end-of-job is implied by the finishing of the system call, rather than by polling the filesystem.
Note that the sge_option strings are ignored when you use the "blocking" process-watcher. (And note that, for now, all jobs use the same NPROC, but we will address that soon if people are actually using blocking-mode successfully.) You should still adjust XX_concurrent_jobs as needed.
The exact call will differ for different job-distribution systems. For qsub, use `-sync y` for blocking-mode:
[General]
# still recommended
use_tmpdir = true
pwatcher_type = blocking
job_type = string
job_queue = qsub -S /bin/bash -sync y -V -q myqueue -N ${JOB_ID}
    -o "${STDOUT_FILE}"
    -e "${STDERR_FILE}"
    -pe smp ${NPROC}
    "${CMD}"
For pbs, replace `-sync y` with `-W block=true`, along with any other necessary changes.
For "blocking" mode on a local machine, use `job_queue=bash -C ${CMD}`, and be sure that your machine has at least as many processors as your XX_concurrent_jobs settings.