-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathsnakemake.yml
124 lines (103 loc) · 4.65 KB
/
snakemake.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# You may provide default values for environment variables. They will only be
# used if the variable is undefined in the current environment.
#default_env:
#  TMPDIR: /home/user/tmp
# You may also override values in the environment.
#override_env:
# LANG: C
# --- Environment variables substitution ---
#
# Environment variables may be referenced in the config via `$VARNAME` or
# `${VARNAME}`; to get a literal dollar sign use `$$`. Values in `default_env`
# and `override_env` are expanded with the unmodified environment.
#
# Beware, if using environment variables in `dentist_config` the data type
# must be correct, e.g. you cannot pass `read-coverage` via an environment
# variable because it will be a string but a float is expected. Still, if you
# specify values via `env` in the config then they will keep their data type,
# so the following will work:
#
#   override_env:
#     COV: 30.0
#   dentist_config:
#     __default__:
#       read-coverage: $COV
# Uncomment the following workflow flags to alter the workflow.
# Add the validation report and weak coverage mask to the list of default
# targets so they will be generated whether or not they are required to build
# the main target (gap-closed assembly).
#full_validation: true
# By default the closed gaps will be validated and only valid gaps will be
# included into the final gap-closed assembly. Give this flag to get the
# raw gap-closed assembly as final output.
#no_purge_output: true
# The workflow can be easily run inside a container (--use-singularity) to
# reduce the software installations. The default container is
# "docker://aludi/dentist:stable" and can be changed by the following line.
#dentist_container: "docker://aludi/dentist:edge"
# The workflow can be easily run using Conda (--use-conda) to
# reduce manual software installations. The default Conda environment
# definition is "envs/dentist_v2.yml" and can be changed by the following line.
#dentist_env: "envs/dentist_local.yml"
# Path to the DENTIST config file; adjust DENTIST's parameters in that file.
# At minimum, set either both `ploidy` and `read-coverage`, or both
# `max-coverage-reads` and `min-coverage-reads`.
dentist_config: 'dentist.yml'
# Input data for the workflow. The keys below must be nested under `inputs`;
# without the indentation `inputs` is null and they become top-level keys.
inputs:
  # The reference assembly where gaps should be closed
  reference: reference.fasta
  # The set of long reads used for gap closing
  reads: reads.fasta
  # Type of reads. Use `PACBIO_SMRT` or `OXFORD_NANOPORE`. See README for
  # more details on the subject.
  reads_type: PACBIO_SMRT
# Output files of the workflow. The keys below must be nested under `outputs`;
# without the indentation `outputs` is null and they become top-level keys.
outputs:
  # The gap-closed reference assembly
  output_assembly: gap-closed.fasta
  # JSON file with a validation report for each closed gap
  #validation_report: $workdir/validation-report.json
# Option list for `reference_dbsplit`; each entry is a single option string.
reference_dbsplit:
  - '-x1000'
  - '-a'
  # Raising this value lowers the workload of the cluster scheduler, at the
  # cost of more runtime and memory per alignment job.
  - '-s200'
# Option list for `reads_dbsplit`; each entry is a single option string.
reads_dbsplit:
  - '-x1000'
  - '-a'
  # Raising this value lowers the workload of the cluster scheduler, at the
  # cost of more runtime and memory per alignment job.
  - '-s200'
# Where intermediate files (`workdir`) and log files (`logdir`) are placed.
workdir: 'workdir'
logdir: 'logs'
# Workflow files that are created inside of workdir; uncomment and adjust if
# you feel the need
#workflow:
#  self_mask: dentist-self
#  reads_mask: dentist-reads
#  pile_ups: pile-ups.db
#  insertions_batch: insertions/batch.{batch_id}.db
#  dentist_merge_config_file: dentist.merge.json
#  insertions: insertions.db
#  closed_gaps_mask: closed-gaps
#  weak_coverage_mask: dentist-weak-coverage
# Maximum number of threads per process if enough are available. Do not use
# too many threads (say <=32) as the speedup is usually sublinear in the
# number of threads. Very high numbers may cause segmentation faults.
threads_per_process: 8
# Number of threads to use for auxiliary commands such as daligner or daccord.
# Defaults to max(1, threads // 4)
#auxiliary_threads: 1
# Masks are propagated from the assembly to the reads and back in batches
# of read DB blocks. Batching makes each cluster job run for at least a few
# minutes, because a single block usually takes much less than a minute.
propagate_batch_size: 50
# Insertions are generated in batches of `batch_size`; use a lower number to
# shorten the runtime of each job while increasing the number of jobs.
batch_size: 50
# Validation is split into this number of similar-sized parts of the
# gap-closed vs. reads alignment. If the number of blocks is higher than the
# number of contigs in the gap-closed assembly, then some validation runs
# will do nothing.
validation_blocks: 32