-
Notifications
You must be signed in to change notification settings - Fork 0
/
basecall.Snakefile
90 lines (76 loc) · 2.49 KB
/
basecall.Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python3
import csv
def fix_name(new_name):
    """
    Rename the most recently created rule to new_name.

    This is a hack (the original author called it a terrible one). Two
    places are updated: the `name` attribute of the last object in
    `workflow.rules`, and the last entry of `rules.__dict__`, which is
    re-keyed to new_name while keeping its value and its position.

    `workflow` and `rules` are not defined in this file; presumably they
    are the globals Snakemake makes available inside a Snakefile.
    """
    # Rename the rule object itself.
    newest_rule = list(workflow.rules)[-1]
    newest_rule.name = new_name

    # Swap the key of the last registered entry, leaving the earlier
    # entries and their order untouched.
    registered = list(rules.__dict__.items())
    _, newest_entry = registered.pop()
    registered.append((new_name, newest_entry))
    rules.__dict__ = dict(registered)
###########
# GLOBALS #
###########
# CSV manifest with (at least) a `name` and a `container` column; lines
# starting with '#' are skipped when it is read.
versions_manifest = 'data/versions_to_run.csv'
# Path handed to every basecalling rule as its input.
fast5_path = 'data/reads/BB31_drone'
# fast5_path = 'data/reads/basecalling_practical' # from https://timkahlke.github.io/LongRead_tutorials

########
# MAIN #
########

# Map each version name to its container. If a name appears more than
# once in the manifest, the last row wins.
with open(versions_manifest, 'rt') as manifest:
    data_lines = (line for line in manifest if not line.startswith('#'))
    guppy_versions = {
        entry['name']: entry['container']
        for entry in csv.DictReader(data_lines)
    }

# Dict keys are already unique, so sorting them gives the run order.
versions_to_run = sorted(guppy_versions)
#########
# RULES #
#########
wildcard_constraints:
guppy = '|'.join(versions_to_run)
rule target:
input:
expand('output/010_basecall/{guppy}/sequencing_summary.txt',
guppy=versions_to_run)
# Full basecall. Could reduce with this strategy:
# drop reads < 5kb
# remove worst 10% of reads (check cov)
# get IDs
# only basecall those (use option -l in guppy)
for guppy in versions_to_run:
rule:
input:
fast5_path
output:
f'output/010_basecall/{guppy}/sequencing_summary.txt',
# p = directory(f'output/010_basecall/{guppy}/pass'),
# Marking the fail directory as temp means Snakemake will delete it
# after basecalling completes. This is to save storage space.
f = temp(directory(f'output/010_basecall/{guppy}/fail'))
params:
outdir = f'output/010_basecall/{guppy}',
config = ('dna_r9.4.1_450bps_sup.cfg' if guppy.endswith('_sup')
else 'dna_r9.4.1_450bps_hac.cfg')
log:
f'output/logs/full_basecall.{guppy}.log'
threads:
3
resources:
partition = 'gpu-a100',
gres = 'gpu:1',
time = 480 * 5,
mem_mb = 40000
container:
guppy_versions[guppy]
shell:
# 'nvidia-smi && '
'guppy_basecaller '
'--device auto ' # enable GPU
'--input_path {input} '
'--save_path {params.outdir} '
'--config {params.config} '
'--verbose_logs '
'--recursive '
'&> {log}'
fix_name(guppy)