-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathSequencing_DWGSIM.py
executable file
·77 lines (72 loc) · 3.28 KB
/
Sequencing_DWGSIM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#! /usr/bin/env python3
'''
Niema Moshiri 2016
"Sequencing" module, using DWGSIM to simulate NGS reads
'''
from Sequencing import Sequencing
import FAVITES_GlobalContext as GC
from gzip import open as gopen
from subprocess import call,DEVNULL
from tempfile import NamedTemporaryFile
from os.path import expanduser,isfile
from os import getcwd,makedirs,chdir,listdir
class Sequencing_DWGSIM(Sequencing):
def cite():
return GC.CITATION_DWGSIM
def init():
GC.out_dir = expanduser(GC.out_dir)
GC.dwgsim_path = expanduser(GC.dwgsim_path.strip())
GC.dwgsim_options = [i.strip() for i in GC.dwgsim_options.strip().split()]
def introduce_sequencing_error(node):
if not hasattr(GC,"sequencing_file"):
GC.sequencing_file = gopen('%s/error_prone_files/sequence_data_subsampled_errorprone_read1.fastq.gz'%GC.out_dir, 'wb', 9)
GC.sequencing_file2 = gopen('%s/error_prone_files/sequence_data_subsampled_errorprone_read2.fastq.gz'%GC.out_dir, 'wb', 9)
orig_dir = getcwd()
chdir(GC.out_dir)
makedirs("DWGSIM_output", exist_ok=True)
chdir("DWGSIM_output")
cn_label = node.get_name()
for t in GC.final_sequences[cn_label]:
f = NamedTemporaryFile(mode='w')
for l,s in GC.final_sequences[cn_label][t]:
f.write(">%s\n%s\n" % (l,s))
f.flush()
command = [GC.dwgsim_path] + GC.dwgsim_options
if GC.random_number_seed is not None:
command += ['-z',str(GC.random_number_seed)]
GC.random_number_seed += 1
command.append(f.name)
command.append('%s_%f' % (cn_label,t))
try:
call(command, stderr=open('%s_%f.log' % (cn_label,t), 'w'))
except FileNotFoundError:
chdir(GC.START_DIR)
assert False, "dwgsim executable was not found: %s" % GC.dwgsim_path
f.close()
if isfile('%s_%f.bwa.read1.fastq' % (cn_label,t)):
f = open('%s_%f.bwa.read1.fastq' % (cn_label,t))
elif isfile('%s_%f.bwa.read1.fastq.gz' % (cn_label,t)):
f = gopen('%s_%f.bwa.read1.fastq.gz' % (cn_label,t))
else:
raise FileNotFoundError("Couldn't find %s_%f.bwa.read1.fastq or %s_%f.bwa.read1.fastq.gz" % (cn_label,t,cn_label,t))
for l in f:
if isinstance(l,bytes):
GC.sequencing_file.write(l)
else:
GC.sequencing_file.write(l.encode())
if isfile('%s_%f.bwa.read2.fastq' % (cn_label,t)):
f = open('%s_%f.bwa.read2.fastq' % (cn_label,t))
elif isfile('%s_%f.bwa.read2.fastq.gz' % (cn_label,t)):
f = gopen('%s_%f.bwa.read2.fastq.gz' % (cn_label,t))
else:
raise FileNotFoundError("Couldn't find %s_%f.bwa.read2.fastq or %s_%f.bwa.read2.fastq.gz" % (cn_label,t,cn_label,t))
for l in f:
if isinstance(l,bytes):
GC.sequencing_file2.write(l)
else:
GC.sequencing_file2.write(l.encode())
chdir(orig_dir)
def finalize():
if hasattr(GC,"sequencing_file"):
GC.sequencing_file.close()
GC.sequencing_file2.close()