-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathSequencing_ARTillumina.py
executable file
·74 lines (69 loc) · 3.45 KB
/
Sequencing_ARTillumina.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#! /usr/bin/env python3
'''
Niema Moshiri 2016
"Sequencing" module, using ART to simulate Illumina NGS reads
'''
from Sequencing import Sequencing
import FAVITES_GlobalContext as GC
from gzip import open as gopen
from subprocess import call
from tempfile import NamedTemporaryFile
from os.path import expanduser,isfile
from os import getcwd,makedirs,chdir,listdir,rename
class Sequencing_ARTillumina(Sequencing):
def cite():
return GC.CITATION_ART
def init():
GC.out_dir = expanduser(GC.out_dir)
GC.art_illumina_options = [i.strip() for i in GC.art_illumina_options.strip().split()]
assert "-i" not in GC.art_illumina_options and "--in" not in GC.art_illumina_options, "Don't use the -i (--in) argument (we will specify it for you)"
assert "-o" not in GC.art_illumina_options and "--out" not in GC.art_illumina_options, "Don't use the -o (--out) argument (we will specify it for you)"
GC.art_illumina_path = expanduser(GC.art_illumina_path.strip())
def introduce_sequencing_error(node):
orig_dir = getcwd()
chdir(GC.out_dir)
makedirs("ART_output", exist_ok=True)
chdir("ART_output")
cn_label = node.get_name()
for t in GC.final_sequences[cn_label]:
f = NamedTemporaryFile(mode='w')
for l,s in GC.final_sequences[cn_label][t]:
f.write(">%s\n%s\n" % (l,s))
f.flush()
command = [GC.art_illumina_path] + GC.art_illumina_options
if GC.random_number_seed is not None:
command += ['-rs',str(GC.random_number_seed)]
GC.random_number_seed += 1
command.append("-i")
command.append(f.name)
command.append("-o")
command.append('%s_%f' % (cn_label,t))
try:
call(command, stdout=open('%s_%f.log' % (cn_label,t), 'w'))
except FileNotFoundError:
chdir(GC.START_DIR)
assert False, "art_illumina executable was not found: %s" % GC.art_illumina_path
f.close()
if isfile('%s_%f.fq' % (cn_label,t)):
if not hasattr(GC,"sequencing_file"):
GC.sequencing_file = gopen('%s/error_prone_files/sequence_data_subsampled_errorprone.fastq.gz'%GC.out_dir, 'wb', 9)
for l in open('%s_%f.fq' % (cn_label,t)):
GC.sequencing_file.write(l)
elif isfile('%s_%f1.fq' % (cn_label,t)):
if not hasattr(GC,"sequencing_file"):
GC.sequencing_file = gopen('%s/error_prone_files/sequence_data_subsampled_errorprone_read1.fastq.gz'%GC.out_dir, 'wb', 9)
GC.sequencing_file2 = gopen('%s/error_prone_files/sequence_data_subsampled_errorprone_read2.fastq.gz'%GC.out_dir, 'wb', 9)
rename('%s_%f1.fq' % (cn_label,t), '%s_%f_read1.fq' % (cn_label,t))
for l in open('%s_%f_read1.fq' % (cn_label,t)):
GC.sequencing_file.write(l.encode())
rename('%s_%f2.fq' % (cn_label,t), '%s_%f_read2.fq' % (cn_label,t))
for l in open('%s_%f_read2.fq' % (cn_label,t)):
GC.sequencing_file2.write(l.encode())
else:
assert False, "Error occurred with art_illumina"
chdir(orig_dir)
def finalize():
if hasattr(GC,"sequencing_file"):
GC.sequencing_file.close()
if hasattr(GC,"sequencing_file2"):
GC.sequencing_file2.close()