-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_cutadapt_output.py
66 lines (50 loc) · 1.89 KB
/
parse_cutadapt_output.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
"""
Written by Will Overholt
12.9.19
This script was written to parse the default output produced by cutadapt.
It works with the output from the cutadapt_pairedend_2step.sh raw_seqs/ > cutadapt_results.txt command.
"""
import os, sys, re
# Column titles of the tab-separated summary table, in output order.
_HEADER = (
    "R1_path",
    "R2_path",
    "Total Initial Reads",
    "ForwardReads_w_Adapters",
    "ReverseReads_w_Adapters",
    "PairedReads_w_Adapters",
)


def _option_value(tokens, *flags):
    """Return the basename of the token that follows the first of *flags*.

    Raises:
        ValueError: if none of *flags* occurs in *tokens* with a value
            after it.
    """
    for pos, token in enumerate(tokens[:-1]):
        if token in flags:
            return os.path.basename(tokens[pos + 1])
    raise ValueError(
        "none of %s found in cutadapt command line: %s"
        % (", ".join(flags), " ".join(tokens))
    )


def _count_and_percent(text):
    """Return the 5th and 6th whitespace-separated fields joined by a space.

    For the report lines handled here these fields are the read count and
    the parenthesised percentage, e.g. ``"900 (90.0%)"``.
    """
    fields = text.split()
    return " ".join([fields[4], fields[5]])


def _summary_rows(lines):
    """Yield one six-field row per cutadapt report found in *lines*.

    A row is emitted each time a ``Pairs written`` line is seen. Field
    values are not reset between reports, so a field missing from one
    report keeps the value parsed from the previous one.
    """
    fr = rr = total = fra = rra = pr = ""
    for line in lines:
        # Strip first so matching does not depend on the exact indentation
        # of the report lines or on the trailing newline.
        text = line.strip()
        if text.startswith("Command line parameters"):
            # split() without an argument collapses runs of whitespace, so
            # repeated spaces cannot yield empty tokens.
            tokens = text.split()
            fr = _option_value(tokens, "-o", "--output")
            rr = _option_value(tokens, "-p", "--paired-output")
        elif text.startswith("Total read pairs processed"):
            total = text.split()[4]
        elif text.startswith("Read 1 with adapter"):
            fra = _count_and_percent(text)
        elif text.startswith("Read 2 with adapter"):
            rra = _count_and_percent(text)
        elif text.startswith("Pairs written"):
            pr = _count_and_percent(text)
            yield (fr, rr, total, fra, rra, pr)


def _write_table(in_file, out):
    """Write the header and one row per report in *in_file* to *out*."""
    print(*_HEADER, sep="\t", file=out)
    with open(in_file, "r") as f:
        for row in _summary_rows(f):
            print(*row, sep="\t", file=out)


def main(argv=None):
    """Summarise a cutadapt paired-end log as a tab-separated table.

    Args:
        argv: argument vector shaped like ``sys.argv`` (program name first).
            ``argv[1]`` is the cutadapt log to read; the optional
            ``argv[2]`` is the file to write. Without it the table goes to
            standard output. Defaults to ``sys.argv``.

    Raises:
        SystemExit: if no input file is given.
        ValueError: if a ``Command line parameters`` line lacks the
            ``-o``/``--output`` or ``-p``/``--paired-output`` option.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = os.path.basename(argv[0]) if argv else "parse_cutadapt_output.py"
        sys.exit("usage: %s CUTADAPT_LOG [OUT_FILE]" % prog)

    in_file = argv[1]
    if len(argv) > 2:
        # The context manager closes the output file even if parsing fails.
        with open(argv[2], "w") as out:
            _write_table(in_file, out)
    else:
        _write_table(in_file, sys.stdout)


if __name__ == "__main__":
    main()