-
Notifications
You must be signed in to change notification settings - Fork 1
/
convertPileup.pl
112 lines (102 loc) · 2.73 KB
/
convertPileup.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# converts a pileup back to a fastq
use strict;
use warnings;
die "Need files\n" if (scalar @ARGV < 2);
open(IN, $ARGV[0]) || die "Cannot open\n";
open(OUT, ">$ARGV[1]");
my @read;
my @st;
my @seq;
my @qual;
my @mqu;
my $y = 0;
while (my $line = <IN>) {
chomp $line;
#print "$line\n";
my @spl = split("\t", $line);
my $pos = 0;
my $done = 0;
for (my $x = 0; $x < $spl[3]; $x++) {
my $ch = substr($spl[4], $pos, 1);
my $qu = substr($spl[5], $x, 1);
if ($ch eq '^') {
die "$line\nAdding things out of order: pos $x is not ", scalar @read,
"\n" if (scalar @read != $x);
$mqu[$x] = ord(substr($spl[4], ++$pos, 1)) - 33; # save mapping quality
$read[$x] = $spl[0];
$st[$x] = $spl[1];
$ch = substr($spl[4], ++$pos, 1);
}
if (($ch eq '.') || ($ch eq ',')) {
$ch = $spl[2];
} elsif ($ch eq '*') {
$ch = "";
$qu = "";
}
my $next = substr($spl[4], $pos+1, 1);
if ($next eq '-') {
my $num = substr($spl[4], $pos+2, 1);
$pos += $num + 2;
} elsif ($next eq '+') {
my $num = substr($spl[4], $pos+2, 1);
$ch .= substr($spl[4], $pos+3, $num);
$pos += $num + 2;
#$qu .= substr($ # no quality scores for inserted bases!? check.pl
#print "$line\n$ch\n";
#my $wait = <STDIN>;
}
$ch =~ tr/a-z/A-Z/;
#print "Read $x: char is $ch, qual is $qu\n";
#my $wait = <STDIN>;
$seq[$x] .= $ch;
$qual[$x] .= $qu;
if (substr($spl[4], ++$pos, 1) eq '$') {
$pos++;
print OUT '@Read', "$y $read[$x] $st[$x] $mqu[$x]\n$seq[$x]\n+\n$qual[$x]\n";
$y++;
$st[$x] = -1;
$done++;
}
}
# remove finished sequences
if ($done) {
my $skip = 0;
for (my $z = 0; $z < scalar @st - $done; $z++) {
#while ($st[$z] == -1) {
# $read[$z] = $read[$z + $skip];
# $st[$z] = $st[$z + $skip];
# $mqu[$z] = $mqu[$z + $skip];
# $seq[$z] = $seq[$z + $skip];
# $qual[$z] = $qual[$z + $skip];
#$z--;
#}
my $a;
for ($a = 0; $a < scalar @st - $skip - $z; $a++) {
last if ($st[$z + $skip + $a] != -1);
}
$skip += $a;
$read[$z] = $read[$z + $skip];
$st[$z] = $st[$z + $skip];
$mqu[$z] = $mqu[$z + $skip];
$seq[$z] = $seq[$z + $skip];
$qual[$z] = $qual[$z + $skip];
}
for (my $z = 0; $z < $done; $z++) {
pop @read;
pop @st;
pop @mqu;
pop @seq;
pop @qual;
}
#print "Finished: $done\nStill processing: ", scalar @read,
# "\n$spl[0]\t$spl[1]\t$spl[3]\n";
# print "Reads parsed: $pos\n";
# for (my $z = 0; $z < $pos; $z++) {
# print "read$z $seq[$pos] $qual[$pos]\n";
# }
#my $wait = <STDIN>;
}
}
close IN;
close OUT;
print "Reads printed: $y\n";