-
Notifications
You must be signed in to change notification settings - Fork 7
/
exblxtidy.pl
executable file
·98 lines (82 loc) · 2.8 KB
/
exblxtidy.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/perl
$usage .= "$0 -- tidy up sorted MSPcrunch output\n";
$usage .= "\n";
$usage .= "Usage: $0 [-asym] [-diag] [<MSPcrunch files...>]\n";
$usage .= "\n";
while (@ARGV) {
last unless $ARGV[0] =~ /^-/;
$opt = lc shift;
if ($opt eq "-verbose") { $verbose = 1 }
elsif ($opt eq "-asym") { $asym = 1 }
elsif ($opt eq "-diag") { $diagonalise = 1 }
else { die "$usage\nUnknown option: $opt\n" }
}
while (<>) {
my @f = split;
next if /^BLAST/ || !/\S/;
next if @f != 8;
next if $f[2]=~/\D/ || $f[3]=~/\D/ || $f[5]=~/\D/ || $f[6]=~/\D/;
if ($diagonalise) { $f[6] = $f[5] + sgn($f[6]-$f[5]) * ($f[3]-$f[2]) }
next if $f[2] == $f[5] && $f[3] == $f[6] && $f[4] eq $f[7];
if ($f[2] > $f[3]) { warn "Wrong way round - \@f=(@f) -"; next }
next if $asym && ($f[4] gt $f[7] || ($f[4] eq $f[7] && $f[2] > $f[5]));
if (@g == 0 || $f[4] ne $g[4] || $f[7] ne $g[7]) { flush_cache(\@cache,\$lastpos) }
else { next if check_unsorted(\@f,$lastpos) }
if (defined($i = cache_intersect_index(\@cache,\@f))) { $cache[$i] = merge($cache[$i],\@f) }
else { push @cache, \@f }
update_cache(\@cache,$f[2],\$lastpos);
@g = @f;
}
flush_cache(\@cache,\$lastpos);
sub flush_cache {
my ($cache,$lastpos) = @_;
foreach (@$cache) { print_cache_entry($_) }
@$cache = ();
$$lastpos = 0;
}
sub print_cache_entry {
my ($f) = @_;
print join("\t",@$f)."\n";
}
sub check_unsorted {
my ($msp,$lastpos) = @_;
if ($msp->[2] < $lastpos) { warn "Unsorted (msp=[@$msp], lastpos=$lastpos)"; return 1 }
return 0;
}
sub update_cache {
my ($cache,$pos,$lastpos) = @_;
my $i;
for ($i=0;$i<@$cache;$i++) { last if $cache->[$i]->[3] >= $pos - 1 }
for (;$i>0;$i--) { print_cache_entry(shift @$cache) }
$$lastpos = $pos;
}
sub cache_intersect_index {
my ($cache,$f) = @_;
my $dir = $f->[5] <= $f->[6] ? +1 : -1;
my $diag = $f->[2] - $f->[5] * $dir;
my $i;
for ($i=0;$i<@$cache;$i++) {
my $msp = $cache->[$i];
last if $msp->[3] < $f->[2] - 1;
next unless ($msp->[6] - $msp->[5]) * $dir > 0;
next unless ($msp->[2] - $msp->[5] * $dir) == $diag;
return $i;
}
undef;
}
sub merge {
my ($a,$b) = @_;
my $forward = ($a->[5] < $a->[6]);
if (($forward && $a->[5] > $b->[5]) || (!$forward && $a->[5] < $b->[5])) { ($a,$b) = ($b,$a) }
my @f = @$a;
my ($alen,$blen) = ($a->[3] - $a->[2], $b->[3] - $b->[2]);
my $overlap = $a->[3] - $b->[2] + 1;
if ($overlap == $alen + $blen) { return $a }
$f[0] = $a->[0] + $b->[0] - ($overlap/2) * ($a->[0]/$alen + $b->[0]/$blen);
$f[1] = ($a->[1] * $alen + $b->[1] * $blen - ($overlap/2) * ($a->[1] + $b->[1])) / ($alen + $blen - $overlap);
$f[3] = $b->[3];
$f[6] = $b->[6];
warn "Merging MSPs -- overlap = $overlap\n" if $verbose;
\@f;
}
sub sgn { $_[0] <=> 0 }