-
Notifications
You must be signed in to change notification settings - Fork 2
/
9s_singleG_pattern_format.pl
executable file
·95 lines (74 loc) · 2.17 KB
/
9s_singleG_pattern_format.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl -w
use strict;
#-----------------------------------------------------------------------------
#----------------------------------- MAIN ------------------------------------
#-----------------------------------------------------------------------------
my $file = $ARGV[0];
my @species = qw/FUGU NINE THRE/;
my %species;
foreach (@species) { $species{$_} = 1; }
my %genes;
my $gname;
open FH, $file;
while (<FH>) {
if (/>(\S+)/) { $gname = $1; }
elsif (/(\S+)\s+(\S+)/) {
$gname = "uniform";
my ($sname, $seq) = ($1, $2);
next unless defined $species{$sname};
my @bases = split //, $seq;
push @{$genes{$gname}{$sname}}, @bases;
}
}
close FH;
my %patterns;
my $pattern_counts=0;
my %gene_pattern_counts;
foreach my $gname (sort keys %genes) {
my $total_bases = scalar @{$genes{$gname}{$species[0]}};
my %local_patterns;
for (my $i=0; $i < $total_bases; $i++) {
my $good =1;
my $pattern ='';
foreach my $sname (@species) {
$pattern .= uc $genes{$gname}{$sname}[$i];
}
$local_patterns{$pattern} ++ if $pattern =~/^[AGCT]+$/;
}
next unless scalar keys %local_patterns >0;
$patterns{$gname} = \%local_patterns ;
$pattern_counts +=scalar keys %local_patterns;
$gene_pattern_counts{$gname} = scalar keys %local_patterns;
}
my $total_genes = scalar keys %patterns;
print scalar @species;
print "\t".$pattern_counts."\tP\n";
#print "G\t$total_genes";
#foreach my $gname (sort keys %patterns) {
# print "\t".$gene_pattern_counts{$gname};
#}
print "\n\n";
my $i=-1;
foreach my $sname (@species) {
$i++;
print "$sname ";
foreach my $gname (sort keys %patterns) {
print " ";
foreach my $pattern (sort keys %{$patterns{$gname}}) {
my @pattern = split //, $pattern;
print $pattern[$i];
}
}
print "\n";
}
print "\n";
foreach my $gname (sort keys %patterns) {
foreach my $pattern (sort keys %{$patterns{$gname}}) {
my @pattern = split //, $pattern;
print " ".$patterns{$gname}{$pattern};
}
}
print "\n";
#-----------------------------------------------------------------------------
#---------------------------------- SUBS -------------------------------------
#-----------------------------------------------------------------------------