-
Notifications
You must be signed in to change notification settings - Fork 0
/
cons_consensus_and_profile.js
198 lines (190 loc) · 9.9 KB
/
cons_consensus_and_profile.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
let input =
`>Rosalind_1786
AGGAGTAAACGACTTGCGGACTTCGCAACCAAGTGACCCTTCTCCGGTCCCGTCGCAGCG
ACCCTTCGTAGGCCTGCACTCCACTCCAATGAAATTGACGAACGTAGTCAGCCGAAGCTA
CTTACCGACGTGGCATAATGTCGTGTGCATAATGGCACTCAGACCTCGGTCAAGCACTAT
GCCGATCAGCCTGTAGGACCTGGATCCGAGCGTGTTGCATGCCGGTATAAGATGCCGGGC
AAACTGGAGAATATGCTAGGGTCAGAGGCGCATACACATTGTCTGACACTTTTCCGTGTC
GTCCGAGCTATAGAGGGTCGTGCATCACAATTAATCGACTAGAGAACACCACTATCGAGT
CGCATATCAGCGCCGGACGTTTTCGCCCCCTTAGGTGCACGGGAATGCCAACTGAGACAC
CTATCGAACGGTCATTCGGCGATAACTAAAATTCACCGAACCTGCTACGTGCTTTAGCGA
CGGGCAGAATAGCGTTCGCACAAACTTGCACCGCTAATTTCAAGGAAATATGCTTTGAGG
CAAGCCACACTTGTTAGCGAAAGTCCACCAGATAGCCGCACCCTTATATCCAGCACCCTG
TACTATTCAGCAATACCTTAATACTGGGCGACCCGCGGGGGAACTGCGGTCCATTTGGCG
GGGCCAGTATTAGCACAGTTTTAATCGTGCGTGGGGGCCTCTAGCCAGTACGATCATCTG
GGAGTTCCTATTGAGCACCGGAGGCCATGGCCCTTCTGGTTGACACTACAATTGATTAAT
AAACTCTGAACCACAGAAACAAATAGGTGACCTCGCTAATTATGCAGGGAACATCTCACG
ATCTTCTTCGTAGAATATCGGTATCCCCCCTAGGTGCAAGGTGGTATAGTTATGAACCAT
CCCCCATGACTACTGGTGCCCTG
>Rosalind_5041
ATCGGGGAGGCTCGTTGGTAGCATGAGCTATTCCTAAAGTGTTACCTATTTCTGCAATCC
CAGTATCTCTAATCCCGTCATAGGTGTAAGCAACGCCTGCGAATTCCGGCCCCACTCGTG
ATTGCGGCCTAACCATCTTGCGTGGGCATACTTAAATCCGACATTACACGCCATTGTAGA
CACGTGAGCGGCCTTGCCTTCTCGGGTAGATTTTGATGCCGAAGAGTAAAAGCCTACTCC
GTGCACCACGTTTATTCAGTGAACGTTATGCACTCCCTGATCCCTGCAACTACAACCTAA
TATTAAAATCAATATTGCTCTGTACCATATAGTATAGATGCAGATTCAAGCCCGGATGGC
ACTGTATCGGGATTCCGACGCTTCCAGGCTATACCTACCTACCTAGTCCTTCAGGTGATA
CTAAGCACCCGAGGTGCGTGCACCATACTCCATATCCTTGTGATGGTCGTCATATCGGGA
GCCCTCTATGGTTAGTCCGCGCTAGTTGTACGCTTCTTATGAAGGCGGTTTATTGTAAGC
ATAGAGATCGTCGTGAGCCCACAGCCTGCTAGGGCGACCTTCAGGAACCCTGACTTATAT
GCAAGCCGCTTACGACTGTCGGGGACCAAATTCGAACGTGAAGAGGAGCTTGTCTTCTTT
AACGTGCTAAGTGGTCGGAAAGACCTCGCCCACTAATTAGCCAACTCCGGAAAGGAGTCC
AGAGATTGCGCACTCATTCCACTCCTCTCCGTCAAAACGCAGAGGCGGTAGCGTAAGATT
CTTTAATCATAAGGCGAGAACGAGACTCTGGCCCGATCAAAACAGCGAACCCGAAGTGGC
ATACAGATGCGCTCTTGGAGCATTTCCCCTGTACCCCGAAGAAAAAGTACTAATAACTGC
CAGCTTTCATCATCCTGTTCACC
>Rosalind_8184
GGGCTGTGTCGTCTGCCCTGCAAGCTGCAATGCCACAAGTCTCACTGAGGGTCTACGCCA
AATAAAAGAACTACGACGAGCGAGCACAATGTTCACTCATTGGGAACTTGCCAGGTTCGA
TAATCGAAATCACAATTAACGGCCCAACCCACGGACGGGTCAGGGCATAGGTGGAGCATG
TCTGTGTTCCATGCTGCAGACTTTTCGTGGGAGTATTGCGACGCTCCAATATTCCCGCAC
CTGGATTGGCAGCCTGTATCACAGGCTGTTATCGTTCCGCAAAGCGCGAAATACGCTCCT
GTAGATTCGAGCTATCTTAGGCCACCAGAATTCGCGCCACCCGTCCACGAACCCTAGGAT
ATTCCCCACGCACCCACGTAATTCTATTGCCATATATTTCTCAGTCTCCTGTGCGAGGAA
ATCTGCTTGGTTGCCCTAAAGGACTCAGAGGGTAAGCGACTCGTGTATGTCTATAGTCCT
TGCTACCTAAGTCAGGAAGGGCGATCTAGCACGGGCCCGTCTACGGGTTTGTCGGAATTA
AGCGTACACCTTCATTAGCCCAGTTCCTATAGTAGGGAACCATCCGTAGTGACGACATGA
CTTGAGCTTCCCACAGACGCAGCGCGTTCAAGCTCTAAGCGCTCCGCTTACTGATCGCAG
CGGGCTATGGAATCCATGGTCCCAACAACTAGCTGGCTGAGCTGGGGTTAACAAGACCGG
TTTGATCGCCTTACGTGCAGGTTCCAGAGCTTAGGTCGAGTGCCTATAGTAATGCTCGAT
TATTCCAAGTGTGAACTGAGGTGAATGCCAAACGTGCAGACTGCGCAGCCGCTCACGGTA
TTACATGTAGCGATGGATTCTGTACCATGGGTGTCAGTATAGTGGGTGATATGAATTATG
AGATATGAGGAGGTCCCTGGGGC
>Rosalind_8749
GGGGTATAACGCCACATGTACAATTACCATATTGGGGGCGGAATCCCGTATGAGCTGTCC
CGGCGCTAAAGTATGGACACAGACTGACGTAGAACTTGGGGCTGGTTGGTGCTTGCACCC
GGTCGGACCCTGTGGGAGACAGATCCCAGTCGAATATTAAATCAGAATGGATATAGTGTA
ATTTCAGTAACCGTGGGGTGCTCTTGTGGGAGAGGTCCGGGTTTTGGCTTTTCCGAATCA
TCCGTTAGATATCTCGCACATACAACACTTTTAAGCAAGAGATTTTAAAGGCCTCGGAAC
GTCCTCGCTGGGACAAGCGCCCGCAGGCGTAACATAACACACTCGGGCCGCAGCATACTA
CGGATGAGTAGCGGATCGAGGTCGCTGATCCGGTATGGAGCTCTAACCCTGACGCGGTTA
AGCGACCTGCTCGCTTAACGTTCGACTAAATCGACTGTCCTACCCCTACAATTGTCGCGG
TCATACGGGTAGTCTCAGATAGATTCTACGCTCAGCCCCTCAAAAGTCTGCTTAACTGGA
ACAGCGCTGAGAGACGCGCGTACGTTATCATCATTTAGTACTGAATCATGGTGCCATGCC
TCCCTTGTCGAGCATAGCGATGCTTGCAGATAAGTGCAGATTGAAGAGGCCGATGGCATT
ATAACCCAGCAAAATTTCGAAAACACAGTAATAGATCAGCTAAGACGACGAAGTTGCTTA
CGTAAAATGACCCGAGCCTATAGGGGCAAACTTATCTGGTGGCGCTGTAAAGAAAGATTC
CTTAGAACGCCTCCTTATCTTGGTACGTCGTAACTATATCAGATTGCGTGGACTCCTATG
GAACTTGGAGCTACATTGCCTGCGATGAATGGCAGGCGTCTGGATGGGAAGGCACTCAAA
TAGTGCCCATAACACATGGCATA
>Rosalind_4302
GCTATCGCTCATCTCGGATTTTTTGCCACCATCATACCGGTCAACTCCCTAAGTGGAAAG
GACGGGAAAAACCAGCGAGACCCTCTCTTCGTCACGGCAGCACCAGGCTGACCATCGCCG
GCATGGTTATCAATTCTCTAATAATATAATGGCGCAGTCCTGTAAAGTCAGGATTTATGC
TCCAACAATTCGCAGAGGCGGTCGTGAGGACTCCCTGGCTAACGTGTCTCTGCACTACAT
CTCCCTGGCTTAAACTCGTACGCACATGCCCCTAGGACAGAAAAGGCCGAATCGATGTTG
TTTAAGGCGCCGCGACGTTGGCACCCCGAGCATTAGCGAACCCTATGCTTGGAAGTGTGA
CCTCAGCGTACTGAGTACCCTTTAAGTACTTCCGGCAAGTACCACCCCTGGGGTGTAACT
TCGCTTTGAACACGTGTCTTGATTCGCTGTCACGATTAAGGGTCAGCTGTCAATCTATGT
GCCTAGGCAGTGACACGTATCTTAGGAGGAAATCCGAATATGACTACTACTAACTTTAGG
AGAGTTTCGCCGTCGCTTCGACACGACGTGCGTTAATGGGGTTTGGTCTTCAAGGTATCC
GCTGGTAGAGTGTGTTGATTTTGCAATACCTTGAAGAGTTCGAGTCCCAGTATAGCTAGT
GTATAAGATCACGCTTGATTAGCAACACCCGGAACGGTGAGCGCCTGAAATTAATCCGGC
CATTCATTAAACCAATCTGCGATCTGCACCTTTCAGAGACTTTCTACCGTTGAGATCTCG
TCGTCGCCACGAAACGCGGGAGCGTAAGGTTCAAAAAGTGCGGGAAGTGGGAATATTCAT
TGTCTTGGGCTAGGCCCTCTAAATCTATTCTACAACGTCACAAATAAATGTAGCGTACTT
GGTGGCAGGCGCTGGGTTAGTCT
>Rosalind_3641
GCGCAATGTCCCGGCTGGGCGCCCACATCAAGCGGTATTATATGACTATACAATGATGGA
AGGTGCGGCCAGCCCTAGCGATGTAAATCTGAACTTTGCTAATAGATTAACAGCAACACC
TGCGGCGGGGTCCCATTAGGAATCCCCCGAGCCTAACATCACAGACGGAGAATATGGGGT
TGACCCGCAAACTTAGCAGCGCGAGCGAGTTTCATGGTATTATAGTGGTTGCTGGAGCGG
AGGGTCGGTAGTTCAGACAGGTAGCCTGTCTGACGCAGCTTGGGTGGGCCTAACCCAAAG
GGCCTTTTGAACTTAAGATGTTTCTTCGGCTGAGTGAAGAGAAGCTAGCAGTGGGTCCCT
CGATACTGCGGTAAGTAGACTCAGCGCGCAATAAAACTGATATACTTAAACCGCGTCGAT
ACAACTTCATAGTGGGCTACGCACAACACAGAAAAGGACTCGCCTTCCGCGCTGGTTCGA
CGAATCTTTCTACTCAGTGGAACACTCGCGCAATATGATGACCCTGTGGTCATGCGCAGT
TTGATCGGGCGTCTTTGGAGATAATAATGCGATTCATTGTAACATCGTCCTTAAGGGAGT
AATAGGAATCAGAACACCTAATGATCGTGAAAGGTTACGGTGAGCATAAAGCGCGGAGGG
CTGTTACTTCGGTAGTCTTCAGTGTATGGACTCACCCGCATCTCCGTAGGAGTTACCGAG
AGCTCTTGATGCAGTTCAGACGGAGGACCCTGGCACTGATGAAGGGACTGGTTTCAAGTC
GTGACGAGCAACTCCGTGACGAACTGGGCGGTGCTTACACGTATCTGGATAGCAGACTTT
TTCGTACGCACGCGAGCAGCGGTAGTCGTTTTCGTCGCCAGTCCCTAGTACTTCACCTAG
CCTAATCGCGTCGTAGGGTTACG
>Rosalind_5646
GTCGACAGTAACGAGGCCGTAGTCGCAGACTACCCGATCAATCTATACACGTAGATATGT
ACGAGTACTCGTACCACCCCCAAGACAAGGGGAAATGACGTCTCTAGAGATAAGTCGCGC
AGCACAGGGGCTACCCTTCGATGCGTATCGCTTAGCGATTTGGTTACTTCGACACTGACG
AGGTTTCCTCCCTATAGCCGCCCCACCATTAAATTCGGAGCGATGCGAGTAAGGTTACTT
ATTCTTTCGTCCCAGAGGGTAGACGTGAATTTATATGTTTAAGCGCAGACGACCGTTCGA
GGATCGCCAGCCGTCTCATCTAGAACTGGCGTCTAATAAGTATGAGTTGCGCCCGGAGTT
GACAAAAGACCGTAATGCGCATCGAAGACCGTCAAGCAGTAATCGTAACTTTTGTATCCA
TTATTTACCCGCAGACATGCGCCCAGAATTCGACGTTCCATCGCTCGGACCCACTTGCCG
ATAAGTATCACTGAGACTTATACCGCCGTGCACTTTCCCTTTATCTCCCCAATTTACTAA
ATCACACTGTGTTGTTTAGATCCGTCCCCAGCTATACTCTATTTGGGGGACTGTAGGCAG
CGCTTGAGCAATAGCAATTTCCAACCATGATTCGCGACGGTGCCTCACGTGACCGAGCTT
AGCACACACACTGGAGCGGTCGAGGCTGTAACGTGGCATTAGAATCTTAGTGAGGACCTC
TGATATACTAACTAGACACACGCTCCGTGCCGATACGAACGGCGGGAGCTTGATGTATTA
CCCAAGTGACTGTCCTCGCTGGCCTTAGAGGCGGGGCCTAAAAAAGTCACATACGCTCAG
CCGTCAATGTTGTGGCATTAGTTACATCCGTTCCCAGACGTTAATCCTCAGGGTACCCCG
CACTGGCGGCGAAATGGGACAGT
>Rosalind_8872
TAGGTACTCGAAGTCCTCAATGGTTCTCCTCGTAAATTCAAGTTCTGCCAATAATGGCGA
CCGCTTCAAAGCGACTCCAGTATTGATCCTCTTTTACGCTGAGAAGTCCACATTCTGAGG
AACCAGTCCTCGCGGAGTCAGTTTTGACAACCTGGCCCGTGCGTAGTCACGTACAAGTCC
CAGCTTCTAAGCATAAAGCTCCCTAGAAACATATCTTCCGCCCATCACAGTATTGTTGTT
CTTGACGGAATCACGATTTCCAAGACGCAGGAATCTTAAGTCCTTGGCGTGGCGAGAGGA
AGTTCTTTTGCAATAACTCTACGCATCTTTGTTCGCCATCCTTGCGGTGACCCTGGGGGC
CGCTTTCCGTGATCGGACGGTACTGTTGTAAAGTCTAGGAAGGAGTGGTCTTTGGGTTCT
CTTCTTGTGGTTACCGAAGCGTCCCGGATCTGGAGCAGGTATTATACGCTAGTATCTAAA
CCCTCCCCTACGTGAGCCCTGGCACTGACAGCATGGGCCACCTGGGCTAAGCGGTCGTGC
GACAGAATCTAGCAGACCATATATTAGGTGCGACTCAGTCACTAAAGCACATGAAATTAA
CAATAGTCAGCGCGAGTTACCTGCAGACGGGGGTCGACTGGGATGGGCTTTCAGGACAGG
AGTGTAAGCCCATGAAACCTCTTCCCGCCGCGCTTACAAACCCGGGCCAGCGGATAACCC
TCAGAATTATGTCACAGCAGACCCCGCGGAAGGGTCGGTTTAAAATGCTGTACTTCTAGC
CCATCTCAGAGTACCGACTATAAACTCTACCACTATCTCGAGACAAATACCCCTAACGTG
GTCCATTGATTATGGTAGTCGTTAGAAAGCAGCCAGTCACCCCTATTCTGCAACCAGGAC
AAGCAAACCGGATAAATTGCAAA
>Rosalind_9436
ATGCGCACCGGTGCTGCTCGGCTCTGATGGCACCGAGTCTAAAGCATACGCTTGAGACTG
CCCAGACCACCTGTAATGAGGCAATGTACCTAAGGTTACCACTGGTTTATGCGTCTTTCG
CTCGGAGCAGAAAACATCCGTCGCATTGATAAGTTAGCAATGTTGCTTAAGGCAACGTGC
GAGCTAAGCACTCTTGTGTTCATGCTTTATGAGAGGCGTGGGGGTAGTCGGCGTTTCTGG
TGCTTCAGTACACATCAAGATGTCGGACTGTTCGAGTAATAACAGTGGCTGGGCTCGGTA
TCGCGGTACTAAGTTATTTGCGACTTTTCATCAGGTTAGGAGAGCTCCACTAGGAGCATA
TGACCCTACGTTCAGGGCCTTTCCACCCTGTACTACGCGGTAGGCCCGAAACCAAACGGG
TGAAACTATTCTATTTAAAAGATAGTGACATTATTGAAGCCATAACTAGCTAGCACGTAA
TCTCGTATTTGGTGGAGATTTTACGTGTGGGCAGCTAGGCCGTAATTACAACTGGTGAGC
ATGCTCCGAGACCACTGTAACCGTAAGGTCCTAAGCGTCCTACGATGCTTCGGGAATGCC
GACTAAGCCGTATGACCTGTGCTGTTTTTTGTGGCACCTGGACTCTGTGTCTGGAAGCCG
ACAAGATTAATTTCACCGGGCACTACATGCTGACCGTAATTTATCAGAGATTTTTGTTCA
CGCCTTCCGCGGCAAAATCGCCTCTTTAGGACCGCTTATGAGTCAGTGAACCAATAGGCC
CAGATTGGATCAACGTGTACGGGGTCTTTGCTCTTCACGAGCTCCTACCGACACTCCGAC
CTGCATCACAGAGAATAGGACCTGTCACATTTATCAAGACAGGACAGTAACTCGCGCTTT
AAGACGGCCCAGGTAGTTAATGC
>Rosalind_5557
TGTTTACTATTAGATAGGCCACGCGTCCGCCGTGCAAGCCCCTTTGAATGAGTTTAACCC
ACGGATGCTCAAAAACGGACCAGACTATCATCGGTACTTGCGGGTGCCCGGCTGCGTTGG
CGTAGACCACTTCAAGTCATGGGGGCGTCTTTGCCCTGATGAGTTAATAGCCATAAAACG
GCCTTGCTGGAAGTAAGAGCATAGGGCCATCCTATTGGTACACGAGTTTGAAGAGCGACC
CATAGTCAGTATACTTTCACGGAATATGCCTGCTATCCACTGCACCCTACCCAGTGGCAA
CACATGCAGGTAAAATAACTACGCCGGGAATACAAGTGCAGCACGGGATGCATAGAGCTC
CATGACCGAAGCAAGAAACCCGTAGGATCCCTGTAGGAGCTCGAGGATACCCTCGGCTAA
GCCGTTGGCGTATTACAAATGTTTGTTCACAACTGTCCTCGCACTTGGGGCCACGTGCGA
CGTTCACGCTAATAAAAGCCGTCCTGTCAAGCTGCAACGATAAGGTCAGATGTCGTCAAC
CAGCTAAGGCCGTCATCAAACAGCTGCTAGTCCCGTGTAGAGGCTAGTGGTGTTAAGCTC
AGTCTGATCGTACACGTTACTATGAATGGTAATTCTACAGCGACACCGCACCAGAAAATG
CTGTCGGCTTCAATATCTAAGGAATGAACCGAGTGATGGCTTGAACGCAGAAGAACGTTC
CTTGGATCACTTACCCAAACCGCAGCCGATTAAAGGGTATGCCTCTCCTCCTCCCGCACG
TAGATAGTATTACCACCACGTGTGGAATTATCTGAAGTCTTACCTGCTCGTGCTGCCGCT
GGGCCTTAGTGAAGACGGCTGATTCCATTCAAGGAACAAGGCGGATAACCTTTACGCGGT
TGAATTGACAGCGTCGAATATTG`;
// ---------------------------------------------------------------------------
// Consensus and profile of a collection of DNA strings.
//
// Reads the FASTA text held in `input`, counts how often each nucleotide
// occurs in every column (the "profile matrix"), derives the most frequent
// nucleotide per column (the "consensus string"), and prints the consensus
// followed by one "X: n n n ..." row per nucleotide.
// ---------------------------------------------------------------------------

/** Nucleotides tracked by the profile matrix, in tie-break and print order. */
const NUCLEOTIDES = ['A', 'C', 'G', 'T'];

/**
 * Extracts the sequence strings from FASTA-formatted text.
 *
 * A header is any whole line starting with `>`; it is dropped in full, whatever
 * it contains. All whitespace (including `\r` from CRLF files) is removed from
 * the sequence bodies, and letters are upper-cased so that lowercase FASTA is
 * counted as well. Records that end up empty are discarded.
 *
 * @param {string} fasta - FASTA-formatted text.
 * @returns {string[]} The sequences, in file order.
 */
function parseFasta(fasta) {
  return fasta
    .split(/^>.*$/m)
    .map((record) => record.replace(/\s+/g, '').toUpperCase())
    .filter((sequence) => sequence.length > 0);
}

/**
 * Builds the profile matrix: for every nucleotide, an array holding the number
 * of sequences that carry that nucleotide in each column.
 *
 * @param {string[]} sequences - Sequences made of A, C, G and T.
 * @param {number} width - Number of columns (length of the longest sequence).
 * @returns {{A: number[], C: number[], G: number[], T: number[]}} Column counts.
 * @throws {Error} If a sequence contains a symbol other than A, C, G or T.
 */
function buildProfile(sequences, width) {
  const profile = Object.fromEntries(
    NUCLEOTIDES.map((symbol) => [symbol, Array.from({ length: width }, () => 0)]),
  );

  sequences.forEach((sequence, row) => {
    for (let column = 0; column < sequence.length; column++) {
      const symbol = sequence[column];
      const counts = profile[symbol];
      if (counts === undefined) {
        // Fail with a pinpointed message instead of an opaque TypeError.
        throw new Error(
          `Unexpected symbol "${symbol}" at position ${column + 1} of sequence ${row + 1}; ` +
            `expected one of ${NUCLEOTIDES.join(', ')}`,
        );
      }
      counts[column]++;
    }
  });

  return profile;
}

/**
 * Picks the most frequent nucleotide of every column.
 *
 * Ties are resolved in favour of the nucleotide that comes first in the
 * profile's key order (A, C, G, T), because only a strictly greater count
 * replaces the current winner.
 *
 * @param {Object<string, number[]>} profile - Profile matrix.
 * @param {number} width - Number of columns to inspect.
 * @returns {string[]} One nucleotide per column.
 */
function findConsensus(profile, width) {
  const winners = [];
  for (let column = 0; column < width; column++) {
    let best = 0;
    for (const [symbol, counts] of Object.entries(profile)) {
      if (counts[column] > best) {
        best = counts[column];
        winners[column] = symbol;
      }
    }
  }
  return winners;
}

const seqs = parseFasta(input);
// Use the longest sequence so that ragged input can never index past the rows
// of the profile; for equal-length input this is simply the common length.
const length = seqs.reduce((widest, sequence) => Math.max(widest, sequence.length), 0);
const M = buildProfile(seqs, length);
const consensus = findConsensus(M, length);

console.log(consensus.join(''));
for (const [symbol, counts] of Object.entries(M)) {
  console.log(`${symbol}: ${counts.join(' ')}`);
}