-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcluster.cpp
82 lines (71 loc) · 2.72 KB
/
cluster.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#include "cluster.hpp"
using namespace std;
// AaCcGgTtNn ==> 0,1,2,3,4
unsigned char _char26_table[256] = {
0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4 /*'-'*/, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0,
4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
string Cluster::poa() {
// --- POA
uint n_seqs = seqs.size();
abpoa_t *ab = abpoa_init();
abpoa_para_t *abpt = abpoa_init_para();
abpt->disable_seeding = 1;
abpt->align_mode = 0; // global
abpt->out_msa = 0;
abpt->out_cons = 1;
abpt->out_gfa = 0;
// abpt->is_diploid = 1;
abpt->progressive_poa = 0;
abpoa_post_set_para(abpt);
// abpt->match = 2; // match score
// abpt->mismatch = 4; // mismatch penalty
// abpt->gap_mode = ABPOA_CONVEX_GAP; // gap penalty mode
// abpt->gap_open1 = 4; // gap open penalty #1
// abpt->gap_ext1 = 2; // gap extension penalty #1
// abpt->gap_open2 = 24; // gap open penalty #2
// abpt->gap_ext2 = 1; // gap extension penalty #2
// gap_penalty = min{gap_open1 + gap_len * gap_ext1, gap_open2 + gap_len *
// gap_ext2}
int *seq_lens = (int *)malloc(sizeof(int) * n_seqs);
uint8_t **bseqs = (uint8_t **)malloc(sizeof(uint8_t *) * n_seqs);
for (uint i = 0; i < n_seqs; ++i) {
seq_lens[i] = seqs[i].size();
bseqs[i] = (uint8_t *)malloc(sizeof(uint8_t) * seq_lens[i]);
for (int j = 0; j < seq_lens[i]; ++j) {
bseqs[i][j] = _char26_table[(int)seqs[i][j]];
}
}
abpoa_msa(ab, abpt, n_seqs, NULL, seq_lens, bseqs, NULL, NULL);
abpoa_cons_t *abc = ab->abc;
string cons = "";
if (abc->n_cons > 0) {
for (int j = 0; j < abc->cons_len[0]; ++j) {
cons += "ACGTN"[abc->cons_base[0][j]];
}
}
for (uint i = 0; i < n_seqs; ++i) {
free(bseqs[i]);
}
free(bseqs);
free(seq_lens);
abpoa_free(ab);
abpoa_free_para(abpt);
return cons;
}
void Cluster::dump(ofstream &o) const {
// o << chrom << " " << s << " " << e << " " << cov << " " << size();
for (const string &seq : seqs) {
o << " " << seq;
}
o << endl;
}