-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathparseRMSK.c
129 lines (105 loc) · 2.43 KB
/
parseRMSK.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <zlib.h>
#include <float.h>
#include <math.h>
#include "gtf.h"
#include "htslib/kseq.h"
#include "htslib/kstring.h"
// Instantiate the kseq.h buffered stream reader (kstream_t, ks_init,
// ks_getuntil, ks_destroy) for gzFile handles read through gzread, using a
// 16384-byte buffer.
KSTREAM_INIT(gzFile, gzread, 16384)
/*
 * Parse one line of a RepeatMasker (rmsk) table into a GTFline.
 *
 * Fields are consumed in this order: bin, score, milliDiv, milliDel,
 * milliIns, chromosome, start, end, genoLeft, strand, repName, repClass,
 * repFamily. Only score, chromosome, start, end, strand and the three rep*
 * attributes are stored; the remaining fields are skipped.
 *
 * o   Line object to fill. If non-NULL it is reset and reused, otherwise a
 *     new one is obtained from initGTFline().
 * t   Tree handed to addAttribute() together with each attribute.
 * ks  Line to parse. The struct is passed by value but ks.s is the caller's
 *     buffer, which is handed to nextField() (presumably tokenised in place,
 *     strtok-style -- confirm against its definition).
 *
 * Returns the filled line, or NULL after destroying it when the score, start
 * or end is not fully numeric, or when the chromosome, start or end field is
 * missing. NULL is also returned if no line object could be allocated.
 *
 * NOTE(review): a line that ends before the chromosome field, or after the
 * end field, is returned as-is (partially filled) rather than rejected. That
 * matches the previous behaviour; confirm whether callers rely on it.
 */
GTFline * parseRMSKline(GTFline *o, GTFtree *t, kstring_t ks) {
    char *p, *end;

    if(o) GTFline_reset(o);
    else o = initGTFline();
    if(!o) return NULL;

    //bin
    p = nextField(ks.s);
    if(!p) return o;

    //Score ("." marks a missing score and is stored as DBL_MAX)
    p = nextField(NULL);
    if(!p) return o;
    if(strcmp(p, ".") == 0) {
        o->score = DBL_MAX;
    } else {
        o->score = strtod(p, &end);
        if(*end) goto err;
    }

    //milliDiv
    p = nextField(NULL);
    if(!p) return o;

    //milliDel
    p = nextField(NULL);
    if(!p) return o;

    //milliIns
    p = nextField(NULL);
    if(!p) return o;

    //Chromosome
    // The copy must not live inside assert(): it would vanish under NDEBUG,
    // and kputs() signals failure with EOF (non-zero), which assert() would
    // have accepted. A missing or empty name is treated as a malformed line.
    p = nextField(NULL);
    if(!p || !*p) goto err;
    if(kputs(p, &o->chrom) < 0) goto err;

    //Start
    p = nextField(NULL);
    if(!p) goto err;
    o->start = strtoull(p, &end, 10);
    if(*end) goto err;

    //End
    p = nextField(NULL);
    if(!p) goto err;
    o->end = strtoull(p, &end, 10);
    if(*end) goto err;

    //genoLeft
    p = nextField(NULL);
    if(!p) return o;

    //Strand (any value other than '+' or '-' leaves o->strand untouched)
    p = nextField(NULL);
    if(!p) return o;
    if(*p == '+') {
        o->strand = 0;
    } else if(*p == '-') {
        o->strand = 1;
    }

    //repName
    p = nextField(NULL);
    if(!p) return o;
    addAttribute(o, t, "repName", p);

    //repClass
    p = nextField(NULL);
    if(!p) return o;
    addAttribute(o, t, "repClass", p);

    //repFamily
    p = nextField(NULL);
    if(!p) return o;
    addAttribute(o, t, "repFamily", p);

    return o;

err :
    destroyGTFline(o);
    return NULL;
}
/*
 * Read a RepeatMasker (rmsk) table, opened through zlib's gzopen(), and
 * load its records into a newly created GTFtree.
 *
 * fname  Path of the file to read.
 * ffunc  Optional filter. When non-NULL it is called with each parsed line
 *        and only lines for which it returns non-zero are added.
 *
 * Lines whose first character is '#' are skipped. Reading stops at the first
 * line parseRMSKline() rejects; entries added before that point stay in the
 * returned tree.
 *
 * Returns the tree, or NULL if the file cannot be opened or the tree cannot
 * be created.
 */
GTFtree *RMSK2Tree(char *fname, FILTER_FUNC ffunc) {
    gzFile fp = gzopen(fname, "r");
    GTFtree *o = NULL;
    GTFline *line = NULL;
    kstream_t *ks = NULL;
    kstring_t str;
    int dret;

    str.s = NULL;
    str.l = str.m = 0;

    // Check the handle before allocating anything so a failed open leaks
    // neither the scratch line nor the stream.
    if(!fp) return NULL;

    line = initGTFline();
    assert(line);
    ks = ks_init(fp);

    o = initGTFtree();
    if(!o) goto cleanup;

    while(ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
        // Skip comments before parsing: parsing them first could register
        // their fields as attributes in the tree and would needlessly
        // destroy and reallocate the scratch line when they fail to parse.
        if(*str.s == '#') continue;

        line = parseRMSKline(line, o, str);
        if(!line) break;

        if(ffunc == NULL || ffunc((void *) line)) {
            addGTFentry(o, line);
        }
    }

cleanup :
    if(line) destroyGTFline(line);
    ks_destroy(ks);
    gzclose(fp);
    free(str.s);
    return o;
}