forked from deeptools/pyBigWig
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyBigWig.h
457 lines (448 loc) · 18 KB
/
pyBigWig.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
#include <Python.h>
#include <structmember.h>
#include "bigWig.h"
#define pyBigWigVersion "0.3.18"
typedef struct {
PyObject_HEAD
bigWigFile_t *bw;
int32_t lastTid; //The TID of the last written entry (or -1)
uint32_t lastSpan; //The span of the last written entry (if applicable)
uint32_t lastStep; //The step of the last written entry (if applicable)
uint32_t lastStart; //The next start position (if applicable)
int lastType; //The type of the last written entry
} pyBigWigFile_t;
static PyObject *pyBwOpen(PyObject *self, PyObject *pyFname);
static PyObject *pyBwEnter(pyBigWigFile_t *self, PyObject *args);
static PyObject *pyBwClose(pyBigWigFile_t *pybw, PyObject *args);
static PyObject *pyBwGetChroms(pyBigWigFile_t *pybw, PyObject *args);
static PyObject *pyIsBigWig(pyBigWigFile_t *pybw, PyObject *args);
static PyObject *pyIsBigBed(pyBigWigFile_t *pybw, PyObject *args);
static PyObject *pyBwGetStats(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
#ifdef WITHNUMPY
static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
#else
static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args);
#endif
static PyObject *pyBwGetIntervals(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
static PyObject *pyBBGetEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
static PyObject *pyBBGetSQL(pyBigWigFile_t *pybw, PyObject *args);
static PyObject *pyBwGetHeader(pyBigWigFile_t *pybw, PyObject *args);
static PyObject *pyBwAddHeader(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
static PyObject *pyBwAddEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
static void pyBwDealloc(pyBigWigFile_t *pybw);
//The function types aren't actually correct...
static PyMethodDef bwMethods[] = {
{"open", (PyCFunction)pyBwOpen, METH_VARARGS,
"Open a bigWig or bigBed file. For remote files, give a URL starting with HTTP,\n\
FTP, or HTTPS.\n\
\n\
Optional arguments:\n\
mode: An optional mode. The default is 'r', which opens a file for reading.\n\
If you specify a mode containing 'w' then you'll instead open a file\n\
for writing. Note that you then need to add an appropriate header\n\
before use. For bigBed files, only reading is supported.\n\
\n\
Returns:\n\
A bigWigFile object on success, otherwise None.\n\
\n\
Arguments:\n\
file: The name of a bigWig file.\n\
\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"some_file.bw\")\n"},
{NULL, NULL, 0, NULL}
};
static PyMethodDef bwObjMethods[] = {
{"header", (PyCFunction)pyBwGetHeader, METH_VARARGS,
"Returns the header of a bigWig file. This contains information such as: \n\
* The version number of the file ('version').\n\
* The number of zoom levels ('nLevels').\n\
* The number of bases covered ('nBasesCovered').\n\
* The minimum value ('minVal').\n\
* The maximum value ('maxVal').\n\
* The sum of all values ('sumData').\n\
* The sum of the square of all values ('sumSquared').\n\
These are returned as a dictionary.\n\
\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"some_file.bw\")\n\
>>> bw.header()\n\
{'maxVal': 2L, 'sumData': 272L, 'minVal': 0L, 'version': 4L,\n\
'sumSquared': 500L, 'nLevels': 1L, 'nBasesCovered': 154L}\n\
>>> bw.close()\n"},
{"close", (PyCFunction)pyBwClose, METH_VARARGS,
"Close a bigWig file.\n\
\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"some_file.bw\")\n\
>>> bw.close()\n"},
{"isBigWig", (PyCFunction)pyIsBigWig, METH_VARARGS,
"Returns True if the object is a bigWig file (otherwise False).\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"some_file.bigWig\")\n\
>>> bw.isBigWig()\n\
True\n\
>>> bw.isBigBed()\n\
False\n"},
{"isBigBed", (PyCFunction)pyIsBigBed, METH_VARARGS,
"Returns true if the object is a bigBed file (otherwise False).\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"some_file.bigBed\")\n\
>>> bw.isBigWig()\n\
False\n\
>>> bw.isBigBed()\n\
True\n"},
{"chroms", (PyCFunction)pyBwGetChroms, METH_VARARGS,
"Return a chromosome: length dictionary. The order is typically not\n\
alphabetical and the lengths are long (thus the 'L' suffix).\n\
\n\
Optional arguments:\n\
chrom: An optional chromosome name\n\
\n\
Returns:\n\
A list of chromosome lengths or a dictionary of them.\n\
\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"test/test.bw\")\n\
>>> bw.chroms()\n\
{'1': 195471971L, '10': 130694993L}\n\
\n\
Note that you may optionally supply a specific chromosome:\n\
\n\
>>> bw.chroms(\"chr1\")\n\
195471971L\n\
\n\
If you specify a non-existant chromosome then no output is produced:\n\
\n\
>>> bw.chroms(\"foo\")\n\
>>>\n"},
{"stats", (PyCFunction)pyBwGetStats, METH_VARARGS|METH_KEYWORDS,
"Return summary statistics for a given range. On error, this function throws a\n\
runtime exception.\n\
\n\
Positional arguments:\n\
chr: Chromosome name\n\
\n\
Keyword arguments:\n\
start: Starting position\n\
end: Ending position\n\
type: Summary type (mean, min, max, coverage, std, sum), default 'mean'.\n\
nBins: Number of bins into which the range should be divided before\n\
computing summary statistics. The default is 1.\n\
exact: By default, pyBigWig uses the same method as Kent's tools from UCSC\n\
for computing statistics. This means that 'zoom levels' may be\n\
used, rather than actual values (please see the pyBigWig repository\n\
on github for further information on this). To avoid this behaviour,\n\
simply specify 'exact=True'. Note that values returned will then\n\
differ from what UCSC, IGV, and similar other tools will report.\n\
\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"test/test.bw\")\n\
>>> bw.stats(\"1\", 0, 3)\n\
[0.2000000054637591]\n\
\n\
This is the mean value over the range 1:1-3 (in 1-based coordinates). If\n\
the start and end positions aren't given the entire chromosome is used.\n\
There are additional optional parameters 'type' and 'nBins'. 'type'\n\
specifies the type of summary information to calculate, which is 'mean'\n\
by default. Other possibilites for 'type' are: 'min' (minimum value),\n\
'max' (maximum value), 'coverage' (number of covered bases), and 'std'\n\
(standard deviation). 'nBins' defines how many bins the region will be\n\
divided into and defaults to 1.\n\
\n\
>>> bw.stats(\"1\", 0, 3, type=\"min\")\n\
[0.10000000149011612]\n\
>>> bw.stats(\"1\", 0, 3, type=\"max\")\n\
[0.30000001192092896]\n\
>>> bw.stats(\"1\", 0, 10, type=\"coverage\")\n\
[0.30000000000000004]\n\
>>> bw.stats(\"1\", 0, 3, type=\"std\")\n\
[0.10000000521540645]\n\
>>> bw.stats(\"1\",99,200, type=\"max\", nBins=2)\n\
[1.399999976158142, 1.5]\n"},
#ifdef WITHNUMPY
{"values", (PyCFunction)pyBwGetValues, METH_VARARGS|METH_KEYWORDS,
"Retrieve the value stored for each position (or None). On error, a runtime\n\
exception is thrown.\n\
\n\
Positional arguments:\n\
chr: Chromosome name\n\
start: Starting position\n\
end: Ending position\n\
\n\
Optional arguments:\n\
numpy: If True, return a numpy array rather than a list of values. This\n\
is generally more memory efficient. Note that this option is only\n\
available if pyBigWig was installed with numpy support (check the\n\
pyBigWig.numpy() function).\n\
\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"test/test.bw\")\n\
>>> bw.values(\"1\", 0, 3)\n\
[0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\
\n\
The length of the returned list will always match the length of the\n\
range. Any uncovered bases will have a value of None.\n\
\n\
>>> bw.values(\"1\", 0, 4)\n\
[0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\
\n"},
#else
{"values", (PyCFunction)pyBwGetValues, METH_VARARGS,
"Retrieve the value stored for each position (or None). On error, a runtime\n\
exception is thrown.\n\
\n\
Positional arguments:\n\
chr: Chromosome name\n\
start: Starting position\n\
end: Ending position\n\
\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"test/test.bw\")\n\
>>> bw.values(\"1\", 0, 3)\n\
[0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\
\n\
The length of the returned list will always match the length of the\n\
range. Any uncovered bases will have a value of None.\n\
\n\
>>> bw.values(\"1\", 0, 4)\n\
[0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\
\n"},
#endif
{"intervals", (PyCFunction)pyBwGetIntervals, METH_VARARGS|METH_KEYWORDS,
"Retrieve each interval covering a part of a chromosome/region. On error, a\n\
runtime exception is thrown.\n\
\n\
Positional arguments:\n\
chr: Chromosome name\n\
\n\
Keyword arguments:\n\
start: Starting position\n\
end: Ending position\n\
\n\
If start and end aren't specified, the entire chromosome is returned.\n\
The returned object is a tuple containing the starting position, end\n\
position, and value of each interval in the file. As with all bigWig\n\
positions, those returned are 0-based half-open (e.g., a start of 0 and\n\
end of 10 specifies the first 10 positions).\n\
\n\
>>> import pyBigWig\n\
>>> bw = pyBigWig.open(\"test/test.bw\")\n\
>>> bw.intervals(\"1\", 0, 3)\n\
((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224),\n\
(2, 3, 0.30000001192092896))\n\
>>> bw.close()"},
{"entries", (PyCFunction) pyBBGetEntries, METH_VARARGS|METH_KEYWORDS,
"Retrieves entries from a bigBed file. These can optionally contain the string\n\
associated with each entry.\n\
\n\
Positional arguments:\n\
chr: Chromosome name\n\
\n\
Keyword arguments:\n\
start: Starting position\n\
end: Ending position\n\
withString: If True, return the string associated with each entry.\n\
Default True.\n\
\n\
The output is a list of tuples, with members \"start\", \"end\", and \"string\"\n\
(assuming \"withString=True\"). If there are no overlapping entries, then None\n\
is returned.\n\
\n\
>>> import pyBigWig\n\
>>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\
>>> print(bw.entries('chr1',10000000,10020000))\n\
[(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'),\n\
(10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'),\n\
(10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')]\n\
>>> print(bb.entries(\"chr1\", 10000000, 10000500, withString=False))\n\
[(10009333, 10009640), (10014007, 10014289), (10014373, 10024307)]\n\
\n"},
{"SQL", (PyCFunction) pyBBGetSQL, METH_VARARGS,
"Returns the SQL string associated with the file. This is typically useful for\n\
bigBed files, where this determines what is held in each column of the text\n\
string associated with entries.\n\
\n\
If there is no SQL string, then None is returned.\n\
\n\
>>> import pyBigWig\n\
>>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\
>>> print(bb.SQL())\n\
table RnaElements\n\
\"BED6 + 3 scores for RNA Elements data \"\n\
(\n\
string chrom; \"Reference sequence chromosome or scaffold\"\n\
uint chromStart; \"Start position in chromosome\"\n\
uint chromEnd; \"End position in chromosome\"\n\
string name; \"Name of item\"\n\
uint score; \"Normalized score from 0-1000\"\n\
char[1] strand; \"+ or - or . for unknown\"\n\
float level; \"Expression level such as RPKM or FPKM. Set to -1 for no data.\"\n\
float signif; \"Statistical significance such as IDR. Set to -1 for no data.\"\n\
uint score2; \"Additional measurement/count e.g. number of reads. Set to 0 for no data.\"\n\
)\n\
\n\
\n"},
{"addHeader", (PyCFunction)pyBwAddHeader, METH_VARARGS|METH_KEYWORDS,
"Adds a header to a file opened for writing. This MUST be called before adding\n\
any entries. On error, a runtime exception is thrown.\n\
\n\
Positional arguments:\n\
cl: A chromosome list, of the form (('chr1', 1000), ('chr2', 2000), ...).\n\
In other words, each element of the list is a tuple containing a\n\
chromosome name and its associated length.\n\
\n\
Keyword arguments:\n\
maxZooms: The maximum number of zoom levels. The value must be >=0. The\n\
default is 10.\n\
\n\
>>> import pyBigWig\n\
>>> import tempfile\n\
>>> import os\n\
>>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\
>>> oname = ofile.name\n\
>>> ofile.close()\n\
>>> bw = pyBigWig.open(oname, 'w')\n\
>>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)], maxZooms=0)\n\
>>> bw.close()\n\
>>> os.remove(oname)"},
{"addEntries", (PyCFunction)pyBwAddEntries, METH_VARARGS|METH_KEYWORDS,
"Adds one or more entries to a bigWig file. This returns nothing, but throws a\n\
runtime exception on error.\n\
\n\
This function always accepts an optional 'validate' option. If set to 'True',\n\
which is the default, the input entries are checked to ensure that they come\n\
after previously entered entries. This comes with significant overhead, so if\n\
this is instead 'False' then this validation is not performed.\n\
\n\
There are three manners in which entries can be stored in bigWig files.\n\
\n\
\n\
bedGraph-like entries (12 bytes each):\n\
\n\
Positional arguments:\n\
chrom: A list of chromosome. These MUST match those added with addHeader().\n\
starts: A list of start positions. These are 0-based.\n\
\n\
Keyword arguments:\n\
ends: A list of end positions. These are 0-based half open, so a start of\n\
0 and end of 10 specifies the first 10 bases.\n\
values: A list of values.\n\
\n\
\n\
Variable-step entries (8 bytes each):\n\
\n\
Positional arguments:\n\
chrom: A chromosome name. This MUST match one added with addHeader().\n\
starts: A list of start positions. These are 0-based.\n\
\n\
Keyword arguments:\n\
values: A list of values.\n\
span: A span width. This is an integer value and specifies how many bases\n\
each entry describes. An entry with a start position of 0 and a span\n\
of 10 describes the first 10 bases.\n\
\n\
\n\
Fixed-step entries (4 bytes each):\n\
\n\
Positional arguments:\n\
chrom: A chromosome name. This MUST match one added with addHeader().\n\
starts: A start position. These are 0-based. The start position of each\n\
entry starts 'step' after the previous and describes 'span' bases.\n\
\n\
Keyword arguments:\n\
values: A list of values.\n\
span: A span width. This is an integer value and specifies how many bases\n\
each entry describes. An entry with a start position of 0 and a span\n\
of 10 describes the first 10 bases.\n\
step: A step width. Each subsequent entry begins this number of bases\n\
after the previous. So if the first entry has a start of 0 and step\n\
or 30, the second entry will start at 30.\n\
\n\
>>> import pyBigWig\n\
>>> import tempfile\n\
>>> import os\n\
>>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\
>>> oname = ofile.name\n\
>>> ofile.close()\n\
>>> bw = pyBigWig.open(oname, 'w')\n\
>>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)])\n\
>>> #Add some bedGraph-like entries\n\
>>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0])\n\
>>> #Variable-step entries, the span 500-520, 600-620, and 635-655\n\
>>> bw.addEntries(\"1\", [500, 600, 635], values=[-2.0, 150.0, 25.0], span=20)\n\
>>> #Fixed-step entries, the bases described are 900-920, 930-950, and 960-980\n\
>>> bw.addEntries(\"1\", 900, values=[-5.0, -20.0, 25.0], span=20, step=30)\n\
>>> #This only works due to using validate=False. Obviously the file is then corrupt.\n\
>>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0], validate=False)\n\
>>> bw.close()\n\
>>> os.remove(oname)"},
{"__enter__", (PyCFunction)pyBwEnter, METH_NOARGS, NULL},
{"__exit__", (PyCFunction)pyBwClose, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};
#if PY_MAJOR_VERSION >= 3
struct pyBigWigmodule_state {
PyObject *error;
};
#define GETSTATE(m) ((struct pyBigWigmodule_state*)PyModule_GetState(m))
static PyModuleDef pyBigWigmodule = {
PyModuleDef_HEAD_INIT,
"pyBigWig",
"A python module for bigWig file access",
-1,
bwMethods,
NULL, NULL, NULL, NULL
};
#endif
//Should set tp_dealloc, tp_print, tp_repr, tp_str, tp_members
static PyTypeObject bigWigFile = {
#if PY_MAJOR_VERSION >= 3
PyVarObject_HEAD_INIT(NULL, 0)
#else
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
#endif
"pyBigWig.bigWigFile", /*tp_name*/
sizeof(pyBigWigFile_t), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)pyBwDealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
PyObject_GenericGetAttr, /*tp_getattro*/
PyObject_GenericSetAttr, /*tp_setattro*/
0, /*tp_as_buffer*/
#if PY_MAJOR_VERSION >= 3
Py_TPFLAGS_DEFAULT, /*tp_flags*/
#else
Py_TPFLAGS_HAVE_CLASS, /*tp_flags*/
#endif
"bigWig File", /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
bwObjMethods, /*tp_methods*/
0, /*tp_members*/
0, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
0, /*tp_dictoffset*/
0, /*tp_init*/
0, /*tp_alloc*/
0, /*tp_new*/
0,0,0,0,0,0
};