# bindex.py
'''
# bindex.py: an indexed binning data structure (and affiliated scripts and bits of code).
# primary author: mark r. yoder, phd.
#
# code is available as is, with no guarantees of any kind.
'''
import numpy
import scipy
import datetime as dtm
import pytz
import matplotlib.dates as mpd
import multiprocessing as mpp
#
import random
def bindex2d_sample():
    '''
    # demo/smoke-test for Bindex2D.
    # builds two float-leaf bindexes from the same random (x, y, z) rows, prints
    # a couple of consistency checks, then builds and returns a third bindex
    # whose leaves are lists.
    #
    # returns: the list-leaf Bindex2D instance.
    '''
    #
    R = random.Random()
    dx = dy = .1
    x0 = y0 = 0.
    prams = {'dx': dx, 'dy': dy, 'x0': x0, 'y0': y0, 'leaf_type': float}
    #
    # get a float type bindex:
    B = Bindex2D(**prams)
    #
    datas = [[x, y, R.random()]
             for x in numpy.arange(0., 2., .1)
             for y in numpy.arange(0., 3., .1)]
    #
    B.add_list(datas)
    #
    # round trip: bin-center rows back out of the bindex.
    b_datas = B.to_list()
    print("datas list: d1==d2: %d, len(d1)==len(d2): %d" % (b_datas==datas, len(b_datas)==len(datas)))
    #
    # a second bindex filled from the same rows should hold the same bins.
    B2 = Bindex2D(**prams)
    B2.add_list(datas)
    #
    # Bindex2D is itself the dict of bins, so compare the objects directly
    # (comparing the bound .items methods never tests the contents).
    print("bindex_items==bindex_items: %d" % (B == B2))
    #
    # now a bindex with list-type leaves; each added z extends the bin's list.
    B3 = Bindex2D(**prams)
    B3.leaf_type = list
    #
    list_rows = []
    for i, x in enumerate(numpy.arange(0., 2., .1)):
        for j, y in enumerate(numpy.arange(0., 3., .1)):
            list_rows.append([x, y, list(range(j + j*i, j + j*i + 5))])
    B3.add_list(list_rows)
    #
    return B3
class Bindex2D(dict):
    '''
    # a 2D indexed-binning container, stored as a dict of dicts:
    #   self[x_bin][y_bin] -> leaf value
    # bins are created on demand by add_to_bin(); each new leaf starts as
    # leaf_type() and added values are accumulated with "+=".
    #
    # relies on the module-level helpers get_bin() and bin2x() for the
    # coordinate <-> bin-index conversions.
    #
    # for now, this is just a 2D bindex; generalize later.
    '''

    def __init__(self, dx=1., dy=1., x0=0., y0=0., leaf_type=float):
        '''
        # dx, dy:    bin widths along x and y.
        # x0, y0:    coordinates of the origin used when computing bin indices.
        # leaf_type: a callable that evaluates to the default (empty) value of
        #            a leaf (end) node, e.g. float, int, list.
        '''
        dict.__init__(self)
        #
        self.x0 = x0
        self.y0 = y0
        self.dx = dx
        self.dy = dy
        #
        # index assigned to the bin at the origin, used by to_list().
        self.bin_y_0 = 0
        self.bin_x_0 = 0
        #
        self.leaf_type = leaf_type

    def get_xbin_center(self, x, dx=None, x0=None, bin_0=0):
        '''
        # return a dict. with the bin index and center location closest to x.
        '''
        j = self.get_xbin(x, dx=dx, x0=x0, bin_0=bin_0)
        xx = self.get_bin2x(j, dx=dx, x0=x0, bin_0=bin_0)
        #
        return {'index': j, 'center': xx}

    def get_ybin_center(self, y, dy=None, y0=None, bin_0=0):
        '''
        # return a dict. with the bin index and center location closest to y.
        '''
        j = self.get_ybin(y, dy=dy, y0=y0, bin_0=bin_0)
        yy = self.get_bin2y(j, dy=dy, y0=y0, bin_0=bin_0)
        #
        return {'index': j, 'center': yy}

    def get_xbin(self, x, dx=None, x0=None, bin_0=0):
        '''
        # bin index of coordinate x. dx/x0 default to the instance values.
        '''
        # a None (or zero) width falls back to the instance width; a zero-width bin is undefined.
        dx = (dx or self.dx)
        # test against None explicitly so that an explicit origin of 0 is honored.
        x0 = (self.x0 if x0 is None else x0)
        #
        return get_bin(x, dx=dx, x0=x0, bin_0=bin_0)

    def get_bin2x(self, bin_num, dx=None, x0=None, bin_0=0):
        '''
        # x position of bin number bin_num. dx/x0 default to the instance values.
        '''
        dx = (dx or self.dx)
        x0 = (self.x0 if x0 is None else x0)
        #
        return bin2x(bin_num, dx=dx, x0=x0, bin_0=bin_0)

    def get_ybin(self, y, dy=None, y0=None, bin_0=0):
        '''
        # bin index of coordinate y. dy/y0 default to the instance values.
        '''
        dy = (dy or self.dy)
        y0 = (self.y0 if y0 is None else y0)
        #
        return get_bin(y, dx=dy, x0=y0, bin_0=bin_0)

    def get_bin2y(self, bin_num, dy=None, y0=None, bin_0=0):
        '''
        # y position of bin number bin_num. dy/y0 default to the instance values.
        '''
        dy = (dy or self.dy)
        y0 = (self.y0 if y0 is None else y0)
        #
        return bin2x(bin_num, dx=dy, x0=y0, bin_0=bin_0)

    def get_xybins(self, x, y, dx=None, dy=None, bin_x_0=0, bin_y_0=0, r_type='dict'):
        '''
        # return the (x, y) bin indices for coordinates x,y.
        # r_type selects the container: 'tuple', 'list' or 'dict' ({'x':..., 'y':...});
        # anything else returns a list.
        '''
        dx = (dx or self.dx)
        dy = (dy or self.dy)
        #
        # the coordinate origin is the instance's x0/y0; bin_x_0/bin_y_0 are only
        # the index offsets of the origin bin (they are not coordinates).
        x_bin = get_bin(x=x, dx=dx, x0=self.x0, bin_0=bin_x_0)
        y_bin = get_bin(x=y, dx=dy, x0=self.y0, bin_0=bin_y_0)
        #
        if r_type == 'tuple':
            return (x_bin, y_bin)
        if r_type == 'dict':
            return {'x': x_bin, 'y': y_bin}
        # 'list' and any unrecognized r_type.
        return [x_bin, y_bin]

    def get_bin_items(self, x=None, y=None):
        '''
        # get the leaf of the bin that contains coordinates x,y.
        # raises KeyError if that bin has not been created.
        '''
        bin_x, bin_y = self.get_xybins(x, y, r_type='tuple')
        #
        return self[bin_x][bin_y]

    def add_list(self, xyz, j_x=0, j_y=1, j_z=2):
        '''
        # add every row of xyz to its bin; j_x, j_y, j_z are the positions of
        # x, y and z within each row.
        '''
        for rw in xyz:
            self.add_to_bin(rw[j_x], rw[j_y], rw[j_z])
        #
        return None

    def get_value_at(self, x=None, y=None, bin_x=None, bin_y=None):
        '''
        # return the leaf at bin (bin_x, bin_y); a bin index that is not given
        # is computed from the corresponding coordinate (x or y).
        # raises KeyError if that bin has not been created.
        '''
        # "is None" (not truthiness) so that an explicit bin index of 0 is honored.
        if bin_x is None:
            bin_x = get_bin(x, dx=self.dx, x0=self.x0, bin_0=0)
        if bin_y is None:
            bin_y = get_bin(y, dx=self.dy, x0=self.y0, bin_0=0)
        #
        return self[bin_x][bin_y]

    def add_to_bin(self, x=None, y=None, z=None, bin_x=None, bin_y=None):
        '''
        # add z to the bin located at (x,y). (x,y) can be off-center; the
        # appropriate bin is found. alternatively give bin_x/bin_y directly.
        #
        # x,y: coordinates of item; z is the item (defaults to leaf_type()).
        '''
        # identity test: "z == None" is ambiguous for array-like z.
        if z is None:
            z = self.leaf_type()
        #
        # "is None" (not truthiness) so that an explicit bin index of 0 is honored.
        if bin_x is None:
            bin_x = get_bin(x, dx=self.dx, x0=self.x0, bin_0=0)
        if bin_y is None:
            bin_y = get_bin(y, dx=self.dy, x0=self.y0, bin_0=0)
        #
        # create the row and the leaf on first use.
        if bin_x not in self:
            self[bin_x] = {}
        row = self[bin_x]
        if bin_y not in row:
            row[bin_y] = self.leaf_type()
        #
        row[bin_y] += z
        #
        return None

    def to_list(self):
        '''
        # return the contents as a list of [x, y, z] rows, where x,y are the
        # positions of each bin (via bin2x) and z is the bin's leaf.
        '''
        # should probably figure out the proper way to override list conversion method(s).
        return [[bin2x(X, self.dx, self.x0, self.bin_x_0), bin2x(Y, self.dy, self.y0, self.bin_y_0), z]
                for X, Y_rw in self.items() for Y, z in Y_rw.items()]

    def to_array(self):
        '''
        # export to a numpy record array.
        #  - scalar leaves:        columns 'x', 'y', 'z'.
        #  - string leaves:        columns 'x', 'y', 'z' (z is a fixed-width string).
        #  - list/tuple leaves:    columns 'x', 'y', 'z_0', 'z_1', ...; all leaves
        #                          must have the same length, and the column type is
        #                          taken from the first element of the first leaf.
        # raises ValueError for an empty bindex or for ragged sequence leaves.
        '''
        rows = self.to_list()
        if not rows:
            raise ValueError("cannot export an empty Bindex2D to an array")
        #
        # inspect a default leaf instance (not the factory itself) to decide which layout applies.
        leaf_default = self.leaf_type()
        #
        if not hasattr(leaf_default, '__len__'):
            z_type_name = type(leaf_default).__name__
            print("exporting to array; leaf_type_name: %s" % z_type_name)
            #
            return numpy.rec.fromarrays(list(zip(*rows)), names=['x', 'y', 'z'], formats=['f8', 'f8', z_type_name])
        #
        # otherwise, we've got string, list, tuple, etc. types. find the max length of all z entries.
        z_lens = [len(rw[2]) for rw in rows]
        max_z_len = max(z_lens)
        #
        if isinstance(leaf_default, (str, bytes)):
            # byte strings map to 'S', text strings to 'U'; keep the width at least 1.
            z_format = ('S%d' if isinstance(leaf_default, bytes) else 'U%d') % max(1, max_z_len)
            return numpy.rec.fromarrays(list(zip(*rows)), names=['x', 'y', 'z'], formats=['f8', 'f8', z_format])
        #
        # otherwise, it's some sort of list or tuple. for now, just extend each row into
        # separate z_j columns. this needs every leaf to supply every column.
        if min(z_lens) != max_z_len:
            raise ValueError("all leaf sequences must have the same length to export to an array")
        #
        # we assume that all the values in a row are of the same type.
        z_type = (type(rows[0][2][0]) if max_z_len > 0 else float)
        z_col_names = ['z_%d' % j for j in range(max_z_len)]
        #
        flat_rows = [list(rw[0:2]) + list(rw[2]) for rw in rows]
        return numpy.rec.fromarrays(list(zip(*flat_rows)), names=['x', 'y'] + z_col_names,
                                    formats=['f8', 'f8'] + [z_type.__name__ for j in z_col_names])

    def has_key(self, key):
        '''
        # dict.has_key() replacement (removed in python3): True if key is an x-bin index in this bindex.
        '''
        return (key in self)
class Bindex(dict):
    '''
    # a single, 1D bindex component: a dict that also carries an origin (x0)
    # and a bin width (dx). the idea is to stack these together so that an
    # N-D bindex can be made from a bindex of bindices.
    '''

    def __init__(self, x0=0., dx=0.):
        # start out as an empty dict; only the binning parameters are recorded.
        dict.__init__(self)
        self.dx = dx
        self.x0 = x0
def get_bin(x, dx=1., x0=0., bin_0=0):
    '''
    # return the bin id/index of position x along a single axis.
    # x0 is the position of the bin whose index is bin_0; dx is the bin width.
    # the offset (in bin widths) from x0 is rounded with round().
    '''
    n_bins_from_origin = (x - x0)/dx
    return int(round(n_bins_from_origin)) + int(bin_0)
def bin2x(bin_num, dx=1., x0=0., bin_0=0):
    '''
    # return the position x of bin number bin_num along one axis.
    # bin_0 is the index of the bin located at x0; dx is the bin width.
    '''
    bins_from_origin = bin_num - bin_0
    return bins_from_origin*dx + x0