Skip to content

Commit 6fa7df2

Browse files
committed
ENH: better JR join function
1 parent 0493cc5 commit 6fa7df2

File tree

2 files changed

+23
-31
lines changed

2 files changed

+23
-31
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ improvements
3939
- Improved speed of unit test suite by about 40%
4040
- Exception will not be raised calling `HDFStore.remove` on non-existent node
4141
with where clause
42+
- Optimized `_ensure_index` function resulting in performance savings in
43+
type-checking Index objects
4244

4345
**Bug fixes**
4446

bench/bench_join_panel.py

Lines changed: 21 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# reasonably efficient
22

33
def create_panels_append(cls, panels):
4-
""" return an appended list of panels """
4+
""" return an append list of panels """
55
panels = [ a for a in panels if a is not None ]
66
# corner cases
77
if len(panels) == 0:
@@ -10,42 +10,32 @@ def create_panels_append(cls, panels):
1010
return panels[0]
1111
elif len(panels) == 2 and panels[0] == panels[1]:
1212
return panels[0]
13-
14-
# add indicies that are not in the major set passed in; return a reindex version of p
15-
def reindex_major_axis(p, major):
16-
index = [ ma for ma in p.major_axis if ma not in major ]
17-
major.update(index)
18-
return p.reindex(major = index, copy = False)
19-
20-
# make sure that we can actually append, e.g. that we have non-overlapping major_axis
21-
# if we do, reindex so we don't
22-
major = set()
23-
panels = [ reindex_major_axis(p, major) for p in panels ]
24-
try:
25-
major = np.concatenate([ p.major_axis for p in panels ])
26-
except (Exception), detail:
27-
raise Exception("cannot append major_axis that dont' match dimensions! -> %s" % str(detail))
28-
29-
# reindex on minor axis/items
30-
try:
31-
minor, items = set(), set()
13+
#import pdb; pdb.set_trace()
14+
# create a joint index for the axis
15+
def joint_index_for_axis(panels, axis):
16+
s = set()
3217
for p in panels:
33-
items.update(p.items)
34-
minor.update(p.minor_axis)
35-
minor = Index(sorted(list(minor)))
36-
items = Index(sorted(list(items)))
37-
panels = [ p.reindex(items = items, minor = minor, copy = False) for p in panels ]
38-
except (Exception), detail:
39-
raise Exception("cannot append minor/items that dont' match dimensions! -> [%s] %s" % (','.join([ "%s" % p for p in panels ]),str(detail)))
40-
18+
s.update(list(getattr(p,axis)))
19+
return sorted(list(s))
20+
def reindex_on_axis(panels, axis, axis_reindex):
21+
new_axis = joint_index_for_axis(panels, axis)
22+
new_panels = [ p.reindex(**{ axis_reindex : new_axis, 'copy' : False}) for p in panels ]
23+
return new_panels, new_axis
24+
# create the joint major index, don't reindex the sub-panels - we are appending
25+
major = joint_index_for_axis(panels, 'major_axis')
26+
# reindex on minor axis
27+
panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor')
28+
# reindex on items
29+
panels, items = reindex_on_axis(panels, 'items', 'items')
4130
# concatenate values
4231
try:
4332
values = np.concatenate([ p.values for p in panels ],axis=1)
4433
except (Exception), detail:
4534
raise Exception("cannot append values that dont' match dimensions! -> [%s] %s" % (','.join([ "%s" % p for p in panels ]),str(detail)))
46-
return Panel(values, items, major, minor )
47-
add_class_method(Panel, create_panels_append, 'append_many')
48-
35+
#pm('append - create_panel')
36+
p = Panel(values, items = items, major_axis = major, minor_axis = minor )
37+
#pm('append - done')
38+
return p
4939

5040

5141

0 commit comments

Comments
 (0)