fixing rebasing issues
topper-123 committed Dec 4, 2018
1 parent 8731409 commit 3e467a7
Showing 11 changed files with 56 additions and 2,349 deletions.
91 changes: 24 additions & 67 deletions pandas/core/indexes/base.py
@@ -1521,27 +1521,27 @@ def droplevel(self, level=0):
# The two checks above guarantee that here self is a MultiIndex

new_levels = list(self.levels)
- new_labels = list(self.labels)
+ new_codes = list(self.codes)
new_names = list(self.names)

for i in levnums:
new_levels.pop(i)
- new_labels.pop(i)
+ new_codes.pop(i)
new_names.pop(i)

if len(new_levels) == 1:

# set nan if needed
- mask = new_labels[0] == -1
- result = new_levels[0].take(new_labels[0])
+ mask = new_codes[0] == -1
+ result = new_levels[0].take(new_codes[0])
if mask.any():
result = result.putmask(mask, np.nan)

result.name = new_names[0]
return result
else:
from .multi import MultiIndex
- return MultiIndex(levels=new_levels, labels=new_labels,
+ return MultiIndex(levels=new_levels, codes=new_codes,
names=new_names, verify_integrity=False)

_index_shared_docs['_get_grouper_for_level'] = """
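
The hunk above renames the internal label arrays to codes inside Index.droplevel. A minimal sketch of the behaviour it implements, with toy data and assuming pandas >= 0.24 (where the MultiIndex keyword is codes rather than labels):

# Toy example of droplevel: dropping down to a single remaining level returns
# a flat Index built from levels[0].take(codes[0]), as in the branch above.
import pandas as pd

mi = pd.MultiIndex(levels=[['a', 'b'], [1, 2]],
                   codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=['letter', 'number'])
flat = mi.droplevel('number')
print(flat)  # Index(['a', 'a', 'b', 'b'], dtype='object', name='letter')
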
@@ -3299,14 +3299,14 @@ def _join_multi(self, other, how, return_indexers=True):
# common levels, ldrop_names, rdrop_names
dropped_names = ldrop_names + rdrop_names

- levels, labels, names = (
+ levels, codes, names = (
_restore_dropped_levels_multijoin(self, other,
dropped_names,
join_idx,
lidx, ridx))

# Re-create the multi-index
- multi_join_idx = MultiIndex(levels=levels, labels=labels,
+ multi_join_idx = MultiIndex(levels=levels, codes=codes,
names=names, verify_integrity=False)

multi_join_idx = multi_join_idx.remove_unused_levels()
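
For context, _join_multi rebuilds the joined MultiIndex from the restored levels and codes and then trims level values that no longer appear. A short sketch of that last step, with hypothetical data and assuming pandas >= 0.24:

# remove_unused_levels() drops level values that are never referenced by the
# codes; here 'z' is unused and disappears from levels[0].
import pandas as pd

mi = pd.MultiIndex(levels=[['x', 'y', 'z'], [1, 2]],
                   codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
trimmed = mi.remove_unused_levels()
print(trimmed.levels[0])  # Index(['x', 'y'], dtype='object')
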
@@ -3417,63 +3417,63 @@ def _get_leaf_sorter(labels):
left_indexer = None
join_index = left
else: # sort the leaves
- left_indexer = _get_leaf_sorter(left.labels[:level + 1])
+ left_indexer = _get_leaf_sorter(left.codes[:level + 1])
join_index = left[left_indexer]

else:
left_lev_indexer = ensure_int64(left_lev_indexer)
rev_indexer = lib.get_reverse_indexer(left_lev_indexer,
len(old_level))

- new_lev_labels = algos.take_nd(rev_indexer, left.labels[level],
- allow_fill=False)
+ new_lev_codes = algos.take_nd(rev_indexer, left.codes[level],
+ allow_fill=False)

- new_labels = list(left.labels)
- new_labels[level] = new_lev_labels
+ new_codes = list(left.codes)
+ new_codes[level] = new_lev_codes

new_levels = list(left.levels)
new_levels[level] = new_level

if keep_order: # just drop missing values. o.w. keep order
left_indexer = np.arange(len(left), dtype=np.intp)
- mask = new_lev_labels != -1
+ mask = new_lev_codes != -1
if not mask.all():
- new_labels = [lab[mask] for lab in new_labels]
+ new_codes = [lab[mask] for lab in new_codes]
left_indexer = left_indexer[mask]

else: # tie out the order with other
if level == 0: # outer most level, take the fast route
- ngroups = 1 + new_lev_labels.max()
+ ngroups = 1 + new_lev_codes.max()
left_indexer, counts = libalgos.groupsort_indexer(
- new_lev_labels, ngroups)
+ new_lev_codes, ngroups)

# missing values are placed first; drop them!
left_indexer = left_indexer[counts[0]:]
- new_labels = [lab[left_indexer] for lab in new_labels]
+ new_codes = [lab[left_indexer] for lab in new_codes]

else: # sort the leaves
- mask = new_lev_labels != -1
+ mask = new_lev_codes != -1
mask_all = mask.all()
if not mask_all:
- new_labels = [lab[mask] for lab in new_labels]
+ new_codes = [lab[mask] for lab in new_codes]

- left_indexer = _get_leaf_sorter(new_labels[:level + 1])
- new_labels = [lab[left_indexer] for lab in new_labels]
+ left_indexer = _get_leaf_sorter(new_codes[:level + 1])
+ new_codes = [lab[left_indexer] for lab in new_codes]

# left_indexers are w.r.t masked frame.
# reverse to original frame!
if not mask_all:
left_indexer = mask.nonzero()[0][left_indexer]

- join_index = MultiIndex(levels=new_levels, labels=new_labels,
+ join_index = MultiIndex(levels=new_levels, codes=new_codes,
names=left.names, verify_integrity=False)

if right_lev_indexer is not None:
right_indexer = algos.take_nd(right_lev_indexer,
- join_index.labels[level],
+ join_index.codes[level],
allow_fill=False)
else:
- right_indexer = join_index.labels[level]
+ right_indexer = join_index.codes[level]

if flip_order:
left_indexer, right_indexer = right_indexer, left_indexer
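
The masking above relies on the MultiIndex code sentinel: a code of -1 means the value is missing (NaN) on that level. A small illustration with toy data, assuming pandas >= 0.24:

# A code of -1 marks a missing value on that level; the join code above masks
# these positions out before sorting.
import pandas as pd

mi = pd.MultiIndex(levels=[['a', 'b']], codes=[[0, -1, 1]])
print(mi.codes[0] == -1)       # [False  True False]
print(mi.get_level_values(0))  # Index(['a', nan, 'b'], dtype='object')
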
@@ -4103,24 +4103,12 @@ def asof_locs(self, where, mask):

return result

- <<<<<<< HEAD
def sort_values(self, return_indexer=False, ascending=True):
"""
Return a sorted copy of the index.
Return a sorted copy of the index, and optionally return the indices
that sorted the index itself.
- =======
- levels, codes, names = (
- _restore_dropped_levels_multijoin(self, other,
- dropped_names,
- join_idx,
- lidx, ridx))
- # Re-create the multi-index
- multi_join_idx = MultiIndex(levels=levels, codes=codes,
- names=names, verify_integrity=False)
- >>>>>>> various changes
Parameters
----------
@@ -4478,24 +4466,15 @@ def isin(self, values, level=None):
passed set of values. The length of the returned boolean array matches
the length of the index.
- <<<<<<< HEAD
Parameters
----------
values : set or list-like
Sought values.
.. versionadded:: 0.18.1
- =======
- new_level_codes = algos.take_nd(rev_indexer, left.codes[level],
- allow_fill=False)
- new_codes = list(left.codes)
- new_codes[level] = new_level_codes
- >>>>>>> various changes
Support for values as a set.
- <<<<<<< HEAD
level : str or int, optional
Name or position of the index level to use (if the index is a
`MultiIndex`).
@@ -4504,40 +4483,18 @@ def isin(self, values, level=None):
-------
is_contained : ndarray
NumPy array of boolean values.
- =======
- if keep_order: # just drop missing values. o.w. keep order
- left_indexer = np.arange(len(left), dtype=np.intp)
- mask = new_level_codes != -1
- if not mask.all():
- new_codes = [codes_[mask] for codes_ in new_codes]
- left_indexer = left_indexer[mask]
- else: # tie out the order with other
- if level == 0: # outer most level, take the fast route
- ngroups = 1 + new_level_codes.max()
- left_indexer, counts = libalgos.groupsort_indexer(
- new_level_codes, ngroups)
- >>>>>>> various changes
See Also
--------
Series.isin : Same for Series.
DataFrame.isin : Same method for DataFrames.
- <<<<<<< HEAD
Notes
-----
In the case of `MultiIndex` you must either specify `values` as a
list-like object containing tuples that are the same length as the
number of levels, or specify `level`. Otherwise it will raise a
``ValueError``.
- =======
- else: # sort the leaves
- mask = new_level_codes != -1
- mask_all = mask.all()
- if not mask_all:
- new_codes = [lab[mask] for lab in new_codes]
- >>>>>>> various changes
If `level` is specified:
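
As a quick illustration of the isin contract described in the docstring above (toy index, assuming pandas >= 0.24):

# Without `level`, values must be tuples covering all levels; with `level`,
# they are matched against a single named level.
import pandas as pd

mi = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [1, 2, 1]],
                               names=['letter', 'number'])
print(mi.isin([('a', 1), ('b', 1)]))   # [ True False  True]
print(mi.isin(['a'], level='letter'))  # [ True  True False]
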
18 changes: 9 additions & 9 deletions pandas/core/indexes/multi.py
@@ -327,11 +327,11 @@ def from_arrays(cls, arrays, sortorder=None, names=None):

from pandas.core.arrays.categorical import _factorize_from_iterables

- labels, levels = _factorize_from_iterables(arrays)
+ codes, levels = _factorize_from_iterables(arrays)
if names is None:
names = [getattr(arr, "name", None) for arr in arrays]

- return MultiIndex(levels=levels, labels=labels, sortorder=sortorder,
+ return MultiIndex(levels=levels, codes=codes, sortorder=sortorder,
names=names, verify_integrity=False)

@classmethod
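
A short sketch of the public entry point touched above: from_arrays factorizes each input array into a (codes, levels) pair. Toy data, assuming pandas >= 0.24:

# from_arrays factorizes each array; the result's levels hold the unique
# values and the codes hold integer positions into those levels.
import pandas as pd

mi = pd.MultiIndex.from_arrays([['x', 'x', 'y'], [10, 20, 10]])
print(mi.levels)  # levels: ['x', 'y'] and [10, 20]
print(mi.codes)   # codes:  [0, 0, 1] and [0, 1, 0]
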
@@ -427,9 +427,9 @@ def from_product(cls, iterables, sortorder=None, names=None):
elif is_iterator(iterables):
iterables = list(iterables)

- labels, levels = _factorize_from_iterables(iterables)
- labels = cartesian_product(labels)
- return MultiIndex(levels, labels, sortorder=sortorder, names=names)
+ codes, levels = _factorize_from_iterables(iterables)
+ codes = cartesian_product(codes)
+ return MultiIndex(levels, codes, sortorder=sortorder, names=names)

# --------------------------------------------------------------------
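
Similarly, from_product builds the codes as a cartesian product of the factorized iterables; a minimal sketch (toy data, pandas >= 0.24 assumed):

# Every combination of the two iterables appears exactly once.
import pandas as pd

mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2]],
                                names=['letter', 'number'])
print(mi.codes)  # [0, 0, 1, 1] and [0, 1, 0, 1]
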

@@ -873,15 +873,15 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
return []

stringified_levels = []
- for lev, lab in zip(self.levels, self.labels):
+ for lev, level_codes in zip(self.levels, self.codes):
na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type)

if len(lev) > 0:

- formatted = lev.take(lab).format(formatter=formatter)
+ formatted = lev.take(level_codes).format(formatter=formatter)

# we have some NA
- mask = lab == -1
+ mask = level_codes == -1
if mask.any():
formatted = np.array(formatted, dtype=object)
formatted[mask] = na
@@ -891,7 +891,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
# weird all NA case
formatted = [pprint_thing(na if isna(x) else x,
escape_chars=('\t', '\r', '\n'))
- for x in algos.take_1d(lev._values, lab)]
+ for x in algos.take_1d(lev._values, level_codes)]
stringified_levels.append(formatted)

result_levels = []
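
The loop above renders each level through its codes and substitutes na_rep where a code is -1. A hedged sketch of the effect (toy index, pandas >= 0.24 assumed):

# Positions whose code is -1 are rendered with the na_rep placeholder.
import pandas as pd

mi = pd.MultiIndex(levels=[['a', 'b'], [1, 2]],
                   codes=[[0, -1, 1], [0, 1, -1]])
print(mi.format(na_rep='NULL'))  # formatted rows with 'NULL' at missing slots
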
3 changes: 1 addition & 2 deletions pandas/tests/indexes/multi/test_copy.py
@@ -39,9 +39,8 @@ def test_shallow_copy(idx):

def test_labels_deprecated(idx):
# GH23752
- codes = idx.codes
with tm.assert_produces_warning(FutureWarning):
- idx.copy(labels=codes)
+ idx.copy(labels=idx.codes)


def test_view(idx):
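
test_labels_deprecated above pins down the deprecation path (GH23752): passing labels= still works in pandas 0.24 but emits a FutureWarning and is forwarded to codes=. A small sketch of that behaviour, assuming pandas 0.24.x:

# `labels=` is accepted for backwards compatibility but warns; `codes=` is the
# new spelling (the old keyword was removed in later releases).
import warnings
import pandas as pd

mi = pd.MultiIndex(levels=[['a', 'b']], codes=[[0, 1]])
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    mi.copy(labels=mi.codes)
print(caught[-1].category)  # <class 'FutureWarning'>
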
4 changes: 2 additions & 2 deletions pandas/tests/indexing/multiindex/conftest.py
@@ -10,8 +10,8 @@ def multiindex_dataframe_random_data():
"""DataFrame with 2 level MultiIndex with random data"""
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
'three']],
- labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
- [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
+ codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
+ [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
return DataFrame(np.random.randn(10, 3), index=index,
columns=Index(['A', 'B', 'C'], name='exp'))
12 changes: 6 additions & 6 deletions pandas/tests/indexing/multiindex/test_getitem.py
@@ -64,8 +64,8 @@ def test_getitem_duplicates_multiindex(self):

index = MultiIndex(levels=[['D', 'B', 'C'],
[0, 26, 27, 37, 57, 67, 75, 82]],
- labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
- [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
+ codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
+ [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
names=['tag', 'day'])
arr = np.random.randn(len(index), 1)
df = DataFrame(arr, index=index, columns=['val'])
@@ -87,8 +87,8 @@ def f():
# A is treated as a special Timestamp
index = MultiIndex(levels=[['A', 'B', 'C'],
[0, 26, 27, 37, 57, 67, 75, 82]],
- labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
- [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
+ codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
+ [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
names=['tag', 'day'])
df = DataFrame(arr, index=index, columns=['val'])
result = df.val['A']
@@ -264,8 +264,8 @@ def test_getitem_toplevel(self, multiindex_dataframe_random_data):

def test_getitem_int(self, multiindex_dataframe_random_data):
levels = [[0, 1], [0, 1, 2]]
- labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
- index = MultiIndex(levels=levels, labels=labels)
+ codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
+ index = MultiIndex(levels=levels, codes=codes)

frame = DataFrame(np.random.randn(6, 2), index=index)
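
The tests above select by the outermost level of a MultiIndex; a toy sketch of that pattern (hypothetical data, pandas >= 0.24 assumed):

# Selecting a label from the outermost level returns the cross-section with
# that level dropped, so the result is indexed by 'day' only.
import numpy as np
import pandas as pd

index = pd.MultiIndex(levels=[['A', 'B'], [0, 1, 2]],
                      codes=[[0, 0, 1, 1], [0, 1, 0, 2]],
                      names=['tag', 'day'])
df = pd.DataFrame(np.random.randn(4, 1), index=index, columns=['val'])
print(df.val['A'])  # the rows tagged 'A', indexed by 'day'
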

6 changes: 3 additions & 3 deletions pandas/tests/indexing/multiindex/test_loc.py
@@ -11,7 +11,7 @@
def single_level_multiindex():
"""single level MultiIndex"""
return MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
- labels=[[0, 1, 2, 3]], names=['first'])
+ codes=[[0, 1, 2, 3]], names=['first'])


@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
@@ -40,7 +40,7 @@ def test_loc_getitem_series(self):

empty = Series(data=[], dtype=np.float64)
expected = Series([], index=MultiIndex(
- levels=index.levels, labels=[[], []], dtype=np.float64))
+ levels=index.levels, codes=[[], []], dtype=np.float64))
result = x.loc[empty]
tm.assert_series_equal(result, expected)

@@ -60,7 +60,7 @@ def test_loc_getitem_array(self):
# empty array:
empty = np.array([])
expected = Series([], index=MultiIndex(
- levels=index.levels, labels=[[], []], dtype=np.float64))
+ levels=index.levels, codes=[[], []], dtype=np.float64))
result = x.loc[empty]
tm.assert_series_equal(result, expected)

12 changes: 6 additions & 6 deletions pandas/tests/indexing/multiindex/test_partial.py
@@ -52,9 +52,9 @@ def test_xs_partial(self, multiindex_dataframe_random_data,

# ex from #1796
index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]],
- labels=[[0, 0, 0, 0, 1, 1, 1, 1],
- [0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1,
- 0, 1]])
+ codes=[[0, 0, 0, 0, 1, 1, 1, 1],
+ [0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1,
+ 0, 1]])
df = DataFrame(np.random.randn(8, 4), index=index,
columns=list('abcd'))

@@ -68,7 +68,7 @@ def test_getitem_partial(
ymd = ymd.T
result = ymd[2000, 2]

- expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1])
+ expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
expected.columns = expected.columns.droplevel(0).droplevel(0)
tm.assert_frame_equal(result, expected)

@@ -82,12 +82,12 @@ def test_fancy_slice_partial(

ymd = multiindex_year_month_day_dataframe_random_data
result = ymd.loc[(2000, 2):(2000, 4)]
- lev = ymd.index.labels[1]
+ lev = ymd.index.codes[1]
expected = ymd[(lev >= 1) & (lev <= 3)]
tm.assert_frame_equal(result, expected)

def test_getitem_partial_column_select(self):
- idx = MultiIndex(labels=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
+ idx = MultiIndex(codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
levels=[['a', 'b'], ['x', 'y'], ['p', 'q']])
df = DataFrame(np.random.rand(3, 2), index=idx)
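
The partial-indexing tests above compare against rows selected directly by a level's codes; a minimal sketch of that idiom (toy frame, pandas >= 0.24 assumed):

# Boolean-select the rows whose second-level code is 1, i.e. level value 'y'.
import numpy as np
import pandas as pd

idx = pd.MultiIndex(levels=[['a', 'b'], ['x', 'y']],
                    codes=[[0, 0, 1], [0, 1, 1]])
df = pd.DataFrame(np.random.rand(3, 2), index=idx)
print(df[idx.codes[1] == 1])
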
