From e841c531f16615884d14a051ca9f57d4d003c196 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 11:16:39 -0500 Subject: [PATCH 1/3] Support non unique period indexes on join and merge operations --- pandas/core/indexes/base.py | 4 ++-- pandas/tests/reshape/test_join.py | 7 +++++++ pandas/tests/reshape/test_merge.py | 7 +++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e1053c1610175..bbbc19b36964d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3119,14 +3119,14 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self.values], + left_idx, right_idx = _get_join_indexers([self._values], [other._values], how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = np.asarray(self.values.take(left_idx)) + join_index = np.asarray(self._values.take(left_idx)) mask = left_idx == -1 np.putmask(join_index, mask, other._values.take(right_idx)) diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index e25661fb65271..00bb3d0c3c2c5 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -550,6 +550,13 @@ def test_join_mixed_non_unique_index(self): index=[1, 2, 2, 'a']) tm.assert_frame_equal(result, expected) + def test_join_non_unique_period_index(self): + per_index = pd.period_range('2016-01-01', periods=16, freq='M') + per_df = DataFrame([i for i in range(len(per_index))], + index=per_index, columns=['pnum']) + df2 = concat([per_df, per_df]) + per_df.join(df2, how='outer', rsuffix='_df2') + def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame(np.random.randn(20, 6), diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 4ac376a9752cb..3ec73b81f1b88 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -585,6 +585,13 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_non_unique_period_index(self): + per_index = pd.period_range('2016-01-01', periods=16, freq='M') + per_df = DataFrame([i for i in range(len(per_index))], + index=per_index, columns=['pnum']) + df2 = concat([per_df, per_df]) + per_df.merge(df2, left_index=True, right_index=True, how='outer') + def test_merge_on_periods(self): left = pd.DataFrame({'key': pd.period_range('20151010', periods=2, freq='D'), From 40b964a076ed1dda9c1d883bc0f02e1edad38f48 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 12:12:49 -0500 Subject: [PATCH 2/3] Add frame assertion on tests and release notes --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/tests/reshape/test_join.py | 15 ++++++++++----- pandas/tests/reshape/test_merge.py | 15 ++++++++++----- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6ddf6029b99bb..34ff73082627a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -185,7 +185,7 @@ Sparse Reshaping ^^^^^^^^^ - +- Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`) Numeric diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index 00bb3d0c3c2c5..ebe49bdc7f398 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -551,11 +551,16 @@ def test_join_mixed_non_unique_index(self): tm.assert_frame_equal(result, expected) def test_join_non_unique_period_index(self): - per_index = pd.period_range('2016-01-01', periods=16, freq='M') - per_df = DataFrame([i for i in range(len(per_index))], - index=per_index, columns=['pnum']) - df2 = concat([per_df, per_df]) - per_df.join(df2, how='outer', rsuffix='_df2') + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.join(df2, how='inner', rsuffix='_df2') + expected = DataFrame(np.tile(np.arange(16).repeat(2).reshape(-1, 1), 2), + columns=['pnum', 'pnum_df2'], + index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) def test_mixed_type_join_with_suffix(self): # GH #916 diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 3ec73b81f1b88..7e90ae6f1ed9e 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -586,11 +586,16 @@ def test_merge_on_datetime64tz(self): assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' def test_merge_non_unique_period_index(self): - per_index = pd.period_range('2016-01-01', periods=16, freq='M') - per_df = DataFrame([i for i in range(len(per_index))], - index=per_index, columns=['pnum']) - df2 = concat([per_df, per_df]) - per_df.merge(df2, left_index=True, right_index=True, how='outer') + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.merge(df2, left_index=True, right_index=True, how='inner') + expected = DataFrame(np.tile(np.arange(16).repeat(2).reshape(-1, 1), 2), + columns=['pnum_x', 'pnum_y'], + index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) def test_merge_on_periods(self): left = pd.DataFrame({'key': pd.period_range('20151010', periods=2, From 1560d75f769a296db090a9e25a3c9224b65f8b9f Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 13:15:58 -0500 Subject: [PATCH 3/3] Explicitly use dtype int64 on arange --- pandas/tests/reshape/test_join.py | 6 +++--- pandas/tests/reshape/test_merge.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index ebe49bdc7f398..e4894307918c6 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -557,9 +557,9 @@ def test_join_non_unique_period_index(self): index=index, columns=['pnum']) df2 = concat([df, df]) result = df.join(df2, how='inner', rsuffix='_df2') - expected = DataFrame(np.tile(np.arange(16).repeat(2).reshape(-1, 1), 2), - columns=['pnum', 'pnum_df2'], - index=df2.sort_index().index) + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum', 'pnum_df2'], index=df2.sort_index().index) tm.assert_frame_equal(result, expected) def test_mixed_type_join_with_suffix(self): diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 7e90ae6f1ed9e..919675188576e 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -592,9 +592,9 @@ def test_merge_non_unique_period_index(self): index=index, columns=['pnum']) df2 = concat([df, df]) result = df.merge(df2, left_index=True, right_index=True, how='inner') - expected = DataFrame(np.tile(np.arange(16).repeat(2).reshape(-1, 1), 2), - columns=['pnum_x', 'pnum_y'], - index=df2.sort_index().index) + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum_x', 'pnum_y'], index=df2.sort_index().index) tm.assert_frame_equal(result, expected) def test_merge_on_periods(self):