From a58e8344624f66c2ee5a058e14ec8bddc7725648 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sat, 29 Aug 2015 12:29:59 -0400 Subject: [PATCH 1/2] BUG: Fixed bug in DataFrame.diff for issue 10907 when DataFrame is not consolidated (+1 squashed commit) Squashed commits: [6fe71d3] moved changes to correct place and fixed test_diff (+1 squashed commit) Squashed commits: [2bf3c2b] moved change to where diff is defined and updated test (+1 squashed commit) Squashed commits: [6715d7f] added unit test to test this fix (+1 squashed commit) Squashed commits: [f06fa5e] fixed bug in DataFrame.diff --- pandas/core/internals.py | 5 ++++- pandas/tests/test_frame.py | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1d6269ae904d2..5f538bf5e1e00 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2414,7 +2414,7 @@ def _verify_integrity(self): 'tot_items: {1}'.format(len(self.items), tot_items)) - def apply(self, f, axes=None, filter=None, do_integrity_check=False, **kwargs): + def apply(self, f, axes=None, filter=None, do_integrity_check=False, consolidate=True, **kwargs): """ iterate over the blocks, collect and create a new block manager @@ -2443,6 +2443,9 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, **kwargs): else: kwargs['filter'] = filter_locs + if consolidate: + self._consolidate_inplace() + if f == 'where': align_copy = True if kwargs.get('align', True): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 58c6d15f8ada5..22f838df3da05 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10771,6 +10771,12 @@ def test_diff(self): assert_series_equal(the_diff['A'], tf['A'] - tf['A'].shift(1)) + df = pd.DataFrame({'x': pd.Series([1]),'y': pd.Series([2]), 'z': pd.Series([3])}) + result = df.diff(axis=1).astype(float) + expected = pd.DataFrame({'x':np.nan, 'y':pd.Series(1), 'z':pd.Series(1)}).astype(float) + self.assert_frame_equal(result, expected) + + def test_diff_timedelta(self): # GH 4533 df = DataFrame(dict(time=[Timestamp('20130101 9:01'), From ad7f9d34a86e799a5c0ec82d4e9edd6ac018618d Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 30 Aug 2015 09:53:14 -0400 Subject: [PATCH 2/2] DOC: Added a note in whatsnew and doc-string for fixing issue 10907 (+1 squashed commit) Squashed commits: [810cbda] DOC: Added a note in whatsnew and doc-string for fixing issue 10907 (+1 squashed commit) Squashed commits: [f9220a2] DOC: Added a note in whatsnew and doc-string for fixing issue 10907 (+1 squashed commit) Squashed commits: [0f1836f] added consolidate as param in doc-string --- doc/source/whatsnew/v0.17.0.txt | 1 + pandas/core/internals.py | 1 + pandas/tests/test_frame.py | 8 +++++--- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 3e81a923a114c..70a33a6915516 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -811,6 +811,7 @@ Bug Fixes - Bug in ``read_csv`` when using the ``nrows`` or ``chunksize`` parameters if file contains only a header line (:issue:`9535`) - Bug in serialization of ``category`` types in HDF5 in presence of alternate encodings. (:issue:`10366`) - Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) +- Bug in ``pd.DataFrame.diff`` when DataFrame is not consolidated (:issue:`10907`) - Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead the original dtype (:issue:`9431`) - Bug in ``DatetimeIndex.take`` and ``TimedeltaIndex.take`` may not raise ``IndexError`` against invalid index (:issue:`10295`) - Bug in ``Series([np.nan]).astype('M8[ms]')``, which now returns ``Series([pd.NaT])`` (:issue:`10747`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 5f538bf5e1e00..15069bf23672b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2425,6 +2425,7 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, consolidate filter : list, if supplied, only call the block if the filter is in the block do_integrity_check : boolean, default False. Do the block manager integrity check + consolidate: boolean, default True. Join together blocks having same dtype Returns ------- diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 22f838df3da05..57a43592b3866 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10771,9 +10771,11 @@ def test_diff(self): assert_series_equal(the_diff['A'], tf['A'] - tf['A'].shift(1)) - df = pd.DataFrame({'x': pd.Series([1]),'y': pd.Series([2]), 'z': pd.Series([3])}) - result = df.diff(axis=1).astype(float) - expected = pd.DataFrame({'x':np.nan, 'y':pd.Series(1), 'z':pd.Series(1)}).astype(float) + # issue 10907 + df = pd.DataFrame({'y': pd.Series([2]), 'z': pd.Series([3])}) + df.insert(0, 'x', 1) + result = df.diff(axis=1) + expected = pd.DataFrame({'x':np.nan, 'y':pd.Series(1), 'z':pd.Series(1)}).astype('float64') self.assert_frame_equal(result, expected)