From 1c5e7796f15a0e88d58e17fda4b6591bf585b1ab Mon Sep 17 00:00:00 2001 From: Gouthaman Balaraman Date: Wed, 19 Mar 2014 19:55:41 -0700 Subject: [PATCH] A merged version of all my commits . Added to note release.rst Move shift_indexer into the shift function Removed function _shift_indexer all together. Passes tests sparse. remove unwanted comments --- doc/source/release.rst | 3 ++- pandas/core/common.py | 12 ------------ pandas/core/generic.py | 4 +--- pandas/core/internals.py | 21 ++++++++++++--------- vb_suite/frame_methods.py | 13 +++++++++++++ 5 files changed, 28 insertions(+), 25 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index f09969cda60f1..c0bd8f424c2b2 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -182,7 +182,8 @@ Improvements to existing features - Support passing ``encoding`` with xlwt (:issue:`3710`) - Performance improvement when converting ``DatetimeIndex`` to floating ordinals using ``DatetimeConverter`` (:issue:`6636`) - +- Performance improvement for ``DataFrame.shift`` (:issue:`5609`) + .. 
_release.bug_fixes-0.14.0: Bug Fixes diff --git a/pandas/core/common.py b/pandas/core/common.py index 46ca371284ae4..dadd21f8fc128 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2003,18 +2003,6 @@ def intersection(*seqs): return type(seqs[0])(list(result)) -def _shift_indexer(N, periods): - # small reusable utility - indexer = np.zeros(N, dtype=int) - - if periods > 0: - indexer[periods:] = np.arange(N - periods) - else: - indexer[:periods] = np.arange(-periods, N) - - return indexer - - def _asarray_tuplesafe(values, dtype=None): from pandas.core.index import Index diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4b28e6a09184a..ba6e7a33a7515 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3222,9 +3222,7 @@ def shift(self, periods=1, freq=None, axis=0, **kwds): return self if freq is None and not len(kwds): - block_axis = self._get_block_manager_axis(axis) - indexer = com._shift_indexer(len(self._get_axis(axis)), periods) - new_data = self._data.shift(indexer=indexer, periods=periods, axis=block_axis) + new_data = self._data.shift(periods=periods, axis=axis) else: return self.tshift(periods, freq, **kwds) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fc7b4bc23ac09..fe5ae48fea281 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -961,23 +961,20 @@ def diff(self, n): return [make_block(new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True)] - def shift(self, indexer, periods, axis=0): + def shift(self, periods, axis=0): """ shift the block by periods, possibly upcast """ - - new_values = self.values.take(indexer, axis=axis) # convert integer to float if necessary. 
need to do a lot more than # that, handle boolean etc also - new_values, fill_value = com._maybe_upcast(new_values) - + new_values, fill_value = com._maybe_upcast(self.values) + new_values = np.roll(new_values.T,periods,axis=axis) axis_indexer = [ slice(None) ] * self.ndim if periods > 0: axis_indexer[axis] = slice(None,periods) else: - axis_indexer = [ slice(None) ] * self.ndim axis_indexer[axis] = slice(periods,None) new_values[tuple(axis_indexer)] = fill_value - return [make_block(new_values, self.items, self.ref_items, + return [make_block(new_values.T, self.items, self.ref_items, ndim=self.ndim, fastpath=True)] def eval(self, func, other, raise_on_error=True, try_cast=False): @@ -1910,9 +1907,15 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): values = self.values if inplace else self.values.copy() return [self.make_block(values.get_values(value), fill_value=value)] - def shift(self, indexer, periods, axis=0): + + def shift(self, periods, axis=0): """ shift the block by periods """ - + N = len(self.values.T) + indexer = np.zeros(N, dtype=int) + if periods > 0: + indexer[periods:] = np.arange(N - periods) + else: + indexer[:periods] = np.arange(-periods, N) new_values = self.values.to_dense().take(indexer) # convert integer to float if necessary. 
need to do a lot more than # that, handle boolean etc also diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py index a70d756c82b0a..7f9063003191f 100644 --- a/vb_suite/frame_methods.py +++ b/vb_suite/frame_methods.py @@ -429,3 +429,16 @@ def test_unequal(name): setup, start_date=datetime(2014, 2, 7)) + +#------------------------------------------------------------------------- +# frame shift speedup issue-5609 + +setup = common_setup + """ +df = pd.DataFrame(np.random.rand(10000,500)) +""" +frame_shift_axis0 = Benchmark('df.shift(1,axis=0)', setup, + name = 'frame_shift_axis_0', + start_date=datetime(2014,1,1)) +frame_shift_axis1 = Benchmark('df.shift(1,axis=1)', setup, + name = 'frame_shift_axis_1', + start_date=datetime(2014,1,1)) \ No newline at end of file