From 0d3928683ca0895f945f68d2c8257dd8f350aee0 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Tue, 13 Feb 2018 00:39:46 +0000 Subject: [PATCH 01/14] Adding numpy nansum/nanmean, etc etc to _cython_table --- pandas/core/base.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 3d8f5f265e3db..ab411bc78c618 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -187,15 +187,25 @@ class SelectionMixin(object): builtins.max: 'max', builtins.min: 'min', np.sum: 'sum', + np.nansum: 'sum', np.mean: 'mean', + np.nanmean: 'mean', np.prod: 'prod', + np.nanprod: 'prod', np.std: 'std', + np.nanstd: 'std', np.var: 'var', + np.nanvar: 'var', np.median: 'median', + np.nanmedian: 'median', np.max: 'max', + np.nanmax: 'max', np.min: 'min', + np.nanmin: 'min', np.cumprod: 'cumprod', - np.cumsum: 'cumsum' + np.nancumprod: 'cumprod', + np.cumsum: 'cumsum', + np.nancumsum: 'cumsum' } @property From 47936c7761ee33d9c637ad7ad85a596eec85e179 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Tue, 13 Feb 2018 22:48:00 +0000 Subject: [PATCH 02/14] Adding in tests, implementing suggested whatsnew entry --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/tests/test_nanops.py | 49 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 72f63a4da0f4d..bc975a5dfbf8b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -747,7 +747,7 @@ Numeric - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) - Bug in :class:`DataFrame` flex arithmetic (e.g. 
`df.add(other, fill_value=foo)`) with a `fill_value` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) - +- :meth:`~DataFrame.agg` now correctly handles numpy NaN-aware methods like :meth:`numpy.nansum` (:issue:`19629`) Indexing ^^^^^^^^ diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index df3c49a73d227..2985ff2948493 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1004,6 +1004,55 @@ def prng(self): return np.random.RandomState(1234) +class TestNumpyNaNFunctions(object): + + # xref GH 19629 + + def setup_method(self, method): + self.test_series = pd.Series([1, 2, 3, 4, 5, 6]) + self.test_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + + def test_np_sum(self): + tm.assert_almost_equal(self.test_series.agg(np.sum), self.test_series.agg(np.nansum), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.sum), self.test_df.agg(np.nansum), check_exact=True) + + def test_np_mean(self): + tm.assert_almost_equal(self.test_series.agg(np.mean), self.test_series.agg(np.nanmean), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.mean), self.test_df.agg(np.nanmean), check_exact=True) + + def test_np_prod(self): + tm.assert_almost_equal(self.test_series.agg(np.prod), self.test_series.agg(np.nanprod), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.prod), self.test_df.agg(np.nanprod), check_exact=True) + + def test_np_std(self): + tm.assert_almost_equal(self.test_series.agg(np.std), self.test_series.agg(np.nanstd), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.std), self.test_df.agg(np.nanstd), check_exact=True) + + def test_np_var(self): + tm.assert_almost_equal(self.test_series.agg(np.var), self.test_series.agg(np.nanvar), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.var), self.test_df.agg(np.nanvar), check_exact=True) + + def test_np_median(self): + 
tm.assert_almost_equal(self.test_series.agg(np.median), self.test_series.agg(np.nanmedian), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.median), self.test_df.agg(np.nanmedian), check_exact=True) + + def test_np_max(self): + tm.assert_almost_equal(self.test_series.agg(np.max), self.test_series.agg(np.nanmax), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.max), self.test_df.agg(np.nanmax), check_exact=True) + + def test_np_min(self): + tm.assert_almost_equal(self.test_series.agg(np.min), self.test_series.agg(np.nanmin), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.min), self.test_df.agg(np.nanmin), check_exact=True) + + def test_np_cumprod(self): + tm.assert_almost_equal(self.test_series.agg(np.cumprod), self.test_series.agg(np.nancumprod), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.cumprod), self.test_df.agg(np.nancumprod), check_exact=True) + + def test_np_cumsum(self): + tm.assert_almost_equal(self.test_series.agg(np.cumsum), self.test_series.agg(np.nancumsum), check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.cumsum), self.test_df.agg(np.nancumsum), check_exact=True) + + def test_use_bottleneck(): if nanops._BOTTLENECK_INSTALLED: From d2671e6468dbbb2fd7b1b55e4f9c79f8f3edd3c5 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Tue, 13 Feb 2018 22:51:29 +0000 Subject: [PATCH 03/14] Fixing flake8 errors --- pandas/tests/test_nanops.py | 80 +++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 2985ff2948493..ff12c65f326de 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1013,44 +1013,84 @@ def setup_method(self, method): self.test_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) def test_np_sum(self): - tm.assert_almost_equal(self.test_series.agg(np.sum), self.test_series.agg(np.nansum), check_exact=True) - 
tm.assert_almost_equal(self.test_df.agg(np.sum), self.test_df.agg(np.nansum), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.sum), + self.test_series.agg(np.nansum), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.sum), + self.test_df.agg(np.nansum), + check_exact=True) def test_np_mean(self): - tm.assert_almost_equal(self.test_series.agg(np.mean), self.test_series.agg(np.nanmean), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.mean), self.test_df.agg(np.nanmean), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.mean), + self.test_series.agg(np.nanmean), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.mean), + self.test_df.agg(np.nanmean), + check_exact=True) def test_np_prod(self): - tm.assert_almost_equal(self.test_series.agg(np.prod), self.test_series.agg(np.nanprod), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.prod), self.test_df.agg(np.nanprod), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.prod), + self.test_series.agg(np.nanprod), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.prod), + self.test_df.agg(np.nanprod), + check_exact=True) def test_np_std(self): - tm.assert_almost_equal(self.test_series.agg(np.std), self.test_series.agg(np.nanstd), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.std), self.test_df.agg(np.nanstd), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.std), + self.test_series.agg(np.nanstd), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.std), + self.test_df.agg(np.nanstd), + check_exact=True) def test_np_var(self): - tm.assert_almost_equal(self.test_series.agg(np.var), self.test_series.agg(np.nanvar), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.var), self.test_df.agg(np.nanvar), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.var), + self.test_series.agg(np.nanvar), + check_exact=True) + 
tm.assert_almost_equal(self.test_df.agg(np.var), + self.test_df.agg(np.nanvar), + check_exact=True) def test_np_median(self): - tm.assert_almost_equal(self.test_series.agg(np.median), self.test_series.agg(np.nanmedian), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.median), self.test_df.agg(np.nanmedian), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.median), + self.test_series.agg(np.nanmedian), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.median), + self.test_df.agg(np.nanmedian), + check_exact=True) def test_np_max(self): - tm.assert_almost_equal(self.test_series.agg(np.max), self.test_series.agg(np.nanmax), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.max), self.test_df.agg(np.nanmax), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.max), + self.test_series.agg(np.nanmax), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.max), + self.test_df.agg(np.nanmax), + check_exact=True) def test_np_min(self): - tm.assert_almost_equal(self.test_series.agg(np.min), self.test_series.agg(np.nanmin), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.min), self.test_df.agg(np.nanmin), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.min), + self.test_series.agg(np.nanmin), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.min), + self.test_df.agg(np.nanmin), + check_exact=True) def test_np_cumprod(self): - tm.assert_almost_equal(self.test_series.agg(np.cumprod), self.test_series.agg(np.nancumprod), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.cumprod), self.test_df.agg(np.nancumprod), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.cumprod), + self.test_series.agg(np.nancumprod), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.cumprod), + self.test_df.agg(np.nancumprod), + check_exact=True) def test_np_cumsum(self): - 
tm.assert_almost_equal(self.test_series.agg(np.cumsum), self.test_series.agg(np.nancumsum), check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.cumsum), self.test_df.agg(np.nancumsum), check_exact=True) + tm.assert_almost_equal(self.test_series.agg(np.cumsum), + self.test_series.agg(np.nancumsum), + check_exact=True) + tm.assert_almost_equal(self.test_df.agg(np.cumsum), + self.test_df.agg(np.nancumsum), + check_exact=True) def test_use_bottleneck(): From 29ccb188132e265a485c7b53a77d299ca0c765ba Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Fri, 2 Mar 2018 00:50:35 +0000 Subject: [PATCH 04/14] PR comments, support for np.nanprod --- pandas/core/base.py | 7 ++- pandas/tests/test_nanops.py | 108 ++++++++---------------------------- 2 files changed, 30 insertions(+), 85 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index ab411bc78c618..af06528f92484 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -191,7 +191,6 @@ class SelectionMixin(object): np.mean: 'mean', np.nanmean: 'mean', np.prod: 'prod', - np.nanprod: 'prod', np.std: 'std', np.nanstd: 'std', np.var: 'var', @@ -208,6 +207,12 @@ class SelectionMixin(object): np.nancumsum: 'cumsum' } + # np.nanprod was added in np version 1.10.0, we currently support >= 1.9 + try: + _cython_table[np.nanprod] = 'prod' + except AttributeError: + pass + @property def _selection_name(self): """ diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index ff12c65f326de..549135b0554f4 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1007,90 +1007,30 @@ def prng(self): class TestNumpyNaNFunctions(object): # xref GH 19629 - - def setup_method(self, method): - self.test_series = pd.Series([1, 2, 3, 4, 5, 6]) - self.test_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - - def test_np_sum(self): - tm.assert_almost_equal(self.test_series.agg(np.sum), - self.test_series.agg(np.nansum), - check_exact=True) - 
tm.assert_almost_equal(self.test_df.agg(np.sum), - self.test_df.agg(np.nansum), - check_exact=True) - - def test_np_mean(self): - tm.assert_almost_equal(self.test_series.agg(np.mean), - self.test_series.agg(np.nanmean), - check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.mean), - self.test_df.agg(np.nanmean), - check_exact=True) - - def test_np_prod(self): - tm.assert_almost_equal(self.test_series.agg(np.prod), - self.test_series.agg(np.nanprod), - check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.prod), - self.test_df.agg(np.nanprod), - check_exact=True) - - def test_np_std(self): - tm.assert_almost_equal(self.test_series.agg(np.std), - self.test_series.agg(np.nanstd), - check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.std), - self.test_df.agg(np.nanstd), - check_exact=True) - - def test_np_var(self): - tm.assert_almost_equal(self.test_series.agg(np.var), - self.test_series.agg(np.nanvar), - check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.var), - self.test_df.agg(np.nanvar), - check_exact=True) - - def test_np_median(self): - tm.assert_almost_equal(self.test_series.agg(np.median), - self.test_series.agg(np.nanmedian), - check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.median), - self.test_df.agg(np.nanmedian), - check_exact=True) - - def test_np_max(self): - tm.assert_almost_equal(self.test_series.agg(np.max), - self.test_series.agg(np.nanmax), - check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.max), - self.test_df.agg(np.nanmax), - check_exact=True) - - def test_np_min(self): - tm.assert_almost_equal(self.test_series.agg(np.min), - self.test_series.agg(np.nanmin), - check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.min), - self.test_df.agg(np.nanmin), - check_exact=True) - - def test_np_cumprod(self): - tm.assert_almost_equal(self.test_series.agg(np.cumprod), - self.test_series.agg(np.nancumprod), - check_exact=True) - 
tm.assert_almost_equal(self.test_df.agg(np.cumprod), - self.test_df.agg(np.nancumprod), - check_exact=True) - - def test_np_cumsum(self): - tm.assert_almost_equal(self.test_series.agg(np.cumsum), - self.test_series.agg(np.nancumsum), - check_exact=True) - tm.assert_almost_equal(self.test_df.agg(np.cumsum), - self.test_df.agg(np.nancumsum), - check_exact=True) + methods_to_test = [ + (np.sum, np.nansum), + (np.mean, np.nanmean), + (np.prod, np.nanprod), + (np.std, np.nanstd), + (np.var, np.nanvar), + (np.median, np.nanmedian), + (np.max, np.nanmax), + (np.min, np.nanmin), + (np.cumprod, np.nancumprod), + (np.cumsum, np.nancumsum) + ] + + def test_np_nan_functions(self): + test_series = pd.Series([1, 2, 3, 4, 5, 6]) + test_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + + for standard, nan_method in self.methods_to_test: + tm.assert_almost_equal(test_series.agg(standard), + test_series.agg(nan_method), + check_exact=True) + tm.assert_almost_equal(test_df.agg(standard), + test_df.agg(nan_method), + check_exact=True) def test_use_bottleneck(): From 130f76771a0803fcd90d8cf4c6b74bdc5c6527fb Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Fri, 2 Mar 2018 01:19:47 +0000 Subject: [PATCH 05/14] skipping if nanprod not implemented --- pandas/tests/test_nanops.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 5448e775dc644..b472ba62283ef 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1011,7 +1011,6 @@ class TestNumpyNaNFunctions(object): methods_to_test = [ (np.sum, np.nansum), (np.mean, np.nanmean), - (np.prod, np.nanprod), (np.std, np.nanstd), (np.var, np.nanvar), (np.median, np.nanmedian), @@ -1021,17 +1020,24 @@ class TestNumpyNaNFunctions(object): (np.cumsum, np.nancumsum) ] - def test_np_nan_functions(self): - test_series = pd.Series([1, 2, 3, 4, 5, 6]) - test_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + def setup_method(self, 
method): + self.test_series = pd.Series([1, 2, 3, 4, 5, 6]) + self.test_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + + def compare_method_output(self, data, method, nan_method): + tm.assert_almost_equal(data.agg(method), + data.agg(nan_method), + check_exact=True) + def test_np_nan_functions(self): for standard, nan_method in self.methods_to_test: - tm.assert_almost_equal(test_series.agg(standard), - test_series.agg(nan_method), - check_exact=True) - tm.assert_almost_equal(test_df.agg(standard), - test_df.agg(nan_method), - check_exact=True) + self.compare_method_output(self.test_series, standard, nan_method) + self.compare_method_output(self.test_df, standard, nan_method) + + @td.skip_if_no("numpy", min_version="1.10.0") + def test_np_nanprod(self): + self.compare_method_output(self.test_series, np.prod, np.nanprod) + self.compare_method_output(self.test_df, np.prod, np.nanprod) def test_use_bottleneck(): From 0e4657a7bcac3941e4e906a0e400ebe157118b56 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Fri, 2 Mar 2018 01:22:57 +0000 Subject: [PATCH 06/14] Checking np version explicitly --- pandas/core/base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 03574b6aaffa7..56efe88afd600 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -21,7 +21,7 @@ from pandas.core import common as com, algorithms import pandas.core.nanops as nanops import pandas._libs.lib as lib -from pandas.compat.numpy import function as nv +from pandas.compat.numpy import function as nv, _np_version_under1p10 from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) @@ -208,11 +208,8 @@ class SelectionMixin(object): np.nancumsum: 'cumsum' } - # np.nanprod was added in np version 1.10.0, we currently support >= 1.9 - try: + if not _np_version_under1p10: _cython_table[np.nanprod] = 'prod' - except AttributeError: - pass @property def 
_selection_name(self): From 64c0d939fce2e2875ba1d26ae65d5ad2ba1d1e51 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Fri, 2 Mar 2018 01:29:01 +0000 Subject: [PATCH 07/14] Using pytest params --- pandas/tests/test_nanops.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index b472ba62283ef..3129729d2b9d4 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1008,18 +1008,6 @@ def prng(self): class TestNumpyNaNFunctions(object): # xref GH 19629 - methods_to_test = [ - (np.sum, np.nansum), - (np.mean, np.nanmean), - (np.std, np.nanstd), - (np.var, np.nanvar), - (np.median, np.nanmedian), - (np.max, np.nanmax), - (np.min, np.nanmin), - (np.cumprod, np.nancumprod), - (np.cumsum, np.nancumsum) - ] - def setup_method(self, method): self.test_series = pd.Series([1, 2, 3, 4, 5, 6]) self.test_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) @@ -1029,10 +1017,20 @@ def compare_method_output(self, data, method, nan_method): data.agg(nan_method), check_exact=True) - def test_np_nan_functions(self): - for standard, nan_method in self.methods_to_test: - self.compare_method_output(self.test_series, standard, nan_method) - self.compare_method_output(self.test_df, standard, nan_method) + @pytest.mark.parametrize("standard, nan_method", [ + (np.sum, np.nansum), + (np.mean, np.nanmean), + (np.std, np.nanstd), + (np.var, np.nanvar), + (np.median, np.nanmedian), + (np.max, np.nanmax), + (np.min, np.nanmin), + (np.cumprod, np.nancumprod), + (np.cumsum, np.nancumsum) + ]) + def test_np_nan_functions(self, standard, nan_method): + self.compare_method_output(self.test_series, standard, nan_method) + self.compare_method_output(self.test_df, standard, nan_method) @td.skip_if_no("numpy", min_version="1.10.0") def test_np_nanprod(self): From fdaeaf98e133841b15c3f95e62b07ada6dafb792 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Fri, 9 Mar 2018 20:46:43 +0100 
Subject: [PATCH 08/14] Updating for np 1.12 --- pandas/core/base.py | 9 ++++++--- pandas/tests/test_nanops.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 56efe88afd600..51bf6a6ee9c3b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -21,7 +21,8 @@ from pandas.core import common as com, algorithms import pandas.core.nanops as nanops import pandas._libs.lib as lib -from pandas.compat.numpy import function as nv, _np_version_under1p10 +from pandas.compat.numpy import (function as nv, _np_version_under1p10, + _np_version_under1p12) from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) @@ -203,14 +204,16 @@ class SelectionMixin(object): np.min: 'min', np.nanmin: 'min', np.cumprod: 'cumprod', - np.nancumprod: 'cumprod', np.cumsum: 'cumsum', - np.nancumsum: 'cumsum' } if not _np_version_under1p10: _cython_table[np.nanprod] = 'prod' + if not _np_version_under1p12: + _cython_table[np.nancumprod] = 'cumprod' + _cython_table[np.nancumsum] = 'cumsum' + @property def _selection_name(self): """ diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 3129729d2b9d4..a6ab027c19c1a 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1024,9 +1024,7 @@ def compare_method_output(self, data, method, nan_method): (np.var, np.nanvar), (np.median, np.nanmedian), (np.max, np.nanmax), - (np.min, np.nanmin), - (np.cumprod, np.nancumprod), - (np.cumsum, np.nancumsum) + (np.min, np.nanmin) ]) def test_np_nan_functions(self, standard, nan_method): self.compare_method_output(self.test_series, standard, nan_method) @@ -1037,6 +1035,15 @@ def test_np_nanprod(self): self.compare_method_output(self.test_series, np.prod, np.nanprod) self.compare_method_output(self.test_df, np.prod, np.nanprod) + @td.skip_if_no("numpy", min_version="1.12.0") + @pytest.mark.parametrize("standard, 
nan_method", [ + (np.cumprod, np.nancumprod), + (np.cumsum, np.nancumsum) + ]) + def test_np_nancumprod(self, standard, nan_method): + self.compare_method_output(self.test_series, standard, nan_method) + self.compare_method_output(self.test_df, standard, nan_method) + def test_use_bottleneck(): From 0c5a2ae58881715467ec58ee7049010cba241484 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Fri, 9 Mar 2018 20:49:27 +0100 Subject: [PATCH 09/14] Fixing bad indentation --- pandas/core/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 51bf6a6ee9c3b..76c839d5125a5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -22,7 +22,7 @@ import pandas.core.nanops as nanops import pandas._libs.lib as lib from pandas.compat.numpy import (function as nv, _np_version_under1p10, - _np_version_under1p12) + _np_version_under1p12) from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) From 715716185807140d137428b05a0190d6a791e1ab Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Fri, 9 Mar 2018 23:49:46 +0100 Subject: [PATCH 10/14] Moving compat test functions inline to prevent build time issue --- pandas/tests/test_nanops.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index a6ab027c19c1a..e04a4b3700ba0 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1036,13 +1036,15 @@ def test_np_nanprod(self): self.compare_method_output(self.test_df, np.prod, np.nanprod) @td.skip_if_no("numpy", min_version="1.12.0") - @pytest.mark.parametrize("standard, nan_method", [ - (np.cumprod, np.nancumprod), - (np.cumsum, np.nancumsum) - ]) - def test_np_nancumprod(self, standard, nan_method): - self.compare_method_output(self.test_series, standard, nan_method) - self.compare_method_output(self.test_df, standard, nan_method) + def 
test_np_nancumprod(self): + # Not using pytest params as fails at build time + methods = [ + (np.cumprod, np.nancumprod), + (np.cumsum, np.nancumsum) + ] + for standard, nan_method in methods: + self.compare_method_output(self.test_series, standard, nan_method) + self.compare_method_output(self.test_df, standard, nan_method) def test_use_bottleneck(): From 8c2a5dd64443d017d5837efb6913445f23ebca9b Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Thu, 22 Mar 2018 19:16:13 +0000 Subject: [PATCH 11/14] Making use of fixtures for series and df generation --- pandas/tests/test_nanops.py | 80 +++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index e04a4b3700ba0..3fd74d5f60c69 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1005,46 +1005,50 @@ def prng(self): return np.random.RandomState(1234) -class TestNumpyNaNFunctions(object): +def _compare_nan_method_output(data, method, nan_method): + tm.assert_almost_equal(data.agg(method), + data.agg(nan_method), + check_exact=True) - # xref GH 19629 - def setup_method(self, method): - self.test_series = pd.Series([1, 2, 3, 4, 5, 6]) - self.test_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - - def compare_method_output(self, data, method, nan_method): - tm.assert_almost_equal(data.agg(method), - data.agg(nan_method), - check_exact=True) - - @pytest.mark.parametrize("standard, nan_method", [ - (np.sum, np.nansum), - (np.mean, np.nanmean), - (np.std, np.nanstd), - (np.var, np.nanvar), - (np.median, np.nanmedian), - (np.max, np.nanmax), - (np.min, np.nanmin) - ]) - def test_np_nan_functions(self, standard, nan_method): - self.compare_method_output(self.test_series, standard, nan_method) - self.compare_method_output(self.test_df, standard, nan_method) - @td.skip_if_no("numpy", min_version="1.10.0") - def test_np_nanprod(self): - self.compare_method_output(self.test_series, np.prod, np.nanprod) - 
self.compare_method_output(self.test_df, np.prod, np.nanprod) - - @td.skip_if_no("numpy", min_version="1.12.0") - def test_np_nancumprod(self): - # Not using pytest params as fails at build time - methods = [ - (np.cumprod, np.nancumprod), - (np.cumsum, np.nancumsum) - ] - for standard, nan_method in methods: - self.compare_method_output(self.test_series, standard, nan_method) - self.compare_method_output(self.test_df, standard, nan_method) +@pytest.fixture(params=[ + pd.Series([1, 2, 3, 4, 5, 6]), + pd.DataFrame([[1, 2, 3], [4, 5, 6]]) +]) +def nan_test_object(request): + return request.param + + +@pytest.mark.parametrize("standard, nan_method", [ + (np.sum, np.nansum), + (np.mean, np.nanmean), + (np.std, np.nanstd), + (np.var, np.nanvar), + (np.median, np.nanmedian), + (np.max, np.nanmax), + (np.min, np.nanmin) +]) +def test_np_nan_functions(standard, nan_method, nan_test_object): + _compare_nan_method_output(nan_test_object, standard, nan_method) + _compare_nan_method_output(nan_test_object, standard, nan_method) + + +@td.skip_if_no("numpy", min_version="1.10.0") +def test_np_nanprod(nan_test_object): + _compare_nan_method_output(nan_test_object, np.prod, np.nanprod) + _compare_nan_method_output(nan_test_object, np.prod, np.nanprod) + + +@td.skip_if_no("numpy", min_version="1.12.0") +def test_np_nancumprod(nan_test_object): + # Not using pytest params for methods as will fail at build time + methods = [ + (np.cumprod, np.nancumprod), + (np.cumsum, np.nancumsum) + ] + for standard, nan_method in methods: + _compare_nan_method_output(nan_test_object, standard, nan_method) + _compare_nan_method_output(nan_test_object, standard, nan_method) def test_use_bottleneck(): From 5326d56a0b155fa03942261f2c1de321f62248c3 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Thu, 22 Mar 2018 19:31:02 +0000 Subject: [PATCH 12/14] Fixing silly formatting, removing external function to compare --- pandas/tests/test_nanops.py | 21 +++++++++------------ 1 file changed, 9 
insertions(+), 12 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 3fd74d5f60c69..e201641d7a718 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1005,12 +1005,6 @@ def prng(self): return np.random.RandomState(1234) -def _compare_nan_method_output(data, method, nan_method): - tm.assert_almost_equal(data.agg(method), - data.agg(nan_method), - check_exact=True) - - @pytest.fixture(params=[ pd.Series([1, 2, 3, 4, 5, 6]), pd.DataFrame([[1, 2, 3], [4, 5, 6]]) @@ -1029,14 +1023,16 @@ def nan_test_object(request): (np.min, np.nanmin) ]) def test_np_nan_functions(standard, nan_method, nan_test_object): - _compare_nan_method_output(nan_test_object, standard, nan_method) - _compare_nan_method_output(nan_test_object, standard, nan_method) + tm.assert_almost_equal(nan_test_object.agg(standard), + nan_test_object.agg(nan_method), + check_exact=True) @td.skip_if_no("numpy", min_version="1.10.0") def test_np_nanprod(nan_test_object): - _compare_nan_method_output(nan_test_object, np.prod, np.nanprod) - _compare_nan_method_output(nan_test_object, np.prod, np.nanprod) + tm.assert_almost_equal(nan_test_object.agg(np.prod), + nan_test_object.agg(np.nanprod), + check_exact=True) @td.skip_if_no("numpy", min_version="1.12.0") @@ -1047,8 +1043,9 @@ def test_np_nancumprod(nan_test_object): (np.cumsum, np.nancumsum) ] for standard, nan_method in methods: - _compare_nan_method_output(nan_test_object, standard, nan_method) - _compare_nan_method_output(nan_test_object, standard, nan_method) + tm.assert_almost_equal(nan_test_object.agg(standard), + nan_test_object.agg(nan_method), + check_exact=True) def test_use_bottleneck(): From 528e12b323406e0cba40add55ff87f4f7aa3bba0 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Tue, 17 Apr 2018 01:36:36 +0100 Subject: [PATCH 13/14] Restoring whatsnew to normal after messy merge --- doc/source/whatsnew/v0.23.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 55376bbb934e4..fccb07ab3666e 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1086,9 +1086,10 @@ Numeric - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) - Bug in :class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) - Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) - Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (provided) (:issue:`19873`) - :meth:`~DataFrame.agg` now correctly handles numpy NaN-aware methods like :meth:`numpy.nansum` (:issue:`19629`) -- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) + Indexing ^^^^^^^^ From e88ac1044f4206911f5bcd8b4cffad12b77f640c Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Tue, 17 Apr 2018 01:42:33 +0100 Subject: [PATCH 14/14] More whatsnew cleanup --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fccb07ab3666e..dddeff69c3f1c 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1087,7 +1087,7 @@ Numeric - Bug in :class:`DataFrame` flex arithmetic (e.g. 
``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) - Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) - Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) -- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (provided) (:issue:`19873`) +- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`) - :meth:`~DataFrame.agg` now correctly handles numpy NaN-aware methods like :meth:`numpy.nansum` (:issue:`19629`)