From 518a62c4afc020c4b64e6a053cfe5efea5ce9cfc Mon Sep 17 00:00:00 2001 From: Aaron Staple Date: Sun, 12 Oct 2014 20:14:22 -0700 Subject: [PATCH] ENH: Move any/all to NDFrame, support additional arguments for Series. GH8302 --- doc/source/whatsnew/v0.15.2.txt | 16 ++++++- pandas/core/base.py | 18 -------- pandas/core/frame.py | 62 -------------------------- pandas/core/generic.py | 52 ++++++++++++++++++++++ pandas/core/index.py | 79 ++++++++++++++++++++++++++++++--- pandas/tests/test_index.py | 19 ++++++++ pandas/tests/test_panel.py | 19 ++++++++ pandas/tests/test_series.py | 27 +++++++++++ pandas/tseries/index.py | 4 ++ pandas/tseries/period.py | 3 ++ pandas/tseries/tdi.py | 3 ++ 11 files changed, 215 insertions(+), 87 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 4bcbcb82e7c83..52b0391534313 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -22,6 +22,20 @@ API changes - Bug in concat of Series with ``category`` dtype which were coercing to ``object``. (:issue:`8641`) +- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters. ``Series.all``, ``Series.any``, ``Index.all``, and ``Index.any`` no longer support the ``out`` and ``keepdims`` parameters, which existed for compatibility with ndarray. Various index types no longer support the ``all`` and ``any`` aggregation functions. (:issue:`8302`): + + .. ipython:: python + + s = pd.Series([False, True, False], index=[0, 0, 1]) + s.any(level=0) + +- ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`): + + .. ipython:: python + + p = pd.Panel(np.random.rand(2, 5, 4) > 0.1) + p.all() + .. _whatsnew_0152.enhancements: Enhancements @@ -44,4 +58,4 @@ Experimental Bug Fixes ~~~~~~~~~ - Bug in ``groupby`` signatures that didn't include *args or **kwargs (:issue:`8733`). -- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`). \ No newline at end of file +- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`). diff --git a/pandas/core/base.py b/pandas/core/base.py index fba83be6fcadf..f648af85b68c5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -268,18 +268,6 @@ def __unicode__(self): quote_strings=True) return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) -def _unbox(func): - @Appender(func.__doc__) - def f(self, *args, **kwargs): - result = func(self.values, *args, **kwargs) - from pandas.core.index import Index - if isinstance(result, (np.ndarray, com.ABCSeries, Index)) and result.ndim == 0: - # return NumPy type - return result.dtype.type(result.item()) - else: # pragma: no cover - return result - f.__name__ = func.__name__ - return f class IndexOpsMixin(object): """ common ops mixin to support a unified inteface / docs for Series / Index """ @@ -528,12 +516,6 @@ def duplicated(self, take_last=False): from pandas.core.index import Index return Index(duplicated) - #---------------------------------------------------------------------- - # unbox reductions - - all = _unbox(np.ndarray.all) - any = _unbox(np.ndarray.any) - #---------------------------------------------------------------------- # abstracts diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4ce9cc5804264..0ea53920ffe3c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4133,68 +4133,6 @@ def _count_level(self, level, axis=0, numeric_only=False): else: return result - def any(self, axis=None, bool_only=None, skipna=True, level=None, - **kwargs): - """ - Return whether any element is True over requested axis. - %(na_action)s - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a DataFrame - bool_only : boolean, default None - Only include boolean data. - - Returns - ------- - any : Series (or DataFrame if level specified) - """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('any', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanany, 'any', axis=axis, skipna=skipna, - numeric_only=bool_only, filter_type='bool') - - def all(self, axis=None, bool_only=None, skipna=True, level=None, - **kwargs): - """ - Return whether all elements are True over requested axis. - %(na_action)s - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a DataFrame - bool_only : boolean, default None - Only include boolean data. - - Returns - ------- - any : Series (or DataFrame if level specified) - """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('all', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanall, 'all', axis=axis, skipna=skipna, - numeric_only=bool_only, filter_type='bool') - def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 89178ba2d9dcc..89c6e5836022e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3888,6 +3888,7 @@ def _add_numeric_operations(cls): ]) name = (cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else 'scalar') + _num_doc = """ %(desc)s @@ -3905,6 +3906,27 @@ def _add_numeric_operations(cls): Include only float, int, boolean data. If None, will attempt to use everything, then use only numeric data +Returns +------- +%(outname)s : """ + name + " or " + cls.__name__ + " (if level specified)\n" + + _bool_doc = """ + +%(desc)s + +Parameters +---------- +axis : """ + axis_descr + """ +skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a """ + name + """ +bool_only : boolean, default None + Include only boolean data. If None, will attempt to use everything, + then use only boolean data + Returns ------- %(outname)s : """ + name + " or " + cls.__name__ + " (if level specified)\n" @@ -3971,6 +3993,36 @@ def stat_func(self, axis=None, skipna=None, level=None, want the *index* of the minimum, use ``idxmin``. This is the equivalent of the ``numpy.ndarray`` method ``argmin``.""", nanops.nanmin) + def _make_logical_function(name, desc, f): + + @Substitution(outname=name, desc=desc) + @Appender(_bool_doc) + def logical_func(self, axis=None, bool_only=None, skipna=None, + level=None, **kwargs): + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + if bool_only is not None: + raise NotImplementedError( + "Option bool_only is not implemented with option " + "level.") + return self._agg_by_level(name, axis=axis, level=level, + skipna=skipna) + return self._reduce(f, axis=axis, skipna=skipna, + numeric_only=bool_only, filter_type='bool', + name=name) + logical_func.__name__ = name + return logical_func + + cls.any = _make_logical_function( + 'any', 'Return whether any element is True over requested axis', + nanops.nanany) + cls.all = _make_logical_function( + 'all', 'Return whether all elements are True over requested axis', + nanops.nanall) + @Substitution(outname='mad', desc="Return the mean absolute deviation of the values " "for the requested axis") diff --git a/pandas/core/index.py b/pandas/core/index.py index a6907c3f8b5f2..02877072b8c74 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -14,7 +14,8 @@ import pandas.index as _index from pandas.lib import Timestamp, Timedelta, is_datetime_array from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, _shared_docs -from pandas.util.decorators import Appender, cache_readonly, deprecate +from pandas.util.decorators import (Appender, Substitution, cache_readonly, + deprecate) from pandas.core.common import isnull, array_equivalent import pandas.core.common as com from pandas.core.common import (_values_from_object, is_float, is_integer, @@ -2088,12 +2089,13 @@ def _evaluate_with_datetime_like(self, other, op, opstr): def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable """ - def _make_invalid_op(opstr): + def _make_invalid_op(name): - def _invalid_op(self, other=None): - raise TypeError("cannot perform {opstr} with this index type: {typ}".format(opstr=opstr, - typ=type(self))) - return _invalid_op + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: {typ}".format(name=name, + typ=type(self))) + invalid_op.__name__ = name + return invalid_op cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') @@ -2178,8 +2180,62 @@ def _evaluate_numeric_unary(self): cls.__abs__ = _make_evaluate_unary(lambda x: np.abs(x),'__abs__') cls.__inv__ = _make_evaluate_unary(lambda x: -x,'__inv__') + @classmethod + def _add_logical_methods(cls): + """ add in logical methods """ + + _doc = """ + + %(desc)s + + Parameters + ---------- + All arguments to numpy.%(outname)s are accepted. + + Returns + ------- + %(outname)s : bool or array_like (if axis is specified) + A single element array_like may be converted to bool.""" + + def _make_logical_function(name, desc, f): + + @Substitution(outname=name, desc=desc) + @Appender(_doc) + def logical_func(self, *args, **kwargs): + result = f(self.values) + if isinstance(result, (np.ndarray, com.ABCSeries, Index)) \ + and result.ndim == 0: + # return NumPy type + return result.dtype.type(result.item()) + else: # pragma: no cover + return result + logical_func.__name__ = name + return logical_func + + cls.all = _make_logical_function( + 'all', 'Return whether all elements are True', np.all) + cls.any = _make_logical_function( + 'any', 'Return whether any element is True', np.any) + + @classmethod + def _add_logical_methods_disabled(cls): + """ add in logical methods to disable """ + + def _make_invalid_op(name): + + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: {typ}".format(name=name, + typ=type(self))) + invalid_op.__name__ = name + return invalid_op + + cls.all = _make_invalid_op('all') + cls.any = _make_invalid_op('any') + Index._add_numeric_methods_disabled() +Index._add_logical_methods() + class NumericIndex(Index): """ @@ -2291,7 +2347,11 @@ def equals(self, other): def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None return Int64Index(joined, name=name) + + Int64Index._add_numeric_methods() +Int64Index._add_logical_methods() + class Float64Index(NumericIndex): @@ -2483,7 +2543,10 @@ def isin(self, values, level=None): self._validate_index_level(level) return lib.ismember_nans(self._array_values(), value_set, isnull(list(value_set)).any()) + + Float64Index._add_numeric_methods() +Float64Index._add_logical_methods_disabled() class MultiIndex(Index): @@ -4436,7 +4499,11 @@ def isin(self, values, level=None): return np.zeros(len(labs), dtype=np.bool_) else: return np.lib.arraysetops.in1d(labs, sought_labels) + + MultiIndex._add_numeric_methods_disabled() +MultiIndex._add_logical_methods_disabled() + # For utility purposes diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 8ab5c30c49f10..adb5e7d07fbe6 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -75,6 +75,15 @@ def test_numeric_compat(self): "cannot perform __floordiv__", lambda : 1 // idx) + def test_logical_compat(self): + idx = self.create_index() + tm.assertRaisesRegexp(TypeError, + 'cannot perform all', + lambda : idx.all()) + tm.assertRaisesRegexp(TypeError, + 'cannot perform any', + lambda : idx.any()) + def test_boolean_context_compat(self): # boolean context compat @@ -820,6 +829,11 @@ def test_take(self): expected = self.dateIndex[indexer] self.assertTrue(result.equals(expected)) + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + def _check_method_works(self, method): method(self.empty) method(self.dateIndex) @@ -1467,6 +1481,11 @@ def test_equals(self): self.assertTrue(self.index.equals(same_values)) self.assertTrue(same_values.equals(self.index)) + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + def test_identical(self): i = Index(self.index.copy()) self.assertTrue(i.identical(self.index)) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 01d086f57718c..7f902827ba5db 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -9,6 +9,7 @@ from pandas import Series, DataFrame, Index, isnull, notnull, pivot, MultiIndex from pandas.core.datetools import bday +from pandas.core.nanops import nanall, nanany from pandas.core.panel import Panel from pandas.core.series import remove_na import pandas.core.common as com @@ -2102,6 +2103,24 @@ def test_update_raise(self): np.testing.assert_raises(Exception, pan.update, *(pan,), **{'raise_conflict': True}) + def test_all_any(self): + self.assertTrue((self.panel.all(axis=0).values == + nanall(self.panel, axis=0)).all()) + self.assertTrue((self.panel.all(axis=1).values == + nanall(self.panel, axis=1).T).all()) + self.assertTrue((self.panel.all(axis=2).values == + nanall(self.panel, axis=2).T).all()) + self.assertTrue((self.panel.any(axis=0).values == + nanany(self.panel, axis=0)).all()) + self.assertTrue((self.panel.any(axis=1).values == + nanany(self.panel, axis=1).T).all()) + self.assertTrue((self.panel.any(axis=2).values == + nanany(self.panel, axis=2).T).all()) + + def test_all_any_unhandled(self): + self.assertRaises(NotImplementedError, self.panel.all, bool_only=True) + self.assertRaises(NotImplementedError, self.panel.any, bool_only=True) + class TestLongPanel(tm.TestCase): """ diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 9ecdcd2b12d75..c4c2eebacb0e9 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2474,6 +2474,33 @@ def test_all_any(self): self.assertFalse(bool_series.all()) self.assertTrue(bool_series.any()) + # Alternative types, with implicit 'object' dtype. + s = Series(['abc', True]) + self.assertEquals('abc', s.any()) # 'abc' || True => 'abc' + + def test_all_any_params(self): + # Check skipna, with implicit 'object' dtype. + s1 = Series([np.nan, True]) + s2 = Series([np.nan, False]) + self.assertTrue(s1.all(skipna=False)) # nan && True => True + self.assertTrue(s1.all(skipna=True)) + self.assertTrue(np.isnan(s2.any(skipna=False))) # nan || False => nan + self.assertFalse(s2.any(skipna=True)) + + # Check level. + s = pd.Series([False, False, True, True, False, True], + index=[0, 0, 1, 1, 2, 2]) + assert_series_equal(s.all(level=0), Series([False, True, False])) + assert_series_equal(s.any(level=0), Series([False, True, True])) + + # bool_only is not implemented with level option. + self.assertRaises(NotImplementedError, s.any, bool_only=True, level=0) + self.assertRaises(NotImplementedError, s.all, bool_only=True, level=0) + + # bool_only is not implemented alone. + self.assertRaises(NotImplementedError, s.any, bool_only=True) + self.assertRaises(NotImplementedError, s.all, bool_only=True) + def test_op_method(self): def check(series, other, check_reverse=False): simple_ops = ['add', 'sub', 'mul', 'floordiv', 'truediv', 'pow'] diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 52ab217cbffc6..bf99de902188f 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1665,9 +1665,13 @@ def to_julian_date(self): self.microsecond/3600.0/1e+6 + self.nanosecond/3600.0/1e+9 )/24.0) + + DatetimeIndex._add_numeric_methods_disabled() +DatetimeIndex._add_logical_methods_disabled() DatetimeIndex._add_datetimelike_methods() + def _generate_regular_range(start, end, periods, offset): if isinstance(offset, Tick): stride = offset.nanos diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 742d8651a4035..0b4ca5014e76b 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -1262,9 +1262,12 @@ def tz_localize(self, tz, infer_dst=False): """ raise NotImplementedError("Not yet implemented for PeriodIndex") + PeriodIndex._add_numeric_methods_disabled() +PeriodIndex._add_logical_methods_disabled() PeriodIndex._add_datetimelike_methods() + def _get_ordinal_range(start, end, periods, freq): if com._count_not_none(start, end, periods) < 2: raise ValueError('Must specify 2 of start, end, periods') diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 5a041ed09fb27..0d99cd16d8c99 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -890,9 +890,12 @@ def delete(self, loc): return TimedeltaIndex(new_tds, name=self.name, freq=freq) + TimedeltaIndex._add_numeric_methods() +TimedeltaIndex._add_logical_methods_disabled() TimedeltaIndex._add_datetimelike_methods() + def _is_convertible_to_index(other): """ return a boolean whether I can attempt conversion to a TimedeltaIndex """ if isinstance(other, TimedeltaIndex):