Skip to content

standardize signature for Index reductions, implement nanmean for datetime64 dtypes #24293

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Dec 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f38ffe1
standardize signature for Index reductions, implement nanmean for dat…
jbrockmendel Dec 15, 2018
04cf1f7
suppress warnings
jbrockmendel Dec 15, 2018
3efab79
requested edits
jbrockmendel Dec 15, 2018
a652439
implement view
jbrockmendel Dec 15, 2018
ce760d3
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 19, 2018
2380af6
pass skipna, tests
jbrockmendel Dec 19, 2018
4157f0b
fixup isort
jbrockmendel Dec 20, 2018
50642ae
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 23, 2018
0baedf3
fixup rebase screwups
jbrockmendel Dec 23, 2018
6e0e69f
move len-self checks
jbrockmendel Dec 23, 2018
e2c301b
fixup more rebase screwups
jbrockmendel Dec 23, 2018
fce67ac
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 24, 2018
4b4979f
do values viewing in one place
jbrockmendel Dec 24, 2018
82b8cdf
unpack Series
jbrockmendel Dec 27, 2018
ffe6ada
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 27, 2018
7f7693f
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 28, 2018
07c3102
implement _extract_datetimelike_values_and_dtype
jbrockmendel Dec 28, 2018
6c93410
fix mask
jbrockmendel Dec 28, 2018
4620c56
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 28, 2018
4777b75
requested docstring edits, remoe duplicated view
jbrockmendel Dec 28, 2018
aa4028a
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 28, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 37 additions & 9 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,10 +973,16 @@ def _ndarray_values(self):
def empty(self):
return not self.size

def max(self):
def max(self, axis=None, skipna=True):
"""
Return the maximum value of the Index.

Parameters
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also we have similar things in pandas/core/base, these should change as well.

----------
axis : int, optional
For compatibility with NumPy. Only 0 or None are allowed.
skipna : bool, default True

Returns
-------
scalar
Expand Down Expand Up @@ -1004,22 +1010,36 @@ def max(self):
>>> idx.max()
('b', 2)
"""
return nanops.nanmax(self.values)
nv.validate_minmax_axis(axis)
return nanops.nanmax(self._values, skipna=skipna)

def argmax(self, axis=None):
def argmax(self, axis=None, skipna=True):
"""
Return a ndarray of the maximum argument indexer.

Parameters
----------
axis : {None}
Dummy argument for consistency with Series
skipna : bool, default True

See Also
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be consisten about the See Also, e.g. make sure in all, and add a refernce to the Series.min function as well (as appropriate)

--------
numpy.ndarray.argmax
"""
return nanops.nanargmax(self.values)
nv.validate_minmax_axis(axis)
return nanops.nanargmax(self._values, skipna=skipna)

def min(self):
def min(self, axis=None, skipna=True):
"""
Return the minimum value of the Index.

Parameters
----------
axis : {None}
Dummy argument for consistency with Series
skipna : bool, default True

Returns
-------
scalar
Expand Down Expand Up @@ -1047,17 +1067,25 @@ def min(self):
>>> idx.min()
('a', 1)
"""
return nanops.nanmin(self.values)
nv.validate_minmax_axis(axis)
return nanops.nanmin(self._values, skipna=skipna)

def argmin(self, axis=None):
def argmin(self, axis=None, skipna=True):
"""
Return a ndarray of the minimum argument indexer.

Parameters
----------
axis : {None}
Dummy argument for consistency with Series
skipna : bool, default True

See Also
--------
numpy.ndarray.argmin
"""
return nanops.nanargmin(self.values)
nv.validate_minmax_axis(axis)
return nanops.nanargmin(self._values, skipna=skipna)

def tolist(self):
"""
Expand Down Expand Up @@ -1110,7 +1138,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
if func is None:
raise TypeError("{klass} cannot perform the operation {op}".format(
klass=self.__class__.__name__, op=name))
return func(**kwds)
return func(skipna=skipna, **kwds)

def _map_values(self, mapper, na_action=None):
"""
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ def _isna_ndarraylike(obj):
else:
values = obj
result = values.isna()
elif isinstance(obj, ABCDatetimeArray):
return obj.isna()
elif is_string_dtype(dtype):
# Working around NumPy ticket 1542
shape = values.shape
Expand Down
36 changes: 24 additions & 12 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,35 +266,41 @@ def tolist(self):
"""
return list(self.astype(object))

def min(self, axis=None, *args, **kwargs):
def min(self, axis=None, skipna=True, *args, **kwargs):
"""
Return the minimum value of the Index or minimum along
an axis.

See Also
--------
numpy.ndarray.min
Series.min : Return the minimum value in a Series.
"""
nv.validate_min(args, kwargs)
nv.validate_minmax_axis(axis)

try:
i8 = self.asi8
if not len(self):
return self._na_value

i8 = self.asi8
try:
# quick check
if len(i8) and self.is_monotonic:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these quick checks make sense for Index as they are immutable, but may not make much sense here (but i guess can evaluate later)

if i8[0] != iNaT:
return self._box_func(i8[0])

if self.hasnans:
min_stamp = self[~self._isnan].asi8.min()
if skipna:
min_stamp = self[~self._isnan].asi8.min()
else:
return self._na_value
else:
min_stamp = i8.min()
return self._box_func(min_stamp)
except ValueError:
return self._na_value

def argmin(self, axis=None, *args, **kwargs):
def argmin(self, axis=None, skipna=True, *args, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can these inherit the doc-string?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take a look. Might also get rid of *args while at it

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm the docstrings here and for the corresponding methods in base.IndexOpsMixin are kind of clunky. This may merit a separate look, @datapythonista ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know well what's the class hierarchy and whether makes sense to inherit. But we'll have to add Parameters, Returns and Examples section here if this is public.

"""
Returns the indices of the minimum values along an axis.

Expand All @@ -311,41 +317,47 @@ def argmin(self, axis=None, *args, **kwargs):
i8 = self.asi8
if self.hasnans:
mask = self._isnan
if mask.all():
if mask.all() or not skipna:
return -1
i8 = i8.copy()
i8[mask] = np.iinfo('int64').max
return i8.argmin()

def max(self, axis=None, *args, **kwargs):
def max(self, axis=None, skipna=True, *args, **kwargs):
"""
Return the maximum value of the Index or maximum along
an axis.

See Also
--------
numpy.ndarray.max
Series.max : Return the maximum value in a Series.
"""
nv.validate_max(args, kwargs)
nv.validate_minmax_axis(axis)

try:
i8 = self.asi8
if not len(self):
return self._na_value

i8 = self.asi8
try:
# quick check
if len(i8) and self.is_monotonic:
if i8[-1] != iNaT:
return self._box_func(i8[-1])

if self.hasnans:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

max_stamp = self[~self._isnan].asi8.max()
if skipna:
max_stamp = self[~self._isnan].asi8.max()
else:
return self._na_value
else:
max_stamp = i8.max()
return self._box_func(max_stamp)
except ValueError:
return self._na_value

def argmax(self, axis=None, *args, **kwargs):
def argmax(self, axis=None, skipna=True, *args, **kwargs):
"""
Returns the indices of the maximum values along an axis.

Expand All @@ -362,7 +374,7 @@ def argmax(self, axis=None, *args, **kwargs):
i8 = self.asi8
if self.hasnans:
mask = self._isnan
if mask.all():
if mask.all() or not skipna:
return -1
i8 = i8.copy()
i8[mask] = 0
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,12 +297,14 @@ def _minmax(self, meth):

return self._start + self._step * no_steps

def min(self):
def min(self, axis=None, skipna=True):
"""The minimum value of the RangeIndex"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same. why don't these inherit the doc-string

nv.validate_minmax_axis(axis)
return self._minmax('min')

def max(self):
def max(self, axis=None, skipna=True):
"""The maximum value of the RangeIndex"""
nv.validate_minmax_axis(axis)
return self._minmax('max')

def argsort(self, *args, **kwargs):
Expand Down
47 changes: 29 additions & 18 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

import numpy as np

from pandas._libs import lib, tslibs
from pandas._libs import iNaT, lib, tslibs
import pandas.compat as compat

from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
from pandas.core.dtypes.common import (
_get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype,
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_float,
is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
is_object_dtype, is_scalar, is_timedelta64_dtype)
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna

Expand Down Expand Up @@ -203,15 +203,28 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
if necessary copy and mask using the specified fill_value
copy = True will force the copy
"""
values = com.values_from_object(values)

if is_datetime64tz_dtype(values):
# com.values_from_object returns M8[ns] dtype instead of tz-aware,
# so this case must be handled separately from the rest
dtype = values.dtype
values = getattr(values, "_values", values)
else:
values = com.values_from_object(values)
dtype = values.dtype

if mask is None:
if isfinite:
mask = _isfinite(values)
else:
mask = isna(values)

dtype = values.dtype
if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
# changing timedelta64/datetime64 to int64 needs to happen after
# finding `mask` above
values = getattr(values, "asi8", values)
values = values.view(np.int64)

dtype_ok = _na_ok_dtype(dtype)

# get our fill value (in case we need to provide an alternative
Expand All @@ -232,8 +245,6 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
elif copy:
values = values.copy()

values = _view_if_needed(values)

# return a platform independent precision dtype
dtype_max = dtype
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
Expand All @@ -259,21 +270,19 @@ def _na_ok_dtype(dtype):
(np.integer, np.timedelta64, np.datetime64))


def _view_if_needed(values):
if is_datetime_or_timedelta_dtype(values):
return values.view(np.int64)
return values


def _wrap_results(result, dtype, fill_value=None):
""" wrap our results if needed """

if is_datetime64_dtype(dtype):
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
if fill_value is None:
# GH#24293
fill_value = iNaT
if not isinstance(result, np.ndarray):
tz = getattr(dtype, 'tz', None)
assert not isna(fill_value), "Expected non-null fill_value"
if result == fill_value:
result = np.nan
result = tslibs.Timestamp(result)
result = tslibs.Timestamp(result, tz=tz)
else:
result = result.view(dtype)
elif is_timedelta64_dtype(dtype):
Expand Down Expand Up @@ -426,7 +435,6 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None):
return _wrap_results(the_sum, dtype)


@disallow('M8')
@bottleneck_switch()
def nanmean(values, axis=None, skipna=True, mask=None):
"""
Expand Down Expand Up @@ -457,7 +465,8 @@ def nanmean(values, axis=None, skipna=True, mask=None):
values, skipna, 0, mask=mask)
dtype_sum = dtype_max
dtype_count = np.float64
if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype):
if (is_integer_dtype(dtype) or is_timedelta64_dtype(dtype) or
is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)):
dtype_sum = np.float64
elif is_float_dtype(dtype):
dtype_sum = dtype
Expand All @@ -466,7 +475,9 @@ def nanmean(values, axis=None, skipna=True, mask=None):
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

if axis is not None and getattr(the_sum, 'ndim', False):
the_mean = the_sum / count
with np.errstate(all="ignore"):
# suppress division by zero warnings
the_mean = the_sum / count
ct_mask = count == 0
if ct_mask.any():
the_mean[ct_mask] = np.nan
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@

from pandas.core.dtypes.common import (
_is_unorderable_exception, ensure_platform_int, is_bool,
is_categorical_dtype, is_datetime64tz_dtype, is_datetimelike, is_dict_like,
is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
is_datetimelike, is_dict_like, is_extension_array_dtype, is_extension_type,
is_hashable, is_integer, is_iterator, is_list_like, is_scalar,
is_string_like, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries)
from pandas.core.dtypes.missing import (
Expand Down Expand Up @@ -3537,6 +3538,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
# dispatch to ExtensionArray interface
if isinstance(delegate, ExtensionArray):
return delegate._reduce(name, skipna=skipna, **kwds)
elif is_datetime64_dtype(delegate):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be first?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it isn't clear to me that it would make a difference

# use DatetimeIndex implementation to handle skipna correctly
delegate = DatetimeIndex(delegate)

# dispatch to numpy arrays
elif isinstance(delegate, np.ndarray):
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -895,10 +895,18 @@ def test_max_min(self, start, stop, step):
result = idx.max()
assert result == expected

# skipna should be irrelevant since RangeIndex should never have NAs
result2 = idx.max(skipna=False)
assert result2 == expected

expected = idx._int64index.min()
result = idx.min()
assert result == expected

# skipna should be irrelevant since RangeIndex should never have NAs
result2 = idx.min(skipna=False)
assert result2 == expected

# empty
idx = RangeIndex(start, stop, -step)
assert isna(idx.max())
Expand Down
Loading