Skip to content

CLN: Move some PI/DTI methods to EA subclasses, implement tests #22961

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 8, 2018
67 changes: 66 additions & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from pandas.core.algorithms import checked_add_with_arr
from pandas.core import ops

from pandas.tseries.frequencies import to_offset
from pandas.tseries.frequencies import to_offset, get_period_alias
from pandas.tseries.offsets import Tick, generate_range

from pandas.core.arrays import datetimelike as dtl
Expand Down Expand Up @@ -200,6 +200,10 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
# e.g. DatetimeIndex
tz = values.tz

if freq is None and hasattr(values, "freq"):
# i.e. DatetimeArray, DatetimeIndex
freq = values.freq

freq, freq_infer = dtl.maybe_infer_freq(freq)

# if dtype has an embedded tz, capture it
Expand Down Expand Up @@ -764,6 +768,67 @@ def normalize(self):
new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz)
return type(self)(new_values, freq='infer').tz_localize(self.tz)

def to_period(self, freq=None):
"""
Cast to PeriodArray/Index at a particular frequency.

Converts DatetimeArray/Index to PeriodArray/Index.

Parameters
----------
freq : string or Offset, optional
One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
or an Offset object. Will be inferred by default.

Returns
-------
PeriodArray/Index

Raises
------
ValueError
When converting a DatetimeArray/Index with non-regular values,
so that a frequency cannot be inferred.

Warns
-----
UserWarning
When the data is timezone-aware, because the conversion drops the
timezone information.

Examples
--------
>>> df = pd.DataFrame({"y": [1,2,3]},
... index=pd.to_datetime(["2000-03-31 00:00:00",
... "2000-05-31 00:00:00",
... "2000-08-31 00:00:00"]))
>>> df.index.to_period("M")
PeriodIndex(['2000-03', '2000-05', '2000-08'],
dtype='period[M]', freq='M')

Infer the daily frequency

>>> idx = pd.date_range("2017-01-01", periods=2)
>>> idx.to_period()
PeriodIndex(['2017-01-01', '2017-01-02'],
dtype='period[D]', freq='D')

See Also
--------
pandas.PeriodIndex: Immutable ndarray holding ordinal values
pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object
"""
# Imported at call time rather than at module level.
# NOTE(review): presumably this avoids a circular import -- confirm.
from pandas.core.arrays.period import PeriodArrayMixin

if self.tz is not None:
# Tell the caller that the conversion drops timezone information.
warnings.warn("Converting to PeriodArray/Index representation "
"will drop timezone information.", UserWarning)

if freq is None:
# No explicit frequency: use the one already set, else the inferred one.
freq = self.freqstr or self.inferred_freq

if freq is None:
# Neither a set frequency nor an inferred one is available.
raise ValueError("You must pass a freq argument as "
"current index has none.")

# NOTE(review): presumably maps the offset alias onto the matching
# period alias -- confirm against get_period_alias.
freq = get_period_alias(freq)

return PeriodArrayMixin(self.values, freq=freq)

# -----------------------------------------------------------------
# Properties - Vectorized Timestamp Properties/Methods

Expand Down
68 changes: 66 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX,
get_period_field_arr, period_asfreq_arr)
from pandas._libs.tslibs import period as libperiod
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta
from pandas._libs.tslibs.fields import isleapyear_arr

from pandas import compat
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import (
is_integer_dtype, is_float_dtype, is_period_dtype)
is_integer_dtype, is_float_dtype, is_period_dtype,
is_datetime64_dtype)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries

Expand Down Expand Up @@ -127,6 +128,10 @@ def __new__(cls, values, freq=None, **kwargs):
freq = values.freq
values = values.asi8

elif is_datetime64_dtype(values):
# TODO: what if it has tz?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably a warning, right? Maybe push the one that's in DatetimeIndex.to_period down here? That can be a separate PR though, since there may be other places calling PeriodArray with tz-aware values.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so, yes.

If it doesn't mess up your plans too much, after this I'd like to do a PR cleaning up some of the duplicated/convoluted constructors in (Period|Datetime|Timedelta)(Index|ArrayMixin). I think making some helper functions etc before the bigger refactor would help focus attention during the latter.

values = dt64arr_to_periodarr(values, freq)

return cls._simple_new(values, freq, **kwargs)

@classmethod
Expand Down Expand Up @@ -207,6 +212,14 @@ def is_leap_year(self):
""" Logical indicating if the date belongs to a leap year """
return isleapyear_arr(np.asarray(self.year))

@property
def start_time(self):
    """Return ``self.to_timestamp(how='start')``."""
    start_stamps = self.to_timestamp(how='start')
    return start_stamps

@property
def end_time(self):
    """Return ``self.to_timestamp(how='end')``."""
    end_stamps = self.to_timestamp(how='end')
    return end_stamps

def asfreq(self, freq=None, how='E'):
"""
Convert the Period Array/Index to the specified frequency `freq`.
Expand Down Expand Up @@ -266,6 +279,48 @@ def asfreq(self, freq=None, how='E'):

return self._shallow_copy(new_data, freq=freq)

def to_timestamp(self, freq=None, how='start'):
"""
Cast to DatetimeArray/Index.

Parameters
----------
freq : string or DateOffset, optional
Target frequency. The default is 'D' for week or longer,
'S' otherwise
how : {'s', 'e', 'start', 'end'}, default 'start'
Whether each period is converted to its start or its end. For
'end', the result is one nanosecond before the start of the
following period (for ``freq='B'``, one nanosecond before the end
of the day on which the period starts).

Returns
-------
DatetimeArray/Index
"""
# Imported at call time rather than at module level.
from pandas.core.arrays.datetimes import DatetimeArrayMixin
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you able to put these at the top?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's fragile. This mirrors the existing runtime import in the PeriodIndex method.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok that's fine, yeah definitly appreciate making the import hierarchy nicer


# Normalize ``how``; the check below relies on 'E' denoting the end.
how = libperiod._validate_end_alias(how)

end = how == 'E'
if end:
# NOTE(review): neither recursive call below forwards ``freq``, so in
# this branch an explicit freq only affects the 'B' check -- confirm
# that this is intended.
if freq == 'B':
# roll forward to ensure we land on B date
adjust = Timedelta(1, 'D') - Timedelta(1, 'ns')
return self.to_timestamp(how='start') + adjust
else:
# The start of the next period minus one nanosecond.
adjust = Timedelta(1, 'ns')
return (self + 1).to_timestamp(how='start') - adjust

if freq is None:
# Derive the default target frequency from this array's own freq code.
base, mult = frequencies.get_freq_code(self.freq)
freq = frequencies.get_to_timestamp_base(base)
else:
freq = Period._maybe_convert_freq(freq)

# Only ``base`` is used below; ``mult`` is not read again.
base, mult = frequencies.get_freq_code(freq)
new_data = self.asfreq(freq, how=how)

new_data = libperiod.periodarr_to_dt64arr(new_data._ndarray_values,
base)
# NOTE(review): freq='infer' presumably asks the constructor to infer the
# frequency of the result -- confirm.
return DatetimeArrayMixin(new_data, freq='infer')

# ------------------------------------------------------------------
# Arithmetic Methods

Expand Down Expand Up @@ -392,6 +447,15 @@ def _maybe_convert_timedelta(self, other):
# -------------------------------------------------------------------
# Constructor Helpers

def dt64arr_to_periodarr(data, freq, tz=None):
    """
    Convert a ``datetime64[ns]`` array to a period array at ``freq``.

    Parameters
    ----------
    data : array exposing ``dtype`` and ``view``
        Must have dtype ``M8[ns]``.
    freq : frequency specification
        Passed through ``Period._maybe_convert_freq`` before use.
    tz : optional
        Forwarded unchanged to ``libperiod.dt64arr_to_periodarr``.

    Returns
    -------
    The result of ``libperiod.dt64arr_to_periodarr`` applied to the
    int64 view of ``data``.

    Raises
    ------
    ValueError
        If ``data.dtype`` is not ``M8[ns]``.
    """
    found_dtype = data.dtype
    if found_dtype != np.dtype('M8[ns]'):
        raise ValueError('Wrong dtype: %s' % found_dtype)

    # Only the base code of the frequency is needed by the converter.
    base, _ = frequencies.get_freq_code(Period._maybe_convert_freq(freq))
    i8values = data.view('i8')
    return libperiod.dt64arr_to_periodarr(i8values, base, tz)


def _get_ordinal_range(start, end, periods, freq, mult=1):
if com.count_not_none(start, end, periods) != 2:
raise ValueError('Of the three parameters: start, end, and periods, '
Expand Down
75 changes: 10 additions & 65 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.numeric import Int64Index, Float64Index
import pandas.compat as compat
from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution
from pandas.tseries.frequencies import to_offset, Resolution
from pandas.core.indexes.datetimelike import (
DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin)
from pandas.tseries.offsets import (
Expand Down Expand Up @@ -302,7 +302,8 @@ def __new__(cls, data=None,
tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)

if not isinstance(data, (np.ndarray, Index, ABCSeries)):
if not isinstance(data, (np.ndarray, Index, ABCSeries,
DatetimeArrayMixin)):
if is_scalar(data):
raise ValueError('DatetimeIndex() must be called with a '
'collection of some kind, %s was passed'
Expand Down Expand Up @@ -673,67 +674,12 @@ def to_series(self, keep_tz=False, index=None, name=None):

return Series(values, index=index, name=name)

@Appender(DatetimeArrayMixin.to_period.__doc__)
def to_period(self, freq=None):
"""
Cast to PeriodIndex at a particular frequency.

Converts DatetimeIndex to PeriodIndex.

Parameters
----------
freq : string or Offset, optional
One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
or an Offset object. Will be inferred by default.

Returns
-------
PeriodIndex

Raises
------
ValueError
When converting a DatetimeIndex with non-regular values, so that a
frequency cannot be inferred.

Examples
--------
>>> df = pd.DataFrame({"y": [1,2,3]},
... index=pd.to_datetime(["2000-03-31 00:00:00",
... "2000-05-31 00:00:00",
... "2000-08-31 00:00:00"]))
>>> df.index.to_period("M")
PeriodIndex(['2000-03', '2000-05', '2000-08'],
dtype='period[M]', freq='M')

Infer the daily frequency

>>> idx = pd.date_range("2017-01-01", periods=2)
>>> idx.to_period()
PeriodIndex(['2017-01-01', '2017-01-02'],
dtype='period[D]', freq='D')

See also
--------
pandas.PeriodIndex: Immutable ndarray holding ordinal values
pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object
"""
from pandas.core.indexes.period import PeriodIndex

if self.tz is not None:
warnings.warn("Converting to PeriodIndex representation will "
"drop timezone information.", UserWarning)

if freq is None:
freq = self.freqstr or self.inferred_freq

if freq is None:
msg = ("You must pass a freq argument as "
"current index has none.")
raise ValueError(msg)

freq = get_period_alias(freq)

return PeriodIndex(self.values, name=self.name, freq=freq)
result = DatetimeArrayMixin.to_period(self, freq=freq)
return PeriodIndex(result, name=self.name)

def snap(self, freq='S'):
"""
Expand All @@ -758,6 +704,7 @@ def snap(self, freq='S'):

# we know it conforms; skip check
return DatetimeIndex(snapped, freq=freq, verify_integrity=False)
# TODO: what about self.name? if so, use shallow_copy?

def unique(self, level=None):
# Override here since IndexOpsMixin.unique uses self._values.unique
Expand All @@ -769,8 +716,7 @@ def unique(self, level=None):
else:
naive = self
result = super(DatetimeIndex, naive).unique(level=level)
return self._simple_new(result.values, name=self.name, tz=self.tz,
freq=self.freq)
return self._shallow_copy(result.values)

def union(self, other):
"""
Expand Down Expand Up @@ -1421,8 +1367,7 @@ def insert(self, loc, item):
try:
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
self[loc:].asi8))
return DatetimeIndex(new_dates, name=self.name, freq=freq,
tz=self.tz)
return self._shallow_copy(new_dates, freq=freq)
except (AttributeError, TypeError):

# fall back to object index
Expand Down Expand Up @@ -1458,7 +1403,7 @@ def delete(self, loc):
if (loc.start in (0, None) or loc.stop in (len(self), None)):
freq = self.freq

return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
return self._shallow_copy(new_dates, freq=freq)

def indexer_at_time(self, time, asof=False):
"""
Expand Down
Loading