Skip to content

Commit 1adc68b

Browse files
committed
BUG: fix intraday resampling to daily superperiod bugs raised in #1458 and #1471
1 parent b31610d commit 1adc68b

File tree

4 files changed

+70
-11
lines changed

4 files changed

+70
-11
lines changed

pandas/tseries/frequencies.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -902,6 +902,12 @@ def is_subperiod(source, target):
902902
-------
903903
is_subperiod : boolean
904904
"""
905+
if isinstance(source, offsets.DateOffset):
906+
source = source.rule_code
907+
908+
if isinstance(target, offsets.DateOffset):
909+
target = target.rule_code
910+
905911
target = target.upper()
906912
source = source.upper()
907913
if _is_annual(target):
@@ -933,6 +939,12 @@ def is_superperiod(source, target):
933939
-------
934940
is_superperiod : boolean
935941
"""
942+
if isinstance(source, offsets.DateOffset):
943+
source = source.rule_code
944+
945+
if isinstance(target, offsets.DateOffset):
946+
target = target.rule_code
947+
936948
target = target.upper()
937949
source = source.upper()
938950
if _is_annual(source):
@@ -974,11 +986,13 @@ def _quarter_months_conform(source, target):
974986
return snum % 3 == tnum % 3
975987

976988
def _is_quarterly(rule):
977-
return rule.upper().startswith('Q-')
989+
rule = rule.upper()
990+
return rule == 'Q' or rule.startswith('Q-')
978991

979992

980993
def _is_weekly(rule):
981-
return rule.upper().startswith('W-')
994+
rule = rule.upper()
995+
return rule == 'W' or rule.startswith('W-')
982996

983997

984998
DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']

pandas/tseries/resample.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from pandas.core.groupby import BinGrouper, CustomGrouper
66
from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod
77
from pandas.tseries.index import DatetimeIndex, date_range
8-
from pandas.tseries.offsets import DateOffset
8+
from pandas.tseries.offsets import DateOffset, Tick, _delta_to_nanoseconds
99
from pandas.tseries.period import Period, PeriodIndex, period_range
1010
from pandas.util.decorators import cache_readonly
1111
import pandas.core.common as com
@@ -105,8 +105,24 @@ def _get_time_bins(self, axis):
105105
binner = binner[:-1]
106106
trimmed = True
107107

108+
ax_values = axis.asi8
109+
bin_edges = binner.asi8
110+
111+
# Some hacks for > daily data, see #1471, #1458
112+
if self.freq != 'D' and is_superperiod(self.freq, 'D'):
113+
day_nanos = _delta_to_nanoseconds(timedelta(1))
114+
if self.closed == 'right':
115+
bin_edges = bin_edges + day_nanos - 1
116+
else:
117+
bin_edges = bin_edges + day_nanos
118+
119+
# intraday values on last day
120+
if bin_edges[-2] > ax_values[-1]:
121+
bin_edges = bin_edges[:-1]
122+
binner = binner[:-1]
123+
108124
# general version, knowing nothing about relative frequencies
109-
bins = lib.generate_bins_dt64(axis.asi8, binner.asi8, self.closed)
125+
bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed)
110126

111127
if self.closed == 'right':
112128
labels = binner
@@ -236,7 +252,6 @@ def _take_new_index(obj, indexer, new_index, axis=0):
236252

237253
def _get_range_edges(axis, begin, end, offset, closed='left',
238254
base=0):
239-
from pandas.tseries.offsets import Tick, _delta_to_nanoseconds
240255
if isinstance(offset, basestring):
241256
offset = to_offset(offset)
242257

pandas/tseries/tests/test_resample.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from pandas.tseries.period import period_range, PeriodIndex
1010
from pandas.tseries.resample import DatetimeIndex, TimeGrouper
1111
import pandas.tseries.offsets as offsets
12+
import pandas as pd
1213

1314
import unittest
1415
import nose
@@ -429,6 +430,41 @@ def test_weekly_resample_buglet(self):
429430
expected = ts.resample('W-SUN')
430431
assert_series_equal(resampled, expected)
431432

433+
def test_monthly_resample_error(self):
434+
# #1451
435+
dates = date_range('4/16/2012 20:00', periods=5000, freq='h')
436+
ts = Series(np.random.randn(len(dates)), index=dates)
437+
# it works!
438+
result = ts.resample('M')
439+
440+
def test_resample_anchored_intraday(self):
441+
# #1471, #1458
442+
443+
rng = pd.date_range('1/1/2012', '4/1/2012', freq='10min')
444+
df = DataFrame(rng.month, index=rng)
445+
446+
result = df.resample('M')
447+
expected = df.resample('M', kind='period').to_timestamp()
448+
tm.assert_frame_equal(result, expected)
449+
450+
result = df.resample('M', closed='left')
451+
expected = df.resample('M', kind='period', closed='left').to_timestamp()
452+
tm.assert_frame_equal(result, expected)
453+
454+
rng = pd.date_range('1/1/2012', '4/1/2013', freq='10min')
455+
df = DataFrame(rng.month, index=rng)
456+
457+
result = df.resample('Q')
458+
expected = df.resample('Q', kind='period').to_timestamp()
459+
tm.assert_frame_equal(result, expected)
460+
461+
result = df.resample('Q', closed='left')
462+
expected = df.resample('Q', kind='period', closed='left').to_timestamp()
463+
tm.assert_frame_equal(result, expected)
464+
465+
466+
rng = pd.date_range('1/1/2012', '4/1/2015', freq='10min')
467+
df = DataFrame(rng.month, index=rng)
432468

433469
def _simple_ts(start, end, freq='D'):
434470
rng = date_range(start, end, freq=freq)

pandas/tseries/tests/test_timeseries.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -927,12 +927,6 @@ def test_groupby_count_dateparseerror(self):
927927

928928
assert_series_equal(result, expected)
929929

930-
def test_monthly_resample_error(self):
931-
# #1451
932-
dates = date_range('4/16/2012 20:00', periods=5000, freq='h')
933-
ts = Series(randn(len(dates)), index=dates)
934-
# it works!
935-
result = ts.resample('M')
936930

937931
def _simple_ts(start, end, freq='D'):
938932
rng = date_range(start, end, freq=freq)

0 commit comments

Comments
 (0)