Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1996,6 +1996,31 @@ def _generate_resample_label(
Literal["epoch", "start", "start_day", "end", "end_day"],
] = "start_day",
) -> Block:
if not isinstance(rule, str):
raise NotImplementedError(
f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
)

if rule in ("ME", "YE", "QE", "BME", "BA", "BQE", "W"):
raise NotImplementedError(
f"Offset strings 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', 'W' are not currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
)

if closed == "right":
raise NotImplementedError(
f"Only closed='left' is currently supported. {constants.FEEDBACK_LINK}",
)

if label == "right":
raise NotImplementedError(
f"Only label='left' is currently supported. {constants.FEEDBACK_LINK}",
)

if origin not in ("epoch", "start", "start_day"):
raise NotImplementedError(
f"Only origin='epoch', 'start', 'start_day' are currently supported, but got {repr(origin)}. {constants.FEEDBACK_LINK}"
)

# Validate and resolve the index or column to use for grouping
if on is None:
if len(self.index_columns) == 0:
Expand Down
62 changes: 5 additions & 57 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4182,10 +4182,12 @@ def _split(
return [DataFrame(block) for block in blocks]

@validations.requires_ordering()
def _resample(
def resample(
self,
rule: str,
*,
closed: Optional[Literal["right", "left"]] = None,
label: Optional[Literal["right", "left"]] = None,
on: blocks.Label = None,
level: Optional[LevelsType] = None,
origin: Union[
Expand All @@ -4195,64 +4197,10 @@ def _resample(
Literal["epoch", "start", "start_day", "end", "end_day"],
] = "start_day",
) -> bigframes.core.groupby.DataFrameGroupBy:
"""Internal function to support resample. Resample time-series data.

**Examples:**

>>> import bigframes.pandas as bpd
>>> data = {
... "timestamp_col": pd.date_range(
... start="2021-01-01 13:00:00", periods=30, freq="1s"
... ),
... "int64_col": range(30),
... "int64_too": range(10, 40),
... }

Resample on a DataFrame with index:

>>> df = bpd.DataFrame(data).set_index("timestamp_col")
>>> df._resample(rule="7s").min()
int64_col int64_too
2021-01-01 12:59:55 0 10
2021-01-01 13:00:02 2 12
2021-01-01 13:00:09 9 19
2021-01-01 13:00:16 16 26
2021-01-01 13:00:23 23 33
<BLANKLINE>
[5 rows x 2 columns]

Resample with column and origin set to 'start':

>>> df = bpd.DataFrame(data)
>>> df._resample(rule="7s", on = "timestamp_col", origin="start").min()
int64_col int64_too
2021-01-01 13:00:00 0 10
2021-01-01 13:00:07 7 17
2021-01-01 13:00:14 14 24
2021-01-01 13:00:21 21 31
2021-01-01 13:00:28 28 38
<BLANKLINE>
[5 rows x 2 columns]

Args:
rule (str):
The offset string representing target conversion.
on (str, default None):
For a DataFrame, column to use instead of index for resampling. Column
must be datetime-like.
level (str or int, default None):
For a MultiIndex, level (name or number) to use for resampling.
level must be datetime-like.
origin(str, default 'start_day'):
The timestamp on which to adjust the grouping. Must be one of the following:
'epoch': origin is 1970-01-01
'start': origin is the first value of the timeseries
'start_day': origin is the first day at midnight of the timeseries
Returns:
DataFrameGroupBy: DataFrameGroupBy object.
"""
block = self._block._generate_resample_label(
rule=rule,
closed=closed,
label=label,
on=on,
level=level,
origin=origin,
Expand Down
39 changes: 1 addition & 38 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2505,7 +2505,7 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series:
)

@validations.requires_ordering()
def _resample(
def resample(
self,
rule: str,
*,
Expand All @@ -2519,43 +2519,6 @@ def _resample(
Literal["epoch", "start", "start_day", "end", "end_day"],
] = "start_day",
) -> bigframes.core.groupby.SeriesGroupBy:
"""Internal function to support resample. Resample time-series data.
**Examples:**
>>> import bigframes.pandas as bpd
>>> data = {
... "timestamp_col": pd.date_range(
... start="2021-01-01 13:00:00", periods=30, freq="1s"
... ),
... "int64_col": range(30),
... }
>>> s = bpd.DataFrame(data).set_index("timestamp_col")
>>> s._resample(rule="7s", origin="epoch").min()
int64_col
2021-01-01 12:59:56 0
2021-01-01 13:00:03 3
2021-01-01 13:00:10 10
2021-01-01 13:00:17 17
2021-01-01 13:00:24 24
<BLANKLINE>
[5 rows x 1 columns]
Args:
rule (str):
The offset string representing target conversion.
level (str or int, default None):
For a MultiIndex, level (name or number) to use for resampling.
level must be datetime-like.
origin(str, default 'start_day'):
The timestamp on which to adjust the grouping. Must be one of the following:
'epoch': origin is 1970-01-01
'start': origin is the first value of the timeseries
'start_day': origin is the first day at midnight of the timeseries
Returns:
SeriesGroupBy: SeriesGroupBy object.
"""
block = self._block._generate_resample_label(
rule=rule,
closed=closed,
Expand Down
58 changes: 38 additions & 20 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5915,21 +5915,15 @@ def test_dataframe_explode_xfail(col_names):
pytest.param("datetime_col", "5M", "epoch"),
pytest.param("datetime_col", "3Q", "start_day"),
pytest.param("datetime_col", "3YE", "start"),
pytest.param(
"int64_col", "100D", "start", marks=pytest.mark.xfail(raises=TypeError)
),
pytest.param(
"datetime_col", "100D", "end", marks=pytest.mark.xfail(raises=ValueError)
),
],
)
def test__resample_with_column(
def test_resample_with_column(
scalars_df_index, scalars_pandas_df_index, on, rule, origin
):
# TODO: supply a reason why this isn't compatible with pandas 1.x
pytest.importorskip("pandas", minversion="2.0.0")
bf_result = (
scalars_df_index._resample(rule=rule, on=on, origin=origin)[
scalars_df_index.resample(rule=rule, on=on, origin=origin)[
["int64_col", "int64_too"]
]
.max()
Expand All @@ -5943,30 +5937,54 @@ def test__resample_with_column(
)


@pytest.mark.parametrize("index_col", ["timestamp_col", "datetime_col"])
@pytest.mark.parametrize(
("index_append", "level"),
[(True, 1), (False, None), (False, 0)],
)
@pytest.mark.parametrize(
("append", "level", "col", "rule"),
"rule",
[
pytest.param(False, None, "timestamp_col", "100d"),
pytest.param(True, 1, "timestamp_col", "1200h"),
pytest.param(False, None, "datetime_col", "100d"),
# TODO(tswast): support timedeltas and DateOffsets.
# TODO(tswast): support bins that default to "right".
"100d",
"1200h",
],
)
def test__resample_with_index(
scalars_df_index, scalars_pandas_df_index, append, level, col, rule
# TODO(tswast): support "right"
@pytest.mark.parametrize("closed", ["left", None])
# TODO(tswast): support "right"
@pytest.mark.parametrize("label", ["left", None])
@pytest.mark.parametrize(
"origin",
["epoch", "start", "start_day"], # TODO(tswast): support end, end_day.
)
def test_resample_with_index(
scalars_df_index,
scalars_pandas_df_index,
index_append,
level,
index_col,
rule,
closed,
origin,
label,
):
# TODO: supply a reason why this isn't compatible with pandas 1.x
pytest.importorskip("pandas", minversion="2.0.0")
scalars_df_index = scalars_df_index.set_index(col, append=append)
scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)
scalars_df_index = scalars_df_index.set_index(index_col, append=index_append)
scalars_pandas_df_index = scalars_pandas_df_index.set_index(
index_col, append=index_append
)
bf_result = (
scalars_df_index[["int64_col", "int64_too"]]
._resample(rule=rule, level=level)
.resample(rule=rule, level=level, closed=closed, origin=origin, label=label)
.min()
.to_pandas()
)
pd_result = (
scalars_pandas_df_index[["int64_col", "int64_too"]]
.resample(rule=rule, level=level)
.resample(rule=rule, level=level, closed=closed, origin=origin, label=label)
.min()
)
assert_pandas_df_equal(bf_result, pd_result)
Expand Down Expand Up @@ -6010,15 +6028,15 @@ def test__resample_with_index(
),
],
)
def test__resample_start_time(rule, origin, data):
def test_resample_start_time(rule, origin, data):
# TODO: supply a reason why this isn't compatible with pandas 1.x
pytest.importorskip("pandas", minversion="2.0.0")
col = "timestamp_col"
scalars_df_index = bpd.DataFrame(data).set_index(col)
scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
scalars_pandas_df_index.index.name = None

bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
bf_result = scalars_df_index.resample(rule=rule, origin=origin).min().to_pandas()

pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()

Expand Down
4 changes: 2 additions & 2 deletions tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4856,14 +4856,14 @@ def test_series_explode_null(data):
pytest.param(True, "timestamp_col", "timestamp_col", "1YE"),
],
)
def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
# TODO: supply a reason why this isn't compatible with pandas 1.x
pytest.importorskip("pandas", minversion="2.0.0")
scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"]
scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[
"int64_col"
]
bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas()
bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas()
pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min()
pd.testing.assert_series_equal(bf_result, pd_result)

Expand Down
12 changes: 8 additions & 4 deletions tests/system/small/test_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,20 +248,24 @@ def test_unordered_mode_no_ambiguity_warning(unordered_session):
),
],
)
def test__resample_with_index(unordered_session, rule, origin, data):
def test_resample_with_index(unordered_session, rule, origin, data):
# TODO: supply a reason why this isn't compatible with pandas 1.x
pytest.importorskip("pandas", minversion="2.0.0")
col = "timestamp_col"
scalars_df_index = bpd.DataFrame(data, session=unordered_session).set_index(col)
scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
scalars_pandas_df_index.index.name = None

bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()

bf_result = scalars_df_index.resample(rule=rule, origin=origin).min()
pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()

assert isinstance(bf_result.index, bpd.DatetimeIndex)
assert isinstance(pd_result.index, pd.DatetimeIndex)
pd.testing.assert_frame_equal(
bf_result, pd_result, check_dtype=False, check_index_type=False
bf_result.to_pandas(),
pd_result,
check_index_type=False,
check_dtype=False,
)


Expand Down
62 changes: 62 additions & 0 deletions tests/unit/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,68 @@ def test_dataframe_repr_with_uninitialized_object():
assert "DataFrame" in got


@pytest.mark.parametrize(
    "rule",
    [
        # Rule objects that are not offset strings.
        pd.DateOffset(weeks=1),
        pd.Timedelta(hours=8),
        # Offset strings which, per the pandas documentation at
        # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html
        # default to "right" for both closed and label; that is not yet supported.
        "ME",
        "YE",
        "QE",
        "BME",
        "BA",
        "BQE",
        "W",
    ],
)
def test_dataframe_rule_not_implememented(monkeypatch: pytest.MonkeyPatch, rule):
    """Expect ``resample`` to raise NotImplementedError mentioning "rule".

    Runs once per parametrized ``rule`` value against a mocked DataFrame.
    """
    df = mocks.create_dataframe(monkeypatch)
    expected_message = "rule"

    with pytest.raises(NotImplementedError, match=expected_message):
        df.resample(rule=rule)


def test_dataframe_closed_not_implememented(monkeypatch: pytest.MonkeyPatch):
    """Expect ``resample(closed="right")`` to raise NotImplementedError.

    The error message must contain "Only closed='left'".
    """
    df = mocks.create_dataframe(monkeypatch)
    expected_message = "Only closed='left'"

    with pytest.raises(NotImplementedError, match=expected_message):
        df.resample(rule="1d", closed="right")


def test_dataframe_label_not_implememented(monkeypatch: pytest.MonkeyPatch):
    """Expect ``resample(label="right")`` to raise NotImplementedError.

    The error message must contain "Only label='left'".
    """
    df = mocks.create_dataframe(monkeypatch)
    expected_message = "Only label='left'"

    with pytest.raises(NotImplementedError, match=expected_message):
        df.resample(rule="1d", label="right")


@pytest.mark.parametrize("origin", ["end", "end_day"])
def test_dataframe_origin_not_implememented(
    monkeypatch: pytest.MonkeyPatch, origin
):
    """Expect ``resample`` to raise NotImplementedError mentioning "origin".

    Runs once for each parametrized ``origin`` value ("end" and "end_day").
    """
    df = mocks.create_dataframe(monkeypatch)
    expected_message = "origin"

    with pytest.raises(NotImplementedError, match=expected_message):
        df.resample(rule="1d", origin=origin)


def test_dataframe_setattr_with_uninitialized_object():
"""Ensures DataFrame can be subclassed without trying to set attributes as columns."""
# Avoid calling __init__ since it might be called later in a subclass.
Expand Down
Loading