Skip to content

Commit

Permalink
Post, new method, and rolling deprecation (#43)
Browse files Browse the repository at this point in the history
* script

* finalize image

* Add wrapper for the series

* address rolling deprecation

* up the version

* run ruff
  • Loading branch information
wd60622 authored Jan 31, 2024
1 parent 9bfddd2 commit 67deeec
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 4 deletions.
52 changes: 52 additions & 0 deletions latent_calendar/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,58 @@ class SeriesAccessor:
def __init__(self, pandas_obj: pd.Series):
    """Store the Series that the accessor methods operate on.

    Args:
        pandas_obj: The Series this accessor instance is attached to.

    """
    self._obj = pandas_obj

def aggregate_events(
    self,
    minutes: int = 60,
    as_multiindex: bool = True,
) -> pd.Series:
    """Transform event level Series to row of wide format.

    The Series is turned into a one-column DataFrame, a constant grouping
    column is added, and the work is delegated to the DataFrame
    ``cal.aggregate_events`` accessor method. The first row of that result
    is returned as a Series.

    Args:
        minutes: The number of minutes to discretize by.
        as_multiindex: whether to use MultiIndex columns

    Returns:
        Series that would be row of wide format. It carries the name of the
        original Series, or "timestamp" when the original name is missing
        (or otherwise falsy).

    Examples:
        Discretize datetime Series to 30 minutes

        ```python
        import pandas as pd
        import matplotlib.pyplot as plt

        from latent_calendar.datasets import load_chicago_bikes

        df_trips = load_chicago_bikes()

        start_times = df_trips["started_at"]

        agg_start_times = start_times.cal.aggregate_events(minutes=30)
        agg_start_times.cal.plot_row()
        plt.show()
        ```

    """
    name = self._obj.name or "timestamp"
    # The constant grouping column must not share its name with the
    # timestamp column: ``assign`` overwrites an existing column, which
    # would replace the timestamps of a Series named "tmp" with 1s.
    group_col = "tmp" if name != "tmp" else "tmp_group"
    return (
        self._obj.rename(name)
        .to_frame()
        .assign(**{group_col: 1})
        .cal.aggregate_events(
            by=group_col,
            timestamp_col=name,
            minutes=minutes,
            as_multiindex=as_multiindex,
        )
        # Every event shares the same group value, so the row of interest
        # is the first one.
        .iloc[0]
        .rename(name)
    )

def timestamp_features(
self, discretize: bool = True, minutes: int = 60, create_vocab: bool = True
) -> pd.DataFrame:
Expand Down
5 changes: 3 additions & 2 deletions latent_calendar/segments/convolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def sum_next_hours(df: pd.DataFrame, hours: int) -> pd.DataFrame:
00 00 column would be 06 06 23
TODO: Consider if negative hours should be allowed
TODO: Handle when minutes are not 60
Arguments:
df: DataFrame of probabilities or counts in wide format
Expand All @@ -35,9 +36,9 @@ def sum_next_hours(df: pd.DataFrame, hours: int) -> pd.DataFrame:
return (
pd.concat([df, df.iloc[:, :hours]], axis=1)
.pipe(_reverse_columns)
.rolling(hours + 1, axis=1)
.T.rolling(hours + 1)
.sum()
.iloc[:, hours:]
.T.iloc[:, hours:]
.pipe(_reverse_columns)
)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "latent-calendar"
version = "1.2.0"
version = "1.3.0"
description = "Analyzing and modeling weekly calendar distributions using latent components"
authors = ["Will Dean <wd60622@gmail.com>"]
readme = "README.md"
Expand Down
36 changes: 36 additions & 0 deletions scripts/plot-post.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pandas as pd

import matplotlib.pyplot as plt

import latent_calendar


if __name__ == "__main__":
    # More information on the dataset:
    # https://posit-dev.github.io/great-tables/reference/data.pizzaplace.html#great_tables.data.pizzaplace
    url = "https://raw.githubusercontent.com/posit-dev/great-tables/main/great_tables/data/05-pizzaplace.csv"
    pizza_df = pd.read_csv(url)

    # Join the separate "date" and "time" strings, then parse as datetimes
    date_and_time = pizza_df["date"].str.cat(pizza_df["time"], sep=" ")
    timestamps = pd.to_datetime(date_and_time)

    fig, (continuous_ax, discretized_ax) = plt.subplots(
        nrows=1, ncols=2, sharey=True
    )
    fig.suptitle(
        "Plotting datetime column from posit-dev/great-tables pizza place dataset"
    )

    # First panel: a reproducible sample of 100 raw timestamps
    sampled = timestamps.sample(n=100, random_state=0)
    sampled.cal.plot(ax=continuous_ax)
    continuous_ax.set_title("Continuous Series cal.plot()")

    # Second panel: all timestamps aggregated with minutes=30
    aggregated = timestamps.cal.aggregate_events(minutes=30)
    aggregated.cal.plot_row(ax=discretized_ax)
    discretized_ax.set_title("Discretized Series cal.plot_row()")

    plt.show()


41 changes: 40 additions & 1 deletion tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
import matplotlib.pyplot as plt

import latent_calendar # noqa
from latent_calendar.const import TIME_SLOTS, FULL_VOCAB, DAYS_IN_WEEK, HOURS_IN_DAY
from latent_calendar.const import (
TIME_SLOTS,
FULL_VOCAB,
DAYS_IN_WEEK,
HOURS_IN_DAY,
create_full_vocab,
)


@pytest.fixture
Expand Down Expand Up @@ -270,3 +276,36 @@ def test_dataframe_conditional_probabilities(
answer, index=df_wide_subset.index, columns=df_wide_subset.columns
)
pd.testing.assert_frame_equal(result, expected)


def test_series_aggregate_events() -> None:
    """Check Series ``cal.aggregate_events`` counts events per 30 minute slot."""
    name = "events"
    # 2021-01-01 is a Friday
    event_times = [
        "2021-01-01 00:00:00",
        "2021-01-01 00:30:00",
        "2021-01-01 01:00:00",
        "2021-01-01 01:30:00",
        "2021-01-01 01:45:00",
        "2021-01-01 01:50:00",
    ]
    ser = pd.Series(pd.to_datetime(event_times), name=name)

    result = ser.cal.aggregate_events(minutes=30)

    # Start from all zeros over the full vocabulary, then fill in the slots
    # that the six events above land in.
    expected = pd.Series(
        0,
        index=create_full_vocab(days_in_week=7, minutes=30),
        name=name,
        dtype=result.dtype,
    )
    counts_by_slot = {
        (4, 0): 1,
        (4, 0.5): 1,
        (4, 1): 1,
        # 01:30, 01:45 and 01:50 all land in this slot
        (4, 1.5): 3,
    }
    for slot, count in counts_by_slot.items():
        expected.loc[slot] = count

    pd.testing.assert_series_equal(result, expected)

0 comments on commit 67deeec

Please sign in to comment.