Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove TSDataset.loc, TSDataset.columns #631

Merged
merged 10 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **Breaking:** Remove argument `verbose` from `SimpleImputerSubsegment` ([#599](https://github.com/etna-team/etna/pull/599))
- **Breaking:** Remove attributes `df`, `raw_df`, `df_exog` from `TSDataset` ([#630](https://github.com/etna-team/etna/pull/630))
- **Breaking:** Remove attributes `known_future`, `freq`, `current_df_level` and `current_df_exog_level` from `TSDataset`, and make them properties ([#630](https://github.com/etna-team/etna/pull/630))
- **Breaking:** Remove properties `columns` and `loc` from `TSDataset` ([#631](https://github.com/etna-team/etna/pull/631))

## [2.10.0] - 2025-01-09
### Added
Expand Down
3 changes: 2 additions & 1 deletion etna/analysis/forecast/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,7 +951,6 @@

column_names = set(forecast_ts.features)
components = list(match_target_components(column_names))
skip_first_component = test_ts is None

if len(components) == 0:
raise ValueError("No components were detected in the provided `forecast_ts`.")
Expand All @@ -969,6 +968,8 @@

i = 0
for segment in segments:
skip_first_component = test_ts is None

Check warning on line 971 in etna/analysis/forecast/plots.py

View check run for this annotation

Codecov / codecov/patch

etna/analysis/forecast/plots.py#L971

Added line #L971 was not covered by tests

if test_ts is not None:
segment_test_df = test_ts[:, segment, :][segment]
else:
Expand Down
22 changes: 0 additions & 22 deletions etna/datasets/tsdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1600,28 +1600,6 @@ def drop_prediction_intervals(self):
self._df.drop(columns=list(self.prediction_intervals_names), level="feature", inplace=True)
self._prediction_intervals_names = tuple()

@property
def columns(self) -> pd.core.indexes.multi.MultiIndex:
"""Return columns of ``self.df``.

Returns
-------
pd.core.indexes.multi.MultiIndex
multiindex of dataframe with target and features.
"""
return self._df.columns

@property
def loc(self) -> pd.core.indexing._LocIndexer:
"""Return self.df.loc method.

Returns
-------
pd.core.indexing._LocIndexer
dataframe with self.df.loc[...]
"""
return self._df.loc

def isnull(self) -> pd.DataFrame:
"""Return dataframe with flag that means if the correspondent element in wide representation of data is null.

Expand Down
13 changes: 7 additions & 6 deletions etna/ensembles/stacking_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,13 @@ def _make_features(
else:
return x, None

def _process_forecasts(self, ts: TSDataset, forecasts: List[pd.DataFrame]) -> TSDataset:
ts = self._make_same_level(ts=ts, forecasts=forecasts)
def _process_forecasts(self, ts: TSDataset, forecasts: List[TSDataset]) -> TSDataset:
forecasts_df: List[pd.DataFrame] = [forecast._df for forecast in forecasts]
ts = self._make_same_level(ts=ts, forecasts=forecasts_df)

x, _ = self._make_features(ts=ts, forecasts=forecasts, train=False)
x, _ = self._make_features(ts=ts, forecasts=forecasts_df, train=False)
y = self.final_model.predict(x)
num_segments = len(forecasts[0].segments)
num_segments = len(forecasts_df[0].columns.get_level_values("segment").unique())
y = y.reshape(num_segments, -1).T
num_timestamps = y.shape[0]

Expand All @@ -237,11 +238,11 @@ def _process_forecasts(self, ts: TSDataset, forecasts: List[pd.DataFrame]) -> TS
x.loc[:, "timestamp"] = x.index.values
df_exog = TSDataset.to_dataset(x)

df = forecasts[0][:, :, "target"].copy()
df = forecasts_df[0].loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]].copy()
df.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = np.NAN

result = TSDataset(df=df, freq=ts.freq, df_exog=df_exog, hierarchical_structure=ts.hierarchical_structure)
result.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = y
result._df.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = y
return result

def _forecast(self, ts: TSDataset, return_components: bool) -> TSDataset:
Expand Down
15 changes: 5 additions & 10 deletions etna/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def _validate_segments(y_true: TSDataset, y_pred: TSDataset):

@staticmethod
def _validate_target_columns(y_true: TSDataset, y_pred: TSDataset):
"""Check that all the segments from ``y_true`` and ``y_pred`` has 'target' column.
"""Check that ``y_true`` and ``y_pred`` has 'target' feature.

Parameters
----------
Expand All @@ -226,16 +226,11 @@ def _validate_target_columns(y_true: TSDataset, y_pred: TSDataset):
Raises
------
ValueError:
if one of segments in y_true or y_pred doesn't contain 'target' column.
if y_true or y_pred doesn't contain 'target' feature.
"""
segments = set(y_true.segments)

for segment in segments:
for name, dataset in zip(("y_true", "y_pred"), (y_true, y_pred)):
if (segment, "target") not in dataset.columns:
raise ValueError(
f"All the segments in {name} should contain 'target' column. Segment {segment} doesn't."
)
for name, dataset in zip(("y_true", "y_pred"), (y_true, y_pred)):
if "target" not in dataset.features:
raise ValueError(f"{name} should contain 'target' feature.")

@staticmethod
def _validate_index(y_true: TSDataset, y_pred: TSDataset):
Expand Down
4 changes: 2 additions & 2 deletions etna/metrics/intervals_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _validate_tsdataset_intervals(
raise ValueError("Provided intervals borders names must be in dataset!")

else:
missing_per_segment = ts.loc[:, pd.IndexSlice[:, list(borders_set)]].isna().any()
missing_per_segment = ts._df.loc[:, pd.IndexSlice[:, list(borders_set)]].isna().any()
if missing_per_segment.any():
raise ValueError(
"Provided intervals borders contain missing values! "
Expand All @@ -49,7 +49,7 @@ def _validate_tsdataset_intervals(
raise ValueError("All quantiles must be presented in the dataset!")

else:
missing_per_segment = ts.loc[:, pd.IndexSlice[:, list(quantiles_set)]].isna().any()
missing_per_segment = ts._df.loc[:, pd.IndexSlice[:, list(quantiles_set)]].isna().any()
if missing_per_segment.any():
raise ValueError(
"Quantiles contain missing values! "
Expand Down
2 changes: 1 addition & 1 deletion etna/models/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ def _make_predictions(self, ts: TSDataset, prediction_method: Callable, **kwargs
x = ts.to_pandas(flatten=True).drop(["segment"], axis=1)
# TODO: make it work with prediction intervals and context
y = prediction_method(self=self._base_model, df=x, **kwargs).reshape(-1, horizon).T
ts.loc[:, pd.IndexSlice[:, "target"]] = y
ts._df.loc[:, pd.IndexSlice[:, "target"]] = y
return ts

def _make_component_predictions(self, ts: TSDataset, prediction_method: Callable, **kwargs) -> pd.DataFrame:
Expand Down
46 changes: 23 additions & 23 deletions examples/301-custom_transform_and_model.ipynb

Large diffs are not rendered by default.

1,387 changes: 987 additions & 400 deletions examples/306-prediction_intervals.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def const_ts_anomal() -> TSDataset:
@pytest.fixture
def ts_diff_endings(example_reg_tsds):
ts = deepcopy(example_reg_tsds)
ts.loc[ts.timestamps[-5] :, pd.IndexSlice["segment_1", "target"]] = np.NAN
ts._df.loc[ts.timestamps[-5] :, pd.IndexSlice["segment_1", "target"]] = np.NAN
return ts


Expand Down
24 changes: 12 additions & 12 deletions tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def test_create_segment_conversion_during_init(df_segments_int):
with pytest.warns(UserWarning, match="Segment values doesn't have string type"):
ts = TSDataset(df=df_wide, df_exog=df_exog_wide, freq="D")

assert np.all(ts.columns.get_level_values("segment") == ["1", "1", "2", "2"])
assert np.all(ts._df.columns.get_level_values("segment") == ["1", "1", "2", "2"])


def test_create_from_long_format_with_exog():
Expand Down Expand Up @@ -745,9 +745,9 @@ def test_train_test_split(ts_name, borders, true_borders, request):
)
assert isinstance(train, TSDataset)
assert isinstance(test, TSDataset)
pd.testing.assert_frame_equal(train._df, ts.loc[train_start_true:train_end_true])
pd.testing.assert_frame_equal(train._df, ts._df.loc[train_start_true:train_end_true])
pd.testing.assert_frame_equal(train._df_exog, ts._df_exog)
pd.testing.assert_frame_equal(test._df, ts.loc[test_start_true:test_end_true])
pd.testing.assert_frame_equal(test._df, ts._df.loc[test_start_true:test_end_true])
pd.testing.assert_frame_equal(test._df_exog, ts._df_exog)


Expand All @@ -770,9 +770,9 @@ def test_train_test_split_with_test_size(ts_name, test_size, true_borders, reque
train, test = ts.train_test_split(test_size=test_size)
assert isinstance(train, TSDataset)
assert isinstance(test, TSDataset)
pd.testing.assert_frame_equal(train._df, ts.loc[train_start_true:train_end_true])
pd.testing.assert_frame_equal(train._df, ts._df.loc[train_start_true:train_end_true])
pd.testing.assert_frame_equal(train._df_exog, ts._df_exog)
pd.testing.assert_frame_equal(test._df, ts.loc[test_start_true:test_end_true])
pd.testing.assert_frame_equal(test._df, ts._df.loc[test_start_true:test_end_true])
pd.testing.assert_frame_equal(test._df_exog, ts._df_exog)


Expand Down Expand Up @@ -853,9 +853,9 @@ def test_train_test_split_both(ts_name, test_size, borders, true_borders, reques
)
assert isinstance(train, TSDataset)
assert isinstance(test, TSDataset)
pd.testing.assert_frame_equal(train._df, ts.loc[train_start_true:train_end_true])
pd.testing.assert_frame_equal(train._df, ts._df.loc[train_start_true:train_end_true])
pd.testing.assert_frame_equal(train._df_exog, ts._df_exog)
pd.testing.assert_frame_equal(test._df, ts.loc[test_start_true:test_end_true])
pd.testing.assert_frame_equal(test._df, ts._df.loc[test_start_true:test_end_true])
pd.testing.assert_frame_equal(test._df_exog, ts._df_exog)


Expand Down Expand Up @@ -1102,7 +1102,7 @@ def test_make_future_datetime_timestamp():
ts = TSDataset(TSDataset.to_dataset(df), freq="D")
ts_future = ts.make_future(10)
assert np.all(ts_future.timestamps == pd.date_range(ts.timestamps.max() + pd.Timedelta("1D"), periods=10, freq="D"))
assert set(ts_future.columns.get_level_values("feature")) == {"target"}
assert set(ts_future.features) == {"target"}


def test_make_future_int_timestamp():
Expand All @@ -1111,21 +1111,21 @@ def test_make_future_int_timestamp():
ts = TSDataset(TSDataset.to_dataset(df), freq=freq)
ts_future = ts.make_future(10)
assert np.all(ts_future.timestamps == np.arange(ts.timestamps.max() + 1, ts.timestamps.max() + 10 + 1))
assert set(ts_future.columns.get_level_values("feature")) == {"target"}
assert set(ts_future.features) == {"target"}


def test_make_future_with_exog_datetime_timestamp(tsdf_with_exog):
ts = tsdf_with_exog
ts_future = ts.make_future(10)
assert np.all(ts_future.timestamps == pd.date_range(ts.timestamps.max() + pd.Timedelta("1D"), periods=10, freq="D"))
assert set(ts_future.columns.get_level_values("feature")) == {"target", "exog"}
assert set(ts_future.features) == {"target", "exog"}


def test_make_future_with_exog_int_timestamp(tsdf_int_with_exog):
ts = tsdf_int_with_exog
ts_future = ts.make_future(10)
assert np.all(ts_future.timestamps == np.arange(ts.timestamps.max() + 1, ts.timestamps.max() + 10 + 1))
assert set(ts_future.columns.get_level_values("feature")) == {"target", "exog"}
assert set(ts_future.features) == {"target", "exog"}


def test_make_future_small_horizon():
Expand All @@ -1147,7 +1147,7 @@ def test_make_future_with_regressors(df_and_regressors):
ts = TSDataset(df=df, df_exog=df_exog, freq="D", known_future=known_future)
ts_future = ts.make_future(10)
assert np.all(ts_future.timestamps == pd.date_range(ts.timestamps.max() + pd.Timedelta("1D"), periods=10, freq="D"))
assert set(ts_future.columns.get_level_values("feature")) == {"target", "regressor_1", "regressor_2"}
assert set(ts_future.features) == {"target", "regressor_1", "regressor_2"}


@pytest.mark.parametrize("tail_steps", [11, 0])
Expand Down
4 changes: 2 additions & 2 deletions tests/test_datasets/test_hierarchical_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def test_make_future_df_same_level_df_exog(
df, df_exog = market_level_df, market_level_df_exog
ts = TSDataset(df=df, freq="D", df_exog=df_exog, hierarchical_structure=hierarchical_structure)
future = ts.make_future(future_steps=4)
future_columns = set(future.columns.get_level_values("feature"))
future_columns = set(future.features)
assert future_columns == expected_columns


Expand All @@ -291,7 +291,7 @@ def test_make_future_df_different_level_df_exog(
df, df_exog = product_level_df, market_level_df_exog
ts = TSDataset(df=df, freq="D", df_exog=df_exog, hierarchical_structure=hierarchical_structure)
future = ts.make_future(future_steps=4)
future_columns = set(future.columns.get_level_values("feature"))
future_columns = set(future.features)
assert future_columns == expected_columns


Expand Down
4 changes: 2 additions & 2 deletions tests/test_ensembles/test_stacking_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def test_forecast_interface(
pipelines=[naive_featured_pipeline_1, naive_featured_pipeline_2], features_to_use=features_to_use
).fit(example_tsds)
forecast = ensemble.forecast()
features = set(forecast.columns.get_level_values("feature")) - {"target"}
features = set(forecast.features) - {"target"}
assert isinstance(forecast, TSDataset)
assert forecast.size()[0] == HORIZON
assert features == expected_features
Expand Down Expand Up @@ -262,7 +262,7 @@ def test_predict_interface(
start_timestamp=example_tsds.timestamps[start_idx],
end_timestamp=example_tsds.timestamps[end_idx],
)
features = set(prediction.columns.get_level_values("feature")) - {"target"}
features = set(prediction.features) - {"target"}
assert isinstance(prediction, TSDataset)
assert prediction.size()[0] == end_idx - start_idx + 1
assert features == expected_features
Expand Down
10 changes: 4 additions & 6 deletions tests/test_metrics/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,9 @@ def test_invalid_segments(metric_class, two_dfs_with_different_segments_sets):
def test_invalid_target_columns(metric_class, train_test_dfs):
"""Check metrics behavior in case of no target column in segment"""
forecast_df, true_df = train_test_dfs
columns = forecast_df._df.columns.to_list()
columns[0] = ("segment_1", "not_target")
forecast_df._df.columns = pd.MultiIndex.from_tuples(columns, names=["segment", "feature"])
forecast_df._df.columns = forecast_df._df.columns.set_levels(["not_target"], level="feature")
metric = metric_class()
with pytest.raises(ValueError, match="All the segments in .* should contain 'target' column"):
with pytest.raises(ValueError, match="y_pred should contain 'target' feature."):
_ = metric(y_true=true_df, y_pred=forecast_df)


Expand Down Expand Up @@ -341,8 +339,8 @@ def test_metrics_values(metric_class, metric_fn, train_test_dfs):
metric_values = metric(y_pred=forecast_df, y_true=true_df)
for segment, value in metric_values.items():
true_metric_value = metric_fn(
y_true=true_df.loc[:, pd.IndexSlice[segment, "target"]],
y_pred=forecast_df.loc[:, pd.IndexSlice[segment, "target"]],
y_true=true_df._df.loc[:, pd.IndexSlice[segment, "target"]],
y_pred=forecast_df._df.loc[:, pd.IndexSlice[segment, "target"]],
)
assert value == true_metric_value

Expand Down
4 changes: 2 additions & 2 deletions tests/test_models/test_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ def forecast_components(self, df: pd.DataFrame, **kwargs) -> pd.DataFrame:

class DummyModelBase:
def _forecast(self, ts: TSDataset, **kwargs) -> TSDataset:
ts.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = 100
ts._df.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = 100
return ts

def _predict(self, ts: TSDataset, **kwargs) -> TSDataset:
ts.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = 200
ts._df.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = 200
return ts

def _forecast_components(self, ts: TSDataset, **kwargs) -> pd.DataFrame:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_pipeline/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,11 @@ def context_size(self) -> int:
return 0

def _forecast(self, ts: TSDataset, **kwargs) -> TSDataset:
ts.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = 100
ts._df.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = 100
return ts

def _predict(self, ts: TSDataset, **kwargs) -> TSDataset:
ts.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = 200
ts._df.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = 200
return ts

def _forecast_components(self, ts: TSDataset, **kwargs) -> pd.DataFrame:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_pipeline/test_autoregressive_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_forecast_columns(example_reg_tsds):
# generate all columns
original_ts.fit_transform(transforms)

assert set(forecast_pipeline.columns) == set(original_ts.columns)
assert set(forecast_pipeline._df.columns) == set(original_ts._df.columns)

# make sure that all values are filled
assert forecast_pipeline.to_pandas().isna().sum().sum() == 0
Expand Down
4 changes: 2 additions & 2 deletions tests/test_transforms/test_decomposition/test_dft_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,12 +317,12 @@ def test_repetitive_fit_transform(ts_with_exogs):
transform = FourierDecomposeTransform(in_column="target", k=3)

ts_with_exogs = transform.fit_transform(ts_with_exogs)
columns_before = ts_with_exogs.columns.tolist()
columns_before = ts_with_exogs._df.columns.tolist()

ts_with_exogs = transform.inverse_transform(ts_with_exogs)

transform = FourierDecomposeTransform(in_column="target", k=3)

ts_with_exogs = transform.fit_transform(ts_with_exogs)
columns_after = ts_with_exogs.columns.tolist()
columns_after = ts_with_exogs._df.columns.tolist()
assert columns_before == columns_after
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def test_transform_format(
out_column="embedding_segment",
)
transform.fit_transform(ts=ts_with_exog_nan_begin)
obtained_columns = set(ts_with_exog_nan_begin.columns.get_level_values("feature"))
obtained_columns = set(ts_with_exog_nan_begin.features)
embedding_columns = transform.get_regressors_info()
embeddings = ts_with_exog_nan_begin._df.loc[:, pd.IndexSlice[:, embedding_columns]].values
assert sorted(obtained_columns) == sorted(expected_columns)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def test_transform_format(
out_column="embedding_window",
)
transform.fit_transform(ts=ts_with_exog_nan_begin)
obtained_columns = set(ts_with_exog_nan_begin.columns.get_level_values("feature"))
obtained_columns = set(ts_with_exog_nan_begin.features)
embedding_columns = ["embedding_window_0", "embedding_window_1", "embedding_window_2"]
embeddings = ts_with_exog_nan_begin._df.loc[:, pd.IndexSlice[:, embedding_columns]].values
assert sorted(obtained_columns) == sorted(expected_columns)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def test_naming_ohe_encoder(two_ts_with_new_values):
ohe.fit(ts1)
segments = ["segment_0", "segment_1"]
target = ["target", "targets_0", "targets_1", "targets_2", "regressor_0"]
assert {(i, j) for i in segments for j in target} == set(ohe.transform(ts2).columns.values)
assert {(i, j) for i in segments for j in target} == set(ohe.transform(ts2)._df.columns.values)


@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ def test_gale_shapley_transform_fit_transform(ts_with_large_regressors_number: T
relevance_table=StatisticsRelevanceTable(), top_k=5, use_rank=False
)
transformed = transform.fit_transform(ts)
assert set(transformed.columns.get_level_values("feature")) == {
assert set(transformed.features) == {
"target",
"regressor_1",
"regressor_2",
Expand Down
Loading