Remove access to changing TSDataset attributes #630

Merged on Mar 13, 2025 (35 commits)

Changes from 1 commit

Commits (35)
5a78a8f
create _df attribute
Mar 11, 2025
02d9bc5
change tests
Mar 11, 2025
4d12e73
Merge remote-tracking branch 'origin/master' into issue-598
Mar 11, 2025
38a1e62
create _raw_df attribute
Mar 11, 2025
6c4167d
change tests
Mar 11, 2025
5148e79
add _df_exog attribute
Mar 11, 2025
f028871
change tests
Mar 11, 2025
4472279
create _known_feature attribute and read_only property known_future
Mar 11, 2025
4bcc365
change tests and add new ones
Mar 11, 2025
f3218d5
create _freq attribute and read_only property freq
Mar 11, 2025
da4d848
add new test
Mar 11, 2025
b35684f
create _current_df_level attribute and property current_df_level
Mar 11, 2025
5a45b19
add new test
Mar 11, 2025
434f873
create _current_df_exog_level attribute and property current_df_exog_…
Mar 11, 2025
bb3d9bf
add new test
Mar 11, 2025
e5a521d
lint
Mar 11, 2025
e31c3b7
fix some tests
Mar 12, 2025
b26bbf6
change changelog
Mar 12, 2025
4f0606c
change notebooks
Mar 12, 2025
d1fb936
some fixes
Mar 12, 2025
a375969
change .columns to ._df.columns
Mar 12, 2025
1e0f91d
change .columns to ._df.columns
Mar 12, 2025
287e340
change .loc to ._df.loc
Mar 12, 2025
1ab14b4
lint
Mar 12, 2025
7298012
fix typo
Mar 12, 2025
36d9326
notebook view
Mar 12, 2025
c57c1b3
review fixed
Mar 12, 2025
f3f94cc
review fixed v2
Mar 12, 2025
0d3ff73
fix typo in changelog
Mar 12, 2025
7c3efd3
review fixed v3
Mar 12, 2025
72d51e3
review fixed v4
Mar 13, 2025
9aa9789
review fixed v5
Mar 13, 2025
b24b65a
fix isnull docs
Mar 13, 2025
bd0de1f
fix isnull docs
Mar 13, 2025
5dfb9e8
fix isnull docs
Mar 13, 2025
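
Taken together, the commits above replace public TSDataset attributes (raw_df, df_exog, freq, known_future, current_df_level, current_df_exog_level) with private underscore attributes exposed through read-only properties. A minimal sketch of that pattern, with hypothetical simplified names and none of the real class's logic:

```python
# Minimal sketch (not etna's actual implementation) of the pattern the commits
# above describe: keep data in private underscore attributes and expose them
# through read-only properties so callers can no longer reassign them.
from typing import Sequence

import pandas as pd


class SketchDataset:
    def __init__(self, df: pd.DataFrame, freq: str, known_future: Sequence[str] = ()):
        self._freq = freq                           # was the public ``freq`` attribute
        self._known_features = tuple(known_future)  # was the public ``known_future`` attribute
        self._raw_df = df.copy(deep=True)           # was the public ``raw_df`` attribute
        self._df = self._raw_df.copy(deep=True)

    @property
    def freq(self) -> str:
        """Frequency of the dataset; read-only because no setter is defined."""
        return self._freq

    @property
    def known_future(self) -> Sequence[str]:
        """Features known in the future; read-only view on the private attribute."""
        return self._known_features


# ds = SketchDataset(df=wide_df, freq="D")  # wide_df is a placeholder
# ds.freq = "H"  # raises AttributeError: the property has no setter
```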
create _raw_df attribute
Danil Smorchkov authored and committed on Mar 11, 2025
commit 38a1e62e87dc5a9a94208f9b0060bece5f67b7a2
28 changes: 14 additions & 14 deletions etna/datasets/tsdataset.py
@@ -143,8 +143,8 @@ def __init__(
"""
self.freq = freq
self.df_exog = None
self.raw_df = self._prepare_df(df=df, freq=freq)
self._df = self.raw_df.copy(deep=True)
self._raw_df = self._prepare_df(df=df, freq=freq)
self._df = self._raw_df.copy(deep=True)

self.hierarchical_structure = hierarchical_structure
self.current_df_level: Optional[str] = self._get_dataframe_level(df=self._df)
@@ -454,7 +454,7 @@ def make_future(
2021-07-04 33 38 NaN 73 78 NaN
"""
self._check_endings(warning=True)
df = self._expand_index(df=self.raw_df, freq=self.freq, future_steps=future_steps)
df = self._expand_index(df=self._raw_df, freq=self.freq, future_steps=future_steps)

if self.df_exog is not None and self.current_df_level == self.current_df_exog_level:
df = self._merge_exog(df=df)
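
The make_future hunk above only swaps raw_df for _raw_df; the index expansion itself is unchanged. A rough sketch of what extending a wide dataframe by future_steps can look like, assuming a plain DatetimeIndex; this is an illustration, not etna's actual _expand_index:

```python
# Illustration only: extend a dataframe's DatetimeIndex by ``future_steps``
# timestamps so future rows exist (filled with NaN) for models to predict into.
import pandas as pd


def expand_index_sketch(df: pd.DataFrame, freq: str, future_steps: int) -> pd.DataFrame:
    # build future_steps new timestamps continuing from the last one in df
    future_index = pd.date_range(start=df.index.max(), periods=future_steps + 1, freq=freq)[1:]
    # reindex so the new rows exist and are filled with NaN
    return df.reindex(df.index.append(future_index))
```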
@@ -522,22 +522,22 @@ def tsdataset_idx_slice(self, start_idx: Optional[int] = None, end_idx: Optional
TSDataset based on indexing slice.
"""
self_df = self._df
self_raw_df = self.raw_df
self_raw_df = self._raw_df

try:
# we do this to avoid redundant copying of data
self._df = None
self.raw_df = None
self._raw_df = None

ts_slice = deepcopy(self)
ts_slice._df = _slice_index_wide_dataframe(df=self_df, start=start_idx, stop=end_idx, label_indexing=False)
ts_slice.raw_df = _slice_index_wide_dataframe(
ts_slice._raw_df = _slice_index_wide_dataframe(
df=self_raw_df, start=start_idx, stop=end_idx, label_indexing=False
)

finally:
self._df = self_df
self.raw_df = self_raw_df
self._raw_df = self_raw_df

return ts_slice
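
The try/finally block above temporarily detaches the large dataframes from the object before deepcopy, so only the lightweight state is cloned; the dataframes are then sliced once and reattached. A hedged sketch of the same trick, using plain .iloc slicing in place of _slice_index_wide_dataframe:

```python
# Sketch of the copy-avoidance trick used in ``tsdataset_idx_slice`` and
# ``train_test_split``: detach the heavy dataframes so deepcopy does not
# duplicate them, slice them separately, and always reattach the originals.
from copy import deepcopy


def slice_sketch(ts, start_idx, end_idx):
    self_df = ts._df
    self_raw_df = ts._raw_df
    try:
        # detach heavy dataframes so deepcopy below clones only lightweight state
        ts._df = None
        ts._raw_df = None
        ts_slice = deepcopy(ts)
        ts_slice._df = self_df.iloc[start_idx:end_idx]
        ts_slice._raw_df = self_raw_df.iloc[start_idx:end_idx]
    finally:
        # reattach the original dataframes to the source object
        ts._df = self_df
        ts._raw_df = self_raw_df
    return ts_slice
```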

@@ -890,7 +890,7 @@ def to_flatten(df: pd.DataFrame, features: Union[Literal["all"], Sequence[str]]
else:
stacked = df_cur.values.T.ravel()
# creating series is necessary for dtypes like "Int64", "boolean", otherwise they will be objects
df_dict[column] = pd.Series(stacked, dtype=df_cur.dtypes.iloc[0])
df_dict[column] = pd.Series(stacked, dtype=df_cur.dtypes[0])
df_flat = pd.DataFrame(df_dict)

return df_flat
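
The comment about nullable dtypes in this hunk is worth a concrete look: ravelling the values of an "Int64" or "boolean" column loses the extension dtype, so it has to be reapplied when the flat column is built. A small self-contained illustration:

```python
# Ravelling values of a nullable-dtype column yields a plain object array,
# so the dtype must be reapplied when building the flat column.
import pandas as pd

df_cur = pd.DataFrame({"a": pd.array([1, 2, None], dtype="Int64")})
stacked = df_cur.values.T.ravel()                    # object ndarray, dtype information lost
flat = pd.Series(stacked, dtype=df_cur.dtypes.iloc[0])
print(stacked.dtype)  # object
print(flat.dtype)     # Int64
```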
@@ -1265,25 +1265,25 @@ def train_test_split(
warnings.warn(f"Min timestamp in df is {self._df.index.min()}.")

self_df = self._df
self_raw_df = self.raw_df
self_raw_df = self._raw_df
try:
# we do this to avoid redundant copying of data
self._df = None
self.raw_df = None
self._raw_df = None

train = deepcopy(self)
train._df = _slice_index_wide_dataframe(df=self_df, start=train_start_defined, stop=train_end_defined)
train.raw_df = _slice_index_wide_dataframe(
train._raw_df = _slice_index_wide_dataframe(
df=self_raw_df, start=train_start_defined, stop=train_end_defined
)

test = deepcopy(self)
test._df = _slice_index_wide_dataframe(df=self_df, start=test_start_defined, stop=test_end_defined)
test.raw_df = _slice_index_wide_dataframe(df=self_raw_df, start=train_start_defined, stop=test_end_defined)
test._raw_df = _slice_index_wide_dataframe(df=self_raw_df, start=train_start_defined, stop=test_end_defined)

finally:
self._df = self_df
self.raw_df = self_raw_df
self._raw_df = self_raw_df

return train, test

@@ -1517,7 +1517,7 @@ def add_target_components(self, target_components_df: pd.DataFrame):
except ValueError:
raise ValueError(f"Set of target components differs between segments!")

components_sum = target_components_df.T.groupby(level="segment").sum().T
components_sum = target_components_df.groupby(axis=1, level="segment").sum()
if not np.allclose(components_sum.values, self[..., "target"].values):
raise ValueError("Components don't sum up to target!")

2 changes: 1 addition & 1 deletion etna/pipeline/autoregressive_pipeline.py
@@ -189,7 +189,7 @@ def _forecast(self, ts: TSDataset, return_components: bool) -> TSDataset:

# cut only last timestamps from result dataset
prediction_ts._df = prediction_ts._df.tail(self.horizon)
prediction_ts.raw_df = prediction_ts.raw_df.tail(self.horizon)
prediction_ts._raw_df = prediction_ts._raw_df.tail(self.horizon)

if return_components:
target_components_df = pd.concat(target_components_dfs)
2 changes: 1 addition & 1 deletion etna/pipeline/mixins.py
@@ -35,7 +35,7 @@ def _create_ts(
self.model: ModelType
self.transforms: Sequence[Transform]

df = deepcopy(ts.raw_df)
df = deepcopy(ts._raw_df)
df_exog = deepcopy(ts.df_exog)
freq = deepcopy(ts.freq)
known_future = deepcopy(ts.known_future)
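
_create_ts deep-copies the pieces of the incoming dataset before any transforms are fitted, so the caller's TSDataset is never mutated. A hedged sketch of roughly how those copied pieces could be reassembled; the exact constructor call here is an assumption, not a quote from the mixin:

```python
# Conceptual sketch of ``_create_ts``: deep-copy the building blocks of the
# incoming dataset and assemble a fresh TSDataset from them, so transforms
# applied during fitting never touch the caller's object.
# The TSDataset constructor arguments below are an assumption.
from copy import deepcopy

from etna.datasets import TSDataset


def create_ts_sketch(ts: TSDataset) -> TSDataset:
    df = deepcopy(ts._raw_df)
    df_exog = deepcopy(ts.df_exog)
    freq = deepcopy(ts.freq)
    known_future = deepcopy(ts.known_future)
    return TSDataset(df=df, df_exog=df_exog, freq=freq, known_future=known_future)
```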