Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add baseline multiseries regressor #4246

Merged
merged 17 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Release Notes
**Future Releases**
* Enhancements
* Updated regression metrics to handle multioutput dataframes as well as single output series :pr:`4233`
* Added baseline regressor for multiseries time series problems :pr:`4246`
* Added stacking and unstacking utility functions to work with multiseries data :pr:`4250`
* Fixes
* Added support for pandas 2 :pr:`4216`
Expand Down
1 change: 1 addition & 0 deletions evalml/pipelines/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
DecisionTreeClassifier,
DecisionTreeRegressor,
TimeSeriesBaselineEstimator,
MultiseriesTimeSeriesBaselineRegressor,
KNeighborsClassifier,
ProphetRegressor,
SVMClassifier,
Expand Down
1 change: 1 addition & 0 deletions evalml/pipelines/components/component_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class ComponentBase(ABC, metaclass=ComponentBaseMeta):
# Referring to the pandas nullable dtypes; not just woodwork logical types
_integer_nullable_incompatibilities = []
_boolean_nullable_incompatibilities = []
is_multiseries = False

def __init__(self, parameters=None, component_obj=None, random_seed=0, **kwargs):
"""Base class for all components.
Expand Down
1 change: 1 addition & 0 deletions evalml/pipelines/components/estimators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
ExtraTreesRegressor,
BaselineRegressor,
TimeSeriesBaselineEstimator,
MultiseriesTimeSeriesBaselineRegressor,
DecisionTreeRegressor,
SVMRegressor,
ExponentialSmoothingRegressor,
Expand Down
3 changes: 3 additions & 0 deletions evalml/pipelines/components/estimators/regressors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
from evalml.pipelines.components.estimators.regressors.time_series_baseline_estimator import (
TimeSeriesBaselineEstimator,
)
from evalml.pipelines.components.estimators.regressors.multiseries_time_series_baseline_regressor import (
MultiseriesTimeSeriesBaselineRegressor,
)
from evalml.pipelines.components.estimators.regressors.prophet_regressor import (
ProphetRegressor,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""Time series estimator that predicts using the naive forecasting approach."""
import numpy as np
import pandas as pd

from evalml.model_family import ModelFamily
from evalml.pipelines.components.estimators import Estimator
from evalml.problem_types import ProblemTypes
from evalml.utils import infer_feature_types


class MultiseriesTimeSeriesBaselineRegressor(Estimator):
"""Multiseries time series regressor that predicts using the naive forecasting approach.

This is useful as a simple baseline estimator for multiseries time series problems.

Args:
gap (int): Gap between prediction date and target date; must be a non-negative integer. If gap is 0, target date will be shifted ahead by 1 time period. Defaults to 1.
forecast_horizon (int): Number of time steps the model is expected to predict. Defaults to 1.
random_seed (int): Seed for the random number generator. Defaults to 0.
"""

name = "Multiseries Time Series Baseline Regressor"
hyperparameter_ranges = {}
"""{}"""
model_family = ModelFamily.BASELINE
"""ModelFamily.BASELINE"""
is_multiseries = True
supported_problem_types = [
ProblemTypes.TIME_SERIES_REGRESSION,
]
"""[
ProblemTypes.TIME_SERIES_REGRESSION,
]"""

def __init__(self, gap=1, forecast_horizon=1, random_seed=0, **kwargs):
self._prediction_value = None
self.start_delay = forecast_horizon + gap
self._num_features = None

if gap < 0:
raise ValueError(

Check warning on line 41 in evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py#L41

Added line #L41 was not covered by tests
f"gap value must be a positive integer. {gap} was provided.",
)

parameters = {"gap": gap, "forecast_horizon": forecast_horizon}
parameters.update(kwargs)
super().__init__(
parameters=parameters,
component_obj=None,
random_seed=random_seed,
)

def fit(self, X, y=None):
"""Fits multiseries time series baseline regressor to data.

Args:
X (pd.DataFrame): The input training data of shape [n_samples, n_features].
y (pd.DataFrame): The target training data of shape [n_samples, n_series], with one column per series.
eccabay marked this conversation as resolved.
Show resolved Hide resolved

Returns:
self

Raises:
eccabay marked this conversation as resolved.
Show resolved Hide resolved
ValueError: If input y is None or is a pd.Series rather than a pd.DataFrame.
"""
if y is None:
raise ValueError(

Check warning on line 67 in evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py#L67

Added line #L67 was not covered by tests
"Cannot train Multiseries Time Series Baseline Regressor if y is None",
)
if isinstance(y, pd.Series):
raise ValueError(

Check warning on line 71 in evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py#L71

Added line #L71 was not covered by tests
"y must be a DataFrame with multiple columns for Multiseries Time Series Baseline Regressor",
)
self._target_column_names = list(y.columns)
self._num_features = X.shape[1]

return self

def predict(self, X):
"""Make predictions using fitted multiseries time series baseline regressor.

Args:
X (pd.DataFrame): Data of shape [n_samples, n_features].

Returns:
pd.DataFrame: Predicted values, with one lagged target column (``{target}_delay_{start_delay}``) per series.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure this is correct, but just to double check we're returning the predictions with the predicted values stacked right (i.e. in series form and not as a dataframe)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lol, this is a copypasta fail. We're returning the unstacked dataframe here


Raises:
ValueError: If the lagged columns are not present in X.
"""
X = infer_feature_types(X)
feature_names = [

Check warning on line 92 in evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py#L91-L92

Added lines #L91 - L92 were not covered by tests
f"{col}_delay_{self.start_delay}" for col in self._target_column_names
]
if not set(feature_names).issubset(set(X.columns)):
raise ValueError(

Check warning on line 96 in evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py#L95-L96

Added lines #L95 - L96 were not covered by tests
"Multiseries Time Series Baseline Regressor is meant to be used in a pipeline with "
"a Time Series Featurizer",
)
return X.ww[feature_names]

Check warning on line 100 in evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/estimators/regressors/multiseries_time_series_baseline_regressor.py#L100

Added line #L100 was not covered by tests

@property
def feature_importance(self):
    """Return the importance assigned to each input feature.

    Since baseline estimators do not use input features to calculate
    predictions, every feature is given an importance of zero.

    Returns:
        np.ndarray: An array of zeroes whose length is the number of feature
            columns recorded when the estimator was fit.
    """
    # One zero per column of X seen in fit (stored as ``_num_features``).
    zeroes = [0] * self._num_features
    return np.array(zeroes)
Comment on lines +104 to +114
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably another nit, but if you're calling out all Baseline Estimators...is it worth putting together a story to add a BaselineEstimator class to the inheritance chain and have them all inherit this prop def?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed #4255

Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,15 @@
"""
if self.time_index is None:
raise ValueError("time_index cannot be None!")
self.statistically_significant_lags = self._find_significant_lags(
y,
conf_level=self.conf_level,
start_delay=self.start_delay,
max_delay=self.max_delay,
)
if isinstance(y, pd.DataFrame):
self.statistically_significant_lags = [self.start_delay]

Check warning on line 128 in evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py#L128

Added line #L128 was not covered by tests
Comment on lines +131 to +132
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So for the multiseries case, do we not try to find the significant lags? Does this just use all or none of the lags?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need to find the significant lags because we're not actually doing feature engineering here, just getting the properly lagged column that our baseline regressor relies on. By setting the lags that we calculate to be just self.start_delay, we only compute the one we know we need.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might want to add an explicit comment here for the case we're splitting on...e.g. if y is a dataframe, we expect it to be multiseries.

else:
self.statistically_significant_lags = self._find_significant_lags(
y,
conf_level=self.conf_level,
start_delay=self.start_delay,
max_delay=self.max_delay,
)
return self

@staticmethod
Expand Down Expand Up @@ -215,6 +218,22 @@
)
return data

def _delay_df(
self,
data,
cols_to_delay,
categorical_columns=None,
X_categorical=None,
):
lagged_features = {}
for col_name in cols_to_delay:
col = data[col_name]
if categorical_columns and col_name in categorical_columns:
col = X_categorical[col_name]
for t in self.statistically_significant_lags:
lagged_features[f"{col_name}_delay_{t}"] = col.shift(t)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're not going to be doing any external matching on this name format, right? If so, I think we might want to establish a pattern of making this string format like a module level thing or accessible via the class

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call - adjusting!

return lagged_features

def _compute_delays(self, X_ww, y):
"""Computes the delayed features for numeric/categorical features in X and y.

Expand All @@ -234,33 +253,28 @@
).columns,
)
categorical_columns = self._get_categorical_columns(X_ww)
cols_derived_from_categoricals = []
lagged_features = {}
if self.delay_features and len(X_ww) > 0:
X_categorical = self._encode_X_while_preserving_index(
X_ww[categorical_columns],
)
for col_name in cols_to_delay:
col = X_ww[col_name]
if col_name in categorical_columns:
col = X_categorical[col_name]
for t in self.statistically_significant_lags:
feature_name = f"{col_name}_delay_{t}"
lagged_features[f"{col_name}_delay_{t}"] = col.shift(t)
if col_name in categorical_columns:
cols_derived_from_categoricals.append(feature_name)
lagged_features.update(
self._delay_df(X_ww, cols_to_delay, categorical_columns, X_categorical),
)
# Handle cases where the target was passed in
if self.delay_target and y is not None:
if type(y.ww.logical_type) == logical_types.Categorical:
y = self._encode_y_while_preserving_index(y)
for t in self.statistically_significant_lags:
lagged_features[self.target_colname_prefix.format(t)] = y.shift(t)
if isinstance(y, pd.DataFrame):
lagged_features.update(self._delay_df(y, y.columns))

Check warning on line 267 in evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py#L267

Added line #L267 was not covered by tests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought: should we just run self._encode_y_while_preserving_index(y) even though we won't expect categorical columns just yet?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 interesting point, will we ever expect categorical columns? We're only supporting regression problems for multiseries

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

potentially sometime in the distant future! Just thought it would make it one step easier for whoever implements that 😄 it'll be a no-op anyways right now

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea in theory, but I'm worried about increasing runtime with checking if y contains any categorical columns. We'd have to do so in all cases, which feels wasteful when we know we won't be dealing with it.

else:
if type(y.ww.logical_type) == logical_types.Categorical:
y = self._encode_y_while_preserving_index(y)

Check warning on line 270 in evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py

View check run for this annotation

Codecov / codecov/patch

evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py#L270

Added line #L270 was not covered by tests
for t in self.statistically_significant_lags:
lagged_features[self.target_colname_prefix.format(t)] = y.shift(t)
# Features created from categorical columns should no longer be categorical
lagged_features = pd.DataFrame(lagged_features)
lagged_features = pd.DataFrame(lagged_features, index=X_ww.index)
lagged_features.ww.init(
logical_types={col: "Double" for col in lagged_features.columns},
)
lagged_features.index = X_ww.index
return ww.concat_columns([X_ww, lagged_features])

def transform(self, X, y=None):
Expand Down
17 changes: 14 additions & 3 deletions evalml/tests/component_tests/test_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
LinearDiscriminantAnalysis,
LinearRegressor,
LogisticRegressionClassifier,
MultiseriesTimeSeriesBaselineRegressor,
NaturalLanguageFeaturizer,
OneHotEncoder,
Oversampler,
Expand Down Expand Up @@ -1015,9 +1016,9 @@ def test_components_can_be_used_for_partial_dependence_fast_mode():
# Expected number is hardcoded so that this test will fail when new components are added
# It should be len(all_native_components) - num_invalid_for_pd_fast_mode
if ProphetRegressor not in all_native_components:
expected_num_valid_for_pd_fast_mode = 63
else:
expected_num_valid_for_pd_fast_mode = 64
else:
expected_num_valid_for_pd_fast_mode = 65
assert num_valid_for_pd_fast_mode == expected_num_valid_for_pd_fast_mode


Expand Down Expand Up @@ -1210,6 +1211,7 @@ def test_all_estimators_check_fit(
StackedEnsembleClassifier,
StackedEnsembleRegressor,
TimeSeriesBaselineEstimator,
MultiseriesTimeSeriesBaselineRegressor,
VowpalWabbitBinaryClassifier,
VowpalWabbitMulticlassClassifier,
VowpalWabbitRegressor,
Expand Down Expand Up @@ -1367,6 +1369,9 @@ def test_serialization(
else:
X, y = X_y_binary

if component_class.is_multiseries:
y = pd.DataFrame({"target_a": y, "target_b": y})

component.fit(X, y)

for pickle_protocol in range(cloudpickle.DEFAULT_PROTOCOL + 1):
Expand Down Expand Up @@ -1740,6 +1745,9 @@ def test_estimator_fit_respects_custom_indices(
X = pd.DataFrame(X)
y = pd.Series(y)

if estimator_class.is_multiseries:
y = pd.DataFrame({"target_a": y, "target_b": y})

if use_custom_index and ts_problem:
X.index = pd.date_range("2020-10-01", periods=40)
y.index = pd.date_range("2020-10-01", periods=40)
Expand Down Expand Up @@ -1915,7 +1923,10 @@ def test_components_support_nullable_types(
component is added that has nullable type incompatibilities, this should fail."""
cannot_handle_boolean_target = [CatBoostRegressor]

if component_class == TimeSeriesBaselineEstimator:
if (
component_class == TimeSeriesBaselineEstimator
or component_class == MultiseriesTimeSeriesBaselineRegressor
):
pytest.skip(
"Time Series Baseline Estimator can only be used within a Pipeline.",
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pandas as pd
import pytest

Check warning on line 2 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L1-L2

Added lines #L1 - L2 were not covered by tests

from evalml.model_family import ModelFamily
from evalml.pipelines.components import (

Check warning on line 5 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L4-L5

Added lines #L4 - L5 were not covered by tests
MultiseriesTimeSeriesBaselineRegressor,
TimeSeriesFeaturizer,
)


def test_multiseries_time_series_baseline_regressor_init():
baseline = MultiseriesTimeSeriesBaselineRegressor()
assert baseline.model_family == ModelFamily.BASELINE
assert baseline.is_multiseries
assert baseline.start_delay == 2

Check warning on line 15 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L11-L15

Added lines #L11 - L15 were not covered by tests

baseline = MultiseriesTimeSeriesBaselineRegressor(gap=2, forecast_horizon=5)
assert baseline.start_delay == 7

Check warning on line 18 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L17-L18

Added lines #L17 - L18 were not covered by tests


def test_multiseries_time_series_baseline_gap_negative():
    """Constructing the regressor with a negative gap must raise ValueError."""
    expected_message = "gap value must be a positive integer."
    with pytest.raises(ValueError, match=expected_message):
        MultiseriesTimeSeriesBaselineRegressor(gap=-1)

Check warning on line 23 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L21-L23

Added lines #L21 - L23 were not covered by tests


def test_multiseries_time_series_baseline_estimator_invalid_y(

Check warning on line 26 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L26

Added line #L26 was not covered by tests
X_y_multiseries_regression,
):
X, _ = X_y_multiseries_regression

Check warning on line 29 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L29

Added line #L29 was not covered by tests

estimator = MultiseriesTimeSeriesBaselineRegressor(gap=0, forecast_horizon=2)

Check warning on line 31 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L31

Added line #L31 was not covered by tests

with pytest.raises(ValueError, match="if y is None"):
estimator.fit(X, None)
with pytest.raises(ValueError, match="y must be a DataFrame"):
estimator.fit(X, pd.Series(range(100)))

Check warning on line 36 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L33-L36

Added lines #L33 - L36 were not covered by tests


def test_multiseries_baseline_no_featurizer(X_y_multiseries_regression):
X, y = X_y_multiseries_regression

Check warning on line 40 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L39-L40

Added lines #L39 - L40 were not covered by tests

estimator = MultiseriesTimeSeriesBaselineRegressor(gap=0, forecast_horizon=2)
estimator.fit(X, y)

Check warning on line 43 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L42-L43

Added lines #L42 - L43 were not covered by tests

with pytest.raises(ValueError, match="is meant to be used in a pipeline with "):
estimator.predict(X)

Check warning on line 46 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L45-L46

Added lines #L45 - L46 were not covered by tests


def test_multiseries_time_series_baseline_lags(X_y_multiseries_regression):
X, y = X_y_multiseries_regression

Check warning on line 50 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L49-L50

Added lines #L49 - L50 were not covered by tests

feat = TimeSeriesFeaturizer(time_index="index", gap=0, forecast_horizon=2)
feat.fit(X, y)
X_t = feat.transform(X, y)

Check warning on line 54 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L52-L54

Added lines #L52 - L54 were not covered by tests

estimator = MultiseriesTimeSeriesBaselineRegressor(gap=0, forecast_horizon=2)
estimator.fit(X_t, y)

Check warning on line 57 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L56-L57

Added lines #L56 - L57 were not covered by tests

pred = estimator.predict(X_t)
expected = y.shift(2)
expected.columns = [f"{col}_delay_2" for col in expected.columns]
pd.testing.assert_frame_equal(pred, expected)

Check warning on line 62 in evalml/tests/component_tests/test_multiseries_baseline_regressor.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_multiseries_baseline_regressor.py#L59-L62

Added lines #L59 - L62 were not covered by tests
15 changes: 15 additions & 0 deletions evalml/tests/component_tests/test_time_series_featurizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -981,3 +981,18 @@
output.fit(X, y)
X_t = output.transform(X, y)
assert set(X_t["cats_delay_1"].value_counts().to_dict().keys()) == {2.0, 0.0, 1.0}


def test_featurizer_y_dataframe(X_y_multiseries_regression):
X, y = X_y_multiseries_regression
X.index = pd.date_range("2021-01-01", periods=X.shape[0])

Check warning on line 988 in evalml/tests/component_tests/test_time_series_featurizer.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_time_series_featurizer.py#L986-L988

Added lines #L986 - L988 were not covered by tests

featurizer = TimeSeriesFeaturizer(time_index="index", gap=1, forecast_horizon=5)
featurizer.fit(X, y)

Check warning on line 991 in evalml/tests/component_tests/test_time_series_featurizer.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_time_series_featurizer.py#L990-L991

Added lines #L990 - L991 were not covered by tests

assert featurizer.statistically_significant_lags == [6]

Check warning on line 993 in evalml/tests/component_tests/test_time_series_featurizer.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_time_series_featurizer.py#L993

Added line #L993 was not covered by tests

expected_y_cols = [f"y_{i}_delay_6" for i in range(y.shape[1])]
X_t = featurizer.transform(X, y)
for expected_y_col in expected_y_cols:
assert expected_y_col in X_t.columns

Check warning on line 998 in evalml/tests/component_tests/test_time_series_featurizer.py

View check run for this annotation

Codecov / codecov/patch

evalml/tests/component_tests/test_time_series_featurizer.py#L995-L998

Added lines #L995 - L998 were not covered by tests
1 change: 1 addition & 0 deletions evalml/tests/component_tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
"Target Imputer",
"Natural Language Featurizer",
"Time Series Baseline Estimator",
"Multiseries Time Series Baseline Regressor",
"Time Series Imputer",
"Time Series Regularizer",
"URL Featurizer",
Expand Down
Loading
Loading