From a7f704a41714afedb02f132322d009996f4aa12e Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Fri, 1 Sep 2023 17:45:22 -0700
Subject: [PATCH 01/12] init commit

---
 evalml/pipelines/utils.py | 7 +++++--
 evalml/utils/gen_utils.py | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index dbc51abee8..1f4779ad07 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -292,9 +292,12 @@ def _get_preprocessing_components(
         list[Transformer]: A list of applicable preprocessing components to use with the estimator.
     """
     if is_multiseries(problem_type):
-        return []
+        if include_decomposer:
+            components_functions = [_get_decomposer]
+        else:
+            return []
 
-    if is_time_series(problem_type):
+    elif is_time_series(problem_type):
         components_functions = [
             _get_label_encoder,
             _get_drop_all_null,
diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
index af253fa021..7128b6b2ba 100644
--- a/evalml/utils/gen_utils.py
+++ b/evalml/utils/gen_utils.py
@@ -679,7 +679,8 @@ def get_time_index(X: pd.DataFrame, y: pd.Series, time_index_name: str):
                 raise ValueError(
                     f"Too many Datetime features provided in data and provided time_index column {time_index_name} not present in data.",
                 )
-
+    if dt_col.duplicated().any():
+        dt_col = dt_col.drop_duplicates()
     if not isinstance(dt_col, pd.DatetimeIndex) or dt_col.freq is None:
         dt_col = pd.DatetimeIndex(dt_col, freq="infer")
     time_index = dt_col.rename(y.index.name)

From 39f3e80e61a206b2e235ba140744fc1124b0f76f Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Fri, 1 Sep 2023 17:54:58 -0700
Subject: [PATCH 02/12] update release notes

---
 docs/source/release_notes.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 881451efa2..eda28b6b9a 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -3,6 +3,7 @@ Release Notes
 **Future Releases**
     * Enhancements
         * Extended STLDecomposer to Support Multiseries :pr:`4253`
+        * Added STLDecomposer to multiseries pipelines :pr:`4299`
     * Fixes
     * Changes
     * Documentation Changes

From 4689a95c4a7f7156adc7f8f91c791f80db1b20e6 Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Tue, 5 Sep 2023 17:12:14 -0700
Subject: [PATCH 03/12] add decomposer to tests

---
 evalml/tests/pipeline_tests/test_pipeline_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evalml/tests/pipeline_tests/test_pipeline_utils.py b/evalml/tests/pipeline_tests/test_pipeline_utils.py
index 92eb95cc0e..db6de1a9d0 100644
--- a/evalml/tests/pipeline_tests/test_pipeline_utils.py
+++ b/evalml/tests/pipeline_tests/test_pipeline_utils.py
@@ -170,7 +170,7 @@ def test_make_pipeline(
 
             if is_time_series(problem_type):
                 if is_multiseries(problem_type):
-                    expected_components = dfs + [estimator_class]
+                    expected_components = dfs + decomposer + [estimator_class]
                 else:
                     expected_components = (
                         dfs

From 6aa8a259425bce6ae384697c3a57d505fa3a911d Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Wed, 6 Sep 2023 12:34:55 -0700
Subject: [PATCH 04/12] handle duplicates

---
 evalml/utils/gen_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
index 7128b6b2ba..827930b8b6 100644
--- a/evalml/utils/gen_utils.py
+++ b/evalml/utils/gen_utils.py
@@ -679,10 +679,11 @@ def get_time_index(X: pd.DataFrame, y: pd.Series, time_index_name: str):
                 raise ValueError(
                     f"Too many Datetime features provided in data and provided time_index column {time_index_name} not present in data.",
                 )
-    if dt_col.duplicated().any():
-        dt_col = dt_col.drop_duplicates()
     if not isinstance(dt_col, pd.DatetimeIndex) or dt_col.freq is None:
         dt_col = pd.DatetimeIndex(dt_col, freq="infer")
+    if dt_col.duplicated().any():
+        temp_dt_col = pd.DatetimeIndex(dt_col.copy().drop_duplicates(), freq="infer")
+        dt_col.freq = temp_dt_col.freq
     time_index = dt_col.rename(y.index.name)
     return time_index
 

From 6bea453bc0bfc65bd11997638b4817902bb3bf1f Mon Sep 17 00:00:00 2001
From: christopherbunn <chris.l.bunn@gmail.com>
Date: Thu, 7 Sep 2023 11:32:21 -0400
Subject: [PATCH 05/12] Remove nan values - NOT FINISHED

---
 .../preprocessing/stl_decomposer.py           |  1 +
 evalml/pipelines/time_series_pipeline_base.py |  5 ++++
 evalml/pipelines/utils.py                     | 28 ++++++++++---------
 evalml/utils/gen_utils.py                     |  6 ++--
 4 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/evalml/pipelines/components/transformers/preprocessing/stl_decomposer.py b/evalml/pipelines/components/transformers/preprocessing/stl_decomposer.py
index b4bcfdd029..503be35da3 100644
--- a/evalml/pipelines/components/transformers/preprocessing/stl_decomposer.py
+++ b/evalml/pipelines/components/transformers/preprocessing/stl_decomposer.py
@@ -442,6 +442,7 @@ def inverse_transform(
             y.append(y_series)
         y_df = pd.DataFrame(y).T
         y_df.index = original_index
+        y_df.columns = y_t.columns
         return y_df
 
     def get_trend_dataframe(self, X, y):
diff --git a/evalml/pipelines/time_series_pipeline_base.py b/evalml/pipelines/time_series_pipeline_base.py
index 3badb6dc09..0b82a86dcf 100644
--- a/evalml/pipelines/time_series_pipeline_base.py
+++ b/evalml/pipelines/time_series_pipeline_base.py
@@ -265,6 +265,11 @@ def predict_in_sample(
             calculating_residuals=calculating_residuals,
         )
         predictions = self._estimator_predict(features)
+        if len(predictions.columns) == len(y.columns):
+            # predictions.columns = y.columns
+            predictions = predictions.ww.rename(
+                dict(zip(predictions.columns, y.columns)),
+            )
         if len(predictions) == len(y):
             predictions.index = y.index
         predictions = self.inverse_transform(predictions)
diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index 1f4779ad07..897c3ce61d 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -235,19 +235,21 @@ def _get_decomposer(X, y, problem_type, estimator_class, sampler_name=None):
     if is_time_series(problem_type) and is_regression(problem_type):
         time_index = get_time_index(X, y, None)
         # If the time index frequency is uninferrable, STL will fail
-        if time_index.freq is None:
-            return components
-        freq = time_index.freq.name
-        if STLDecomposer.is_freq_valid(freq):
-            # Make sure there's a seasonal period
-            order = 3 if "Q" in freq else 5
-            seasonal_period = STLDecomposer.determine_periodicity(
-                X,
-                y,
-                rel_max_order=order,
-            )
-            if seasonal_period is not None and seasonal_period <= DECOMPOSER_PERIOD_CAP:
-                components.append(STLDecomposer)
+        # if time_index.freq is None:
+        #     return components
+        if time_index.freq is not None:
+            order = 3 if "Q" in time_index.freq.name else 5
+        else:
+            order = 5
+        # if STLDecomposer.is_freq_valid(freq):
+        # Make sure there's a seasonal period
+        seasonal_period = STLDecomposer.determine_periodicity(
+            X,
+            y,
+            rel_max_order=order,
+        )
+        if seasonal_period is not None and seasonal_period <= DECOMPOSER_PERIOD_CAP:
+            components.append(STLDecomposer)
     return components
 
 
diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
index 827930b8b6..e53142f44d 100644
--- a/evalml/utils/gen_utils.py
+++ b/evalml/utils/gen_utils.py
@@ -681,9 +681,9 @@ def get_time_index(X: pd.DataFrame, y: pd.Series, time_index_name: str):
                 )
     if not isinstance(dt_col, pd.DatetimeIndex) or dt_col.freq is None:
         dt_col = pd.DatetimeIndex(dt_col, freq="infer")
-    if dt_col.duplicated().any():
-        temp_dt_col = pd.DatetimeIndex(dt_col.copy().drop_duplicates(), freq="infer")
-        dt_col.freq = temp_dt_col.freq
+    # if dt_col.duplicated().any():
+    #     temp_dt_col = pd.DatetimeIndex(dt_col.copy().drop_duplicates(), freq="infer")
+    #     dt_col.freq = temp_dt_col.freq
     time_index = dt_col.rename(y.index.name)
     return time_index
 

From 189eb4943f161882c53fef444775315d0dc9718b Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Thu, 7 Sep 2023 10:43:01 -0700
Subject: [PATCH 06/12] handle series and df

---
 evalml/pipelines/time_series_pipeline_base.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/evalml/pipelines/time_series_pipeline_base.py b/evalml/pipelines/time_series_pipeline_base.py
index 0b82a86dcf..5a0a23e2af 100644
--- a/evalml/pipelines/time_series_pipeline_base.py
+++ b/evalml/pipelines/time_series_pipeline_base.py
@@ -265,8 +265,11 @@ def predict_in_sample(
             calculating_residuals=calculating_residuals,
         )
         predictions = self._estimator_predict(features)
-        if len(predictions.columns) == len(y.columns):
-            # predictions.columns = y.columns
+        if isinstance(predictions, pd.Series) and len(predictions) == len(y):
+            predictions = predictions.rename(self.input_target_name)
+        elif isinstance(predictions, pd.DataFrame) and len(predictions.columns) == len(
+            y.columns,
+        ):
             predictions = predictions.ww.rename(
                 dict(zip(predictions.columns, y.columns)),
             )

From 4a9ab0e973172fa9d2e4cf935962fbff3c5d0f82 Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Thu, 7 Sep 2023 16:15:16 -0700
Subject: [PATCH 07/12] fix stl graph

---
 evalml/pipelines/component_graph.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/evalml/pipelines/component_graph.py b/evalml/pipelines/component_graph.py
index 48e83807c1..ac85488ea2 100644
--- a/evalml/pipelines/component_graph.py
+++ b/evalml/pipelines/component_graph.py
@@ -806,10 +806,11 @@ def graph(self, name=None, graph_format=None):
                     [
                         key + " : " + "{:0.2f}".format(val)
                         if (isinstance(val, float))
-                        else key + " : " + str(val)
+                        else key + " : " + str(val).replace("{", "").replace("}", "")
                         for key, val in component_class.parameters.items()
                     ],
                 )  # noqa: W605
+
                 label = "%s |%s\l" % (component_name, parameters)  # noqa: W605
             graph.node(component_name, shape="record", label=label, nodesep="0.03")
 

From 2fd85fe9b4b27771a09b2f0645fd3522027d9299 Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Thu, 7 Sep 2023 17:04:06 -0700
Subject: [PATCH 08/12] fix if statements

---
 evalml/pipelines/time_series_pipeline_base.py | 6 ++----
 evalml/utils/gen_utils.py                     | 3 ---
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/evalml/pipelines/time_series_pipeline_base.py b/evalml/pipelines/time_series_pipeline_base.py
index 5a0a23e2af..37f163cd90 100644
--- a/evalml/pipelines/time_series_pipeline_base.py
+++ b/evalml/pipelines/time_series_pipeline_base.py
@@ -265,11 +265,9 @@ def predict_in_sample(
             calculating_residuals=calculating_residuals,
         )
         predictions = self._estimator_predict(features)
-        if isinstance(predictions, pd.Series) and len(predictions) == len(y):
+        if isinstance(predictions, pd.Series):
             predictions = predictions.rename(self.input_target_name)
-        elif isinstance(predictions, pd.DataFrame) and len(predictions.columns) == len(
-            y.columns,
-        ):
+        elif isinstance(predictions, pd.DataFrame):
             predictions = predictions.ww.rename(
                 dict(zip(predictions.columns, y.columns)),
             )
diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
index e53142f44d..ae7b444654 100644
--- a/evalml/utils/gen_utils.py
+++ b/evalml/utils/gen_utils.py
@@ -681,9 +681,6 @@ def get_time_index(X: pd.DataFrame, y: pd.Series, time_index_name: str):
                 )
     if not isinstance(dt_col, pd.DatetimeIndex) or dt_col.freq is None:
         dt_col = pd.DatetimeIndex(dt_col, freq="infer")
-    # if dt_col.duplicated().any():
-    #     temp_dt_col = pd.DatetimeIndex(dt_col.copy().drop_duplicates(), freq="infer")
-    #     dt_col.freq = temp_dt_col.freq
     time_index = dt_col.rename(y.index.name)
     return time_index
 

From 639408ab22ca40d3c65b9df9e6d05585d8d7e158 Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Thu, 7 Sep 2023 17:06:11 -0700
Subject: [PATCH 09/12] revert utils

---
 evalml/utils/gen_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
index ae7b444654..af253fa021 100644
--- a/evalml/utils/gen_utils.py
+++ b/evalml/utils/gen_utils.py
@@ -679,6 +679,7 @@ def get_time_index(X: pd.DataFrame, y: pd.Series, time_index_name: str):
                 raise ValueError(
                     f"Too many Datetime features provided in data and provided time_index column {time_index_name} not present in data.",
                 )
+
     if not isinstance(dt_col, pd.DatetimeIndex) or dt_col.freq is None:
         dt_col = pd.DatetimeIndex(dt_col, freq="infer")
     time_index = dt_col.rename(y.index.name)

From 12e1771bbc6bdab49c2324dd335c1872e25cab70 Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Thu, 7 Sep 2023 17:07:46 -0700
Subject: [PATCH 10/12] comments

---
 evalml/pipelines/utils.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index 897c3ce61d..255065b053 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -234,14 +234,10 @@ def _get_decomposer(X, y, problem_type, estimator_class, sampler_name=None):
     components = []
     if is_time_series(problem_type) and is_regression(problem_type):
         time_index = get_time_index(X, y, None)
-        # If the time index frequency is uninferrable, STL will fail
-        # if time_index.freq is None:
-        #     return components
         if time_index.freq is not None:
             order = 3 if "Q" in time_index.freq.name else 5
         else:
             order = 5
-        # if STLDecomposer.is_freq_valid(freq):
         # Make sure there's a seasonal period
         seasonal_period = STLDecomposer.determine_periodicity(
             X,

From 8ab7fdf1762e061052efe6bded8b694fafe018cd Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Fri, 8 Sep 2023 09:34:44 -0700
Subject: [PATCH 11/12] add comments and conditional branch

---
 evalml/pipelines/component_graph.py |  1 +
 evalml/pipelines/utils.py           | 24 ++++++++++++------------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/evalml/pipelines/component_graph.py b/evalml/pipelines/component_graph.py
index ac85488ea2..0f3f4e5810 100644
--- a/evalml/pipelines/component_graph.py
+++ b/evalml/pipelines/component_graph.py
@@ -802,6 +802,7 @@ def graph(self, name=None, graph_format=None):
         for component_name, component_class in self.component_instances.items():
             label = "%s\l" % (component_name)  # noqa: W605
             if isinstance(component_class, ComponentBase):
+                # Reformat labels for nodes: cast values as strings, format floats to 2 decimal places, and remove curly braces from dictionary values so Digraph can parse them
                 parameters = "\\l".join(
                     [
                         key + " : " + "{:0.2f}".format(val)
diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index 255065b053..d2ce680105 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -233,19 +233,19 @@ def _get_time_series_featurizer(X, y, problem_type, estimator_class, sampler_nam
 def _get_decomposer(X, y, problem_type, estimator_class, sampler_name=None):
     components = []
     if is_time_series(problem_type) and is_regression(problem_type):
-        time_index = get_time_index(X, y, None)
-        if time_index.freq is not None:
-            order = 3 if "Q" in time_index.freq.name else 5
-        else:
-            order = 5
-        # Make sure there's a seasonal period
-        seasonal_period = STLDecomposer.determine_periodicity(
-            X,
-            y,
-            rel_max_order=order,
-        )
-        if seasonal_period is not None and seasonal_period <= DECOMPOSER_PERIOD_CAP:
+        if is_multiseries(problem_type):
             components.append(STLDecomposer)
+        else:
+            time_index = get_time_index(X, y, None)
+            order = 3 if "Q" in time_index.freq.name else 5
+            # Make sure there's a seasonal period
+            seasonal_period = STLDecomposer.determine_periodicity(
+                X,
+                y,
+                rel_max_order=order,
+            )
+            if seasonal_period is not None and seasonal_period <= DECOMPOSER_PERIOD_CAP:
+                components.append(STLDecomposer)
     return components
 
 

From 28e2cdb8985993833607ce080c5eea53ab6cb3a6 Mon Sep 17 00:00:00 2001
From: remyogasawara <remyogasawara@gmail.com>
Date: Fri, 8 Sep 2023 10:51:03 -0700
Subject: [PATCH 12/12] fix condition for adding decomposer

---
 evalml/pipelines/utils.py                     | 26 ++++++++++++-------
 .../automl_tests/test_default_algorithm.py    |  4 +--
 .../automl_tests/test_iterative_algorithm.py  |  7 +++--
 3 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index d2ce680105..e23998096d 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -237,15 +237,23 @@ def _get_decomposer(X, y, problem_type, estimator_class, sampler_name=None):
             components.append(STLDecomposer)
         else:
             time_index = get_time_index(X, y, None)
-            order = 3 if "Q" in time_index.freq.name else 5
-            # Make sure there's a seasonal period
-            seasonal_period = STLDecomposer.determine_periodicity(
-                X,
-                y,
-                rel_max_order=order,
-            )
-            if seasonal_period is not None and seasonal_period <= DECOMPOSER_PERIOD_CAP:
-                components.append(STLDecomposer)
+            # If the time index frequency is uninferrable, STL will fail
+            if time_index.freq is None:
+                return components
+            freq = time_index.freq.name
+            if STLDecomposer.is_freq_valid(freq):
+                # Make sure there's a seasonal period
+                order = 3 if "Q" in freq else 5
+                seasonal_period = STLDecomposer.determine_periodicity(
+                    X,
+                    y,
+                    rel_max_order=order,
+                )
+                if (
+                    seasonal_period is not None
+                    and seasonal_period <= DECOMPOSER_PERIOD_CAP
+                ):
+                    components.append(STLDecomposer)
     return components
 
 
diff --git a/evalml/tests/automl_tests/test_default_algorithm.py b/evalml/tests/automl_tests/test_default_algorithm.py
index b21cc452cb..31b8a166f7 100644
--- a/evalml/tests/automl_tests/test_default_algorithm.py
+++ b/evalml/tests/automl_tests/test_default_algorithm.py
@@ -670,7 +670,7 @@ def test_default_algorithm_multiseries_time_series(
     )
 
     first_batch = algo.next_batch()
-    assert len(first_batch) == 1
+    assert len(first_batch) == 2
     pipeline = first_batch[0]
     assert pipeline.model_family == ModelFamily.VARMAX
     assert pipeline.parameters["pipeline"] == search_parameters["pipeline"]
@@ -679,7 +679,7 @@ def test_default_algorithm_multiseries_time_series(
 
     long_explore = algo.next_batch()
     long_estimators = set([pipeline.estimator.name for pipeline in long_explore])
-    assert len(long_explore) == 50
+    assert len(long_explore) == 100
     assert len(long_estimators) == 1
 
 
diff --git a/evalml/tests/automl_tests/test_iterative_algorithm.py b/evalml/tests/automl_tests/test_iterative_algorithm.py
index f5ed9b73ac..3030c09909 100644
--- a/evalml/tests/automl_tests/test_iterative_algorithm.py
+++ b/evalml/tests/automl_tests/test_iterative_algorithm.py
@@ -18,11 +18,12 @@
     DateTimeFeaturizer,
     EmailFeaturizer,
     NaturalLanguageFeaturizer,
+    STLDecomposer,
     TimeSeriesFeaturizer,
     URLFeaturizer,
 )
 from evalml.pipelines.components.utils import get_estimators
-from evalml.pipelines.utils import make_pipeline
+from evalml.pipelines.utils import is_regression, make_pipeline
 from evalml.problem_types import ProblemTypes, is_multiseries, is_time_series
 
 
@@ -97,6 +98,7 @@ def test_iterative_algorithm_init(
     assert algo.batch_number == 0
     assert algo.default_max_batches == 1
     estimators = get_estimators(problem_type)
+    decomposer = [STLDecomposer] if is_regression(problem_type) else []
     assert len(algo.allowed_pipelines) == len(
         [
             make_pipeline(
@@ -107,7 +109,8 @@ def test_iterative_algorithm_init(
                 parameters=search_parameters,
             )
             for estimator in estimators
-        ],
+        ]
+        + decomposer,
     )