Merge branch 'main' into nested_univ

aeon-toolkit · Feb 27, 2023 · f6de9d3 · f6de9d3
2 parents 0c56387 + 9ab91e9
commit f6de9d3
Show file tree

Hide file tree

Showing 21 changed files with 45 additions and 271 deletions.
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -2071,6 +2071,15 @@
         "contributions": [
           "code"
         ]
+      },
+    {
+        "login": "scorcism",
+        "name": "Abhishek Pathak",
+        "avatar_url": "https://avatars.githubusercontent.com/u/69761436",
+        "profile": "https://github.com/scorcism",
+        "contributions": [
+          "bug"
+        ]
       }
   ]
 }
diff --git a/CODEOWNERS b/CODEOWNERS
@@ -72,7 +72,6 @@ sktime/forecasting/fbprophet.py @aiwalter
 sktime/forecasting/bats.py @aiwalter
 sktime/forecasting/tbats.py @aiwalter
 sktime/forecasting/arima.py @HYang1996
-sktime/forecasting/adapters/_hcrystalball.py @MichalChromcak
 sktime/forecasting/statsforecast.py @FedericoGarza
 sktime/forecasting/structural.py @juanitorduz
 sktime/forecasting/model_selection/_split @koralturkk

diff --git a/docs/source/api_reference/forecasting.rst b/docs/source/api_reference/forecasting.rst
@@ -312,18 +312,6 @@ Online and stream forecasting
     UpdateRefitsEvery
     DontUpdate
 
-Adapters to other forecasting framework packages
-------------------------------------------------
-
-Generic framework adapters that expose other frameworks in the ``sktime`` interface.
-
-.. currentmodule:: sktime.forecasting.adapters
-
-.. autosummary::
-    :toctree: auto_generated/
-    :template: class.rst
-
-    HCrystalBallAdapter
 
 Model selection and tuning
 --------------------------

diff --git a/sktime/classification/convolution_based/_arsenal.py b/sktime/classification/convolution_based/_arsenal.py
@@ -219,7 +219,7 @@ def _fit(self, X, y):
                 train_time < time_limit
                 and self.n_estimators < self.contract_max_n_estimators
             ):
-                fit = Parallel(n_jobs=self._threads_to_use)(
+                fit = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
                     delayed(self._fit_estimator)(
                         _clone_estimator(
                             base_rocket,
@@ -243,7 +243,7 @@ def _fit(self, X, y):
                 self.n_estimators += self._threads_to_use
                 train_time = time.time() - start_time
         else:
-            fit = Parallel(n_jobs=self._threads_to_use)(
+            fit = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
                 delayed(self._fit_estimator)(
                     _clone_estimator(
                         base_rocket,
@@ -304,7 +304,7 @@ def _predict_proba(self, X) -> np.ndarray:
         y : array-like, shape = [n_instances, n_classes_]
             Predicted probabilities using the ordering in classes_.
         """
-        y_probas = Parallel(n_jobs=self._threads_to_use)(
+        y_probas = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(self._predict_proba_for_estimator)(
                 X,
                 self.estimators_[i],
@@ -341,7 +341,7 @@ def _get_train_probs(self, X, y) -> np.ndarray:
         if not self.save_transformed_data:
             raise ValueError("Currently only works with saved transform data from fit.")
 
-        p = Parallel(n_jobs=self._threads_to_use)(
+        p = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(self._train_probas_for_estimator)(
                 y,
                 i,

diff --git a/sktime/classification/dictionary_based/_boss.py b/sktime/classification/dictionary_based/_boss.py
@@ -695,7 +695,7 @@ def pairwise_distances(X, Y=None, use_boss_distance=False, n_jobs=1):
         distance_matrix = np.zeros((X.shape[0], Y.shape[0]))
 
         if effective_n_jobs(n_jobs) > 1:
-            Parallel(n_jobs=n_jobs, backend="threading")(
+            Parallel(n_jobs=n_jobs, prefer="threads")(
                 delayed(_dist_wrapper)(distance_matrix, X, Y, s, XX_row_norms, XY)
                 for s in gen_even_slices(_num_samples(X), effective_n_jobs(n_jobs))
             )

diff --git a/sktime/classification/dictionary_based/_muse.py b/sktime/classification/dictionary_based/_muse.py
@@ -198,7 +198,7 @@ def _fit(self, X, y):
         if self.variance and self.anova:
             raise ValueError("MUSE Warning: Please set either variance or anova.")
 
-        parallel_res = Parallel(n_jobs=self.n_jobs, backend="threading")(
+        parallel_res = Parallel(n_jobs=self.n_jobs, prefer="threads")(
             delayed(_parallel_fit)(
                 X,
                 y.copy(),  # no clue why, but this copy is required.
@@ -300,7 +300,7 @@ def _transform_words(self, X):
         if self.use_first_order_differences:
             X = self._add_first_order_differences(X)
 
-        parallel_res = Parallel(n_jobs=self._threads_to_use, backend="threading")(
+        parallel_res = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(_parallel_transform_words)(
                 X, self.window_sizes, self.SFA_transformers, ind
             )

diff --git a/sktime/classification/dictionary_based/_weasel.py b/sktime/classification/dictionary_based/_weasel.py
@@ -192,7 +192,7 @@ def _fit(self, X, y):
         self.window_sizes = list(range(self.min_window, self.max_window, win_inc))
         self.highest_bit = (math.ceil(math.log2(self.max_window))) + 1
 
-        parallel_res = Parallel(n_jobs=self.n_jobs, backend="threading")(
+        parallel_res = Parallel(n_jobs=self.n_jobs, prefer="threads")(
             delayed(_parallel_fit)(
                 X,
                 y,
@@ -279,7 +279,7 @@ def _predict_proba(self, X) -> np.ndarray:
             )
 
     def _transform_words(self, X):
-        parallel_res = Parallel(n_jobs=self._threads_to_use, backend="threading")(
+        parallel_res = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(transformer.transform)(X) for transformer in self.SFA_transformers
         )
         all_words = []

diff --git a/sktime/classification/early_classification/_teaser.py b/sktime/classification/early_classification/_teaser.py
@@ -172,7 +172,7 @@ def _fit(self, X, y):
         m = getattr(self.estimator, "n_jobs", None)
         threads = self._threads_to_use if m is None else 1
 
-        fit = Parallel(n_jobs=threads)(
+        fit = Parallel(n_jobs=threads, prefer="threads")(
             delayed(self._fit_estimator)(
                 X,
                 y,
@@ -231,7 +231,7 @@ def _predict_proba(self, X) -> Tuple[np.ndarray, np.ndarray]:
         threads = self._threads_to_use if m is None else 1
 
         # compute all new updates since then
-        out = Parallel(n_jobs=threads)(
+        out = Parallel(n_jobs=threads, prefer="threads")(
             delayed(self._predict_proba_for_estimator)(
                 X,
                 i,
@@ -304,7 +304,7 @@ def _update_predict_proba(self, X) -> Tuple[np.ndarray, np.ndarray]:
         threads = self._threads_to_use if m is None else 1
 
         # compute all new updates since then
-        out = Parallel(n_jobs=threads)(
+        out = Parallel(n_jobs=threads, prefer="threads")(
             delayed(self._predict_proba_for_estimator)(
                 X,
                 i,
@@ -487,7 +487,6 @@ def _predict_oc_classifier_n_timestamps(
     def _predict_oc_classifier(
         self, X_oc, n_consecutive_predictions, idx, estimator_preds, state_info
     ):
-
         # stores whether we have made a final decision on a prediction, if true
         # state info won't be edited in later time stamps
         finished = state_info[:, 1] >= n_consecutive_predictions

diff --git a/sktime/classification/interval_based/_cif.py b/sktime/classification/interval_based/_cif.py
@@ -194,7 +194,7 @@ def _fit(self, X, y):
         if self._max_interval < self._min_interval:
             self._max_interval = self._min_interval
 
-        fit = Parallel(n_jobs=self._threads_to_use)(
+        fit = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(self._fit_estimator)(
                 X,
                 y,
@@ -224,7 +224,7 @@ def _predict_proba(self, X) -> np.ndarray:
                 "that in the test data"
             )
 
-        y_probas = Parallel(n_jobs=self._threads_to_use)(
+        y_probas = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(self._predict_proba_for_estimator)(
                 X,
                 self.estimators_[i],

diff --git a/sktime/classification/interval_based/_drcif.py b/sktime/classification/interval_based/_drcif.py
@@ -300,7 +300,7 @@ def _fit(self, X, y):
                 train_time < time_limit
                 and self._n_estimators < self.contract_max_n_estimators
             ):
-                fit = Parallel(n_jobs=self._threads_to_use)(
+                fit = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
                     delayed(self._fit_estimator)(
                         X,
                         X_p,
@@ -328,7 +328,7 @@ def _fit(self, X, y):
                 self._n_estimators += self._threads_to_use
                 train_time = time.time() - start_time
         else:
-            fit = Parallel(n_jobs=self._threads_to_use)(
+            fit = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
                 delayed(self._fit_estimator)(
                     X,
                     X_p,
@@ -381,7 +381,7 @@ def _predict_proba(self, X) -> np.ndarray:
 
         X_d = np.diff(X, 1)
 
-        y_probas = Parallel(n_jobs=self._threads_to_use)(
+        y_probas = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(self._predict_proba_for_estimator)(
                 X,
                 X_p,
@@ -423,7 +423,7 @@ def _get_train_probs(self, X, y) -> np.ndarray:
         if not self.save_transformed_data:
             raise ValueError("Currently only works with saved transform data from fit.")
 
-        p = Parallel(n_jobs=self._threads_to_use)(
+        p = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(self._train_probas_for_estimator)(
                 y,
                 i,

diff --git a/sktime/classification/interval_based/_rise.py b/sktime/classification/interval_based/_rise.py
@@ -273,7 +273,7 @@ def _fit(self, X, y):
         ]
 
         # Parallel loop
-        worker_rets = Parallel(n_jobs=self._threads_to_use)(
+        worker_rets = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(_parallel_build_trees)(
                 X,
                 y,
@@ -348,7 +348,7 @@ def _predict_proba(self, X) -> np.ndarray:
         n_jobs, _, _ = _partition_estimators(self.n_estimators, self._threads_to_use)
 
         # Parallel loop
-        all_proba = Parallel(n_jobs=n_jobs)(
+        all_proba = Parallel(n_jobs=n_jobs, prefer="threads")(
             delayed(_predict_proba_for_estimator)(
                 X,
                 self.estimators_[i],
@@ -495,6 +495,7 @@ def acf(x, max_lag):
 #
 #     return y
 
+
 # @jit(parallel=True, cache=True, nopython=True)
 def matrix_acf(x, num_cases, max_lag):
     """Autocorrelation function transform.

diff --git a/sktime/classification/interval_based/_stsf.py b/sktime/classification/interval_based/_stsf.py
@@ -151,7 +151,7 @@ def _fit(self, X, y):
                     (rng.choice(cls_idx, size=average - class_counts[i]), balance_cases)
                 )
 
-        fit = Parallel(n_jobs=self._threads_to_use)(
+        fit = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(self._fit_estimator)(
                 X,
                 X_p,
@@ -213,7 +213,7 @@ def _predict_proba(self, X) -> np.ndarray:
         _, X_p = signal.periodogram(X)
         X_d = np.diff(X, 1)
 
-        y_probas = Parallel(n_jobs=self._threads_to_use)(
+        y_probas = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
             delayed(self._predict_proba_for_estimator)(
                 X,
                 X_p,

diff --git a/sktime/classification/interval_based/_tsf.py b/sktime/classification/interval_based/_tsf.py
@@ -158,7 +158,7 @@ def _predict_proba(self, X) -> np.ndarray:
             Predicted probabilities
         """
         X = X.squeeze(1)
-        y_probas = Parallel(n_jobs=self.n_jobs)(
+        y_probas = Parallel(n_jobs=self.n_jobs, prefer="threads")(
             delayed(_predict_single_classifier_proba)(
                 X, self.estimators_[i], self.intervals_[i]
             )

diff --git a/sktime/classification/sklearn/_rotation_forest.py b/sktime/classification/sklearn/_rotation_forest.py
@@ -215,7 +215,7 @@ def fit(self, X, y):
                 train_time < time_limit
                 and self._n_estimators < self.contract_max_n_estimators
             ):
-                fit = Parallel(n_jobs=self._n_jobs)(
+                fit = Parallel(n_jobs=self._n_jobs, prefer="threads")(
                     delayed(self._fit_estimator)(
                         X,
                         X_cls_split,
@@ -237,7 +237,7 @@ def fit(self, X, y):
         else:
             self._n_estimators = self.n_estimators
 
-            fit = Parallel(n_jobs=self._n_jobs)(
+            fit = Parallel(n_jobs=self._n_jobs, prefer="threads")(
                 delayed(self._fit_estimator)(
                     X,
                     X_cls_split,
@@ -316,7 +316,7 @@ def predict_proba(self, X):
         # normalise the data.
         X = (X - self._min) / self._ptp
 
-        y_probas = Parallel(n_jobs=self._n_jobs)(
+        y_probas = Parallel(n_jobs=self._n_jobs, prefer="threads")(
             delayed(self._predict_proba_for_estimator)(
                 X,
                 self.estimators_[i],
@@ -364,7 +364,7 @@ def _get_train_probs(self, X, y):
         if not self.save_transformed_data:
             raise ValueError("Currently only works with saved transform data from fit.")
 
-        p = Parallel(n_jobs=self._n_jobs)(
+        p = Parallel(n_jobs=self._n_jobs, prefer="threads")(
             delayed(self._train_probas_for_estimator)(
                 y,
                 i,

diff --git a/sktime/classification/tests/test_base.py b/sktime/classification/tests/test_base.py
@@ -276,8 +276,8 @@ def _check_classifier_input(X, y=None, enforce_min_instances=1):
     _check_classifier_input(test_X2, test_y2)
     # 2. Test correct: X: pd.DataFrame with 1 (univariate) and 3 cols(multivariate) vs
     # y:np.array and np.Series
-    test_X3 = _create_nested_dataframe(5, 1, 10)
-    test_X4 = _create_nested_dataframe(5, 3, 10)
+    test_X3 = _create_example_dataframe(5, 1, 10)
+    test_X4 = _create_example_dataframe(5, 3, 10)
     _check_classifier_input(test_X3, test_y1)
     _check_classifier_input(test_X4, test_y1)
     _check_classifier_input(test_X3, test_y2)
@@ -308,16 +308,6 @@ def _create_example_dataframe(cases=5, dimensions=1, length=10):
     return test_X
 
 
-def _create_nested_dataframe(cases=5, dimensions=1, length=10):
-    testy = pd.DataFrame(dtype=np.float32)
-    for i in range(0, dimensions):
-        instance_list = []
-        for _ in range(0, cases):
-            instance_list.append(pd.Series(np.random.randn(length)))
-        testy["dimension_" + str(i + 1)] = instance_list
-    return testy
-
-
 def _create_unequal_length_nested_dataframe(cases=5, dimensions=1, length=10):
     testy = pd.DataFrame(dtype=np.float32)
     for i in range(0, dimensions):

diff --git a/sktime/forecasting/adapters/__init__.py b/sktime/forecasting/adapters/__init__.py