support xgboost 2.0 #1219

Merged: 32 commits (branch xgb2 into main), Sep 22, 2023

Commits
fafbba2  support xgboost 2.0 (sonichi, Sep 13, 2023)
71b24d9  Merge branch 'main' into xgb2 (sonichi, Sep 13, 2023)
9fc62f6  try classes_ (sonichi, Sep 13, 2023)
732aaef  test version (sonichi, Sep 13, 2023)
9b8adf1  quote (sonichi, Sep 13, 2023)
bd8e3a2  use_label_encoder (sonichi, Sep 14, 2023)
6f1f26c  Fix xgboost test error (thinkall, Sep 16, 2023)
7db7acb  remove deprecated files (sonichi, Sep 16, 2023)
c479a04  remove deprecated files (sonichi, Sep 16, 2023)
9fe933c  Merge branch 'xgb2' of https://github.com/microsoft/FLAML into xgb2 (sonichi, Sep 16, 2023)
7c843b8  remove deprecated import (sonichi, Sep 16, 2023)
26ad15e  replace deprecated import in integrate_spark.ipynb (levscaut, Sep 19, 2023)
cfbde33  replace deprecated import in automl_lightgbm.ipynb (levscaut, Sep 19, 2023)
a8d238a  formatted integrate_spark.ipynb (levscaut, Sep 19, 2023)
4c6e5b3  replace deprecated import (levscaut, Sep 19, 2023)
e4f9806  try fix driver python path (levscaut, Sep 19, 2023)
76b64ce  Update python-package.yml (levscaut, Sep 19, 2023)
a7ea2b9  replace deprecated reference (levscaut, Sep 19, 2023)
a139168  move spark python env var to other section (levscaut, Sep 19, 2023)
fb06c53  Update setup.py, install xgb<2 for MacOS (thinkall, Sep 20, 2023)
48131a1  Fix typo (thinkall, Sep 20, 2023)
0c7121f  assert (sonichi, Sep 20, 2023)
ee0c237  Try assert xgboost version (thinkall, Sep 21, 2023)
1369496  Merge branch 'xgb2' of https://github.com/microsoft/FLAML into xgb2 (thinkall, Sep 21, 2023)
a5282d1  Fail fast (thinkall, Sep 21, 2023)
fa5fc47  Keep all test/spark to try fail fast (thinkall, Sep 21, 2023)
a090348  No need to skip spark test in Mac or Win (thinkall, Sep 21, 2023)
65883e6  Remove assert xgb version (thinkall, Sep 21, 2023)
40d4470  Remove fail fast (thinkall, Sep 21, 2023)
1826687  Found root cause, fix test_sparse_matrix_xgboost (thinkall, Sep 21, 2023)
6c3e1ab  Revert "No need to skip spark test in Mac or Win" (thinkall, Sep 21, 2023)
c0fc76e  remove assertion (sonichi, Sep 22, 2023)
Files changed (changes from all commits)
4 changes: 3 additions & 1 deletion .github/workflows/python-package.yml

@@ -64,10 +64,12 @@ jobs:
         if: matrix.os == 'ubuntu-latest'
         run: |
           pip install "ray[tune]<2.5.0"
-      - name: If mac, install ray
+      - name: If mac, install ray and xgboost 1
        if: matrix.os == 'macOS-latest'
        run: |
          pip install -e .[ray]
+         # use macOS to test xgboost 1, but macOS also supports xgboost 2
+         pip install "xgboost<2"
      - name: If linux or mac, install prophet on python < 3.9
        if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9' && matrix.python-version != '3.10'
        run: |
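The commit history above shows that an explicit version check was tried and then dropped ("Try assert xgboost version", "Remove assert xgb version"). A minimal sketch of what such a CI-side guard could look like, purely hypothetical and not part of the merged diff:

import xgboost

# Hypothetical guard: macOS runners are expected to carry the "xgboost<2" pin.
major = int(xgboost.__version__.split(".", 1)[0])
assert major < 2, f"expected xgboost 1.x on macOS, got {xgboost.__version__}"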
2 changes: 1 addition & 1 deletion flaml/automl/automl.py

@@ -606,7 +606,7 @@ def add_learner(self, learner_name, learner_class):
         Args:
             learner_name: A string of the learner's name.
-            learner_class: A subclass of flaml.model.BaseEstimator.
+            learner_class: A subclass of flaml.automl.model.BaseEstimator.
         """
         self._state.learner_classes[learner_name] = learner_class
23 changes: 12 additions & 11 deletions flaml/automl/model.py

@@ -32,6 +32,7 @@
     from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
     from sklearn.linear_model import LogisticRegression
     from sklearn.dummy import DummyClassifier, DummyRegressor
+    from xgboost import __version__ as xgboost_version
 except ImportError:
     pass

@@ -212,10 +213,10 @@ def _fit(self, X_train, y_train, **kwargs):
         model = self.estimator_class(**self.params)
         if logger.level == logging.DEBUG:
             # xgboost 1.6 doesn't display all the params in the model str
-            logger.debug(f"flaml.model - {model} fit started with params {self.params}")
+            logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}")
         model.fit(X_train, y_train, **kwargs)
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {model} fit finished")
+            logger.debug(f"flaml.automl.model - {model} fit finished")
         train_time = time.time() - current_time
         self._model = model
         return train_time

@@ -455,10 +456,10 @@ def _fit(self, df_train: sparkDataFrame, **kwargs):
         current_time = time.time()
         pipeline_model = self.estimator_class(**self.params, **kwargs)
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {pipeline_model} fit started with params {self.params}")
+            logger.debug(f"flaml.automl.model - {pipeline_model} fit started with params {self.params}")
         pipeline_model.fit(df_train)
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {pipeline_model} fit finished")
+            logger.debug(f"flaml.automl.model - {pipeline_model} fit finished")
         train_time = time.time() - current_time
         self._model = pipeline_model
         return train_time

@@ -690,12 +691,12 @@ def _fit(self, df_train: sparkDataFrame, **kwargs):
         current_time = time.time()
         model = self.estimator_class(**self.params, **kwargs)
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {model} fit started with params {self.params}")
+            logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}")
         self._model = model.fit(df_train)
         self._model.classes_ = self.model_classes_
         self._model.n_classes_ = self.model_n_classes_
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {model} fit finished")
+            logger.debug(f"flaml.automl.model - {model} fit finished")
         train_time = time.time() - current_time
         return train_time

@@ -1412,7 +1413,7 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
             callbacks = self.params.pop("callbacks")
             self._model.set_params(callbacks=callbacks[:-1])
             best_iteration = (
-                self._model.get_booster().best_iteration
+                getattr(self._model.get_booster(), "best_iteration", None)
                 if isinstance(self, XGBoostSklearnEstimator)
                 else self._model.best_iteration_
             )
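The getattr fallback above is needed because in xgboost 2.0 the Booster.best_iteration property raises AttributeError unless early stopping was actually used, rather than silently returning a value. A minimal sketch of the failure mode, assuming xgboost >= 2.0:

import numpy as np
import xgboost as xgb

X, y = np.random.rand(50, 4), np.random.randint(0, 2, 50)
model = xgb.XGBClassifier(n_estimators=5).fit(X, y)

# No early stopping ran here, so the best_iteration property raises
# AttributeError in xgboost 2.x; getattr turns that into a None fallback.
best_iteration = getattr(model.get_booster(), "best_iteration", None)
print(best_iteration)  # None here; an int when early stopping was used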
@@ -1510,8 +1511,6 @@ def config2params(self, config: dict) -> dict:
         # params["booster"] = params.get("booster", "gbtree")

         # use_label_encoder is deprecated in 1.7.
-        from xgboost import __version__ as xgboost_version
-
         if xgboost_version < "1.7.0":
             params["use_label_encoder"] = params.get("use_label_encoder", False)
         if "n_jobs" in config:

@@ -1559,7 +1558,7 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
                 obj=obj,
                 callbacks=callbacks,
             )
-            self.params["n_estimators"] = self._model.best_iteration + 1
+            self.params["n_estimators"] = getattr(self._model, "best_iteration", _n_estimators - 1) + 1
         else:
             self._model = xgb.train(self.params, dtrain, _n_estimators, obj=obj)
             self.params["n_estimators"] = _n_estimators
@@ -1620,7 +1619,9 @@ def config2params(self, config: dict) -> dict:
         if max_depth == 0:
             params["grow_policy"] = params.get("grow_policy", "lossguide")
             params["tree_method"] = params.get("tree_method", "hist")
-        params["use_label_encoder"] = params.get("use_label_encoder", False)
+        # use_label_encoder is deprecated in 1.7.
+        if xgboost_version < "1.7.0":
+            params["use_label_encoder"] = params.get("use_label_encoder", False)
         return params

     def __init__(
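One caveat worth noting: xgboost_version < "1.7.0" compares strings lexicographically. That happens to order every version xgboost has actually shipped correctly, but a hypothetical 1.10 release would sort before 1.7. A more robust sketch using the packaging library (an assumption on my part; not what the merged diff does):

from packaging.version import Version

# Lexicographic comparison misorders multi-digit components:
assert "1.10.0" < "1.7.0"                    # True as strings, wrong as versions
assert Version("1.10.0") > Version("1.7.0")  # correct semantic ordering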
4 changes: 2 additions & 2 deletions flaml/automl/time_series/ts_model.py

@@ -22,7 +22,7 @@ class PD:
 import numpy as np

 from flaml import tune
-from flaml.model import (
+from flaml.automl.model import (
     suppress_stdout_stderr,
     SKLearnEstimator,
     logger,

@@ -33,7 +33,7 @@ class PD:
     XGBoostLimitDepthEstimator,
     CatBoostEstimator,
 )
-from flaml.data import TS_TIMESTAMP_COL, TS_VALUE_COL
+from flaml.automl.data import TS_TIMESTAMP_COL, TS_VALUE_COL
 from flaml.automl.time_series.ts_data import (
     TimeSeriesDataset,
     enrich_dataset,
9 changes: 0 additions & 9 deletions flaml/data.py

This file was deleted.

7 changes: 6 additions & 1 deletion flaml/default/estimator.py

@@ -105,7 +105,12 @@ def fit(self, X, y, *args, **params):
         # if hasattr(self, "_classes"):
         #     self._classes = self._label_transformer.classes_
         # else:
-        self.classes_ = self._label_transformer.classes_
+        try:
+            self.classes_ = self._label_transformer.classes_
+        except AttributeError:
+            # xgboost 2: AttributeError: can't set attribute
+            if "xgb" not in estimator_name:
+                raise
         if "xgb" not in estimator_name:
             # rf and et would do inverse transform automatically; xgb doesn't
             self._label_transformer = None
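The try/except is needed because xgboost 2.0 turned classes_ into a read-only property of XGBClassifier, so the old assignment raises "AttributeError: can't set attribute". A minimal reproduction sketch, assuming xgboost >= 2.0:

import numpy as np
import xgboost as xgb

clf = xgb.XGBClassifier(n_estimators=2)
clf.fit(np.random.rand(20, 3), np.random.randint(0, 2, 20))

try:
    clf.classes_ = np.array([0, 1])  # a plain attribute in xgboost 1.x, so this worked
except AttributeError:
    print("classes_ is read-only in xgboost 2.x")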
9 changes: 0 additions & 9 deletions flaml/model.py

This file was deleted.
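With flaml/data.py and flaml/model.py deleted, the deprecated top-level aliases are gone and downstream code imports from the flaml.automl namespace instead; the notebook diff below makes the same migration. In short:

# Before this PR (modules now deleted):
#   from flaml.data import load_openml_dataset, get_output_from_log
#   from flaml.model import SKLearnEstimator

# After this PR:
from flaml.automl.data import load_openml_dataset, get_output_from_log
from flaml.automl.model import SKLearnEstimator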

2 changes: 1 addition & 1 deletion flaml/version.py

@@ -1 +1 @@
-__version__ = "2.1.0"
+__version__ = "2.1.1"
6 changes: 3 additions & 3 deletions notebook/automl_classification.ipynb

@@ -80,7 +80,7 @@
 ],
 "source": [
     "from minio.error import ServerError\n",
-    "from flaml.data import load_openml_dataset\n",
+    "from flaml.automl.data import load_openml_dataset\n",
     "\n",
     "try:\n",
     "    X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir='./')\n",

@@ -1252,7 +1252,7 @@
 }
 ],
 "source": [
-    "from flaml.data import get_output_from_log\n",
+    "from flaml.automl.data import get_output_from_log\n",
     "time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n",
     "    get_output_from_log(filename=settings['log_file_name'], time_budget=240)\n",
     "for config in config_history:\n",

@@ -1540,7 +1540,7 @@
 "outputs": [],
 "source": [
     "''' SKLearnEstimator is the super class for a sklearn learner '''\n",
-    "from flaml.model import SKLearnEstimator\n",
+    "from flaml.automl.model import SKLearnEstimator\n",
     "from flaml import tune\n",
     "from flaml.automl.task.task import CLASSIFICATION\n",
     "\n",