Skip to content

Commit 868e7dd

Browse files
sonichi, thinkall, levscaut
authored
support xgboost 2.0 (#1219)
* support xgboost 2.0
* try classes_
* test version
* quote
* use_label_encoder
* Fix xgboost test error
* remove deprecated files
* remove deprecated files
* remove deprecated import
* replace deprecated import in integrate_spark.ipynb
* replace deprecated import in automl_lightgbm.ipynb
* formatted integrate_spark.ipynb
* replace deprecated import
* try fix driver python path
* Update python-package.yml
* replace deprecated reference
* move spark python env var to other section
* Update setup.py, install xgb<2 for MacOS
* Fix typo
* assert
* Try assert xgboost version
* Fail fast
* Keep all test/spark to try fail fast
* No need to skip spark test in Mac or Win
* Remove assert xgb version
* Remove fail fast
* Found root cause, fix test_sparse_matrix_xgboost
* Revert "No need to skip spark test in Mac or Win" (this reverts commit a090348)
* remove assertion

---------

Co-authored-by: Li Jiang <bnujli@gmail.com>
Co-authored-by: levscaut <57213911+levscaut@users.noreply.github.com>
Co-authored-by: levscaut <lwd2010530@qq.com>
Co-authored-by: Li Jiang <lijiang1@microsoft.com>
1 parent 4886cb5 commit 868e7dd

22 files changed

+576
-440
lines changed

.github/workflows/python-package.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,12 @@ jobs:
6464
if: matrix.os == 'ubuntu-latest'
6565
run: |
6666
pip install "ray[tune]<2.5.0"
67-
- name: If mac, install ray
67+
- name: If mac, install ray and xgboost 1
6868
if: matrix.os == 'macOS-latest'
6969
run: |
7070
pip install -e .[ray]
71+
# use macOS to test xgboost 1, but macOS also supports xgboost 2
72+
pip install "xgboost<2"
7173
- name: If linux or mac, install prophet on python < 3.9
7274
if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9' && matrix.python-version != '3.10'
7375
run: |

flaml/automl/automl.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,7 @@ def add_learner(self, learner_name, learner_class):
606606
607607
Args:
608608
learner_name: A string of the learner's name.
609-
learner_class: A subclass of flaml.model.BaseEstimator.
609+
learner_class: A subclass of flaml.automl.model.BaseEstimator.
610610
"""
611611
self._state.learner_classes[learner_name] = learner_class
612612

flaml/automl/model.py

+12-11
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
3333
from sklearn.linear_model import LogisticRegression
3434
from sklearn.dummy import DummyClassifier, DummyRegressor
35+
from xgboost import __version__ as xgboost_version
3536
except ImportError:
3637
pass
3738

@@ -212,10 +213,10 @@ def _fit(self, X_train, y_train, **kwargs):
212213
model = self.estimator_class(**self.params)
213214
if logger.level == logging.DEBUG:
214215
# xgboost 1.6 doesn't display all the params in the model str
215-
logger.debug(f"flaml.model - {model} fit started with params {self.params}")
216+
logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}")
216217
model.fit(X_train, y_train, **kwargs)
217218
if logger.level == logging.DEBUG:
218-
logger.debug(f"flaml.model - {model} fit finished")
219+
logger.debug(f"flaml.automl.model - {model} fit finished")
219220
train_time = time.time() - current_time
220221
self._model = model
221222
return train_time
@@ -455,10 +456,10 @@ def _fit(self, df_train: sparkDataFrame, **kwargs):
455456
current_time = time.time()
456457
pipeline_model = self.estimator_class(**self.params, **kwargs)
457458
if logger.level == logging.DEBUG:
458-
logger.debug(f"flaml.model - {pipeline_model} fit started with params {self.params}")
459+
logger.debug(f"flaml.automl.model - {pipeline_model} fit started with params {self.params}")
459460
pipeline_model.fit(df_train)
460461
if logger.level == logging.DEBUG:
461-
logger.debug(f"flaml.model - {pipeline_model} fit finished")
462+
logger.debug(f"flaml.automl.model - {pipeline_model} fit finished")
462463
train_time = time.time() - current_time
463464
self._model = pipeline_model
464465
return train_time
@@ -690,12 +691,12 @@ def _fit(self, df_train: sparkDataFrame, **kwargs):
690691
current_time = time.time()
691692
model = self.estimator_class(**self.params, **kwargs)
692693
if logger.level == logging.DEBUG:
693-
logger.debug(f"flaml.model - {model} fit started with params {self.params}")
694+
logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}")
694695
self._model = model.fit(df_train)
695696
self._model.classes_ = self.model_classes_
696697
self._model.n_classes_ = self.model_n_classes_
697698
if logger.level == logging.DEBUG:
698-
logger.debug(f"flaml.model - {model} fit finished")
699+
logger.debug(f"flaml.automl.model - {model} fit finished")
699700
train_time = time.time() - current_time
700701
return train_time
701702

@@ -1412,7 +1413,7 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
14121413
callbacks = self.params.pop("callbacks")
14131414
self._model.set_params(callbacks=callbacks[:-1])
14141415
best_iteration = (
1415-
self._model.get_booster().best_iteration
1416+
getattr(self._model.get_booster(), "best_iteration", None)
14161417
if isinstance(self, XGBoostSklearnEstimator)
14171418
else self._model.best_iteration_
14181419
)
@@ -1510,8 +1511,6 @@ def config2params(self, config: dict) -> dict:
15101511
# params["booster"] = params.get("booster", "gbtree")
15111512

15121513
# use_label_encoder is deprecated in 1.7.
1513-
from xgboost import __version__ as xgboost_version
1514-
15151514
if xgboost_version < "1.7.0":
15161515
params["use_label_encoder"] = params.get("use_label_encoder", False)
15171516
if "n_jobs" in config:
@@ -1559,7 +1558,7 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
15591558
obj=obj,
15601559
callbacks=callbacks,
15611560
)
1562-
self.params["n_estimators"] = self._model.best_iteration + 1
1561+
self.params["n_estimators"] = getattr(self._model, "best_iteration", _n_estimators - 1) + 1
15631562
else:
15641563
self._model = xgb.train(self.params, dtrain, _n_estimators, obj=obj)
15651564
self.params["n_estimators"] = _n_estimators
@@ -1620,7 +1619,9 @@ def config2params(self, config: dict) -> dict:
16201619
if max_depth == 0:
16211620
params["grow_policy"] = params.get("grow_policy", "lossguide")
16221621
params["tree_method"] = params.get("tree_method", "hist")
1623-
params["use_label_encoder"] = params.get("use_label_encoder", False)
1622+
# use_label_encoder is deprecated in 1.7.
1623+
if xgboost_version < "1.7.0":
1624+
params["use_label_encoder"] = params.get("use_label_encoder", False)
16241625
return params
16251626

16261627
def __init__(

flaml/automl/time_series/ts_model.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class PD:
2222
import numpy as np
2323

2424
from flaml import tune
25-
from flaml.model import (
25+
from flaml.automl.model import (
2626
suppress_stdout_stderr,
2727
SKLearnEstimator,
2828
logger,
@@ -33,7 +33,7 @@ class PD:
3333
XGBoostLimitDepthEstimator,
3434
CatBoostEstimator,
3535
)
36-
from flaml.data import TS_TIMESTAMP_COL, TS_VALUE_COL
36+
from flaml.automl.data import TS_TIMESTAMP_COL, TS_VALUE_COL
3737
from flaml.automl.time_series.ts_data import (
3838
TimeSeriesDataset,
3939
enrich_dataset,

flaml/data.py

-9
This file was deleted.

flaml/default/estimator.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,12 @@ def fit(self, X, y, *args, **params):
105105
# if hasattr(self, "_classes"):
106106
# self._classes = self._label_transformer.classes_
107107
# else:
108-
self.classes_ = self._label_transformer.classes_
108+
try:
109+
self.classes_ = self._label_transformer.classes_
110+
except AttributeError:
111+
# xgboost 2: AttributeError: can't set attribute
112+
if "xgb" not in estimator_name:
113+
raise
109114
if "xgb" not in estimator_name:
110115
# rf and et would do inverse transform automatically; xgb doesn't
111116
self._label_transformer = None

flaml/model.py

-9
This file was deleted.

flaml/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "2.1.0"
1+
__version__ = "2.1.1"

notebook/automl_classification.ipynb

+3-3
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
],
8181
"source": [
8282
"from minio.error import ServerError\n",
83-
"from flaml.data import load_openml_dataset\n",
83+
"from flaml.automl.data import load_openml_dataset\n",
8484
"\n",
8585
"try:\n",
8686
" X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir='./')\n",
@@ -1252,7 +1252,7 @@
12521252
}
12531253
],
12541254
"source": [
1255-
"from flaml.data import get_output_from_log\n",
1255+
"from flaml.automl.data import get_output_from_log\n",
12561256
"time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n",
12571257
" get_output_from_log(filename=settings['log_file_name'], time_budget=240)\n",
12581258
"for config in config_history:\n",
@@ -1540,7 +1540,7 @@
15401540
"outputs": [],
15411541
"source": [
15421542
"''' SKLearnEstimator is the super class for a sklearn learner '''\n",
1543-
"from flaml.model import SKLearnEstimator\n",
1543+
"from flaml.automl.model import SKLearnEstimator\n",
15441544
"from flaml import tune\n",
15451545
"from flaml.automl.task.task import CLASSIFICATION\n",
15461546
"\n",

0 commit comments

Comments (0)