support xgboost 2.0 #1219

Merged: 32 commits (branch xgb2 into main), Sep 22, 2023

Commits
fafbba2  support xgboost 2.0 (sonichi, Sep 13, 2023)
71b24d9  Merge branch 'main' into xgb2 (sonichi, Sep 13, 2023)
9fc62f6  try classes_ (sonichi, Sep 13, 2023)
732aaef  test version (sonichi, Sep 13, 2023)
9b8adf1  quote (sonichi, Sep 13, 2023)
bd8e3a2  use_label_encoder (sonichi, Sep 14, 2023)
6f1f26c  Fix xgboost test error (thinkall, Sep 16, 2023)
7db7acb  remove deprecated files (sonichi, Sep 16, 2023)
c479a04  remove deprecated files (sonichi, Sep 16, 2023)
9fe933c  Merge branch 'xgb2' of https://github.com/microsoft/FLAML into xgb2 (sonichi, Sep 16, 2023)
7c843b8  remove deprecated import (sonichi, Sep 16, 2023)
26ad15e  replace deprecated import in integrate_spark.ipynb (levscaut, Sep 19, 2023)
cfbde33  replace deprecated import in automl_lightgbm.ipynb (levscaut, Sep 19, 2023)
a8d238a  formatted integrate_spark.ipynb (levscaut, Sep 19, 2023)
4c6e5b3  replace deprecated import (levscaut, Sep 19, 2023)
e4f9806  try fix driver python path (levscaut, Sep 19, 2023)
76b64ce  Update python-package.yml (levscaut, Sep 19, 2023)
a7ea2b9  replace deprecated reference (levscaut, Sep 19, 2023)
a139168  move spark python env var to other section (levscaut, Sep 19, 2023)
fb06c53  Update setup.py, install xgb<2 for MacOS (thinkall, Sep 20, 2023)
48131a1  Fix typo (thinkall, Sep 20, 2023)
0c7121f  assert (sonichi, Sep 20, 2023)
ee0c237  Try assert xgboost version (thinkall, Sep 21, 2023)
1369496  Merge branch 'xgb2' of https://github.com/microsoft/FLAML into xgb2 (thinkall, Sep 21, 2023)
a5282d1  Fail fast (thinkall, Sep 21, 2023)
fa5fc47  Keep all test/spark to try fail fast (thinkall, Sep 21, 2023)
a090348  No need to skip spark test in Mac or Win (thinkall, Sep 21, 2023)
65883e6  Remove assert xgb version (thinkall, Sep 21, 2023)
40d4470  Remove fail fast (thinkall, Sep 21, 2023)
1826687  Found root cause, fix test_sparse_matrix_xgboost (thinkall, Sep 21, 2023)
6c3e1ab  Revert "No need to skip spark test in Mac or Win" (thinkall, Sep 21, 2023)
c0fc76e  remove assertion (sonichi, Sep 22, 2023)
Files changed (changes from all commits)
4 changes: 3 additions & 1 deletion .github/workflows/python-package.yml

@@ -64,10 +64,12 @@ jobs:
         if: matrix.os == 'ubuntu-latest'
         run: |
           pip install "ray[tune]<2.5.0"
-      - name: If mac, install ray
+      - name: If mac, install ray and xgboost 1
        if: matrix.os == 'macOS-latest'
        run: |
          pip install -e .[ray]
+         # use macOS to test xgboost 1, but macOS also supports xgboost 2
+         pip install "xgboost<2"
      - name: If linux or mac, install prophet on python < 3.9
        if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9' && matrix.python-version != '3.10'
        run: |
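The commit history above shows that an explicit version check was tried and then dropped ("Try assert xgboost version", "Remove assert xgb version"). A minimal sketch of what such a CI-side guard could look like, purely hypothetical and not part of the merged diff:

import xgboost

# Hypothetical guard: macOS runners are expected to carry the "xgboost<2" pin.
major = int(xgboost.__version__.split(".", 1)[0])
assert major < 2, f"expected xgboost 1.x on macOS, got {xgboost.__version__}"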
2 changes: 1 addition & 1 deletion flaml/automl/automl.py

@@ -606,7 +606,7 @@ def add_learner(self, learner_name, learner_class):
         Args:
             learner_name: A string of the learner's name.
-            learner_class: A subclass of flaml.model.BaseEstimator.
+            learner_class: A subclass of flaml.automl.model.BaseEstimator.
         """
         self._state.learner_classes[learner_name] = learner_class
23 changes: 12 additions & 11 deletions flaml/automl/model.py

@@ -32,6 +32,7 @@
     from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
     from sklearn.linear_model import LogisticRegression
     from sklearn.dummy import DummyClassifier, DummyRegressor
+    from xgboost import __version__ as xgboost_version
 except ImportError:
     pass

@@ -212,10 +213,10 @@ def _fit(self, X_train, y_train, **kwargs):
         model = self.estimator_class(**self.params)
         if logger.level == logging.DEBUG:
             # xgboost 1.6 doesn't display all the params in the model str
-            logger.debug(f"flaml.model - {model} fit started with params {self.params}")
+            logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}")
         model.fit(X_train, y_train, **kwargs)
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {model} fit finished")
+            logger.debug(f"flaml.automl.model - {model} fit finished")
         train_time = time.time() - current_time
         self._model = model
         return train_time

@@ -455,10 +456,10 @@ def _fit(self, df_train: sparkDataFrame, **kwargs):
         current_time = time.time()
         pipeline_model = self.estimator_class(**self.params, **kwargs)
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {pipeline_model} fit started with params {self.params}")
+            logger.debug(f"flaml.automl.model - {pipeline_model} fit started with params {self.params}")
         pipeline_model.fit(df_train)
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {pipeline_model} fit finished")
+            logger.debug(f"flaml.automl.model - {pipeline_model} fit finished")
         train_time = time.time() - current_time
         self._model = pipeline_model
         return train_time

@@ -690,12 +691,12 @@ def _fit(self, df_train: sparkDataFrame, **kwargs):
         current_time = time.time()
         model = self.estimator_class(**self.params, **kwargs)
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {model} fit started with params {self.params}")
+            logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}")
         self._model = model.fit(df_train)
         self._model.classes_ = self.model_classes_
         self._model.n_classes_ = self.model_n_classes_
         if logger.level == logging.DEBUG:
-            logger.debug(f"flaml.model - {model} fit finished")
+            logger.debug(f"flaml.automl.model - {model} fit finished")
         train_time = time.time() - current_time
         return train_time

@@ -1412,7 +1413,7 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
             callbacks = self.params.pop("callbacks")
             self._model.set_params(callbacks=callbacks[:-1])
             best_iteration = (
-                self._model.get_booster().best_iteration
+                getattr(self._model.get_booster(), "best_iteration", None)
                 if isinstance(self, XGBoostSklearnEstimator)
                 else self._model.best_iteration_
             )
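The getattr fallback above is needed because in xgboost 2.0 the Booster.best_iteration property raises AttributeError unless early stopping was actually used, rather than silently returning a value. A minimal sketch of the failure mode, assuming xgboost >= 2.0:

import numpy as np
import xgboost as xgb

X, y = np.random.rand(50, 4), np.random.randint(0, 2, 50)
model = xgb.XGBClassifier(n_estimators=5).fit(X, y)

# No early stopping ran here, so the best_iteration property raises
# AttributeError in xgboost 2.x; getattr turns that into a None fallback.
best_iteration = getattr(model.get_booster(), "best_iteration", None)
print(best_iteration)  # None here; an int when early stopping was used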
@@ -1510,8 +1511,6 @@ def config2params(self, config: dict) -> dict:
         # params["booster"] = params.get("booster", "gbtree")

         # use_label_encoder is deprecated in 1.7.
-        from xgboost import __version__ as xgboost_version
-
         if xgboost_version < "1.7.0":
             params["use_label_encoder"] = params.get("use_label_encoder", False)
         if "n_jobs" in config:

@@ -1559,7 +1558,7 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
                 obj=obj,
                 callbacks=callbacks,
             )
-            self.params["n_estimators"] = self._model.best_iteration + 1
+            self.params["n_estimators"] = getattr(self._model, "best_iteration", _n_estimators - 1) + 1
         else:
             self._model = xgb.train(self.params, dtrain, _n_estimators, obj=obj)
             self.params["n_estimators"] = _n_estimators
@@ -1620,7 +1619,9 @@ def config2params(self, config: dict) -> dict:
         if max_depth == 0:
             params["grow_policy"] = params.get("grow_policy", "lossguide")
             params["tree_method"] = params.get("tree_method", "hist")
-        params["use_label_encoder"] = params.get("use_label_encoder", False)
+        # use_label_encoder is deprecated in 1.7.
+        if xgboost_version < "1.7.0":
+            params["use_label_encoder"] = params.get("use_label_encoder", False)
         return params

     def __init__(
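One caveat worth noting: xgboost_version < "1.7.0" compares strings lexicographically. That happens to order every version xgboost has actually shipped correctly, but a hypothetical 1.10 release would sort before 1.7. A more robust sketch using the packaging library (an assumption on my part; not what the merged diff does):

from packaging.version import Version

# Lexicographic comparison misorders multi-digit components:
assert "1.10.0" < "1.7.0"                    # True as strings, wrong as versions
assert Version("1.10.0") > Version("1.7.0")  # correct semantic ordering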
4 changes: 2 additions & 2 deletions flaml/automl/time_series/ts_model.py

@@ -22,7 +22,7 @@ class PD:
 import numpy as np

 from flaml import tune
-from flaml.model import (
+from flaml.automl.model import (
     suppress_stdout_stderr,
     SKLearnEstimator,
     logger,

@@ -33,7 +33,7 @@ class PD:
     XGBoostLimitDepthEstimator,
     CatBoostEstimator,
 )
-from flaml.data import TS_TIMESTAMP_COL, TS_VALUE_COL
+from flaml.automl.data import TS_TIMESTAMP_COL, TS_VALUE_COL
 from flaml.automl.time_series.ts_data import (
     TimeSeriesDataset,
     enrich_dataset,
9 changes: 0 additions & 9 deletions flaml/data.py

This file was deleted.

7 changes: 6 additions & 1 deletion flaml/default/estimator.py

@@ -105,7 +105,12 @@ def fit(self, X, y, *args, **params):
         # if hasattr(self, "_classes"):
         #     self._classes = self._label_transformer.classes_
         # else:
-        self.classes_ = self._label_transformer.classes_
+        try:
+            self.classes_ = self._label_transformer.classes_
+        except AttributeError:
+            # xgboost 2: AttributeError: can't set attribute
+            if "xgb" not in estimator_name:
+                raise
         if "xgb" not in estimator_name:
             # rf and et would do inverse transform automatically; xgb doesn't
             self._label_transformer = None
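The try/except is needed because xgboost 2.0 turned classes_ into a read-only property of XGBClassifier, so the old assignment raises "AttributeError: can't set attribute". A minimal reproduction sketch, assuming xgboost >= 2.0:

import numpy as np
import xgboost as xgb

clf = xgb.XGBClassifier(n_estimators=2)
clf.fit(np.random.rand(20, 3), np.random.randint(0, 2, 20))

try:
    clf.classes_ = np.array([0, 1])  # a plain attribute in xgboost 1.x, so this worked
except AttributeError:
    print("classes_ is read-only in xgboost 2.x")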
9 changes: 0 additions & 9 deletions flaml/model.py

This file was deleted.
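With flaml/data.py and flaml/model.py deleted, the deprecated top-level aliases are gone and downstream code imports from the flaml.automl namespace instead; the notebook diff below makes the same migration. In short:

# Before this PR (modules now deleted):
#   from flaml.data import load_openml_dataset, get_output_from_log
#   from flaml.model import SKLearnEstimator

# After this PR:
from flaml.automl.data import load_openml_dataset, get_output_from_log
from flaml.automl.model import SKLearnEstimator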

2 changes: 1 addition & 1 deletion flaml/version.py

@@ -1 +1 @@
-__version__ = "2.1.0"
+__version__ = "2.1.1"
6 changes: 3 additions & 3 deletions notebook/automl_classification.ipynb

@@ -80,7 +80,7 @@
 ],
 "source": [
     "from minio.error import ServerError\n",
-    "from flaml.data import load_openml_dataset\n",
+    "from flaml.automl.data import load_openml_dataset\n",
     "\n",
     "try:\n",
     "    X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir='./')\n",

@@ -1252,7 +1252,7 @@
 }
 ],
 "source": [
-    "from flaml.data import get_output_from_log\n",
+    "from flaml.automl.data import get_output_from_log\n",
     "time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n",
     "    get_output_from_log(filename=settings['log_file_name'], time_budget=240)\n",
     "for config in config_history:\n",

@@ -1540,7 +1540,7 @@
 "outputs": [],
 "source": [
     "''' SKLearnEstimator is the super class for a sklearn learner '''\n",
-    "from flaml.model import SKLearnEstimator\n",
+    "from flaml.automl.model import SKLearnEstimator\n",
     "from flaml import tune\n",
     "from flaml.automl.task.task import CLASSIFICATION\n",
     "\n",