diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py index 4e537108aa1a..a33a16c36f04 100644 --- a/demo/guide-python/cross_validation.py +++ b/demo/guide-python/cross_validation.py @@ -2,6 +2,7 @@ Demo for using cross validation =============================== """ + import os import numpy as np @@ -83,9 +84,12 @@ def logregobj(preds, dtrain): def evalerror(preds, dtrain): labels = dtrain.get_label() + preds = 1.0 / (1.0 + np.exp(-preds)) return "error", float(sum(labels != (preds > 0.0))) / len(labels) param = {"max_depth": 2, "eta": 1} # train with customized objective -xgb.cv(param, dtrain, num_round, nfold=5, seed=0, obj=logregobj, feval=evalerror) +xgb.cv( + param, dtrain, num_round, nfold=5, seed=0, obj=logregobj, custom_metric=evalerror +) diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index a8999e119ab4..5398fb5d091f 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -37,6 +37,7 @@ Core Data Structure .. autoclass:: xgboost.Booster :members: :show-inheritance: + :special-members: __getitem__ .. autoclass:: xgboost.DataIter :members: diff --git a/python-package/xgboost/dask/__init__.py b/python-package/xgboost/dask/__init__.py index 6c92e9205dc9..b2fc191f1c02 100644 --- a/python-package/xgboost/dask/__init__.py +++ b/python-package/xgboost/dask/__init__.py @@ -766,7 +766,6 @@ async def _train_async( num_boost_round: int, evals: Optional[Sequence[Tuple[DaskDMatrix, str]]], obj: Optional[Objective], - feval: Optional[Metric], early_stopping_rounds: Optional[int], verbose_eval: Union[int, bool], xgb_model: Optional[Booster], @@ -816,7 +815,6 @@ def do_train( # pylint: disable=too-many-positional-arguments evals_result=local_history, evals=evals if len(evals) != 0 else None, obj=obj, - feval=feval, custom_metric=custom_metric, early_stopping_rounds=early_stopping_rounds, verbose_eval=verbose_eval, @@ -870,7 +868,6 @@ def train( # pylint: disable=unused-argument *, evals: Optional[Sequence[Tuple[DaskDMatrix, str]]] = None, obj: Optional[Objective] = None, - feval: Optional[Metric] = None, early_stopping_rounds: Optional[int] = None, xgb_model: Optional[Booster] = None, verbose_eval: Union[int, bool] = True, @@ -1675,7 +1672,6 @@ async def _fit_async( num_boost_round=self.get_num_boosting_rounds(), evals=evals, obj=obj, - feval=None, custom_metric=metric, verbose_eval=verbose, early_stopping_rounds=self.early_stopping_rounds, @@ -1784,7 +1780,6 @@ async def _fit_async( num_boost_round=self.get_num_boosting_rounds(), evals=evals, obj=obj, - feval=None, custom_metric=metric, verbose_eval=verbose, early_stopping_rounds=self.early_stopping_rounds, @@ -1986,7 +1981,6 @@ async def _fit_async( num_boost_round=self.get_num_boosting_rounds(), evals=evals, obj=None, - feval=None, custom_metric=metric, verbose_eval=verbose, early_stopping_rounds=self.early_stopping_rounds, diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index c337505f7641..b197539bfc1f 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -426,7 +426,7 @@ def task(i: int) -> float: Metric used for monitoring the training result and early stopping. It can be a string or list of strings as names of predefined metric in XGBoost (See - doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any + :doc:`/parameter`), one of the metrics in :py:mod:`sklearn.metrics`, or any other user defined metric that looks like `sklearn.metrics`. 
If custom objective is also provided, then custom metric should implement the diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 80e0ad2db1f5..0821aee913c3 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -662,9 +662,29 @@ def predictor_equal(lhs: xgb.DMatrix, rhs: xgb.DMatrix) -> bool: M = TypeVar("M", xgb.Booster, xgb.XGBModel) -def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]: - """Evaluation metric for xgb.train""" +def logregobj(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]: + """Binary regression custom objective.""" + labels = dtrain.get_label() + preds = 1.0 / (1.0 + np.exp(-preds)) + grad = preds - labels + hess = preds * (1.0 - preds) + return grad, hess + + +def eval_error_metric( + predt: np.ndarray, dtrain: xgb.DMatrix, rev_link: bool +) -> Tuple[str, np.float64]: + """Evaluation metric for xgb.train. + + Parameters + ---------- + rev_link : Whether the metric needs to apply the reverse link function (activation). + + """ label = dtrain.get_label() + if rev_link: + predt = 1.0 / (1.0 + np.exp(-predt)) + assert (0.0 <= predt).all() and (predt <= 1.0).all() r = np.zeros(predt.shape) gt = predt > 0.5 if predt.size == 0: @@ -675,8 +695,15 @@ def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.f return "CustomErr", np.sum(r) -def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> np.float64: +def eval_error_metric_skl( + y_true: np.ndarray, y_score: np.ndarray, rev_link: bool = False +) -> np.float64: """Evaluation metric that looks like metrics provided by sklearn.""" + + if rev_link: + y_score = 1.0 / (1.0 + np.exp(-y_score)) + assert (0.0 <= y_score).all() and (y_score <= 1.0).all() + r = np.zeros(y_score.shape) gt = y_score > 0.5 r[gt] = 1 - y_true[gt] diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py index 86370469a400..29a516e81e24 100644 --- a/python-package/xgboost/training.py +++ b/python-package/xgboost/training.py @@ -3,7 +3,6 @@ """Training Library containing training routines.""" import copy import os -import warnings from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union, cast import numpy as np @@ -28,26 +27,6 @@ _CVFolds = Sequence["CVPack"] -def _configure_custom_metric( - feval: Optional[Metric], custom_metric: Optional[Metric] -) -> Optional[Metric]: - if feval is not None: - link = ( - "https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html" - ) - warnings.warn( - "`feval` is deprecated, use `custom_metric` instead. They have " - "different behavior when custom objective is also used." - f"See {link} for details on the `custom_metric`." - ) - if feval is not None and custom_metric is not None: - raise ValueError( - "Both `feval` and `custom_metric` are supplied. Use `custom_metric` instead." - ) - eval_metric = custom_metric if custom_metric is not None else feval - return eval_metric - - @_deprecate_positional_args def train( params: Dict[str, Any], @@ -56,7 +35,6 @@ def train( *, evals: Optional[Sequence[Tuple[DMatrix, str]]] = None, obj: Optional[Objective] = None, - feval: Optional[Metric] = None, maximize: Optional[bool] = None, early_stopping_rounds: Optional[int] = None, evals_result: Optional[TrainingCallback.EvalsLog] = None, @@ -81,23 +59,27 @@ def train( obj Custom objective function. See :doc:`Custom Objective ` for details. - feval : - .. 
deprecated:: 1.6.0 - Use `custom_metric` instead. maximize : - Whether to maximize feval. + Whether to maximize custom_metric. + early_stopping_rounds : + Activates early stopping. Validation metric needs to improve at least once in every **early_stopping_rounds** round(s) to continue training. + Requires at least one item in **evals**. + The method returns the model from the last iteration (not the best one). Use - custom callback or model slicing if the best model is desired. - If there's more than one item in **evals**, the last entry will be used for early - stopping. + custom callback :py:class:`~xgboost.callback.EarlyStopping` or :py:meth:`model + slicing ` if the best model is desired. If there's + more than one item in **evals**, the last entry will be used for early stopping. + If there's more than one metric in the **eval_metric** parameter given in **params**, the last metric will be used for early stopping. + If early stopping occurs, the model will have two additional fields: ``bst.best_score``, ``bst.best_iteration``. + evals_result : This dictionary stores the evaluation results of all the items in watchlist. @@ -113,15 +95,22 @@ def train( verbose_eval : Requires at least one item in **evals**. + If **verbose_eval** is True then the evaluation metric on the validation set is printed at each boosting stage. - If **verbose_eval** is an integer then the evaluation metric on the validation set - is printed at every given **verbose_eval** boosting stage. The last boosting stage - / the boosting stage found by using **early_stopping_rounds** is also printed. - Example: with ``verbose_eval=4`` and at least one item in **evals**, an evaluation metric - is printed every 4 boosting stages, instead of every boosting stage. + + If **verbose_eval** is an integer then the evaluation metric on the validation + set is printed at every given **verbose_eval** boosting stage. The last boosting + stage / the boosting stage found by using **early_stopping_rounds** is also + printed. + + Example: with ``verbose_eval=4`` and at least one item in **evals**, an + evaluation metric is printed every 4 boosting stages, instead of every boosting + stage. + xgb_model : Xgb model to be loaded before training (allows training continuation). + callbacks : List of callback functions that are applied at end of each iteration. It is possible to use predefined callbacks by using @@ -145,15 +134,17 @@ def train( .. versionadded 1.6.0 Custom metric function. See :doc:`Custom Metric ` - for details. + for details. The metric receives transformed prediction (after applying the + reverse link function) when using a builtin objective, and raw output when using + a custom objective. Returns ------- Booster : a trained booster model + """ callbacks = [] if callbacks is None else copy.copy(list(callbacks)) - metric_fn = _configure_custom_metric(feval, custom_metric) evals = list(evals) if evals else [] bst = Booster(params, [dtrain] + [d[0] for d in evals], model_file=xgb_model) @@ -165,12 +156,7 @@ def train( if early_stopping_rounds: callbacks.append(EarlyStopping(rounds=early_stopping_rounds, maximize=maximize)) cb_container = CallbackContainer( - callbacks, - metric=metric_fn, - # For old `feval` parameter, the behavior is unchanged. For the new - # `custom_metric`, it will receive proper prediction result when custom objective - # is not used. 
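# Editorial aside, not part of the patch: the updated ``train`` docstring above
# points users at Booster slicing (now documented through ``__getitem__`` in
# python_api.rst) to recover the model at the best iteration after early
# stopping. A minimal sketch, assuming a synthetic binary classification task:
import numpy as np
import xgboost as xgb

X = np.random.rand(256, 8)
y = np.random.randint(0, 2, size=256)
booster = xgb.train(
    {"objective": "binary:logistic"}, xgb.DMatrix(X, y), num_boost_round=10
)
# booster[a:b] returns a new Booster containing only boosting rounds [a, b).
first_half = booster[:5]
assert len(first_half.get_dump()) == 5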
-        output_margin=callable(obj) or metric_fn is feval,
+        callbacks, metric=custom_metric, output_margin=callable(obj)
     )

     bst = cb_container.before_training(bst)
@@ -423,7 +409,6 @@ def cv(
     folds: XGBStratifiedKFold = None,
     metrics: Sequence[str] = (),
     obj: Optional[Objective] = None,
-    feval: Optional[Metric] = None,
     maximize: Optional[bool] = None,
     early_stopping_rounds: Optional[int] = None,
     fpreproc: Optional[FPreProcCallable] = None,
@@ -464,11 +449,9 @@ def cv(
         Custom objective function. See :doc:`Custom Objective
        ` for details.

-    feval : function
-        .. deprecated:: 1.6.0
-            Use `custom_metric` instead.
     maximize : bool
-        Whether to maximize feval.
+        Whether to maximize the evaluation metric (score or error).
+
     early_stopping_rounds: int
         Activates early stopping. Cross-Validation metric (average of validation
         metric computed over CV folds) needs to improve at least once in
@@ -559,8 +542,6 @@ def cv(
         shuffle=shuffle,
     )

-    metric_fn = _configure_custom_metric(feval, custom_metric)
-
     # setup callbacks
     callbacks = [] if callbacks is None else copy.copy(list(callbacks))

@@ -570,10 +551,7 @@ def cv(
     if early_stopping_rounds:
         callbacks.append(EarlyStopping(rounds=early_stopping_rounds, maximize=maximize))
     callbacks_container = CallbackContainer(
-        callbacks,
-        metric=metric_fn,
-        is_cv=True,
-        output_margin=callable(obj) or metric_fn is feval,
+        callbacks, metric=custom_metric, is_cv=True, output_margin=callable(obj)
     )

     booster = _PackedBooster(cvfolds)
diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py
index 8ee0b4e8e692..d0ef625fa008 100644
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@@ -27,6 +27,7 @@ class LintersPaths:
        "tests/python/test_dt.py",
        "tests/python/test_demos.py",
        "tests/python/test_eval_metrics.py",
+        "tests/python/test_early_stopping.py",
        "tests/python/test_multi_target.py",
        "tests/python/test_objectives.py",
        "tests/python/test_predict.py",
@@ -54,6 +55,7 @@ class LintersPaths:
        "demo/guide-python/callbacks.py",
        "demo/guide-python/categorical.py",
        "demo/guide-python/cat_pipeline.py",
+        "demo/guide-python/cross_validation.py",
        "demo/guide-python/feature_weights.py",
        "demo/guide-python/model_parser.py",
        "demo/guide-python/sklearn_parallel.py",
diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py
index 3e945546e13b..b24152e5dc9a 100644
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -1,6 +1,7 @@
 import json
 import os
 import tempfile
+from typing import Optional

 import numpy as np
 import pytest
@@ -17,38 +18,49 @@ class TestModels:
     def test_glm(self):
-        param = {'objective': 'binary:logistic',
-                 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
-                 'nthread': 1}
+        param = {
+            "objective": "binary:logistic",
+            "booster": "gblinear",
+            "alpha": 0.0001,
+            "lambda": 1,
+            "nthread": 1,
+        }
         dtrain, dtest = tm.load_agaricus(__file__)
-        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        watchlist = [(dtest, "eval"), (dtrain, "train")]
         num_round = 4
         bst = xgb.train(param, dtrain, num_round, watchlist)
         assert isinstance(bst, xgb.core.Booster)
         preds = bst.predict(dtest)
         labels = dtest.get_label()
-        err = sum(1 for i in range(len(preds))
-                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        err = sum(
+            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
+        ) / float(len(preds))
         assert err < 0.2

     def test_dart(self):
         dtrain, dtest = tm.load_agaricus(__file__)
-        param = {'max_depth': 5, 'objective': 'binary:logistic',
-                 'eval_metric': 'logloss', 'booster': 'dart',
'verbosity': 1} + param = { + "max_depth": 5, + "objective": "binary:logistic", + "eval_metric": "logloss", + "booster": "dart", + "verbosity": 1, + } # specify validations set to watch performance - watchlist = [(dtest, 'eval'), (dtrain, 'train')] + watchlist = [(dtest, "eval"), (dtrain, "train")] num_round = 2 bst = xgb.train(param, dtrain, num_round, watchlist) # this is prediction preds = bst.predict(dtest, iteration_range=(0, num_round)) labels = dtest.get_label() - err = sum(1 for i in range(len(preds)) - if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) + err = sum( + 1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i] + ) / float(len(preds)) # error must be smaller than 10% assert err < 0.1 with tempfile.TemporaryDirectory() as tmpdir: - dtest_path = os.path.join(tmpdir, 'dtest.dmatrix') + dtest_path = os.path.join(tmpdir, "dtest.dmatrix") model_path = os.path.join(tmpdir, "xgboost.model.dart.ubj") # save dmatrix into binary buffer dtest.save_binary(dtest_path) @@ -66,28 +78,30 @@ def test_dart(self): def my_logloss(preds, dtrain): labels = dtrain.get_label() - return 'logloss', np.sum( - np.log(np.where(labels, preds, 1 - preds))) + return "logloss", np.sum(np.log(np.where(labels, preds, 1 - preds))) # check whether custom evaluation metrics work - bst = xgb.train(param, dtrain, num_round, watchlist, - feval=my_logloss) + bst = xgb.train( + param, dtrain, num_round, evals=watchlist, custom_metric=my_logloss + ) preds3 = bst.predict(dtest, iteration_range=(0, num_round)) assert all(preds3 == preds) # check whether sample_type and normalize_type work num_round = 50 - param['learning_rate'] = 0.1 - param['rate_drop'] = 0.1 + param["learning_rate"] = 0.1 + param["rate_drop"] = 0.1 preds_list = [] - for p in [[p0, p1] for p0 in ['uniform', 'weighted'] - for p1 in ['tree', 'forest']]: - param['sample_type'] = p[0] - param['normalize_type'] = p[1] - bst = xgb.train(param, dtrain, num_round, watchlist) + for p in [ + [p0, p1] for p0 in ["uniform", "weighted"] for p1 in ["tree", "forest"] + ]: + param["sample_type"] = p[0] + param["normalize_type"] = p[1] + bst = xgb.train(param, dtrain, num_round, evals=watchlist) preds = bst.predict(dtest, iteration_range=(0, num_round)) - err = sum(1 for i in range(len(preds)) - if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) + err = sum( + 1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i] + ) / float(len(preds)) assert err < 0.1 preds_list.append(preds) @@ -143,53 +157,67 @@ def test_boost_from_existing_model(self) -> None: ) assert booster.num_boosted_rounds() == 8 - def run_custom_objective(self, tree_method=None): + def run_custom_objective(self, tree_method: Optional[str] = None): param = { - 'max_depth': 2, - 'eta': 1, - 'objective': 'reg:logistic', - "tree_method": tree_method + "max_depth": 2, + "eta": 1, + "objective": "reg:logistic", + "tree_method": tree_method, } dtrain, dtest = tm.load_agaricus(__file__) - watchlist = [(dtest, 'eval'), (dtrain, 'train')] + watchlist = [(dtest, "eval"), (dtrain, "train")] num_round = 10 - def logregobj(preds, dtrain): - labels = dtrain.get_label() - preds = 1.0 / (1.0 + np.exp(-preds)) - grad = preds - labels - hess = preds * (1.0 - preds) - return grad, hess - - def evalerror(preds, dtrain): - labels = dtrain.get_label() - preds = 1.0 / (1.0 + np.exp(-preds)) - return 'error', float(sum(labels != (preds > 0.5))) / len(labels) + def evalerror(preds: np.ndarray, dtrain: xgb.DMatrix): + return tm.eval_error_metric(preds, dtrain, rev_link=True) # test custom_objective 
in training - bst = xgb.train(param, dtrain, num_round, watchlist, obj=logregobj, - feval=evalerror) - assert isinstance(bst, xgb.core.Booster) + bst = xgb.train( + param, + dtrain, + num_round, + watchlist, + obj=tm.logregobj, + custom_metric=evalerror, + ) + assert isinstance(bst, xgb.Booster) preds = bst.predict(dtest) labels = dtest.get_label() - err = sum(1 for i in range(len(preds)) - if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) + err = sum( + 1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i] + ) / float(len(preds)) assert err < 0.1 # test custom_objective in cross-validation - xgb.cv(param, dtrain, num_round, nfold=5, seed=0, - obj=logregobj, feval=evalerror) + xgb.cv( + param, + dtrain, + num_round, + nfold=5, + seed=0, + obj=tm.logregobj, + custom_metric=evalerror, + ) # test maximize parameter def neg_evalerror(preds, dtrain): labels = dtrain.get_label() - return 'error', float(sum(labels == (preds > 0.0))) / len(labels) - - bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, - neg_evalerror, maximize=True) + preds = 1.0 / (1.0 + np.exp(-preds)) + return "error", float(sum(labels == (preds > 0.0))) / len(labels) + + bst2 = xgb.train( + param, + dtrain, + num_round, + evals=watchlist, + obj=tm.logregobj, + custom_metric=neg_evalerror, + maximize=True, + ) preds2 = bst2.predict(dtest) - err2 = sum(1 for i in range(len(preds2)) - if int(preds2[i] > 0.5) != labels[i]) / float(len(preds2)) + err2 = sum( + 1 for i in range(len(preds2)) if int(preds2[i] > 0.5) != labels[i] + ) / float(len(preds2)) assert err == err2 def test_custom_objective(self): @@ -197,36 +225,54 @@ def test_custom_objective(self): def test_multi_eval_metric(self): dtrain, dtest = tm.load_agaricus(__file__) - watchlist = [(dtest, 'eval'), (dtrain, 'train')] - param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1, - 'objective': 'binary:logistic'} - param['eval_metric'] = ["auc", "logloss", 'error'] + watchlist = [(dtest, "eval"), (dtrain, "train")] + param = { + "max_depth": 2, + "eta": 0.2, + "verbosity": 1, + "objective": "binary:logistic", + } + param["eval_metric"] = ["auc", "logloss", "error"] evals_result = {} - bst = xgb.train(param, dtrain, 4, watchlist, evals_result=evals_result) + bst = xgb.train(param, dtrain, 4, evals=watchlist, evals_result=evals_result) assert isinstance(bst, xgb.core.Booster) - assert len(evals_result['eval']) == 3 - assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'} + assert len(evals_result["eval"]) == 3 + assert set(evals_result["eval"].keys()) == {"auc", "error", "logloss"} def test_fpreproc(self): - param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} + param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"} num_round = 2 def fpreproc(dtrain, dtest, param): label = dtrain.get_label() ratio = float(np.sum(label == 0)) / np.sum(label == 1) - param['scale_pos_weight'] = ratio + param["scale_pos_weight"] = ratio return (dtrain, dtest, param) dtrain, _ = tm.load_agaricus(__file__) - xgb.cv(param, dtrain, num_round, nfold=5, - metrics={'auc'}, seed=0, fpreproc=fpreproc) + xgb.cv( + param, + dtrain, + num_round, + nfold=5, + metrics={"auc"}, + seed=0, + fpreproc=fpreproc, + ) def test_show_stdv(self): - param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} + param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"} num_round = 2 dtrain, _ = tm.load_agaricus(__file__) - xgb.cv(param, dtrain, num_round, nfold=5, - metrics={'error'}, seed=0, show_stdv=False) + xgb.cv( + param, + dtrain, 
+            num_round,
+            nfold=5,
+            metrics={"error"},
+            seed=0,
+            show_stdv=False,
+        )

     def test_prediction_cache(self) -> None:
         X, y = tm.make_sparse_regression(512, 4, 0.5, as_dense=False)
@@ -273,28 +319,34 @@ def validate_model(parameters):
             X = np.random.random((100, 30))
             y = np.random.randint(0, 4, size=(100,))

-            parameters['num_class'] = 4
+            parameters["num_class"] = 4
             m = xgb.DMatrix(X, y)
             booster = xgb.train(parameters, m)
-            dump = booster.get_dump(dump_format='json')
+            dump = booster.get_dump(dump_format="json")

             for i in range(len(dump)):
-                jsonschema.validate(instance=json.loads(dump[i]),
-                                    schema=schema)
+                jsonschema.validate(instance=json.loads(dump[i]), schema=schema)

         path = os.path.dirname(
-            os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-        doc = os.path.join(path, 'doc', 'dump.schema')
-        with open(doc, 'r') as fd:
+            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        )
+        doc = os.path.join(path, "doc", "dump.schema")
+        with open(doc, "r") as fd:
             schema = json.load(fd)

-        parameters = {'tree_method': 'hist', 'booster': 'gbtree',
-                      'objective': 'multi:softmax'}
+        parameters = {
+            "tree_method": "hist",
+            "booster": "gbtree",
+            "objective": "multi:softmax",
+        }
         validate_model(parameters)

-        parameters = {'tree_method': 'hist', 'booster': 'dart',
-                      'objective': 'multi:softmax'}
+        parameters = {
+            "tree_method": "hist",
+            "booster": "dart",
+            "objective": "multi:softmax",
+        }
         validate_model(parameters)

     def test_special_model_dump_characters(self) -> None:
@@ -363,7 +415,7 @@ def run_slice(
             sliced_trees = end * num_parallel_tree * num_classes
             assert sliced_trees == len(sliced.get_dump())

-            sliced = booster[: end]
+            sliced = booster[:end]
             sliced_trees = end * num_parallel_tree * num_classes
             assert sliced_trees == len(sliced.get_dump())
diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py
index d2e7cb5c4b8e..1ee31d6610c1 100644
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -1,8 +1,10 @@
 import json
 import os
 import tempfile
-from typing import Union
+from collections import namedtuple
+from typing import Tuple, Union

+import numpy as np
 import pytest

 import xgboost as xgb
@@ -12,21 +14,29 @@ pytestmark = pytest.mark.skipif(**tm.no_sklearn())


-class TestCallbacks:
-    @classmethod
-    def setup_class(cls):
-        from sklearn.datasets import load_breast_cancer
+BreastCancer = namedtuple("BreastCancer", ["full", "tr", "va"])
+
+
+@pytest.fixture
+def breast_cancer() -> BreastCancer:
+    from sklearn.datasets import load_breast_cancer
+
+    X, y = load_breast_cancer(return_X_y=True)
+
+    split = int(X.shape[0] * 0.8)
+    return BreastCancer(
+        full=(X, y),
+        tr=(X[:split, ...], y[:split, ...]),
+        va=(X[split:, ...], y[split:, ...]),
+    )
+

-        X, y = load_breast_cancer(return_X_y=True)
-        cls.X = X
-        cls.y = y
+def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
+    # No custom objective, receive transformed output
+    return tm.eval_error_metric(predt, dtrain, rev_link=False)

-        split = int(X.shape[0] * 0.8)
-        cls.X_train = X[:split, ...]
-        cls.y_train = y[:split, ...]
-        cls.X_valid = X[split:, ...]
-        cls.y_valid = y[split:, ...]
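# Editorial aside, not part of the patch: the module-level metric above encodes
# the `custom_metric` contract this change standardizes on. With a builtin
# objective the metric receives transformed predictions (probabilities here);
# with a custom objective such as `tm.logregobj` it receives raw margins and
# must apply the sigmoid itself, hence the `rev_link` switch in
# `tm.eval_error_metric`. A minimal sketch of both modes:
import numpy as np
import xgboost as xgb
from xgboost import testing as tm

X = np.random.rand(128, 4)
y = np.random.randint(0, 2, size=128)
Xy = xgb.DMatrix(X, y)

# Builtin objective: the metric already sees probabilities.
xgb.train(
    {"objective": "binary:logistic"},
    Xy,
    num_boost_round=4,
    evals=[(Xy, "Train")],
    custom_metric=lambda p, d: tm.eval_error_metric(p, d, rev_link=False),
)

# Custom objective: the metric sees raw margins and applies the sigmoid itself.
xgb.train(
    {"max_depth": 2},
    Xy,
    num_boost_round=4,
    evals=[(Xy, "Train")],
    obj=tm.logregobj,
    custom_metric=lambda p, d: tm.eval_error_metric(p, d, rev_link=True),
)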
+class TestCallbacks: def run_evaluation_monitor( self, D_train: xgb.DMatrix, @@ -70,9 +80,9 @@ def check_output(output: str) -> None: output = out.getvalue().strip() check_output(output) - def test_evaluation_monitor(self): - D_train = xgb.DMatrix(self.X_train, self.y_train) - D_valid = xgb.DMatrix(self.X_valid, self.y_valid) + def test_evaluation_monitor(self, breast_cancer: BreastCancer) -> None: + D_train = xgb.DMatrix(breast_cancer.tr[0], breast_cancer.tr[1]) + D_valid = xgb.DMatrix(breast_cancer.va[0], breast_cancer.va[1]) evals_result = {} rounds = 10 xgb.train( @@ -91,9 +101,9 @@ def test_evaluation_monitor(self): self.run_evaluation_monitor(D_train, D_valid, rounds, 4) self.run_evaluation_monitor(D_train, D_valid, rounds, rounds + 1) - def test_early_stopping(self): - D_train = xgb.DMatrix(self.X_train, self.y_train) - D_valid = xgb.DMatrix(self.X_valid, self.y_valid) + def test_early_stopping(self, breast_cancer: BreastCancer) -> None: + D_train = xgb.DMatrix(breast_cancer.tr[0], breast_cancer.tr[1]) + D_valid = xgb.DMatrix(breast_cancer.va[0], breast_cancer.va[1]) evals_result = {} rounds = 30 early_stopping_rounds = 5 @@ -109,9 +119,9 @@ def test_early_stopping(self): dump = booster.get_dump(dump_format="json") assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 - def test_early_stopping_custom_eval(self): - D_train = xgb.DMatrix(self.X_train, self.y_train) - D_valid = xgb.DMatrix(self.X_valid, self.y_valid) + def test_early_stopping_custom_eval(self, breast_cancer: BreastCancer) -> None: + D_train = xgb.DMatrix(breast_cancer.tr[0], breast_cancer.tr[1]) + D_valid = xgb.DMatrix(breast_cancer.va[0], breast_cancer.va[1]) early_stopping_rounds = 5 booster = xgb.train( { @@ -121,7 +131,7 @@ def test_early_stopping_custom_eval(self): }, D_train, evals=[(D_train, "Train"), (D_valid, "Valid")], - feval=tm.eval_error_metric, + custom_metric=eval_error_metric, num_boost_round=1000, early_stopping_rounds=early_stopping_rounds, verbose_eval=False, @@ -129,9 +139,9 @@ def test_early_stopping_custom_eval(self): dump = booster.get_dump(dump_format="json") assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 - def test_early_stopping_customize(self): - D_train = xgb.DMatrix(self.X_train, self.y_train) - D_valid = xgb.DMatrix(self.X_valid, self.y_valid) + def test_early_stopping_customize(self, breast_cancer: BreastCancer) -> None: + D_train = xgb.DMatrix(breast_cancer.tr[0], breast_cancer.tr[1]) + D_valid = xgb.DMatrix(breast_cancer.va[0], breast_cancer.va[1]) early_stopping_rounds = 5 early_stop = xgb.callback.EarlyStopping( rounds=early_stopping_rounds, metric_name="CustomErr", data_name="Train" @@ -145,7 +155,7 @@ def test_early_stopping_customize(self): }, D_train, evals=[(D_train, "Train"), (D_valid, "Valid")], - feval=tm.eval_error_metric, + custom_metric=eval_error_metric, num_boost_round=1000, callbacks=[early_stop], verbose_eval=False, @@ -170,7 +180,8 @@ def test_early_stopping_customize(self): }, D_train, evals=[(D_train, "Train"), (D_valid, "Valid")], - feval=tm.eval_error_metric, + # No custom objective, transformed output + custom_metric=eval_error_metric, num_boost_round=rounds, callbacks=[early_stop], verbose_eval=False, @@ -179,10 +190,8 @@ def test_early_stopping_customize(self): assert booster.best_iteration == 0 assert booster.num_boosted_rounds() == 1 - def test_early_stopping_skl(self): - from sklearn.datasets import load_breast_cancer - - X, y = load_breast_cancer(return_X_y=True) + def test_early_stopping_skl(self, breast_cancer: 
BreastCancer) -> None: + X, y = breast_cancer.full early_stopping_rounds = 5 cls = xgb.XGBClassifier( early_stopping_rounds=early_stopping_rounds, eval_metric="error" @@ -192,10 +201,8 @@ def test_early_stopping_skl(self): dump = booster.get_dump(dump_format="json") assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 - def test_early_stopping_custom_eval_skl(self): - from sklearn.datasets import load_breast_cancer - - X, y = load_breast_cancer(return_X_y=True) + def test_early_stopping_custom_eval_skl(self, breast_cancer: BreastCancer) -> None: + X, y = breast_cancer.full early_stopping_rounds = 5 early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds) cls = xgb.XGBClassifier( @@ -206,10 +213,8 @@ def test_early_stopping_custom_eval_skl(self): dump = booster.get_dump(dump_format="json") assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 - def test_early_stopping_save_best_model(self): - from sklearn.datasets import load_breast_cancer - - X, y = load_breast_cancer(return_X_y=True) + def test_early_stopping_save_best_model(self, breast_cancer: BreastCancer) -> None: + X, y = breast_cancer.full n_estimators = 100 early_stopping_rounds = 5 early_stop = xgb.callback.EarlyStopping( @@ -248,10 +253,8 @@ def test_early_stopping_save_best_model(self): callbacks=[early_stop], ).fit(X, y, eval_set=[(X, y)]) - def test_early_stopping_continuation(self): - from sklearn.datasets import load_breast_cancer - - X, y = load_breast_cancer(return_X_y=True) + def test_early_stopping_continuation(self, breast_cancer: BreastCancer) -> None: + X, y = breast_cancer.full early_stopping_rounds = 5 early_stop = xgb.callback.EarlyStopping( @@ -283,7 +286,23 @@ def test_early_stopping_continuation(self): == booster.best_iteration + early_stopping_rounds + 1 ) - def run_eta_decay(self, tree_method): + def test_early_stopping_multiple_metrics(self): + from sklearn.datasets import make_classification + + X, y = make_classification(random_state=1994) + # AUC approaches 1.0 real quick. 
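# Editorial note: early stopping tracks only the *last* metric in `eval_metric`.
# Here "auc" comes last and saturates within a few rounds, so training stops
# early; the second classifier below puts "logloss" last and keeps improving for
# far longer before stopping.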
+ clf = xgb.XGBClassifier(eval_metric=["logloss", "auc"], early_stopping_rounds=2) + clf.fit(X, y, eval_set=[(X, y)]) + assert clf.best_iteration < 8 + assert clf.evals_result()["validation_0"]["auc"][-1] > 0.99 + + clf = xgb.XGBClassifier(eval_metric=["auc", "logloss"], early_stopping_rounds=2) + clf.fit(X, y, eval_set=[(X, y)]) + + assert clf.best_iteration > 50 + assert clf.evals_result()["validation_0"]["auc"][-1] > 0.99 + + def run_eta_decay(self, tree_method: str) -> None: """Test learning rate scheduler, used by both CPU and GPU tests.""" scheduler = xgb.callback.LearningRateScheduler @@ -457,10 +476,8 @@ def test_eta_decay(self, tree_method: str) -> None: def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None: self.run_eta_decay_leaf_output(tree_method, objective) - def test_check_point(self) -> None: - from sklearn.datasets import load_breast_cancer - - X, y = load_breast_cancer(return_X_y=True) + def test_check_point(self, breast_cancer: BreastCancer) -> None: + X, y = breast_cancer.full m = xgb.DMatrix(X, y) with tempfile.TemporaryDirectory() as tmpdir: check_point = xgb.callback.TrainingCheckPoint( @@ -509,10 +526,8 @@ def test_callback_list(self) -> None: ) assert len(callbacks) == 1 - def test_attribute_error(self) -> None: - from sklearn.datasets import load_breast_cancer - - X, y = load_breast_cancer(return_X_y=True) + def test_attribute_error(self, breast_cancer: BreastCancer) -> None: + X, y = breast_cancer.full clf = xgb.XGBClassifier(n_estimators=8) clf.fit(X, y, eval_set=[(X, y)]) diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py index a275a8077b71..32afb5f75f51 100644 --- a/tests/python/test_early_stopping.py +++ b/tests/python/test_early_stopping.py @@ -1,3 +1,5 @@ +from typing import Tuple + import numpy as np import pytest @@ -14,9 +16,7 @@ def test_early_stopping_nonparallel(self): from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split - digits = load_digits(n_class=2) - X = digits["data"] - y = digits["target"] + X, y = load_digits(n_class=2, return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf1 = xgb.XGBClassifier( learning_rate=0.1, early_stopping_rounds=5, eval_metric="auc" @@ -47,50 +47,64 @@ def test_early_stopping_nonparallel(self): assert clf3.best_score == 1 - def evalerror(self, preds, dtrain): - from sklearn.metrics import mean_squared_error - - labels = dtrain.get_label() - preds = 1.0 / (1.0 + np.exp(-preds)) - return 'rmse', mean_squared_error(labels, preds) - @staticmethod def assert_metrics_length(cv, expected_length): for key, value in cv.items(): assert len(value) == expected_length @pytest.mark.skipif(**tm.no_sklearn()) - def test_cv_early_stopping(self): + def test_cv_early_stopping(self) -> None: from sklearn.datasets import load_digits - digits = load_digits(n_class=2) - X = digits['data'] - y = digits['target'] + X, y = load_digits(n_class=2, return_X_y=True) dm = xgb.DMatrix(X, label=y) params = { - 'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic', - 'eval_metric': 'error' + "max_depth": 2, + "eta": 1, + "objective": "binary:logistic", + "eval_metric": "error", } - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - early_stopping_rounds=10) + def evalerror(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]: + from sklearn.metrics import mean_squared_error + + labels = dtrain.get_label() + return "rmse", mean_squared_error(labels, preds) + + cv = xgb.cv(params, dm, 
num_boost_round=10, nfold=10, early_stopping_rounds=10) self.assert_metrics_length(cv, 10) - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - early_stopping_rounds=5) + cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=5) self.assert_metrics_length(cv, 3) - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - early_stopping_rounds=1) + cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=1) self.assert_metrics_length(cv, 1) - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - feval=self.evalerror, early_stopping_rounds=10) + cv = xgb.cv( + params, + dm, + num_boost_round=10, + nfold=10, + custom_metric=evalerror, + early_stopping_rounds=10, + ) self.assert_metrics_length(cv, 10) - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - feval=self.evalerror, early_stopping_rounds=1) + cv = xgb.cv( + params, + dm, + num_boost_round=10, + nfold=10, + custom_metric=evalerror, + early_stopping_rounds=1, + ) self.assert_metrics_length(cv, 5) - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - feval=self.evalerror, maximize=True, - early_stopping_rounds=1) + cv = xgb.cv( + params, + dm, + num_boost_round=10, + nfold=10, + custom_metric=evalerror, + maximize=True, + early_stopping_rounds=1, + ) self.assert_metrics_length(cv, 1) @pytest.mark.skipif(**tm.no_sklearn()) @@ -100,21 +114,35 @@ def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self): X, y = load_breast_cancer(return_X_y=True) dm = xgb.DMatrix(X, label=y) - params = {'objective':'binary:logistic'} + params = {"objective": "binary:logistic"} - metrics = [['auc'], ['error'], ['logloss'], - ['logloss', 'auc'], ['logloss', 'error'], ['error', 'logloss']] + metrics = [ + ["auc"], + ["error"], + ["logloss"], + ["logloss", "auc"], + ["logloss", "error"], + ["error", "logloss"], + ] num_iteration_history = [] # If more than one metrics is given, early stopping should use the last metric for i, m in enumerate(metrics): - result = xgb.cv(params, dm, num_boost_round=1000, nfold=5, stratified=True, - metrics=m, early_stopping_rounds=20, seed=42) + result = xgb.cv( + params, + dm, + num_boost_round=1000, + nfold=5, + stratified=True, + metrics=m, + early_stopping_rounds=20, + seed=42, + ) num_iteration_history.append(len(result)) - df = result['test-{}-mean'.format(m[-1])] + df = result["test-{}-mean".format(m[-1])] # When early stopping is invoked, the last metric should be as best it can be. 
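# Editorial note: "as best it can be" depends on the metric's direction. AUC is
# maximized, so every earlier value is at most the final one; error and logloss
# are minimized, so every earlier value is at least the final one, which is what
# the two branches below assert.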
- if m[-1] == 'auc': + if m[-1] == "auc": assert np.all(df <= df.iloc[-1]) else: assert np.all(df >= df.iloc[-1]) diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py index 2ee8c02cc2b5..b02f348013fb 100644 --- a/tests/python/test_eval_metrics.py +++ b/tests/python/test_eval_metrics.py @@ -92,7 +92,7 @@ def test_eval_metrics(self): 10, watchlist, early_stopping_rounds=2, - feval=self.evalerror_01, + custom_metric=self.evalerror_01, ) gbdt_02 = xgb.train( self.xgb_params_02, @@ -100,7 +100,7 @@ def test_eval_metrics(self): 10, watchlist, early_stopping_rounds=2, - feval=self.evalerror_02, + custom_metric=self.evalerror_02, ) gbdt_03 = xgb.train( self.xgb_params_03, @@ -108,7 +108,7 @@ def test_eval_metrics(self): 10, watchlist, early_stopping_rounds=2, - feval=self.evalerror_03, + custom_metric=self.evalerror_03, ) gbdt_04 = xgb.train( self.xgb_params_04, @@ -116,7 +116,7 @@ def test_eval_metrics(self): 10, watchlist, early_stopping_rounds=2, - feval=self.evalerror_04, + custom_metric=self.evalerror_04, ) assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0] assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0] diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index 53e263b5e06e..680ed025f15b 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -2153,6 +2153,9 @@ def test_early_stopping_custom_eval(self, client: "Client") -> None: X, y = da.from_array(X), da.from_array(y) m = dxgb.DaskDMatrix(client, X, y) + def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix): + return tm.eval_error_metric(predt, dtrain, rev_link=False) + valid = dxgb.DaskDMatrix(client, X, y) early_stopping_rounds = 5 booster = dxgb.train( @@ -2164,7 +2167,7 @@ def test_early_stopping_custom_eval(self, client: "Client") -> None: }, m, evals=[(m, "Train"), (valid, "Valid")], - feval=tm.eval_error_metric, + custom_metric=eval_error_metric, num_boost_round=1000, early_stopping_rounds=early_stopping_rounds, )["booster"] diff --git a/tests/test_distributed/test_with_spark/test_spark_local.py b/tests/test_distributed/test_with_spark/test_spark_local.py index 79569c7fd373..5f0dafd9d6be 100644 --- a/tests/test_distributed/test_with_spark/test_spark_local.py +++ b/tests/test_distributed/test_with_spark/test_spark_local.py @@ -9,14 +9,6 @@ import numpy as np import pytest from pyspark import SparkConf - -import xgboost as xgb -from xgboost import testing as tm -from xgboost.collective import Config -from xgboost.spark.data import pred_contribs - -pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_spark())] - from pyspark.ml import Pipeline, PipelineModel from pyspark.ml.evaluation import BinaryClassificationEvaluator from pyspark.ml.feature import VectorAssembler @@ -26,7 +18,10 @@ from pyspark.sql import SparkSession from pyspark.sql import functions as spark_sql_func +import xgboost as xgb from xgboost import XGBClassifier, XGBModel, XGBRegressor +from xgboost import testing as tm +from xgboost.collective import Config from xgboost.spark import ( SparkXGBClassifier, SparkXGBClassifierModel, @@ -35,11 +30,14 @@ SparkXGBRegressorModel, ) from xgboost.spark.core import _non_booster_params +from xgboost.spark.data import pred_contribs from .utils import SparkTestCase logging.getLogger("py4j").setLevel(logging.INFO) +pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_spark())] + def no_sparse_unwrap() -> 
tm.PytestSkip: try: