Update Python custom objective demo. #5981

Merged: 1 commit, Aug 5, 2020
49 changes: 30 additions & 19 deletions demo/guide-python/custom_objective.py
```diff
@@ -1,28 +1,28 @@
-###
-# advanced: customized loss function
-#
 import os
 import numpy as np
 import xgboost as xgb

 print('start running example to used customized objective function')

 CURRENT_DIR = os.path.dirname(__file__)
 dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
 dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))

-# note: for customized objective function, we leave objective as default
-# note: what we are getting is margin value in prediction
-# you must know what you are doing
-param = {'max_depth': 2, 'eta': 1}
+# note: what we are getting is margin value in prediction you must know what
+# you are doing
+param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:logistic'}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
-num_round = 2
+num_round = 10


 # user define objective function, given prediction, return gradient and second
 # order gradient this is log likelihood loss
 def logregobj(preds, dtrain):
     labels = dtrain.get_label()
-    preds = 1.0 / (1.0 + np.exp(-preds))
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
     grad = preds - labels
     hess = preds * (1.0 - preds)
     return grad, hess
@@ -31,20 +31,31 @@ def logregobj(preds, dtrain):
 # user defined evaluation function, return a pair metric_name, result

 # NOTE: when you do customized loss function, the default prediction value is
-# margin. this may make builtin evaluation metric not function properly for
-# example, we are doing logistic loss, the prediction is score before logistic
-# transformation the builtin evaluation error assumes input is after logistic
-# transformation Take this in mind when you use the customization, and maybe
-# you need write customized evaluation function
+# margin, which means the prediction is score before logistic transformation.
 def evalerror(preds, dtrain):
     labels = dtrain.get_label()
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
     # return a pair metric_name, result. The metric name must not contain a
-    # colon (:) or a space since preds are margin(before logistic
-    # transformation, cutoff at 0)
-    return 'my-error', float(sum(labels != (preds > 0.0))) / len(labels)
+    # colon (:) or a space
+    return 'my-error', float(sum(labels != (preds > 0.5))) / len(labels)


+py_evals_result = {}
+
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst = xgb.train(param, dtrain, num_round, watchlist, obj=logregobj,
-                feval=evalerror)
+py_params = param.copy()
+py_params.update({'disable_default_eval_metric': True})
+py_logreg = xgb.train(py_params, dtrain, num_round, watchlist, obj=logregobj,
+                      feval=evalerror, evals_result=py_evals_result)
+
+evals_result = {}
+params = param.copy()
+params.update({'eval_metric': 'error'})
+logreg = xgb.train(params, dtrain, num_boost_round=num_round, evals=watchlist,
+                   evals_result=evals_result)
+
+
+for i in range(len(py_evals_result['train']['my-error'])):
+    np.testing.assert_almost_equal(py_evals_result['train']['my-error'],
+                                   evals_result['train']['error'])
```
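The updated demo now trains the same model twice, once with the Python `logregobj`/`evalerror` pair (builtin metric disabled) and once with the builtin `reg:logistic` objective plus the `error` metric, then checks that the two training histories agree. Below is a minimal sketch of that pattern; the synthetic dataset and exact parameter values are assumptions made so the snippet runs without the demo's agaricus files:

```python
import numpy as np
import xgboost as xgb

# small synthetic binary classification problem (an assumption; the demo
# itself loads the agaricus data files instead)
rng = np.random.RandomState(1994)
X = rng.randn(200, 5)
y = (X[:, 0] + 0.5 * rng.randn(200) > 0).astype(np.float64)
dtrain = xgb.DMatrix(X, label=y)


def logregobj(preds, dtrain):
    # logistic loss: gradient and hessian with respect to the raw margin
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # margin -> probability
    return preds - labels, preds * (1.0 - preds)


def evalerror(preds, dtrain):
    # classification error computed on the transformed predictions
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    return 'my-error', float(np.sum(labels != (preds > 0.5))) / len(labels)


history = {}
bst = xgb.train({'max_depth': 2, 'eta': 1, 'objective': 'reg:logistic',
                 'disable_default_eval_metric': 1},
                dtrain, num_boost_round=10, evals=[(dtrain, 'train')],
                obj=logregobj, feval=evalerror, evals_result=history)
print(history['train']['my-error'])  # one error value per boosting round
```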
11 changes: 7 additions & 4 deletions tests/python/test_basic_models.py
```diff
@@ -197,9 +197,9 @@ def test_boost_from_prediction(self):
         assert np.all(np.abs(predt_2 - predt_1) < 1e-6)

     def test_custom_objective(self):
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0}
+        param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:logistic'}
         watchlist = [(dtest, 'eval'), (dtrain, 'train')]
-        num_round = 2
+        num_round = 10

         def logregobj(preds, dtrain):
             labels = dtrain.get_label()
@@ -210,10 +210,12 @@ def logregobj(preds, dtrain):

         def evalerror(preds, dtrain):
             labels = dtrain.get_label()
+            preds = 1.0 / (1.0 + np.exp(-preds))
             return 'error', float(sum(labels != (preds > 0.5))) / len(labels)

         # test custom_objective in training
-        bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
+        bst = xgb.train(param, dtrain, num_round, watchlist, obj=logregobj,
+                        feval=evalerror)
         assert isinstance(bst, xgb.core.Booster)
         preds = bst.predict(dtest)
         labels = dtest.get_label()
@@ -230,7 +232,8 @@ def neg_evalerror(preds, dtrain):
             labels = dtrain.get_label()
             return 'error', float(sum(labels == (preds > 0.0))) / len(labels)

-        bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
+        bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj,
+                         neg_evalerror, maximize=True)
         preds2 = bst2.predict(dtest)
         err2 = sum(1 for i in range(len(preds2))
                    if int(preds2[i] > 0.5) != labels[i]) / float(len(preds2))
```
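The last hunk exercises `maximize=True`, which tells `xgb.train` that larger values of the custom metric are better; without it, best-score tracking and early stopping would treat an accuracy-style metric like `neg_evalerror` as something to minimize. Here is a short sketch continuing from the snippet above (reusing its imports, `rng`, `dtrain`, and `logregobj`); the held-out set and the `early_stopping_rounds` argument are added purely for illustration and are not part of the test:

```python
# held-out evaluation set, generated the same way as the training data
Xt = rng.randn(100, 5)
yt = (Xt[:, 0] + 0.5 * rng.randn(100) > 0).astype(np.float64)
dtest = xgb.DMatrix(Xt, label=yt)


def accuracy(preds, dtrain):
    labels = dtrain.get_label()
    # preds are raw margins here, so the decision cutoff is 0.0
    return 'accuracy', float(np.sum(labels == (preds > 0.0))) / len(labels)


# dtest is listed last so early stopping tracks the held-out accuracy;
# maximize=True makes xgboost look for the largest metric value
bst2 = xgb.train({'max_depth': 2, 'eta': 1, 'objective': 'reg:logistic'},
                 dtrain, num_boost_round=10,
                 evals=[(dtrain, 'train'), (dtest, 'eval')],
                 obj=logregobj, feval=accuracy,
                 maximize=True, early_stopping_rounds=5)
print('best iteration:', bst2.best_iteration)
```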