From b11a95282ad4d904c54c73c233c1f092613bbec7 Mon Sep 17 00:00:00 2001 From: wxchan Date: Sat, 17 Dec 2016 18:18:44 +0800 Subject: [PATCH 1/3] add gridsearch example for python sklearn --- examples/python-guide/sklearn_example.py | 15 +++++++++++++++ python-package/lightgbm/libpath.py | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/examples/python-guide/sklearn_example.py b/examples/python-guide/sklearn_example.py index ddfbd1924522..9ca0b092d8b1 100644 --- a/examples/python-guide/sklearn_example.py +++ b/examples/python-guide/sklearn_example.py @@ -3,6 +3,7 @@ import lightgbm as lgb import pandas as pd from sklearn.metrics import mean_squared_error +from sklearn.model_selection import GridSearchCV # load or create your dataset print('Load data...') @@ -34,3 +35,17 @@ print('Calculate feature importances...') # feature importances print('Feature importances:', list(gbm.feature_importance())) + +# other scikit-learn built-in module +estimator = lgb.LGBMRegressor(num_leaves=31) + +param_grid = { + 'learning_rate': [0.01, 0.1, 1], + 'n_estimators': [20, 40] +} + +gbm = GridSearchCV(estimator, param_grid) + +gbm.fit(X_train, y_train) + +print('Best parameters found by grid search are:', gbm.best_params_) diff --git a/python-package/lightgbm/libpath.py b/python-package/lightgbm/libpath.py index 387b435f7c27..52bc598d635b 100644 --- a/python-package/lightgbm/libpath.py +++ b/python-package/lightgbm/libpath.py @@ -25,5 +25,6 @@ def find_lib_path(): dll_path = [os.path.join(p, 'lib_lightgbm.so') for p in dll_path] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] if not lib_path: - raise Exception('Cannot find lightgbm Library') + dll_path = [os.path.realpath(p) for p in dll_path] + raise Exception('Cannot find lightgbm Library in following paths: '+','.join(dll_path)) return lib_path From 96183bc84396050bfdadbf3f29486c5350f6a3bc Mon Sep 17 00:00:00 2001 From: wxchan Date: Sun, 18 Dec 2016 00:44:16 +0800 Subject: [PATCH 2/3] 
reset_learning_rate->reset_parameter --- docs/Python-API.md | 94 ++++++++++++++++++++++-- examples/python-guide/sklearn_example.py | 2 +- python-package/lightgbm/callback.py | 41 ++++------- python-package/lightgbm/engine.py | 15 ++-- 4 files changed, 112 insertions(+), 40 deletions(-) diff --git a/docs/Python-API.md b/docs/Python-API.md index 6f75bf2c7bc1..cd4ed82fd8d2 100644 --- a/docs/Python-API.md +++ b/docs/Python-API.md @@ -13,6 +13,14 @@ - [LGBMClassifier](Python-API.md#lgbmclassifier) - [LGBMRegressor](Python-API.md#lgbmregressor) - [LGBMRanker](Python-API.md#lgbmranker) + +* [Callbacks](Python-API.md#callbacks) + - [Before iteration](Python-API.md#before-iteration) + + [reset_parameter](Python-API.md#reset_parameterkwargs) + - [After iteration](Python-API.md#after-iteration) + + [print_evaluation](Python-API.md#print_evaluationperiod1-show_stdvtrue) + + [record_evaluation](Python-API.md#record_evaluationeval_result) + + [early_stopping](Python-API.md#early_stoppingstopping_rounds-verbosetrue) The methods of each Class is in alphabetical order. @@ -496,12 +504,10 @@ The methods of each Class is in alphabetical order. an evaluation metric is printed every 4 (instead of 1) boosting stages. learning_rates: list or function List of learning rate for each boosting round - or a customized function that calculates learning_rate in terms of - current number of round (and the total number of boosting round) - (e.g. yields learning rate decay) + or a customized function that calculates learning_rate + in terms of current number of round (e.g. yields learning rate decay) - list l: learning_rate = l[current_round] - - function f: learning_rate = f(current_round, total_boost_round) - or learning_rate = f(current_round) + - function f: learning_rate = f(current_round) callbacks : list of callback functions List of callback functions that are applied at end of each iteration. @@ -805,3 +811,81 @@ The methods of each Class is in alphabetical order. 
eval_at : list of int The evaulation positions of NDCG +## Callbacks + +###Before iteration + +####reset_parameter(**kwargs) + + Reset parameter after first iteration + + NOTE: the initial parameter will still take effect on the first iteration. + + Parameters + ---------- + **kwargs: value should be list or function + List of parameters for each boosting round + or a customized function that calculates the parameter in terms of + current number of round (e.g. yields learning rate decay) + - list l: parameter = l[current_round] + - function f: parameter = f(current_round) + Returns + ------- + callback : function + The requested callback function. + +###After iteration + +####print_evaluation(period=1, show_stdv=True) + + Create a callback that print evaluation result. + + Parameters + ---------- + period : int + The period to log the evaluation results + + show_stdv : bool, optional + Whether to show standard deviation if provided + + Returns + ------- + callback : function + A callback that prints evaluation every period iterations. + +####record_evaluation(eval_result) + + Create a call back that records the evaluation history into eval_result. + + Parameters + ---------- + eval_result : dict + A dictionary to store the evaluation results. + + Returns + ------- + callback : function + The requested callback function. + +####early_stopping(stopping_rounds, verbose=True) + + Create a callback that activates early stopping. + Activates early stopping. + Requires at least one validation data and one metric + If there's more than one, will check all of them + + Parameters + ---------- + stopping_rounds : int + The stopping rounds before the trend occurs. + + verbose : optional, bool + Whether to print message about early stopping information. + + Returns + ------- + callback : function + The requested callback function. 
+ + + diff --git a/examples/python-guide/sklearn_example.py b/examples/python-guide/sklearn_example.py index 9ca0b092d8b1..9da386442479 100644 --- a/examples/python-guide/sklearn_example.py +++ b/examples/python-guide/sklearn_example.py @@ -36,7 +36,7 @@ # feature importances print('Feature importances:', list(gbm.feature_importance())) -# other scikit-learn built-in module +# other scikit-learn modules estimator = lgb.LGBMRegressor(num_leaves=31) param_grid = { diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py index addc52306fb3..608eee238356 100644 --- a/python-package/lightgbm/callback.py +++ b/python-package/lightgbm/callback.py @@ -2,7 +2,6 @@ # pylint: disable = invalid-name, W0105, C0301 from __future__ import absolute_import import collections -import inspect class EarlyStopException(Exception): """Exception of early stopping. @@ -98,21 +97,19 @@ def callback(env): return callback -def reset_learning_rate(learning_rates): - """Reset learning rate after first iteration +def reset_parameter(**kwargs): + """Reset parameter after first iteration - NOTE: the initial learning rate will still take in-effect on first iteration. + NOTE: the initial parameter will still take in-effect on first iteration. Parameters ---------- - learning_rates: list or function - List of learning rate for each boosting round \ - or a customized function that calculates learning_rate in terms of \ - current number of round and the total number of boosting round \ - (e.g. yields learning rate decay) - - list l: learning_rate = l[current_round] - - function f: learning_rate = f(current_round, total_boost_round) \ - or learning_rate = f(current_round) + **kwargs: value should be list or function + List of parameters for each boosting round + or a customized function that calculates learning_rate in terms of + current number of round (e.g. 
yields learning rate decay) + - list l: parameter = l[current_round] + - function f: parameter = f(current_round) Returns ------- callback : function @@ -120,25 +117,19 @@ def reset_learning_rate(learning_rates): """ def callback(env): """internal function""" - if isinstance(learning_rates, list): - if len(learning_rates) != env.end_iteration - env.begin_iteration: - raise ValueError("Length of list 'learning_rates' has to equal to 'num_boost_round'.") - env.model.reset_parameter({'learning_rate':learning_rates[env.iteration]}) - else: - argc = len(inspect.getargspec(learning_rates).args) - if argc is 1: - env.model.reset_parameter({"learning_rate": learning_rates(env.iteration - env.begin_iteration)}) - elif argc is 2: - env.model.reset_parameter({"learning_rate": \ - learning_rates(env.iteration - env.begin_iteration, env.end_iteration - env.begin_iteration)}) + for key, value in kwargs.items(): + if isinstance(value, list): + if len(value) != env.end_iteration - env.begin_iteration: + raise ValueError("Length of list {} has to equal to 'num_boost_round'.".format(repr(key))) + env.model.reset_parameter({key: value[env.iteration - env.begin_iteration]}) else: - raise ValueError("Self-defined function 'learning_rates' should have 1 or 2 arguments, got %d" %(argc)) + env.model.reset_parameter({key: value(env.iteration - env.begin_iteration)}) callback.before_iteration = True callback.order = 10 return callback -def early_stop(stopping_rounds, verbose=True): +def early_stopping(stopping_rounds, verbose=True): """Create a callback that activates early stopping. Activates early stopping. 
Requires at least one validation data and one metric diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 3db48a491bbb..567ff804febe 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -69,12 +69,10 @@ def train(params, train_set, num_boost_round=100, an evaluation metric is printed every 4 (instead of 1) boosting stages. learning_rates: list or function List of learning rate for each boosting round - or a customized function that calculates learning_rate in terms of - current number of round (and the total number of boosting round) - (e.g. yields learning rate decay) + or a customized function that calculates learning_rate + in terms of current number of round (e.g. yields learning rate decay) - list l: learning_rate = l[current_round] - - function f: learning_rate = f(current_round, total_boost_round) - or learning_rate = f(current_round) + - function f: learning_rate = f(current_round) callbacks : list of callback functions List of callback functions that are applied at end of each iteration. 
@@ -138,11 +136,10 @@ def train(params, train_set, num_boost_round=100, callbacks.add(callback.print_evaluation(verbose_eval)) if early_stopping_rounds is not None: - callbacks.add(callback.early_stop(early_stopping_rounds, - verbose=bool(verbose_eval))) + callbacks.add(callback.early_stopping(early_stopping_rounds, verbose=bool(verbose_eval))) if learning_rates is not None: - callbacks.add(callback.reset_learning_rate(learning_rates)) + callbacks.add(callback.reset_parameter(learning_rate=learning_rates)) if evals_result is not None: callbacks.add(callback.record_evaluation(evals_result)) @@ -355,7 +352,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, cb.__dict__.setdefault('order', i - len(callbacks)) callbacks = set(callbacks) if early_stopping_rounds is not None: - callbacks.add(callback.early_stop(early_stopping_rounds, verbose=False)) + callbacks.add(callback.early_stopping(early_stopping_rounds, verbose=False)) if verbose_eval is True: callbacks.add(callback.print_evaluation(show_stdv=show_stdv)) elif isinstance(verbose_eval, int): From 5d4ba0fd1915e0daba998dcd69adb0c8961f3227 Mon Sep 17 00:00:00 2001 From: wxchan Date: Sun, 18 Dec 2016 09:33:42 +0800 Subject: [PATCH 3/3] make callbacks public --- docs/Python-API.md | 8 +++++--- python-package/lightgbm/__init__.py | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/Python-API.md b/docs/Python-API.md index c1bb476bb69e..37846415ca40 100644 --- a/docs/Python-API.md +++ b/docs/Python-API.md @@ -839,6 +839,7 @@ The methods of each Class is in alphabetical order. ####print_evaluation(period=1, show_stdv=True) Create a callback that print evaluation result. + (Same function as `verbose_eval` in lightgbm.train()) Parameters ---------- @@ -856,6 +857,7 @@ The methods of each Class is in alphabetical order. ####record_evaluation(eval_result) Create a call back that records the evaluation history into eval_result. 
+ (Same function as `evals_result` in lightgbm.train()) Parameters ---------- @@ -870,9 +872,9 @@ The methods of each Class is in alphabetical order. ####early_stopping(stopping_rounds, verbose=True) Create a callback that activates early stopping. - Activates early stopping. - Requires at least one validation data and one metric - If there's more than one, will check all of them + To activate early stopping, at least one validation data and one metric are required. + If there's more than one, all of them will be checked. + (Same function as `early_stopping_rounds` in lightgbm.train()) Parameters ---------- diff --git a/python-package/lightgbm/__init__.py b/python-package/lightgbm/__init__.py index e743d71661f0..3df896668dfe 100644 --- a/python-package/lightgbm/__init__.py +++ b/python-package/lightgbm/__init__.py @@ -10,6 +10,7 @@ from .basic import Dataset, Booster from .engine import train, cv +from .callback import print_evaluation, record_evaluation, reset_parameter, early_stopping try: from .sklearn import LGBMModel, LGBMRegressor, LGBMClassifier, LGBMRanker except ImportError: @@ -20,5 +21,6 @@ __all__ = ['Dataset', 'Booster', 'train', 'cv', - 'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker'] + 'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker', + 'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping']