From 4e5d8bbe69e4afce297e0bf8149ee08ffdc2218f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 7 Jul 2023 17:03:16 -0500 Subject: [PATCH 1/5] [python-package] make _InnerPredictor construction stricter --- python-package/lightgbm/basic.py | 90 +++++++++++++++++++------------ python-package/lightgbm/engine.py | 10 +++- 2 files changed, 65 insertions(+), 35 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 2beee2a359c5..3762ce98c1e9 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -837,9 +837,11 @@ class _InnerPredictor: def __init__( self, - model_file: Optional[Union[str, Path]] = None, - booster_handle: Optional[ctypes.c_void_p] = None, - pred_parameter: Optional[Dict[str, Any]] = None + booster_handle: ctypes.c_void_p, + num_iterations: int, + pandas_categorical, + pred_parameter: Dict[str, Any], + manage_handle: bool ): """Initialize the _InnerPredictor. @@ -852,37 +854,56 @@ def __init__( pred_parameter: dict or None, optional (default=None) Other parameters for the prediction. """ - self._handle = ctypes.c_void_p() - self.__is_manage_handle = True - if model_file is not None: - """Prediction task""" - out_num_iterations = ctypes.c_int(0) - _safe_call(_LIB.LGBM_BoosterCreateFromModelfile( - _c_str(str(model_file)), - ctypes.byref(out_num_iterations), - ctypes.byref(self._handle))) - out_num_class = ctypes.c_int(0) - _safe_call(_LIB.LGBM_BoosterGetNumClasses( - self._handle, - ctypes.byref(out_num_class))) - self.num_class = out_num_class.value - self.num_total_iteration = out_num_iterations.value - self.pandas_categorical = _load_pandas_categorical(file_name=model_file) - elif booster_handle is not None: - self.__is_manage_handle = False - self._handle = booster_handle - out_num_class = ctypes.c_int(0) - _safe_call(_LIB.LGBM_BoosterGetNumClasses( + self._handle = booster_handle + self.__is_manage_handle = manage_handle + self.num_total_iteration = num_iterations + self.pandas_categorical = pandas_categorical + self.pred_parameter = _param_dict_to_str(pred_parameter) + + out_num_class = ctypes.c_int(0) + _safe_call( + _LIB.LGBM_BoosterGetNumClasses( self._handle, - ctypes.byref(out_num_class))) - self.num_class = out_num_class.value - self.num_total_iteration = self.current_iteration() - self.pandas_categorical = None - else: - raise TypeError('Need model_file or booster_handle to create a predictor') + ctypes.byref(out_num_class) + ) + ) + self.num_class = out_num_class.value - pred_parameter = {} if pred_parameter is None else pred_parameter - self.pred_parameter = _param_dict_to_str(pred_parameter) + @classmethod + def from_booster(cls, booster_handle: ctypes.c_void_p, pred_parameter: Dict[str, Any]) -> "_InnerPredictor": + out_cur_iter = ctypes.c_int(0) + _safe_call( + _LIB.LGBM_BoosterGetCurrentIteration( + booster_handle, + ctypes.byref(out_cur_iter) + ) + ) + return cls( + booster_handle=booster_handle, + num_iterations=out_cur_iter.value, + pandas_categorical=None, + pred_parameter=pred_parameter, + manage_handle=False + ) + + @classmethod + def from_model_file(cls, model_file: Union[str, Path], pred_parameter: Dict[str, Any]) -> "_InnerPredictor": + booster_handle = ctypes.c_void_p() + out_num_iterations = ctypes.c_int(0) + _safe_call( + _LIB.LGBM_BoosterCreateFromModelfile( + _c_str(str(model_file)), + ctypes.byref(out_num_iterations), + ctypes.byref(booster_handle) + ) + ) + return cls( + booster_handle=booster_handle, + num_iterations=out_num_iterations.value, + pandas_categorical=_load_pandas_categorical(file_name=model_file), + pred_parameter=pred_parameter, + manage_handle=True + ) def __del__(self) -> None: try: @@ -4333,7 +4354,10 @@ def _to_predictor( pred_parameter: Dict[str, Any] ) -> _InnerPredictor: """Convert to predictor.""" - predictor = _InnerPredictor(booster_handle=self._handle, pred_parameter=pred_parameter) + predictor = _InnerPredictor.from_booster( + booster_handle=self._handle, + pred_parameter=pred_parameter + ) predictor.pandas_categorical = self.pandas_categorical return predictor diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 1f8624b7055d..0b67152073b4 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -183,7 +183,10 @@ def train( predictor: Optional[_InnerPredictor] = None if isinstance(init_model, (str, Path)): - predictor = _InnerPredictor(model_file=init_model, pred_parameter=params) + predictor = _InnerPredictor.from_model_file( + model_file=init_model, + pred_parameter=params + ) elif isinstance(init_model, Booster): predictor = init_model._to_predictor(pred_parameter=dict(init_model.params, **params)) init_iteration = predictor.num_total_iteration if predictor is not None else 0 @@ -685,7 +688,10 @@ def cv( first_metric_only = params.get('first_metric_only', False) if isinstance(init_model, (str, Path)): - predictor = _InnerPredictor(model_file=init_model, pred_parameter=params) + predictor = _InnerPredictor.from_model_file( + model_file=init_model, + pred_parameter=params + ) elif isinstance(init_model, Booster): predictor = init_model._to_predictor(pred_parameter=dict(init_model.params, **params)) else: From fadb6050745e1b22446f8deebc0b327b2004cbbe Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 7 Jul 2023 23:27:07 -0500 Subject: [PATCH 2/5] make handle non-None --- python-package/lightgbm/basic.py | 87 +++++++++++++++++++------------ python-package/lightgbm/engine.py | 16 ++++-- 2 files changed, 66 insertions(+), 37 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 3762ce98c1e9..316f5aa83c5f 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -33,6 +33,7 @@ 'Sequence', ] +_BoosterHandle = ctypes.c_void_p _DatasetHandle = ctypes.c_void_p _ctypes_int_ptr = Union[ "ctypes._Pointer[ctypes.c_int32]", @@ -837,9 +838,8 @@ class _InnerPredictor: def __init__( self, - booster_handle: ctypes.c_void_p, - num_iterations: int, - pandas_categorical, + booster_handle: _BoosterHandle, + pandas_categorical: Optional[List[List]], pred_parameter: Dict[str, Any], manage_handle: bool ): @@ -847,16 +847,18 @@ def __init__( Parameters ---------- - model_file : str, pathlib.Path or None, optional (default=None) - Path to the model file. - booster_handle : object or None, optional (default=None) + booster : Booster Handle of Booster. - pred_parameter: dict or None, optional (default=None) + pandas_categorical : list of list, or None + If provided, list of categories for ``pandas`` categorical columns. + Where the ``i``th element of the list contains the categories for the ``i``th categorical feature. + pred_parameter: dict Other parameters for the prediction. + manage_handle : bool + If ``True``, free the corresponding Booster on the C++ side when this Python object is deleted. """ self._handle = booster_handle self.__is_manage_handle = manage_handle - self.num_total_iteration = num_iterations self.pandas_categorical = pandas_categorical self.pred_parameter = _param_dict_to_str(pred_parameter) @@ -870,24 +872,49 @@ def __init__( self.num_class = out_num_class.value @classmethod - def from_booster(cls, booster_handle: ctypes.c_void_p, pred_parameter: Dict[str, Any]) -> "_InnerPredictor": + def from_booster( + cls, + booster: "Booster", + pred_parameter: Dict[str, Any] + ) -> "_InnerPredictor": + """Initialize an ``_InnerPredictor`` from a ``Booster``. + + Parameters + ---------- + booster : Booster + Handle of Booster. + pred_parameter: dict + Other parameters for the prediction. + """ out_cur_iter = ctypes.c_int(0) _safe_call( _LIB.LGBM_BoosterGetCurrentIteration( - booster_handle, + booster._handle, ctypes.byref(out_cur_iter) ) ) return cls( - booster_handle=booster_handle, - num_iterations=out_cur_iter.value, - pandas_categorical=None, + booster_handle=booster._handle, + pandas_categorical=booster.pandas_categorical, pred_parameter=pred_parameter, manage_handle=False ) @classmethod - def from_model_file(cls, model_file: Union[str, Path], pred_parameter: Dict[str, Any]) -> "_InnerPredictor": + def from_model_file( + cls, + model_file: Union[str, Path], + pred_parameter: Dict[str, Any] + ) -> "_InnerPredictor": + """Initialize an ``_InnerPredictor`` from a text file contained a LightGBM model. + + Parameters + ---------- + model_file : str or pathlib.Path + Path to the model file. + pred_parameter: dict + Other parameters for the prediction. + """ booster_handle = ctypes.c_void_p() out_num_iterations = ctypes.c_int(0) _safe_call( @@ -899,7 +926,6 @@ def from_model_file(cls, model_file: Union[str, Path], pred_parameter: Dict[str, ) return cls( booster_handle=booster_handle, - num_iterations=out_num_iterations.value, pandas_categorical=_load_pandas_categorical(file_name=model_file), pred_parameter=pred_parameter, manage_handle=True @@ -3068,7 +3094,7 @@ def __init__( model_str : str or None, optional (default=None) Model will be loaded from this string. """ - self._handle = None + self._handle = ctypes.c_void_p() self._network = False self.__need_reload_eval_info = True self._train_data_name = "training" @@ -3119,7 +3145,6 @@ def __init__( # copy the parameters from train_set params.update(train_set.get_params()) params_str = _param_dict_to_str(params) - self._handle = ctypes.c_void_p() _safe_call(_LIB.LGBM_BoosterCreate( train_set._handle, _c_str(params_str), @@ -3148,7 +3173,6 @@ def __init__( elif model_file is not None: # Prediction task out_num_iterations = ctypes.c_int(0) - self._handle = ctypes.c_void_p() _safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _c_str(str(model_file)), ctypes.byref(out_num_iterations), @@ -3927,8 +3951,9 @@ def model_from_string(self, model_str: str) -> "Booster": self : Booster Loaded Booster object. """ - if self._handle is not None: - _safe_call(_LIB.LGBM_BoosterFree(self._handle)) + # ensure that existing Booster is freed before replacing it + # with a new one createdfrom file + _safe_call(_LIB.LGBM_BoosterFree(self._handle)) self._free_buffer() self._handle = ctypes.c_void_p() out_num_iterations = ctypes.c_int(0) @@ -4128,7 +4153,10 @@ def predict( Prediction result. Can be sparse or a list of sparse objects (each element represents predictions for one class) for feature contributions (when ``pred_contrib=True``). """ - predictor = self._to_predictor(pred_parameter=deepcopy(kwargs)) + predictor = _InnerPredictor.from_booster( + booster=self, + pred_parameter=deepcopy(kwargs), + ) if num_iteration is None: if start_iteration <= 0: num_iteration = self.best_iteration @@ -4245,7 +4273,10 @@ def refit( raise LightGBMError('Cannot refit due to null objective function.') if dataset_params is None: dataset_params = {} - predictor = self._to_predictor(pred_parameter=deepcopy(kwargs)) + predictor = _InnerPredictor.from_booster( + booster=self, + pred_parameter=deepcopy(kwargs) + ) leaf_preds: np.ndarray = predictor.predict( # type: ignore[assignment] data=data, start_iteration=-1, @@ -4349,18 +4380,6 @@ def set_leaf_output( ) return self - def _to_predictor( - self, - pred_parameter: Dict[str, Any] - ) -> _InnerPredictor: - """Convert to predictor.""" - predictor = _InnerPredictor.from_booster( - booster_handle=self._handle, - pred_parameter=pred_parameter - ) - predictor.pandas_categorical = self.pandas_categorical - return predictor - def num_feature(self) -> int: """Get number of features. diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 0b67152073b4..2d640d741629 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -188,8 +188,15 @@ def train( pred_parameter=params ) elif isinstance(init_model, Booster): - predictor = init_model._to_predictor(pred_parameter=dict(init_model.params, **params)) - init_iteration = predictor.num_total_iteration if predictor is not None else 0 + predictor = _InnerPredictor.from_booster( + booster=init_model, + pred_parameter=dict(init_model.params, **params) + ) + + if predictor is not None: + init_iteration = predictor.current_iteration() + else: + init_iteration = 0 train_set._update_params(params) \ ._set_predictor(predictor) \ @@ -693,7 +700,10 @@ def cv( pred_parameter=params ) elif isinstance(init_model, Booster): - predictor = init_model._to_predictor(pred_parameter=dict(init_model.params, **params)) + predictor = _InnerPredictor.from_booster( + booster=init_model, + pred_parameter=dict(init_model.params, **params) + ) else: predictor = None From 0f92863b6c300debaf782cca47c0673e6ed02c13 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 7 Jul 2023 23:46:29 -0500 Subject: [PATCH 3/5] docstring --- python-package/lightgbm/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 316f5aa83c5f..3d3e2f0172ef 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -906,7 +906,7 @@ def from_model_file( model_file: Union[str, Path], pred_parameter: Dict[str, Any] ) -> "_InnerPredictor": - """Initialize an ``_InnerPredictor`` from a text file contained a LightGBM model. + """Initialize an ``_InnerPredictor`` from a text file containing a LightGBM model. Parameters ---------- From fb06b652de795fd4f0d87e405e7172befe098c37 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 12 Jul 2023 11:07:56 -0500 Subject: [PATCH 4/5] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Morales --- python-package/lightgbm/basic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 3d3e2f0172ef..c1fd0f7e08b9 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -847,7 +847,7 @@ def __init__( Parameters ---------- - booster : Booster + booster_handle : Booster Handle of Booster. pandas_categorical : list of list, or None If provided, list of categories for ``pandas`` categorical columns. @@ -882,7 +882,7 @@ def from_booster( Parameters ---------- booster : Booster - Handle of Booster. + Booster. pred_parameter: dict Other parameters for the prediction. """ From 023497136fdc9225f7c11776eac821ebab3424ab Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 12 Jul 2023 14:01:54 -0500 Subject: [PATCH 5/5] fix docstrings --- python-package/lightgbm/basic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index c1fd0f7e08b9..ef8689501944 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -847,12 +847,12 @@ def __init__( Parameters ---------- - booster_handle : Booster + booster_handle : object Handle of Booster. pandas_categorical : list of list, or None If provided, list of categories for ``pandas`` categorical columns. Where the ``i``th element of the list contains the categories for the ``i``th categorical feature. - pred_parameter: dict + pred_parameter : dict Other parameters for the prediction. manage_handle : bool If ``True``, free the corresponding Booster on the C++ side when this Python object is deleted. @@ -883,7 +883,7 @@ def from_booster( ---------- booster : Booster Booster. - pred_parameter: dict + pred_parameter : dict Other parameters for the prediction. """ out_cur_iter = ctypes.c_int(0) @@ -912,7 +912,7 @@ def from_model_file( ---------- model_file : str or pathlib.Path Path to the model file. - pred_parameter: dict + pred_parameter : dict Other parameters for the prediction. """ booster_handle = ctypes.c_void_p()