From 0f9911b8ac31567fc3bbc50712301794703a2c13 Mon Sep 17 00:00:00 2001 From: thomas0125 Date: Sat, 2 Mar 2024 12:06:38 +0800 Subject: [PATCH 1/4] Let GridSearchCV accept any estimator, not only a Pipeline --- libmultilabel/linear/utils.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/libmultilabel/linear/utils.py b/libmultilabel/linear/utils.py index 7f59d9e8c..c69f97507 100644 --- a/libmultilabel/linear/utils.py +++ b/libmultilabel/linear/utils.py @@ -109,32 +109,31 @@ class GridSearchCV(sklearn.model_selection.GridSearchCV): The usage is similar to sklearn's, except that the parameter ``scoring`` is unavailable. Instead, specify ``scoring_metric`` in ``MultiLabelEstimator`` in the Pipeline. Args: - pipeline (sklearn.pipeline.Pipeline): A sklearn Pipeline for grid search. + estimator (estimator object): A estimator for grid search. param_grid (dict): Search space for a grid search containing a dictionary of parameters and their corresponding list of candidate values. n_jobs (int, optional): Number of CPU cores run in parallel. Defaults to None. """ - _required_parameters = ["pipeline", "param_grid"] + _required_parameters = ["estimator", "param_grid"] - def __init__(self, pipeline: sklearn.pipeline.Pipeline, param_grid: dict, n_jobs=None, **kwargs): - assert isinstance(pipeline, sklearn.pipeline.Pipeline) + def __init__(self, estimator, param_grid: dict, n_jobs=None, **kwargs): if n_jobs is not None and n_jobs > 1: - param_grid = self._set_singlecore_options(pipeline, param_grid) + param_grid = self._set_singlecore_options(estimator, param_grid) if "scoring" in kwargs.keys(): raise ValueError( "Please specify the validation metric with `MultiLabelEstimator.scoring_metric` in the Pipeline instead of using the parameter `scoring`."
) - super().__init__(estimator=pipeline, n_jobs=n_jobs, param_grid=param_grid, **kwargs) + super().__init__(estimator=estimator, n_jobs=n_jobs, param_grid=param_grid, **kwargs) - def _set_singlecore_options(self, pipeline: sklearn.pipeline.Pipeline, param_grid: dict): + def _set_singlecore_options(self, estimator, param_grid: dict): """Set liblinear options to `-m 1`. The grid search option `n_jobs` runs multiple processes in parallel. Using multithreaded liblinear in conjunction with grid search oversubscribes the CPU and deteriorates the performance significantly. """ - params = pipeline.get_params() + params = estimator.get_params() for name, transform in params.items(): if isinstance(transform, MultiLabelEstimator): regex = r"-m \d+" From f9a4be247124c35d5c39a939236a43eb57c894d6 Mon Sep 17 00:00:00 2001 From: thomas0125 Date: Sat, 2 Mar 2024 12:10:36 +0800 Subject: [PATCH 2/4] Unpin scikit-learn in requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6b9c272ed..ed6bd6caf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ liblinear-multicore numba pandas>1.3.0 PyYAML -scikit-learn==1.2.2 +scikit-learn scipy tqdm From 2a5a45b6629bf3cba4fa49fe655a8a40277005e3 Mon Sep 17 00:00:00 2001 From: thomas0125 Date: Wed, 6 Mar 2024 20:17:11 +0800 Subject: [PATCH 3/4] Fix docstring article and bump version to 0.6.2 --- libmultilabel/linear/utils.py | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libmultilabel/linear/utils.py b/libmultilabel/linear/utils.py index c69f97507..3d852462c 100644 --- a/libmultilabel/linear/utils.py +++ b/libmultilabel/linear/utils.py @@ -109,7 +109,7 @@ class GridSearchCV(sklearn.model_selection.GridSearchCV): The usage is similar to sklearn's, except that the parameter ``scoring`` is unavailable. Instead, specify ``scoring_metric`` in ``MultiLabelEstimator`` in the Pipeline. Args: - estimator (estimator object): A estimator for grid search.
+ estimator (estimator object): An estimator for grid search. param_grid (dict): Search space for a grid search containing a dictionary of parameters and their corresponding list of candidate values. n_jobs (int, optional): Number of CPU cores run in parallel. Defaults to None. diff --git a/setup.cfg b/setup.cfg index deb5a06ef..bc69c9931 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = libmultilabel -version = 0.6.1 +version = 0.6.2 author = LibMultiLabel Team license = MIT License license_file = LICENSE From 5b04bc52d23050ab7f1accfa7082476b796eb4bf Mon Sep 17 00:00:00 2001 From: thomas0125 Date: Wed, 6 Mar 2024 22:47:37 +0800 Subject: [PATCH 4/4] Unpin scikit-learn in setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index bc69c9931..12f041b4e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,7 +29,7 @@ install_requires = numba pandas>1.3.0 PyYAML - scikit-learn==1.2.2 + scikit-learn scipy tqdm