diff --git a/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/lightgbm/lightgbm_20240131_114833.cfg b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/lightgbm/lightgbm_20240131_114833.cfg new file mode 100644 index 000000000..489b601fb --- /dev/null +++ b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/lightgbm/lightgbm_20240131_114833.cfg @@ -0,0 +1,5 @@ +[placeholder] +@estimator_steps = "lightgbm" +num_leaves = 31 +max_bin = 64 +device_type = "cpu" \ No newline at end of file diff --git a/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/xgboost/xgboost_20240131_115154.cfg b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/xgboost/xgboost_20240131_115154.cfg new file mode 100644 index 000000000..f6aaca031 --- /dev/null +++ b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/xgboost/xgboost_20240131_115154.cfg @@ -0,0 +1,9 @@ +[placeholder] +@estimator_steps = "xgboost" +alpha = 0 +reg_lambda = 1 +max_depth = 3 +learning_rate = 0.3 +gamma = 0 +tree_method = "gpu_hist" +n_estimators = 100 \ No newline at end of file diff --git a/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/xgboost/xgboost_20240131_115217.cfg b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/xgboost/xgboost_20240131_115217.cfg new file mode 100644 index 000000000..f6aaca031 --- /dev/null +++ b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps/xgboost/xgboost_20240131_115217.cfg @@ -0,0 +1,9 @@ +[placeholder] +@estimator_steps = "xgboost" +alpha = 0 +reg_lambda = 1 +max_depth = 3 +learning_rate = 0.3 +gamma = 0 +tree_method = "gpu_hist" +n_estimators = 100 \ No newline at end of file diff --git 
a/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps_suggesters/xgboost_suggester/xgboost_suggester_20240131_114833.cfg b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps_suggesters/xgboost_suggester/xgboost_suggester_20240131_114833.cfg new file mode 100644 index 000000000..7c3d7afab --- /dev/null +++ b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps_suggesters/xgboost_suggester/xgboost_suggester_20240131_114833.cfg @@ -0,0 +1,8 @@ +[placeholder] +@estimator_steps_suggesters = "xgboost_suggester" +n_estimators = [100,1200,true] +alpha = [0.00000001,0.1,true] +reg_lambda = [0.00000001,1.0,true] +max_depth = [1,10,true] +learning_rate = [0.00000001,1,true] +gamma = [0.00000001,0.001,true] \ No newline at end of file diff --git a/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps_suggesters/xgboost_suggester/xgboost_suggester_20240131_114956.cfg b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps_suggesters/xgboost_suggester/xgboost_suggester_20240131_114956.cfg new file mode 100644 index 000000000..7c3d7afab --- /dev/null +++ b/psycop/common/model_training_v2/config/historical_registry_configs/estimator_steps_suggesters/xgboost_suggester/xgboost_suggester_20240131_114956.cfg @@ -0,0 +1,8 @@ +[placeholder] +@estimator_steps_suggesters = "xgboost_suggester" +n_estimators = [100,1200,true] +alpha = [0.00000001,0.1,true] +reg_lambda = [0.00000001,1.0,true] +max_depth = [1,10,true] +learning_rate = [0.00000001,1,true] +gamma = [0.00000001,0.001,true] \ No newline at end of file diff --git a/psycop/common/model_training_v2/hyperparameter_suggester/suggesters/suggester_spaces.py b/psycop/common/model_training_v2/hyperparameter_suggester/suggesters/suggester_spaces.py index 4638fbefd..88484ed36 100644 --- a/psycop/common/model_training_v2/hyperparameter_suggester/suggesters/suggester_spaces.py +++ 
b/psycop/common/model_training_v2/hyperparameter_suggester/suggesters/suggester_spaces.py @@ -110,9 +110,12 @@ def from_list_or_mapping( return cls.from_list(sequence_or_mapping) +CategoricalSpaceT = Sequence[optuna.distributions.CategoricalChoiceType] + + @dataclass(frozen=True) class CategoricalSpace: - choices: Sequence[optuna.distributions.CategoricalChoiceType] + choices: CategoricalSpaceT def suggest(self, trial: optuna.Trial, name: str) -> Any: return trial.suggest_categorical(name=name, choices=self.choices) diff --git a/psycop/common/model_training_v2/trainer/task/estimator_steps/test_suggesters.py b/psycop/common/model_training_v2/trainer/task/estimator_steps/test_suggesters.py index 16c734d50..fa2c82e50 100644 --- a/psycop/common/model_training_v2/trainer/task/estimator_steps/test_suggesters.py +++ b/psycop/common/model_training_v2/trainer/task/estimator_steps/test_suggesters.py @@ -10,6 +10,7 @@ ) from ....hyperparameter_suggester.suggesters.base_suggester import Suggester +from .xgboost import XGBoostSuggester @dataclass(frozen=True) @@ -33,6 +34,7 @@ class SuggesterExample: should="Logistic regression with list resolves correctly", suggester=LogisticRegressionSuggester(C=[0.1, 1, False]), ), + SuggesterExample(should="XGBoost resolves correctly", suggester=XGBoostSuggester()), ], ) def test_logistic_regression_suggester(example: SuggesterExample): diff --git a/psycop/common/model_training_v2/trainer/task/estimator_steps/xgboost.py b/psycop/common/model_training_v2/trainer/task/estimator_steps/xgboost.py index 3d7da7d53..7449aee8a 100644 --- a/psycop/common/model_training_v2/trainer/task/estimator_steps/xgboost.py +++ b/psycop/common/model_training_v2/trainer/task/estimator_steps/xgboost.py @@ -1,17 +1,30 @@ -from typing import Literal +from typing import Any, Literal import numpy as np +import optuna from xgboost import XGBClassifier from psycop.common.model_training_v2.config.baseline_registry import BaselineRegistry from 
psycop.common.model_training_v2.trainer.task.model_step import ModelStep +from ....hyperparameter_suggester.suggesters.base_suggester import Suggester +from ....hyperparameter_suggester.suggesters.suggester_spaces import ( + FloatSpace, + FloatSpaceT, + IntegerSpace, + IntegerspaceT, +) + @BaselineRegistry.estimator_steps.register("xgboost") def xgboost_classifier_step( + alpha: float = 0, + reg_lambda: float = 1, + max_depth: int = 3, + learning_rate: float = 0.3, + gamma: float = 0, tree_method: Literal["auto", "gpu_hist"] = "gpu_hist", n_estimators: int = 100, - max_depth: int = 3, ) -> ModelStep: """Initialize XGBClassifier model with hparams specified as kwargs. The 'missing' hyperparameter specifies the value to be treated as missing and is set to np.nan by default. @@ -19,6 +32,45 @@ def xgboost_classifier_step( return ( "xgboost", XGBClassifier( - n_estimators=n_estimators, max_depth=max_depth, tree_method=tree_method, missing=np.nan + alpha=alpha, + gamma=gamma, + learning_rate=learning_rate, + max_depth=max_depth, + missing=np.nan, + n_estimators=n_estimators, + reg_lambda=reg_lambda, + tree_method=tree_method, ), ) + + +@BaselineRegistry.estimator_steps_suggesters.register("xgboost_suggester") +class XGBoostSuggester(Suggester): + def __init__( + self, + n_estimators: IntegerspaceT = (100, 1200, True), + alpha: FloatSpaceT = (1e-8, 0.1, True), + reg_lambda: FloatSpaceT = (1e-8, 1.0, True), + max_depth: IntegerspaceT = (1, 10, True), + learning_rate: FloatSpaceT = (1e-8, 1, True), + gamma: FloatSpaceT = (1e-8, 0.001, True), + ): + # A little annoying, can be auto-generated using introspection of the annotations/types. E.g. added to the `Suggester` class. But this is fine for now. 
+ self.n_estimators = IntegerSpace.from_list_or_mapping(n_estimators) + self.alpha = FloatSpace.from_list_or_mapping(alpha) + self.reg_lambda = FloatSpace.from_list_or_mapping(reg_lambda) + self.max_depth = IntegerSpace.from_list_or_mapping(max_depth) + self.learning_rate = FloatSpace.from_list_or_mapping(learning_rate) + self.gamma = FloatSpace.from_list_or_mapping(gamma) + + def suggest_hyperparameters(self, trial: optuna.Trial) -> dict[str, Any]: + # The same goes for this, can be auto-generated. + return { + "@estimator_steps": "xgboost", + "n_estimators": self.n_estimators.suggest(trial, name="n_estimators"), + "alpha": self.alpha.suggest(trial, name="alpha"), + "reg_lambda": self.reg_lambda.suggest(trial, name="reg_lambda"), + "max_depth": self.max_depth.suggest(trial, name="max_depth"), + "learning_rate": self.learning_rate.suggest(trial, name="learning_rate"), + "gamma": self.gamma.suggest(trial, name="gamma"), + }