Skip to content

Commit

Permalink
feat: xgboost suggester (#792)
Browse files Browse the repository at this point in the history
<!--
Reviews go much faster if the reviewer knows what to focus on! Help them out, e.g.:
Reviewers can skip X, but should pay attention to Y.
-->
  • Loading branch information
MartinBernstorff authored Feb 6, 2024
2 parents c434fe7 + ee64365 commit 2e9fc40
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[placeholder]
@estimator_steps = "lightgbm"
num_leaves = 31
max_bin = 64
device_type = "cpu"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[placeholder]
@estimator_steps = "xgboost"
alpha = 0
reg_lambda = 1
max_depth = 3
learning_rate = 0.3
gamma = 0
tree_method = "gpu_hist"
n_estimators = 100
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[placeholder]
@estimator_steps = "xgboost"
alpha = 0
reg_lambda = 1
max_depth = 3
learning_rate = 0.3
gamma = 0
tree_method = "gpu_hist"
n_estimators = 100
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[placeholder]
@estimator_steps_suggesters = "xgboost_suggester"
n_estimators = [100,1200,true]
alpha = [0.00000001,0.1,true]
reg_lambda = [0.00000001,1.0,true]
max_depth = [1,10,true]
learning_rate = [0.00000001,1,true]
gamma = [0.00000001,0.001,true]
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[placeholder]
@estimator_steps_suggesters = "xgboost_suggester"
n_estimators = [100,1200,true]
alpha = [0.00000001,0.1,true]
reg_lambda = [0.00000001,1.0,true]
max_depth = [1,10,true]
learning_rate = [0.00000001,1,true]
gamma = [0.00000001,0.001,true]
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,12 @@ def from_list_or_mapping(
return cls.from_list(sequence_or_mapping)


# Alias for the element type accepted as a categorical search space.
CategoricalSpaceT = Sequence[optuna.distributions.CategoricalChoiceType]


@dataclass(frozen=True)
class CategoricalSpace:
    """A hyperparameter search space over a fixed set of categorical choices."""

    # The candidate values the optimiser may pick from.
    choices: CategoricalSpaceT

    def suggest(self, trial: optuna.Trial, name: str) -> Any:
        """Ask *trial* to pick one of ``self.choices`` under the given *name*."""
        chosen = trial.suggest_categorical(name=name, choices=self.choices)
        return chosen
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)

from ....hyperparameter_suggester.suggesters.base_suggester import Suggester
from .xgboost import XGBoostSuggester


@dataclass(frozen=True)
Expand All @@ -33,6 +34,7 @@ class SuggesterExample:
should="Logistic regression with list resolves correctly",
suggester=LogisticRegressionSuggester(C=[0.1, 1, False]),
),
SuggesterExample(should="XGBoost resolves correctly", suggester=XGBoostSuggester()),
],
)
def test_logistic_regression_suggester(example: SuggesterExample):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,76 @@
from typing import Literal
from typing import Any, Literal

import numpy as np
import optuna
from xgboost import XGBClassifier

from psycop.common.model_training_v2.config.baseline_registry import BaselineRegistry
from psycop.common.model_training_v2.trainer.task.model_step import ModelStep

from ....hyperparameter_suggester.suggesters.base_suggester import Suggester
from ....hyperparameter_suggester.suggesters.suggester_spaces import (
FloatSpace,
FloatSpaceT,
IntegerSpace,
IntegerspaceT,
)


@BaselineRegistry.estimator_steps.register("xgboost")
def xgboost_classifier_step(
    alpha: float = 0,
    reg_lambda: float = 1,
    max_depth: int = 3,
    learning_rate: float = 0.3,
    gamma: float = 0,
    tree_method: Literal["auto", "gpu_hist"] = "gpu_hist",
    n_estimators: int = 100,
) -> ModelStep:
    """Initialize an XGBClassifier model step with hparams specified as kwargs.

    Args:
        alpha: L1 regularisation term on weights.
        reg_lambda: L2 regularisation term on weights.
        max_depth: Maximum depth of each base learner.
        learning_rate: Boosting learning rate (eta).
        gamma: Minimum loss reduction required to make a further split.
        tree_method: Tree construction algorithm.
            NOTE(review): "gpu_hist" is deprecated in XGBoost >= 2.0 in favour
            of tree_method="hist" + device="cuda" — confirm the pinned version.
        n_estimators: Number of boosting rounds.

    Returns:
        A ("xgboost", XGBClassifier) model-step tuple. The 'missing'
        hyperparameter specifies the value treated as missing and is set to
        np.nan by default.
    """
    return (
        "xgboost",
        XGBClassifier(
            alpha=alpha,
            gamma=gamma,
            learning_rate=learning_rate,
            max_depth=max_depth,
            missing=np.nan,  # treat NaN cells as missing values
            n_estimators=n_estimators,
            reg_lambda=reg_lambda,
            tree_method=tree_method,
        ),
    )


@BaselineRegistry.estimator_steps_suggesters.register("xgboost_suggester")
class XGBoostSuggester(Suggester):
    """Optuna suggester producing hyperparameters for the "xgboost" estimator step.

    Each constructor argument is a (low, high, log-scale) triple (or mapping)
    describing the search space for the corresponding XGBoost hyperparameter.
    """

    def __init__(
        self,
        n_estimators: IntegerspaceT = (100, 1200, True),
        alpha: FloatSpaceT = (1e-8, 0.1, True),
        reg_lambda: FloatSpaceT = (1e-8, 1.0, True),
        max_depth: IntegerspaceT = (1, 10, True),
        learning_rate: FloatSpaceT = (1e-8, 1, True),
        gamma: FloatSpaceT = (1e-8, 0.001, True),
    ):
        # NOTE: this wiring could be auto-generated via introspection of the
        # annotations (e.g. on the `Suggester` base class), but explicit is
        # fine for now.
        self.n_estimators = IntegerSpace.from_list_or_mapping(n_estimators)
        self.max_depth = IntegerSpace.from_list_or_mapping(max_depth)
        for hparam_name, raw_space in (
            ("alpha", alpha),
            ("reg_lambda", reg_lambda),
            ("learning_rate", learning_rate),
            ("gamma", gamma),
        ):
            setattr(self, hparam_name, FloatSpace.from_list_or_mapping(raw_space))

    def suggest_hyperparameters(self, trial: optuna.Trial) -> dict[str, Any]:
        """Sample one concrete hyperparameter set from *trial*.

        Returns a confection-resolvable mapping that selects the "xgboost"
        estimator step. Same note as __init__: this could be auto-generated.
        """
        suggested: dict[str, Any] = {"@estimator_steps": "xgboost"}
        # Iteration order matches the original explicit dict so that optuna
        # draws parameters in the same sequence.
        for hparam_name in (
            "n_estimators",
            "alpha",
            "reg_lambda",
            "max_depth",
            "learning_rate",
            "gamma",
        ):
            space = getattr(self, hparam_name)
            suggested[hparam_name] = space.suggest(trial, name=hparam_name)
        return suggested

0 comments on commit 2e9fc40

Please sign in to comment.