Skip to content

Commit

Permalink
feat: xgboost suggester (#792)
Browse files Browse the repository at this point in the history
<!--
Reviews go much faster if the reviewer knows what to focus on! Help them out, e.g.:
Reviewers can skip X, but should pay attention to Y.
-->
  • Loading branch information
MartinBernstorff authored Feb 6, 2024
2 parents c434fe7 + ee64365 commit 2e9fc40
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[placeholder]
@estimator_steps = "lightgbm"
num_leaves = 31
max_bin = 64
device_type = "cpu"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[placeholder]
@estimator_steps = "xgboost"
alpha = 0
reg_lambda = 1
max_depth = 3
learning_rate = 0.3
gamma = 0
tree_method = "gpu_hist"
n_estimators = 100
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[placeholder]
@estimator_steps = "xgboost"
alpha = 0
reg_lambda = 1
max_depth = 3
learning_rate = 0.3
gamma = 0
tree_method = "gpu_hist"
n_estimators = 100
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[placeholder]
@estimator_steps_suggesters = "xgboost_suggester"
n_estimators = [100,1200,true]
alpha = [0.00000001,0.1,true]
reg_lambda = [0.00000001,1.0,true]
max_depth = [1,10,true]
learning_rate = [0.00000001,1,true]
gamma = [0.00000001,0.001,true]
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[placeholder]
@estimator_steps_suggesters = "xgboost_suggester"
n_estimators = [100,1200,true]
alpha = [0.00000001,0.1,true]
reg_lambda = [0.00000001,1.0,true]
max_depth = [1,10,true]
learning_rate = [0.00000001,1,true]
gamma = [0.00000001,0.001,true]
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,12 @@ def from_list_or_mapping(
return cls.from_list(sequence_or_mapping)


# Alias for the element type accepted as a categorical search space.
CategoricalSpaceT = Sequence[optuna.distributions.CategoricalChoiceType]


@dataclass(frozen=True)
class CategoricalSpace:
    """A hyperparameter search space over a fixed set of categorical choices."""

    # The candidate values the optimiser may pick from.
    choices: CategoricalSpaceT

    def suggest(self, trial: optuna.Trial, name: str) -> Any:
        """Ask *trial* to pick one of ``self.choices`` under the given *name*."""
        chosen = trial.suggest_categorical(name=name, choices=self.choices)
        return chosen
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)

from ....hyperparameter_suggester.suggesters.base_suggester import Suggester
from .xgboost import XGBoostSuggester


@dataclass(frozen=True)
Expand All @@ -33,6 +34,7 @@ class SuggesterExample:
should="Logistic regression with list resolves correctly",
suggester=LogisticRegressionSuggester(C=[0.1, 1, False]),
),
SuggesterExample(should="XGBoost resolves correctly", suggester=XGBoostSuggester()),
],
)
def test_logistic_regression_suggester(example: SuggesterExample):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,76 @@
from typing import Literal
from typing import Any, Literal

import numpy as np
import optuna
from xgboost import XGBClassifier

from psycop.common.model_training_v2.config.baseline_registry import BaselineRegistry
from psycop.common.model_training_v2.trainer.task.model_step import ModelStep

from ....hyperparameter_suggester.suggesters.base_suggester import Suggester
from ....hyperparameter_suggester.suggesters.suggester_spaces import (
FloatSpace,
FloatSpaceT,
IntegerSpace,
IntegerspaceT,
)


@BaselineRegistry.estimator_steps.register("xgboost")
def xgboost_classifier_step(
    alpha: float = 0,
    reg_lambda: float = 1,
    max_depth: int = 3,
    learning_rate: float = 0.3,
    gamma: float = 0,
    tree_method: Literal["auto", "gpu_hist"] = "gpu_hist",
    n_estimators: int = 100,
) -> ModelStep:
    """Initialize an XGBClassifier model step with hparams specified as kwargs.

    Args:
        alpha: L1 regularisation term on weights.
        reg_lambda: L2 regularisation term on weights.
        max_depth: Maximum depth of each base learner.
        learning_rate: Boosting learning rate (eta).
        gamma: Minimum loss reduction required to make a further split.
        tree_method: Tree construction algorithm.
            NOTE(review): "gpu_hist" is deprecated in XGBoost >= 2.0 in favour
            of tree_method="hist" + device="cuda" — confirm the pinned version.
        n_estimators: Number of boosting rounds.

    Returns:
        A ("xgboost", XGBClassifier) model-step tuple. The 'missing'
        hyperparameter specifies the value treated as missing and is set to
        np.nan by default.
    """
    return (
        "xgboost",
        XGBClassifier(
            alpha=alpha,
            gamma=gamma,
            learning_rate=learning_rate,
            max_depth=max_depth,
            missing=np.nan,  # treat NaN cells as missing values
            n_estimators=n_estimators,
            reg_lambda=reg_lambda,
            tree_method=tree_method,
        ),
    )


@BaselineRegistry.estimator_steps_suggesters.register("xgboost_suggester")
class XGBoostSuggester(Suggester):
    """Optuna suggester producing hyperparameters for the "xgboost" estimator step.

    Each constructor argument is a (low, high, log-scale) triple (or mapping)
    describing the search space for the corresponding XGBoost hyperparameter.
    """

    def __init__(
        self,
        n_estimators: IntegerspaceT = (100, 1200, True),
        alpha: FloatSpaceT = (1e-8, 0.1, True),
        reg_lambda: FloatSpaceT = (1e-8, 1.0, True),
        max_depth: IntegerspaceT = (1, 10, True),
        learning_rate: FloatSpaceT = (1e-8, 1, True),
        gamma: FloatSpaceT = (1e-8, 0.001, True),
    ):
        # NOTE: this wiring could be auto-generated via introspection of the
        # annotations (e.g. on the `Suggester` base class), but explicit is
        # fine for now.
        self.n_estimators = IntegerSpace.from_list_or_mapping(n_estimators)
        self.max_depth = IntegerSpace.from_list_or_mapping(max_depth)
        for hparam_name, raw_space in (
            ("alpha", alpha),
            ("reg_lambda", reg_lambda),
            ("learning_rate", learning_rate),
            ("gamma", gamma),
        ):
            setattr(self, hparam_name, FloatSpace.from_list_or_mapping(raw_space))

    def suggest_hyperparameters(self, trial: optuna.Trial) -> dict[str, Any]:
        """Sample one concrete hyperparameter set from *trial*.

        Returns a confection-resolvable mapping that selects the "xgboost"
        estimator step. Same note as __init__: this could be auto-generated.
        """
        suggested: dict[str, Any] = {"@estimator_steps": "xgboost"}
        # Iteration order matches the original explicit dict so that optuna
        # draws parameters in the same sequence.
        for hparam_name in (
            "n_estimators",
            "alpha",
            "reg_lambda",
            "max_depth",
            "learning_rate",
            "gamma",
        ):
            space = getattr(self, hparam_name)
            suggested[hparam_name] = space.suggest(trial, name=hparam_name)
        return suggested

0 comments on commit 2e9fc40

Please sign in to comment.