Skip to content

Commit

Permalink
GBM: support hyperopt (#2490) (#2505)
Browse files Browse the repository at this point in the history
  • Loading branch information
tgaddair authored Sep 15, 2022
1 parent 8e98ad1 commit 7e730ff
Show file tree
Hide file tree
Showing 9 changed files with 409 additions and 266 deletions.
10 changes: 7 additions & 3 deletions ludwig/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ludwig.constants import COMBINED, LOSS, NAME, TIED, TYPE
from ludwig.features.base_feature import InputFeature, OutputFeature
from ludwig.features.feature_registries import input_type_registry, output_type_registry
from ludwig.features.feature_utils import LudwigFeatureDict
from ludwig.utils.algorithms_utils import topological_sort_feature_dependencies
from ludwig.utils.metric_utils import get_scalar_from_ludwig_metric
from ludwig.utils.misc_utils import get_from_registry
Expand Down Expand Up @@ -42,6 +43,9 @@ def __init__(self, random_seed: int = None):

super().__init__()

self.input_features = LudwigFeatureDict()
self.output_features = LudwigFeatureDict()

@classmethod
def build_inputs(cls, input_features_def: List[Dict[str, Any]]) -> Dict[str, InputFeature]:
"""Builds and returns input features in topological order."""
Expand Down Expand Up @@ -245,7 +249,7 @@ def update_metrics(self, targets, predictions):
self.eval_loss_metric.update(eval_loss)
self.eval_additional_losses_metrics.update(additional_losses)

def get_metrics(self):
def get_metrics(self) -> Dict[str, Dict[str, float]]:
"""Returns a dictionary of metrics for each output feature of the model."""
all_of_metrics = {}
for of_name, of_obj in self.output_features.items():
Expand Down Expand Up @@ -278,11 +282,11 @@ def collect_weights(self, tensor_names=None, **kwargs):
return [named_param for named_param in self.named_parameters() if named_param[0] in tensor_set]

@abstractmethod
def save(self, save_path):
def save(self, save_path: str):
"""Saves the model to the given path."""

@abstractmethod
def load(self, save_path):
def load(self, save_path: str):
"""Loads the model from the given path."""

@abstractmethod
Expand Down
3 changes: 0 additions & 3 deletions ludwig/models/ecd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from ludwig.combiners.combiners import get_combiner_class
from ludwig.constants import MODEL_ECD, TYPE
from ludwig.features.feature_utils import LudwigFeatureDict
from ludwig.globals import MODEL_WEIGHTS_FILE_NAME
from ludwig.models.base import BaseModel
from ludwig.schema.utils import load_config_with_kwargs
Expand Down Expand Up @@ -42,7 +41,6 @@ def __init__(
super().__init__(random_seed=self._random_seed)

# ================ Inputs ================
self.input_features = LudwigFeatureDict()
try:
self.input_features.update(self.build_inputs(self._input_features_def))
except KeyError as e:
Expand All @@ -60,7 +58,6 @@ def __init__(
self.combiner = combiner_class(input_features=self.input_features, config=config, **kwargs)

# ================ Outputs ================
self.output_features = LudwigFeatureDict()
self.output_features.update(self.build_outputs(self._output_features_def, self.combiner))

# ================ Combined loss metric ================
Expand Down
3 changes: 0 additions & 3 deletions ludwig/models/gbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from ludwig.constants import BINARY, CATEGORY, LOGITS, MODEL_GBM, NAME, NUMBER
from ludwig.features.base_feature import OutputFeature
from ludwig.features.feature_utils import LudwigFeatureDict
from ludwig.globals import MODEL_WEIGHTS_FILE_NAME
from ludwig.models.base import BaseModel
from ludwig.utils import output_feature_utils
Expand All @@ -35,7 +34,6 @@ def __init__(
self._output_features_def = copy.deepcopy(output_features)

# ================ Inputs ================
self.input_features = LudwigFeatureDict()
try:
self.input_features.update(self.build_inputs(self._input_features_def))
except KeyError as e:
Expand All @@ -44,7 +42,6 @@ def __init__(
)

# ================ Outputs ================
self.output_features = LudwigFeatureDict()
self.output_features.update(self.build_outputs(self._output_features_def, input_size=self.input_shape[-1]))

# ================ Combined loss metric ================
Expand Down
6 changes: 3 additions & 3 deletions ludwig/schema/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,8 @@ class GBMTrainerConfig(BaseTrainerConfig):
parameter_metadata=TRAINER_METADATA["learning_rate"],
)

boosting_round_log_frequency: int = schema_utils.PositiveInteger(
default=10, description="Number of boosting rounds per log of the training progress."
boosting_rounds_per_checkpoint: int = schema_utils.PositiveInteger(
default=10, description="Number of boosting rounds per checkpoint / evaluation round."
)

# LightGBM core parameters (https://lightgbm.readthedocs.io/en/latest/Parameters.html)
Expand Down Expand Up @@ -529,7 +529,7 @@ class GBMTrainerConfig(BaseTrainerConfig):
description="Smoothing factor applied to tree nodes in the GBM trainer.",
)

verbose: int = schema_utils.IntegerRange(default=0, min=-1, max=2, description="Verbosity level for GBM trainer.")
verbose: int = schema_utils.IntegerRange(default=-1, min=-1, max=2, description="Verbosity level for GBM trainer.")

# LightGBM IO params
max_bin: int = schema_utils.PositiveInteger(
Expand Down
27 changes: 2 additions & 25 deletions ludwig/trainers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
from ludwig.utils.misc_utils import set_random_seed
from ludwig.utils.torch_utils import get_torch_device
from ludwig.utils.trainer_utils import (
append_metrics,
get_final_steps_per_checkpoint,
get_new_progress_tracker,
get_total_steps,
Expand Down Expand Up @@ -1050,37 +1051,13 @@ def validation_field(self):
def validation_metric(self):
return self._validation_metric

def append_metrics(self, dataset_name, results, metrics_log, tables, progress_tracker):
    """Record one evaluation pass worth of metrics for every output feature.

    For each output feature, appends a ``TrainerMetric`` (tagged with the
    tracker's current epoch and step) to ``metrics_log`` and a score row to
    ``tables``; the combined loss is recorded under the COMBINED/LOSS keys.

    :param dataset_name: label of the dataset split being evaluated (first
        column of every table row).
    :param results: nested dict of metric values keyed by output feature name,
        then metric name.
    :param metrics_log: running log of TrainerMetric entries, mutated in place.
    :param tables: per-feature score tables, mutated in place.
    :param progress_tracker: supplies the current ``epoch`` and ``steps``.
    :return: the (mutated) ``metrics_log`` and ``tables``.
    """
    cur_epoch, cur_step = progress_tracker.epoch, progress_tracker.steps

    for of_name in self.model.output_features:
        of_results = results[of_name]
        row = [dataset_name]

        # metric_functions fixes the reporting order; some metrics may have
        # been excepted and excluded from results, so skip those.
        for metric_name in self.model.output_features[of_name].metric_functions.keys():
            if metric_name not in of_results:
                continue
            value = of_results[metric_name]
            metrics_log[of_name][metric_name].append(TrainerMetric(epoch=cur_epoch, step=cur_step, value=value))
            row.append(value)

        tables[of_name].append(row)

    combined_loss = results[COMBINED][LOSS]
    metrics_log[COMBINED][LOSS].append(TrainerMetric(epoch=cur_epoch, step=cur_step, value=combined_loss))
    tables[COMBINED].append([dataset_name, combined_loss])

    return metrics_log, tables

def evaluation(self, dataset, dataset_name, metrics_log, tables, batch_size, progress_tracker):
    """Evaluate the model on ``dataset`` and record the resulting metrics.

    Runs a ``Predictor`` batch evaluation (predictions are not collected),
    then logs the metrics via the shared ``append_metrics`` helper from
    ``trainer_utils``.

    :param dataset: dataset to evaluate on.
    :param dataset_name: label for this split (e.g. "train", "validation").
    :param metrics_log: running metric log, mutated in place.
    :param tables: per-feature score tables, mutated in place.
    :param batch_size: evaluation batch size.
    :param progress_tracker: supplies the current epoch/step for logging.
    :return: the (mutated) ``metrics_log`` and ``tables``.
    """
    predictor = Predictor(
        self.model, batch_size=batch_size, horovod=self.horovod, report_tqdm_to_ray=self.report_tqdm_to_ray
    )
    metrics, predictions = predictor.batch_evaluation(dataset, collect_predictions=False, dataset_name=dataset_name)

    # Record the metrics exactly once, through the module-level helper
    # (the source span also carried the pre-refactor self.append_metrics
    # call — a diff artifact; keeping both would log every metric twice).
    append_metrics(self.model, dataset_name, metrics, metrics_log, tables, progress_tracker)

    return metrics_log, tables

Expand Down
Loading

0 comments on commit 7e730ff

Please sign in to comment.