Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing a multi-objective regression model #208

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class GoodnessOfFitMetrics(NamedTuple):
# sample_95_ci_hit_rate: float = None
# sample_99_ci_hit_rate: float = None

def to_json(self):
def to_json(self, indent=None):
return json.dumps({
"last_refit_iteration_number": self.last_refit_iteration_number,
"observation_count": self.observation_count,
Expand All @@ -54,7 +54,7 @@ def to_json(self):
"coefficient_of_determination": self.coefficient_of_determination,
"prediction_90_ci_hit_rate": self.prediction_90_ci_hit_rate,
"sample_90_ci_hit_rate": self.sample_90_ci_hit_rate
})
}, indent=indent)

@classmethod
def from_json(cls, json_string):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
from typing import List
from mlos.Utils.KeyOrderedDict import KeyOrderedDict
from mlos.Optimizers.RegressionModels.GoodnessOfFitMetrics import GoodnessOfFitMetrics

class MultiObjectiveGoodnessOfFitMetrics(KeyOrderedDict):
    """Holds one GoodnessOfFitMetrics entry per objective name.

    All behavior comes from KeyOrderedDict; this subclass only fixes the value type
    to GoodnessOfFitMetrics and uses the objective names as the ordered keys.
    """

    def __init__(self, objective_names: List[str]):
        """Create the container.

        :param objective_names: names of the objectives, used as the ordered keys.
        """
        super().__init__(ordered_keys=objective_names, value_type=GoodnessOfFitMetrics)
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import pandas as pd

from mlos.Logger import create_logger
from mlos.Optimizers.RegressionModels.GoodnessOfFitMetrics import DataSetType
from mlos.Optimizers.RegressionModels.HomogeneousRandomForestRegressionModel import HomogeneousRandomForestRegressionModel
from mlos.Optimizers.RegressionModels.HomogeneousRandomForestConfigStore import homogeneous_random_forest_config_store
from mlos.Optimizers.RegressionModels.MultiObjectiveGoodnessOfFitMetrics import MultiObjectiveGoodnessOfFitMetrics
from mlos.Optimizers.RegressionModels.MultiObjectivePrediction import MultiObjectivePrediction
from mlos.Optimizers.RegressionModels.MultiObjectiveRegressionModel import MultiObjectiveRegressionModel
from mlos.Spaces import Hypergrid, Point, SimpleHypergrid
from mlos.Utils.KeyOrderedDict import KeyOrderedDict


class MultiObjectiveHomogeneousRandomForest(MultiObjectiveRegressionModel):
    """Multi-objective regression model backed by one HomogeneousRandomForestRegressionModel per objective.

    Every dimension of the output space gets its own single-objective random forest, and all of
    those forests are constructed from the same model_config.

    """

    def __init__(
            self,
            model_config: Point,
            input_space: Hypergrid,
            output_space: Hypergrid,
            logger=None
    ):
        """Build one random forest for each dimension of output_space.

        :param model_config: configuration shared by all single-objective forests; asserted to
            belong to homogeneous_random_forest_config_store.parameter_space.
        :param input_space: space of the features; passed unchanged to every forest.
        :param output_space: space of the objectives; each of its dimensions becomes the sole
            output dimension of one forest.
        :param logger: logger handed to every forest; a new one is created when None.
        """
        super().__init__(
            model_type=type(self),
            model_config=model_config,
            input_space=input_space,
            output_space=output_space
        )
        self.logger = create_logger("MultiObjectiveHomogeneousRandomForest") if logger is None else logger

        # For now it is enough to check that the config is a member of the random forest's
        # parameter space. A more elaborate solution might be needed down the road.
        #
        assert model_config in homogeneous_random_forest_config_store.parameter_space

        self._regressors_by_objective_name = KeyOrderedDict(
            ordered_keys=self.output_dimension_names,
            value_type=HomogeneousRandomForestRegressionModel
        )

        for output_dimension in output_space.dimensions:
            # Each forest sees a one-dimensional output space wrapping just its own objective.
            single_objective_space = SimpleHypergrid(name=f"{output_dimension.name}_objective", dimensions=[output_dimension])
            self._regressors_by_objective_name[output_dimension.name] = HomogeneousRandomForestRegressionModel(
                model_config=model_config,
                input_space=input_space,
                output_space=single_objective_space,
                logger=self.logger
            )

    def fit(self, features_df: pd.DataFrame, targets_df: pd.DataFrame, iteration_number: int) -> None:
        """Fit each per-objective forest on its own column of targets_df.

        Objectives that have no column in targets_df are skipped.
        """
        for objective_name, regressor in self._regressors_by_objective_name:
            if objective_name in targets_df.columns:
                regressor.fit(
                    feature_values_pandas_frame=features_df,
                    target_values_pandas_frame=targets_df[[objective_name]],
                    iteration_number=iteration_number
                )

    def predict(self, features_df: pd.DataFrame, include_only_valid_rows: bool = True) -> MultiObjectivePrediction:
        """Collect the prediction of every per-objective forest, keyed by objective name."""
        predictions = MultiObjectivePrediction(objective_names=self.output_dimension_names)
        for objective_name, regressor in self._regressors_by_objective_name:
            predictions[objective_name] = regressor.predict(features_df, include_only_valid_rows=include_only_valid_rows)
        return predictions

    def compute_goodness_of_fit(self, features_df: pd.DataFrame, targets_df: pd.DataFrame, data_set_type: DataSetType) -> MultiObjectiveGoodnessOfFitMetrics:
        """Collect the goodness-of-fit metrics of every per-objective forest, keyed by objective name.

        Every objective name is looked up as a column of targets_df.
        """
        metrics_by_objective = MultiObjectiveGoodnessOfFitMetrics(objective_names=self.output_dimension_names)
        for objective_name, regressor in self._regressors_by_objective_name:
            metrics_by_objective[objective_name] = regressor.compute_goodness_of_fit(
                features_df=features_df,
                target_df=targets_df[[objective_name]],
                data_set_type=data_set_type
            )
        return metrics_by_objective
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
from typing import List
from mlos.Utils.KeyOrderedDict import KeyOrderedDict
from mlos.Optimizers.RegressionModels.Prediction import Prediction

class MultiObjectivePrediction(KeyOrderedDict):
    """Holds one Prediction entry per objective name.

    All behavior comes from KeyOrderedDict; this subclass only fixes the value type
    to Prediction and uses the objective names as the ordered keys.
    """

    def __init__(self, objective_names: List[str]):
        """Create the container.

        :param objective_names: names of the objectives, used as the ordered keys.
        """
        super().__init__(ordered_keys=objective_names, value_type=Prediction)
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
from abc import ABC, abstractmethod

import pandas as pd

from mlos.Spaces import Hypergrid, Point
from mlos.Optimizers.RegressionModels.GoodnessOfFitMetrics import DataSetType
from mlos.Optimizers.RegressionModels.MultiObjectiveGoodnessOfFitMetrics import MultiObjectiveGoodnessOfFitMetrics
from mlos.Optimizers.RegressionModels.MultiObjectivePrediction import MultiObjectivePrediction


class MultiObjectiveRegressionModel(ABC):
    """Abstract interface that every multi-objective regression model implements."""

    def __init__(
            self,
            model_type: type,
            model_config: Point,
            input_space: Hypergrid,
            output_space: Hypergrid
    ):
        """Record the model's type, configuration and spaces.

        The dimension names of both spaces are also stored as input_dimension_names and
        output_dimension_names.
        """
        self.model_type = model_type
        self.model_config = model_config

        self.input_space = input_space
        self.input_dimension_names = input_space.dimension_names

        self.output_space = output_space
        self.output_dimension_names = output_space.dimension_names

    @abstractmethod
    def fit(self, features_df: pd.DataFrame, targets_df: pd.DataFrame, iteration_number: int) -> None:
        """Fit the model to the given features and targets. Must be overridden."""
        raise NotImplementedError

    @abstractmethod
    def predict(self, features_df: pd.DataFrame, include_only_valid_rows: bool = True) -> MultiObjectivePrediction:
        """Return a MultiObjectivePrediction for the given features. Must be overridden."""
        raise NotImplementedError

    @abstractmethod
    def compute_goodness_of_fit(self, features_df: pd.DataFrame, targets_df: pd.DataFrame, data_set_type: DataSetType) -> MultiObjectiveGoodnessOfFitMetrics:
        """Return MultiObjectiveGoodnessOfFitMetrics for the given data set. Must be overridden."""
        raise NotImplementedError
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import pytest

import mlos.global_values
from mlos.OptimizerEvaluationTools.ObjectiveFunctionFactory import ObjectiveFunctionFactory, objective_function_config_store
from mlos.Optimizers.RegressionModels.GoodnessOfFitMetrics import DataSetType
from mlos.Optimizers.RegressionModels.HomogeneousRandomForestConfigStore import homogeneous_random_forest_config_store
from mlos.Optimizers.RegressionModels.MultiObjectiveHomogeneousRandomForest import MultiObjectiveHomogeneousRandomForest
from mlos.Logger import create_logger

class TestMultiObjectiveHomogeneousRandomForest:
    """Smoke test: fit a default-configured multi-objective forest and print its goodness of fit."""

    @classmethod
    def setup_class(cls) -> None:
        mlos.global_values.declare_singletons()
        cls.logger = create_logger("TestMultiObjectiveHomogeneousRandomForest")

    @pytest.mark.parametrize('objective_function_config_name', ["2d_hypersphere_minimize_some", "10d_hypersphere_minimize_some", "5_mutually_exclusive_polynomials"])
    def test_default_config(self, objective_function_config_name):
        """Fit on random training data, predict, then print per-objective metrics for train and test data."""
        objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
        parameter_space = objective_function.parameter_space

        multi_objective_rf = MultiObjectiveHomogeneousRandomForest(
            model_config=homogeneous_random_forest_config_store.default,
            input_space=parameter_space,
            output_space=objective_function.output_space,
            logger=self.logger
        )

        num_training_samples = 1000
        num_testing_samples = 100

        # Training data is drawn before testing data, same as the sampling order used so far.
        train_params_df = parameter_space.random_dataframe(num_samples=num_training_samples)
        train_objectives_df = objective_function.evaluate_dataframe(train_params_df)

        test_params_df = parameter_space.random_dataframe(num_samples=num_testing_samples)
        test_objectives_df = objective_function.evaluate_dataframe(test_params_df)

        multi_objective_rf.fit(features_df=train_params_df, targets_df=train_objectives_df, iteration_number=num_training_samples)

        # The prediction is only exercised here; its result is not inspected.
        multi_objective_rf.predict(features_df=train_params_df, include_only_valid_rows=True)

        separator = "------------------------------------------------------------------------------------"
        evaluation_runs = [
            # TRAINING DATA
            (
                "--------------------------------------- TRAIN --------------------------------------",
                train_params_df,
                train_objectives_df,
                DataSetType.TRAIN
            ),
            # TESTING DATA
            (
                "--------------------------------------- TEST ---------------------------------------",
                test_params_df,
                test_objectives_df,
                DataSetType.TEST_KNOWN_RANDOM
            ),
        ]

        for banner, params_df, objectives_df, data_set_type in evaluation_runs:
            print(separator)
            print(banner)
            print(separator)
            gof = multi_objective_rf.compute_goodness_of_fit(features_df=params_df, targets_df=objectives_df, data_set_type=data_set_type)
            for objective_name in objective_function.output_space.dimension_names:
                print(separator)
                print(objective_name)
                print(gof[objective_name].to_json(indent=2))
48 changes: 48 additions & 0 deletions source/Mlos.Python/mlos/Utils/KeyOrderedDict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
from typing import Dict, Iterator, List, Tuple, Union

class KeyOrderedDict:
    """Dictionary-like container whose entries are enumerated and accessed in a pre-specified key order.

    The set of keys is fixed at construction time and every key maps to None until a value is
    assigned. Entries can be addressed by key (str) or by the key's position in the ordering (int).
    Iterating yields (key, value) tuples in the pre-specified order.
    """

    def __init__(self, ordered_keys: List[str], value_type: type, dictionary: Dict[str, object] = None):
        """Create the container.

        :param ordered_keys: the keys, in the order in which they are to be enumerated.
        :param value_type: every non-None value stored must be an instance of this type.
        :param dictionary: optional initial values; keys absent from ordered_keys are ignored,
            and ordered keys absent from the dictionary start out as None.
        :raises TypeError: if a key is not a str or an initial value has the wrong type.
        """
        # Take a private copy so that the caller mutating their list later cannot change our
        # ordering (and so that a one-shot iterable is not exhausted by the validation below).
        ordered_keys = list(ordered_keys)
        if not all(isinstance(key, str) for key in ordered_keys):
            raise TypeError("All ordered_keys must be of type str.")

        self.value_type = value_type
        self._ordered_keys = ordered_keys

        if dictionary is None:
            dictionary = {}

        self._dict = {}
        for key in self._ordered_keys:
            value = dictionary.get(key)
            self._validate_value(value)
            self._dict[key] = value

    def __getitem__(self, key_or_index: Union[str, int]) -> object:
        """Return the value stored under a key or at a position in the ordering.

        :raises KeyError: for an unknown str key.
        :raises IndexError: for an int position outside the ordering.
        """
        key = self._to_key(key_or_index)
        return self._dict[key]

    def __setitem__(self, key_or_index: Union[str, int], value) -> None:
        """Store value under one of the pre-specified keys (given by key or by position).

        :raises TypeError: if value is neither None nor an instance of value_type.
        :raises KeyError: if a str key is not one of the pre-specified keys.
        :raises IndexError: for an int position outside the ordering.
        """
        self._validate_value(value)
        key = self._to_key(key_or_index)
        if key not in self._dict:
            # Accepting an unknown key would store a value that __iter__ never yields.
            raise KeyError(key)
        self._dict[key] = value

    def __iter__(self) -> Iterator[Tuple[str, object]]:
        """Yield (key, value) tuples in the pre-specified key order."""
        for key in self._ordered_keys:
            yield key, self._dict[key]

    def _validate_value(self, value) -> None:
        """Raise TypeError unless value is None or an instance of value_type."""
        if not (isinstance(value, self.value_type) or value is None):
            raise TypeError(f'Value must be of type {str(self.value_type)} not {type(value)}')

    def _to_key(self, key_or_index: Union[str, int]) -> str:
        """Translate a key or a position in the ordering into a key.

        A str is returned unchanged without checking membership; an int is used as an index
        into the ordered keys.

        :raises ValueError: if key_or_index is neither a str nor an int.
        """
        if isinstance(key_or_index, str):
            return key_or_index
        if isinstance(key_or_index, int):
            return self._ordered_keys[key_or_index]
        raise ValueError(f"{key_or_index} is neither an int nor a str.")
4 changes: 4 additions & 0 deletions source/Mlos.Python/mlos/Utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
39 changes: 39 additions & 0 deletions source/Mlos.Python/mlos/Utils/unit_tests/TestKeyOrderedDict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import pytest
from mlos.Utils.KeyOrderedDict import KeyOrderedDict

class TestKeyOrderedDict:
    """Sanity checks for KeyOrderedDict: ordering, key/index access, type checking and lookup errors."""

    def test_sanity(self):
        keys = list("abcdefghijklmnopqrstuvwxyz")
        values = [key.upper() for key in keys]

        key_ordered_dict = KeyOrderedDict(ordered_keys=keys, value_type=str)
        for key, value in zip(keys, values):
            key_ordered_dict[key] = value

        # Enumeration must follow the order in which the keys were specified.
        for position, entry in enumerate(key_ordered_dict):
            assert entry == (keys[position], values[position])

        # None is always an acceptable value, and key and index address the same entry.
        key_ordered_dict['a'] = None
        assert key_ordered_dict[0] is None
        assert key_ordered_dict['a'] is None

        # A value of the wrong type is rejected and leaves the stored value untouched.
        with pytest.raises(TypeError):
            key_ordered_dict['b'] = 1
        assert key_ordered_dict[1] == "B"
        assert key_ordered_dict['b'] == "B"

        key_ordered_dict['b'] = "1"
        assert key_ordered_dict[1] == "1"
        assert key_ordered_dict['b'] == "1"

        # Unknown keys and out-of-range positions fail on lookup.
        with pytest.raises(KeyError):
            _ = key_ordered_dict['A']

        with pytest.raises(IndexError):
            _ = key_ordered_dict[100]
4 changes: 4 additions & 0 deletions source/Mlos.Python/mlos/Utils/unit_tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#