Adding Multi-Objective Optimization to Optimizer Evaluator #216

Merged
@@ -6,7 +6,7 @@

import pandas as pd

from mlos.Optimizers.OptimizationProblem import OptimizationProblem
from mlos.Optimizers.OptimizationProblem import OptimizationProblem, Objective
from mlos.Spaces import Hypergrid, Point


@@ -40,6 +40,12 @@ def output_space(self) -> Hypergrid:

@property
def default_optimization_problem(self):
if self._default_optimization_problem is None:
return OptimizationProblem(
parameter_space=self.parameter_space,
objective_space=self.output_space,
objectives=[Objective(name=dim_name, minimize=True) for dim_name in self.output_space.dimension_names]
)
return self._default_optimization_problem

@default_optimization_problem.setter
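As a quick orientation note (not part of the diff): the property above defaults to minimizing every output dimension when no problem has been assigned explicitly. A minimal sketch, assuming objective_function is an instance of the objective-function class modified here and that its output space has two hypothetical dimensions named "y0" and "y1":

# Sketch only: with no explicitly assigned problem, every output dimension
# becomes an Objective with minimize=True.
problem = objective_function.default_optimization_problem
assert [objective.name for objective in problem.objectives] == ["y0", "y1"]
assert all(objective.minimize for objective in problem.objectives)
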
@@ -5,8 +5,9 @@
import json
import os
import pickle
from typing import Dict, List
from typing import Dict, List, Tuple
from mlos.OptimizerEvaluationTools.OptimumOverTime import OptimumOverTime
from mlos.Optimizers.ParetoFrontier import ParetoFrontier
from mlos.Optimizers.RegressionModels.MultiObjectiveRegressionModelFitState import MultiObjectiveRegressionModelFitState
from mlos.Spaces import Point
from mlos.Tracer import Tracer
@@ -42,6 +43,8 @@ def __init__(
evaluation_frequency: int = None,
regression_model_goodness_of_fit_state: MultiObjectiveRegressionModelFitState = None,
optima_over_time: Dict[str, OptimumOverTime] = None,
pareto_over_time: Dict[int, ParetoFrontier] = None,
pareto_volume_over_time: Dict[int, Tuple[float, float]] = None,
execution_trace: List[Dict[str, object]] = None
):
self.success = False
@@ -58,9 +61,11 @@ def __init__(
self.pickled_objective_function_final_state = pickled_objective_function_final_state
self.num_optimization_iterations = num_optimization_iterations
self.evaluation_frequency = evaluation_frequency
self.regression_model_goodness_of_fit_state = regression_model_goodness_of_fit_state
self.regression_model_fit_state = regression_model_goodness_of_fit_state
self.optima_over_time = optima_over_time
self.execution_trace = execution_trace
self.pareto_over_time = pareto_over_time if pareto_over_time is not None else dict()
self.pareto_volume_over_time = pareto_volume_over_time if pareto_volume_over_time is not None else dict()

def add_pickled_optimizer(self, iteration: int, pickled_optimizer: bytes):
assert iteration >= 0
@@ -105,14 +110,22 @@ def write_to_disk(self, target_folder):
with open(os.path.join(target_folder, "objective_function_final_state.pickle"), "wb") as out_file:
out_file.write(self.pickled_objective_function_final_state)

if self.regression_model_goodness_of_fit_state is not None:
with open(os.path.join(target_folder, "goodness_of_fit.pickle"), "wb") as out_file:
pickle.dump(self.regression_model_goodness_of_fit_state, out_file)
if self.regression_model_fit_state is not None:
with open(os.path.join(target_folder, "regression_model_goodness_of_fit_state.pickle"), "wb") as out_file:
pickle.dump(self.regression_model_fit_state, out_file)

if self.optima_over_time is not None:
with open(os.path.join(target_folder, "optima_over_time.pickle"), "wb") as out_file:
pickle.dump(self.optima_over_time, out_file)

if len(self.pareto_over_time) > 0:
with open(os.path.join(target_folder, "pareto_over_time.pickle"), "wb") as out_file:
pickle.dump(self.pareto_over_time, out_file)

if len(self.pareto_volume_over_time) > 0:
with open(os.path.join(target_folder, "pareto_volume_over_time.json"), "w") as out_file:
json.dump(self.pareto_volume_over_time, out_file, indent=2)

if self.execution_trace is not None:
tracer = Tracer()
tracer.trace_events = self.execution_trace
@@ -127,3 +140,69 @@ def write_to_disk(self, target_folder):
'exception_stack_trace': self.exception_traceback
}
json.dump(execution_info_dict, out_file, indent=2)

@staticmethod
def read_from_disk(target_folder):
"""Mirrors write_to_disk by reading into memory the contents of an OptimizerEvaluationReport from disk."""

optimizer_evaluation_report = OptimizerEvaluationReport()

optimizer_config_file = os.path.join(target_folder, "optimizer_config.json")
with open(optimizer_config_file, 'r') as in_file:
optimizer_evaluation_report.optimizer_configuration = Point.from_json(in_file.read())

objective_function_config_file = os.path.join(target_folder, "objective_function_config.json")
with open(objective_function_config_file, 'r') as in_file:
optimizer_evaluation_report.objective_function_configuration = Point.from_json(in_file.read())

pickled_optimizers_dir = os.path.join(target_folder, "pickled_optimizers")
if os.path.exists(pickled_optimizers_dir):
for file_name in os.listdir(pickled_optimizers_dir):
iteration_number, file_extension = file_name.split(".")
assert file_extension == "pickle"
iteration_number = int(iteration_number)
with open(os.path.join(pickled_optimizers_dir, file_name), 'rb') as in_file:
optimizer_evaluation_report.pickled_optimizers_over_time[iteration_number] = in_file.read()

objective_function_initial_state_file_path = os.path.join(target_folder, "objective_function_initial_state.pickle")
if os.path.exists(objective_function_initial_state_file_path):
with open(objective_function_initial_state_file_path, 'rb') as in_file:
optimizer_evaluation_report.pickled_objective_function_initial_state = in_file.read()


objective_function_final_state_file_path = os.path.join(target_folder, "objective_function_final_state.pickle")
if os.path.exists(objective_function_final_state_file_path):
with open(objective_function_final_state_file_path, 'rb') as in_file:
optimizer_evaluation_report.pickled_objective_function_final_state = in_file.read()

gof_file_path = os.path.join(target_folder, "regression_model_goodness_of_fit_state.pickle")
if os.path.exists(gof_file_path):
with open(gof_file_path, 'rb') as in_file:
optimizer_evaluation_report.regression_model_fit_state = pickle.load(in_file)

optima_over_time_file_path = os.path.join(target_folder, "optima_over_time.pickle")
if os.path.exists(optima_over_time_file_path):
with open(optima_over_time_file_path, 'rb') as in_file:
optimizer_evaluation_report.optima_over_time = pickle.load(in_file)

pareto_over_time_file_path = os.path.join(target_folder, "pareto_over_time.pickle")
if os.path.exists(pareto_over_time_file_path):
with open(pareto_over_time_file_path, "rb") as in_file:
optimizer_evaluation_report.pareto_over_time = pickle.load(in_file)

pareto_volume_over_time_file_path = os.path.join(target_folder, "pareto_volume_over_time.json")
if os.path.exists(pareto_volume_over_time_file_path):
with open(pareto_volume_over_time_file_path, 'r') as in_file:
optimizer_evaluation_report.pareto_volume_over_time = json.load(in_file)

execution_info_file_path = os.path.join(target_folder, "execution_info.json")
if os.path.exists(execution_info_file_path):
with open(execution_info_file_path, 'r') as in_file:
execution_info_dict = json.load(in_file)
optimizer_evaluation_report.success = execution_info_dict['success']
optimizer_evaluation_report.num_optimization_iterations = execution_info_dict['num_optimization_iterations']
optimizer_evaluation_report.evaluation_frequency = execution_info_dict['evaluation_frequency']
optimizer_evaluation_report.exception = execution_info_dict['exception']
optimizer_evaluation_report.exception_traceback = execution_info_dict['exception_stack_trace']

return optimizer_evaluation_report
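
A minimal round-trip sketch of the new persistence path (not part of the diff), assuming report is an OptimizerEvaluationReport produced by OptimizerEvaluator.evaluate_optimizer() with its optimizer and objective-function configurations populated, and that /tmp is writable (the target path is hypothetical):

import os

target_folder = "/tmp/optimizer_evaluation_report"  # hypothetical output location
os.makedirs(target_folder, exist_ok=True)
report.write_to_disk(target_folder)

# read_from_disk mirrors write_to_disk, so the Pareto data survives the round trip.
restored = OptimizerEvaluationReport.read_from_disk(target_folder)
assert len(restored.pareto_over_time) == len(report.pareto_over_time)
# Note: pareto_volume_over_time is persisted as JSON, so its integer iteration keys
# come back as strings after the round trip.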
@@ -2,6 +2,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import copy
import pickle
import traceback

@@ -12,10 +13,10 @@
from mlos.OptimizerEvaluationTools.OptimizerEvaluatorConfigStore import optimizer_evaluator_config_store
from mlos.OptimizerEvaluationTools.OptimumOverTime import OptimumOverTime
from mlos.Optimizers.BayesianOptimizerFactory import BayesianOptimizerFactory, bayesian_optimizer_config_store
from mlos.Optimizers.OptimizationProblem import OptimizationProblem, Objective
from mlos.Optimizers.OptimizerBase import OptimizerBase
from mlos.Optimizers.OptimumDefinition import OptimumDefinition
from mlos.Optimizers.RegressionModels.GoodnessOfFitMetrics import DataSetType
from mlos.Optimizers.RegressionModels.MultiObjectiveRegressionModelFitState import MultiObjectiveRegressionModelFitState
from mlos.Optimizers.RegressionModels.RegressionModelFitState import RegressionModelFitState
from mlos.Spaces import Point
from mlos.Tracer import trace, traced, Tracer
@@ -70,18 +71,10 @@ def __init__(
assert False, "A valid objective_function XOR a valid objective_function_config must be specified"

# Let's get the optimizer and its config assigned to self's fields.

#
if (optimizer_config is not None) and (optimizer is None):
assert optimizer_config in bayesian_optimizer_config_store.parameter_space

objective_name = self.objective_function.output_space.dimension_names[0]
optimization_problem = OptimizationProblem(
parameter_space=self.objective_function.parameter_space,
objective_space=self.objective_function.output_space,
objectives=[Objective(name=objective_name, minimize=True)]
)

optimization_problem = self.objective_function.default_optimization_problem
self.optimizer_config = optimizer_config
self.optimizer = BayesianOptimizerFactory().create_local_optimizer(
optimizer_config=optimizer_config,
@@ -102,7 +95,7 @@


@trace()
def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too-many-statements
def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too-many-statements,too-many-branches
evaluation_report = OptimizerEvaluationReport(
optimizer_configuration=self.optimizer_config,
objective_function_configuration=self.objective_function_config,
@@ -123,7 +116,9 @@ def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too
if self.optimizer_evaluator_config.include_pickled_optimizer_in_report:
evaluation_report.pickled_optimizer_initial_state = pickle.dumps(self.optimizer)

regression_model_fit_state = RegressionModelFitState()
multi_objective_regression_model_fit_state = MultiObjectiveRegressionModelFitState(objective_names=self.optimizer.optimization_problem.objective_names)
for objective_name in self.optimizer.optimization_problem.objective_names:
multi_objective_regression_model_fit_state[objective_name] = RegressionModelFitState()

optima_over_time = {}
optima_over_time[OptimumDefinition.BEST_OBSERVATION.value] = OptimumOverTime(
@@ -164,8 +159,12 @@ def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too
evaluation_report.add_pickled_optimizer(iteration=i, pickled_optimizer=pickle.dumps(self.optimizer))

if self.optimizer.trained:
gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
regression_model_fit_state.set_gof_metrics(data_set_type=DataSetType.TRAIN, gof_metrics=gof_metrics)
multi_objective_gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
for objective_name, gof_metrics in multi_objective_gof_metrics:
multi_objective_regression_model_fit_state[objective_name].set_gof_metrics(
data_set_type=DataSetType.TRAIN,
gof_metrics=gof_metrics
)

for optimum_name, optimum_over_time in optima_over_time.items():
try:
@@ -180,6 +179,15 @@ def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too
)
except ValueError as e:
print(e)

if self.optimizer_evaluator_config.report_pareto_over_time:
evaluation_report.pareto_over_time[i] = copy.deepcopy(self.optimizer.pareto_frontier)

if self.optimizer_evaluator_config.report_pareto_volume_over_time:
volume_estimator = self.optimizer.pareto_frontier.approximate_pareto_volume()
ci99_on_volume = volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.01)
evaluation_report.pareto_volume_over_time[i] = ci99_on_volume

evaluation_report.success = True

except Exception as e:
@@ -188,11 +196,12 @@ def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too
evaluation_report.exception_traceback = traceback.format_exc()

with traced(scope_name="evaluating_optimizer"):
"""Once the optimization is done, we performa final evaluation of the optimizer."""
# Once the optimization is done, we perform a final evaluation of the optimizer.

if self.optimizer.trained:
gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
regression_model_fit_state.set_gof_metrics(data_set_type=DataSetType.TRAIN, gof_metrics=gof_metrics)
multi_objective_gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
for objective_name, gof_metrics in multi_objective_gof_metrics:
multi_objective_regression_model_fit_state[objective_name].set_gof_metrics(data_set_type=DataSetType.TRAIN, gof_metrics=gof_metrics)

for optimum_name, optimum_over_time in optima_over_time.items():
try:
@@ -205,6 +214,14 @@ def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too
except Exception as e:
print(e)

if self.optimizer_evaluator_config.report_pareto_over_time:
evaluation_report.pareto_over_time[i] = copy.deepcopy(self.optimizer.pareto_frontier)

if self.optimizer_evaluator_config.report_pareto_volume_over_time:
volume_estimator = self.optimizer.pareto_frontier.approximate_pareto_volume()
ci99_on_volume = volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.01)
evaluation_report.pareto_volume_over_time[i] = ci99_on_volume

if self.optimizer_evaluator_config.include_execution_trace_in_report:
evaluation_report.execution_trace = mlos.global_values.tracer.trace_events
mlos.global_values.tracer.clear_events()
@@ -216,7 +233,7 @@ def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too
evaluation_report.pickled_objective_function_final_state = pickle.dumps(self.objective_function)

if self.optimizer_evaluator_config.report_regression_model_goodness_of_fit:
evaluation_report.regression_model_goodness_of_fit_state = regression_model_fit_state
evaluation_report.regression_model_fit_state = multi_objective_regression_model_fit_state

if self.optimizer_evaluator_config.report_optima_over_time:
evaluation_report.optima_over_time = optima_over_time
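
For context (not part of the diff): each recorded Pareto volume value is the two-sided 99% confidence interval (alpha=0.01) returned by the volume estimator. A minimal sketch of consuming that data, assuming report is the evaluation report returned above:

# Each entry maps an iteration number to a (lower, upper) confidence bound on the
# estimated Pareto volume at that point in the optimization.
for iteration, (lower, upper) in sorted(report.pareto_volume_over_time.items()):
    print(f"iteration {iteration}: pareto volume in [{lower:.4f}, {upper:.4f}]")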
@@ -26,6 +26,8 @@
CategoricalDimension(name="include_pickled_objective_function_in_report", values=[True, False]),
CategoricalDimension(name="report_regression_model_goodness_of_fit", values=[True, False]),
CategoricalDimension(name="report_optima_over_time", values=[True, False]),
CategoricalDimension(name="report_pareto_over_time", values=[True, False]),
CategoricalDimension(name="report_pareto_volume_over_time", values=[True, False]),
CategoricalDimension(name="include_execution_trace_in_report", values=[True, False]),
]
),
@@ -36,6 +38,8 @@
include_pickled_objective_function_in_report=True,
report_regression_model_goodness_of_fit=True,
report_optima_over_time=True,
report_pareto_over_time=True,
report_pareto_volume_over_time=True,
include_execution_trace_in_report=True,
)
)
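
A hedged usage sketch of the two new config dimensions (not part of the diff), assuming the store exposes its default configuration as a mutable Point via .default, as other mlos config stores do:

from mlos.OptimizerEvaluationTools.OptimizerEvaluatorConfigStore import optimizer_evaluator_config_store

# Assumption: `.default` returns the default config Point; the attribute name is illustrative.
evaluator_config = optimizer_evaluator_config_store.default
evaluator_config.report_pareto_over_time = True
evaluator_config.report_pareto_volume_over_time = False  # skip the volume estimate if not needed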