Update mlos_core API to support multi-factor optimization (microsoft#730)

* [x] Pass a multi-column DataFrame instead of a Sequence to
`BaseOptimizer.register()` and other methods that deal with scores (see the sketch below)
* [x] Update mlos_bench `MlosCoreOptimizer` to support the new mlos_core
API
* [x] Update unit tests to work with the new API
* [x] Add unit tests for end-to-end multi-target optimization 
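
A minimal, self-contained sketch of the new call shape (the search space, metric names, and values are illustrative, not taken from this change):

```python
import ConfigSpace
import pandas as pd
from mlos_core.optimizers import OptimizerFactory, OptimizerType

# Toy search space; parameter and target names are hypothetical.
space = ConfigSpace.ConfigurationSpace(seed=42)
space.add_hyperparameter(ConfigSpace.UniformFloatHyperparameter("x", 0.0, 1.0))

opt = OptimizerFactory.create(
    parameter_space=space,
    optimization_targets=["latency", "cost"],  # new: multiple targets to minimize
    optimizer_type=OptimizerType.SMAC,
)

# Scores are now a multi-column DataFrame (one column per target), not a Sequence.
configs = pd.DataFrame([{"x": 0.25}])
scores = pd.DataFrame([{"latency": 12.3, "cost": 0.45}])
opt.register(configs, scores)
```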

Merge after ~microsoft#726~

---------

Co-authored-by: Brian Kroth <bpkroth@users.noreply.github.com>
motus and bpkroth authored May 20, 2024
1 parent 6d8854b commit e2b819f
Showing 11 changed files with 305 additions and 115 deletions.
2 changes: 2 additions & 0 deletions mlos_bench/mlos_bench/optimizers/base_optimizer.py
@@ -325,6 +325,8 @@ def _get_scores(self, status: Status,

if not status.is_succeeded():
assert scores is None
# TODO: Be more flexible with values used for failed trials (not just +inf).
# Issue: https://github.com/microsoft/MLOS/issues/523
return {opt_target: float("inf") for opt_target in self._opt_targets}

assert scores is not None
53 changes: 33 additions & 20 deletions mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -46,11 +46,6 @@ def __init__(self,
service: Optional[Service] = None):
super().__init__(tunables, config, global_config, service)

# TODO: Remove after implementing multi-target optimization in mlos_core
if len(self._opt_targets) != 1:
raise NotImplementedError(f"Multi-target optimization is not supported: {self}")
(self._opt_target, self._opt_sign) = list(self._opt_targets.items())[0]

opt_type = getattr(OptimizerType, self._config.pop(
'optimizer_type', DEFAULT_OPTIMIZER_TYPE.name))

@@ -79,6 +74,7 @@ def __init__(self,

self._opt: BaseOptimizer = OptimizerFactory.create(
parameter_space=self.config_space,
optimization_targets=list(self._opt_targets),
optimizer_type=opt_type,
optimizer_kwargs=self._config,
space_adapter_type=space_adapter_type,
@@ -99,26 +95,43 @@ def bulk_register(self,
configs: Sequence[dict],
scores: Sequence[Optional[Dict[str, TunableValue]]],
status: Optional[Sequence[Status]] = None) -> bool:

if not super().bulk_register(configs, scores, status):
return False

df_configs = self._to_df(configs) # Impute missing values, if necessary
df_scores = pd.Series(
[self._extract_target(score) for score in scores],
dtype=float) * self._opt_sign

df_scores = self._adjust_signs_df(
pd.DataFrame([{} if score is None else score for score in scores]))

opt_targets = list(self._opt_targets)
if status is not None:
# Select only the completed trials, set scores for failed trials to +inf.
df_status = pd.Series(status)
df_scores[df_status != Status.SUCCEEDED] = float("inf")
# TODO: Be more flexible with values used for failed trials (not just +inf).
# Issue: https://github.com/microsoft/MLOS/issues/523
df_scores.loc[df_status != Status.SUCCEEDED, opt_targets] = float("inf")
df_status_completed = df_status.apply(Status.is_completed)
df_configs = df_configs[df_status_completed]
df_scores = df_scores[df_status_completed]
self._opt.register(df_configs, df_scores)

# TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745
self._opt.register(df_configs, df_scores[opt_targets].astype(float))

if _LOG.isEnabledFor(logging.DEBUG):
(score, _) = self.get_best_observation()
_LOG.debug("Warm-up END: %s :: %s", self, score)

return True

def _extract_target(self, scores: Optional[Dict[str, TunableValue]]) -> Optional[TunableValue]:
return None if scores is None else scores[self._opt_target]
def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
"""
Adjust the signs of the scores in place for a MINIMIZATION problem.
"""
for (opt_target, opt_dir) in self._opt_targets.items():
df_scores[opt_target] *= opt_dir
return df_scores

def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
"""
@@ -175,21 +188,21 @@ def suggest(self) -> TunableGroups:

def register(self, tunables: TunableGroups, status: Status,
score: Optional[Dict[str, TunableValue]] = None) -> Optional[Dict[str, float]]:
registered_score = super().register(tunables, status, score) # With _opt_sign applied
registered_score = super().register(tunables, status, score) # Sign-adjusted for MINIMIZATION
if status.is_completed():
assert registered_score is not None
df_config = self._to_df([tunables.get_param_values()])
_LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
self._opt.register(df_config, pd.Series([registered_score[self._opt_target]], dtype=float))
# TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745
self._opt.register(df_config, pd.DataFrame([registered_score], dtype=float))
return registered_score

def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
df_config = self._opt.get_best_observation()
(df_config, df_score, _df_context) = self._opt.get_best_observations()
if len(df_config) == 0:
return (None, None)
params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
_LOG.debug("Best observation: %s", params)
score = params.pop("score")
assert score is not None
score = float(score) * self._opt_sign # mlos_core always uses the `score` column
return ({self._opt_target: score}, self._tunables.copy().assign(params))
scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
_LOG.debug("Best observation: %s score: %s", params, scores)
return (scores, self._tunables.copy().assign(params))
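
The sign handling above can be illustrated with a small standalone sketch; the target names and the +1/-1 minimize/maximize convention are stated as assumptions about how `_opt_targets` is populated elsewhere in mlos_bench:

```python
import pandas as pd

# Assumed mlos_bench convention: +1 for targets to minimize, -1 for targets to maximize.
opt_targets = {"latency": 1, "throughput": -1}

df_scores = pd.DataFrame([{"latency": 12.3, "throughput": 456.0}])
for (opt_target, opt_dir) in opt_targets.items():
    df_scores[opt_target] *= opt_dir  # flip maximization targets so everything is minimized

print(df_scores)  # latency stays 12.3; throughput becomes -456.0
```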
6 changes: 5 additions & 1 deletion mlos_core/mlos_core/optimizers/__init__.py
@@ -7,7 +7,7 @@
"""

from enum import Enum
from typing import Optional, TypeVar
from typing import List, Optional, TypeVar

import ConfigSpace

@@ -62,6 +62,7 @@ class OptimizerFactory:
@staticmethod
def create(*,
parameter_space: ConfigSpace.ConfigurationSpace,
optimization_targets: List[str],
optimizer_type: OptimizerType = DEFAULT_OPTIMIZER_TYPE,
optimizer_kwargs: Optional[dict] = None,
space_adapter_type: SpaceAdapterType = SpaceAdapterType.IDENTITY,
@@ -74,6 +75,8 @@ def create(*,
----------
parameter_space : ConfigSpace.ConfigurationSpace
Input configuration space.
optimization_targets : List[str]
The names of the optimization targets to minimize.
optimizer_type : OptimizerType
Optimizer class as defined by Enum.
optimizer_kwargs : Optional[dict]
@@ -102,6 +105,7 @@ def create(*,

optimizer: ConcreteOptimizer = optimizer_type.value(
parameter_space=parameter_space,
optimization_targets=optimization_targets,
space_adapter=space_adapter,
**optimizer_kwargs
)
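
As a usage sketch for the updated factory, an ask/tell round-trip might look as follows; the `run_trial` stand-in and all parameter/target names are illustrative assumptions:

```python
import ConfigSpace
import pandas as pd
from mlos_core.optimizers import OptimizerFactory, OptimizerType

space = ConfigSpace.ConfigurationSpace(seed=42)
space.add_hyperparameter(ConfigSpace.UniformFloatHyperparameter("x", 0.0, 1.0))

opt = OptimizerFactory.create(
    parameter_space=space,
    optimization_targets=["latency", "cost"],
    optimizer_type=OptimizerType.SMAC,
)

def run_trial(config: pd.DataFrame) -> dict:
    """Hypothetical stand-in for a real benchmark run."""
    x = float(config["x"].iloc[0])
    return {"latency": 10.0 + x, "cost": 1.0 - x}

for _ in range(5):
    suggestion = opt.suggest()  # one-row DataFrame of parameter values
    opt.register(suggestion, pd.DataFrame([run_trial(suggestion)]))

(best_configs, best_scores, _context) = opt.get_best_observations()
print(best_scores)
```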
mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
@@ -29,6 +29,7 @@ class SmacOptimizer(BaseBayesianOptimizer):

def __init__(self, *, # pylint: disable=too-many-locals
parameter_space: ConfigSpace.ConfigurationSpace,
optimization_targets: List[str],
space_adapter: Optional[BaseSpaceAdapter] = None,
seed: Optional[int] = 0,
run_name: Optional[str] = None,
@@ -46,6 +47,9 @@ def __init__(self, *, # pylint: disable=too-many-locals
parameter_space : ConfigSpace.ConfigurationSpace
The parameter space to optimize.
optimization_targets : List[str]
The names of the optimization targets to minimize.
space_adapter : BaseSpaceAdapter
The space adapter class to employ for parameter space transformations.
@@ -86,6 +90,7 @@ def __init__(self, *, # pylint: disable=too-many-locals
"""
super().__init__(
parameter_space=parameter_space,
optimization_targets=optimization_targets,
space_adapter=space_adapter,
)

@@ -125,6 +130,7 @@ def __init__(self, *, # pylint: disable=too-many-locals

scenario: Scenario = Scenario(
self.optimizer_parameter_space,
objectives=self._optimization_targets,
name=run_name,
output_directory=Path(output_directory),
deterministic=True,
@@ -186,6 +192,10 @@ def __init__(self, *, # pylint: disable=too-many-locals
intensifier=intensifier,
random_design=random_design,
config_selector=config_selector,
multi_objective_algorithm=Optimizer_Smac.get_multi_objective_algorithm(
scenario,
# objective_weights=[1, 2], # TODO: pass weights as constructor args
),
overwrite=True,
logging_level=False, # Use the existing logger
)
@@ -228,15 +238,16 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None
# -- this is planned to be fixed in some future release: https://github.com/automl/SMAC3/issues/946
raise RuntimeError('This function should never be called.')

def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: Optional[pd.DataFrame] = None) -> None:
def _register(self, configurations: pd.DataFrame,
scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.
Parameters
----------
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
scores : pd.Series
scores : pd.DataFrame
Scores from running the configurations. The index is the same as the index of the configurations.
context : pd.DataFrame
@@ -248,10 +259,11 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: Op
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)

# Register each trial (one-by-one)
for config, score in zip(self._to_configspace_configs(configurations), scores.tolist()):
for (config, (_i, score)) in zip(self._to_configspace_configs(configurations), scores.iterrows()):
# Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance
info: TrialInfo = self.trial_info_map.get(config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed))
value: TrialValue = TrialValue(cost=score, time=0.0, status=StatusType.SUCCESS)
info: TrialInfo = self.trial_info_map.get(
config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed))
value = TrialValue(cost=list(score.astype(float)), time=0.0, status=StatusType.SUCCESS)
self.base_optimizer.tell(info, value, save=False)

# Save optimizer once we register all configs
@@ -293,7 +305,7 @@ def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.D
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
if self._space_adapter and not isinstance(self._space_adapter, IdentityAdapter):
raise NotImplementedError()
raise NotImplementedError("Space adapter not supported for surrogate_predict.")

# pylint: disable=protected-access
if len(self._observations) <= self.base_optimizer._initial_design._n_configs:
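
A small sketch of the row-to-cost-list conversion performed in `_register()` above; the metric names and values are illustrative, and the SMAC `TrialValue` call is shown as a comment only:

```python
import pandas as pd

# Hypothetical multi-objective scores, one row per registered configuration.
scores = pd.DataFrame([{"latency": 12.3, "cost": 0.45},
                       {"latency": 10.1, "cost": 0.52}])

for (_i, score) in scores.iterrows():
    cost = list(score.astype(float))  # e.g., [12.3, 0.45] -- one cost per objective
    # value = TrialValue(cost=cost, time=0.0, status=StatusType.SUCCESS)  # as in _register()
    print(cost)
```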
28 changes: 20 additions & 8 deletions mlos_core/mlos_core/optimizers/flaml_optimizer.py
@@ -6,7 +6,7 @@
Contains the FlamlOptimizer class.
"""

from typing import Dict, NamedTuple, Optional, Union
from typing import Dict, List, NamedTuple, Optional, Union
from warnings import warn

import ConfigSpace
@@ -32,17 +32,22 @@ class FlamlOptimizer(BaseOptimizer):

def __init__(self, *,
parameter_space: ConfigSpace.ConfigurationSpace,
optimization_targets: List[str],
space_adapter: Optional[BaseSpaceAdapter] = None,
low_cost_partial_config: Optional[dict] = None,
seed: Optional[int] = None):
"""
Create an MLOS wrapper class for FLAML.
Create an MLOS wrapper for FLAML.
Parameters
----------
parameter_space : ConfigSpace.ConfigurationSpace
The parameter space to optimize.
optimization_targets : List[str]
The names of the optimization targets to minimize.
For FLAML it must be a list with a single element, e.g., `["score"]`.
space_adapter : BaseSpaceAdapter
The space adapter class to employ for parameter space transformations.
@@ -55,9 +60,14 @@
"""
super().__init__(
parameter_space=parameter_space,
optimization_targets=optimization_targets,
space_adapter=space_adapter,
)

if len(self._optimization_targets) != 1:
raise ValueError("FLAML does not support multi-target optimization")
self._flaml_optimization_target = self._optimization_targets[0]

# Per upstream documentation, it is recommended to set the seed for
# flaml at the start of its operation globally.
if seed is not None:
@@ -72,7 +82,7 @@ def __init__(self, *,
self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {}
self._suggested_config: Optional[dict]

def _register(self, configurations: pd.DataFrame, scores: pd.Series,
def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.
@@ -81,15 +91,16 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series,
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
scores : pd.Series
scores : pd.DataFrame
Scores from running the configurations. The index is the same as the index of the configurations.
context : None
Not Yet Implemented.
"""
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
for (_, config), score in zip(configurations.astype('O').iterrows(), scores):
for (_, config), score in zip(configurations.astype('O').iterrows(),
scores[self._flaml_optimization_target]):
cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
self.optimizer_parameter_space, values=config.to_dict())
if cs_config in self.evaluated_samples:
@@ -140,7 +151,7 @@ def _target_function(self, config: dict) -> Union[dict, None]:
"""
cs_config = normalize_config(self.optimizer_parameter_space, config)
if cs_config in self.evaluated_samples:
return {'score': self.evaluated_samples[cs_config].score}
return {self._flaml_optimization_target: self.evaluated_samples[cs_config].score}

self._suggested_config = dict(cs_config) # Cleaned-up version of the config
return None # Returning None stops the process
@@ -156,7 +167,8 @@ def _get_next_config(self) -> dict:
Returns
-------
result: dict
Dictionary with a single key, `score`, if config already evaluated; `None` otherwise.
A dictionary with a single key that is equal to the name of the optimization target,
if config already evaluated; `None` otherwise.
Raises
------
@@ -182,7 +194,7 @@ def _get_next_config(self) -> dict:
self._target_function,
config=self.flaml_parameter_space,
mode='min',
metric='score',
metric=self._flaml_optimization_target,
points_to_evaluate=points_to_evaluate,
evaluated_rewards=evaluated_rewards,
num_samples=len(points_to_evaluate) + 1,
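
For contrast, a minimal sketch of the FLAML path, which stays single-objective; the parameter and target names are illustrative:

```python
import ConfigSpace
from mlos_core.optimizers.flaml_optimizer import FlamlOptimizer

space = ConfigSpace.ConfigurationSpace(seed=42)
space.add_hyperparameter(ConfigSpace.UniformFloatHyperparameter("x", 0.0, 1.0))

# FLAML accepts exactly one optimization target; passing more than one raises
# ValueError("FLAML does not support multi-target optimization").
flaml_opt = FlamlOptimizer(parameter_space=space, optimization_targets=["latency"])
```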