functional redis

microsoft · Jun 14, 2024 · c29405c · c29405c
1 parent 8d2a894
commit c29405c
Show file tree

Hide file tree

Showing 8 changed files with 173 additions and 235 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -17,7 +17,6 @@
  // See Also:
  // - https://github.com/microsoft/vscode/issues/2809#issuecomment-1544387883
  // - mlos_bench/config/schemas/README.md
-
  {
  "fileMatch": [
  "mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/**/*.jsonc",
@@ -136,8 +135,7 @@
  // See Also .vscode/launch.json for environment variable args to pytest during debug sessions.
  // For the rest, see setup.cfg
  "python.testing.pytestArgs": [
- "--log-level=DEBUG",
  "."
  ],
  "python.testing.unittestEnabled": false
-}
+}
diff --git a/mlos_bench/mlos_bench/config/environments/apps/redis/redis.jsonc b/mlos_bench/mlos_bench/config/environments/apps/redis/redis.jsonc
@@ -45,11 +45,6 @@
  "trial_id",
  "mountPoint"
  ],
- "shell_env_params": [
- "mountPoint",
- "experiment_id",
- "trial_id"
- ],
  "setup": [
  "$mountPoint/$experiment_id/$trial_id/scripts/setup-workload.sh",
  "$mountPoint/$experiment_id/$trial_id/scripts/setup-app.sh"

diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
@@ -19,7 +19,6 @@
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from mlos_core.mlos_core.optimizers.utils import filter_kwargs, to_metadata
 from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
 from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter
 from smac import HyperparameterOptimizationFacade as Optimizer_Smac
@@ -45,7 +44,6 @@ def __init__(
  *, # pylint: disable=too-many-locals
  parameter_space: ConfigSpace.ConfigurationSpace,
  optimization_targets: List[str],
- objective_weights: Optional[List[float]] = None,
  space_adapter: Optional[BaseSpaceAdapter] = None,
  seed: Optional[int] = 0,
  run_name: Optional[str] = None,
@@ -71,9 +69,6 @@ def __init__(
  optimization_targets : List[str]
  The names of the optimization targets to minimize.
 
- objective_weights : Optional[List[float]]
- Optional list of weights of optimization targets.
-
  space_adapter : BaseSpaceAdapter
  The space adapter class to employ for parameter space transformations.
 
@@ -126,12 +121,11 @@ def __init__(
 
  **kwargs:
  Additional arguments to be passed to the
- facade, scenario, and intensifier
+ scenario, and intensifier
  """
  super().__init__(
  parameter_space=parameter_space,
  optimization_targets=optimization_targets,
- objective_weights=objective_weights,
  space_adapter=space_adapter,
  )
 
@@ -140,7 +134,7 @@ def __init__(
 
  # Store for TrialInfo instances returned by .ask()
  self.trial_info_df: pd.DataFrame = pd.DataFrame(
- columns=["Configuration", "Metadata", "TrialInfo", "TrialValue"]
+ columns=["Configuration", "Context", "TrialInfo", "TrialValue"]
  )
  # The default when not specified is to use a known seed (0) to keep results reproducible.
  # However, if a `None` seed is explicitly provided, we let a random seed be produced by SMAC.
@@ -172,7 +166,7 @@ def __init__(
  n_trials=max_trials,
  seed=seed or -1, # if -1, SMAC will generate a random seed internally
  n_workers=1, # Use a single thread for evaluating trials
- **filter_kwargs(Scenario, **kwargs),
+ **SmacOptimizer._filter_kwargs(Scenario, **kwargs),
  )
 
  config_selector: ConfigSelector = facade.get_config_selector(
@@ -183,7 +177,7 @@ def __init__(
  intensifier_instance = facade.get_intensifier(scenario)
  else:
  intensifier_instance = intensifier(
- scenario, **filter_kwargs(intensifier, **kwargs)
+ scenario, **SmacOptimizer._filter_kwargs(intensifier, **kwargs)
  )
 
  # TODO: When bulk registering prior configs to rewarm the optimizer,
@@ -235,11 +229,9 @@ def __init__(
  intensifier=intensifier_instance,
  random_design=random_design,
  config_selector=config_selector,
- multi_objective_algorithm=Optimizer_Smac.get_multi_objective_algorithm(
- scenario, objective_weights=self._objective_weights),
  overwrite=True,
  logging_level=False, # Use the existing logger
- **filter_kwargs(facade, **kwargs),
+ **SmacOptimizer._filter_kwargs(facade, **kwargs),
  )
 
  self.lock = threading.Lock()
@@ -265,7 +257,33 @@ def n_random_init(self) -> int:
  return self.base_optimizer._initial_design._n_configs
 
  @staticmethod
-
+ def _filter_kwargs(function: Callable, **kwargs: Any) -> Dict[str, Any]:
+ """
+ Filters arguments provided in the kwargs dictionary to be restricted to the arguments legal for
+ the called function.
+
+ Parameters
+ ----------
+ function : Callable
+ Function whose signature determines which kwargs are kept.
+ kwargs:
+ kwargs that we are filtering for the target function
+
+ Returns
+ -------
+ dict
+ kwargs with the non-legal arguments filtered out
+ """
+ sig = inspect.signature(function)
+ filter_keys = [
+ param.name
+ for param in sig.parameters.values()
+ if param.kind == param.POSITIONAL_OR_KEYWORD
+ ]
+ filtered_dict = {
+ filter_key: kwargs[filter_key] for filter_key in filter_keys & kwargs.keys()
+ }
+ return filtered_dict
 
  @staticmethod
  def _dummy_target_func(
@@ -297,8 +315,7 @@ def _dummy_target_func(
  raise RuntimeError('This function should never be called.')
 
  def _register(self, configurations: pd.DataFrame,
- scores: pd.DataFrame, context: Optional[pd.DataFrame] = None, 
- metadata: Optional[pd.DataFrame] = None) -> None:
+ scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
  """Registers the given configurations and scores.
 
  Parameters
@@ -309,24 +326,18 @@ def _register(self, configurations: pd.DataFrame,
  scores : pd.DataFrame
  Scores from running the configurations. The index is the same as the index of the configurations.
 
- metadata : pd.DataFrame
- Metadata of the request that is being registered.
- 
  context : pd.DataFrame
- Not Yet Implemented.
+ Context of the request that is being registered.
  """
- if context is not None:
- warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
-
  with self.lock:
  # Register each trial (one-by-one)
- metadatas: Union[List[pd.Series], List[None]] = to_metadata(metadata) or [
+ contexts: Union[List[pd.Series], List[None]] = _to_context(context) or [
  None for _ in scores # type: ignore[misc]
  ]
  for config, score, ctx in zip(
  self._to_configspace_configs(configurations),
  scores.values.tolist(),
- metadatas,
+ contexts,
  ):
  value: TrialValue = TrialValue(
  cost=score, time=0.0, status=StatusType.SUCCESS
@@ -339,7 +350,7 @@ def _register(self, configurations: pd.DataFrame,
  matching = (
  self.trial_info_df["Configuration"] == config
  ) & pd.Series(
- [df_ctx.equals(ctx) for df_ctx in self.trial_info_df["Metadata"]]
+ [df_ctx.equals(ctx) for df_ctx in self.trial_info_df["Context"]]
  )
 
  # make a new entry
@@ -392,8 +403,8 @@ def _suggest(
  configuration : pd.DataFrame
  Pandas dataframe with a single row. Column names are the parameter names.
 
- metadata : pd.DataFrame
- Pandas dataframe with a single row containing the metadata.
+ context : pd.DataFrame
+ Pandas dataframe with a single row containing the context.
  Column names are the budget, seed, and instance of the evaluation, if valid.
  """
  with self.lock:
@@ -408,17 +419,17 @@ def _suggest(
  self.optimizer_parameter_space.check_configuration(trial.config)
  assert trial.config.config_space == self.optimizer_parameter_space
 
- config_df = _extract_config(trial)
- metadata_df = _extract_metadata(trial)
+ config_df = self._extract_config(trial)
+ context_df = SmacOptimizer._extract_context(trial)
 
  self.trial_info_df.loc[len(self.trial_info_df.index)] = [
  trial.config,
- metadata_df.iloc[0],
+ context_df.iloc[0],
  trial,
  None,
  ]
 
- return config_df, metadata_df
+ return config_df, context_df
 
  def register_pending(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
  raise NotImplementedError()
@@ -482,6 +493,31 @@ def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSp
  for (_, config) in configurations.astype('O').iterrows()
  ]
 
+ @staticmethod
+ def _extract_context(trial: TrialInfo) -> pd.DataFrame:
+ """Convert TrialInfo to a DataFrame.
+
+ Parameters
+ ----------
+ trial : TrialInfo
+ The trial to extract.
+
+ Returns
+ -------
+ context : pd.DataFrame
+ Pandas dataframe with a single row containing the context.
+ Column names are the instance, seed, and budget of the evaluation, if valid.
+ """
+ return pd.DataFrame(
+ [[trial.instance, trial.seed, trial.budget]],
+ columns=["instance", "seed", "budget"],
+ )
+
+ def _extract_config(self, trial: TrialInfo) -> pd.DataFrame:
+ return pd.DataFrame(
+ [trial.config], columns=list(self.optimizer_parameter_space.keys())
+ )
+
  def get_observations_full(self) -> pd.DataFrame:
  """Returns the observations as a dataframe with additional info.
 
@@ -510,18 +546,18 @@ def get_best_observation(self) -> pd.DataFrame:
 
  max_budget = np.nan
  budgets = [
- metadata["budget"].max()
- for _, _, metadata in self._observations
- if metadata is not None
+ context["budget"].max()
+ for _, _, context in self._observations
+ if context is not None
  ]
  if len(budgets) > 0:
  max_budget = max(budgets)
 
  if max_budget is not np.nan:
  observations = [
- (config, score, metadata)
- for config, score, metadata in self._observations
- if metadata is not None and metadata["budget"].max() == max_budget
+ (config, score, context)
+ for config, score, context in self._observations
+ if context is not None and context["budget"].max() == max_budget
  ]
 
  configs = pd.concat([config for config, _, _ in observations])
@@ -530,39 +566,8 @@ def get_best_observation(self) -> pd.DataFrame:
 
  return configs.nsmallest(1, columns="score")
 
-def _extract_metadata(trial: TrialInfo) -> pd.DataFrame:
- """Convert TrialInfo to a metadata DataFrame.
 
- Parameters
- ----------
- trial : TrialInfo
- The trial to extract.
-
- Returns
- -------
- metadata : pd.DataFrame
- Pandas dataframe with a single row containing the metadata.
- Column names are the budget and instance of the evaluation, if valid.
- """
- return pd.DataFrame(
- [[trial.instance, trial.seed, trial.budget]],
- columns=["instance", "seed", "budget"],
- )
-
-def _extract_config(self, trial: TrialInfo) -> pd.DataFrame:
- """Convert TrialInfo to a config DataFrame.
-
- Parameters
- ----------
- trial : TrialInfo
- The trial to extract.
-
- Returns
- -------
- config : pd.DataFrame
- Pandas dataframe with a single row containing the config.
- Column names are config parameters
- """
- return pd.DataFrame(
- [trial.config], columns=list(self.optimizer_parameter_space.keys())
- )
+def _to_context(contexts: Optional[pd.DataFrame]) -> Optional[List[pd.Series]]:
+ if contexts is None:
+ return None
+ return [idx_series[1] for idx_series in contexts.iterrows()]