Add Metadata to optimizers #770

Merged: 18 commits, Jul 1, 2024
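In short: `Optimizer.suggest()` now returns a `(configuration, metadata)` tuple instead of a bare dataframe, and `register()` / `register_pending()` gain a matching optional `metadata` dataframe argument, so backends can round-trip bookkeeping data between suggestion and registration. A minimal caller-side sketch of the new shape (the toy space and objective below are illustrative, not from this PR; import paths assumed from the mlos_core layout):

```python
import ConfigSpace as CS
import pandas as pd

from mlos_core.optimizers import RandomOptimizer

# Toy one-parameter space; any ConfigSpace works here.
space = CS.ConfigurationSpace(seed=1234)
space.add_hyperparameter(CS.UniformIntegerHyperparameter("x", lower=0, upper=10))

optimizer = RandomOptimizer(parameter_space=space, optimization_targets=["score"])

# suggest() now returns a (config, metadata) pair; metadata is None for
# backends that don't populate it yet (all of them, as of this PR).
suggestion, metadata = optimizer.suggest()

# Hypothetical objective: score the suggested config.
scores = pd.DataFrame({"score": [float(suggestion["x"].iloc[0]) ** 2]})

# Hand the metadata back when registering the result.
optimizer.register(configs=suggestion, scores=scores, metadata=metadata)
```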
2 changes: 1 addition & 1 deletion mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -180,7 +180,7 @@ def suggest(self) -> TunableGroups:
         tunables = super().suggest()
         if self._start_with_defaults:
             _LOG.info("Use default values for the first trial")
-        df_config = self._opt.suggest(defaults=self._start_with_defaults)
+        df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults)
         self._start_with_defaults = False
         _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
         return tunables.assign(
@@ -9,7 +9,7 @@
 
 from logging import warning
 from pathlib import Path
-from typing import Dict, List, Optional, Union, TYPE_CHECKING
+from typing import Dict, List, Optional, Tuple, Union, TYPE_CHECKING
 from tempfile import TemporaryDirectory
 from warnings import warn
 
@@ -242,7 +242,7 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None
         raise RuntimeError('This function should never be called.')
 
     def _register(self, *, configs: pd.DataFrame,
-                  scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
+                  scores: pd.DataFrame, context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
         """Registers the given configs and scores.
 
         Parameters
@@ -255,6 +255,9 @@ def _register(self, *, configs: pd.DataFrame,
 
         context : pd.DataFrame
             Not Yet Implemented.
+
+        metadata : pd.DataFrame
+            Not Yet Implemented.
         """
         from smac.runhistory import StatusType, TrialInfo, TrialValue  # pylint: disable=import-outside-toplevel
 
@@ -272,7 +275,7 @@ def _register(self, *, configs: pd.DataFrame,
         # Save optimizer once we register all configs
         self.base_optimizer.optimizer.save()
 
-    def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+    def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
         """Suggests a new configuration.
 
         Parameters
@@ -284,6 +287,9 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
         -------
         configuration : pd.DataFrame
             Pandas dataframe with a single row. Column names are the parameter names.
+
+        metadata : Optional[pd.DataFrame]
+            Not yet implemented.
         """
         if TYPE_CHECKING:
             from smac.runhistory import TrialInfo  # pylint: disable=import-outside-toplevel,unused-import
@@ -297,9 +303,11 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
         assert trial.config.config_space == self.optimizer_parameter_space
         self.trial_info_map[trial.config] = trial
         config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys()))
-        return config_df
+        return config_df, None
 
-    def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
+    def register_pending(self, *, configs: pd.DataFrame,
+                         context: Optional[pd.DataFrame] = None,
+                         metadata: Optional[pd.DataFrame] = None) -> None:
         raise NotImplementedError()
 
     def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
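The file header for the diff above was lost in the page capture, but the `smac.runhistory` imports identify it as the SMAC wrapper; its `_suggest` now returns `(config_df, None)`, reserving the metadata slot without populating it. A caller-side sketch (class name, import path, and constructor arguments assumed from the mlos_core layout of this era):

```python
import ConfigSpace as CS

# Import path assumed; it is not shown in this diff.
from mlos_core.optimizers.bayesian_optimizers.smac_optimizer import SmacOptimizer

space = CS.ConfigurationSpace(seed=42)
space.add_hyperparameter(CS.UniformFloatHyperparameter("lr", lower=1e-4, upper=1e-1, log=True))

opt = SmacOptimizer(parameter_space=space, optimization_targets=["score"])

config, metadata = opt.suggest()
assert metadata is None  # The SMAC backend does not emit metadata yet.
```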
19 changes: 14 additions & 5 deletions mlos_core/mlos_core/optimizers/flaml_optimizer.py
@@ -6,7 +6,7 @@
 Contains the FlamlOptimizer class.
 """
 
-from typing import Dict, List, NamedTuple, Optional, Union
+from typing import Dict, List, NamedTuple, Optional, Tuple, Union
 from warnings import warn
 
 import ConfigSpace
@@ -86,7 +86,7 @@ def __init__(self, *,  # pylint: disable=too-many-arguments
         self._suggested_config: Optional[dict]
 
     def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
-                  context: Optional[pd.DataFrame] = None) -> None:
+                  context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
         """Registers the given configs and scores.
 
         Parameters
@@ -99,9 +99,15 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
 
         context : None
             Not Yet Implemented.
+
+        metadata : None
+            Not Yet Implemented.
         """
         if context is not None:
             warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
+        if metadata is not None:
+            warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning)
+
         for (_, config), (_, score) in zip(configs.astype('O').iterrows(), scores.iterrows()):
             cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
                 self.optimizer_parameter_space, values=config.to_dict())
@@ -112,7 +118,7 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
                 score=float(np.average(score.astype(float), weights=self._objective_weights)),
             )
 
-    def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+    def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
         """Suggests a new configuration.
 
         Sampled at random using ConfigSpace.
@@ -126,14 +132,17 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
         -------
         configuration : pd.DataFrame
             Pandas dataframe with a single row. Column names are the parameter names.
+
+        metadata : None
+            Not implemented.
         """
         if context is not None:
             warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
         config: dict = self._get_next_config()
-        return pd.DataFrame(config, index=[0])
+        return pd.DataFrame(config, index=[0]), None
 
     def register_pending(self, *, configs: pd.DataFrame,
-                         context: Optional[pd.DataFrame] = None) -> None:
+                         context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
         raise NotImplementedError()
 
     def _target_function(self, config: dict) -> Union[dict, None]:
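FLAML takes the same placeholder approach: the `metadata` argument is accepted but warned about and dropped. A sketch of that caller-visible behavior (the space, target, and registered metadata frame are illustrative, not from this PR):

```python
import warnings

import ConfigSpace as CS
import pandas as pd

from mlos_core.optimizers import FlamlOptimizer

space = CS.ConfigurationSpace(seed=7)
space.add_hyperparameter(CS.UniformFloatHyperparameter("x", lower=0.0, upper=1.0))

opt = FlamlOptimizer(parameter_space=space, optimization_targets=["score"])
config, metadata = opt.suggest()  # metadata is None for FLAML

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    opt.register(configs=config, scores=pd.DataFrame({"score": [0.5]}),
                 metadata=pd.DataFrame({"note": ["hypothetical"]}))
# Passing a non-None metadata frame triggers the "Not Implemented" UserWarning.
assert any("Ignoring metadata" in str(w.message) for w in caught)
```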
26 changes: 19 additions & 7 deletions mlos_core/mlos_core/optimizers/optimizer.py
@@ -69,7 +69,7 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]:
         return self._space_adapter
 
     def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
-                 context: Optional[pd.DataFrame] = None) -> None:
+                 context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
         """Wrapper method, which employs the space adapter (if any), before registering the configs and scores.
 
         Parameters
@@ -81,8 +81,12 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
 
         context : pd.DataFrame
             Not Yet Implemented.
+
+        metadata : Optional[pd.DataFrame]
+            Not Yet Implemented.
         """
         # Do some input validation.
+        assert metadata is None or isinstance(metadata, pd.DataFrame)
         assert set(scores.columns) == set(self._optimization_targets), \
             "Mismatched optimization targets."
         assert self._has_context is None or self._has_context ^ (context is None), \
@@ -105,7 +109,7 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
 
     @abstractmethod
     def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
-                  context: Optional[pd.DataFrame] = None) -> None:
+                  context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
         """Registers the given configs and scores.
 
         Parameters
@@ -120,7 +124,8 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
         """
         pass  # pylint: disable=unnecessary-pass  # pragma: no cover
 
-    def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame:
+    def suggest(self, *, context: Optional[pd.DataFrame] = None,
+                defaults: bool = False) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
         """
         Wrapper method, which employs the space adapter (if any), after suggesting a new configuration.
 
@@ -139,10 +144,11 @@ def suggest(self, *, context: Optional[pd.DataFrame] = None,
         """
         if defaults:
             configuration = config_to_dataframe(self.parameter_space.get_default_configuration())
+            metadata = None
             if self.space_adapter is not None:
                 configuration = self.space_adapter.inverse_transform(configuration)
         else:
-            configuration = self._suggest(context=context)
+            configuration, metadata = self._suggest(context=context)
         assert len(configuration) == 1, \
             "Suggest must return a single configuration."
         assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \
@@ -151,10 +157,10 @@ def suggest(self, *, context: Optional[pd.DataFrame] = None,
             configuration = self._space_adapter.transform(configuration)
             assert set(configuration.columns).issubset(set(self.parameter_space)), \
                 "Space adapter produced a configuration that does not match the expected parameter space."
-        return configuration
+        return configuration, metadata
 
     @abstractmethod
-    def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+    def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
         """Suggests a new configuration.
 
         Parameters
@@ -166,12 +172,16 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
         -------
         configuration : pd.DataFrame
             Pandas dataframe with a single row. Column names are the parameter names.
+
+        metadata : Optional[pd.DataFrame]
+            The metadata associated with the given configuration used for evaluations.
         """
         pass  # pylint: disable=unnecessary-pass  # pragma: no cover
 
     @abstractmethod
     def register_pending(self, *, configs: pd.DataFrame,
-                         context: Optional[pd.DataFrame] = None) -> None:
+                         context: Optional[pd.DataFrame] = None,
+                         metadata: Optional[pd.DataFrame] = None) -> None:
         """Registers the given configs as "pending".
         That is to say, it has been suggested by the optimizer, and an experiment trial has been started.
         This can be useful for executing multiple trials in parallel, retry logic, etc.
@@ -182,6 +192,8 @@ def register_pending(self, *, configs: pd.DataFrame,
             Dataframe of configs / parameters. The columns are parameter names and the rows are the configs.
         context : pd.DataFrame
             Not Yet Implemented.
+        metadata : Optional[pd.DataFrame]
+            Not Yet Implemented.
         """
         pass  # pylint: disable=unnecessary-pass  # pragma: no cover
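These abstract signatures define the contract every backend must now satisfy: `_suggest` returns a `(configuration, metadata)` pair and `_register` accepts the metadata back. A hypothetical subclass, as a sketch of the intended round-trip (the class, its timestamp payload, and the base-class import are illustrative; other abstract members of the base class are elided for brevity):

```python
from typing import Optional, Tuple

import pandas as pd

from mlos_core.optimizers.optimizer import BaseOptimizer  # base-class name assumed


class TimestampedOptimizer(BaseOptimizer):
    """Hypothetical backend that tags each suggestion with a timestamp."""

    def _suggest(
        self, *, context: Optional[pd.DataFrame] = None
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        config = pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0])
        # The metadata frame rides along with the suggestion ...
        metadata = pd.DataFrame({"suggested_at": [pd.Timestamp.now(tz="UTC")]})
        return config, metadata

    def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
                  context: Optional[pd.DataFrame] = None,
                  metadata: Optional[pd.DataFrame] = None) -> None:
        # ... and comes back here, e.g. to match results to suggestions.
        pass

    def register_pending(self, *, configs: pd.DataFrame,
                         context: Optional[pd.DataFrame] = None,
                         metadata: Optional[pd.DataFrame] = None) -> None:
        raise NotImplementedError()
```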
18 changes: 13 additions & 5 deletions mlos_core/mlos_core/optimizers/random_optimizer.py
@@ -6,7 +6,7 @@
 Contains the RandomOptimizer class.
 """
 
-from typing import Optional
+from typing import Optional, Tuple
 from warnings import warn
 
 import pandas as pd
@@ -25,7 +25,7 @@ class RandomOptimizer(BaseOptimizer):
     """
 
    def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
-                  context: Optional[pd.DataFrame] = None) -> None:
+                  context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
         """Registers the given configs and scores.
 
         Doesn't do anything on the RandomOptimizer except storing configs for logging.
@@ -40,12 +40,17 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
 
         context : None
             Not Yet Implemented.
+
+        metadata : None
+            Not Yet Implemented.
         """
         if context is not None:
             warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
+        if metadata is not None:
+            warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning)
         # should we pop them from self.pending_observations?
 
-    def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+    def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
         """Suggests a new configuration.
 
         Sampled at random using ConfigSpace.
@@ -59,13 +64,16 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
         -------
         configuration : pd.DataFrame
             Pandas dataframe with a single row. Column names are the parameter names.
+
+        metadata : None
+            Not implemented.
         """
         if context is not None:
             # not sure how that works here?
             warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
-        return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0])
+        return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]), None
 
     def register_pending(self, *, configs: pd.DataFrame,
-                         context: Optional[pd.DataFrame] = None) -> None:
+                         context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
         raise NotImplementedError()
         # self._pending_observations.append((configs, context))
@@ -34,7 +34,7 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp
         optimization_targets=['score'],
         **kwargs
     )
-    suggestion = optimizer.suggest()
+    suggestion, _metadata = optimizer.suggest()
     scores = pd.DataFrame({'score': [1]})
     context = pd.DataFrame([["something"]])
 
@@ -84,8 +84,9 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
         optimizer.get_observations()
 
     for _ in range(max_iterations):
-        suggestion = optimizer.suggest()
+        suggestion, metadata = optimizer.suggest()
         assert isinstance(suggestion, pd.DataFrame)
+        assert metadata is None or isinstance(metadata, pd.DataFrame)
         assert set(suggestion.columns) == {'x', 'y'}
         # Check suggestion values are the expected dtype
         assert isinstance(suggestion.x.iloc[0], np.integer)
21 changes: 11 additions & 10 deletions mlos_core/mlos_core/tests/optimizers/optimizer_test.py
@@ -48,15 +48,15 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace
 
     assert optimizer.parameter_space is not None
 
-    suggestion = optimizer.suggest()
+    suggestion, metadata = optimizer.suggest()
     assert suggestion is not None
 
     myrepr = repr(optimizer)
     assert myrepr.startswith(optimizer_class.__name__)
 
     # pending not implemented
     with pytest.raises(NotImplementedError):
-        optimizer.register_pending(configs=suggestion)
+        optimizer.register_pending(configs=suggestion, metadata=metadata)
 
 
 @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
@@ -94,16 +94,17 @@ def objective(x: pd.Series) -> pd.DataFrame:
         optimizer.get_observations()
 
     for _ in range(max_iterations):
-        suggestion = optimizer.suggest()
+        suggestion, metadata = optimizer.suggest()
         assert isinstance(suggestion, pd.DataFrame)
+        assert metadata is None or isinstance(metadata, pd.DataFrame)
         assert set(suggestion.columns) == {'x', 'y', 'z'}
         # check that suggestion is in the space
         configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict())
         # Raises an error if outside of configuration space
         configuration.is_valid_configuration()
         observation = objective(suggestion['x'])
         assert isinstance(observation, pd.DataFrame)
-        optimizer.register(configs=suggestion, scores=observation)
+        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)
 
     (best_config, best_score, best_context) = optimizer.get_best_observations()
     assert isinstance(best_config, pd.DataFrame)
@@ -268,16 +269,16 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
     _LOG.debug("Optimizer is done with random init.")
 
     # loop for optimizer
-    suggestion = optimizer.suggest()
+    suggestion, metadata = optimizer.suggest()
     observation = objective(suggestion)
-    optimizer.register(configs=suggestion, scores=observation)
+    optimizer.register(configs=suggestion, scores=observation, metadata=metadata)
 
     # loop for llamatune-optimizer
-    suggestion = llamatune_optimizer.suggest()
+    suggestion, metadata = llamatune_optimizer.suggest()
     _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0]
     assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3)  # optimizer explores 1-dimensional space
     observation = objective(suggestion)
-    llamatune_optimizer.register(configs=suggestion, scores=observation)
+    llamatune_optimizer.register(configs=suggestion, scores=observation, metadata=metadata)
 
     # Retrieve best observations
     best_observation = optimizer.get_best_observations()
@@ -375,7 +376,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
         optimizer.get_observations()
 
     for _ in range(max_iterations):
-        suggestion = optimizer.suggest()
+        suggestion, metadata = optimizer.suggest()
         assert isinstance(suggestion, pd.DataFrame)
         assert (suggestion.columns == ['x', 'y']).all()
         # Check suggestion values are the expected dtype
@@ -388,7 +389,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
         # Test registering the suggested configuration with a score.
         observation = objective(suggestion)
         assert isinstance(observation, pd.DataFrame)
-        optimizer.register(configs=suggestion, scores=observation)
+        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)
 
     (best_config, best_score, best_context) = optimizer.get_best_observations()
     assert isinstance(best_config, pd.DataFrame)