Require explicit arguments for mlos_core optimizers #760

Merged (11 commits) on Jun 28, 2024
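The whole PR follows one pattern: each optimizer entry point (`register`, `suggest`, `surrogate_predict`, `acquisition_function`, and the private `_register`/`_suggest` hooks) gains a bare `*` in its signature, making the remaining parameters keyword-only, and every call site is updated to pass arguments by name. A minimal sketch of what the bare `*` does (illustrative toy class, not code from this PR):

```python
import pandas as pd


class ExampleOptimizer:
    """Toy class (not from this PR) showing the keyword-only pattern."""

    def register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame) -> None:
        # Everything after the bare * must be passed by keyword.
        print(f"registered {len(configurations)} configuration(s)")


opt = ExampleOptimizer()
df_configs = pd.DataFrame({"x": [1]})
df_scores = pd.DataFrame({"score": [0.5]})

opt.register(configurations=df_configs, scores=df_scores)  # OK
# opt.register(df_configs, df_scores)  # TypeError: takes 1 positional argument but 3 were given
```

The payoff is that a mixed-up positional call (for example swapping configurations and scores) now fails immediately with a TypeError instead of silently mis-binding.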
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -117,7 +117,7 @@ def bulk_register(self,

# TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745
- self._opt.register(df_configs, df_scores[opt_targets].astype(float))
+ self._opt.register(configurations=df_configs, scores=df_scores[opt_targets].astype(float))

if _LOG.isEnabledFor(logging.DEBUG):
(score, _) = self.get_best_observation()
@@ -195,7 +195,7 @@ def register(self, tunables: TunableGroups, status: Status,
_LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
# TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745
- self._opt.register(df_config, pd.DataFrame([registered_score], dtype=float))
+ self._opt.register(configurations=df_config, scores=pd.DataFrame([registered_score], dtype=float))
return registered_score

def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
@@ -50,7 +50,7 @@ def _optimize(env: Environment, opt: Optimizer) -> Tuple[float, TunableGroups]:
config_df = config_to_dataframe(config)
logger("config: %s", str(config))
try:
logger("prediction: %s", opt._opt.surrogate_predict(config_df))
logger("prediction: %s", opt._opt.surrogate_predict(configurations=config_df))
except RuntimeError:
pass

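The hunk above keeps the `try/except RuntimeError` guard around `surrogate_predict()`: early in a run the surrogate may not be trained yet (the SMAC adapter further down raises `RuntimeError('Surrogate model is not yet trained')`). A hedged sketch of the same guard against the new keyword-only signature; `opt` and `config_df` are assumed inputs, not objects defined in this PR:

```python
from typing import Any, Optional

import pandas as pd


def try_surrogate_predict(opt: Any, config_df: pd.DataFrame) -> Optional[Any]:
    """Best-effort surrogate query; `opt` is assumed to be a BaseBayesianOptimizer."""
    try:
        # Keyword-only after this PR.
        return opt.surrogate_predict(configurations=config_df)
    except RuntimeError:
        # The SMAC adapter raises RuntimeError('Surrogate model is not yet trained')
        # until enough observations have been registered.
        return None
```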
@@ -20,7 +20,7 @@ class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta):
"""Abstract base class defining the interface for Bayesian optimization."""

@abstractmethod
- def surrogate_predict(self, configurations: pd.DataFrame,
+ def surrogate_predict(self, *, configurations: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> npt.NDArray:
"""Obtain a prediction from this Bayesian optimizer's surrogate model for the given configuration(s).

@@ -35,7 +35,7 @@ def surrogate_predict(self, configurations: pd.DataFrame,
pass # pylint: disable=unnecessary-pass # pragma: no cover

@abstractmethod
- def acquisition_function(self, configurations: pd.DataFrame,
+ def acquisition_function(self, *, configurations: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> npt.NDArray:
"""Invokes the acquisition function from this Bayesian optimizer for the given configuration.

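Because `surrogate_predict` and `acquisition_function` are abstract, the keyword-only marker is part of the interface contract: concrete optimizers should keep the bare `*` in their overrides so positional calls stay rejected everywhere. A toy sketch of that pattern (`ToyBayesianBase` and `ToyBayesianOptimizer` are made-up names, not classes from this PR):

```python
from abc import ABCMeta, abstractmethod
from typing import Optional

import numpy as np
import numpy.typing as npt
import pandas as pd


class ToyBayesianBase(metaclass=ABCMeta):
    """Illustrative stand-in for BaseBayesianOptimizer."""

    @abstractmethod
    def surrogate_predict(self, *, configurations: pd.DataFrame,
                          context: Optional[pd.DataFrame] = None) -> npt.NDArray:
        """Predict scores for the given configurations."""


class ToyBayesianOptimizer(ToyBayesianBase):
    def surrogate_predict(self, *, configurations: pd.DataFrame,
                          context: Optional[pd.DataFrame] = None) -> npt.NDArray:
        # Keep the bare * so callers must write configurations=...
        return np.zeros(len(configurations))


print(ToyBayesianOptimizer().surrogate_predict(configurations=pd.DataFrame({"x": [1, 2]})))
```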
@@ -241,7 +241,7 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None
# -- this is planned to be fixed in some future release: https://github.com/automl/SMAC3/issues/946
raise RuntimeError('This function should never be called.')

- def _register(self, configurations: pd.DataFrame,
+ def _register(self, *, configurations: pd.DataFrame,
scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.

@@ -262,7 +262,7 @@ def _register(self, configurations: pd.DataFrame,
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)

# Register each trial (one-by-one)
- for (config, (_i, score)) in zip(self._to_configspace_configs(configurations), scores.iterrows()):
+ for (config, (_i, score)) in zip(self._to_configspace_configs(configurations=configurations), scores.iterrows()):
# Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance
info: TrialInfo = self.trial_info_map.get(
config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed))
@@ -272,7 +272,7 @@ def _register(self, configurations: pd.DataFrame,
# Save optimizer once we register all configs
self.base_optimizer.optimizer.save()

- def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
"""Suggests a new configuration.

Parameters
@@ -299,10 +299,10 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys()))
return config_df

- def register_pending(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
+ def register_pending(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
raise NotImplementedError()

- def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
+ def surrogate_predict(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
from smac.utils.configspace import convert_configurations_to_array # pylint: disable=import-outside-toplevel

if context is not None:
@@ -318,11 +318,11 @@ def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.D
if self.base_optimizer._config_selector._model is None:
raise RuntimeError('Surrogate model is not yet trained')

- configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configurations))
+ configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configurations=configurations))
mean_predictions, _ = self.base_optimizer._config_selector._model.predict(configs)
return mean_predictions.reshape(-1,)

- def acquisition_function(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
+ def acquisition_function(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
if self._space_adapter:
@@ -332,15 +332,15 @@ def acquisition_function(self, configurations: pd.DataFrame, context: Optional[p
if self.base_optimizer._config_selector._acquisition_function is None:
raise RuntimeError('Acquisition function is not yet initialized')

- configs: list = self._to_configspace_configs(configurations)
+ configs: list = self._to_configspace_configs(configurations=configurations)
return self.base_optimizer._config_selector._acquisition_function(configs).reshape(-1,)

def cleanup(self) -> None:
if self._temp_output_directory is not None:
self._temp_output_directory.cleanup()
self._temp_output_directory = None

- def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSpace.Configuration]:
+ def _to_configspace_configs(self, *, configurations: pd.DataFrame) -> List[ConfigSpace.Configuration]:
"""Convert a dataframe of configurations to a list of ConfigSpace configurations.

Parameters
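For reference, the SMAC adapter registers observations one trial at a time by zipping the converted ConfigSpace configurations with `scores.iterrows()`. A rough stand-in for that loop, using plain dicts and a print in place of SMAC's `TrialInfo`/`tell()` machinery:

```python
import pandas as pd

# Hypothetical stand-ins for the converted ConfigSpace configurations
# and the scores DataFrame passed to _register().
configs = [{"x": 1}, {"x": 2}]
scores = pd.DataFrame({"score": [0.9, 0.4]})

for config, (_i, score) in zip(configs, scores.iterrows()):
    # In the real adapter this builds a TrialInfo/TrialValue pair and
    # hands it to the underlying SMAC optimizer before saving it.
    print(config, list(score.astype(float)))
```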
6 changes: 3 additions & 3 deletions mlos_core/mlos_core/optimizers/flaml_optimizer.py
@@ -85,7 +85,7 @@ def __init__(self, *, # pylint: disable=too-many-arguments
self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {}
self._suggested_config: Optional[dict]

- def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
+ def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.

@@ -112,7 +112,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
score=float(np.average(score.astype(float), weights=self._objective_weights)),
)

- def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
"""Suggests a new configuration.

Sampled at random using ConfigSpace.
@@ -132,7 +132,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
config: dict = self._get_next_config()
return pd.DataFrame(config, index=[0])

- def register_pending(self, configurations: pd.DataFrame,
+ def register_pending(self, *, configurations: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
raise NotImplementedError()

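The FLAML adapter collapses multi-objective scores to a single value with `np.average(..., weights=self._objective_weights)` before storing the sample, as seen in the `_register` hunk above. The reduction itself is just a weighted mean (the weights here are made up for illustration):

```python
import numpy as np
import pandas as pd

score_row = pd.Series({"main_score": 0.8, "other_score": 0.2})
objective_weights = [0.75, 0.25]  # hypothetical weights, one per objective

# Same reduction as the adapter: a weighted average of the objective values.
combined = float(np.average(score_row.astype(float), weights=objective_weights))
print(combined)  # 0.65
```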
20 changes: 10 additions & 10 deletions mlos_core/mlos_core/optimizers/optimizer.py
@@ -68,7 +68,7 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]:
"""Get the space adapter instance (if any)."""
return self._space_adapter

- def register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
+ def register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
"""Wrapper method, which employs the space adapter (if any), before registering the configurations and scores.

@@ -101,10 +101,10 @@ def register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
configurations = self._space_adapter.inverse_transform(configurations)
assert configurations.shape[1] == len(self.optimizer_parameter_space.values()), \
"Mismatched configuration shape after inverse transform."
- return self._register(configurations, scores, context)
+ return self._register(configurations=configurations, scores=scores, context=context)

@abstractmethod
- def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
+ def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.

@@ -120,7 +120,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
"""
pass # pylint: disable=unnecessary-pass # pragma: no cover

- def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame:
+ def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame:
"""
Wrapper method, which employs the space adapter (if any), after suggesting a new configuration.

@@ -142,7 +142,7 @@ def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False
if self.space_adapter is not None:
configuration = self.space_adapter.inverse_transform(configuration)
else:
- configuration = self._suggest(context)
+ configuration = self._suggest(context=context)
assert len(configuration) == 1, \
"Suggest must return a single configuration."
assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \
@@ -154,7 +154,7 @@ def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False
return configuration

@abstractmethod
- def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
"""Suggests a new configuration.

Parameters
@@ -170,7 +170,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
pass # pylint: disable=unnecessary-pass # pragma: no cover

@abstractmethod
- def register_pending(self, configurations: pd.DataFrame,
+ def register_pending(self, *, configurations: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations as "pending".
That is to say, it has been suggested by the optimizer, and an experiment trial has been started.
@@ -202,7 +202,7 @@ def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.Data
for _, _, context in self._observations]).reset_index(drop=True)
return (configs, scores, contexts if len(contexts.columns) > 0 else None)

- def get_best_observations(self, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]:
+ def get_best_observations(self, *, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]:
"""
Get the N best observations so far as a triplet of DataFrames (config, score, context).
Default is N=1. The columns are ordered in ASCENDING order of the optimization targets.
@@ -231,7 +231,7 @@ def cleanup(self) -> None:
Redefine this method in optimizers that require cleanup.
"""

- def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
+ def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame:
"""
Convert numpy array from one-hot encoding to a DataFrame
with categoricals and ints in proper columns.
@@ -254,7 +254,7 @@ def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
j += 1
return pd.DataFrame(df_dict)

- def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray:
+ def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray:
"""
Convert pandas DataFrame to one-hot-encoded numpy array.
"""
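The base class keeps a wrapper/hook split: the public `register()` and `suggest()` handle the space adapter and validation, then delegate to the keyword-only `_register()`/`_suggest()` hooks, now also by keyword. A compressed sketch of that shape with the adapter and assertions elided (`SketchOptimizer` is not the real `BaseOptimizer`):

```python
from typing import Optional

import pandas as pd


class SketchOptimizer:
    """Illustrative wrapper/hook split; not the real BaseOptimizer."""

    def register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame,
                 context: Optional[pd.DataFrame] = None) -> None:
        # Real code: record observations, apply space_adapter.inverse_transform(),
        # and validate shapes before delegating.
        return self._register(configurations=configurations, scores=scores, context=context)

    def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame,
                  context: Optional[pd.DataFrame] = None) -> None:
        print(f"registered {len(configurations)} row(s)")


SketchOptimizer().register(configurations=pd.DataFrame({"x": [1]}),
                           scores=pd.DataFrame({"score": [0.1]}))
```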
6 changes: 3 additions & 3 deletions mlos_core/mlos_core/optimizers/random_optimizer.py
@@ -24,7 +24,7 @@ class RandomOptimizer(BaseOptimizer):
The parameter space to optimize.
"""

- def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
+ def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.

@@ -45,7 +45,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
# should we pop them from self.pending_observations?

- def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
"""Suggests a new configuration.

Sampled at random using ConfigSpace.
@@ -65,7 +65,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0])

- def register_pending(self, configurations: pd.DataFrame,
+ def register_pending(self, *, configurations: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
raise NotImplementedError()
# self._pending_observations.append((configurations, context))
@@ -39,11 +39,11 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp
context = pd.DataFrame([["something"]])

with pytest.raises(UserWarning):
- optimizer.register(suggestion, scores, context=context)
+ optimizer.register(configurations=suggestion, scores=scores, context=context)

with pytest.raises(UserWarning):
optimizer.suggest(context=context)

if isinstance(optimizer, BaseBayesianOptimizer):
with pytest.raises(UserWarning):
- optimizer.surrogate_predict(suggestion, context=context)
+ optimizer.surrogate_predict(configurations=suggestion, context=context)
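Context handling is still unimplemented, so passing a `context` DataFrame only triggers the `Not Implemented: Ignoring context ...` `UserWarning` seen throughout the diff; the test above relies on the project's warning filters to surface it through `pytest.raises(UserWarning)`. A standalone sketch of that guard:

```python
from typing import Optional
from warnings import warn

import pandas as pd


def _suggest(context: Optional[pd.DataFrame] = None) -> None:
    # Same guard the optimizers use today: context is accepted but ignored.
    if context is not None:
        warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)


_suggest(context=pd.DataFrame([["something"]]))  # emits the UserWarning
```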
16 changes: 8 additions & 8 deletions mlos_core/mlos_core/tests/optimizers/one_hot_test.py
@@ -85,15 +85,15 @@ def test_to_1hot_data_frame(optimizer: BaseOptimizer,
"""
Toy problem to test one-hot encoding of dataframe.
"""
- assert optimizer._to_1hot(data_frame) == pytest.approx(one_hot_data_frame)
+ assert optimizer._to_1hot(config=data_frame) == pytest.approx(one_hot_data_frame)


def test_to_1hot_series(optimizer: BaseOptimizer,
series: pd.Series, one_hot_series: npt.NDArray) -> None:
"""
Toy problem to test one-hot encoding of series.
"""
- assert optimizer._to_1hot(series) == pytest.approx(one_hot_series)
+ assert optimizer._to_1hot(config=series) == pytest.approx(one_hot_series)


def test_from_1hot_data_frame(optimizer: BaseOptimizer,
@@ -102,7 +102,7 @@ def test_from_1hot_data_frame(optimizer: BaseOptimizer,
"""
Toy problem to test one-hot decoding of dataframe.
"""
- assert optimizer._from_1hot(one_hot_data_frame).to_dict() == data_frame.to_dict()
+ assert optimizer._from_1hot(config=one_hot_data_frame).to_dict() == data_frame.to_dict()


def test_from_1hot_series(optimizer: BaseOptimizer,
@@ -111,7 +111,7 @@ def test_from_1hot_series(optimizer: BaseOptimizer,
"""
Toy problem to test one-hot decoding of series.
"""
- one_hot_df = optimizer._from_1hot(one_hot_series)
+ one_hot_df = optimizer._from_1hot(config=one_hot_series)
assert one_hot_df.shape[0] == 1, f"Unexpected number of rows ({one_hot_df.shape[0]} != 1)"
assert one_hot_df.iloc[0].to_dict() == series.to_dict()

@@ -120,7 +120,7 @@ def test_round_trip_data_frame(optimizer: BaseOptimizer, data_frame: pd.DataFram
"""
Round-trip test for one-hot-encoding and then decoding a data frame.
"""
- df_round_trip = optimizer._from_1hot(optimizer._to_1hot(data_frame))
+ df_round_trip = optimizer._from_1hot(config=optimizer._to_1hot(config=data_frame))
assert df_round_trip.x.to_numpy() == pytest.approx(data_frame.x)
assert (df_round_trip.y == data_frame.y).all()
assert (df_round_trip.z == data_frame.z).all()
@@ -130,7 +130,7 @@ def test_round_trip_series(optimizer: BaseOptimizer, series: pd.DataFrame) -> No
"""
Round-trip test for one-hot-encoding and then decoding a series.
"""
- series_round_trip = optimizer._from_1hot(optimizer._to_1hot(series))
+ series_round_trip = optimizer._from_1hot(config=optimizer._to_1hot(config=series))
assert series_round_trip.x.to_numpy() == pytest.approx(series.x)
assert (series_round_trip.y == series.y).all()
assert (series_round_trip.z == series.z).all()
@@ -141,7 +141,7 @@ def test_round_trip_reverse_data_frame(optimizer: BaseOptimizer,
"""
Round-trip test for one-hot-decoding and then encoding of a numpy array.
"""
- round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_data_frame))
+ round_trip = optimizer._to_1hot(config=optimizer._from_1hot(config=one_hot_data_frame))
assert round_trip == pytest.approx(one_hot_data_frame)


@@ -150,5 +150,5 @@ def test_round_trip_reverse_series(optimizer: BaseOptimizer,
"""
Round-trip test for one-hot-decoding and then encoding of a numpy array.
"""
- round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_series))
+ round_trip = optimizer._to_1hot(config=optimizer._from_1hot(config=one_hot_series))
assert round_trip == pytest.approx(one_hot_series)
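These tests only change how `_to_1hot`/`_from_1hot` are called (the new `config=` keyword). For intuition, here is a rough pandas-only round trip of one-hot encoding and decoding a mixed frame; this is illustrative and not how mlos_core implements it (the real helpers encode against the optimizer's parameter space):

```python
import pandas as pd

df = pd.DataFrame({"x": [0.1, 0.4], "y": ["a", "c"]})

# Encode: categorical columns become float indicator columns, numerics pass through.
one_hot = pd.get_dummies(df, columns=["y"], dtype=float)
print(one_hot)

# Decode: recover each category from its indicator columns.
decoded = df[["x"]].copy()
decoded["y"] = one_hot.filter(like="y_").idxmax(axis=1).str.removeprefix("y_")
print(decoded)
```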
@@ -99,7 +99,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
observation = objective(suggestion)
assert isinstance(observation, pd.DataFrame)
assert set(observation.columns) == {'main_score', 'other_score'}
- optimizer.register(suggestion, observation)
+ optimizer.register(configurations=suggestion, scores=observation)

(best_config, best_score, best_context) = optimizer.get_best_observations()
assert isinstance(best_config, pd.DataFrame)
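Put together, the post-PR API reads like this in an end-to-end suggest/register loop. This is a sketch, not test code from the PR: the constructor keywords (`parameter_space`, `optimization_targets`) and the import path are assumed from this version of mlos_core, and the objective is made up:

```python
import ConfigSpace as CS
import pandas as pd

from mlos_core.optimizers.random_optimizer import RandomOptimizer

space = CS.ConfigurationSpace(seed=1234)
space.add_hyperparameter(CS.UniformFloatHyperparameter("x", lower=0.0, upper=1.0))

# Constructor keyword names assumed from this version of mlos_core.
opt = RandomOptimizer(parameter_space=space, optimization_targets=["score"])

for _ in range(5):
    suggestion = opt.suggest()  # context/defaults are keyword-only as well
    scores = pd.DataFrame({"score": (suggestion["x"] - 0.3) ** 2})
    opt.register(configurations=suggestion, scores=scores)

(best_config, best_score, _best_context) = opt.get_best_observations()
print(best_config, best_score)
```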