From 476d48cdc0411479b47e56563b5eb384dc4b8540 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Thu, 27 Jun 2024 18:02:59 -0500 Subject: [PATCH 01/17] switch over to ordered arguments --- .../bayesian_optimizers/bayesian_optimizer.py | 4 ++-- .../bayesian_optimizers/smac_optimizer.py | 18 ++++++++--------- .../mlos_core/optimizers/flaml_optimizer.py | 8 ++++---- mlos_core/mlos_core/optimizers/optimizer.py | 20 +++++++++---------- .../mlos_core/optimizers/random_optimizer.py | 6 +++--- .../optimizers/bayesian_optimizers_test.py | 4 ++-- .../tests/optimizers/one_hot_test.py | 16 +++++++-------- .../optimizers/optimizer_multiobj_test.py | 2 +- .../tests/optimizers/optimizer_test.py | 14 ++++++------- 9 files changed, 46 insertions(+), 46 deletions(-) diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py index f066be1fb9..9c69a09a6c 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py @@ -20,7 +20,7 @@ class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta): """Abstract base class defining the interface for Bayesian optimization.""" @abstractmethod - def surrogate_predict(self, configurations: pd.DataFrame, + def surrogate_predict(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: """Obtain a prediction from this Bayesian optimizer's surrogate model for the given configuration(s). 
@@ -35,7 +35,7 @@ def surrogate_predict(self, configurations: pd.DataFrame, pass # pylint: disable=unnecessary-pass # pragma: no cover @abstractmethod - def acquisition_function(self, configurations: pd.DataFrame, + def acquisition_function(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: """Invokes the acquisition function from this Bayesian optimizer for the given configuration. diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 8a433218fa..d1d4cf5764 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -241,7 +241,7 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None # -- this planned to be fixed in some future release: https://github.com/automl/SMAC3/issues/946 raise RuntimeError('This function should never be called.') - def _register(self, configurations: pd.DataFrame, + def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. 
@@ -262,7 +262,7 @@ def _register(self, configurations: pd.DataFrame, warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) # Register each trial (one-by-one) - for (config, (_i, score)) in zip(self._to_configspace_configs(configurations), scores.iterrows()): + for (config, (_i, score)) in zip(self._to_configspace_configs(configurations=configurations), scores.iterrows()): # Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance info: TrialInfo = self.trial_info_map.get( config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed)) @@ -272,7 +272,7 @@ def _register(self, configurations: pd.DataFrame, # Save optimizer once we register all configs self.base_optimizer.optimizer.save() - def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. Parameters @@ -299,10 +299,10 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())) return config_df - def register_pending(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: + def register_pending(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() - def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: + def surrogate_predict(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: from smac.utils.configspace import convert_configurations_to_array # pylint: disable=import-outside-toplevel if context is not None: @@ -318,11 +318,11 @@ def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.D if self.base_optimizer._config_selector._model is None: raise RuntimeError('Surrogate model is not 
yet trained') - configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configurations)) + configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configurations=configurations)) mean_predictions, _ = self.base_optimizer._config_selector._model.predict(configs) return mean_predictions.reshape(-1,) - def acquisition_function(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: + def acquisition_function(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) if self._space_adapter: @@ -332,7 +332,7 @@ def acquisition_function(self, configurations: pd.DataFrame, context: Optional[p if self.base_optimizer._config_selector._acquisition_function is None: raise RuntimeError('Acquisition function is not yet initialized') - configs: list = self._to_configspace_configs(configurations) + configs: list = self._to_configspace_configs(configurations=configurations) return self.base_optimizer._config_selector._acquisition_function(configs).reshape(-1,) def cleanup(self) -> None: @@ -340,7 +340,7 @@ def cleanup(self) -> None: self._temp_output_directory.cleanup() self._temp_output_directory = None - def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSpace.Configuration]: + def _to_configspace_configs(self, *, configurations: pd.DataFrame) -> List[ConfigSpace.Configuration]: """Convert a dataframe of configurations to a list of ConfigSpace configurations. 
Parameters diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index 0ad3c2da29..abbdb90418 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -85,7 +85,7 @@ def __init__(self, *, # pylint: disable=too-many-arguments self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {} self._suggested_config: Optional[dict] - def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. @@ -112,7 +112,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, score=float(np.average(score.astype(float), weights=self._objective_weights)), ) - def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. Sampled at random using ConfigSpace. @@ -132,11 +132,11 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: config: dict = self._get_next_config() return pd.DataFrame(config, index=[0]) - def register_pending(self, configurations: pd.DataFrame, + def register_pending(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() - def _target_function(self, config: dict) -> Union[dict, None]: + def _target_function(self, *, config: dict) -> Union[dict, None]: """Configuration evaluation function called by FLAML optimizer. FLAML may suggest the same configuration multiple times (due to its warm-start mechanism). 
diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index f1cedb85dc..f0079ce25f 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -68,7 +68,7 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]: """Get the space adapter instance (if any).""" return self._space_adapter - def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, + def register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Wrapper method, which employs the space adapter (if any), before registering the configurations and scores. @@ -101,10 +101,10 @@ def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, configurations = self._space_adapter.inverse_transform(configurations) assert configurations.shape[1] == len(self.optimizer_parameter_space.values()), \ "Mismatched configuration shape after inverse transform." - return self._register(configurations, scores, context) + return self._register(configurations=configurations, scores=scores, context=context) @abstractmethod - def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. @@ -120,7 +120,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, """ pass # pylint: disable=unnecessary-pass # pragma: no cover - def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame: + def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame: """ Wrapper method, which employs the space adapter (if any), after suggesting a new configuration. 
@@ -142,7 +142,7 @@ def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False if self.space_adapter is not None: configuration = self.space_adapter.inverse_transform(configuration) else: - configuration = self._suggest(context) + configuration = self._suggest(context=context) assert len(configuration) == 1, \ "Suggest must return a single configuration." assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \ @@ -154,7 +154,7 @@ def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False return configuration @abstractmethod - def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. Parameters @@ -170,7 +170,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: pass # pylint: disable=unnecessary-pass # pragma: no cover @abstractmethod - def register_pending(self, configurations: pd.DataFrame, + def register_pending(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations as "pending". That is it say, it has been suggested by the optimizer, and an experiment trial has been started. @@ -202,7 +202,7 @@ def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.Data for _, _, context in self._observations]).reset_index(drop=True) return (configs, scores, contexts if len(contexts.columns) > 0 else None) - def get_best_observations(self, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: + def get_best_observations(self, *, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: """ Get the N best observations so far as a triplet of DataFrames (config, score, context). Default is N=1. The columns are ordered in ASCENDING order of the optimization targets. 
@@ -231,7 +231,7 @@ def cleanup(self) -> None: Redefine this method in optimizers that require cleanup. """ - def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: + def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame: """ Convert numpy array from one-hot encoding to a DataFrame with categoricals and ints in proper columns. @@ -254,7 +254,7 @@ def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: j += 1 return pd.DataFrame(df_dict) - def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: + def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: """ Convert pandas DataFrame to one-hot-encoded numpy array. """ diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index f81092a65d..00c3d1e44f 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -24,7 +24,7 @@ class RandomOptimizer(BaseOptimizer): The parameter space to optimize. """ - def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. @@ -45,7 +45,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) # should we pop them from self.pending_observations? - def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. Sampled at random using ConfigSpace. 
@@ -65,7 +65,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]) - def register_pending(self, configurations: pd.DataFrame, + def register_pending(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() # self._pending_observations.append((configurations, context)) diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py index 69ce4f8dff..7916296425 100644 --- a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py +++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py @@ -39,11 +39,11 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp context = pd.DataFrame([["something"]]) with pytest.raises(UserWarning): - optimizer.register(suggestion, scores, context=context) + optimizer.register(configurations=suggestion, scores=scores, context=context) with pytest.raises(UserWarning): optimizer.suggest(context=context) if isinstance(optimizer, BaseBayesianOptimizer): with pytest.raises(UserWarning): - optimizer.surrogate_predict(suggestion, context=context) + optimizer.surrogate_predict(configurations=suggestion, context=context) diff --git a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py index 0a9a6ed3c5..8e10afa302 100644 --- a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py +++ b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py @@ -85,7 +85,7 @@ def test_to_1hot_data_frame(optimizer: BaseOptimizer, """ Toy problem to test one-hot encoding of dataframe. 
""" - assert optimizer._to_1hot(data_frame) == pytest.approx(one_hot_data_frame) + assert optimizer._to_1hot(config=data_frame) == pytest.approx(one_hot_data_frame) def test_to_1hot_series(optimizer: BaseOptimizer, @@ -93,7 +93,7 @@ def test_to_1hot_series(optimizer: BaseOptimizer, """ Toy problem to test one-hot encoding of series. """ - assert optimizer._to_1hot(series) == pytest.approx(one_hot_series) + assert optimizer._to_1hot(config=series) == pytest.approx(one_hot_series) def test_from_1hot_data_frame(optimizer: BaseOptimizer, @@ -102,7 +102,7 @@ def test_from_1hot_data_frame(optimizer: BaseOptimizer, """ Toy problem to test one-hot decoding of dataframe. """ - assert optimizer._from_1hot(one_hot_data_frame).to_dict() == data_frame.to_dict() + assert optimizer._from_1hot(config=one_hot_data_frame).to_dict() == data_frame.to_dict() def test_from_1hot_series(optimizer: BaseOptimizer, @@ -111,7 +111,7 @@ def test_from_1hot_series(optimizer: BaseOptimizer, """ Toy problem to test one-hot decoding of series. """ - one_hot_df = optimizer._from_1hot(one_hot_series) + one_hot_df = optimizer._from_1hot(config=one_hot_series) assert one_hot_df.shape[0] == 1, f"Unexpected number of rows ({one_hot_df.shape[0]} != 1)" assert one_hot_df.iloc[0].to_dict() == series.to_dict() @@ -120,7 +120,7 @@ def test_round_trip_data_frame(optimizer: BaseOptimizer, data_frame: pd.DataFram """ Round-trip test for one-hot-encoding and then decoding a data frame. """ - df_round_trip = optimizer._from_1hot(optimizer._to_1hot(data_frame)) + df_round_trip = optimizer._from_1hot(config=optimizer._to_1hot(config=data_frame)) assert df_round_trip.x.to_numpy() == pytest.approx(data_frame.x) assert (df_round_trip.y == data_frame.y).all() assert (df_round_trip.z == data_frame.z).all() @@ -130,7 +130,7 @@ def test_round_trip_series(optimizer: BaseOptimizer, series: pd.DataFrame) -> No """ Round-trip test for one-hot-encoding and then decoding a series. 
""" - series_round_trip = optimizer._from_1hot(optimizer._to_1hot(series)) + series_round_trip = optimizer._from_1hot(config=optimizer._to_1hot(config=series)) assert series_round_trip.x.to_numpy() == pytest.approx(series.x) assert (series_round_trip.y == series.y).all() assert (series_round_trip.z == series.z).all() @@ -141,7 +141,7 @@ def test_round_trip_reverse_data_frame(optimizer: BaseOptimizer, """ Round-trip test for one-hot-decoding and then encoding of a numpy array. """ - round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_data_frame)) + round_trip = optimizer._to_1hot(config=optimizer._from_1hot(config=one_hot_data_frame)) assert round_trip == pytest.approx(one_hot_data_frame) @@ -150,5 +150,5 @@ def test_round_trip_reverse_series(optimizer: BaseOptimizer, """ Round-trip test for one-hot-decoding and then encoding of a numpy array. """ - round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_series)) + round_trip = optimizer._to_1hot(config=optimizer._from_1hot(config=one_hot_series)) assert round_trip == pytest.approx(one_hot_series) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py index 4ff5f157f7..4cf57c2719 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py @@ -99,7 +99,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) assert set(observation.columns) == {'main_score', 'other_score'} - optimizer.register(suggestion, observation) + optimizer.register(configurations=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 
67c7eddf3b..613e3af0c9 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -56,7 +56,7 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace # pending not implemented with pytest.raises(NotImplementedError): - optimizer.register_pending(suggestion) + optimizer.register_pending(configurations=suggestion) @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ @@ -103,7 +103,7 @@ def objective(x: pd.Series) -> pd.DataFrame: configuration.is_valid_configuration() observation = objective(suggestion['x']) assert isinstance(observation, pd.DataFrame) - optimizer.register(suggestion, observation) + optimizer.register(configurations=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) @@ -126,10 +126,10 @@ def objective(x: pd.Series) -> pd.DataFrame: # It would be better to put this into bayesian_optimizer_test but then we'd have to refit the model if isinstance(optimizer, BaseBayesianOptimizer): - pred_best = optimizer.surrogate_predict(best_config) + pred_best = optimizer.surrogate_predict(configurations=best_config) assert pred_best.shape == (1,) - pred_all = optimizer.surrogate_predict(all_configs) + pred_all = optimizer.surrogate_predict(configurations=all_configs) assert pred_all.shape == (20,) @@ -270,14 +270,14 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # loop for optimizer suggestion = optimizer.suggest() observation = objective(suggestion) - optimizer.register(suggestion, observation) + optimizer.register(configurations=suggestion, scores=observation) # loop for llamatune-optimizer suggestion = llamatune_optimizer.suggest() _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0] assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3) # optimizer explores 1-dimensional space observation = objective(suggestion) - 
llamatune_optimizer.register(suggestion, observation) + llamatune_optimizer.register(configurations=suggestion, scores=observation) # Retrieve best observations best_observation = optimizer.get_best_observations() @@ -311,7 +311,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # .surrogate_predict method not currently implemented if space adapter is employed if isinstance(llamatune_optimizer, BaseBayesianOptimizer): with pytest.raises(NotImplementedError): - llamatune_optimizer.surrogate_predict(llamatune_best_config) + llamatune_optimizer.surrogate_predict(configurations=llamatune_best_config) # Dynamically determine all of the optimizers we have implemented. From 81840d400f6b4fc664c29248d84afc8c7f2e48ad Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 00:05:24 +0000 Subject: [PATCH 02/17] fix tests --- .vscode/settings.json | 5 +---- mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py | 4 ++-- .../tests/optimizers/toy_optimization_loop_test.py | 2 +- mlos_core/mlos_core/optimizers/flaml_optimizer.py | 2 +- mlos_core/mlos_core/tests/optimizers/optimizer_test.py | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 2c8098f9d9..dd8f4963d8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -135,9 +135,6 @@ }, // See Also .vscode/launch.json for environment variable args to pytest during debug sessions. // For the rest, see setup.cfg - "python.testing.pytestArgs": [ - "--log-level=DEBUG", - "." 
- ], + "python.testing.pytestArgs": [], "python.testing.unittestEnabled": false } diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index 7747035c13..44d1762e6f 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -117,7 +117,7 @@ def bulk_register(self, # TODO: Specify (in the config) which metrics to pass to the optimizer. # Issue: https://github.com/microsoft/MLOS/issues/745 - self._opt.register(df_configs, df_scores[opt_targets].astype(float)) + self._opt.register(configurations=df_configs, scores=df_scores[opt_targets].astype(float)) if _LOG.isEnabledFor(logging.DEBUG): (score, _) = self.get_best_observation() @@ -195,7 +195,7 @@ def register(self, tunables: TunableGroups, status: Status, _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config) # TODO: Specify (in the config) which metrics to pass to the optimizer. # Issue: https://github.com/microsoft/MLOS/issues/745 - self._opt.register(df_config, pd.DataFrame([registered_score], dtype=float)) + self._opt.register(configurations=df_config, scores=pd.DataFrame([registered_score], dtype=float)) return registered_score def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]: diff --git a/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py b/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py index 2307bcd646..89c666ab51 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py +++ b/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py @@ -50,7 +50,7 @@ def _optimize(env: Environment, opt: Optimizer) -> Tuple[float, TunableGroups]: config_df = config_to_dataframe(config) logger("config: %s", str(config)) try: - logger("prediction: %s", opt._opt.surrogate_predict(config_df)) + logger("prediction: %s", 
opt._opt.surrogate_predict(configurations=config_df)) except RuntimeError: pass diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index abbdb90418..1caea5941b 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -136,7 +136,7 @@ def register_pending(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() - def _target_function(self, *, config: dict) -> Union[dict, None]: + def _target_function(self, config: dict) -> Union[dict, None]: """Configuration evaluation function called by FLAML optimizer. FLAML may suggest the same configuration multiple times (due to its warm-start mechanism). diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 613e3af0c9..678c8f4219 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -388,7 +388,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # Test registering the suggested configuration with a score. 
observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) - optimizer.register(suggestion, observation) + optimizer.register(configurations=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) From 76f7a6d863d9449a723b4f853ba1ca9491f5250f Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 00:07:40 +0000 Subject: [PATCH 03/17] revert settings --- .vscode/settings.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index dd8f4963d8..2c8098f9d9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -135,6 +135,9 @@ }, // See Also .vscode/launch.json for environment variable args to pytest during debug sessions. // For the rest, see setup.cfg - "python.testing.pytestArgs": [], + "python.testing.pytestArgs": [ + "--log-level=DEBUG", + "." + ], "python.testing.unittestEnabled": false } From 91b755308d2690dff8285779dd0debc3e26a12c6 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 12:03:31 -0500 Subject: [PATCH 04/17] Update mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py Co-authored-by: Brian Kroth --- mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index 44d1762e6f..adef0c3750 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -117,7 +117,7 @@ def bulk_register(self, # TODO: Specify (in the config) which metrics to pass to the optimizer. 
# Issue: https://github.com/microsoft/MLOS/issues/745 - self._opt.register(configurations=df_configs, scores=df_scores[opt_targets].astype(float)) + self._opt.register(configs=df_configs, scores=df_scores[opt_targets].astype(float)) if _LOG.isEnabledFor(logging.DEBUG): (score, _) = self.get_best_observation() From baaf6ef83c06c05510c0fed6f6d3d42372f43f7a Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 12:10:38 -0500 Subject: [PATCH 05/17] Update mlos_core/mlos_core/optimizers/optimizer.py Co-authored-by: Brian Kroth --- mlos_core/mlos_core/optimizers/optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index f0079ce25f..d6bb694645 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -231,7 +231,7 @@ def cleanup(self) -> None: Redefine this method in optimizers that require cleanup. """ - def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame: + def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: """ Convert numpy array from one-hot encoding to a DataFrame with categoricals and ints in proper columns. 
From 20a6161ddf0ced69ea75399a5522ad0367ffa5c7 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 12:10:45 -0500 Subject: [PATCH 06/17] Update mlos_core/mlos_core/optimizers/optimizer.py Co-authored-by: Brian Kroth --- mlos_core/mlos_core/optimizers/optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index d6bb694645..5a5f3ac121 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -254,7 +254,7 @@ def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: j += 1 return pd.DataFrame(df_dict) - def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: + def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: """ Convert pandas DataFrame to one-hot-encoded numpy array. """ From 18277f9fd967a6ebe9d25292209e86649d306b81 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 17:26:21 +0000 Subject: [PATCH 07/17] Minor changes for comments --- mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py | 2 +- mlos_core/mlos_core/optimizers/optimizer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index adef0c3750..44d1762e6f 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -117,7 +117,7 @@ def bulk_register(self, # TODO: Specify (in the config) which metrics to pass to the optimizer. 
# Issue: https://github.com/microsoft/MLOS/issues/745 - self._opt.register(configs=df_configs, scores=df_scores[opt_targets].astype(float)) + self._opt.register(configurations=df_configs, scores=df_scores[opt_targets].astype(float)) if _LOG.isEnabledFor(logging.DEBUG): (score, _) = self.get_best_observation() diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index 5a5f3ac121..f0079ce25f 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -231,7 +231,7 @@ def cleanup(self) -> None: Redefine this method in optimizers that require cleanup. """ - def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: + def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame: """ Convert numpy array from one-hot encoding to a DataFrame with categoricals and ints in proper columns. @@ -254,7 +254,7 @@ def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: j += 1 return pd.DataFrame(df_dict) - def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: + def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: """ Convert pandas DataFrame to one-hot-encoded numpy array. 
""" From 0126239df04db4681580be66f7e978ac699b5cb9 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 17:35:09 +0000 Subject: [PATCH 08/17] configurations to configs --- .../optimizers/mlos_core_optimizer.py | 4 +- .../optimizers/toy_optimization_loop_test.py | 2 +- .../bayesian_optimizers/bayesian_optimizer.py | 12 ++--- .../bayesian_optimizers/smac_optimizer.py | 42 ++++++++--------- .../mlos_core/optimizers/flaml_optimizer.py | 16 +++---- mlos_core/mlos_core/optimizers/optimizer.py | 46 +++++++++---------- .../mlos_core/optimizers/random_optimizer.py | 16 +++---- .../optimizers/bayesian_optimizers_test.py | 4 +- .../optimizers/optimizer_multiobj_test.py | 2 +- .../tests/optimizers/optimizer_test.py | 16 +++---- 10 files changed, 80 insertions(+), 80 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index 44d1762e6f..8e7c75a0d5 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -117,7 +117,7 @@ def bulk_register(self, # TODO: Specify (in the config) which metrics to pass to the optimizer. # Issue: https://github.com/microsoft/MLOS/issues/745 - self._opt.register(configurations=df_configs, scores=df_scores[opt_targets].astype(float)) + self._opt.register(configs=df_configs, scores=df_scores[opt_targets].astype(float)) if _LOG.isEnabledFor(logging.DEBUG): (score, _) = self.get_best_observation() @@ -195,7 +195,7 @@ def register(self, tunables: TunableGroups, status: Status, _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config) # TODO: Specify (in the config) which metrics to pass to the optimizer. 
# Issue: https://github.com/microsoft/MLOS/issues/745 - self._opt.register(configurations=df_config, scores=pd.DataFrame([registered_score], dtype=float)) + self._opt.register(configs=df_config, scores=pd.DataFrame([registered_score], dtype=float)) return registered_score def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]: diff --git a/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py b/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py index 89c666ab51..183db1dc62 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py +++ b/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py @@ -50,7 +50,7 @@ def _optimize(env: Environment, opt: Optimizer) -> Tuple[float, TunableGroups]: config_df = config_to_dataframe(config) logger("config: %s", str(config)) try: - logger("prediction: %s", opt._opt.surrogate_predict(configurations=config_df)) + logger("prediction: %s", opt._opt.surrogate_predict(configs=config_df)) except RuntimeError: pass diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py index 9c69a09a6c..2de01637f8 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py @@ -20,14 +20,14 @@ class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta): """Abstract base class defining the interface for Bayesian optimization.""" @abstractmethod - def surrogate_predict(self, *, configurations: pd.DataFrame, + def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: """Obtain a prediction from this Bayesian optimizer's surrogate model for the given configuration(s). Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. 
The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. context : pd.DataFrame Not Yet Implemented. @@ -35,14 +35,14 @@ def surrogate_predict(self, *, configurations: pd.DataFrame, pass # pylint: disable=unnecessary-pass # pragma: no cover @abstractmethod - def acquisition_function(self, *, configurations: pd.DataFrame, + def acquisition_function(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: """Invokes the acquisition function from this Bayesian optimizer for the given configuration. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. context : pd.DataFrame Not Yet Implemented. diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index d1d4cf5764..4f7822a842 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -80,10 +80,10 @@ def __init__(self, *, # pylint: disable=too-many-locals,too-many-arguments See Also: mlos_bench.optimizer.bulk_register max_ratio : Optional[int] - Maximum ratio of max_trials to be random configurations to be evaluated + Maximum ratio of max_trials to be random configs to be evaluated at start to bootstrap the optimizer. Useful if you want to explicitly control the number of random - configurations evaluated at start. + configs evaluated at start. use_default_config: bool Whether to use the default config for the first trial after random initialization. 
@@ -168,7 +168,7 @@ def __init__(self, *, # pylint: disable=too-many-locals,too-many-arguments initial_design_args['n_configs'] = n_random_init if n_random_init > 0.25 * max_trials and max_ratio is None: warning( - 'Number of random initial configurations (%d) is ' + + 'Number of random initial configs (%d) is ' + 'greater than 25%% of max_trials (%d). ' + 'Consider setting max_ratio to avoid SMAC overriding n_random_init.', n_random_init, @@ -241,17 +241,17 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None # -- this planned to be fixed in some future release: https://github.com/automl/SMAC3/issues/946 raise RuntimeError('This function should never be called.') - def _register(self, *, configurations: pd.DataFrame, + def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations and scores. + """Registers the given configs and scores. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : pd.DataFrame Not Yet Implemented. 
@@ -262,7 +262,7 @@ def _register(self, *, configurations: pd.DataFrame, warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) # Register each trial (one-by-one) - for (config, (_i, score)) in zip(self._to_configspace_configs(configurations=configurations), scores.iterrows()): + for (config, (_i, score)) in zip(self._to_configspace_configs(configs=configs), scores.iterrows()): # Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance info: TrialInfo = self.trial_info_map.get( config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed)) @@ -299,10 +299,10 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())) return config_df - def register_pending(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() - def surrogate_predict(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: + def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: from smac.utils.configspace import convert_configurations_to_array # pylint: disable=import-outside-toplevel if context is not None: @@ -318,11 +318,11 @@ def surrogate_predict(self, *, configurations: pd.DataFrame, context: Optional[p if self.base_optimizer._config_selector._model is None: raise RuntimeError('Surrogate model is not yet trained') - configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configurations=configurations)) + configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configs=configs)) mean_predictions, _ = self.base_optimizer._config_selector._model.predict(configs) return mean_predictions.reshape(-1,) - 
def acquisition_function(self, *, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: + def acquisition_function(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) if self._space_adapter: @@ -332,7 +332,7 @@ def acquisition_function(self, *, configurations: pd.DataFrame, context: Optiona if self.base_optimizer._config_selector._acquisition_function is None: raise RuntimeError('Acquisition function is not yet initialized') - configs: list = self._to_configspace_configs(configurations=configurations) + configs: list = self._to_configspace_configs(configs=configs) return self.base_optimizer._config_selector._acquisition_function(configs).reshape(-1,) def cleanup(self) -> None: @@ -340,20 +340,20 @@ def cleanup(self) -> None: self._temp_output_directory.cleanup() self._temp_output_directory = None - def _to_configspace_configs(self, *, configurations: pd.DataFrame) -> List[ConfigSpace.Configuration]: - """Convert a dataframe of configurations to a list of ConfigSpace configurations. + def _to_configspace_configs(self, *, configs: pd.DataFrame) -> List[ConfigSpace.Configuration]: + """Convert a dataframe of configs to a list of ConfigSpace configs. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. Returns ------- - configurations : list - List of ConfigSpace configurations. + configs : list + List of ConfigSpace configs. 
""" return [ ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict()) - for (_, config) in configurations.astype('O').iterrows() + for (_, config) in configs.astype('O').iterrows() ] diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index 1caea5941b..a58e74af02 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -85,24 +85,24 @@ def __init__(self, *, # pylint: disable=too-many-arguments self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {} self._suggested_config: Optional[dict] - def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations and scores. + """Registers the given configs and scores. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : None Not Yet Implemented. 
""" if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - for (_, config), (_, score) in zip(configurations.astype('O').iterrows(), scores.iterrows()): + for (_, config), (_, score) in zip(configs.astype('O').iterrows(), scores.iterrows()): cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration( self.optimizer_parameter_space, values=config.to_dict()) if cs_config in self.evaluated_samples: @@ -132,7 +132,7 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: config: dict = self._get_next_config() return pd.DataFrame(config, index=[0]) - def register_pending(self, *, configurations: pd.DataFrame, + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() @@ -165,7 +165,7 @@ def _get_next_config(self) -> dict: Since FLAML does not provide an ask-and-tell interface, we need to create a new instance of FLAML each time we get asked for a new suggestion. This is suboptimal performance-wise, but works. - To do so, we use any previously evaluated configurations to bootstrap FLAML (i.e., warm-start). + To do so, we use any previously evaluated configs to bootstrap FLAML (i.e., warm-start). 
For more info: https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function#warm-start Returns diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index f0079ce25f..a72a4e1eb8 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -68,16 +68,16 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]: """Get the space adapter instance (if any).""" return self._space_adapter - def register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, + def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Wrapper method, which employs the space adapter (if any), before registering the configurations and scores. + """Wrapper method, which employs the space adapter (if any), before registering the configs and scores. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : pd.DataFrame Not Yet Implemented. @@ -87,33 +87,33 @@ def register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, "Mismatched optimization targets." assert self._has_context is None or self._has_context ^ (context is None), \ "Context must always be added or never be added." - assert len(configurations) == len(scores), \ - "Mismatched number of configurations and scores." + assert len(configs) == len(scores), \ + "Mismatched number of configs and scores." 
if context is not None: - assert len(configurations) == len(context), \ - "Mismatched number of configurations and context." - assert configurations.shape[1] == len(self.parameter_space.values()), \ + assert len(configs) == len(context), \ + "Mismatched number of configs and context." + assert configs.shape[1] == len(self.parameter_space.values()), \ "Mismatched configuration shape." - self._observations.append((configurations, scores, context)) + self._observations.append((configs, scores, context)) self._has_context = context is not None if self._space_adapter: - configurations = self._space_adapter.inverse_transform(configurations) - assert configurations.shape[1] == len(self.optimizer_parameter_space.values()), \ + configs = self._space_adapter.inverse_transform(configs) + assert configs.shape[1] == len(self.optimizer_parameter_space.values()), \ "Mismatched configuration shape after inverse transform." - return self._register(configurations=configurations, scores=scores, context=context) + return self._register(configs=configs, scores=scores, context=context) @abstractmethod - def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations and scores. + """Registers the given configs and scores. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : pd.DataFrame Not Yet Implemented. 
@@ -170,16 +170,16 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: pass # pylint: disable=unnecessary-pass # pragma: no cover @abstractmethod - def register_pending(self, *, configurations: pd.DataFrame, + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations as "pending". + """Registers the given configs as "pending". That is it say, it has been suggested by the optimizer, and an experiment trial has been started. This can be useful for executing multiple trials in parallel, retry logic, etc. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. context : pd.DataFrame Not Yet Implemented. """ diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index 00c3d1e44f..8893b456ac 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -24,19 +24,19 @@ class RandomOptimizer(BaseOptimizer): The parameter space to optimize. """ - def _register(self, *, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations and scores. + """Registers the given configs and scores. - Doesn't do anything on the RandomOptimizer except storing configurations for logging. + Doesn't do anything on the RandomOptimizer except storing configs for logging. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. 
+ configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : None Not Yet Implemented. @@ -65,7 +65,7 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]) - def register_pending(self, *, configurations: pd.DataFrame, + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() - # self._pending_observations.append((configurations, context)) + # self._pending_observations.append((configs, context)) diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py index 7916296425..037e85ef73 100644 --- a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py +++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py @@ -39,11 +39,11 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp context = pd.DataFrame([["something"]]) with pytest.raises(UserWarning): - optimizer.register(configurations=suggestion, scores=scores, context=context) + optimizer.register(configs=suggestion, scores=scores, context=context) with pytest.raises(UserWarning): optimizer.suggest(context=context) if isinstance(optimizer, BaseBayesianOptimizer): with pytest.raises(UserWarning): - optimizer.surrogate_predict(configurations=suggestion, context=context) + optimizer.surrogate_predict(configs=suggestion, context=context) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py 
b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py index 4cf57c2719..e3c053fa5b 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py @@ -99,7 +99,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) assert set(observation.columns) == {'main_score', 'other_score'} - optimizer.register(configurations=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 678c8f4219..49ff691635 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -56,7 +56,7 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace # pending not implemented with pytest.raises(NotImplementedError): - optimizer.register_pending(configurations=suggestion) + optimizer.register_pending(configs=suggestion) @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ @@ -103,7 +103,7 @@ def objective(x: pd.Series) -> pd.DataFrame: configuration.is_valid_configuration() observation = objective(suggestion['x']) assert isinstance(observation, pd.DataFrame) - optimizer.register(configurations=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) @@ -126,10 +126,10 @@ def objective(x: pd.Series) -> pd.DataFrame: # It would be better to put this into bayesian_optimizer_test but then we'd have to refit the model if isinstance(optimizer, BaseBayesianOptimizer): - pred_best = 
optimizer.surrogate_predict(configurations=best_config) + pred_best = optimizer.surrogate_predict(configs=best_config) assert pred_best.shape == (1,) - pred_all = optimizer.surrogate_predict(configurations=all_configs) + pred_all = optimizer.surrogate_predict(configs=all_configs) assert pred_all.shape == (20,) @@ -270,14 +270,14 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # loop for optimizer suggestion = optimizer.suggest() observation = objective(suggestion) - optimizer.register(configurations=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation) # loop for llamatune-optimizer suggestion = llamatune_optimizer.suggest() _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0] assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3) # optimizer explores 1-dimensional space observation = objective(suggestion) - llamatune_optimizer.register(configurations=suggestion, scores=observation) + llamatune_optimizer.register(configs=suggestion, scores=observation) # Retrieve best observations best_observation = optimizer.get_best_observations() @@ -311,7 +311,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # .surrogate_predict method not currently implemented if space adapter is employed if isinstance(llamatune_optimizer, BaseBayesianOptimizer): with pytest.raises(NotImplementedError): - llamatune_optimizer.surrogate_predict(configurations=llamatune_best_config) + llamatune_optimizer.surrogate_predict(configs=llamatune_best_config) # Dynamically determine all of the optimizers we have implemented. @@ -388,7 +388,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # Test registering the suggested configuration with a score. 
observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) - optimizer.register(configurations=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) From 47b4d6cc0f044c95d37f2908935ae0d2be42502b Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 17:43:15 +0000 Subject: [PATCH 09/17] CI trigger From 14080444878f546752cdf4255c2a4ed6d072ab7a Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Fri, 28 Jun 2024 20:48:23 +0000 Subject: [PATCH 10/17] update typing --- .../optimizers/bayesian_optimizers/smac_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 4f7822a842..b34943f946 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -318,8 +318,8 @@ def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataF if self.base_optimizer._config_selector._model is None: raise RuntimeError('Surrogate model is not yet trained') - configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configs=configs)) - mean_predictions, _ = self.base_optimizer._config_selector._model.predict(configs) + config_array: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configs=configs)) + mean_predictions, _ = self.base_optimizer._config_selector._model.predict(config_array) return mean_predictions.reshape(-1,) def acquisition_function(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: From dd4b534171f26785d2ce0609d31896934c164516 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 28 Jun 2024 21:00:55 
+0000 Subject: [PATCH 11/17] mypy fixup --- .../optimizers/bayesian_optimizers/smac_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index b34943f946..43803b7dbb 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -332,8 +332,8 @@ def acquisition_function(self, *, configs: pd.DataFrame, context: Optional[pd.Da if self.base_optimizer._config_selector._acquisition_function is None: raise RuntimeError('Acquisition function is not yet initialized') - configs: list = self._to_configspace_configs(configs=configs) - return self.base_optimizer._config_selector._acquisition_function(configs).reshape(-1,) + cs_configs: list = self._to_configspace_configs(configs=configs) + return self.base_optimizer._config_selector._acquisition_function(cs_configs).reshape(-1,) def cleanup(self) -> None: if self._temp_output_directory is not None: From e0c170420494c6aa30ad19542221a3fde57d5508 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Mon, 1 Jul 2024 18:49:56 +0000 Subject: [PATCH 12/17] introduce metadata --- .../optimizers/mlos_core_optimizer.py | 2 +- .../bayesian_optimizers/smac_optimizer.py | 16 +++++++++----- .../mlos_core/optimizers/flaml_optimizer.py | 19 ++++++++++++----- mlos_core/mlos_core/optimizers/optimizer.py | 15 ++++++------- .../mlos_core/optimizers/random_optimizer.py | 18 +++++++++++----- .../optimizers/bayesian_optimizers_test.py | 2 +- .../optimizers/optimizer_multiobj_test.py | 3 ++- .../tests/optimizers/optimizer_test.py | 21 ++++++++++--------- 8 files changed, 61 insertions(+), 35 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index 8e7c75a0d5..b17242f4d0 100644 --- 
a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -180,7 +180,7 @@ def suggest(self) -> TunableGroups: tunables = super().suggest() if self._start_with_defaults: _LOG.info("Use default values for the first trial") - df_config = self._opt.suggest(defaults=self._start_with_defaults) + df_config, _ = self._opt.suggest(defaults=self._start_with_defaults) self._start_with_defaults = False _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config) return tunables.assign( diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 43803b7dbb..67b4b2a4fc 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -9,7 +9,7 @@ from logging import warning from pathlib import Path -from typing import Dict, List, Optional, Union, TYPE_CHECKING +from typing import Dict, List, Optional, Tuple, Union, TYPE_CHECKING from tempfile import TemporaryDirectory from warnings import warn @@ -242,7 +242,7 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None raise RuntimeError('This function should never be called.') def _register(self, *, configs: pd.DataFrame, - scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: + scores: pd.DataFrame, context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs and scores. Parameters @@ -255,6 +255,9 @@ def _register(self, *, configs: pd.DataFrame, context : pd.DataFrame Not Yet Implemented. + + metadata: pd.DataFrame + Not Yet Implemented. 
""" from smac.runhistory import StatusType, TrialInfo, TrialValue # pylint: disable=import-outside-toplevel @@ -272,7 +275,7 @@ def _register(self, *, configs: pd.DataFrame, # Save optimizer once we register all configs self.base_optimizer.optimizer.save() - def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Suggests a new configuration. Parameters @@ -284,6 +287,9 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. + + metadata : Optional[pd.DataFrame] + Not yet implemented. """ if TYPE_CHECKING: from smac.runhistory import TrialInfo # pylint: disable=import-outside-toplevel,unused-import @@ -297,9 +303,9 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: assert trial.config.config_space == self.optimizer_parameter_space self.trial_info_map[trial.config] = trial config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())) - return config_df + return config_df, None - def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index a58e74af02..4f478db2bf 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -6,7 +6,7 @@ Contains the FlamlOptimizer class. 
""" -from typing import Dict, List, NamedTuple, Optional, Union +from typing import Dict, List, NamedTuple, Optional, Tuple, Union from warnings import warn import ConfigSpace @@ -86,7 +86,7 @@ def __init__(self, *, # pylint: disable=too-many-arguments self._suggested_config: Optional[dict] def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs and scores. Parameters @@ -99,9 +99,15 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context : None Not Yet Implemented. + + metadata : None + Not Yet Implemented. """ if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) + if metadata is not None: + warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning) + for (_, config), (_, score) in zip(configs.astype('O').iterrows(), scores.iterrows()): cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration( self.optimizer_parameter_space, values=config.to_dict()) @@ -112,7 +118,7 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, score=float(np.average(score.astype(float), weights=self._objective_weights)), ) - def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Suggests a new configuration. Sampled at random using ConfigSpace. @@ -126,14 +132,17 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. + + metadata : None + Not implemented. 
""" if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) config: dict = self._get_next_config() - return pd.DataFrame(config, index=[0]) + return pd.DataFrame(config, index=[0]), None def register_pending(self, *, configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() def _target_function(self, config: dict) -> Union[dict, None]: diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index a72a4e1eb8..d74f19d2ef 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -69,7 +69,7 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]: return self._space_adapter def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Wrapper method, which employs the space adapter (if any), before registering the configs and scores. Parameters @@ -105,7 +105,7 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, @abstractmethod def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs and scores. 
Parameters @@ -120,7 +120,7 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, """ pass # pylint: disable=unnecessary-pass # pragma: no cover - def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame: + def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """ Wrapper method, which employs the space adapter (if any), after suggesting a new configuration. @@ -139,10 +139,11 @@ def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = Fa """ if defaults: configuration = config_to_dataframe(self.parameter_space.get_default_configuration()) + metadata = None if self.space_adapter is not None: configuration = self.space_adapter.inverse_transform(configuration) else: - configuration = self._suggest(context=context) + configuration, metadata = self._suggest(context=context) assert len(configuration) == 1, \ "Suggest must return a single configuration." assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \ @@ -151,10 +152,10 @@ def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = Fa configuration = self._space_adapter.transform(configuration) assert set(configuration.columns).issubset(set(self.parameter_space)), \ "Space adapter produced a configuration that does not match the expected parameter space." - return configuration + return configuration, metadata @abstractmethod - def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Suggests a new configuration. 
Parameters @@ -171,7 +172,7 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: @abstractmethod def register_pending(self, *, configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs as "pending". That is it say, it has been suggested by the optimizer, and an experiment trial has been started. This can be useful for executing multiple trials in parallel, retry logic, etc. diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index 8893b456ac..0af785ef20 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -6,7 +6,7 @@ Contains the RandomOptimizer class. """ -from typing import Optional +from typing import Optional, Tuple from warnings import warn import pandas as pd @@ -25,7 +25,7 @@ class RandomOptimizer(BaseOptimizer): """ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs and scores. Doesn't do anything on the RandomOptimizer except storing configs for logging. @@ -40,12 +40,17 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context : None Not Yet Implemented. + + metadata : None + Not Yet Implemented. """ if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) + if metadata is not None: + warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning) # should we pop them from self.pending_observations? 
- def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Suggests a new configuration. Sampled at random using ConfigSpace. @@ -59,13 +64,16 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. + + metadata : None + Not implemented. """ if context is not None: # not sure how that works here? warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]) + return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]), None def register_pending(self, *, configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() # self._pending_observations.append((configs, context)) diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py index 037e85ef73..dc5a41002b 100644 --- a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py +++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py @@ -34,7 +34,7 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp optimization_targets=['score'], **kwargs ) - suggestion = optimizer.suggest() + suggestion, _ = optimizer.suggest() scores = pd.DataFrame({'score': [1]}) context = pd.DataFrame([["something"]]) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py index e3c053fa5b..22263b4c1d 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py +++ 
b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py @@ -84,8 +84,9 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) + assert metadata is None or isinstance(metadata, pd.DataFrame) assert set(suggestion.columns) == {'x', 'y'} # Check suggestion values are the expected dtype assert isinstance(suggestion.x.iloc[0], np.integer) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 49ff691635..8231e59feb 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -48,7 +48,7 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace assert optimizer.parameter_space is not None - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() assert suggestion is not None myrepr = repr(optimizer) @@ -56,7 +56,7 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace # pending not implemented with pytest.raises(NotImplementedError): - optimizer.register_pending(configs=suggestion) + optimizer.register_pending(configs=suggestion, metadata=metadata) @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ @@ -94,8 +94,9 @@ def objective(x: pd.Series) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) + assert metadata is None or isinstance(metadata, pd.DataFrame) assert set(suggestion.columns) == {'x', 'y', 'z'} # check that suggestion is in the space configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict()) @@ -103,7 +104,7 @@ def objective(x: pd.Series) -> pd.DataFrame: 
configuration.is_valid_configuration() observation = objective(suggestion['x']) assert isinstance(observation, pd.DataFrame) - optimizer.register(configs=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation, metadata=metadata) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) @@ -268,16 +269,16 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: _LOG.debug("Optimizer is done with random init.") # loop for optimizer - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() observation = objective(suggestion) - optimizer.register(configs=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation, metadata=metadata) # loop for llamatune-optimizer - suggestion = llamatune_optimizer.suggest() + suggestion, metadata = llamatune_optimizer.suggest() _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0] assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3) # optimizer explores 1-dimensional space observation = objective(suggestion) - llamatune_optimizer.register(configs=suggestion, scores=observation) + llamatune_optimizer.register(configs=suggestion, scores=observation, metadata=metadata) # Retrieve best observations best_observation = optimizer.get_best_observations() @@ -375,7 +376,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) assert (suggestion.columns == ['x', 'y']).all() # Check suggestion values are the expected dtype @@ -388,7 +389,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # Test registering the suggested configuration with a score. 
observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) - optimizer.register(configs=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation, metadata=metadata) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) From 72e780ff62436ff5108527c8831b410a0b0dcc72 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Mon, 1 Jul 2024 14:31:39 -0500 Subject: [PATCH 13/17] Update mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py Co-authored-by: Brian Kroth --- mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index b17242f4d0..e0235f76b9 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -180,7 +180,7 @@ def suggest(self) -> TunableGroups: tunables = super().suggest() if self._start_with_defaults: _LOG.info("Use default values for the first trial") - df_config, _ = self._opt.suggest(defaults=self._start_with_defaults) + df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults) self._start_with_defaults = False _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config) return tunables.assign( From 5b7b005229bbcf95db6b4976884100d74ef7ff6c Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Mon, 1 Jul 2024 14:31:46 -0500 Subject: [PATCH 14/17] Update mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py Co-authored-by: Brian Kroth --- .../mlos_core/tests/optimizers/bayesian_optimizers_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py index dc5a41002b..c1aaa710ac 100644 --- 
a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py +++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py @@ -34,7 +34,7 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp optimization_targets=['score'], **kwargs ) - suggestion, _ = optimizer.suggest() + suggestion, _metadata = optimizer.suggest() scores = pd.DataFrame({'score': [1]}) context = pd.DataFrame([["something"]]) From 2d9f5e8d4b14941d23a3b866000ba684f30a1d96 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Mon, 1 Jul 2024 20:06:24 +0000 Subject: [PATCH 15/17] fix formatting --- .../optimizers/bayesian_optimizers/smac_optimizer.py | 4 +++- mlos_core/mlos_core/optimizers/optimizer.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 580e25f7e8..aa948b8125 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -305,7 +305,9 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFr config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())) return config_df, None - def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: + def register_pending(self, *, configs: pd.DataFrame, + context: Optional[pd.DataFrame] = None, + metadata: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index d74f19d2ef..c6a8ab66da 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ 
b/mlos_core/mlos_core/optimizers/optimizer.py @@ -172,7 +172,8 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFr @abstractmethod def register_pending(self, *, configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, + metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs as "pending". That is it say, it has been suggested by the optimizer, and an experiment trial has been started. This can be useful for executing multiple trials in parallel, retry logic, etc. From f2e2fc52ce4493548663c4907e5e2dae42bdaee3 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Mon, 1 Jul 2024 20:49:58 +0000 Subject: [PATCH 16/17] another style fix --- mlos_core/mlos_core/optimizers/optimizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index c6a8ab66da..8867f11e1e 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -120,7 +120,8 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, """ pass # pylint: disable=unnecessary-pass # pragma: no cover - def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + def suggest(self, *, context: Optional[pd.DataFrame] = None, + defaults: bool = False) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """ Wrapper method, which employs the space adapter (if any), after suggesting a new configuration. 
From 91a678049ec3640c3fbf3b2ce910e942b6e57c2b Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Mon, 1 Jul 2024 21:23:47 +0000 Subject: [PATCH 17/17] more style changes --- mlos_core/mlos_core/optimizers/optimizer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index 8867f11e1e..8fcf592a6c 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -81,8 +81,12 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context : pd.DataFrame Not Yet Implemented. + + metadata : Optional[pd.DataFrame] + Not Yet Implemented. """ # Do some input validation. + assert metadata is None or isinstance(metadata, pd.DataFrame) assert set(scores.columns) == set(self._optimization_targets), \ "Mismatched optimization targets." assert self._has_context is None or self._has_context ^ (context is None), \ @@ -168,6 +172,9 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFr ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. + + metadata : Optional[pd.DataFrame] + The metadata associated with the given configuration used for evaluations. """ pass # pylint: disable=unnecessary-pass # pragma: no cover @@ -185,6 +192,8 @@ def register_pending(self, *, configs: pd.DataFrame, Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. context : pd.DataFrame Not Yet Implemented. + metadata : Optional[pd.DataFrame] + Not Yet Implemented. """ pass # pylint: disable=unnecessary-pass # pragma: no cover