From 0eafaf0312af3f2e25dcb56f1763af3d0a737185 Mon Sep 17 00:00:00 2001 From: ndhuynh Date: Mon, 5 Feb 2024 13:08:00 -0500 Subject: [PATCH 01/14] [Fix] Add fix for pandas.Dataframe.iterrows() dropping dtypes during conversion --- .../optimizers/bayesian_optimizers/smac_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 78d661be73..085a64f45b 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -335,6 +335,6 @@ def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSp List of ConfigSpace configurations. """ return [ - ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict()) - for (_, config) in configurations.iterrows() + ConfigSpace.Configuration(self.optimizer_parameter_space, values=config._asdict()) + for config in configurations.itertuples(index=False) ] From 83953212226e88dc74831f92ad261696c763419f Mon Sep 17 00:00:00 2001 From: ndhuynh Date: Mon, 5 Feb 2024 13:54:39 -0500 Subject: [PATCH 02/14] [WIP] Add test for mixed input spaces --- .../tests/optimizers/optimizer_test.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 275a850333..a484450a15 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -298,3 +298,62 @@ def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None: """ optimizer_type_classes = {member.value for member in OptimizerType} assert optimizer_class in optimizer_type_classes + + +@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [ + # Enumerate all supported Optimizers + 
*[(member, {}) for member in OptimizerType], + # Optimizer with non-empty kwargs argument + (OptimizerType.SMAC, { + # Test with default config. + 'use_default_config': True, + # 'n_random_init': 10, + }), +]) +def test_mixed_input_space_types(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None: + """ + Toy problem to test the optimizers. + """ + max_iterations = 10 + if kwargs is None: + kwargs = {} + + def objective(point: pd.DataFrame) -> pd.Series: + # mix of hyperparameters, optimal is to select the highest possible + ret: pd.Series = point["x"] + point["y"] + return ret + + input_space = CS.ConfigurationSpace(seed=2169) + # add a mix of numeric datatypes + input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5)) + input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0)) + + optimizer: BaseOptimizer = OptimizerFactory.create( + parameter_space=input_space, + optimizer_type=optimizer_type, + optimizer_kwargs=kwargs, + ) + + with pytest.raises(ValueError, match="No observations"): + optimizer.get_best_observation() + + with pytest.raises(ValueError, match="No observations"): + optimizer.get_observations() + + for _ in range(max_iterations): + suggestion = optimizer.suggest() + assert isinstance(suggestion, pd.DataFrame) + assert (suggestion.columns == ['x', 'y']).all() + # check that suggestion is in the space + configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict()) + # Raises an error if outside of configuration space + configuration.is_valid_configuration() + observation = objective(suggestion) + assert isinstance(observation, pd.Series) + optimizer.register(suggestion, observation) + + best_observation = optimizer.get_best_observation() + assert isinstance(best_observation, pd.DataFrame) + + all_observations = optimizer.get_observations() + assert isinstance(all_observations, pd.DataFrame) From 71afa156e2002a353306a8e7514d1219b024445a Mon Sep 
17 00:00:00 2001 From: ephoris Date: Mon, 5 Feb 2024 15:05:19 -0500 Subject: [PATCH 03/14] [WIP] Change suggestion building in pytest example Note that if you have a dataframe with all numeric types, df.iloc[0] will convert all columns to float64 because Series cannot be mixed type. --- .../tests/optimizers/optimizer_test.py | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index a484450a15..95860cf797 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -1,5 +1,3 @@ -# -# Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # """ @@ -301,14 +299,11 @@ def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None: @pytest.mark.parametrize(('optimizer_type', 'kwargs'), [ + # Default optimizer + (None, {}), # Enumerate all supported Optimizers *[(member, {}) for member in OptimizerType], # Optimizer with non-empty kwargs argument - (OptimizerType.SMAC, { - # Test with default config. 
- 'use_default_config': True, - # 'n_random_init': 10, - }), ]) def test_mixed_input_space_types(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None: """ @@ -328,11 +323,17 @@ def objective(point: pd.DataFrame) -> pd.Series: input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5)) input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0)) - optimizer: BaseOptimizer = OptimizerFactory.create( - parameter_space=input_space, - optimizer_type=optimizer_type, - optimizer_kwargs=kwargs, - ) + if optimizer_type is None: + optimizer: BaseOptimizer = OptimizerFactory.create( + parameter_space=input_space, + optimizer_kwargs=kwargs, + ) + else: + optimizer: BaseOptimizer = OptimizerFactory.create( + parameter_space=input_space, + optimizer_type=optimizer_type, + optimizer_kwargs=kwargs, + ) with pytest.raises(ValueError, match="No observations"): optimizer.get_best_observation() @@ -344,8 +345,12 @@ def objective(point: pd.DataFrame) -> pd.Series: suggestion = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) assert (suggestion.columns == ['x', 'y']).all() + # Build suggestion mapping to cooperate with Configuration, note that + # doing a .iloc[0].to_dict() will cause pandas convert all numeric types + # to float64 + tmp_suggest = {'x': suggestion['x'].values[0], 'y': suggestion['y'].values[0]} # check that suggestion is in the space - configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict()) + configuration = CS.Configuration(optimizer.parameter_space, tmp_suggest) # Raises an error if outside of configuration space configuration.is_valid_configuration() observation = objective(suggestion) From 9f129a5f0883c97bc9cdefbf1972e89f8f62abb3 Mon Sep 17 00:00:00 2001 From: ephoris Date: Mon, 5 Feb 2024 15:12:58 -0500 Subject: [PATCH 04/14] [Fix] Change iterrows to itertuples to preserve datatypes in numeric dataframes --- 
mlos_core/mlos_core/optimizers/flaml_optimizer.py | 7 ++++--- mlos_core/mlos_core/spaces/adapters/llamatune.py | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index 423c0558e2..532b3ed40b 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -86,13 +86,14 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series, """ if context is not None: raise NotImplementedError() - for (_, config), score in zip(configurations.iterrows(), scores): + for config, score in zip(configurations.itertuples(index=False), scores): + config = config._asdict() cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration( - self.optimizer_parameter_space, values=config.to_dict()) + self.optimizer_parameter_space, values=config) if cs_config in self.evaluated_samples: warn(f"Configuration {config} was already registered", UserWarning) - self.evaluated_samples[cs_config] = EvaluatedSample(config=config.to_dict(), score=score) + self.evaluated_samples[cs_config] = EvaluatedSample(config=config, score=score) def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. 
diff --git a/mlos_core/mlos_core/spaces/adapters/llamatune.py b/mlos_core/mlos_core/spaces/adapters/llamatune.py index a49e000a87..06ba7bd35c 100644 --- a/mlos_core/mlos_core/spaces/adapters/llamatune.py +++ b/mlos_core/mlos_core/spaces/adapters/llamatune.py @@ -89,8 +89,9 @@ def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace: def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame: target_configurations = [] - for (_, config) in configurations.iterrows(): - configuration = ConfigSpace.Configuration(self.orig_parameter_space, values=config.to_dict()) + for config in configurations.itertuples(index=False): + config = config._asdict() + configuration = ConfigSpace.Configuration(self.orig_parameter_space, values=config) target_config = self._suggested_configs.get(configuration, None) # NOTE: HeSBO is a non-linear projection method, and does not inherently support inverse projection From 55bf6b3c545764a017a106d5fdd598b8ce899f30 Mon Sep 17 00:00:00 2001 From: ephoris Date: Mon, 5 Feb 2024 15:28:03 -0500 Subject: [PATCH 05/14] Revert "[WIP] Change suggestion building in pytest example" This reverts commit 6c3a4825230d3609d1a1906d786d6e0be8cb82d8. --- .../tests/optimizers/optimizer_test.py | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 95860cf797..a484450a15 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -1,3 +1,5 @@ +# +# Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# """ @@ -299,11 +301,14 @@ def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None: @pytest.mark.parametrize(('optimizer_type', 'kwargs'), [ - # Default optimizer - (None, {}), # Enumerate all supported Optimizers *[(member, {}) for member in OptimizerType], # Optimizer with non-empty kwargs argument + (OptimizerType.SMAC, { + # Test with default config. + 'use_default_config': True, + # 'n_random_init': 10, + }), ]) def test_mixed_input_space_types(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None: """ @@ -323,17 +328,11 @@ def objective(point: pd.DataFrame) -> pd.Series: input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5)) input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0)) - if optimizer_type is None: - optimizer: BaseOptimizer = OptimizerFactory.create( - parameter_space=input_space, - optimizer_kwargs=kwargs, - ) - else: - optimizer: BaseOptimizer = OptimizerFactory.create( - parameter_space=input_space, - optimizer_type=optimizer_type, - optimizer_kwargs=kwargs, - ) + optimizer: BaseOptimizer = OptimizerFactory.create( + parameter_space=input_space, + optimizer_type=optimizer_type, + optimizer_kwargs=kwargs, + ) with pytest.raises(ValueError, match="No observations"): optimizer.get_best_observation() @@ -345,12 +344,8 @@ def objective(point: pd.DataFrame) -> pd.Series: suggestion = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) assert (suggestion.columns == ['x', 'y']).all() - # Build suggestion mapping to cooperate with Configuration, note that - # doing a .iloc[0].to_dict() will cause pandas convert all numeric types - # to float64 - tmp_suggest = {'x': suggestion['x'].values[0], 'y': suggestion['y'].values[0]} # check that suggestion is in the space - configuration = CS.Configuration(optimizer.parameter_space, tmp_suggest) + configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict()) # Raises an 
error if outside of configuration space configuration.is_valid_configuration() observation = objective(suggestion) From 8d1ef69e3f5b1cdf24bce300c0c4345bee6a646f Mon Sep 17 00:00:00 2001 From: ephoris Date: Mon, 5 Feb 2024 16:02:37 -0500 Subject: [PATCH 06/14] [Test] Change suggestion building in pytest example"" This reverts commit 55bf6b3c545764a017a106d5fdd598b8ce899f30. Realized this is required as again, df.iloc[0] will convert all items to a similar type because pandas.Series cannot be mixed types. In the case of numeric values, everything is implicitly translated into numpy.float64 types. --- .../tests/optimizers/optimizer_test.py | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index a484450a15..95860cf797 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -1,5 +1,3 @@ -# -# Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # """ @@ -301,14 +299,11 @@ def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None: @pytest.mark.parametrize(('optimizer_type', 'kwargs'), [ + # Default optimizer + (None, {}), # Enumerate all supported Optimizers *[(member, {}) for member in OptimizerType], # Optimizer with non-empty kwargs argument - (OptimizerType.SMAC, { - # Test with default config. 
- 'use_default_config': True, - # 'n_random_init': 10, - }), ]) def test_mixed_input_space_types(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None: """ @@ -328,11 +323,17 @@ def objective(point: pd.DataFrame) -> pd.Series: input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5)) input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0)) - optimizer: BaseOptimizer = OptimizerFactory.create( - parameter_space=input_space, - optimizer_type=optimizer_type, - optimizer_kwargs=kwargs, - ) + if optimizer_type is None: + optimizer: BaseOptimizer = OptimizerFactory.create( + parameter_space=input_space, + optimizer_kwargs=kwargs, + ) + else: + optimizer: BaseOptimizer = OptimizerFactory.create( + parameter_space=input_space, + optimizer_type=optimizer_type, + optimizer_kwargs=kwargs, + ) with pytest.raises(ValueError, match="No observations"): optimizer.get_best_observation() @@ -344,8 +345,12 @@ def objective(point: pd.DataFrame) -> pd.Series: suggestion = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) assert (suggestion.columns == ['x', 'y']).all() + # Build suggestion mapping to cooperate with Configuration, note that + # doing a .iloc[0].to_dict() will cause pandas convert all numeric types + # to float64 + tmp_suggest = {'x': suggestion['x'].values[0], 'y': suggestion['y'].values[0]} # check that suggestion is in the space - configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict()) + configuration = CS.Configuration(optimizer.parameter_space, tmp_suggest) # Raises an error if outside of configuration space configuration.is_valid_configuration() observation = objective(suggestion) From bd272c89cadc856af802af90663487571f3b03da Mon Sep 17 00:00:00 2001 From: ndhuynh Date: Mon, 5 Feb 2024 19:53:33 -0500 Subject: [PATCH 07/14] Update mlos_core/mlos_core/spaces/adapters/llamatune.py Co-authored-by: Sergiy Matusevych --- 
mlos_core/mlos_core/spaces/adapters/llamatune.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_core/mlos_core/spaces/adapters/llamatune.py b/mlos_core/mlos_core/spaces/adapters/llamatune.py index 06ba7bd35c..7f0f2466dd 100644 --- a/mlos_core/mlos_core/spaces/adapters/llamatune.py +++ b/mlos_core/mlos_core/spaces/adapters/llamatune.py @@ -90,8 +90,8 @@ def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace: def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame: target_configurations = [] for config in configurations.itertuples(index=False): - config = config._asdict() - configuration = ConfigSpace.Configuration(self.orig_parameter_space, values=config) + configuration = ConfigSpace.Configuration( + self.orig_parameter_space, values=config._asdict()) target_config = self._suggested_configs.get(configuration, None) # NOTE: HeSBO is a non-linear projection method, and does not inherently support inverse projection From 617afe90a3f7fabb24aaf30a73d7c9c246fb456f Mon Sep 17 00:00:00 2001 From: ephoris Date: Mon, 5 Feb 2024 19:57:02 -0500 Subject: [PATCH 08/14] [Revert] Copyright lines removed by mistake --- mlos_core/mlos_core/tests/optimizers/optimizer_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 95860cf797..c892f82d3e 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -1,3 +1,5 @@ +# +# Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# """ From 983763f96eaa4841926b59f2107be8f389e789b5 Mon Sep 17 00:00:00 2001 From: ephoris Date: Tue, 6 Feb 2024 15:19:25 -0500 Subject: [PATCH 09/14] [Fix] Revert to iterrows, but typecast dataframe to object type --- mlos_bench/mlos_bench/storage/util.py | 2 +- .../optimizers/bayesian_optimizers/smac_optimizer.py | 4 ++-- mlos_core/mlos_core/optimizers/flaml_optimizer.py | 7 +++---- mlos_core/mlos_core/spaces/adapters/llamatune.py | 4 ++-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/util.py b/mlos_bench/mlos_bench/storage/util.py index 1ae59a6b3b..d70fa0cec3 100644 --- a/mlos_bench/mlos_bench/storage/util.py +++ b/mlos_bench/mlos_bench/storage/util.py @@ -30,7 +30,7 @@ def kv_df_to_dict(dataframe: pandas.DataFrame) -> Dict[str, Optional[TunableValu dataframe.rename(columns={'metric': 'parameter'}, inplace=True) assert dataframe.columns.tolist() == ['parameter', 'value'] data = {} - for _, row in dataframe.iterrows(): + for _, row in dataframe.astype('O').iterrows(): assert isinstance(row['parameter'], str) assert row['value'] is None or isinstance(row['value'], (str, int, float)) if row['parameter'] in data: diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 085a64f45b..8d6d11e993 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -335,6 +335,6 @@ def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSp List of ConfigSpace configurations. 
""" return [ - ConfigSpace.Configuration(self.optimizer_parameter_space, values=config._asdict()) - for config in configurations.itertuples(index=False) + ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict()) + for (_, config) in configurations.astype('O').iterrows() ] diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index 532b3ed40b..0c752f0049 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -86,14 +86,13 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series, """ if context is not None: raise NotImplementedError() - for config, score in zip(configurations.itertuples(index=False), scores): - config = config._asdict() + for (_, config), score in zip(configurations.astype('O').iterrows(), scores): cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration( - self.optimizer_parameter_space, values=config) + self.optimizer_parameter_space, values=config.to_dict()) if cs_config in self.evaluated_samples: warn(f"Configuration {config} was already registered", UserWarning) - self.evaluated_samples[cs_config] = EvaluatedSample(config=config, score=score) + self.evaluated_samples[cs_config] = EvaluatedSample(config=config.to_dict(), score=score) def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. 
diff --git a/mlos_core/mlos_core/spaces/adapters/llamatune.py b/mlos_core/mlos_core/spaces/adapters/llamatune.py index 7f0f2466dd..3d18d774f2 100644 --- a/mlos_core/mlos_core/spaces/adapters/llamatune.py +++ b/mlos_core/mlos_core/spaces/adapters/llamatune.py @@ -89,9 +89,9 @@ def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace: def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame: target_configurations = [] - for config in configurations.itertuples(index=False): + for (_, config) in configurations.astype('O').iterrows(): configuration = ConfigSpace.Configuration( - self.orig_parameter_space, values=config._asdict()) + self.orig_parameter_space, values=config.to_dict()) target_config = self._suggested_configs.get(configuration, None) # NOTE: HeSBO is a non-linear projection method, and does not inherently support inverse projection From ff33ce4bc8daf82c31c418d5d9a25442b287668b Mon Sep 17 00:00:00 2001 From: ephoris Date: Tue, 6 Feb 2024 15:51:28 -0500 Subject: [PATCH 10/14] [Test] Fix tagging on args --- mlos_core/mlos_core/tests/optimizers/optimizer_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index c892f82d3e..4f8b0b7879 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -307,7 +307,7 @@ def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None: *[(member, {}) for member in OptimizerType], # Optimizer with non-empty kwargs argument ]) -def test_mixed_input_space_types(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None: +def test_mixed_input_space_types(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None: """ Toy problem to test the optimizers. 
""" @@ -326,12 +326,12 @@ def objective(point: pd.DataFrame) -> pd.Series: input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0)) if optimizer_type is None: - optimizer: BaseOptimizer = OptimizerFactory.create( + optimizer = OptimizerFactory.create( parameter_space=input_space, optimizer_kwargs=kwargs, ) else: - optimizer: BaseOptimizer = OptimizerFactory.create( + optimizer = OptimizerFactory.create( parameter_space=input_space, optimizer_type=optimizer_type, optimizer_kwargs=kwargs, From 4b4ec3341ab907969d08b5a60f78a3739640f0b9 Mon Sep 17 00:00:00 2001 From: ndhuynh Date: Wed, 7 Feb 2024 12:39:31 -0500 Subject: [PATCH 11/14] Update mlos_core/mlos_core/tests/optimizers/optimizer_test.py Co-authored-by: Brian Kroth --- mlos_core/mlos_core/tests/optimizers/optimizer_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 4f8b0b7879..9098b580b7 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -307,7 +307,7 @@ def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None: *[(member, {}) for member in OptimizerType], # Optimizer with non-empty kwargs argument ]) -def test_mixed_input_space_types(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None: +def test_mixed_numeric_type_input_space_types(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None: """ Toy problem to test the optimizers. 
""" From 423d5828fb99b40c74561dc76c59b5e852513b2f Mon Sep 17 00:00:00 2001 From: ndhuynh Date: Wed, 7 Feb 2024 12:40:02 -0500 Subject: [PATCH 12/14] Update mlos_core/mlos_core/tests/optimizers/optimizer_test.py Co-authored-by: Brian Kroth --- mlos_core/mlos_core/tests/optimizers/optimizer_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 9098b580b7..45b5938103 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -309,7 +309,7 @@ def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None: ]) def test_mixed_numeric_type_input_space_types(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None: """ - Toy problem to test the optimizers. + Toy problem to test the optimizers with mixed numeric types to ensure that original dtypes are retained. """ max_iterations = 10 if kwargs is None: From 2bc2e50bf85286ac3fd2b4580caea89c1d6b2786 Mon Sep 17 00:00:00 2001 From: ephoris Date: Wed, 7 Feb 2024 12:54:01 -0500 Subject: [PATCH 13/14] [Test] Clean optimizer_test to include seed and checks --- mlos_core/mlos_core/tests/__init__.py | 3 +++ .../tests/optimizers/optimizer_test.py | 19 +++++++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/mlos_core/mlos_core/tests/__init__.py b/mlos_core/mlos_core/tests/__init__.py index 7283e48f1c..6f74147ae9 100644 --- a/mlos_core/mlos_core/tests/__init__.py +++ b/mlos_core/mlos_core/tests/__init__.py @@ -12,6 +12,9 @@ from pkgutil import walk_packages from typing import List, Optional, Set, Type, TypeVar +# A common seed to use to avoid tracking down race conditions and intermingling +# issues of seeds across tests that run in non-deterministic parallel orders. 
+SEED = 42 if sys.version_info >= (3, 10): from typing import TypeAlias diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 45b5938103..b93427ecd5 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -23,7 +23,7 @@ from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer, SmacOptimizer from mlos_core.spaces.adapters import SpaceAdapterType -from mlos_core.tests import get_all_concrete_subclasses +from mlos_core.tests import get_all_concrete_subclasses, SEED _LOG = logging.getLogger(__name__) @@ -76,7 +76,7 @@ def objective(x: pd.Series) -> npt.ArrayLike: # pylint: disable=invalid-name ret: npt.ArrayLike = (6 * x - 2)**2 * np.sin(12 * x - 4) return ret # Emukit doesn't allow specifying a random state, so we set the global seed. - np.random.seed(42) + np.random.seed(SEED) optimizer = optimizer_class(parameter_space=configuration_space, **kwargs) with pytest.raises(ValueError, match="No observations"): @@ -320,7 +320,7 @@ def objective(point: pd.DataFrame) -> pd.Series: ret: pd.Series = point["x"] + point["y"] return ret - input_space = CS.ConfigurationSpace(seed=2169) + input_space = CS.ConfigurationSpace(seed=SEED) # add a mix of numeric datatypes input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5)) input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0)) @@ -347,14 +347,13 @@ def objective(point: pd.DataFrame) -> pd.Series: suggestion = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) assert (suggestion.columns == ['x', 'y']).all() - # Build suggestion mapping to cooperate with Configuration, note that - # doing a .iloc[0].to_dict() will cause pandas convert all numeric types - # to float64 - tmp_suggest = {'x': suggestion['x'].values[0], 'y': suggestion['y'].values[0]} - # check that suggestion is in 
the space - configuration = CS.Configuration(optimizer.parameter_space, tmp_suggest) + # Check suggestion values are the expected dtype + assert isinstance(suggestion['x'].iloc[0], np.integer) + assert isinstance(suggestion['y'].iloc[0], np.floating) + # Check that suggestion is in the space + test_configuration = CS.Configuration(optimizer.parameter_space, suggestion.astype('O').iloc[0].to_dict()) # Raises an error if outside of configuration space - configuration.is_valid_configuration() + test_configuration.is_valid_configuration() observation = objective(suggestion) assert isinstance(observation, pd.Series) optimizer.register(suggestion, observation) From 05becea766223bdbd1d72112adcbc64d254be878 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 7 Feb 2024 12:41:30 -0600 Subject: [PATCH 14/14] Update mlos_core/mlos_core/tests/optimizers/optimizer_test.py --- mlos_core/mlos_core/tests/optimizers/optimizer_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index b93427ecd5..6139a3f407 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -354,6 +354,7 @@ def objective(point: pd.DataFrame) -> pd.Series: test_configuration = CS.Configuration(optimizer.parameter_space, suggestion.astype('O').iloc[0].to_dict()) # Raises an error if outside of configuration space test_configuration.is_valid_configuration() + # Test registering the suggested configuration with a score. observation = objective(suggestion) assert isinstance(observation, pd.Series) optimizer.register(suggestion, observation)