From 5daaa26399ca7a61038eb131c7ed4639dfa615e1 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 17 Jan 2024 13:29:53 -0800 Subject: [PATCH] Support for special tunable values outside of the range (#617) Enables special values outside of the range (e.g., `-1` with a range of `[0, 100]`). To do we make use of "conditionals" in ConfigSpace to constrain the space. This has a number of implementation implications, addressed below: * [x] Add support for special values to the `Tunable` class * [x] Add unit tests for assigning special values outside of the range to the `Tunable` objects * [x] Add special values outside of the range to the unit tests for `ConfigSpace` conversion * [x] Implement proper `TunableGroups` to `ConfigSpace` conversion for tunables with special values * [x] Update `mlos_core` optimizers to support conditionals and special values in `ConfigSpace` * [x] Add more unit tests to check the conversion * [x] Make LlamaTune adapter support conditionals in `ConfigSpace` --------- Co-authored-by: Brian Kroth --- .bumpversion.cfg | 2 +- doc/source/conf.py | 2 +- mlos_bench/_version.py | 2 +- .../runtime/linux-runtime-tunables.jsonc | 2 +- .../optimizers/convert_configspace.py | 198 +++++++++++++++--- .../optimizers/mlos_core_optimizer.py | 43 ++-- mlos_bench/mlos_bench/tests/conftest.py | 4 +- .../mlos_bench/tests/launcher_run_test.py | 4 +- .../tests/optimizers/mock_opt_test.py | 6 +- .../optimizers/opt_bulk_register_test.py | 2 +- .../optimizers/toy_optimization_loop_test.py | 16 +- .../mlos_bench/tests/tunables/conftest.py | 6 +- .../tests/tunables/tunable_definition_test.py | 9 + .../tunables/tunable_to_configspace_test.py | 96 +++++---- .../tests/tunables/tunables_assign_test.py | 51 +++++ .../tests/tunables/tunables_str_test.py | 2 +- mlos_bench/mlos_bench/tunables/tunable.py | 50 ++++- mlos_core/_version.py | 2 +- mlos_core/mlos_core/__init__.py | 19 -- .../mlos_core/optimizers/flaml_optimizer.py | 18 +- mlos_core/mlos_core/optimizers/optimizer.py | 10 +- .../mlos_core/spaces/adapters/llamatune.py | 10 +- mlos_core/mlos_core/util.py | 56 +++++ 23 files changed, 462 insertions(+), 148 deletions(-) create mode 100644 mlos_core/mlos_core/util.py diff --git a/.bumpversion.cfg b/.bumpversion.cfg index db9cf11521..fb05266cb6 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.0 +current_version = 0.3.1 commit = True tag = True diff --git a/doc/source/conf.py b/doc/source/conf.py index 3191c48572..c3b88d74ce 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -36,7 +36,7 @@ author = 'GSL' # The full version, including alpha/beta/rc tags -release = '0.3.0' +release = '0.3.1' try: from setuptools_scm import get_version diff --git a/mlos_bench/_version.py b/mlos_bench/_version.py index a768871f1d..a8b115f7e3 100644 --- a/mlos_bench/_version.py +++ b/mlos_bench/_version.py @@ -7,4 +7,4 @@ """ # NOTE: This should be managed by bumpversion. 
-_VERSION = '0.3.0' +_VERSION = '0.3.1' diff --git a/mlos_bench/mlos_bench/config/environments/os/linux/runtime/linux-runtime-tunables.jsonc b/mlos_bench/mlos_bench/config/environments/os/linux/runtime/linux-runtime-tunables.jsonc index 5b5f46aa17..970ddfd745 100644 --- a/mlos_bench/mlos_bench/config/environments/os/linux/runtime/linux-runtime-tunables.jsonc +++ b/mlos_bench/mlos_bench/config/environments/os/linux/runtime/linux-runtime-tunables.jsonc @@ -7,7 +7,7 @@ "type": "int", "meta": {"name_prefix": "/proc/sys/kernel/"}, "default": 500000, - "range": [-1, 1000000], + "range": [0, 1000000], "special": [-1] }, "sched_latency_ns": { diff --git a/mlos_bench/mlos_bench/optimizers/convert_configspace.py b/mlos_bench/mlos_bench/optimizers/convert_configspace.py index 03bb2c3072..2b310cfc00 100644 --- a/mlos_bench/mlos_bench/optimizers/convert_configspace.py +++ b/mlos_bench/mlos_bench/optimizers/convert_configspace.py @@ -8,24 +8,38 @@ import logging -from typing import Optional - -from ConfigSpace.hyperparameters import Hyperparameter -from ConfigSpace import UniformIntegerHyperparameter -from ConfigSpace import UniformFloatHyperparameter -from ConfigSpace import CategoricalHyperparameter -from ConfigSpace import ConfigurationSpace, Configuration - -from mlos_bench.tunables.tunable import Tunable +from typing import Dict, Optional, Tuple + +from ConfigSpace import ( + CategoricalHyperparameter, + Configuration, + ConfigurationSpace, + EqualsCondition, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) +from mlos_bench.tunables.tunable import Tunable, TunableValue from mlos_bench.tunables.tunable_groups import TunableGroups _LOG = logging.getLogger(__name__) -def _tunable_to_hyperparameter( - tunable: Tunable, group_name: Optional[str] = None, cost: int = 0) -> Hyperparameter: +class TunableValueKind: + """ + Enum for the kind of the tunable value (special or not). + It is not a true enum because ConfigSpace wants string values. """ - Convert a single Tunable to an equivalent ConfigSpace Hyperparameter object. + + SPECIAL = "special" + RANGE = "range" + + +def _tunable_to_configspace( + tunable: Tunable, group_name: Optional[str] = None, cost: int = 0) -> ConfigurationSpace: + """ + Convert a single Tunable to an equivalent set of ConfigSpace Hyperparameter objects, + wrapped in a ConfigurationSpace for composability. + Note: this may be more than one Hyperparameter in the case of special value handling. Parameters ---------- @@ -38,25 +52,56 @@ def _tunable_to_hyperparameter( Returns ------- - hyperparameter : Hyperparameter - A ConfigSpace Hyperparameter object that corresponds to the Tunable. + cs : ConfigurationSpace + A ConfigurationSpace object that corresponds to the Tunable. 
""" - meta = {"group": group_name, "cost": cost} # {"lower": "", "upper": "", "scaling": ""} + meta = {"group": group_name, "cost": cost} # {"scaling": ""} + if tunable.type == "categorical": - return CategoricalHyperparameter( - tunable.name, choices=tunable.categories, - default_value=tunable.default, meta=meta) - elif tunable.type == "int": - return UniformIntegerHyperparameter( - tunable.name, lower=tunable.range[0], upper=tunable.range[1], - default_value=tunable.default, meta=meta) + return ConfigurationSpace({ + tunable.name: CategoricalHyperparameter( + name=tunable.name, choices=tunable.categories, + default_value=tunable.default, meta=meta) + }) + + if tunable.type == "int": + hp_type = UniformIntegerHyperparameter elif tunable.type == "float": - return UniformFloatHyperparameter( - tunable.name, lower=tunable.range[0], upper=tunable.range[1], - default_value=tunable.default, meta=meta) + hp_type = UniformFloatHyperparameter else: raise TypeError(f"Undefined Parameter Type: {tunable.type}") + if not tunable.special: + return ConfigurationSpace({ + tunable.name: hp_type( + name=tunable.name, lower=tunable.range[0], upper=tunable.range[1], + default_value=tunable.default if tunable.in_range(tunable.default) else None, + meta=meta) + }) + + # Create three hyperparameters: one for regular values, + # one for special values, and one to choose between the two. + (special_name, type_name) = special_param_names(tunable.name) + cs = ConfigurationSpace({ + tunable.name: hp_type( + name=tunable.name, lower=tunable.range[0], upper=tunable.range[1], + default_value=tunable.default if tunable.in_range(tunable.default) else None, + meta=meta), + special_name: CategoricalHyperparameter( + name=special_name, choices=tunable.special, + default_value=tunable.default if tunable.default in tunable.special else None, + meta=meta), + type_name: CategoricalHyperparameter( + name=type_name, + choices=[TunableValueKind.SPECIAL, TunableValueKind.RANGE], + default_value=TunableValueKind.SPECIAL, + weights=[0.5, 0.5]), # TODO: Make weights configurable; FLAML requires uniform weights. + }) + cs.add_condition(EqualsCondition(cs[special_name], cs[type_name], TunableValueKind.SPECIAL)) + cs.add_condition(EqualsCondition(cs[tunable.name], cs[type_name], TunableValueKind.RANGE)) + + return cs + def tunable_groups_to_configspace(tunables: TunableGroups, seed: Optional[int] = None) -> ConfigurationSpace: """ @@ -76,10 +121,11 @@ def tunable_groups_to_configspace(tunables: TunableGroups, seed: Optional[int] = A new ConfigurationSpace instance that corresponds to the input TunableGroups. """ space = ConfigurationSpace(seed=seed) - space.add_hyperparameters([ - _tunable_to_hyperparameter(tunable, group.name, group.get_current_cost()) - for (tunable, group) in tunables - ]) + for (tunable, group) in tunables: + space.add_configuration_space( + prefix="", delimiter="", + configuration_space=_tunable_to_configspace( + tunable, group.name, group.get_current_cost())) return space @@ -97,5 +143,97 @@ def tunable_values_to_configuration(tunables: TunableGroups) -> Configuration: Configuration A ConfigSpace Configuration. 
""" + values: Dict[str, TunableValue] = {} + for (tunable, _group) in tunables: + if tunable.special: + (special_name, type_name) = special_param_names(tunable.name) + if tunable.value in tunable.special: + values[type_name] = TunableValueKind.SPECIAL + values[special_name] = tunable.value + else: + values[type_name] = TunableValueKind.RANGE + values[tunable.name] = tunable.value + else: + values[tunable.name] = tunable.value configspace = tunable_groups_to_configspace(tunables) - return Configuration(configspace, values={tunable.name: tunable.value for (tunable, _group) in tunables}) + return Configuration(configspace, values=values) + + +def configspace_data_to_tunable_values(data: dict) -> dict: + """ + Remove the fields that correspond to special values in ConfigSpace. + In particular, remove and keys suffixes added by `special_param_names`. + """ + data = data.copy() + specials = [ + special_param_name_strip(k) + for k in data.keys() if special_param_name_is_temp(k) + ] + for k in specials: + (special_name, type_name) = special_param_names(k) + if data[type_name] == TunableValueKind.SPECIAL: + data[k] = data[special_name] + if special_name in data: + del data[special_name] + del data[type_name] + return data + + +def special_param_names(name: str) -> Tuple[str, str]: + """ + Generate the names of the auxiliary hyperparameters that correspond + to a tunable that can have special values. + + NOTE: `!` characters are currently disallowed in Tunable names in order handle this logic. + + Parameters + ---------- + name : str + The name of the tunable parameter. + + Returns + ------- + special_name : str + The name of the hyperparameter that corresponds to the special value. + type_name : str + The name of the hyperparameter that chooses between the regular and the special values. + """ + return (name + "!special", name + "!type") + + +def special_param_name_is_temp(name: str) -> bool: + """ + Check if name corresponds to a temporary ConfigSpace parameter. + + NOTE: `!` characters are currently disallowed in Tunable names in order handle this logic. + + Parameters + ---------- + name : str + The name of the hyperparameter. + + Returns + ------- + is_special : bool + True if the name corresponds to a temporary ConfigSpace hyperparameter. + """ + return name.endswith("!type") + + +def special_param_name_strip(name: str) -> str: + """ + Remove the temporary suffix from a special parameter name. + + NOTE: `!` characters are currently disallowed in Tunable names in order handle this logic. + + Parameters + ---------- + name : str + The name of the hyperparameter. + + Returns + ------- + stripped_name : str + The name of the hyperparameter without the temporary suffix. 
+ """ + return name.split("!", 1)[0] diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index 93128aac1d..91b1878188 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -10,7 +10,7 @@ import os from types import TracebackType -from typing import Optional, Sequence, Tuple, Type, Union +from typing import Dict, Optional, Sequence, Tuple, Type, Union from typing_extensions import Literal import pandas as pd @@ -20,10 +20,17 @@ ) from mlos_bench.environments.status import Status +from mlos_bench.services.base_service import Service +from mlos_bench.tunables.tunable import TunableValue from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.optimizers.base_optimizer import Optimizer -from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace -from mlos_bench.services.base_service import Service + +from mlos_bench.optimizers.convert_configspace import ( + TunableValueKind, + configspace_data_to_tunable_values, + special_param_names, + tunable_groups_to_configspace, +) _LOG = logging.getLogger(__name__) @@ -103,16 +110,13 @@ def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float df_status_completed = df_status.apply(Status.is_completed) df_configs = df_configs[df_status_completed] df_scores = df_scores[df_status_completed] - # External data can have incorrect types (e.g., all strings). - for (tunable, _group) in self._tunables: - df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype) self._opt.register(df_configs, df_scores) if _LOG.isEnabledFor(logging.DEBUG): (score, _) = self.get_best_observation() _LOG.debug("Warm-up end: %s = %s", self.target, score) return True - def _to_df(self, configs: Sequence[dict]) -> pd.DataFrame: + def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame: """ Select from past trials only the columns required in this experiment and impute default values for the tunables that are missing in the dataframe. @@ -128,13 +132,28 @@ def _to_df(self, configs: Sequence[dict]) -> pd.DataFrame: A dataframe with past trials data, with missing values imputed. """ df_configs = pd.DataFrame(configs) - tunables_names = self._tunables.get_param_values().keys() + tunables_names = list(self._tunables.get_param_values().keys()) missing_cols = set(tunables_names).difference(df_configs.columns) for (tunable, _group) in self._tunables: if tunable.name in missing_cols: df_configs[tunable.name] = tunable.default else: df_configs[tunable.name].fillna(tunable.default, inplace=True) + # External data can have incorrect types (e.g., all strings). + df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype) + # Add columns for tunables with special values. 
+ if tunable.special: + (special_name, type_name) = special_param_names(tunable.name) + tunables_names += [special_name, type_name] + is_special = df_configs[tunable.name].apply(tunable.special.__contains__) + df_configs[type_name] = TunableValueKind.RANGE + df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL + if tunable.type == "int": + # Make int column NULLABLE: + df_configs[tunable.name] = df_configs[tunable.name].astype("Int64") + df_configs[special_name] = df_configs[tunable.name] + df_configs.loc[~is_special, special_name] = None + df_configs.loc[is_special, tunable.name] = None # By default, hyperparameters in ConfigurationSpace are sorted by name: df_configs = df_configs[sorted(tunables_names)] _LOG.debug("Loaded configs:\n%s", df_configs) @@ -146,14 +165,14 @@ def suggest(self) -> TunableGroups: df_config = self._opt.suggest(defaults=self._start_with_defaults) self._start_with_defaults = False _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config) - return self._tunables.copy().assign(df_config.loc[0].to_dict()) + return self._tunables.copy().assign( + configspace_data_to_tunable_values(df_config.loc[0].to_dict())) def register(self, tunables: TunableGroups, status: Status, score: Optional[Union[float, dict]] = None) -> Optional[float]: score = super().register(tunables, status, score) # With _opt_sign applied if status.is_completed(): - # By default, hyperparameters in ConfigurationSpace are sorted by name: - df_config = pd.DataFrame(dict(sorted(tunables.get_param_values().items())), index=[0]) + df_config = self._to_df([tunables.get_param_values()]) _LOG.debug("Score: %s Dataframe:\n%s", score, df_config) self._opt.register(df_config, pd.Series([score], dtype=float)) self._iter += 1 @@ -163,7 +182,7 @@ def get_best_observation(self) -> Union[Tuple[float, TunableGroups], Tuple[None, df_config = self._opt.get_best_observation() if len(df_config) == 0: return (None, None) - params = df_config.iloc[0].to_dict() + params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict()) _LOG.debug("Best observation: %s", params) score = params.pop("score") * self._opt_sign # mlos_core always uses the `score` column return (score, self._tunables.copy().assign(params)) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index e3e2965898..9f646ca8f9 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -59,8 +59,8 @@ "description": "Cost of migrating the thread to another core", "type": "int", "default": -1, - "range": [-1, 500000], - "special": [-1] + "range": [0, 500000], + "special": [-1, 0] }, "kernel_sched_latency_ns": { "description": "Initial value for the scheduler period", diff --git a/mlos_bench/mlos_bench/tests/launcher_run_test.py b/mlos_bench/mlos_bench/tests/launcher_run_test.py index cdea175bde..8dafd0725d 100644 --- a/mlos_bench/mlos_bench/tests/launcher_run_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_run_test.py @@ -97,7 +97,7 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic [ # Iteration 1: Expect first value to be the baseline f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + - r"register DEBUG Score: 65\.67\d+ Dataframe:\s*$", + r"register DEBUG Score: 64\.88\d+ Dataframe:\s*$", # Iteration 2: The result may not always be deterministic f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + r"register DEBUG Score: \d+\.\d+ Dataframe:\s*$", @@ -106,6 +106,6 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: 
LocalExecServic r"register DEBUG Score: \d+\.\d+ Dataframe:\s*$", # Final result: baseline is the optimum for the mock environment f"^{_RE_DATE} run\\.py:\\d+ " + - r"_optimize INFO Env: Mock environment best score: 65\.67\d+\s*$", + r"_optimize INFO Env: Mock environment best score: 64\.88\d+\s*$", ] ) diff --git a/mlos_bench/mlos_bench/tests/optimizers/mock_opt_test.py b/mlos_bench/mlos_bench/tests/optimizers/mock_opt_test.py index 5f186d4596..0edd8ba81c 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/mock_opt_test.py +++ b/mlos_bench/mlos_bench/tests/optimizers/mock_opt_test.py @@ -23,19 +23,19 @@ def mock_configurations_no_defaults() -> list: ({ "vmSize": "Standard_B4ms", "idle": "halt", - "kernel_sched_migration_cost_ns": 13111, + "kernel_sched_migration_cost_ns": 13112, "kernel_sched_latency_ns": 796233790, }, 88.88), ({ "vmSize": "Standard_B2ms", "idle": "halt", - "kernel_sched_migration_cost_ns": 117025, + "kernel_sched_migration_cost_ns": 117026, "kernel_sched_latency_ns": 149827706, }, 66.66), ({ "vmSize": "Standard_B4ms", "idle": "halt", - "kernel_sched_migration_cost_ns": 354784, + "kernel_sched_migration_cost_ns": 354785, "kernel_sched_latency_ns": 795285932, }, 99.99), ] diff --git a/mlos_bench/mlos_bench/tests/optimizers/opt_bulk_register_test.py b/mlos_bench/mlos_bench/tests/optimizers/opt_bulk_register_test.py index e996777695..4e5582fa70 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/opt_bulk_register_test.py +++ b/mlos_bench/mlos_bench/tests/optimizers/opt_bulk_register_test.py @@ -123,7 +123,7 @@ def test_update_mock_min(mock_opt: MockOptimizer, mock_configs: List[dict], assert mock_opt.suggest().get_param_values() == { "vmSize": "Standard_B4ms", "idle": "halt", - "kernel_sched_migration_cost_ns": 13111, + "kernel_sched_migration_cost_ns": 13112, 'kernel_sched_latency_ns': 796233790, } diff --git a/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py b/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py index 69eee102a8..54e860a855 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py +++ b/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py @@ -12,7 +12,7 @@ import pytest -from mlos_core import config_to_dataframe +from mlos_core.util import config_to_dataframe from mlos_core.optimizers.bayesian_optimizers.smac_optimizer import SmacOptimizer from mlos_bench.optimizers.convert_configspace import tunable_values_to_configuration @@ -81,7 +81,7 @@ def test_mock_optimization_loop(mock_env_no_noise: MockEnv, assert tunables.get_param_values() == { "vmSize": "Standard_B2ms", "idle": "halt", - "kernel_sched_migration_cost_ns": 117025, + "kernel_sched_migration_cost_ns": 117026, "kernel_sched_latency_ns": 149827706, } @@ -96,7 +96,7 @@ def test_mock_optimization_loop_no_defaults(mock_env_no_noise: MockEnv, assert tunables.get_param_values() == { "vmSize": "Standard_B2s", "idle": "halt", - "kernel_sched_migration_cost_ns": 49122, + "kernel_sched_migration_cost_ns": 49123, "kernel_sched_latency_ns": 234760738, } @@ -111,8 +111,8 @@ def test_flaml_optimization_loop(mock_env_no_noise: MockEnv, assert tunables.get_param_values() == { "vmSize": "Standard_B2s", "idle": "halt", - "kernel_sched_migration_cost_ns": 50132, - "kernel_sched_latency_ns": 22674895, + "kernel_sched_migration_cost_ns": -1, + "kernel_sched_latency_ns": 13718105, } @@ -123,12 +123,12 @@ def test_smac_optimization_loop(mock_env_no_noise: MockEnv, Toy optimization loop with mock environment and SMAC optimizer. 
""" (score, tunables) = _optimize(mock_env_no_noise, smac_opt) - expected_score = 73.59 + expected_score = 70.33 expected_tunable_values = { "vmSize": "Standard_B2s", "idle": "mwait", - "kernel_sched_migration_cost_ns": 319025, - "kernel_sched_latency_ns": 499339615, + "kernel_sched_migration_cost_ns": 297669, + "kernel_sched_latency_ns": 290365137, } assert score == pytest.approx(expected_score, 0.01) assert tunables.get_param_values() == expected_tunable_values diff --git a/mlos_bench/mlos_bench/tests/tunables/conftest.py b/mlos_bench/mlos_bench/tests/tunables/conftest.py index 9dc22fd0f7..95de20d9b8 100644 --- a/mlos_bench/mlos_bench/tests/tunables/conftest.py +++ b/mlos_bench/mlos_bench/tests/tunables/conftest.py @@ -36,7 +36,7 @@ def tunable_categorical() -> Tunable: @pytest.fixture def tunable_int() -> Tunable: """ - A test fixture that produces an interger Tunable object with limited range. + A test fixture that produces an integer Tunable object with limited range. Returns ------- @@ -47,8 +47,8 @@ def tunable_int() -> Tunable: "description": "Cost of migrating the thread to another core", "type": "int", "default": 40000, - "range": [-1, 500000], - "special": [-1] + "range": [0, 500000], + "special": [-1] # Special value outside of the range }) diff --git a/mlos_bench/mlos_bench/tests/tunables/tunable_definition_test.py b/mlos_bench/mlos_bench/tests/tunables/tunable_definition_test.py index 4b971a1db1..5a100b59d1 100644 --- a/mlos_bench/mlos_bench/tests/tunables/tunable_definition_test.py +++ b/mlos_bench/mlos_bench/tests/tunables/tunable_definition_test.py @@ -12,6 +12,15 @@ from mlos_bench.tunables.tunable import Tunable +def test_tunable_name() -> None: + """ + Check that tunable name is valid. + """ + with pytest.raises(ValueError): + # ! characters are currently disallowed in tunable names + Tunable(name='test!tunable', config={"type": "float", "range": [0, 1], "default": 0}) + + def test_categorical_required_params() -> None: """ Check that required parameters are present for categorical tunables. diff --git a/mlos_bench/mlos_bench/tests/tunables/tunable_to_configspace_test.py b/mlos_bench/mlos_bench/tests/tunables/tunable_to_configspace_test.py index 3b7085f5ff..0cc7bd0f99 100644 --- a/mlos_bench/mlos_bench/tests/tunables/tunable_to_configspace_test.py +++ b/mlos_bench/mlos_bench/tests/tunables/tunable_to_configspace_test.py @@ -8,16 +8,22 @@ import pytest -from ConfigSpace import UniformIntegerHyperparameter -from ConfigSpace import UniformFloatHyperparameter -from ConfigSpace import CategoricalHyperparameter -from ConfigSpace import ConfigurationSpace +from ConfigSpace import ( + CategoricalHyperparameter, + ConfigurationSpace, + EqualsCondition, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) from mlos_bench.tunables.tunable import Tunable from mlos_bench.tunables.tunable_groups import TunableGroups - -from mlos_bench.optimizers.convert_configspace import _tunable_to_hyperparameter -from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace +from mlos_bench.optimizers.convert_configspace import ( + TunableValueKind, + _tunable_to_configspace, + special_param_names, + tunable_groups_to_configspace, +) # pylint: disable=redefined-outer-name @@ -33,79 +39,87 @@ def configuration_space() -> ConfigurationSpace: configuration_space : ConfigurationSpace A new ConfigurationSpace object for testing. 
""" + (kernel_sched_migration_cost_ns_special, + kernel_sched_migration_cost_ns_type) = special_param_names("kernel_sched_migration_cost_ns") + spaces = ConfigurationSpace(space={ "vmSize": ["Standard_B2s", "Standard_B2ms", "Standard_B4ms"], "idle": ["halt", "mwait", "noidle"], - "kernel_sched_migration_cost_ns": (-1, 500000), + "kernel_sched_migration_cost_ns": (0, 500000), + kernel_sched_migration_cost_ns_special: [-1, 0], + kernel_sched_migration_cost_ns_type: [TunableValueKind.SPECIAL, TunableValueKind.RANGE], "kernel_sched_latency_ns": (0, 1000000000), }) spaces["vmSize"].default_value = "Standard_B4ms" spaces["idle"].default_value = "halt" - spaces["kernel_sched_migration_cost_ns"].default_value = -1 + spaces["kernel_sched_migration_cost_ns"].default_value = 250000 + spaces[kernel_sched_migration_cost_ns_special].default_value = -1 + spaces[kernel_sched_migration_cost_ns_type].default_value = TunableValueKind.SPECIAL + spaces[kernel_sched_migration_cost_ns_type].probabilities = (0.5, 0.5) # FLAML requires distribution to be uniform spaces["kernel_sched_latency_ns"].default_value = 2000000 + spaces.add_condition(EqualsCondition( + spaces[kernel_sched_migration_cost_ns_special], + spaces[kernel_sched_migration_cost_ns_type], TunableValueKind.SPECIAL)) + spaces.add_condition(EqualsCondition( + spaces["kernel_sched_migration_cost_ns"], + spaces[kernel_sched_migration_cost_ns_type], TunableValueKind.RANGE)) + return spaces def _cmp_tunable_hyperparameter_categorical( - tunable: Tunable, cs_param: CategoricalHyperparameter) -> None: + tunable: Tunable, space: ConfigurationSpace) -> None: """ Check if categorical Tunable and ConfigSpace Hyperparameter actually match. """ - assert isinstance(cs_param, CategoricalHyperparameter) - assert set(cs_param.choices) == set(tunable.categories) - assert cs_param.default_value == tunable.value + param = space[tunable.name] + assert isinstance(param, CategoricalHyperparameter) + assert set(param.choices) == set(tunable.categories) + assert param.default_value == tunable.value -def _cmp_tunable_hyperparameter_int( - tunable: Tunable, cs_param: UniformIntegerHyperparameter) -> None: +def _cmp_tunable_hyperparameter_numerical( + tunable: Tunable, space: ConfigurationSpace) -> None: """ Check if integer Tunable and ConfigSpace Hyperparameter actually match. """ - assert isinstance(cs_param, UniformIntegerHyperparameter) - assert (cs_param.lower, cs_param.upper) == tuple(tunable.range) - assert cs_param.default_value == tunable.value - - -def _cmp_tunable_hyperparameter_float( - tunable: Tunable, cs_param: UniformFloatHyperparameter) -> None: - """ - Check if float Tunable and ConfigSpace Hyperparameter actually match. - """ - assert isinstance(cs_param, UniformFloatHyperparameter) - assert (cs_param.lower, cs_param.upper) == tuple(tunable.range) - assert cs_param.default_value == tunable.value + param = space[tunable.name] + assert isinstance(param, (UniformIntegerHyperparameter, UniformFloatHyperparameter)) + assert (param.lower, param.upper) == tuple(tunable.range) + if tunable.in_range(tunable.value): + assert param.default_value == tunable.value -def test_tunable_to_hyperparameter_categorical(tunable_categorical: Tunable) -> None: +def test_tunable_to_configspace_categorical(tunable_categorical: Tunable) -> None: """ Check the conversion of Tunable to CategoricalHyperparameter. 
""" - cs_param = _tunable_to_hyperparameter(tunable_categorical) + cs_param = _tunable_to_configspace(tunable_categorical) _cmp_tunable_hyperparameter_categorical(tunable_categorical, cs_param) -def test_tunable_to_hyperparameter_int(tunable_int: Tunable) -> None: +def test_tunable_to_configspace_int(tunable_int: Tunable) -> None: """ Check the conversion of Tunable to UniformIntegerHyperparameter. """ - cs_param = _tunable_to_hyperparameter(tunable_int) - _cmp_tunable_hyperparameter_int(tunable_int, cs_param) + cs_param = _tunable_to_configspace(tunable_int) + _cmp_tunable_hyperparameter_numerical(tunable_int, cs_param) -def test_tunable_to_hyperparameter_float(tunable_float: Tunable) -> None: +def test_tunable_to_configspace_float(tunable_float: Tunable) -> None: """ Check the conversion of Tunable to UniformFloatHyperparameter. """ - cs_param = _tunable_to_hyperparameter(tunable_float) - _cmp_tunable_hyperparameter_float(tunable_float, cs_param) + cs_param = _tunable_to_configspace(tunable_float) + _cmp_tunable_hyperparameter_numerical(tunable_float, cs_param) _CMP_FUNC = { - "int": _cmp_tunable_hyperparameter_int, - "float": _cmp_tunable_hyperparameter_float, - "categorical": _cmp_tunable_hyperparameter_categorical + "int": _cmp_tunable_hyperparameter_numerical, + "float": _cmp_tunable_hyperparameter_numerical, + "categorical": _cmp_tunable_hyperparameter_categorical, } @@ -116,9 +130,7 @@ def test_tunable_groups_to_hyperparameters(tunable_groups: TunableGroups) -> Non """ space = tunable_groups_to_configspace(tunable_groups) for (tunable, _group) in tunable_groups: - cs_param = space[tunable.name] - assert cs_param.default_value == tunable.value - _CMP_FUNC[tunable.type](tunable, cs_param) + _CMP_FUNC[tunable.type](tunable, space) def test_tunable_groups_to_configspace( diff --git a/mlos_bench/mlos_bench/tests/tunables/tunables_assign_test.py b/mlos_bench/mlos_bench/tests/tunables/tunables_assign_test.py index 9ceae74df6..bdc90ba8ac 100644 --- a/mlos_bench/mlos_bench/tests/tunables/tunables_assign_test.py +++ b/mlos_bench/mlos_bench/tests/tunables/tunables_assign_test.py @@ -27,6 +27,15 @@ def test_tunables_assign_unknown_param(tunable_groups: TunableGroups) -> None: }) +def test_tunables_assign_categorical(tunable_categorical: Tunable) -> None: + """ + Regular assignment for categorical tunable. + """ + # Must be one of: {"Standard_B2s", "Standard_B2ms", "Standard_B4ms"} + tunable_categorical.value = "Standard_B4ms" + assert not tunable_categorical.is_special + + def test_tunables_assign_invalid_categorical(tunable_groups: TunableGroups) -> None: """ Check parameter validation for categorical tunables. 
@@ -80,6 +89,7 @@ def test_tunable_assign_int_to_numerical_value(tunable_int: Tunable) -> None: """ tunable_int.numerical_value = 10.0 assert tunable_int.numerical_value == 10 + assert not tunable_int.is_special def test_tunable_assign_float_to_numerical_value(tunable_float: Tunable) -> None: @@ -88,6 +98,7 @@ def test_tunable_assign_float_to_numerical_value(tunable_float: Tunable) -> None """ tunable_float.numerical_value = 0.1 assert tunable_float.numerical_value == 0.1 + assert not tunable_float.is_special def test_tunable_assign_str_to_int(tunable_int: Tunable) -> None: @@ -96,6 +107,7 @@ def test_tunable_assign_str_to_int(tunable_int: Tunable) -> None: """ tunable_int.value = "10" assert tunable_int.value == 10 # type: ignore[comparison-overlap] + assert not tunable_int.is_special def test_tunable_assign_str_to_float(tunable_float: Tunable) -> None: @@ -104,6 +116,7 @@ def test_tunable_assign_str_to_float(tunable_float: Tunable) -> None: """ tunable_float.value = "0.5" assert tunable_float.value == 0.5 # type: ignore[comparison-overlap] + assert not tunable_float.is_special def test_tunable_assign_float_to_int(tunable_int: Tunable) -> None: @@ -112,6 +125,7 @@ def test_tunable_assign_float_to_int(tunable_int: Tunable) -> None: """ tunable_int.value = 10.0 assert tunable_int.value == 10 + assert not tunable_int.is_special def test_tunable_assign_float_to_int_fail(tunable_int: Tunable) -> None: @@ -162,3 +176,40 @@ def test_tunable_assign_null_to_float(tunable_float: Tunable) -> None: tunable_float.value = None with pytest.raises(TypeError): tunable_float.numerical_value = None # type: ignore[assignment] + + +def test_tunable_assign_special(tunable_int: Tunable) -> None: + """ + Check the assignment of a special value outside of the range (but declared `special`). + """ + tunable_int.numerical_value = -1 + assert tunable_int.numerical_value == -1 + assert tunable_int.is_special + + +def test_tunable_assign_special_fail(tunable_int: Tunable) -> None: + """ + Assign a value that is neither special nor in range and fail. + """ + with pytest.raises(ValueError): + tunable_int.numerical_value = -2 + + +def test_tunable_assign_special_with_coercion(tunable_int: Tunable) -> None: + """ + Check the assignment of a special value outside of the range (but declared `special`). + Check coercion from float to int. + """ + tunable_int.numerical_value = -1.0 + assert tunable_int.numerical_value == -1 + assert tunable_int.is_special + + +def test_tunable_assign_special_with_coercion_str(tunable_int: Tunable) -> None: + """ + Check the assignment of a special value outside of the range (but declared `special`). + Check coercion from string to int. 
+ """ + tunable_int.value = "-1" + assert tunable_int.numerical_value == -1 + assert tunable_int.is_special diff --git a/mlos_bench/mlos_bench/tests/tunables/tunables_str_test.py b/mlos_bench/mlos_bench/tests/tunables/tunables_str_test.py index 3482532629..672b16ab73 100644 --- a/mlos_bench/mlos_bench/tests/tunables/tunables_str_test.py +++ b/mlos_bench/mlos_bench/tests/tunables/tunables_str_test.py @@ -29,7 +29,7 @@ def test_tunable_groups_str(tunable_groups: TunableGroups) -> None: "kernel_sched_migration_cost_ns": { "type": "int", "default": -1, - "range": [-1, 500000], + "range": [0, 500000], "special": [-1] } } diff --git a/mlos_bench/mlos_bench/tunables/tunable.py b/mlos_bench/mlos_bench/tunables/tunable.py index b2f2e18919..8992e9d96b 100644 --- a/mlos_bench/mlos_bench/tunables/tunable.py +++ b/mlos_bench/mlos_bench/tunables/tunable.py @@ -32,7 +32,7 @@ class TunableDict(TypedDict, total=False): default: TunableValue values: Optional[List[Optional[str]]] range: Optional[Union[Sequence[int], Sequence[float]]] - special: Optional[Union[List[int], List[str]]] + special: Optional[Union[List[int], List[float]]] meta: Dict[str, Any] @@ -59,6 +59,8 @@ def __init__(self, name: str, config: TunableDict): config : dict Python dict that represents a Tunable (e.g., deserialized from JSON) """ + if '!' in name: # TODO: Use a regex here and in JSON schema + raise ValueError(f"Invalid name of the tunable: {name}") self._name = name self._type = config["type"] # required if self._type not in self._DTYPE: @@ -76,7 +78,7 @@ def __init__(self, name: str, config: TunableDict): assert len(config_range) == 2, f"Invalid range: {config_range}" config_range = (config_range[0], config_range[1]) self._range = config_range - self._special = config.get("special") + self._special: Union[List[int], List[float]] = config.get("special") or [] self._current_value = None self._sanity_check() self.value = self._default @@ -92,8 +94,8 @@ def _sanity_check(self) -> None: raise ValueError(f"Range must be None for the categorical type tunable {self}") if len(set(self._values)) != len(self._values): raise ValueError(f"Values must be unique for the categorical type tunable {self}") - if self._special is not None: - raise ValueError(f"Special values must be None for the categorical type tunable {self}") + if self._special: + raise ValueError(f"Categorical tunable cannot have special values: {self}") elif self.is_numerical: if self._values is not None: raise ValueError(f"Values must be None for the numerical type tunable {self}") @@ -272,13 +274,25 @@ def is_valid(self, value: TunableValue) -> bool: return value in self._values elif self.is_numerical and self._range: if isinstance(value, (int, float)): - # TODO: allow special values outside of range? - return bool(self._range[0] <= value <= self._range[1]) # or value == self._default + return self.in_range(value) or value in self._special else: raise ValueError(f"Invalid value type for tunable {self}: {value}={type(value)}") else: raise ValueError(f"Invalid parameter type: {self._type}") + def in_range(self, value: Union[int, float, str, None]) -> bool: + """ + Check if the value is within the range of the tunable. + Do *NOT* check for special values. + Return False if the tunable or value is categorical or None. 
+ """ + return ( + isinstance(value, (float, int)) and + self.is_numerical and + self._range is not None and + bool(self._range[0] <= value <= self._range[1]) + ) + @property def category(self) -> Optional[str]: """ @@ -331,6 +345,30 @@ def name(self) -> str: """ return self._name + @property + def special(self) -> Union[List[int], List[float]]: + """ + Get the special values of the tunable. Return an empty list if there are none. + + Returns + ------- + special : [int] | [float] + A list of special values of the tunable. Can be empty. + """ + return self._special + + @property + def is_special(self) -> bool: + """ + Check if the current value of the tunable is special. + + Returns + ------- + is_special : bool + True if the current value of the tunable is special, False otherwise. + """ + return self.value in self._special + @property def type(self) -> str: """ diff --git a/mlos_core/_version.py b/mlos_core/_version.py index 069b397e53..08d17a0e68 100644 --- a/mlos_core/_version.py +++ b/mlos_core/_version.py @@ -7,4 +7,4 @@ """ # NOTE: This should be managed by bumpversion. -_VERSION = '0.3.0' +_VERSION = '0.3.1' diff --git a/mlos_core/mlos_core/__init__.py b/mlos_core/mlos_core/__init__.py index ba10a11d85..3d816eb916 100644 --- a/mlos_core/mlos_core/__init__.py +++ b/mlos_core/mlos_core/__init__.py @@ -5,22 +5,3 @@ """ Basic initializer module for the mlos_core package. """ - -import ConfigSpace -import pandas as pd - - -def config_to_dataframe(config: ConfigSpace.Configuration) -> pd.DataFrame: - """Converts a ConfigSpace config to a DataFrame - - Parameters - ---------- - config : ConfigSpace.Configuration - The config to convert. - - Returns - ------- - pd.DataFrame - A DataFrame with a single row, containing the config's parameters. - """ - return pd.DataFrame([dict(config)]) diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index 0744c2e08c..423c0558e2 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -13,6 +13,7 @@ import numpy as np import pandas as pd +from mlos_core.util import normalize_config from mlos_core.optimizers.optimizer import BaseOptimizer from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter @@ -134,11 +135,11 @@ def _target_function(self, config: dict) -> Union[dict, None]: result: Union[dict, None] Dictionary with a single key, `score`, if config already evaluated; `None` otherwise. 
""" - cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(self.optimizer_parameter_space, values=config) + cs_config = normalize_config(self.optimizer_parameter_space, config) if cs_config in self.evaluated_samples: return {'score': self.evaluated_samples[cs_config].score} - self._suggested_config = config + self._suggested_config = dict(cs_config) # Cleaned-up version of the config return None # Returning None stops the process def _get_next_config(self) -> dict: @@ -164,8 +165,13 @@ def _get_next_config(self) -> dict: points_to_evaluate: list = [] evaluated_rewards: list = [] if len(self.evaluated_samples) > 0: - points_to_evaluate = [s.config for s in self.evaluated_samples.values()] - evaluated_rewards = [s.score for s in self.evaluated_samples.values()] + points_to_evaluate = [ + dict(normalize_config(self.optimizer_parameter_space, conf)) + for conf in self.evaluated_samples + ] + evaluated_rewards = [ + s.score for s in self.evaluated_samples.values() + ] # Warm start FLAML optimizer self._suggested_config = None @@ -174,8 +180,8 @@ def _get_next_config(self) -> dict: config=self.flaml_parameter_space, mode='min', metric='score', - points_to_evaluate=list(points_to_evaluate), - evaluated_rewards=list(evaluated_rewards), + points_to_evaluate=points_to_evaluate, + evaluated_rewards=evaluated_rewards, num_samples=len(points_to_evaluate) + 1, low_cost_partial_config=self.low_cost_partial_config, verbose=0, diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index aa8ba7f847..1d4e5762af 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -15,7 +15,7 @@ import numpy.typing as npt import pandas as pd -from mlos_core import config_to_dataframe +from mlos_core.util import config_to_dataframe from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter @@ -132,12 +132,12 @@ def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False configuration = self._suggest(context) assert len(configuration) == 1, \ "Suggest must return a single configuration." - assert len(configuration.columns) == len(self.optimizer_parameter_space.values()), \ - "Suggest returned a configuration with the wrong number of parameters." + assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \ + "Optimizer suggested a configuration that does not match the expected parameter space." if self._space_adapter: configuration = self._space_adapter.transform(configuration) - assert len(configuration.columns) == len(self.parameter_space.values()), \ - "Space adapter transformed configuration with the wrong number of parameters." + assert set(configuration.columns).issubset(set(self.parameter_space)), \ + "Space adapter produced a configuration that does not match the expected parameter space." 
return configuration @abstractmethod diff --git a/mlos_core/mlos_core/spaces/adapters/llamatune.py b/mlos_core/mlos_core/spaces/adapters/llamatune.py index 7eff790d29..a49e000a87 100644 --- a/mlos_core/mlos_core/spaces/adapters/llamatune.py +++ b/mlos_core/mlos_core/spaces/adapters/llamatune.py @@ -13,6 +13,8 @@ import numpy.typing as npt import pandas as pd from sklearn.preprocessing import MinMaxScaler + +from mlos_core.util import normalize_config from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter @@ -108,9 +110,11 @@ def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame: if getattr(self, '_pinv_matrix', None) is None: self._try_generate_approx_inverse_mapping() + # Replace NaNs with zeros for inactive hyperparameters + config_vector = np.nan_to_num(configuration.get_array(), nan=0.0) # Perform approximate reverse mapping # NOTE: applying special value biasing is not possible - vector = self._config_scaler.inverse_transform([configuration.get_array()])[0] + vector = self._config_scaler.inverse_transform([config_vector])[0] target_config_vector = self._pinv_matrix.dot(vector) target_config = ConfigSpace.Configuration(self.target_parameter_space, vector=target_config_vector) @@ -127,12 +131,12 @@ def transform(self, configuration: pd.DataFrame) -> pd.DataFrame: target_configuration = ConfigSpace.Configuration(self.target_parameter_space, values=target_values_dict) orig_values_dict = self._transform(target_values_dict) - orig_configuration = ConfigSpace.Configuration(self.orig_parameter_space, values=orig_values_dict) + orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict) # Add to inverse dictionary -- needed for registering the performance later self._suggested_configs[orig_configuration] = target_configuration - return pd.DataFrame([orig_values_dict.values()], columns=list(self.orig_parameter_space.keys())) + return pd.DataFrame([list(orig_configuration.values())], columns=list(orig_configuration.keys())) def _construct_low_dim_space(self, num_low_dims: int, max_unique_values_per_param: Optional[int]) -> None: """Constructs the low-dimensional parameter (potentially discretized) search space. diff --git a/mlos_core/mlos_core/util.py b/mlos_core/mlos_core/util.py new file mode 100644 index 0000000000..8acb654adf --- /dev/null +++ b/mlos_core/mlos_core/util.py @@ -0,0 +1,56 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Internal helper functions for mlos_core package. +""" + +from typing import Union + +from ConfigSpace import Configuration, ConfigurationSpace +import pandas as pd + + +def config_to_dataframe(config: Configuration) -> pd.DataFrame: + """Converts a ConfigSpace config to a DataFrame + + Parameters + ---------- + config : ConfigSpace.Configuration + The config to convert. + + Returns + ------- + pd.DataFrame + A DataFrame with a single row, containing the config's parameters. + """ + return pd.DataFrame([dict(config)]) + + +def normalize_config(config_space: ConfigurationSpace, config: Union[Configuration, dict]) -> Configuration: + """ + Convert a dictionary to a valid ConfigSpace configuration. + + Some optimizers and adapters ignore ConfigSpace conditionals when proposing new + configurations. We have to manually remove inactive hyperparameters such suggestions. + + Parameters + ---------- + config_space : ConfigurationSpace + The parameter space to use. + config : dict + The configuration to convert. 
+
+    Returns
+    -------
+    cs_config: Configuration
+        A valid ConfigSpace configuration with inactive parameters removed.
+    """
+    cs_config = Configuration(config_space, values=config, allow_inactive_with_values=True)
+    return Configuration(
+        config_space, values={
+            key: cs_config[key]
+            for key in config_space.get_active_hyperparameters(cs_config)
+        }
+    )
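
Illustrative sketch (not part of the patch): how a tunable with a special value
outside of its regular range maps onto ConfigSpace hyperparameters after this
change. The tunable definition below mirrors the `kernel_sched_migration_cost_ns`
test fixtures above; `_tunable_to_configspace` and `special_param_names` are the
helpers introduced in convert_configspace.py.

from mlos_bench.tunables.tunable import Tunable
from mlos_bench.optimizers.convert_configspace import (
    _tunable_to_configspace,
    special_param_names,
)

# A tunable whose special value -1 lies outside of its regular range [0, 500000].
tunable = Tunable("kernel_sched_migration_cost_ns", {
    "type": "int",
    "default": -1,
    "range": [0, 500000],
    "special": [-1],
})

space = _tunable_to_configspace(tunable)
(special_name, type_name) = special_param_names(tunable.name)

# The resulting space has three hyperparameters:
#   kernel_sched_migration_cost_ns          -- uniform int over [0, 500000]
#   kernel_sched_migration_cost_ns!special  -- categorical over the special values [-1]
#   kernel_sched_migration_cost_ns!type     -- categorical choice of "special" vs. "range"
# plus two EqualsCondition constraints, so only one of the two value
# hyperparameters is active in any given configuration.
print(space)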