Support for special tunable values outside of the range (microsoft#617)
Enables special values outside of the range (e.g., `-1` with a range of
`[0, 100]`).

To do this, we make use of "conditionals" in ConfigSpace to constrain the
space (a minimal sketch of the encoding follows the checklist below). This
has a number of implementation implications, addressed below:

* [x] Add support for special values to the `Tunable` class
* [x] Add unit tests for assigning special values outside of the range
to the `Tunable` objects
* [x] Add special values outside of the range to the unit tests for
`ConfigSpace` conversion
* [x] Implement proper `TunableGroups` to `ConfigSpace` conversion for
tunables with special values
* [x] Update `mlos_core` optimizers to support conditionals and special
values in `ConfigSpace`
* [x] Add more unit tests to check the conversion
* [x] Make LlamaTune adapter support conditionals in `ConfigSpace`
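
For intuition, here is a minimal, self-contained sketch of the encoding, using only the ConfigSpace API that appears in the diff below. The parameter name `p` and its bounds are hypothetical; the `!special`/`!type` suffixes follow the naming convention introduced in `convert_configspace.py`:

```python
from ConfigSpace import (
    CategoricalHyperparameter,
    ConfigurationSpace,
    EqualsCondition,
    UniformIntegerHyperparameter,
)

# Hypothetical tunable "p": an int in [0, 100] that also accepts the special value -1.
cs = ConfigurationSpace(seed=42)
cs.add_hyperparameters([
    UniformIntegerHyperparameter("p", lower=0, upper=100, default_value=50),
    CategoricalHyperparameter("p!special", choices=[-1], default_value=-1),
    CategoricalHyperparameter("p!type", choices=["special", "range"],
                              default_value="special", weights=[0.5, 0.5]),
])
# "p!special" is active only when "p!type" == "special";
# the range parameter "p" is active only when "p!type" == "range".
cs.add_condition(EqualsCondition(cs["p!special"], cs["p!type"], "special"))
cs.add_condition(EqualsCondition(cs["p"], cs["p!type"], "range"))

print(cs.sample_configuration())  # yields either {p!type: special, p!special: -1} or {p!type: range, p: <int>}
```

Sampling such a space produces either a special value or a value drawn from the range; the 50/50 weights mirror the defaults used in the diff below (FLAML requires uniform weights).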

---------

Co-authored-by: Brian Kroth <bpkroth@users.noreply.github.com>
motus and bpkroth authored Jan 17, 2024
1 parent 0678c43 commit 5daaa26
Showing 23 changed files with 462 additions and 148 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.3.0
+current_version = 0.3.1
commit = True
tag = True

2 changes: 1 addition & 1 deletion doc/source/conf.py
@@ -36,7 +36,7 @@
author = 'GSL'

# The full version, including alpha/beta/rc tags
-release = '0.3.0'
+release = '0.3.1'

try:
from setuptools_scm import get_version
2 changes: 1 addition & 1 deletion mlos_bench/_version.py
@@ -7,4 +7,4 @@
"""

# NOTE: This should be managed by bumpversion.
-_VERSION = '0.3.0'
+_VERSION = '0.3.1'
@@ -7,7 +7,7 @@
"type": "int",
"meta": {"name_prefix": "/proc/sys/kernel/"},
"default": 500000,
"range": [-1, 1000000],
"range": [0, 1000000],
"special": [-1]
},
"sched_latency_ns": {
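
As a usage sketch (not part of the diff), a tunable like the one above can now declare a special value outside its range. The `Tunable(name, config)` constructor signature is assumed from mlos_bench conventions; `in_range` and `special` appear in the diff below:

```python
from mlos_bench.tunables.tunable import Tunable

# Hypothetical construction mirroring the JSON config above:
# the special value -1 now lies outside the declared range.
tunable = Tunable("sched_migration_cost_ns", {
    "type": "int",
    "default": 500000,
    "range": [0, 1000000],
    "special": [-1],
})
assert tunable.in_range(tunable.default)  # 500000 is within [0, 1000000]
assert -1 in tunable.special              # -1 is allowed, but only as a special value
```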
198 changes: 168 additions & 30 deletions mlos_bench/mlos_bench/optimizers/convert_configspace.py
@@ -8,24 +8,38 @@

import logging

-from typing import Optional
-
-from ConfigSpace.hyperparameters import Hyperparameter
-from ConfigSpace import UniformIntegerHyperparameter
-from ConfigSpace import UniformFloatHyperparameter
-from ConfigSpace import CategoricalHyperparameter
-from ConfigSpace import ConfigurationSpace, Configuration
-
-from mlos_bench.tunables.tunable import Tunable
+from typing import Dict, Optional, Tuple
+
+from ConfigSpace import (
+    CategoricalHyperparameter,
+    Configuration,
+    ConfigurationSpace,
+    EqualsCondition,
+    UniformFloatHyperparameter,
+    UniformIntegerHyperparameter,
+)
+from mlos_bench.tunables.tunable import Tunable, TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups

_LOG = logging.getLogger(__name__)


-def _tunable_to_hyperparameter(
-        tunable: Tunable, group_name: Optional[str] = None, cost: int = 0) -> Hyperparameter:
-    """
-    Convert a single Tunable to an equivalent ConfigSpace Hyperparameter object.
+class TunableValueKind:
+    """
+    Enum for the kind of the tunable value (special or not).
+    It is not a true enum because ConfigSpace wants string values.
+    """
+
+    SPECIAL = "special"
+    RANGE = "range"
+
+
+def _tunable_to_configspace(
+        tunable: Tunable, group_name: Optional[str] = None, cost: int = 0) -> ConfigurationSpace:
+    """
+    Convert a single Tunable to an equivalent set of ConfigSpace Hyperparameter objects,
+    wrapped in a ConfigurationSpace for composability.
+    Note: this may be more than one Hyperparameter in the case of special value handling.
 
     Parameters
     ----------
@@ -38,25 +52,56 @@ def _tunable_to_hyperparameter(
     Returns
     -------
-    hyperparameter : Hyperparameter
-        A ConfigSpace Hyperparameter object that corresponds to the Tunable.
+    cs : ConfigurationSpace
+        A ConfigurationSpace object that corresponds to the Tunable.
     """
-    meta = {"group": group_name, "cost": cost}  # {"lower": "", "upper": "", "scaling": ""}
+    meta = {"group": group_name, "cost": cost}  # {"scaling": ""}

if tunable.type == "categorical":
return CategoricalHyperparameter(
tunable.name, choices=tunable.categories,
default_value=tunable.default, meta=meta)
elif tunable.type == "int":
return UniformIntegerHyperparameter(
tunable.name, lower=tunable.range[0], upper=tunable.range[1],
default_value=tunable.default, meta=meta)
return ConfigurationSpace({
tunable.name: CategoricalHyperparameter(
name=tunable.name, choices=tunable.categories,
default_value=tunable.default, meta=meta)
})

if tunable.type == "int":
hp_type = UniformIntegerHyperparameter
elif tunable.type == "float":
return UniformFloatHyperparameter(
tunable.name, lower=tunable.range[0], upper=tunable.range[1],
default_value=tunable.default, meta=meta)
hp_type = UniformFloatHyperparameter
else:
raise TypeError(f"Undefined Parameter Type: {tunable.type}")

+    if not tunable.special:
+        return ConfigurationSpace({
+            tunable.name: hp_type(
+                name=tunable.name, lower=tunable.range[0], upper=tunable.range[1],
+                default_value=tunable.default if tunable.in_range(tunable.default) else None,
+                meta=meta)
+        })
+
+    # Create three hyperparameters: one for regular values,
+    # one for special values, and one to choose between the two.
+    (special_name, type_name) = special_param_names(tunable.name)
+    cs = ConfigurationSpace({
+        tunable.name: hp_type(
+            name=tunable.name, lower=tunable.range[0], upper=tunable.range[1],
+            default_value=tunable.default if tunable.in_range(tunable.default) else None,
+            meta=meta),
+        special_name: CategoricalHyperparameter(
+            name=special_name, choices=tunable.special,
+            default_value=tunable.default if tunable.default in tunable.special else None,
+            meta=meta),
+        type_name: CategoricalHyperparameter(
+            name=type_name,
+            choices=[TunableValueKind.SPECIAL, TunableValueKind.RANGE],
+            default_value=TunableValueKind.SPECIAL,
+            weights=[0.5, 0.5]),  # TODO: Make weights configurable; FLAML requires uniform weights.
+    })
+    cs.add_condition(EqualsCondition(cs[special_name], cs[type_name], TunableValueKind.SPECIAL))
+    cs.add_condition(EqualsCondition(cs[tunable.name], cs[type_name], TunableValueKind.RANGE))
+
+    return cs


def tunable_groups_to_configspace(tunables: TunableGroups, seed: Optional[int] = None) -> ConfigurationSpace:
"""
@@ -76,10 +121,11 @@ def tunable_groups_to_configspace(tunables: TunableGroups, seed: Optional[int] =
         A new ConfigurationSpace instance that corresponds to the input TunableGroups.
     """
     space = ConfigurationSpace(seed=seed)
-    space.add_hyperparameters([
-        _tunable_to_hyperparameter(tunable, group.name, group.get_current_cost())
-        for (tunable, group) in tunables
-    ])
+    for (tunable, group) in tunables:
+        space.add_configuration_space(
+            prefix="", delimiter="",
+            configuration_space=_tunable_to_configspace(
+                tunable, group.name, group.get_current_cost()))
     return space


@@ -97,5 +143,97 @@ def tunable_values_to_configuration(tunables: TunableGroups) -> Configuration:
     Configuration
         A ConfigSpace Configuration.
     """
+    values: Dict[str, TunableValue] = {}
+    for (tunable, _group) in tunables:
+        if tunable.special:
+            (special_name, type_name) = special_param_names(tunable.name)
+            if tunable.value in tunable.special:
+                values[type_name] = TunableValueKind.SPECIAL
+                values[special_name] = tunable.value
+            else:
+                values[type_name] = TunableValueKind.RANGE
+                values[tunable.name] = tunable.value
+        else:
+            values[tunable.name] = tunable.value
     configspace = tunable_groups_to_configspace(tunables)
-    return Configuration(configspace, values={tunable.name: tunable.value for (tunable, _group) in tunables})
+    return Configuration(configspace, values=values)


+def configspace_data_to_tunable_values(data: dict) -> dict:
+    """
+    Remove the fields that correspond to special values in ConfigSpace.
+    In particular, remove any key suffixes added by `special_param_names`.
+    """
+    data = data.copy()
+    specials = [
+        special_param_name_strip(k)
+        for k in data.keys() if special_param_name_is_temp(k)
+    ]
+    for k in specials:
+        (special_name, type_name) = special_param_names(k)
+        if data[type_name] == TunableValueKind.SPECIAL:
+            data[k] = data[special_name]
+        if special_name in data:
+            del data[special_name]
+        del data[type_name]
+    return data


+def special_param_names(name: str) -> Tuple[str, str]:
+    """
+    Generate the names of the auxiliary hyperparameters that correspond
+    to a tunable that can have special values.
+    NOTE: `!` characters are currently disallowed in Tunable names in order to handle this logic.
+
+    Parameters
+    ----------
+    name : str
+        The name of the tunable parameter.
+
+    Returns
+    -------
+    special_name : str
+        The name of the hyperparameter that corresponds to the special value.
+    type_name : str
+        The name of the hyperparameter that chooses between the regular and the special values.
+    """
+    return (name + "!special", name + "!type")


+def special_param_name_is_temp(name: str) -> bool:
+    """
+    Check if name corresponds to a temporary ConfigSpace parameter.
+    NOTE: `!` characters are currently disallowed in Tunable names in order to handle this logic.
+
+    Parameters
+    ----------
+    name : str
+        The name of the hyperparameter.
+
+    Returns
+    -------
+    is_special : bool
+        True if the name corresponds to a temporary ConfigSpace hyperparameter.
+    """
+    return name.endswith("!type")


+def special_param_name_strip(name: str) -> str:
+    """
+    Remove the temporary suffix from a special parameter name.
+    NOTE: `!` characters are currently disallowed in Tunable names in order to handle this logic.
+
+    Parameters
+    ----------
+    name : str
+        The name of the hyperparameter.
+
+    Returns
+    -------
+    stripped_name : str
+        The name of the hyperparameter without the temporary suffix.
+    """
+    return name.split("!", 1)[0]
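
To make the round-trip concrete, here is a small sketch (not part of the diff) of how the helpers above cooperate; the parameter name `cost` is hypothetical, and `row` stands in for one row of data coming back from ConfigSpace:

```python
from mlos_bench.optimizers.convert_configspace import (
    configspace_data_to_tunable_values,
    special_param_names,
)

(special_name, type_name) = special_param_names("cost")
# -> ("cost!special", "cost!type")

# A row where the special value was chosen: the range column is inactive.
row = {"cost": None, special_name: -1, type_name: "special"}
print(configspace_data_to_tunable_values(row))  # -> {'cost': -1}

# A row where a regular in-range value was chosen.
row = {"cost": 250000, special_name: None, type_name: "range"}
print(configspace_data_to_tunable_values(row))  # -> {'cost': 250000}
```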
43 changes: 31 additions & 12 deletions mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -10,7 +10,7 @@
import os

from types import TracebackType
-from typing import Optional, Sequence, Tuple, Type, Union
+from typing import Dict, Optional, Sequence, Tuple, Type, Union
from typing_extensions import Literal

import pandas as pd
@@ -20,10 +20,17 @@
)

 from mlos_bench.environments.status import Status
-from mlos_bench.services.base_service import Service
 from mlos_bench.tunables.tunable import TunableValue
 from mlos_bench.tunables.tunable_groups import TunableGroups
 from mlos_bench.optimizers.base_optimizer import Optimizer
-from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
+from mlos_bench.services.base_service import Service
+
+from mlos_bench.optimizers.convert_configspace import (
+    TunableValueKind,
+    configspace_data_to_tunable_values,
+    special_param_names,
+    tunable_groups_to_configspace,
+)

_LOG = logging.getLogger(__name__)

@@ -103,16 +110,13 @@ def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float
         df_status_completed = df_status.apply(Status.is_completed)
         df_configs = df_configs[df_status_completed]
         df_scores = df_scores[df_status_completed]
-        # External data can have incorrect types (e.g., all strings).
-        for (tunable, _group) in self._tunables:
-            df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
         self._opt.register(df_configs, df_scores)
         if _LOG.isEnabledFor(logging.DEBUG):
             (score, _) = self.get_best_observation()
             _LOG.debug("Warm-up end: %s = %s", self.target, score)
         return True

-    def _to_df(self, configs: Sequence[dict]) -> pd.DataFrame:
+    def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
         """
         Select from past trials only the columns required in this experiment and
         impute default values for the tunables that are missing in the dataframe.
@@ -128,13 +132,28 @@ def _to_df(self, configs: Sequence[dict]) -> pd.DataFrame:
             A dataframe with past trials data, with missing values imputed.
         """
         df_configs = pd.DataFrame(configs)
-        tunables_names = self._tunables.get_param_values().keys()
+        tunables_names = list(self._tunables.get_param_values().keys())
         missing_cols = set(tunables_names).difference(df_configs.columns)
         for (tunable, _group) in self._tunables:
             if tunable.name in missing_cols:
                 df_configs[tunable.name] = tunable.default
             else:
                 df_configs[tunable.name].fillna(tunable.default, inplace=True)
+            # External data can have incorrect types (e.g., all strings).
+            df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
+            # Add columns for tunables with special values.
+            if tunable.special:
+                (special_name, type_name) = special_param_names(tunable.name)
+                tunables_names += [special_name, type_name]
+                is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
+                df_configs[type_name] = TunableValueKind.RANGE
+                df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
+                if tunable.type == "int":
+                    # Make int column NULLABLE:
+                    df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
+                df_configs[special_name] = df_configs[tunable.name]
+                df_configs.loc[~is_special, special_name] = None
+                df_configs.loc[is_special, tunable.name] = None
+        # By default, hyperparameters in ConfigurationSpace are sorted by name:
+        df_configs = df_configs[sorted(tunables_names)]
         _LOG.debug("Loaded configs:\n%s", df_configs)
@@ -146,14 +165,14 @@ def suggest(self) -> TunableGroups:
         df_config = self._opt.suggest(defaults=self._start_with_defaults)
         self._start_with_defaults = False
         _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
-        return self._tunables.copy().assign(df_config.loc[0].to_dict())
+        return self._tunables.copy().assign(
+            configspace_data_to_tunable_values(df_config.loc[0].to_dict()))

     def register(self, tunables: TunableGroups, status: Status,
                  score: Optional[Union[float, dict]] = None) -> Optional[float]:
         score = super().register(tunables, status, score)  # With _opt_sign applied
         if status.is_completed():
-            # By default, hyperparameters in ConfigurationSpace are sorted by name:
-            df_config = pd.DataFrame(dict(sorted(tunables.get_param_values().items())), index=[0])
+            df_config = self._to_df([tunables.get_param_values()])
             _LOG.debug("Score: %s Dataframe:\n%s", score, df_config)
             self._opt.register(df_config, pd.Series([score], dtype=float))
self._iter += 1
@@ -163,7 +182,7 @@ def get_best_observation(self) -> Union[Tuple[float, TunableGroups], Tuple[None,
         df_config = self._opt.get_best_observation()
         if len(df_config) == 0:
             return (None, None)
-        params = df_config.iloc[0].to_dict()
+        params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
         _LOG.debug("Best observation: %s", params)
         score = params.pop("score") * self._opt_sign  # mlos_core always uses the `score` column
         return (score, self._tunables.copy().assign(params))
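
The special-value expansion in `_to_df` can be illustrated standalone. This is a runnable sketch, not the method itself: the column name `cost`, its range `[0, 500000]`, and the data are all illustrative, but the masking and nullable-`Int64` steps mirror the diff above:

```python
import pandas as pd

special = [-1]  # special values for the hypothetical int tunable "cost"
df = pd.DataFrame({"cost": [-1, 250000]})

# Mark which rows carry a special value vs. a regular in-range value.
is_special = df["cost"].apply(special.__contains__)
df["cost!type"] = "range"
df.loc[is_special, "cost!type"] = "special"

# Nullable Int64 lets us blank out whichever column is inactive per row
# without coercing the ints to floats.
df["cost"] = df["cost"].astype("Int64")
df["cost!special"] = df["cost"]
df.loc[~is_special, "cost!special"] = None
df.loc[is_special, "cost"] = None

print(df[sorted(df.columns)])
#      cost  cost!special  cost!type
# 0    <NA>            -1    special
# 1  250000          <NA>      range
```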
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/tests/conftest.py
@@ -59,8 +59,8 @@
"description": "Cost of migrating the thread to another core",
"type": "int",
"default": -1,
"range": [-1, 500000],
"special": [-1]
"range": [0, 500000],
"special": [-1, 0]
},
"kernel_sched_latency_ns": {
"description": "Initial value for the scheduler period",
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/tests/launcher_run_test.py
@@ -97,7 +97,7 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic
[
# Iteration 1: Expect first value to be the baseline
f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " +
r"register DEBUG Score: 65\.67\d+ Dataframe:\s*$",
r"register DEBUG Score: 64\.88\d+ Dataframe:\s*$",
# Iteration 2: The result may not always be deterministic
f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " +
r"register DEBUG Score: \d+\.\d+ Dataframe:\s*$",
@@ -106,6 +106,6 @@
r"register DEBUG Score: \d+\.\d+ Dataframe:\s*$",
# Final result: baseline is the optimum for the mock environment
f"^{_RE_DATE} run\\.py:\\d+ " +
r"_optimize INFO Env: Mock environment best score: 65\.67\d+\s*$",
r"_optimize INFO Env: Mock environment best score: 64\.88\d+\s*$",
]
)
(Diffs for the remaining changed files are not shown.)