Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow loading configs from JSON strings and test with new docstrings for tunables #890

Merged
merged 31 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
4b868d7
basic stub for allowing loading configs as json strings for docstring…
bpkroth Dec 4, 2024
3a9ad14
docstring fixups
bpkroth Dec 5, 2024
66085f5
add a todo comment
bpkroth Dec 5, 2024
6ac6949
add docstrings about tunables
bpkroth Dec 5, 2024
92769e3
formatting
bpkroth Dec 5, 2024
10449af
fixup
bpkroth Dec 5, 2024
11ccd5c
add an explicit unit test
bpkroth Dec 5, 2024
5bf9209
format
bpkroth Dec 5, 2024
cb15986
unused import
bpkroth Dec 5, 2024
b5fd716
tweaks
bpkroth Dec 5, 2024
e9e2159
Merge branch 'main' into testable-docstring-configs
bpkroth Dec 5, 2024
eccac69
add more back references
bpkroth Dec 5, 2024
61c27c7
tweaks
bpkroth Dec 5, 2024
4321512
fixup
bpkroth Dec 5, 2024
5e10e3c
format
bpkroth Dec 5, 2024
979fc53
comments
bpkroth Dec 5, 2024
2bd3873
format
bpkroth Dec 5, 2024
89a3ed7
spelling
bpkroth Dec 6, 2024
c795040
add better error handling output and tests for it
bpkroth Dec 6, 2024
bf9abdd
fix docstring
bpkroth Dec 6, 2024
e5d896b
tweaks
bpkroth Dec 6, 2024
7119551
comments
bpkroth Dec 6, 2024
6ec1f16
Also add storage config testing
bpkroth Dec 6, 2024
fd880eb
show an example with a tunable config too
bpkroth Dec 6, 2024
22b1e70
include example of fetching the config data too
bpkroth Dec 6, 2024
f312956
docstring
bpkroth Dec 6, 2024
6f2d113
make sure the provided input is a string, and not an array
bpkroth Dec 6, 2024
52e6d4a
format
bpkroth Dec 6, 2024
60ac3b4
back references
bpkroth Dec 6, 2024
f379063
fixup
bpkroth Dec 6, 2024
8c46888
pylint
bpkroth Dec 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions mlos_bench/mlos_bench/environments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@
- See `mlos_bench/config/environments/README.md
<https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/environments/>`_
for additional config examples in the source tree.

See Also
--------
:py:mod:`mlos_bench.config`
:py:mod:`mlos_bench.services`
:py:mod:`mlos_bench.tunables`
bpkroth marked this conversation as resolved.
Show resolved Hide resolved
"""

from mlos_bench.environments.base_environment import Environment
Expand Down
1 change: 1 addition & 0 deletions mlos_bench/mlos_bench/environments/base_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def __init__( # pylint: disable=too-many-arguments
)
tunables = TunableGroups()

# TODO: add user docstrings for these in the module
groups = self._expand_groups(
config.get("tunable_params", []),
(global_config or {}).get("tunable_params_map", {}),
Expand Down
133 changes: 74 additions & 59 deletions mlos_bench/mlos_bench/services/config_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Helper functions to load, instantiate, and serialize Python objects that encapsulate
benchmark environments, tunable parameters, and service functions.
"""Helper functions to load, instantiate, and serialize Python objects that
encapsulate a benchmark :py:class:`.Environment`, :py:mod:`~mlos_bench.tunables`,
:py:class:`.Service` functions, etc. from JSON configuration files and strings.
"""

import json # For logging only
import logging
import os
import sys
Expand Down Expand Up @@ -127,15 +127,17 @@ def config_paths(self) -> List[str]:

def resolve_path(self, file_path: str, extra_paths: Optional[Iterable[str]] = None) -> str:
"""
Prepend the suitable `_config_path` to `path` if the latter is not absolute. If
`_config_path` is `None` or `path` is absolute, return `path` as is.
Resolves and prepends the suitable :py:attr:`.config_paths` to ``file_path``
if the latter is not absolute. If :py:attr:`.config_paths` is ``None`` or
``file_path`` is absolute, return ``file_path`` as is.

Parameters
----------
file_path : str
Path to the input config file.
extra_paths : Iterable[str]
Additional directories to prepend to the list of search paths.
Additional directories to prepend to the list of
:py:attr:`.config_paths` search paths.

Returns
-------
Expand All @@ -157,17 +159,19 @@ def resolve_path(self, file_path: str, extra_paths: Optional[Iterable[str]] = No

def load_config(
self,
json_file_name: str,
json: str,
schema_type: Optional[ConfigSchema],
) -> Dict[str, Any]:
"""
Load JSON config file. Search for a file relative to `_config_path` if the input
path is not absolute. This method is exported to be used as a service.
Load JSON config file or JSON string.
Search for a file relative to :py:attr:`.config_paths` if the input path is
not absolute. This method is exported to be used as a
:py:class:`.SupportsConfigLoading` type :py:class:`.Service`.

Parameters
----------
json_file_name : str
Path to the input config file.
json : str
Path to the input config file or a JSON string.
schema_type : Optional[ConfigSchema]
The schema type to validate the config against.

Expand All @@ -176,22 +180,28 @@ def load_config(
config : Union[dict, List[dict]]
Free-format dictionary that contains the configuration.
"""
json_file_name = self.resolve_path(json_file_name)
_LOG.info("Load config: %s", json_file_name)
with open(json_file_name, mode="r", encoding="utf-8") as fh_json:
config = json5.load(fh_json)
if any(c in json for c in ("{", "[")):
# If the input contains braces or brackets, it is likely already a JSON
# string rather than a file path, so just parse it.
_LOG.info("Load config from json string: %s", json)
config: Any = json5.loads(json)
else:
json = self.resolve_path(json)
_LOG.info("Load config file: %s", json)
with open(json, mode="r", encoding="utf-8") as fh_json:
config = json5.load(fh_json)
bpkroth marked this conversation as resolved.
Show resolved Hide resolved
bpkroth marked this conversation as resolved.
Show resolved Hide resolved
if schema_type is not None:
try:
schema_type.validate(config)
except (ValidationError, SchemaError) as ex:
_LOG.error(
"Failed to validate config %s against schema type %s at %s",
json_file_name,
json,
schema_type.name,
schema_type.value,
)
raise ValueError(
f"Failed to validate config {json_file_name} against "
f"Failed to validate config {json} against "
f"schema type {schema_type.name} at {schema_type.value}"
) from ex
if isinstance(config, dict) and config.get("$schema"):
Expand All @@ -203,7 +213,7 @@ def load_config(
# (e.g. Azure ARM templates).
del config["$schema"]
else:
_LOG.warning("Config %s is not validated against a schema.", json_file_name)
_LOG.warning("Config %s is not validated against a schema.", json)
return config # type: ignore[no-any-return]

def prepare_class_load(
Expand Down Expand Up @@ -256,7 +266,7 @@ def prepare_class_load(
_LOG.debug(
"Instantiating: %s with config:\n%s",
class_name,
json.dumps(class_config, indent=2),
json5.dumps(class_config, indent=2),
)

return (class_name, class_config)
Expand All @@ -270,10 +280,8 @@ def build_optimizer(
global_config: Optional[Dict[str, Any]] = None,
) -> Optimizer:
"""
Instantiation of mlos_bench Optimizer that depend on Service and TunableGroups.

A class *MUST* have a constructor that takes four named arguments:
(tunables, config, global_config, service)
Instantiation of :py:mod:`mlos_bench` :py:class:`.Optimizer` that depends on
:py:class:`.Service` and :py:class:`.TunableGroups`.

Parameters
----------
Expand All @@ -294,7 +302,7 @@ def build_optimizer(
"""
tunables_path = config.get("include_tunables")
if tunables_path is not None:
tunables = self._load_tunables(tunables_path, tunables)
tunables = self.load_tunables(tunables_path, tunables)
(class_name, class_config) = self.prepare_class_load(config, global_config)
inst = instantiate_from_config(
Optimizer, # type: ignore[type-abstract]
Expand All @@ -315,7 +323,7 @@ def build_storage(
global_config: Optional[Dict[str, Any]] = None,
) -> "Storage":
"""
Instantiation of mlos_bench Storage objects.
Instantiation of mlos_bench :py:class:`.Storage` objects.

Parameters
----------
Expand Down Expand Up @@ -356,7 +364,7 @@ def build_scheduler( # pylint: disable=too-many-arguments
root_env_config: str,
) -> "Scheduler":
"""
Instantiation of mlos_bench Scheduler.
Instantiation of mlos_bench :py:class:`.Scheduler`.

Parameters
----------
Expand Down Expand Up @@ -405,7 +413,7 @@ def build_environment(
) -> Environment:
# pylint: disable=too-many-arguments,too-many-positional-arguments
"""
Factory method for a new environment with a given config.
Factory method for a new :py:class:`.Environment` with a given config.

Parameters
----------
Expand All @@ -429,7 +437,7 @@ def build_environment(
Returns
-------
env : Environment
An instance of the `Environment` class initialized with `config`.
An instance of the ``Environment`` class initialized with ``config``.
"""
env_name = config["name"]
(env_class, env_config) = self.prepare_class_load(config, global_config, parent_args)
Expand All @@ -440,7 +448,7 @@ def build_environment(

env_tunables_path = config.get("include_tunables")
if env_tunables_path is not None:
tunables = self._load_tunables(env_tunables_path, tunables)
tunables = self.load_tunables(env_tunables_path, tunables)

_LOG.debug("Creating env: %s :: %s", env_name, env_class)
env = Environment.new(
Expand Down Expand Up @@ -552,7 +560,7 @@ def build_service(
services from the list plus the parent mix-in.
"""
if _LOG.isEnabledFor(logging.DEBUG):
_LOG.debug("Build service from config:\n%s", json.dumps(config, indent=2))
_LOG.debug("Build service from config:\n%s", json5.dumps(config, indent=2))

assert isinstance(config, dict)
config_list: List[Dict[str, Any]]
Expand All @@ -569,20 +577,20 @@ def build_service(

def load_environment(
self,
json_file_name: str,
json: str,
tunables: TunableGroups,
global_config: Optional[Dict[str, Any]] = None,
parent_args: Optional[Dict[str, TunableValue]] = None,
service: Optional[Service] = None,
) -> Environment:
# pylint: disable=too-many-arguments,too-many-positional-arguments
"""
Load and build new environment from the config file.
Load and build new :py:class:`.Environment` from the config file or JSON string.

Parameters
----------
json_file_name : str
The environment JSON configuration file.
json : str
The environment JSON configuration file or JSON string.
tunables : TunableGroups
A (possibly empty) collection of tunables to add to the environment.
global_config : dict
Expand All @@ -598,26 +606,26 @@ def load_environment(
env : Environment
A new benchmarking environment.
"""
config = self.load_config(json_file_name, ConfigSchema.ENVIRONMENT)
config = self.load_config(json, ConfigSchema.ENVIRONMENT)
assert isinstance(config, dict)
return self.build_environment(config, tunables, global_config, parent_args, service)

def load_environment_list(
self,
json_file_name: str,
json: str,
tunables: TunableGroups,
global_config: Optional[Dict[str, Any]] = None,
parent_args: Optional[Dict[str, TunableValue]] = None,
service: Optional[Service] = None,
) -> List[Environment]:
# pylint: disable=too-many-arguments,too-many-positional-arguments
"""
Load and build a list of environments from the config file.
Load and build a list of Environments from the config file or JSON string.

Parameters
----------
json_file_name : str
The environment JSON configuration file.
json : str
The environment JSON configuration file or a JSON string.
Can contain either one environment or a list of environments.
tunables : TunableGroups
A (possibly empty) collection of tunables to add to the environment.
Expand All @@ -634,23 +642,28 @@ def load_environment_list(
env : List[Environment]
A list of new benchmarking environments.
"""
config = self.load_config(json_file_name, ConfigSchema.ENVIRONMENT)
config = self.load_config(json, ConfigSchema.ENVIRONMENT)
return [self.build_environment(config, tunables, global_config, parent_args, service)]

def load_services(
self,
json_file_names: Iterable[str],
jsons: Iterable[str],
global_config: Optional[Dict[str, Any]] = None,
parent: Optional[Service] = None,
) -> Service:
"""
Read the configuration files and bundle all service methods from those configs
into a single Service object.
Read the configuration files or JSON strings and bundle all Service methods
from those configs into a single Service object.

Notes
-----
Order of the services in the list matters. If multiple Services export the
same method, the last one in the list will be used.

Parameters
----------
json_file_names : list of str
A list of service JSON configuration files.
jsons : list of str
A list of service JSON configuration files or JSON strings.
global_config : dict
Global parameters to add to the service config.
parent : Service
Expand All @@ -661,42 +674,44 @@ def load_services(
service : Service
A collection of service methods.
"""
_LOG.info("Load services: %s parent: %s", json_file_names, parent.__class__.__name__)
_LOG.info("Load services: %s parent: %s", jsons, parent.__class__.__name__)
service = Service({}, global_config, parent)
for fname in json_file_names:
config = self.load_config(fname, ConfigSchema.SERVICE)
for json in jsons:
config = self.load_config(json, ConfigSchema.SERVICE)
service.register(self.build_service(config, global_config, service).export())
return service

def _load_tunables(
def load_tunables(
bpkroth marked this conversation as resolved.
Show resolved Hide resolved
self,
json_file_names: Iterable[str],
parent: TunableGroups,
jsons: Iterable[str],
parent: Optional[TunableGroups] = None,
) -> TunableGroups:
"""
Load a collection of tunable parameters from JSON files into the parent
TunableGroup.
Load a collection of tunable parameters from JSON files or strings into the
parent TunableGroups.

This helps allow standalone environment configs to reference
overlapping tunable groups configs but still allow combining them into
a single instance that each environment can reference.

Parameters
----------
json_file_names : list of str
A list of JSON files to load.
jsons : list of str
A list of JSON files or JSON strings to load.
parent : TunableGroups
A (possibly empty) collection of tunables to add to the new collection.

Returns
-------
tunables : TunableGroup
tunables : TunableGroups
The larger collection of tunable parameters.
"""
_LOG.info("Load tunables: '%s'", json_file_names)
_LOG.info("Load tunables: '%s'", jsons)
if parent is None:
parent = TunableGroups()
tunables = parent.copy()
for fname in json_file_names:
config = self.load_config(fname, ConfigSchema.TUNABLE_PARAMS)
for json in jsons:
config = self.load_config(json, ConfigSchema.TUNABLE_PARAMS)
assert isinstance(config, dict)
tunables.merge(TunableGroups(config))
return tunables
Loading
Loading