Skip to content

Commit

Permalink
Fixups and testing for cli config file parsing (#722)
Browse files Browse the repository at this point in the history
Further fixups to #717

Some parameters were not being respected from `--config
test-cli-config.jsonc` files.

Split out from #720
  • Loading branch information
bpkroth authored Jul 23, 2024
1 parent 6fe46ca commit 7dce3d1
Show file tree
Hide file tree
Showing 6 changed files with 215 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

"config": {
"trial_config_repeat_count": 3,
"max_trials": -1, // Limited only in hte Optimizer logic/config.
"max_trials": -1, // Limited only in the Optimizer logic/config.
"teardown": false
}
}
114 changes: 79 additions & 35 deletions mlos_bench/mlos_bench/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class Launcher:

def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None):
# pylint: disable=too-many-statements
# pylint: disable=too-many-locals
_LOG.info("Launch: %s", description)
epilog = """
Additional --key=value pairs can be specified to augment or override
Expand All @@ -56,7 +57,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st
<https://github.com/microsoft/MLOS/tree/main/mlos_bench/>
"""
parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog)
(args, args_rest) = self._parse_args(parser, argv)
(args, path_args, args_rest) = self._parse_args(parser, argv)

# Bootstrap config loader: command line takes priority.
config_path = args.config_path or []
Expand Down Expand Up @@ -87,11 +88,25 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st

self._parent_service: Service = LocalExecService(parent=self._config_loader)

# Prepare global_config from a combination of global config files, cli
# configs, and cli args.
args_dict = vars(args)
# teardown (bool) conflicts with Environment configs that use it for shell
# commands (list), so we exclude it from copying over
excluded_cli_args = path_args + ["teardown"]
# Include (almost) any item from the cli config file that either isn't in
# the cli args at all or whose cli arg is missing.
cli_config_args = {
key: val
for (key, val) in config.items()
if (args_dict.get(key) is None) and key not in excluded_cli_args
}

self.global_config = self._load_config(
config.get("globals", []) + (args.globals or []),
(args.config_path or []) + config.get("config_path", []),
args_rest,
{key: val for (key, val) in config.items() if key not in vars(args)},
args_globals=config.get("globals", []) + (args.globals or []),
config_path=(args.config_path or []) + config.get("config_path", []),
args_rest=args_rest,
global_config=cli_config_args,
)
# experiment_id is generally taken from --globals files, but we also allow
# overriding it on the CLI.
Expand Down Expand Up @@ -168,19 +183,35 @@ def service(self) -> Service:
def _parse_args(
parser: argparse.ArgumentParser,
argv: Optional[List[str]],
) -> Tuple[argparse.Namespace, List[str]]:
) -> Tuple[argparse.Namespace, List[str], List[str]]:
"""Parse the command line arguments."""
parser.add_argument(

class PathArgsTracker:
    """
    Helper that records which command line arguments are paths.

    Every argument registered through this wrapper is forwarded to the
    underlying parser, and its destination name is remembered in `path_args`.
    """

    def __init__(self, parser: argparse.ArgumentParser):
        self._parser = parser
        # Destination names of the tracked arguments, in registration order.
        self.path_args: List[str] = []

    def add_argument(self, *args: Any, **kwargs: Any) -> None:
        """Register an argument on the wrapped parser and remember its dest."""
        action = self._parser.add_argument(*args, **kwargs)
        self.path_args.append(action.dest)

path_args_tracker = PathArgsTracker(parser)

path_args_tracker.add_argument(
"--config",
required=False,
help="Main JSON5 configuration file. Its keys are the same as the"
+ " command line options and can be overridden by the latter.\n"
+ "\n"
+ " See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ "
+ " for additional config examples for this and other arguments.",
help=(
"Main JSON5 configuration file. Its keys are the same as the "
"command line options and can be overridden by the latter.\n"
"\n"
"See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ "
"for additional config examples for this and other arguments."
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--log_file",
"--log-file",
required=False,
Expand All @@ -192,11 +223,13 @@ def _parse_args(
"--log-level",
required=False,
type=str,
help=f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}."
+ " Set to DEBUG for debug, WARNING for warnings only.",
help=(
f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}. "
"Set to DEBUG for debug, WARNING for warnings only."
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--config_path",
"--config-path",
"--config-paths",
Expand All @@ -207,7 +240,7 @@ def _parse_args(
help="One or more locations of JSON config files.",
)

parser.add_argument(
path_args_tracker.add_argument(
"--service",
"--services",
nargs="+",
Expand All @@ -219,17 +252,19 @@ def _parse_args(
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--environment",
required=False,
help="Path to JSON file with the configuration of the benchmarking environment(s).",
)

parser.add_argument(
path_args_tracker.add_argument(
"--optimizer",
required=False,
help="Path to the optimizer configuration file. If omitted, run"
+ " a single trial with default (or specified in --tunable_values).",
help=(
"Path to the optimizer configuration file. If omitted, run "
"a single trial with default (or specified in --tunable_values)."
),
)

parser.add_argument(
Expand All @@ -243,18 +278,22 @@ def _parse_args(
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--scheduler",
required=False,
help="Path to the scheduler configuration file. By default, use"
+ " a single worker synchronous scheduler.",
help=(
"Path to the scheduler configuration file. By default, use "
"a single worker synchronous scheduler."
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--storage",
required=False,
help="Path to the storage configuration file."
+ " If omitted, use the ephemeral in-memory SQL storage.",
help=(
"Path to the storage configuration file. "
"If omitted, use the ephemeral in-memory SQL storage."
),
)

parser.add_argument(
Expand All @@ -275,24 +314,28 @@ def _parse_args(
help="Seed to use with --random_init",
)

parser.add_argument(
path_args_tracker.add_argument(
"--tunable_values",
"--tunable-values",
nargs="+",
action="extend",
required=False,
help="Path to one or more JSON files that contain values of the tunable"
+ " parameters. This can be used for a single trial (when no --optimizer"
+ " is specified) or as default values for the first run in optimization.",
help=(
"Path to one or more JSON files that contain values of the tunable "
"parameters. This can be used for a single trial (when no --optimizer "
"is specified) or as default values for the first run in optimization."
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--globals",
nargs="+",
action="extend",
required=False,
help="Path to one or more JSON files that contain additional"
+ " [private] parameters of the benchmarking environment.",
help=(
"Path to one or more JSON files that contain additional "
"[private] parameters of the benchmarking environment."
),
)

parser.add_argument(
Expand Down Expand Up @@ -328,7 +371,7 @@ def _parse_args(
argv = sys.argv[1:].copy()
(args, args_rest) = parser.parse_known_args(argv)

return (args, args_rest)
return (args, path_args_tracker.path_args, args_rest)

@staticmethod
def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]:
Expand Down Expand Up @@ -361,6 +404,7 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]:

def _load_config(
self,
*,
args_globals: Iterable[str],
config_path: Iterable[str],
args_rest: Iterable[str],
Expand Down
11 changes: 7 additions & 4 deletions mlos_bench/mlos_bench/optimizers/base_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,20 +135,23 @@ def __exit__(
@property
def current_iteration(self) -> int:
    """
    Number of iterations (suggestions) registered so far.

    Note: this may or may not be the same as the number of configurations.
    See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
    """
    registered = self._iter
    return registered

# TODO: finish renaming iterations to suggestions.
# See Also: https://github.com/microsoft/MLOS/pull/713

@property
def max_iterations(self) -> int:
    """
    Upper bound on the number of iterations (suggestions) to run.

    Note: this may or may not be the same as the number of configurations.
    See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
    """
    limit = self._max_iter
    return limit

Expand Down
32 changes: 32 additions & 0 deletions mlos_bench/mlos_bench/schedulers/base_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pytz import UTC
from typing_extensions import Literal

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.base_environment import Environment
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.storage.base_storage import Storage
Expand Down Expand Up @@ -64,6 +65,7 @@ def __init__( # pylint: disable=too-many-arguments
source=global_config,
required_keys=["experiment_id", "trial_id"],
)
self._validate_json_config(config)

self._experiment_id = config["experiment_id"].strip()
self._trial_id = int(config["trial_id"])
Expand All @@ -88,6 +90,36 @@ def __init__( # pylint: disable=too-many-arguments

_LOG.debug("Scheduler instantiated: %s :: %s", self, config)

def _validate_json_config(self, config: dict) -> None:
    """
    Validate `config` against the Scheduler json schema.

    Rebuilds a basic json config (a "class" entry plus an optional "config"
    section) of the kind this class might have been instantiated from, so that
    configs provided outside the file loading mechanism are validated as well.
    """
    cls = self.__class__
    json_config: dict = {"class": f"{cls.__module__}.{cls.__name__}"}
    if config:
        # Work on a shallow copy so the caller's dict is left untouched.
        config_copy = config.copy()
        # The json schema does not accept -1 for config_id. Since that value is
        # only an optional default placeholder, a negative integer config_id is
        # dropped from the copy before validation.
        config_id = config_copy.get("config_id")
        if isinstance(config_id, int) and config_id < 0:
            del config_copy["config_id"]
        json_config["config"] = config_copy
    ConfigSchema.SCHEDULER.validate(json_config)

@property
def trial_config_repeat_count(self) -> int:
    """Number of trials to run for a given config."""
    repeat_count = self._trial_config_repeat_count
    return repeat_count

@property
def max_trials(self) -> int:
    """
    Maximum number of trials to run for a given experiment.

    A value of -1 means no limit.
    """
    limit = self._max_trials
    return limit

def __repr__(self) -> str:
"""
Produce a human-readable version of the Scheduler (mostly for logging).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"services/remote/mock/mock_fileshare_service.jsonc"
],

"trial_config_repeat_count": 1,
"trial_config_repeat_count": 2,

"random_seed": 42,
"random_init": true
Expand Down
Loading

0 comments on commit 7dce3d1

Please sign in to comment.