Fixups and testing for cli config file parsing (#722)

Further fixups to #717 Some parameters were not being respected from `--config test-cli-config.jsonc` files. Split out from #720
microsoft · Jul 23, 2024 · 7dce3d1 · 7dce3d1
1 parent 6fe46ca
commit 7dce3d1
Show file tree

Hide file tree

Showing 6 changed files with 215 additions and 80 deletions.
diff --git a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc
@@ -6,7 +6,7 @@
 
  "config": {
  "trial_config_repeat_count": 3,
- "max_trials": -1, // Limited only in hte Optimizer logic/config.
+ "max_trials": -1, // Limited only in the Optimizer logic/config.
  "teardown": false
  }
 }
diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py
@@ -44,6 +44,7 @@ class Launcher:
 
  def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None):
  # pylint: disable=too-many-statements
+ # pylint: disable=too-many-locals
  _LOG.info("Launch: %s", description)
  epilog = """
  Additional --key=value pairs can be specified to augment or override
@@ -56,7 +57,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st
  <https://github.com/microsoft/MLOS/tree/main/mlos_bench/>
  """
  parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog)
- (args, args_rest) = self._parse_args(parser, argv)
+ (args, path_args, args_rest) = self._parse_args(parser, argv)
 
  # Bootstrap config loader: command line takes priority.
  config_path = args.config_path or []
@@ -87,11 +88,25 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st
 
  self._parent_service: Service = LocalExecService(parent=self._config_loader)
 
+ # Prepare global_config from a combination of global config files, cli
+ # configs, and cli args.
+ args_dict = vars(args)
+ # teardown (bool) conflicts with Environment configs that use it for shell
+ # commands (list), so we exclude it from copying over
+ excluded_cli_args = path_args + ["teardown"]
+ # Include (almost) any item from the cli config file that either isn't in
+ # the cli args at all or whose cli arg is missing.
+ cli_config_args = {
+ key: val
+ for (key, val) in config.items()
+ if (args_dict.get(key) is None) and key not in excluded_cli_args
+ }
+
  self.global_config = self._load_config(
- config.get("globals", []) + (args.globals or []),
- (args.config_path or []) + config.get("config_path", []),
- args_rest,
- {key: val for (key, val) in config.items() if key not in vars(args)},
+ args_globals=config.get("globals", []) + (args.globals or []),
+ config_path=(args.config_path or []) + config.get("config_path", []),
+ args_rest=args_rest,
+ global_config=cli_config_args,
  )
  # experiment_id is generally taken from --globals files, but we also allow
  # overriding it on the CLI.
@@ -168,19 +183,35 @@ def service(self) -> Service:
  def _parse_args(
  parser: argparse.ArgumentParser,
  argv: Optional[List[str]],
- ) -> Tuple[argparse.Namespace, List[str]]:
+ ) -> Tuple[argparse.Namespace, List[str], List[str]]:
  """Parse the command line arguments."""
- parser.add_argument(
+
+ class PathArgsTracker:
+ """Simple class to help track which arguments are paths."""
+
+ def __init__(self, parser: argparse.ArgumentParser):
+ self._parser = parser
+ self.path_args: List[str] = []
+
+ def add_argument(self, *args: Any, **kwargs: Any) -> None:
+ """Add an argument to the parser and track its destination."""
+ self.path_args.append(self._parser.add_argument(*args, **kwargs).dest)
+
+ path_args_tracker = PathArgsTracker(parser)
+
+ path_args_tracker.add_argument(
  "--config",
  required=False,
- help="Main JSON5 configuration file. Its keys are the same as the"
- + " command line options and can be overridden by the latter.\n"
- + "\n"
- + " See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ "
- + " for additional config examples for this and other arguments.",
+ help=(
+ "Main JSON5 configuration file. Its keys are the same as the "
+ "command line options and can be overridden by the latter.\n"
+ "\n"
+ "See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ "
+ "for additional config examples for this and other arguments."
+ ),
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--log_file",
  "--log-file",
  required=False,
@@ -192,11 +223,13 @@ def _parse_args(
  "--log-level",
  required=False,
  type=str,
- help=f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}."
- + " Set to DEBUG for debug, WARNING for warnings only.",
+ help=(
+ f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}. "
+ "Set to DEBUG for debug, WARNING for warnings only."
+ ),
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--config_path",
  "--config-path",
  "--config-paths",
@@ -207,7 +240,7 @@ def _parse_args(
  help="One or more locations of JSON config files.",
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--service",
  "--services",
  nargs="+",
@@ -219,17 +252,19 @@ def _parse_args(
  ),
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--environment",
  required=False,
  help="Path to JSON file with the configuration of the benchmarking environment(s).",
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--optimizer",
  required=False,
- help="Path to the optimizer configuration file. If omitted, run"
- + " a single trial with default (or specified in --tunable_values).",
+ help=(
+ "Path to the optimizer configuration file. If omitted, run "
+ "a single trial with default (or specified in --tunable_values)."
+ ),
  )
 
  parser.add_argument(
@@ -243,18 +278,22 @@ def _parse_args(
  ),
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--scheduler",
  required=False,
- help="Path to the scheduler configuration file. By default, use"
- + " a single worker synchronous scheduler.",
+ help=(
+ "Path to the scheduler configuration file. By default, use "
+ "a single worker synchronous scheduler."
+ ),
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--storage",
  required=False,
- help="Path to the storage configuration file."
- + " If omitted, use the ephemeral in-memory SQL storage.",
+ help=(
+ "Path to the storage configuration file. "
+ "If omitted, use the ephemeral in-memory SQL storage."
+ ),
  )
 
  parser.add_argument(
@@ -275,24 +314,28 @@ def _parse_args(
  help="Seed to use with --random_init",
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--tunable_values",
  "--tunable-values",
  nargs="+",
  action="extend",
  required=False,
- help="Path to one or more JSON files that contain values of the tunable"
- + " parameters. This can be used for a single trial (when no --optimizer"
- + " is specified) or as default values for the first run in optimization.",
+ help=(
+ "Path to one or more JSON files that contain values of the tunable "
+ "parameters. This can be used for a single trial (when no --optimizer "
+ "is specified) or as default values for the first run in optimization."
+ ),
  )
 
- parser.add_argument(
+ path_args_tracker.add_argument(
  "--globals",
  nargs="+",
  action="extend",
  required=False,
- help="Path to one or more JSON files that contain additional"
- + " [private] parameters of the benchmarking environment.",
+ help=(
+ "Path to one or more JSON files that contain additional "
+ "[private] parameters of the benchmarking environment."
+ ),
  )
 
  parser.add_argument(
@@ -328,7 +371,7 @@ def _parse_args(
  argv = sys.argv[1:].copy()
  (args, args_rest) = parser.parse_known_args(argv)
 
- return (args, args_rest)
+ return (args, path_args_tracker.path_args, args_rest)
 
  @staticmethod
  def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]:
@@ -361,6 +404,7 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]:
 
  def _load_config(
  self,
+ *,
  args_globals: Iterable[str],
  config_path: Iterable[str],
  args_rest: Iterable[str],

diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py
@@ -135,20 +135,23 @@ def __exit__(
  @property
  def current_iteration(self) -> int:
  """
- The current number of iterations (trials) registered.
+ The current number of iterations (suggestions) registered.
 
  Note: this may or may not be the same as the number of configurations.
- See Also: Launcher.trial_config_repeat_count.
+ See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
  """
  return self._iter
 
+ # TODO: finish renaming iterations to suggestions.
+ # See Also: https://github.com/microsoft/MLOS/pull/713
+
  @property
  def max_iterations(self) -> int:
  """
- The maximum number of iterations (trials) to run.
+ The maximum number of iterations (suggestions) to run.
 
  Note: this may or may not be the same as the number of configurations.
- See Also: Launcher.trial_config_repeat_count.
+ See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
  """
  return self._max_iter
 

diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py
@@ -14,6 +14,7 @@
 from pytz import UTC
 from typing_extensions import Literal
 
+from mlos_bench.config.schemas import ConfigSchema
 from mlos_bench.environments.base_environment import Environment
 from mlos_bench.optimizers.base_optimizer import Optimizer
 from mlos_bench.storage.base_storage import Storage
@@ -64,6 +65,7 @@ def __init__( # pylint: disable=too-many-arguments
  source=global_config,
  required_keys=["experiment_id", "trial_id"],
  )
+ self._validate_json_config(config)
 
  self._experiment_id = config["experiment_id"].strip()
  self._trial_id = int(config["trial_id"])
@@ -88,6 +90,36 @@ def __init__( # pylint: disable=too-many-arguments
 
  _LOG.debug("Scheduler instantiated: %s :: %s", self, config)
 
+ def _validate_json_config(self, config: dict) -> None:
+ """Reconstructs a basic json config that this class might have been instantiated
+ from in order to validate configs provided outside the file loading
+ mechanism.
+ """
+ json_config: dict = {
+ "class": self.__class__.__module__ + "." + self.__class__.__name__,
+ }
+ if config:
+ json_config["config"] = config.copy()
+ # The json schema does not allow for -1 as a valid value for config_id.
+ # As it is just a default placeholder value, and not required, we can
+ # remove it from the config copy prior to validation safely.
+ config_id = json_config["config"].get("config_id")
+ if config_id is not None and isinstance(config_id, int) and config_id < 0:
+ json_config["config"].pop("config_id")
+ ConfigSchema.SCHEDULER.validate(json_config)
+
+ @property
+ def trial_config_repeat_count(self) -> int:
+ """Gets the number of trials to run for a given config."""
+ return self._trial_config_repeat_count
+
+ @property
+ def max_trials(self) -> int:
+ """Gets the maximum number of trials to run for a given experiment, or -1 for no
+ limit.
+ """
+ return self._max_trials
+
  def __repr__(self) -> str:
  """
  Produce a human-readable version of the Scheduler (mostly for logging).

diff --git a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc
@@ -17,7 +17,7 @@
  "services/remote/mock/mock_fileshare_service.jsonc"
  ],
 
- "trial_config_repeat_count": 1,
+ "trial_config_repeat_count": 2,
 
  "random_seed": 42,
  "random_init": true