Basic error checks and exit zero on success (#850)
The `mlos_bench` CLI wrapper currently exits non-zero even on success.

This PR adds some basic sanity checks and makes sure we exit 0 when the
process looks roughly OK.

Further work is tracked in #523.
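
For reference, the fixed behavior can be checked end-to-end by inspecting the process exit code. A minimal sketch (assuming `mlos_bench` is installed on the `PATH`; `config.jsonc` is a hypothetical placeholder for a real benchmark config):

```python
import subprocess

# Run the CLI wrapper and confirm it now exits 0 on a successful run.
result = subprocess.run(
    ["mlos_bench", "--config", "config.jsonc"],  # hypothetical config file
    capture_output=True,
    text=True,
    check=False,
)
assert result.returncode == 0, f"mlos_bench exited non-zero:\n{result.stderr}"
```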

---------

Co-authored-by: Sergiy Matusevych <sergiym@microsoft.com>
Co-authored-by: Sergiy Matusevych <sergiy.matusevych@gmail.com>
3 people authored Aug 20, 2024
1 parent 85fdec3 commit e514908
Showing 4 changed files with 74 additions and 4 deletions.
55 changes: 53 additions & 2 deletions mlos_bench/mlos_bench/run.py
@@ -12,29 +12,80 @@
"""

import logging
import sys
from typing import Dict, List, Optional, Tuple

import numpy as np

from mlos_bench.launcher import Launcher
from mlos_bench.tunables.tunable_groups import TunableGroups

_LOG = logging.getLogger(__name__)


def _sanity_check_results(launcher: Launcher) -> None:
"""Do some sanity checking on the results and throw an exception if it looks like
something went wrong.
"""
basic_err_msg = "Check configuration, scripts, and logs for details."

# Check if the scheduler has any trials.
if not launcher.scheduler.trial_count:
raise RuntimeError(f"No trials were run. {basic_err_msg}")

# Check if the scheduler ran the expected number of trials.
expected_trial_count = min(
launcher.scheduler.max_trials if launcher.scheduler.max_trials > 0 else np.inf,
launcher.scheduler.trial_config_repeat_count * launcher.optimizer.max_suggestions,
)
if launcher.scheduler.trial_count < expected_trial_count:
raise RuntimeError(
f"Expected {expected_trial_count} trials, "
f"but only {launcher.scheduler.trial_count} were run. {basic_err_msg}"
)

# Check to see if "too many" trials seem to have failed (#523).
unsuccessful_trials = [t for t in launcher.scheduler.ran_trials if not t.status.is_succeeded()]
if len(unsuccessful_trials) > 0.2 * launcher.scheduler.trial_count:
raise RuntimeWarning(
"Too many trials failed: "
f"{len(unsuccessful_trials)} out of {launcher.scheduler.trial_count}. "
f"{basic_err_msg}"
)


def _main(
argv: Optional[List[str]] = None,
) -> Tuple[Optional[Dict[str, float]], Optional[TunableGroups]]:

launcher = Launcher("mlos_bench", "Systems autotuning and benchmarking tool", argv=argv)

with launcher.scheduler as scheduler_context:
scheduler_context.start()
scheduler_context.teardown()

_sanity_check_results(launcher)

(score, _config) = result = launcher.scheduler.get_best_observation()
# NOTE: This log line is used in test_launch_main_app_* unit tests:
_LOG.info("Final score: %s", score)
return result


def _shell_main(
argv: Optional[List[str]] = None,
) -> int:
(best_score, best_config) = _main(argv)
# Exit zero if it looks like the overall operation was successful.
# TODO: Improve this sanity check to be more robust.
if (
best_score
and best_config
and all(isinstance(score_value, float) for score_value in best_score.values())
):
return 0
else:
raise ValueError(f"Unexpected result: {best_score=}, {best_config=}")


if __name__ == "__main__":
_main()
sys.exit(_shell_main())
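
To make the thresholds in `_sanity_check_results()` concrete, here is a worked example of the expected-trial-count and failure-rate math (a sketch with assumed numbers, not values taken from this PR):

```python
import numpy as np

# Assumed settings: no trial limit, 3 repeats per config, 10 optimizer suggestions.
max_trials = -1  # <= 0 is treated as "no limit"
trial_config_repeat_count = 3
max_suggestions = 10

expected_trial_count = min(
    max_trials if max_trials > 0 else np.inf,
    trial_config_repeat_count * max_suggestions,
)
assert expected_trial_count == 30

# The failure-rate check tolerates up to 20% unsuccessful trials,
# so 7 failures out of 30 would raise the RuntimeWarning above.
unsuccessful_trial_count = 7
assert unsuccessful_trial_count > 0.2 * expected_trial_count  # 7 > 6
```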
14 changes: 13 additions & 1 deletion mlos_bench/mlos_bench/schedulers/base_scheduler.py
@@ -9,7 +9,7 @@
from abc import ABCMeta, abstractmethod
from datetime import datetime
from types import TracebackType
from typing import Any, Dict, Optional, Tuple, Type
from typing import Any, Dict, List, Optional, Tuple, Type

from pytz import UTC
from typing_extensions import Literal
@@ -87,6 +87,7 @@ def __init__( # pylint: disable=too-many-arguments
self.storage = storage
self._root_env_config = root_env_config
self._last_trial_id = -1
self._ran_trials: List[Storage.Trial] = []

_LOG.debug("Scheduler instantiated: %s :: %s", self, config)

@@ -113,6 +114,11 @@ def trial_config_repeat_count(self) -> int:
"""Gets the number of trials to run for a given config."""
return self._trial_config_repeat_count

@property
def trial_count(self) -> int:
"""Gets the current number of trials run for the experiment."""
return self._trial_count

@property
def max_trials(self) -> int:
"""Gets the maximum number of trials to run for a given experiment, or -1 for no
@@ -302,4 +308,10 @@ def run_trial(self, trial: Storage.Trial) -> None:
"""
assert self.experiment is not None
self._trial_count += 1
self._ran_trials.append(trial)
_LOG.info("QUEUE: Execute trial # %d/%d :: %s", self._trial_count, self._max_trials, trial)

@property
def ran_trials(self) -> List[Storage.Trial]:
"""Get the list of trials that were run."""
return self._ran_trials
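
The new `trial_count` and `ran_trials` accessors are what `_sanity_check_results()` in `run.py` consumes. A small sketch of the same usage pattern (the `summarize_trials` helper is hypothetical, not part of this PR):

```python
from mlos_bench.schedulers.base_scheduler import Scheduler


def summarize_trials(scheduler: Scheduler) -> None:
    """Report how many of a scheduler's trials were unsuccessful, using the
    trial_count and ran_trials accessors added in this commit."""
    failed = [t for t in scheduler.ran_trials if not t.status.is_succeeded()]
    print(f"{len(failed)} of {scheduler.trial_count} trials were unsuccessful")
```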
7 changes: 7 additions & 0 deletions mlos_bench/mlos_bench/storage/base_storage.py
@@ -391,6 +391,7 @@ def __init__( # pylint: disable=too-many-arguments
self._tunable_config_id = tunable_config_id
self._opt_targets = opt_targets
self._config = config or {}
self._status = Status.UNKNOWN

def __repr__(self) -> str:
return f"{self._experiment_id}:{self._trial_id}:{self._tunable_config_id}"
@@ -435,6 +436,11 @@ def config(self, global_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
config["trial_id"] = self._trial_id
return config

@property
def status(self) -> Status:
"""Get the status of the current trial."""
return self._status

@abstractmethod
def update(
self,
@@ -471,6 +477,7 @@ def update(
opt_targets.difference(metrics.keys()),
)
# raise ValueError()
self._status = status
return metrics

@abstractmethod
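
With the new `_status` field, a trial starts out as `Status.UNKNOWN` and only records a real status once `update()` is called. A rough illustration (the `finish_trial` helper is hypothetical, and assumes `update()` takes `(status, timestamp, metrics)` as in this file):

```python
from datetime import datetime

from pytz import UTC

from mlos_bench.environments.status import Status
from mlos_bench.storage.base_storage import Storage


def finish_trial(trial: Storage.Trial) -> None:
    """Illustrate the trial status lifecycle added in this commit."""
    assert trial.status == Status.UNKNOWN  # no update() has happened yet
    trial.update(Status.SUCCEEDED, datetime.now(UTC), metrics={"score": 0.9})
    assert trial.status.is_succeeded()
```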
2 changes: 1 addition & 1 deletion mlos_bench/pyproject.toml
@@ -41,7 +41,7 @@ dynamic = [
]

[project.scripts]
mlos_bench = "mlos_bench.run:_main"
mlos_bench = "mlos_bench.run:_shell_main"

[project.urls]
Documentation = "https://microsoft.github.io/MLOS/source_tree_docs/mlos_bench/"
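
Pointing the console script at `_shell_main` matters because setuptools-generated entry-point wrappers pass the function's return value to `sys.exit()`. Roughly, the generated `mlos_bench` script behaves like this sketch (not code from this repo):

```python
import sys

from mlos_bench.run import _shell_main

if __name__ == "__main__":
    # The int returned by _shell_main() becomes the process exit code.
    sys.exit(_shell_main())
```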