
Commit 0395e04

[RLlib] Remove default-metrics from Algorithm (tune does NOT error anymore if any stop-metric is missing). (#46200)

sven1977 authored Jun 26, 2024 · 1 parent d090d86
Showing 11 changed files with 19 additions and 78 deletions.
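With this change, the result dict returned by `Algorithm.train()` only contains an EVALUATION_RESULTS entry on iterations in which evaluation actually ran, so callers should treat the key as optional. A minimal sketch of that caller-side pattern (assuming the metric constants are importable from `ray.rllib.utils.metrics`, as in the tests touched below):

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.utils.metrics import (
    ENV_RUNNER_RESULTS,
    EPISODE_RETURN_MEAN,
    EVALUATION_RESULTS,
)

config = (
    PPOConfig()
    .environment("CartPole-v1")
    .evaluation(evaluation_interval=2, evaluation_duration=5)
)
algo = config.build()
for i in range(4):
    results = algo.train()
    # May be absent on non-evaluation iterations -> use .get() instead of [].
    eval_results = results.get(EVALUATION_RESULTS)
    if eval_results:
        print(i, eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN])
algo.stop()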
@@ -25,7 +25,6 @@ bc-halfcheetah-v0:
         evaluation_interval: 3
         evaluation_config:
             input: sampler
-        always_attach_evaluation_results: True
         num_workers: 8
         lr: 0.001
         grad_clip: 40
@@ -53,4 +53,3 @@ cql-halfcheetah-v4:
         evaluation_duration_unit: episodes
         evaluation_config:
             input: sampler
-        always_attach_evaluation_results: True
@@ -29,7 +29,6 @@ marwil-halfcheetah-v4:
         evaluation_config:
             input: sampler
             off_policy_estimation_methods: null
-        always_attach_evaluation_results: True
         num_workers: 8
         lr: 0.001
         grad_clip: 40
40 changes: 1 addition & 39 deletions rllib/algorithms/algorithm.py
@@ -528,19 +528,6 @@ def default_logger_creator(config):
         self.evaluation_config: Optional[AlgorithmConfig] = None
         # Evaluation EnvRunnerGroup and metrics last returned by `self.evaluate()`.
         self.evaluation_workers: Optional[EnvRunnerGroup] = None
-        # Initialize common evaluation_metrics to nan, before they become
-        # available. We want to make sure the metrics are always present
-        # (although their values may be nan), so that Tune doesn't complain
-        # when we use these as stopping criteria.
-        self.evaluation_metrics = {
-            EVALUATION_RESULTS: {
-                ENV_RUNNER_RESULTS: {
-                    EPISODE_RETURN_MAX: np.nan,
-                    EPISODE_RETURN_MIN: np.nan,
-                    EPISODE_RETURN_MEAN: np.nan,
-                },
-            },
-        }

         super().__init__(
             config=config,
@@ -836,8 +823,6 @@ def step(self) -> ResultDict:
             self.config.evaluation_interval
             and (self.iteration + 1) % self.config.evaluation_interval == 0
         )
-        evaluate_in_general = bool(self.config.evaluation_interval)
-
         # Results dict for training (and if applicable: evaluation).
         train_results: ResultDict = {}
         eval_results: ResultDict = {}
@@ -873,19 +858,6 @@ def step(self) -> ResultDict:
         if evaluate_this_iter and not self.config.evaluation_parallel_to_training:
             eval_results = self._run_one_evaluation(parallel_train_future=None)

-        # Attach latest available evaluation results to train results, if necessary.
-        if (
-            evaluate_in_general
-            and not evaluate_this_iter
-            and self.config.always_attach_evaluation_results
-        ):
-            if not isinstance(self.evaluation_metrics, dict):
-                raise ValueError(
-                    "Algorithm.evaluate() needs to return a ResultDict, but returned "
-                    f"{self.evaluation_metrics}!"
-                )
-            eval_results = self.evaluation_metrics
-
         # Sync EnvRunner workers.
         # TODO (sven): For the new API stack, the common execution pattern for any algo
         # should be: [sample + get_metrics + get_state] -> send all these in one remote
@@ -3225,17 +3197,7 @@ def _run_one_evaluation(
                 "num_remote_worker_restarts"
             ] = self.evaluation_workers.num_remote_worker_restarts()

-        # Evaluation does not run for every step.
-        # Save evaluation metrics on Algorithm, so it can be attached to
-        # subsequent step results as latest evaluation result.
-        self.evaluation_metrics = {EVALUATION_RESULTS: eval_results}
-        # To make the old stack forward compatible with the new API stack metrics
-        # structure, we add everything under the new key (EVALUATION_RESULTS) as well as
-        # the old one ("evaluation").
-        if not self.config.enable_env_runner_and_connector_v2:
-            self.evaluation_metrics["evaluation"] = eval_results
-
-        return self.evaluation_metrics
+        return {EVALUATION_RESULTS: eval_results}

     def _run_one_training_iteration_and_evaluation_in_parallel(
         self,
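The NaN placeholders removed above existed only so that Tune stopping criteria referencing evaluation metrics would never hit a missing key; since Tune now merely warns when a stop-metric is absent, such criteria can be used directly. A sketch under that assumption (the flattened metric path "evaluation/env_runners/episode_return_mean" follows the new result-dict layout and is not taken verbatim from this diff):

from ray import train, tune
from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("CartPole-v1")
    .evaluation(evaluation_interval=2, evaluation_duration=5)
)
tuner = tune.Tuner(
    "PPO",
    param_space=config,
    run_config=train.RunConfig(
        # On non-evaluation iterations this key is simply missing; Tune warns and continues.
        stop={"evaluation/env_runners/episode_return_mean": 450.0},
    ),
)
tuner.fit()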
20 changes: 7 additions & 13 deletions rllib/algorithms/algorithm_config.py
@@ -452,7 +452,6 @@ def __init__(self, algo_class: Optional[type] = None):
         self.ope_split_batch_by_episode = True
         self.evaluation_num_env_runners = 0
         self.custom_evaluation_function = None
-        self.always_attach_evaluation_results = True
         # TODO: Set this flag still in the config or - much better - in the
         # RolloutWorker as a property.
         self.in_evaluation = False
@@ -547,6 +546,7 @@ def __init__(self, algo_class: Optional[type] = None):
         self._enable_rl_module_api = DEPRECATED_VALUE
         self.auto_wrap_old_gym_envs = DEPRECATED_VALUE
         self.disable_env_checking = DEPRECATED_VALUE
+        self.always_attach_evaluation_results = DEPRECATED_VALUE

         # The following values have moved because of the new ReplayBuffer API
         self.buffer_size = DEPRECATED_VALUE
@@ -2212,9 +2212,8 @@ def evaluation(
         ope_split_batch_by_episode: Optional[bool] = NotProvided,
         evaluation_num_env_runners: Optional[int] = NotProvided,
         custom_evaluation_function: Optional[Callable] = NotProvided,
-        always_attach_evaluation_results: Optional[bool] = NotProvided,
         # Deprecated args.
-        evaluation_num_episodes=DEPRECATED_VALUE,
+        always_attach_evaluation_results=DEPRECATED_VALUE,
         evaluation_num_workers=DEPRECATED_VALUE,
     ) -> "AlgorithmConfig":
         """Sets the config's evaluation settings.
@@ -2303,19 +2302,16 @@ def evaluation(
                 iteration. See the Algorithm.evaluate() method to see the default
                 implementation. The Algorithm guarantees all eval workers have the
                 latest policy state before this function is called.
-            always_attach_evaluation_results: Make sure the latest available evaluation
-                results are always attached to a step result dict. This may be useful
-                if Tune or some other meta controller needs access to evaluation metrics
-                all the time.

         Returns:
             This updated AlgorithmConfig object.
         """
-        if evaluation_num_episodes != DEPRECATED_VALUE:
+        if always_attach_evaluation_results != DEPRECATED_VALUE:
             deprecation_warning(
-                old="AlgorithmConfig.evaluation(evaluation_num_episodes=..)",
-                new="AlgorithmConfig.evaluation(evaluation_duration=.., "
-                "evaluation_duration_unit='episodes')",
+                old="AlgorithmConfig.evaluation(always_attach_evaluation_results=..)",
+                help="This setting is no longer needed, b/c Tune does not error "
+                "anymore (only warns) when a metrics key can't be found in the "
+                "results.",
                 error=True,
             )
         if evaluation_num_workers != DEPRECATED_VALUE:
@@ -2363,8 +2359,6 @@ def evaluation(
             self.evaluation_num_env_runners = evaluation_num_env_runners
         if custom_evaluation_function is not NotProvided:
             self.custom_evaluation_function = custom_evaluation_function
-        if always_attach_evaluation_results is not NotProvided:
-            self.always_attach_evaluation_results = always_attach_evaluation_results
         if ope_split_batch_by_episode is not NotProvided:
             self.ope_split_batch_by_episode = ope_split_batch_by_episode

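For users migrating, `always_attach_evaluation_results` is now a hard-deprecated argument of `AlgorithmConfig.evaluation()` and passing it raises an error; the fix is simply to drop it. An illustrative before/after (not part of the diff):

from ray.rllib.algorithms.dqn import DQNConfig

# Before (now raises via deprecation_warning(..., error=True)):
# config = DQNConfig().evaluation(
#     evaluation_interval=2,
#     always_attach_evaluation_results=True,
# )

# After: just configure evaluation; a missing eval key only triggers a Tune warning.
config = DQNConfig().evaluation(
    evaluation_interval=2,
    evaluation_duration=2,
    evaluation_duration_unit="episodes",
)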
12 changes: 6 additions & 6 deletions rllib/algorithms/cql/tests/test_cql.py
@@ -64,7 +64,6 @@ def test_cql_compilation(self):
                 bc_iters=2,
             )
             .evaluation(
-                always_attach_evaluation_results=True,
                 evaluation_interval=2,
                 evaluation_duration=10,
                 evaluation_config=cql.CQLConfig.overrides(input_="sampler"),
@@ -83,11 +82,12 @@
             results = algo.train()
             check_train_results(results)
             print(results)
-            eval_results = results[EVALUATION_RESULTS]
-            print(
-                f"iter={algo.iteration} "
-                f"R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}"
-            )
+            eval_results = results.get(EVALUATION_RESULTS)
+            if eval_results:
+                print(
+                    f"iter={algo.iteration} "
+                    f"R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}"
+                )
         check_compute_single_action(algo)

         # Get policy and model.
1 change: 0 additions & 1 deletion rllib/algorithms/marwil/tests/test_marwil.py
@@ -60,7 +60,6 @@ def test_marwil_compilation_and_learning_from_offline_file(self):
                 evaluation_parallel_to_training=True,
                 evaluation_config=marwil.MARWILConfig.overrides(input_="sampler"),
                 off_policy_estimation_methods={},
-                always_attach_evaluation_results=True,
             )
             .offline_data(input_=[data_file])
         )
8 changes: 3 additions & 5 deletions rllib/algorithms/tests/test_algorithm.py
@@ -250,7 +250,6 @@ def test_evaluation_option(self):
                 evaluation_duration=2,
                 evaluation_duration_unit="episodes",
                 evaluation_config=dqn.DQNConfig.overrides(gamma=0.98),
-                always_attach_evaluation_results=False,
             )
             .callbacks(callbacks_class=AssertEvalCallback)
         )
@@ -290,14 +289,13 @@ def test_evaluation_option_always_attach_eval_metrics(self):
                 evaluation_duration=2,
                 evaluation_duration_unit="episodes",
                 evaluation_config=dqn.DQNConfig.overrides(gamma=0.98),
-                always_attach_evaluation_results=True,
             )
             .reporting(min_sample_timesteps_per_iteration=100)
             .callbacks(callbacks_class=AssertEvalCallback)
         )
         for _ in framework_iterator(config, frameworks=("torch", "tf")):
             algo = config.build()
-            # Should always see latest available eval results.
+            # Should only see eval results, when eval actually ran.
             r0 = algo.train()
             r1 = algo.train()
             r2 = algo.train()
@@ -307,9 +305,9 @@
             # Eval results are not available at step 0.
             # But step 3 should still have it, even though no eval was
             # run during that step.
-            self.assertTrue(EVALUATION_RESULTS in r0)
+            self.assertTrue(EVALUATION_RESULTS not in r0)
             self.assertTrue(EVALUATION_RESULTS in r1)
-            self.assertTrue(EVALUATION_RESULTS in r2)
+            self.assertTrue(EVALUATION_RESULTS not in r2)
             self.assertTrue(EVALUATION_RESULTS in r3)

     def test_evaluation_wo_evaluation_env_runner_group(self):
6 changes: 2 additions & 4 deletions rllib/env/multi_agent_env_runner.py
@@ -268,7 +268,7 @@ def _sample_timesteps(
                 # MARLModule forward pass: Explore or not.
                 if explore:
                     env_steps_lifetime = self.metrics.peek(
-                        NUM_ENV_STEPS_SAMPLED_LIFETIME
+                        NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0
                     ) + self.metrics.peek(NUM_ENV_STEPS_SAMPLED, default=0)
                     to_env = self.module.forward_exploration(
                         to_module, t=env_steps_lifetime
@@ -465,7 +465,7 @@ def _sample_episodes(
                 # MARLModule forward pass: Explore or not.
                 if explore:
                     env_steps_lifetime = self.metrics.peek(
-                        NUM_ENV_STEPS_SAMPLED_LIFETIME
+                        NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0
                     ) + self.metrics.peek(NUM_ENV_STEPS_SAMPLED, default=0)
                     to_env = self.module.forward_exploration(
                         to_module, t=env_steps_lifetime
@@ -828,8 +828,6 @@ def _make_module(self):

     def _setup_metrics(self):
         self.metrics = MetricsLogger()
-        # Initialize lifetime counts.
-        self.metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")

         self._done_episodes_for_metrics: List[MultiAgentEpisode] = []
         self._ongoing_episodes_for_metrics: DefaultDict[
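Because the lifetime counter is no longer pre-logged with 0 in `_setup_metrics()`, any peek at it before the first sample round has to supply a default, as the two hunks above do. A small sketch of that behavior (the import path `ray.rllib.utils.metrics.metrics_logger` and the exact reduce semantics are assumptions, not taken from this diff):

from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

metrics = MetricsLogger()

# Nothing logged yet -> rely on `default=` rather than a pre-logged 0.
print(metrics.peek("num_env_steps_sampled_lifetime", default=0))  # -> 0

# Once values are logged with reduce="sum", peek() returns the accumulated count.
metrics.log_value("num_env_steps_sampled_lifetime", 32, reduce="sum")
metrics.log_value("num_env_steps_sampled_lifetime", 16, reduce="sum")
print(metrics.peek("num_env_steps_sampled_lifetime"))  # expected: 48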
2 changes: 0 additions & 2 deletions rllib/env/single_agent_env_runner.py
@@ -67,8 +67,6 @@ def __init__(self, config: AlgorithmConfig, **kwargs):

         # Create a MetricsLogger object for logging custom stats.
         self.metrics = MetricsLogger()
-        # Initialize lifetime counts.
-        self.metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")

         # Create our callbacks object.
         self._callbacks: DefaultCallbacks = self.config.callbacks_class()
5 changes: 0 additions & 5 deletions rllib/examples/evaluation/evaluation_parallel_to_training.py
@@ -164,11 +164,6 @@ def on_train_result(
                 "Number of run evaluation timesteps: "
                 f"{num_timesteps_reported} (ok)!"
             )
-        # Expect at least evaluation/env_runners to be always available.
-        elif algorithm.config.always_attach_evaluation_results and (
-            not eval_env_runner_results
-        ):
-            raise KeyError("`evaluation->env_runners` not found in result dict!")


 if __name__ == "__main__":
