From bde7f33c4743ec8bb6b01767cceab13dff98a958 Mon Sep 17 00:00:00 2001
From: sven1977
Date: Sat, 22 Jun 2024 09:20:30 +0200
Subject: [PATCH 1/3] wip

Signed-off-by: sven1977
---
 .../yaml_files/bc/bc-halfcheetah-v4.yaml      |  1 -
 .../yaml_files/cql/cql-halfcheetah-v4.yaml    |  1 -
 .../marwil/marwil-halfcheetah-v4.yaml         |  1 -
 rllib/algorithms/algorithm.py                 | 40 +------------------
 rllib/algorithms/algorithm_config.py          | 23 ++++-------
 rllib/algorithms/cql/tests/test_cql.py        | 12 +++---
 rllib/algorithms/marwil/tests/test_marwil.py  |  1 -
 rllib/algorithms/tests/test_algorithm.py      |  8 ++--
 .../evaluation_parallel_to_training.py        |  5 ---
 9 files changed, 18 insertions(+), 74 deletions(-)

diff --git a/release/rllib_tests/learning_tests/yaml_files/bc/bc-halfcheetah-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/bc/bc-halfcheetah-v4.yaml
index 199022e32d99..df0ebbf6a3b8 100644
--- a/release/rllib_tests/learning_tests/yaml_files/bc/bc-halfcheetah-v4.yaml
+++ b/release/rllib_tests/learning_tests/yaml_files/bc/bc-halfcheetah-v4.yaml
@@ -25,7 +25,6 @@ bc-halfcheetah-v0:
     evaluation_interval: 3
     evaluation_config:
       input: sampler
-      always_attach_evaluation_results: True
     num_workers: 8
     lr: 0.001
     grad_clip: 40
diff --git a/release/rllib_tests/learning_tests/yaml_files/cql/cql-halfcheetah-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/cql/cql-halfcheetah-v4.yaml
index 32b7299b9f7f..67295091800f 100644
--- a/release/rllib_tests/learning_tests/yaml_files/cql/cql-halfcheetah-v4.yaml
+++ b/release/rllib_tests/learning_tests/yaml_files/cql/cql-halfcheetah-v4.yaml
@@ -53,4 +53,3 @@ cql-halfcheetah-v4:
     evaluation_duration_unit: episodes
     evaluation_config:
       input: sampler
-      always_attach_evaluation_results: True
diff --git a/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetah-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetah-v4.yaml
index 59ff10051cfb..f62a95f5144d 100644
--- a/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetah-v4.yaml
+++ b/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetah-v4.yaml
@@ -29,7 +29,6 @@ marwil-halfcheetah-v4:
     evaluation_config:
      input: sampler
      off_policy_estimation_methods: null
-     always_attach_evaluation_results: True
    num_workers: 8
    lr: 0.001
    grad_clip: 40
diff --git a/rllib/algorithms/algorithm.py b/rllib/algorithms/algorithm.py
index cd4a1f8c8a08..39824b8c56f4 100644
--- a/rllib/algorithms/algorithm.py
+++ b/rllib/algorithms/algorithm.py
@@ -541,19 +541,6 @@ def default_logger_creator(config):
         self.evaluation_config: Optional[AlgorithmConfig] = None
         # Evaluation EnvRunnerGroup and metrics last returned by `self.evaluate()`.
         self.evaluation_workers: Optional[EnvRunnerGroup] = None
-        # Initialize common evaluation_metrics to nan, before they become
-        # available. We want to make sure the metrics are always present
-        # (although their values may be nan), so that Tune doesn't complain
-        # when we use these as stopping criteria.
-        self.evaluation_metrics = {
-            EVALUATION_RESULTS: {
-                ENV_RUNNER_RESULTS: {
-                    EPISODE_RETURN_MAX: np.nan,
-                    EPISODE_RETURN_MIN: np.nan,
-                    EPISODE_RETURN_MEAN: np.nan,
-                },
-            },
-        }

         super().__init__(
             config=config,
@@ -849,8 +836,6 @@ def step(self) -> ResultDict:
             self.config.evaluation_interval
             and (self.iteration + 1) % self.config.evaluation_interval == 0
         )
-        evaluate_in_general = bool(self.config.evaluation_interval)
-
         # Results dict for training (and if applicable: evaluation).
         train_results: ResultDict = {}
         eval_results: ResultDict = {}
@@ -886,19 +871,6 @@ def step(self) -> ResultDict:
         if evaluate_this_iter and not self.config.evaluation_parallel_to_training:
             eval_results = self._run_one_evaluation(parallel_train_future=None)

-        # Attach latest available evaluation results to train results, if necessary.
-        if (
-            evaluate_in_general
-            and not evaluate_this_iter
-            and self.config.always_attach_evaluation_results
-        ):
-            if not isinstance(self.evaluation_metrics, dict):
-                raise ValueError(
-                    "Algorithm.evaluate() needs to return a ResultDict, but returned "
-                    f"{self.evaluation_metrics}!"
-                )
-            eval_results = self.evaluation_metrics
-
         # Sync EnvRunner workers.
         # TODO (sven): For the new API stack, the common execution pattern for any algo
         #  should be: [sample + get_metrics + get_state] -> send all these in one remote
@@ -3219,17 +3191,7 @@ def _run_one_evaluation(
                 "num_remote_worker_restarts"
             ] = self.evaluation_workers.num_remote_worker_restarts()

-        # Evaluation does not run for every step.
-        # Save evaluation metrics on Algorithm, so it can be attached to
-        # subsequent step results as latest evaluation result.
-        self.evaluation_metrics = {EVALUATION_RESULTS: eval_results}
-        # To make the old stack forward compatible with the new API stack metrics
-        # structure, we add everything under the new key (EVALUATION_RESULTS) as well as
-        # the old one ("evaluation").
-        if not self.config.enable_env_runner_and_connector_v2:
-            self.evaluation_metrics["evaluation"] = eval_results
-
-        return self.evaluation_metrics
+        return {EVALUATION_RESULTS: eval_results}

     def _run_one_training_iteration_and_evaluation_in_parallel(
         self,
diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index dccf062a22fb..4a2603378be1 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -452,7 +452,6 @@ def __init__(self, algo_class: Optional[type] = None):
         self.ope_split_batch_by_episode = True
         self.evaluation_num_env_runners = 0
         self.custom_evaluation_function = None
-        self.always_attach_evaluation_results = True
         # TODO: Set this flag still in the config or - much better - in the
         #  RolloutWorker as a property.
         self.in_evaluation = False
@@ -547,6 +546,7 @@ def __init__(self, algo_class: Optional[type] = None):
         self._enable_rl_module_api = DEPRECATED_VALUE
         self.auto_wrap_old_gym_envs = DEPRECATED_VALUE
         self.disable_env_checking = DEPRECATED_VALUE
+        self.always_attach_evaluation_results = DEPRECATED_VALUE

         # The following values have moved because of the new ReplayBuffer API
         self.buffer_size = DEPRECATED_VALUE
@@ -2212,9 +2212,8 @@ def evaluation(
         ope_split_batch_by_episode: Optional[bool] = NotProvided,
         evaluation_num_env_runners: Optional[int] = NotProvided,
         custom_evaluation_function: Optional[Callable] = NotProvided,
-        always_attach_evaluation_results: Optional[bool] = NotProvided,
         # Deprecated args.
-        evaluation_num_episodes=DEPRECATED_VALUE,
+        always_attach_evaluation_results=DEPRECATED_VALUE,
         evaluation_num_workers=DEPRECATED_VALUE,
     ) -> "AlgorithmConfig":
         """Sets the config's evaluation settings.
@@ -2300,28 +2299,24 @@ def evaluation(
                 metrics: dict. See the Algorithm.evaluate() method to see the default
                 implementation. The Algorithm guarantees all eval workers have the
                 latest policy state before this function is called.
-            always_attach_evaluation_results: Make sure the latest available evaluation
-                results are always attached to a step result dict.
-                This may be useful if Tune or some other meta controller needs
-                access to evaluation metrics all the time.

        Returns:
            This updated AlgorithmConfig object.
        """
-        if evaluation_num_episodes != DEPRECATED_VALUE:
+        if always_attach_evaluation_results != DEPRECATED_VALUE:
             deprecation_warning(
-                old="AlgorithmConfig.evaluation(evaluation_num_episodes=..)",
-                new="AlgorithmConfig.evaluation(evaluation_duration=.., "
-                "evaluation_duration_unit='episodes')",
+                old="AlgorithmConfig.evaluation(always_attach_evaluation_results=..)",
+                help="This setting is no longer needed, b/c Tune does not error "
+                "anymore (only warns) when a metrics key can't be found in the "
+                "results.",
                 error=True,
             )
         if evaluation_num_workers != DEPRECATED_VALUE:
             deprecation_warning(
                 old="AlgorithmConfig.evaluation(evaluation_num_workers=..)",
                 new="AlgorithmConfig.evaluation(evaluation_num_env_runners=..)",
-                error=False,
+                error=True,
             )
-            self.evaluation_num_env_runners = evaluation_num_workers

         if evaluation_interval is not NotProvided:
             self.evaluation_interval = evaluation_interval
@@ -2360,8 +2355,6 @@ def evaluation(
             self.evaluation_num_env_runners = evaluation_num_env_runners
         if custom_evaluation_function is not NotProvided:
             self.custom_evaluation_function = custom_evaluation_function
-        if always_attach_evaluation_results is not NotProvided:
-            self.always_attach_evaluation_results = always_attach_evaluation_results
         if ope_split_batch_by_episode is not NotProvided:
             self.ope_split_batch_by_episode = ope_split_batch_by_episode

diff --git a/rllib/algorithms/cql/tests/test_cql.py b/rllib/algorithms/cql/tests/test_cql.py
index 063c42c04060..b658b0c323c7 100644
--- a/rllib/algorithms/cql/tests/test_cql.py
+++ b/rllib/algorithms/cql/tests/test_cql.py
@@ -64,7 +64,6 @@ def test_cql_compilation(self):
                 bc_iters=2,
             )
             .evaluation(
-                always_attach_evaluation_results=True,
                 evaluation_interval=2,
                 evaluation_duration=10,
                 evaluation_config=cql.CQLConfig.overrides(input_="sampler"),
@@ -83,11 +82,12 @@ def test_cql_compilation(self):
             results = algo.train()
             check_train_results(results)
             print(results)
-            eval_results = results[EVALUATION_RESULTS]
-            print(
-                f"iter={algo.iteration} "
-                f"R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}"
-            )
+            eval_results = results.get(EVALUATION_RESULTS)
+            if eval_results:
+                print(
+                    f"iter={algo.iteration} "
+                    f"R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}"
+                )
             check_compute_single_action(algo)

             # Get policy and model.
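Note on the test change directly above: after this series, EVALUATION_RESULTS only appears in a train() result dict on iterations in which evaluation actually ran (every `evaluation_interval`-th iteration), so downstream code should treat the key as optional rather than rely on the removed always_attach_evaluation_results flag. A minimal sketch of that pattern (hypothetical driver snippet, not part of this patch; assumes an already built Algorithm object named `algo` with an evaluation_interval configured):

    from ray.rllib.utils.metrics import (
        ENV_RUNNER_RESULTS,
        EPISODE_RETURN_MEAN,
        EVALUATION_RESULTS,
    )

    for i in range(4):
        results = algo.train()
        # The key is only present when evaluation ran during this iteration.
        eval_results = results.get(EVALUATION_RESULTS)
        if eval_results:
            print(f"iter={i} R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}")
        else:
            print(f"iter={i} (no evaluation ran this iteration)")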
diff --git a/rllib/algorithms/marwil/tests/test_marwil.py b/rllib/algorithms/marwil/tests/test_marwil.py
index 3722554f7dec..4430f6df0856 100644
--- a/rllib/algorithms/marwil/tests/test_marwil.py
+++ b/rllib/algorithms/marwil/tests/test_marwil.py
@@ -60,7 +60,6 @@ def test_marwil_compilation_and_learning_from_offline_file(self):
                 evaluation_parallel_to_training=True,
                 evaluation_config=marwil.MARWILConfig.overrides(input_="sampler"),
                 off_policy_estimation_methods={},
-                always_attach_evaluation_results=True,
             )
             .offline_data(input_=[data_file])
         )
diff --git a/rllib/algorithms/tests/test_algorithm.py b/rllib/algorithms/tests/test_algorithm.py
index af67840d3f33..9fedadaf48bb 100644
--- a/rllib/algorithms/tests/test_algorithm.py
+++ b/rllib/algorithms/tests/test_algorithm.py
@@ -250,7 +250,6 @@ def test_evaluation_option(self):
                 evaluation_duration=2,
                 evaluation_duration_unit="episodes",
                 evaluation_config=dqn.DQNConfig.overrides(gamma=0.98),
-                always_attach_evaluation_results=False,
             )
             .callbacks(callbacks_class=AssertEvalCallback)
         )
@@ -290,14 +289,13 @@ def test_evaluation_option_always_attach_eval_metrics(self):
                 evaluation_duration=2,
                 evaluation_duration_unit="episodes",
                 evaluation_config=dqn.DQNConfig.overrides(gamma=0.98),
-                always_attach_evaluation_results=True,
             )
             .reporting(min_sample_timesteps_per_iteration=100)
             .callbacks(callbacks_class=AssertEvalCallback)
         )
         for _ in framework_iterator(config, frameworks=("torch", "tf")):
             algo = config.build()
-            # Should always see latest available eval results.
+            # Should only see eval results, when eval actually ran.
             r0 = algo.train()
             r1 = algo.train()
             r2 = algo.train()
@@ -307,9 +305,9 @@ def test_evaluation_option_always_attach_eval_metrics(self):
             # Eval results are not available at step 0.
             # But step 3 should still have it, even though no eval was
             # run during that step.
-            self.assertTrue(EVALUATION_RESULTS in r0)
+            self.assertTrue(EVALUATION_RESULTS not in r0)
             self.assertTrue(EVALUATION_RESULTS in r1)
-            self.assertTrue(EVALUATION_RESULTS in r2)
+            self.assertTrue(EVALUATION_RESULTS not in r2)
             self.assertTrue(EVALUATION_RESULTS in r3)

     def test_evaluation_wo_evaluation_env_runner_group(self):
diff --git a/rllib/examples/evaluation/evaluation_parallel_to_training.py b/rllib/examples/evaluation/evaluation_parallel_to_training.py
index 382cec33c77e..4d5814ab2489 100644
--- a/rllib/examples/evaluation/evaluation_parallel_to_training.py
+++ b/rllib/examples/evaluation/evaluation_parallel_to_training.py
@@ -164,11 +164,6 @@ def on_train_result(
                     "Number of run evaluation timesteps: "
                    f"{num_timesteps_reported} (ok)!"
                )
-            # Expect at least evaluation/env_runners to be always available.
-            elif algorithm.config.always_attach_evaluation_results and (
-                not eval_env_runner_results
-            ):
-                raise KeyError("`evaluation->env_runners` not found in result dict!")


 if __name__ == "__main__":

From 514ca69c94c1db08f1f2bbc65363b510157dacb8 Mon Sep 17 00:00:00 2001
From: sven1977
Date: Sat, 22 Jun 2024 09:44:25 +0200
Subject: [PATCH 2/3] wip

Signed-off-by: sven1977
---
 rllib/env/multi_agent_env_runner.py  | 6 ++----
 rllib/env/single_agent_env_runner.py | 2 --
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py
index 98d08bf6c603..6144b5411f60 100644
--- a/rllib/env/multi_agent_env_runner.py
+++ b/rllib/env/multi_agent_env_runner.py
@@ -268,7 +268,7 @@ def _sample_timesteps(
                 # MARLModule forward pass: Explore or not.
                 if explore:
                     env_steps_lifetime = self.metrics.peek(
-                        NUM_ENV_STEPS_SAMPLED_LIFETIME
+                        NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0
                     ) + self.metrics.peek(NUM_ENV_STEPS_SAMPLED, default=0)
                     to_env = self.module.forward_exploration(
                         to_module, t=env_steps_lifetime
@@ -465,7 +465,7 @@ def _sample_episodes(
                 # MARLModule forward pass: Explore or not.
                 if explore:
                     env_steps_lifetime = self.metrics.peek(
-                        NUM_ENV_STEPS_SAMPLED_LIFETIME
+                        NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0
                     ) + self.metrics.peek(NUM_ENV_STEPS_SAMPLED, default=0)
                     to_env = self.module.forward_exploration(
                         to_module, t=env_steps_lifetime
@@ -828,8 +826,6 @@ def _make_module(self):

     def _setup_metrics(self):
         self.metrics = MetricsLogger()
-        # Initialize lifetime counts.
-        self.metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")

         self._done_episodes_for_metrics: List[MultiAgentEpisode] = []
         self._ongoing_episodes_for_metrics: DefaultDict[
diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py
index 0b6355f16a83..b1a10f89a912 100644
--- a/rllib/env/single_agent_env_runner.py
+++ b/rllib/env/single_agent_env_runner.py
@@ -67,8 +67,6 @@ def __init__(self, config: AlgorithmConfig, **kwargs):

         # Create a MetricsLogger object for logging custom stats.
         self.metrics = MetricsLogger()
-        # Initialize lifetime counts.
-        self.metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")

         # Create our callbacks object.
         self._callbacks: DefaultCallbacks = self.config.callbacks_class()

From 9eb92a6b4ed989ed744037246f1de6e447b44814 Mon Sep 17 00:00:00 2001
From: sven1977
Date: Wed, 26 Jun 2024 09:07:29 +0200
Subject: [PATCH 3/3] wip

Signed-off-by: sven1977
---
 rllib/algorithms/algorithm_config.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index 4a2603378be1..ec2d00638512 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -2315,8 +2315,9 @@ def evaluation(
             deprecation_warning(
                 old="AlgorithmConfig.evaluation(evaluation_num_workers=..)",
                 new="AlgorithmConfig.evaluation(evaluation_num_env_runners=..)",
-                error=True,
+                error=False,
             )
+            self.evaluation_num_env_runners = evaluation_num_workers

         if evaluation_interval is not NotProvided:
             self.evaluation_interval = evaluation_interval
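Note on the EnvRunner changes in PATCH 2/3: pre-seeding the lifetime counter at setup time is no longer needed because MetricsLogger.peek() can fall back to a caller-provided default when the key has not been logged yet. A minimal sketch of that pattern (standalone snippet, not the runners' actual code; assumes the MetricsLogger import path used elsewhere in RLlib):

    from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME
    from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

    metrics = MetricsLogger()
    # Key not logged yet -> peek() returns the provided default instead of raising.
    print(metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0))  # 0
    # Once values are logged with reduce="sum", peek() returns the running sum.
    metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 200, reduce="sum")
    metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 300, reduce="sum")
    print(metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0))  # 500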