[RLlib] Remove default-metrics from Algorithm (tune does NOT error anymore if any stop-metric is missing). #46200

Merged
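What this enables, as a hedged sketch (not code from this PR; the exact metric key path is an assumption based on the new API stack result structure): a Tune run can keep an evaluation metric as its stopping criterion even though that key is missing on iterations in which no evaluation ran, because Tune now only warns about absent stop-metrics instead of erroring.

```python
# Hedged sketch, not part of this PR's diff. Assumes the new API stack result
# key path "evaluation/env_runners/episode_return_mean"; adjust to your setup.
from ray import train, tune
from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("CartPole-v1")
    # Evaluation only runs every 5th iteration, so the eval metric is
    # missing from 4 out of 5 result dicts.
    .evaluation(evaluation_interval=5, evaluation_duration=10)
)

tuner = tune.Tuner(
    "PPO",
    param_space=config,
    run_config=train.RunConfig(
        # Tune now only warns (instead of erroring) on iterations where this
        # key is not present in the results.
        stop={"evaluation/env_runners/episode_return_mean": 450.0},
    ),
)
tuner.fit()
```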
@@ -25,7 +25,6 @@ bc-halfcheetah-v0:
evaluation_interval: 3
evaluation_config:
input: sampler
always_attach_evaluation_results: True
num_workers: 8
lr: 0.001
grad_clip: 40
@@ -53,4 +53,3 @@ cql-halfcheetah-v4:
evaluation_duration_unit: episodes
evaluation_config:
input: sampler
always_attach_evaluation_results: True
@@ -29,7 +29,6 @@ marwil-halfcheetah-v4:
evaluation_config:
input: sampler
off_policy_estimation_methods: null
always_attach_evaluation_results: True
num_workers: 8
lr: 0.001
grad_clip: 40
40 changes: 1 addition & 39 deletions rllib/algorithms/algorithm.py
@@ -528,19 +528,6 @@ def default_logger_creator(config):
self.evaluation_config: Optional[AlgorithmConfig] = None
# Evaluation EnvRunnerGroup and metrics last returned by `self.evaluate()`.
self.evaluation_workers: Optional[EnvRunnerGroup] = None
# Initialize common evaluation_metrics to nan, before they become
# available. We want to make sure the metrics are always present
# (although their values may be nan), so that Tune doesn't complain
# when we use these as stopping criteria.
self.evaluation_metrics = {
EVALUATION_RESULTS: {
ENV_RUNNER_RESULTS: {
EPISODE_RETURN_MAX: np.nan,
EPISODE_RETURN_MIN: np.nan,
EPISODE_RETURN_MEAN: np.nan,
},
},
}

super().__init__(
config=config,
@@ -836,8 +823,6 @@ def step(self) -> ResultDict:
self.config.evaluation_interval
and (self.iteration + 1) % self.config.evaluation_interval == 0
)
evaluate_in_general = bool(self.config.evaluation_interval)

# Results dict for training (and if applicable: evaluation).
train_results: ResultDict = {}
eval_results: ResultDict = {}
@@ -873,19 +858,6 @@ def step(self) -> ResultDict:
if evaluate_this_iter and not self.config.evaluation_parallel_to_training:
eval_results = self._run_one_evaluation(parallel_train_future=None)

# Attach latest available evaluation results to train results, if necessary.
if (
evaluate_in_general
and not evaluate_this_iter
and self.config.always_attach_evaluation_results
):
if not isinstance(self.evaluation_metrics, dict):
raise ValueError(
"Algorithm.evaluate() needs to return a ResultDict, but returned "
f"{self.evaluation_metrics}!"
)
eval_results = self.evaluation_metrics

# Sync EnvRunner workers.
# TODO (sven): For the new API stack, the common execution pattern for any algo
# should be: [sample + get_metrics + get_state] -> send all these in one remote
@@ -3225,17 +3197,7 @@ def _run_one_evaluation(
"num_remote_worker_restarts"
] = self.evaluation_workers.num_remote_worker_restarts()

# Evaluation does not run for every step.
# Save evaluation metrics on Algorithm, so it can be attached to
# subsequent step results as latest evaluation result.
self.evaluation_metrics = {EVALUATION_RESULTS: eval_results}
# To make the old stack forward compatible with the new API stack metrics
# structure, we add everything under the new key (EVALUATION_RESULTS) as well as
# the old one ("evaluation").
if not self.config.enable_env_runner_and_connector_v2:
self.evaluation_metrics["evaluation"] = eval_results

return self.evaluation_metrics
return {EVALUATION_RESULTS: eval_results}

def _run_one_training_iteration_and_evaluation_in_parallel(
self,
20 changes: 7 additions & 13 deletions rllib/algorithms/algorithm_config.py
@@ -452,7 +452,6 @@ def __init__(self, algo_class: Optional[type] = None):
self.ope_split_batch_by_episode = True
self.evaluation_num_env_runners = 0
self.custom_evaluation_function = None
self.always_attach_evaluation_results = True
# TODO: Set this flag still in the config or - much better - in the
# RolloutWorker as a property.
self.in_evaluation = False
@@ -547,6 +546,7 @@ def __init__(self, algo_class: Optional[type] = None):
self._enable_rl_module_api = DEPRECATED_VALUE
self.auto_wrap_old_gym_envs = DEPRECATED_VALUE
self.disable_env_checking = DEPRECATED_VALUE
self.always_attach_evaluation_results = DEPRECATED_VALUE

# The following values have moved because of the new ReplayBuffer API
self.buffer_size = DEPRECATED_VALUE
@@ -2212,9 +2212,8 @@ def evaluation(
ope_split_batch_by_episode: Optional[bool] = NotProvided,
evaluation_num_env_runners: Optional[int] = NotProvided,
custom_evaluation_function: Optional[Callable] = NotProvided,
always_attach_evaluation_results: Optional[bool] = NotProvided,
# Deprecated args.
evaluation_num_episodes=DEPRECATED_VALUE,
always_attach_evaluation_results=DEPRECATED_VALUE,
evaluation_num_workers=DEPRECATED_VALUE,
) -> "AlgorithmConfig":
"""Sets the config's evaluation settings.
@@ -2303,19 +2302,16 @@ def evaluation(
iteration. See the Algorithm.evaluate() method to see the default
implementation. The Algorithm guarantees all eval workers have the
latest policy state before this function is called.
always_attach_evaluation_results: Make sure the latest available evaluation
results are always attached to a step result dict. This may be useful
if Tune or some other meta controller needs access to evaluation metrics
all the time.

Returns:
This updated AlgorithmConfig object.
"""
if evaluation_num_episodes != DEPRECATED_VALUE:
if always_attach_evaluation_results != DEPRECATED_VALUE:
deprecation_warning(
old="AlgorithmConfig.evaluation(evaluation_num_episodes=..)",
new="AlgorithmConfig.evaluation(evaluation_duration=.., "
"evaluation_duration_unit='episodes')",
old="AlgorithmConfig.evaluation(always_attach_evaluation_results=..)",
help="This setting is no longer needed, b/c Tune does not error "
"anymore (only warns) when a metrics key can't be found in the "
"results.",
error=True,
)
if evaluation_num_workers != DEPRECATED_VALUE:
@@ -2363,8 +2359,6 @@ def evaluation(
self.evaluation_num_env_runners = evaluation_num_env_runners
if custom_evaluation_function is not NotProvided:
self.custom_evaluation_function = custom_evaluation_function
if always_attach_evaluation_results is not NotProvided:
self.always_attach_evaluation_results = always_attach_evaluation_results
if ope_split_batch_by_episode is not NotProvided:
self.ope_split_batch_by_episode = ope_split_batch_by_episode

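A hedged migration note (example code, not from this diff): since the argument is now routed through deprecation_warning(..., error=True), passing it to AlgorithmConfig.evaluation() raises rather than being silently accepted, so existing configs should simply drop the flag.

```python
# Hedged sketch, not from this diff: the removed flag now raises a
# deprecation error when passed to AlgorithmConfig.evaluation().
from ray.rllib.algorithms.ppo import PPOConfig

# Works: evaluation settings without the removed flag.
config = PPOConfig().evaluation(evaluation_interval=2, evaluation_duration=10)

# Raises after this PR (deprecation_warning(..., error=True)):
try:
    PPOConfig().evaluation(always_attach_evaluation_results=True)
except Exception as err:
    print(f"Deprecated and removed: {err}")
```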
12 changes: 6 additions & 6 deletions rllib/algorithms/cql/tests/test_cql.py
@@ -64,7 +64,6 @@ def test_cql_compilation(self):
bc_iters=2,
)
.evaluation(
always_attach_evaluation_results=True,
evaluation_interval=2,
evaluation_duration=10,
evaluation_config=cql.CQLConfig.overrides(input_="sampler"),
@@ -83,11 +82,12 @@
results = algo.train()
check_train_results(results)
print(results)
eval_results = results[EVALUATION_RESULTS]
print(
f"iter={algo.iteration} "
f"R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}"
)
eval_results = results.get(EVALUATION_RESULTS)
if eval_results:
print(
f"iter={algo.iteration} "
f"R={eval_results[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN]}"
)
check_compute_single_action(algo)

# Get policy and model.
1 change: 0 additions & 1 deletion rllib/algorithms/marwil/tests/test_marwil.py
@@ -60,7 +60,6 @@ def test_marwil_compilation_and_learning_from_offline_file(self):
evaluation_parallel_to_training=True,
evaluation_config=marwil.MARWILConfig.overrides(input_="sampler"),
off_policy_estimation_methods={},
always_attach_evaluation_results=True,
)
.offline_data(input_=[data_file])
)
8 changes: 3 additions & 5 deletions rllib/algorithms/tests/test_algorithm.py
@@ -250,7 +250,6 @@ def test_evaluation_option(self):
evaluation_duration=2,
evaluation_duration_unit="episodes",
evaluation_config=dqn.DQNConfig.overrides(gamma=0.98),
always_attach_evaluation_results=False,
)
.callbacks(callbacks_class=AssertEvalCallback)
)
@@ -290,14 +289,13 @@ def test_evaluation_option_always_attach_eval_metrics(self):
evaluation_duration=2,
evaluation_duration_unit="episodes",
evaluation_config=dqn.DQNConfig.overrides(gamma=0.98),
always_attach_evaluation_results=True,
)
.reporting(min_sample_timesteps_per_iteration=100)
.callbacks(callbacks_class=AssertEvalCallback)
)
for _ in framework_iterator(config, frameworks=("torch", "tf")):
algo = config.build()
# Should always see latest available eval results.
# Should only see eval results when eval actually ran.
r0 = algo.train()
r1 = algo.train()
r2 = algo.train()
@@ -307,9 +305,9 @@
# Eval results should only be present in iterations where eval
# actually ran (here: r1 and r3) and absent otherwise (r0 and r2).
self.assertTrue(EVALUATION_RESULTS in r0)
self.assertTrue(EVALUATION_RESULTS not in r0)
self.assertTrue(EVALUATION_RESULTS in r1)
self.assertTrue(EVALUATION_RESULTS in r2)
self.assertTrue(EVALUATION_RESULTS not in r2)
self.assertTrue(EVALUATION_RESULTS in r3)

def test_evaluation_wo_evaluation_env_runner_group(self):
6 changes: 2 additions & 4 deletions rllib/env/multi_agent_env_runner.py
@@ -268,7 +268,7 @@ def _sample_timesteps(
# MARLModule forward pass: Explore or not.
if explore:
env_steps_lifetime = self.metrics.peek(
NUM_ENV_STEPS_SAMPLED_LIFETIME
NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0
) + self.metrics.peek(NUM_ENV_STEPS_SAMPLED, default=0)
to_env = self.module.forward_exploration(
to_module, t=env_steps_lifetime
@@ -465,7 +465,7 @@ def _sample_episodes(
# MARLModule forward pass: Explore or not.
if explore:
env_steps_lifetime = self.metrics.peek(
NUM_ENV_STEPS_SAMPLED_LIFETIME
NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0
) + self.metrics.peek(NUM_ENV_STEPS_SAMPLED, default=0)
to_env = self.module.forward_exploration(
to_module, t=env_steps_lifetime
@@ -828,8 +828,6 @@ def _make_module(self):

def _setup_metrics(self):
self.metrics = MetricsLogger()
# Initialize lifetime counts.
self.metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")

self._done_episodes_for_metrics: List[MultiAgentEpisode] = []
self._ongoing_episodes_for_metrics: DefaultDict[
2 changes: 0 additions & 2 deletions rllib/env/single_agent_env_runner.py
@@ -67,8 +67,6 @@ def __init__(self, config: AlgorithmConfig, **kwargs):

# Create a MetricsLogger object for logging custom stats.
self.metrics = MetricsLogger()
# Initialize lifetime counts.
self.metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")
Collaborator:
Don't we need the lifetime counts in the EnvRunner anymore? Are they initialized elsewhere?

Contributor Author:
We now call self.metrics.peek(.. some key) with arg default=0, so no initialization is necessary anymore.


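A minimal sketch of the pattern described in the reply (the import paths are assumptions; the peek call with a default mirrors the change in this diff):

```python
# Minimal sketch: no up-front
#   log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")
# is needed anymore; peek(..., default=0) covers the not-yet-logged case.
from ray.rllib.utils.metrics import (
    NUM_ENV_STEPS_SAMPLED,
    NUM_ENV_STEPS_SAMPLED_LIFETIME,
)
from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

metrics = MetricsLogger()

# Before any sampling has been logged, both peeks fall back to the default.
env_steps_lifetime = metrics.peek(
    NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0
) + metrics.peek(NUM_ENV_STEPS_SAMPLED, default=0)
print(env_steps_lifetime)  # 0
```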
# Create our callbacks object.
self._callbacks: DefaultCallbacks = self.config.callbacks_class()
@@ -164,11 +164,6 @@ def on_train_result(
"Number of run evaluation timesteps: "
f"{num_timesteps_reported} (ok)!"
)
# Expect at least evaluation/env_runners to be always available.
elif algorithm.config.always_attach_evaluation_results and (
not eval_env_runner_results
):
raise KeyError("`evaluation->env_runners` not found in result dict!")


if __name__ == "__main__":