Skip to content

Commit

Permalink
[RLlib] Add APPO/IMPALA multi-agent StatelessCartPole learning tests …
Browse files Browse the repository at this point in the history
…to CI (+ fix some bugs related to this). (#47245)
  • Loading branch information
sven1977 authored Aug 22, 2024
1 parent 4e226be commit 746f6b6
Show file tree
Hide file tree
Showing 18 changed files with 413 additions and 222 deletions.
4 changes: 2 additions & 2 deletions .buildkite/rllib.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ steps:
tags:
- rllib_gpu
- gpu
parallelism: 4
parallelism: 5
instance_type: gpu
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
Expand Down Expand Up @@ -165,7 +165,7 @@ steps:
tags:
- rllib_gpu
- gpu
parallelism: 4
parallelism: 5
instance_type: gpu-large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
Expand Down
134 changes: 100 additions & 34 deletions rllib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -184,23 +184,6 @@ py_test(
srcs = ["tuned_examples/appo/cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# StatelessCartPole
py_test(
name = "learning_tests_stateless_cartpole_appo",
main = "tuned_examples/appo/stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
py_test(
name = "learning_tests_stateless_cartpole_appo_multi_gpu",
main = "tuned_examples/appo/stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# MultiAgentCartPole
py_test(
name = "learning_tests_multi_agent_cartpole_appo",
Expand Down Expand Up @@ -234,6 +217,72 @@ py_test(
srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2", "--num-cpus=7"]
)
# StatelessCartPole
py_test(
name = "learning_tests_stateless_cartpole_appo",
main = "tuned_examples/appo/stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
py_test(
name = "learning_tests_stateless_cartpole_appo_gpu",
main = "tuned_examples/appo/stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
size = "large",
srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1"]
)
py_test(
name = "learning_tests_stateless_cartpole_appo_multi_cpu",
main = "tuned_examples/appo/stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
py_test(
name = "learning_tests_stateless_cartpole_appo_multi_gpu",
main = "tuned_examples/appo/stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# MultiAgentStatelessCartPole
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_appo",
main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_appo_gpu",
main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
size = "large",
srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1"]
)
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_cpu",
main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_gpu",
main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)

#@OldAPIStack
py_test(
Expand Down Expand Up @@ -462,23 +511,6 @@ py_test(
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# StatelessCartPole
py_test(
name = "learning_tests_stateless_cartpole_impala",
main = "tuned_examples/impala/stateless_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
py_test(
name = "learning_tests_stateless_cartpole_impala_multi_gpu",
main = "tuned_examples/impala/stateless_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# MultiAgentCartPole
py_test(
name = "learning_tests_multi_agent_cartpole_impala",
Expand Down Expand Up @@ -512,6 +544,40 @@ py_test(
srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2", "--num-cpus=7"]
)
# StatelessCartPole
py_test(
name = "learning_tests_stateless_cartpole_impala",
main = "tuned_examples/impala/stateless_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
py_test(
name = "learning_tests_stateless_cartpole_impala_multi_gpu",
main = "tuned_examples/impala/stateless_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# MultiAgentStatelessCartPole
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_impala",
main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_impala_multi_gpu",
main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)

#@OldAPIStack
py_test(
Expand Down
19 changes: 14 additions & 5 deletions rllib/connectors/common/add_states_from_episodes_to_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def __call__(
# Also, let the module-to-env pipeline know that we have added a single-timestep
# time rank to the data (so that it can be removed again).
if not self._as_learner_connector:
for column, column_data in data.copy().items():
for column in data.keys():
self.foreach_batch_item_change_in_place(
batch=data,
column=column,
Expand All @@ -250,11 +250,20 @@ def __call__(
# Before adding STATE_IN to the `data`, zero-pad existing data and batch
# into max_seq_len chunks.
for column, column_data in data.copy().items():
# Do not zero-pad INFOS column.
if column == Columns.INFOS:
continue
for key, item_list in column_data.items():
if column != Columns.INFOS:
column_data[key] = split_and_zero_pad_list(
item_list, T=self.max_seq_len
)
# Multi-agent case AND RLModule is not stateful -> Do not zero-pad
# for this model.
assert isinstance(key, tuple)
if len(key) == 3:
eps_id, aid, mid = key
if not rl_module[mid].is_stateful():
continue
column_data[key] = split_and_zero_pad_list(
item_list, T=self.max_seq_len
)

for sa_episode in self.single_agent_episode_iterator(
episodes,
Expand Down
13 changes: 10 additions & 3 deletions rllib/connectors/env_to_module/mean_std_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,16 @@ def __call__(
# anymore to the original observations).
for sa_episode in self.single_agent_episode_iterator(episodes):
sa_obs = sa_episode.get_observations(indices=-1)
normalized_sa_obs = self._filters[sa_episode.agent_id](
sa_obs, update=self._update_stats
)
try:
normalized_sa_obs = self._filters[sa_episode.agent_id](
sa_obs, update=self._update_stats
)
except KeyError:
raise KeyError(
"KeyError trying to access a filter by agent ID "
f"`{sa_episode.agent_id}`! You probably did NOT pass the "
f"`multi_agent=True` flag into the `MeanStdFilter()` constructor. "
)
sa_episode.set_observations(at_indices=-1, new_data=normalized_sa_obs)
# We set the Episode's observation space to ours so that we can safely
# set the last obs to the new value (without causing a space mismatch
Expand Down
16 changes: 15 additions & 1 deletion rllib/connectors/learner/add_one_ts_to_episodes_and_truncate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from ray.rllib.connectors.connector_v2 import ConnectorV2
from ray.rllib.core.columns import Columns
from ray.rllib.core.rl_module.rl_module import RLModule
from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
from ray.rllib.utils.annotations import override
from ray.rllib.utils.postprocessing.episodes import add_one_ts_to_episodes_and_truncate
from ray.rllib.utils.typing import EpisodeType
Expand Down Expand Up @@ -101,10 +102,23 @@ def __call__(
# batch: - - - - - - - T B0- - - - - R Bx- - - - R Bx
# mask : t t t t t t t t f t t t t t t f t t t t t f

# TODO (sven): Same situation as in TODO below, but for multi-agent episode.
# Maybe add a dedicated connector piece for this task?
# We extend the MultiAgentEpisode's ID by a running number here to make sure
# we treat each MAEpisode chunk as separate (for potentially upcoming v-trace
# and LSTM zero-padding) and don't mix data from different chunks.
if isinstance(episodes[0], MultiAgentEpisode):
for i, ma_episode in enumerate(episodes):
ma_episode.id_ += "_" + str(i)
# Also change the underlying single-agent episode's
# `multi_agent_episode_id` properties.
for sa_episode in ma_episode.agent_episodes.values():
sa_episode.multi_agent_episode_id = ma_episode.id_

for i, sa_episode in enumerate(
self.single_agent_episode_iterator(episodes, agents_that_stepped_only=False)
):
# TODO (sven): This is a little bit of a hack: By expanding the Episode's
# TODO (sven): This is a little bit of a hack: By extending the Episode's
# ID, we make sure that each episode chunk in `episodes` is treated as a
# separate episode in the `self.add_n_batch_items` below. Some algos (e.g.
# APPO) may have >1 episode chunks from the same episode (same ID) in the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,21 @@ def __call__(
if shared_data is None or not shared_data.get("_added_single_ts_time_rank"):
return data

data = tree.map_structure_with_path(
lambda p, s: s if Columns.STATE_OUT in p else np.squeeze(s, axis=0),
data,
)
def _remove_single_ts(item, eps_id, aid, mid):
# Only remove the time rank for modules that are stateful (a time rank has
# only been added for those).
if mid is None or rl_module[mid].is_stateful():
return tree.map_structure(lambda s: np.squeeze(s, axis=0), item)
return item

for column, column_data in data.copy().items():
# Skip state_out (doesn't have a time rank).
if column == Columns.STATE_OUT:
continue
self.foreach_batch_item_change_in_place(
data,
column=column,
func=_remove_single_ts,
)

return data
Loading

0 comments on commit 746f6b6

Please sign in to comment.