
[RLlib] Add systematic APPO learning tests to CI. Combinations of [1CPU|2CPUs|1GPU|2GPUs] + [single-agent|multi-agent] (#46299)
sven1977 authored Jun 27, 2024
1 parent e40d489 commit 3138f73
Showing 5 changed files with 61 additions and 10 deletions.
.buildkite/rllib.rayci.yml (3 changes: 2 additions & 1 deletion)

@@ -107,7 +107,7 @@ steps:
     tags:
       - rllib
       - gpu
-    parallelism: 2
+    parallelism: 3
     instance_type: gpu
     commands:
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
@@ -199,6 +199,7 @@ steps:
     tags:
       - rllib
       - gpu
+    parallelism: 2
    instance_type: gpu-large
     commands:
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
rllib/BUILD (48 changes: 48 additions & 0 deletions)

@@ -160,6 +160,30 @@ py_test(
     srcs = ["tuned_examples/appo/cartpole_appo.py"],
     args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
 )
+py_test(
+    name = "learning_tests_cartpole_appo_gpu",
+    main = "tuned_examples/appo/cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
+    size = "large",
+    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
+)
+py_test(
+    name = "learning_tests_cartpole_appo_multi_cpu",
+    main = "tuned_examples/appo/cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
+    size = "large",
+    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
+)
+py_test(
+    name = "learning_tests_cartpole_appo_multi_gpu",
+    main = "tuned_examples/appo/cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
+    size = "large",
+    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
+)
 py_test(
     name = "learning_tests_multi_agent_cartpole_appo",
     main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
@@ -168,6 +192,30 @@ py_test(
     srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
     args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1"]
 )
+py_test(
+    name = "learning_tests_multi_agent_cartpole_appo_gpu",
+    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
+    size = "large",
+    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1", "--num-cpus=6"]
+)
+py_test(
+    name = "learning_tests_multi_agent_cartpole_appo_multi_cpu",
+    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
+    size = "large",
+    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2", "--num-cpus=7"]
+)
+py_test(
+    name = "learning_tests_multi_agent_cartpole_appo_multi_gpu",
+    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
+    size = "large",
+    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2", "--num-cpus=7"]
+)
 
 #@OldAPIStack
 py_test(
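The three new suffixes per agent mode follow a single naming and argument scheme, completing the [1CPU|2CPUs|1GPU|2GPUs] x [single-agent|multi-agent] matrix from the commit title (the plain 1-CPU targets presumably already existed). The following sketch of that matrix is for illustration only; it is not code from rllib/BUILD, the multi-agent targets additionally pin --num-cpus (omitted here), and reading --num-gpus as "number of learners, placed on GPUs only when the target carries a gpu or multi_gpu tag" is an assumption about the tuned-example scripts:

# Illustration of the APPO learning-test matrix (not repository code).
ADDED_SUFFIXES = {
    "_gpu":       ["--num-gpus=1"],  # 1 learner on 1 GPU ("gpu"-tagged CI job)
    "_multi_cpu": ["--num-gpus=2"],  # 2 learners on a CPU machine (no gpu/multi_gpu tag)
    "_multi_gpu": ["--num-gpus=2"],  # 2 learners on 2 GPUs ("multi_gpu"-tagged job)
}

for base, agent_args in [
    ("learning_tests_cartpole_appo", []),
    ("learning_tests_multi_agent_cartpole_appo", ["--num-agents=2"]),
]:
    for suffix, hw_args in ADDED_SUFFIXES.items():
        print(base + suffix, ["--as-test", "--enable-new-api-stack", *agent_args, *hw_args])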
rllib/algorithms/appo/appo.py (2 changes: 0 additions & 2 deletions)

@@ -100,8 +100,6 @@ def __init__(self, algo_class=None):
 
         # Override some of ImpalaConfig's default values with APPO-specific values.
         self.num_env_runners = 2
-        self.rollout_fragment_length = 50
-        self.train_batch_size = 500
         self.min_time_s_per_iteration = 10
         self.num_gpus = 0
         self.num_multi_gpu_tower_stacks = 1
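Removing these two overrides means APPOConfig now inherits the corresponding defaults from ImpalaConfig (the impala.py hunks below show rollout_fragment_length = 50 there). Experiments that relied on APPO's former train_batch_size = 500 can still set the old values explicitly; a minimal sketch, assuming the standard AlgorithmConfig setters:

from ray.rllib.algorithms.appo import APPOConfig

# Explicitly restore the two former APPO-specific defaults (values taken from
# the deleted lines above), in case an experiment depended on them.
config = (
    APPOConfig()
    .training(train_batch_size=500)
    .env_runners(rollout_fragment_length=50)
)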
rllib/algorithms/impala/impala.py (14 changes: 7 additions & 7 deletions)

@@ -4,7 +4,7 @@
 import platform
 import queue
 import random
-from typing import Callable, List, Optional, Set, Tuple, Type, Union
+from typing import List, Optional, Set, Tuple, Type, Union
 
 import numpy as np
 import tree  # pip install dm_tree
@@ -28,6 +28,7 @@
 )
 from ray.rllib.utils.actors import create_colocated_actors
 from ray.rllib.utils.annotations import OldAPIStack, override
+from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning
 from ray.rllib.utils.metrics import (
     ALL_MODULES,
     ENV_RUNNER_RESULTS,
@@ -161,7 +162,6 @@ def __init__(self, algo_class=None):
         self.entropy_coeff_schedule = None
         self._separate_vf_optimizer = False  # @OldAPIstack
         self._lr_vf = 0.0005  # @OldAPIstack
-        self.after_train_step = None
 
         # Override some of AlgorithmConfig's default values with IMPALA-specific values.
         self.rollout_fragment_length = 50
@@ -218,7 +218,8 @@ def training(
         entropy_coeff_schedule: Optional[List[List[Union[int, float]]]] = NotProvided,
         _separate_vf_optimizer: Optional[bool] = NotProvided,
         _lr_vf: Optional[float] = NotProvided,
-        after_train_step: Optional[Callable[[dict], None]] = NotProvided,
+        # Deprecated args.
+        after_train_step=DEPRECATED_VALUE,
         **kwargs,
     ) -> "ImpalaConfig":
         """Sets the training related configuration.
@@ -301,15 +302,16 @@
                 algorithms (APPO, IMPALA) on the old API stack.
             _lr_vf: If _separate_vf_optimizer is True, define separate learning rate
                 for the value network.
-            after_train_step: Callback for APPO to use to update KL, target network
-                periodically. The input to the callback is the learner fetches dict.
 
         Returns:
             This updated AlgorithmConfig object.
         """
         # Pass kwargs onto super's `training()` method.
         super().training(**kwargs)
 
+        if after_train_step != DEPRECATED_VALUE:
+            deprecation_warning(old="config.training(after_train_step=...)", error=True)
+
         if vtrace is not NotProvided:
             self.vtrace = vtrace
         if vtrace_clip_rho_threshold is not NotProvided:
@@ -368,8 +370,6 @@ def training(
             self._separate_vf_optimizer = _separate_vf_optimizer
         if _lr_vf is not NotProvided:
             self._lr_vf = _lr_vf
-        if after_train_step is not NotProvided:
-            self.after_train_step = after_train_step
         if minibatch_size is not NotProvided:
             self._minibatch_size = minibatch_size
 
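Net effect for callers: after_train_step is gone as a config attribute, and passing it to training() now errors out via deprecation_warning(..., error=True) rather than being stored. A short sketch of the expected behavior, assuming the import path shown in the diff:

from ray.rllib.algorithms.impala import ImpalaConfig

config = ImpalaConfig()
# Unrelated settings keep working as before.
config.training(train_batch_size=512)

# Passing the removed argument now raises (error=True) instead of silently
# storing the callback as it did before this commit.
try:
    config.training(after_train_step=lambda fetches: None)
except Exception as e:
    print(f"after_train_step is deprecated: {e}")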
rllib/core/rl_module/torch/torch_rl_module.py (4 changes: 4 additions & 0 deletions)

@@ -189,6 +189,10 @@ class TorchDDPRLModuleWithTargetNetworksInterface(
     TorchDDPRLModule,
     RLModuleWithTargetNetworksInterface,
 ):
+    @override(RLModuleWithTargetNetworksInterface)
+    def get_target_network_pairs(self, *args, **kwargs):
+        return self.module.get_target_network_pairs(*args, **kwargs)
+
     @override(RLModuleWithTargetNetworksInterface)
     def sync_target_networks(self, *args, **kwargs):
         return self.module.sync_target_networks(*args, **kwargs)
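The added method mirrors the existing sync_target_networks passthrough: DistributedDataParallel exposes the wrapped module as self.module, and the DDP wrapper owns no target networks itself, so it simply forwards the interface call. A generic sketch of this delegation pattern (toy code, not RLlib's classes):

class DelegatingWrapper:
    """Toy stand-in for a DDP-style wrapper that forwards target-network calls."""

    def __init__(self, module):
        self.module = module  # object that actually owns the (net, target_net) pairs

    def get_target_network_pairs(self, *args, **kwargs):
        # Pure passthrough; the wrapper adds no behavior of its own.
        return self.module.get_target_network_pairs(*args, **kwargs)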
