[RLlib] Add systematic APPO learning tests to CI. Combinations of [1CPU|2CPUs|1GPU|2GPUs] + [single-agent|multi-agent] #46299

Merged
3 changes: 2 additions & 1 deletion .buildkite/rllib.rayci.yml
@@ -107,7 +107,7 @@ steps:
tags:
- rllib
- gpu
parallelism: 2
parallelism: 3
instance_type: gpu
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
@@ -199,6 +199,7 @@ steps:
tags:
- rllib
- gpu
parallelism: 2
instance_type: gpu-large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
48 changes: 48 additions & 0 deletions rllib/BUILD
@@ -160,6 +160,30 @@ py_test(
srcs = ["tuned_examples/appo/cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
py_test(
name = "learning_tests_cartpole_appo_gpu",
main = "tuned_examples/appo/cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
size = "large",
srcs = ["tuned_examples/appo/cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
py_test(
name = "learning_tests_cartpole_appo_multi_cpu",
main = "tuned_examples/appo/cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/appo/cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
Collaborator: Do we need to ensure enough CPU resources here, e.g. --num-cpus=4?

Contributor (Author): Good question. I think 4 CPUs is the default, so in this case it should be fine:
2 for the EnvRunners
1 for the single Learner
1 for the main process
)
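To make the reply above concrete, here is a throwaway helper (purely illustrative, not part of RLlib or this PR) that mirrors the same accounting of driver, EnvRunner, and Learner processes:

```python
# Illustrative only -- not RLlib code. One CPU each for the driver process,
# every EnvRunner, and every CPU-based Learner, per the review reply above.
def cpus_needed(num_env_runners: int, num_learners: int) -> int:
    driver = 1
    return driver + num_env_runners + num_learners

# The single-agent case discussed above: 2 EnvRunners + 1 Learner + driver = 4.
assert cpus_needed(num_env_runners=2, num_learners=1) == 4
```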
py_test(
name = "learning_tests_cartpole_appo_multi_gpu",
main = "tuned_examples/appo/cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/appo/cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
py_test(
name = "learning_tests_multi_agent_cartpole_appo",
main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
@@ -168,6 +192,30 @@ py_test(
srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1"]
)
py_test(
name = "learning_tests_multi_agent_cartpole_appo_gpu",
main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
size = "large",
srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1", "--num-cpus=6"]
)
py_test(
name = "learning_tests_multi_agent_cartpole_appo_multi_cpu",
main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2", "--num-cpus=7"]
)
py_test(
name = "learning_tests_multi_agent_cartpole_appo_multi_gpu",
main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2", "--num-cpus=7"]
)
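For orientation, the flags these BUILD targets pass (--as-test, --enable-new-api-stack, --num-agents, --num-gpus, --num-cpus) are parsed by the tuned-example scripts themselves. The following is a hypothetical, simplified stand-in for that argument parsing, not the actual contents of cartpole_appo.py or multi_agent_cartpole_appo.py:

```python
# Hypothetical, simplified sketch of the CLI surface these tests exercise.
# Only the flag names are taken from the BUILD targets above; the real scripts
# in rllib/tuned_examples/appo/ use RLlib's shared example-script helpers.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--as-test", action="store_true",
                    help="Fail the run if the stopping criteria are not reached.")
parser.add_argument("--enable-new-api-stack", action="store_true")
parser.add_argument("--num-agents", type=int, default=0)
parser.add_argument("--num-gpus", type=int, default=0)
parser.add_argument("--num-cpus", type=int, default=0)

if __name__ == "__main__":
    args = parser.parse_args()
    print(vars(args))
```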

#@OldAPIStack
py_test(
2 changes: 0 additions & 2 deletions rllib/algorithms/appo/appo.py
@@ -100,8 +100,6 @@ def __init__(self, algo_class=None):

# Override some of ImpalaConfig's default values with APPO-specific values.
self.num_env_runners = 2
self.rollout_fragment_length = 50
self.train_batch_size = 500
self.min_time_s_per_iteration = 10
self.num_gpus = 0
self.num_multi_gpu_tower_stacks = 1
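Since APPOConfig subclasses ImpalaConfig, dropping these two overrides means APPO now simply inherits IMPALA's rollout_fragment_length and train_batch_size defaults. A quick illustrative check (assuming a Ray build that already includes this change):

```python
# Illustrative only; assumes a Ray installation that includes this change.
from ray.rllib.algorithms.appo import APPOConfig
from ray.rllib.algorithms.impala import ImpalaConfig

appo, impala = APPOConfig(), ImpalaConfig()
# With the overrides removed, APPO falls back to the IMPALA defaults.
print(appo.rollout_fragment_length == impala.rollout_fragment_length)  # True
print(appo.train_batch_size == impala.train_batch_size)  # True
```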
14 changes: 7 additions & 7 deletions rllib/algorithms/impala/impala.py
@@ -4,7 +4,7 @@
import platform
import queue
import random
from typing import Callable, List, Optional, Set, Tuple, Type, Union
from typing import List, Optional, Set, Tuple, Type, Union

import numpy as np
import tree # pip install dm_tree
@@ -28,6 +28,7 @@
)
from ray.rllib.utils.actors import create_colocated_actors
from ray.rllib.utils.annotations import OldAPIStack, override
from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning
from ray.rllib.utils.metrics import (
ALL_MODULES,
ENV_RUNNER_RESULTS,
@@ -161,7 +162,6 @@ def __init__(self, algo_class=None):
self.entropy_coeff_schedule = None
self._separate_vf_optimizer = False # @OldAPIstack
self._lr_vf = 0.0005 # @OldAPIstack
self.after_train_step = None

# Override some of AlgorithmConfig's default values with IMPALA-specific values.
self.rollout_fragment_length = 50
@@ -218,7 +218,8 @@ def training(
entropy_coeff_schedule: Optional[List[List[Union[int, float]]]] = NotProvided,
_separate_vf_optimizer: Optional[bool] = NotProvided,
_lr_vf: Optional[float] = NotProvided,
after_train_step: Optional[Callable[[dict], None]] = NotProvided,
# Deprecated args.
after_train_step=DEPRECATED_VALUE,
**kwargs,
) -> "ImpalaConfig":
"""Sets the training related configuration.
@@ -301,15 +302,16 @@
algorithms (APPO, IMPALA) on the old API stack.
_lr_vf: If _separate_vf_optimizer is True, define separate learning rate
for the value network.
after_train_step: Callback for APPO to use to update KL, target network
periodically. The input to the callback is the learner fetches dict.

Returns:
This updated AlgorithmConfig object.
"""
# Pass kwargs onto super's `training()` method.
super().training(**kwargs)

if after_train_step != DEPRECATED_VALUE:
deprecation_warning(old="config.training(after_train_step=...)", error=True)

if vtrace is not NotProvided:
self.vtrace = vtrace
if vtrace_clip_rho_threshold is not NotProvided:
@@ -368,8 +370,6 @@ def training(
self._separate_vf_optimizer = _separate_vf_optimizer
if _lr_vf is not NotProvided:
self._lr_vf = _lr_vf
if after_train_step is not NotProvided:
self.after_train_step = after_train_step
if minibatch_size is not NotProvided:
self._minibatch_size = minibatch_size

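With after_train_step removed from the config and hard-deprecated in training(), passing it now raises instead of being stored. An illustrative snippet (assumes a Ray build with this change; the exact exception type depends on deprecation_warning):

```python
# Illustrative only: `after_train_step` is hard-deprecated (error=True), so
# passing it raises instead of being silently stored on the config.
from ray.rllib.algorithms.impala import ImpalaConfig

config = ImpalaConfig()
try:
    config.training(after_train_step=lambda fetches: None)
except Exception as e:  # deprecation_warning(..., error=True) raises here
    print(f"after_train_step is deprecated: {e}")
```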
4 changes: 4 additions & 0 deletions rllib/core/rl_module/torch/torch_rl_module.py
@@ -189,6 +189,10 @@ class TorchDDPRLModuleWithTargetNetworksInterface(
TorchDDPRLModule,
RLModuleWithTargetNetworksInterface,
):
@override(RLModuleWithTargetNetworksInterface)
def get_target_network_pairs(self, *args, **kwargs):
return self.module.get_target_network_pairs(*args, **kwargs)

@override(RLModuleWithTargetNetworksInterface)
def sync_target_networks(self, *args, **kwargs):
return self.module.sync_target_networks(*args, **kwargs)
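The new get_target_network_pairs override follows the same delegation pattern as sync_target_networks: the DDP wrapper simply forwards the call to the wrapped RLModule. A minimal sketch of that pattern (toy classes, not the real RLlib types):

```python
# Toy classes (not the real RLlib types) showing the delegation pattern above:
# the DDP-style wrapper forwards target-network queries to the module it
# wraps, so callers never need to unwrap it themselves.
class ToyModuleWithTargets:
    def get_target_network_pairs(self):
        return [("policy_net", "target_net")]


class ToyDDPWrapper:
    def __init__(self, module):
        self.module = module

    def get_target_network_pairs(self, *args, **kwargs):
        return self.module.get_target_network_pairs(*args, **kwargs)


wrapped = ToyDDPWrapper(ToyModuleWithTargets())
assert wrapped.get_target_network_pairs() == [("policy_net", "target_net")]
```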