diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml
index b37a32cb40e3..8ace96b726b7 100644
--- a/.buildkite/rllib.rayci.yml
+++ b/.buildkite/rllib.rayci.yml
@@ -107,7 +107,7 @@ steps:
     tags:
       - rllib
       - gpu
-    parallelism: 2
+    parallelism: 3
     instance_type: gpu
     commands:
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
@@ -199,6 +199,7 @@ steps:
     tags:
       - rllib
       - gpu
+    parallelism: 2
     instance_type: gpu-large
     commands:
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
diff --git a/rllib/BUILD b/rllib/BUILD
index 048963b0911e..36d579c6453f 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -160,6 +160,30 @@ py_test(
     srcs = ["tuned_examples/appo/cartpole_appo.py"],
     args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
 )
+py_test(
+    name = "learning_tests_cartpole_appo_gpu",
+    main = "tuned_examples/appo/cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
+    size = "large",
+    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
+)
+py_test(
+    name = "learning_tests_cartpole_appo_multi_cpu",
+    main = "tuned_examples/appo/cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
+    size = "large",
+    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
+)
+py_test(
+    name = "learning_tests_cartpole_appo_multi_gpu",
+    main = "tuned_examples/appo/cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
+    size = "large",
+    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
+)
 py_test(
     name = "learning_tests_multi_agent_cartpole_appo",
     main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
@@ -168,6 +192,30 @@ py_test(
     srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
     args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1"]
 )
+py_test(
+    name = "learning_tests_multi_agent_cartpole_appo_gpu",
+    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
+    size = "large",
+    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1", "--num-cpus=6"]
+)
+py_test(
+    name = "learning_tests_multi_agent_cartpole_appo_multi_cpu",
+    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
+    size = "large",
+    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2", "--num-cpus=7"]
+)
+py_test(
+    name = "learning_tests_multi_agent_cartpole_appo_multi_gpu",
+    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
+    size = "large",
+    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2", "--num-cpus=7"]
+)
 
 #@OldAPIStack
 py_test(
diff --git a/rllib/algorithms/appo/appo.py b/rllib/algorithms/appo/appo.py
index 482801af88ae..f4be25e0f39e 100644
--- a/rllib/algorithms/appo/appo.py
+++ b/rllib/algorithms/appo/appo.py
@@ -100,8 +100,6 @@ def __init__(self, algo_class=None):
 
         # Override some of ImpalaConfig's default values with APPO-specific values.
         self.num_env_runners = 2
-        self.rollout_fragment_length = 50
-        self.train_batch_size = 500
         self.min_time_s_per_iteration = 10
         self.num_gpus = 0
         self.num_multi_gpu_tower_stacks = 1
diff --git a/rllib/algorithms/impala/impala.py b/rllib/algorithms/impala/impala.py
index da3dee0d61f2..038d6cc703a4 100644
--- a/rllib/algorithms/impala/impala.py
+++ b/rllib/algorithms/impala/impala.py
@@ -4,7 +4,7 @@
 import platform
 import queue
 import random
-from typing import Callable, List, Optional, Set, Tuple, Type, Union
+from typing import List, Optional, Set, Tuple, Type, Union
 
 import numpy as np
 import tree  # pip install dm_tree
@@ -28,6 +28,7 @@
 )
 from ray.rllib.utils.actors import create_colocated_actors
 from ray.rllib.utils.annotations import OldAPIStack, override
+from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning
 from ray.rllib.utils.metrics import (
     ALL_MODULES,
     ENV_RUNNER_RESULTS,
@@ -161,7 +162,6 @@ def __init__(self, algo_class=None):
         self.entropy_coeff_schedule = None
         self._separate_vf_optimizer = False  # @OldAPIstack
         self._lr_vf = 0.0005  # @OldAPIstack
-        self.after_train_step = None
 
         # Override some of AlgorithmConfig's default values with IMPALA-specific values.
         self.rollout_fragment_length = 50
@@ -218,7 +218,8 @@ def training(
         entropy_coeff_schedule: Optional[List[List[Union[int, float]]]] = NotProvided,
         _separate_vf_optimizer: Optional[bool] = NotProvided,
         _lr_vf: Optional[float] = NotProvided,
-        after_train_step: Optional[Callable[[dict], None]] = NotProvided,
+        # Deprecated args.
+        after_train_step=DEPRECATED_VALUE,
         **kwargs,
     ) -> "ImpalaConfig":
         """Sets the training related configuration.
@@ -301,8 +302,6 @@ def training(
                 algorithms (APPO, IMPALA) on the old API stack.
             _lr_vf: If _separate_vf_optimizer is True, define separate learning rate
                 for the value network.
-            after_train_step: Callback for APPO to use to update KL, target network
-                periodically. The input to the callback is the learner fetches dict.
 
         Returns:
             This updated AlgorithmConfig object.
@@ -310,6 +309,9 @@ def training(
         # Pass kwargs onto super's `training()` method.
         super().training(**kwargs)
 
+        if after_train_step != DEPRECATED_VALUE:
+            deprecation_warning(old="config.training(after_train_step=...)", error=True)
+
         if vtrace is not NotProvided:
             self.vtrace = vtrace
         if vtrace_clip_rho_threshold is not NotProvided:
@@ -368,8 +370,6 @@ def training(
             self._separate_vf_optimizer = _separate_vf_optimizer
         if _lr_vf is not NotProvided:
             self._lr_vf = _lr_vf
-        if after_train_step is not NotProvided:
-            self.after_train_step = after_train_step
         if minibatch_size is not NotProvided:
             self._minibatch_size = minibatch_size
 
diff --git a/rllib/core/rl_module/torch/torch_rl_module.py b/rllib/core/rl_module/torch/torch_rl_module.py
index 1a8617ec1ef0..456c330530b3 100644
--- a/rllib/core/rl_module/torch/torch_rl_module.py
+++ b/rllib/core/rl_module/torch/torch_rl_module.py
@@ -189,6 +189,10 @@ class TorchDDPRLModuleWithTargetNetworksInterface(
     TorchDDPRLModule,
     RLModuleWithTargetNetworksInterface,
 ):
+    @override(RLModuleWithTargetNetworksInterface)
+    def get_target_network_pairs(self, *args, **kwargs):
+        return self.module.get_target_network_pairs(*args, **kwargs)
+
     @override(RLModuleWithTargetNetworksInterface)
     def sync_target_networks(self, *args, **kwargs):
         return self.module.sync_target_networks(*args, **kwargs)