[RLlib] Added test version of BC algorithm based on RLModules an RLTr…

…ainers (ray-project#32471) Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com> Signed-off-by: Edward Oakes <ed.nmi.oakes@gmail.com>
edoakes · Mar 22, 2023 · 7a6ad1f · 7a6ad1f
1 parent c570cdc
commit 7a6ad1f
Show file tree

Hide file tree

Showing 13 changed files with 138 additions and 78 deletions.
diff --git a/rllib/BUILD b/rllib/BUILD
@@ -1887,6 +1887,13 @@ py_test(
     srcs = ["core/rl_trainer/torch/tests/test_torch_rl_trainer.py"]
 )
 
+py_test(
+    name = "test_bc_algorithm",
+    tags = ["team:rllib", "core"],
+    size = "medium",
+    srcs = ["core/testing/tests/test_bc_algorithm.py"]
+)
+
 # --------------------------------------------------------------------
 # Models and Distributions
 # rllib/models/

diff --git a/rllib/core/rl_module/tests/test_marl_module.py b/rllib/core/rl_module/tests/test_marl_module.py
@@ -19,12 +19,12 @@ def test_from_config(self):
         module1 = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
         module2 = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         multi_agent_dict = {"module1": module1, "module2": module2}
@@ -43,11 +43,11 @@ def test_from_multi_agent_config(self):
             "modules": {
                 "module1": SingleAgentRLModuleSpec(
                     module_class=DiscreteBCTorchModule,
-                    model_config={"hidden_dim": 64},
+                    model_config={"fcnet_hiddens": [64]},
                 ),
                 "module2": SingleAgentRLModuleSpec(
                     module_class=DiscreteBCTorchModule,
-                    model_config={"hidden_dim": 32},
+                    model_config={"fcnet_hiddens": [32]},
                 ),
             },
             "observation_space": env.observation_space,  # this is common
@@ -68,7 +68,7 @@ def test_as_multi_agent(self):
         marl_module = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         ).as_multi_agent()
 
         self.assertNotIsInstance(marl_module, DiscreteBCTorchModule)
@@ -87,7 +87,7 @@ def test_get_set_state(self):
         module = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         ).as_multi_agent()
 
         state = module.get_state()
@@ -101,7 +101,7 @@ def test_get_set_state(self):
         module2 = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         ).as_multi_agent()
         state2 = module2.get_state()
         check(state, state2, false=True)
@@ -119,15 +119,15 @@ def test_add_remove_modules(self):
         module = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         ).as_multi_agent()
 
         module.add_module(
             "test",
             DiscreteBCTorchModule.from_model_config(
                 env.observation_space,
                 env.action_space,
-                model_config={"hidden_dim": 32},
+                model_config={"fcnet_hiddens": [32]},
             ),
         )
         self.assertEqual(set(module.keys()), {DEFAULT_POLICY_ID, "test"})
@@ -142,7 +142,7 @@ def test_add_remove_modules(self):
                 DiscreteBCTorchModule.from_model_config(
                     env.observation_space,
                     env.action_space,
-                    model_config={"hidden_dim": 32},
+                    model_config={"fcnet_hiddens": [32]},
                 ),
             ),
         )
@@ -152,7 +152,7 @@ def test_add_remove_modules(self):
             DiscreteBCTorchModule.from_model_config(
                 env.observation_space,
                 env.action_space,
-                model_config={"hidden_dim": 32},
+                model_config={"fcnet_hiddens": [32]},
             ),
             override=True,
         )
@@ -239,12 +239,12 @@ def test_serialize_deserialize(self):
         module1 = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
         module2 = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         multi_agent_dict = {"module1": module1, "module2": module2}

diff --git a/rllib/core/rl_module/tests/test_rl_module_specs.py b/rllib/core/rl_module/tests/test_rl_module_specs.py
@@ -33,7 +33,7 @@ def build(self):
         # this handles all implementation details
         config = {
             "input_dim": self.observation_space.shape[0],
-            "hidden_dim": self.model_config["hidden_dim"],
+            "hidden_dim": self.model_config["fcnet_hiddens"][0],
             "output_dim": self.action_space.n,
         }
         return self.module_class(**config)
@@ -48,7 +48,7 @@ def test_single_agent_spec(self):
                 module_class=module_class,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
-                model_config={"hidden_dim": 64},
+                model_config={"fcnet_hiddens": [64]},
             )
 
             module = spec.build()
@@ -63,7 +63,7 @@ def test_customized_single_agent_spec(self):
                 module_class=module_class,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
-                model_config={"hidden_dim": 64},
+                model_config={"fcnet_hiddens": [64]},
             )
             module = spec.build()
             self.assertIsInstance(module, module_class)
@@ -81,7 +81,7 @@ def test_multi_agent_spec(self):
                     module_class=module_class,
                     observation_space=env.observation_space,
                     action_space=env.action_space,
-                    model_config={"hidden_dim": 32 * (i + 1)},
+                    model_config={"fcnet_hiddens": [32 * (i + 1)]},
                 )
 
             spec = MultiAgentRLModuleSpec(
@@ -118,7 +118,7 @@ def test_customized_multi_agent_spec(self):
                             }
                         ),
                         action_space=gym.spaces.Discrete(action_dims[0]),
-                        model_config={"hidden_dim": 128},
+                        model_config={"fcnet_hiddens": [128]},
                     ),
                     "agent_2": SingleAgentRLModuleSpec(
                         module_class=module_cls,
@@ -133,7 +133,7 @@ def test_customized_multi_agent_spec(self):
                             }
                         ),
                         action_space=gym.spaces.Discrete(action_dims[1]),
-                        model_config={"hidden_dim": 128},
+                        model_config={"fcnet_hiddens": [128]},
                     ),
                 },
             )

diff --git a/rllib/core/rl_module/tf/tests/test_tf_rl_module.py b/rllib/core/rl_module/tf/tests/test_tf_rl_module.py
@@ -18,7 +18,7 @@ def test_compilation(self):
         module = DiscreteBCTFModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         self.assertIsInstance(module, TfRLModule)
@@ -30,7 +30,7 @@ def test_forward_train(self):
         module = DiscreteBCTFModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         obs_shape = env.observation_space.shape
@@ -62,7 +62,7 @@ def test_forward(self):
         module = DiscreteBCTFModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         obs_shape = env.observation_space.shape
@@ -78,7 +78,7 @@ def test_get_set_state(self):
         module = DiscreteBCTFModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         state = module.get_state()
@@ -87,7 +87,7 @@ def test_get_set_state(self):
         module2 = DiscreteBCTFModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
         state2 = module2.get_state()
         check(state["policy"][0], state2["policy"][0], false=True)
@@ -101,7 +101,7 @@ def test_serialize_deserialize(self):
         module = DiscreteBCTFModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         # create a new module from the old module

diff --git a/rllib/core/rl_module/torch/tests/test_torch_rl_module.py b/rllib/core/rl_module/torch/tests/test_torch_rl_module.py
@@ -17,7 +17,7 @@ def test_compilation(self):
         module = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         self.assertIsInstance(module, TorchRLModule)
@@ -29,7 +29,7 @@ def test_forward_train(self):
         module = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         obs_shape = env.observation_space.shape
@@ -57,7 +57,7 @@ def test_forward(self):
         module = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         obs_shape = env.observation_space.shape
@@ -73,7 +73,7 @@ def test_get_set_state(self):
         module = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         state = module.get_state()
@@ -82,7 +82,7 @@ def test_get_set_state(self):
         module2 = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
         state2 = module2.get_state()
         check(state, state2, false=True)
@@ -96,7 +96,7 @@ def test_serialize_deserialize(self):
         module = DiscreteBCTorchModule.from_model_config(
             env.observation_space,
             env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         )
 
         # create a new module from the old module

diff --git a/rllib/core/rl_trainer/tests/test_rl_trainer.py b/rllib/core/rl_trainer/tests/test_rl_trainer.py
@@ -22,7 +22,7 @@ def get_trainer() -> RLTrainer:
             module_class=DiscreteBCTFModule,
             observation_space=env.observation_space,
             action_space=env.action_space,
-            model_config={"hidden_dim": 32},
+            model_config={"fcnet_hiddens": [32]},
         ),
         optimizer_config={"lr": 1e-3},
         trainer_scaling_config=TrainerScalingConfig(),
@@ -127,7 +127,7 @@ def set_optimizer_fn(module):
                 module_class=DiscreteBCTFModule,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
-                model_config={"hidden_dim": 16},
+                model_config={"fcnet_hiddens": [16]},
             ),
             set_optimizer_fn=set_optimizer_fn,
         )

diff --git a/rllib/core/rl_trainer/tests/test_trainer_runner_config.py b/rllib/core/rl_trainer/tests/test_trainer_runner_config.py
@@ -43,7 +43,7 @@ def test_trainer_runner_build_from_algorithm_config(self):
             AlgorithmConfig()
             .rl_module(rl_module_class=DiscreteBCTFModule)
             .training(rl_trainer_class=BCTfRLTrainer)
-            .training(model={"hidden_dim": 32})
+            .training(model={"fcnet_hiddens": [32]})
         )
         config.freeze()
         runner_config = config.get_trainer_runner_config(

diff --git a/rllib/core/rl_trainer/torch/tests/test_torch_rl_trainer.py b/rllib/core/rl_trainer/torch/tests/test_torch_rl_trainer.py
@@ -8,27 +8,15 @@
 from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
 from ray.rllib.core.rl_trainer.rl_trainer import RLTrainer
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule
-from ray.rllib.core.testing.torch.bc_rl_trainer import BCTorchRLTrainer
 from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
 from ray.rllib.utils.test_utils import check, get_cartpole_dataset_reader
 from ray.rllib.utils.numpy import convert_to_numpy
-from ray.rllib.core.rl_trainer.scaling_config import TrainerScalingConfig
+from ray.rllib.core.testing.utils import get_rl_trainer
 
 
 def _get_trainer() -> RLTrainer:
     env = gym.make("CartPole-v1")
-
-    trainer = BCTorchRLTrainer(
-        module_spec=SingleAgentRLModuleSpec(
-            module_class=DiscreteBCTorchModule,
-            observation_space=env.observation_space,
-            action_space=env.action_space,
-            model_config={"hidden_dim": 32},
-        ),
-        optimizer_config={"lr": 1e-3},
-        trainer_scaling_config=TrainerScalingConfig(),
-    )
-
+    trainer = get_rl_trainer("torch", env)
     trainer.build()
 
     return trainer
@@ -125,7 +113,7 @@ def set_optimizer_fn(module):
                 module_class=DiscreteBCTorchModule,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
-                model_config={"hidden_dim": 16},
+                model_config={"fcnet_hiddens": [16]},
             ),
             set_optimizer_fn=set_optimizer_fn,
         )

diff --git a/rllib/core/testing/bc_algorithm.py b/rllib/core/testing/bc_algorithm.py
@@ -0,0 +1,45 @@
+"""Contains example implementation of a custom algorithm.
+
+Note: It doesn't include any real use-case functionality; it only serves as an example
+to test the algorithm construction and customization.
+"""
+
+from ray.rllib.algorithms import Algorithm, AlgorithmConfig
+from ray.rllib.policy.torch_policy_v2 import TorchPolicyV2
+from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2
+from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule
+from ray.rllib.core.testing.torch.bc_rl_trainer import BCTorchRLTrainer
+from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule
+from ray.rllib.core.testing.tf.bc_rl_trainer import BCTfRLTrainer
+
+
+class BCConfigTest(AlgorithmConfig):
+    def __init__(self, algo_class=None):
+        super().__init__(algo_class=algo_class or BCAlgorithmTest)
+
+    def get_default_rl_module_class(self):
+        if self.framework_str == "torch":
+            return DiscreteBCTorchModule
+        elif self.framework_str == "tf2":
+            return DiscreteBCTFModule
+
+    def get_default_rl_trainer_class(self):
+        if self.framework_str == "torch":
+            return BCTorchRLTrainer
+        elif self.framework_str == "tf2":
+            return BCTfRLTrainer
+
+
+class BCAlgorithmTest(Algorithm):
+    @classmethod
+    def get_default_policy_class(cls, config: AlgorithmConfig):
+        if config.framework_str == "torch":
+            return TorchPolicyV2
+        elif config.framework_str == "tf2":
+            return EagerTFPolicyV2
+        else:
+            raise ValueError("Unknown framework: {}".format(config.framework_str))
+
+    def training_step(self):
+        # do nothing.
+        return {}