diff --git a/rllib/BUILD b/rllib/BUILD index c2b458b4beb0..49271a17ded8 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -1887,6 +1887,13 @@ py_test( srcs = ["core/rl_trainer/torch/tests/test_torch_rl_trainer.py"] ) +py_test( + name = "test_bc_algorithm", + tags = ["team:rllib", "core"], + size = "medium", + srcs = ["core/testing/tests/test_bc_algorithm.py"] +) + # -------------------------------------------------------------------- # Models and Distributions # rllib/models/ diff --git a/rllib/core/rl_module/tests/test_marl_module.py b/rllib/core/rl_module/tests/test_marl_module.py index fa7a2f030525..52c1afd0a79f 100644 --- a/rllib/core/rl_module/tests/test_marl_module.py +++ b/rllib/core/rl_module/tests/test_marl_module.py @@ -19,12 +19,12 @@ def test_from_config(self): module1 = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) module2 = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) multi_agent_dict = {"module1": module1, "module2": module2} @@ -43,11 +43,11 @@ def test_from_multi_agent_config(self): "modules": { "module1": SingleAgentRLModuleSpec( module_class=DiscreteBCTorchModule, - model_config={"hidden_dim": 64}, + model_config={"fcnet_hiddens": [64]}, ), "module2": SingleAgentRLModuleSpec( module_class=DiscreteBCTorchModule, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ), }, "observation_space": env.observation_space, # this is common @@ -68,7 +68,7 @@ def test_as_multi_agent(self): marl_module = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ).as_multi_agent() self.assertNotIsInstance(marl_module, DiscreteBCTorchModule) @@ -87,7 +87,7 @@ def test_get_set_state(self): module = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ).as_multi_agent() state = module.get_state() @@ -101,7 +101,7 @@ def test_get_set_state(self): module2 = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ).as_multi_agent() state2 = module2.get_state() check(state, state2, false=True) @@ -119,7 +119,7 @@ def test_add_remove_modules(self): module = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ).as_multi_agent() module.add_module( @@ -127,7 +127,7 @@ def test_add_remove_modules(self): DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ), ) self.assertEqual(set(module.keys()), {DEFAULT_POLICY_ID, "test"}) @@ -142,7 +142,7 @@ def test_add_remove_modules(self): DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ), ), ) @@ -152,7 +152,7 @@ def test_add_remove_modules(self): DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ), override=True, ) @@ -239,12 +239,12 @@ def test_serialize_deserialize(self): module1 = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) module2 = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) multi_agent_dict = {"module1": module1, "module2": module2} diff --git a/rllib/core/rl_module/tests/test_rl_module_specs.py b/rllib/core/rl_module/tests/test_rl_module_specs.py index 6a7fec50bf45..22db1bc3f823 100644 --- a/rllib/core/rl_module/tests/test_rl_module_specs.py +++ b/rllib/core/rl_module/tests/test_rl_module_specs.py @@ -33,7 +33,7 @@ def build(self): # this handles all implementation details config = { "input_dim": self.observation_space.shape[0], - "hidden_dim": self.model_config["hidden_dim"], + "hidden_dim": self.model_config["fcnet_hiddens"][0], "output_dim": self.action_space.n, } return self.module_class(**config) @@ -48,7 +48,7 @@ def test_single_agent_spec(self): module_class=module_class, observation_space=env.observation_space, action_space=env.action_space, - model_config={"hidden_dim": 64}, + model_config={"fcnet_hiddens": [64]}, ) module = spec.build() @@ -63,7 +63,7 @@ def test_customized_single_agent_spec(self): module_class=module_class, observation_space=env.observation_space, action_space=env.action_space, - model_config={"hidden_dim": 64}, + model_config={"fcnet_hiddens": [64]}, ) module = spec.build() self.assertIsInstance(module, module_class) @@ -81,7 +81,7 @@ def test_multi_agent_spec(self): module_class=module_class, observation_space=env.observation_space, action_space=env.action_space, - model_config={"hidden_dim": 32 * (i + 1)}, + model_config={"fcnet_hiddens": [32 * (i + 1)]}, ) spec = MultiAgentRLModuleSpec( @@ -118,7 +118,7 @@ def test_customized_multi_agent_spec(self): } ), action_space=gym.spaces.Discrete(action_dims[0]), - model_config={"hidden_dim": 128}, + model_config={"fcnet_hiddens": [128]}, ), "agent_2": SingleAgentRLModuleSpec( module_class=module_cls, @@ -133,7 +133,7 @@ def test_customized_multi_agent_spec(self): } ), action_space=gym.spaces.Discrete(action_dims[1]), - model_config={"hidden_dim": 128}, + model_config={"fcnet_hiddens": [128]}, ), }, ) diff --git a/rllib/core/rl_module/tf/tests/test_tf_rl_module.py b/rllib/core/rl_module/tf/tests/test_tf_rl_module.py index 2b147a883404..b09b42a3a681 100644 --- a/rllib/core/rl_module/tf/tests/test_tf_rl_module.py +++ b/rllib/core/rl_module/tf/tests/test_tf_rl_module.py @@ -18,7 +18,7 @@ def test_compilation(self): module = DiscreteBCTFModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) self.assertIsInstance(module, TfRLModule) @@ -30,7 +30,7 @@ def test_forward_train(self): module = DiscreteBCTFModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) obs_shape = env.observation_space.shape @@ -62,7 +62,7 @@ def test_forward(self): module = DiscreteBCTFModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) obs_shape = env.observation_space.shape @@ -78,7 +78,7 @@ def test_get_set_state(self): module = DiscreteBCTFModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) state = module.get_state() @@ -87,7 +87,7 @@ def test_get_set_state(self): module2 = DiscreteBCTFModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) state2 = module2.get_state() check(state["policy"][0], state2["policy"][0], false=True) @@ -101,7 +101,7 @@ def test_serialize_deserialize(self): module = DiscreteBCTFModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) # create a new module from the old module diff --git a/rllib/core/rl_module/torch/tests/test_torch_rl_module.py b/rllib/core/rl_module/torch/tests/test_torch_rl_module.py index ad3ff047e4a8..c106647efbce 100644 --- a/rllib/core/rl_module/torch/tests/test_torch_rl_module.py +++ b/rllib/core/rl_module/torch/tests/test_torch_rl_module.py @@ -17,7 +17,7 @@ def test_compilation(self): module = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) self.assertIsInstance(module, TorchRLModule) @@ -29,7 +29,7 @@ def test_forward_train(self): module = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) obs_shape = env.observation_space.shape @@ -57,7 +57,7 @@ def test_forward(self): module = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) obs_shape = env.observation_space.shape @@ -73,7 +73,7 @@ def test_get_set_state(self): module = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) state = module.get_state() @@ -82,7 +82,7 @@ def test_get_set_state(self): module2 = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) state2 = module2.get_state() check(state, state2, false=True) @@ -96,7 +96,7 @@ def test_serialize_deserialize(self): module = DiscreteBCTorchModule.from_model_config( env.observation_space, env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) # create a new module from the old module diff --git a/rllib/core/rl_trainer/tests/test_rl_trainer.py b/rllib/core/rl_trainer/tests/test_rl_trainer.py index 6b431600cc79..7489c2a1225e 100644 --- a/rllib/core/rl_trainer/tests/test_rl_trainer.py +++ b/rllib/core/rl_trainer/tests/test_rl_trainer.py @@ -22,7 +22,7 @@ def get_trainer() -> RLTrainer: module_class=DiscreteBCTFModule, observation_space=env.observation_space, action_space=env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ), optimizer_config={"lr": 1e-3}, trainer_scaling_config=TrainerScalingConfig(), @@ -127,7 +127,7 @@ def set_optimizer_fn(module): module_class=DiscreteBCTFModule, observation_space=env.observation_space, action_space=env.action_space, - model_config={"hidden_dim": 16}, + model_config={"fcnet_hiddens": [16]}, ), set_optimizer_fn=set_optimizer_fn, ) diff --git a/rllib/core/rl_trainer/tests/test_trainer_runner_config.py b/rllib/core/rl_trainer/tests/test_trainer_runner_config.py index 46e215acd86d..4eacf477f3b6 100644 --- a/rllib/core/rl_trainer/tests/test_trainer_runner_config.py +++ b/rllib/core/rl_trainer/tests/test_trainer_runner_config.py @@ -43,7 +43,7 @@ def test_trainer_runner_build_from_algorithm_config(self): AlgorithmConfig() .rl_module(rl_module_class=DiscreteBCTFModule) .training(rl_trainer_class=BCTfRLTrainer) - .training(model={"hidden_dim": 32}) + .training(model={"fcnet_hiddens": [32]}) ) config.freeze() runner_config = config.get_trainer_runner_config( diff --git a/rllib/core/rl_trainer/torch/tests/test_torch_rl_trainer.py b/rllib/core/rl_trainer/torch/tests/test_torch_rl_trainer.py index 0be8428599e6..95b68da7ceb3 100644 --- a/rllib/core/rl_trainer/torch/tests/test_torch_rl_trainer.py +++ b/rllib/core/rl_trainer/torch/tests/test_torch_rl_trainer.py @@ -8,27 +8,15 @@ from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec from ray.rllib.core.rl_trainer.rl_trainer import RLTrainer from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule -from ray.rllib.core.testing.torch.bc_rl_trainer import BCTorchRLTrainer from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID from ray.rllib.utils.test_utils import check, get_cartpole_dataset_reader from ray.rllib.utils.numpy import convert_to_numpy -from ray.rllib.core.rl_trainer.scaling_config import TrainerScalingConfig +from ray.rllib.core.testing.utils import get_rl_trainer def _get_trainer() -> RLTrainer: env = gym.make("CartPole-v1") - - trainer = BCTorchRLTrainer( - module_spec=SingleAgentRLModuleSpec( - module_class=DiscreteBCTorchModule, - observation_space=env.observation_space, - action_space=env.action_space, - model_config={"hidden_dim": 32}, - ), - optimizer_config={"lr": 1e-3}, - trainer_scaling_config=TrainerScalingConfig(), - ) - + trainer = get_rl_trainer("torch", env) trainer.build() return trainer @@ -125,7 +113,7 @@ def set_optimizer_fn(module): module_class=DiscreteBCTorchModule, observation_space=env.observation_space, action_space=env.action_space, - model_config={"hidden_dim": 16}, + model_config={"fcnet_hiddens": [16]}, ), set_optimizer_fn=set_optimizer_fn, ) diff --git a/rllib/core/testing/bc_algorithm.py b/rllib/core/testing/bc_algorithm.py new file mode 100644 index 000000000000..77e4f91fb911 --- /dev/null +++ b/rllib/core/testing/bc_algorithm.py @@ -0,0 +1,45 @@ +"""Contains example implementation of a custom algorithm. + +Note: It doesn't include any real use-case functionality; it only serves as an example +to test the algorithm construction and customization. +""" + +from ray.rllib.algorithms import Algorithm, AlgorithmConfig +from ray.rllib.policy.torch_policy_v2 import TorchPolicyV2 +from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2 +from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule +from ray.rllib.core.testing.torch.bc_rl_trainer import BCTorchRLTrainer +from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule +from ray.rllib.core.testing.tf.bc_rl_trainer import BCTfRLTrainer + + +class BCConfigTest(AlgorithmConfig): + def __init__(self, algo_class=None): + super().__init__(algo_class=algo_class or BCAlgorithmTest) + + def get_default_rl_module_class(self): + if self.framework_str == "torch": + return DiscreteBCTorchModule + elif self.framework_str == "tf2": + return DiscreteBCTFModule + + def get_default_rl_trainer_class(self): + if self.framework_str == "torch": + return BCTorchRLTrainer + elif self.framework_str == "tf2": + return BCTfRLTrainer + + +class BCAlgorithmTest(Algorithm): + @classmethod + def get_default_policy_class(cls, config: AlgorithmConfig): + if config.framework_str == "torch": + return TorchPolicyV2 + elif config.framework_str == "tf2": + return EagerTFPolicyV2 + else: + raise ValueError("Unknown framework: {}".format(config.framework_str)) + + def training_step(self): + # do nothing. + return {} diff --git a/rllib/core/testing/tests/test_bc_algorithm.py b/rllib/core/testing/tests/test_bc_algorithm.py new file mode 100644 index 000000000000..9909e53e79ae --- /dev/null +++ b/rllib/core/testing/tests/test_bc_algorithm.py @@ -0,0 +1,44 @@ +import unittest + +import ray +from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule +from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule + +from ray.rllib.core.testing.bc_algorithm import BCConfigTest +from ray.rllib.utils.test_utils import framework_iterator + + +class TestRLTrainer(unittest.TestCase): + @classmethod + def setUp(cls) -> None: + ray.init() + + @classmethod + def tearDown(cls) -> None: + ray.shutdown() + + def test_bc_algorithm(self): + + config = ( + BCConfigTest() + .rl_module(_enable_rl_module_api=True) + .training(_enable_rl_trainer_api=True, model={"fcnet_hiddens": [32, 32]}) + ) + + # TODO (Kourosh): Add tf2 support + for fw in framework_iterator(config, frameworks=("torch")): + algo = config.build(env="CartPole-v1") + policy = algo.get_policy() + rl_module = policy.model + + if fw == "torch": + assert isinstance(rl_module, DiscreteBCTorchModule) + elif fw == "tf": + assert isinstance(rl_module, DiscreteBCTFModule) + + +if __name__ == "__main__": + import pytest + import sys + + sys.exit(pytest.main(["-v", __file__])) diff --git a/rllib/core/testing/tf/bc_module.py b/rllib/core/testing/tf/bc_module.py index 200f0c101c90..f8d213241078 100644 --- a/rllib/core/testing/tf/bc_module.py +++ b/rllib/core/testing/tf/bc_module.py @@ -33,18 +33,6 @@ def __init__( self.policy = tf.keras.Sequential(layers) self._input_dim = input_dim - @override(RLModule) - def input_specs_exploration(self) -> SpecType: - return ["obs"] - - @override(RLModule) - def input_specs_inference(self) -> SpecType: - return ["obs"] - - @override(RLModule) - def input_specs_train(self) -> SpecType: - return ["obs"] - @override(RLModule) def output_specs_exploration(self) -> SpecType: return ["action_dist"] @@ -96,7 +84,7 @@ def from_model_config( config = { "input_dim": observation_space.shape[0], - "hidden_dim": model_config["hidden_dim"], + "hidden_dim": model_config["fcnet_hiddens"][0], "output_dim": action_space.n, } @@ -151,7 +139,7 @@ def build(self): # module module_spec = next(iter(self.module_specs.values())) global_dim = module_spec.observation_space["global"].shape[0] - hidden_dim = module_spec.model_config["hidden_dim"] + hidden_dim = module_spec.model_config["fcnet_hiddens"][0] shared_encoder = tf.keras.Sequential( [ tf.keras.Input(shape=(global_dim,)), diff --git a/rllib/core/testing/torch/bc_module.py b/rllib/core/testing/torch/bc_module.py index b654ab3d3eca..ce5fd81d64a9 100644 --- a/rllib/core/testing/torch/bc_module.py +++ b/rllib/core/testing/torch/bc_module.py @@ -30,18 +30,6 @@ def __init__( self.input_dim = input_dim - @override(RLModule) - def input_specs_exploration(self) -> SpecType: - return ["obs"] - - @override(RLModule) - def input_specs_inference(self) -> SpecType: - return ["obs"] - - @override(RLModule) - def input_specs_train(self) -> SpecType: - return ["obs"] - @override(RLModule) def output_specs_exploration(self) -> SpecType: return ["action_dist"] @@ -81,7 +69,7 @@ def from_model_config( config = { "input_dim": observation_space.shape[0], - "hidden_dim": model_config["hidden_dim"], + "hidden_dim": model_config["fcnet_hiddens"][0], "output_dim": action_space.n, } @@ -150,7 +138,7 @@ def build(self): # module module_spec = next(iter(self.module_specs.values())) global_dim = module_spec.observation_space["global"].shape[0] - hidden_dim = module_spec.model_config["hidden_dim"] + hidden_dim = module_spec.model_config["fcnet_hiddens"][0] shared_encoder = nn.Sequential( nn.Linear(global_dim, hidden_dim), nn.ReLU(), diff --git a/rllib/core/testing/utils.py b/rllib/core/testing/utils.py index 6dbf9f60fd17..15f268312817 100644 --- a/rllib/core/testing/utils.py +++ b/rllib/core/testing/utils.py @@ -60,7 +60,7 @@ def get_module_spec(framework: str, env: "gym.Env", is_multi_agent: bool = False module_class=get_module_class(framework), observation_space=env.observation_space, action_space=env.action_space, - model_config={"hidden_dim": 32}, + model_config={"fcnet_hiddens": [32]}, ) if is_multi_agent: