diff --git a/release/release_tests.yaml b/release/release_tests.yaml index d19fae81bdbb..1284e07b48a0 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -2029,7 +2029,7 @@ run: timeout: 1500 # expected 1000 seconds - script: python tuned_examples/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test + script: python example_algorithms/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test - name: rllib_learning_tests_halfcheetah_appo_torch python: "3.12" @@ -2047,7 +2047,7 @@ run: timeout: 3000 # expected 2000 seconds - script: python tuned_examples/appo/halfcheetah_appo.py --num-learners=1 --num-env-runners=12 --as-release-test + script: python example_algorithms/appo/halfcheetah_appo.py --num-learners=1 --num-env-runners=12 --as-release-test ######################## # Core Nightly Tests diff --git a/release/rllib_tests/example_algorithms b/release/rllib_tests/example_algorithms new file mode 120000 index 000000000000..64b332356b30 --- /dev/null +++ b/release/rllib_tests/example_algorithms @@ -0,0 +1 @@ +../../rllib/examples/algorithms \ No newline at end of file diff --git a/release/rllib_tests/tuned_examples b/release/rllib_tests/tuned_examples deleted file mode 120000 index 6258eb4f971f..000000000000 --- a/release/rllib_tests/tuned_examples +++ /dev/null @@ -1 +0,0 @@ -../../rllib/tuned_examples \ No newline at end of file diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 411c0997aa2e..f2a2ef3d2a94 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -72,8 +72,6 @@ doctest( "**/examples/**", "**/tests/**", "**/test_*.py", - # Exclude `tuned_examples` *.py files. - "**/tuned_examples/**", # Deprecated modules "utils/memory.py", "offline/off_policy_estimator.py", @@ -150,7 +148,7 @@ py_test( # Tag: learning_tests # # This will test python/yaml config files -# inside rllib/tuned_examples/[algo-name] for actual learning success. +# inside rllib/examples/algorithms/[algo-name] for actual learning success. # -------------------------------------------------------------------- # APPO @@ -158,13 +156,13 @@ py_test( py_test( name = "learning_tests_cartpole_appo", size = "large", - srcs = ["tuned_examples/appo/cartpole_appo.py"], + srcs = ["examples/algorithms/appo/cartpole_appo.py"], args = [ "--as-test", "--num-cpus=7", "--num-env-runners=5", ], - main = "tuned_examples/appo/cartpole_appo.py", + main = "examples/algorithms/appo/cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -174,26 +172,17 @@ py_test( ], ) -# TODO (sven): For some weird reason, this test runs extremely slow on the CI (not on cluster, not locally) -> taking this out for now ... 
-# py_test( -# name = "learning_tests_cartpole_appo_gpu", -# main = "tuned_examples/appo/cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"], -# size = "large", -# srcs = ["tuned_examples/appo/cartpole_appo.py"], -# args = ["--as-test", "--num-gpus-per-learner=1", "--num-cpus=7", "--num-env-runners=5"] -# ) py_test( name = "learning_tests_cartpole_appo_multi_cpu", size = "large", - srcs = ["tuned_examples/appo/cartpole_appo.py"], + srcs = ["examples/algorithms/appo/cartpole_appo.py"], args = [ "--as-test", "--num-learners=2", "--num-cpus=9", "--num-env-runners=6", ], - main = "tuned_examples/appo/cartpole_appo.py", + main = "examples/algorithms/appo/cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -207,7 +196,7 @@ py_test( py_test( name = "learning_tests_cartpole_appo_multi_gpu", size = "large", - srcs = ["tuned_examples/appo/cartpole_appo.py"], + srcs = ["examples/algorithms/appo/cartpole_appo.py"], args = [ "--as-test", "--num-learners=2", @@ -215,7 +204,7 @@ py_test( "--num-cpus=7", "--num-env-runners=6", ], - main = "tuned_examples/appo/cartpole_appo.py", + main = "examples/algorithms/appo/cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -231,14 +220,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_appo", size = "large", - srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=8", "--num-env-runners=6", ], - main = "tuned_examples/appo/multi_agent_cartpole_appo.py", + main = "examples/algorithms/appo/multi_agent_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -251,7 +240,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_appo_gpu", size = "large", - srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", @@ -259,7 +248,7 @@ py_test( "--num-cpus=7", "--num-env-runners=5", ], - main = "tuned_examples/appo/multi_agent_cartpole_appo.py", + main = "examples/algorithms/appo/multi_agent_cartpole_appo.py", tags = [ "exclusive", "gpu", @@ -274,7 +263,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_appo_multi_cpu", size = "large", - srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", @@ -282,7 +271,7 @@ py_test( "--num-cpus=9", "--num-env-runners=6", ], - main = "tuned_examples/appo/multi_agent_cartpole_appo.py", + main = "examples/algorithms/appo/multi_agent_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -298,7 +287,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_appo_multi_gpu", size = "large", - srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", @@ -307,7 +296,7 @@ py_test( "--num-cpus=7", "--num-env-runners=6", ], - main = "tuned_examples/appo/multi_agent_cartpole_appo.py", + main = "examples/algorithms/appo/multi_agent_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -323,13 +312,13 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_appo", size = "large", - srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"], + srcs = 
["examples/algorithms/appo/stateless_cartpole_appo.py"], args = [ "--as-test", "--num-cpus=8", "--num-env-runners=6", ], - main = "tuned_examples/appo/stateless_cartpole_appo.py", + main = "examples/algorithms/appo/stateless_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -343,7 +332,7 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_appo_gpu", size = "large", - srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", @@ -351,7 +340,7 @@ py_test( "--num-cpus=7", "--num-env-runners=5", ], - main = "tuned_examples/appo/stateless_cartpole_appo.py", + main = "examples/algorithms/appo/stateless_cartpole_appo.py", tags = [ "exclusive", "gpu", @@ -366,14 +355,14 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_appo_multi_cpu", size = "large", - srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"], args = [ "--as-test", "--num-learners=2", "--num-cpus=9", "--num-env-runners=6", ], - main = "tuned_examples/appo/stateless_cartpole_appo.py", + main = "examples/algorithms/appo/stateless_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -387,7 +376,7 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_appo_multi_gpu", size = "large", - srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"], args = [ "--as-test", "--num-learners=2", @@ -395,7 +384,7 @@ py_test( "--num-cpus=7", "--num-env-runners=6", ], - main = "tuned_examples/appo/stateless_cartpole_appo.py", + main = "examples/algorithms/appo/stateless_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -407,50 +396,17 @@ py_test( ], ) -# MultiAgentStatelessCartPole -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_appo", -# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"], -# size = "large", -# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"], -# args = ["--as-test"] -# ) -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_appo_gpu", -# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"], -# size = "large", -# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"], -# args = ["--as-test", "--num-agents=2", "--num-gpus-per-learner=1"] -# ) -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_cpu", -# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"], -# size = "large", -# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"], -# args = ["--as-test", "--num-learners=2"] -# ) -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_gpu", -# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"], -# size = "large", -# srcs = 
["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"], -# args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"] -# ) # Pendulum py_test( name = "learning_tests_pendulum_appo", size = "large", - srcs = ["tuned_examples/appo/pendulum_appo.py"], + srcs = ["examples/algorithms/appo/pendulum_appo.py"], args = [ "--as-test", "--num-cpus=6", "--num-env-runners=4", ], - main = "tuned_examples/appo/pendulum_appo.py", + main = "examples/algorithms/appo/pendulum_appo.py", tags = [ "exclusive", "learning_tests", @@ -464,7 +420,7 @@ py_test( py_test( name = "learning_tests_multi_agent_pong_appo_multi_gpu", size = "large", - srcs = ["tuned_examples/appo/multi_agent_pong_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_pong_appo.py"], args = [ "--stop-iters=3", "--num-agents=2", @@ -472,7 +428,7 @@ py_test( "--num-gpus-per-learner=1", "--num-aggregator-actors-per-learner=1", ], - main = "tuned_examples/appo/multi_agent_pong_appo.py", + main = "examples/algorithms/appo/multi_agent_pong_appo.py", tags = [ "exclusive", "learning_tests", @@ -483,29 +439,12 @@ py_test( ], ) -#@OldAPIStack -py_test( - name = "learning_tests_multi_agent_cartpole_w_100_policies_appo_old_api_stack", - size = "large", - srcs = ["algorithms/tests/run_regression_tests.py"], - args = ["--dir=../tuned_examples/appo"], - data = ["tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py"], - main = "algorithms/tests/run_regression_tests.py", - tags = [ - "exclusive", - "learning_tests", - "learning_tests_discrete", - "learning_tests_pytorch_use_all_core", - "team:rllib", - ], -) - # BC # CartPole py_test( name = "learning_tests_cartpole_bc", size = "medium", - srcs = ["tuned_examples/bc/cartpole_bc.py"], + srcs = ["examples/algorithms/bc/cartpole_bc.py"], args = [ "--as-test", ], @@ -513,7 +452,7 @@ py_test( data = [ "offline/tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/bc/cartpole_bc.py", + main = "examples/algorithms/bc/cartpole_bc.py", tags = [ "exclusive", "learning_tests", @@ -527,7 +466,7 @@ py_test( py_test( name = "learning_tests_cartpole_bc_gpu", size = "medium", - srcs = ["tuned_examples/bc/cartpole_bc.py"], + srcs = ["examples/algorithms/bc/cartpole_bc.py"], args = [ "--as-test", "--num-gpus-per-learner=1", @@ -536,7 +475,7 @@ py_test( data = [ "offline/tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/bc/cartpole_bc.py", + main = "examples/algorithms/bc/cartpole_bc.py", tags = [ "exclusive", "gpu", @@ -553,7 +492,7 @@ py_test( py_test( name = "learning_tests_cartpole_bc_with_offline_evaluation", size = "medium", - srcs = ["tuned_examples/bc/cartpole_bc_with_offline_evaluation.py"], + srcs = ["examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py"], args = [ "--as-test", "--offline-evaluation-interval=1", @@ -563,7 +502,7 @@ py_test( data = [ "offline/tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/bc/cartpole_bc_with_offline_evaluation.py", + main = "examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py", tags = [ "exclusive", "learning_tests", @@ -577,7 +516,7 @@ py_test( py_test( name = "learning_tests_cartpole_bc_with_offline_evaluation_gpu", size = "medium", - srcs = ["tuned_examples/bc/cartpole_bc_with_offline_evaluation.py"], + srcs = ["examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py"], args = [ "--as-test", "--num-gpus-per-learner=1", @@ -589,7 +528,7 @@ py_test( data = [ "offline/tests/data/cartpole/cartpole-v1_large", ], - main = 
"tuned_examples/bc/cartpole_bc_with_offline_evaluation.py", + main = "examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py", tags = [ "exclusive", "learning_tests", @@ -606,7 +545,7 @@ py_test( py_test( name = "learning_tests_pendulum_cql", size = "large", - srcs = ["tuned_examples/cql/pendulum_cql.py"], + srcs = ["examples/algorithms/cql/pendulum_cql.py"], args = [ "--as-test", ], @@ -614,7 +553,7 @@ py_test( data = [ "offline/tests/data/pendulum/pendulum-v1_enormous", ], - main = "tuned_examples/cql/pendulum_cql.py", + main = "examples/algorithms/cql/pendulum_cql.py", tags = [ "exclusive", "learning_tests", @@ -632,7 +571,7 @@ py_test( py_test( name = "learning_tests_pendulum_cql_gpu", size = "large", - srcs = ["tuned_examples/cql/pendulum_cql.py"], + srcs = ["examples/algorithms/cql/pendulum_cql.py"], args = [ "--as-test", "--num-gpus-per-learner=1", @@ -641,7 +580,7 @@ py_test( data = [ "offline/tests/data/pendulum/pendulum-v1_enormous", ], - main = "tuned_examples/cql/pendulum_cql.py", + main = "examples/algorithms/cql/pendulum_cql.py", tags = [ "exclusive", "gpu", @@ -661,11 +600,11 @@ py_test( py_test( name = "learning_tests_cartpole_dqn", size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/cartpole_dqn.py"], args = [ "--as-test", ], - main = "tuned_examples/dqn/cartpole_dqn.py", + main = "examples/algorithms/dqn/cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -679,13 +618,13 @@ py_test( py_test( name = "learning_tests_cartpole_dqn_gpu", size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/cartpole_dqn.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/dqn/cartpole_dqn.py", + main = "examples/algorithms/dqn/cartpole_dqn.py", tags = [ "exclusive", "gpu", @@ -700,12 +639,12 @@ py_test( py_test( name = "learning_tests_cartpole_dqn_multi_cpu", size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/cartpole_dqn.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/dqn/cartpole_dqn.py", + main = "examples/algorithms/dqn/cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -719,13 +658,13 @@ py_test( py_test( name = "learning_tests_cartpole_dqn_multi_gpu", size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/cartpole_dqn.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/dqn/cartpole_dqn.py", + main = "examples/algorithms/dqn/cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -743,13 +682,13 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_dqn", size = "large", - srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=4", ], - main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py", + main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -763,7 +702,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_dqn_gpu", size = "large", - srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"], args = [ "--as-test", "--num-agents=2", @@ -771,7 +710,7 @@ py_test( "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py", + main = 
"examples/algorithms/dqn/multi_agent_cartpole_dqn.py", tags = [ "exclusive", "gpu", @@ -786,14 +725,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_dqn_multi_cpu", size = "large", - srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=5", "--num-learners=2", ], - main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py", + main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -807,7 +746,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_dqn_multi_gpu", size = "large", - srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"], args = [ "--as-test", "--num-agents=2", @@ -815,7 +754,7 @@ py_test( "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py", + main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -827,41 +766,16 @@ py_test( ], ) -# DreamerV3 -# takes too long (up to 20-30min to learn -200 on 1 GPU) -# Pendulum -# py_test( -# name = "learning_tests_pendulum_dreamerv3_gpu", -# size = "large", -# srcs = ["tuned_examples/dreamerv3/pendulum_dreamerv3.py"], -# args = [ -# "--as-test", -# "--num-gpus-per-learner=1", -# "--num-learners=1", -# "--num-env-runners=4", -# ], -# main = "tuned_examples/dreamerv3/pendulum_dreamerv3.py", -# tags = [ -# "exclusive", -# "gpu", -# "learning_tests", -# "learning_tests_continuous", -# "learning_tests_pytorch_use_all_core", -# "team:rllib", -# "torch_only", -# ], -# ) - # IMPALA # CartPole py_test( name = "learning_tests_cartpole_impala", size = "large", - srcs = ["tuned_examples/impala/cartpole_impala.py"], + srcs = ["examples/algorithms/impala/cartpole_impala.py"], args = [ "--as-test", ], - main = "tuned_examples/impala/cartpole_impala.py", + main = "examples/algorithms/impala/cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -874,12 +788,12 @@ py_test( py_test( name = "learning_tests_cartpole_impala_gpu", size = "large", - srcs = ["tuned_examples/impala/cartpole_impala.py"], + srcs = ["examples/algorithms/impala/cartpole_impala.py"], args = [ "--as-test", "--num-gpus-per-learner=1", ], - main = "tuned_examples/impala/cartpole_impala.py", + main = "examples/algorithms/impala/cartpole_impala.py", tags = [ "exclusive", "gpu", @@ -894,12 +808,12 @@ py_test( py_test( name = "learning_tests_cartpole_impala_multi_cpu", size = "large", - srcs = ["tuned_examples/impala/cartpole_impala.py"], + srcs = ["examples/algorithms/impala/cartpole_impala.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/impala/cartpole_impala.py", + main = "examples/algorithms/impala/cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -913,13 +827,13 @@ py_test( py_test( name = "learning_tests_cartpole_impala_multi_gpu", size = "large", - srcs = ["tuned_examples/impala/cartpole_impala.py"], + srcs = ["examples/algorithms/impala/cartpole_impala.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/impala/cartpole_impala.py", + main = "examples/algorithms/impala/cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -935,13 +849,13 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_impala", size = "large", - srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"], + srcs = 
["examples/algorithms/impala/multi_agent_cartpole_impala.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=6", ], - main = "tuned_examples/impala/multi_agent_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -955,14 +869,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_impala_gpu", size = "large", - srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"], args = [ "--as-test", "--num-agents=2", "--num-gpus-per-learner=1", "--num-cpus=6", ], - main = "tuned_examples/impala/multi_agent_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_cartpole_impala.py", tags = [ "exclusive", "gpu", @@ -977,14 +891,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_impala_multi_cpu", size = "large", - srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", "--num-cpus=7", ], - main = "tuned_examples/impala/multi_agent_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -998,7 +912,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_impala_multi_gpu", size = "large", - srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"], args = [ "--as-test", "--num-agents=2", @@ -1006,7 +920,7 @@ py_test( "--num-gpus-per-learner=1", "--num-cpus=7", ], - main = "tuned_examples/impala/multi_agent_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1022,11 +936,11 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_impala", size = "large", - srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/stateless_cartpole_impala.py"], args = [ "--as-test", ], - main = "tuned_examples/impala/stateless_cartpole_impala.py", + main = "examples/algorithms/impala/stateless_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1040,13 +954,13 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_impala_multi_gpu", size = "large", - srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/stateless_cartpole_impala.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/impala/stateless_cartpole_impala.py", + main = "examples/algorithms/impala/stateless_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1062,11 +976,11 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_impala", size = "large", - srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py"], args = [ "--as-test", ], - main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1076,21 +990,13 @@ py_test( "torch_only", ], ) -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_impala_multi_gpu", -# main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py", -# tags = ["team:rllib", "exclusive", 
"learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"], -# size = "large", -# srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"], -# args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"] -# ) # IQL # Pendulum-v1 (enormous) py_test( name = "learning_tests_pendulum_iql", size = "large", - srcs = ["tuned_examples/iql/pendulum_iql.py"], + srcs = ["examples/algorithms/iql/pendulum_iql.py"], args = [ "--as-test", "--num-cpus=32", @@ -1099,7 +1005,7 @@ py_test( data = [ "offline/tests/data/pendulum/pendulum-v1_enormous", ], - main = "tuned_examples/iql/pendulum_iql.py", + main = "examples/algorithms/iql/pendulum_iql.py", tags = [ "exclusive", "learning_tests", @@ -1114,7 +1020,7 @@ py_test( py_test( name = "learning_tests_pendulum_iql_gpu", size = "large", - srcs = ["tuned_examples/iql/pendulum_iql.py"], + srcs = ["examples/algorithms/iql/pendulum_iql.py"], args = [ "--as-test", "--num-cpus=32", @@ -1124,7 +1030,7 @@ py_test( data = [ "offline/tests/data/pendulum/pendulum-v1_enormous", ], - main = "tuned_examples/iql/pendulum_iql.py", + main = "examples/algorithms/iql/pendulum_iql.py", tags = [ "exclusive", "gpu", @@ -1141,7 +1047,7 @@ py_test( py_test( name = "learning_tests_cartpole_marwil", size = "large", - srcs = ["tuned_examples/marwil/cartpole_marwil.py"], + srcs = ["examples/algorithms/marwil/cartpole_marwil.py"], args = [ "--as-test", ], @@ -1149,7 +1055,7 @@ py_test( data = [ "offline/tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/marwil/cartpole_marwil.py", + main = "examples/algorithms/marwil/cartpole_marwil.py", tags = [ "exclusive", "learning_tests", @@ -1164,7 +1070,7 @@ py_test( py_test( name = "learning_tests_cartpole_marwil_gpu", size = "large", - srcs = ["tuned_examples/marwil/cartpole_marwil.py"], + srcs = ["examples/algorithms/marwil/cartpole_marwil.py"], args = [ "--as-test", "--num-gpus-per-learner=1", @@ -1173,7 +1079,7 @@ py_test( data = [ "offline/tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/marwil/cartpole_marwil.py", + main = "examples/algorithms/marwil/cartpole_marwil.py", tags = [ "exclusive", "gpu", @@ -1190,11 +1096,11 @@ py_test( py_test( name = "learning_tests_cartpole_ppo", size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_ppo.py"], args = [ "--as-test", ], - main = "tuned_examples/ppo/cartpole_ppo.py", + main = "examples/algorithms/ppo/cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1207,13 +1113,13 @@ py_test( py_test( name = "learning_tests_cartpole_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_ppo.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/cartpole_ppo.py", + main = "examples/algorithms/ppo/cartpole_ppo.py", tags = [ "exclusive", "gpu", @@ -1228,12 +1134,12 @@ py_test( py_test( name = "learning_tests_cartpole_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_ppo.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/ppo/cartpole_ppo.py", + main = "examples/algorithms/ppo/cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1247,13 +1153,13 @@ py_test( py_test( name = "learning_tests_cartpole_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo.py"], + srcs = 
["examples/algorithms/ppo/cartpole_ppo.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/cartpole_ppo.py", + main = "examples/algorithms/ppo/cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1269,12 +1175,12 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_ppo", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", ], - main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1287,14 +1193,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py", tags = [ "exclusive", "gpu", @@ -1309,13 +1215,13 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", ], - main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1329,14 +1235,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1352,11 +1258,11 @@ py_test( py_test( name = "learning_tests_cartpole_truncated_ppo", size = "large", - srcs = ["tuned_examples/ppo/cartpole_truncated_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_truncated_ppo.py"], args = [ "--as-test", ], - main = "tuned_examples/ppo/cartpole_truncated_ppo.py", + main = "examples/algorithms/ppo/cartpole_truncated_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1370,11 +1276,11 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_ppo", size = "large", - srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"], args = [ "--as-test", ], - main = "tuned_examples/ppo/stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1387,13 +1293,13 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/stateless_cartpole_ppo.py", tags = [ "exclusive", "gpu", @@ -1408,12 +1314,12 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_ppo_multi_cpu", size = "large", - srcs = 
["tuned_examples/ppo/stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/ppo/stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1427,13 +1333,13 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1449,12 +1355,12 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_ppo", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", ], - main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1467,14 +1373,14 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py", tags = [ "exclusive", "gpu", @@ -1489,13 +1395,13 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", ], - main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1509,14 +1415,14 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1532,13 +1438,13 @@ py_test( py_test( name = "learning_tests_multi_agent_footsies_ppo", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"], args = [ "--as-test", "--num-env-runners=6", "--evaluation-num-env-runners=2", ], - main = "tuned_examples/ppo/multi_agent_footsies_ppo.py", + main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1550,7 +1456,7 @@ py_test( py_test( name = "learning_tests_multi_agent_footsies_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"], + srcs = 
["examples/algorithms/ppo/multi_agent_footsies_ppo.py"], args = [ "--as-test", "--num-env-runners=20", @@ -1558,7 +1464,7 @@ py_test( "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_footsies_ppo.py", + main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1571,14 +1477,14 @@ py_test( py_test( name = "learning_tests_multi_agent_footsies_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"], args = [ "--as-test", "--num-env-runners=6", "--evaluation-num-env-runners=2", "--num-learners=2", ], - main = "tuned_examples/ppo/multi_agent_footsies_ppo.py", + main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1590,7 +1496,7 @@ py_test( py_test( name = "learning_tests_multi_agent_footsies_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"], args = [ "--as-test", "--num-env-runners=20", @@ -1598,7 +1504,7 @@ py_test( "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_footsies_ppo.py", + main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1612,11 +1518,11 @@ py_test( py_test( name = "learning_tests_pendulum_ppo", size = "large", - srcs = ["tuned_examples/ppo/pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/pendulum_ppo.py"], args = [ "--as-test", ], - main = "tuned_examples/ppo/pendulum_ppo.py", + main = "examples/algorithms/ppo/pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1629,13 +1535,13 @@ py_test( py_test( name = "learning_tests_pendulum_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/pendulum_ppo.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/pendulum_ppo.py", + main = "examples/algorithms/ppo/pendulum_ppo.py", tags = [ "exclusive", "gpu", @@ -1650,12 +1556,12 @@ py_test( py_test( name = "learning_tests_pendulum_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/pendulum_ppo.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/ppo/pendulum_ppo.py", + main = "examples/algorithms/ppo/pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1669,13 +1575,13 @@ py_test( py_test( name = "learning_tests_pendulum_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/pendulum_ppo.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/pendulum_ppo.py", + main = "examples/algorithms/ppo/pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1691,12 +1597,12 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"], args = [ "--as-test", "--num-agents=2", ], - main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", + main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1709,14 +1615,14 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo_gpu", size = "large", - srcs = 
["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", + main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py", tags = [ "exclusive", "gpu", @@ -1731,13 +1637,13 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", ], - main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", + main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1751,14 +1657,14 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", + main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1775,11 +1681,11 @@ py_test( py_test( name = "learning_tests_mountaincar_sac", size = "large", - srcs = ["tuned_examples/sac/mountaincar_sac.py"], + srcs = ["examples/algorithms/sac/mountaincar_sac.py"], args = [ "--as-test", ], - main = "tuned_examples/sac/mountaincar_sac.py", + main = "examples/algorithms/sac/mountaincar_sac.py", tags = [ "exclusive", "learning_tests", @@ -1792,13 +1698,13 @@ py_test( py_test( name = "learning_tests_mountaincar_sac_gpu", size = "large", - srcs = ["tuned_examples/sac/mountaincar_sac.py"], + srcs = ["examples/algorithms/sac/mountaincar_sac.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/mountaincar_sac.py", + main = "examples/algorithms/sac/mountaincar_sac.py", tags = [ "exclusive", "gpu", @@ -1812,12 +1718,12 @@ py_test( py_test( name = "learning_tests_mountaincar_sac_multi_cpu", size = "large", - srcs = ["tuned_examples/sac/mountaincar_sac.py"], + srcs = ["examples/algorithms/sac/mountaincar_sac.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/sac/mountaincar_sac.py", + main = "examples/algorithms/sac/mountaincar_sac.py", tags = [ "exclusive", "learning_tests", @@ -1830,13 +1736,13 @@ py_test( py_test( name = "learning_tests_mountaincar_sac_multi_gpu", size = "large", - srcs = ["tuned_examples/sac/mountaincar_sac.py"], + srcs = ["examples/algorithms/sac/mountaincar_sac.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/mountaincar_sac.py", + main = "examples/algorithms/sac/mountaincar_sac.py", tags = [ "exclusive", "learning_tests", @@ -1851,11 +1757,11 @@ py_test( py_test( name = "learning_tests_pendulum_sac", size = "large", - srcs = ["tuned_examples/sac/pendulum_sac.py"], + srcs = ["examples/algorithms/sac/pendulum_sac.py"], args = [ "--as-test", ], - main = "tuned_examples/sac/pendulum_sac.py", + main = "examples/algorithms/sac/pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1868,13 +1774,13 @@ py_test( py_test( name = "learning_tests_pendulum_sac_gpu", size = "large", - srcs = ["tuned_examples/sac/pendulum_sac.py"], + srcs = ["examples/algorithms/sac/pendulum_sac.py"], args = [ 
"--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/pendulum_sac.py", + main = "examples/algorithms/sac/pendulum_sac.py", tags = [ "exclusive", "gpu", @@ -1888,12 +1794,12 @@ py_test( py_test( name = "learning_tests_pendulum_sac_multi_cpu", size = "large", - srcs = ["tuned_examples/sac/pendulum_sac.py"], + srcs = ["examples/algorithms/sac/pendulum_sac.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/sac/pendulum_sac.py", + main = "examples/algorithms/sac/pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1906,13 +1812,13 @@ py_test( py_test( name = "learning_tests_pendulum_sac_multi_gpu", size = "large", - srcs = ["tuned_examples/sac/pendulum_sac.py"], + srcs = ["examples/algorithms/sac/pendulum_sac.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/pendulum_sac.py", + main = "examples/algorithms/sac/pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1927,13 +1833,13 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_sac", size = "large", - srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"], + srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=4", ], - main = "tuned_examples/sac/multi_agent_pendulum_sac.py", + main = "examples/algorithms/sac/multi_agent_pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1946,7 +1852,7 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_sac_gpu", size = "large", - srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"], + srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"], args = [ "--as-test", "--num-agents=2", @@ -1954,7 +1860,7 @@ py_test( "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/multi_agent_pendulum_sac.py", + main = "examples/algorithms/sac/multi_agent_pendulum_sac.py", tags = [ "exclusive", "gpu", @@ -1968,12 +1874,12 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_sac_multi_cpu", size = "large", - srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"], + srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"], args = [ "--num-agents=2", "--num-learners=2", ], - main = "tuned_examples/sac/multi_agent_pendulum_sac.py", + main = "examples/algorithms/sac/multi_agent_pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1986,13 +1892,13 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_sac_multi_gpu", size = "large", - srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"], + srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"], args = [ "--num-agents=2", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/multi_agent_pendulum_sac.py", + main = "examples/algorithms/sac/multi_agent_pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -2428,21 +2334,6 @@ py_test( ], ) -# -------------------------------------------------------------------- -# ConnectorV2 tests -# rllib/connector/ -# -# Tag: connector_v2 -# -------------------------------------------------------------------- - -# TODO (sven): Add these tests in a separate PR. 
-# py_test( -# name = "connectors/tests/test_connector_v2", -# tags = ["team:rllib", "connector_v2"], -# size = "small", -# srcs = ["connectors/tests/test_connector_v2.py"] -# ) - # -------------------------------------------------------------------- # Env tests # rllib/env/ diff --git a/rllib/algorithms/dreamerv3/README.md b/rllib/algorithms/dreamerv3/README.md index 8db9fcbae9f1..f36de087c00d 100644 --- a/rllib/algorithms/dreamerv3/README.md +++ b/rllib/algorithms/dreamerv3/README.md @@ -42,18 +42,18 @@ Here are some examples on how to set these config settings within your `DreamerV [documentation page here](https://docs.ray.io/en/latest/rllib/index.html#rllib-in-60-seconds). Use the config examples and templates in the -[tuned_examples folder](../../tuned_examples/dreamerv3) +[examples folder](../../examples/algorithms/dreamerv3) in combination with the following scripts and command lines in order to run RLlib's DreamerV3 algorithm in your experiments: -### [Atari100k](../../tuned_examples/dreamerv3/atari_100k_dreamerv3.py) +### [Atari100k](../../examples/algorithms/dreamerv3/atari_100k_dreamerv3.py) ```shell -$ cd ray/rllib/tuned_examples/dreamerv3/ +$ cd ray/rllib/examples/algorithms/dreamerv3/ $ python atari_100k_dreamerv3.py --env ale_py:ALE/Pong-v5 ``` -### [DeepMind Control Suite (vision)](../../tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py) +### [DeepMind Control Suite (vision)](../../examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py) ```shell -$ cd ray/rllib/tuned_examples/dreamerv3/ +$ cd ray/rllib/examples/algorithms/dreamerv3/ $ python dm_control_suite_vision_dreamerv3.py --env DMC/cartpole/swingup ``` Other `--env` options for the DM Control Suite would be `--env DMC/hopper/hop`, `--env DMC/walker/walk`, etc.. @@ -122,8 +122,8 @@ $ python flappy_bird.py ``` This should be it. Feel free to try out running this on multiple GPUs using these -more advanced config examples [here (Atari100k)](../../tuned_examples/dreamerv3/atari_100k_dreamerv3.py) and -[here (DM Control Suite)](../../tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py). +more advanced config examples [here (Atari100k)](../../examples/algorithms/dreamerv3/atari_100k_dreamerv3.py) and +[here (DM Control Suite)](../../examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py). Also see the notes below on good recipes for running on multiple GPUs. IMPORTANT: DreamerV3 out-of-the-box only supports image observation spaces of diff --git a/rllib/benchmarks/ppo/benchmark_atari_ppo.py b/rllib/benchmarks/ppo/benchmark_atari_ppo.py index d62e18b01407..ad8f05691fb7 100644 --- a/rllib/benchmarks/ppo/benchmark_atari_ppo.py +++ b/rllib/benchmarks/ppo/benchmark_atari_ppo.py @@ -96,7 +96,7 @@ # Compile the base command running the actual `tuned_example` script. 
base_commands = [ "python", - "../../tuned_examples/ppo/atari_ppo.py", + "../../examples/algorithms/ppo/atari_ppo.py", f"--num-env-runners={args.num_env_runners}" if args.num_env_runners else "", f"--num-learners={args.num_learners}", f"--num-gpus-per-learner={args.num_gpus_per_learner}", diff --git a/rllib/tuned_examples/dqn/atari-dist-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-dist-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/atari-dist-dqn.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-dist-dqn.yaml diff --git a/rllib/tuned_examples/dqn/atari-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/atari-dqn.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-dqn.yaml diff --git a/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-duel-ddqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/atari-duel-ddqn.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-duel-ddqn.yaml diff --git a/rllib/tuned_examples/impala/atari-impala-large.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala-large.yaml similarity index 100% rename from rllib/tuned_examples/impala/atari-impala-large.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-impala-large.yaml diff --git a/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala-multi-gpu.yaml similarity index 100% rename from rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-impala-multi-gpu.yaml diff --git a/rllib/tuned_examples/impala/atari-impala.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala.yaml similarity index 100% rename from rllib/tuned_examples/impala/atari-impala.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-impala.yaml diff --git a/rllib/tuned_examples/sac/atari-sac.yaml b/rllib/examples/_old_api_stack/algorithms/atari-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/atari-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-sac.yaml diff --git a/rllib/tuned_examples/appo/cartpole-appo-separate-losses.py b/rllib/examples/_old_api_stack/algorithms/cartpole-appo-separate-losses.py similarity index 100% rename from rllib/tuned_examples/appo/cartpole-appo-separate-losses.py rename to rllib/examples/_old_api_stack/algorithms/cartpole-appo-separate-losses.py diff --git a/rllib/tuned_examples/bc/cartpole-bc.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-bc.yaml similarity index 100% rename from rllib/tuned_examples/bc/cartpole-bc.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-bc.yaml diff --git a/rllib/tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/cartpole-crashing-and-stalling-recreate-workers-appo.py similarity index 100% rename from rllib/tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py rename to rllib/examples/_old_api_stack/algorithms/cartpole-crashing-and-stalling-recreate-workers-appo.py diff --git a/rllib/tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/cartpole-crashing-recreate-workers-appo.py similarity index 100% rename from rllib/tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py rename to 
rllib/examples/_old_api_stack/algorithms/cartpole-crashing-recreate-workers-appo.py diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-fake-gpus.yaml similarity index 100% rename from rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-fake-gpus.yaml diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-param-noise.yaml similarity index 100% rename from rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-param-noise.yaml diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-softq.yaml similarity index 100% rename from rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-softq.yaml diff --git a/rllib/tuned_examples/dqn/cartpole-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/cartpole-dqn.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn.yaml diff --git a/rllib/tuned_examples/marwil/cartpole-marwil.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-marwil.yaml similarity index 100% rename from rllib/tuned_examples/marwil/cartpole-marwil.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-marwil.yaml diff --git a/rllib/tuned_examples/sac/cartpole-sac.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/cartpole-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-sac.yaml diff --git a/rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml b/rllib/examples/_old_api_stack/algorithms/frozenlake-appo-vtrace.yaml similarity index 100% rename from rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml rename to rllib/examples/_old_api_stack/algorithms/frozenlake-appo-vtrace.yaml diff --git a/rllib/tuned_examples/cql/halfcheetah-bc.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-bc.yaml similarity index 100% rename from rllib/tuned_examples/cql/halfcheetah-bc.yaml rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-bc.yaml diff --git a/rllib/tuned_examples/cql/halfcheetah-cql.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-cql.yaml similarity index 100% rename from rllib/tuned_examples/cql/halfcheetah-cql.yaml rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-cql.yaml diff --git a/rllib/tuned_examples/ppo/halfcheetah-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/halfcheetah-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-ppo.yaml diff --git a/rllib/tuned_examples/cql/hopper-bc.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-bc.yaml similarity index 100% rename from rllib/tuned_examples/cql/hopper-bc.yaml rename to rllib/examples/_old_api_stack/algorithms/hopper-bc.yaml diff --git a/rllib/tuned_examples/cql/hopper-cql.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-cql.yaml similarity index 100% rename from rllib/tuned_examples/cql/hopper-cql.yaml rename to rllib/examples/_old_api_stack/algorithms/hopper-cql.yaml diff --git a/rllib/tuned_examples/ppo/hopper-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-ppo.yaml similarity index 
100% rename from rllib/tuned_examples/ppo/hopper-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/hopper-ppo.yaml diff --git a/rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml b/rllib/examples/_old_api_stack/algorithms/humanoid-ppo-gae.yaml similarity index 100% rename from rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml rename to rllib/examples/_old_api_stack/algorithms/humanoid-ppo-gae.yaml diff --git a/rllib/tuned_examples/ppo/humanoid-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/humanoid-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/humanoid-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/humanoid-ppo.yaml diff --git a/rllib/tuned_examples/appo/memory-leak-test-appo.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-appo.yaml similarity index 100% rename from rllib/tuned_examples/appo/memory-leak-test-appo.yaml rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-appo.yaml diff --git a/rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-dqn.yaml diff --git a/rllib/tuned_examples/ppo/memory-leak-test-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/memory-leak-test-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml diff --git a/rllib/tuned_examples/sac/memory-leak-test-sac.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/memory-leak-test-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-sac.yaml diff --git a/rllib/tuned_examples/sac/mspacman-sac.yaml b/rllib/examples/_old_api_stack/algorithms/mspacman-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/mspacman-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/mspacman-sac.yaml diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-recreate-workers-appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-recreate-workers-appo.py diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py diff --git a/rllib/tuned_examples/appo/multi_agent_cartpole_appo_old_api_stack.py b/rllib/examples/_old_api_stack/algorithms/multi_agent_cartpole_appo_old_api_stack.py similarity index 100% 
rename from rllib/tuned_examples/appo/multi_agent_cartpole_appo_old_api_stack.py
rename to rllib/examples/_old_api_stack/algorithms/multi_agent_cartpole_appo_old_api_stack.py
diff --git a/rllib/tuned_examples/cql/pendulum-cql.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
similarity index 94%
rename from rllib/tuned_examples/cql/pendulum-cql.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
index 3baf6c8fdc6e..d993a151763a 100644
--- a/rllib/tuned_examples/cql/pendulum-cql.yaml
+++ b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
@@ -1,6 +1,6 @@
 # @OldAPIStack
 # Given a SAC-generated offline file generated via:
-# rllib train -f tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
+# rllib train -f examples/_old_api_stack/algorithms/pendulum-sac.yaml --no-ray-ui
 # Pendulum CQL can attain ~ -300 reward in 10k from that file.
 
 pendulum-cql:
diff --git a/rllib/tuned_examples/sac/pendulum-sac.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml
diff --git a/rllib/tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-ppo.yaml
diff --git a/rllib/tuned_examples/sac/pendulum-transformed-actions-sac.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum-transformed-actions-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-sac.yaml
diff --git a/rllib/tuned_examples/dqn/pong-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/pong-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/pong-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-dqn.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala-fast.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala-fast.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala-fast.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala-fast.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala-vectorized.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala-vectorized.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala-vectorized.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala-vectorized.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala.yaml
diff --git a/rllib/tuned_examples/dqn/pong-rainbow.yaml b/rllib/examples/_old_api_stack/algorithms/pong-rainbow.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/pong-rainbow.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-rainbow.yaml
diff --git a/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/unity3d-soccer-strikers-vs-goalie-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/unity3d-soccer-strikers-vs-goalie-ppo.yaml
diff --git a/rllib/tuned_examples/ppo/walker2d-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/walker2d-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/walker2d-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/walker2d-ppo.yaml
diff --git a/rllib/tuned_examples/appo/cartpole_appo.py b/rllib/examples/algorithms/appo/cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole_appo.py
rename to rllib/examples/algorithms/appo/cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/halfcheetah_appo.py b/rllib/examples/algorithms/appo/halfcheetah_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/halfcheetah_appo.py
rename to rllib/examples/algorithms/appo/halfcheetah_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_cartpole_appo.py b/rllib/examples/algorithms/appo/multi_agent_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_cartpole_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_pong_appo.py b/rllib/examples/algorithms/appo/multi_agent_pong_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_pong_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_pong_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_stateless_cartpole_appo.py b/rllib/examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_stateless_cartpole_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/pendulum_appo.py b/rllib/examples/algorithms/appo/pendulum_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/pendulum_appo.py
rename to rllib/examples/algorithms/appo/pendulum_appo.py
diff --git a/rllib/tuned_examples/appo/pong_appo.py b/rllib/examples/algorithms/appo/pong_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/pong_appo.py
rename to rllib/examples/algorithms/appo/pong_appo.py
diff --git a/rllib/tuned_examples/appo/stateless_cartpole_appo.py b/rllib/examples/algorithms/appo/stateless_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/stateless_cartpole_appo.py
rename to rllib/examples/algorithms/appo/stateless_cartpole_appo.py
diff --git a/rllib/tuned_examples/bc/benchmark_rlunplugged_atari_pong_bc.py b/rllib/examples/algorithms/bc/benchmark_rlunplugged_atari_pong_bc.py
similarity index 100%
rename from rllib/tuned_examples/bc/benchmark_rlunplugged_atari_pong_bc.py
rename to rllib/examples/algorithms/bc/benchmark_rlunplugged_atari_pong_bc.py
diff --git a/rllib/tuned_examples/bc/cartpole_bc.py b/rllib/examples/algorithms/bc/cartpole_bc.py
similarity index 97%
rename from rllib/tuned_examples/bc/cartpole_bc.py
rename to rllib/examples/algorithms/bc/cartpole_bc.py
index 8f2b3196b970..57618f0739e7 100644
--- a/rllib/tuned_examples/bc/cartpole_bc.py
+++ b/rllib/examples/algorithms/bc/cartpole_bc.py
@@ -25,8 +25,7 @@
 # Define the data paths.
data_path = "offline/tests/data/cartpole/cartpole-v1_large" -base_path = Path(__file__).parents[2] -print(f"base_path={base_path}") +base_path = Path(__file__).parents[3] data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py similarity index 99% rename from rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py rename to rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py index 5bb6fd604807..30a2d4bb5a1e 100644 --- a/rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py +++ b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py @@ -51,7 +51,7 @@ # Define the data paths. data_path = "offline/tests/data/cartpole/cartpole-v1_large" -base_path = Path(__file__).parents[2] +base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/tuned_examples/bc/pendulum_bc.py b/rllib/examples/algorithms/bc/pendulum_bc.py similarity index 98% rename from rllib/tuned_examples/bc/pendulum_bc.py rename to rllib/examples/algorithms/bc/pendulum_bc.py index 55349ab107f1..b4417949d906 100644 --- a/rllib/tuned_examples/bc/pendulum_bc.py +++ b/rllib/examples/algorithms/bc/pendulum_bc.py @@ -24,7 +24,7 @@ # Define the data paths. data_path = "offline/tests/data/pendulum/pendulum-v1_large" -base_path = Path(__file__).parents[2] +base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/tuned_examples/cql/pendulum_cql.py b/rllib/examples/algorithms/cql/pendulum_cql.py similarity index 98% rename from rllib/tuned_examples/cql/pendulum_cql.py rename to rllib/examples/algorithms/cql/pendulum_cql.py index 1b66bf67ab2b..984c3626fae9 100644 --- a/rllib/tuned_examples/cql/pendulum_cql.py +++ b/rllib/examples/algorithms/cql/pendulum_cql.py @@ -23,7 +23,7 @@ ), "This tuned example works only with `Pendulum-v1`." # Define the base path relative to this file. -base_path = Path(__file__).parents[2] +base_path = Path(__file__).parents[3] # Use the larger data set of Pendulum we have. Note, these are # parquet data, the default in `AlgorithmConfig.offline_data`. 
data_path = base_path / "offline/tests/data/pendulum/pendulum-v1_enormous" diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari.py b/rllib/examples/algorithms/dqn/benchmark_dqn_atari.py similarity index 100% rename from rllib/tuned_examples/dqn/benchmark_dqn_atari.py rename to rllib/examples/algorithms/dqn/benchmark_dqn_atari.py diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py b/rllib/examples/algorithms/dqn/benchmark_dqn_atari_rllib_preprocessing.py similarity index 100% rename from rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py rename to rllib/examples/algorithms/dqn/benchmark_dqn_atari_rllib_preprocessing.py diff --git a/rllib/tuned_examples/dqn/cartpole_dqn.py b/rllib/examples/algorithms/dqn/cartpole_dqn.py similarity index 100% rename from rllib/tuned_examples/dqn/cartpole_dqn.py rename to rllib/examples/algorithms/dqn/cartpole_dqn.py diff --git a/rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py b/rllib/examples/algorithms/dqn/multi_agent_cartpole_dqn.py similarity index 100% rename from rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py rename to rllib/examples/algorithms/dqn/multi_agent_cartpole_dqn.py diff --git a/rllib/tuned_examples/dqn/stateless_cartpole_dqn.py b/rllib/examples/algorithms/dqn/stateless_cartpole_dqn.py similarity index 100% rename from rllib/tuned_examples/dqn/stateless_cartpole_dqn.py rename to rllib/examples/algorithms/dqn/stateless_cartpole_dqn.py diff --git a/rllib/tuned_examples/dreamerv3/atari_100k_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/atari_100k_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/atari_100k_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/atari_100k_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/atari_200M_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/atari_200M_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/atari_200M_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/atari_200M_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/cartpole_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/cartpole_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/cartpole_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/cartpole_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/flappy_bird_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/flappy_bird_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/flappy_bird_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/flappy_bird_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/frozenlake_2x2_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/frozenlake_2x2_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/frozenlake_2x2_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/frozenlake_2x2_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py rename to 
rllib/examples/algorithms/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/gymnasium_robotics_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/gymnasium_robotics_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/gymnasium_robotics_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/gymnasium_robotics_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/highway_env_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/highway_env_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/highway_env_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/highway_env_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/pendulum_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/pendulum_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/pendulum_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/pendulum_dreamerv3.py diff --git a/rllib/tuned_examples/impala/cartpole-impala-separate-losses.py b/rllib/examples/algorithms/impala/cartpole-impala-separate-losses.py similarity index 100% rename from rllib/tuned_examples/impala/cartpole-impala-separate-losses.py rename to rllib/examples/algorithms/impala/cartpole-impala-separate-losses.py diff --git a/rllib/tuned_examples/impala/cartpole_impala.py b/rllib/examples/algorithms/impala/cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/cartpole_impala.py rename to rllib/examples/algorithms/impala/cartpole_impala.py diff --git a/rllib/tuned_examples/impala/heavy_cartpole_impala.py b/rllib/examples/algorithms/impala/heavy_cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/heavy_cartpole_impala.py rename to rllib/examples/algorithms/impala/heavy_cartpole_impala.py diff --git a/rllib/tuned_examples/impala/multi_agent_cartpole_impala.py b/rllib/examples/algorithms/impala/multi_agent_cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/multi_agent_cartpole_impala.py rename to rllib/examples/algorithms/impala/multi_agent_cartpole_impala.py diff --git a/rllib/tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py b/rllib/examples/algorithms/impala/multi_agent_cartpole_impala_old_api_stack.py similarity index 100% rename from rllib/tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py rename to rllib/examples/algorithms/impala/multi_agent_cartpole_impala_old_api_stack.py diff --git a/rllib/tuned_examples/impala/multi_agent_stateless_cartpole_impala.py b/rllib/examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/multi_agent_stateless_cartpole_impala.py rename to rllib/examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py diff --git a/rllib/tuned_examples/impala/pendulum_impala.py b/rllib/examples/algorithms/impala/pendulum_impala.py similarity index 100% rename from rllib/tuned_examples/impala/pendulum_impala.py rename to rllib/examples/algorithms/impala/pendulum_impala.py diff --git a/rllib/tuned_examples/impala/pong_impala.py b/rllib/examples/algorithms/impala/pong_impala.py similarity index 100% rename from rllib/tuned_examples/impala/pong_impala.py rename to rllib/examples/algorithms/impala/pong_impala.py diff --git a/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py b/rllib/examples/algorithms/impala/pong_impala_pb2_hyperopt.py similarity index 100% rename from 
rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py rename to rllib/examples/algorithms/impala/pong_impala_pb2_hyperopt.py diff --git a/rllib/tuned_examples/impala/stateless_cartpole_impala.py b/rllib/examples/algorithms/impala/stateless_cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/stateless_cartpole_impala.py rename to rllib/examples/algorithms/impala/stateless_cartpole_impala.py diff --git a/rllib/tuned_examples/iql/pendulum_iql.py b/rllib/examples/algorithms/iql/pendulum_iql.py similarity index 98% rename from rllib/tuned_examples/iql/pendulum_iql.py rename to rllib/examples/algorithms/iql/pendulum_iql.py index 864d14a7fe63..eea94390d0fb 100644 --- a/rllib/tuned_examples/iql/pendulum_iql.py +++ b/rllib/examples/algorithms/iql/pendulum_iql.py @@ -24,7 +24,7 @@ # Define the data paths. data_path = "offline/tests/data/pendulum/pendulum-v1_enormous" -base_path = Path(__file__).parents[2] +base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/tuned_examples/marwil/cartpole_marwil.py b/rllib/examples/algorithms/marwil/cartpole_marwil.py similarity index 98% rename from rllib/tuned_examples/marwil/cartpole_marwil.py rename to rllib/examples/algorithms/marwil/cartpole_marwil.py index 1a38519e1565..dd20d9aadcb7 100644 --- a/rllib/tuned_examples/marwil/cartpole_marwil.py +++ b/rllib/examples/algorithms/marwil/cartpole_marwil.py @@ -24,7 +24,7 @@ # Define the data paths. data_path = "offline/tests/data/cartpole/cartpole-v1_large" -base_path = Path(__file__).parents[2] +base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/examples/algorithms/ppo/atari_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/atari_ppo.py rename to rllib/examples/algorithms/ppo/atari_ppo.py diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py b/rllib/examples/algorithms/ppo/benchmark_ppo_mujoco.py similarity index 100% rename from rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py rename to rllib/examples/algorithms/ppo/benchmark_ppo_mujoco.py diff --git a/rllib/tuned_examples/ppo/cartpole_heavy_ppo.py b/rllib/examples/algorithms/ppo/cartpole_heavy_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/cartpole_heavy_ppo.py rename to rllib/examples/algorithms/ppo/cartpole_heavy_ppo.py diff --git a/rllib/tuned_examples/ppo/cartpole_ppo.py b/rllib/examples/algorithms/ppo/cartpole_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/cartpole_ppo.py rename to rllib/examples/algorithms/ppo/cartpole_ppo.py diff --git a/rllib/tuned_examples/ppo/cartpole_truncated_ppo.py b/rllib/examples/algorithms/ppo/cartpole_truncated_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/cartpole_truncated_ppo.py rename to rllib/examples/algorithms/ppo/cartpole_truncated_ppo.py diff --git a/rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py b/rllib/examples/algorithms/ppo/memory_leak_test_ppo_new_stack.py similarity index 100% rename from rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py rename to rllib/examples/algorithms/ppo/memory_leak_test_ppo_new_stack.py diff --git a/rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_cartpole_ppo.py similarity index 100% rename from 
rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_pendulum_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_pendulum_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/pendulum_ppo.py b/rllib/examples/algorithms/ppo/pendulum_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/pendulum_ppo.py
rename to rllib/examples/algorithms/ppo/pendulum_ppo.py
diff --git a/rllib/tuned_examples/ppo/stateless_cartpole_ppo.py b/rllib/examples/algorithms/ppo/stateless_cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/stateless_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/stateless_cartpole_ppo.py
diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco.py b/rllib/examples/algorithms/sac/benchmark_sac_mujoco.py
similarity index 100%
rename from rllib/tuned_examples/sac/benchmark_sac_mujoco.py
rename to rllib/examples/algorithms/sac/benchmark_sac_mujoco.py
diff --git a/rllib/tuned_examples/sac/halfcheetah_sac.py b/rllib/examples/algorithms/sac/halfcheetah_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/halfcheetah_sac.py
rename to rllib/examples/algorithms/sac/halfcheetah_sac.py
diff --git a/rllib/tuned_examples/sac/humanoid_sac.py b/rllib/examples/algorithms/sac/humanoid_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/humanoid_sac.py
rename to rllib/examples/algorithms/sac/humanoid_sac.py
diff --git a/rllib/tuned_examples/sac/mountaincar_sac.py b/rllib/examples/algorithms/sac/mountaincar_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/mountaincar_sac.py
rename to rllib/examples/algorithms/sac/mountaincar_sac.py
diff --git a/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py b/rllib/examples/algorithms/sac/multi_agent_pendulum_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/multi_agent_pendulum_sac.py
rename to rllib/examples/algorithms/sac/multi_agent_pendulum_sac.py
diff --git a/rllib/tuned_examples/sac/pendulum_sac.py b/rllib/examples/algorithms/sac/pendulum_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum_sac.py
rename to rllib/examples/algorithms/sac/pendulum_sac.py
diff --git a/rllib/examples/multi_agent/self_play_footsies.py b/rllib/examples/multi_agent/self_play_footsies.py
index 2cc5213eced2..da9047aac019 100644
--- a/rllib/examples/multi_agent/self_play_footsies.py
+++ b/rllib/examples/multi_agent/self_play_footsies.py
@@ -2,14 +2,14 @@
 Multi-agent RLlib Footsies Simplified Example (PPO)
 
 About:
-    - This example as a simplified version of "rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    - This example is a simplified version of "rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py",
       which has more detailed comments and instructions. Please refer to that example for more information.
     - This example is created to test the self-play training progression with footsies.
     - Simplified version runs with single learner (cpu), single env runner, and single eval env runner.
 """
 
 from pathlib import Path
-from ray.rllib.tuned_examples.ppo.multi_agent_footsies_ppo import (
+from ray.rllib.examples.algorithms.ppo.multi_agent_footsies_ppo import (
     config,
     env_creator,
     stop,
diff --git a/rllib/examples/offline_rl/custom_input_api.py b/rllib/examples/offline_rl/custom_input_api.py
index d7a6974825fb..77144568f558 100644
--- a/rllib/examples/offline_rl/custom_input_api.py
+++ b/rllib/examples/offline_rl/custom_input_api.py
@@ -87,7 +87,7 @@ def input_creator(ioctx: IOContext) -> InputReader:
     # we register our custom input creator with this convenient function
     register_input("custom_input", input_creator)
 
-    # Config modified from rllib/tuned_examples/cql/pendulum-cql.yaml
+    # Config modified from rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
     default_config = get_trainable_cls(args.run).get_default_config()
     config = (
         default_config.environment("Pendulum-v1", clip_actions=True)
diff --git a/rllib/examples/offline_rl/offline_rl.py b/rllib/examples/offline_rl/offline_rl.py
index 1e8b1158221e..9abd7cbd93df 100644
--- a/rllib/examples/offline_rl/offline_rl.py
+++ b/rllib/examples/offline_rl/offline_rl.py
@@ -9,7 +9,7 @@
 Generate the offline json file by running an SAC algo until it reaches
 expert level on your command line. For example:
 $ cd ray
-$ rllib train -f rllib/tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
+$ rllib train -f rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml --no-ray-ui
 
 Also make sure that in the above SAC yaml file (pendulum-sac.yaml), you
 specify an additional "output" key with any path on your local
@@ -55,7 +55,7 @@
 if __name__ == "__main__":
     args = parser.parse_args()
 
-    # See rllib/tuned_examples/cql/pendulum-cql.yaml for comparison.
+    # See rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml for comparison.
     config = (
         cql.CQLConfig()
         .api_stack(
diff --git a/rllib/tuned_examples/__init__.py b/rllib/tuned_examples/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/rllib/tuned_examples/cleanup_experiment.py b/rllib/tuned_examples/cleanup_experiment.py
deleted file mode 100644
index 749d3ed5e522..000000000000
--- a/rllib/tuned_examples/cleanup_experiment.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""
-This script automates cleaning up a benchmark/experiment run of some algo
-against some config (with possibly more than one tune trial,
-e.g. torch=grid_search([True, False])).
-
-Run `python cleanup_experiment.py --help` for more information.
-
-Use on an input directory with trial contents e.g.:
-..
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_13-59-35dqaetxnf
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_17-21-28tbhedw72
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_10-17-54lv20cgn_
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_13-59-35kwzhax_y
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_17-21-28a5j0s7za
-
-Then run:
->> python cleanup_experiment.py --experiment-dir [parent dir w/ trial sub-dirs]
->> --output-dir [your out dir] --results-filter dumb_col_2,superfluous_col3
->> --results-max-size [max results file size in kb before(!)
zipping] - -The script will create one output sub-dir for each trial and only copy -the configuration and the csv results (filtered and every nth row removed -based on the given args). -""" - -import argparse -import json -import os -import re -import shutil - -import yaml - -parser = argparse.ArgumentParser() -parser.add_argument( - "--experiment-dir", - type=str, - help="Experiment dir in which all sub-runs (seeds) are " - "located (as sub-dirs). Each sub0-run dir must contain the files: " - "params.json and progress.csv.", -) -parser.add_argument( - "--output-dir", - type=str, - help="The output dir, in which the cleaned up output will be placed.", -) -parser.add_argument( - "--results-filter", - type=str, - help="comma-separated list of csv fields to exclude.", - default="experiment_id,pid,hostname,node_ip,trial_id,hist_stats/episode_" - "reward,hist_stats/episode_lengths,experiment_tag", -) -parser.add_argument( - "--results-max-size", - type=int, - help="the max. size of the final results.csv file (in kb). Will erase " - "every nth line in the original input to reach that goal. " - "Use 0 for no limit (default=100).", - default=100, -) - - -def process_single_run(in_dir, out_dir): - exp_dir = os.listdir(in_dir) - - # Make sure trials dir is ok. - assert ( - "params.json" in exp_dir and "progress.csv" in exp_dir - ), "params.json or progress.csv not found in {}!".format(in_dir) - - os.makedirs(out_dir, exist_ok=True) - - for file in exp_dir: - absfile = os.path.join(in_dir, file) - # Config file -> Convert to yaml and move to output dir. - if file == "params.json": - assert os.path.isfile(absfile), "{} not a file!".format(file) - with open(absfile) as fp: - contents = json.load(fp) - with open(os.path.join(out_dir, "config.yaml"), "w") as fp: - yaml.dump(contents, fp) - # Progress csv file -> Filter out some columns, cut, and write to - # output_dir. - elif file == "progress.csv": - assert os.path.isfile(absfile), "{} not a file!".format(file) - col_idx_to_filter = [] - with open(absfile) as fp: - # Get column names. - col_names_orig = fp.readline().strip().split(",") - # Split by comma (abiding to quotes), filter out - # unwanted columns, then write to disk. - cols_to_filter = args.results_filter.split(",") - for i, c in enumerate(col_names_orig): - if c in cols_to_filter: - col_idx_to_filter.insert(0, i) - col_names = col_names_orig.copy() - for idx in col_idx_to_filter: - col_names.pop(idx) - absfile_out = os.path.join(out_dir, "progress.csv") - with open(absfile_out, "w") as out_fp: - print(",".join(col_names), file=out_fp) - while True: - line = fp.readline().strip() - if not line: - break - line = re.sub( - "(,{2,})", - lambda m: ",None" * (len(m.group()) - 1) + ",", - line, - ) - cols = re.findall('".+?"|[^,]+', line) - if len(cols) != len(col_names_orig): - continue - for idx in col_idx_to_filter: - cols.pop(idx) - print(",".join(cols), file=out_fp) - - # Reduce the size of the output file if necessary. - out_size = os.path.getsize(absfile_out) - max_size = args.results_max_size * 1024 - if 0 < max_size < out_size: - # Figure out roughly every which line we have to drop. - ratio = out_size / max_size - # If ratio > 2.0, we'll have to keep only every nth line. - if ratio > 2.0: - nth = out_size // max_size - os.system( - "awk 'NR==1||NR%{}==0' {} > {}.new".format( - nth, absfile_out, absfile_out - ) - ) - # If ratio < 2.0 (>1.0), we'll have to drop every nth line. 
- else: - nth = out_size // (out_size - max_size) - os.system( - "awk 'NR==1||NR%{}!=0' {} > {}.new".format( - nth, absfile_out, absfile_out - ) - ) - os.remove(absfile_out) - os.rename(absfile_out + ".new", absfile_out) - - # Zip progress.csv into results.zip. - zip_file = os.path.join(out_dir, "results.zip") - try: - os.remove(zip_file) - except FileNotFoundError: - pass - os.system( - "zip -j {} {}".format(zip_file, os.path.join(out_dir, "progress.csv")) - ) - os.remove(os.path.join(out_dir, "progress.csv")) - - # TBX events file -> Move as is. - elif re.search("^(events\\.out\\.|params\\.pkl)", file): - assert os.path.isfile(absfile), "{} not a file!".format(file) - shutil.copyfile(absfile, os.path.join(out_dir, file)) - - -if __name__ == "__main__": - args = parser.parse_args() - exp_dir = os.listdir(args.experiment_dir) - # Loop through all sub-directories. - for i, sub_run in enumerate(sorted(exp_dir)): - abspath = os.path.join(args.experiment_dir, sub_run) - # This is a seed run. - if os.path.isdir(abspath) and re.search( - "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)", sub_run - ): - # Create meaningful output dir name: - # [algo]_[env]_[trial #]_[trial-config]_[date YYYY-MM-DD]. - cleaned_up_out = re.sub( - "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)(_.+)?(_\\d{4}-\\d{2}-\\d{2})" - "_\\d{2}-\\d{2}-\\w+", - "{:02}_\\1_\\2\\4\\5".format(i), - sub_run, - ) - # Remove superflous `env=` specifier (anv always included in name). - cleaned_up_out = re.sub( - "^(.+)env=\\w+?-v\\d+,?(.+)", "\\1\\2", cleaned_up_out - ) - out_path = os.path.join(args.output_dir, cleaned_up_out) - process_single_run(abspath, out_path) - # Done. - print("done") diff --git a/rllib/tuned_examples/dreamerv3/__init__.py b/rllib/tuned_examples/dreamerv3/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py b/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py deleted file mode 100644 index 51e9d2d2b3ef..000000000000 --- a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py +++ /dev/null @@ -1,172 +0,0 @@ -import time - -from ray import tune -from ray.rllib.algorithms.ppo.ppo import PPOConfig -from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME -from ray.tune.schedulers.pb2 import PB2 - -# Needs the following packages to be installed on Ubuntu: -# sudo apt-get libosmesa-dev -# sudo apt-get install patchelf -# python -m pip install "gymnasium[mujoco]" -# Might need to be added to bashsrc: -# export MUJOCO_GL=osmesa" -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin" - -# See the following links for becnhmark results of other libraries: -# Original paper: https://arxiv.org/abs/1812.05905 -# CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE -# AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks -benchmark_envs = { - "HalfCheetah-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "Hopper-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "InvertedPendulum-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "InvertedDoublePendulum-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "Reacher-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000}, - "Swimmer-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000}, - "Walker2d-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, -} - -pb2_scheduler = PB2( - time_attr=f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}", - metric="env_runners/episode_return_mean", - mode="max", - 
perturbation_interval=50000, - # Copy bottom % with top % weights. - quantile_fraction=0.25, - hyperparam_bounds={ - "lr": [1e-5, 1e-3], - "gamma": [0.95, 0.99], - "lambda": [0.97, 1.0], - "entropy_coeff": [0.0, 0.01], - "vf_loss_coeff": [0.01, 1.0], - "clip_param": [0.1, 0.3], - "kl_target": [0.01, 0.03], - "minibatch_size": [512, 4096], - "num_epochs": [6, 32], - "vf_share_layers": [False, True], - "use_kl_loss": [False, True], - "kl_coeff": [0.1, 0.4], - "vf_clip_param": [10.0, float("inf")], - "grad_clip": [40, 200], - }, -) - -experiment_start_time = time.time() -# Following the paper. -num_rollout_workers = 32 -for env, stop_criteria in benchmark_envs.items(): - hp_trial_start_time = time.time() - config = ( - PPOConfig() - .environment(env=env) - .env_runners( - rollout_fragment_length=1, - num_env_runners=num_rollout_workers, - # TODO (sven, simon): Add resources. - ) - .learners( - # Let's start with a small number of learner workers and - # add later a tune grid search for these resources. - # TODO (simon): Either add tune grid search here or make - # an extra script to only test scalability. - num_learners=1, - num_gpus_per_learner=1, - ) - # TODO (simon): Adjust to new model_config_dict. - .training( - lr=tune.uniform(1e-5, 1e-3), - gamma=tune.uniform(0.95, 0.99), - lambda_=tune.uniform(0.97, 1.0), - entropy_coeff=tune.choice([0.0, 0.01]), - vf_loss_coeff=tune.uniform(0.01, 1.0), - clip_param=tune.uniform(0.1, 0.3), - kl_target=tune.uniform(0.01, 0.03), - minibatch_size=tune.choice([512, 1024, 2048, 4096]), - num_epochs=tune.randint(6, 32), - vf_share_layers=tune.choice([True, False]), - use_kl_loss=tune.choice([True, False]), - kl_coeff=tune.uniform(0.1, 0.4), - vf_clip_param=tune.choice([10.0, 40.0, float("inf")]), - grad_clip=tune.choice([None, 40, 100, 200]), - train_batch_size=tune.sample_from( - lambda spec: spec.config["minibatch_size"] * num_rollout_workers - ), - model={ - "fcnet_hiddens": [64, 64], - "fcnet_activation": "tanh", - "vf_share_layers": True, - }, - ) - .reporting( - metrics_num_episodes_for_smoothing=5, - min_sample_timesteps_per_iteration=1000, - ) - .evaluation( - evaluation_duration="auto", - evaluation_interval=1, - evaluation_num_env_runners=1, - evaluation_parallel_to_training=True, - evaluation_config={ - # PPO learns stochastic policy. - "explore": False, - }, - ) - ) - - tuner = tune.Tuner( - "PPO", - param_space=config, - run_config=tune.RunConfig( - stop=stop_criteria, - name="benchmark_ppo_mujoco_pb2_" + env, - ), - tune_config=tune.TuneConfig( - scheduler=pb2_scheduler, - num_samples=8, - ), - ) - result_grid = tuner.fit() - best_result = result_grid.get_best_result() - print( - f"Finished running HP search for (env={env}) in " - f"{time.time() - hp_trial_start_time} seconds." - ) - print(f"Best result for {env}: {best_result}") - print(f"Best config for {env}: {best_result['config']}") - - # Run again with the best config. - best_trial_start_time = time.time() - tuner = tune.Tuner( - "PPO", - param_space=best_result.config, - run_config=tune.RunConfig( - stop=stop_criteria, - name="benchmark_ppo_mujoco_pb2_" + env + "_best", - ), - ) - print(f"Running best config for (env={env})...") - tuner.fit() - print( - f"Finished running best config for (env={env}) " - f"in {time.time() - best_trial_start_time} seconds." - ) - -print( - f"Finished running HP search on all MuJoCo benchmarks in " - f"{time.time() - experiment_start_time} seconds." 
-) -print( - "Results from running the best configs can be found in the " - "`benchmark_ppo_mujoco_pb2__best` directories." -) diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py b/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py deleted file mode 100644 index f768dddf03b0..000000000000 --- a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py +++ /dev/null @@ -1,165 +0,0 @@ -import time - -from ray import tune -from ray.rllib.algorithms.sac.sac import SACConfig -from ray.rllib.utils.metrics import ( - ENV_RUNNER_RESULTS, - EPISODE_RETURN_MEAN, - NUM_ENV_STEPS_SAMPLED_LIFETIME, -) -from ray.tune.schedulers.pb2 import PB2 - -# Needs the following packages to be installed on Ubuntu: -# sudo apt-get libosmesa-dev -# sudo apt-get install patchelf -# python -m pip install "gymnasium[mujoco]" -# Might need to be added to bashsrc: -# export MUJOCO_GL=osmesa" -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin" - -# See the following links for becnhmark results of other libraries: -# Original paper: https://arxiv.org/abs/1812.05905 -# CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE -# AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks -benchmark_envs = { - "HalfCheetah-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000, - }, - "Hopper-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "Humanoid-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 10000000, - }, - "Ant-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000}, - "Walker2d-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000, - }, -} - -pb2_scheduler = PB2( - time_attr=NUM_ENV_STEPS_SAMPLED_LIFETIME, - metric=f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}", - mode="max", - perturbation_interval=50000, - # Copy bottom % with top % weights. - quantile_fraction=0.25, - hyperparam_bounds={ - "actor_lr": [1e-5, 1e-3], - "critic_lr": [1e-6, 1e-4], - "alpha_lr": [1e-6, 1e-3], - "gamma": [0.95, 0.99], - "n_step": [1, 3], - "initial_alpha": [1.0, 1.5], - "tau": [0.001, 0.1], - "target_entropy": [-10, -1], - "train_batch_size": [128, 512], - "target_network_update_freq": [1, 4], - }, -) - -experiment_start_time = time.time() -for env, stop_criteria in benchmark_envs.items(): - hp_trial_start_time = time.time() - config = ( - SACConfig() - .environment(env=env) - .env_runners( - rollout_fragment_length="auto", - num_env_runners=1, - # TODO (sven, simon): Add resources. - ) - .learners( - # Note, we have a small batch and a sample/train ratio - # of 1:1, so a single GPU should be enough. - num_learners=1, - num_gpus_per_learner=1, - ) - # TODO (simon): Adjust to new model_config_dict. 
- .training( - initial_alpha=tune.choice([1.0, 1.5]), - actor_lr=tune.uniform(1e-5, 1e-3), - critic_lr=tune.uniform([1e-6, 1e-4]), - alpha_lr=tune.uniform([1e-6, 1e-3]), - target_entropy=tune.choice([-10, -5, -1, "auto"]), - n_step=tune.choice([1, 3, (1, 3)]), - tau=tune.uniform(0.001, 0.1), - train_batch_size=tune.choice([128, 256, 512]), - target_network_update_freq=tune.choice([1, 2, 4]), - replay_buffer_config={ - "type": "PrioritizedEpisodeReplayBuffer", - "capacity": 1000000, - "alpha": 0.6, - "beta": 0.4, - }, - num_steps_sampled_before_learning_starts=256, - model={ - "fcnet_hiddens": [256, 256], - "fcnet_activation": "relu", - "post_fcnet_hiddens": [], - "post_fcnet_activation": None, - "post_fcnet_weights_initializer": "orthogonal_", - "post_fcnet_weights_initializer_config": {"gain": 0.01}, - }, - ) - .reporting( - metrics_num_episodes_for_smoothing=5, - min_sample_timesteps_per_iteration=1000, - ) - .evaluation( - evaluation_duration="auto", - evaluation_interval=1, - evaluation_num_env_runners=1, - evaluation_parallel_to_training=True, - evaluation_config={ - "explore": False, - }, - ) - ) - - tuner = tune.Tuner( - "SAC", - param_space=config, - run_config=tune.RunConfig( - stop=stop_criteria, - name="benchmark_sac_mujoco_pb2_" + env, - ), - tune_config=tune.TuneConfig( - scheduler=pb2_scheduler, - num_samples=8, - ), - ) - result_grid = tuner.fit() - best_result = result_grid.get_best_result() - print( - f"Finished running HP search for (env={env}) in " - f"{time.time() - hp_trial_start_time} seconds." - ) - print(f"Best result for {env}: {best_result}") - print(f"Best config for {env}: {best_result['config']}") - - # Run again with the best config. - best_trial_start_time = time.time() - tuner = tune.Tuner( - "SAC", - param_space=best_result.config, - run_config=tune.RunConfig( - stop=stop_criteria, - name="benchmark_sac_mujoco_pb2_" + env + "_best", - ), - ) - print(f"Running best config for (env={env})...") - tuner.fit() - print( - f"Finished running best config for (env={env}) " - f"in {time.time() - best_trial_start_time} seconds." - ) - -print( - f"Finished running HP search on all MuJoCo benchmarks in " - f"{time.time() - experiment_start_time} seconds." -) -print( - "Results from running the best configs can be found in the " - "`benchmark_sac_mujoco_pb2__best` directories." -) diff --git a/rllib/utils/tests/run_memory_leak_tests.py b/rllib/utils/tests/run_memory_leak_tests.py index 8685049fb03d..598026d76a38 100644 --- a/rllib/utils/tests/run_memory_leak_tests.py +++ b/rllib/utils/tests/run_memory_leak_tests.py @@ -11,9 +11,9 @@ # tags = ["memory_leak_tests"], # size = "medium", # 5min timeout # srcs = ["tests/test_memory_leak.py"], -# data = glob(["tuned_examples/ppo/*.yaml"]), +# data = glob(["examples/algorithms/ppo/*.yaml"]), # # Pass `BAZEL` option and the path to look for yaml files. -# args = ["BAZEL", "tuned_examples/ppo/memory-leak-test-ppo.yaml"] +# args = ["BAZEL", "examples/algorithms/ppo/memory-leak-test-ppo.yaml"] # ) import argparse