diff --git a/release/release_tests.yaml b/release/release_tests.yaml
index d19fae81bdbb..1284e07b48a0 100644
--- a/release/release_tests.yaml
+++ b/release/release_tests.yaml
@@ -2029,7 +2029,7 @@
run:
timeout: 1500 # expected 1000 seconds
- script: python tuned_examples/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
+ script: python example_algorithms/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
- name: rllib_learning_tests_halfcheetah_appo_torch
python: "3.12"
@@ -2047,7 +2047,7 @@
run:
timeout: 3000 # expected 2000 seconds
- script: python tuned_examples/appo/halfcheetah_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
+ script: python example_algorithms/appo/halfcheetah_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
########################
# Core Nightly Tests
diff --git a/release/rllib_tests/example_algorithms b/release/rllib_tests/example_algorithms
new file mode 120000
index 000000000000..64b332356b30
--- /dev/null
+++ b/release/rllib_tests/example_algorithms
@@ -0,0 +1 @@
+../../rllib/examples/algorithms
\ No newline at end of file
diff --git a/release/rllib_tests/tuned_examples b/release/rllib_tests/tuned_examples
deleted file mode 120000
index 6258eb4f971f..000000000000
--- a/release/rllib_tests/tuned_examples
+++ /dev/null
@@ -1 +0,0 @@
-../../rllib/tuned_examples
\ No newline at end of file
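The two symlink entries above replace `release/rllib_tests/tuned_examples` (which pointed at `../../rllib/tuned_examples`) with `release/rllib_tests/example_algorithms` (pointing at `../../rllib/examples/algorithms`). As a minimal sketch of the equivalent manual swap, assuming a checkout at the repository root (the patch already performs this; the commands are illustrative only):

```shell
# Illustrative only: manual equivalent of the symlink swap shown in the two diffs above.
cd release/rllib_tests
rm tuned_examples                                          # old link -> ../../rllib/tuned_examples
ln -s ../../rllib/examples/algorithms example_algorithms   # new link -> relocated example scripts
```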
diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel
index 411c0997aa2e..f2a2ef3d2a94 100644
--- a/rllib/BUILD.bazel
+++ b/rllib/BUILD.bazel
@@ -72,8 +72,6 @@ doctest(
"**/examples/**",
"**/tests/**",
"**/test_*.py",
- # Exclude `tuned_examples` *.py files.
- "**/tuned_examples/**",
# Deprecated modules
"utils/memory.py",
"offline/off_policy_estimator.py",
@@ -150,7 +148,7 @@ py_test(
# Tag: learning_tests
#
# This will test python/yaml config files
-# inside rllib/tuned_examples/[algo-name] for actual learning success.
+# inside rllib/examples/algorithms/[algo-name] for actual learning success.
# --------------------------------------------------------------------
# APPO
@@ -158,13 +156,13 @@ py_test(
py_test(
name = "learning_tests_cartpole_appo",
size = "large",
- srcs = ["tuned_examples/appo/cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/cartpole_appo.py"],
args = [
"--as-test",
"--num-cpus=7",
"--num-env-runners=5",
],
- main = "tuned_examples/appo/cartpole_appo.py",
+ main = "examples/algorithms/appo/cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -174,26 +172,17 @@ py_test(
],
)
-# TODO (sven): For some weird reason, this test runs extremely slow on the CI (not on cluster, not locally) -> taking this out for now ...
-# py_test(
-# name = "learning_tests_cartpole_appo_gpu",
-# main = "tuned_examples/appo/cartpole_appo.py",
-# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
-# size = "large",
-# srcs = ["tuned_examples/appo/cartpole_appo.py"],
-# args = ["--as-test", "--num-gpus-per-learner=1", "--num-cpus=7", "--num-env-runners=5"]
-# )
py_test(
name = "learning_tests_cartpole_appo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/appo/cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/cartpole_appo.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-cpus=9",
"--num-env-runners=6",
],
- main = "tuned_examples/appo/cartpole_appo.py",
+ main = "examples/algorithms/appo/cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -207,7 +196,7 @@ py_test(
py_test(
name = "learning_tests_cartpole_appo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/appo/cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/cartpole_appo.py"],
args = [
"--as-test",
"--num-learners=2",
@@ -215,7 +204,7 @@ py_test(
"--num-cpus=7",
"--num-env-runners=6",
],
- main = "tuned_examples/appo/cartpole_appo.py",
+ main = "examples/algorithms/appo/cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -231,14 +220,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_appo",
size = "large",
- srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-cpus=8",
"--num-env-runners=6",
],
- main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+ main = "examples/algorithms/appo/multi_agent_cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -251,7 +240,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_appo_gpu",
size = "large",
- srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"],
args = [
"--as-test",
"--num-agents=2",
@@ -259,7 +248,7 @@ py_test(
"--num-cpus=7",
"--num-env-runners=5",
],
- main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+ main = "examples/algorithms/appo/multi_agent_cartpole_appo.py",
tags = [
"exclusive",
"gpu",
@@ -274,7 +263,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_appo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"],
args = [
"--as-test",
"--num-agents=2",
@@ -282,7 +271,7 @@ py_test(
"--num-cpus=9",
"--num-env-runners=6",
],
- main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+ main = "examples/algorithms/appo/multi_agent_cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -298,7 +287,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_appo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"],
args = [
"--as-test",
"--num-agents=2",
@@ -307,7 +296,7 @@ py_test(
"--num-cpus=7",
"--num-env-runners=6",
],
- main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+ main = "examples/algorithms/appo/multi_agent_cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -323,13 +312,13 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_appo",
size = "large",
- srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"],
args = [
"--as-test",
"--num-cpus=8",
"--num-env-runners=6",
],
- main = "tuned_examples/appo/stateless_cartpole_appo.py",
+ main = "examples/algorithms/appo/stateless_cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -343,7 +332,7 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_appo_gpu",
size = "large",
- srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"],
args = [
"--as-test",
"--num-agents=2",
@@ -351,7 +340,7 @@ py_test(
"--num-cpus=7",
"--num-env-runners=5",
],
- main = "tuned_examples/appo/stateless_cartpole_appo.py",
+ main = "examples/algorithms/appo/stateless_cartpole_appo.py",
tags = [
"exclusive",
"gpu",
@@ -366,14 +355,14 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_appo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-cpus=9",
"--num-env-runners=6",
],
- main = "tuned_examples/appo/stateless_cartpole_appo.py",
+ main = "examples/algorithms/appo/stateless_cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -387,7 +376,7 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_appo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
+ srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"],
args = [
"--as-test",
"--num-learners=2",
@@ -395,7 +384,7 @@ py_test(
"--num-cpus=7",
"--num-env-runners=6",
],
- main = "tuned_examples/appo/stateless_cartpole_appo.py",
+ main = "examples/algorithms/appo/stateless_cartpole_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -407,50 +396,17 @@ py_test(
],
)
-# MultiAgentStatelessCartPole
-# py_test(
-# name = "learning_tests_multi_agent_stateless_cartpole_appo",
-# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
-# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
-# size = "large",
-# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
-# args = ["--as-test"]
-# )
-# py_test(
-# name = "learning_tests_multi_agent_stateless_cartpole_appo_gpu",
-# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
-# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
-# size = "large",
-# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
-# args = ["--as-test", "--num-agents=2", "--num-gpus-per-learner=1"]
-# )
-# py_test(
-# name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_cpu",
-# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
-# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
-# size = "large",
-# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
-# args = ["--as-test", "--num-learners=2"]
-# )
-# py_test(
-# name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_gpu",
-# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
-# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
-# size = "large",
-# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
-# args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"]
-# )
# Pendulum
py_test(
name = "learning_tests_pendulum_appo",
size = "large",
- srcs = ["tuned_examples/appo/pendulum_appo.py"],
+ srcs = ["examples/algorithms/appo/pendulum_appo.py"],
args = [
"--as-test",
"--num-cpus=6",
"--num-env-runners=4",
],
- main = "tuned_examples/appo/pendulum_appo.py",
+ main = "examples/algorithms/appo/pendulum_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -464,7 +420,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pong_appo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/appo/multi_agent_pong_appo.py"],
+ srcs = ["examples/algorithms/appo/multi_agent_pong_appo.py"],
args = [
"--stop-iters=3",
"--num-agents=2",
@@ -472,7 +428,7 @@ py_test(
"--num-gpus-per-learner=1",
"--num-aggregator-actors-per-learner=1",
],
- main = "tuned_examples/appo/multi_agent_pong_appo.py",
+ main = "examples/algorithms/appo/multi_agent_pong_appo.py",
tags = [
"exclusive",
"learning_tests",
@@ -483,29 +439,12 @@ py_test(
],
)
-#@OldAPIStack
-py_test(
- name = "learning_tests_multi_agent_cartpole_w_100_policies_appo_old_api_stack",
- size = "large",
- srcs = ["algorithms/tests/run_regression_tests.py"],
- args = ["--dir=../tuned_examples/appo"],
- data = ["tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py"],
- main = "algorithms/tests/run_regression_tests.py",
- tags = [
- "exclusive",
- "learning_tests",
- "learning_tests_discrete",
- "learning_tests_pytorch_use_all_core",
- "team:rllib",
- ],
-)
-
# BC
# CartPole
py_test(
name = "learning_tests_cartpole_bc",
size = "medium",
- srcs = ["tuned_examples/bc/cartpole_bc.py"],
+ srcs = ["examples/algorithms/bc/cartpole_bc.py"],
args = [
"--as-test",
],
@@ -513,7 +452,7 @@ py_test(
data = [
"offline/tests/data/cartpole/cartpole-v1_large",
],
- main = "tuned_examples/bc/cartpole_bc.py",
+ main = "examples/algorithms/bc/cartpole_bc.py",
tags = [
"exclusive",
"learning_tests",
@@ -527,7 +466,7 @@ py_test(
py_test(
name = "learning_tests_cartpole_bc_gpu",
size = "medium",
- srcs = ["tuned_examples/bc/cartpole_bc.py"],
+ srcs = ["examples/algorithms/bc/cartpole_bc.py"],
args = [
"--as-test",
"--num-gpus-per-learner=1",
@@ -536,7 +475,7 @@ py_test(
data = [
"offline/tests/data/cartpole/cartpole-v1_large",
],
- main = "tuned_examples/bc/cartpole_bc.py",
+ main = "examples/algorithms/bc/cartpole_bc.py",
tags = [
"exclusive",
"gpu",
@@ -553,7 +492,7 @@ py_test(
py_test(
name = "learning_tests_cartpole_bc_with_offline_evaluation",
size = "medium",
- srcs = ["tuned_examples/bc/cartpole_bc_with_offline_evaluation.py"],
+ srcs = ["examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py"],
args = [
"--as-test",
"--offline-evaluation-interval=1",
@@ -563,7 +502,7 @@ py_test(
data = [
"offline/tests/data/cartpole/cartpole-v1_large",
],
- main = "tuned_examples/bc/cartpole_bc_with_offline_evaluation.py",
+ main = "examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py",
tags = [
"exclusive",
"learning_tests",
@@ -577,7 +516,7 @@ py_test(
py_test(
name = "learning_tests_cartpole_bc_with_offline_evaluation_gpu",
size = "medium",
- srcs = ["tuned_examples/bc/cartpole_bc_with_offline_evaluation.py"],
+ srcs = ["examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py"],
args = [
"--as-test",
"--num-gpus-per-learner=1",
@@ -589,7 +528,7 @@ py_test(
data = [
"offline/tests/data/cartpole/cartpole-v1_large",
],
- main = "tuned_examples/bc/cartpole_bc_with_offline_evaluation.py",
+ main = "examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py",
tags = [
"exclusive",
"learning_tests",
@@ -606,7 +545,7 @@ py_test(
py_test(
name = "learning_tests_pendulum_cql",
size = "large",
- srcs = ["tuned_examples/cql/pendulum_cql.py"],
+ srcs = ["examples/algorithms/cql/pendulum_cql.py"],
args = [
"--as-test",
],
@@ -614,7 +553,7 @@ py_test(
data = [
"offline/tests/data/pendulum/pendulum-v1_enormous",
],
- main = "tuned_examples/cql/pendulum_cql.py",
+ main = "examples/algorithms/cql/pendulum_cql.py",
tags = [
"exclusive",
"learning_tests",
@@ -632,7 +571,7 @@ py_test(
py_test(
name = "learning_tests_pendulum_cql_gpu",
size = "large",
- srcs = ["tuned_examples/cql/pendulum_cql.py"],
+ srcs = ["examples/algorithms/cql/pendulum_cql.py"],
args = [
"--as-test",
"--num-gpus-per-learner=1",
@@ -641,7 +580,7 @@ py_test(
data = [
"offline/tests/data/pendulum/pendulum-v1_enormous",
],
- main = "tuned_examples/cql/pendulum_cql.py",
+ main = "examples/algorithms/cql/pendulum_cql.py",
tags = [
"exclusive",
"gpu",
@@ -661,11 +600,11 @@ py_test(
py_test(
name = "learning_tests_cartpole_dqn",
size = "large",
- srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+ srcs = ["examples/algorithms/dqn/cartpole_dqn.py"],
args = [
"--as-test",
],
- main = "tuned_examples/dqn/cartpole_dqn.py",
+ main = "examples/algorithms/dqn/cartpole_dqn.py",
tags = [
"exclusive",
"learning_tests",
@@ -679,13 +618,13 @@ py_test(
py_test(
name = "learning_tests_cartpole_dqn_gpu",
size = "large",
- srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+ srcs = ["examples/algorithms/dqn/cartpole_dqn.py"],
args = [
"--as-test",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/dqn/cartpole_dqn.py",
+ main = "examples/algorithms/dqn/cartpole_dqn.py",
tags = [
"exclusive",
"gpu",
@@ -700,12 +639,12 @@ py_test(
py_test(
name = "learning_tests_cartpole_dqn_multi_cpu",
size = "large",
- srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+ srcs = ["examples/algorithms/dqn/cartpole_dqn.py"],
args = [
"--as-test",
"--num-learners=2",
],
- main = "tuned_examples/dqn/cartpole_dqn.py",
+ main = "examples/algorithms/dqn/cartpole_dqn.py",
tags = [
"exclusive",
"learning_tests",
@@ -719,13 +658,13 @@ py_test(
py_test(
name = "learning_tests_cartpole_dqn_multi_gpu",
size = "large",
- srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+ srcs = ["examples/algorithms/dqn/cartpole_dqn.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/dqn/cartpole_dqn.py",
+ main = "examples/algorithms/dqn/cartpole_dqn.py",
tags = [
"exclusive",
"learning_tests",
@@ -743,13 +682,13 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_dqn",
size = "large",
- srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+ srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-cpus=4",
],
- main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+ main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py",
tags = [
"exclusive",
"learning_tests",
@@ -763,7 +702,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_dqn_gpu",
size = "large",
- srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+ srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"],
args = [
"--as-test",
"--num-agents=2",
@@ -771,7 +710,7 @@ py_test(
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+ main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py",
tags = [
"exclusive",
"gpu",
@@ -786,14 +725,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_dqn_multi_cpu",
size = "large",
- srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+ srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-cpus=5",
"--num-learners=2",
],
- main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+ main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py",
tags = [
"exclusive",
"learning_tests",
@@ -807,7 +746,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_dqn_multi_gpu",
size = "large",
- srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+ srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"],
args = [
"--as-test",
"--num-agents=2",
@@ -815,7 +754,7 @@ py_test(
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+ main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py",
tags = [
"exclusive",
"learning_tests",
@@ -827,41 +766,16 @@ py_test(
],
)
-# DreamerV3
-# takes too long (up to 20-30min to learn -200 on 1 GPU)
-# Pendulum
-# py_test(
-# name = "learning_tests_pendulum_dreamerv3_gpu",
-# size = "large",
-# srcs = ["tuned_examples/dreamerv3/pendulum_dreamerv3.py"],
-# args = [
-# "--as-test",
-# "--num-gpus-per-learner=1",
-# "--num-learners=1",
-# "--num-env-runners=4",
-# ],
-# main = "tuned_examples/dreamerv3/pendulum_dreamerv3.py",
-# tags = [
-# "exclusive",
-# "gpu",
-# "learning_tests",
-# "learning_tests_continuous",
-# "learning_tests_pytorch_use_all_core",
-# "team:rllib",
-# "torch_only",
-# ],
-# )
-
# IMPALA
# CartPole
py_test(
name = "learning_tests_cartpole_impala",
size = "large",
- srcs = ["tuned_examples/impala/cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/cartpole_impala.py"],
args = [
"--as-test",
],
- main = "tuned_examples/impala/cartpole_impala.py",
+ main = "examples/algorithms/impala/cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -874,12 +788,12 @@ py_test(
py_test(
name = "learning_tests_cartpole_impala_gpu",
size = "large",
- srcs = ["tuned_examples/impala/cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/cartpole_impala.py"],
args = [
"--as-test",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/impala/cartpole_impala.py",
+ main = "examples/algorithms/impala/cartpole_impala.py",
tags = [
"exclusive",
"gpu",
@@ -894,12 +808,12 @@ py_test(
py_test(
name = "learning_tests_cartpole_impala_multi_cpu",
size = "large",
- srcs = ["tuned_examples/impala/cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/cartpole_impala.py"],
args = [
"--as-test",
"--num-learners=2",
],
- main = "tuned_examples/impala/cartpole_impala.py",
+ main = "examples/algorithms/impala/cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -913,13 +827,13 @@ py_test(
py_test(
name = "learning_tests_cartpole_impala_multi_gpu",
size = "large",
- srcs = ["tuned_examples/impala/cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/cartpole_impala.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/impala/cartpole_impala.py",
+ main = "examples/algorithms/impala/cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -935,13 +849,13 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_impala",
size = "large",
- srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-cpus=6",
],
- main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
+ main = "examples/algorithms/impala/multi_agent_cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -955,14 +869,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_impala_gpu",
size = "large",
- srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-gpus-per-learner=1",
"--num-cpus=6",
],
- main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
+ main = "examples/algorithms/impala/multi_agent_cartpole_impala.py",
tags = [
"exclusive",
"gpu",
@@ -977,14 +891,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_impala_multi_cpu",
size = "large",
- srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=2",
"--num-cpus=7",
],
- main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
+ main = "examples/algorithms/impala/multi_agent_cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -998,7 +912,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_impala_multi_gpu",
size = "large",
- srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"],
args = [
"--as-test",
"--num-agents=2",
@@ -1006,7 +920,7 @@ py_test(
"--num-gpus-per-learner=1",
"--num-cpus=7",
],
- main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
+ main = "examples/algorithms/impala/multi_agent_cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -1022,11 +936,11 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_impala",
size = "large",
- srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/stateless_cartpole_impala.py"],
args = [
"--as-test",
],
- main = "tuned_examples/impala/stateless_cartpole_impala.py",
+ main = "examples/algorithms/impala/stateless_cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -1040,13 +954,13 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_impala_multi_gpu",
size = "large",
- srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/stateless_cartpole_impala.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/impala/stateless_cartpole_impala.py",
+ main = "examples/algorithms/impala/stateless_cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -1062,11 +976,11 @@ py_test(
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_impala",
size = "large",
- srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"],
+ srcs = ["examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py"],
args = [
"--as-test",
],
- main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py",
+ main = "examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py",
tags = [
"exclusive",
"learning_tests",
@@ -1076,21 +990,13 @@ py_test(
"torch_only",
],
)
-# py_test(
-# name = "learning_tests_multi_agent_stateless_cartpole_impala_multi_gpu",
-# main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py",
-# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
-# size = "large",
-# srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"],
-# args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"]
-# )
# IQL
# Pendulum-v1 (enormous)
py_test(
name = "learning_tests_pendulum_iql",
size = "large",
- srcs = ["tuned_examples/iql/pendulum_iql.py"],
+ srcs = ["examples/algorithms/iql/pendulum_iql.py"],
args = [
"--as-test",
"--num-cpus=32",
@@ -1099,7 +1005,7 @@ py_test(
data = [
"offline/tests/data/pendulum/pendulum-v1_enormous",
],
- main = "tuned_examples/iql/pendulum_iql.py",
+ main = "examples/algorithms/iql/pendulum_iql.py",
tags = [
"exclusive",
"learning_tests",
@@ -1114,7 +1020,7 @@ py_test(
py_test(
name = "learning_tests_pendulum_iql_gpu",
size = "large",
- srcs = ["tuned_examples/iql/pendulum_iql.py"],
+ srcs = ["examples/algorithms/iql/pendulum_iql.py"],
args = [
"--as-test",
"--num-cpus=32",
@@ -1124,7 +1030,7 @@ py_test(
data = [
"offline/tests/data/pendulum/pendulum-v1_enormous",
],
- main = "tuned_examples/iql/pendulum_iql.py",
+ main = "examples/algorithms/iql/pendulum_iql.py",
tags = [
"exclusive",
"gpu",
@@ -1141,7 +1047,7 @@ py_test(
py_test(
name = "learning_tests_cartpole_marwil",
size = "large",
- srcs = ["tuned_examples/marwil/cartpole_marwil.py"],
+ srcs = ["examples/algorithms/marwil/cartpole_marwil.py"],
args = [
"--as-test",
],
@@ -1149,7 +1055,7 @@ py_test(
data = [
"offline/tests/data/cartpole/cartpole-v1_large",
],
- main = "tuned_examples/marwil/cartpole_marwil.py",
+ main = "examples/algorithms/marwil/cartpole_marwil.py",
tags = [
"exclusive",
"learning_tests",
@@ -1164,7 +1070,7 @@ py_test(
py_test(
name = "learning_tests_cartpole_marwil_gpu",
size = "large",
- srcs = ["tuned_examples/marwil/cartpole_marwil.py"],
+ srcs = ["examples/algorithms/marwil/cartpole_marwil.py"],
args = [
"--as-test",
"--num-gpus-per-learner=1",
@@ -1173,7 +1079,7 @@ py_test(
data = [
"offline/tests/data/cartpole/cartpole-v1_large",
],
- main = "tuned_examples/marwil/cartpole_marwil.py",
+ main = "examples/algorithms/marwil/cartpole_marwil.py",
tags = [
"exclusive",
"gpu",
@@ -1190,11 +1096,11 @@ py_test(
py_test(
name = "learning_tests_cartpole_ppo",
size = "large",
- srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/cartpole_ppo.py"],
args = [
"--as-test",
],
- main = "tuned_examples/ppo/cartpole_ppo.py",
+ main = "examples/algorithms/ppo/cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1207,13 +1113,13 @@ py_test(
py_test(
name = "learning_tests_cartpole_ppo_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/cartpole_ppo.py"],
args = [
"--as-test",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/cartpole_ppo.py",
+ main = "examples/algorithms/ppo/cartpole_ppo.py",
tags = [
"exclusive",
"gpu",
@@ -1228,12 +1134,12 @@ py_test(
py_test(
name = "learning_tests_cartpole_ppo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/cartpole_ppo.py"],
args = [
"--as-test",
"--num-learners=2",
],
- main = "tuned_examples/ppo/cartpole_ppo.py",
+ main = "examples/algorithms/ppo/cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1247,13 +1153,13 @@ py_test(
py_test(
name = "learning_tests_cartpole_ppo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/cartpole_ppo.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/cartpole_ppo.py",
+ main = "examples/algorithms/ppo/cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1269,12 +1175,12 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_ppo",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
],
- main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1287,14 +1193,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_ppo_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py",
tags = [
"exclusive",
"gpu",
@@ -1309,13 +1215,13 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_ppo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=2",
],
- main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1329,14 +1235,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_cartpole_ppo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1352,11 +1258,11 @@ py_test(
py_test(
name = "learning_tests_cartpole_truncated_ppo",
size = "large",
- srcs = ["tuned_examples/ppo/cartpole_truncated_ppo.py"],
+ srcs = ["examples/algorithms/ppo/cartpole_truncated_ppo.py"],
args = [
"--as-test",
],
- main = "tuned_examples/ppo/cartpole_truncated_ppo.py",
+ main = "examples/algorithms/ppo/cartpole_truncated_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1370,11 +1276,11 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_ppo",
size = "large",
- srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"],
args = [
"--as-test",
],
- main = "tuned_examples/ppo/stateless_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/stateless_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1387,13 +1293,13 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_ppo_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"],
args = [
"--as-test",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/stateless_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/stateless_cartpole_ppo.py",
tags = [
"exclusive",
"gpu",
@@ -1408,12 +1314,12 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_ppo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"],
args = [
"--as-test",
"--num-learners=2",
],
- main = "tuned_examples/ppo/stateless_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/stateless_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1427,13 +1333,13 @@ py_test(
py_test(
name = "learning_tests_stateless_cartpole_ppo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/stateless_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/stateless_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1449,12 +1355,12 @@ py_test(
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_ppo",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
],
- main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1467,14 +1373,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_ppo_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py",
tags = [
"exclusive",
"gpu",
@@ -1489,13 +1395,13 @@ py_test(
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_ppo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=2",
],
- main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1509,14 +1415,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_stateless_cartpole_ppo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1532,13 +1438,13 @@ py_test(
py_test(
name = "learning_tests_multi_agent_footsies_ppo",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"],
args = [
"--as-test",
"--num-env-runners=6",
"--evaluation-num-env-runners=2",
],
- main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1550,7 +1456,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_footsies_ppo_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"],
args = [
"--as-test",
"--num-env-runners=20",
@@ -1558,7 +1464,7 @@ py_test(
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1571,14 +1477,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_footsies_ppo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"],
args = [
"--as-test",
"--num-env-runners=6",
"--evaluation-num-env-runners=2",
"--num-learners=2",
],
- main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1590,7 +1496,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_footsies_ppo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"],
args = [
"--as-test",
"--num-env-runners=20",
@@ -1598,7 +1504,7 @@ py_test(
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1612,11 +1518,11 @@ py_test(
py_test(
name = "learning_tests_pendulum_ppo",
size = "large",
- srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
+ srcs = ["examples/algorithms/ppo/pendulum_ppo.py"],
args = [
"--as-test",
],
- main = "tuned_examples/ppo/pendulum_ppo.py",
+ main = "examples/algorithms/ppo/pendulum_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1629,13 +1535,13 @@ py_test(
py_test(
name = "learning_tests_pendulum_ppo_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
+ srcs = ["examples/algorithms/ppo/pendulum_ppo.py"],
args = [
"--as-test",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/pendulum_ppo.py",
+ main = "examples/algorithms/ppo/pendulum_ppo.py",
tags = [
"exclusive",
"gpu",
@@ -1650,12 +1556,12 @@ py_test(
py_test(
name = "learning_tests_pendulum_ppo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
+ srcs = ["examples/algorithms/ppo/pendulum_ppo.py"],
args = [
"--as-test",
"--num-learners=2",
],
- main = "tuned_examples/ppo/pendulum_ppo.py",
+ main = "examples/algorithms/ppo/pendulum_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1669,13 +1575,13 @@ py_test(
py_test(
name = "learning_tests_pendulum_ppo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
+ srcs = ["examples/algorithms/ppo/pendulum_ppo.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/pendulum_ppo.py",
+ main = "examples/algorithms/ppo/pendulum_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1691,12 +1597,12 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pendulum_ppo",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
],
- main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1709,14 +1615,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pendulum_ppo_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py",
tags = [
"exclusive",
"gpu",
@@ -1731,13 +1637,13 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pendulum_ppo_multi_cpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=2",
],
- main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1751,14 +1657,14 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pendulum_ppo_multi_gpu",
size = "large",
- srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
+ srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
+ main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py",
tags = [
"exclusive",
"learning_tests",
@@ -1775,11 +1681,11 @@ py_test(
py_test(
name = "learning_tests_mountaincar_sac",
size = "large",
- srcs = ["tuned_examples/sac/mountaincar_sac.py"],
+ srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
args = [
"--as-test",
],
- main = "tuned_examples/sac/mountaincar_sac.py",
+ main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -1792,13 +1698,13 @@ py_test(
py_test(
name = "learning_tests_mountaincar_sac_gpu",
size = "large",
- srcs = ["tuned_examples/sac/mountaincar_sac.py"],
+ srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
args = [
"--as-test",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/sac/mountaincar_sac.py",
+ main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"gpu",
@@ -1812,12 +1718,12 @@ py_test(
py_test(
name = "learning_tests_mountaincar_sac_multi_cpu",
size = "large",
- srcs = ["tuned_examples/sac/mountaincar_sac.py"],
+ srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
args = [
"--as-test",
"--num-learners=2",
],
- main = "tuned_examples/sac/mountaincar_sac.py",
+ main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -1830,13 +1736,13 @@ py_test(
py_test(
name = "learning_tests_mountaincar_sac_multi_gpu",
size = "large",
- srcs = ["tuned_examples/sac/mountaincar_sac.py"],
+ srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/sac/mountaincar_sac.py",
+ main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -1851,11 +1757,11 @@ py_test(
py_test(
name = "learning_tests_pendulum_sac",
size = "large",
- srcs = ["tuned_examples/sac/pendulum_sac.py"],
+ srcs = ["examples/algorithms/sac/pendulum_sac.py"],
args = [
"--as-test",
],
- main = "tuned_examples/sac/pendulum_sac.py",
+ main = "examples/algorithms/sac/pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -1868,13 +1774,13 @@ py_test(
py_test(
name = "learning_tests_pendulum_sac_gpu",
size = "large",
- srcs = ["tuned_examples/sac/pendulum_sac.py"],
+ srcs = ["examples/algorithms/sac/pendulum_sac.py"],
args = [
"--as-test",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/sac/pendulum_sac.py",
+ main = "examples/algorithms/sac/pendulum_sac.py",
tags = [
"exclusive",
"gpu",
@@ -1888,12 +1794,12 @@ py_test(
py_test(
name = "learning_tests_pendulum_sac_multi_cpu",
size = "large",
- srcs = ["tuned_examples/sac/pendulum_sac.py"],
+ srcs = ["examples/algorithms/sac/pendulum_sac.py"],
args = [
"--as-test",
"--num-learners=2",
],
- main = "tuned_examples/sac/pendulum_sac.py",
+ main = "examples/algorithms/sac/pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -1906,13 +1812,13 @@ py_test(
py_test(
name = "learning_tests_pendulum_sac_multi_gpu",
size = "large",
- srcs = ["tuned_examples/sac/pendulum_sac.py"],
+ srcs = ["examples/algorithms/sac/pendulum_sac.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/sac/pendulum_sac.py",
+ main = "examples/algorithms/sac/pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -1927,13 +1833,13 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pendulum_sac",
size = "large",
- srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+ srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-cpus=4",
],
- main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+ main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -1946,7 +1852,7 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pendulum_sac_gpu",
size = "large",
- srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+ srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
args = [
"--as-test",
"--num-agents=2",
@@ -1954,7 +1860,7 @@ py_test(
"--num-learners=1",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+ main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
tags = [
"exclusive",
"gpu",
@@ -1968,12 +1874,12 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pendulum_sac_multi_cpu",
size = "large",
- srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+ srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
args = [
"--num-agents=2",
"--num-learners=2",
],
- main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+ main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -1986,13 +1892,13 @@ py_test(
py_test(
name = "learning_tests_multi_agent_pendulum_sac_multi_gpu",
size = "large",
- srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+ srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
args = [
"--num-agents=2",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
- main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+ main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
@@ -2428,21 +2334,6 @@ py_test(
],
)
-# --------------------------------------------------------------------
-# ConnectorV2 tests
-# rllib/connector/
-#
-# Tag: connector_v2
-# --------------------------------------------------------------------
-
-# TODO (sven): Add these tests in a separate PR.
-# py_test(
-# name = "connectors/tests/test_connector_v2",
-# tags = ["team:rllib", "connector_v2"],
-# size = "small",
-# srcs = ["connectors/tests/test_connector_v2.py"]
-# )
-
# --------------------------------------------------------------------
# Env tests
# rllib/env/
diff --git a/rllib/algorithms/dreamerv3/README.md b/rllib/algorithms/dreamerv3/README.md
index 8db9fcbae9f1..f36de087c00d 100644
--- a/rllib/algorithms/dreamerv3/README.md
+++ b/rllib/algorithms/dreamerv3/README.md
@@ -42,18 +42,18 @@ Here are some examples on how to set these config settings within your `DreamerV
[documentation page here](https://docs.ray.io/en/latest/rllib/index.html#rllib-in-60-seconds).
Use the config examples and templates in the
-[tuned_examples folder](../../tuned_examples/dreamerv3)
+[DreamerV3 examples folder](../../examples/algorithms/dreamerv3)
in combination with the following scripts and command lines in order to run RLlib's DreamerV3 algorithm in your experiments:
-### [Atari100k](../../tuned_examples/dreamerv3/atari_100k_dreamerv3.py)
+### [Atari100k](../../examples/algorithms/dreamerv3/atari_100k_dreamerv3.py)
```shell
-$ cd ray/rllib/tuned_examples/dreamerv3/
+$ cd ray/rllib/examples/algorithms/dreamerv3/
$ python atari_100k_dreamerv3.py --env ale_py:ALE/Pong-v5
```
-### [DeepMind Control Suite (vision)](../../tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py)
+### [DeepMind Control Suite (vision)](../../examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py)
```shell
-$ cd ray/rllib/tuned_examples/dreamerv3/
+$ cd ray/rllib/examples/algorithms/dreamerv3/
$ python dm_control_suite_vision_dreamerv3.py --env DMC/cartpole/swingup
```
Other `--env` options for the DM Control Suite would be `--env DMC/hopper/hop`, `--env DMC/walker/walk`, etc..
@@ -122,8 +122,8 @@ $ python flappy_bird.py
```
This should be it. Feel free to try out running this on multiple GPUs using these
-more advanced config examples [here (Atari100k)](../../tuned_examples/dreamerv3/atari_100k_dreamerv3.py) and
-[here (DM Control Suite)](../../tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py).
+more advanced config examples [here (Atari100k)](../../examples/algorithms/dreamerv3/atari_100k_dreamerv3.py) and
+[here (DM Control Suite)](../../examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py).
Also see the notes below on good recipes for running on multiple GPUs.
IMPORTANT: DreamerV3 out-of-the-box only supports image observation spaces of
diff --git a/rllib/benchmarks/ppo/benchmark_atari_ppo.py b/rllib/benchmarks/ppo/benchmark_atari_ppo.py
index d62e18b01407..ad8f05691fb7 100644
--- a/rllib/benchmarks/ppo/benchmark_atari_ppo.py
+++ b/rllib/benchmarks/ppo/benchmark_atari_ppo.py
@@ -96,7 +96,7 @@
# Compile the base command running the actual `tuned_example` script.
base_commands = [
"python",
- "../../tuned_examples/ppo/atari_ppo.py",
+ "../../examples/algorithms/ppo/atari_ppo.py",
f"--num-env-runners={args.num_env_runners}" if args.num_env_runners else "",
f"--num-learners={args.num_learners}",
f"--num-gpus-per-learner={args.num_gpus_per_learner}",
diff --git a/rllib/tuned_examples/dqn/atari-dist-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-dist-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/atari-dist-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-dist-dqn.yaml
diff --git a/rllib/tuned_examples/dqn/atari-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/atari-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-dqn.yaml
diff --git a/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-duel-ddqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/atari-duel-ddqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-duel-ddqn.yaml
diff --git a/rllib/tuned_examples/impala/atari-impala-large.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala-large.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/atari-impala-large.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-impala-large.yaml
diff --git a/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala-multi-gpu.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-impala-multi-gpu.yaml
diff --git a/rllib/tuned_examples/impala/atari-impala.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/atari-impala.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-impala.yaml
diff --git a/rllib/tuned_examples/sac/atari-sac.yaml b/rllib/examples/_old_api_stack/algorithms/atari-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/atari-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-sac.yaml
diff --git a/rllib/tuned_examples/appo/cartpole-appo-separate-losses.py b/rllib/examples/_old_api_stack/algorithms/cartpole-appo-separate-losses.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole-appo-separate-losses.py
rename to rllib/examples/_old_api_stack/algorithms/cartpole-appo-separate-losses.py
diff --git a/rllib/tuned_examples/bc/cartpole-bc.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-bc.yaml
similarity index 100%
rename from rllib/tuned_examples/bc/cartpole-bc.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-bc.yaml
diff --git a/rllib/tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/cartpole-crashing-and-stalling-recreate-workers-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py
rename to rllib/examples/_old_api_stack/algorithms/cartpole-crashing-and-stalling-recreate-workers-appo.py
diff --git a/rllib/tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/cartpole-crashing-recreate-workers-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py
rename to rllib/examples/_old_api_stack/algorithms/cartpole-crashing-recreate-workers-appo.py
diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-fake-gpus.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-fake-gpus.yaml
diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-param-noise.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-param-noise.yaml
diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-softq.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-softq.yaml
diff --git a/rllib/tuned_examples/dqn/cartpole-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn.yaml
diff --git a/rllib/tuned_examples/marwil/cartpole-marwil.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-marwil.yaml
similarity index 100%
rename from rllib/tuned_examples/marwil/cartpole-marwil.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-marwil.yaml
diff --git a/rllib/tuned_examples/sac/cartpole-sac.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/cartpole-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-sac.yaml
diff --git a/rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml b/rllib/examples/_old_api_stack/algorithms/frozenlake-appo-vtrace.yaml
similarity index 100%
rename from rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml
rename to rllib/examples/_old_api_stack/algorithms/frozenlake-appo-vtrace.yaml
diff --git a/rllib/tuned_examples/cql/halfcheetah-bc.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-bc.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/halfcheetah-bc.yaml
rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-bc.yaml
diff --git a/rllib/tuned_examples/cql/halfcheetah-cql.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-cql.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/halfcheetah-cql.yaml
rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-cql.yaml
diff --git a/rllib/tuned_examples/ppo/halfcheetah-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/halfcheetah-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-ppo.yaml
diff --git a/rllib/tuned_examples/cql/hopper-bc.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-bc.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/hopper-bc.yaml
rename to rllib/examples/_old_api_stack/algorithms/hopper-bc.yaml
diff --git a/rllib/tuned_examples/cql/hopper-cql.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-cql.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/hopper-cql.yaml
rename to rllib/examples/_old_api_stack/algorithms/hopper-cql.yaml
diff --git a/rllib/tuned_examples/ppo/hopper-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/hopper-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/hopper-ppo.yaml
diff --git a/rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml b/rllib/examples/_old_api_stack/algorithms/humanoid-ppo-gae.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml
rename to rllib/examples/_old_api_stack/algorithms/humanoid-ppo-gae.yaml
diff --git a/rllib/tuned_examples/ppo/humanoid-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/humanoid-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/humanoid-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/humanoid-ppo.yaml
diff --git a/rllib/tuned_examples/appo/memory-leak-test-appo.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-appo.yaml
similarity index 100%
rename from rllib/tuned_examples/appo/memory-leak-test-appo.yaml
rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-appo.yaml
diff --git a/rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-dqn.yaml
diff --git a/rllib/tuned_examples/ppo/memory-leak-test-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/memory-leak-test-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml
diff --git a/rllib/tuned_examples/sac/memory-leak-test-sac.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/memory-leak-test-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-sac.yaml
diff --git a/rllib/tuned_examples/sac/mspacman-sac.yaml b/rllib/examples/_old_api_stack/algorithms/mspacman-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/mspacman-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/mspacman-sac.yaml
diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py
rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py
diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-recreate-workers-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py
rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-recreate-workers-appo.py
diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py
rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_cartpole_appo_old_api_stack.py b/rllib/examples/_old_api_stack/algorithms/multi_agent_cartpole_appo_old_api_stack.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_cartpole_appo_old_api_stack.py
rename to rllib/examples/_old_api_stack/algorithms/multi_agent_cartpole_appo_old_api_stack.py
diff --git a/rllib/tuned_examples/cql/pendulum-cql.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
similarity index 94%
rename from rllib/tuned_examples/cql/pendulum-cql.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
index 3baf6c8fdc6e..d993a151763a 100644
--- a/rllib/tuned_examples/cql/pendulum-cql.yaml
+++ b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
@@ -1,6 +1,6 @@
# @OldAPIStack
# Given a SAC-generated offline file generated via:
-# rllib train -f tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
+# rllib train -f examples/_old_api_stack/algorithms/pendulum-sac.yaml --no-ray-ui
# Pendulum CQL can attain ~ -300 reward in 10k from that file.
pendulum-cql:
diff --git a/rllib/tuned_examples/sac/pendulum-sac.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml
diff --git a/rllib/tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-ppo.yaml
diff --git a/rllib/tuned_examples/sac/pendulum-transformed-actions-sac.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum-transformed-actions-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-sac.yaml
diff --git a/rllib/tuned_examples/dqn/pong-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/pong-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/pong-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-dqn.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala-fast.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala-fast.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala-fast.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala-fast.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala-vectorized.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala-vectorized.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala-vectorized.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala-vectorized.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala.yaml
diff --git a/rllib/tuned_examples/dqn/pong-rainbow.yaml b/rllib/examples/_old_api_stack/algorithms/pong-rainbow.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/pong-rainbow.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-rainbow.yaml
diff --git a/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/unity3d-soccer-strikers-vs-goalie-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/unity3d-soccer-strikers-vs-goalie-ppo.yaml
diff --git a/rllib/tuned_examples/ppo/walker2d-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/walker2d-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/walker2d-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/walker2d-ppo.yaml
diff --git a/rllib/tuned_examples/appo/cartpole_appo.py b/rllib/examples/algorithms/appo/cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole_appo.py
rename to rllib/examples/algorithms/appo/cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/halfcheetah_appo.py b/rllib/examples/algorithms/appo/halfcheetah_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/halfcheetah_appo.py
rename to rllib/examples/algorithms/appo/halfcheetah_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_cartpole_appo.py b/rllib/examples/algorithms/appo/multi_agent_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_cartpole_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_pong_appo.py b/rllib/examples/algorithms/appo/multi_agent_pong_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_pong_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_pong_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_stateless_cartpole_appo.py b/rllib/examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_stateless_cartpole_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/pendulum_appo.py b/rllib/examples/algorithms/appo/pendulum_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/pendulum_appo.py
rename to rllib/examples/algorithms/appo/pendulum_appo.py
diff --git a/rllib/tuned_examples/appo/pong_appo.py b/rllib/examples/algorithms/appo/pong_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/pong_appo.py
rename to rllib/examples/algorithms/appo/pong_appo.py
diff --git a/rllib/tuned_examples/appo/stateless_cartpole_appo.py b/rllib/examples/algorithms/appo/stateless_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/stateless_cartpole_appo.py
rename to rllib/examples/algorithms/appo/stateless_cartpole_appo.py
diff --git a/rllib/tuned_examples/bc/benchmark_rlunplugged_atari_pong_bc.py b/rllib/examples/algorithms/bc/benchmark_rlunplugged_atari_pong_bc.py
similarity index 100%
rename from rllib/tuned_examples/bc/benchmark_rlunplugged_atari_pong_bc.py
rename to rllib/examples/algorithms/bc/benchmark_rlunplugged_atari_pong_bc.py
diff --git a/rllib/tuned_examples/bc/cartpole_bc.py b/rllib/examples/algorithms/bc/cartpole_bc.py
similarity index 97%
rename from rllib/tuned_examples/bc/cartpole_bc.py
rename to rllib/examples/algorithms/bc/cartpole_bc.py
index 8f2b3196b970..57618f0739e7 100644
--- a/rllib/tuned_examples/bc/cartpole_bc.py
+++ b/rllib/examples/algorithms/bc/cartpole_bc.py
@@ -25,8 +25,7 @@
# Define the data paths.
data_path = "offline/tests/data/cartpole/cartpole-v1_large"
-base_path = Path(__file__).parents[2]
-print(f"base_path={base_path}")
+base_path = Path(__file__).parents[3]
data_path = "local://" / base_path / data_path
print(f"data_path={data_path}")
diff --git a/rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
similarity index 99%
rename from rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py
rename to rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
index 5bb6fd604807..30a2d4bb5a1e 100644
--- a/rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py
+++ b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
@@ -51,7 +51,7 @@
# Define the data paths.
data_path = "offline/tests/data/cartpole/cartpole-v1_large"
-base_path = Path(__file__).parents[2]
+base_path = Path(__file__).parents[3]
print(f"base_path={base_path}")
data_path = "local://" / base_path / data_path
print(f"data_path={data_path}")
diff --git a/rllib/tuned_examples/bc/pendulum_bc.py b/rllib/examples/algorithms/bc/pendulum_bc.py
similarity index 98%
rename from rllib/tuned_examples/bc/pendulum_bc.py
rename to rllib/examples/algorithms/bc/pendulum_bc.py
index 55349ab107f1..b4417949d906 100644
--- a/rllib/tuned_examples/bc/pendulum_bc.py
+++ b/rllib/examples/algorithms/bc/pendulum_bc.py
@@ -24,7 +24,7 @@
# Define the data paths.
data_path = "offline/tests/data/pendulum/pendulum-v1_large"
-base_path = Path(__file__).parents[2]
+base_path = Path(__file__).parents[3]
print(f"base_path={base_path}")
data_path = "local://" / base_path / data_path
print(f"data_path={data_path}")
diff --git a/rllib/tuned_examples/cql/pendulum_cql.py b/rllib/examples/algorithms/cql/pendulum_cql.py
similarity index 98%
rename from rllib/tuned_examples/cql/pendulum_cql.py
rename to rllib/examples/algorithms/cql/pendulum_cql.py
index 1b66bf67ab2b..984c3626fae9 100644
--- a/rllib/tuned_examples/cql/pendulum_cql.py
+++ b/rllib/examples/algorithms/cql/pendulum_cql.py
@@ -23,7 +23,7 @@
), "This tuned example works only with `Pendulum-v1`."
# Define the base path relative to this file.
-base_path = Path(__file__).parents[2]
+base_path = Path(__file__).parents[3]
# Use the larger data set of Pendulum we have. Note, these are
# parquet data, the default in `AlgorithmConfig.offline_data`.
data_path = base_path / "offline/tests/data/pendulum/pendulum-v1_enormous"
diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari.py b/rllib/examples/algorithms/dqn/benchmark_dqn_atari.py
similarity index 100%
rename from rllib/tuned_examples/dqn/benchmark_dqn_atari.py
rename to rllib/examples/algorithms/dqn/benchmark_dqn_atari.py
diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py b/rllib/examples/algorithms/dqn/benchmark_dqn_atari_rllib_preprocessing.py
similarity index 100%
rename from rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py
rename to rllib/examples/algorithms/dqn/benchmark_dqn_atari_rllib_preprocessing.py
diff --git a/rllib/tuned_examples/dqn/cartpole_dqn.py b/rllib/examples/algorithms/dqn/cartpole_dqn.py
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole_dqn.py
rename to rllib/examples/algorithms/dqn/cartpole_dqn.py
diff --git a/rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py b/rllib/examples/algorithms/dqn/multi_agent_cartpole_dqn.py
similarity index 100%
rename from rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py
rename to rllib/examples/algorithms/dqn/multi_agent_cartpole_dqn.py
diff --git a/rllib/tuned_examples/dqn/stateless_cartpole_dqn.py b/rllib/examples/algorithms/dqn/stateless_cartpole_dqn.py
similarity index 100%
rename from rllib/tuned_examples/dqn/stateless_cartpole_dqn.py
rename to rllib/examples/algorithms/dqn/stateless_cartpole_dqn.py
diff --git a/rllib/tuned_examples/dreamerv3/atari_100k_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/atari_100k_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/atari_100k_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/atari_100k_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/atari_200M_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/atari_200M_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/atari_200M_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/atari_200M_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/cartpole_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/cartpole_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/cartpole_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/cartpole_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/flappy_bird_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/flappy_bird_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/flappy_bird_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/flappy_bird_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/frozenlake_2x2_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/frozenlake_2x2_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/frozenlake_2x2_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/frozenlake_2x2_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/gymnasium_robotics_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/gymnasium_robotics_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/gymnasium_robotics_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/gymnasium_robotics_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/highway_env_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/highway_env_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/highway_env_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/highway_env_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/pendulum_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/pendulum_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/pendulum_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/pendulum_dreamerv3.py
diff --git a/rllib/tuned_examples/impala/cartpole-impala-separate-losses.py b/rllib/examples/algorithms/impala/cartpole-impala-separate-losses.py
similarity index 100%
rename from rllib/tuned_examples/impala/cartpole-impala-separate-losses.py
rename to rllib/examples/algorithms/impala/cartpole-impala-separate-losses.py
diff --git a/rllib/tuned_examples/impala/cartpole_impala.py b/rllib/examples/algorithms/impala/cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/cartpole_impala.py
rename to rllib/examples/algorithms/impala/cartpole_impala.py
diff --git a/rllib/tuned_examples/impala/heavy_cartpole_impala.py b/rllib/examples/algorithms/impala/heavy_cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/heavy_cartpole_impala.py
rename to rllib/examples/algorithms/impala/heavy_cartpole_impala.py
diff --git a/rllib/tuned_examples/impala/multi_agent_cartpole_impala.py b/rllib/examples/algorithms/impala/multi_agent_cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/multi_agent_cartpole_impala.py
rename to rllib/examples/algorithms/impala/multi_agent_cartpole_impala.py
diff --git a/rllib/tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py b/rllib/examples/algorithms/impala/multi_agent_cartpole_impala_old_api_stack.py
similarity index 100%
rename from rllib/tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py
rename to rllib/examples/algorithms/impala/multi_agent_cartpole_impala_old_api_stack.py
diff --git a/rllib/tuned_examples/impala/multi_agent_stateless_cartpole_impala.py b/rllib/examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/multi_agent_stateless_cartpole_impala.py
rename to rllib/examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py
diff --git a/rllib/tuned_examples/impala/pendulum_impala.py b/rllib/examples/algorithms/impala/pendulum_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/pendulum_impala.py
rename to rllib/examples/algorithms/impala/pendulum_impala.py
diff --git a/rllib/tuned_examples/impala/pong_impala.py b/rllib/examples/algorithms/impala/pong_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/pong_impala.py
rename to rllib/examples/algorithms/impala/pong_impala.py
diff --git a/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py b/rllib/examples/algorithms/impala/pong_impala_pb2_hyperopt.py
similarity index 100%
rename from rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py
rename to rllib/examples/algorithms/impala/pong_impala_pb2_hyperopt.py
diff --git a/rllib/tuned_examples/impala/stateless_cartpole_impala.py b/rllib/examples/algorithms/impala/stateless_cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/stateless_cartpole_impala.py
rename to rllib/examples/algorithms/impala/stateless_cartpole_impala.py
diff --git a/rllib/tuned_examples/iql/pendulum_iql.py b/rllib/examples/algorithms/iql/pendulum_iql.py
similarity index 98%
rename from rllib/tuned_examples/iql/pendulum_iql.py
rename to rllib/examples/algorithms/iql/pendulum_iql.py
index 864d14a7fe63..eea94390d0fb 100644
--- a/rllib/tuned_examples/iql/pendulum_iql.py
+++ b/rllib/examples/algorithms/iql/pendulum_iql.py
@@ -24,7 +24,7 @@
# Define the data paths.
data_path = "offline/tests/data/pendulum/pendulum-v1_enormous"
-base_path = Path(__file__).parents[2]
+base_path = Path(__file__).parents[3]
print(f"base_path={base_path}")
data_path = "local://" / base_path / data_path
print(f"data_path={data_path}")
diff --git a/rllib/tuned_examples/marwil/cartpole_marwil.py b/rllib/examples/algorithms/marwil/cartpole_marwil.py
similarity index 98%
rename from rllib/tuned_examples/marwil/cartpole_marwil.py
rename to rllib/examples/algorithms/marwil/cartpole_marwil.py
index 1a38519e1565..dd20d9aadcb7 100644
--- a/rllib/tuned_examples/marwil/cartpole_marwil.py
+++ b/rllib/examples/algorithms/marwil/cartpole_marwil.py
@@ -24,7 +24,7 @@
# Define the data paths.
data_path = "offline/tests/data/cartpole/cartpole-v1_large"
-base_path = Path(__file__).parents[2]
+base_path = Path(__file__).parents[3]
print(f"base_path={base_path}")
data_path = "local://" / base_path / data_path
print(f"data_path={data_path}")
diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/examples/algorithms/ppo/atari_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/atari_ppo.py
rename to rllib/examples/algorithms/ppo/atari_ppo.py
diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py b/rllib/examples/algorithms/ppo/benchmark_ppo_mujoco.py
similarity index 100%
rename from rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py
rename to rllib/examples/algorithms/ppo/benchmark_ppo_mujoco.py
diff --git a/rllib/tuned_examples/ppo/cartpole_heavy_ppo.py b/rllib/examples/algorithms/ppo/cartpole_heavy_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/cartpole_heavy_ppo.py
rename to rllib/examples/algorithms/ppo/cartpole_heavy_ppo.py
diff --git a/rllib/tuned_examples/ppo/cartpole_ppo.py b/rllib/examples/algorithms/ppo/cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/cartpole_truncated_ppo.py b/rllib/examples/algorithms/ppo/cartpole_truncated_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/cartpole_truncated_ppo.py
rename to rllib/examples/algorithms/ppo/cartpole_truncated_ppo.py
diff --git a/rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py b/rllib/examples/algorithms/ppo/memory_leak_test_ppo_new_stack.py
similarity index 100%
rename from rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py
rename to rllib/examples/algorithms/ppo/memory_leak_test_ppo_new_stack.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_pendulum_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_pendulum_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/pendulum_ppo.py b/rllib/examples/algorithms/ppo/pendulum_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/pendulum_ppo.py
rename to rllib/examples/algorithms/ppo/pendulum_ppo.py
diff --git a/rllib/tuned_examples/ppo/stateless_cartpole_ppo.py b/rllib/examples/algorithms/ppo/stateless_cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/stateless_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/stateless_cartpole_ppo.py
diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco.py b/rllib/examples/algorithms/sac/benchmark_sac_mujoco.py
similarity index 100%
rename from rllib/tuned_examples/sac/benchmark_sac_mujoco.py
rename to rllib/examples/algorithms/sac/benchmark_sac_mujoco.py
diff --git a/rllib/tuned_examples/sac/halfcheetah_sac.py b/rllib/examples/algorithms/sac/halfcheetah_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/halfcheetah_sac.py
rename to rllib/examples/algorithms/sac/halfcheetah_sac.py
diff --git a/rllib/tuned_examples/sac/humanoid_sac.py b/rllib/examples/algorithms/sac/humanoid_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/humanoid_sac.py
rename to rllib/examples/algorithms/sac/humanoid_sac.py
diff --git a/rllib/tuned_examples/sac/mountaincar_sac.py b/rllib/examples/algorithms/sac/mountaincar_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/mountaincar_sac.py
rename to rllib/examples/algorithms/sac/mountaincar_sac.py
diff --git a/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py b/rllib/examples/algorithms/sac/multi_agent_pendulum_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/multi_agent_pendulum_sac.py
rename to rllib/examples/algorithms/sac/multi_agent_pendulum_sac.py
diff --git a/rllib/tuned_examples/sac/pendulum_sac.py b/rllib/examples/algorithms/sac/pendulum_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum_sac.py
rename to rllib/examples/algorithms/sac/pendulum_sac.py
diff --git a/rllib/examples/multi_agent/self_play_footsies.py b/rllib/examples/multi_agent/self_play_footsies.py
index 2cc5213eced2..da9047aac019 100644
--- a/rllib/examples/multi_agent/self_play_footsies.py
+++ b/rllib/examples/multi_agent/self_play_footsies.py
@@ -2,14 +2,14 @@
Multi-agent RLlib Footsies Simplified Example (PPO)
About:
- - This example as a simplified version of "rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py",
+ - This example is a simplified version of "rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py",
which has more detailed comments and instructions. Please refer to that example for more information.
- This example is created to test the self-play training progression with footsies.
- Simplified version runs with single learner (cpu), single env runner, and single eval env runner.
"""
from pathlib import Path
-from ray.rllib.tuned_examples.ppo.multi_agent_footsies_ppo import (
+from ray.rllib.examples.algorithms.ppo.multi_agent_footsies_ppo import (
config,
env_creator,
stop,
diff --git a/rllib/examples/offline_rl/custom_input_api.py b/rllib/examples/offline_rl/custom_input_api.py
index d7a6974825fb..77144568f558 100644
--- a/rllib/examples/offline_rl/custom_input_api.py
+++ b/rllib/examples/offline_rl/custom_input_api.py
@@ -87,7 +87,7 @@ def input_creator(ioctx: IOContext) -> InputReader:
# we register our custom input creator with this convenient function
register_input("custom_input", input_creator)
- # Config modified from rllib/tuned_examples/cql/pendulum-cql.yaml
+ # Config modified from rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
default_config = get_trainable_cls(args.run).get_default_config()
config = (
default_config.environment("Pendulum-v1", clip_actions=True)
diff --git a/rllib/examples/offline_rl/offline_rl.py b/rllib/examples/offline_rl/offline_rl.py
index 1e8b1158221e..9abd7cbd93df 100644
--- a/rllib/examples/offline_rl/offline_rl.py
+++ b/rllib/examples/offline_rl/offline_rl.py
@@ -9,7 +9,7 @@
Generate the offline json file by running an SAC algo until it reaches expert
level on your command line. For example:
$ cd ray
-$ rllib train -f rllib/tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
+$ rllib train -f rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml --no-ray-ui
Also make sure that in the above SAC yaml file (pendulum-sac.yaml),
you specify an additional "output" key with any path on your local
@@ -55,7 +55,7 @@
if __name__ == "__main__":
args = parser.parse_args()
- # See rllib/tuned_examples/cql/pendulum-cql.yaml for comparison.
+ # See rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml for comparison.
config = (
cql.CQLConfig()
.api_stack(
diff --git a/rllib/tuned_examples/__init__.py b/rllib/tuned_examples/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/rllib/tuned_examples/cleanup_experiment.py b/rllib/tuned_examples/cleanup_experiment.py
deleted file mode 100644
index 749d3ed5e522..000000000000
--- a/rllib/tuned_examples/cleanup_experiment.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""
-This script automates cleaning up a benchmark/experiment run of some algo
-against some config (with possibly more than one tune trial,
-e.g. torch=grid_search([True, False])).
-
-Run `python cleanup_experiment.py --help` for more information.
-
-Use on an input directory with trial contents e.g.:
-..
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_13-59-35dqaetxnf
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_17-21-28tbhedw72
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_10-17-54lv20cgn_
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_13-59-35kwzhax_y
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_17-21-28a5j0s7za
-
-Then run:
->> python cleanup_experiment.py --experiment-dir [parent dir w/ trial sub-dirs]
->> --output-dir [your out dir] --results-filter dumb_col_2,superfluous_col3
->> --results-max-size [max results file size in kb before(!) zipping]
-
-The script will create one output sub-dir for each trial and only copy
-the configuration and the csv results (filtered and every nth row removed
-based on the given args).
-"""
-
-import argparse
-import json
-import os
-import re
-import shutil
-
-import yaml
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "--experiment-dir",
- type=str,
- help="Experiment dir in which all sub-runs (seeds) are "
- "located (as sub-dirs). Each sub0-run dir must contain the files: "
- "params.json and progress.csv.",
-)
-parser.add_argument(
- "--output-dir",
- type=str,
- help="The output dir, in which the cleaned up output will be placed.",
-)
-parser.add_argument(
- "--results-filter",
- type=str,
- help="comma-separated list of csv fields to exclude.",
- default="experiment_id,pid,hostname,node_ip,trial_id,hist_stats/episode_"
- "reward,hist_stats/episode_lengths,experiment_tag",
-)
-parser.add_argument(
- "--results-max-size",
- type=int,
- help="the max. size of the final results.csv file (in kb). Will erase "
- "every nth line in the original input to reach that goal. "
- "Use 0 for no limit (default=100).",
- default=100,
-)
-
-
-def process_single_run(in_dir, out_dir):
- exp_dir = os.listdir(in_dir)
-
- # Make sure trials dir is ok.
- assert (
- "params.json" in exp_dir and "progress.csv" in exp_dir
- ), "params.json or progress.csv not found in {}!".format(in_dir)
-
- os.makedirs(out_dir, exist_ok=True)
-
- for file in exp_dir:
- absfile = os.path.join(in_dir, file)
- # Config file -> Convert to yaml and move to output dir.
- if file == "params.json":
- assert os.path.isfile(absfile), "{} not a file!".format(file)
- with open(absfile) as fp:
- contents = json.load(fp)
- with open(os.path.join(out_dir, "config.yaml"), "w") as fp:
- yaml.dump(contents, fp)
- # Progress csv file -> Filter out some columns, cut, and write to
- # output_dir.
- elif file == "progress.csv":
- assert os.path.isfile(absfile), "{} not a file!".format(file)
- col_idx_to_filter = []
- with open(absfile) as fp:
- # Get column names.
- col_names_orig = fp.readline().strip().split(",")
- # Split by comma (abiding to quotes), filter out
- # unwanted columns, then write to disk.
- cols_to_filter = args.results_filter.split(",")
- for i, c in enumerate(col_names_orig):
- if c in cols_to_filter:
- col_idx_to_filter.insert(0, i)
- col_names = col_names_orig.copy()
- for idx in col_idx_to_filter:
- col_names.pop(idx)
- absfile_out = os.path.join(out_dir, "progress.csv")
- with open(absfile_out, "w") as out_fp:
- print(",".join(col_names), file=out_fp)
- while True:
- line = fp.readline().strip()
- if not line:
- break
- line = re.sub(
- "(,{2,})",
- lambda m: ",None" * (len(m.group()) - 1) + ",",
- line,
- )
- cols = re.findall('".+?"|[^,]+', line)
- if len(cols) != len(col_names_orig):
- continue
- for idx in col_idx_to_filter:
- cols.pop(idx)
- print(",".join(cols), file=out_fp)
-
- # Reduce the size of the output file if necessary.
- out_size = os.path.getsize(absfile_out)
- max_size = args.results_max_size * 1024
- if 0 < max_size < out_size:
- # Figure out roughly every which line we have to drop.
- ratio = out_size / max_size
- # If ratio > 2.0, we'll have to keep only every nth line.
- if ratio > 2.0:
- nth = out_size // max_size
- os.system(
- "awk 'NR==1||NR%{}==0' {} > {}.new".format(
- nth, absfile_out, absfile_out
- )
- )
- # If ratio < 2.0 (>1.0), we'll have to drop every nth line.
- else:
- nth = out_size // (out_size - max_size)
- os.system(
- "awk 'NR==1||NR%{}!=0' {} > {}.new".format(
- nth, absfile_out, absfile_out
- )
- )
- os.remove(absfile_out)
- os.rename(absfile_out + ".new", absfile_out)
-
- # Zip progress.csv into results.zip.
- zip_file = os.path.join(out_dir, "results.zip")
- try:
- os.remove(zip_file)
- except FileNotFoundError:
- pass
- os.system(
- "zip -j {} {}".format(zip_file, os.path.join(out_dir, "progress.csv"))
- )
- os.remove(os.path.join(out_dir, "progress.csv"))
-
- # TBX events file -> Move as is.
- elif re.search("^(events\\.out\\.|params\\.pkl)", file):
- assert os.path.isfile(absfile), "{} not a file!".format(file)
- shutil.copyfile(absfile, os.path.join(out_dir, file))
-
-
-if __name__ == "__main__":
- args = parser.parse_args()
- exp_dir = os.listdir(args.experiment_dir)
- # Loop through all sub-directories.
- for i, sub_run in enumerate(sorted(exp_dir)):
- abspath = os.path.join(args.experiment_dir, sub_run)
- # This is a seed run.
- if os.path.isdir(abspath) and re.search(
- "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)", sub_run
- ):
- # Create meaningful output dir name:
- # [algo]_[env]_[trial #]_[trial-config]_[date YYYY-MM-DD].
- cleaned_up_out = re.sub(
- "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)(_.+)?(_\\d{4}-\\d{2}-\\d{2})"
- "_\\d{2}-\\d{2}-\\w+",
- "{:02}_\\1_\\2\\4\\5".format(i),
- sub_run,
- )
- # Remove superflous `env=` specifier (anv always included in name).
- cleaned_up_out = re.sub(
- "^(.+)env=\\w+?-v\\d+,?(.+)", "\\1\\2", cleaned_up_out
- )
- out_path = os.path.join(args.output_dir, cleaned_up_out)
- process_single_run(abspath, out_path)
- # Done.
- print("done")
diff --git a/rllib/tuned_examples/dreamerv3/__init__.py b/rllib/tuned_examples/dreamerv3/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py b/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py
deleted file mode 100644
index 51e9d2d2b3ef..000000000000
--- a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import time
-
-from ray import tune
-from ray.rllib.algorithms.ppo.ppo import PPOConfig
-from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME
-from ray.tune.schedulers.pb2 import PB2
-
-# Needs the following packages to be installed on Ubuntu:
-# sudo apt-get libosmesa-dev
-# sudo apt-get install patchelf
-# python -m pip install "gymnasium[mujoco]"
-# Might need to be added to bashsrc:
-# export MUJOCO_GL=osmesa"
-# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin"
-
-# See the following links for becnhmark results of other libraries:
-# Original paper: https://arxiv.org/abs/1812.05905
-# CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE
-# AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks
-benchmark_envs = {
- "HalfCheetah-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
- },
- "Hopper-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
- },
- "InvertedPendulum-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
- },
- "InvertedDoublePendulum-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
- },
- "Reacher-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000},
- "Swimmer-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000},
- "Walker2d-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
- },
-}
-
-pb2_scheduler = PB2(
- time_attr=f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}",
- metric="env_runners/episode_return_mean",
- mode="max",
- perturbation_interval=50000,
- # Copy bottom % with top % weights.
- quantile_fraction=0.25,
- hyperparam_bounds={
- "lr": [1e-5, 1e-3],
- "gamma": [0.95, 0.99],
- "lambda": [0.97, 1.0],
- "entropy_coeff": [0.0, 0.01],
- "vf_loss_coeff": [0.01, 1.0],
- "clip_param": [0.1, 0.3],
- "kl_target": [0.01, 0.03],
- "minibatch_size": [512, 4096],
- "num_epochs": [6, 32],
- "vf_share_layers": [False, True],
- "use_kl_loss": [False, True],
- "kl_coeff": [0.1, 0.4],
- "vf_clip_param": [10.0, float("inf")],
- "grad_clip": [40, 200],
- },
-)
-
-experiment_start_time = time.time()
-# Following the paper.
-num_rollout_workers = 32
-for env, stop_criteria in benchmark_envs.items():
- hp_trial_start_time = time.time()
- config = (
- PPOConfig()
- .environment(env=env)
- .env_runners(
- rollout_fragment_length=1,
- num_env_runners=num_rollout_workers,
- # TODO (sven, simon): Add resources.
- )
- .learners(
- # Let's start with a small number of learner workers and
- # add later a tune grid search for these resources.
- # TODO (simon): Either add tune grid search here or make
- # an extra script to only test scalability.
- num_learners=1,
- num_gpus_per_learner=1,
- )
- # TODO (simon): Adjust to new model_config_dict.
- .training(
- lr=tune.uniform(1e-5, 1e-3),
- gamma=tune.uniform(0.95, 0.99),
- lambda_=tune.uniform(0.97, 1.0),
- entropy_coeff=tune.choice([0.0, 0.01]),
- vf_loss_coeff=tune.uniform(0.01, 1.0),
- clip_param=tune.uniform(0.1, 0.3),
- kl_target=tune.uniform(0.01, 0.03),
- minibatch_size=tune.choice([512, 1024, 2048, 4096]),
- num_epochs=tune.randint(6, 32),
- vf_share_layers=tune.choice([True, False]),
- use_kl_loss=tune.choice([True, False]),
- kl_coeff=tune.uniform(0.1, 0.4),
- vf_clip_param=tune.choice([10.0, 40.0, float("inf")]),
- grad_clip=tune.choice([None, 40, 100, 200]),
- train_batch_size=tune.sample_from(
- lambda spec: spec.config["minibatch_size"] * num_rollout_workers
- ),
- model={
- "fcnet_hiddens": [64, 64],
- "fcnet_activation": "tanh",
- "vf_share_layers": True,
- },
- )
- .reporting(
- metrics_num_episodes_for_smoothing=5,
- min_sample_timesteps_per_iteration=1000,
- )
- .evaluation(
- evaluation_duration="auto",
- evaluation_interval=1,
- evaluation_num_env_runners=1,
- evaluation_parallel_to_training=True,
- evaluation_config={
- # PPO learns stochastic policy.
- "explore": False,
- },
- )
- )
-
- tuner = tune.Tuner(
- "PPO",
- param_space=config,
- run_config=tune.RunConfig(
- stop=stop_criteria,
- name="benchmark_ppo_mujoco_pb2_" + env,
- ),
- tune_config=tune.TuneConfig(
- scheduler=pb2_scheduler,
- num_samples=8,
- ),
- )
- result_grid = tuner.fit()
- best_result = result_grid.get_best_result()
- print(
- f"Finished running HP search for (env={env}) in "
- f"{time.time() - hp_trial_start_time} seconds."
- )
- print(f"Best result for {env}: {best_result}")
- print(f"Best config for {env}: {best_result['config']}")
-
- # Run again with the best config.
- best_trial_start_time = time.time()
- tuner = tune.Tuner(
- "PPO",
- param_space=best_result.config,
- run_config=tune.RunConfig(
- stop=stop_criteria,
- name="benchmark_ppo_mujoco_pb2_" + env + "_best",
- ),
- )
- print(f"Running best config for (env={env})...")
- tuner.fit()
- print(
- f"Finished running best config for (env={env}) "
- f"in {time.time() - best_trial_start_time} seconds."
- )
-
-print(
- f"Finished running HP search on all MuJoCo benchmarks in "
- f"{time.time() - experiment_start_time} seconds."
-)
-print(
- "Results from running the best configs can be found in the "
- "`benchmark_ppo_mujoco_pb2__best` directories."
-)
diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py b/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py
deleted file mode 100644
index f768dddf03b0..000000000000
--- a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import time
-
-from ray import tune
-from ray.rllib.algorithms.sac.sac import SACConfig
-from ray.rllib.utils.metrics import (
- ENV_RUNNER_RESULTS,
- EPISODE_RETURN_MEAN,
- NUM_ENV_STEPS_SAMPLED_LIFETIME,
-)
-from ray.tune.schedulers.pb2 import PB2
-
-# Needs the following packages to be installed on Ubuntu:
-# sudo apt-get libosmesa-dev
-# sudo apt-get install patchelf
-# python -m pip install "gymnasium[mujoco]"
-# Might need to be added to bashsrc:
-# export MUJOCO_GL=osmesa"
-# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin"
-
-# See the following links for becnhmark results of other libraries:
-# Original paper: https://arxiv.org/abs/1812.05905
-# CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE
-# AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks
-benchmark_envs = {
- "HalfCheetah-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000,
- },
- "Hopper-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
- },
- "Humanoid-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 10000000,
- },
- "Ant-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000},
- "Walker2d-v4": {
- f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000,
- },
-}
-
-pb2_scheduler = PB2(
- time_attr=NUM_ENV_STEPS_SAMPLED_LIFETIME,
- metric=f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}",
- mode="max",
- perturbation_interval=50000,
- # Copy bottom % with top % weights.
- quantile_fraction=0.25,
- hyperparam_bounds={
- "actor_lr": [1e-5, 1e-3],
- "critic_lr": [1e-6, 1e-4],
- "alpha_lr": [1e-6, 1e-3],
- "gamma": [0.95, 0.99],
- "n_step": [1, 3],
- "initial_alpha": [1.0, 1.5],
- "tau": [0.001, 0.1],
- "target_entropy": [-10, -1],
- "train_batch_size": [128, 512],
- "target_network_update_freq": [1, 4],
- },
-)
-
-experiment_start_time = time.time()
-for env, stop_criteria in benchmark_envs.items():
- hp_trial_start_time = time.time()
- config = (
- SACConfig()
- .environment(env=env)
- .env_runners(
- rollout_fragment_length="auto",
- num_env_runners=1,
- # TODO (sven, simon): Add resources.
- )
- .learners(
- # Note, we have a small batch and a sample/train ratio
- # of 1:1, so a single GPU should be enough.
- num_learners=1,
- num_gpus_per_learner=1,
- )
- # TODO (simon): Adjust to new model_config_dict.
- .training(
- initial_alpha=tune.choice([1.0, 1.5]),
- actor_lr=tune.uniform(1e-5, 1e-3),
- critic_lr=tune.uniform([1e-6, 1e-4]),
- alpha_lr=tune.uniform([1e-6, 1e-3]),
- target_entropy=tune.choice([-10, -5, -1, "auto"]),
- n_step=tune.choice([1, 3, (1, 3)]),
- tau=tune.uniform(0.001, 0.1),
- train_batch_size=tune.choice([128, 256, 512]),
- target_network_update_freq=tune.choice([1, 2, 4]),
- replay_buffer_config={
- "type": "PrioritizedEpisodeReplayBuffer",
- "capacity": 1000000,
- "alpha": 0.6,
- "beta": 0.4,
- },
- num_steps_sampled_before_learning_starts=256,
- model={
- "fcnet_hiddens": [256, 256],
- "fcnet_activation": "relu",
- "post_fcnet_hiddens": [],
- "post_fcnet_activation": None,
- "post_fcnet_weights_initializer": "orthogonal_",
- "post_fcnet_weights_initializer_config": {"gain": 0.01},
- },
- )
- .reporting(
- metrics_num_episodes_for_smoothing=5,
- min_sample_timesteps_per_iteration=1000,
- )
- .evaluation(
- evaluation_duration="auto",
- evaluation_interval=1,
- evaluation_num_env_runners=1,
- evaluation_parallel_to_training=True,
- evaluation_config={
- "explore": False,
- },
- )
- )
-
- tuner = tune.Tuner(
- "SAC",
- param_space=config,
- run_config=tune.RunConfig(
- stop=stop_criteria,
- name="benchmark_sac_mujoco_pb2_" + env,
- ),
- tune_config=tune.TuneConfig(
- scheduler=pb2_scheduler,
- num_samples=8,
- ),
- )
- result_grid = tuner.fit()
- best_result = result_grid.get_best_result()
- print(
- f"Finished running HP search for (env={env}) in "
- f"{time.time() - hp_trial_start_time} seconds."
- )
- print(f"Best result for {env}: {best_result}")
- print(f"Best config for {env}: {best_result['config']}")
-
- # Run again with the best config.
- best_trial_start_time = time.time()
- tuner = tune.Tuner(
- "SAC",
- param_space=best_result.config,
- run_config=tune.RunConfig(
- stop=stop_criteria,
- name="benchmark_sac_mujoco_pb2_" + env + "_best",
- ),
- )
- print(f"Running best config for (env={env})...")
- tuner.fit()
- print(
- f"Finished running best config for (env={env}) "
- f"in {time.time() - best_trial_start_time} seconds."
- )
-
-print(
- f"Finished running HP search on all MuJoCo benchmarks in "
- f"{time.time() - experiment_start_time} seconds."
-)
-print(
- "Results from running the best configs can be found in the "
- "`benchmark_sac_mujoco_pb2__best` directories."
-)
diff --git a/rllib/utils/tests/run_memory_leak_tests.py b/rllib/utils/tests/run_memory_leak_tests.py
index 8685049fb03d..598026d76a38 100644
--- a/rllib/utils/tests/run_memory_leak_tests.py
+++ b/rllib/utils/tests/run_memory_leak_tests.py
@@ -11,9 +11,9 @@
# tags = ["memory_leak_tests"],
# size = "medium", # 5min timeout
# srcs = ["tests/test_memory_leak.py"],
-# data = glob(["tuned_examples/ppo/*.yaml"]),
+# data = glob(["examples/_old_api_stack/algorithms/*.yaml"]),
# # Pass `BAZEL` option and the path to look for yaml files.
-# args = ["BAZEL", "tuned_examples/ppo/memory-leak-test-ppo.yaml"]
+# args = ["BAZEL", "examples/algorithms/ppo/memory-leak-test-ppo.yaml"]
# )
import argparse