From 75bdc1d44df113c34d851505dc1f7a5fd7850748 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Fri, 21 Nov 2025 17:06:44 +0000
Subject: [PATCH 1/9] [rllib] Merge tuned-examples into examples

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 .../algorithms}/atari-dist-dqn.yaml           |   0
 .../_old_api_stack/algorithms}/atari-dqn.yaml |   0
 .../algorithms}/atari-duel-ddqn.yaml          |   0
 .../algorithms}/atari-impala-large.yaml       |   0
 .../algorithms}/atari-impala-multi-gpu.yaml   |   0
 .../algorithms}/atari-impala.yaml             |   0
 .../_old_api_stack/algorithms}/atari-sac.yaml |   0
 .../cartpole-appo-separate-losses.py          |   0
 .../algorithms}/cartpole-bc.yaml              |   0
 ...hing-and-stalling-recreate-workers-appo.py |   0
 ...cartpole-crashing-recreate-workers-appo.py |   0
 .../algorithms}/cartpole-dqn-fake-gpus.yaml   |   0
 .../algorithms}/cartpole-dqn-param-noise.yaml |   0
 .../algorithms}/cartpole-dqn-softq.yaml       |   0
 .../algorithms}/cartpole-dqn.yaml             |   0
 .../algorithms}/cartpole-marwil.yaml          |   0
 .../algorithms}/cartpole-sac.yaml             |   0
 .../algorithms}/frozenlake-appo-vtrace.yaml   |   0
 .../algorithms}/halfcheetah-bc.yaml           |   0
 .../algorithms}/halfcheetah-cql.yaml          |   0
 .../algorithms}/halfcheetah-ppo.yaml          |   0
 .../_old_api_stack/algorithms}/hopper-bc.yaml |   0
 .../algorithms}/hopper-cql.yaml               |   0
 .../algorithms}/hopper-ppo.yaml               |   0
 .../algorithms}/humanoid-ppo-gae.yaml         |   0
 .../algorithms}/humanoid-ppo.yaml             |   0
 .../algorithms}/memory-leak-test-appo.yaml    |   0
 .../algorithms}/memory-leak-test-dqn.yaml     |   0
 .../algorithms}/memory-leak-test-ppo.yaml     |   0
 .../algorithms}/memory-leak-test-sac.yaml     |   0
 .../algorithms}/mspacman-sac.yaml             |   0
 ...hing-and-stalling-recreate-workers-appo.py |   0
 ...cartpole-crashing-recreate-workers-appo.py |   0
 ...ulti-agent-cartpole-w-100-policies-appo.py |   0
 ...multi_agent_cartpole_appo_old_api_stack.py |   0
 .../algorithms}/pendulum-cql.yaml             |   0
 .../algorithms}/pendulum-sac.yaml             |   0
 .../pendulum-transformed-actions-ppo.yaml     |   0
 .../pendulum-transformed-actions-sac.yaml     |   0
 .../_old_api_stack/algorithms}/pong-dqn.yaml  |   0
 .../algorithms}/pong-impala-fast.yaml         |   0
 .../algorithms}/pong-impala-vectorized.yaml   |   0
 .../algorithms}/pong-impala.yaml              |   0
 .../algorithms}/pong-rainbow.yaml             |   0
 ...unity3d-soccer-strikers-vs-goalie-ppo.yaml |   0
 .../algorithms}/walker2d-ppo.yaml             |   0
 .../algorithms}/appo/cartpole_appo.py         |   0
 .../algorithms}/appo/halfcheetah_appo.py      |   0
 .../appo/multi_agent_cartpole_appo.py         |   0
 .../algorithms}/appo/multi_agent_pong_appo.py |   0
 .../multi_agent_stateless_cartpole_appo.py    |   0
 .../algorithms}/appo/pendulum_appo.py         |   0
 .../algorithms}/appo/pong_appo.py             |   0
 .../appo/stateless_cartpole_appo.py           |   0
 .../bc/benchmark_rlunplugged_atari_pong_bc.py |   0
 .../algorithms}/bc/cartpole_bc.py             |   0
 .../bc/cartpole_bc_with_offline_evaluation.py |   0
 .../algorithms}/bc/pendulum_bc.py             |   0
 .../algorithms}/cql/pendulum_cql.py           |   0
 .../algorithms}/dqn/benchmark_dqn_atari.py    |   0
 ...benchmark_dqn_atari_rllib_preprocessing.py |   0
 .../algorithms}/dqn/cartpole_dqn.py           |   0
 .../dqn/multi_agent_cartpole_dqn.py           |   0
 .../algorithms}/dqn/stateless_cartpole_dqn.py |   0
 .../dreamerv3/atari_100k_dreamerv3.py         |   0
 .../dreamerv3/atari_200M_dreamerv3.py         |   0
 .../dreamerv3/cartpole_dreamerv3.py           |   0
 .../dm_control_suite_vision_dreamerv3.py      |   0
 .../dreamerv3/flappy_bird_dreamerv3.py        |   0
 .../dreamerv3/frozenlake_2x2_dreamerv3.py     |   0
 .../frozenlake_4x4_deterministic_dreamerv3.py |   0
 .../dreamerv3/gymnasium_robotics_dreamerv3.py |   0
 .../dreamerv3/highway_env_dreamerv3.py        |   0
 .../dreamerv3/pendulum_dreamerv3.py           |   0
 .../impala/cartpole-impala-separate-losses.py |   0
 .../algorithms}/impala/cartpole_impala.py     |   0
 .../impala/heavy_cartpole_impala.py           |   0
 .../impala/multi_agent_cartpole_impala.py     |   0
 ...lti_agent_cartpole_impala_old_api_stack.py |   0
 .../multi_agent_stateless_cartpole_impala.py  |   0
 .../algorithms}/impala/pendulum_impala.py     |   0
 .../algorithms}/impala/pong_impala.py         |   0
 .../impala/pong_impala_pb2_hyperopt.py        |   0
 .../impala/stateless_cartpole_impala.py       |   0
 .../algorithms}/iql/pendulum_iql.py           |   0
 .../algorithms}/marwil/cartpole_marwil.py     |   0
 .../algorithms}/ppo/atari_ppo.py              |   0
 .../algorithms}/ppo/benchmark_ppo_mujoco.py   |   0
 .../algorithms}/ppo/cartpole_heavy_ppo.py     |   0
 .../algorithms}/ppo/cartpole_ppo.py           |   0
 .../algorithms}/ppo/cartpole_truncated_ppo.py |   0
 .../ppo/memory_leak_test_ppo_new_stack.py     |   0
 .../ppo/multi_agent_cartpole_ppo.py           |   0
 .../ppo/multi_agent_footsies_ppo.py           |   0
 .../ppo/multi_agent_pendulum_ppo.py           |   0
 .../ppo/multi_agent_stateless_cartpole_ppo.py |   0
 .../algorithms}/ppo/pendulum_ppo.py           |   0
 .../algorithms}/ppo/stateless_cartpole_ppo.py |   0
 .../algorithms}/sac/benchmark_sac_mujoco.py   |   0
 .../algorithms}/sac/halfcheetah_sac.py        |   0
 .../algorithms}/sac/humanoid_sac.py           |   0
 .../algorithms}/sac/mountaincar_sac.py        |   0
 .../sac/multi_agent_pendulum_sac.py           |   0
 .../algorithms}/sac/pendulum_sac.py           |   0
 rllib/tuned_examples/__init__.py              |   0
 rllib/tuned_examples/cleanup_experiment.py    | 187 ------------------
 .../compact-regression-test.yaml              | 157 ---------------
 rllib/tuned_examples/dreamerv3/__init__.py    |   0
 .../ppo/benchmark_ppo_mujoco_pb2.py           | 172 ----------------
 .../sac/benchmark_sac_mujoco_pb2.py           | 165 ----------------
 110 files changed, 681 deletions(-)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/atari-dist-dqn.yaml (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/atari-dqn.yaml (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/atari-duel-ddqn.yaml (100%)
 rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/atari-impala-large.yaml (100%)
 rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/atari-impala-multi-gpu.yaml (100%)
 rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/atari-impala.yaml (100%)
 rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/atari-sac.yaml (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/cartpole-appo-separate-losses.py (100%)
 rename rllib/{tuned_examples/bc => examples/_old_api_stack/algorithms}/cartpole-bc.yaml (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/cartpole-crashing-and-stalling-recreate-workers-appo.py (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/cartpole-crashing-recreate-workers-appo.py (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/cartpole-dqn-fake-gpus.yaml (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/cartpole-dqn-param-noise.yaml (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/cartpole-dqn-softq.yaml (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/cartpole-dqn.yaml (100%)
 rename rllib/{tuned_examples/marwil => examples/_old_api_stack/algorithms}/cartpole-marwil.yaml (100%)
 rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/cartpole-sac.yaml (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/frozenlake-appo-vtrace.yaml (100%)
 rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/halfcheetah-bc.yaml (100%)
 rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/halfcheetah-cql.yaml (100%)
 rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/halfcheetah-ppo.yaml (100%)
 rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/hopper-bc.yaml (100%)
 rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/hopper-cql.yaml (100%)
 rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/hopper-ppo.yaml (100%)
 rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/humanoid-ppo-gae.yaml (100%)
 rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/humanoid-ppo.yaml (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/memory-leak-test-appo.yaml (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/memory-leak-test-dqn.yaml (100%)
 rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/memory-leak-test-ppo.yaml (100%)
 rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/memory-leak-test-sac.yaml (100%)
 rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/mspacman-sac.yaml (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/multi-agent-cartpole-crashing-recreate-workers-appo.py (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/multi-agent-cartpole-w-100-policies-appo.py (100%)
 rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/multi_agent_cartpole_appo_old_api_stack.py (100%)
 rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/pendulum-cql.yaml (100%)
 rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/pendulum-sac.yaml (100%)
 rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/pendulum-transformed-actions-ppo.yaml (100%)
 rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/pendulum-transformed-actions-sac.yaml (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/pong-dqn.yaml (100%)
 rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/pong-impala-fast.yaml (100%)
 rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/pong-impala-vectorized.yaml (100%)
 rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/pong-impala.yaml (100%)
 rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/pong-rainbow.yaml (100%)
 rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/unity3d-soccer-strikers-vs-goalie-ppo.yaml (100%)
 rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/walker2d-ppo.yaml (100%)
 rename rllib/{tuned_examples => examples/algorithms}/appo/cartpole_appo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/appo/halfcheetah_appo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/appo/multi_agent_cartpole_appo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/appo/multi_agent_pong_appo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/appo/multi_agent_stateless_cartpole_appo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/appo/pendulum_appo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/appo/pong_appo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/appo/stateless_cartpole_appo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/bc/benchmark_rlunplugged_atari_pong_bc.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/bc/cartpole_bc.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/bc/cartpole_bc_with_offline_evaluation.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/bc/pendulum_bc.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/cql/pendulum_cql.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dqn/benchmark_dqn_atari.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dqn/benchmark_dqn_atari_rllib_preprocessing.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dqn/cartpole_dqn.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dqn/multi_agent_cartpole_dqn.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dqn/stateless_cartpole_dqn.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/atari_100k_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/atari_200M_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/cartpole_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/dm_control_suite_vision_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/flappy_bird_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/frozenlake_2x2_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/gymnasium_robotics_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/highway_env_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/pendulum_dreamerv3.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/cartpole-impala-separate-losses.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/cartpole_impala.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/heavy_cartpole_impala.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/multi_agent_cartpole_impala.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/multi_agent_cartpole_impala_old_api_stack.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/multi_agent_stateless_cartpole_impala.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/pendulum_impala.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/pong_impala.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/pong_impala_pb2_hyperopt.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/impala/stateless_cartpole_impala.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/iql/pendulum_iql.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/marwil/cartpole_marwil.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/atari_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/benchmark_ppo_mujoco.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/cartpole_heavy_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/cartpole_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/cartpole_truncated_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/memory_leak_test_ppo_new_stack.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/multi_agent_cartpole_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/multi_agent_footsies_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/multi_agent_pendulum_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/multi_agent_stateless_cartpole_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/pendulum_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/ppo/stateless_cartpole_ppo.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/sac/benchmark_sac_mujoco.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/sac/halfcheetah_sac.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/sac/humanoid_sac.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/sac/mountaincar_sac.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/sac/multi_agent_pendulum_sac.py (100%)
 rename rllib/{tuned_examples => examples/algorithms}/sac/pendulum_sac.py (100%)
 delete mode 100644 rllib/tuned_examples/__init__.py
 delete mode 100644 rllib/tuned_examples/cleanup_experiment.py
 delete mode 100644 rllib/tuned_examples/compact-regression-test.yaml
 delete mode 100644 rllib/tuned_examples/dreamerv3/__init__.py
 delete mode 100644 rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py
 delete mode 100644 rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py

diff --git a/rllib/tuned_examples/dqn/atari-dist-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-dist-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/atari-dist-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-dist-dqn.yaml
diff --git a/rllib/tuned_examples/dqn/atari-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/atari-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-dqn.yaml
diff --git a/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-duel-ddqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/atari-duel-ddqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-duel-ddqn.yaml
diff --git a/rllib/tuned_examples/impala/atari-impala-large.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala-large.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/atari-impala-large.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-impala-large.yaml
diff --git a/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala-multi-gpu.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-impala-multi-gpu.yaml
diff --git a/rllib/tuned_examples/impala/atari-impala.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/atari-impala.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-impala.yaml
diff --git a/rllib/tuned_examples/sac/atari-sac.yaml b/rllib/examples/_old_api_stack/algorithms/atari-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/atari-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/atari-sac.yaml
diff --git a/rllib/tuned_examples/appo/cartpole-appo-separate-losses.py b/rllib/examples/_old_api_stack/algorithms/cartpole-appo-separate-losses.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole-appo-separate-losses.py
rename to rllib/examples/_old_api_stack/algorithms/cartpole-appo-separate-losses.py
diff --git a/rllib/tuned_examples/bc/cartpole-bc.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-bc.yaml
similarity index 100%
rename from rllib/tuned_examples/bc/cartpole-bc.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-bc.yaml
diff --git a/rllib/tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/cartpole-crashing-and-stalling-recreate-workers-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py
rename to rllib/examples/_old_api_stack/algorithms/cartpole-crashing-and-stalling-recreate-workers-appo.py
diff --git a/rllib/tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/cartpole-crashing-recreate-workers-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py
rename to rllib/examples/_old_api_stack/algorithms/cartpole-crashing-recreate-workers-appo.py
diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-fake-gpus.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-fake-gpus.yaml
diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-param-noise.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-param-noise.yaml
diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-softq.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-softq.yaml
diff --git a/rllib/tuned_examples/dqn/cartpole-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn.yaml
diff --git a/rllib/tuned_examples/marwil/cartpole-marwil.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-marwil.yaml
similarity index 100%
rename from rllib/tuned_examples/marwil/cartpole-marwil.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-marwil.yaml
diff --git a/rllib/tuned_examples/sac/cartpole-sac.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/cartpole-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/cartpole-sac.yaml
diff --git a/rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml b/rllib/examples/_old_api_stack/algorithms/frozenlake-appo-vtrace.yaml
similarity index 100%
rename from rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml
rename to rllib/examples/_old_api_stack/algorithms/frozenlake-appo-vtrace.yaml
diff --git a/rllib/tuned_examples/cql/halfcheetah-bc.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-bc.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/halfcheetah-bc.yaml
rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-bc.yaml
diff --git a/rllib/tuned_examples/cql/halfcheetah-cql.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-cql.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/halfcheetah-cql.yaml
rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-cql.yaml
diff --git a/rllib/tuned_examples/ppo/halfcheetah-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/halfcheetah-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-ppo.yaml
diff --git a/rllib/tuned_examples/cql/hopper-bc.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-bc.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/hopper-bc.yaml
rename to rllib/examples/_old_api_stack/algorithms/hopper-bc.yaml
diff --git a/rllib/tuned_examples/cql/hopper-cql.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-cql.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/hopper-cql.yaml
rename to rllib/examples/_old_api_stack/algorithms/hopper-cql.yaml
diff --git a/rllib/tuned_examples/ppo/hopper-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/hopper-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/hopper-ppo.yaml
diff --git a/rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml b/rllib/examples/_old_api_stack/algorithms/humanoid-ppo-gae.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml
rename to rllib/examples/_old_api_stack/algorithms/humanoid-ppo-gae.yaml
diff --git a/rllib/tuned_examples/ppo/humanoid-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/humanoid-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/humanoid-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/humanoid-ppo.yaml
diff --git a/rllib/tuned_examples/appo/memory-leak-test-appo.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-appo.yaml
similarity index 100%
rename from rllib/tuned_examples/appo/memory-leak-test-appo.yaml
rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-appo.yaml
diff --git a/rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-dqn.yaml
diff --git a/rllib/tuned_examples/ppo/memory-leak-test-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/memory-leak-test-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml
diff --git a/rllib/tuned_examples/sac/memory-leak-test-sac.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/memory-leak-test-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-sac.yaml
diff --git a/rllib/tuned_examples/sac/mspacman-sac.yaml b/rllib/examples/_old_api_stack/algorithms/mspacman-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/mspacman-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/mspacman-sac.yaml
diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py
rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py
diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-recreate-workers-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py
rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-recreate-workers-appo.py
diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py
rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_cartpole_appo_old_api_stack.py b/rllib/examples/_old_api_stack/algorithms/multi_agent_cartpole_appo_old_api_stack.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_cartpole_appo_old_api_stack.py
rename to rllib/examples/_old_api_stack/algorithms/multi_agent_cartpole_appo_old_api_stack.py
diff --git a/rllib/tuned_examples/cql/pendulum-cql.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
similarity index 100%
rename from rllib/tuned_examples/cql/pendulum-cql.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
diff --git a/rllib/tuned_examples/sac/pendulum-sac.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml
diff --git a/rllib/tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-ppo.yaml
diff --git a/rllib/tuned_examples/sac/pendulum-transformed-actions-sac.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-sac.yaml
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum-transformed-actions-sac.yaml
rename to rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-sac.yaml
diff --git a/rllib/tuned_examples/dqn/pong-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/pong-dqn.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/pong-dqn.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-dqn.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala-fast.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala-fast.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala-fast.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala-fast.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala-vectorized.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala-vectorized.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala-vectorized.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala-vectorized.yaml
diff --git a/rllib/tuned_examples/impala/pong-impala.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala.yaml
similarity index 100%
rename from rllib/tuned_examples/impala/pong-impala.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-impala.yaml
diff --git a/rllib/tuned_examples/dqn/pong-rainbow.yaml b/rllib/examples/_old_api_stack/algorithms/pong-rainbow.yaml
similarity index 100%
rename from rllib/tuned_examples/dqn/pong-rainbow.yaml
rename to rllib/examples/_old_api_stack/algorithms/pong-rainbow.yaml
diff --git a/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/unity3d-soccer-strikers-vs-goalie-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/unity3d-soccer-strikers-vs-goalie-ppo.yaml
diff --git a/rllib/tuned_examples/ppo/walker2d-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/walker2d-ppo.yaml
similarity index 100%
rename from rllib/tuned_examples/ppo/walker2d-ppo.yaml
rename to rllib/examples/_old_api_stack/algorithms/walker2d-ppo.yaml
diff --git a/rllib/tuned_examples/appo/cartpole_appo.py b/rllib/examples/algorithms/appo/cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/cartpole_appo.py
rename to rllib/examples/algorithms/appo/cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/halfcheetah_appo.py b/rllib/examples/algorithms/appo/halfcheetah_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/halfcheetah_appo.py
rename to rllib/examples/algorithms/appo/halfcheetah_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_cartpole_appo.py b/rllib/examples/algorithms/appo/multi_agent_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_cartpole_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_pong_appo.py b/rllib/examples/algorithms/appo/multi_agent_pong_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_pong_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_pong_appo.py
diff --git a/rllib/tuned_examples/appo/multi_agent_stateless_cartpole_appo.py b/rllib/examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/multi_agent_stateless_cartpole_appo.py
rename to rllib/examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py
diff --git a/rllib/tuned_examples/appo/pendulum_appo.py b/rllib/examples/algorithms/appo/pendulum_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/pendulum_appo.py
rename to rllib/examples/algorithms/appo/pendulum_appo.py
diff --git a/rllib/tuned_examples/appo/pong_appo.py b/rllib/examples/algorithms/appo/pong_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/pong_appo.py
rename to rllib/examples/algorithms/appo/pong_appo.py
diff --git a/rllib/tuned_examples/appo/stateless_cartpole_appo.py b/rllib/examples/algorithms/appo/stateless_cartpole_appo.py
similarity index 100%
rename from rllib/tuned_examples/appo/stateless_cartpole_appo.py
rename to rllib/examples/algorithms/appo/stateless_cartpole_appo.py
diff --git a/rllib/tuned_examples/bc/benchmark_rlunplugged_atari_pong_bc.py b/rllib/examples/algorithms/bc/benchmark_rlunplugged_atari_pong_bc.py
similarity index 100%
rename from rllib/tuned_examples/bc/benchmark_rlunplugged_atari_pong_bc.py
rename to rllib/examples/algorithms/bc/benchmark_rlunplugged_atari_pong_bc.py
diff --git a/rllib/tuned_examples/bc/cartpole_bc.py b/rllib/examples/algorithms/bc/cartpole_bc.py
similarity index 100%
rename from rllib/tuned_examples/bc/cartpole_bc.py
rename to rllib/examples/algorithms/bc/cartpole_bc.py
diff --git a/rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
similarity index 100%
rename from rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py
rename to rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
diff --git a/rllib/tuned_examples/bc/pendulum_bc.py b/rllib/examples/algorithms/bc/pendulum_bc.py
similarity index 100%
rename from rllib/tuned_examples/bc/pendulum_bc.py
rename to rllib/examples/algorithms/bc/pendulum_bc.py
diff --git a/rllib/tuned_examples/cql/pendulum_cql.py b/rllib/examples/algorithms/cql/pendulum_cql.py
similarity index 100%
rename from rllib/tuned_examples/cql/pendulum_cql.py
rename to rllib/examples/algorithms/cql/pendulum_cql.py
diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari.py b/rllib/examples/algorithms/dqn/benchmark_dqn_atari.py
similarity index 100%
rename from rllib/tuned_examples/dqn/benchmark_dqn_atari.py
rename to rllib/examples/algorithms/dqn/benchmark_dqn_atari.py
diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py b/rllib/examples/algorithms/dqn/benchmark_dqn_atari_rllib_preprocessing.py
similarity index 100%
rename from rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py
rename to rllib/examples/algorithms/dqn/benchmark_dqn_atari_rllib_preprocessing.py
diff --git a/rllib/tuned_examples/dqn/cartpole_dqn.py b/rllib/examples/algorithms/dqn/cartpole_dqn.py
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole_dqn.py
rename to rllib/examples/algorithms/dqn/cartpole_dqn.py
diff --git a/rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py b/rllib/examples/algorithms/dqn/multi_agent_cartpole_dqn.py
similarity index 100%
rename from rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py
rename to rllib/examples/algorithms/dqn/multi_agent_cartpole_dqn.py
diff --git a/rllib/tuned_examples/dqn/stateless_cartpole_dqn.py b/rllib/examples/algorithms/dqn/stateless_cartpole_dqn.py
similarity index 100%
rename from rllib/tuned_examples/dqn/stateless_cartpole_dqn.py
rename to rllib/examples/algorithms/dqn/stateless_cartpole_dqn.py
diff --git a/rllib/tuned_examples/dreamerv3/atari_100k_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/atari_100k_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/atari_100k_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/atari_100k_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/atari_200M_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/atari_200M_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/atari_200M_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/atari_200M_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/cartpole_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/cartpole_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/cartpole_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/cartpole_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/flappy_bird_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/flappy_bird_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/flappy_bird_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/flappy_bird_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/frozenlake_2x2_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/frozenlake_2x2_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/frozenlake_2x2_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/frozenlake_2x2_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/gymnasium_robotics_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/gymnasium_robotics_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/gymnasium_robotics_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/gymnasium_robotics_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/highway_env_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/highway_env_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/highway_env_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/highway_env_dreamerv3.py
diff --git a/rllib/tuned_examples/dreamerv3/pendulum_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/pendulum_dreamerv3.py
similarity index 100%
rename from rllib/tuned_examples/dreamerv3/pendulum_dreamerv3.py
rename to rllib/examples/algorithms/dreamerv3/pendulum_dreamerv3.py
diff --git a/rllib/tuned_examples/impala/cartpole-impala-separate-losses.py b/rllib/examples/algorithms/impala/cartpole-impala-separate-losses.py
similarity index 100%
rename from rllib/tuned_examples/impala/cartpole-impala-separate-losses.py
rename to rllib/examples/algorithms/impala/cartpole-impala-separate-losses.py
diff --git a/rllib/tuned_examples/impala/cartpole_impala.py b/rllib/examples/algorithms/impala/cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/cartpole_impala.py
rename to rllib/examples/algorithms/impala/cartpole_impala.py
diff --git a/rllib/tuned_examples/impala/heavy_cartpole_impala.py b/rllib/examples/algorithms/impala/heavy_cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/heavy_cartpole_impala.py
rename to rllib/examples/algorithms/impala/heavy_cartpole_impala.py
diff --git a/rllib/tuned_examples/impala/multi_agent_cartpole_impala.py b/rllib/examples/algorithms/impala/multi_agent_cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/multi_agent_cartpole_impala.py
rename to rllib/examples/algorithms/impala/multi_agent_cartpole_impala.py
diff --git a/rllib/tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py b/rllib/examples/algorithms/impala/multi_agent_cartpole_impala_old_api_stack.py
similarity index 100%
rename from rllib/tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py
rename to rllib/examples/algorithms/impala/multi_agent_cartpole_impala_old_api_stack.py
diff --git a/rllib/tuned_examples/impala/multi_agent_stateless_cartpole_impala.py b/rllib/examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/multi_agent_stateless_cartpole_impala.py
rename to rllib/examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py
diff --git a/rllib/tuned_examples/impala/pendulum_impala.py b/rllib/examples/algorithms/impala/pendulum_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/pendulum_impala.py
rename to rllib/examples/algorithms/impala/pendulum_impala.py
diff --git a/rllib/tuned_examples/impala/pong_impala.py b/rllib/examples/algorithms/impala/pong_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/pong_impala.py
rename to rllib/examples/algorithms/impala/pong_impala.py
diff --git a/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py b/rllib/examples/algorithms/impala/pong_impala_pb2_hyperopt.py
similarity index 100%
rename from rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py
rename to rllib/examples/algorithms/impala/pong_impala_pb2_hyperopt.py
diff --git a/rllib/tuned_examples/impala/stateless_cartpole_impala.py b/rllib/examples/algorithms/impala/stateless_cartpole_impala.py
similarity index 100%
rename from rllib/tuned_examples/impala/stateless_cartpole_impala.py
rename to rllib/examples/algorithms/impala/stateless_cartpole_impala.py
diff --git a/rllib/tuned_examples/iql/pendulum_iql.py b/rllib/examples/algorithms/iql/pendulum_iql.py
similarity index 100%
rename from rllib/tuned_examples/iql/pendulum_iql.py
rename to rllib/examples/algorithms/iql/pendulum_iql.py
diff --git a/rllib/tuned_examples/marwil/cartpole_marwil.py b/rllib/examples/algorithms/marwil/cartpole_marwil.py
similarity index 100%
rename from rllib/tuned_examples/marwil/cartpole_marwil.py
rename to rllib/examples/algorithms/marwil/cartpole_marwil.py
diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/examples/algorithms/ppo/atari_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/atari_ppo.py
rename to rllib/examples/algorithms/ppo/atari_ppo.py
diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py b/rllib/examples/algorithms/ppo/benchmark_ppo_mujoco.py
similarity index 100%
rename from rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py
rename to rllib/examples/algorithms/ppo/benchmark_ppo_mujoco.py
diff --git a/rllib/tuned_examples/ppo/cartpole_heavy_ppo.py b/rllib/examples/algorithms/ppo/cartpole_heavy_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/cartpole_heavy_ppo.py
rename to rllib/examples/algorithms/ppo/cartpole_heavy_ppo.py
diff --git a/rllib/tuned_examples/ppo/cartpole_ppo.py b/rllib/examples/algorithms/ppo/cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/cartpole_truncated_ppo.py b/rllib/examples/algorithms/ppo/cartpole_truncated_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/cartpole_truncated_ppo.py
rename to rllib/examples/algorithms/ppo/cartpole_truncated_ppo.py
diff --git a/rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py b/rllib/examples/algorithms/ppo/memory_leak_test_ppo_new_stack.py
similarity index 100%
rename from rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py
rename to rllib/examples/algorithms/ppo/memory_leak_test_ppo_new_stack.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_pendulum_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_pendulum_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/pendulum_ppo.py b/rllib/examples/algorithms/ppo/pendulum_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/pendulum_ppo.py
rename to rllib/examples/algorithms/ppo/pendulum_ppo.py
diff --git a/rllib/tuned_examples/ppo/stateless_cartpole_ppo.py b/rllib/examples/algorithms/ppo/stateless_cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/stateless_cartpole_ppo.py
rename to rllib/examples/algorithms/ppo/stateless_cartpole_ppo.py
diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco.py b/rllib/examples/algorithms/sac/benchmark_sac_mujoco.py
similarity index 100%
rename from rllib/tuned_examples/sac/benchmark_sac_mujoco.py
rename to rllib/examples/algorithms/sac/benchmark_sac_mujoco.py
diff --git a/rllib/tuned_examples/sac/halfcheetah_sac.py b/rllib/examples/algorithms/sac/halfcheetah_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/halfcheetah_sac.py
rename to rllib/examples/algorithms/sac/halfcheetah_sac.py
diff --git a/rllib/tuned_examples/sac/humanoid_sac.py b/rllib/examples/algorithms/sac/humanoid_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/humanoid_sac.py
rename to rllib/examples/algorithms/sac/humanoid_sac.py
diff --git a/rllib/tuned_examples/sac/mountaincar_sac.py b/rllib/examples/algorithms/sac/mountaincar_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/mountaincar_sac.py
rename to rllib/examples/algorithms/sac/mountaincar_sac.py
diff --git a/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py b/rllib/examples/algorithms/sac/multi_agent_pendulum_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/multi_agent_pendulum_sac.py
rename to rllib/examples/algorithms/sac/multi_agent_pendulum_sac.py
diff --git a/rllib/tuned_examples/sac/pendulum_sac.py b/rllib/examples/algorithms/sac/pendulum_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum_sac.py
rename to rllib/examples/algorithms/sac/pendulum_sac.py
diff --git a/rllib/tuned_examples/__init__.py b/rllib/tuned_examples/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/rllib/tuned_examples/cleanup_experiment.py b/rllib/tuned_examples/cleanup_experiment.py
deleted file mode 100644
index 749d3ed5e522..000000000000
--- a/rllib/tuned_examples/cleanup_experiment.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""
-This script automates cleaning up a benchmark/experiment run of some algo
-against some config (with possibly more than one tune trial,
-e.g. torch=grid_search([True, False])).
-
-Run `python cleanup_experiment.py --help` for more information.
-
-Use on an input directory with trial contents e.g.:
-..
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_13-59-35dqaetxnf
-IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_17-21-28tbhedw72
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_10-17-54lv20cgn_
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_13-59-35kwzhax_y
-IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_17-21-28a5j0s7za
-
-Then run:
->> python cleanup_experiment.py --experiment-dir [parent dir w/ trial sub-dirs]
->>   --output-dir [your out dir] --results-filter dumb_col_2,superfluous_col3
->>   --results-max-size [max results file size in kb before(!) zipping]
-
-The script will create one output sub-dir for each trial and only copy
-the configuration and the csv results (filtered and every nth row removed
-based on the given args).
-"""
-
-import argparse
-import json
-import os
-import re
-import shutil
-
-import yaml
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "--experiment-dir",
-    type=str,
-    help="Experiment dir in which all sub-runs (seeds) are "
-    "located (as sub-dirs). Each sub0-run dir must contain the files: "
-    "params.json and progress.csv.",
-)
-parser.add_argument(
-    "--output-dir",
-    type=str,
-    help="The output dir, in which the cleaned up output will be placed.",
-)
-parser.add_argument(
-    "--results-filter",
-    type=str,
-    help="comma-separated list of csv fields to exclude.",
-    default="experiment_id,pid,hostname,node_ip,trial_id,hist_stats/episode_"
-    "reward,hist_stats/episode_lengths,experiment_tag",
-)
-parser.add_argument(
-    "--results-max-size",
-    type=int,
-    help="the max. size of the final results.csv file (in kb). Will erase "
-    "every nth line in the original input to reach that goal. "
-    "Use 0 for no limit (default=100).",
-    default=100,
-)
-
-
-def process_single_run(in_dir, out_dir):
-    exp_dir = os.listdir(in_dir)
-
-    # Make sure trials dir is ok.
-    assert (
-        "params.json" in exp_dir and "progress.csv" in exp_dir
-    ), "params.json or progress.csv not found in {}!".format(in_dir)
-
-    os.makedirs(out_dir, exist_ok=True)
-
-    for file in exp_dir:
-        absfile = os.path.join(in_dir, file)
-        # Config file -> Convert to yaml and move to output dir.
-        if file == "params.json":
-            assert os.path.isfile(absfile), "{} not a file!".format(file)
-            with open(absfile) as fp:
-                contents = json.load(fp)
-            with open(os.path.join(out_dir, "config.yaml"), "w") as fp:
-                yaml.dump(contents, fp)
-        # Progress csv file -> Filter out some columns, cut, and write to
-        # output_dir.
-        elif file == "progress.csv":
-            assert os.path.isfile(absfile), "{} not a file!".format(file)
-            col_idx_to_filter = []
-            with open(absfile) as fp:
-                # Get column names.
-                col_names_orig = fp.readline().strip().split(",")
-                # Split by comma (abiding to quotes), filter out
-                # unwanted columns, then write to disk.
-                cols_to_filter = args.results_filter.split(",")
-                for i, c in enumerate(col_names_orig):
-                    if c in cols_to_filter:
-                        col_idx_to_filter.insert(0, i)
-                col_names = col_names_orig.copy()
-                for idx in col_idx_to_filter:
-                    col_names.pop(idx)
-                absfile_out = os.path.join(out_dir, "progress.csv")
-                with open(absfile_out, "w") as out_fp:
-                    print(",".join(col_names), file=out_fp)
-                    while True:
-                        line = fp.readline().strip()
-                        if not line:
-                            break
-                        line = re.sub(
-                            "(,{2,})",
-                            lambda m: ",None" * (len(m.group()) - 1) + ",",
-                            line,
-                        )
-                        cols = re.findall('".+?"|[^,]+', line)
-                        if len(cols) != len(col_names_orig):
-                            continue
-                        for idx in col_idx_to_filter:
-                            cols.pop(idx)
-                        print(",".join(cols), file=out_fp)
-
-            # Reduce the size of the output file if necessary.
-            out_size = os.path.getsize(absfile_out)
-            max_size = args.results_max_size * 1024
-            if 0 < max_size < out_size:
-                # Figure out roughly every which line we have to drop.
-                ratio = out_size / max_size
-                # If ratio > 2.0, we'll have to keep only every nth line.
-                if ratio > 2.0:
-                    nth = out_size // max_size
-                    os.system(
-                        "awk 'NR==1||NR%{}==0' {} > {}.new".format(
-                            nth, absfile_out, absfile_out
-                        )
-                    )
-                # If ratio < 2.0 (>1.0), we'll have to drop every nth line.
-                else:
-                    nth = out_size // (out_size - max_size)
-                    os.system(
-                        "awk 'NR==1||NR%{}!=0' {} > {}.new".format(
-                            nth, absfile_out, absfile_out
-                        )
-                    )
-                os.remove(absfile_out)
-                os.rename(absfile_out + ".new", absfile_out)
-
-            # Zip progress.csv into results.zip.
-            zip_file = os.path.join(out_dir, "results.zip")
-            try:
-                os.remove(zip_file)
-            except FileNotFoundError:
-                pass
-            os.system(
-                "zip -j {} {}".format(zip_file, os.path.join(out_dir, "progress.csv"))
-            )
-            os.remove(os.path.join(out_dir, "progress.csv"))
-
-        # TBX events file -> Move as is.
-        elif re.search("^(events\\.out\\.|params\\.pkl)", file):
-            assert os.path.isfile(absfile), "{} not a file!".format(file)
-            shutil.copyfile(absfile, os.path.join(out_dir, file))
-
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-    exp_dir = os.listdir(args.experiment_dir)
-    # Loop through all sub-directories.
-    for i, sub_run in enumerate(sorted(exp_dir)):
-        abspath = os.path.join(args.experiment_dir, sub_run)
-        # This is a seed run.
-        if os.path.isdir(abspath) and re.search(
-            "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)", sub_run
-        ):
-            # Create meaningful output dir name:
-            # [algo]_[env]_[trial #]_[trial-config]_[date YYYY-MM-DD].
-            cleaned_up_out = re.sub(
-                "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)(_.+)?(_\\d{4}-\\d{2}-\\d{2})"
-                "_\\d{2}-\\d{2}-\\w+",
-                "{:02}_\\1_\\2\\4\\5".format(i),
-                sub_run,
-            )
-            # Remove superflous `env=` specifier (anv always included in name).
-            cleaned_up_out = re.sub(
-                "^(.+)env=\\w+?-v\\d+,?(.+)", "\\1\\2", cleaned_up_out
-            )
-            out_path = os.path.join(args.output_dir, cleaned_up_out)
-            process_single_run(abspath, out_path)
-    # Done.
-    print("done")
diff --git a/rllib/tuned_examples/compact-regression-test.yaml b/rllib/tuned_examples/compact-regression-test.yaml
deleted file mode 100644
index 80003257ccb7..000000000000
--- a/rllib/tuned_examples/compact-regression-test.yaml
+++ /dev/null
@@ -1,157 +0,0 @@
-# This file runs on a single g3.16xl or p3.16xl node. It is suggested
-# to run these in a DLAMI / tensorflow_p36 env. Note that RL runs are
-# inherently high variance, so you'll have to check to see if the
-# rewards reached seem reasonably in line with previous results.
-#
-# You can find the reference results here:
-# https://github.com/ray-project/ray/tree/master/release/release_logs
-atari-impala:
-    env: ale_py:ALE/Breakout-v5
-    run: IMPALA
-    num_samples: 4
-    stop:
-        time_total_s: 3600
-    config:
-        env_config:
-            frameskip: 1  # no frameskip
-        rollout_fragment_length: 50
-        train_batch_size: 500
-        num_env_runners: 10
-        num_envs_per_env_runner: 5
-        clip_rewards: True
-        lr_schedule: [
-            [0, 0.0005],
-            [20000000, 0.000000000001],
-        ]
-        num_gpus: 1
-atari-ppo-tf:
-    env: ale_py:ALE/Breakout-v5
-    run: PPO
-    num_samples: 4
-    stop:
-        time_total_s: 3600
-    config:
-        env_config:
-            frameskip: 1  # no frameskip
-        lambda: 0.95
-        kl_coeff: 0.5
-        clip_rewards: True
-        clip_param: 0.1
-        vf_clip_param: 10.0
-        entropy_coeff: 0.01
-        train_batch_size: 5000
-        rollout_fragment_length: 100
-        minibatch_size: 500
-        num_epochs: 10
-        num_env_runners: 10
-        num_envs_per_env_runner: 5
-        batch_mode: truncate_episodes
-        observation_filter: NoFilter
-        model:
-            vf_share_layers: true
-        num_gpus: 1
-atari-ppo-torch:
-    env: ale_py:ALE/Breakout-v5
-    run: PPO
-    num_samples: 4
-    stop:
-        time_total_s: 3600
-    config:
-        framework: torch
-        env_config:
-            frameskip: 1  # no frameskip
-        lambda: 0.95
-        kl_coeff: 0.5
-        clip_rewards: True
-        clip_param: 0.1
-        vf_clip_param: 10.0
-        entropy_coeff: 0.01
-        train_batch_size: 5000
-        rollout_fragment_length: 100
-        minibatch_size: 500
-        num_epochs: 10
-        num_env_runners: 10
-        num_envs_per_env_runner: 5
-        batch_mode: truncate_episodes
-        observation_filter: NoFilter
-        model:
-            vf_share_layers: true
-        num_gpus: 1
-apex:
-    env: ale_py:ALE/Breakout-v5
-    run: APEX
-    num_samples: 4
-    stop:
-        time_total_s: 3600
-    config:
-        env_config:
-            frameskip: 1  # no frameskip
-        double_q: false
-        dueling: false
-        num_atoms: 1
-        noisy: false
-        n_step: 3
-        lr: .0001
-        adam_epsilon: .00015
-        hiddens: [512]
-        exploration_config:
-          epsilon_timesteps: 200000
-          final_epsilon: 0.01
-        replay_buffer_config:
-          type: MultiAgentPrioritizedReplayBuffer
-          prioritized_replay_alpha: 0.5
-          capacity: 1000000
-        num_gpus: 1
-        num_env_runners: 8
-        num_envs_per_env_runner: 8
-        rollout_fragment_length: 20
-        train_batch_size: 512
-        target_network_update_freq: 50000
-        min_sample_timesteps_per_iteration: 25000
-atari-a2c:
-    env: ale_py:ALE/Breakout-v5
-    run: A2C
-    num_samples: 4
-    stop:
-        time_total_s: 3600
-    config:
-        env_config:
-            frameskip: 1  # no frameskip
-        rollout_fragment_length: 20
-        clip_rewards: True
-        num_env_runners: 5
-        num_envs_per_env_runner: 5
-        num_gpus: 1
-        lr_schedule: [
-            [0, 0.0007],
-            [20000000, 0.000000000001],
-        ]
-atari-basic-dqn:
-    env: ale_py:ALE/Breakout-v5
-    run: DQN
-    num_samples: 4
-    stop:
-        time_total_s: 3600
-    config:
-        env_config:
-            frameskip: 1  # no frameskip
-        double_q: false
-        dueling: false
-        num_atoms: 1
-        noisy: false
-        replay_buffer_config:
-          type: MultiAgentReplayBuffer
-          capacity: 1000000
-        num_steps_sampled_before_learning_starts: 20000
-        n_step: 1
-        target_network_update_freq: 8000
-        lr: .0000625
-        adam_epsilon: .00015
-        hiddens: [512]
-        rollout_fragment_length: 4
-        train_batch_size: 32
-        exploration_config:
-          epsilon_timesteps: 200000
-          final_epsilon: 0.01
-        num_gpus: 0.2
-        min_sample_timesteps_per_iteration: 10000
diff --git a/rllib/tuned_examples/dreamerv3/__init__.py b/rllib/tuned_examples/dreamerv3/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py b/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py
deleted file mode 100644
index 51e9d2d2b3ef..000000000000
--- a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import time
-
-from ray import tune
-from ray.rllib.algorithms.ppo.ppo import PPOConfig
-from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME
-from ray.tune.schedulers.pb2 import PB2
-
-# Needs the following packages to be installed on Ubuntu:
-#   sudo apt-get libosmesa-dev
-#   sudo apt-get install patchelf
-#   python -m pip install "gymnasium[mujoco]"
-# Might need to be added to bashsrc:
-#   export MUJOCO_GL=osmesa"
-#   export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin"
-
-# See the following links for becnhmark results of other libraries:
-#   Original paper: https://arxiv.org/abs/1812.05905
-#   CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE
-#   AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks
-benchmark_envs = {
-    "HalfCheetah-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
-    },
-    "Hopper-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
-    },
-    "InvertedPendulum-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
-    },
-    "InvertedDoublePendulum-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
-    },
-    "Reacher-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000},
-    "Swimmer-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000},
-    "Walker2d-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
-    },
-}
-
-pb2_scheduler = PB2(
-    time_attr=f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}",
-    metric="env_runners/episode_return_mean",
-    mode="max",
-    perturbation_interval=50000,
-    # Copy bottom % with top % weights.
-    quantile_fraction=0.25,
-    hyperparam_bounds={
-        "lr": [1e-5, 1e-3],
-        "gamma": [0.95, 0.99],
-        "lambda": [0.97, 1.0],
-        "entropy_coeff": [0.0, 0.01],
-        "vf_loss_coeff": [0.01, 1.0],
-        "clip_param": [0.1, 0.3],
-        "kl_target": [0.01, 0.03],
-        "minibatch_size": [512, 4096],
-        "num_epochs": [6, 32],
-        "vf_share_layers": [False, True],
-        "use_kl_loss": [False, True],
-        "kl_coeff": [0.1, 0.4],
-        "vf_clip_param": [10.0, float("inf")],
-        "grad_clip": [40, 200],
-    },
-)
-
-experiment_start_time = time.time()
-# Following the paper.
-num_rollout_workers = 32
-for env, stop_criteria in benchmark_envs.items():
-    hp_trial_start_time = time.time()
-    config = (
-        PPOConfig()
-        .environment(env=env)
-        .env_runners(
-            rollout_fragment_length=1,
-            num_env_runners=num_rollout_workers,
-            # TODO (sven, simon): Add resources.
-        )
-        .learners(
-            # Let's start with a small number of learner workers and
-            # add later a tune grid search for these resources.
-            # TODO (simon): Either add tune grid search here or make
-            # an extra script to only test scalability.
-            num_learners=1,
-            num_gpus_per_learner=1,
-        )
-        # TODO (simon): Adjust to new model_config_dict.
-        .training(
-            lr=tune.uniform(1e-5, 1e-3),
-            gamma=tune.uniform(0.95, 0.99),
-            lambda_=tune.uniform(0.97, 1.0),
-            entropy_coeff=tune.choice([0.0, 0.01]),
-            vf_loss_coeff=tune.uniform(0.01, 1.0),
-            clip_param=tune.uniform(0.1, 0.3),
-            kl_target=tune.uniform(0.01, 0.03),
-            minibatch_size=tune.choice([512, 1024, 2048, 4096]),
-            num_epochs=tune.randint(6, 32),
-            vf_share_layers=tune.choice([True, False]),
-            use_kl_loss=tune.choice([True, False]),
-            kl_coeff=tune.uniform(0.1, 0.4),
-            vf_clip_param=tune.choice([10.0, 40.0, float("inf")]),
-            grad_clip=tune.choice([None, 40, 100, 200]),
-            train_batch_size=tune.sample_from(
-                lambda spec: spec.config["minibatch_size"] * num_rollout_workers
-            ),
-            model={
-                "fcnet_hiddens": [64, 64],
-                "fcnet_activation": "tanh",
-                "vf_share_layers": True,
-            },
-        )
-        .reporting(
-            metrics_num_episodes_for_smoothing=5,
-            min_sample_timesteps_per_iteration=1000,
-        )
-        .evaluation(
-            evaluation_duration="auto",
-            evaluation_interval=1,
-            evaluation_num_env_runners=1,
-            evaluation_parallel_to_training=True,
-            evaluation_config={
-                # PPO learns stochastic policy.
-                "explore": False,
-            },
-        )
-    )
-
-    tuner = tune.Tuner(
-        "PPO",
-        param_space=config,
-        run_config=tune.RunConfig(
-            stop=stop_criteria,
-            name="benchmark_ppo_mujoco_pb2_" + env,
-        ),
-        tune_config=tune.TuneConfig(
-            scheduler=pb2_scheduler,
-            num_samples=8,
-        ),
-    )
-    result_grid = tuner.fit()
-    best_result = result_grid.get_best_result()
-    print(
-        f"Finished running HP search for (env={env}) in "
-        f"{time.time() - hp_trial_start_time} seconds."
-    )
-    print(f"Best result for {env}: {best_result}")
-    print(f"Best config for {env}: {best_result['config']}")
-
-    # Run again with the best config.
-    best_trial_start_time = time.time()
-    tuner = tune.Tuner(
-        "PPO",
-        param_space=best_result.config,
-        run_config=tune.RunConfig(
-            stop=stop_criteria,
-            name="benchmark_ppo_mujoco_pb2_" + env + "_best",
-        ),
-    )
-    print(f"Running best config for (env={env})...")
-    tuner.fit()
-    print(
-        f"Finished running best config for (env={env}) "
-        f"in {time.time() - best_trial_start_time} seconds."
-    )
-
-print(
-    f"Finished running HP search on all MuJoCo benchmarks in "
-    f"{time.time() - experiment_start_time} seconds."
-)
-print(
-    "Results from running the best configs can be found in the "
-    "`benchmark_ppo_mujoco_pb2_<ENV-NAME>_best` directories."
-)
diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py b/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py
deleted file mode 100644
index f768dddf03b0..000000000000
--- a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import time
-
-from ray import tune
-from ray.rllib.algorithms.sac.sac import SACConfig
-from ray.rllib.utils.metrics import (
-    ENV_RUNNER_RESULTS,
-    EPISODE_RETURN_MEAN,
-    NUM_ENV_STEPS_SAMPLED_LIFETIME,
-)
-from ray.tune.schedulers.pb2 import PB2
-
-# Needs the following packages to be installed on Ubuntu:
-#   sudo apt-get libosmesa-dev
-#   sudo apt-get install patchelf
-#   python -m pip install "gymnasium[mujoco]"
-# Might need to be added to bashsrc:
-#   export MUJOCO_GL=osmesa"
-#   export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin"
-
-# See the following links for becnhmark results of other libraries:
-#   Original paper: https://arxiv.org/abs/1812.05905
-#   CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE
-#   AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks
-benchmark_envs = {
-    "HalfCheetah-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000,
-    },
-    "Hopper-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000,
-    },
-    "Humanoid-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 10000000,
-    },
-    "Ant-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000},
-    "Walker2d-v4": {
-        f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000,
-    },
-}
-
-pb2_scheduler = PB2(
-    time_attr=NUM_ENV_STEPS_SAMPLED_LIFETIME,
-    metric=f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}",
-    mode="max",
-    perturbation_interval=50000,
-    # Copy bottom % with top % weights.
-    quantile_fraction=0.25,
-    hyperparam_bounds={
-        "actor_lr": [1e-5, 1e-3],
-        "critic_lr": [1e-6, 1e-4],
-        "alpha_lr": [1e-6, 1e-3],
-        "gamma": [0.95, 0.99],
-        "n_step": [1, 3],
-        "initial_alpha": [1.0, 1.5],
-        "tau": [0.001, 0.1],
-        "target_entropy": [-10, -1],
-        "train_batch_size": [128, 512],
-        "target_network_update_freq": [1, 4],
-    },
-)
-
-experiment_start_time = time.time()
-for env, stop_criteria in benchmark_envs.items():
-    hp_trial_start_time = time.time()
-    config = (
-        SACConfig()
-        .environment(env=env)
-        .env_runners(
-            rollout_fragment_length="auto",
-            num_env_runners=1,
-            # TODO (sven, simon): Add resources.
-        )
-        .learners(
-            # Note, we have a small batch and a sample/train ratio
-            # of 1:1, so a single GPU should be enough.
-            num_learners=1,
-            num_gpus_per_learner=1,
-        )
-        # TODO (simon): Adjust to new model_config_dict.
-        .training(
-            initial_alpha=tune.choice([1.0, 1.5]),
-            actor_lr=tune.uniform(1e-5, 1e-3),
-            critic_lr=tune.uniform([1e-6, 1e-4]),
-            alpha_lr=tune.uniform([1e-6, 1e-3]),
-            target_entropy=tune.choice([-10, -5, -1, "auto"]),
-            n_step=tune.choice([1, 3, (1, 3)]),
-            tau=tune.uniform(0.001, 0.1),
-            train_batch_size=tune.choice([128, 256, 512]),
-            target_network_update_freq=tune.choice([1, 2, 4]),
-            replay_buffer_config={
-                "type": "PrioritizedEpisodeReplayBuffer",
-                "capacity": 1000000,
-                "alpha": 0.6,
-                "beta": 0.4,
-            },
-            num_steps_sampled_before_learning_starts=256,
-            model={
-                "fcnet_hiddens": [256, 256],
-                "fcnet_activation": "relu",
-                "post_fcnet_hiddens": [],
-                "post_fcnet_activation": None,
-                "post_fcnet_weights_initializer": "orthogonal_",
-                "post_fcnet_weights_initializer_config": {"gain": 0.01},
-            },
-        )
-        .reporting(
-            metrics_num_episodes_for_smoothing=5,
-            min_sample_timesteps_per_iteration=1000,
-        )
-        .evaluation(
-            evaluation_duration="auto",
-            evaluation_interval=1,
-            evaluation_num_env_runners=1,
-            evaluation_parallel_to_training=True,
-            evaluation_config={
-                "explore": False,
-            },
-        )
-    )
-
-    tuner = tune.Tuner(
-        "SAC",
-        param_space=config,
-        run_config=tune.RunConfig(
-            stop=stop_criteria,
-            name="benchmark_sac_mujoco_pb2_" + env,
-        ),
-        tune_config=tune.TuneConfig(
-            scheduler=pb2_scheduler,
-            num_samples=8,
-        ),
-    )
-    result_grid = tuner.fit()
-    best_result = result_grid.get_best_result()
-    print(
-        f"Finished running HP search for (env={env}) in "
-        f"{time.time() - hp_trial_start_time} seconds."
-    )
-    print(f"Best result for {env}: {best_result}")
-    print(f"Best config for {env}: {best_result['config']}")
-
-    # Run again with the best config.
-    best_trial_start_time = time.time()
-    tuner = tune.Tuner(
-        "SAC",
-        param_space=best_result.config,
-        run_config=tune.RunConfig(
-            stop=stop_criteria,
-            name="benchmark_sac_mujoco_pb2_" + env + "_best",
-        ),
-    )
-    print(f"Running best config for (env={env})...")
-    tuner.fit()
-    print(
-        f"Finished running best config for (env={env}) "
-        f"in {time.time() - best_trial_start_time} seconds."
-    )
-
-print(
-    f"Finished running HP search on all MuJoCo benchmarks in "
-    f"{time.time() - experiment_start_time} seconds."
-)
-print(
-    "Results from running the best configs can be found in the "
-    "`benchmark_sac_mujoco_pb2_<ENV-NAME>_best` directories."
-)

From b89a7af70cf837b8aa63ecf036962daed7d96f37 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Fri, 21 Nov 2025 17:13:40 +0000
Subject: [PATCH 2/9] Update BUILD.bazel for tuned-examples new location

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/BUILD.bazel | 368 +++++++++++++++++++++++-----------------------
 1 file changed, 183 insertions(+), 185 deletions(-)

diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel
index 15da5de0d454..df13bc2836e0 100644
--- a/rllib/BUILD.bazel
+++ b/rllib/BUILD.bazel
@@ -72,8 +72,6 @@ doctest(
             "**/examples/**",
             "**/tests/**",
             "**/test_*.py",
-            # Exclude `tuned_examples` *.py files.
-            "**/tuned_examples/**",
             # Deprecated modules
             "utils/window_stat.py",
             "utils/timer.py",
@@ -161,7 +159,7 @@ py_test(
 # Tag: learning_tests
 #
 # This will test python/yaml config files
-# inside rllib/tuned_examples/[algo-name] for actual learning success.
+# inside rllib/examples/algorithms/[algo-name] for actual learning success.
 # --------------------------------------------------------------------
 
 # APPO
@@ -169,13 +167,13 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_appo",
     size = "large",
-    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-cpus=7",
         "--num-env-runners=5",
     ],
-    main = "tuned_examples/appo/cartpole_appo.py",
+    main = "examples/algorithms/appo/cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -188,23 +186,23 @@ py_test(
 # TODO (sven): For some weird reason, this test runs extremely slow on the CI (not on cluster, not locally) -> taking this out for now ...
 # py_test(
 #    name = "learning_tests_cartpole_appo_gpu",
-#    main = "tuned_examples/appo/cartpole_appo.py",
+#    main = "examples/algorithms/appo/cartpole_appo.py",
 #    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
 #    size = "large",
-#    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+#    srcs = ["examples/algorithms/appo/cartpole_appo.py"],
 #    args = ["--as-test", "--num-gpus-per-learner=1", "--num-cpus=7", "--num-env-runners=5"]
 # )
 py_test(
     name = "learning_tests_cartpole_appo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-cpus=9",
         "--num-env-runners=6",
     ],
-    main = "tuned_examples/appo/cartpole_appo.py",
+    main = "examples/algorithms/appo/cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -218,7 +216,7 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_appo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/appo/cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
@@ -226,7 +224,7 @@ py_test(
         "--num-cpus=7",
         "--num-env-runners=6",
     ],
-    main = "tuned_examples/appo/cartpole_appo.py",
+    main = "examples/algorithms/appo/cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -242,14 +240,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_appo",
     size = "large",
-    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-cpus=8",
         "--num-env-runners=6",
     ],
-    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    main = "examples/algorithms/appo/multi_agent_cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -262,7 +260,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_appo_gpu",
     size = "large",
-    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
@@ -270,7 +268,7 @@ py_test(
         "--num-cpus=7",
         "--num-env-runners=5",
     ],
-    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    main = "examples/algorithms/appo/multi_agent_cartpole_appo.py",
     tags = [
         "exclusive",
         "gpu",
@@ -285,7 +283,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_appo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
@@ -293,7 +291,7 @@ py_test(
         "--num-cpus=9",
         "--num-env-runners=6",
     ],
-    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    main = "examples/algorithms/appo/multi_agent_cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -309,7 +307,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_appo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
@@ -318,7 +316,7 @@ py_test(
         "--num-cpus=7",
         "--num-env-runners=6",
     ],
-    main = "tuned_examples/appo/multi_agent_cartpole_appo.py",
+    main = "examples/algorithms/appo/multi_agent_cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -334,13 +332,13 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_appo",
     size = "large",
-    srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-cpus=8",
         "--num-env-runners=6",
     ],
-    main = "tuned_examples/appo/stateless_cartpole_appo.py",
+    main = "examples/algorithms/appo/stateless_cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -354,7 +352,7 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_appo_gpu",
     size = "large",
-    srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
@@ -362,7 +360,7 @@ py_test(
         "--num-cpus=7",
         "--num-env-runners=5",
     ],
-    main = "tuned_examples/appo/stateless_cartpole_appo.py",
+    main = "examples/algorithms/appo/stateless_cartpole_appo.py",
     tags = [
         "exclusive",
         "gpu",
@@ -377,14 +375,14 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_appo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-cpus=9",
         "--num-env-runners=6",
     ],
-    main = "tuned_examples/appo/stateless_cartpole_appo.py",
+    main = "examples/algorithms/appo/stateless_cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -398,7 +396,7 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_appo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"],
+    srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
@@ -406,7 +404,7 @@ py_test(
         "--num-cpus=7",
         "--num-env-runners=6",
     ],
-    main = "tuned_examples/appo/stateless_cartpole_appo.py",
+    main = "examples/algorithms/appo/stateless_cartpole_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -421,47 +419,47 @@ py_test(
 # MultiAgentStatelessCartPole
 # py_test(
 #     name = "learning_tests_multi_agent_stateless_cartpole_appo",
-#     main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
+#     main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py",
 #     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
 #     size = "large",
-#     srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
+#     srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"],
 #     args = ["--as-test"]
 # )
 # py_test(
 #     name = "learning_tests_multi_agent_stateless_cartpole_appo_gpu",
-#     main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
+#     main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py",
 #     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
 #     size = "large",
-#     srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
+#     srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"],
 #     args = ["--as-test", "--num-agents=2", "--num-gpus-per-learner=1"]
 # )
 # py_test(
 #     name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_cpu",
-#     main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
+#     main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py",
 #     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
 #     size = "large",
-#     srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
+#     srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"],
 #     args = ["--as-test", "--num-learners=2"]
 # )
 # py_test(
 #     name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_gpu",
-#     main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py",
+#     main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py",
 #     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
 #     size = "large",
-#     srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"],
+#     srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"],
 #     args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"]
 # )
 # Pendulum
 py_test(
     name = "learning_tests_pendulum_appo",
     size = "large",
-    srcs = ["tuned_examples/appo/pendulum_appo.py"],
+    srcs = ["examples/algorithms/appo/pendulum_appo.py"],
     args = [
         "--as-test",
         "--num-cpus=6",
         "--num-env-runners=4",
     ],
-    main = "tuned_examples/appo/pendulum_appo.py",
+    main = "examples/algorithms/appo/pendulum_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -475,7 +473,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pong_appo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/appo/multi_agent_pong_appo.py"],
+    srcs = ["examples/algorithms/appo/multi_agent_pong_appo.py"],
     args = [
         "--stop-iters=3",
         "--num-agents=2",
@@ -483,7 +481,7 @@ py_test(
         "--num-gpus-per-learner=1",
         "--num-aggregator-actors-per-learner=1",
     ],
-    main = "tuned_examples/appo/multi_agent_pong_appo.py",
+    main = "examples/algorithms/appo/multi_agent_pong_appo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -499,8 +497,8 @@ py_test(
     name = "learning_tests_multi_agent_cartpole_w_100_policies_appo_old_api_stack",
     size = "large",
     srcs = ["tests/run_regression_tests.py"],
-    args = ["--dir=tuned_examples/appo"],
-    data = ["tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py"],
+    args = ["--dir=examples/algorithms/appo"],
+    data = ["examples/algorithms/appo/multi-agent-cartpole-w-100-policies-appo.py"],
     main = "tests/run_regression_tests.py",
     tags = [
         "exclusive",
@@ -516,7 +514,7 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_bc",
     size = "medium",
-    srcs = ["tuned_examples/bc/cartpole_bc.py"],
+    srcs = ["examples/algorithms/bc/cartpole_bc.py"],
     args = [
         "--as-test",
     ],
@@ -524,7 +522,7 @@ py_test(
     data = [
         "tests/data/cartpole/cartpole-v1_large",
     ],
-    main = "tuned_examples/bc/cartpole_bc.py",
+    main = "examples/algorithms/bc/cartpole_bc.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -538,7 +536,7 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_bc_gpu",
     size = "medium",
-    srcs = ["tuned_examples/bc/cartpole_bc.py"],
+    srcs = ["examples/algorithms/bc/cartpole_bc.py"],
     args = [
         "--as-test",
         "--num-gpus-per-learner=1",
@@ -547,7 +545,7 @@ py_test(
     data = [
         "tests/data/cartpole/cartpole-v1_large",
     ],
-    main = "tuned_examples/bc/cartpole_bc.py",
+    main = "examples/algorithms/bc/cartpole_bc.py",
     tags = [
         "exclusive",
         "gpu",
@@ -564,7 +562,7 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_bc_with_offline_evaluation",
     size = "medium",
-    srcs = ["tuned_examples/bc/cartpole_bc_with_offline_evaluation.py"],
+    srcs = ["examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py"],
     args = [
         "--as-test",
         "--offline-evaluation-interval=1",
@@ -574,7 +572,7 @@ py_test(
     data = [
         "tests/data/cartpole/cartpole-v1_large",
     ],
-    main = "tuned_examples/bc/cartpole_bc_with_offline_evaluation.py",
+    main = "examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -588,7 +586,7 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_bc_with_offline_evaluation_gpu",
     size = "medium",
-    srcs = ["tuned_examples/bc/cartpole_bc_with_offline_evaluation.py"],
+    srcs = ["examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py"],
     args = [
         "--as-test",
         "--num-gpus-per-learner=1",
@@ -600,7 +598,7 @@ py_test(
     data = [
         "tests/data/cartpole/cartpole-v1_large",
     ],
-    main = "tuned_examples/bc/cartpole_bc_with_offline_evaluation.py",
+    main = "examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -617,7 +615,7 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_cql",
     size = "large",
-    srcs = ["tuned_examples/cql/pendulum_cql.py"],
+    srcs = ["examples/algorithms/cql/pendulum_cql.py"],
     args = [
         "--as-test",
     ],
@@ -625,7 +623,7 @@ py_test(
     data = [
         "tests/data/pendulum/pendulum-v1_enormous",
     ],
-    main = "tuned_examples/cql/pendulum_cql.py",
+    main = "examples/algorithms/cql/pendulum_cql.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -643,7 +641,7 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_cql_gpu",
     size = "large",
-    srcs = ["tuned_examples/cql/pendulum_cql.py"],
+    srcs = ["examples/algorithms/cql/pendulum_cql.py"],
     args = [
         "--as-test",
         "--num-gpus-per-learner=1",
@@ -652,7 +650,7 @@ py_test(
     data = [
         "tests/data/pendulum/pendulum-v1_enormous",
     ],
-    main = "tuned_examples/cql/pendulum_cql.py",
+    main = "examples/algorithms/cql/pendulum_cql.py",
     tags = [
         "exclusive",
         "gpu",
@@ -672,11 +670,11 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_dqn",
     size = "large",
-    srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+    srcs = ["examples/algorithms/dqn/cartpole_dqn.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/dqn/cartpole_dqn.py",
+    main = "examples/algorithms/dqn/cartpole_dqn.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -690,13 +688,13 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_dqn_gpu",
     size = "large",
-    srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+    srcs = ["examples/algorithms/dqn/cartpole_dqn.py"],
     args = [
         "--as-test",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/dqn/cartpole_dqn.py",
+    main = "examples/algorithms/dqn/cartpole_dqn.py",
     tags = [
         "exclusive",
         "gpu",
@@ -711,12 +709,12 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_dqn_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+    srcs = ["examples/algorithms/dqn/cartpole_dqn.py"],
     args = [
         "--as-test",
         "--num-learners=2",
     ],
-    main = "tuned_examples/dqn/cartpole_dqn.py",
+    main = "examples/algorithms/dqn/cartpole_dqn.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -730,13 +728,13 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_dqn_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+    srcs = ["examples/algorithms/dqn/cartpole_dqn.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/dqn/cartpole_dqn.py",
+    main = "examples/algorithms/dqn/cartpole_dqn.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -754,13 +752,13 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_dqn",
     size = "large",
-    srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+    srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-cpus=4",
     ],
-    main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+    main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -774,7 +772,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_dqn_gpu",
     size = "large",
-    srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+    srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"],
     args = [
         "--as-test",
         "--num-agents=2",
@@ -782,7 +780,7 @@ py_test(
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+    main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py",
     tags = [
         "exclusive",
         "gpu",
@@ -797,14 +795,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_dqn_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+    srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-cpus=5",
         "--num-learners=2",
     ],
-    main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+    main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -818,7 +816,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_dqn_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+    srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"],
     args = [
         "--as-test",
         "--num-agents=2",
@@ -826,7 +824,7 @@ py_test(
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+    main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -844,13 +842,13 @@ py_test(
 # py_test(
 #    name = "learning_tests_pendulum_dreamerv3_gpu",
 #    size = "large",
-#    srcs = ["tuned_examples/dreamerv3/pendulum_dreamerv3.py"],
+#    srcs = ["examples/algorithms/dreamerv3/pendulum_dreamerv3.py"],
 #    args = [
 #        "--as-test",
 #        "--num-gpus-per-learner=1",
 #        "--num-learners=1",
 #    ],
-#    main = "tuned_examples/marwil/cartpole_marwil.py",
+#    main = "examples/algorithms/dreamerv3/pendulum_dreamerv3.py",
 #    tags = [
 #        "exclusive",
 #        "gpu",
@@ -867,11 +865,11 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_impala",
     size = "large",
-    srcs = ["tuned_examples/impala/cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/cartpole_impala.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/impala/cartpole_impala.py",
+    main = "examples/algorithms/impala/cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -884,12 +882,12 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_impala_gpu",
     size = "large",
-    srcs = ["tuned_examples/impala/cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/cartpole_impala.py"],
     args = [
         "--as-test",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/impala/cartpole_impala.py",
+    main = "examples/algorithms/impala/cartpole_impala.py",
     tags = [
         "exclusive",
         "gpu",
@@ -904,12 +902,12 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_impala_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/impala/cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/cartpole_impala.py"],
     args = [
         "--as-test",
         "--num-learners=2",
     ],
-    main = "tuned_examples/impala/cartpole_impala.py",
+    main = "examples/algorithms/impala/cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -923,13 +921,13 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_impala_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/impala/cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/cartpole_impala.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/impala/cartpole_impala.py",
+    main = "examples/algorithms/impala/cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -945,13 +943,13 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_impala",
     size = "large",
-    srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-cpus=6",
     ],
-    main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
+    main = "examples/algorithms/impala/multi_agent_cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -965,14 +963,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_impala_gpu",
     size = "large",
-    srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-gpus-per-learner=1",
         "--num-cpus=6",
     ],
-    main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
+    main = "examples/algorithms/impala/multi_agent_cartpole_impala.py",
     tags = [
         "exclusive",
         "gpu",
@@ -987,14 +985,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_impala_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=2",
         "--num-cpus=7",
     ],
-    main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
+    main = "examples/algorithms/impala/multi_agent_cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1008,7 +1006,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_impala_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"],
     args = [
         "--as-test",
         "--num-agents=2",
@@ -1016,7 +1014,7 @@ py_test(
         "--num-gpus-per-learner=1",
         "--num-cpus=7",
     ],
-    main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
+    main = "examples/algorithms/impala/multi_agent_cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1032,11 +1030,11 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_impala",
     size = "large",
-    srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/stateless_cartpole_impala.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/impala/stateless_cartpole_impala.py",
+    main = "examples/algorithms/impala/stateless_cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1050,13 +1048,13 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_impala_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/stateless_cartpole_impala.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/impala/stateless_cartpole_impala.py",
+    main = "examples/algorithms/impala/stateless_cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1072,11 +1070,11 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_stateless_cartpole_impala",
     size = "large",
-    srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"],
+    srcs = ["examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py",
+    main = "examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1088,10 +1086,10 @@ py_test(
 )
 # py_test(
 #    name = "learning_tests_multi_agent_stateless_cartpole_impala_multi_gpu",
-#    main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py",
+#    main = "examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py",
 #    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
 #    size = "large",
-#    srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"],
+#    srcs = ["examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py"],
 #    args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"]
 # )
 
@@ -1100,7 +1098,7 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_iql",
     size = "large",
-    srcs = ["tuned_examples/iql/pendulum_iql.py"],
+    srcs = ["examples/algorithms/iql/pendulum_iql.py"],
     args = [
         "--as-test",
         "--num-cpus=32",
@@ -1109,7 +1107,7 @@ py_test(
     data = [
         "tests/data/pendulum/pendulum-v1_enormous",
     ],
-    main = "tuned_examples/iql/pendulum_iql.py",
+    main = "examples/algorithms/iql/pendulum_iql.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1124,7 +1122,7 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_iql_gpu",
     size = "large",
-    srcs = ["tuned_examples/iql/pendulum_iql.py"],
+    srcs = ["examples/algorithms/iql/pendulum_iql.py"],
     args = [
         "--as-test",
         "--num-cpus=32",
@@ -1134,7 +1132,7 @@ py_test(
     data = [
         "tests/data/pendulum/pendulum-v1_enormous",
     ],
-    main = "tuned_examples/iql/pendulum_iql.py",
+    main = "examples/algorithms/iql/pendulum_iql.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1151,7 +1149,7 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_marwil",
     size = "large",
-    srcs = ["tuned_examples/marwil/cartpole_marwil.py"],
+    srcs = ["examples/algorithms/marwil/cartpole_marwil.py"],
     args = [
         "--as-test",
     ],
@@ -1159,7 +1157,7 @@ py_test(
     data = [
         "tests/data/cartpole/cartpole-v1_large",
     ],
-    main = "tuned_examples/marwil/cartpole_marwil.py",
+    main = "examples/algorithms/marwil/cartpole_marwil.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1174,7 +1172,7 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_marwil_gpu",
     size = "large",
-    srcs = ["tuned_examples/marwil/cartpole_marwil.py"],
+    srcs = ["examples/algorithms/marwil/cartpole_marwil.py"],
     args = [
         "--as-test",
         "--num-gpus-per-learner=1",
@@ -1183,7 +1181,7 @@ py_test(
     data = [
         "tests/data/cartpole/cartpole-v1_large",
     ],
-    main = "tuned_examples/marwil/cartpole_marwil.py",
+    main = "examples/algorithms/marwil/cartpole_marwil.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1200,11 +1198,11 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_ppo",
     size = "large",
-    srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/cartpole_ppo.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/ppo/cartpole_ppo.py",
+    main = "examples/algorithms/ppo/cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1217,13 +1215,13 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_ppo_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/cartpole_ppo.py",
+    main = "examples/algorithms/ppo/cartpole_ppo.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1238,12 +1236,12 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_ppo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
     ],
-    main = "tuned_examples/ppo/cartpole_ppo.py",
+    main = "examples/algorithms/ppo/cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1257,13 +1255,13 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_ppo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/cartpole_ppo.py",
+    main = "examples/algorithms/ppo/cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1279,12 +1277,12 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_ppo",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
     ],
-    main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1297,14 +1295,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_ppo_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1319,13 +1317,13 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_ppo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=2",
     ],
-    main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1339,14 +1337,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_cartpole_ppo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1362,11 +1360,11 @@ py_test(
 py_test(
     name = "learning_tests_cartpole_truncated_ppo",
     size = "large",
-    srcs = ["tuned_examples/ppo/cartpole_truncated_ppo.py"],
+    srcs = ["examples/algorithms/ppo/cartpole_truncated_ppo.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/ppo/cartpole_truncated_ppo.py",
+    main = "examples/algorithms/ppo/cartpole_truncated_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1380,11 +1378,11 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_ppo",
     size = "large",
-    srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/ppo/stateless_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/stateless_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1397,13 +1395,13 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_ppo_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/stateless_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/stateless_cartpole_ppo.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1418,12 +1416,12 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_ppo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
     ],
-    main = "tuned_examples/ppo/stateless_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/stateless_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1437,13 +1435,13 @@ py_test(
 py_test(
     name = "learning_tests_stateless_cartpole_ppo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/stateless_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/stateless_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1459,12 +1457,12 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_stateless_cartpole_ppo",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
     ],
-    main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1477,14 +1475,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_stateless_cartpole_ppo_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1499,13 +1497,13 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_stateless_cartpole_ppo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=2",
     ],
-    main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1519,14 +1517,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_stateless_cartpole_ppo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1542,13 +1540,13 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_footsies_ppo",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"],
     args = [
         "--as-test",
         "--num-env-runners=6",
         "--evaluation-num-env-runners=2",
     ],
-    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1560,7 +1558,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_footsies_ppo_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"],
     args = [
         "--as-test",
         "--num-env-runners=20",
@@ -1568,7 +1566,7 @@ py_test(
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1581,14 +1579,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_footsies_ppo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"],
     args = [
         "--as-test",
         "--num-env-runners=6",
         "--evaluation-num-env-runners=2",
         "--num-learners=2",
     ],
-    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1600,7 +1598,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_footsies_ppo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"],
     args = [
         "--as-test",
         "--num-env-runners=20",
@@ -1608,7 +1606,7 @@ py_test(
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1622,11 +1620,11 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_ppo",
     size = "large",
-    srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
+    srcs = ["examples/algorithms/ppo/pendulum_ppo.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/ppo/pendulum_ppo.py",
+    main = "examples/algorithms/ppo/pendulum_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1639,13 +1637,13 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_ppo_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
+    srcs = ["examples/algorithms/ppo/pendulum_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/pendulum_ppo.py",
+    main = "examples/algorithms/ppo/pendulum_ppo.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1660,12 +1658,12 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_ppo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
+    srcs = ["examples/algorithms/ppo/pendulum_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
     ],
-    main = "tuned_examples/ppo/pendulum_ppo.py",
+    main = "examples/algorithms/ppo/pendulum_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1679,13 +1677,13 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_ppo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
+    srcs = ["examples/algorithms/ppo/pendulum_ppo.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/pendulum_ppo.py",
+    main = "examples/algorithms/ppo/pendulum_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1701,12 +1699,12 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pendulum_ppo",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
     ],
-    main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1719,14 +1717,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pendulum_ppo_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1741,13 +1739,13 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pendulum_ppo_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=2",
     ],
-    main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1761,14 +1759,14 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pendulum_ppo_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
+    srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
+    main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1785,11 +1783,11 @@ py_test(
 py_test(
     name = "learning_tests_mountaincar_sac",
     size = "large",
-    srcs = ["tuned_examples/sac/mountaincar_sac.py"],
+    srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/sac/mountaincar_sac.py",
+    main = "examples/algorithms/sac/mountaincar_sac.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1802,13 +1800,13 @@ py_test(
 py_test(
     name = "learning_tests_mountaincar_sac_gpu",
     size = "large",
-    srcs = ["tuned_examples/sac/mountaincar_sac.py"],
+    srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
     args = [
         "--as-test",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/sac/mountaincar_sac.py",
+    main = "examples/algorithms/sac/mountaincar_sac.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1822,12 +1820,12 @@ py_test(
 py_test(
     name = "learning_tests_mountaincar_sac_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/sac/mountaincar_sac.py"],
+    srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
     args = [
         "--as-test",
         "--num-learners=2",
     ],
-    main = "tuned_examples/sac/mountaincar_sac.py",
+    main = "examples/algorithms/sac/mountaincar_sac.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1840,13 +1838,13 @@ py_test(
 py_test(
     name = "learning_tests_mountaincar_sac_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/sac/mountaincar_sac.py"],
+    srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/sac/mountaincar_sac.py",
+    main = "examples/algorithms/sac/mountaincar_sac.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1861,11 +1859,11 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_sac",
     size = "large",
-    srcs = ["tuned_examples/sac/pendulum_sac.py"],
+    srcs = ["examples/algorithms/sac/pendulum_sac.py"],
     args = [
         "--as-test",
     ],
-    main = "tuned_examples/sac/pendulum_sac.py",
+    main = "examples/algorithms/sac/pendulum_sac.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1878,13 +1876,13 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_sac_gpu",
     size = "large",
-    srcs = ["tuned_examples/sac/pendulum_sac.py"],
+    srcs = ["examples/algorithms/sac/pendulum_sac.py"],
     args = [
         "--as-test",
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/sac/pendulum_sac.py",
+    main = "examples/algorithms/sac/pendulum_sac.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1898,12 +1896,12 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_sac_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/sac/pendulum_sac.py"],
+    srcs = ["examples/algorithms/sac/pendulum_sac.py"],
     args = [
         "--as-test",
         "--num-learners=2",
     ],
-    main = "tuned_examples/sac/pendulum_sac.py",
+    main = "examples/algorithms/sac/pendulum_sac.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1916,13 +1914,13 @@ py_test(
 py_test(
     name = "learning_tests_pendulum_sac_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/sac/pendulum_sac.py"],
+    srcs = ["examples/algorithms/sac/pendulum_sac.py"],
     args = [
         "--as-test",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/sac/pendulum_sac.py",
+    main = "examples/algorithms/sac/pendulum_sac.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1937,13 +1935,13 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pendulum_sac",
     size = "large",
-    srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+    srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
     args = [
         "--as-test",
         "--num-agents=2",
         "--num-cpus=4",
     ],
-    main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+    main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1956,7 +1954,7 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pendulum_sac_gpu",
     size = "large",
-    srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+    srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
     args = [
         "--as-test",
         "--num-agents=2",
@@ -1964,7 +1962,7 @@ py_test(
         "--num-learners=1",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+    main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
     tags = [
         "exclusive",
         "gpu",
@@ -1978,12 +1976,12 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pendulum_sac_multi_cpu",
     size = "large",
-    srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+    srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
     args = [
         "--num-agents=2",
         "--num-learners=2",
     ],
-    main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+    main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
     tags = [
         "exclusive",
         "learning_tests",
@@ -1996,13 +1994,13 @@ py_test(
 py_test(
     name = "learning_tests_multi_agent_pendulum_sac_multi_gpu",
     size = "large",
-    srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+    srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
     args = [
         "--num-agents=2",
         "--num-learners=2",
         "--num-gpus-per-learner=1",
     ],
-    main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+    main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
     tags = [
         "exclusive",
         "learning_tests",

From 5100f821ad91303e965b4b542ff9fed81b0fe36f Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Fri, 21 Nov 2025 18:12:06 +0000
Subject: [PATCH 3/9] Gemini review

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/BUILD.bazel | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel
index df13bc2836e0..6d5484d44827 100644
--- a/rllib/BUILD.bazel
+++ b/rllib/BUILD.bazel
@@ -497,8 +497,8 @@ py_test(
     name = "learning_tests_multi_agent_cartpole_w_100_policies_appo_old_api_stack",
     size = "large",
     srcs = ["tests/run_regression_tests.py"],
-    args = ["--dir=examples/algorithms/appo"],
-    data = ["examples/algorithms/appo/multi-agent-cartpole-w-100-policies-appo.py"],
+    args = ["--dir=examples/_old_api_stack/algorithms/"],
+    data = ["examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py"],
     main = "tests/run_regression_tests.py",
     tags = [
         "exclusive",
@@ -848,7 +848,7 @@ py_test(
 #        "--num-gpus-per-learner=1",
 #        "--num-learners=1",
 #    ],
-#    main = "examples/algorithms/marwil/cartpole_marwil.py",
+#    main = "examples/algorithms/dreamerv3/pendulum_dreamerv3.py",
 #    tags = [
 #        "exclusive",
 #        "gpu",

From a0dd0c3286fbfb1d441fff11a01116173413ec78 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Mon, 24 Nov 2025 11:06:37 +0000
Subject: [PATCH 4/9] update offline data path

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/examples/algorithms/bc/cartpole_bc.py                  | 5 ++---
 .../algorithms/bc/cartpole_bc_with_offline_evaluation.py     | 4 ++--
 rllib/examples/algorithms/bc/pendulum_bc.py                  | 4 ++--
 rllib/examples/algorithms/cql/pendulum_cql.py                | 4 ++--
 rllib/examples/algorithms/iql/pendulum_iql.py                | 4 ++--
 rllib/examples/algorithms/marwil/cartpole_marwil.py          | 4 ++--
 6 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/rllib/examples/algorithms/bc/cartpole_bc.py b/rllib/examples/algorithms/bc/cartpole_bc.py
index cb2e3ee9d074..57618f0739e7 100644
--- a/rllib/examples/algorithms/bc/cartpole_bc.py
+++ b/rllib/examples/algorithms/bc/cartpole_bc.py
@@ -24,9 +24,8 @@
 ), "This tuned example works only with `CartPole-v1`."
 
 # Define the data paths.
-data_path = "tests/data/cartpole/cartpole-v1_large"
-base_path = Path(__file__).parents[2]
-print(f"base_path={base_path}")
+data_path = "offline/tests/data/cartpole/cartpole-v1_large"
+base_path = Path(__file__).parents[3]
 data_path = "local://" / base_path / data_path
 print(f"data_path={data_path}")
 
diff --git a/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
index 5f8d53865820..30a2d4bb5a1e 100644
--- a/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
+++ b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
@@ -50,8 +50,8 @@
 ), "This tuned example works only with `CartPole-v1`."
 
 # Define the data paths.
-data_path = "tests/data/cartpole/cartpole-v1_large"
-base_path = Path(__file__).parents[2]
+data_path = "offline/tests/data/cartpole/cartpole-v1_large"
+base_path = Path(__file__).parents[3]
 print(f"base_path={base_path}")
 data_path = "local://" / base_path / data_path
 print(f"data_path={data_path}")
diff --git a/rllib/examples/algorithms/bc/pendulum_bc.py b/rllib/examples/algorithms/bc/pendulum_bc.py
index 28fb7c8f184e..b4417949d906 100644
--- a/rllib/examples/algorithms/bc/pendulum_bc.py
+++ b/rllib/examples/algorithms/bc/pendulum_bc.py
@@ -23,8 +23,8 @@
 ), "This tuned example works only with `Pendulum-v1`."
 
 # Define the data paths.
-data_path = "tests/data/pendulum/pendulum-v1_large"
-base_path = Path(__file__).parents[2]
+data_path = "offline/tests/data/pendulum/pendulum-v1_large"
+base_path = Path(__file__).parents[3]
 print(f"base_path={base_path}")
 data_path = "local://" / base_path / data_path
 print(f"data_path={data_path}")
diff --git a/rllib/examples/algorithms/cql/pendulum_cql.py b/rllib/examples/algorithms/cql/pendulum_cql.py
index 391e7a7376d0..984c3626fae9 100644
--- a/rllib/examples/algorithms/cql/pendulum_cql.py
+++ b/rllib/examples/algorithms/cql/pendulum_cql.py
@@ -23,10 +23,10 @@
 ), "This tuned example works only with `Pendulum-v1`."
 
 # Define the base path relative to this file.
-base_path = Path(__file__).parents[2]
+base_path = Path(__file__).parents[3]
 # Use the larger data set of Pendulum we have. Note, these are
 # parquet data, the default in `AlgorithmConfig.offline_data`.
-data_path = base_path / "tests/data/pendulum/pendulum-v1_enormous"
+data_path = base_path / "offline/tests/data/pendulum/pendulum-v1_enormous"
 
 # Define the configuration.
 config = (
diff --git a/rllib/examples/algorithms/iql/pendulum_iql.py b/rllib/examples/algorithms/iql/pendulum_iql.py
index 6b5fd07e8f2c..eea94390d0fb 100644
--- a/rllib/examples/algorithms/iql/pendulum_iql.py
+++ b/rllib/examples/algorithms/iql/pendulum_iql.py
@@ -23,8 +23,8 @@
 ), "This tuned example works only with `Pendulum-v1`."
 
 # Define the data paths.
-data_path = "tests/data/pendulum/pendulum-v1_enormous"
-base_path = Path(__file__).parents[2]
+data_path = "offline/tests/data/pendulum/pendulum-v1_enormous"
+base_path = Path(__file__).parents[3]
 print(f"base_path={base_path}")
 data_path = "local://" / base_path / data_path
 print(f"data_path={data_path}")
diff --git a/rllib/examples/algorithms/marwil/cartpole_marwil.py b/rllib/examples/algorithms/marwil/cartpole_marwil.py
index c758bae0f238..dd20d9aadcb7 100644
--- a/rllib/examples/algorithms/marwil/cartpole_marwil.py
+++ b/rllib/examples/algorithms/marwil/cartpole_marwil.py
@@ -23,8 +23,8 @@
 ), "This tuned example works only with `CartPole-v1`."
 
 # Define the data paths.
-data_path = "tests/data/cartpole/cartpole-v1_large"
-base_path = Path(__file__).parents[2]
+data_path = "offline/tests/data/cartpole/cartpole-v1_large"
+base_path = Path(__file__).parents[3]
 print(f"base_path={base_path}")
 data_path = "local://" / base_path / data_path
 print(f"data_path={data_path}")

From 5a4d28b4716edc70940625acf697511a1a86a6f3 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Mon, 24 Nov 2025 11:13:06 +0000
Subject: [PATCH 5/9] update tuned_example file paths

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/algorithms/dreamerv3/README.md               | 14 +++++++-------
 rllib/benchmarks/ppo/benchmark_atari_ppo.py        |  2 +-
 .../_old_api_stack/algorithms/pendulum-cql.yaml    |  2 +-
 rllib/examples/multi_agent/self_play_footsies.py   |  4 ++--
 rllib/examples/offline_rl/custom_input_api.py      |  2 +-
 rllib/examples/offline_rl/offline_rl.py            |  4 ++--
 rllib/utils/tests/run_memory_leak_tests.py         |  4 ++--
 7 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/rllib/algorithms/dreamerv3/README.md b/rllib/algorithms/dreamerv3/README.md
index 8db9fcbae9f1..f36de087c00d 100644
--- a/rllib/algorithms/dreamerv3/README.md
+++ b/rllib/algorithms/dreamerv3/README.md
@@ -42,18 +42,18 @@ Here are some examples on how to set these config settings within your `DreamerV
 [documentation page here](https://docs.ray.io/en/latest/rllib/index.html#rllib-in-60-seconds).
 
 Use the config examples and templates in the
-[tuned_examples folder](../../tuned_examples/dreamerv3)
+[examples folder](../../examples/algorithms/dreamerv3)
 in combination with the following scripts and command lines in order to run RLlib's DreamerV3 algorithm in your experiments:
 
-### [Atari100k](../../tuned_examples/dreamerv3/atari_100k_dreamerv3.py)
+### [Atari100k](../../examples/algorithms/dreamerv3/atari_100k_dreamerv3.py)
 ```shell
-$ cd ray/rllib/tuned_examples/dreamerv3/
+$ cd ray/rllib/examples/algorithms/dreamerv3/
 $ python atari_100k_dreamerv3.py --env ale_py:ALE/Pong-v5
 ```
 
-### [DeepMind Control Suite (vision)](../../tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py)
+### [DeepMind Control Suite (vision)](../../examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py)
 ```shell
-$ cd ray/rllib/tuned_examples/dreamerv3/
+$ cd ray/rllib/examples/algorithms/dreamerv3/
 $ python dm_control_suite_vision_dreamerv3.py --env DMC/cartpole/swingup
 ```
 Other `--env` options for the DM Control Suite would be `--env DMC/hopper/hop`, `--env DMC/walker/walk`, etc..
@@ -122,8 +122,8 @@ $ python flappy_bird.py
 ```
 
 This should be it. Feel free to try out running this on multiple GPUs using these
-more advanced config examples [here (Atari100k)](../../tuned_examples/dreamerv3/atari_100k_dreamerv3.py) and
-[here (DM Control Suite)](../../tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py).
+more advanced config examples [here (Atari100k)](../../examples/algorithms/dreamerv3/atari_100k_dreamerv3.py) and
+[here (DM Control Suite)](../../examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py).
 Also see the notes below on good recipes for running on multiple GPUs.
 
 <b>IMPORTANT:</b> DreamerV3 out-of-the-box only supports image observation spaces of
diff --git a/rllib/benchmarks/ppo/benchmark_atari_ppo.py b/rllib/benchmarks/ppo/benchmark_atari_ppo.py
index d62e18b01407..ad8f05691fb7 100644
--- a/rllib/benchmarks/ppo/benchmark_atari_ppo.py
+++ b/rllib/benchmarks/ppo/benchmark_atari_ppo.py
@@ -96,7 +96,7 @@
     # Compile the base command running the actual `tuned_example` script.
     base_commands = [
         "python",
-        "../../tuned_examples/ppo/atari_ppo.py",
+        "../../examples/algorithms/ppo/atari_ppo.py",
         f"--num-env-runners={args.num_env_runners}" if args.num_env_runners else "",
         f"--num-learners={args.num_learners}",
         f"--num-gpus-per-learner={args.num_gpus_per_learner}",
diff --git a/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
index 6858c17b3cb2..fee655b012a5 100644
--- a/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
+++ b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
@@ -1,6 +1,6 @@
 # @OldAPIStack
 # Given a SAC-generated offline file generated via:
-# rllib train -f tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
+# rllib train -f examples/algorithms/sac/pendulum-sac.yaml --no-ray-ui
 
 # Pendulum CQL can attain ~ -300 reward in 10k from that file.
 pendulum-cql:
diff --git a/rllib/examples/multi_agent/self_play_footsies.py b/rllib/examples/multi_agent/self_play_footsies.py
index 2cc5213eced2..a641262c7c48 100644
--- a/rllib/examples/multi_agent/self_play_footsies.py
+++ b/rllib/examples/multi_agent/self_play_footsies.py
@@ -2,14 +2,14 @@
 Multi-agent RLlib Footsies Simplified Example (PPO)
 
 About:
-    - This example as a simplified version of "rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    - This example is a simplified version of "rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py",
       which has more detailed comments and instructions. Please refer to that example for more information.
     - This example is created to test the self-play training progression with footsies.
     - Simplified version runs with single learner (cpu), single env runner, and single eval env runner.
 """
 from pathlib import Path
 
-from ray.rllib.tuned_examples.ppo.multi_agent_footsies_ppo import (
+from ray.rllib.examples.ppo.multi_agent_footsies_ppo import (
     config,
     env_creator,
     stop,
diff --git a/rllib/examples/offline_rl/custom_input_api.py b/rllib/examples/offline_rl/custom_input_api.py
index d6fd2f6c1d1d..3cc7ac4f8c60 100644
--- a/rllib/examples/offline_rl/custom_input_api.py
+++ b/rllib/examples/offline_rl/custom_input_api.py
@@ -87,7 +87,7 @@ def input_creator(ioctx: IOContext) -> InputReader:
     # we register our custom input creator with this convenient function
     register_input("custom_input", input_creator)
 
-    # Config modified from rllib/tuned_examples/cql/pendulum-cql.yaml
+    # Config modified from rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml
     default_config = get_trainable_cls(args.run).get_default_config()
     config = (
         default_config.environment("Pendulum-v1", clip_actions=True)
diff --git a/rllib/examples/offline_rl/offline_rl.py b/rllib/examples/offline_rl/offline_rl.py
index b4bf817300df..80d71891ef65 100644
--- a/rllib/examples/offline_rl/offline_rl.py
+++ b/rllib/examples/offline_rl/offline_rl.py
@@ -9,7 +9,7 @@
 Generate the offline json file by running an SAC algo until it reaches expert
 level on your command line. For example:
 $ cd ray
-$ rllib train -f rllib/tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
+$ rllib train -f rllib/examples/algorithms/sac/pendulum-sac.yaml --no-ray-ui
 
 Also make sure that in the above SAC yaml file (pendulum-sac.yaml),
 you specify an additional "output" key with any path on your local
@@ -55,7 +55,7 @@
 if __name__ == "__main__":
     args = parser.parse_args()
 
-    # See rllib/tuned_examples/cql/pendulum-cql.yaml for comparison.
+    # See rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml for comparison.
     config = (
         cql.CQLConfig()
         .api_stack(
diff --git a/rllib/utils/tests/run_memory_leak_tests.py b/rllib/utils/tests/run_memory_leak_tests.py
index 8685049fb03d..598026d76a38 100644
--- a/rllib/utils/tests/run_memory_leak_tests.py
+++ b/rllib/utils/tests/run_memory_leak_tests.py
@@ -11,9 +11,9 @@
 #     tags = ["memory_leak_tests"],
 #     size = "medium",  # 5min timeout
 #     srcs = ["tests/test_memory_leak.py"],
-#     data = glob(["tuned_examples/ppo/*.yaml"]),
+#     data = glob(["examples/_old_api_stack/algorithms/*.yaml"]),
 #     # Pass `BAZEL` option and the path to look for yaml files.
+#     args = ["BAZEL", "examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml"]
+#     args = ["BAZEL", "examples/algorithms/ppo/memory-leak-test-ppo.yaml"]
 # )
 
 import argparse

From 0f2d5bd4349b1ae378b12bf049b1eb953d18ba82 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Mon, 24 Nov 2025 12:38:07 +0000
Subject: [PATCH 6/9] Fix file paths

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/examples/algorithms/bc/cartpole_bc.py                     | 2 +-
 .../algorithms/bc/cartpole_bc_with_offline_evaluation.py        | 2 +-
 rllib/examples/algorithms/bc/pendulum_bc.py                     | 2 +-
 rllib/examples/algorithms/cql/pendulum_cql.py                   | 2 +-
 rllib/examples/algorithms/iql/pendulum_iql.py                   | 2 +-
 rllib/examples/algorithms/marwil/cartpole_marwil.py             | 2 +-
 rllib/examples/multi_agent/self_play_footsies.py                | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/rllib/examples/algorithms/bc/cartpole_bc.py b/rllib/examples/algorithms/bc/cartpole_bc.py
index 57618f0739e7..076a045f1e7b 100644
--- a/rllib/examples/algorithms/bc/cartpole_bc.py
+++ b/rllib/examples/algorithms/bc/cartpole_bc.py
@@ -24,7 +24,7 @@
 ), "This tuned example works only with `CartPole-v1`."
 
 # Define the data paths.
-data_path = "offline/tests/data/cartpole/cartpole-v1_large"
+data_path = "tests/data/cartpole/cartpole-v1_large"
 base_path = Path(__file__).parents[3]
 data_path = "local://" / base_path / data_path
 print(f"data_path={data_path}")
diff --git a/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
index 30a2d4bb5a1e..a18a088cf8df 100644
--- a/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
+++ b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py
@@ -50,7 +50,7 @@
 ), "This tuned example works only with `CartPole-v1`."
 
 # Define the data paths.
-data_path = "offline/tests/data/cartpole/cartpole-v1_large"
+data_path = "tests/data/cartpole/cartpole-v1_large"
 base_path = Path(__file__).parents[3]
 print(f"base_path={base_path}")
 data_path = "local://" / base_path / data_path
diff --git a/rllib/examples/algorithms/bc/pendulum_bc.py b/rllib/examples/algorithms/bc/pendulum_bc.py
index b4417949d906..a262e7aa79e4 100644
--- a/rllib/examples/algorithms/bc/pendulum_bc.py
+++ b/rllib/examples/algorithms/bc/pendulum_bc.py
@@ -23,7 +23,7 @@
 ), "This tuned example works only with `Pendulum-v1`."
 
 # Define the data paths.
-data_path = "offline/tests/data/pendulum/pendulum-v1_large"
+data_path = "tests/data/pendulum/pendulum-v1_large"
 base_path = Path(__file__).parents[3]
 print(f"base_path={base_path}")
 data_path = "local://" / base_path / data_path
diff --git a/rllib/examples/algorithms/cql/pendulum_cql.py b/rllib/examples/algorithms/cql/pendulum_cql.py
index 984c3626fae9..722bb28a4cd6 100644
--- a/rllib/examples/algorithms/cql/pendulum_cql.py
+++ b/rllib/examples/algorithms/cql/pendulum_cql.py
@@ -26,7 +26,7 @@
 base_path = Path(__file__).parents[3]
 # Use the larger data set of Pendulum we have. Note, these are
 # parquet data, the default in `AlgorithmConfig.offline_data`.
-data_path = base_path / "offline/tests/data/pendulum/pendulum-v1_enormous"
+data_path = base_path / "tests/data/pendulum/pendulum-v1_enormous"
 
 # Define the configuration.
 config = (
diff --git a/rllib/examples/algorithms/iql/pendulum_iql.py b/rllib/examples/algorithms/iql/pendulum_iql.py
index eea94390d0fb..720edc434923 100644
--- a/rllib/examples/algorithms/iql/pendulum_iql.py
+++ b/rllib/examples/algorithms/iql/pendulum_iql.py
@@ -23,7 +23,7 @@
 ), "This tuned example works only with `Pendulum-v1`."
 
 # Define the data paths.
-data_path = "offline/tests/data/pendulum/pendulum-v1_enormous"
+data_path = "tests/data/pendulum/pendulum-v1_enormous"
 base_path = Path(__file__).parents[3]
 print(f"base_path={base_path}")
 data_path = "local://" / base_path / data_path
diff --git a/rllib/examples/algorithms/marwil/cartpole_marwil.py b/rllib/examples/algorithms/marwil/cartpole_marwil.py
index dd20d9aadcb7..e8cad25ac06d 100644
--- a/rllib/examples/algorithms/marwil/cartpole_marwil.py
+++ b/rllib/examples/algorithms/marwil/cartpole_marwil.py
@@ -23,7 +23,7 @@
 ), "This tuned example works only with `CartPole-v1`."
 
 # Define the data paths.
-data_path = "offline/tests/data/cartpole/cartpole-v1_large"
+data_path = "tests/data/cartpole/cartpole-v1_large"
 base_path = Path(__file__).parents[3]
 print(f"base_path={base_path}")
 data_path = "local://" / base_path / data_path
diff --git a/rllib/examples/multi_agent/self_play_footsies.py b/rllib/examples/multi_agent/self_play_footsies.py
index a641262c7c48..da9047aac019 100644
--- a/rllib/examples/multi_agent/self_play_footsies.py
+++ b/rllib/examples/multi_agent/self_play_footsies.py
@@ -9,7 +9,7 @@
 """
 from pathlib import Path
 
-from ray.rllib.examples.ppo.multi_agent_footsies_ppo import (
+from ray.rllib.examples.algorithms.ppo.multi_agent_footsies_ppo import (
     config,
     env_creator,
     stop,

From dc3be81a50799315887453aeca29a14ce80b09a0 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Wed, 26 Nov 2025 11:18:15 +0000
Subject: [PATCH 7/9] Update rllib release test directory and release test
 paths

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 release/release_tests.yaml             | 4 ++--
 release/rllib_tests/example_algorithms | 1 +
 release/rllib_tests/tuned_examples     | 1 -
 3 files changed, 3 insertions(+), 3 deletions(-)
 create mode 120000 release/rllib_tests/example_algorithms
 delete mode 120000 release/rllib_tests/tuned_examples

diff --git a/release/release_tests.yaml b/release/release_tests.yaml
index e9641c7ed77e..408ba58a401e 100644
--- a/release/release_tests.yaml
+++ b/release/release_tests.yaml
@@ -2029,7 +2029,7 @@
 
   run:
     timeout: 1500  # expected 1000 seconds
-    script: python tuned_examples/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
+    script: python example_algorithms/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
 
 - name: rllib_learning_tests_halfcheetah_appo_torch
   python: "3.12"
@@ -2047,7 +2047,7 @@
 
   run:
     timeout: 3000  # expected 2000 seconds
-    script: python tuned_examples/appo/halfcheetah_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
+    script: python example_algorithms/appo/halfcheetah_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
 
 ########################
 # Core Nightly Tests
diff --git a/release/rllib_tests/example_algorithms b/release/rllib_tests/example_algorithms
new file mode 120000
index 000000000000..64b332356b30
--- /dev/null
+++ b/release/rllib_tests/example_algorithms
@@ -0,0 +1 @@
+../../rllib/examples/algorithms
\ No newline at end of file
diff --git a/release/rllib_tests/tuned_examples b/release/rllib_tests/tuned_examples
deleted file mode 120000
index 6258eb4f971f..000000000000
--- a/release/rllib_tests/tuned_examples
+++ /dev/null
@@ -1 +0,0 @@
-../../rllib/tuned_examples
\ No newline at end of file

From 1250bc06a31fffbda81e6adaa4bab97286aeb9f3 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Wed, 26 Nov 2025 13:55:08 +0000
Subject: [PATCH 8/9] remove regression test

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/BUILD.bazel | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel
index 0d686c303adb..29947e6728fd 100644
--- a/rllib/BUILD.bazel
+++ b/rllib/BUILD.bazel
@@ -481,23 +481,6 @@ py_test(
     ],
 )
 
-#@OldAPIStack
-py_test(
-    name = "learning_tests_multi_agent_cartpole_w_100_policies_appo_old_api_stack",
-    size = "large",
-    srcs = ["algorithms/tests/run_regression_tests.py"],
-    args = ["--dir=examples/_old_api_stack/algorithms/"],
-    data = ["examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py"],
-    main = "talgorithms/ests/run_regression_tests.py",
-    tags = [
-        "exclusive",
-        "learning_tests",
-        "learning_tests_discrete",
-        "learning_tests_pytorch_use_all_core",
-        "team:rllib",
-    ],
-)
-
 # BC
 # CartPole
 py_test(

From 9f710e2a9fac2190e2f19e53794b7b1df8e43405 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Wed, 26 Nov 2025 17:45:53 +0000
Subject: [PATCH 9/9] Remove commented code

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/BUILD.bazel | 92 +----------------------------------------------
 1 file changed, 1 insertion(+), 91 deletions(-)

diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel
index 29947e6728fd..f2a2ef3d2a94 100644
--- a/rllib/BUILD.bazel
+++ b/rllib/BUILD.bazel
@@ -148,7 +148,7 @@ py_test(
 # Tag: learning_tests
 #
 # This will test python/yaml config files
-# inside rllib/examples/algorithm/[algo-name] for actual learning success.
+# inside rllib/examples/algorithms/[algo-name] for actual learning success.
 # --------------------------------------------------------------------
 
 # APPO
@@ -172,15 +172,6 @@ py_test(
     ],
 )
 
-# TODO (sven): For some weird reason, this test runs extremely slow on the CI (not on cluster, not locally) -> taking this out for now ...
-# py_test(
-#    name = "learning_tests_cartpole_appo_gpu",
-#    main = "examples/algorithms/appo/cartpole_appo.py",
-#    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
-#    size = "large",
-#    srcs = ["examples/algorithms/appo/cartpole_appo.py"],
-#    args = ["--as-test", "--num-gpus-per-learner=1", "--num-cpus=7", "--num-env-runners=5"]
-# )
 py_test(
     name = "learning_tests_cartpole_appo_multi_cpu",
     size = "large",
@@ -405,39 +396,6 @@ py_test(
     ],
 )
 
-# MultiAgentStatelessCartPole
-# py_test(
-#     name = "learning_tests_multi_agent_stateless_cartpole_appo",
-#     main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py",
-#     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
-#     size = "large",
-#     srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"],
-#     args = ["--as-test"]
-# )
-# py_test(
-#     name = "learning_tests_multi_agent_stateless_cartpole_appo_gpu",
-#     main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py",
-#     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
-#     size = "large",
-#     srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"],
-#     args = ["--as-test", "--num-agents=2", "--num-gpus-per-learner=1"]
-# )
-# py_test(
-#     name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_cpu",
-#     main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py",
-#     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
-#     size = "large",
-#     srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"],
-#     args = ["--as-test", "--num-learners=2"]
-# )
-# py_test(
-#     name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_gpu",
-#     main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py",
-#     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
-#     size = "large",
-#     srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"],
-#     args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"]
-# )
 # Pendulum
 py_test(
     name = "learning_tests_pendulum_appo",
@@ -808,31 +766,6 @@ py_test(
     ],
 )
 
-# DreamerV3
-# takes too long (up to 20-30min to learn -200 on 1 GPU)
-# Pendulum
-# py_test(
-#    name = "learning_tests_pendulum_dreamerv3_gpu",
-#    size = "large",
-#    srcs = ["examples/algorithms/dreamerv3/pendulum_dreamerv3.py"],
-#    args = [
-#        "--as-test",
-#        "--num-gpus-per-learner=1",
-#        "--num-learners=1",
-#        "--num-env-runners=4",
-#    ],
-#    main = "examples/algorithms/dreamerv3/pendulum_dreamerv3.py",
-#    tags = [
-#        "exclusive",
-#        "gpu",
-#        "learning_tests",
-#        "learning_tests_continuous",
-#        "learning_tests_pytorch_use_all_core",
-#        "team:rllib",
-#        "torch_only",
-#    ],
-# )
-
 # IMPALA
 # CartPole
 py_test(
@@ -1057,14 +990,6 @@ py_test(
         "torch_only",
     ],
 )
-# py_test(
-#    name = "learning_tests_multi_agent_stateless_cartpole_impala_multi_gpu",
-#    main = "examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py",
-#    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
-#    size = "large",
-#    srcs = ["examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py"],
-#    args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"]
-# )
 
 # IQL
 # Pendulum-v1 (enormous)
@@ -2409,21 +2334,6 @@ py_test(
     ],
 )
 
-# --------------------------------------------------------------------
-# ConnectorV2 tests
-# rllib/connector/
-#
-# Tag: connector_v2
-# --------------------------------------------------------------------
-
-# TODO (sven): Add these tests in a separate PR.
-# py_test(
-#    name = "connectors/tests/test_connector_v2",
-#    tags = ["team:rllib", "connector_v2"],
-#    size = "small",
-#    srcs = ["connectors/tests/test_connector_v2.py"]
-# )
-
 # --------------------------------------------------------------------
 # Env tests
 # rllib/env/