From 75bdc1d44df113c34d851505dc1f7a5fd7850748 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Fri, 21 Nov 2025 17:06:44 +0000 Subject: [PATCH 1/9] [rllib] Merge tuned-examples into examples Signed-off-by: Mark Towers --- .../algorithms}/atari-dist-dqn.yaml | 0 .../_old_api_stack/algorithms}/atari-dqn.yaml | 0 .../algorithms}/atari-duel-ddqn.yaml | 0 .../algorithms}/atari-impala-large.yaml | 0 .../algorithms}/atari-impala-multi-gpu.yaml | 0 .../algorithms}/atari-impala.yaml | 0 .../_old_api_stack/algorithms}/atari-sac.yaml | 0 .../cartpole-appo-separate-losses.py | 0 .../algorithms}/cartpole-bc.yaml | 0 ...hing-and-stalling-recreate-workers-appo.py | 0 ...cartpole-crashing-recreate-workers-appo.py | 0 .../algorithms}/cartpole-dqn-fake-gpus.yaml | 0 .../algorithms}/cartpole-dqn-param-noise.yaml | 0 .../algorithms}/cartpole-dqn-softq.yaml | 0 .../algorithms}/cartpole-dqn.yaml | 0 .../algorithms}/cartpole-marwil.yaml | 0 .../algorithms}/cartpole-sac.yaml | 0 .../algorithms}/frozenlake-appo-vtrace.yaml | 0 .../algorithms}/halfcheetah-bc.yaml | 0 .../algorithms}/halfcheetah-cql.yaml | 0 .../algorithms}/halfcheetah-ppo.yaml | 0 .../_old_api_stack/algorithms}/hopper-bc.yaml | 0 .../algorithms}/hopper-cql.yaml | 0 .../algorithms}/hopper-ppo.yaml | 0 .../algorithms}/humanoid-ppo-gae.yaml | 0 .../algorithms}/humanoid-ppo.yaml | 0 .../algorithms}/memory-leak-test-appo.yaml | 0 .../algorithms}/memory-leak-test-dqn.yaml | 0 .../algorithms}/memory-leak-test-ppo.yaml | 0 .../algorithms}/memory-leak-test-sac.yaml | 0 .../algorithms}/mspacman-sac.yaml | 0 ...hing-and-stalling-recreate-workers-appo.py | 0 ...cartpole-crashing-recreate-workers-appo.py | 0 ...ulti-agent-cartpole-w-100-policies-appo.py | 0 ...multi_agent_cartpole_appo_old_api_stack.py | 0 .../algorithms}/pendulum-cql.yaml | 0 .../algorithms}/pendulum-sac.yaml | 0 .../pendulum-transformed-actions-ppo.yaml | 0 .../pendulum-transformed-actions-sac.yaml | 0 .../_old_api_stack/algorithms}/pong-dqn.yaml | 0 
.../algorithms}/pong-impala-fast.yaml | 0 .../algorithms}/pong-impala-vectorized.yaml | 0 .../algorithms}/pong-impala.yaml | 0 .../algorithms}/pong-rainbow.yaml | 0 ...unity3d-soccer-strikers-vs-goalie-ppo.yaml | 0 .../algorithms}/walker2d-ppo.yaml | 0 .../algorithms}/appo/cartpole_appo.py | 0 .../algorithms}/appo/halfcheetah_appo.py | 0 .../appo/multi_agent_cartpole_appo.py | 0 .../algorithms}/appo/multi_agent_pong_appo.py | 0 .../multi_agent_stateless_cartpole_appo.py | 0 .../algorithms}/appo/pendulum_appo.py | 0 .../algorithms}/appo/pong_appo.py | 0 .../appo/stateless_cartpole_appo.py | 0 .../bc/benchmark_rlunplugged_atari_pong_bc.py | 0 .../algorithms}/bc/cartpole_bc.py | 0 .../bc/cartpole_bc_with_offline_evaluation.py | 0 .../algorithms}/bc/pendulum_bc.py | 0 .../algorithms}/cql/pendulum_cql.py | 0 .../algorithms}/dqn/benchmark_dqn_atari.py | 0 ...benchmark_dqn_atari_rllib_preprocessing.py | 0 .../algorithms}/dqn/cartpole_dqn.py | 0 .../dqn/multi_agent_cartpole_dqn.py | 0 .../algorithms}/dqn/stateless_cartpole_dqn.py | 0 .../dreamerv3/atari_100k_dreamerv3.py | 0 .../dreamerv3/atari_200M_dreamerv3.py | 0 .../dreamerv3/cartpole_dreamerv3.py | 0 .../dm_control_suite_vision_dreamerv3.py | 0 .../dreamerv3/flappy_bird_dreamerv3.py | 0 .../dreamerv3/frozenlake_2x2_dreamerv3.py | 0 .../frozenlake_4x4_deterministic_dreamerv3.py | 0 .../dreamerv3/gymnasium_robotics_dreamerv3.py | 0 .../dreamerv3/highway_env_dreamerv3.py | 0 .../dreamerv3/pendulum_dreamerv3.py | 0 .../impala/cartpole-impala-separate-losses.py | 0 .../algorithms}/impala/cartpole_impala.py | 0 .../impala/heavy_cartpole_impala.py | 0 .../impala/multi_agent_cartpole_impala.py | 0 ...lti_agent_cartpole_impala_old_api_stack.py | 0 .../multi_agent_stateless_cartpole_impala.py | 0 .../algorithms}/impala/pendulum_impala.py | 0 .../algorithms}/impala/pong_impala.py | 0 .../impala/pong_impala_pb2_hyperopt.py | 0 .../impala/stateless_cartpole_impala.py | 0 .../algorithms}/iql/pendulum_iql.py | 0 
.../algorithms}/marwil/cartpole_marwil.py | 0 .../algorithms}/ppo/atari_ppo.py | 0 .../algorithms}/ppo/benchmark_ppo_mujoco.py | 0 .../algorithms}/ppo/cartpole_heavy_ppo.py | 0 .../algorithms}/ppo/cartpole_ppo.py | 0 .../algorithms}/ppo/cartpole_truncated_ppo.py | 0 .../ppo/memory_leak_test_ppo_new_stack.py | 0 .../ppo/multi_agent_cartpole_ppo.py | 0 .../ppo/multi_agent_footsies_ppo.py | 0 .../ppo/multi_agent_pendulum_ppo.py | 0 .../ppo/multi_agent_stateless_cartpole_ppo.py | 0 .../algorithms}/ppo/pendulum_ppo.py | 0 .../algorithms}/ppo/stateless_cartpole_ppo.py | 0 .../algorithms}/sac/benchmark_sac_mujoco.py | 0 .../algorithms}/sac/halfcheetah_sac.py | 0 .../algorithms}/sac/humanoid_sac.py | 0 .../algorithms}/sac/mountaincar_sac.py | 0 .../sac/multi_agent_pendulum_sac.py | 0 .../algorithms}/sac/pendulum_sac.py | 0 rllib/tuned_examples/__init__.py | 0 rllib/tuned_examples/cleanup_experiment.py | 187 ------------------ .../compact-regression-test.yaml | 157 --------------- rllib/tuned_examples/dreamerv3/__init__.py | 0 .../ppo/benchmark_ppo_mujoco_pb2.py | 172 ---------------- .../sac/benchmark_sac_mujoco_pb2.py | 165 ---------------- 110 files changed, 681 deletions(-) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/atari-dist-dqn.yaml (100%) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/atari-dqn.yaml (100%) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/atari-duel-ddqn.yaml (100%) rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/atari-impala-large.yaml (100%) rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/atari-impala-multi-gpu.yaml (100%) rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/atari-impala.yaml (100%) rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/atari-sac.yaml (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/cartpole-appo-separate-losses.py 
(100%) rename rllib/{tuned_examples/bc => examples/_old_api_stack/algorithms}/cartpole-bc.yaml (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/cartpole-crashing-and-stalling-recreate-workers-appo.py (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/cartpole-crashing-recreate-workers-appo.py (100%) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/cartpole-dqn-fake-gpus.yaml (100%) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/cartpole-dqn-param-noise.yaml (100%) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/cartpole-dqn-softq.yaml (100%) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/cartpole-dqn.yaml (100%) rename rllib/{tuned_examples/marwil => examples/_old_api_stack/algorithms}/cartpole-marwil.yaml (100%) rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/cartpole-sac.yaml (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/frozenlake-appo-vtrace.yaml (100%) rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/halfcheetah-bc.yaml (100%) rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/halfcheetah-cql.yaml (100%) rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/halfcheetah-ppo.yaml (100%) rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/hopper-bc.yaml (100%) rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/hopper-cql.yaml (100%) rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/hopper-ppo.yaml (100%) rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/humanoid-ppo-gae.yaml (100%) rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/humanoid-ppo.yaml (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/memory-leak-test-appo.yaml (100%) rename 
rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/memory-leak-test-dqn.yaml (100%) rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/memory-leak-test-ppo.yaml (100%) rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/memory-leak-test-sac.yaml (100%) rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/mspacman-sac.yaml (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/multi-agent-cartpole-crashing-recreate-workers-appo.py (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/multi-agent-cartpole-w-100-policies-appo.py (100%) rename rllib/{tuned_examples/appo => examples/_old_api_stack/algorithms}/multi_agent_cartpole_appo_old_api_stack.py (100%) rename rllib/{tuned_examples/cql => examples/_old_api_stack/algorithms}/pendulum-cql.yaml (100%) rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/pendulum-sac.yaml (100%) rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/pendulum-transformed-actions-ppo.yaml (100%) rename rllib/{tuned_examples/sac => examples/_old_api_stack/algorithms}/pendulum-transformed-actions-sac.yaml (100%) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/pong-dqn.yaml (100%) rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/pong-impala-fast.yaml (100%) rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/pong-impala-vectorized.yaml (100%) rename rllib/{tuned_examples/impala => examples/_old_api_stack/algorithms}/pong-impala.yaml (100%) rename rllib/{tuned_examples/dqn => examples/_old_api_stack/algorithms}/pong-rainbow.yaml (100%) rename rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/unity3d-soccer-strikers-vs-goalie-ppo.yaml (100%) rename 
rllib/{tuned_examples/ppo => examples/_old_api_stack/algorithms}/walker2d-ppo.yaml (100%) rename rllib/{tuned_examples => examples/algorithms}/appo/cartpole_appo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/appo/halfcheetah_appo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/appo/multi_agent_cartpole_appo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/appo/multi_agent_pong_appo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/appo/multi_agent_stateless_cartpole_appo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/appo/pendulum_appo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/appo/pong_appo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/appo/stateless_cartpole_appo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/bc/benchmark_rlunplugged_atari_pong_bc.py (100%) rename rllib/{tuned_examples => examples/algorithms}/bc/cartpole_bc.py (100%) rename rllib/{tuned_examples => examples/algorithms}/bc/cartpole_bc_with_offline_evaluation.py (100%) rename rllib/{tuned_examples => examples/algorithms}/bc/pendulum_bc.py (100%) rename rllib/{tuned_examples => examples/algorithms}/cql/pendulum_cql.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dqn/benchmark_dqn_atari.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dqn/benchmark_dqn_atari_rllib_preprocessing.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dqn/cartpole_dqn.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dqn/multi_agent_cartpole_dqn.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dqn/stateless_cartpole_dqn.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/atari_100k_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/atari_200M_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/cartpole_dreamerv3.py (100%) rename 
rllib/{tuned_examples => examples/algorithms}/dreamerv3/dm_control_suite_vision_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/flappy_bird_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/frozenlake_2x2_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/gymnasium_robotics_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/highway_env_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/dreamerv3/pendulum_dreamerv3.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/cartpole-impala-separate-losses.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/cartpole_impala.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/heavy_cartpole_impala.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/multi_agent_cartpole_impala.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/multi_agent_cartpole_impala_old_api_stack.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/multi_agent_stateless_cartpole_impala.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/pendulum_impala.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/pong_impala.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/pong_impala_pb2_hyperopt.py (100%) rename rllib/{tuned_examples => examples/algorithms}/impala/stateless_cartpole_impala.py (100%) rename rllib/{tuned_examples => examples/algorithms}/iql/pendulum_iql.py (100%) rename rllib/{tuned_examples => examples/algorithms}/marwil/cartpole_marwil.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/atari_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/benchmark_ppo_mujoco.py (100%) rename 
rllib/{tuned_examples => examples/algorithms}/ppo/cartpole_heavy_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/cartpole_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/cartpole_truncated_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/memory_leak_test_ppo_new_stack.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/multi_agent_cartpole_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/multi_agent_footsies_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/multi_agent_pendulum_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/multi_agent_stateless_cartpole_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/pendulum_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/ppo/stateless_cartpole_ppo.py (100%) rename rllib/{tuned_examples => examples/algorithms}/sac/benchmark_sac_mujoco.py (100%) rename rllib/{tuned_examples => examples/algorithms}/sac/halfcheetah_sac.py (100%) rename rllib/{tuned_examples => examples/algorithms}/sac/humanoid_sac.py (100%) rename rllib/{tuned_examples => examples/algorithms}/sac/mountaincar_sac.py (100%) rename rllib/{tuned_examples => examples/algorithms}/sac/multi_agent_pendulum_sac.py (100%) rename rllib/{tuned_examples => examples/algorithms}/sac/pendulum_sac.py (100%) delete mode 100644 rllib/tuned_examples/__init__.py delete mode 100644 rllib/tuned_examples/cleanup_experiment.py delete mode 100644 rllib/tuned_examples/compact-regression-test.yaml delete mode 100644 rllib/tuned_examples/dreamerv3/__init__.py delete mode 100644 rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py delete mode 100644 rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py diff --git a/rllib/tuned_examples/dqn/atari-dist-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-dist-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/atari-dist-dqn.yaml rename to 
rllib/examples/_old_api_stack/algorithms/atari-dist-dqn.yaml diff --git a/rllib/tuned_examples/dqn/atari-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/atari-dqn.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-dqn.yaml diff --git a/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml b/rllib/examples/_old_api_stack/algorithms/atari-duel-ddqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/atari-duel-ddqn.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-duel-ddqn.yaml diff --git a/rllib/tuned_examples/impala/atari-impala-large.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala-large.yaml similarity index 100% rename from rllib/tuned_examples/impala/atari-impala-large.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-impala-large.yaml diff --git a/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala-multi-gpu.yaml similarity index 100% rename from rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-impala-multi-gpu.yaml diff --git a/rllib/tuned_examples/impala/atari-impala.yaml b/rllib/examples/_old_api_stack/algorithms/atari-impala.yaml similarity index 100% rename from rllib/tuned_examples/impala/atari-impala.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-impala.yaml diff --git a/rllib/tuned_examples/sac/atari-sac.yaml b/rllib/examples/_old_api_stack/algorithms/atari-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/atari-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/atari-sac.yaml diff --git a/rllib/tuned_examples/appo/cartpole-appo-separate-losses.py b/rllib/examples/_old_api_stack/algorithms/cartpole-appo-separate-losses.py similarity index 100% rename from rllib/tuned_examples/appo/cartpole-appo-separate-losses.py rename to 
rllib/examples/_old_api_stack/algorithms/cartpole-appo-separate-losses.py diff --git a/rllib/tuned_examples/bc/cartpole-bc.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-bc.yaml similarity index 100% rename from rllib/tuned_examples/bc/cartpole-bc.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-bc.yaml diff --git a/rllib/tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/cartpole-crashing-and-stalling-recreate-workers-appo.py similarity index 100% rename from rllib/tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py rename to rllib/examples/_old_api_stack/algorithms/cartpole-crashing-and-stalling-recreate-workers-appo.py diff --git a/rllib/tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/cartpole-crashing-recreate-workers-appo.py similarity index 100% rename from rllib/tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py rename to rllib/examples/_old_api_stack/algorithms/cartpole-crashing-recreate-workers-appo.py diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-fake-gpus.yaml similarity index 100% rename from rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-fake-gpus.yaml diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-param-noise.yaml similarity index 100% rename from rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn-param-noise.yaml diff --git a/rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn-softq.yaml similarity index 100% rename from rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml rename to 
rllib/examples/_old_api_stack/algorithms/cartpole-dqn-softq.yaml diff --git a/rllib/tuned_examples/dqn/cartpole-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/cartpole-dqn.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-dqn.yaml diff --git a/rllib/tuned_examples/marwil/cartpole-marwil.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-marwil.yaml similarity index 100% rename from rllib/tuned_examples/marwil/cartpole-marwil.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-marwil.yaml diff --git a/rllib/tuned_examples/sac/cartpole-sac.yaml b/rllib/examples/_old_api_stack/algorithms/cartpole-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/cartpole-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/cartpole-sac.yaml diff --git a/rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml b/rllib/examples/_old_api_stack/algorithms/frozenlake-appo-vtrace.yaml similarity index 100% rename from rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml rename to rllib/examples/_old_api_stack/algorithms/frozenlake-appo-vtrace.yaml diff --git a/rllib/tuned_examples/cql/halfcheetah-bc.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-bc.yaml similarity index 100% rename from rllib/tuned_examples/cql/halfcheetah-bc.yaml rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-bc.yaml diff --git a/rllib/tuned_examples/cql/halfcheetah-cql.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-cql.yaml similarity index 100% rename from rllib/tuned_examples/cql/halfcheetah-cql.yaml rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-cql.yaml diff --git a/rllib/tuned_examples/ppo/halfcheetah-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/halfcheetah-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/halfcheetah-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/halfcheetah-ppo.yaml 
diff --git a/rllib/tuned_examples/cql/hopper-bc.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-bc.yaml similarity index 100% rename from rllib/tuned_examples/cql/hopper-bc.yaml rename to rllib/examples/_old_api_stack/algorithms/hopper-bc.yaml diff --git a/rllib/tuned_examples/cql/hopper-cql.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-cql.yaml similarity index 100% rename from rllib/tuned_examples/cql/hopper-cql.yaml rename to rllib/examples/_old_api_stack/algorithms/hopper-cql.yaml diff --git a/rllib/tuned_examples/ppo/hopper-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/hopper-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/hopper-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/hopper-ppo.yaml diff --git a/rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml b/rllib/examples/_old_api_stack/algorithms/humanoid-ppo-gae.yaml similarity index 100% rename from rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml rename to rllib/examples/_old_api_stack/algorithms/humanoid-ppo-gae.yaml diff --git a/rllib/tuned_examples/ppo/humanoid-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/humanoid-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/humanoid-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/humanoid-ppo.yaml diff --git a/rllib/tuned_examples/appo/memory-leak-test-appo.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-appo.yaml similarity index 100% rename from rllib/tuned_examples/appo/memory-leak-test-appo.yaml rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-appo.yaml diff --git a/rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-dqn.yaml diff --git a/rllib/tuned_examples/ppo/memory-leak-test-ppo.yaml 
b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/memory-leak-test-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml diff --git a/rllib/tuned_examples/sac/memory-leak-test-sac.yaml b/rllib/examples/_old_api_stack/algorithms/memory-leak-test-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/memory-leak-test-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/memory-leak-test-sac.yaml diff --git a/rllib/tuned_examples/sac/mspacman-sac.yaml b/rllib/examples/_old_api_stack/algorithms/mspacman-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/mspacman-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/mspacman-sac.yaml diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-recreate-workers-appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py rename to rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-crashing-recreate-workers-appo.py diff --git a/rllib/tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py b/rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py rename to 
rllib/examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py diff --git a/rllib/tuned_examples/appo/multi_agent_cartpole_appo_old_api_stack.py b/rllib/examples/_old_api_stack/algorithms/multi_agent_cartpole_appo_old_api_stack.py similarity index 100% rename from rllib/tuned_examples/appo/multi_agent_cartpole_appo_old_api_stack.py rename to rllib/examples/_old_api_stack/algorithms/multi_agent_cartpole_appo_old_api_stack.py diff --git a/rllib/tuned_examples/cql/pendulum-cql.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml similarity index 100% rename from rllib/tuned_examples/cql/pendulum-cql.yaml rename to rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml diff --git a/rllib/tuned_examples/sac/pendulum-sac.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/pendulum-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml diff --git a/rllib/tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-ppo.yaml diff --git a/rllib/tuned_examples/sac/pendulum-transformed-actions-sac.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-sac.yaml similarity index 100% rename from rllib/tuned_examples/sac/pendulum-transformed-actions-sac.yaml rename to rllib/examples/_old_api_stack/algorithms/pendulum-transformed-actions-sac.yaml diff --git a/rllib/tuned_examples/dqn/pong-dqn.yaml b/rllib/examples/_old_api_stack/algorithms/pong-dqn.yaml similarity index 100% rename from rllib/tuned_examples/dqn/pong-dqn.yaml rename to rllib/examples/_old_api_stack/algorithms/pong-dqn.yaml diff --git a/rllib/tuned_examples/impala/pong-impala-fast.yaml 
b/rllib/examples/_old_api_stack/algorithms/pong-impala-fast.yaml similarity index 100% rename from rllib/tuned_examples/impala/pong-impala-fast.yaml rename to rllib/examples/_old_api_stack/algorithms/pong-impala-fast.yaml diff --git a/rllib/tuned_examples/impala/pong-impala-vectorized.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala-vectorized.yaml similarity index 100% rename from rllib/tuned_examples/impala/pong-impala-vectorized.yaml rename to rllib/examples/_old_api_stack/algorithms/pong-impala-vectorized.yaml diff --git a/rllib/tuned_examples/impala/pong-impala.yaml b/rllib/examples/_old_api_stack/algorithms/pong-impala.yaml similarity index 100% rename from rllib/tuned_examples/impala/pong-impala.yaml rename to rllib/examples/_old_api_stack/algorithms/pong-impala.yaml diff --git a/rllib/tuned_examples/dqn/pong-rainbow.yaml b/rllib/examples/_old_api_stack/algorithms/pong-rainbow.yaml similarity index 100% rename from rllib/tuned_examples/dqn/pong-rainbow.yaml rename to rllib/examples/_old_api_stack/algorithms/pong-rainbow.yaml diff --git a/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/unity3d-soccer-strikers-vs-goalie-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/unity3d-soccer-strikers-vs-goalie-ppo.yaml diff --git a/rllib/tuned_examples/ppo/walker2d-ppo.yaml b/rllib/examples/_old_api_stack/algorithms/walker2d-ppo.yaml similarity index 100% rename from rllib/tuned_examples/ppo/walker2d-ppo.yaml rename to rllib/examples/_old_api_stack/algorithms/walker2d-ppo.yaml diff --git a/rllib/tuned_examples/appo/cartpole_appo.py b/rllib/examples/algorithms/appo/cartpole_appo.py similarity index 100% rename from rllib/tuned_examples/appo/cartpole_appo.py rename to rllib/examples/algorithms/appo/cartpole_appo.py diff --git a/rllib/tuned_examples/appo/halfcheetah_appo.py 
b/rllib/examples/algorithms/appo/halfcheetah_appo.py similarity index 100% rename from rllib/tuned_examples/appo/halfcheetah_appo.py rename to rllib/examples/algorithms/appo/halfcheetah_appo.py diff --git a/rllib/tuned_examples/appo/multi_agent_cartpole_appo.py b/rllib/examples/algorithms/appo/multi_agent_cartpole_appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi_agent_cartpole_appo.py rename to rllib/examples/algorithms/appo/multi_agent_cartpole_appo.py diff --git a/rllib/tuned_examples/appo/multi_agent_pong_appo.py b/rllib/examples/algorithms/appo/multi_agent_pong_appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi_agent_pong_appo.py rename to rllib/examples/algorithms/appo/multi_agent_pong_appo.py diff --git a/rllib/tuned_examples/appo/multi_agent_stateless_cartpole_appo.py b/rllib/examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py similarity index 100% rename from rllib/tuned_examples/appo/multi_agent_stateless_cartpole_appo.py rename to rllib/examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py diff --git a/rllib/tuned_examples/appo/pendulum_appo.py b/rllib/examples/algorithms/appo/pendulum_appo.py similarity index 100% rename from rllib/tuned_examples/appo/pendulum_appo.py rename to rllib/examples/algorithms/appo/pendulum_appo.py diff --git a/rllib/tuned_examples/appo/pong_appo.py b/rllib/examples/algorithms/appo/pong_appo.py similarity index 100% rename from rllib/tuned_examples/appo/pong_appo.py rename to rllib/examples/algorithms/appo/pong_appo.py diff --git a/rllib/tuned_examples/appo/stateless_cartpole_appo.py b/rllib/examples/algorithms/appo/stateless_cartpole_appo.py similarity index 100% rename from rllib/tuned_examples/appo/stateless_cartpole_appo.py rename to rllib/examples/algorithms/appo/stateless_cartpole_appo.py diff --git a/rllib/tuned_examples/bc/benchmark_rlunplugged_atari_pong_bc.py b/rllib/examples/algorithms/bc/benchmark_rlunplugged_atari_pong_bc.py similarity index 100% 
rename from rllib/tuned_examples/bc/benchmark_rlunplugged_atari_pong_bc.py rename to rllib/examples/algorithms/bc/benchmark_rlunplugged_atari_pong_bc.py diff --git a/rllib/tuned_examples/bc/cartpole_bc.py b/rllib/examples/algorithms/bc/cartpole_bc.py similarity index 100% rename from rllib/tuned_examples/bc/cartpole_bc.py rename to rllib/examples/algorithms/bc/cartpole_bc.py diff --git a/rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py similarity index 100% rename from rllib/tuned_examples/bc/cartpole_bc_with_offline_evaluation.py rename to rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py diff --git a/rllib/tuned_examples/bc/pendulum_bc.py b/rllib/examples/algorithms/bc/pendulum_bc.py similarity index 100% rename from rllib/tuned_examples/bc/pendulum_bc.py rename to rllib/examples/algorithms/bc/pendulum_bc.py diff --git a/rllib/tuned_examples/cql/pendulum_cql.py b/rllib/examples/algorithms/cql/pendulum_cql.py similarity index 100% rename from rllib/tuned_examples/cql/pendulum_cql.py rename to rllib/examples/algorithms/cql/pendulum_cql.py diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari.py b/rllib/examples/algorithms/dqn/benchmark_dqn_atari.py similarity index 100% rename from rllib/tuned_examples/dqn/benchmark_dqn_atari.py rename to rllib/examples/algorithms/dqn/benchmark_dqn_atari.py diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py b/rllib/examples/algorithms/dqn/benchmark_dqn_atari_rllib_preprocessing.py similarity index 100% rename from rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py rename to rllib/examples/algorithms/dqn/benchmark_dqn_atari_rllib_preprocessing.py diff --git a/rllib/tuned_examples/dqn/cartpole_dqn.py b/rllib/examples/algorithms/dqn/cartpole_dqn.py similarity index 100% rename from rllib/tuned_examples/dqn/cartpole_dqn.py rename to rllib/examples/algorithms/dqn/cartpole_dqn.py diff 
--git a/rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py b/rllib/examples/algorithms/dqn/multi_agent_cartpole_dqn.py similarity index 100% rename from rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py rename to rllib/examples/algorithms/dqn/multi_agent_cartpole_dqn.py diff --git a/rllib/tuned_examples/dqn/stateless_cartpole_dqn.py b/rllib/examples/algorithms/dqn/stateless_cartpole_dqn.py similarity index 100% rename from rllib/tuned_examples/dqn/stateless_cartpole_dqn.py rename to rllib/examples/algorithms/dqn/stateless_cartpole_dqn.py diff --git a/rllib/tuned_examples/dreamerv3/atari_100k_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/atari_100k_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/atari_100k_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/atari_100k_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/atari_200M_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/atari_200M_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/atari_200M_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/atari_200M_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/cartpole_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/cartpole_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/cartpole_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/cartpole_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/flappy_bird_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/flappy_bird_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/flappy_bird_dreamerv3.py rename to 
rllib/examples/algorithms/dreamerv3/flappy_bird_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/frozenlake_2x2_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/frozenlake_2x2_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/frozenlake_2x2_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/frozenlake_2x2_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/frozenlake_4x4_deterministic_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/gymnasium_robotics_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/gymnasium_robotics_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/gymnasium_robotics_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/gymnasium_robotics_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/highway_env_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/highway_env_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/highway_env_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/highway_env_dreamerv3.py diff --git a/rllib/tuned_examples/dreamerv3/pendulum_dreamerv3.py b/rllib/examples/algorithms/dreamerv3/pendulum_dreamerv3.py similarity index 100% rename from rllib/tuned_examples/dreamerv3/pendulum_dreamerv3.py rename to rllib/examples/algorithms/dreamerv3/pendulum_dreamerv3.py diff --git a/rllib/tuned_examples/impala/cartpole-impala-separate-losses.py b/rllib/examples/algorithms/impala/cartpole-impala-separate-losses.py similarity index 100% rename from rllib/tuned_examples/impala/cartpole-impala-separate-losses.py rename to rllib/examples/algorithms/impala/cartpole-impala-separate-losses.py diff --git 
a/rllib/tuned_examples/impala/cartpole_impala.py b/rllib/examples/algorithms/impala/cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/cartpole_impala.py rename to rllib/examples/algorithms/impala/cartpole_impala.py diff --git a/rllib/tuned_examples/impala/heavy_cartpole_impala.py b/rllib/examples/algorithms/impala/heavy_cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/heavy_cartpole_impala.py rename to rllib/examples/algorithms/impala/heavy_cartpole_impala.py diff --git a/rllib/tuned_examples/impala/multi_agent_cartpole_impala.py b/rllib/examples/algorithms/impala/multi_agent_cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/multi_agent_cartpole_impala.py rename to rllib/examples/algorithms/impala/multi_agent_cartpole_impala.py diff --git a/rllib/tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py b/rllib/examples/algorithms/impala/multi_agent_cartpole_impala_old_api_stack.py similarity index 100% rename from rllib/tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py rename to rllib/examples/algorithms/impala/multi_agent_cartpole_impala_old_api_stack.py diff --git a/rllib/tuned_examples/impala/multi_agent_stateless_cartpole_impala.py b/rllib/examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/multi_agent_stateless_cartpole_impala.py rename to rllib/examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py diff --git a/rllib/tuned_examples/impala/pendulum_impala.py b/rllib/examples/algorithms/impala/pendulum_impala.py similarity index 100% rename from rllib/tuned_examples/impala/pendulum_impala.py rename to rllib/examples/algorithms/impala/pendulum_impala.py diff --git a/rllib/tuned_examples/impala/pong_impala.py b/rllib/examples/algorithms/impala/pong_impala.py similarity index 100% rename from rllib/tuned_examples/impala/pong_impala.py rename to 
rllib/examples/algorithms/impala/pong_impala.py diff --git a/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py b/rllib/examples/algorithms/impala/pong_impala_pb2_hyperopt.py similarity index 100% rename from rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py rename to rllib/examples/algorithms/impala/pong_impala_pb2_hyperopt.py diff --git a/rllib/tuned_examples/impala/stateless_cartpole_impala.py b/rllib/examples/algorithms/impala/stateless_cartpole_impala.py similarity index 100% rename from rllib/tuned_examples/impala/stateless_cartpole_impala.py rename to rllib/examples/algorithms/impala/stateless_cartpole_impala.py diff --git a/rllib/tuned_examples/iql/pendulum_iql.py b/rllib/examples/algorithms/iql/pendulum_iql.py similarity index 100% rename from rllib/tuned_examples/iql/pendulum_iql.py rename to rllib/examples/algorithms/iql/pendulum_iql.py diff --git a/rllib/tuned_examples/marwil/cartpole_marwil.py b/rllib/examples/algorithms/marwil/cartpole_marwil.py similarity index 100% rename from rllib/tuned_examples/marwil/cartpole_marwil.py rename to rllib/examples/algorithms/marwil/cartpole_marwil.py diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/examples/algorithms/ppo/atari_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/atari_ppo.py rename to rllib/examples/algorithms/ppo/atari_ppo.py diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py b/rllib/examples/algorithms/ppo/benchmark_ppo_mujoco.py similarity index 100% rename from rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py rename to rllib/examples/algorithms/ppo/benchmark_ppo_mujoco.py diff --git a/rllib/tuned_examples/ppo/cartpole_heavy_ppo.py b/rllib/examples/algorithms/ppo/cartpole_heavy_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/cartpole_heavy_ppo.py rename to rllib/examples/algorithms/ppo/cartpole_heavy_ppo.py diff --git a/rllib/tuned_examples/ppo/cartpole_ppo.py b/rllib/examples/algorithms/ppo/cartpole_ppo.py similarity index 100% 
rename from rllib/tuned_examples/ppo/cartpole_ppo.py rename to rllib/examples/algorithms/ppo/cartpole_ppo.py diff --git a/rllib/tuned_examples/ppo/cartpole_truncated_ppo.py b/rllib/examples/algorithms/ppo/cartpole_truncated_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/cartpole_truncated_ppo.py rename to rllib/examples/algorithms/ppo/cartpole_truncated_ppo.py diff --git a/rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py b/rllib/examples/algorithms/ppo/memory_leak_test_ppo_new_stack.py similarity index 100% rename from rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py rename to rllib/examples/algorithms/ppo/memory_leak_test_ppo_new_stack.py diff --git a/rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_cartpole_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py rename to rllib/examples/algorithms/ppo/multi_agent_cartpole_ppo.py diff --git a/rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py rename to rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py diff --git a/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_pendulum_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py rename to rllib/examples/algorithms/ppo/multi_agent_pendulum_ppo.py diff --git a/rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py b/rllib/examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py rename to rllib/examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py diff --git a/rllib/tuned_examples/ppo/pendulum_ppo.py b/rllib/examples/algorithms/ppo/pendulum_ppo.py similarity index 100% rename from 
rllib/tuned_examples/ppo/pendulum_ppo.py rename to rllib/examples/algorithms/ppo/pendulum_ppo.py diff --git a/rllib/tuned_examples/ppo/stateless_cartpole_ppo.py b/rllib/examples/algorithms/ppo/stateless_cartpole_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/stateless_cartpole_ppo.py rename to rllib/examples/algorithms/ppo/stateless_cartpole_ppo.py diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco.py b/rllib/examples/algorithms/sac/benchmark_sac_mujoco.py similarity index 100% rename from rllib/tuned_examples/sac/benchmark_sac_mujoco.py rename to rllib/examples/algorithms/sac/benchmark_sac_mujoco.py diff --git a/rllib/tuned_examples/sac/halfcheetah_sac.py b/rllib/examples/algorithms/sac/halfcheetah_sac.py similarity index 100% rename from rllib/tuned_examples/sac/halfcheetah_sac.py rename to rllib/examples/algorithms/sac/halfcheetah_sac.py diff --git a/rllib/tuned_examples/sac/humanoid_sac.py b/rllib/examples/algorithms/sac/humanoid_sac.py similarity index 100% rename from rllib/tuned_examples/sac/humanoid_sac.py rename to rllib/examples/algorithms/sac/humanoid_sac.py diff --git a/rllib/tuned_examples/sac/mountaincar_sac.py b/rllib/examples/algorithms/sac/mountaincar_sac.py similarity index 100% rename from rllib/tuned_examples/sac/mountaincar_sac.py rename to rllib/examples/algorithms/sac/mountaincar_sac.py diff --git a/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py b/rllib/examples/algorithms/sac/multi_agent_pendulum_sac.py similarity index 100% rename from rllib/tuned_examples/sac/multi_agent_pendulum_sac.py rename to rllib/examples/algorithms/sac/multi_agent_pendulum_sac.py diff --git a/rllib/tuned_examples/sac/pendulum_sac.py b/rllib/examples/algorithms/sac/pendulum_sac.py similarity index 100% rename from rllib/tuned_examples/sac/pendulum_sac.py rename to rllib/examples/algorithms/sac/pendulum_sac.py diff --git a/rllib/tuned_examples/__init__.py b/rllib/tuned_examples/__init__.py deleted file mode 100644 index 
e69de29bb2d1..000000000000 diff --git a/rllib/tuned_examples/cleanup_experiment.py b/rllib/tuned_examples/cleanup_experiment.py deleted file mode 100644 index 749d3ed5e522..000000000000 --- a/rllib/tuned_examples/cleanup_experiment.py +++ /dev/null @@ -1,187 +0,0 @@ -""" -This script automates cleaning up a benchmark/experiment run of some algo -against some config (with possibly more than one tune trial, -e.g. torch=grid_search([True, False])). - -Run `python cleanup_experiment.py --help` for more information. - -Use on an input directory with trial contents e.g.: -.. -IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k -IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_13-59-35dqaetxnf -IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_17-21-28tbhedw72 -IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_10-17-54lv20cgn_ -IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_13-59-35kwzhax_y -IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_17-21-28a5j0s7za - -Then run: ->> python cleanup_experiment.py --experiment-dir [parent dir w/ trial sub-dirs] ->> --output-dir [your out dir] --results-filter dumb_col_2,superfluous_col3 ->> --results-max-size [max results file size in kb before(!) zipping] - -The script will create one output sub-dir for each trial and only copy -the configuration and the csv results (filtered and every nth row removed -based on the given args). -""" - -import argparse -import json -import os -import re -import shutil - -import yaml - -parser = argparse.ArgumentParser() -parser.add_argument( - "--experiment-dir", - type=str, - help="Experiment dir in which all sub-runs (seeds) are " - "located (as sub-dirs). 
Each sub0-run dir must contain the files: " - "params.json and progress.csv.", -) -parser.add_argument( - "--output-dir", - type=str, - help="The output dir, in which the cleaned up output will be placed.", -) -parser.add_argument( - "--results-filter", - type=str, - help="comma-separated list of csv fields to exclude.", - default="experiment_id,pid,hostname,node_ip,trial_id,hist_stats/episode_" - "reward,hist_stats/episode_lengths,experiment_tag", -) -parser.add_argument( - "--results-max-size", - type=int, - help="the max. size of the final results.csv file (in kb). Will erase " - "every nth line in the original input to reach that goal. " - "Use 0 for no limit (default=100).", - default=100, -) - - -def process_single_run(in_dir, out_dir): - exp_dir = os.listdir(in_dir) - - # Make sure trials dir is ok. - assert ( - "params.json" in exp_dir and "progress.csv" in exp_dir - ), "params.json or progress.csv not found in {}!".format(in_dir) - - os.makedirs(out_dir, exist_ok=True) - - for file in exp_dir: - absfile = os.path.join(in_dir, file) - # Config file -> Convert to yaml and move to output dir. - if file == "params.json": - assert os.path.isfile(absfile), "{} not a file!".format(file) - with open(absfile) as fp: - contents = json.load(fp) - with open(os.path.join(out_dir, "config.yaml"), "w") as fp: - yaml.dump(contents, fp) - # Progress csv file -> Filter out some columns, cut, and write to - # output_dir. - elif file == "progress.csv": - assert os.path.isfile(absfile), "{} not a file!".format(file) - col_idx_to_filter = [] - with open(absfile) as fp: - # Get column names. - col_names_orig = fp.readline().strip().split(",") - # Split by comma (abiding to quotes), filter out - # unwanted columns, then write to disk. 
- cols_to_filter = args.results_filter.split(",") - for i, c in enumerate(col_names_orig): - if c in cols_to_filter: - col_idx_to_filter.insert(0, i) - col_names = col_names_orig.copy() - for idx in col_idx_to_filter: - col_names.pop(idx) - absfile_out = os.path.join(out_dir, "progress.csv") - with open(absfile_out, "w") as out_fp: - print(",".join(col_names), file=out_fp) - while True: - line = fp.readline().strip() - if not line: - break - line = re.sub( - "(,{2,})", - lambda m: ",None" * (len(m.group()) - 1) + ",", - line, - ) - cols = re.findall('".+?"|[^,]+', line) - if len(cols) != len(col_names_orig): - continue - for idx in col_idx_to_filter: - cols.pop(idx) - print(",".join(cols), file=out_fp) - - # Reduce the size of the output file if necessary. - out_size = os.path.getsize(absfile_out) - max_size = args.results_max_size * 1024 - if 0 < max_size < out_size: - # Figure out roughly every which line we have to drop. - ratio = out_size / max_size - # If ratio > 2.0, we'll have to keep only every nth line. - if ratio > 2.0: - nth = out_size // max_size - os.system( - "awk 'NR==1||NR%{}==0' {} > {}.new".format( - nth, absfile_out, absfile_out - ) - ) - # If ratio < 2.0 (>1.0), we'll have to drop every nth line. - else: - nth = out_size // (out_size - max_size) - os.system( - "awk 'NR==1||NR%{}!=0' {} > {}.new".format( - nth, absfile_out, absfile_out - ) - ) - os.remove(absfile_out) - os.rename(absfile_out + ".new", absfile_out) - - # Zip progress.csv into results.zip. - zip_file = os.path.join(out_dir, "results.zip") - try: - os.remove(zip_file) - except FileNotFoundError: - pass - os.system( - "zip -j {} {}".format(zip_file, os.path.join(out_dir, "progress.csv")) - ) - os.remove(os.path.join(out_dir, "progress.csv")) - - # TBX events file -> Move as is. 
- elif re.search("^(events\\.out\\.|params\\.pkl)", file): - assert os.path.isfile(absfile), "{} not a file!".format(file) - shutil.copyfile(absfile, os.path.join(out_dir, file)) - - -if __name__ == "__main__": - args = parser.parse_args() - exp_dir = os.listdir(args.experiment_dir) - # Loop through all sub-directories. - for i, sub_run in enumerate(sorted(exp_dir)): - abspath = os.path.join(args.experiment_dir, sub_run) - # This is a seed run. - if os.path.isdir(abspath) and re.search( - "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)", sub_run - ): - # Create meaningful output dir name: - # [algo]_[env]_[trial #]_[trial-config]_[date YYYY-MM-DD]. - cleaned_up_out = re.sub( - "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)(_.+)?(_\\d{4}-\\d{2}-\\d{2})" - "_\\d{2}-\\d{2}-\\w+", - "{:02}_\\1_\\2\\4\\5".format(i), - sub_run, - ) - # Remove superflous `env=` specifier (anv always included in name). - cleaned_up_out = re.sub( - "^(.+)env=\\w+?-v\\d+,?(.+)", "\\1\\2", cleaned_up_out - ) - out_path = os.path.join(args.output_dir, cleaned_up_out) - process_single_run(abspath, out_path) - # Done. - print("done") diff --git a/rllib/tuned_examples/compact-regression-test.yaml b/rllib/tuned_examples/compact-regression-test.yaml deleted file mode 100644 index 80003257ccb7..000000000000 --- a/rllib/tuned_examples/compact-regression-test.yaml +++ /dev/null @@ -1,157 +0,0 @@ -# This file runs on a single g3.16xl or p3.16xl node. It is suggested -# to run these in a DLAMI / tensorflow_p36 env. Note that RL runs are -# inherently high variance, so you'll have to check to see if the -# rewards reached seem reasonably in line with previous results. 
-# -# You can find the reference results here: -# https://github.com/ray-project/ray/tree/master/release/release_logs -atari-impala: - env: ale_py:ALE/Breakout-v5 - run: IMPALA - num_samples: 4 - stop: - time_total_s: 3600 - config: - env_config: - frameskip: 1 # no frameskip - rollout_fragment_length: 50 - train_batch_size: 500 - num_env_runners: 10 - num_envs_per_env_runner: 5 - clip_rewards: True - lr_schedule: [ - [0, 0.0005], - [20000000, 0.000000000001], - ] - num_gpus: 1 -atari-ppo-tf: - env: ale_py:ALE/Breakout-v5 - run: PPO - num_samples: 4 - stop: - time_total_s: 3600 - config: - env_config: - frameskip: 1 # no frameskip - lambda: 0.95 - kl_coeff: 0.5 - clip_rewards: True - clip_param: 0.1 - vf_clip_param: 10.0 - entropy_coeff: 0.01 - train_batch_size: 5000 - rollout_fragment_length: 100 - minibatch_size: 500 - num_epochs: 10 - num_env_runners: 10 - num_envs_per_env_runner: 5 - batch_mode: truncate_episodes - observation_filter: NoFilter - model: - vf_share_layers: true - num_gpus: 1 -atari-ppo-torch: - env: ale_py:ALE/Breakout-v5 - run: PPO - num_samples: 4 - stop: - time_total_s: 3600 - config: - framework: torch - env_config: - frameskip: 1 # no frameskip - lambda: 0.95 - kl_coeff: 0.5 - clip_rewards: True - clip_param: 0.1 - vf_clip_param: 10.0 - entropy_coeff: 0.01 - train_batch_size: 5000 - rollout_fragment_length: 100 - minibatch_size: 500 - num_epochs: 10 - num_env_runners: 10 - num_envs_per_env_runner: 5 - batch_mode: truncate_episodes - observation_filter: NoFilter - model: - vf_share_layers: true - num_gpus: 1 -apex: - env: ale_py:ALE/Breakout-v5 - run: APEX - num_samples: 4 - stop: - time_total_s: 3600 - config: - env_config: - frameskip: 1 # no frameskip - double_q: false - dueling: false - num_atoms: 1 - noisy: false - n_step: 3 - lr: .0001 - adam_epsilon: .00015 - hiddens: [512] - exploration_config: - epsilon_timesteps: 200000 - final_epsilon: 0.01 - replay_buffer_config: - type: MultiAgentPrioritizedReplayBuffer - 
prioritized_replay_alpha: 0.5 - capacity: 1000000 - num_gpus: 1 - num_env_runners: 8 - num_envs_per_env_runner: 8 - rollout_fragment_length: 20 - train_batch_size: 512 - target_network_update_freq: 50000 - min_sample_timesteps_per_iteration: 25000 -atari-a2c: - env: ale_py:ALE/Breakout-v5 - run: A2C - num_samples: 4 - stop: - time_total_s: 3600 - config: - env_config: - frameskip: 1 # no frameskip - rollout_fragment_length: 20 - clip_rewards: True - num_env_runners: 5 - num_envs_per_env_runner: 5 - num_gpus: 1 - lr_schedule: [ - [0, 0.0007], - [20000000, 0.000000000001], - ] -atari-basic-dqn: - env: ale_py:ALE/Breakout-v5 - run: DQN - num_samples: 4 - stop: - time_total_s: 3600 - config: - env_config: - frameskip: 1 # no frameskip - double_q: false - dueling: false - num_atoms: 1 - noisy: false - replay_buffer_config: - type: MultiAgentReplayBuffer - capacity: 1000000 - num_steps_sampled_before_learning_starts: 20000 - n_step: 1 - target_network_update_freq: 8000 - lr: .0000625 - adam_epsilon: .00015 - hiddens: [512] - rollout_fragment_length: 4 - train_batch_size: 32 - exploration_config: - epsilon_timesteps: 200000 - final_epsilon: 0.01 - num_gpus: 0.2 - min_sample_timesteps_per_iteration: 10000 diff --git a/rllib/tuned_examples/dreamerv3/__init__.py b/rllib/tuned_examples/dreamerv3/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py b/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py deleted file mode 100644 index 51e9d2d2b3ef..000000000000 --- a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py +++ /dev/null @@ -1,172 +0,0 @@ -import time - -from ray import tune -from ray.rllib.algorithms.ppo.ppo import PPOConfig -from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME -from ray.tune.schedulers.pb2 import PB2 - -# Needs the following packages to be installed on Ubuntu: -# sudo apt-get libosmesa-dev -# sudo apt-get install patchelf -# python -m pip install 
"gymnasium[mujoco]" -# Might need to be added to bashsrc: -# export MUJOCO_GL=osmesa" -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin" - -# See the following links for becnhmark results of other libraries: -# Original paper: https://arxiv.org/abs/1812.05905 -# CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE -# AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks -benchmark_envs = { - "HalfCheetah-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "Hopper-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "InvertedPendulum-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "InvertedDoublePendulum-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "Reacher-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000}, - "Swimmer-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000}, - "Walker2d-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, -} - -pb2_scheduler = PB2( - time_attr=f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}", - metric="env_runners/episode_return_mean", - mode="max", - perturbation_interval=50000, - # Copy bottom % with top % weights. - quantile_fraction=0.25, - hyperparam_bounds={ - "lr": [1e-5, 1e-3], - "gamma": [0.95, 0.99], - "lambda": [0.97, 1.0], - "entropy_coeff": [0.0, 0.01], - "vf_loss_coeff": [0.01, 1.0], - "clip_param": [0.1, 0.3], - "kl_target": [0.01, 0.03], - "minibatch_size": [512, 4096], - "num_epochs": [6, 32], - "vf_share_layers": [False, True], - "use_kl_loss": [False, True], - "kl_coeff": [0.1, 0.4], - "vf_clip_param": [10.0, float("inf")], - "grad_clip": [40, 200], - }, -) - -experiment_start_time = time.time() -# Following the paper. -num_rollout_workers = 32 -for env, stop_criteria in benchmark_envs.items(): - hp_trial_start_time = time.time() - config = ( - PPOConfig() - .environment(env=env) - .env_runners( - rollout_fragment_length=1, - num_env_runners=num_rollout_workers, - # TODO (sven, simon): Add resources. 
- ) - .learners( - # Let's start with a small number of learner workers and - # add later a tune grid search for these resources. - # TODO (simon): Either add tune grid search here or make - # an extra script to only test scalability. - num_learners=1, - num_gpus_per_learner=1, - ) - # TODO (simon): Adjust to new model_config_dict. - .training( - lr=tune.uniform(1e-5, 1e-3), - gamma=tune.uniform(0.95, 0.99), - lambda_=tune.uniform(0.97, 1.0), - entropy_coeff=tune.choice([0.0, 0.01]), - vf_loss_coeff=tune.uniform(0.01, 1.0), - clip_param=tune.uniform(0.1, 0.3), - kl_target=tune.uniform(0.01, 0.03), - minibatch_size=tune.choice([512, 1024, 2048, 4096]), - num_epochs=tune.randint(6, 32), - vf_share_layers=tune.choice([True, False]), - use_kl_loss=tune.choice([True, False]), - kl_coeff=tune.uniform(0.1, 0.4), - vf_clip_param=tune.choice([10.0, 40.0, float("inf")]), - grad_clip=tune.choice([None, 40, 100, 200]), - train_batch_size=tune.sample_from( - lambda spec: spec.config["minibatch_size"] * num_rollout_workers - ), - model={ - "fcnet_hiddens": [64, 64], - "fcnet_activation": "tanh", - "vf_share_layers": True, - }, - ) - .reporting( - metrics_num_episodes_for_smoothing=5, - min_sample_timesteps_per_iteration=1000, - ) - .evaluation( - evaluation_duration="auto", - evaluation_interval=1, - evaluation_num_env_runners=1, - evaluation_parallel_to_training=True, - evaluation_config={ - # PPO learns stochastic policy. - "explore": False, - }, - ) - ) - - tuner = tune.Tuner( - "PPO", - param_space=config, - run_config=tune.RunConfig( - stop=stop_criteria, - name="benchmark_ppo_mujoco_pb2_" + env, - ), - tune_config=tune.TuneConfig( - scheduler=pb2_scheduler, - num_samples=8, - ), - ) - result_grid = tuner.fit() - best_result = result_grid.get_best_result() - print( - f"Finished running HP search for (env={env}) in " - f"{time.time() - hp_trial_start_time} seconds." 
- ) - print(f"Best result for {env}: {best_result}") - print(f"Best config for {env}: {best_result['config']}") - - # Run again with the best config. - best_trial_start_time = time.time() - tuner = tune.Tuner( - "PPO", - param_space=best_result.config, - run_config=tune.RunConfig( - stop=stop_criteria, - name="benchmark_ppo_mujoco_pb2_" + env + "_best", - ), - ) - print(f"Running best config for (env={env})...") - tuner.fit() - print( - f"Finished running best config for (env={env}) " - f"in {time.time() - best_trial_start_time} seconds." - ) - -print( - f"Finished running HP search on all MuJoCo benchmarks in " - f"{time.time() - experiment_start_time} seconds." -) -print( - "Results from running the best configs can be found in the " - "`benchmark_ppo_mujoco_pb2__best` directories." -) diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py b/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py deleted file mode 100644 index f768dddf03b0..000000000000 --- a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py +++ /dev/null @@ -1,165 +0,0 @@ -import time - -from ray import tune -from ray.rllib.algorithms.sac.sac import SACConfig -from ray.rllib.utils.metrics import ( - ENV_RUNNER_RESULTS, - EPISODE_RETURN_MEAN, - NUM_ENV_STEPS_SAMPLED_LIFETIME, -) -from ray.tune.schedulers.pb2 import PB2 - -# Needs the following packages to be installed on Ubuntu: -# sudo apt-get libosmesa-dev -# sudo apt-get install patchelf -# python -m pip install "gymnasium[mujoco]" -# Might need to be added to bashsrc: -# export MUJOCO_GL=osmesa" -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin" - -# See the following links for becnhmark results of other libraries: -# Original paper: https://arxiv.org/abs/1812.05905 -# CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE -# AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks -benchmark_envs = { - "HalfCheetah-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 
3000000, - }, - "Hopper-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 1000000, - }, - "Humanoid-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 10000000, - }, - "Ant-v4": {f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000}, - "Walker2d-v4": { - f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 3000000, - }, -} - -pb2_scheduler = PB2( - time_attr=NUM_ENV_STEPS_SAMPLED_LIFETIME, - metric=f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}", - mode="max", - perturbation_interval=50000, - # Copy bottom % with top % weights. - quantile_fraction=0.25, - hyperparam_bounds={ - "actor_lr": [1e-5, 1e-3], - "critic_lr": [1e-6, 1e-4], - "alpha_lr": [1e-6, 1e-3], - "gamma": [0.95, 0.99], - "n_step": [1, 3], - "initial_alpha": [1.0, 1.5], - "tau": [0.001, 0.1], - "target_entropy": [-10, -1], - "train_batch_size": [128, 512], - "target_network_update_freq": [1, 4], - }, -) - -experiment_start_time = time.time() -for env, stop_criteria in benchmark_envs.items(): - hp_trial_start_time = time.time() - config = ( - SACConfig() - .environment(env=env) - .env_runners( - rollout_fragment_length="auto", - num_env_runners=1, - # TODO (sven, simon): Add resources. - ) - .learners( - # Note, we have a small batch and a sample/train ratio - # of 1:1, so a single GPU should be enough. - num_learners=1, - num_gpus_per_learner=1, - ) - # TODO (simon): Adjust to new model_config_dict. 
- .training( - initial_alpha=tune.choice([1.0, 1.5]), - actor_lr=tune.uniform(1e-5, 1e-3), - critic_lr=tune.uniform([1e-6, 1e-4]), - alpha_lr=tune.uniform([1e-6, 1e-3]), - target_entropy=tune.choice([-10, -5, -1, "auto"]), - n_step=tune.choice([1, 3, (1, 3)]), - tau=tune.uniform(0.001, 0.1), - train_batch_size=tune.choice([128, 256, 512]), - target_network_update_freq=tune.choice([1, 2, 4]), - replay_buffer_config={ - "type": "PrioritizedEpisodeReplayBuffer", - "capacity": 1000000, - "alpha": 0.6, - "beta": 0.4, - }, - num_steps_sampled_before_learning_starts=256, - model={ - "fcnet_hiddens": [256, 256], - "fcnet_activation": "relu", - "post_fcnet_hiddens": [], - "post_fcnet_activation": None, - "post_fcnet_weights_initializer": "orthogonal_", - "post_fcnet_weights_initializer_config": {"gain": 0.01}, - }, - ) - .reporting( - metrics_num_episodes_for_smoothing=5, - min_sample_timesteps_per_iteration=1000, - ) - .evaluation( - evaluation_duration="auto", - evaluation_interval=1, - evaluation_num_env_runners=1, - evaluation_parallel_to_training=True, - evaluation_config={ - "explore": False, - }, - ) - ) - - tuner = tune.Tuner( - "SAC", - param_space=config, - run_config=tune.RunConfig( - stop=stop_criteria, - name="benchmark_sac_mujoco_pb2_" + env, - ), - tune_config=tune.TuneConfig( - scheduler=pb2_scheduler, - num_samples=8, - ), - ) - result_grid = tuner.fit() - best_result = result_grid.get_best_result() - print( - f"Finished running HP search for (env={env}) in " - f"{time.time() - hp_trial_start_time} seconds." - ) - print(f"Best result for {env}: {best_result}") - print(f"Best config for {env}: {best_result['config']}") - - # Run again with the best config. 
- best_trial_start_time = time.time() - tuner = tune.Tuner( - "SAC", - param_space=best_result.config, - run_config=tune.RunConfig( - stop=stop_criteria, - name="benchmark_sac_mujoco_pb2_" + env + "_best", - ), - ) - print(f"Running best config for (env={env})...") - tuner.fit() - print( - f"Finished running best config for (env={env}) " - f"in {time.time() - best_trial_start_time} seconds." - ) - -print( - f"Finished running HP search on all MuJoCo benchmarks in " - f"{time.time() - experiment_start_time} seconds." -) -print( - "Results from running the best configs can be found in the " - "`benchmark_sac_mujoco_pb2__best` directories." -) From b89a7af70cf837b8aa63ecf036962daed7d96f37 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Fri, 21 Nov 2025 17:13:40 +0000 Subject: [PATCH 2/9] Update BUILD.bazel for tuned-examples new location Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 368 +++++++++++++++++++++++----------------------- 1 file changed, 183 insertions(+), 185 deletions(-) diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 15da5de0d454..df13bc2836e0 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -72,8 +72,6 @@ doctest( "**/examples/**", "**/tests/**", "**/test_*.py", - # Exclude `tuned_examples` *.py files. - "**/tuned_examples/**", # Deprecated modules "utils/window_stat.py", "utils/timer.py", @@ -161,7 +159,7 @@ py_test( # Tag: learning_tests # # This will test python/yaml config files -# inside rllib/tuned_examples/[algo-name] for actual learning success. +# inside rllib/examples/algorithms/[algo-name] for actual learning success. 
# -------------------------------------------------------------------- # APPO @@ -169,13 +167,13 @@ py_test( py_test( name = "learning_tests_cartpole_appo", size = "large", - srcs = ["tuned_examples/appo/cartpole_appo.py"], + srcs = ["examples/algorithms/appo/cartpole_appo.py"], args = [ "--as-test", "--num-cpus=7", "--num-env-runners=5", ], - main = "tuned_examples/appo/cartpole_appo.py", + main = "examples/algorithms/appo/cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -188,23 +186,23 @@ py_test( # TODO (sven): For some weird reason, this test runs extremely slow on the CI (not on cluster, not locally) -> taking this out for now ... # py_test( # name = "learning_tests_cartpole_appo_gpu", -# main = "tuned_examples/appo/cartpole_appo.py", +# main = "examples/algorithms/appo/cartpole_appo.py", # tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"], # size = "large", -# srcs = ["tuned_examples/appo/cartpole_appo.py"], +# srcs = ["examples/algorithms/appo/cartpole_appo.py"], # args = ["--as-test", "--num-gpus-per-learner=1", "--num-cpus=7", "--num-env-runners=5"] # ) py_test( name = "learning_tests_cartpole_appo_multi_cpu", size = "large", - srcs = ["tuned_examples/appo/cartpole_appo.py"], + srcs = ["examples/algorithms/appo/cartpole_appo.py"], args = [ "--as-test", "--num-learners=2", "--num-cpus=9", "--num-env-runners=6", ], - main = "tuned_examples/appo/cartpole_appo.py", + main = "examples/algorithms/appo/cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -218,7 +216,7 @@ py_test( py_test( name = "learning_tests_cartpole_appo_multi_gpu", size = "large", - srcs = ["tuned_examples/appo/cartpole_appo.py"], + srcs = ["examples/algorithms/appo/cartpole_appo.py"], args = [ "--as-test", "--num-learners=2", @@ -226,7 +224,7 @@ py_test( "--num-cpus=7", "--num-env-runners=6", ], - main = "tuned_examples/appo/cartpole_appo.py", + main = 
"examples/algorithms/appo/cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -242,14 +240,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_appo", size = "large", - srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=8", "--num-env-runners=6", ], - main = "tuned_examples/appo/multi_agent_cartpole_appo.py", + main = "examples/algorithms/appo/multi_agent_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -262,7 +260,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_appo_gpu", size = "large", - srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", @@ -270,7 +268,7 @@ py_test( "--num-cpus=7", "--num-env-runners=5", ], - main = "tuned_examples/appo/multi_agent_cartpole_appo.py", + main = "examples/algorithms/appo/multi_agent_cartpole_appo.py", tags = [ "exclusive", "gpu", @@ -285,7 +283,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_appo_multi_cpu", size = "large", - srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", @@ -293,7 +291,7 @@ py_test( "--num-cpus=9", "--num-env-runners=6", ], - main = "tuned_examples/appo/multi_agent_cartpole_appo.py", + main = "examples/algorithms/appo/multi_agent_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -309,7 +307,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_appo_multi_gpu", size = "large", - srcs = ["tuned_examples/appo/multi_agent_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", @@ -318,7 +316,7 @@ py_test( "--num-cpus=7", "--num-env-runners=6", ], - main = 
"tuned_examples/appo/multi_agent_cartpole_appo.py", + main = "examples/algorithms/appo/multi_agent_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -334,13 +332,13 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_appo", size = "large", - srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"], args = [ "--as-test", "--num-cpus=8", "--num-env-runners=6", ], - main = "tuned_examples/appo/stateless_cartpole_appo.py", + main = "examples/algorithms/appo/stateless_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -354,7 +352,7 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_appo_gpu", size = "large", - srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"], args = [ "--as-test", "--num-agents=2", @@ -362,7 +360,7 @@ py_test( "--num-cpus=7", "--num-env-runners=5", ], - main = "tuned_examples/appo/stateless_cartpole_appo.py", + main = "examples/algorithms/appo/stateless_cartpole_appo.py", tags = [ "exclusive", "gpu", @@ -377,14 +375,14 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_appo_multi_cpu", size = "large", - srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"], args = [ "--as-test", "--num-learners=2", "--num-cpus=9", "--num-env-runners=6", ], - main = "tuned_examples/appo/stateless_cartpole_appo.py", + main = "examples/algorithms/appo/stateless_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -398,7 +396,7 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_appo_multi_gpu", size = "large", - srcs = ["tuned_examples/appo/stateless_cartpole_appo.py"], + srcs = ["examples/algorithms/appo/stateless_cartpole_appo.py"], args = [ "--as-test", "--num-learners=2", @@ -406,7 +404,7 @@ py_test( "--num-cpus=7", "--num-env-runners=6", ], - main = 
"tuned_examples/appo/stateless_cartpole_appo.py", + main = "examples/algorithms/appo/stateless_cartpole_appo.py", tags = [ "exclusive", "learning_tests", @@ -421,47 +419,47 @@ py_test( # MultiAgentStatelessCartPole # py_test( # name = "learning_tests_multi_agent_stateless_cartpole_appo", -# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py", +# main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py", # tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"], # size = "large", -# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"], +# srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"], # args = ["--as-test"] # ) # py_test( # name = "learning_tests_multi_agent_stateless_cartpole_appo_gpu", -# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py", +# main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py", # tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"], # size = "large", -# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"], +# srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"], # args = ["--as-test", "--num-agents=2", "--num-gpus-per-learner=1"] # ) # py_test( # name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_cpu", -# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py", +# main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py", # tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"], # size = "large", -# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"], +# srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"], # args = ["--as-test", "--num-learners=2"] # ) # py_test( # name = 
"learning_tests_multi_agent_stateless_cartpole_appo_multi_gpu", -# main = "tuned_examples/appo/multi_agent_stateless_cartpole_appo.py", +# main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py", # tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"], # size = "large", -# srcs = ["tuned_examples/appo/multi_agent_stateless_cartpole_appo.py"], +# srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"], # args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"] # ) # Pendulum py_test( name = "learning_tests_pendulum_appo", size = "large", - srcs = ["tuned_examples/appo/pendulum_appo.py"], + srcs = ["examples/algorithms/appo/pendulum_appo.py"], args = [ "--as-test", "--num-cpus=6", "--num-env-runners=4", ], - main = "tuned_examples/appo/pendulum_appo.py", + main = "examples/algorithms/appo/pendulum_appo.py", tags = [ "exclusive", "learning_tests", @@ -475,7 +473,7 @@ py_test( py_test( name = "learning_tests_multi_agent_pong_appo_multi_gpu", size = "large", - srcs = ["tuned_examples/appo/multi_agent_pong_appo.py"], + srcs = ["examples/algorithms/appo/multi_agent_pong_appo.py"], args = [ "--stop-iters=3", "--num-agents=2", @@ -483,7 +481,7 @@ py_test( "--num-gpus-per-learner=1", "--num-aggregator-actors-per-learner=1", ], - main = "tuned_examples/appo/multi_agent_pong_appo.py", + main = "examples/algorithms/appo/multi_agent_pong_appo.py", tags = [ "exclusive", "learning_tests", @@ -499,8 +497,8 @@ py_test( name = "learning_tests_multi_agent_cartpole_w_100_policies_appo_old_api_stack", size = "large", srcs = ["tests/run_regression_tests.py"], - args = ["--dir=tuned_examples/appo"], - data = ["tuned_examples/appo/multi-agent-cartpole-w-100-policies-appo.py"], + args = ["--dir=examples/algorithms/appo"], + data = ["examples/algorithms/appo/multi-agent-cartpole-w-100-policies-appo.py"], main = "tests/run_regression_tests.py", tags = [ 
"exclusive", @@ -516,7 +514,7 @@ py_test( py_test( name = "learning_tests_cartpole_bc", size = "medium", - srcs = ["tuned_examples/bc/cartpole_bc.py"], + srcs = ["examples/algorithms/bc/cartpole_bc.py"], args = [ "--as-test", ], @@ -524,7 +522,7 @@ py_test( data = [ "tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/bc/cartpole_bc.py", + main = "examples/algorithms/bc/cartpole_bc.py", tags = [ "exclusive", "learning_tests", @@ -538,7 +536,7 @@ py_test( py_test( name = "learning_tests_cartpole_bc_gpu", size = "medium", - srcs = ["tuned_examples/bc/cartpole_bc.py"], + srcs = ["examples/algorithms/bc/cartpole_bc.py"], args = [ "--as-test", "--num-gpus-per-learner=1", @@ -547,7 +545,7 @@ py_test( data = [ "tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/bc/cartpole_bc.py", + main = "examples/algorithms/bc/cartpole_bc.py", tags = [ "exclusive", "gpu", @@ -564,7 +562,7 @@ py_test( py_test( name = "learning_tests_cartpole_bc_with_offline_evaluation", size = "medium", - srcs = ["tuned_examples/bc/cartpole_bc_with_offline_evaluation.py"], + srcs = ["examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py"], args = [ "--as-test", "--offline-evaluation-interval=1", @@ -574,7 +572,7 @@ py_test( data = [ "tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/bc/cartpole_bc_with_offline_evaluation.py", + main = "examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py", tags = [ "exclusive", "learning_tests", @@ -588,7 +586,7 @@ py_test( py_test( name = "learning_tests_cartpole_bc_with_offline_evaluation_gpu", size = "medium", - srcs = ["tuned_examples/bc/cartpole_bc_with_offline_evaluation.py"], + srcs = ["examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py"], args = [ "--as-test", "--num-gpus-per-learner=1", @@ -600,7 +598,7 @@ py_test( data = [ "tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/bc/cartpole_bc_with_offline_evaluation.py", + main = 
"examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py", tags = [ "exclusive", "learning_tests", @@ -617,7 +615,7 @@ py_test( py_test( name = "learning_tests_pendulum_cql", size = "large", - srcs = ["tuned_examples/cql/pendulum_cql.py"], + srcs = ["examples/algorithms/cql/pendulum_cql.py"], args = [ "--as-test", ], @@ -625,7 +623,7 @@ py_test( data = [ "tests/data/pendulum/pendulum-v1_enormous", ], - main = "tuned_examples/cql/pendulum_cql.py", + main = "examples/algorithms/cql/pendulum_cql.py", tags = [ "exclusive", "learning_tests", @@ -643,7 +641,7 @@ py_test( py_test( name = "learning_tests_pendulum_cql_gpu", size = "large", - srcs = ["tuned_examples/cql/pendulum_cql.py"], + srcs = ["examples/algorithms/cql/pendulum_cql.py"], args = [ "--as-test", "--num-gpus-per-learner=1", @@ -652,7 +650,7 @@ py_test( data = [ "tests/data/pendulum/pendulum-v1_enormous", ], - main = "tuned_examples/cql/pendulum_cql.py", + main = "examples/algorithms/cql/pendulum_cql.py", tags = [ "exclusive", "gpu", @@ -672,11 +670,11 @@ py_test( py_test( name = "learning_tests_cartpole_dqn", size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/cartpole_dqn.py"], args = [ "--as-test", ], - main = "tuned_examples/dqn/cartpole_dqn.py", + main = "examples/algorithms/dqn/cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -690,13 +688,13 @@ py_test( py_test( name = "learning_tests_cartpole_dqn_gpu", size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/cartpole_dqn.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/dqn/cartpole_dqn.py", + main = "examples/algorithms/dqn/cartpole_dqn.py", tags = [ "exclusive", "gpu", @@ -711,12 +709,12 @@ py_test( py_test( name = "learning_tests_cartpole_dqn_multi_cpu", size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/cartpole_dqn.py"], args = [ 
"--as-test", "--num-learners=2", ], - main = "tuned_examples/dqn/cartpole_dqn.py", + main = "examples/algorithms/dqn/cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -730,13 +728,13 @@ py_test( py_test( name = "learning_tests_cartpole_dqn_multi_gpu", size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/cartpole_dqn.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/dqn/cartpole_dqn.py", + main = "examples/algorithms/dqn/cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -754,13 +752,13 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_dqn", size = "large", - srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=4", ], - main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py", + main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -774,7 +772,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_dqn_gpu", size = "large", - srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"], args = [ "--as-test", "--num-agents=2", @@ -782,7 +780,7 @@ py_test( "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py", + main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py", tags = [ "exclusive", "gpu", @@ -797,14 +795,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_dqn_multi_cpu", size = "large", - srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=5", "--num-learners=2", ], - main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py", + main = 
"examples/algorithms/dqn/multi_agent_cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -818,7 +816,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_dqn_multi_gpu", size = "large", - srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"], + srcs = ["examples/algorithms/dqn/multi_agent_cartpole_dqn.py"], args = [ "--as-test", "--num-agents=2", @@ -826,7 +824,7 @@ py_test( "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py", + main = "examples/algorithms/dqn/multi_agent_cartpole_dqn.py", tags = [ "exclusive", "learning_tests", @@ -844,13 +842,13 @@ py_test( # py_test( # name = "learning_tests_pendulum_dreamerv3_gpu", # size = "large", -# srcs = ["tuned_examples/dreamerv3/pendulum_dreamerv3.py"], +# srcs = ["examples/algorithms/dreamerv3/pendulum_dreamerv3.py"], # args = [ # "--as-test", # "--num-gpus-per-learner=1", # "--num-learners=1", # ], -# main = "tuned_examples/marwil/cartpole_marwil.py", +# main = "examples/algorithms/marwil/cartpole_marwil.py", # tags = [ # "exclusive", # "gpu", @@ -867,11 +865,11 @@ py_test( py_test( name = "learning_tests_cartpole_impala", size = "large", - srcs = ["tuned_examples/impala/cartpole_impala.py"], + srcs = ["examples/algorithms/impala/cartpole_impala.py"], args = [ "--as-test", ], - main = "tuned_examples/impala/cartpole_impala.py", + main = "examples/algorithms/impala/cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -884,12 +882,12 @@ py_test( py_test( name = "learning_tests_cartpole_impala_gpu", size = "large", - srcs = ["tuned_examples/impala/cartpole_impala.py"], + srcs = ["examples/algorithms/impala/cartpole_impala.py"], args = [ "--as-test", "--num-gpus-per-learner=1", ], - main = "tuned_examples/impala/cartpole_impala.py", + main = "examples/algorithms/impala/cartpole_impala.py", tags = [ "exclusive", "gpu", @@ -904,12 +902,12 @@ py_test( py_test( name = "learning_tests_cartpole_impala_multi_cpu", size = "large", - srcs 
= ["tuned_examples/impala/cartpole_impala.py"], + srcs = ["examples/algorithms/impala/cartpole_impala.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/impala/cartpole_impala.py", + main = "examples/algorithms/impala/cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -923,13 +921,13 @@ py_test( py_test( name = "learning_tests_cartpole_impala_multi_gpu", size = "large", - srcs = ["tuned_examples/impala/cartpole_impala.py"], + srcs = ["examples/algorithms/impala/cartpole_impala.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/impala/cartpole_impala.py", + main = "examples/algorithms/impala/cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -945,13 +943,13 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_impala", size = "large", - srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=6", ], - main = "tuned_examples/impala/multi_agent_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -965,14 +963,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_impala_gpu", size = "large", - srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"], args = [ "--as-test", "--num-agents=2", "--num-gpus-per-learner=1", "--num-cpus=6", ], - main = "tuned_examples/impala/multi_agent_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_cartpole_impala.py", tags = [ "exclusive", "gpu", @@ -987,14 +985,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_impala_multi_cpu", size = "large", - srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"], args = 
[ "--as-test", "--num-agents=2", "--num-learners=2", "--num-cpus=7", ], - main = "tuned_examples/impala/multi_agent_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1008,7 +1006,7 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_impala_multi_gpu", size = "large", - srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/multi_agent_cartpole_impala.py"], args = [ "--as-test", "--num-agents=2", @@ -1016,7 +1014,7 @@ py_test( "--num-gpus-per-learner=1", "--num-cpus=7", ], - main = "tuned_examples/impala/multi_agent_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1032,11 +1030,11 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_impala", size = "large", - srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/stateless_cartpole_impala.py"], args = [ "--as-test", ], - main = "tuned_examples/impala/stateless_cartpole_impala.py", + main = "examples/algorithms/impala/stateless_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1050,13 +1048,13 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_impala_multi_gpu", size = "large", - srcs = ["tuned_examples/impala/stateless_cartpole_impala.py"], + srcs = ["examples/algorithms/impala/stateless_cartpole_impala.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/impala/stateless_cartpole_impala.py", + main = "examples/algorithms/impala/stateless_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1072,11 +1070,11 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_impala", size = "large", - srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"], + srcs = 
["examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py"], args = [ "--as-test", ], - main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py", + main = "examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py", tags = [ "exclusive", "learning_tests", @@ -1088,10 +1086,10 @@ py_test( ) # py_test( # name = "learning_tests_multi_agent_stateless_cartpole_impala_multi_gpu", -# main = "tuned_examples/impala/multi_agent_stateless_cartpole_impala.py", +# main = "examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py", # tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"], # size = "large", -# srcs = ["tuned_examples/impala/multi_agent_stateless_cartpole_impala.py"], +# srcs = ["examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py"], # args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"] # ) @@ -1100,7 +1098,7 @@ py_test( py_test( name = "learning_tests_pendulum_iql", size = "large", - srcs = ["tuned_examples/iql/pendulum_iql.py"], + srcs = ["examples/algorithms/iql/pendulum_iql.py"], args = [ "--as-test", "--num-cpus=32", @@ -1109,7 +1107,7 @@ py_test( data = [ "tests/data/pendulum/pendulum-v1_enormous", ], - main = "tuned_examples/iql/pendulum_iql.py", + main = "examples/algorithms/iql/pendulum_iql.py", tags = [ "exclusive", "learning_tests", @@ -1124,7 +1122,7 @@ py_test( py_test( name = "learning_tests_pendulum_iql_gpu", size = "large", - srcs = ["tuned_examples/iql/pendulum_iql.py"], + srcs = ["examples/algorithms/iql/pendulum_iql.py"], args = [ "--as-test", "--num-cpus=32", @@ -1134,7 +1132,7 @@ py_test( data = [ "tests/data/pendulum/pendulum-v1_enormous", ], - main = "tuned_examples/iql/pendulum_iql.py", + main = "examples/algorithms/iql/pendulum_iql.py", tags = [ "exclusive", "gpu", @@ -1151,7 +1149,7 @@ py_test( py_test( name = "learning_tests_cartpole_marwil", size = "large", - srcs = 
["tuned_examples/marwil/cartpole_marwil.py"], + srcs = ["examples/algorithms/marwil/cartpole_marwil.py"], args = [ "--as-test", ], @@ -1159,7 +1157,7 @@ py_test( data = [ "tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/marwil/cartpole_marwil.py", + main = "examples/algorithms/marwil/cartpole_marwil.py", tags = [ "exclusive", "learning_tests", @@ -1174,7 +1172,7 @@ py_test( py_test( name = "learning_tests_cartpole_marwil_gpu", size = "large", - srcs = ["tuned_examples/marwil/cartpole_marwil.py"], + srcs = ["examples/algorithms/marwil/cartpole_marwil.py"], args = [ "--as-test", "--num-gpus-per-learner=1", @@ -1183,7 +1181,7 @@ py_test( data = [ "tests/data/cartpole/cartpole-v1_large", ], - main = "tuned_examples/marwil/cartpole_marwil.py", + main = "examples/algorithms/marwil/cartpole_marwil.py", tags = [ "exclusive", "gpu", @@ -1200,11 +1198,11 @@ py_test( py_test( name = "learning_tests_cartpole_ppo", size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_ppo.py"], args = [ "--as-test", ], - main = "tuned_examples/ppo/cartpole_ppo.py", + main = "examples/algorithms/ppo/cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1217,13 +1215,13 @@ py_test( py_test( name = "learning_tests_cartpole_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_ppo.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/cartpole_ppo.py", + main = "examples/algorithms/ppo/cartpole_ppo.py", tags = [ "exclusive", "gpu", @@ -1238,12 +1236,12 @@ py_test( py_test( name = "learning_tests_cartpole_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_ppo.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/ppo/cartpole_ppo.py", + main = "examples/algorithms/ppo/cartpole_ppo.py", tags = [ "exclusive", 
"learning_tests", @@ -1257,13 +1255,13 @@ py_test( py_test( name = "learning_tests_cartpole_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_ppo.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/cartpole_ppo.py", + main = "examples/algorithms/ppo/cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1279,12 +1277,12 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_ppo", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", ], - main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1297,14 +1295,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py", tags = [ "exclusive", "gpu", @@ -1319,13 +1317,13 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", ], - main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1339,14 +1337,14 @@ py_test( py_test( name = "learning_tests_multi_agent_cartpole_ppo_multi_gpu", size = "large", - srcs = 
["tuned_examples/ppo/multi_agent_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1362,11 +1360,11 @@ py_test( py_test( name = "learning_tests_cartpole_truncated_ppo", size = "large", - srcs = ["tuned_examples/ppo/cartpole_truncated_ppo.py"], + srcs = ["examples/algorithms/ppo/cartpole_truncated_ppo.py"], args = [ "--as-test", ], - main = "tuned_examples/ppo/cartpole_truncated_ppo.py", + main = "examples/algorithms/ppo/cartpole_truncated_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1380,11 +1378,11 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_ppo", size = "large", - srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"], args = [ "--as-test", ], - main = "tuned_examples/ppo/stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1397,13 +1395,13 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/stateless_cartpole_ppo.py", tags = [ "exclusive", "gpu", @@ -1418,12 +1416,12 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/ppo/stateless_cartpole_ppo.py", + main 
= "examples/algorithms/ppo/stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1437,13 +1435,13 @@ py_test( py_test( name = "learning_tests_stateless_cartpole_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1459,12 +1457,12 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_ppo", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", ], - main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1477,14 +1475,14 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py", tags = [ "exclusive", "gpu", @@ -1499,13 +1497,13 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", ], - main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py", + 
main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1519,14 +1517,14 @@ py_test( py_test( name = "learning_tests_multi_agent_stateless_cartpole_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py", + main = "examples/algorithms/ppo/multi_agent_stateless_cartpole_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1542,13 +1540,13 @@ py_test( py_test( name = "learning_tests_multi_agent_footsies_ppo", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"], args = [ "--as-test", "--num-env-runners=6", "--evaluation-num-env-runners=2", ], - main = "tuned_examples/ppo/multi_agent_footsies_ppo.py", + main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1560,7 +1558,7 @@ py_test( py_test( name = "learning_tests_multi_agent_footsies_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"], args = [ "--as-test", "--num-env-runners=20", @@ -1568,7 +1566,7 @@ py_test( "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_footsies_ppo.py", + main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1581,14 +1579,14 @@ py_test( py_test( name = "learning_tests_multi_agent_footsies_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"], args = [ "--as-test", "--num-env-runners=6", "--evaluation-num-env-runners=2", 
"--num-learners=2", ], - main = "tuned_examples/ppo/multi_agent_footsies_ppo.py", + main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1600,7 +1598,7 @@ py_test( py_test( name = "learning_tests_multi_agent_footsies_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_footsies_ppo.py"], args = [ "--as-test", "--num-env-runners=20", @@ -1608,7 +1606,7 @@ py_test( "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_footsies_ppo.py", + main = "examples/algorithms/ppo/multi_agent_footsies_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1622,11 +1620,11 @@ py_test( py_test( name = "learning_tests_pendulum_ppo", size = "large", - srcs = ["tuned_examples/ppo/pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/pendulum_ppo.py"], args = [ "--as-test", ], - main = "tuned_examples/ppo/pendulum_ppo.py", + main = "examples/algorithms/ppo/pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1639,13 +1637,13 @@ py_test( py_test( name = "learning_tests_pendulum_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/pendulum_ppo.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/pendulum_ppo.py", + main = "examples/algorithms/ppo/pendulum_ppo.py", tags = [ "exclusive", "gpu", @@ -1660,12 +1658,12 @@ py_test( py_test( name = "learning_tests_pendulum_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/pendulum_ppo.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/ppo/pendulum_ppo.py", + main = "examples/algorithms/ppo/pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1679,13 +1677,13 @@ py_test( py_test( name = "learning_tests_pendulum_ppo_multi_gpu", size = "large", - srcs = 
["tuned_examples/ppo/pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/pendulum_ppo.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/pendulum_ppo.py", + main = "examples/algorithms/ppo/pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1701,12 +1699,12 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"], args = [ "--as-test", "--num-agents=2", ], - main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", + main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1719,14 +1717,14 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", + main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py", tags = [ "exclusive", "gpu", @@ -1741,13 +1739,13 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo_multi_cpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"], args = [ "--as-test", "--num-agents=2", "--num-learners=2", ], - main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", + main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1761,14 +1759,14 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo_multi_gpu", size = "large", - srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], + srcs = ["examples/algorithms/ppo/multi_agent_pendulum_ppo.py"], args = [ "--as-test", "--num-agents=2", 
"--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", + main = "examples/algorithms/ppo/multi_agent_pendulum_ppo.py", tags = [ "exclusive", "learning_tests", @@ -1785,11 +1783,11 @@ py_test( py_test( name = "learning_tests_mountaincar_sac", size = "large", - srcs = ["tuned_examples/sac/mountaincar_sac.py"], + srcs = ["examples/algorithms/sac/mountaincar_sac.py"], args = [ "--as-test", ], - main = "tuned_examples/sac/mountaincar_sac.py", + main = "examples/algorithms/sac/mountaincar_sac.py", tags = [ "exclusive", "learning_tests", @@ -1802,13 +1800,13 @@ py_test( py_test( name = "learning_tests_mountaincar_sac_gpu", size = "large", - srcs = ["tuned_examples/sac/mountaincar_sac.py"], + srcs = ["examples/algorithms/sac/mountaincar_sac.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/mountaincar_sac.py", + main = "examples/algorithms/sac/mountaincar_sac.py", tags = [ "exclusive", "gpu", @@ -1822,12 +1820,12 @@ py_test( py_test( name = "learning_tests_mountaincar_sac_multi_cpu", size = "large", - srcs = ["tuned_examples/sac/mountaincar_sac.py"], + srcs = ["examples/algorithms/sac/mountaincar_sac.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/sac/mountaincar_sac.py", + main = "examples/algorithms/sac/mountaincar_sac.py", tags = [ "exclusive", "learning_tests", @@ -1840,13 +1838,13 @@ py_test( py_test( name = "learning_tests_mountaincar_sac_multi_gpu", size = "large", - srcs = ["tuned_examples/sac/mountaincar_sac.py"], + srcs = ["examples/algorithms/sac/mountaincar_sac.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/mountaincar_sac.py", + main = "examples/algorithms/sac/mountaincar_sac.py", tags = [ "exclusive", "learning_tests", @@ -1861,11 +1859,11 @@ py_test( py_test( name = "learning_tests_pendulum_sac", size = "large", - srcs = ["tuned_examples/sac/pendulum_sac.py"], 
+ srcs = ["examples/algorithms/sac/pendulum_sac.py"], args = [ "--as-test", ], - main = "tuned_examples/sac/pendulum_sac.py", + main = "examples/algorithms/sac/pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1878,13 +1876,13 @@ py_test( py_test( name = "learning_tests_pendulum_sac_gpu", size = "large", - srcs = ["tuned_examples/sac/pendulum_sac.py"], + srcs = ["examples/algorithms/sac/pendulum_sac.py"], args = [ "--as-test", "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/pendulum_sac.py", + main = "examples/algorithms/sac/pendulum_sac.py", tags = [ "exclusive", "gpu", @@ -1898,12 +1896,12 @@ py_test( py_test( name = "learning_tests_pendulum_sac_multi_cpu", size = "large", - srcs = ["tuned_examples/sac/pendulum_sac.py"], + srcs = ["examples/algorithms/sac/pendulum_sac.py"], args = [ "--as-test", "--num-learners=2", ], - main = "tuned_examples/sac/pendulum_sac.py", + main = "examples/algorithms/sac/pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1916,13 +1914,13 @@ py_test( py_test( name = "learning_tests_pendulum_sac_multi_gpu", size = "large", - srcs = ["tuned_examples/sac/pendulum_sac.py"], + srcs = ["examples/algorithms/sac/pendulum_sac.py"], args = [ "--as-test", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/pendulum_sac.py", + main = "examples/algorithms/sac/pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1937,13 +1935,13 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_sac", size = "large", - srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"], + srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"], args = [ "--as-test", "--num-agents=2", "--num-cpus=4", ], - main = "tuned_examples/sac/multi_agent_pendulum_sac.py", + main = "examples/algorithms/sac/multi_agent_pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1956,7 +1954,7 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_sac_gpu", size = 
"large", - srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"], + srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"], args = [ "--as-test", "--num-agents=2", @@ -1964,7 +1962,7 @@ py_test( "--num-learners=1", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/multi_agent_pendulum_sac.py", + main = "examples/algorithms/sac/multi_agent_pendulum_sac.py", tags = [ "exclusive", "gpu", @@ -1978,12 +1976,12 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_sac_multi_cpu", size = "large", - srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"], + srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"], args = [ "--num-agents=2", "--num-learners=2", ], - main = "tuned_examples/sac/multi_agent_pendulum_sac.py", + main = "examples/algorithms/sac/multi_agent_pendulum_sac.py", tags = [ "exclusive", "learning_tests", @@ -1996,13 +1994,13 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_sac_multi_gpu", size = "large", - srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"], + srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"], args = [ "--num-agents=2", "--num-learners=2", "--num-gpus-per-learner=1", ], - main = "tuned_examples/sac/multi_agent_pendulum_sac.py", + main = "examples/algorithms/sac/multi_agent_pendulum_sac.py", tags = [ "exclusive", "learning_tests", From 5100f821ad91303e965b4b542ff9fed81b0fe36f Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Fri, 21 Nov 2025 18:12:06 +0000 Subject: [PATCH 3/9] Gemini review Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index df13bc2836e0..6d5484d44827 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -497,8 +497,8 @@ py_test( name = "learning_tests_multi_agent_cartpole_w_100_policies_appo_old_api_stack", size = "large", srcs = ["tests/run_regression_tests.py"], - args = ["--dir=examples/algorithms/appo"], - data = 
["examples/algorithms/appo/multi-agent-cartpole-w-100-policies-appo.py"], + args = ["--dir=examples/_old_api_stack/algorithms/"], + data = ["examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py"], main = "tests/run_regression_tests.py", tags = [ "exclusive", @@ -848,7 +848,7 @@ py_test( # "--num-gpus-per-learner=1", # "--num-learners=1", # ], -# main = "examples/algorithms/marwil/cartpole_marwil.py", +# main = "examples/algorithms/dreamerv3/pendulum_dreamerv3.py", # tags = [ # "exclusive", # "gpu", From a0dd0c3286fbfb1d441fff11a01116173413ec78 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Mon, 24 Nov 2025 11:06:37 +0000 Subject: [PATCH 4/9] update offline data path Signed-off-by: Mark Towers --- rllib/examples/algorithms/bc/cartpole_bc.py | 5 ++--- .../algorithms/bc/cartpole_bc_with_offline_evaluation.py | 4 ++-- rllib/examples/algorithms/bc/pendulum_bc.py | 4 ++-- rllib/examples/algorithms/cql/pendulum_cql.py | 4 ++-- rllib/examples/algorithms/iql/pendulum_iql.py | 4 ++-- rllib/examples/algorithms/marwil/cartpole_marwil.py | 4 ++-- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/rllib/examples/algorithms/bc/cartpole_bc.py b/rllib/examples/algorithms/bc/cartpole_bc.py index cb2e3ee9d074..57618f0739e7 100644 --- a/rllib/examples/algorithms/bc/cartpole_bc.py +++ b/rllib/examples/algorithms/bc/cartpole_bc.py @@ -24,9 +24,8 @@ ), "This tuned example works only with `CartPole-v1`." # Define the data paths. 
-data_path = "tests/data/cartpole/cartpole-v1_large" -base_path = Path(__file__).parents[2] -print(f"base_path={base_path}") +data_path = "offline/tests/data/cartpole/cartpole-v1_large" +base_path = Path(__file__).parents[3] data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py index 5f8d53865820..30a2d4bb5a1e 100644 --- a/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py +++ b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py @@ -50,8 +50,8 @@ ), "This tuned example works only with `CartPole-v1`." # Define the data paths. -data_path = "tests/data/cartpole/cartpole-v1_large" -base_path = Path(__file__).parents[2] +data_path = "offline/tests/data/cartpole/cartpole-v1_large" +base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/examples/algorithms/bc/pendulum_bc.py b/rllib/examples/algorithms/bc/pendulum_bc.py index 28fb7c8f184e..b4417949d906 100644 --- a/rllib/examples/algorithms/bc/pendulum_bc.py +++ b/rllib/examples/algorithms/bc/pendulum_bc.py @@ -23,8 +23,8 @@ ), "This tuned example works only with `Pendulum-v1`." # Define the data paths. 
-data_path = "tests/data/pendulum/pendulum-v1_large" -base_path = Path(__file__).parents[2] +data_path = "offline/tests/data/pendulum/pendulum-v1_large" +base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/examples/algorithms/cql/pendulum_cql.py b/rllib/examples/algorithms/cql/pendulum_cql.py index 391e7a7376d0..984c3626fae9 100644 --- a/rllib/examples/algorithms/cql/pendulum_cql.py +++ b/rllib/examples/algorithms/cql/pendulum_cql.py @@ -23,10 +23,10 @@ ), "This tuned example works only with `Pendulum-v1`." # Define the base path relative to this file. -base_path = Path(__file__).parents[2] +base_path = Path(__file__).parents[3] # Use the larger data set of Pendulum we have. Note, these are # parquet data, the default in `AlgorithmConfig.offline_data`. -data_path = base_path / "tests/data/pendulum/pendulum-v1_enormous" +data_path = base_path / "offline/tests/data/pendulum/pendulum-v1_enormous" # Define the configuration. config = ( diff --git a/rllib/examples/algorithms/iql/pendulum_iql.py b/rllib/examples/algorithms/iql/pendulum_iql.py index 6b5fd07e8f2c..eea94390d0fb 100644 --- a/rllib/examples/algorithms/iql/pendulum_iql.py +++ b/rllib/examples/algorithms/iql/pendulum_iql.py @@ -23,8 +23,8 @@ ), "This tuned example works only with `Pendulum-v1`." # Define the data paths. 
-data_path = "tests/data/pendulum/pendulum-v1_enormous" -base_path = Path(__file__).parents[2] +data_path = "offline/tests/data/pendulum/pendulum-v1_enormous" +base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/examples/algorithms/marwil/cartpole_marwil.py b/rllib/examples/algorithms/marwil/cartpole_marwil.py index c758bae0f238..dd20d9aadcb7 100644 --- a/rllib/examples/algorithms/marwil/cartpole_marwil.py +++ b/rllib/examples/algorithms/marwil/cartpole_marwil.py @@ -23,8 +23,8 @@ ), "This tuned example works only with `CartPole-v1`." # Define the data paths. -data_path = "tests/data/cartpole/cartpole-v1_large" -base_path = Path(__file__).parents[2] +data_path = "offline/tests/data/cartpole/cartpole-v1_large" +base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path print(f"data_path={data_path}") From 5a4d28b4716edc70940625acf697511a1a86a6f3 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Mon, 24 Nov 2025 11:13:06 +0000 Subject: [PATCH 5/9] update tuned_example file paths Signed-off-by: Mark Towers --- rllib/algorithms/dreamerv3/README.md | 14 +++++++------- rllib/benchmarks/ppo/benchmark_atari_ppo.py | 2 +- .../_old_api_stack/algorithms/pendulum-cql.yaml | 2 +- rllib/examples/multi_agent/self_play_footsies.py | 4 ++-- rllib/examples/offline_rl/custom_input_api.py | 2 +- rllib/examples/offline_rl/offline_rl.py | 4 ++-- rllib/utils/tests/run_memory_leak_tests.py | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/rllib/algorithms/dreamerv3/README.md b/rllib/algorithms/dreamerv3/README.md index 8db9fcbae9f1..f36de087c00d 100644 --- a/rllib/algorithms/dreamerv3/README.md +++ b/rllib/algorithms/dreamerv3/README.md @@ -42,18 +42,18 @@ Here are some examples on how to set these config settings within your `DreamerV [documentation page 
here](https://docs.ray.io/en/latest/rllib/index.html#rllib-in-60-seconds). Use the config examples and templates in the -[tuned_examples folder](../../tuned_examples/dreamerv3) +[examples folder](../../examples/algorithms/dreamerv3) in combination with the following scripts and command lines in order to run RLlib's DreamerV3 algorithm in your experiments: -### [Atari100k](../../tuned_examples/dreamerv3/atari_100k_dreamerv3.py) +### [Atari100k](../../examples/algorithms/dreamerv3/atari_100k_dreamerv3.py) ```shell -$ cd ray/rllib/tuned_examples/dreamerv3/ +$ cd ray/rllib/examples/algorithms/dreamerv3/ $ python atari_100k_dreamerv3.py --env ale_py:ALE/Pong-v5 ``` -### [DeepMind Control Suite (vision)](../../tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py) +### [DeepMind Control Suite (vision)](../../examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py) ```shell -$ cd ray/rllib/tuned_examples/dreamerv3/ +$ cd ray/rllib/examples/algorithms/dreamerv3/ $ python dm_control_suite_vision_dreamerv3.py --env DMC/cartpole/swingup ``` Other `--env` options for the DM Control Suite would be `--env DMC/hopper/hop`, `--env DMC/walker/walk`, etc.. @@ -122,8 +122,8 @@ $ python flappy_bird.py ``` This should be it. Feel free to try out running this on multiple GPUs using these -more advanced config examples [here (Atari100k)](../../tuned_examples/dreamerv3/atari_100k_dreamerv3.py) and -[here (DM Control Suite)](../../tuned_examples/dreamerv3/dm_control_suite_vision_dreamerv3.py). +more advanced config examples [here (Atari100k)](../../examples/algorithms/dreamerv3/atari_100k_dreamerv3.py) and +[here (DM Control Suite)](../../examples/algorithms/dreamerv3/dm_control_suite_vision_dreamerv3.py). Also see the notes below on good recipes for running on multiple GPUs. 
IMPORTANT: DreamerV3 out-of-the-box only supports image observation spaces of diff --git a/rllib/benchmarks/ppo/benchmark_atari_ppo.py b/rllib/benchmarks/ppo/benchmark_atari_ppo.py index d62e18b01407..ad8f05691fb7 100644 --- a/rllib/benchmarks/ppo/benchmark_atari_ppo.py +++ b/rllib/benchmarks/ppo/benchmark_atari_ppo.py @@ -96,7 +96,7 @@ # Compile the base command running the actual `tuned_example` script. base_commands = [ "python", - "../../tuned_examples/ppo/atari_ppo.py", + "../../examples/algorithms/ppo/atari_ppo.py", f"--num-env-runners={args.num_env_runners}" if args.num_env_runners else "", f"--num-learners={args.num_learners}", f"--num-gpus-per-learner={args.num_gpus_per_learner}", diff --git a/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml index 6858c17b3cb2..fee655b012a5 100644 --- a/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml +++ b/rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml @@ -1,6 +1,6 @@ # @OldAPIStack # Given a SAC-generated offline file generated via: -# rllib train -f tuned_examples/sac/pendulum-sac.yaml --no-ray-ui +# rllib train -f examples/_old_api_stack/algorithms/pendulum-sac.yaml --no-ray-ui # Pendulum CQL can attain ~ -300 reward in 10k from that file. pendulum-cql: diff --git a/rllib/examples/multi_agent/self_play_footsies.py b/rllib/examples/multi_agent/self_play_footsies.py index 2cc5213eced2..a641262c7c48 100644 --- a/rllib/examples/multi_agent/self_play_footsies.py +++ b/rllib/examples/multi_agent/self_play_footsies.py @@ -2,14 +2,14 @@ Multi-agent RLlib Footsies Simplified Example (PPO) About: - - This example as a simplified version of "rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py", + - This example is a simplified version of "rllib/examples/algorithms/ppo/multi_agent_footsies_ppo.py", which has more detailed comments and instructions. Please refer to that example for more information.
- This example is created to test the self-play training progression with footsies. - Simplified version runs with single learner (cpu), single env runner, and single eval env runner. """ from pathlib import Path -from ray.rllib.tuned_examples.ppo.multi_agent_footsies_ppo import ( +from ray.rllib.examples.ppo.multi_agent_footsies_ppo import ( config, env_creator, stop, diff --git a/rllib/examples/offline_rl/custom_input_api.py b/rllib/examples/offline_rl/custom_input_api.py index d6fd2f6c1d1d..3cc7ac4f8c60 100644 --- a/rllib/examples/offline_rl/custom_input_api.py +++ b/rllib/examples/offline_rl/custom_input_api.py @@ -87,7 +87,7 @@ def input_creator(ioctx: IOContext) -> InputReader: # we register our custom input creator with this convenient function register_input("custom_input", input_creator) - # Config modified from rllib/tuned_examples/cql/pendulum-cql.yaml + # Config modified from rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml default_config = get_trainable_cls(args.run).get_default_config() config = ( default_config.environment("Pendulum-v1", clip_actions=True) diff --git a/rllib/examples/offline_rl/offline_rl.py b/rllib/examples/offline_rl/offline_rl.py index b4bf817300df..80d71891ef65 100644 --- a/rllib/examples/offline_rl/offline_rl.py +++ b/rllib/examples/offline_rl/offline_rl.py @@ -9,7 +9,7 @@ Generate the offline json file by running an SAC algo until it reaches expert level on your command line. For example: $ cd ray -$ rllib train -f rllib/tuned_examples/sac/pendulum-sac.yaml --no-ray-ui +$ rllib train -f rllib/examples/_old_api_stack/algorithms/pendulum-sac.yaml --no-ray-ui Also make sure that in the above SAC yaml file (pendulum-sac.yaml), you specify an additional "output" key with any path on your local @@ -55,7 +55,7 @@ if __name__ == "__main__": args = parser.parse_args() - # See rllib/tuned_examples/cql/pendulum-cql.yaml for comparison. + # See rllib/examples/_old_api_stack/algorithms/pendulum-cql.yaml for comparison.
config = ( cql.CQLConfig() .api_stack( diff --git a/rllib/utils/tests/run_memory_leak_tests.py b/rllib/utils/tests/run_memory_leak_tests.py index 8685049fb03d..598026d76a38 100644 --- a/rllib/utils/tests/run_memory_leak_tests.py +++ b/rllib/utils/tests/run_memory_leak_tests.py @@ -11,9 +11,9 @@ # tags = ["memory_leak_tests"], # size = "medium", # 5min timeout # srcs = ["tests/test_memory_leak.py"], -# data = glob(["tuned_examples/ppo/*.yaml"]), +# data = glob(["examples/_old_api_stack/algorithms/*.yaml"]), # # Pass `BAZEL` option and the path to look for yaml files. -# args = ["BAZEL", "tuned_examples/ppo/memory-leak-test-ppo.yaml"] +# args = ["BAZEL", "examples/_old_api_stack/algorithms/memory-leak-test-ppo.yaml"] # ) import argparse From 0f2d5bd4349b1ae378b12bf049b1eb953d18ba82 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Mon, 24 Nov 2025 12:38:07 +0000 Subject: [PATCH 6/9] Fix file paths Signed-off-by: Mark Towers --- rllib/examples/algorithms/bc/cartpole_bc.py | 2 +- .../algorithms/bc/cartpole_bc_with_offline_evaluation.py | 2 +- rllib/examples/algorithms/bc/pendulum_bc.py | 2 +- rllib/examples/algorithms/cql/pendulum_cql.py | 2 +- rllib/examples/algorithms/iql/pendulum_iql.py | 2 +- rllib/examples/algorithms/marwil/cartpole_marwil.py | 2 +- rllib/examples/multi_agent/self_play_footsies.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rllib/examples/algorithms/bc/cartpole_bc.py b/rllib/examples/algorithms/bc/cartpole_bc.py index 57618f0739e7..076a045f1e7b 100644 --- a/rllib/examples/algorithms/bc/cartpole_bc.py +++ b/rllib/examples/algorithms/bc/cartpole_bc.py @@ -24,7 +24,7 @@ ), "This tuned example works only with `CartPole-v1`." # Define the data paths.
-data_path = "offline/tests/data/cartpole/cartpole-v1_large" +data_path = "tests/data/cartpole/cartpole-v1_large" base_path = Path(__file__).parents[3] data_path = "local://" / base_path / data_path print(f"data_path={data_path}") diff --git a/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py index 30a2d4bb5a1e..a18a088cf8df 100644 --- a/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py +++ b/rllib/examples/algorithms/bc/cartpole_bc_with_offline_evaluation.py @@ -50,7 +50,7 @@ ), "This tuned example works only with `CartPole-v1`." # Define the data paths. -data_path = "offline/tests/data/cartpole/cartpole-v1_large" +data_path = "tests/data/cartpole/cartpole-v1_large" base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path diff --git a/rllib/examples/algorithms/bc/pendulum_bc.py b/rllib/examples/algorithms/bc/pendulum_bc.py index b4417949d906..a262e7aa79e4 100644 --- a/rllib/examples/algorithms/bc/pendulum_bc.py +++ b/rllib/examples/algorithms/bc/pendulum_bc.py @@ -23,7 +23,7 @@ ), "This tuned example works only with `Pendulum-v1`." # Define the data paths. -data_path = "offline/tests/data/pendulum/pendulum-v1_large" +data_path = "tests/data/pendulum/pendulum-v1_large" base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path diff --git a/rllib/examples/algorithms/cql/pendulum_cql.py b/rllib/examples/algorithms/cql/pendulum_cql.py index 984c3626fae9..722bb28a4cd6 100644 --- a/rllib/examples/algorithms/cql/pendulum_cql.py +++ b/rllib/examples/algorithms/cql/pendulum_cql.py @@ -26,7 +26,7 @@ base_path = Path(__file__).parents[3] # Use the larger data set of Pendulum we have. Note, these are # parquet data, the default in `AlgorithmConfig.offline_data`. 
-data_path = base_path / "offline/tests/data/pendulum/pendulum-v1_enormous" +data_path = base_path / "tests/data/pendulum/pendulum-v1_enormous" # Define the configuration. config = ( diff --git a/rllib/examples/algorithms/iql/pendulum_iql.py b/rllib/examples/algorithms/iql/pendulum_iql.py index eea94390d0fb..720edc434923 100644 --- a/rllib/examples/algorithms/iql/pendulum_iql.py +++ b/rllib/examples/algorithms/iql/pendulum_iql.py @@ -23,7 +23,7 @@ ), "This tuned example works only with `Pendulum-v1`." # Define the data paths. -data_path = "offline/tests/data/pendulum/pendulum-v1_enormous" +data_path = "tests/data/pendulum/pendulum-v1_enormous" base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path diff --git a/rllib/examples/algorithms/marwil/cartpole_marwil.py b/rllib/examples/algorithms/marwil/cartpole_marwil.py index dd20d9aadcb7..e8cad25ac06d 100644 --- a/rllib/examples/algorithms/marwil/cartpole_marwil.py +++ b/rllib/examples/algorithms/marwil/cartpole_marwil.py @@ -23,7 +23,7 @@ ), "This tuned example works only with `CartPole-v1`." # Define the data paths. 
-data_path = "offline/tests/data/cartpole/cartpole-v1_large" +data_path = "tests/data/cartpole/cartpole-v1_large" base_path = Path(__file__).parents[3] print(f"base_path={base_path}") data_path = "local://" / base_path / data_path diff --git a/rllib/examples/multi_agent/self_play_footsies.py b/rllib/examples/multi_agent/self_play_footsies.py index a641262c7c48..da9047aac019 100644 --- a/rllib/examples/multi_agent/self_play_footsies.py +++ b/rllib/examples/multi_agent/self_play_footsies.py @@ -9,7 +9,7 @@ """ from pathlib import Path -from ray.rllib.examples.ppo.multi_agent_footsies_ppo import ( +from ray.rllib.examples.algorithms.ppo.multi_agent_footsies_ppo import ( config, env_creator, stop, From dc3be81a50799315887453aeca29a14ce80b09a0 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Wed, 26 Nov 2025 11:18:15 +0000 Subject: [PATCH 7/9] Update rllib release test directory and release test paths Signed-off-by: Mark Towers --- release/release_tests.yaml | 4 ++-- release/rllib_tests/example_algorithms | 1 + release/rllib_tests/tuned_examples | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) create mode 120000 release/rllib_tests/example_algorithms delete mode 120000 release/rllib_tests/tuned_examples diff --git a/release/release_tests.yaml b/release/release_tests.yaml index e9641c7ed77e..408ba58a401e 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -2029,7 +2029,7 @@ run: timeout: 1500 # expected 1000 seconds - script: python tuned_examples/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test + script: python example_algorithms/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test - name: rllib_learning_tests_halfcheetah_appo_torch python: "3.12" @@ -2047,7 +2047,7 @@ run: timeout: 3000 # expected 2000 seconds - script: python tuned_examples/appo/halfcheetah_appo.py --num-learners=1 --num-env-runners=12 --as-release-test + script: python example_algorithms/appo/halfcheetah_appo.py 
--num-learners=1 --num-env-runners=12 --as-release-test ######################## # Core Nightly Tests diff --git a/release/rllib_tests/example_algorithms b/release/rllib_tests/example_algorithms new file mode 120000 index 000000000000..64b332356b30 --- /dev/null +++ b/release/rllib_tests/example_algorithms @@ -0,0 +1 @@ +../../rllib/examples/algorithms \ No newline at end of file diff --git a/release/rllib_tests/tuned_examples b/release/rllib_tests/tuned_examples deleted file mode 120000 index 6258eb4f971f..000000000000 --- a/release/rllib_tests/tuned_examples +++ /dev/null @@ -1 +0,0 @@ -../../rllib/tuned_examples \ No newline at end of file From 1250bc06a31fffbda81e6adaa4bab97286aeb9f3 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Wed, 26 Nov 2025 13:55:08 +0000 Subject: [PATCH 8/9] remove regression test Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 0d686c303adb..29947e6728fd 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -481,23 +481,6 @@ py_test( ], ) -#@OldAPIStack -py_test( - name = "learning_tests_multi_agent_cartpole_w_100_policies_appo_old_api_stack", - size = "large", - srcs = ["algorithms/tests/run_regression_tests.py"], - args = ["--dir=examples/_old_api_stack/algorithms/"], - data = ["examples/_old_api_stack/algorithms/multi-agent-cartpole-w-100-policies-appo.py"], - main = "talgorithms/ests/run_regression_tests.py", - tags = [ - "exclusive", - "learning_tests", - "learning_tests_discrete", - "learning_tests_pytorch_use_all_core", - "team:rllib", - ], -) - # BC # CartPole py_test( From 9f710e2a9fac2190e2f19e53794b7b1df8e43405 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Wed, 26 Nov 2025 17:45:53 +0000 Subject: [PATCH 9/9] Remove commented code Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 92 +---------------------------------------------- 1 file changed, 1 insertion(+), 91 deletions(-) diff --git a/rllib/BUILD.bazel 
b/rllib/BUILD.bazel index 29947e6728fd..f2a2ef3d2a94 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -148,7 +148,7 @@ py_test( # Tag: learning_tests # # This will test python/yaml config files -# inside rllib/examples/algorithm/[algo-name] for actual learning success. +# inside rllib/examples/algorithms/[algo-name] for actual learning success. # -------------------------------------------------------------------- # APPO @@ -172,15 +172,6 @@ py_test( ], ) -# TODO (sven): For some weird reason, this test runs extremely slow on the CI (not on cluster, not locally) -> taking this out for now ... -# py_test( -# name = "learning_tests_cartpole_appo_gpu", -# main = "examples/algorithms/appo/cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"], -# size = "large", -# srcs = ["examples/algorithms/appo/cartpole_appo.py"], -# args = ["--as-test", "--num-gpus-per-learner=1", "--num-cpus=7", "--num-env-runners=5"] -# ) py_test( name = "learning_tests_cartpole_appo_multi_cpu", size = "large", @@ -405,39 +396,6 @@ py_test( ], ) -# MultiAgentStatelessCartPole -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_appo", -# main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"], -# size = "large", -# srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"], -# args = ["--as-test"] -# ) -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_appo_gpu", -# main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"], -# size = "large", -# srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"], -# 
args = ["--as-test", "--num-agents=2", "--num-gpus-per-learner=1"] -# ) -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_cpu", -# main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"], -# size = "large", -# srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"], -# args = ["--as-test", "--num-learners=2"] -# ) -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_appo_multi_gpu", -# main = "examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py", -# tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"], -# size = "large", -# srcs = ["examples/algorithms/appo/multi_agent_stateless_cartpole_appo.py"], -# args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"] -# ) # Pendulum py_test( name = "learning_tests_pendulum_appo", @@ -808,31 +766,6 @@ py_test( ], ) -# DreamerV3 -# takes too long (up to 20-30min to learn -200 on 1 GPU) -# Pendulum -# py_test( -# name = "learning_tests_pendulum_dreamerv3_gpu", -# size = "large", -# srcs = ["examples/algorithms/dreamerv3/pendulum_dreamerv3.py"], -# args = [ -# "--as-test", -# "--num-gpus-per-learner=1", -# "--num-learners=1", -# "--num-env-runners=4", -# ], -# main = "examples/algorithms/dreamerv3/pendulum_dreamerv3.py", -# tags = [ -# "exclusive", -# "gpu", -# "learning_tests", -# "learning_tests_continuous", -# "learning_tests_pytorch_use_all_core", -# "team:rllib", -# "torch_only", -# ], -# ) - # IMPALA # CartPole py_test( @@ -1057,14 +990,6 @@ py_test( "torch_only", ], ) -# py_test( -# name = "learning_tests_multi_agent_stateless_cartpole_impala_multi_gpu", -# main = "examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py", -# tags = ["team:rllib", "exclusive", "learning_tests", 
"torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"], -# size = "large", -# srcs = ["examples/algorithms/impala/multi_agent_stateless_cartpole_impala.py"], -# args = ["--as-test", "--num-learners=2", "--num-gpus-per-learner=1"] -# ) # IQL # Pendulum-v1 (enormous) @@ -2409,21 +2334,6 @@ py_test( ], ) -# -------------------------------------------------------------------- -# ConnectorV2 tests -# rllib/connector/ -# -# Tag: connector_v2 -# -------------------------------------------------------------------- - -# TODO (sven): Add these tests in a separate PR. -# py_test( -# name = "connectors/tests/test_connector_v2", -# tags = ["team:rllib", "connector_v2"], -# size = "small", -# srcs = ["connectors/tests/test_connector_v2.py"] -# ) - # -------------------------------------------------------------------- # Env tests # rllib/env/