diff --git a/doc/source/ray-core/examples/plot_pong_example.ipynb b/doc/source/ray-core/examples/plot_pong_example.ipynb index 70648185d0437..642199fef7f90 100644 --- a/doc/source/ray-core/examples/plot_pong_example.ipynb +++ b/doc/source/ray-core/examples/plot_pong_example.ipynb @@ -292,7 +292,7 @@ "@ray.remote\n", "class RolloutWorker(object):\n", " def __init__(self):\n", - " self.env = gym.make(\"ALE/Pong-v5\")\n", + " self.env = gym.make(\"ale_py:ALE/Pong-v5\")\n", "\n", " def compute_gradient(self, model):\n", " # Compute a simulation episode.\n", diff --git a/doc/source/rllib/doc_code/dreamerv3_inference.py b/doc/source/rllib/doc_code/dreamerv3_inference.py index 681212151693d..25b8e5a111e09 100644 --- a/doc/source/rllib/doc_code/dreamerv3_inference.py +++ b/doc/source/rllib/doc_code/dreamerv3_inference.py @@ -10,7 +10,7 @@ env_name = "CartPole-v1" # Use the vector env API. -env = gym.vector.make(env_name, num_envs=1, asynchronous=False) +env = gym.make_vec(env_name, num_envs=1, vectorization_mode="sync") terminated = truncated = False # Reset the env. diff --git a/doc/source/rllib/doc_code/training.py b/doc/source/rllib/doc_code/training.py index 451bc664cbdf2..75bf8a48f18c1 100644 --- a/doc/source/rllib/doc_code/training.py +++ b/doc/source/rllib/doc_code/training.py @@ -4,7 +4,7 @@ try: import gymnasium as gym - env = gym.make("ALE/Pong-v5") + env = gym.make("ale_py:ALE/Pong-v5") obs, infos = env.reset() except Exception: import gym diff --git a/doc/source/rllib/rllib-examples.rst b/doc/source/rllib/rllib-examples.rst index bdb4ee65dd237..148f9a7ee8511 100644 --- a/doc/source/rllib/rllib-examples.rst +++ b/doc/source/rllib/rllib-examples.rst @@ -202,7 +202,7 @@ in roughly 5min. It can be run like this on a single g5.24xlarge (or g6.24xlarge .. code-block:: bash $ cd ray/rllib/tuned_examples/ppo - $ python atari_ppo.py --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 + $ python atari_ppo.py --env=ale_py:ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 Note that some of the files in this folder are used for RLlib's daily or weekly release tests as well. diff --git a/python/requirements.txt b/python/requirements.txt index 8ddf6ea190b70..b721f735b2f4a 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -41,7 +41,7 @@ colorful rich opentelemetry-sdk fastapi -gymnasium==0.28.1 +gymnasium==1.0.0 virtualenv!=20.21.1,>=20.0.24 opentelemetry-api opencensus diff --git a/python/requirements/ml/rllib-test-requirements.txt b/python/requirements/ml/rllib-test-requirements.txt index 1c47364f6b656..027c57446e602 100644 --- a/python/requirements/ml/rllib-test-requirements.txt +++ b/python/requirements/ml/rllib-test-requirements.txt @@ -3,43 +3,32 @@ # Environment adapters. # --------------------- # Atari -gymnasium==0.28.1; python_version < "3.12" -imageio; python_version < "3.12" -ale_py==0.8.1; python_version < "3.12" +ale_py==0.10.1 +imageio==2.34.2 +opencv-python==4.8.1.78 + # For testing MuJoCo envs with gymnasium. -mujoco==2.3.6; python_version < "3.12" +mujoco==3.2.4 dm_control==1.0.12; python_version < "3.12" # For tests on PettingZoo's multi-agent envs. -pettingzoo==1.23.1 -# When installing pettingzoo, chess is missing, even though its a dependancy -# TODO: remove if a future pettingzoo and/or ray version fixes this dependancy issue. -chess==1.7.0 +pettingzoo==1.24.3 pymunk==6.2.1 -supersuit==3.8.0; python_version < "3.12" -tinyscaler==1.2.6; python_version < "3.12" -shimmy - -# Kaggle envs. 
-kaggle_environments==1.7.11 -# Unity3D testing -# TODO(sven): Add this back to rllib-requirements.txt once mlagents no longer pins torch<1.9.0 version. -#mlagents==0.28.0 -mlagents_envs==0.28.0 +tinyscaler==1.2.8 +shimmy==2.0.0 +supersuit==3.9.3 # For tests on minigrid. -minigrid -# For tests on RecSim and Kaggle envs. -# Explicitly depends on `tensorflow` and doesn't accept `tensorflow-macos` -recsim==0.2.4; (sys_platform != 'darwin' or platform_machine != 'arm64') and python_version < "3.12" -# recsim depends on dopamine-rl, but dopamine-rl pins gym <= 0.25.2, which break some envs -dopamine-rl==4.0.5; (sys_platform != 'darwin' or platform_machine != 'arm64') and python_version < "3.12" +minigrid==2.3.1 tensorflow_estimator + # DeepMind's OpenSpiel open-spiel==1.4 +# Unity3D testing +mlagents_envs==0.28.0 + # Requires libtorrent which is unavailable for arm64 -autorom[accept-rom-license]; platform_machine != "arm64" h5py==3.10.0 # Requirements for rendering. diff --git a/python/requirements_compiled.txt b/python/requirements_compiled.txt index a1043afc5b51b..1347afee24c5a 100644 --- a/python/requirements_compiled.txt +++ b/python/requirements_compiled.txt @@ -75,10 +75,10 @@ aiosqlite==0.19.0 # via ypy-websocket alabaster==0.7.13 # via sphinx -ale-py==0.8.1 ; python_version < "3.12" +ale-py==0.10.1 # via # -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt - # gym + # gymnasium alembic==1.12.1 # via # aim @@ -272,8 +272,6 @@ charset-normalizer==3.3.2 # via # requests # snowflake-connector-python -chess==1.7.0 - # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt chex==0.1.7 # via optax clang-format==12.0.1 @@ -306,7 +304,6 @@ cloudpickle==2.2.0 # -r /ray/ci/../python/requirements/test-requirements.txt # dask # distributed - # gym # gymnasium # hyperopt # mlagents-envs @@ -704,13 +701,7 @@ gsutil==5.27 # via -r /ray/ci/../python/requirements/docker/ray-docker-requirements.txt gunicorn==20.1.0 # via mlflow -gym==0.26.2 - # via - # dopamine-rl - # recsim -gym-notices==0.0.8 - # via gym -gymnasium==0.28.1 ; python_version < "3.12" +gymnasium==1.0.0 # via # -r /ray/ci/../python/requirements.txt # -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt @@ -1126,7 +1117,7 @@ msrestazure==0.6.4 # via # -r /ray/ci/../python/requirements/test-requirements.txt # azure-cli-core -mujoco==2.3.6 ; python_version < "3.12" +mujoco==3.2.4 # via # -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt # dm-control @@ -1246,7 +1237,6 @@ numpy==1.26.4 # flax # gpy # gradio - # gym # gymnasium # h5py # hpbandster @@ -1290,7 +1280,6 @@ numpy==1.26.4 # pyro-ppl # pytorch-lightning # raydp - # recsim # scikit-image # scikit-learn # scipy @@ -1489,7 +1478,7 @@ pbr==6.0.0 # sarif-om peewee==3.17.0 # via semgrep -pettingzoo==1.23.1 +pettingzoo==1.24.3 # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt pexpect==4.8.0 # via @@ -1862,8 +1851,6 @@ querystring-parser==1.2.4 # via raydp raydp==1.7.0b20231020.dev0 # via -r /ray/ci/../python/requirements/ml/data-test-requirements.txt -recsim==0.2.4 ; (sys_platform != "darwin" or platform_machine != "arm64") and python_version < "3.12" - # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt redis==4.4.2 # via -r /ray/ci/../python/requirements/test-requirements.txt regex==2024.5.15 @@ -2049,7 +2036,7 @@ shellcheck-py==0.7.1.1 # via -r /ray/ci/../python/requirements/lint-requirements.txt shellingham==1.5.4 # via typer -shimmy==1.3.0 +shimmy==2.0.0 # via -r 
/ray/ci/../python/requirements/ml/rllib-test-requirements.txt shortuuid==1.0.1 # via -r /ray/ci/../python/requirements/ml/tune-test-requirements.txt @@ -2167,9 +2154,7 @@ statsmodels==0.14.0 # via # hpbandster # statsforecast -strictyaml==1.7.3 - # via pyiceberg -supersuit==3.8.0 ; python_version < "3.12" +supersuit==3.9.3 # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt sympy==1.13.1 # via @@ -2256,7 +2241,7 @@ timm==0.9.2 # via -r /ray/ci/../python/requirements/ml/tune-test-requirements.txt tinycss2==1.3.0 # via nbconvert -tinyscaler==1.2.6 ; python_version < "3.12" +tinyscaler==1.2.8 # via # -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt # supersuit diff --git a/python/setup.py b/python/setup.py index dd8a05046992f..53f954851deea 100644 --- a/python/setup.py +++ b/python/setup.py @@ -302,7 +302,7 @@ def get_packages(self): setup_spec.extras["rllib"] = setup_spec.extras["tune"] + [ "dm_tree", - "gymnasium==0.28.1", + "gymnasium==1.0.0", "lz4", "scikit-image", "pyyaml", diff --git a/release/long_running_tests/workloads/apex.py b/release/long_running_tests/workloads/apex.py index 4aee3c40db3f2..90adcd52bc258 100644 --- a/release/long_running_tests/workloads/apex.py +++ b/release/long_running_tests/workloads/apex.py @@ -39,7 +39,7 @@ { "apex": { "run": "APEX", - "env": "ALE/Pong-v5", + "env": "ale_py:ALE/Pong-v5", "config": { "num_workers": 3, "num_gpus": 0, diff --git a/release/ml_user_tests/tune_rllib/run_connect_tests.py b/release/ml_user_tests/tune_rllib/run_connect_tests.py index d263264b29d5d..7fb4b2e73ccb8 100644 --- a/release/ml_user_tests/tune_rllib/run_connect_tests.py +++ b/release/ml_user_tests/tune_rllib/run_connect_tests.py @@ -26,7 +26,7 @@ def run(smoke_test=False, storage_path: str = None): config = ( APPOConfig() - .environment("ALE/Pong-v5", clip_rewards=True) + .environment("ale_py:ALE/Pong-v5", clip_rewards=True) .framework(tune.grid_search(["tf", "torch"])) .rollouts( rollout_fragment_length=50, diff --git a/release/ray_release/byod/requirements_byod_3.9.in b/release/ray_release/byod/requirements_byod_3.9.in index d9ffd017133f5..cca2b941f7eec 100644 --- a/release/ray_release/byod/requirements_byod_3.9.in +++ b/release/ray_release/byod/requirements_byod_3.9.in @@ -10,8 +10,8 @@ dask[complete] fastapi gcsfs==2023.5.0 gsutil -gym -gym[atari] +gymnasium +gymnasium[atari] importlib-metadata jsonschema==4.17.3 lightgbm diff --git a/release/ray_release/byod/requirements_byod_3.9.txt b/release/ray_release/byod/requirements_byod_3.9.txt index d55e3d79a7a84..a468ea0d4e831 100644 --- a/release/ray_release/byod/requirements_byod_3.9.txt +++ b/release/ray_release/byod/requirements_byod_3.9.txt @@ -1,4 +1,3 @@ - # # This file is autogenerated by pip-compile with python 3.9 # To update, run: @@ -116,30 +115,35 @@ aiosignal==1.3.1 \ # via # -c release/ray_release/byod/requirements_compiled.txt # aiohttp -ale-py==0.8.1 \ - --hash=sha256:0006d80dfe7745eb5a93444492337203c8bc7eb594a2c24c6a651c5c5b0eaf09 \ - --hash=sha256:0856ca777473ec4ae8a59f3af9580259adb0fd4a47d586a125a440c62e82fc10 \ - --hash=sha256:0ffecb5c956749596030e464827642945162170a132d093c3d4fa2d7e5725c18 \ - --hash=sha256:2d9fcfa06c74a613c5419e942ef4d3e0959533f52e94d2d4bda61d07fbfffeee \ - --hash=sha256:5fcc31f495de79ee1d6bfc0f4b7c4619948851e679bbf010035e25f23146a687 \ - --hash=sha256:6f2f6b92c8fd6189654979bbf0b305dbe0ecf82176c47f244d8c1cbc36286b89 \ - --hash=sha256:7cd74b7ee0248ef11a086c9764e142e71defd40ec8989a99232bfd2d9e8023be \ - 
--hash=sha256:817adf9a3a82c4923c731e634520a5ecf296aca0367f5c69959a96b32119d831 \ - --hash=sha256:87557db05be0e04130e2ec1bf909d3bb0b0bc034645d4f664e6baa573fe32191 \ - --hash=sha256:9773eea7505484e024beb2fff0f3bfd363db151bdb9799d70995448e196b1ded \ - --hash=sha256:ade5c32af567629164a6b49378978c728a15dc4db07ad6b679e8832d4fd3ea1f \ - --hash=sha256:ae2ba24557e0ce541ea3be13b148db2a9cfa730d83537b4cbed5e10449826e51 \ - --hash=sha256:b00f74e27815131c1a2791f3d48114363fa2708e19f09ce6b7b614cb14c9d469 \ - --hash=sha256:b2aa2f69a4169742800615970efe6914fa856e33eaf7fa9133c0e06a617a80e2 \ - --hash=sha256:c9b168eb88c87d0f3e2a778e6c5cdde4ad951d1ca8a6dc3d3679fd45398df7d1 \ - --hash=sha256:d49b550a2d9c25b63c343aa680fd81f253a3714cdc0e1835640933ebff1798ff \ - --hash=sha256:eadf9f3990b4ff2f9e5ca35889f5e2e95cddd6a353d9d857d9b4601a6e1c4e7c \ - --hash=sha256:f10b1df8774bbe3b00365748b5e0e07cf35f6a703bbaff991bc7b3b2247dccc9 \ - --hash=sha256:f278036f9b6066062abcdf0987a0ec5a8e0f22a2c7cfac925e39378d4343d490 +ale-py==0.10.1 \ + --hash=sha256:076a44a61c2518b844f765692a91d0a6b383c6592b5fdabd94fd24d4c62a54ef \ + --hash=sha256:0835ee11004efeb5a9805a09c1525242f737257a8a4f5f4f0b9b3e047e6dca86 \ + --hash=sha256:12617edc9799c73570df67a731a4293bcfd500f413e0bfa867b53fc411fa7629 \ + --hash=sha256:24b9e61a4e868a4266f8a0ef7809cc20cecedb8c10d515d14ff6078950d51d8b \ + --hash=sha256:24f7aa19e1b3b1540516942020a95f57964af71285497620e58f03b2c113424e \ + --hash=sha256:3971a8552d2f982f569c87152479901574a9fe86410e5d1a26276e7ffccb59e1 \ + --hash=sha256:3d82d81715f15598b9db50529da971d36117cda027af9d112bd2ea22cefe3bcb \ + --hash=sha256:43d63b262f4b3bfcd567ce736a5648b4193470b2691bc14e38ac0c05dfe2a7e2 \ + --hash=sha256:4dd55a52e074497f1143785a215a50706afba3111be8b4923d46cc507c16be8f \ + --hash=sha256:4f3aaea36c1671812c21b5f7c5dcf9f5f9c726f5b10cbe7a657a844de963bb55 \ + --hash=sha256:5d4f326236c95736182323a480363c7b98959fc9a4ba09d2aa5b152faa6a2d59 \ + --hash=sha256:6f0a3da4ff47f913b5c61e66571fe7fb92fc569e5babdf4b0eeee348aac1d457 \ + --hash=sha256:771d5a1cd5a50d2cf226eba45c418fb7a18b453bd332b6a2189310030eda421a \ + --hash=sha256:7733d521921452b9e644e9e31e4d5b1ba612305473c5ba0266cafb7eff6a5461 \ + --hash=sha256:82c676030b8b6543cb6969a905ff841ae6f086a2efe707542d014ef6ca4ada4e \ + --hash=sha256:92a31bd44687c6a3595fcdac35bc3238e305dd604171ba6a9cb7912bc83c99ee \ + --hash=sha256:9f30d763c38063e5579783844868c1330f89049f252e94c49534785515f785f2 \ + --hash=sha256:9fa3f3977f63b685394301432cba7fe417882cfea72424d75aaf6bf98f79a2c9 \ + --hash=sha256:b84025670cf37527348a417d7465ee193a19d0a336bcd62f943957c13fef6ebb \ + --hash=sha256:c43308af7013cb60c6f5e77cba2b9ccaed2f5e2ae444b365dce9b7ac3bb5d48f \ + --hash=sha256:c77653e47d79e60abcc21bfad7dd105784ce2649fc5bc4eaaa1de45b40112772 \ + --hash=sha256:c9fac7fe11c56ed301a409d8a940f3e764ed2929b756ebb033eadf492a3d696e \ + --hash=sha256:d3247ad68f7dda1f9c046ede74310e347114f2c191a9f4cd247f432410941eb9 \ + --hash=sha256:e0637ddc4074b814ae46db28d61aface08d7eba16ea713cdfe0734e0b18c3794 \ + --hash=sha256:f6f91ab4b2a18e24c82a33fd1d616f32d121fcd6429f9045d515960df8cdc580 # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in - # gym annotated-types==0.6.0 \ --hash=sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43 \ --hash=sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d @@ -470,7 +474,7 @@ cloudpickle==2.2.0 \ # -c release/ray_release/byod/requirements_compiled.txt # dask # distributed - # gym + # gymnasium cmake==3.28.1 \ 
--hash=sha256:0d4051d101d151d8387156c463aa45c8cd0e164f870e0ac0c8c91d3ff08528e1 \ --hash=sha256:1be8f351271f8bcbe32288066e5add642d7c32f2f8fec3f135949c2cb13dfac2 \ @@ -490,12 +494,6 @@ cmake==3.28.1 \ --hash=sha256:c82bc0eb1495cf518cb4f355b8a73e584e67d53453406c0498bacc454cf6c404 \ --hash=sha256:d0978cdd08c0ebc76f4f8543aba1381a41580dcb9c3bcffb536c41337b75aea1 # via -r release/ray_release/byod/requirements_byod_3.9.in -commonmark==0.9.1 \ - --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ - --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 - # via - # -c release/ray_release/byod/requirements_compiled.txt - # rich configargparse==1.7 \ --hash=sha256:d249da6591465c6c26df64a9f73d2536e743be2f244eb3ebe61114af2f94f86b \ --hash=sha256:e7067471884de5478c58a511e529f0f9bd1c66bfef1dea90935438d6c23306d1 @@ -675,7 +673,7 @@ diskcache==5.6.3 \ --hash=sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc \ --hash=sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19 # via petastorm -distributed==2022.10.1 \ +distributed==2022.10.1 ; python_version < "3.12" \ --hash=sha256:31abab8ecc76951875828a3689d47dc4f20226b3ec99a0dc1af6183d02dbe5fe \ --hash=sha256:42c6fe7d3bea491e23ce020879c411f2ecfecdb4914a6cb6b4a63530a7b0fa70 # via @@ -687,6 +685,12 @@ exceptiongroup==1.2.1 \ # via # anyio # pytest +farama-notifications==0.0.4 \ + --hash=sha256:13fceff2d14314cf80703c8266462ebf3733c7d165336eee998fc58e545efd18 \ + --hash=sha256:14de931035a41961f7c056361dc7f980762a143d05791ef5794a751a2caf05ae + # via + # -c release/ray_release/byod/requirements_compiled.txt + # gymnasium fastapi==0.109.2 \ --hash=sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d \ --hash=sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73 @@ -724,68 +728,84 @@ flatbuffers==23.5.26 \ # via # -c release/ray_release/byod/requirements_compiled.txt # tensorflow -frozenlist==1.4.0 \ - --hash=sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6 \ - --hash=sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01 \ - --hash=sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251 \ - --hash=sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9 \ - --hash=sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b \ - --hash=sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87 \ - --hash=sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf \ - --hash=sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f \ - --hash=sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0 \ - --hash=sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2 \ - --hash=sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b \ - --hash=sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc \ - --hash=sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c \ - --hash=sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467 \ - --hash=sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9 \ - --hash=sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1 \ - --hash=sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a \ - --hash=sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79 \ - 
--hash=sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167 \ - --hash=sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300 \ - --hash=sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf \ - --hash=sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea \ - --hash=sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2 \ - --hash=sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab \ - --hash=sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3 \ - --hash=sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb \ - --hash=sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087 \ - --hash=sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc \ - --hash=sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8 \ - --hash=sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62 \ - --hash=sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f \ - --hash=sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326 \ - --hash=sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c \ - --hash=sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431 \ - --hash=sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963 \ - --hash=sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7 \ - --hash=sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef \ - --hash=sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3 \ - --hash=sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956 \ - --hash=sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781 \ - --hash=sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472 \ - --hash=sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc \ - --hash=sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839 \ - --hash=sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672 \ - --hash=sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3 \ - --hash=sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503 \ - --hash=sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d \ - --hash=sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8 \ - --hash=sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b \ - --hash=sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc \ - --hash=sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f \ - --hash=sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559 \ - --hash=sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b \ - --hash=sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95 \ - --hash=sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb \ - --hash=sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963 \ - --hash=sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919 \ - --hash=sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f \ - --hash=sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3 \ - --hash=sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1 \ - 
--hash=sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e +frozenlist==1.4.1 \ + --hash=sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7 \ + --hash=sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98 \ + --hash=sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad \ + --hash=sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5 \ + --hash=sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae \ + --hash=sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e \ + --hash=sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a \ + --hash=sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701 \ + --hash=sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d \ + --hash=sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6 \ + --hash=sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6 \ + --hash=sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106 \ + --hash=sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75 \ + --hash=sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868 \ + --hash=sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a \ + --hash=sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0 \ + --hash=sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1 \ + --hash=sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826 \ + --hash=sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec \ + --hash=sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6 \ + --hash=sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950 \ + --hash=sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19 \ + --hash=sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0 \ + --hash=sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8 \ + --hash=sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a \ + --hash=sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09 \ + --hash=sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86 \ + --hash=sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c \ + --hash=sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5 \ + --hash=sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b \ + --hash=sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b \ + --hash=sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d \ + --hash=sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0 \ + --hash=sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea \ + --hash=sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776 \ + --hash=sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a \ + --hash=sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897 \ + --hash=sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7 \ + --hash=sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09 \ + --hash=sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9 \ + --hash=sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe \ + 
--hash=sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd \ + --hash=sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742 \ + --hash=sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09 \ + --hash=sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0 \ + --hash=sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932 \ + --hash=sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1 \ + --hash=sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a \ + --hash=sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49 \ + --hash=sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d \ + --hash=sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7 \ + --hash=sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480 \ + --hash=sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89 \ + --hash=sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e \ + --hash=sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b \ + --hash=sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82 \ + --hash=sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb \ + --hash=sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068 \ + --hash=sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8 \ + --hash=sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b \ + --hash=sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb \ + --hash=sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2 \ + --hash=sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11 \ + --hash=sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b \ + --hash=sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc \ + --hash=sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0 \ + --hash=sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497 \ + --hash=sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17 \ + --hash=sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0 \ + --hash=sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2 \ + --hash=sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439 \ + --hash=sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5 \ + --hash=sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac \ + --hash=sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825 \ + --hash=sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887 \ + --hash=sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced \ + --hash=sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74 # via # -c release/ray_release/byod/requirements_compiled.txt # aiohttp @@ -799,8 +819,9 @@ fsspec==2023.5.0 \ # gcsfs # petastorm # s3fs -future==0.18.3 \ - --hash=sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307 +future==1.0.0 \ + --hash=sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216 \ + --hash=sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05 # via # -c release/ray_release/byod/requirements_compiled.txt # petastorm @@ -1178,61 +1199,62 @@ greenlet==3.0.1 \ # via # -c 
release/ray_release/byod/requirements_compiled.txt # gevent -grpcio==1.60.0 ; sys_platform != "darwin" \ - --hash=sha256:073f959c6f570797272f4ee9464a9997eaf1e98c27cb680225b82b53390d61e6 \ - --hash=sha256:0fd3b3968ffe7643144580f260f04d39d869fcc2cddb745deef078b09fd2b328 \ - --hash=sha256:1434ca77d6fed4ea312901122dc8da6c4389738bf5788f43efb19a838ac03ead \ - --hash=sha256:1c30bb23a41df95109db130a6cc1b974844300ae2e5d68dd4947aacba5985aa5 \ - --hash=sha256:20e7a4f7ded59097c84059d28230907cd97130fa74f4a8bfd1d8e5ba18c81491 \ - --hash=sha256:2199165a1affb666aa24adf0c97436686d0a61bc5fc113c037701fb7c7fceb96 \ - --hash=sha256:297eef542156d6b15174a1231c2493ea9ea54af8d016b8ca7d5d9cc65cfcc444 \ - --hash=sha256:2aef56e85901c2397bd557c5ba514f84de1f0ae5dd132f5d5fed042858115951 \ - --hash=sha256:30943b9530fe3620e3b195c03130396cd0ee3a0d10a66c1bee715d1819001eaf \ - --hash=sha256:3b36a2c6d4920ba88fa98075fdd58ff94ebeb8acc1215ae07d01a418af4c0253 \ - --hash=sha256:428d699c8553c27e98f4d29fdc0f0edc50e9a8a7590bfd294d2edb0da7be3629 \ - --hash=sha256:43e636dc2ce9ece583b3e2ca41df5c983f4302eabc6d5f9cd04f0562ee8ec1ae \ - --hash=sha256:452ca5b4afed30e7274445dd9b441a35ece656ec1600b77fff8c216fdf07df43 \ - --hash=sha256:467a7d31554892eed2aa6c2d47ded1079fc40ea0b9601d9f79204afa8902274b \ - --hash=sha256:4b44d7e39964e808b071714666a812049765b26b3ea48c4434a3b317bac82f14 \ - --hash=sha256:4c86343cf9ff7b2514dd229bdd88ebba760bd8973dac192ae687ff75e39ebfab \ - --hash=sha256:5208a57eae445ae84a219dfd8b56e04313445d146873117b5fa75f3245bc1390 \ - --hash=sha256:5ff21e000ff2f658430bde5288cb1ac440ff15c0d7d18b5fb222f941b46cb0d2 \ - --hash=sha256:675997222f2e2f22928fbba640824aebd43791116034f62006e19730715166c0 \ - --hash=sha256:676e4a44e740deaba0f4d95ba1d8c5c89a2fcc43d02c39f69450b1fa19d39590 \ - --hash=sha256:6e306b97966369b889985a562ede9d99180def39ad42c8014628dd3cc343f508 \ - --hash=sha256:6fd9584bf1bccdfff1512719316efa77be235469e1e3295dce64538c4773840b \ - --hash=sha256:705a68a973c4c76db5d369ed573fec3367d7d196673fa86614b33d8c8e9ebb08 \ - --hash=sha256:74d7d9fa97809c5b892449b28a65ec2bfa458a4735ddad46074f9f7d9550ad13 \ - --hash=sha256:77c8a317f0fd5a0a2be8ed5cbe5341537d5c00bb79b3bb27ba7c5378ba77dbca \ - --hash=sha256:79a050889eb8d57a93ed21d9585bb63fca881666fc709f5d9f7f9372f5e7fd03 \ - --hash=sha256:7db16dd4ea1b05ada504f08d0dca1cd9b926bed3770f50e715d087c6f00ad748 \ - --hash=sha256:83f2292ae292ed5a47cdcb9821039ca8e88902923198f2193f13959360c01860 \ - --hash=sha256:87c9224acba0ad8bacddf427a1c2772e17ce50b3042a789547af27099c5f751d \ - --hash=sha256:8a97a681e82bc11a42d4372fe57898d270a2707f36c45c6676e49ce0d5c41353 \ - --hash=sha256:9073513ec380434eb8d21970e1ab3161041de121f4018bbed3146839451a6d8e \ - --hash=sha256:90bdd76b3f04bdb21de5398b8a7c629676c81dfac290f5f19883857e9371d28c \ - --hash=sha256:91229d7203f1ef0ab420c9b53fe2ca5c1fbeb34f69b3bc1b5089466237a4a134 \ - --hash=sha256:92f88ca1b956eb8427a11bb8b4a0c0b2b03377235fc5102cb05e533b8693a415 \ - --hash=sha256:95ae3e8e2c1b9bf671817f86f155c5da7d49a2289c5cf27a319458c3e025c320 \ - --hash=sha256:9e30be89a75ee66aec7f9e60086fadb37ff8c0ba49a022887c28c134341f7179 \ - --hash=sha256:a48edde788b99214613e440fce495bbe2b1e142a7f214cce9e0832146c41e324 \ - --hash=sha256:a7152fa6e597c20cb97923407cf0934e14224af42c2b8d915f48bc3ad2d9ac18 \ - --hash=sha256:a9c7b71211f066908e518a2ef7a5e211670761651039f0d6a80d8d40054047df \ - --hash=sha256:b0571a5aef36ba9177e262dc88a9240c866d903a62799e44fd4aae3f9a2ec17e \ - --hash=sha256:b0fb2d4801546598ac5cd18e3ec79c1a9af8b8f2a86283c55a5337c5aeca4b1b \ - 
--hash=sha256:b10241250cb77657ab315270b064a6c7f1add58af94befa20687e7c8d8603ae6 \ - --hash=sha256:b87efe4a380887425bb15f220079aa8336276398dc33fce38c64d278164f963d \ - --hash=sha256:b98f43fcdb16172dec5f4b49f2fece4b16a99fd284d81c6bbac1b3b69fcbe0ff \ - --hash=sha256:c193109ca4070cdcaa6eff00fdb5a56233dc7610216d58fb81638f89f02e4968 \ - --hash=sha256:c826f93050c73e7769806f92e601e0efdb83ec8d7c76ddf45d514fee54e8e619 \ - --hash=sha256:d020cfa595d1f8f5c6b343530cd3ca16ae5aefdd1e832b777f9f0eb105f5b139 \ - --hash=sha256:d6a478581b1a1a8fdf3318ecb5f4d0cda41cacdffe2b527c23707c9c1b8fdb55 \ - --hash=sha256:de2ad69c9a094bf37c1102b5744c9aec6cf74d2b635558b779085d0263166454 \ - --hash=sha256:e278eafb406f7e1b1b637c2cf51d3ad45883bb5bd1ca56bc05e4fc135dfdaa65 \ - --hash=sha256:e381fe0c2aa6c03b056ad8f52f8efca7be29fb4d9ae2f8873520843b6039612a \ - --hash=sha256:e61e76020e0c332a98290323ecfec721c9544f5b739fab925b6e8cbe1944cf19 \ - --hash=sha256:f897c3b127532e6befdcf961c415c97f320d45614daf84deba0a54e64ea2457b \ - --hash=sha256:fb464479934778d7cc5baf463d959d361954d6533ad34c3a4f1d267e86ee25fd +grpcio==1.66.2 ; sys_platform != "darwin" \ + --hash=sha256:02697eb4a5cbe5a9639f57323b4c37bcb3ab2d48cec5da3dc2f13334d72790dd \ + --hash=sha256:03b0b307ba26fae695e067b94cbb014e27390f8bc5ac7a3a39b7723fed085604 \ + --hash=sha256:05bc2ceadc2529ab0b227b1310d249d95d9001cd106aa4d31e8871ad3c428d73 \ + --hash=sha256:06de8ec0bd71be123eec15b0e0d457474931c2c407869b6c349bd9bed4adbac3 \ + --hash=sha256:0be4e0490c28da5377283861bed2941d1d20ec017ca397a5df4394d1c31a9b50 \ + --hash=sha256:12fda97ffae55e6526825daf25ad0fa37483685952b5d0f910d6405c87e3adb6 \ + --hash=sha256:1caa38fb22a8578ab8393da99d4b8641e3a80abc8fd52646f1ecc92bcb8dee34 \ + --hash=sha256:2018b053aa15782db2541ca01a7edb56a0bf18c77efed975392583725974b249 \ + --hash=sha256:20657d6b8cfed7db5e11b62ff7dfe2e12064ea78e93f1434d61888834bc86d75 \ + --hash=sha256:2335c58560a9e92ac58ff2bc5649952f9b37d0735608242973c7a8b94a6437d8 \ + --hash=sha256:31fd163105464797a72d901a06472860845ac157389e10f12631025b3e4d0453 \ + --hash=sha256:38b68498ff579a3b1ee8f93a05eb48dc2595795f2f62716e797dc24774c1aaa8 \ + --hash=sha256:3b00efc473b20d8bf83e0e1ae661b98951ca56111feb9b9611df8efc4fe5d55d \ + --hash=sha256:3ed71e81782966ffead60268bbda31ea3f725ebf8aa73634d5dda44f2cf3fb9c \ + --hash=sha256:45a3d462826f4868b442a6b8fdbe8b87b45eb4f5b5308168c156b21eca43f61c \ + --hash=sha256:49f0ca7ae850f59f828a723a9064cadbed90f1ece179d375966546499b8a2c9c \ + --hash=sha256:4e504572433f4e72b12394977679161d495c4c9581ba34a88d843eaf0f2fbd39 \ + --hash=sha256:4ea1d062c9230278793820146c95d038dc0f468cbdd172eec3363e42ff1c7d01 \ + --hash=sha256:563588c587b75c34b928bc428548e5b00ea38c46972181a4d8b75ba7e3f24231 \ + --hash=sha256:6001e575b8bbd89eee11960bb640b6da6ae110cf08113a075f1e2051cc596cae \ + --hash=sha256:66a0cd8ba6512b401d7ed46bb03f4ee455839957f28b8d61e7708056a806ba6a \ + --hash=sha256:6851de821249340bdb100df5eacfecfc4e6075fa85c6df7ee0eb213170ec8e5d \ + --hash=sha256:728bdf36a186e7f51da73be7f8d09457a03061be848718d0edf000e709418987 \ + --hash=sha256:73e3b425c1e155730273f73e419de3074aa5c5e936771ee0e4af0814631fb30a \ + --hash=sha256:73fc8f8b9b5c4a03e802b3cd0c18b2b06b410d3c1dcbef989fdeb943bd44aff7 \ + --hash=sha256:78fa51ebc2d9242c0fc5db0feecc57a9943303b46664ad89921f5079e2e4ada7 \ + --hash=sha256:7b2c86457145ce14c38e5bf6bdc19ef88e66c5fee2c3d83285c5aef026ba93b3 \ + --hash=sha256:7d69ce1f324dc2d71e40c9261d3fdbe7d4c9d60f332069ff9b2a4d8a257c7b2b \ + --hash=sha256:802d84fd3d50614170649853d121baaaa305de7b65b3e01759247e768d691ddf \ + 
--hash=sha256:80fd702ba7e432994df208f27514280b4b5c6843e12a48759c9255679ad38db8 \ + --hash=sha256:8ac475e8da31484efa25abb774674d837b343afb78bb3bcdef10f81a93e3d6bf \ + --hash=sha256:950da58d7d80abd0ea68757769c9db0a95b31163e53e5bb60438d263f4bed7b7 \ + --hash=sha256:99a641995a6bc4287a6315989ee591ff58507aa1cbe4c2e70d88411c4dcc0839 \ + --hash=sha256:9c3a99c519f4638e700e9e3f83952e27e2ea10873eecd7935823dab0c1c9250e \ + --hash=sha256:9c509a4f78114cbc5f0740eb3d7a74985fd2eff022971bc9bc31f8bc93e66a3b \ + --hash=sha256:a18e20d8321c6400185b4263e27982488cb5cdd62da69147087a76a24ef4e7e3 \ + --hash=sha256:a917d26e0fe980b0ac7bfcc1a3c4ad6a9a4612c911d33efb55ed7833c749b0ee \ + --hash=sha256:a9539f01cb04950fd4b5ab458e64a15f84c2acc273670072abe49a3f29bbad54 \ + --hash=sha256:ad2efdbe90c73b0434cbe64ed372e12414ad03c06262279b104a029d1889d13e \ + --hash=sha256:b672abf90a964bfde2d0ecbce30f2329a47498ba75ce6f4da35a2f4532b7acbc \ + --hash=sha256:bbd27c24a4cc5e195a7f56cfd9312e366d5d61b86e36d46bbe538457ea6eb8dd \ + --hash=sha256:c400ba5675b67025c8a9f48aa846f12a39cf0c44df5cd060e23fda5b30e9359d \ + --hash=sha256:c408f5ef75cfffa113cacd8b0c0e3611cbfd47701ca3cdc090594109b9fcbaed \ + --hash=sha256:c806852deaedee9ce8280fe98955c9103f62912a5b2d5ee7e3eaa284a6d8d8e7 \ + --hash=sha256:ce89f5876662f146d4c1f695dda29d4433a5d01c8681fbd2539afff535da14d4 \ + --hash=sha256:d25a14af966438cddf498b2e338f88d1c9706f3493b1d73b93f695c99c5f0e2a \ + --hash=sha256:d8d4732cc5052e92cea2f78b233c2e2a52998ac40cd651f40e398893ad0d06ec \ + --hash=sha256:d9a9724a156c8ec6a379869b23ba3323b7ea3600851c91489b871e375f710bc8 \ + --hash=sha256:e636ce23273683b00410f1971d209bf3689238cf5538d960adc3cdfe80dd0dbd \ + --hash=sha256:e88264caad6d8d00e7913996030bac8ad5f26b7411495848cc218bd3a9040b6c \ + --hash=sha256:f145cc21836c332c67baa6fc81099d1d27e266401565bf481948010d6ea32d46 \ + --hash=sha256:fb57870449dfcfac428afbb5a877829fcb0d6db9d9baa1148705739e9083880e \ + --hash=sha256:fb70487c95786e345af5e854ffec8cb8cc781bcc5df7930c4fbb7feaa72e1cdf \ + --hash=sha256:fe96281713168a3270878255983d2cb1a97e034325c8c2c25169a69289d3ecfa \ + --hash=sha256:ff1f7882e56c40b0d33c4922c15dfa30612f05fb785074a012f7cda74d1c3679 # via # -c release/ray_release/byod/requirements_compiled.txt # tensorboard @@ -1242,17 +1264,12 @@ gsutil==5.27 \ # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in -gym[atari]==0.26.2 \ - --hash=sha256:e0d882f4b54f0c65f203104c24ab8a38b039f1289986803c7d02cdbe214fbcc4 +gymnasium==1.0.0 \ + --hash=sha256:9d2b66f30c1b34fe3c2ce7fae65ecf365d0e9982d2b3d860235e773328a3b403 \ + --hash=sha256:b6f40e1e24c5bd419361e1a5b86a9117d2499baecc3a660d44dfff4c465393ad # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in -gym-notices==0.0.8 \ - --hash=sha256:ad25e200487cafa369728625fe064e88ada1346618526102659b4640f2b4b911 \ - --hash=sha256:e5f82e00823a166747b4c2a07de63b6560b1acb880638547e0cabf825a01e463 - # via - # -c release/ray_release/byod/requirements_compiled.txt - # gym h5py==3.10.0 \ --hash=sha256:012ab448590e3c4f5a8dd0f3533255bc57f80629bf7c5054cf4c87b30085063c \ --hash=sha256:212bb997a91e6a895ce5e2f365ba764debeaef5d2dca5c6fb7098d66607adf99 \ @@ -1307,14 +1324,8 @@ importlib-metadata==6.11.0 \ # -r release/ray_release/byod/requirements_byod_3.9.in # ale-py # flask - # gym + # gymnasium # markdown -importlib-resources==5.13.0 \ - --hash=sha256:82d5c6cca930697dbbd86c93333bb2c2e72861d4789a11c2662b933e5ad2b528 \ - 
--hash=sha256:9f7bd0c97b79972a6cce36a366356d16d5e13b09679c11a58f1014bfdf8e64b2 - # via - # -c release/ray_release/byod/requirements_compiled.txt - # ale-py iniconfig==2.0.0 \ --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 @@ -1403,6 +1414,12 @@ markdown==3.5.1 \ # via # -c release/ray_release/byod/requirements_compiled.txt # tensorboard +markdown-it-py==2.2.0 \ + --hash=sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30 \ + --hash=sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1 + # via + # -c release/ray_release/byod/requirements_compiled.txt + # rich markupsafe==2.1.3 \ --hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \ --hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \ @@ -1468,6 +1485,12 @@ markupsafe==2.1.3 \ # -c release/ray_release/byod/requirements_compiled.txt # jinja2 # werkzeug +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via + # -c release/ray_release/byod/requirements_compiled.txt + # markdown-it-py memray==1.10.0 ; platform_system != "Windows" and sys_platform != "darwin" and platform_machine != "aarch64" \ --hash=sha256:0a21745fb516b7a6efcd40aa7487c59e9313fcfc782d0193fcfcf00b48426874 \ --hash=sha256:22f2a47871c172a0539bd72737bb6b294fc10c510464066b825d90fcd3bb4916 \ @@ -1595,81 +1618,97 @@ msgpack==1.0.7 \ # -c release/ray_release/byod/requirements_compiled.txt # distributed # locust -multidict==6.0.4 \ - --hash=sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9 \ - --hash=sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8 \ - --hash=sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03 \ - --hash=sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710 \ - --hash=sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161 \ - --hash=sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664 \ - --hash=sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569 \ - --hash=sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067 \ - --hash=sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313 \ - --hash=sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706 \ - --hash=sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2 \ - --hash=sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636 \ - --hash=sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49 \ - --hash=sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93 \ - --hash=sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603 \ - --hash=sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0 \ - --hash=sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60 \ - --hash=sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4 \ - --hash=sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e \ - --hash=sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1 \ - --hash=sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60 \ - --hash=sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951 \ - 
--hash=sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc \ - --hash=sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe \ - --hash=sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95 \ - --hash=sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d \ - --hash=sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8 \ - --hash=sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed \ - --hash=sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2 \ - --hash=sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775 \ - --hash=sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87 \ - --hash=sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c \ - --hash=sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2 \ - --hash=sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98 \ - --hash=sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3 \ - --hash=sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe \ - --hash=sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78 \ - --hash=sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660 \ - --hash=sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176 \ - --hash=sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e \ - --hash=sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988 \ - --hash=sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c \ - --hash=sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c \ - --hash=sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0 \ - --hash=sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449 \ - --hash=sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f \ - --hash=sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde \ - --hash=sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5 \ - --hash=sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d \ - --hash=sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac \ - --hash=sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a \ - --hash=sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9 \ - --hash=sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca \ - --hash=sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11 \ - --hash=sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35 \ - --hash=sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063 \ - --hash=sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b \ - --hash=sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982 \ - --hash=sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258 \ - --hash=sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1 \ - --hash=sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52 \ - --hash=sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480 \ - --hash=sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7 \ - --hash=sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461 \ - 
--hash=sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d \ - --hash=sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc \ - --hash=sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779 \ - --hash=sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a \ - --hash=sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547 \ - --hash=sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0 \ - --hash=sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171 \ - --hash=sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf \ - --hash=sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d \ - --hash=sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba +multidict==6.0.5 \ + --hash=sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556 \ + --hash=sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c \ + --hash=sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29 \ + --hash=sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b \ + --hash=sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8 \ + --hash=sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7 \ + --hash=sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd \ + --hash=sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40 \ + --hash=sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6 \ + --hash=sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3 \ + --hash=sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c \ + --hash=sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9 \ + --hash=sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5 \ + --hash=sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae \ + --hash=sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442 \ + --hash=sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9 \ + --hash=sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc \ + --hash=sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c \ + --hash=sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea \ + --hash=sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5 \ + --hash=sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50 \ + --hash=sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182 \ + --hash=sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453 \ + --hash=sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e \ + --hash=sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600 \ + --hash=sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733 \ + --hash=sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda \ + --hash=sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241 \ + --hash=sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461 \ + --hash=sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e \ + --hash=sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e \ + --hash=sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b \ + 
--hash=sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e \ + --hash=sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7 \ + --hash=sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386 \ + --hash=sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd \ + --hash=sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9 \ + --hash=sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf \ + --hash=sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee \ + --hash=sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5 \ + --hash=sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a \ + --hash=sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271 \ + --hash=sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54 \ + --hash=sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4 \ + --hash=sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496 \ + --hash=sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb \ + --hash=sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319 \ + --hash=sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3 \ + --hash=sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f \ + --hash=sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527 \ + --hash=sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed \ + --hash=sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604 \ + --hash=sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef \ + --hash=sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8 \ + --hash=sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5 \ + --hash=sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5 \ + --hash=sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626 \ + --hash=sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c \ + --hash=sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d \ + --hash=sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c \ + --hash=sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc \ + --hash=sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc \ + --hash=sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b \ + --hash=sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38 \ + --hash=sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450 \ + --hash=sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1 \ + --hash=sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f \ + --hash=sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3 \ + --hash=sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755 \ + --hash=sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226 \ + --hash=sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a \ + --hash=sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046 \ + --hash=sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf \ + --hash=sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479 \ + 
--hash=sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e \ + --hash=sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1 \ + --hash=sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a \ + --hash=sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83 \ + --hash=sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929 \ + --hash=sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93 \ + --hash=sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a \ + --hash=sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c \ + --hash=sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44 \ + --hash=sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89 \ + --hash=sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba \ + --hash=sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e \ + --hash=sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da \ + --hash=sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24 \ + --hash=sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423 \ + --hash=sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef # via # -c release/ray_release/byod/requirements_compiled.txt # aiohttp @@ -1739,7 +1778,7 @@ numpy==1.26.4 \ # ale-py # bokeh # dask - # gym + # gymnasium # h5py # lightgbm # ml-dtypes @@ -1756,6 +1795,12 @@ numpy==1.26.4 \ # xarray # xgboost # zarr +nvidia-nccl-cu12==2.20.5 \ + --hash=sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56 \ + --hash=sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01 + # via + # -c release/ray_release/byod/requirements_compiled.txt + # xgboost oauth2client==4.1.3 \ --hash=sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac \ --hash=sha256:d486741e451287f69568a4d26d70d9acd73a2bbfa275746c535b4209891cccc6 @@ -1792,7 +1837,7 @@ packaging==23.0 \ # tensorboardx # tensorflow # xarray -pandas==1.5.3 \ +pandas==1.5.3 ; python_version < "3.12" \ --hash=sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813 \ --hash=sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792 \ --hash=sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406 \ @@ -2398,9 +2443,9 @@ requests==2.31.0 \ # locust # requests-oauthlib # tensorboard -requests-oauthlib==1.3.1 \ - --hash=sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5 \ - --hash=sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a +requests-oauthlib==2.0.0 \ + --hash=sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36 \ + --hash=sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9 # via # -c release/ray_release/byod/requirements_compiled.txt # google-auth-oauthlib @@ -2410,12 +2455,13 @@ retry-decorator==1.1.1 \ # -c release/ray_release/byod/requirements_compiled.txt # gcs-oauth2-boto-plugin # gsutil -rich==12.6.0 \ - --hash=sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e \ - --hash=sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0 +rich==13.3.2 \ + --hash=sha256:91954fe80cfb7985727a467ca98a7618e5dd15178cc2da10f553b36a93859001 \ + --hash=sha256:a104f37270bf677148d8acb07d33be1569eeee87e2d1beb286a4e9113caf6f2f # via # -c release/ray_release/byod/requirements_compiled.txt # memray + # typer 
roundrobin==0.0.4 \ --hash=sha256:7e9d19a5bd6123d99993fb935fa86d25c88bb2096e493885f61737ed0f5e9abd # via locust @@ -2506,6 +2552,12 @@ semidbm==0.5.1 \ --hash=sha256:0dd74b5e9276eb5af186ace8b74165acec0c887e746bdae60340be91b99cffaf \ --hash=sha256:add3e644dd6afcce83d1752b34ff80fa4e2b37b4ce6bce3289ad19d6f0bcd6ae # via -r release/ray_release/byod/requirements_byod_3.9.in +shellingham==1.5.4 \ + --hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \ + --hash=sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de + # via + # -c release/ray_release/byod/requirements_compiled.txt + # typer six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 @@ -2524,9 +2576,9 @@ six==1.16.0 \ # tensorboard # tensorflow # trueskill -sniffio==1.3.0 \ - --hash=sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101 \ - --hash=sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384 +sniffio==1.3.1 \ + --hash=sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 \ + --hash=sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc # via # -c release/ray_release/byod/requirements_compiled.txt # anyio @@ -2614,15 +2666,15 @@ tensorflow-io-gcs-filesystem==0.31.0 ; python_version < "3.12" \ # via # -c release/ray_release/byod/requirements_compiled.txt # tensorflow -termcolor==2.3.0 \ - --hash=sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475 \ - --hash=sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a +termcolor==2.4.0 \ + --hash=sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63 \ + --hash=sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a # via # -c release/ray_release/byod/requirements_compiled.txt # tensorflow -terminado==0.18.0 \ - --hash=sha256:1ea08a89b835dd1b8c0c900d92848147cef2537243361b2e3f4dc15df9b6fded \ - --hash=sha256:87b0d96642d0fe5f5abd7783857b9cab167f221a39ff98e3b9619a788a3c0f2e +terminado==0.18.1 \ + --hash=sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0 \ + --hash=sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in @@ -2638,9 +2690,9 @@ tomli==2.0.1 \ # via # -c release/ray_release/byod/requirements_compiled.txt # pytest -toolz==0.12.0 \ - --hash=sha256:2059bd4148deb1884bb0eb770a3cde70e7f954cfbbdc2285f1f2de01fd21eb6f \ - --hash=sha256:88c570861c440ee3f2f6037c4654613228ff40c93a6c25e0eba70d17282c6194 +toolz==0.12.1 \ + --hash=sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85 \ + --hash=sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d # via # -c release/ray_release/byod/requirements_compiled.txt # dask @@ -2702,9 +2754,9 @@ tqdm==4.64.1 \ trueskill==0.4.5 \ --hash=sha256:9d62b48d2428369d712bd9becff9f9a2caa325e1a2ab5f9392d34bff757867bb # via -r release/ray_release/byod/requirements_byod_3.9.in -typer==0.9.0 \ - --hash=sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2 \ - --hash=sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee +typer==0.12.3 \ + --hash=sha256:070d7ca53f785acbccba8e7d28b08dcd88f79f1fbda035ade0aecec71ca5c914 \ + --hash=sha256:49e73131481d804288ef62598d97a1ceef3058905aa536a1134f90891ba35482 # via # -c 
release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in @@ -2718,14 +2770,15 @@ typing-extensions==4.8.0 \ # ale-py # bokeh # fastapi + # gymnasium # pydantic # pydantic-core # starlette # tensorflow # typer -urllib3==1.26.18 \ - --hash=sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 \ - --hash=sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0 +urllib3==1.26.19 \ + --hash=sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3 \ + --hash=sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429 # via # -c release/ray_release/byod/requirements_compiled.txt # botocore @@ -2739,9 +2792,9 @@ werkzeug==2.3.8 \ # flask # locust # tensorboard -wheel==0.42.0 \ - --hash=sha256:177f9c9b0d45c47873b619f5b650346d632cdc35fb5e4d25058e09c9e581433d \ - --hash=sha256:c45be39f7882c9d34243236f2d63cbd58039e360f85d0913425fbd7ceea617a8 +wheel==0.43.0 \ + --hash=sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85 \ + --hash=sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81 # via # -c release/ray_release/byod/requirements_compiled.txt # astunparse @@ -2829,107 +2882,109 @@ xarray==2023.12.0 \ --hash=sha256:3c22b6824681762b6c3fcad86dfd18960a617bccbc7f456ce21b43a20e455fb9 \ --hash=sha256:4565dbc890de47e278346c44d6b33bb07d3427383e077a7ca8ab6606196fd433 # via -r release/ray_release/byod/requirements_byod_3.9.in -xgboost==1.7.6 \ - --hash=sha256:127cf1f5e2ec25cd41429394c6719b87af1456ce583e89f0bffd35d02ad18bcb \ - --hash=sha256:1c527554a400445e0c38186039ba1a00425dcdb4e40b37eed0e74cb39a159c47 \ - --hash=sha256:281c3c6f4fbed2d36bf95cd02a641afa95e72e9abde70064056da5e76233e8df \ - --hash=sha256:4c34675b4d2678c624ddde5d45361e7e16046923e362e4e609b88353e6b87124 \ - --hash=sha256:59b4b366d2cafc7f645e87d897983a5b59be02876194b1d213bd8d8b811d8ce8 \ - --hash=sha256:b1d5db49b199152d62bd9217c98760207d3de86d2b9d243260c573ffe638f80a +xgboost==2.1.0 \ + --hash=sha256:19d145eb847b070c32342b1bf2d7331c102783e07a484f8b13b7d759d707c6b0 \ + --hash=sha256:43b16205689249d7509daf7a6ab00ad0e6c570b3a9c263cb32b26e39d9477bb3 \ + --hash=sha256:7144980923e76ce741c7b03a14d3bd7514db6de5c7cabe96ba95b229d274f5ca \ + --hash=sha256:73673c9bb85927db7fe2e3aed6df6d35dba708cfd6767cc63d4ea11dda2dede5 \ + --hash=sha256:74904b91c42524a6c32147fe5718569e78fb65911ff4499b053f81d0964514d4 \ + --hash=sha256:840a0c6e2119d8c8f260a5dace996ea064a267f62b301a25d7d452488a7ac860 \ + --hash=sha256:b2a456eb0f3d3e8fd8ab37e44ac288292bf8ea8744c294be9fd88713d27af810 \ + --hash=sha256:cedc2e386e686795735448fd4597533acacc5ba6fb47dd910c204c468b80bb96 # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in -yarl==1.9.3 \ - --hash=sha256:09c19e5f4404574fcfb736efecf75844ffe8610606f3fccc35a1515b8b6712c4 \ - --hash=sha256:0ab5baaea8450f4a3e241ef17e3d129b2143e38a685036b075976b9c415ea3eb \ - --hash=sha256:0d155a092bf0ebf4a9f6f3b7a650dc5d9a5bbb585ef83a52ed36ba46f55cc39d \ - --hash=sha256:126638ab961633f0940a06e1c9d59919003ef212a15869708dcb7305f91a6732 \ - --hash=sha256:1a0a4f3aaa18580038cfa52a7183c8ffbbe7d727fe581300817efc1e96d1b0e9 \ - --hash=sha256:1d93461e2cf76c4796355494f15ffcb50a3c198cc2d601ad8d6a96219a10c363 \ - --hash=sha256:26a1a8443091c7fbc17b84a0d9f38de34b8423b459fb853e6c8cdfab0eacf613 \ - --hash=sha256:271d63396460b6607b588555ea27a1a02b717ca2e3f2cf53bdde4013d7790929 \ - --hash=sha256:28a108cb92ce6cf867690a962372996ca332d8cda0210c5ad487fe996e76b8bb \ - 
--hash=sha256:29beac86f33d6c7ab1d79bd0213aa7aed2d2f555386856bb3056d5fdd9dab279 \ - --hash=sha256:2c757f64afe53a422e45e3e399e1e3cf82b7a2f244796ce80d8ca53e16a49b9f \ - --hash=sha256:2dad8166d41ebd1f76ce107cf6a31e39801aee3844a54a90af23278b072f1ccf \ - --hash=sha256:2dc72e891672343b99db6d497024bf8b985537ad6c393359dc5227ef653b2f17 \ - --hash=sha256:2f3c8822bc8fb4a347a192dd6a28a25d7f0ea3262e826d7d4ef9cc99cd06d07e \ - --hash=sha256:32435d134414e01d937cd9d6cc56e8413a8d4741dea36af5840c7750f04d16ab \ - --hash=sha256:3cfa4dbe17b2e6fca1414e9c3bcc216f6930cb18ea7646e7d0d52792ac196808 \ - --hash=sha256:3d5434b34100b504aabae75f0622ebb85defffe7b64ad8f52b8b30ec6ef6e4b9 \ - --hash=sha256:4003f380dac50328c85e85416aca6985536812c082387255c35292cb4b41707e \ - --hash=sha256:44e91a669c43f03964f672c5a234ae0d7a4d49c9b85d1baa93dec28afa28ffbd \ - --hash=sha256:4a14907b597ec55740f63e52d7fee0e9ee09d5b9d57a4f399a7423268e457b57 \ - --hash=sha256:4ce77d289f8d40905c054b63f29851ecbfd026ef4ba5c371a158cfe6f623663e \ - --hash=sha256:4d6d74a97e898c1c2df80339aa423234ad9ea2052f66366cef1e80448798c13d \ - --hash=sha256:51382c72dd5377861b573bd55dcf680df54cea84147c8648b15ac507fbef984d \ - --hash=sha256:525cd69eff44833b01f8ef39aa33a9cc53a99ff7f9d76a6ef6a9fb758f54d0ff \ - --hash=sha256:53ec65f7eee8655bebb1f6f1607760d123c3c115a324b443df4f916383482a67 \ - --hash=sha256:5f74b015c99a5eac5ae589de27a1201418a5d9d460e89ccb3366015c6153e60a \ - --hash=sha256:6280353940f7e5e2efaaabd686193e61351e966cc02f401761c4d87f48c89ea4 \ - --hash=sha256:632c7aeb99df718765adf58eacb9acb9cbc555e075da849c1378ef4d18bf536a \ - --hash=sha256:6465d36381af057d0fab4e0f24ef0e80ba61f03fe43e6eeccbe0056e74aadc70 \ - --hash=sha256:66a6dbf6ca7d2db03cc61cafe1ee6be838ce0fbc97781881a22a58a7c5efef42 \ - --hash=sha256:6d350388ba1129bc867c6af1cd17da2b197dff0d2801036d2d7d83c2d771a682 \ - --hash=sha256:7217234b10c64b52cc39a8d82550342ae2e45be34f5bff02b890b8c452eb48d7 \ - --hash=sha256:721ee3fc292f0d069a04016ef2c3a25595d48c5b8ddc6029be46f6158d129c92 \ - --hash=sha256:72a57b41a0920b9a220125081c1e191b88a4cdec13bf9d0649e382a822705c65 \ - --hash=sha256:73cc83f918b69110813a7d95024266072d987b903a623ecae673d1e71579d566 \ - --hash=sha256:778df71c8d0c8c9f1b378624b26431ca80041660d7be7c3f724b2c7a6e65d0d6 \ - --hash=sha256:79e1df60f7c2b148722fb6cafebffe1acd95fd8b5fd77795f56247edaf326752 \ - --hash=sha256:7c86d0d0919952d05df880a1889a4f0aeb6868e98961c090e335671dea5c0361 \ - --hash=sha256:7eaf13af79950142ab2bbb8362f8d8d935be9aaf8df1df89c86c3231e4ff238a \ - --hash=sha256:828235a2a169160ee73a2fcfb8a000709edf09d7511fccf203465c3d5acc59e4 \ - --hash=sha256:8535e111a064f3bdd94c0ed443105934d6f005adad68dd13ce50a488a0ad1bf3 \ - --hash=sha256:88d2c3cc4b2f46d1ba73d81c51ec0e486f59cc51165ea4f789677f91a303a9a7 \ - --hash=sha256:8a2538806be846ea25e90c28786136932ec385c7ff3bc1148e45125984783dc6 \ - --hash=sha256:8dab30b21bd6fb17c3f4684868c7e6a9e8468078db00f599fb1c14e324b10fca \ - --hash=sha256:8f18a7832ff85dfcd77871fe677b169b1bc60c021978c90c3bb14f727596e0ae \ - --hash=sha256:946db4511b2d815979d733ac6a961f47e20a29c297be0d55b6d4b77ee4b298f6 \ - --hash=sha256:96758e56dceb8a70f8a5cff1e452daaeff07d1cc9f11e9b0c951330f0a2396a7 \ - --hash=sha256:9a172c3d5447b7da1680a1a2d6ecdf6f87a319d21d52729f45ec938a7006d5d8 \ - --hash=sha256:9a5211de242754b5e612557bca701f39f8b1a9408dff73c6db623f22d20f470e \ - --hash=sha256:9df9a0d4c5624790a0dea2e02e3b1b3c69aed14bcb8650e19606d9df3719e87d \ - --hash=sha256:aa4643635f26052401750bd54db911b6342eb1a9ac3e74f0f8b58a25d61dfe41 \ - 
--hash=sha256:aed37db837ecb5962469fad448aaae0f0ee94ffce2062cf2eb9aed13328b5196 \ - --hash=sha256:af52725c7c39b0ee655befbbab5b9a1b209e01bb39128dce0db226a10014aacc \ - --hash=sha256:b0b8c06afcf2bac5a50b37f64efbde978b7f9dc88842ce9729c020dc71fae4ce \ - --hash=sha256:b61e64b06c3640feab73fa4ff9cb64bd8182de52e5dc13038e01cfe674ebc321 \ - --hash=sha256:b7831566595fe88ba17ea80e4b61c0eb599f84c85acaa14bf04dd90319a45b90 \ - --hash=sha256:b8bc5b87a65a4e64bc83385c05145ea901b613d0d3a434d434b55511b6ab0067 \ - --hash=sha256:b8d51817cf4b8d545963ec65ff06c1b92e5765aa98831678d0e2240b6e9fd281 \ - --hash=sha256:b9f9cafaf031c34d95c1528c16b2fa07b710e6056b3c4e2e34e9317072da5d1a \ - --hash=sha256:bb72d2a94481e7dc7a0c522673db288f31849800d6ce2435317376a345728225 \ - --hash=sha256:c25ec06e4241e162f5d1f57c370f4078797ade95c9208bd0c60f484834f09c96 \ - --hash=sha256:c405d482c320a88ab53dcbd98d6d6f32ada074f2d965d6e9bf2d823158fa97de \ - --hash=sha256:c4472fe53ebf541113e533971bd8c32728debc4c6d8cc177f2bff31d011ec17e \ - --hash=sha256:c4b1efb11a8acd13246ffb0bee888dd0e8eb057f8bf30112e3e21e421eb82d4a \ - --hash=sha256:c5f3faeb8100a43adf3e7925d556801d14b5816a0ac9e75e22948e787feec642 \ - --hash=sha256:c6f034386e5550b5dc8ded90b5e2ff7db21f0f5c7de37b6efc5dac046eb19c10 \ - --hash=sha256:c99ddaddb2fbe04953b84d1651149a0d85214780e4d0ee824e610ab549d98d92 \ - --hash=sha256:ca6b66f69e30f6e180d52f14d91ac854b8119553b524e0e28d5291a724f0f423 \ - --hash=sha256:cccdc02e46d2bd7cb5f38f8cc3d9db0d24951abd082b2f242c9e9f59c0ab2af3 \ - --hash=sha256:cd49a908cb6d387fc26acee8b7d9fcc9bbf8e1aca890c0b2fdfd706057546080 \ - --hash=sha256:cf7a4e8de7f1092829caef66fd90eaf3710bc5efd322a816d5677b7664893c93 \ - --hash=sha256:cfd77e8e5cafba3fb584e0f4b935a59216f352b73d4987be3af51f43a862c403 \ - --hash=sha256:d34c4f80956227f2686ddea5b3585e109c2733e2d4ef12eb1b8b4e84f09a2ab6 \ - --hash=sha256:d61a0ca95503867d4d627517bcfdc28a8468c3f1b0b06c626f30dd759d3999fd \ - --hash=sha256:d81657b23e0edb84b37167e98aefb04ae16cbc5352770057893bd222cdc6e45f \ - --hash=sha256:d92d897cb4b4bf915fbeb5e604c7911021a8456f0964f3b8ebbe7f9188b9eabb \ - --hash=sha256:dd318e6b75ca80bff0b22b302f83a8ee41c62b8ac662ddb49f67ec97e799885d \ - --hash=sha256:dd952b9c64f3b21aedd09b8fe958e4931864dba69926d8a90c90d36ac4e28c9a \ - --hash=sha256:e0e7e83f31e23c5d00ff618045ddc5e916f9e613d33c5a5823bc0b0a0feb522f \ - --hash=sha256:e0f17d1df951336a02afc8270c03c0c6e60d1f9996fcbd43a4ce6be81de0bd9d \ - --hash=sha256:e2a16ef5fa2382af83bef4a18c1b3bcb4284c4732906aa69422cf09df9c59f1f \ - --hash=sha256:e36021db54b8a0475805acc1d6c4bca5d9f52c3825ad29ae2d398a9d530ddb88 \ - --hash=sha256:e73db54c967eb75037c178a54445c5a4e7461b5203b27c45ef656a81787c0c1b \ - --hash=sha256:e741bd48e6a417bdfbae02e088f60018286d6c141639359fb8df017a3b69415a \ - --hash=sha256:f7271d6bd8838c49ba8ae647fc06469137e1c161a7ef97d778b72904d9b68696 \ - --hash=sha256:fc391e3941045fd0987c77484b2799adffd08e4b6735c4ee5f054366a2e1551d \ - --hash=sha256:fc94441bcf9cb8c59f51f23193316afefbf3ff858460cb47b5758bf66a14d130 \ - --hash=sha256:fe34befb8c765b8ce562f0200afda3578f8abb159c76de3ab354c80b72244c41 \ - --hash=sha256:fe8080b4f25dfc44a86bedd14bc4f9d469dfc6456e6f3c5d9077e81a5fedfba7 \ - --hash=sha256:ff34cb09a332832d1cf38acd0f604c068665192c6107a439a92abfd8acf90fe2 +yarl==1.9.4 \ + --hash=sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51 \ + --hash=sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce \ + --hash=sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559 \ + 
--hash=sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0 \ + --hash=sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81 \ + --hash=sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc \ + --hash=sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4 \ + --hash=sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c \ + --hash=sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130 \ + --hash=sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136 \ + --hash=sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e \ + --hash=sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec \ + --hash=sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7 \ + --hash=sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1 \ + --hash=sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455 \ + --hash=sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099 \ + --hash=sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129 \ + --hash=sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10 \ + --hash=sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142 \ + --hash=sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98 \ + --hash=sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa \ + --hash=sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7 \ + --hash=sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525 \ + --hash=sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c \ + --hash=sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9 \ + --hash=sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c \ + --hash=sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8 \ + --hash=sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b \ + --hash=sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf \ + --hash=sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23 \ + --hash=sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd \ + --hash=sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27 \ + --hash=sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f \ + --hash=sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece \ + --hash=sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434 \ + --hash=sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec \ + --hash=sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff \ + --hash=sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78 \ + --hash=sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d \ + --hash=sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863 \ + --hash=sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53 \ + --hash=sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31 \ + --hash=sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15 \ + --hash=sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5 \ + --hash=sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b \ + 
--hash=sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57 \ + --hash=sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3 \ + --hash=sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1 \ + --hash=sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f \ + --hash=sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad \ + --hash=sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c \ + --hash=sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7 \ + --hash=sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2 \ + --hash=sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b \ + --hash=sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2 \ + --hash=sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b \ + --hash=sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9 \ + --hash=sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be \ + --hash=sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e \ + --hash=sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984 \ + --hash=sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4 \ + --hash=sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074 \ + --hash=sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2 \ + --hash=sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392 \ + --hash=sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91 \ + --hash=sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541 \ + --hash=sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf \ + --hash=sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572 \ + --hash=sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66 \ + --hash=sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575 \ + --hash=sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14 \ + --hash=sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5 \ + --hash=sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1 \ + --hash=sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e \ + --hash=sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551 \ + --hash=sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17 \ + --hash=sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead \ + --hash=sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0 \ + --hash=sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe \ + --hash=sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234 \ + --hash=sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0 \ + --hash=sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7 \ + --hash=sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34 \ + --hash=sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42 \ + --hash=sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385 \ + --hash=sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78 \ + --hash=sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be \ + 
--hash=sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958 \ + --hash=sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749 \ + --hash=sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec # via # -c release/ray_release/byod/requirements_compiled.txt # aiohttp @@ -2943,13 +2998,12 @@ zict==3.0.0 \ # via # -c release/ray_release/byod/requirements_compiled.txt # distributed -zipp==3.17.0 \ - --hash=sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 \ - --hash=sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0 +zipp==3.19.2 \ + --hash=sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19 \ + --hash=sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c # via # -c release/ray_release/byod/requirements_compiled.txt # importlib-metadata - # importlib-resources zope-event==5.0 \ --hash=sha256:2832e95014f4db26c47a13fdaef84cef2f4df37e66b59d8f1f4a8f319a632c26 \ --hash=sha256:bac440d8d9891b4068e2b5a2c5e2c9765a9df762944bda6955f96bb9b91e67cd @@ -2995,4 +3049,4 @@ zope-interface==6.2 \ # WARNING: The following packages were not pinned, but pip requires them to be # pinned when the requirements file includes hashes. Consider using the --allow-unsafe flag. -# setuptools +# setuptools \ No newline at end of file diff --git a/release/release_tests.yaml b/release/release_tests.yaml index 81681cff83d63..60d7c80485238 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -2716,7 +2716,7 @@ run: timeout: 43200 # 12h - script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ALE/Pong-v5 --num-gpus=1 --stop-reward=15.0 --as-release-test + script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ale_py:ALE/Pong-v5 --num-gpus=1 --stop-reward=15.0 --as-release-test alert: default @@ -2751,7 +2751,7 @@ run: timeout: 1200 - script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env=ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --stop-reward=20.0 --as-release-test + script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env=ale_py:ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --stop-reward=20.0 --as-release-test alert: default diff --git a/release/rllib_contrib/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml b/release/rllib_contrib/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml index c38c9f8fffb08..0ba5a759811f9 100644 --- a/release/rllib_contrib/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml +++ b/release/rllib_contrib/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml @@ -1,5 +1,5 @@ a2c-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: A2C # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_contrib/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml b/release/rllib_contrib/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml index 3ea52a7045250..fe6ffb7527292 100644 --- a/release/rllib_contrib/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml +++ b/release/rllib_contrib/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml @@ -1,5 +1,5 @@ a3c-pongdeterministic-v5: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: A3C # Minimum reward and total ts (in given time_total_s) to pass this test. 
pass_criteria: diff --git a/release/rllib_contrib/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml b/release/rllib_contrib/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml index 81c8fdd20e488..d825b7a3275e9 100644 --- a/release/rllib_contrib/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml +++ b/release/rllib_contrib/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml @@ -1,5 +1,5 @@ apex-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: APEX # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/appo/hybrid_stack/appo-pongnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/appo/hybrid_stack/appo-pongnoframeskip-v5.yaml index 741d5561ee362..9c6a82866f018 100644 --- a/release/rllib_tests/learning_tests/yaml_files/appo/hybrid_stack/appo-pongnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/appo/hybrid_stack/appo-pongnoframeskip-v5.yaml @@ -1,5 +1,5 @@ appo-pongnoframeskip-v5: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/appo/old_stack/appo-pongnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/appo/old_stack/appo-pongnoframeskip-v5.yaml index 9b5e5a84f9bc6..7930cf33df8c0 100644 --- a/release/rllib_tests/learning_tests/yaml_files/appo/old_stack/appo-pongnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/appo/old_stack/appo-pongnoframeskip-v5.yaml @@ -1,5 +1,5 @@ appo-pongnoframeskip-v5: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml index 2da9c8ac89ccb..61dea97452d05 100644 --- a/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml @@ -1,5 +1,5 @@ dqn-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: DQN # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml index 2a12ca0522563..80e9c8ed5e67b 100644 --- a/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml @@ -1,5 +1,5 @@ impala-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: IMPALA # Minimum reward and total ts (in given time_total_s) to pass this test. 
pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_breakout.py b/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_breakout.py index 2209ac64ea198..20987e6a4c6a6 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_breakout.py +++ b/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_breakout.py @@ -20,7 +20,7 @@ def _make_learner_connector(input_observation_space, input_action_space): # We would like our frame stacking connector to do this job. def _env_creator(cfg): return wrap_atari_for_new_api_stack( - gym.make("ALE/Breakout-v5", **cfg, **{"render_mode": "rgb_array"}), + gym.make("ale_py:ALE/Breakout-v5", **cfg, **{"render_mode": "rgb_array"}), # Perform through ConnectorV2 API. framestack=None, ) diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_pong.py b/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_pong.py index 5619eb0246e6b..b727ebc73c798 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_pong.py +++ b/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_pong.py @@ -20,7 +20,7 @@ def _make_learner_connector(input_observation_space, input_action_space): # We would like our frame stacking connector to do this job. def _env_creator(cfg): return wrap_atari_for_new_api_stack( - gym.make("ALE/Pong-v5", **cfg, **{"render_mode": "rgb_array"}), + gym.make("ale_py:ALE/Pong-v5", **cfg, **{"render_mode": "rgb_array"}), # Perform through ConnectorV2 API. framestack=None, ) diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/old_stack/ppo-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/ppo/old_stack/ppo-breakoutnoframeskip-v5.yaml index 6e892c7c5142b..62de17ab28a2c 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ppo/old_stack/ppo-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/ppo/old_stack/ppo-breakoutnoframeskip-v5.yaml @@ -1,5 +1,5 @@ ppo-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: PPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/rllib/BUILD b/rllib/BUILD index 9854e95adc98a..26b2c4426d813 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -78,7 +78,6 @@ doctest( "utils/memory.py", "offline/off_policy_estimator.py", "offline/estimators/feature_importance.py", - "env/wrappers/recsim_wrapper.py", "env/remote_vector_env.py", # Missing imports "algorithms/dreamerv3/**", @@ -2543,8 +2542,8 @@ py_test( name = "examples/envs/env_rendering_and_recording", srcs = ["examples/envs/env_rendering_and_recording.py"], tags = ["team:rllib", "exclusive", "examples"], - size = "small", - args = ["--enable-new-api-stack", "--env=CartPole-v1", "--stop-iters=3"] + size = "medium", + args = ["--enable-new-api-stack", "--env=CartPole-v1", "--stop-iters=2"] ) #@OldAPIStack diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index 8161ea0b89efa..6b7bd8cea053f 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -3562,7 +3562,7 @@ def is_atari(self) -> bool: # Not yet determined, try to figure this out. if self._is_atari is None: # Atari envs are usually specified via a string like "PongNoFrameskip-v4" - # or "ALE/Breakout-v5". + # or "ale_py:ALE/Breakout-v5". # We do NOT attempt to auto-detect Atari env for other specified types like # a callable, to avoid running heavy logics in validate(). 
# For these cases, users can explicitly set `environment(atari=True)`. diff --git a/rllib/algorithms/dreamerv3/README.md b/rllib/algorithms/dreamerv3/README.md index a92918273f64d..13a773bb02dd1 100644 --- a/rllib/algorithms/dreamerv3/README.md +++ b/rllib/algorithms/dreamerv3/README.md @@ -49,7 +49,7 @@ in combination with the following scripts and command lines in order to run RLli ### [Atari100k](../../tuned_examples/dreamerv3/atari_100k.py) ```shell $ cd ray/rllib/tuned_examples/dreamerv3/ -$ python atari_100k.py --env ALE/Pong-v5 +$ python atari_100k.py --env ale_py:ALE/Pong-v5 ``` ### [DeepMind Control Suite (vision)](../../tuned_examples/dreamerv3/dm_control_suite_vision.py) diff --git a/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py b/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py index 7fbb8fd55c2ac..87c46e2a2eaca 100644 --- a/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py +++ b/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py @@ -63,7 +63,7 @@ def test_dreamerv3_compilation(self): for env in [ "FrozenLake-v1", "CartPole-v1", - "ALE/MsPacman-v5", + "ale_py:ALE/MsPacman-v5", "Pendulum-v1", ]: print("Env={}".format(env)) diff --git a/rllib/algorithms/dreamerv3/utils/env_runner.py b/rllib/algorithms/dreamerv3/utils/env_runner.py index df725f39f4b22..19e906bdaaf9b 100644 --- a/rllib/algorithms/dreamerv3/utils/env_runner.py +++ b/rllib/algorithms/dreamerv3/utils/env_runner.py @@ -12,6 +12,7 @@ from typing import Collection, List, Optional, Tuple, Union import gymnasium as gym +from gymnasium.wrappers.vector import DictInfoToList import numpy as np import tree # pip install dm_tree @@ -75,7 +76,7 @@ def __init__( # Create the gym.vector.Env object. # Atari env. - if self.config.env.startswith("ALE/"): + if self.config.env.startswith("ale_py:ALE/"): # TODO (sven): This import currently causes a Tune test to fail. Either way, # we need to figure out how to properly setup the CI environment with # the correct versions of all gymnasium-related packages. @@ -114,17 +115,21 @@ def _entry_point(): gym.register("rllib-single-agent-env-v0", entry_point=_entry_point) - self.env = gym.vector.make( - "rllib-single-agent-env-v0", - num_envs=self.config.num_envs_per_env_runner, - asynchronous=self.config.remote_worker_envs, - wrappers=[ - partial(gym.wrappers.TimeLimit, max_episode_steps=108000), - partial(resize_v1, x_size=64, y_size=64), # resize to 64x64 - NormalizedImageEnv, - NoopResetEnv, - MaxAndSkipEnv, - ], + self.env = DictInfoToList( + gym.make_vec( + "rllib-single-agent-env-v0", + num_envs=self.config.num_envs_per_env_runner, + vectorization_mode=( + "async" if self.config.remote_worker_envs else "sync" + ), + wrappers=[ + partial(gym.wrappers.TimeLimit, max_episode_steps=108000), + partial(resize_v1, x_size=64, y_size=64), # resize to 64x64 + NormalizedImageEnv, + NoopResetEnv, + MaxAndSkipEnv, + ], + ) ) # DeepMind Control. 
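The hunk above replaces `gym.vector.make()` (removed in gymnasium 1.0) with `gym.make_vec()` and wraps the result in `DictInfoToList` so per-sub-env infos come back as a list of dicts. A minimal standalone sketch of the same pattern, with CartPole standing in for the RLlib-registered env and the Atari/resize wrappers omitted (an illustration only, not the patched code):

```python
# Minimal sketch of the gymnasium 1.0 vector-env pattern used above.
import gymnasium as gym
from gymnasium.wrappers.vector import DictInfoToList

env = DictInfoToList(
    gym.make_vec(
        "CartPole-v1",
        num_envs=4,
        # "sync" corresponds to the old `asynchronous=False`;
        # "async" runs sub-envs in separate processes.
        vectorization_mode="sync",
    )
)

obs, infos = env.reset(seed=0)  # infos is a list with one dict per sub-env
actions = env.action_space.sample()  # batched actions for all sub-envs
obs, rewards, terminateds, truncateds, infos = env.step(actions)
env.close()
```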
elif self.config.env.startswith("DMC/"): @@ -139,12 +144,16 @@ def _entry_point(): parts[1], parts[2], from_pixels=from_pixels, channels_first=False ), ) - self.env = gym.vector.make( - "dmc_env-v0", - wrappers=[ActionClip], - num_envs=self.config.num_envs_per_env_runner, - asynchronous=self.config.remote_worker_envs, - **dict(self.config.env_config), + self.env = DictInfoToList( + gym.make_vec( + "dmc_env-v0", + wrappers=[ActionClip], + num_envs=self.config.num_envs_per_env_runner, + vectorization_mode=( + "async" if self.config.remote_worker_envs else "sync" + ), + **dict(self.config.env_config), + ) ) # All other envs (gym or `tune.register_env()`'d by the user). else: @@ -162,11 +171,15 @@ def _entry_point(): env_descriptor=self.config.env, ), ) - # Create the vectorized gymnasium env. - self.env = gym.vector.make( - "dreamerv3-custom-env-v0", - num_envs=self.config.num_envs_per_env_runner, - asynchronous=False, # self.config.remote_worker_envs, + # Wrap into `DictInfoToList` wrapper to get infos as lists. + self.env = DictInfoToList( + gym.make_vec( + "dreamerv3-custom-env-v0", + num_envs=self.config.num_envs_per_env_runner, + vectorization_mode=( + "async" if self.config.remote_worker_envs else "sync" + ), + ) ) self.num_envs = self.env.num_envs assert self.num_envs == self.config.num_envs_per_env_runner @@ -185,6 +198,8 @@ def _entry_point(): # TODO (sven): DreamerV3 is currently single-agent only. self.module = self.multi_rl_module_spec.build()[DEFAULT_MODULE_ID] + self._cached_to_module = None + self.metrics = MetricsLogger() self._device = None @@ -258,7 +273,7 @@ def sample( # Sample n timesteps. if num_timesteps is not None: - return self._sample_timesteps( + return self._sample( num_timesteps=num_timesteps, explore=explore, random_actions=random_actions, @@ -269,7 +284,7 @@ def sample( # `_sample_episodes` returns only one list (with completed episodes) # return empty list for incomplete ones. return ( - self._sample_episodes( + self._sample( num_episodes=num_episodes, explore=explore, random_actions=random_actions, @@ -277,18 +292,18 @@ def sample( [], ) - def _sample_timesteps( + def _sample( self, - num_timesteps: int, + *, + num_timesteps: Optional[int] = None, + num_episodes: Optional[int] = None, explore: bool = True, random_actions: bool = False, force_reset: bool = False, ) -> List[SingleAgentEpisode]: - """Helper method to run n timesteps. + """Helper method to sample n timesteps or m episodes.""" - See docstring of self.sample() for more details. - """ - done_episodes_to_return = [] + done_episodes_to_return: List[SingleAgentEpisode] = [] # Get initial states for all `batch_size_B` rows in the forward batch. initial_states = tree.map_structure( @@ -297,193 +312,151 @@ def _sample_timesteps( ) # Have to reset the env (on all vector sub-envs). - if force_reset or self._needs_initial_reset: - obs, _ = self.env.reset() + if force_reset or num_episodes is not None or self._needs_initial_reset: + episodes = self._episodes = [None for _ in range(self.num_envs)] + self._reset_envs(episodes, initial_states) + # We just reset the env. Don't have to force this again in the next + # call to `self._sample()`. self._needs_initial_reset = False - self._episodes = [SingleAgentEpisode() for _ in range(self.num_envs)] - # Set initial obs and states in the episodes. for i in range(self.num_envs): - self._episodes[i].add_env_reset(observation=obs[i]) self._states[i] = None - - # Don't reset existing envs; continue in already started episodes. 
else: - # Pick up stored observations and states from previous timesteps. - obs = np.stack([eps.observations[-1] for eps in self._episodes]) + episodes = self._episodes - # Loop through env for n timesteps. + # Loop through `num_timesteps` timesteps or `num_episodes` episodes. ts = 0 - while ts < num_timesteps: + eps = 0 + while ( + (ts < num_timesteps) if num_timesteps is not None else (eps < num_episodes) + ): # Act randomly. if random_actions: actions = self.env.action_space.sample() - # Compute an action using our RLModule. + # Compute an action using the RLModule. else: - is_first = np.zeros((self.num_envs,)) - for i, eps in enumerate(self._episodes): - if self._states[i] is None: - is_first[i] = 1.0 - self._states[i] = {k: s[i] for k, s in initial_states.items()} - to_module = { - Columns.STATE_IN: tree.map_structure( - lambda s: self.convert_to_tensor(s), batch(self._states) - ), - Columns.OBS: self.convert_to_tensor(obs), - "is_first": self.convert_to_tensor(is_first), - } - # Explore or not. + # Env-to-module connector (already cached). + to_module = self._cached_to_module + assert to_module is not None + self._cached_to_module = None + + # RLModule forward pass: Explore or not. if explore: - outs = self.module.forward_exploration(to_module) + to_env = self.module.forward_exploration(to_module) else: - outs = self.module.forward_inference(to_module) + to_env = self.module.forward_inference(to_module) # Model outputs one-hot actions (if discrete). Convert to int actions # as well. - actions = convert_to_numpy(outs[Columns.ACTIONS]) + actions = convert_to_numpy(to_env[Columns.ACTIONS]) if isinstance(self.env.single_action_space, gym.spaces.Discrete): actions = np.argmax(actions, axis=-1) - self._states = unbatch(convert_to_numpy(outs[Columns.STATE_OUT])) + self._states = unbatch(convert_to_numpy(to_env[Columns.STATE_OUT])) - obs, rewards, terminateds, truncateds, infos = self.env.step(actions) - ts += self.num_envs + observations, rewards, terminateds, truncateds, infos = self.env.step( + actions + ) - for i in range(self.num_envs): - # The last entry in self.observations[i] is already the reset - # obs of the new episode. - if terminateds[i] or truncateds[i]: - # Finish the episode with the actual terminal observation stored in - # the info dict. - self._episodes[i].add_env_step( - observation=infos["final_observation"][i], - action=actions[i], - reward=rewards[i], - terminated=terminateds[i], - truncated=truncateds[i], + call_on_episode_start = set() + for env_index in range(self.num_envs): + # Episode has no data in it yet -> Was just reset and needs to be called + # with its `add_env_reset()` method. + if not episodes[env_index].is_reset: + episodes[env_index].add_env_reset( + observation=observations[env_index], + infos=infos[env_index], ) - self._states[i] = None - done_episodes_to_return.append(self._episodes[i]) - # Create a new episode object. - self._episodes[i] = SingleAgentEpisode(observations=[obs[i]]) + call_on_episode_start.add(env_index) + self._states[env_index] = None + + # Call `add_env_step()` method on episode. else: - self._episodes[i].add_env_step( - observation=obs[i], - action=actions[i], - reward=rewards[i], + # Only increase ts when we actually stepped (not reset'd as a reset + # does not count as a timestep). 
+ ts += 1 + episodes[env_index].add_env_step( + observation=observations[env_index], + action=actions[env_index], + reward=rewards[env_index], + infos=infos[env_index], + terminated=terminateds[env_index], + truncated=truncateds[env_index], ) - # Return done episodes ... - self._done_episodes_for_metrics.extend(done_episodes_to_return) - # ... and all ongoing episode chunks. Also, make sure, we return - # a copy and start new chunks so that callers of this function - # don't alter our ongoing and returned Episode objects. - ongoing_episodes = self._episodes - self._episodes = [eps.cut() for eps in self._episodes] - for eps in ongoing_episodes: - self._ongoing_episodes_for_metrics[eps.id_].append(eps) - - self._increase_sampled_metrics(ts) - - return done_episodes_to_return + ongoing_episodes - - def _sample_episodes( - self, - num_episodes: int, - explore: bool = True, - random_actions: bool = False, - ) -> List[SingleAgentEpisode]: - """Helper method to run n episodes. - - See docstring of `self.sample()` for more details. - """ - done_episodes_to_return = [] - - obs, _ = self.env.reset() - episodes = [SingleAgentEpisode() for _ in range(self.num_envs)] - - # Multiply states n times according to our vector env batch size (num_envs). - states = tree.map_structure( - lambda s: np.repeat(s, self.num_envs, axis=0), - convert_to_numpy(self.module.get_initial_state()), - ) - is_first = np.ones((self.num_envs,)) - - for i in range(self.num_envs): - episodes[i].add_env_reset(observation=obs[i]) - - eps = 0 - while eps < num_episodes: - if random_actions: - actions = self.env.action_space.sample() - else: - batch = { + # Cache results as we will do the RLModule forward pass only in the next + # `while`-iteration. + if self.module is not None: + is_first = np.zeros((self.num_envs,)) + for env_index, episode in enumerate(episodes): + if self._states[env_index] is None: + is_first[env_index] = 1.0 + self._states[env_index] = { + k: s[env_index] for k, s in initial_states.items() + } + self._cached_to_module = { Columns.STATE_IN: tree.map_structure( - lambda s: self.convert_to_tensor(s), states + lambda s: self.convert_to_tensor(s), batch(self._states) ), - Columns.OBS: self.convert_to_tensor(obs), + Columns.OBS: self.convert_to_tensor(observations), "is_first": self.convert_to_tensor(is_first), } - if explore: - outs = self.module.forward_exploration(batch) - else: - outs = self.module.forward_inference(batch) + for env_index in range(self.num_envs): + # Episode is not done. + if not episodes[env_index].is_done: + continue - actions = convert_to_numpy(outs[Columns.ACTIONS]) - if isinstance(self.env.single_action_space, gym.spaces.Discrete): - actions = np.argmax(actions, axis=-1) - states = convert_to_numpy(outs[Columns.STATE_OUT]) + eps += 1 - obs, rewards, terminateds, truncateds, infos = self.env.step(actions) + # Then finalize (numpy'ize) the episode. + done_episodes_to_return.append(episodes[env_index].finalize()) - for i in range(self.num_envs): - # The last entry in self.observations[i] is already the reset - # obs of the new episode. - if terminateds[i] or truncateds[i]: - eps += 1 - - episodes[i].add_env_step( - observation=infos["final_observation"][i], - action=actions[i], - reward=rewards[i], - terminated=terminateds[i], - truncated=truncateds[i], - ) - done_episodes_to_return.append(episodes[i]) - - # Also early-out if we reach the number of episodes within this - # for-loop. 
- if eps == num_episodes: - break - - # Reset h-states to the model's initial ones b/c we are starting a - # new episode. - for k, v in convert_to_numpy( - self.module.get_initial_state() - ).items(): - states[k][i] = v - is_first[i] = True - - episodes[i] = SingleAgentEpisode(observations=[obs[i]]) - else: - episodes[i].add_env_step( - observation=obs[i], - action=actions[i], - reward=rewards[i], - ) - is_first[i] = False + # Also early-out if we reach the number of episodes within this + # for-loop. + if eps == num_episodes: + break + + # Create a new episode object with no data in it and execute + # `on_episode_created` callback (before the `env.reset()` call). + episodes[env_index] = SingleAgentEpisode( + observation_space=self.env.single_observation_space, + action_space=self.env.single_action_space, + ) + # Return done episodes ... + # TODO (simon): Check, how much memory this attribute uses. self._done_episodes_for_metrics.extend(done_episodes_to_return) + # ... and all ongoing episode chunks. - # If user calls sample(num_timesteps=..) after this, we must reset again - # at the beginning. - self._needs_initial_reset = True + # Also, make sure we start new episode chunks (continuing the ongoing episodes + # from the to-be-returned chunks). + ongoing_episodes_to_return = [] + # Only if we are doing individual timesteps: We have to maybe cut an ongoing + # episode and continue building it on the next call to `sample()`. + if num_timesteps is not None: + ongoing_episodes_continuations = [ + episode.cut(len_lookback_buffer=self.config.episode_lookback_horizon) + for episode in episodes + ] + + for episode in episodes: + # Just started Episodes do not have to be returned. There is no data + # in them anyway. + if episode.t == 0: + continue + episode.validate() + self._ongoing_episodes_for_metrics[episode.id_].append(episode) + # Return finalized (numpy'ized) Episodes. + ongoing_episodes_to_return.append(episode.finalize()) + + # Continue collecting into the cut Episode chunks. + self._episodes = ongoing_episodes_continuations - ts = sum(map(len, done_episodes_to_return)) self._increase_sampled_metrics(ts) - return done_episodes_to_return + # Return collected episode data. + return done_episodes_to_return + ongoing_episodes_to_return def get_spaces(self): return { @@ -564,6 +537,51 @@ def stop(self): # Close our env object via gymnasium's API. self.env.close() + def _reset_envs(self, episodes, initial_states): + # Create n new episodes and make the `on_episode_created` callbacks. + for env_index in range(self.num_envs): + self._new_episode(env_index, episodes) + + # Erase all cached ongoing episodes (these will never be completed and + # would thus never be returned/cleaned by `get_metrics` and cause a memory + # leak). + self._ongoing_episodes_for_metrics.clear() + + observations, infos = self.env.reset() + observations = unbatch(observations) + + # Set initial obs and infos in the episodes. + for env_index in range(self.num_envs): + episodes[env_index].add_env_reset( + observation=observations[env_index], + infos=infos[env_index], + ) + + # Run the env-to-module connector to make sure the reset-obs/infos have + # properly been processed (if applicable). 
+ self._cached_to_module = None + if self.module: + is_first = np.zeros((self.num_envs,)) + for i, eps in enumerate(self._episodes): + if self._states[i] is None: + is_first[i] = 1.0 + self._states[i] = {k: s[i] for k, s in initial_states.items()} + self._cached_to_module = { + Columns.STATE_IN: tree.map_structure( + lambda s: self.convert_to_tensor(s), batch(self._states) + ), + Columns.OBS: self.convert_to_tensor(observations), + "is_first": self.convert_to_tensor(is_first), + } + # self._cached_to_module = TODO!! + + def _new_episode(self, env_index, episodes=None): + episodes = episodes if episodes is not None else self._episodes + episodes[env_index] = SingleAgentEpisode( + observation_space=self.env.single_observation_space, + action_space=self.env.single_action_space, + ) + def _increase_sampled_metrics(self, num_steps): # Per sample cycle stats. self.metrics.log_value( diff --git a/rllib/algorithms/ppo/tests/test_ppo.py b/rllib/algorithms/ppo/tests/test_ppo.py index ae51de75389dc..3febf97fb2cad 100644 --- a/rllib/algorithms/ppo/tests/test_ppo.py +++ b/rllib/algorithms/ppo/tests/test_ppo.py @@ -98,7 +98,7 @@ def test_ppo_compilation_and_schedule_mixins(self): # "CliffWalking-v0", "CartPole-v1", "Pendulum-v1", - ]: # "ALE/Breakout-v5"]: + ]: # "ale_py:ALE/Breakout-v5"]: print("Env={}".format(env)) for lstm in [False]: print("LSTM={}".format(lstm)) diff --git a/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py b/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py index 24453758f6f07..edb2b3b3122e1 100644 --- a/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py +++ b/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py @@ -155,7 +155,7 @@ def test_ppo_compilation_w_connectors(self): num_iterations = 2 - for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]: + for env in ["FrozenLake-v1", "ale_py:ALE/MsPacman-v5"]: print("Env={}".format(env)) for lstm in [False, True]: print("LSTM={}".format(lstm)) @@ -216,7 +216,7 @@ def test_ppo_compilation_and_schedule_mixins(self): num_iterations = 2 - for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]: + for env in ["FrozenLake-v1", "ale_py:ALE/MsPacman-v5"]: print("Env={}".format(env)) for lstm in [False, True]: print("LSTM={}".format(lstm)) diff --git a/rllib/algorithms/ppo/tests/test_ppo_rl_module.py b/rllib/algorithms/ppo/tests/test_ppo_rl_module.py index de3d3f42f424b..2b1df1bf33e84 100644 --- a/rllib/algorithms/ppo/tests/test_ppo_rl_module.py +++ b/rllib/algorithms/ppo/tests/test_ppo_rl_module.py @@ -63,7 +63,7 @@ def tearDownClass(cls): def test_rollouts(self): # TODO: Add FrozenLake-v1 to cover LSTM case. - env_names = ["CartPole-v1", "Pendulum-v1", "ALE/Breakout-v5"] + env_names = ["CartPole-v1", "Pendulum-v1", "ale_py:ALE/Breakout-v5"] fwd_fns = ["forward_exploration", "forward_inference"] lstm = [True, False] config_combinations = [env_names, fwd_fns, lstm] @@ -98,7 +98,7 @@ def test_rollouts(self): def test_forward_train(self): # TODO: Add FrozenLake-v1 to cover LSTM case. 
- env_names = ["CartPole-v1", "Pendulum-v1", "ALE/Breakout-v5"] + env_names = ["CartPole-v1", "Pendulum-v1", "ale_py:ALE/Breakout-v5"] lstm = [False, True] config_combinations = [env_names, lstm] for config in itertools.product(*config_combinations): diff --git a/rllib/algorithms/tests/test_algorithm_config.py b/rllib/algorithms/tests/test_algorithm_config.py index 1d7a32e87a2ac..11d55a741be32 100644 --- a/rllib/algorithms/tests/test_algorithm_config.py +++ b/rllib/algorithms/tests/test_algorithm_config.py @@ -145,11 +145,11 @@ def test_rollout_fragment_length(self): def test_detect_atari_env(self): """Tests that we can properly detect Atari envs.""" config = AlgorithmConfig().environment( - env="ALE/Breakout-v5", env_config={"frameskip": 1} + env="ale_py:ALE/Breakout-v5", env_config={"frameskip": 1} ) self.assertTrue(config.is_atari) - config = AlgorithmConfig().environment(env="ALE/Pong-v5") + config = AlgorithmConfig().environment(env="ale_py:ALE/Pong-v5") self.assertTrue(config.is_atari) config = AlgorithmConfig().environment(env="CartPole-v1") @@ -158,7 +158,7 @@ def test_detect_atari_env(self): config = AlgorithmConfig().environment( env=lambda ctx: gym.make( - "ALE/Breakout-v5", + "ale_py:ALE/Breakout-v5", frameskip=1, ) ) diff --git a/rllib/algorithms/tests/test_callbacks_on_env_runner.py b/rllib/algorithms/tests/test_callbacks_on_env_runner.py index 42abf70918417..ae8443b5b811a 100644 --- a/rllib/algorithms/tests/test_callbacks_on_env_runner.py +++ b/rllib/algorithms/tests/test_callbacks_on_env_runner.py @@ -24,19 +24,19 @@ def on_environment_created(self, *args, env_runner, metrics_logger, env, **kwarg def on_episode_start(self, *args, env_runner, metrics_logger, env, **kwargs): assert isinstance(env_runner, EnvRunner) assert isinstance(metrics_logger, MetricsLogger) - assert isinstance(env, gym.Env) + assert isinstance(env, (gym.Env, gym.vector.VectorEnv)) self.counts.update({"start": 1}) def on_episode_step(self, *args, env_runner, metrics_logger, env, **kwargs): assert isinstance(env_runner, EnvRunner) assert isinstance(metrics_logger, MetricsLogger) - assert isinstance(env, gym.Env) + assert isinstance(env, (gym.Env, gym.vector.VectorEnv)) self.counts.update({"step": 1}) def on_episode_end(self, *args, env_runner, metrics_logger, env, **kwargs): assert isinstance(env_runner, EnvRunner) assert isinstance(metrics_logger, MetricsLogger) - assert isinstance(env, gym.Env) + assert isinstance(env, (gym.Env, gym.vector.VectorEnv)) self.counts.update({"end": 1}) def on_sample_end(self, *args, env_runner, metrics_logger, **kwargs): diff --git a/rllib/benchmarks/ppo/benchmark_atari_ppo.py b/rllib/benchmarks/ppo/benchmark_atari_ppo.py index 0b697ff4b9025..e434f2ac078fc 100644 --- a/rllib/benchmarks/ppo/benchmark_atari_ppo.py +++ b/rllib/benchmarks/ppo/benchmark_atari_ppo.py @@ -6,7 +6,7 @@ --num-gpus=4 --num-env-runners=95` In order to only run individual or lists of envs, you can provide a list of env-strings -under the `--env` arg, such as `--env ALE/Pong-v5,ALE/Breakout-v5`. +under the `--env` arg, such as `--env=ale_py:ALE/Pong-v5,ale_py:ALE/Breakout-v5`. For logging to your WandB account, use: `--wandb-key=[your WandB API key] --wandb-project=[some project name] @@ -34,60 +34,60 @@ # rainbow). # Note that for PPO, we simply run everything for 6M ts. 
benchmark_envs = { - "ALE/Alien-v5": (6022.9, 200000000), - "ALE/Amidar-v5": (202.8, 200000000), - "ALE/Assault-v5": (14491.7, 200000000), - "ALE/Asterix-v5": (280114.0, 200000000), - "ALE/Asteroids-v5": (2249.4, 200000000), - "ALE/Atlantis-v5": (814684.0, 200000000), - "ALE/BankHeist-v5": (826.0, 200000000), - "ALE/BattleZone-v5": (52040.0, 200000000), - "ALE/BeamRider-v5": (21768.5, 200000000), - "ALE/Berzerk-v5": (1793.4, 200000000), - "ALE/Bowling-v5": (39.4, 200000000), - "ALE/Boxing-v5": (54.9, 200000000), - "ALE/Breakout-v5": (379.5, 200000000), - "ALE/Centipede-v5": (7160.9, 200000000), - "ALE/ChopperCommand-v5": (10916.0, 200000000), - "ALE/CrazyClimber-v5": (143962.0, 200000000), - "ALE/Defender-v5": (47671.3, 200000000), - "ALE/DemonAttack-v5": (109670.7, 200000000), - "ALE/DoubleDunk-v5": (-0.6, 200000000), - "ALE/Enduro-v5": (2061.1, 200000000), - "ALE/FishingDerby-v5": (22.6, 200000000), - "ALE/Freeway-v5": (29.1, 200000000), - "ALE/Frostbite-v5": (4141.1, 200000000), - "ALE/Gopher-v5": (72595.7, 200000000), - "ALE/Gravitar-v5": (567.5, 200000000), - "ALE/Hero-v5": (50496.8, 200000000), - "ALE/IceHockey-v5": (-11685.8, 200000000), - "ALE/Kangaroo-v5": (10841.0, 200000000), - "ALE/Krull-v5": (6715.5, 200000000), - "ALE/KungFuMaster-v5": (28999.8, 200000000), - "ALE/MontezumaRevenge-v5": (154.0, 200000000), - "ALE/MsPacman-v5": (2570.2, 200000000), - "ALE/NameThisGame-v5": (11686.5, 200000000), - "ALE/Phoenix-v5": (103061.6, 200000000), - "ALE/Pitfall-v5": (-37.6, 200000000), - "ALE/Pong-v5": (19.0, 200000000), - "ALE/PrivateEye-v5": (1704.4, 200000000), - "ALE/Qbert-v5": (18397.6, 200000000), - "ALE/RoadRunner-v5": (54261.0, 200000000), - "ALE/Robotank-v5": (55.2, 200000000), - "ALE/Seaquest-v5": (19176.0, 200000000), - "ALE/Skiing-v5": (-11685.8, 200000000), - "ALE/Solaris-v5": (2860.7, 200000000), - "ALE/SpaceInvaders-v5": (12629.0, 200000000), - "ALE/StarGunner-v5": (123853.0, 200000000), - "ALE/Surround-v5": (7.0, 200000000), - "ALE/Tennis-v5": (-2.2, 200000000), - "ALE/TimePilot-v5": (11190.5, 200000000), - "ALE/Tutankham-v5": (126.9, 200000000), - "ALE/Venture-v5": (45.0, 200000000), - "ALE/VideoPinball-v5": (506817.2, 200000000), - "ALE/WizardOfWor-v5": (14631.5, 200000000), - "ALE/YarsRevenge-v5": (93007.9, 200000000), - "ALE/Zaxxon-v5": (19658.0, 200000000), + "ale_py:ALE/Alien-v5": (6022.9, 200000000), + "ale_py:ALE/Amidar-v5": (202.8, 200000000), + "ale_py:ALE/Assault-v5": (14491.7, 200000000), + "ale_py:ALE/Asterix-v5": (280114.0, 200000000), + "ale_py:ALE/Asteroids-v5": (2249.4, 200000000), + "ale_py:ALE/Atlantis-v5": (814684.0, 200000000), + "ale_py:ALE/BankHeist-v5": (826.0, 200000000), + "ale_py:ALE/BattleZone-v5": (52040.0, 200000000), + "ale_py:ALE/BeamRider-v5": (21768.5, 200000000), + "ale_py:ALE/Berzerk-v5": (1793.4, 200000000), + "ale_py:ALE/Bowling-v5": (39.4, 200000000), + "ale_py:ALE/Boxing-v5": (54.9, 200000000), + "ale_py:ALE/Breakout-v5": (379.5, 200000000), + "ale_py:ALE/Centipede-v5": (7160.9, 200000000), + "ale_py:ALE/ChopperCommand-v5": (10916.0, 200000000), + "ale_py:ALE/CrazyClimber-v5": (143962.0, 200000000), + "ale_py:ALE/Defender-v5": (47671.3, 200000000), + "ale_py:ALE/DemonAttack-v5": (109670.7, 200000000), + "ale_py:ALE/DoubleDunk-v5": (-0.6, 200000000), + "ale_py:ALE/Enduro-v5": (2061.1, 200000000), + "ale_py:ALE/FishingDerby-v5": (22.6, 200000000), + "ale_py:ALE/Freeway-v5": (29.1, 200000000), + "ale_py:ALE/Frostbite-v5": (4141.1, 200000000), + "ale_py:ALE/Gopher-v5": (72595.7, 200000000), + "ale_py:ALE/Gravitar-v5": (567.5, 
200000000), + "ale_py:ALE/Hero-v5": (50496.8, 200000000), + "ale_py:ALE/IceHockey-v5": (-11685.8, 200000000), + "ale_py:ALE/Kangaroo-v5": (10841.0, 200000000), + "ale_py:ALE/Krull-v5": (6715.5, 200000000), + "ale_py:ALE/KungFuMaster-v5": (28999.8, 200000000), + "ale_py:ALE/MontezumaRevenge-v5": (154.0, 200000000), + "ale_py:ALE/MsPacman-v5": (2570.2, 200000000), + "ale_py:ALE/NameThisGame-v5": (11686.5, 200000000), + "ale_py:ALE/Phoenix-v5": (103061.6, 200000000), + "ale_py:ALE/Pitfall-v5": (-37.6, 200000000), + "ale_py:ALE/Pong-v5": (19.0, 200000000), + "ale_py:ALE/PrivateEye-v5": (1704.4, 200000000), + "ale_py:ALE/Qbert-v5": (18397.6, 200000000), + "ale_py:ALE/RoadRunner-v5": (54261.0, 200000000), + "ale_py:ALE/Robotank-v5": (55.2, 200000000), + "ale_py:ALE/Seaquest-v5": (19176.0, 200000000), + "ale_py:ALE/Skiing-v5": (-11685.8, 200000000), + "ale_py:ALE/Solaris-v5": (2860.7, 200000000), + "ale_py:ALE/SpaceInvaders-v5": (12629.0, 200000000), + "ale_py:ALE/StarGunner-v5": (123853.0, 200000000), + "ale_py:ALE/Surround-v5": (7.0, 200000000), + "ale_py:ALE/Tennis-v5": (-2.2, 200000000), + "ale_py:ALE/TimePilot-v5": (11190.5, 200000000), + "ale_py:ALE/Tutankham-v5": (126.9, 200000000), + "ale_py:ALE/Venture-v5": (45.0, 200000000), + "ale_py:ALE/VideoPinball-v5": (506817.2, 200000000), + "ale_py:ALE/WizardOfWor-v5": (14631.5, 200000000), + "ale_py:ALE/YarsRevenge-v5": (93007.9, 200000000), + "ale_py:ALE/Zaxxon-v5": (19658.0, 200000000), } diff --git a/rllib/benchmarks/torch_compile/run_inference_bm.py b/rllib/benchmarks/torch_compile/run_inference_bm.py index a92e49b9cb504..e15b87be5965b 100644 --- a/rllib/benchmarks/torch_compile/run_inference_bm.py +++ b/rllib/benchmarks/torch_compile/run_inference_bm.py @@ -92,7 +92,7 @@ def main(pargs): json.dump(config, f) # Create the environment. - env = wrap_atari_for_new_api_stack(gym.make("ALE/Breakout-v5")) + env = wrap_atari_for_new_api_stack(gym.make("ale_py:ALE/Breakout-v5")) # setup RLModule model_cfg = MODEL_DEFAULTS.copy() diff --git a/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py b/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py index fa046b05285da..23c0cba796766 100644 --- a/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py +++ b/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py @@ -29,7 +29,7 @@ def main(pargs): config = ( PPOConfig() .environment( - "ALE/Breakout-v5", + "ale_py:ALE/Breakout-v5", clip_rewards=True, env_config={ "frameskip": 1, diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py index 8cc4c6e4e2df1..03b8105fbedb4 100644 --- a/rllib/env/multi_agent_env_runner.py +++ b/rllib/env/multi_agent_env_runner.py @@ -90,7 +90,9 @@ def __init__(self, config: AlgorithmConfig, **kwargs): self.make_env() # Create the env-to-module connector pipeline. - self._env_to_module = self.config.build_env_to_module_connector(self.env) + self._env_to_module = self.config.build_env_to_module_connector( + self.env.unwrapped + ) # Cached env-to-module results taken at the end of a `_sample_timesteps()` # call to make sure the final observation (before an episode cut) gets properly # processed (and maybe postprocessed and re-stored into the episode). @@ -104,7 +106,7 @@ def __init__(self, config: AlgorithmConfig, **kwargs): # Construct the MultiRLModule. 
try: module_spec: MultiRLModuleSpec = self.config.get_multi_rl_module_spec( - env=self.env, spaces=self.get_spaces(), inference_only=True + env=self.env.unwrapped, spaces=self.get_spaces(), inference_only=True ) # Build the module from its spec. self.module = module_spec.build() @@ -114,7 +116,9 @@ def __init__(self, config: AlgorithmConfig, **kwargs): self.module = None # Create the two connector pipelines: env-to-module and module-to-env. - self._module_to_env = self.config.build_module_to_env_connector(self.env) + self._module_to_env = self.config.build_module_to_env_connector( + self.env.unwrapped + ) self._needs_initial_reset: bool = True self._episode: Optional[MultiAgentEpisode] = None @@ -259,7 +263,7 @@ def _sample_timesteps( to_env = { Columns.ACTIONS: [ { - aid: self.env.get_action_space(aid).sample() + aid: self.env.unwrapped.get_action_space(aid).sample() for aid in self._episode.get_agents_to_act() } ] @@ -461,7 +465,7 @@ def _sample_episodes( to_env = { Columns.ACTIONS: [ { - aid: self.env.get_action_space(aid).sample() + aid: self.env.unwrapped.get_action_space(aid).sample() for aid in self._episode.get_agents_to_act() } ] @@ -869,7 +873,7 @@ def make_env(self): self._callbacks.on_environment_created( env_runner=self, metrics_logger=self.metrics, - env=self.env, + env=self.env.unwrapped, env_context=env_ctx, ) @@ -889,11 +893,12 @@ def _setup_metrics(self): def _new_episode(self): return MultiAgentEpisode( observation_space={ - aid: self.env.get_observation_space(aid) - for aid in self.env.possible_agents + aid: self.env.unwrapped.get_observation_space(aid) + for aid in self.env.unwrapped.possible_agents }, action_space={ - aid: self.env.get_action_space(aid) for aid in self.env.possible_agents + aid: self.env.unwrapped.get_action_space(aid) + for aid in self.env.unwrapped.possible_agents }, agent_to_module_mapping_fn=self.config.policy_mapping_fn, ) @@ -904,7 +909,7 @@ def _make_on_episode_callback(self, which: str, episode=None): episode=episode, env_runner=self, metrics_logger=self.metrics, - env=self.env, + env=self.env.unwrapped, rl_module=self.module, env_index=0, ) diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index 967d4ec174b3b..14bf1fd635b8e 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -1,10 +1,12 @@ -import time from collections import defaultdict from functools import partial import logging +import time from typing import Collection, DefaultDict, List, Optional, Union import gymnasium as gym +from gymnasium.wrappers.vector import DictInfoToList +from gymnasium.envs.registration import VectorizeMode from ray.rllib.algorithms.algorithm_config import AlgorithmConfig from ray.rllib.algorithms.callbacks import DefaultCallbacks @@ -81,7 +83,7 @@ def __init__(self, config: AlgorithmConfig, **kwargs): self._callbacks: DefaultCallbacks = self.config.callbacks_class() # Create the vectorized gymnasium env. - self.env: Optional[gym.Wrapper] = None + self.env: Optional[gym.vector.VectorEnvWrapper] = None self.num_envs: int = 0 self.make_env() @@ -100,7 +102,7 @@ def __init__(self, config: AlgorithmConfig, **kwargs): # Create the RLModule. try: module_spec: RLModuleSpec = self.config.get_rl_module_spec( - env=self.env, spaces=self.get_spaces(), inference_only=True + env=self.env.unwrapped, spaces=self.get_spaces(), inference_only=True ) # Build the module from its spec. self.module = module_spec.build() @@ -186,7 +188,7 @@ def sample( # Sample n timesteps. 
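The repeated `self.env` to `self.env.unwrapped` changes follow from gymnasium 1.0 dropping the implicit attribute forwarding that wrappers used to provide. A small, hypothetical sketch of the behavior change (the env class and attribute name are illustrative, not from this PR):

import gymnasium as gym

class CorridorEnv(gym.Env):
    # Hypothetical env exposing a custom attribute, analogous to RLlib's
    # multi-agent envs exposing get_action_space()/get_observation_space().
    observation_space = gym.spaces.Discrete(5)
    action_space = gym.spaces.Discrete(2)
    corridor_length = 5

    def reset(self, *, seed=None, options=None):
        return 0, {}

    def step(self, action):
        return 0, 0.0, False, False, {}

wrapped = gym.wrappers.TimeLimit(CorridorEnv(), max_episode_steps=10)
# In gymnasium 1.0, custom attributes are no longer proxied through wrappers,
# so `wrapped.corridor_length` fails; go through `.unwrapped` instead.
print(wrapped.unwrapped.corridor_length)  # -> 5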
if num_timesteps is not None: - samples = self._sample_timesteps( + samples = self._sample( num_timesteps=num_timesteps, explore=explore, random_actions=random_actions, @@ -194,19 +196,16 @@ def sample( ) # Sample m episodes. elif num_episodes is not None: - samples = self._sample_episodes( + samples = self._sample( num_episodes=num_episodes, explore=explore, random_actions=random_actions, ) - # For complete episodes mode, sample a single episode and - # leave coordination of sampling to `synchronous_parallel_sample`. - # TODO (simon, sven): The coordination will eventually move - # to `EnvRunnerGroup` in the future. So from the algorithm one - # would do `EnvRunnerGroup.sample()`. + # For complete episodes mode, sample as long as the number of timesteps + # done is smaller than the `train_batch_size`. else: - samples = self._sample_episodes( - num_episodes=1, + samples = self._sample( + num_episodes=self.num_envs, explore=explore, random_actions=random_actions, ) @@ -222,57 +221,40 @@ def sample( return samples - def _sample_timesteps( + def _sample( self, - num_timesteps: int, + *, + num_timesteps: Optional[int] = None, + num_episodes: Optional[int] = None, explore: bool, random_actions: bool = False, force_reset: bool = False, ) -> List[SingleAgentEpisode]: - """Helper method to sample n timesteps.""" + """Helper method to sample n timesteps or m episodes.""" done_episodes_to_return: List[SingleAgentEpisode] = [] # Have to reset the env (on all vector sub_envs). - if force_reset or self._needs_initial_reset: - # Create n new episodes. - # TODO (sven): Add callback `on_episode_created` as soon as - # `gymnasium-v1.0.0a2` PR is coming. - self._episodes = [] - for env_index in range(self.num_envs): - self._episodes.append(self._new_episode()) - self._shared_data = {} - - # Erase all cached ongoing episodes (these will never be completed and - # would thus never be returned/cleaned by `get_metrics` and cause a memory - # leak). - self._ongoing_episodes_for_metrics.clear() - - # Try resetting the environment. - # TODO (simon): Check, if we need here the seed from the config. - obs, infos = self._try_env_reset() - obs = unbatch(obs) - self._cached_to_module = None - - # Call `on_episode_start()` callbacks. - for env_index in range(self.num_envs): - self._make_on_episode_callback("on_episode_start", env_index) - + if force_reset or num_episodes is not None or self._needs_initial_reset: + episodes = self._episodes = [None for _ in range(self.num_envs)] + shared_data = self._shared_data = {} + self._reset_envs(episodes, shared_data, explore) # We just reset the env. Don't have to force this again in the next # call to `self._sample_timesteps()`. self._needs_initial_reset = False + else: + episodes = self._episodes + shared_data = self._shared_data - # Set initial obs and infos in the episodes. - for env_index in range(self.num_envs): - self._episodes[env_index].add_env_reset( - observation=obs[env_index], - infos=infos[env_index], - ) + if num_episodes is not None: + self._needs_initial_reset = True - # Loop through timesteps. + # Loop through `num_timesteps` timesteps or `num_episodes` episodes. ts = 0 - - while ts < num_timesteps: + eps = 0 + while ( + (ts < num_timesteps) if num_timesteps is not None else (eps < num_episodes) + ): # Act randomly. if random_actions: to_env = { @@ -280,13 +262,9 @@ def _sample_timesteps( } # Compute an action using the RLModule. else: - # Env-to-module connector. 
- to_module = self._cached_to_module or self._env_to_module( - rl_module=self.module, - episodes=self._episodes, - explore=explore, - shared_data=self._shared_data, - ) + # Env-to-module connector (already cached). + to_module = self._cached_to_module + assert to_module is not None self._cached_to_module = None # RLModule forward pass: Explore or not. @@ -305,9 +283,9 @@ def _sample_timesteps( to_env = self._module_to_env( rl_module=self.module, batch=to_env, - episodes=self._episodes, + episodes=episodes, explore=explore, - shared_data=self._shared_data, + shared_data=shared_data, ) # Extract the (vectorized) actions (to be sent to the env) from the @@ -320,264 +298,78 @@ def _sample_timesteps( # Try stepping the environment. results = self._try_env_step(actions_for_env) if results == ENV_STEP_FAILURE: - return self._sample_timesteps( + return self._sample( num_timesteps=num_timesteps, + num_episodes=num_episodes, explore=explore, random_actions=random_actions, force_reset=True, ) - obs, rewards, terminateds, truncateds, infos = results - obs, actions = unbatch(obs), unbatch(actions) - - ts += self.num_envs + observations, rewards, terminateds, truncateds, infos = results + observations, actions = unbatch(observations), unbatch(actions) + call_on_episode_start = set() for env_index in range(self.num_envs): - # TODO (simon): This might be unfortunate if a user needs to set a - # certain env parameter during different episodes (for example for - # benchmarking). extra_model_output = {k: v[env_index] for k, v in to_env.items()} extra_model_output[WEIGHTS_SEQ_NO] = self._weights_seq_no - # In inference, we have only the action logits. - if terminateds[env_index] or truncateds[env_index]: - # Finish the episode with the actual terminal observation stored in - # the info dict. - self._episodes[env_index].add_env_step( - # Gym vector env provides the `"final_observation"`. - # Pop these out of the infos dict so this information doesn't - # appear in the next episode as well (at index=0). - infos[env_index].pop("final_observation"), - actions[env_index], - rewards[env_index], - infos=infos[env_index].pop("final_info"), - terminated=terminateds[env_index], - truncated=truncateds[env_index], - extra_model_outputs=extra_model_output, - ) - # Make the `on_episode_step` and `on_episode_end` callbacks (before - # finalizing the episode object). - self._make_on_episode_callback("on_episode_step", env_index) - - # We have to perform an extra env-to-module pass here, just in case - # the user's connector pipeline performs (permanent) transforms - # on each observation (including this final one here). Without such - # a call and in case the structure of the observations change - # sufficiently, the following `finalize()` call on the episode will - # fail. - if self.module is not None: - self._env_to_module( - episodes=[self._episodes[env_index]], - explore=explore, - rl_module=self.module, - shared_data=self._shared_data, - ) - - self._make_on_episode_callback("on_episode_end", env_index) - - # Then finalize (numpy'ize) the episode. - done_episodes_to_return.append(self._episodes[env_index].finalize()) - - # Create a new episode object with already the reset data in it. - self._episodes[env_index] = SingleAgentEpisode( - observations=[obs[env_index]], - infos=[infos[env_index]], - observation_space=self.env.single_observation_space, - action_space=self.env.single_action_space, + # Episode has no data in it yet -> Was just reset and needs to be called + # with its `add_env_reset()` method. 
+ if not self._episodes[env_index].is_reset: + episodes[env_index].add_env_reset( + observation=observations[env_index], + infos=infos[env_index], ) + call_on_episode_start.add(env_index) - # Make the `on_episode_start` callback. - self._make_on_episode_callback("on_episode_start", env_index) - + # Call `add_env_step()` method on episode. else: - self._episodes[env_index].add_env_step( - obs[env_index], - actions[env_index], - rewards[env_index], + # Only increase ts when we actually stepped (not reset'd as a reset + # does not count as a timestep). + ts += 1 + episodes[env_index].add_env_step( + observation=observations[env_index], + action=actions[env_index], + reward=rewards[env_index], infos=infos[env_index], + terminated=terminateds[env_index], + truncated=truncateds[env_index], extra_model_outputs=extra_model_output, ) - # Make the `on_episode_step` callback. - self._make_on_episode_callback("on_episode_step", env_index) - - # Already perform env-to-module connector call for next call to - # `_sample_timesteps()`. See comment in c'tor for `self._cached_to_module`. - if self.module is not None: - self._cached_to_module = self._env_to_module( - rl_module=self.module, - episodes=self._episodes, - explore=explore, - shared_data=self._shared_data, - ) - - # Return done episodes ... - # TODO (simon): Check, how much memory this attribute uses. - self._done_episodes_for_metrics.extend(done_episodes_to_return) - # ... and all ongoing episode chunks. - - # Also, make sure we start new episode chunks (continuing the ongoing episodes - # from the to-be-returned chunks). - ongoing_episodes_continuations = [ - eps.cut(len_lookback_buffer=self.config.episode_lookback_horizon) - for eps in self._episodes - ] - - ongoing_episodes_to_return = [] - for eps in self._episodes: - # Just started Episodes do not have to be returned. There is no data - # in them anyway. - if eps.t == 0: - continue - eps.validate() - self._ongoing_episodes_for_metrics[eps.id_].append(eps) - # Return finalized (numpy'ized) Episodes. - ongoing_episodes_to_return.append(eps.finalize()) - - # Continue collecting into the cut Episode chunks. - self._episodes = ongoing_episodes_continuations - - self._increase_sampled_metrics(ts) - - # Return collected episode data. - return done_episodes_to_return + ongoing_episodes_to_return - - def _sample_episodes( - self, - num_episodes: int, - explore: bool, - random_actions: bool = False, - ) -> List[SingleAgentEpisode]: - """Helper method to run n episodes. - - See docstring of `self.sample()` for more details. - """ - # If user calls sample(num_timesteps=..) after this, we must reset again - # at the beginning. - self._needs_initial_reset = True - - done_episodes_to_return: List[SingleAgentEpisode] = [] - - episodes = [] - for env_index in range(self.num_envs): - episodes.append(self._new_episode()) - # TODO (sven): Add callback `on_episode_created` as soon as - # `gymnasium-v1.0.0a2` PR is coming. - _shared_data = {} - - # Try resetting the environment. - # TODO (simon): Check, if we need here the seed from the config. - obs, infos = self._try_env_reset() - for env_index in range(self.num_envs): - episodes[env_index].add_env_reset( - observation=unbatch(obs)[env_index], - infos=infos[env_index], - ) - self._make_on_episode_callback("on_episode_start", env_index, episodes) - - # Loop over episodes. - eps = 0 - ts = 0 - while eps < num_episodes: - # Act randomly. 
- if random_actions: - to_env = { - Columns.ACTIONS: self.env.action_space.sample(), - } - # Compute an action using the RLModule. - else: - # Env-to-module connector. - to_module = self._env_to_module( - rl_module=self.module, + # Env-to-module connector pass (cache results as we will do the RLModule + # forward pass only in the next `while`-iteration. + if self.module is not None: + self._cached_to_module = self._env_to_module( episodes=episodes, explore=explore, - shared_data=_shared_data, - ) - - # RLModule forward pass: Explore or not. - if explore: - env_steps_lifetime = ( - self.metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0) - + ts - ) - to_env = self.module.forward_exploration( - to_module, t=env_steps_lifetime - ) - else: - to_env = self.module.forward_inference(to_module) - - # Module-to-env connector. - to_env = self._module_to_env( rl_module=self.module, - batch=to_env, - episodes=episodes, - explore=explore, - shared_data=_shared_data, + shared_data=shared_data, ) - # Extract the (vectorized) actions (to be sent to the env) from the - # module/connector output. Note that these actions are fully ready (e.g. - # already unsquashed/clipped) to be sent to the environment) and might not - # be identical to the actions produced by the RLModule/distribution, which - # are the ones stored permanently in the episode objects. - actions = to_env.pop(Columns.ACTIONS) - actions_for_env = to_env.pop(Columns.ACTIONS_FOR_ENV, actions) - # Try stepping the environment. - results = self._try_env_step(actions_for_env) - if results == ENV_STEP_FAILURE: - return self._sample_episodes( - num_episodes=num_episodes, - explore=explore, - random_actions=random_actions, - ) - obs, rewards, terminateds, truncateds, infos = results - obs, actions = unbatch(obs), unbatch(actions) - ts += self.num_envs - for env_index in range(self.num_envs): - extra_model_output = {k: v[env_index] for k, v in to_env.items()} - extra_model_output[WEIGHTS_SEQ_NO] = self._weights_seq_no - - if terminateds[env_index] or truncateds[env_index]: - eps += 1 - - episodes[env_index].add_env_step( - infos[env_index].pop("final_observation"), - actions[env_index], - rewards[env_index], - infos=infos[env_index].pop("final_info"), - terminated=terminateds[env_index], - truncated=truncateds[env_index], - extra_model_outputs=extra_model_output, + # Call `on_episode_start()` callback (always after reset). + if env_index in call_on_episode_start: + self._make_on_episode_callback( + "on_episode_start", env_index, episodes ) - # Make `on_episode_step` and `on_episode_end` callbacks before - # finalizing the episode. + # Make the `on_episode_step` callbacks. + else: self._make_on_episode_callback( "on_episode_step", env_index, episodes ) - # We have to perform an extra env-to-module pass here, just in case - # the user's connector pipeline performs (permanent) transforms - # on each observation (including this final one here). Without such - # a call and in case the structure of the observations change - # sufficiently, the following `finalize()` call on the episode will - # fail. - if self.module is not None: - self._env_to_module( - episodes=[episodes[env_index]], - explore=explore, - rl_module=self.module, - shared_data=_shared_data, - ) - - # Make the `on_episode_end` callback (before finalizing the episode, - # but after(!) the last env-to-module connector call has been made. - # -> All obs (even the terminal one) should have been processed now - # (by the connector, if applicable). + # Episode is done. 
+ if episodes[env_index].is_done: + eps += 1 + + # Make the `on_episode_end` callbacks (before finalizing the episode + # object). self._make_on_episode_callback( "on_episode_end", env_index, episodes ) - # Finalize (numpy'ize) the episode. + # Then finalize (numpy'ize) the episode. done_episodes_to_return.append(episodes[env_index].finalize()) # Also early-out if we reach the number of episodes within this @@ -585,38 +377,46 @@ def _sample_episodes( if eps == num_episodes: break - # Create a new episode object. + # Create a new episode object with no data in it and execute + # `on_episode_created` callback (before the `env.reset()` call). episodes[env_index] = SingleAgentEpisode( - observations=[obs[env_index]], - infos=[infos[env_index]], observation_space=self.env.single_observation_space, action_space=self.env.single_action_space, ) - # Make `on_episode_start` callback. - self._make_on_episode_callback( - "on_episode_start", env_index, episodes - ) - else: - episodes[env_index].add_env_step( - obs[env_index], - actions[env_index], - rewards[env_index], - infos=infos[env_index], - extra_model_outputs=extra_model_output, - ) - # Make `on_episode_step` callback. - self._make_on_episode_callback( - "on_episode_step", env_index, episodes - ) + # Return done episodes ... + # TODO (simon): Check, how much memory this attribute uses. self._done_episodes_for_metrics.extend(done_episodes_to_return) + # ... and all ongoing episode chunks. - # Initialized episodes have to be removed as they lack `extra_model_outputs`. - samples = [episode for episode in done_episodes_to_return if episode.t > 0] + # Also, make sure we start new episode chunks (continuing the ongoing episodes + # from the to-be-returned chunks). + ongoing_episodes_to_return = [] + # Only if we are doing individual timesteps: We have to maybe cut an ongoing + # episode and continue building it on the next call to `sample()`. + if num_timesteps is not None: + ongoing_episodes_continuations = [ + eps.cut(len_lookback_buffer=self.config.episode_lookback_horizon) + for eps in self._episodes + ] + + for eps in self._episodes: + # Just started Episodes do not have to be returned. There is no data + # in them anyway. + if eps.t == 0: + continue + eps.validate() + self._ongoing_episodes_for_metrics[eps.id_].append(eps) + # Return finalized (numpy'ized) Episodes. + ongoing_episodes_to_return.append(eps.finalize()) + + # Continue collecting into the cut Episode chunks. + self._episodes = ongoing_episodes_continuations self._increase_sampled_metrics(ts) - return samples + # Return collected episode data. + return done_episodes_to_return + ongoing_episodes_to_return @override(EnvRunner) def get_spaces(self): @@ -820,12 +620,15 @@ def make_env(self) -> None: ) gym.register("rllib-single-agent-env-v0", entry_point=entry_point) - # Wrap into `VectorListInfo`` wrapper to get infos as lists. - self.env: gym.Wrapper = gym.wrappers.VectorListInfo( - gym.vector.make( + self.env = DictInfoToList( + gym.make_vec( "rllib-single-agent-env-v0", num_envs=self.config.num_envs_per_env_runner, - asynchronous=self.config.remote_worker_envs, + vectorization_mode=( + VectorizeMode.ASYNC + if self.config.remote_worker_envs + else VectorizeMode.SYNC + ), ) ) @@ -839,7 +642,7 @@ def make_env(self) -> None: self._callbacks.on_environment_created( env_runner=self, metrics_logger=self.metrics, - env=self.env, + env=self.env.unwrapped, env_context=env_ctx, ) @@ -848,19 +651,57 @@ def stop(self): # Close our env object via gymnasium's API. 
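The `make_env()` change above swaps the removed `gym.vector.make` / `VectorListInfo` pair for their gymnasium 1.0 equivalents. A minimal sketch of the new construction, using `CartPole-v1` as a stand-in for the `rllib-single-agent-env-v0` entry point registered in this file:

import gymnasium as gym
from gymnasium.wrappers.vector import DictInfoToList

# gym.make_vec() replaces gym.vector.make(); vectorization_mode takes
# "sync"/"async" (or the VectorizeMode enum) instead of asynchronous=True/False.
env = DictInfoToList(
    gym.make_vec("CartPole-v1", num_envs=2, vectorization_mode="sync")
)
# DictInfoToList converts the vector env's dict-of-arrays infos back into one
# info dict per sub-env, which is the format the EnvRunner iterates over.
obs, infos = env.reset(seed=0)
assert isinstance(infos, list) and len(infos) == 2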
self.env.close() - def _new_episode(self): - return SingleAgentEpisode( + def _reset_envs(self, episodes, shared_data, explore): + # Create n new episodes and make the `on_episode_created` callbacks. + for env_index in range(self.num_envs): + self._new_episode(env_index, episodes) + + # Erase all cached ongoing episodes (these will never be completed and + # would thus never be returned/cleaned by `get_metrics` and cause a memory + # leak). + self._ongoing_episodes_for_metrics.clear() + + # Try resetting the environment. + # TODO (simon): Check, if we need here the seed from the config. + observations, infos = self._try_env_reset() + observations = unbatch(observations) + + # Set initial obs and infos in the episodes. + for env_index in range(self.num_envs): + episodes[env_index].add_env_reset( + observation=observations[env_index], + infos=infos[env_index], + ) + + # Run the env-to-module connector to make sure the reset-obs/infos have + # properly been processed (if applicable). + self._cached_to_module = None + if self.module: + self._cached_to_module = self._env_to_module( + rl_module=self.module, + episodes=episodes, + explore=explore, + shared_data=shared_data, + ) + + # Call `on_episode_start()` callbacks (always after reset). + for env_index in range(self.num_envs): + self._make_on_episode_callback("on_episode_start", env_index, episodes) + + def _new_episode(self, env_index, episodes=None): + episodes = episodes if episodes is not None else self._episodes + episodes[env_index] = SingleAgentEpisode( observation_space=self.env.single_observation_space, action_space=self.env.single_action_space, ) + self._make_on_episode_callback("on_episode_created", env_index, episodes) - def _make_on_episode_callback(self, which: str, idx: int, episodes=None): - episodes = episodes if episodes is not None else self._episodes + def _make_on_episode_callback(self, which: str, idx: int, episodes): getattr(self._callbacks, which)( episode=episodes[idx], env_runner=self, metrics_logger=self.metrics, - env=self.env, + env=self.env.unwrapped, rl_module=self.module, env_index=idx, ) diff --git a/rllib/env/single_agent_episode.py b/rllib/env/single_agent_episode.py index dd4f480394705..b11cdd6783746 100644 --- a/rllib/env/single_agent_episode.py +++ b/rllib/env/single_agent_episode.py @@ -362,6 +362,7 @@ def add_env_reset( observation: The initial observation returned by `env.reset()`. infos: An (optional) info dict returned by `env.reset()`. """ + assert not self.is_reset assert not self.is_done assert len(self.observations) == 0 # Assume that this episode is completely empty and has not stepped yet. 
@@ -485,6 +486,11 @@ def validate(self) -> None: for k, v in self.extra_model_outputs.items(): assert len(v) == len(self.observations) - 1 + @property + def is_reset(self) -> bool: + """Returns True if `self.add_env_reset()` has already been called.""" + return len(self.observations) > 0 + @property def is_finalized(self) -> bool: """True, if the data in this episode is already stored as numpy arrays.""" diff --git a/rllib/env/tests/test_single_agent_env_runner.py b/rllib/env/tests/test_single_agent_env_runner.py index d6dbf7082985c..4d5f8808aa84c 100644 --- a/rllib/env/tests/test_single_agent_env_runner.py +++ b/rllib/env/tests/test_single_agent_env_runner.py @@ -9,6 +9,7 @@ from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner from ray.rllib.env.utils import _gym_env_creator from ray.rllib.examples.envs.classes.simple_corridor import SimpleCorridor +from ray.rllib.utils.test_utils import check class TestSingleAgentEnvRunner(unittest.TestCase): @@ -53,7 +54,7 @@ def test_sample(self): # Sample 10 episodes (5 per env) 100 times. for _ in range(100): episodes = env_runner.sample(num_episodes=10, random_actions=True) - self.assertTrue(len(episodes) == 10) + check(len(episodes), 10) # Since we sampled complete episodes, there should be no ongoing episodes # being returned. self.assertTrue(all(e.is_done for e in episodes)) @@ -61,20 +62,22 @@ def test_sample(self): # Sample 10 timesteps (5 per env) 100 times. for _ in range(100): episodes = env_runner.sample(num_timesteps=10, random_actions=True) - # Check, whether the sum of lengths of all episodes returned is 20 - self.assertTrue(sum(len(e) for e in episodes) == 10) + # Check the sum of lengths of all episodes returned. + sum_ = sum(map(len, episodes)) + self.assertTrue(sum_ in [10, 11]) # Sample (by default setting: rollout_fragment_length=64) 10 times. for _ in range(100): episodes = env_runner.sample(random_actions=True) # Check, whether the sum of lengths of all episodes returned is 128 # 2 (num_env_per_worker) * 64 (rollout_fragment_length). - self.assertTrue(sum(len(e) for e in episodes) == 128) + sum_ = sum(map(len, episodes)) + self.assertTrue(sum_ in [128, 129]) def test_async_vector_env(self): """Tests, whether SingleAgentGymEnvRunner can run with vector envs.""" - for env in ["TestEnv-v0", "CartPole-v1", SimpleCorridor, "tune-registered"]: + for env in ["CartPole-v1", SimpleCorridor, "tune-registered"]: config = ( AlgorithmConfig().environment(env) # Vectorize x5 and by default, rollout 64 timesteps per individual env. @@ -110,7 +113,7 @@ def test_distributed_env_runner(self): for env_spec in ["tune-registered", "CartPole-v1", SimpleCorridor]: config = ( AlgorithmConfig().environment(env_spec) - # Vectorize x5 and by default, rollout 64 timesteps per individual + # Vectorize x5 and by default, rollout 10 timesteps per individual # env. .env_runners( num_env_runners=5, @@ -129,9 +132,14 @@ def test_distributed_env_runner(self): # Loop over individual EnvRunner Actor's results and inspect each. for episodes in results: # Assert length of all fragments is `rollout_fragment_length`. 
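The new `is_reset` property lets the unified `_sample()` loop tell freshly created episodes apart from ones that already received their reset observation. A hypothetical illustration (spaces and values are made up for the example):

import gymnasium as gym
from ray.rllib.env.single_agent_episode import SingleAgentEpisode

episode = SingleAgentEpisode(
    observation_space=gym.spaces.Discrete(4),
    action_space=gym.spaces.Discrete(2),
)
assert not episode.is_reset             # no observation stored yet
episode.add_env_reset(observation=0, infos={})
assert episode.is_reset                 # first observation present
assert not episode.is_done              # not terminated/truncated yet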
- self.assertEqual( + self.assertIn( sum(len(e) for e in episodes), - config.num_envs_per_env_runner * config.rollout_fragment_length, + [ + config.num_envs_per_env_runner + * config.rollout_fragment_length + + i + for i in range(config.num_envs_per_env_runner) + ], ) diff --git a/rllib/env/utils/__init__.py b/rllib/env/utils/__init__.py index 67dc49efd76b3..09dfbe227e5a6 100644 --- a/rllib/env/utils/__init__.py +++ b/rllib/env/utils/__init__.py @@ -103,6 +103,13 @@ def _gym_env_creator( except (AttributeError, ModuleNotFoundError, ImportError): pass + # If env descriptor is a str, starting with "ale_py:ALE/", for now, register all ALE + # envs from ale_py. + if isinstance(env_descriptor, str) and env_descriptor.startswith("ale_py:ALE/"): + import ale_py + + gym.register_envs(ale_py) + # Try creating a gym env. If this fails we can output a # decent error message. try: diff --git a/rllib/env/wrappers/atari_wrappers.py b/rllib/env/wrappers/atari_wrappers.py index 2edefd58208b3..3bb0f3ff77196 100644 --- a/rllib/env/wrappers/atari_wrappers.py +++ b/rllib/env/wrappers/atari_wrappers.py @@ -13,7 +13,8 @@ def is_atari(env: Union[gym.Env, str]) -> bool: """Returns, whether a given env object or env descriptor (str) is an Atari env. Args: - env: The gym.Env object or a string descriptor of the env (e.g. "ALE/Pong-v5"). + env: The gym.Env object or a string descriptor of the env (for example, + "ale_py:ALE/Pong-v5"). Returns: Whether `env` is an Atari environment. @@ -28,9 +29,9 @@ def is_atari(env: Union[gym.Env, str]) -> bool: ): return False return "AtariEnv None: - """Initializes a Kaggle football environment. - - Args: - configuration (Optional[Dict[str, Any]]): configuration of the - football environment. For detailed information, see: - https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_\ - environments/envs/football/football.json - """ - super().__init__() - self.kaggle_env = kaggle_environments.make( - "football", configuration=configuration or {} - ) - self.last_cumulative_reward = None - - def reset( - self, - *, - seed: Optional[int] = None, - options: Optional[dict] = None, - ) -> Tuple[MultiAgentDict, MultiAgentDict]: - kaggle_state = self.kaggle_env.reset() - self.last_cumulative_reward = None - return { - f"agent{idx}": self._convert_obs(agent_state["observation"]) - for idx, agent_state in enumerate(kaggle_state) - if agent_state["status"] == "ACTIVE" - }, {} - - def step( - self, action_dict: Dict[AgentID, int] - ) -> Tuple[ - MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict - ]: - # Convert action_dict (used by RLlib) to a list of actions (used by - # kaggle_environments) - action_list = [None] * len(self.kaggle_env.state) - for idx, agent_state in enumerate(self.kaggle_env.state): - if agent_state["status"] == "ACTIVE": - action = action_dict[f"agent{idx}"] - action_list[idx] = [action] - self.kaggle_env.step(action_list) - - # Parse (obs, reward, terminated, truncated, info) from kaggle's "state" - # representation. 
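The `_gym_env_creator` addition above makes explicit what the `ale_py:` prefix does implicitly: it ensures the ALE environments are registered with gymnasium before `gym.make()` is attempted. A hedged sketch of the same idea in isolation (assumes `ale_py` 0.10.x):

import gymnasium as gym

env_descriptor = "ale_py:ALE/Pong-v5"

# Register all ALE envs with gymnasium if an ALE descriptor is requested.
if env_descriptor.startswith("ale_py:ALE/"):
    import ale_py

    gym.register_envs(ale_py)

env = gym.make(env_descriptor)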
- obs = {} - cumulative_reward = {} - terminated = {"__all__": self.kaggle_env.done} - truncated = {"__all__": False} - info = {} - for idx in range(len(self.kaggle_env.state)): - agent_state = self.kaggle_env.state[idx] - agent_name = f"agent{idx}" - if agent_state["status"] == "ACTIVE": - obs[agent_name] = self._convert_obs(agent_state["observation"]) - cumulative_reward[agent_name] = agent_state["reward"] - terminated[agent_name] = agent_state["status"] != "ACTIVE" - truncated[agent_name] = False - info[agent_name] = agent_state["info"] - # Compute the step rewards from the cumulative rewards - if self.last_cumulative_reward is not None: - reward = { - agent_id: agent_reward - self.last_cumulative_reward[agent_id] - for agent_id, agent_reward in cumulative_reward.items() - } - else: - reward = cumulative_reward - self.last_cumulative_reward = cumulative_reward - return obs, reward, terminated, truncated, info - - def _convert_obs(self, obs: Dict[str, Any]) -> Dict[str, Any]: - """Convert raw observations - - These conversions are necessary to make the observations fall into the - observation space defined below. - """ - new_obs = deepcopy(obs) - if new_obs["players_raw"][0]["ball_owned_team"] == -1: - new_obs["players_raw"][0]["ball_owned_team"] = 2 - if new_obs["players_raw"][0]["ball_owned_player"] == -1: - new_obs["players_raw"][0]["ball_owned_player"] = 11 - new_obs["players_raw"][0]["steps_left"] = [ - new_obs["players_raw"][0]["steps_left"] - ] - return new_obs - - def build_agent_spaces(self) -> Tuple[Space, Space]: - """Construct the action and observation spaces - - Description of actions and observations: - https://github.com/google-research/football/blob/master/gfootball/doc/ - observation.md - """ # noqa: E501 - action_space = Discrete(19) - # The football field's corners are [+-1., +-0.42]. However, the players - # and balls may get out of the field. Thus we multiply those limits by - # a factor of 2. 
- xlim = 1.0 * 2 - ylim = 0.42 * 2 - num_players: int = 11 - xy_space = Box( - np.array([-xlim, -ylim], dtype=np.float32), - np.array([xlim, ylim], dtype=np.float32), - ) - xyz_space = Box( - np.array([-xlim, -ylim, 0], dtype=np.float32), - np.array([xlim, ylim, np.inf], dtype=np.float32), - ) - observation_space = DictSpace( - { - "controlled_players": Discrete(2), - "players_raw": TupleSpace( - [ - DictSpace( - { - # ball information - "ball": xyz_space, - "ball_direction": Box(-np.inf, np.inf, (3,)), - "ball_rotation": Box(-np.inf, np.inf, (3,)), - "ball_owned_team": Discrete(3), - "ball_owned_player": Discrete(num_players + 1), - # left team - "left_team": TupleSpace([xy_space] * num_players), - "left_team_direction": TupleSpace( - [xy_space] * num_players - ), - "left_team_tired_factor": Box(0.0, 1.0, (num_players,)), - "left_team_yellow_card": MultiBinary(num_players), - "left_team_active": MultiBinary(num_players), - "left_team_roles": MultiDiscrete([10] * num_players), - # right team - "right_team": TupleSpace([xy_space] * num_players), - "right_team_direction": TupleSpace( - [xy_space] * num_players - ), - "right_team_tired_factor": Box( - 0.0, 1.0, (num_players,) - ), - "right_team_yellow_card": MultiBinary(num_players), - "right_team_active": MultiBinary(num_players), - "right_team_roles": MultiDiscrete([10] * num_players), - # controlled player information - "active": Discrete(num_players), - "designated": Discrete(num_players), - "sticky_actions": MultiBinary(10), - # match state - "score": Box(-np.inf, np.inf, (2,)), - "steps_left": Box(0, np.inf, (1,)), - "game_mode": Discrete(7), - } - ) - ] - ), - } - ) - return action_space, observation_space diff --git a/rllib/env/wrappers/model_vector_env.py b/rllib/env/wrappers/model_vector_env.py deleted file mode 100644 index 8facedab25e8c..0000000000000 --- a/rllib/env/wrappers/model_vector_env.py +++ /dev/null @@ -1,164 +0,0 @@ -import logging -from gymnasium.spaces import Discrete -import numpy as np - -from ray.rllib.utils.annotations import override -from ray.rllib.env.vector_env import VectorEnv -from ray.rllib.evaluation.rollout_worker import get_global_worker -from ray.rllib.env.base_env import BaseEnv, convert_to_base_env -from ray.rllib.utils.typing import EnvType - -logger = logging.getLogger(__name__) - - -def model_vector_env(env: EnvType) -> BaseEnv: - """Returns a VectorizedEnv wrapper around the given environment. - - To obtain worker configs, one can call get_global_worker(). - - Args: - env: The input environment (of any supported environment - type) to be convert to a _VectorizedModelGymEnv (wrapped as - an RLlib BaseEnv). - - Returns: - BaseEnv: The BaseEnv converted input `env`. - """ - worker = get_global_worker() - worker_index = worker.worker_index - if worker_index: - env = _VectorizedModelGymEnv( - make_env=worker.make_sub_env_fn, - existing_envs=[env], - num_envs=worker.config.num_envs_per_env_runner, - observation_space=env.observation_space, - action_space=env.action_space, - ) - return convert_to_base_env( - env, - make_env=worker.make_sub_env_fn, - num_envs=worker.config.num_envs_per_env_runner, - remote_envs=False, - remote_env_batch_wait_ms=0, - ) - - -class _VectorizedModelGymEnv(VectorEnv): - """Vectorized Environment Wrapper for MB-MPO. - - Primary change is in the `vector_step` method, which calls the dynamics - models for next_obs "calculation" (instead of the actual env). Also, the - actual envs need to have two extra methods implemented: `reward(obs)` and - (optionally) `done(obs)`. 
If `done` is not implemented, we will assume - that episodes in the env do not terminate, ever. - """ - - def __init__( - self, - make_env=None, - existing_envs=None, - num_envs=1, - *, - observation_space=None, - action_space=None, - env_config=None - ): - self.make_env = make_env - self.envs = existing_envs - self.num_envs = num_envs - while len(self.envs) < num_envs: - self.envs.append(self.make_env(len(self.envs))) - self._timesteps = [0 for _ in range(self.num_envs)] - self.cur_obs = [None for _ in range(self.num_envs)] - - super().__init__( - observation_space=observation_space or self.envs[0].observation_space, - action_space=action_space or self.envs[0].action_space, - num_envs=num_envs, - ) - worker = get_global_worker() - self.model, self.device = worker.foreach_policy( - lambda x, y: (x.dynamics_model, x.device) - )[0] - - @override(VectorEnv) - def vector_reset(self, *, seeds=None, options=None): - """Override parent to store actual env obs for upcoming predictions.""" - seeds = seeds or [None] * self.num_envs - options = options or [None] * self.num_envs - reset_results = [ - e.reset(seed=seeds[i], options=options[i]) for i, e in enumerate(self.envs) - ] - self.cur_obs = [io[0] for io in reset_results] - infos = [io[1] for io in reset_results] - self._timesteps = [0 for _ in range(self.num_envs)] - return self.cur_obs, infos - - @override(VectorEnv) - def reset_at(self, index, *, seed=None, options=None): - """Override parent to store actual env obs for upcoming predictions.""" - obs, infos = self.envs[index].reset(seed=seed, options=options) - self.cur_obs[index] = obs - self._timesteps[index] = 0 - return obs, infos - - @override(VectorEnv) - def vector_step(self, actions): - if self.cur_obs is None: - raise ValueError("Need to reset env first") - - for idx in range(self.num_envs): - self._timesteps[idx] += 1 - - # If discrete, need to one-hot actions - if isinstance(self.action_space, Discrete): - act = np.array(actions) - new_act = np.zeros((act.size, act.max() + 1)) - new_act[np.arange(act.size), act] = 1 - actions = new_act.astype("float32") - - # Batch the TD-model prediction. - obs_batch = np.stack(self.cur_obs, axis=0) - action_batch = np.stack(actions, axis=0) - # Predict the next observation, given previous a) real obs - # (after a reset), b) predicted obs (any other time). - next_obs_batch = self.model.predict_model_batches( - obs_batch, action_batch, device=self.device - ) - next_obs_batch = np.clip(next_obs_batch, -1000, 1000) - - # Call env's reward function. - # Note: Each actual env must implement one to output exact rewards. - rew_batch = self.envs[0].reward(obs_batch, action_batch, next_obs_batch) - - # If env has a `done` method, use it. - if hasattr(self.envs[0], "done"): - dones_batch = self.envs[0].done(next_obs_batch) - # Our sub-environments have timestep limits. - elif hasattr(self.envs[0], "_max_episode_steps"): - dones_batch = np.array( - [ - self._timesteps[idx] >= self.envs[0]._max_episode_steps - for idx in range(self.num_envs) - ] - ) - # Otherwise, assume the episode does not end. 
- else: - dones_batch = np.asarray([False for _ in range(self.num_envs)]) - truncateds_batch = [False for _ in range(self.num_envs)] - - info_batch = [{} for _ in range(self.num_envs)] - - self.cur_obs = next_obs_batch - - return ( - list(next_obs_batch), - list(rew_batch), - list(dones_batch), - truncateds_batch, - info_batch, - ) - - @override(VectorEnv) - def get_sub_environments(self): - return self.envs diff --git a/rllib/env/wrappers/recsim.py b/rllib/env/wrappers/recsim.py deleted file mode 100644 index b1d3e749e5144..0000000000000 --- a/rllib/env/wrappers/recsim.py +++ /dev/null @@ -1,270 +0,0 @@ -"""Tools and utils to create RLlib-ready recommender system envs using RecSim. - -For examples on how to generate a RecSim env class (usable in RLlib): -See ray.rllib.examples.envs.classes.recommender_system_envs_with_recsim.py - -For more information on google's RecSim itself: -https://github.com/google-research/recsim -""" - -from collections import OrderedDict -import gymnasium as gym -from gymnasium.spaces import Dict, Discrete, MultiDiscrete -from gymnasium.wrappers import EnvCompatibility -import numpy as np -from recsim.document import AbstractDocumentSampler -from recsim.simulator import environment, recsim_gym -from recsim.user import AbstractUserModel, AbstractResponse -from typing import Callable, List, Optional, Type - -from ray.rllib.env.env_context import EnvContext -from ray.rllib.utils.error import UnsupportedSpaceException -from ray.rllib.utils.spaces.space_utils import convert_element_to_space_type - - -class RecSimObservationSpaceWrapper(gym.ObservationWrapper): - """Fix RecSim environment's observation space - - In RecSim's observation spaces, the "doc" field is a dictionary keyed by - document IDs. Those IDs are changing every step, thus generating a - different observation space in each time. This causes issues for RLlib - because it expects the observation space to remain the same across steps. - - This environment wrapper fixes that by reindexing the documents by their - positions in the list. - """ - - def __init__(self, env: gym.Env): - super().__init__(env) - obs_space = self.env.observation_space - doc_space = Dict( - OrderedDict( - [ - (str(k), doc) - for k, (_, doc) in enumerate(obs_space["doc"].spaces.items()) - ] - ) - ) - self.observation_space = Dict( - OrderedDict( - [ - ("user", obs_space["user"]), - ("doc", doc_space), - ("response", obs_space["response"]), - ] - ) - ) - self._sampled_obs = self.observation_space.sample() - self.action_space = self.env.action_space - - def observation(self, obs): - new_obs = OrderedDict() - new_obs["user"] = obs["user"] - new_obs["doc"] = {str(k): v for k, (_, v) in enumerate(obs["doc"].items())} - new_obs["response"] = obs["response"] - new_obs = convert_element_to_space_type(new_obs, self._sampled_obs) - return new_obs - - -class RecSimObservationBanditWrapper(gym.ObservationWrapper): - """Fix RecSim environment's observation format - - RecSim's observations are keyed by document IDs, and nested under - "doc" key. - Our Bandits agent expects the observations to be flat 2D array - and under "item" key. - - This environment wrapper converts obs into the right format. 
- """ - - def __init__(self, env: gym.Env): - super().__init__(env) - obs_space = self.env.observation_space - - num_items = len(obs_space["doc"]) - embedding_dim = next(iter(obs_space["doc"].values())).shape[-1] - self.observation_space = Dict( - OrderedDict( - [ - ( - "item", - gym.spaces.Box( - low=-1.0, high=1.0, shape=(num_items, embedding_dim) - ), - ), - ] - ) - ) - self._sampled_obs = self.observation_space.sample() - self.action_space = self.env.action_space - - def observation(self, obs): - new_obs = OrderedDict() - new_obs["item"] = np.vstack(list(obs["doc"].values())) - new_obs = convert_element_to_space_type(new_obs, self._sampled_obs) - return new_obs - - -class RecSimResetWrapper(gym.Wrapper): - """Fix RecSim environment's reset() and close() function - - RecSim's reset() function returns an observation without the "response" - field, breaking RLlib's check. This wrapper fixes that by assigning a - random "response". - - RecSim's close() function raises NotImplementedError. We change the - behavior to doing nothing. - """ - - def __init__(self, env: gym.Env): - super().__init__(env) - self._sampled_obs = self.env.observation_space.sample() - - def reset(self, *, seed=None, options=None): - obs, info = super().reset() - obs["response"] = self.env.observation_space["response"].sample() - obs = convert_element_to_space_type(obs, self._sampled_obs) - return obs, info - - def close(self): - pass - - -class MultiDiscreteToDiscreteActionWrapper(gym.ActionWrapper): - """Convert the action space from MultiDiscrete to Discrete - - At this moment, RLlib's DQN algorithms only work on Discrete action space. - This wrapper allows us to apply DQN algorithms to the RecSim environment. - """ - - def __init__(self, env: gym.Env): - super().__init__(env) - - if not isinstance(env.action_space, MultiDiscrete): - raise UnsupportedSpaceException( - f"Action space {env.action_space} " - f"is not supported by {self.__class__.__name__}" - ) - self.action_space_dimensions = env.action_space.nvec - self.action_space = Discrete(np.prod(self.action_space_dimensions)) - - def action(self, action: int) -> List[int]: - """Convert a Discrete action to a MultiDiscrete action""" - multi_action = [None] * len(self.action_space_dimensions) - for idx, n in enumerate(self.action_space_dimensions): - action, dim_action = divmod(action, n) - multi_action[idx] = dim_action - return multi_action - - -def recsim_gym_wrapper( - recsim_gym_env: gym.Env, - convert_to_discrete_action_space: bool = False, - wrap_for_bandits: bool = False, -) -> gym.Env: - """Makes sure a RecSim gym.Env can ba handled by RLlib. - - In RecSim's observation spaces, the "doc" field is a dictionary keyed by - document IDs. Those IDs are changing every step, thus generating a - different observation space in each time. This causes issues for RLlib - because it expects the observation space to remain the same across steps. - - Also, RecSim's reset() function returns an observation without the - "response" field, breaking RLlib's check. This wrapper fixes that by - assigning a random "response". - - Args: - recsim_gym_env: The RecSim gym.Env instance. Usually resulting from a - raw RecSim env having been passed through RecSim's utility function: - `recsim.simulator.recsim_gym.RecSimGymEnv()`. - convert_to_discrete_action_space: Optional bool indicating, whether - the action space of the created env class should be Discrete - (rather than MultiDiscrete, even if slate size > 1). 
This is useful - for algorithms that don't support MultiDiscrete action spaces, - such as RLlib's DQN. If None, `convert_to_discrete_action_space` - may also be provided via the EnvContext (config) when creating an - actual env instance. - wrap_for_bandits: Bool indicating, whether this RecSim env should be - wrapped for use with our Bandits agent. - - Returns: - An RLlib-ready gym.Env instance. - """ - env = RecSimResetWrapper(recsim_gym_env) - env = RecSimObservationSpaceWrapper(env) - if convert_to_discrete_action_space: - env = MultiDiscreteToDiscreteActionWrapper(env) - if wrap_for_bandits: - env = RecSimObservationBanditWrapper(env) - return env - - -def make_recsim_env( - recsim_user_model_creator: Callable[[EnvContext], AbstractUserModel], - recsim_document_sampler_creator: Callable[[EnvContext], AbstractDocumentSampler], - reward_aggregator: Callable[[List[AbstractResponse]], float], -) -> Type[gym.Env]: - """Creates a RLlib-ready gym.Env class given RecSim user and doc models. - - See https://github.com/google-research/recsim for more information on how to - build the required components from scratch in python using RecSim. - - Args: - recsim_user_model_creator: A callable taking an EnvContext and returning - a RecSim AbstractUserModel instance to use. - recsim_document_sampler_creator: A callable taking an EnvContext and - returning a RecSim AbstractDocumentSampler - to use. This will include a AbstractDocument as well. - reward_aggregator: Callable taking a list of RecSim - AbstractResponse instances and returning a float (aggregated - reward). - - Returns: - An RLlib-ready gym.Env class to use inside an Algorithm. - """ - - class _RecSimEnv(gym.Wrapper): - def __init__(self, config: Optional[EnvContext] = None): - - # Override with default values, in case they are not set by the user. - default_config = { - "num_candidates": 10, - "slate_size": 2, - "resample_documents": True, - "seed": 0, - "convert_to_discrete_action_space": False, - "wrap_for_bandits": False, - } - if config is None or isinstance(config, dict): - config = EnvContext(config or default_config, worker_index=0) - config.set_defaults(default_config) - - # Create the RecSim user model instance. - recsim_user_model = recsim_user_model_creator(config) - # Create the RecSim document sampler instance. - recsim_document_sampler = recsim_document_sampler_creator(config) - - # Create a raw RecSim environment (not yet a gym.Env!). - raw_recsim_env = environment.SingleUserEnvironment( - recsim_user_model, - recsim_document_sampler, - config["num_candidates"], - config["slate_size"], - resample_documents=config["resample_documents"], - ) - # Convert raw RecSim env to a gym.Env. - gym_env = recsim_gym.RecSimGymEnv(raw_recsim_env, reward_aggregator) - # Wrap for the new gym API (RecSim does not support this). - gym_env = EnvCompatibility(gym_env) - - # Fix observation space and - if necessary - convert to discrete - # action space (from multi-discrete). - env = recsim_gym_wrapper( - gym_env, - config["convert_to_discrete_action_space"], - config["wrap_for_bandits"], - ) - # Call the super (Wrapper constructor) passing it the created env. - super().__init__(env=env) - - return _RecSimEnv diff --git a/rllib/env/wrappers/recsim_wrapper.py b/rllib/env/wrappers/recsim_wrapper.py deleted file mode 100644 index 3251ea1a3a3e7..0000000000000 --- a/rllib/env/wrappers/recsim_wrapper.py +++ /dev/null @@ -1,14 +0,0 @@ -# Deprecated module: Use ray.rllib.env.wrappers.recsim instead! 
-from ray.rllib.env.wrappers.recsim import ( # noqa: F401 - make_recsim_env, - MultiDiscreteToDiscreteActionWrapper, - RecSimObservationSpaceWrapper, - RecSimResetWrapper, -) -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - old="ray.rllib.env.wrappers.recsim_wrapper", - new="ray.rllib.env.wrappers.recsim", - error=True, -) diff --git a/rllib/env/wrappers/uncertainty_wrappers.py b/rllib/env/wrappers/uncertainty_wrappers.py deleted file mode 100644 index e8e2d1fa48337..0000000000000 --- a/rllib/env/wrappers/uncertainty_wrappers.py +++ /dev/null @@ -1,23 +0,0 @@ -########## -# Contribution by the Center on Long-Term Risk: -# https://github.com/longtermrisk/marltoolbox -########## -import numpy as np - - -def add_RewardUncertaintyEnvClassWrapper( - EnvClass, reward_uncertainty_std, reward_uncertainty_mean=0.0 -): - class RewardUncertaintyEnvClassWrapper(EnvClass): - def step(self, action): - observations, rewards, done, info = super().step(action) - return observations, self.reward_wrapper(rewards), done, info - - def reward_wrapper(self, reward_dict): - for k in reward_dict.keys(): - reward_dict[k] += np.random.normal( - loc=reward_uncertainty_mean, scale=reward_uncertainty_std, size=() - ) - return reward_dict - - return RewardUncertaintyEnvClassWrapper diff --git a/rllib/examples/_old_api_stack/custom_keras_model.py b/rllib/examples/_old_api_stack/custom_keras_model.py index cdf1f516ef329..e3ccad874b300 100644 --- a/rllib/examples/_old_api_stack/custom_keras_model.py +++ b/rllib/examples/_old_api_stack/custom_keras_model.py @@ -127,7 +127,9 @@ def on_train_result(self, *, algorithm, result, **kwargs): config = ( get_trainable_cls(args.run) .get_default_config() - .environment("ALE/Breakout-v5" if args.use_vision_network else "CartPole-v1") + .environment( + "ale_py:ALE/Breakout-v5" if args.use_vision_network else "CartPole-v1" + ) .framework("tf") .callbacks(MyCallbacks) .training( diff --git a/rllib/examples/connectors/frame_stacking.py b/rllib/examples/connectors/frame_stacking.py index 554bd1c8f20d3..103ae8de5f113 100644 --- a/rllib/examples/connectors/frame_stacking.py +++ b/rllib/examples/connectors/frame_stacking.py @@ -97,7 +97,7 @@ # Use Pong by default. parser.set_defaults( enable_new_api_stack=True, - env="ALE/Pong-v5", + env="ale_py:ALE/Pong-v5", ) parser.add_argument( "--num-frames", diff --git a/rllib/examples/curiosity/euclidian_distance_based_curiosity.py b/rllib/examples/curiosity/euclidian_distance_based_curiosity.py index 0d73c6b50c1f0..d471c17f18587 100644 --- a/rllib/examples/curiosity/euclidian_distance_based_curiosity.py +++ b/rllib/examples/curiosity/euclidian_distance_based_curiosity.py @@ -67,12 +67,11 @@ ) from ray.tune.registry import get_trainable_cls -# TODO (sven): SB3's PPO does seem to learn MountainCar-v0 until a reward of ~-110. -# We might have to play around some more with different initializations, more -# randomized SGD minibatching (we don't shuffle batch rn), etc.. to get to these -# results as well. +# TODO (sven): SB3's PPO learns MountainCar-v0 until a reward of ~-110. +# We might have to play around some more with different initializations, etc.. +# to get to these results as well. 
parser = add_rllib_example_script_args( - default_reward=-130.0, default_iters=2000, default_timesteps=1000000 + default_reward=-140.0, default_iters=2000, default_timesteps=1000000 ) parser.set_defaults( enable_new_api_stack=True, diff --git a/rllib/examples/curiosity/intrinsic_curiosity_model_based_curiosity.py b/rllib/examples/curiosity/intrinsic_curiosity_model_based_curiosity.py index 323bc20c8a582..b70cc89bdbe7d 100644 --- a/rllib/examples/curiosity/intrinsic_curiosity_model_based_curiosity.py +++ b/rllib/examples/curiosity/intrinsic_curiosity_model_based_curiosity.py @@ -73,6 +73,8 @@ """ from collections import defaultdict +import numpy as np + from ray import tune from ray.rllib.algorithms.algorithm_config import AlgorithmConfig from ray.rllib.algorithms.callbacks import DefaultCallbacks @@ -132,9 +134,9 @@ def on_episode_step( rl_module, **kwargs, ): - obs = episode.get_observations(-1) num_rows = env.envs[0].unwrapped.nrow num_cols = env.envs[0].unwrapped.ncol + obs = np.argmax(episode.get_observations(-1)) row = obs // num_cols col = obs % num_rows curr_dist = (row**2 + col**2) ** 0.5 @@ -298,7 +300,7 @@ def on_sample_end( success_key = f"{ENV_RUNNER_RESULTS}/max_dist_travelled_across_running_episodes" stop = { - success_key: 8.0, + success_key: 12.0, f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": args.stop_reward, NUM_ENV_STEPS_SAMPLED_LIFETIME: args.stop_timesteps, } diff --git a/rllib/examples/envs/env_rendering_and_recording.py b/rllib/examples/envs/env_rendering_and_recording.py index ba02f50b7f168..77669649e66c6 100644 --- a/rllib/examples/envs/env_rendering_and_recording.py +++ b/rllib/examples/envs/env_rendering_and_recording.py @@ -73,7 +73,10 @@ from ray import tune parser = add_rllib_example_script_args(default_reward=20.0) -parser.set_defaults(env="ALE/Pong-v5") +parser.set_defaults( + enable_new_api_stack=True, + env="ale_py:ALE/Pong-v5", +) class EnvRenderCallback(DefaultCallbacks): @@ -129,10 +132,10 @@ def on_episode_step( # If we have a vector env, only render the sub-env at index 0. if isinstance(env.unwrapped, gym.vector.VectorEnv): - image = env.envs[0].render() + image = env.unwrapped.envs[0].render() # Render the gym.Env. else: - image = env.render() + image = env.unwrapped.render() # Original render images for CartPole are 400x600 (hxw). We'll downsize here to # a very small dimension (to save space and bandwidth). @@ -239,14 +242,10 @@ def on_sample_end( if __name__ == "__main__": args = parser.parse_args() - assert ( - args.enable_new_api_stack - ), "Must set --enable-new-api-stack when running this script!" - # Register our environment with tune. def _env_creator(cfg): cfg.update({"render_mode": "rgb_array"}) - if args.env.startswith("ALE/"): + if args.env.startswith("ale_py:ALE/"): cfg.update( { # Make analogous to old v4 + NoFrameskip. diff --git a/rllib/examples/evaluation/custom_evaluation.py b/rllib/examples/evaluation/custom_evaluation.py index a6d4a1c3e029f..f4d05ea3bd26e 100644 --- a/rllib/examples/evaluation/custom_evaluation.py +++ b/rllib/examples/evaluation/custom_evaluation.py @@ -112,12 +112,12 @@ def custom_eval_function( # `set_corridor_length` method on these. 
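The `np.argmax` change in the curiosity callback above suggests that, on the new stack, the FrozenLake observation reaching the callback arrives one-hot encoded rather than as a plain integer index, so the state has to be recovered before deriving row and column. A small, hypothetical sketch of that recovery (map size assumed 4x4):

import numpy as np

num_rows, num_cols = 4, 4                       # assumed FrozenLake 4x4 map
one_hot_obs = np.eye(num_rows * num_cols)[9]    # state 9, one-hot encoded
state = int(np.argmax(one_hot_obs))             # -> 9
row, col = state // num_cols, state % num_cols  # -> (2, 1)
dist = (row ** 2 + col ** 2) ** 0.5             # distance from the start cell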
eval_workers.foreach_worker( func=lambda worker: ( - env.set_corridor_length( + env.unwrapped.set_corridor_length( args.corridor_length_eval_worker_1 if worker.worker_index == 1 else args.corridor_length_eval_worker_2 ) - for env in worker.env.envs + for env in worker.env.unwrapped.envs ) ) diff --git a/rllib/examples/metrics/custom_metrics_in_env_runners.py b/rllib/examples/metrics/custom_metrics_in_env_runners.py index 3b10ac4966417..cba86a50afb60 100644 --- a/rllib/examples/metrics/custom_metrics_in_env_runners.py +++ b/rllib/examples/metrics/custom_metrics_in_env_runners.py @@ -301,7 +301,7 @@ def _get_pacman_yx_pos(self, env): register_env( "env", lambda cfg: wrap_atari_for_new_api_stack( - gym.make("ALE/MsPacman-v5", **cfg, **{"render_mode": "rgb_array"}), + gym.make("ale_py:ALE/MsPacman-v5", **cfg, **{"render_mode": "rgb_array"}), framestack=4, ), ) diff --git a/rllib/examples/ray_tune/custom_experiment.py b/rllib/examples/ray_tune/custom_experiment.py index d0e424911d468..779c5c1fd0410 100644 --- a/rllib/examples/ray_tune/custom_experiment.py +++ b/rllib/examples/ray_tune/custom_experiment.py @@ -105,7 +105,7 @@ def my_experiment(config: Dict): # Extract the gymnasium env object from the created algo (its local # SingleAgentEnvRunner worker). Note that the env in this single-agent # case is a gymnasium vector env and that we get its first sub-env here. - env = local_env_runner.env.envs[0] + env = local_env_runner.env.unwrapped.envs[0] # The local worker (SingleAgentEnvRunner) rl_module = local_env_runner.module diff --git a/rllib/examples/rl_modules/custom_cnn_rl_module.py b/rllib/examples/rl_modules/custom_cnn_rl_module.py index a8aac2980530a..4001f3e21d6b8 100644 --- a/rllib/examples/rl_modules/custom_cnn_rl_module.py +++ b/rllib/examples/rl_modules/custom_cnn_rl_module.py @@ -66,7 +66,7 @@ parser = add_rllib_example_script_args(default_iters=100, default_timesteps=600000) parser.set_defaults( enable_new_api_stack=True, - env="ALE/Pong-v5", + env="ale_py:ALE/Pong-v5", ) diff --git a/rllib/models/tests/test_preprocessors.py b/rllib/models/tests/test_preprocessors.py index 51ad457dabe7e..03a344de32893 100644 --- a/rllib/models/tests/test_preprocessors.py +++ b/rllib/models/tests/test_preprocessors.py @@ -90,12 +90,12 @@ def test_gym_preprocessors(self): p2 = ModelCatalog.get_preprocessor(gym.make("FrozenLake-v1")) self.assertEqual(type(p2), OneHotPreprocessor) - p3 = ModelCatalog.get_preprocessor(gym.make("ALE/MsPacman-ram-v5")) + p3 = ModelCatalog.get_preprocessor(gym.make("ale_py:ALE/MsPacman-ram-v5")) self.assertEqual(type(p3), AtariRamPreprocessor) p4 = ModelCatalog.get_preprocessor( gym.make( - "ALE/MsPacman-v5", + "ale_py:ALE/MsPacman-v5", frameskip=1, ) ) diff --git a/rllib/tuned_examples/appo/pong-appo-w-rl-modules-and-learner.yaml b/rllib/tuned_examples/appo/pong-appo-w-rl-modules-and-learner.yaml index 94088ab67c29e..2c11e896744ed 100644 --- a/rllib/tuned_examples/appo/pong-appo-w-rl-modules-and-learner.yaml +++ b/rllib/tuned_examples/appo/pong-appo-w-rl-modules-and-learner.yaml @@ -2,7 +2,7 @@ # This can reach 18.0 reward in ~10 minutes on 4x M60 GPUs # with 30 rollout workers, 4 learning workers, and 8 envs per rollout worker. 
appo-pongnoframeskip-v5: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: APPO stop: env_runners/episode_return_mean: 18.0 diff --git a/rllib/tuned_examples/appo/pong-appo.yaml b/rllib/tuned_examples/appo/pong-appo.yaml index 837e0559a8f8f..3b1ecd9215cba 100644 --- a/rllib/tuned_examples/appo/pong-appo.yaml +++ b/rllib/tuned_examples/appo/pong-appo.yaml @@ -5,7 +5,7 @@ # APPO can also solve Pong in 2.5 million timesteps, which is # 2x more efficient than that of IMPALA. pong-appo: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: APPO stop: env_runners/episode_return_mean: 18.0 diff --git a/rllib/tuned_examples/bc/benchmark_atari_pong_bc.py b/rllib/tuned_examples/bc/benchmark_atari_pong_bc.py index f5d7727bb68a5..d084f61fb9f4c 100644 --- a/rllib/tuned_examples/bc/benchmark_atari_pong_bc.py +++ b/rllib/tuned_examples/bc/benchmark_atari_pong_bc.py @@ -128,7 +128,7 @@ def _make_learner_connector(observation_space, action_space): # in the collection of the `rl_unplugged` data. def _env_creator(cfg): return wrap_atari_for_new_api_stack( - gym.make("ALE/Pong-v5", **cfg), + gym.make("ale_py:ALE/Pong-v5", **cfg), # Perform frame-stacking through ConnectorV2 API. framestack=4, dim=84, diff --git a/rllib/tuned_examples/compact-regression-test.yaml b/rllib/tuned_examples/compact-regression-test.yaml index 21dbdb6d1be41..80003257ccb74 100644 --- a/rllib/tuned_examples/compact-regression-test.yaml +++ b/rllib/tuned_examples/compact-regression-test.yaml @@ -6,7 +6,7 @@ # You can find the reference results here: # https://github.com/ray-project/ray/tree/master/release/release_logs atari-impala: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: IMPALA num_samples: 4 stop: @@ -25,7 +25,7 @@ atari-impala: ] num_gpus: 1 atari-ppo-tf: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: PPO num_samples: 4 stop: @@ -51,7 +51,7 @@ atari-ppo-tf: vf_share_layers: true num_gpus: 1 atari-ppo-torch: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: PPO num_samples: 4 stop: @@ -78,7 +78,7 @@ atari-ppo-torch: vf_share_layers: true num_gpus: 1 apex: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: APEX num_samples: 4 stop: @@ -109,7 +109,7 @@ apex: target_network_update_freq: 50000 min_sample_timesteps_per_iteration: 25000 atari-a2c: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: A2C num_samples: 4 stop: @@ -127,7 +127,7 @@ atari-a2c: [20000000, 0.000000000001], ] atari-basic-dqn: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: DQN num_samples: 4 stop: diff --git a/rllib/tuned_examples/dqn/atari-dist-dqn.yaml b/rllib/tuned_examples/dqn/atari-dist-dqn.yaml index 1de99ce54f73e..53f72ca5bb851 100644 --- a/rllib/tuned_examples/dqn/atari-dist-dqn.yaml +++ b/rllib/tuned_examples/dqn/atari-dist-dqn.yaml @@ -2,10 +2,10 @@ atari-dist-dqn: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: DQN config: # Make analogous to old v4 + NoFrameskip. 
diff --git a/rllib/tuned_examples/dqn/atari-dqn.yaml b/rllib/tuned_examples/dqn/atari-dqn.yaml index 287446e232c4a..928820925756c 100644 --- a/rllib/tuned_examples/dqn/atari-dqn.yaml +++ b/rllib/tuned_examples/dqn/atari-dqn.yaml @@ -4,10 +4,10 @@ atari-basic-dqn: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: DQN config: # Works for both torch and tf. diff --git a/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml b/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml index dfa84c8a44667..84d96828da2d3 100644 --- a/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml +++ b/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml @@ -4,10 +4,10 @@ dueling-ddqn: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: DQN config: # Works for both torch and tf. diff --git a/rllib/tuned_examples/dqn/pong-dqn.yaml b/rllib/tuned_examples/dqn/pong-dqn.yaml index b6bb32cc7673a..08b51412aeae4 100644 --- a/rllib/tuned_examples/dqn/pong-dqn.yaml +++ b/rllib/tuned_examples/dqn/pong-dqn.yaml @@ -1,7 +1,7 @@ # @OldAPIStack # You can expect ~20 reward within 1.1m timesteps / 2.1 hours on a K80 GPU pong-deterministic-dqn: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: DQN stop: env_runners/episode_return_mean: 20 diff --git a/rllib/tuned_examples/dqn/pong-rainbow.yaml b/rllib/tuned_examples/dqn/pong-rainbow.yaml index 0a0c05299fe4f..58abda37344f9 100644 --- a/rllib/tuned_examples/dqn/pong-rainbow.yaml +++ b/rllib/tuned_examples/dqn/pong-rainbow.yaml @@ -1,6 +1,6 @@ # @OldAPIStack pong-deterministic-rainbow: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: DQN stop: env_runners/episode_return_mean: 20 diff --git a/rllib/tuned_examples/dreamerv3/atari_100k.py b/rllib/tuned_examples/dreamerv3/atari_100k.py index 443ce9b13d163..740da2840f68a 100644 --- a/rllib/tuned_examples/dreamerv3/atari_100k.py +++ b/rllib/tuned_examples/dreamerv3/atari_100k.py @@ -9,7 +9,7 @@ """ # Run with: -# python [this script name].py --env ALE/[gym ID e.g. Pong-v5] +# python [this script name].py --env ale_py:ALE/[gym ID e.g. Pong-v5] # To see all available options: # python [this script name].py --help diff --git a/rllib/tuned_examples/dreamerv3/atari_200M.py b/rllib/tuned_examples/dreamerv3/atari_200M.py index 2339d345d2f86..7cc69a0ab228f 100644 --- a/rllib/tuned_examples/dreamerv3/atari_200M.py +++ b/rllib/tuned_examples/dreamerv3/atari_200M.py @@ -9,7 +9,7 @@ """ # Run with: -# python [this script name].py --env ALE/[gym ID e.g. Pong-v5] +# python [this script name].py --env ale_py:ALE/[gym ID e.g. Pong-v5] # To see all available options: # python [this script name].py --help
diff --git a/rllib/tuned_examples/impala/atari-impala-large.yaml b/rllib/tuned_examples/impala/atari-impala-large.yaml index 71d8f4dc3de1f..0c4287801bd0b 100644 --- a/rllib/tuned_examples/impala/atari-impala-large.yaml +++ b/rllib/tuned_examples/impala/atari-impala-large.yaml @@ -4,10 +4,10 @@ atari-impala: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: IMPALA stop: timesteps_total: 3000000 diff --git a/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml b/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml index 7716eeb43830d..c97120008c31f 100644 --- a/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml +++ b/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml @@ -2,7 +2,7 @@ # Runs on a p2.8xlarge single head node machine. # Should reach ~400 reward in about 1h and after 15-20M ts. atari-impala: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: IMPALA config: # Works for both torch and tf. diff --git a/rllib/tuned_examples/impala/atari-impala.yaml b/rllib/tuned_examples/impala/atari-impala.yaml index 09966556924e4..23ba57207b366 100644 --- a/rllib/tuned_examples/impala/atari-impala.yaml +++ b/rllib/tuned_examples/impala/atari-impala.yaml @@ -4,10 +4,10 @@ atari-impala: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: IMPALA config: # Make analogous to old v4 + NoFrameskip. diff --git a/rllib/tuned_examples/impala/pong-impala-fast.yaml b/rllib/tuned_examples/impala/pong-impala-fast.yaml index f13e276c9744d..fca3a179527c9 100644 --- a/rllib/tuned_examples/impala/pong-impala-fast.yaml +++ b/rllib/tuned_examples/impala/pong-impala-fast.yaml @@ -5,7 +5,7 @@ # 32 workers -> 7 minutes # See also: pong-impala.yaml, pong-impala-vectorized.yaml pong-impala-fast: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: IMPALA config: # Make analogous to old v4 + NoFrameskip. diff --git a/rllib/tuned_examples/impala/pong-impala-vectorized.yaml b/rllib/tuned_examples/impala/pong-impala-vectorized.yaml index 5778848c194bf..1da8bebf68462 100644 --- a/rllib/tuned_examples/impala/pong-impala-vectorized.yaml +++ b/rllib/tuned_examples/impala/pong-impala-vectorized.yaml @@ -3,7 +3,7 @@ # with 32 workers and 10 envs per worker. This is more efficient than the non-vectorized # configuration which requires 128 workers to achieve the same performance. pong-impala-vectorized: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: IMPALA config: # Make analogous to old v4 + NoFrameskip. diff --git a/rllib/tuned_examples/impala/pong-impala.yaml b/rllib/tuned_examples/impala/pong-impala.yaml index ba6afa441554b..85d44f439b31a 100644 --- a/rllib/tuned_examples/impala/pong-impala.yaml +++ b/rllib/tuned_examples/impala/pong-impala.yaml @@ -5,7 +5,7 @@ # 16 workers -> 40 min+ # See also: pong-impala-fast.yaml, pong-impala-vectorized.yaml pong-impala: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: IMPALA config: # Make analogous to old v4 + NoFrameskip.
diff --git a/rllib/tuned_examples/impala/pong_impala.py b/rllib/tuned_examples/impala/pong_impala.py index 8802abf6a3b23..3fe08f9c35eda 100644 --- a/rllib/tuned_examples/impala/pong_impala.py +++ b/rllib/tuned_examples/impala/pong_impala.py @@ -15,7 +15,7 @@ parser = add_rllib_example_script_args() parser.set_defaults( enable_new_api_stack=True, - env="ALE/Pong-v5", + env="ale_py:ALE/Pong-v5", ) parser.add_argument( "--use-tiny-cnn", diff --git a/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py b/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py index 2f7b100500c6d..ca331fe9a861c 100644 --- a/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py +++ b/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py @@ -15,7 +15,7 @@ from ray import tune parser = add_rllib_example_script_args() -parser.set_defaults(env="ALE/Pong-v5") +parser.set_defaults(env="ale_py:ALE/Pong-v5") parser.add_argument( "--use-tiny-cnn", action="store_true", diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py index 7abcfdff245ef..ad298550e8a31 100644 --- a/rllib/tuned_examples/ppo/atari_ppo.py +++ b/rllib/tuned_examples/ppo/atari_ppo.py @@ -14,7 +14,10 @@ default_timesteps=3000000, default_iters=100000000000, ) -parser.set_defaults(enable_new_api_stack=True) +parser.set_defaults( + enable_new_api_stack=True, + env="ale_py:ALE/Pong-v5", +) # Use `parser` to add your own custom command line options to this script # and (if needed) use their values toset up `config` below. args = parser.parse_args() diff --git a/rllib/tuned_examples/sac/atari-sac.yaml b/rllib/tuned_examples/sac/atari-sac.yaml index 000a62d17e747..9626327d463fc 100644 --- a/rllib/tuned_examples/sac/atari-sac.yaml +++ b/rllib/tuned_examples/sac/atari-sac.yaml @@ -5,8 +5,8 @@ atari-sac-tf-and-torch: env: grid_search: - - ALE/MsPacman-v5 - - ALE/Pong-v5 + - ale_py:ALE/MsPacman-v5 + - ale_py:ALE/Pong-v5 run: SAC stop: timesteps_total: 20000000 diff --git a/rllib/tuned_examples/sac/mspacman-sac.yaml b/rllib/tuned_examples/sac/mspacman-sac.yaml index b2f6b5f80e2c5..16d23a4af22b5 100644 --- a/rllib/tuned_examples/sac/mspacman-sac.yaml +++ b/rllib/tuned_examples/sac/mspacman-sac.yaml @@ -3,7 +3,7 @@ # to ~750 reward in 40k timesteps. Run e.g. on a g3.4xlarge with `num_gpus=1`. # Uses the hyperparameters published in [2] (see rllib/agents/sac/README.md). mspacman-sac-tf: - env: ALE/MsPacman-v5 + env: ale_py:ALE/MsPacman-v5 run: SAC stop: env_runners/episode_return_mean: 800 diff --git a/rllib/utils/error.py b/rllib/utils/error.py index 5671abc10eef3..d2b9db4c351a3 100644 --- a/rllib/utils/error.py +++ b/rllib/utils/error.py @@ -67,7 +67,7 @@ class NotSerializable(Exception): 1) Run `pip install gymnasium` on your command line. 
2) Change all your import statements in your code from `import gym` -> `import gymnasium as gym` OR - `from gym.space import Discrete` -> `from gymnasium.spaces import Discrete` + `from gym.spaces import Discrete` -> `from gymnasium.spaces import Discrete` For your custom (single agent) gym.Env classes: 3.1) Either wrap your old Env class via the provided `from gymnasium.wrappers import diff --git a/rllib/utils/exploration/tests/test_curiosity.py b/rllib/utils/exploration/tests/test_curiosity.py index 4531154371f0b..bcc603171264b 100644 --- a/rllib/utils/exploration/tests/test_curiosity.py +++ b/rllib/utils/exploration/tests/test_curiosity.py @@ -1,23 +1,14 @@ -from collections import deque -import gymnasium as gym -import minigrid import numpy as np import sys import unittest import ray -from ray import air, tune -from ray.air.constants import TRAINING_ITERATION from ray.rllib.algorithms.callbacks import DefaultCallbacks import ray.rllib.algorithms.ppo as ppo -from ray.rllib.utils.test_utils import check_learning_achieved from ray.rllib.utils.metrics import ( ENV_RUNNER_RESULTS, EPISODE_RETURN_MAX, - EPISODE_RETURN_MEAN, ) -from ray.rllib.utils.numpy import one_hot -from ray.tune import register_env class MyCallBack(DefaultCallbacks): @@ -46,96 +37,6 @@ def on_sample_end(self, *, worker, samples, **kwargs): self.deltas = [] -class OneHotWrapper(gym.core.ObservationWrapper): - def __init__(self, env, vector_index, framestack): - super().__init__(env) - self.framestack = framestack - # 49=7x7 field of vision; 11=object types; 6=colors; 3=state types. - # +4: Direction. - self.single_frame_dim = 49 * (11 + 6 + 3) + 4 - self.init_x = None - self.init_y = None - self.x_positions = [] - self.y_positions = [] - self.x_y_delta_buffer = deque(maxlen=100) - self.vector_index = vector_index - self.frame_buffer = deque(maxlen=self.framestack) - for _ in range(self.framestack): - self.frame_buffer.append(np.zeros((self.single_frame_dim,))) - - self.observation_space = gym.spaces.Box( - 0.0, 1.0, shape=(self.single_frame_dim * self.framestack,), dtype=np.float32 - ) - - def observation(self, obs): - # Debug output: max-x/y positions to watch exploration progress. - if self.step_count == 0: - for _ in range(self.framestack): - self.frame_buffer.append(np.zeros((self.single_frame_dim,))) - if self.vector_index == 0: - if self.x_positions: - max_diff = max( - np.sqrt( - (np.array(self.x_positions) - self.init_x) ** 2 - + (np.array(self.y_positions) - self.init_y) ** 2 - ) - ) - self.x_y_delta_buffer.append(max_diff) - print( - "100-average dist travelled={}".format( - np.mean(self.x_y_delta_buffer) - ) - ) - self.x_positions = [] - self.y_positions = [] - self.init_x = self.agent_pos[0] - self.init_y = self.agent_pos[1] - - # Are we carrying the key? - # if self.carrying is not None: - # print("Carrying KEY!!") - - self.x_positions.append(self.agent_pos[0]) - self.y_positions.append(self.agent_pos[1]) - - # One-hot the last dim into 11, 6, 3 one-hot vectors, then flatten. - objects = one_hot(obs[:, :, 0], depth=11) - colors = one_hot(obs[:, :, 1], depth=6) - states = one_hot(obs[:, :, 2], depth=3) - # Is the door we see open? 
- # for x in range(7): - # for y in range(7): - # if objects[x, y, 4] == 1.0 and states[x, y, 0] == 1.0: - # print("Door OPEN!!") - - all_ = np.concatenate([objects, colors, states], -1) - all_flat = np.reshape(all_, (-1,)) - direction = one_hot(np.array(self.agent_dir), depth=4).astype(np.float32) - single_frame = np.concatenate([all_flat, direction]) - self.frame_buffer.append(single_frame) - return np.concatenate(self.frame_buffer) - - -def env_maker(config): - name = config.get("name", "MiniGrid-Empty-5x5-v0") - framestack = config.get("framestack", 4) - env = gym.make(name) - # Make it impossible to reach goal by chance. - env = gym.wrappers.TimeLimit(env, max_episode_steps=15) - # Only use image portion of observation (discard goal and direction). - env = minigrid.wrappers.ImgObsWrapper(env) - env = OneHotWrapper( - env, - config.vector_index if hasattr(config, "vector_index") else 0, - framestack=framestack, - ) - return env - - -register_env("mini-grid", env_maker) -CONV_FILTERS = [[16, [11, 11], 3], [32, [9, 9], 3], [64, [5, 5], 3]] - - class TestCuriosity(unittest.TestCase): @classmethod def setUpClass(cls): @@ -187,10 +88,7 @@ def test_curiosity_on_frozen_lake(self): "type": "StochasticSampling", }, }, - ) - # TODO (Kourosh): We need to provide examples on how we do curiosity with - # RLModule API - .training(lr=0.001) + ).training(lr=0.001) ) num_iterations = 10 @@ -207,106 +105,6 @@ def test_curiosity_on_frozen_lake(self): algo.stop() self.assertTrue(learnt) - # Disable this check for now. Add too much flakyness to test. - # if fw == "tf": - # # W/o Curiosity. Expect to learn nothing. - # print("Trying w/o curiosity (not expected to learn).") - # config["exploration_config"] = { - # "type": "StochasticSampling", - # } - # algo = ppo.PPO(config=config) - # rewards_wo = 0.0 - # for _ in range(num_iterations): - # result = algo.train() - # rewards_wo += result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] - # print(result) - # algo.stop() - # self.assertTrue(rewards_wo == 0.0) - # print("Did not reach goal w/o curiosity!") - - def test_curiosity_on_partially_observable_domain(self): - config = ( - ppo.PPOConfig() - .environment( - "mini-grid", - env_config={ - # Also works with: - # - MiniGrid-MultiRoom-N4-S5-v0 - # - MiniGrid-MultiRoom-N2-S4-v0 - "name": "MiniGrid-Empty-8x8-v0", - "framestack": 1, # seems to work even w/o framestacking - }, - ) - .env_runners( - num_envs_per_env_runner=4, - num_env_runners=0, - exploration_config={ - "type": "Curiosity", - # For the feature NN, use a non-LSTM fcnet (same as the one - # in the policy model). - "eta": 0.1, - "lr": 0.0003, # 0.0003 or 0.0005 seem to work fine as well. - "feature_dim": 64, - # No actual feature net: map directly from observations to feature - # vector (linearly). 
- "feature_net_config": { - "fcnet_hiddens": [], - "fcnet_activation": "relu", - }, - "sub_exploration": { - "type": "StochasticSampling", - }, - }, - ) - .training( - model={ - "fcnet_hiddens": [256, 256], - "fcnet_activation": "relu", - }, - num_epochs=8, - ) - ) - - min_reward = 0.001 - stop = { - TRAINING_ITERATION: 25, - f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": min_reward, - } - # To replay: - # algo = ppo.PPO(config=config) - # algo.restore("[checkpoint file]") - # env = env_maker(config["env_config"]) - # obs, info = env.reset() - # for _ in range(10000): - # obs, reward, done, truncated, info = env.step( - # algo.compute_single_action(s) - # ) - # if done: - # obs, info = env.reset() - # env.render() - - results = tune.Tuner( - "PPO", - param_space=config, - run_config=air.RunConfig(stop=stop, verbose=1), - ).fit() - check_learning_achieved(results, min_reward) - iters = results.get_best_result().metrics[TRAINING_ITERATION] - print("Reached in {} iterations.".format(iters)) - - # config_wo = config.copy() - # config_wo["exploration_config"] = {"type": "StochasticSampling"} - # stop_wo = stop.copy() - # stop_wo[TRAINING_ITERATION] = iters - # results = tune.Tuner( - # "PPO", param_space=config_wo, stop=stop_wo, verbose=1).fit() - # try: - # check_learning_achieved(results, min_reward) - # except ValueError: - # print("Did not learn w/o curiosity (expected).") - # else: - # raise ValueError("Learnt w/o curiosity (not expected)!") - if __name__ == "__main__": import pytest diff --git a/rllib/utils/images.py b/rllib/utils/images.py index 91e6bc610843f..0716ea5c45b4b 100644 --- a/rllib/utils/images.py +++ b/rllib/utils/images.py @@ -15,31 +15,29 @@ except ImportError: cv2 = None -if cv2 is None: - try: - from skimage import color, io, transform - - logger.debug("CV2 not found for image processing, using Skimage.") - except ImportError: - raise ModuleNotFoundError("Either scikit-image or opencv is required") - @DeveloperAPI def resize(img: np.ndarray, height: int, width: int) -> np.ndarray: - if cv2: - return cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) - return transform.resize(img, (height, width)) + if not cv2: + raise ModuleNotFoundError( + "`opencv` not installed! Do `pip install opencv-python`" + ) + return cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) @DeveloperAPI def rgb2gray(img: np.ndarray) -> np.ndarray: - if cv2: - return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) - return color.rgb2gray(img) + if not cv2: + raise ModuleNotFoundError( + "`opencv` not installed! Do `pip install opencv-python`" + ) + return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) @DeveloperAPI def imread(img_file: str) -> np.ndarray: - if cv2: - return cv2.imread(img_file).astype(np.float32) - return io.imread(img_file).astype(np.float32) + if not cv2: + raise ModuleNotFoundError( + "`opencv` not installed! Do `pip install opencv-python`" + ) + return cv2.imread(img_file).astype(np.float32)
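The renamed environment IDs rely on gymnasium's `module:env_id` lookup: newer gymnasium releases no longer auto-register the ALE environments, so the `ale_py:` prefix tells gymnasium to import `ale_py` (which registers the `ALE/*` envs) before resolving the ID. A minimal sketch, assuming a recent gymnasium and ale_py install; the `gym.register_envs` call follows the ale_py docs and only makes the dependency explicit:

import gymnasium as gym

# Variant 1: the form used throughout these files. gymnasium imports `ale_py`
# before looking up the ID, so no explicit import is needed.
env = gym.make("ale_py:ALE/Pong-v5")

# Variant 2: import and register explicitly, then use the bare ID.
import ale_py
gym.register_envs(ale_py)  # keeps linters from flagging the "unused" import
env = gym.make("ALE/Pong-v5")

obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()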
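Several hunks switch from `env.envs[...]` to `env.unwrapped.envs[...]`: the vector env handed to callbacks and workers may itself be wrapped, and only the underlying `gym.vector.VectorEnv` exposes the `envs` list. A minimal sketch of that access pattern, mirroring the render callback changed above:

import gymnasium as gym

def render_first_sub_env(env):
    # Unwrap first: `env` may be a wrapper around the vector env.
    if isinstance(env.unwrapped, gym.vector.VectorEnv):
        # If we have a vector env, only render the sub-env at index 0.
        return env.unwrapped.envs[0].render()
    # Plain (non-vector) env: unwrap and render it directly.
    return env.unwrapped.render()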
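With the scikit-image fallback removed, the helpers in rllib/utils/images.py now require OpenCV, as the raised error messages suggest (`pip install opencv-python`). A minimal usage sketch, assuming opencv-python is installed; the array shapes are illustrative:

import numpy as np
from ray.rllib.utils.images import resize, rgb2gray

frame = np.random.randint(0, 255, size=(210, 160, 3), dtype=np.uint8)  # fake RGB Atari frame
gray = rgb2gray(frame)         # -> shape (210, 160)
small = resize(frame, 84, 84)  # -> shape (84, 84, 3), cv2.INTER_AREA under the hood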