diff --git a/doc/source/ray-core/examples/plot_pong_example.ipynb b/doc/source/ray-core/examples/plot_pong_example.ipynb index 70648185d0437..642199fef7f90 100644 --- a/doc/source/ray-core/examples/plot_pong_example.ipynb +++ b/doc/source/ray-core/examples/plot_pong_example.ipynb @@ -292,7 +292,7 @@ "@ray.remote\n", "class RolloutWorker(object):\n", " def __init__(self):\n", - " self.env = gym.make(\"ALE/Pong-v5\")\n", + " self.env = gym.make(\"ale_py:ALE/Pong-v5\")\n", "\n", " def compute_gradient(self, model):\n", " # Compute a simulation episode.\n", diff --git a/doc/source/rllib/doc_code/dreamerv3_inference.py b/doc/source/rllib/doc_code/dreamerv3_inference.py index 681212151693d..25b8e5a111e09 100644 --- a/doc/source/rllib/doc_code/dreamerv3_inference.py +++ b/doc/source/rllib/doc_code/dreamerv3_inference.py @@ -10,7 +10,7 @@ env_name = "CartPole-v1" # Use the vector env API. -env = gym.vector.make(env_name, num_envs=1, asynchronous=False) +env = gym.make_vec(env_name, num_envs=1, vectorization_mode="sync") terminated = truncated = False # Reset the env. diff --git a/doc/source/rllib/doc_code/training.py b/doc/source/rllib/doc_code/training.py index 451bc664cbdf2..75bf8a48f18c1 100644 --- a/doc/source/rllib/doc_code/training.py +++ b/doc/source/rllib/doc_code/training.py @@ -4,7 +4,7 @@ try: import gymnasium as gym - env = gym.make("ALE/Pong-v5") + env = gym.make("ale_py:ALE/Pong-v5") obs, infos = env.reset() except Exception: import gym diff --git a/doc/source/rllib/rllib-examples.rst b/doc/source/rllib/rllib-examples.rst index bdb4ee65dd237..148f9a7ee8511 100644 --- a/doc/source/rllib/rllib-examples.rst +++ b/doc/source/rllib/rllib-examples.rst @@ -202,7 +202,7 @@ in roughly 5min. It can be run like this on a single g5.24xlarge (or g6.24xlarge .. code-block:: bash $ cd ray/rllib/tuned_examples/ppo - $ python atari_ppo.py --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 + $ python atari_ppo.py --env=ale_py:ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 Note that some of the files in this folder are used for RLlib's daily or weekly release tests as well. diff --git a/python/requirements.txt b/python/requirements.txt index 8ddf6ea190b70..b721f735b2f4a 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -41,7 +41,7 @@ colorful rich opentelemetry-sdk fastapi -gymnasium==0.28.1 +gymnasium==1.0.0 virtualenv!=20.21.1,>=20.0.24 opentelemetry-api opencensus diff --git a/python/requirements/ml/rllib-test-requirements.txt b/python/requirements/ml/rllib-test-requirements.txt index 1c47364f6b656..027c57446e602 100644 --- a/python/requirements/ml/rllib-test-requirements.txt +++ b/python/requirements/ml/rllib-test-requirements.txt @@ -3,43 +3,32 @@ # Environment adapters. # --------------------- # Atari -gymnasium==0.28.1; python_version < "3.12" -imageio; python_version < "3.12" -ale_py==0.8.1; python_version < "3.12" +ale_py==0.10.1 +imageio==2.34.2 +opencv-python==4.8.1.78 + # For testing MuJoCo envs with gymnasium. -mujoco==2.3.6; python_version < "3.12" +mujoco==3.2.4 dm_control==1.0.12; python_version < "3.12" # For tests on PettingZoo's multi-agent envs. -pettingzoo==1.23.1 -# When installing pettingzoo, chess is missing, even though its a dependancy -# TODO: remove if a future pettingzoo and/or ray version fixes this dependancy issue. -chess==1.7.0 +pettingzoo==1.24.3 pymunk==6.2.1 -supersuit==3.8.0; python_version < "3.12" -tinyscaler==1.2.6; python_version < "3.12" -shimmy - -# Kaggle envs. 
-kaggle_environments==1.7.11 -# Unity3D testing -# TODO(sven): Add this back to rllib-requirements.txt once mlagents no longer pins torch<1.9.0 version. -#mlagents==0.28.0 -mlagents_envs==0.28.0 +tinyscaler==1.2.8 +shimmy==2.0.0 +supersuit==3.9.3 # For tests on minigrid. -minigrid -# For tests on RecSim and Kaggle envs. -# Explicitly depends on `tensorflow` and doesn't accept `tensorflow-macos` -recsim==0.2.4; (sys_platform != 'darwin' or platform_machine != 'arm64') and python_version < "3.12" -# recsim depends on dopamine-rl, but dopamine-rl pins gym <= 0.25.2, which break some envs -dopamine-rl==4.0.5; (sys_platform != 'darwin' or platform_machine != 'arm64') and python_version < "3.12" +minigrid==2.3.1 tensorflow_estimator + # DeepMind's OpenSpiel open-spiel==1.4 +# Unity3D testing +mlagents_envs==0.28.0 + # Requires libtorrent which is unavailable for arm64 -autorom[accept-rom-license]; platform_machine != "arm64" h5py==3.10.0 # Requirements for rendering. diff --git a/python/requirements_compiled.txt b/python/requirements_compiled.txt index a1043afc5b51b..1347afee24c5a 100644 --- a/python/requirements_compiled.txt +++ b/python/requirements_compiled.txt @@ -75,10 +75,10 @@ aiosqlite==0.19.0 # via ypy-websocket alabaster==0.7.13 # via sphinx -ale-py==0.8.1 ; python_version < "3.12" +ale-py==0.10.1 # via # -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt - # gym + # gymnasium alembic==1.12.1 # via # aim @@ -272,8 +272,6 @@ charset-normalizer==3.3.2 # via # requests # snowflake-connector-python -chess==1.7.0 - # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt chex==0.1.7 # via optax clang-format==12.0.1 @@ -306,7 +304,6 @@ cloudpickle==2.2.0 # -r /ray/ci/../python/requirements/test-requirements.txt # dask # distributed - # gym # gymnasium # hyperopt # mlagents-envs @@ -704,13 +701,7 @@ gsutil==5.27 # via -r /ray/ci/../python/requirements/docker/ray-docker-requirements.txt gunicorn==20.1.0 # via mlflow -gym==0.26.2 - # via - # dopamine-rl - # recsim -gym-notices==0.0.8 - # via gym -gymnasium==0.28.1 ; python_version < "3.12" +gymnasium==1.0.0 # via # -r /ray/ci/../python/requirements.txt # -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt @@ -1126,7 +1117,7 @@ msrestazure==0.6.4 # via # -r /ray/ci/../python/requirements/test-requirements.txt # azure-cli-core -mujoco==2.3.6 ; python_version < "3.12" +mujoco==3.2.4 # via # -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt # dm-control @@ -1246,7 +1237,6 @@ numpy==1.26.4 # flax # gpy # gradio - # gym # gymnasium # h5py # hpbandster @@ -1290,7 +1280,6 @@ numpy==1.26.4 # pyro-ppl # pytorch-lightning # raydp - # recsim # scikit-image # scikit-learn # scipy @@ -1489,7 +1478,7 @@ pbr==6.0.0 # sarif-om peewee==3.17.0 # via semgrep -pettingzoo==1.23.1 +pettingzoo==1.24.3 # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt pexpect==4.8.0 # via @@ -1862,8 +1851,6 @@ querystring-parser==1.2.4 # via raydp raydp==1.7.0b20231020.dev0 # via -r /ray/ci/../python/requirements/ml/data-test-requirements.txt -recsim==0.2.4 ; (sys_platform != "darwin" or platform_machine != "arm64") and python_version < "3.12" - # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt redis==4.4.2 # via -r /ray/ci/../python/requirements/test-requirements.txt regex==2024.5.15 @@ -2049,7 +2036,7 @@ shellcheck-py==0.7.1.1 # via -r /ray/ci/../python/requirements/lint-requirements.txt shellingham==1.5.4 # via typer -shimmy==1.3.0 +shimmy==2.0.0 # via -r 
/ray/ci/../python/requirements/ml/rllib-test-requirements.txt shortuuid==1.0.1 # via -r /ray/ci/../python/requirements/ml/tune-test-requirements.txt @@ -2167,9 +2154,7 @@ statsmodels==0.14.0 # via # hpbandster # statsforecast -strictyaml==1.7.3 - # via pyiceberg -supersuit==3.8.0 ; python_version < "3.12" +supersuit==3.9.3 # via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt sympy==1.13.1 # via @@ -2256,7 +2241,7 @@ timm==0.9.2 # via -r /ray/ci/../python/requirements/ml/tune-test-requirements.txt tinycss2==1.3.0 # via nbconvert -tinyscaler==1.2.6 ; python_version < "3.12" +tinyscaler==1.2.8 # via # -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt # supersuit diff --git a/python/setup.py b/python/setup.py index dd8a05046992f..53f954851deea 100644 --- a/python/setup.py +++ b/python/setup.py @@ -302,7 +302,7 @@ def get_packages(self): setup_spec.extras["rllib"] = setup_spec.extras["tune"] + [ "dm_tree", - "gymnasium==0.28.1", + "gymnasium==1.0.0", "lz4", "scikit-image", "pyyaml", diff --git a/release/long_running_tests/workloads/apex.py b/release/long_running_tests/workloads/apex.py index 4aee3c40db3f2..90adcd52bc258 100644 --- a/release/long_running_tests/workloads/apex.py +++ b/release/long_running_tests/workloads/apex.py @@ -39,7 +39,7 @@ { "apex": { "run": "APEX", - "env": "ALE/Pong-v5", + "env": "ale_py:ALE/Pong-v5", "config": { "num_workers": 3, "num_gpus": 0, diff --git a/release/ml_user_tests/tune_rllib/run_connect_tests.py b/release/ml_user_tests/tune_rllib/run_connect_tests.py index d263264b29d5d..7fb4b2e73ccb8 100644 --- a/release/ml_user_tests/tune_rllib/run_connect_tests.py +++ b/release/ml_user_tests/tune_rllib/run_connect_tests.py @@ -26,7 +26,7 @@ def run(smoke_test=False, storage_path: str = None): config = ( APPOConfig() - .environment("ALE/Pong-v5", clip_rewards=True) + .environment("ale_py:ALE/Pong-v5", clip_rewards=True) .framework(tune.grid_search(["tf", "torch"])) .rollouts( rollout_fragment_length=50, diff --git a/release/ray_release/byod/requirements_byod_3.9.in b/release/ray_release/byod/requirements_byod_3.9.in index d9ffd017133f5..cca2b941f7eec 100644 --- a/release/ray_release/byod/requirements_byod_3.9.in +++ b/release/ray_release/byod/requirements_byod_3.9.in @@ -10,8 +10,8 @@ dask[complete] fastapi gcsfs==2023.5.0 gsutil -gym -gym[atari] +gymnasium +gymnasium[atari] importlib-metadata jsonschema==4.17.3 lightgbm diff --git a/release/ray_release/byod/requirements_byod_3.9.txt b/release/ray_release/byod/requirements_byod_3.9.txt index d55e3d79a7a84..a468ea0d4e831 100644 --- a/release/ray_release/byod/requirements_byod_3.9.txt +++ b/release/ray_release/byod/requirements_byod_3.9.txt @@ -1,4 +1,3 @@ - # # This file is autogenerated by pip-compile with python 3.9 # To update, run: @@ -116,30 +115,35 @@ aiosignal==1.3.1 \ # via # -c release/ray_release/byod/requirements_compiled.txt # aiohttp -ale-py==0.8.1 \ - --hash=sha256:0006d80dfe7745eb5a93444492337203c8bc7eb594a2c24c6a651c5c5b0eaf09 \ - --hash=sha256:0856ca777473ec4ae8a59f3af9580259adb0fd4a47d586a125a440c62e82fc10 \ - --hash=sha256:0ffecb5c956749596030e464827642945162170a132d093c3d4fa2d7e5725c18 \ - --hash=sha256:2d9fcfa06c74a613c5419e942ef4d3e0959533f52e94d2d4bda61d07fbfffeee \ - --hash=sha256:5fcc31f495de79ee1d6bfc0f4b7c4619948851e679bbf010035e25f23146a687 \ - --hash=sha256:6f2f6b92c8fd6189654979bbf0b305dbe0ecf82176c47f244d8c1cbc36286b89 \ - --hash=sha256:7cd74b7ee0248ef11a086c9764e142e71defd40ec8989a99232bfd2d9e8023be \ - 
--hash=sha256:817adf9a3a82c4923c731e634520a5ecf296aca0367f5c69959a96b32119d831 \ - --hash=sha256:87557db05be0e04130e2ec1bf909d3bb0b0bc034645d4f664e6baa573fe32191 \ - --hash=sha256:9773eea7505484e024beb2fff0f3bfd363db151bdb9799d70995448e196b1ded \ - --hash=sha256:ade5c32af567629164a6b49378978c728a15dc4db07ad6b679e8832d4fd3ea1f \ - --hash=sha256:ae2ba24557e0ce541ea3be13b148db2a9cfa730d83537b4cbed5e10449826e51 \ - --hash=sha256:b00f74e27815131c1a2791f3d48114363fa2708e19f09ce6b7b614cb14c9d469 \ - --hash=sha256:b2aa2f69a4169742800615970efe6914fa856e33eaf7fa9133c0e06a617a80e2 \ - --hash=sha256:c9b168eb88c87d0f3e2a778e6c5cdde4ad951d1ca8a6dc3d3679fd45398df7d1 \ - --hash=sha256:d49b550a2d9c25b63c343aa680fd81f253a3714cdc0e1835640933ebff1798ff \ - --hash=sha256:eadf9f3990b4ff2f9e5ca35889f5e2e95cddd6a353d9d857d9b4601a6e1c4e7c \ - --hash=sha256:f10b1df8774bbe3b00365748b5e0e07cf35f6a703bbaff991bc7b3b2247dccc9 \ - --hash=sha256:f278036f9b6066062abcdf0987a0ec5a8e0f22a2c7cfac925e39378d4343d490 +ale-py==0.10.1 \ + --hash=sha256:076a44a61c2518b844f765692a91d0a6b383c6592b5fdabd94fd24d4c62a54ef \ + --hash=sha256:0835ee11004efeb5a9805a09c1525242f737257a8a4f5f4f0b9b3e047e6dca86 \ + --hash=sha256:12617edc9799c73570df67a731a4293bcfd500f413e0bfa867b53fc411fa7629 \ + --hash=sha256:24b9e61a4e868a4266f8a0ef7809cc20cecedb8c10d515d14ff6078950d51d8b \ + --hash=sha256:24f7aa19e1b3b1540516942020a95f57964af71285497620e58f03b2c113424e \ + --hash=sha256:3971a8552d2f982f569c87152479901574a9fe86410e5d1a26276e7ffccb59e1 \ + --hash=sha256:3d82d81715f15598b9db50529da971d36117cda027af9d112bd2ea22cefe3bcb \ + --hash=sha256:43d63b262f4b3bfcd567ce736a5648b4193470b2691bc14e38ac0c05dfe2a7e2 \ + --hash=sha256:4dd55a52e074497f1143785a215a50706afba3111be8b4923d46cc507c16be8f \ + --hash=sha256:4f3aaea36c1671812c21b5f7c5dcf9f5f9c726f5b10cbe7a657a844de963bb55 \ + --hash=sha256:5d4f326236c95736182323a480363c7b98959fc9a4ba09d2aa5b152faa6a2d59 \ + --hash=sha256:6f0a3da4ff47f913b5c61e66571fe7fb92fc569e5babdf4b0eeee348aac1d457 \ + --hash=sha256:771d5a1cd5a50d2cf226eba45c418fb7a18b453bd332b6a2189310030eda421a \ + --hash=sha256:7733d521921452b9e644e9e31e4d5b1ba612305473c5ba0266cafb7eff6a5461 \ + --hash=sha256:82c676030b8b6543cb6969a905ff841ae6f086a2efe707542d014ef6ca4ada4e \ + --hash=sha256:92a31bd44687c6a3595fcdac35bc3238e305dd604171ba6a9cb7912bc83c99ee \ + --hash=sha256:9f30d763c38063e5579783844868c1330f89049f252e94c49534785515f785f2 \ + --hash=sha256:9fa3f3977f63b685394301432cba7fe417882cfea72424d75aaf6bf98f79a2c9 \ + --hash=sha256:b84025670cf37527348a417d7465ee193a19d0a336bcd62f943957c13fef6ebb \ + --hash=sha256:c43308af7013cb60c6f5e77cba2b9ccaed2f5e2ae444b365dce9b7ac3bb5d48f \ + --hash=sha256:c77653e47d79e60abcc21bfad7dd105784ce2649fc5bc4eaaa1de45b40112772 \ + --hash=sha256:c9fac7fe11c56ed301a409d8a940f3e764ed2929b756ebb033eadf492a3d696e \ + --hash=sha256:d3247ad68f7dda1f9c046ede74310e347114f2c191a9f4cd247f432410941eb9 \ + --hash=sha256:e0637ddc4074b814ae46db28d61aface08d7eba16ea713cdfe0734e0b18c3794 \ + --hash=sha256:f6f91ab4b2a18e24c82a33fd1d616f32d121fcd6429f9045d515960df8cdc580 # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in - # gym annotated-types==0.6.0 \ --hash=sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43 \ --hash=sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d @@ -470,7 +474,7 @@ cloudpickle==2.2.0 \ # -c release/ray_release/byod/requirements_compiled.txt # dask # distributed - # gym + # gymnasium cmake==3.28.1 \ 
--hash=sha256:0d4051d101d151d8387156c463aa45c8cd0e164f870e0ac0c8c91d3ff08528e1 \ --hash=sha256:1be8f351271f8bcbe32288066e5add642d7c32f2f8fec3f135949c2cb13dfac2 \ @@ -490,12 +494,6 @@ cmake==3.28.1 \ --hash=sha256:c82bc0eb1495cf518cb4f355b8a73e584e67d53453406c0498bacc454cf6c404 \ --hash=sha256:d0978cdd08c0ebc76f4f8543aba1381a41580dcb9c3bcffb536c41337b75aea1 # via -r release/ray_release/byod/requirements_byod_3.9.in -commonmark==0.9.1 \ - --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ - --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 - # via - # -c release/ray_release/byod/requirements_compiled.txt - # rich configargparse==1.7 \ --hash=sha256:d249da6591465c6c26df64a9f73d2536e743be2f244eb3ebe61114af2f94f86b \ --hash=sha256:e7067471884de5478c58a511e529f0f9bd1c66bfef1dea90935438d6c23306d1 @@ -675,7 +673,7 @@ diskcache==5.6.3 \ --hash=sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc \ --hash=sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19 # via petastorm -distributed==2022.10.1 \ +distributed==2022.10.1 ; python_version < "3.12" \ --hash=sha256:31abab8ecc76951875828a3689d47dc4f20226b3ec99a0dc1af6183d02dbe5fe \ --hash=sha256:42c6fe7d3bea491e23ce020879c411f2ecfecdb4914a6cb6b4a63530a7b0fa70 # via @@ -687,6 +685,12 @@ exceptiongroup==1.2.1 \ # via # anyio # pytest +farama-notifications==0.0.4 \ + --hash=sha256:13fceff2d14314cf80703c8266462ebf3733c7d165336eee998fc58e545efd18 \ + --hash=sha256:14de931035a41961f7c056361dc7f980762a143d05791ef5794a751a2caf05ae + # via + # -c release/ray_release/byod/requirements_compiled.txt + # gymnasium fastapi==0.109.2 \ --hash=sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d \ --hash=sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73 @@ -724,68 +728,84 @@ flatbuffers==23.5.26 \ # via # -c release/ray_release/byod/requirements_compiled.txt # tensorflow -frozenlist==1.4.0 \ - --hash=sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6 \ - --hash=sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01 \ - --hash=sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251 \ - --hash=sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9 \ - --hash=sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b \ - --hash=sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87 \ - --hash=sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf \ - --hash=sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f \ - --hash=sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0 \ - --hash=sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2 \ - --hash=sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b \ - --hash=sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc \ - --hash=sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c \ - --hash=sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467 \ - --hash=sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9 \ - --hash=sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1 \ - --hash=sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a \ - --hash=sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79 \ - 
--hash=sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167 \ - --hash=sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300 \ - --hash=sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf \ - --hash=sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea \ - --hash=sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2 \ - --hash=sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab \ - --hash=sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3 \ - --hash=sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb \ - --hash=sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087 \ - --hash=sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc \ - --hash=sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8 \ - --hash=sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62 \ - --hash=sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f \ - --hash=sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326 \ - --hash=sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c \ - --hash=sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431 \ - --hash=sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963 \ - --hash=sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7 \ - --hash=sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef \ - --hash=sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3 \ - --hash=sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956 \ - --hash=sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781 \ - --hash=sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472 \ - --hash=sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc \ - --hash=sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839 \ - --hash=sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672 \ - --hash=sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3 \ - --hash=sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503 \ - --hash=sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d \ - --hash=sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8 \ - --hash=sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b \ - --hash=sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc \ - --hash=sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f \ - --hash=sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559 \ - --hash=sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b \ - --hash=sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95 \ - --hash=sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb \ - --hash=sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963 \ - --hash=sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919 \ - --hash=sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f \ - --hash=sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3 \ - --hash=sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1 \ - 
--hash=sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e +frozenlist==1.4.1 \ + --hash=sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7 \ + --hash=sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98 \ + --hash=sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad \ + --hash=sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5 \ + --hash=sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae \ + --hash=sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e \ + --hash=sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a \ + --hash=sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701 \ + --hash=sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d \ + --hash=sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6 \ + --hash=sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6 \ + --hash=sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106 \ + --hash=sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75 \ + --hash=sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868 \ + --hash=sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a \ + --hash=sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0 \ + --hash=sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1 \ + --hash=sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826 \ + --hash=sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec \ + --hash=sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6 \ + --hash=sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950 \ + --hash=sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19 \ + --hash=sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0 \ + --hash=sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8 \ + --hash=sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a \ + --hash=sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09 \ + --hash=sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86 \ + --hash=sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c \ + --hash=sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5 \ + --hash=sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b \ + --hash=sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b \ + --hash=sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d \ + --hash=sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0 \ + --hash=sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea \ + --hash=sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776 \ + --hash=sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a \ + --hash=sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897 \ + --hash=sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7 \ + --hash=sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09 \ + --hash=sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9 \ + --hash=sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe \ + 
--hash=sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd \ + --hash=sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742 \ + --hash=sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09 \ + --hash=sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0 \ + --hash=sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932 \ + --hash=sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1 \ + --hash=sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a \ + --hash=sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49 \ + --hash=sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d \ + --hash=sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7 \ + --hash=sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480 \ + --hash=sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89 \ + --hash=sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e \ + --hash=sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b \ + --hash=sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82 \ + --hash=sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb \ + --hash=sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068 \ + --hash=sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8 \ + --hash=sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b \ + --hash=sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb \ + --hash=sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2 \ + --hash=sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11 \ + --hash=sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b \ + --hash=sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc \ + --hash=sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0 \ + --hash=sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497 \ + --hash=sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17 \ + --hash=sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0 \ + --hash=sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2 \ + --hash=sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439 \ + --hash=sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5 \ + --hash=sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac \ + --hash=sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825 \ + --hash=sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887 \ + --hash=sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced \ + --hash=sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74 # via # -c release/ray_release/byod/requirements_compiled.txt # aiohttp @@ -799,8 +819,9 @@ fsspec==2023.5.0 \ # gcsfs # petastorm # s3fs -future==0.18.3 \ - --hash=sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307 +future==1.0.0 \ + --hash=sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216 \ + --hash=sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05 # via # -c release/ray_release/byod/requirements_compiled.txt # petastorm @@ -1178,61 +1199,62 @@ greenlet==3.0.1 \ # via # -c 
release/ray_release/byod/requirements_compiled.txt # gevent -grpcio==1.60.0 ; sys_platform != "darwin" \ - --hash=sha256:073f959c6f570797272f4ee9464a9997eaf1e98c27cb680225b82b53390d61e6 \ - --hash=sha256:0fd3b3968ffe7643144580f260f04d39d869fcc2cddb745deef078b09fd2b328 \ - --hash=sha256:1434ca77d6fed4ea312901122dc8da6c4389738bf5788f43efb19a838ac03ead \ - --hash=sha256:1c30bb23a41df95109db130a6cc1b974844300ae2e5d68dd4947aacba5985aa5 \ - --hash=sha256:20e7a4f7ded59097c84059d28230907cd97130fa74f4a8bfd1d8e5ba18c81491 \ - --hash=sha256:2199165a1affb666aa24adf0c97436686d0a61bc5fc113c037701fb7c7fceb96 \ - --hash=sha256:297eef542156d6b15174a1231c2493ea9ea54af8d016b8ca7d5d9cc65cfcc444 \ - --hash=sha256:2aef56e85901c2397bd557c5ba514f84de1f0ae5dd132f5d5fed042858115951 \ - --hash=sha256:30943b9530fe3620e3b195c03130396cd0ee3a0d10a66c1bee715d1819001eaf \ - --hash=sha256:3b36a2c6d4920ba88fa98075fdd58ff94ebeb8acc1215ae07d01a418af4c0253 \ - --hash=sha256:428d699c8553c27e98f4d29fdc0f0edc50e9a8a7590bfd294d2edb0da7be3629 \ - --hash=sha256:43e636dc2ce9ece583b3e2ca41df5c983f4302eabc6d5f9cd04f0562ee8ec1ae \ - --hash=sha256:452ca5b4afed30e7274445dd9b441a35ece656ec1600b77fff8c216fdf07df43 \ - --hash=sha256:467a7d31554892eed2aa6c2d47ded1079fc40ea0b9601d9f79204afa8902274b \ - --hash=sha256:4b44d7e39964e808b071714666a812049765b26b3ea48c4434a3b317bac82f14 \ - --hash=sha256:4c86343cf9ff7b2514dd229bdd88ebba760bd8973dac192ae687ff75e39ebfab \ - --hash=sha256:5208a57eae445ae84a219dfd8b56e04313445d146873117b5fa75f3245bc1390 \ - --hash=sha256:5ff21e000ff2f658430bde5288cb1ac440ff15c0d7d18b5fb222f941b46cb0d2 \ - --hash=sha256:675997222f2e2f22928fbba640824aebd43791116034f62006e19730715166c0 \ - --hash=sha256:676e4a44e740deaba0f4d95ba1d8c5c89a2fcc43d02c39f69450b1fa19d39590 \ - --hash=sha256:6e306b97966369b889985a562ede9d99180def39ad42c8014628dd3cc343f508 \ - --hash=sha256:6fd9584bf1bccdfff1512719316efa77be235469e1e3295dce64538c4773840b \ - --hash=sha256:705a68a973c4c76db5d369ed573fec3367d7d196673fa86614b33d8c8e9ebb08 \ - --hash=sha256:74d7d9fa97809c5b892449b28a65ec2bfa458a4735ddad46074f9f7d9550ad13 \ - --hash=sha256:77c8a317f0fd5a0a2be8ed5cbe5341537d5c00bb79b3bb27ba7c5378ba77dbca \ - --hash=sha256:79a050889eb8d57a93ed21d9585bb63fca881666fc709f5d9f7f9372f5e7fd03 \ - --hash=sha256:7db16dd4ea1b05ada504f08d0dca1cd9b926bed3770f50e715d087c6f00ad748 \ - --hash=sha256:83f2292ae292ed5a47cdcb9821039ca8e88902923198f2193f13959360c01860 \ - --hash=sha256:87c9224acba0ad8bacddf427a1c2772e17ce50b3042a789547af27099c5f751d \ - --hash=sha256:8a97a681e82bc11a42d4372fe57898d270a2707f36c45c6676e49ce0d5c41353 \ - --hash=sha256:9073513ec380434eb8d21970e1ab3161041de121f4018bbed3146839451a6d8e \ - --hash=sha256:90bdd76b3f04bdb21de5398b8a7c629676c81dfac290f5f19883857e9371d28c \ - --hash=sha256:91229d7203f1ef0ab420c9b53fe2ca5c1fbeb34f69b3bc1b5089466237a4a134 \ - --hash=sha256:92f88ca1b956eb8427a11bb8b4a0c0b2b03377235fc5102cb05e533b8693a415 \ - --hash=sha256:95ae3e8e2c1b9bf671817f86f155c5da7d49a2289c5cf27a319458c3e025c320 \ - --hash=sha256:9e30be89a75ee66aec7f9e60086fadb37ff8c0ba49a022887c28c134341f7179 \ - --hash=sha256:a48edde788b99214613e440fce495bbe2b1e142a7f214cce9e0832146c41e324 \ - --hash=sha256:a7152fa6e597c20cb97923407cf0934e14224af42c2b8d915f48bc3ad2d9ac18 \ - --hash=sha256:a9c7b71211f066908e518a2ef7a5e211670761651039f0d6a80d8d40054047df \ - --hash=sha256:b0571a5aef36ba9177e262dc88a9240c866d903a62799e44fd4aae3f9a2ec17e \ - --hash=sha256:b0fb2d4801546598ac5cd18e3ec79c1a9af8b8f2a86283c55a5337c5aeca4b1b \ - 
--hash=sha256:b10241250cb77657ab315270b064a6c7f1add58af94befa20687e7c8d8603ae6 \ - --hash=sha256:b87efe4a380887425bb15f220079aa8336276398dc33fce38c64d278164f963d \ - --hash=sha256:b98f43fcdb16172dec5f4b49f2fece4b16a99fd284d81c6bbac1b3b69fcbe0ff \ - --hash=sha256:c193109ca4070cdcaa6eff00fdb5a56233dc7610216d58fb81638f89f02e4968 \ - --hash=sha256:c826f93050c73e7769806f92e601e0efdb83ec8d7c76ddf45d514fee54e8e619 \ - --hash=sha256:d020cfa595d1f8f5c6b343530cd3ca16ae5aefdd1e832b777f9f0eb105f5b139 \ - --hash=sha256:d6a478581b1a1a8fdf3318ecb5f4d0cda41cacdffe2b527c23707c9c1b8fdb55 \ - --hash=sha256:de2ad69c9a094bf37c1102b5744c9aec6cf74d2b635558b779085d0263166454 \ - --hash=sha256:e278eafb406f7e1b1b637c2cf51d3ad45883bb5bd1ca56bc05e4fc135dfdaa65 \ - --hash=sha256:e381fe0c2aa6c03b056ad8f52f8efca7be29fb4d9ae2f8873520843b6039612a \ - --hash=sha256:e61e76020e0c332a98290323ecfec721c9544f5b739fab925b6e8cbe1944cf19 \ - --hash=sha256:f897c3b127532e6befdcf961c415c97f320d45614daf84deba0a54e64ea2457b \ - --hash=sha256:fb464479934778d7cc5baf463d959d361954d6533ad34c3a4f1d267e86ee25fd +grpcio==1.66.2 ; sys_platform != "darwin" \ + --hash=sha256:02697eb4a5cbe5a9639f57323b4c37bcb3ab2d48cec5da3dc2f13334d72790dd \ + --hash=sha256:03b0b307ba26fae695e067b94cbb014e27390f8bc5ac7a3a39b7723fed085604 \ + --hash=sha256:05bc2ceadc2529ab0b227b1310d249d95d9001cd106aa4d31e8871ad3c428d73 \ + --hash=sha256:06de8ec0bd71be123eec15b0e0d457474931c2c407869b6c349bd9bed4adbac3 \ + --hash=sha256:0be4e0490c28da5377283861bed2941d1d20ec017ca397a5df4394d1c31a9b50 \ + --hash=sha256:12fda97ffae55e6526825daf25ad0fa37483685952b5d0f910d6405c87e3adb6 \ + --hash=sha256:1caa38fb22a8578ab8393da99d4b8641e3a80abc8fd52646f1ecc92bcb8dee34 \ + --hash=sha256:2018b053aa15782db2541ca01a7edb56a0bf18c77efed975392583725974b249 \ + --hash=sha256:20657d6b8cfed7db5e11b62ff7dfe2e12064ea78e93f1434d61888834bc86d75 \ + --hash=sha256:2335c58560a9e92ac58ff2bc5649952f9b37d0735608242973c7a8b94a6437d8 \ + --hash=sha256:31fd163105464797a72d901a06472860845ac157389e10f12631025b3e4d0453 \ + --hash=sha256:38b68498ff579a3b1ee8f93a05eb48dc2595795f2f62716e797dc24774c1aaa8 \ + --hash=sha256:3b00efc473b20d8bf83e0e1ae661b98951ca56111feb9b9611df8efc4fe5d55d \ + --hash=sha256:3ed71e81782966ffead60268bbda31ea3f725ebf8aa73634d5dda44f2cf3fb9c \ + --hash=sha256:45a3d462826f4868b442a6b8fdbe8b87b45eb4f5b5308168c156b21eca43f61c \ + --hash=sha256:49f0ca7ae850f59f828a723a9064cadbed90f1ece179d375966546499b8a2c9c \ + --hash=sha256:4e504572433f4e72b12394977679161d495c4c9581ba34a88d843eaf0f2fbd39 \ + --hash=sha256:4ea1d062c9230278793820146c95d038dc0f468cbdd172eec3363e42ff1c7d01 \ + --hash=sha256:563588c587b75c34b928bc428548e5b00ea38c46972181a4d8b75ba7e3f24231 \ + --hash=sha256:6001e575b8bbd89eee11960bb640b6da6ae110cf08113a075f1e2051cc596cae \ + --hash=sha256:66a0cd8ba6512b401d7ed46bb03f4ee455839957f28b8d61e7708056a806ba6a \ + --hash=sha256:6851de821249340bdb100df5eacfecfc4e6075fa85c6df7ee0eb213170ec8e5d \ + --hash=sha256:728bdf36a186e7f51da73be7f8d09457a03061be848718d0edf000e709418987 \ + --hash=sha256:73e3b425c1e155730273f73e419de3074aa5c5e936771ee0e4af0814631fb30a \ + --hash=sha256:73fc8f8b9b5c4a03e802b3cd0c18b2b06b410d3c1dcbef989fdeb943bd44aff7 \ + --hash=sha256:78fa51ebc2d9242c0fc5db0feecc57a9943303b46664ad89921f5079e2e4ada7 \ + --hash=sha256:7b2c86457145ce14c38e5bf6bdc19ef88e66c5fee2c3d83285c5aef026ba93b3 \ + --hash=sha256:7d69ce1f324dc2d71e40c9261d3fdbe7d4c9d60f332069ff9b2a4d8a257c7b2b \ + --hash=sha256:802d84fd3d50614170649853d121baaaa305de7b65b3e01759247e768d691ddf \ + 
--hash=sha256:80fd702ba7e432994df208f27514280b4b5c6843e12a48759c9255679ad38db8 \ + --hash=sha256:8ac475e8da31484efa25abb774674d837b343afb78bb3bcdef10f81a93e3d6bf \ + --hash=sha256:950da58d7d80abd0ea68757769c9db0a95b31163e53e5bb60438d263f4bed7b7 \ + --hash=sha256:99a641995a6bc4287a6315989ee591ff58507aa1cbe4c2e70d88411c4dcc0839 \ + --hash=sha256:9c3a99c519f4638e700e9e3f83952e27e2ea10873eecd7935823dab0c1c9250e \ + --hash=sha256:9c509a4f78114cbc5f0740eb3d7a74985fd2eff022971bc9bc31f8bc93e66a3b \ + --hash=sha256:a18e20d8321c6400185b4263e27982488cb5cdd62da69147087a76a24ef4e7e3 \ + --hash=sha256:a917d26e0fe980b0ac7bfcc1a3c4ad6a9a4612c911d33efb55ed7833c749b0ee \ + --hash=sha256:a9539f01cb04950fd4b5ab458e64a15f84c2acc273670072abe49a3f29bbad54 \ + --hash=sha256:ad2efdbe90c73b0434cbe64ed372e12414ad03c06262279b104a029d1889d13e \ + --hash=sha256:b672abf90a964bfde2d0ecbce30f2329a47498ba75ce6f4da35a2f4532b7acbc \ + --hash=sha256:bbd27c24a4cc5e195a7f56cfd9312e366d5d61b86e36d46bbe538457ea6eb8dd \ + --hash=sha256:c400ba5675b67025c8a9f48aa846f12a39cf0c44df5cd060e23fda5b30e9359d \ + --hash=sha256:c408f5ef75cfffa113cacd8b0c0e3611cbfd47701ca3cdc090594109b9fcbaed \ + --hash=sha256:c806852deaedee9ce8280fe98955c9103f62912a5b2d5ee7e3eaa284a6d8d8e7 \ + --hash=sha256:ce89f5876662f146d4c1f695dda29d4433a5d01c8681fbd2539afff535da14d4 \ + --hash=sha256:d25a14af966438cddf498b2e338f88d1c9706f3493b1d73b93f695c99c5f0e2a \ + --hash=sha256:d8d4732cc5052e92cea2f78b233c2e2a52998ac40cd651f40e398893ad0d06ec \ + --hash=sha256:d9a9724a156c8ec6a379869b23ba3323b7ea3600851c91489b871e375f710bc8 \ + --hash=sha256:e636ce23273683b00410f1971d209bf3689238cf5538d960adc3cdfe80dd0dbd \ + --hash=sha256:e88264caad6d8d00e7913996030bac8ad5f26b7411495848cc218bd3a9040b6c \ + --hash=sha256:f145cc21836c332c67baa6fc81099d1d27e266401565bf481948010d6ea32d46 \ + --hash=sha256:fb57870449dfcfac428afbb5a877829fcb0d6db9d9baa1148705739e9083880e \ + --hash=sha256:fb70487c95786e345af5e854ffec8cb8cc781bcc5df7930c4fbb7feaa72e1cdf \ + --hash=sha256:fe96281713168a3270878255983d2cb1a97e034325c8c2c25169a69289d3ecfa \ + --hash=sha256:ff1f7882e56c40b0d33c4922c15dfa30612f05fb785074a012f7cda74d1c3679 # via # -c release/ray_release/byod/requirements_compiled.txt # tensorboard @@ -1242,17 +1264,12 @@ gsutil==5.27 \ # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in -gym[atari]==0.26.2 \ - --hash=sha256:e0d882f4b54f0c65f203104c24ab8a38b039f1289986803c7d02cdbe214fbcc4 +gymnasium==1.0.0 \ + --hash=sha256:9d2b66f30c1b34fe3c2ce7fae65ecf365d0e9982d2b3d860235e773328a3b403 \ + --hash=sha256:b6f40e1e24c5bd419361e1a5b86a9117d2499baecc3a660d44dfff4c465393ad # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in -gym-notices==0.0.8 \ - --hash=sha256:ad25e200487cafa369728625fe064e88ada1346618526102659b4640f2b4b911 \ - --hash=sha256:e5f82e00823a166747b4c2a07de63b6560b1acb880638547e0cabf825a01e463 - # via - # -c release/ray_release/byod/requirements_compiled.txt - # gym h5py==3.10.0 \ --hash=sha256:012ab448590e3c4f5a8dd0f3533255bc57f80629bf7c5054cf4c87b30085063c \ --hash=sha256:212bb997a91e6a895ce5e2f365ba764debeaef5d2dca5c6fb7098d66607adf99 \ @@ -1307,14 +1324,8 @@ importlib-metadata==6.11.0 \ # -r release/ray_release/byod/requirements_byod_3.9.in # ale-py # flask - # gym + # gymnasium # markdown -importlib-resources==5.13.0 \ - --hash=sha256:82d5c6cca930697dbbd86c93333bb2c2e72861d4789a11c2662b933e5ad2b528 \ - 
--hash=sha256:9f7bd0c97b79972a6cce36a366356d16d5e13b09679c11a58f1014bfdf8e64b2 - # via - # -c release/ray_release/byod/requirements_compiled.txt - # ale-py iniconfig==2.0.0 \ --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 @@ -1403,6 +1414,12 @@ markdown==3.5.1 \ # via # -c release/ray_release/byod/requirements_compiled.txt # tensorboard +markdown-it-py==2.2.0 \ + --hash=sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30 \ + --hash=sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1 + # via + # -c release/ray_release/byod/requirements_compiled.txt + # rich markupsafe==2.1.3 \ --hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \ --hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \ @@ -1468,6 +1485,12 @@ markupsafe==2.1.3 \ # -c release/ray_release/byod/requirements_compiled.txt # jinja2 # werkzeug +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via + # -c release/ray_release/byod/requirements_compiled.txt + # markdown-it-py memray==1.10.0 ; platform_system != "Windows" and sys_platform != "darwin" and platform_machine != "aarch64" \ --hash=sha256:0a21745fb516b7a6efcd40aa7487c59e9313fcfc782d0193fcfcf00b48426874 \ --hash=sha256:22f2a47871c172a0539bd72737bb6b294fc10c510464066b825d90fcd3bb4916 \ @@ -1595,81 +1618,97 @@ msgpack==1.0.7 \ # -c release/ray_release/byod/requirements_compiled.txt # distributed # locust -multidict==6.0.4 \ - --hash=sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9 \ - --hash=sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8 \ - --hash=sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03 \ - --hash=sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710 \ - --hash=sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161 \ - --hash=sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664 \ - --hash=sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569 \ - --hash=sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067 \ - --hash=sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313 \ - --hash=sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706 \ - --hash=sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2 \ - --hash=sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636 \ - --hash=sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49 \ - --hash=sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93 \ - --hash=sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603 \ - --hash=sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0 \ - --hash=sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60 \ - --hash=sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4 \ - --hash=sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e \ - --hash=sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1 \ - --hash=sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60 \ - --hash=sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951 \ - 
--hash=sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc \ - --hash=sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe \ - --hash=sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95 \ - --hash=sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d \ - --hash=sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8 \ - --hash=sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed \ - --hash=sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2 \ - --hash=sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775 \ - --hash=sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87 \ - --hash=sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c \ - --hash=sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2 \ - --hash=sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98 \ - --hash=sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3 \ - --hash=sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe \ - --hash=sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78 \ - --hash=sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660 \ - --hash=sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176 \ - --hash=sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e \ - --hash=sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988 \ - --hash=sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c \ - --hash=sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c \ - --hash=sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0 \ - --hash=sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449 \ - --hash=sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f \ - --hash=sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde \ - --hash=sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5 \ - --hash=sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d \ - --hash=sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac \ - --hash=sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a \ - --hash=sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9 \ - --hash=sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca \ - --hash=sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11 \ - --hash=sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35 \ - --hash=sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063 \ - --hash=sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b \ - --hash=sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982 \ - --hash=sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258 \ - --hash=sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1 \ - --hash=sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52 \ - --hash=sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480 \ - --hash=sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7 \ - --hash=sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461 \ - 
--hash=sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d \ - --hash=sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc \ - --hash=sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779 \ - --hash=sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a \ - --hash=sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547 \ - --hash=sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0 \ - --hash=sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171 \ - --hash=sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf \ - --hash=sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d \ - --hash=sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba +multidict==6.0.5 \ + --hash=sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556 \ + --hash=sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c \ + --hash=sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29 \ + --hash=sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b \ + --hash=sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8 \ + --hash=sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7 \ + --hash=sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd \ + --hash=sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40 \ + --hash=sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6 \ + --hash=sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3 \ + --hash=sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c \ + --hash=sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9 \ + --hash=sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5 \ + --hash=sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae \ + --hash=sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442 \ + --hash=sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9 \ + --hash=sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc \ + --hash=sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c \ + --hash=sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea \ + --hash=sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5 \ + --hash=sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50 \ + --hash=sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182 \ + --hash=sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453 \ + --hash=sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e \ + --hash=sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600 \ + --hash=sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733 \ + --hash=sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda \ + --hash=sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241 \ + --hash=sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461 \ + --hash=sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e \ + --hash=sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e \ + --hash=sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b \ + 
--hash=sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e \ + --hash=sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7 \ + --hash=sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386 \ + --hash=sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd \ + --hash=sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9 \ + --hash=sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf \ + --hash=sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee \ + --hash=sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5 \ + --hash=sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a \ + --hash=sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271 \ + --hash=sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54 \ + --hash=sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4 \ + --hash=sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496 \ + --hash=sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb \ + --hash=sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319 \ + --hash=sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3 \ + --hash=sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f \ + --hash=sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527 \ + --hash=sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed \ + --hash=sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604 \ + --hash=sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef \ + --hash=sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8 \ + --hash=sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5 \ + --hash=sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5 \ + --hash=sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626 \ + --hash=sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c \ + --hash=sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d \ + --hash=sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c \ + --hash=sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc \ + --hash=sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc \ + --hash=sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b \ + --hash=sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38 \ + --hash=sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450 \ + --hash=sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1 \ + --hash=sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f \ + --hash=sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3 \ + --hash=sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755 \ + --hash=sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226 \ + --hash=sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a \ + --hash=sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046 \ + --hash=sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf \ + --hash=sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479 \ + 
--hash=sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e \ + --hash=sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1 \ + --hash=sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a \ + --hash=sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83 \ + --hash=sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929 \ + --hash=sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93 \ + --hash=sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a \ + --hash=sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c \ + --hash=sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44 \ + --hash=sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89 \ + --hash=sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba \ + --hash=sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e \ + --hash=sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da \ + --hash=sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24 \ + --hash=sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423 \ + --hash=sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef # via # -c release/ray_release/byod/requirements_compiled.txt # aiohttp @@ -1739,7 +1778,7 @@ numpy==1.26.4 \ # ale-py # bokeh # dask - # gym + # gymnasium # h5py # lightgbm # ml-dtypes @@ -1756,6 +1795,12 @@ numpy==1.26.4 \ # xarray # xgboost # zarr +nvidia-nccl-cu12==2.20.5 \ + --hash=sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56 \ + --hash=sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01 + # via + # -c release/ray_release/byod/requirements_compiled.txt + # xgboost oauth2client==4.1.3 \ --hash=sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac \ --hash=sha256:d486741e451287f69568a4d26d70d9acd73a2bbfa275746c535b4209891cccc6 @@ -1792,7 +1837,7 @@ packaging==23.0 \ # tensorboardx # tensorflow # xarray -pandas==1.5.3 \ +pandas==1.5.3 ; python_version < "3.12" \ --hash=sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813 \ --hash=sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792 \ --hash=sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406 \ @@ -2398,9 +2443,9 @@ requests==2.31.0 \ # locust # requests-oauthlib # tensorboard -requests-oauthlib==1.3.1 \ - --hash=sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5 \ - --hash=sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a +requests-oauthlib==2.0.0 \ + --hash=sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36 \ + --hash=sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9 # via # -c release/ray_release/byod/requirements_compiled.txt # google-auth-oauthlib @@ -2410,12 +2455,13 @@ retry-decorator==1.1.1 \ # -c release/ray_release/byod/requirements_compiled.txt # gcs-oauth2-boto-plugin # gsutil -rich==12.6.0 \ - --hash=sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e \ - --hash=sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0 +rich==13.3.2 \ + --hash=sha256:91954fe80cfb7985727a467ca98a7618e5dd15178cc2da10f553b36a93859001 \ + --hash=sha256:a104f37270bf677148d8acb07d33be1569eeee87e2d1beb286a4e9113caf6f2f # via # -c release/ray_release/byod/requirements_compiled.txt # memray + # typer 
roundrobin==0.0.4 \ --hash=sha256:7e9d19a5bd6123d99993fb935fa86d25c88bb2096e493885f61737ed0f5e9abd # via locust @@ -2506,6 +2552,12 @@ semidbm==0.5.1 \ --hash=sha256:0dd74b5e9276eb5af186ace8b74165acec0c887e746bdae60340be91b99cffaf \ --hash=sha256:add3e644dd6afcce83d1752b34ff80fa4e2b37b4ce6bce3289ad19d6f0bcd6ae # via -r release/ray_release/byod/requirements_byod_3.9.in +shellingham==1.5.4 \ + --hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \ + --hash=sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de + # via + # -c release/ray_release/byod/requirements_compiled.txt + # typer six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 @@ -2524,9 +2576,9 @@ six==1.16.0 \ # tensorboard # tensorflow # trueskill -sniffio==1.3.0 \ - --hash=sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101 \ - --hash=sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384 +sniffio==1.3.1 \ + --hash=sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 \ + --hash=sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc # via # -c release/ray_release/byod/requirements_compiled.txt # anyio @@ -2614,15 +2666,15 @@ tensorflow-io-gcs-filesystem==0.31.0 ; python_version < "3.12" \ # via # -c release/ray_release/byod/requirements_compiled.txt # tensorflow -termcolor==2.3.0 \ - --hash=sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475 \ - --hash=sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a +termcolor==2.4.0 \ + --hash=sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63 \ + --hash=sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a # via # -c release/ray_release/byod/requirements_compiled.txt # tensorflow -terminado==0.18.0 \ - --hash=sha256:1ea08a89b835dd1b8c0c900d92848147cef2537243361b2e3f4dc15df9b6fded \ - --hash=sha256:87b0d96642d0fe5f5abd7783857b9cab167f221a39ff98e3b9619a788a3c0f2e +terminado==0.18.1 \ + --hash=sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0 \ + --hash=sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in @@ -2638,9 +2690,9 @@ tomli==2.0.1 \ # via # -c release/ray_release/byod/requirements_compiled.txt # pytest -toolz==0.12.0 \ - --hash=sha256:2059bd4148deb1884bb0eb770a3cde70e7f954cfbbdc2285f1f2de01fd21eb6f \ - --hash=sha256:88c570861c440ee3f2f6037c4654613228ff40c93a6c25e0eba70d17282c6194 +toolz==0.12.1 \ + --hash=sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85 \ + --hash=sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d # via # -c release/ray_release/byod/requirements_compiled.txt # dask @@ -2702,9 +2754,9 @@ tqdm==4.64.1 \ trueskill==0.4.5 \ --hash=sha256:9d62b48d2428369d712bd9becff9f9a2caa325e1a2ab5f9392d34bff757867bb # via -r release/ray_release/byod/requirements_byod_3.9.in -typer==0.9.0 \ - --hash=sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2 \ - --hash=sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee +typer==0.12.3 \ + --hash=sha256:070d7ca53f785acbccba8e7d28b08dcd88f79f1fbda035ade0aecec71ca5c914 \ + --hash=sha256:49e73131481d804288ef62598d97a1ceef3058905aa536a1134f90891ba35482 # via # -c 
release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in @@ -2718,14 +2770,15 @@ typing-extensions==4.8.0 \ # ale-py # bokeh # fastapi + # gymnasium # pydantic # pydantic-core # starlette # tensorflow # typer -urllib3==1.26.18 \ - --hash=sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 \ - --hash=sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0 +urllib3==1.26.19 \ + --hash=sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3 \ + --hash=sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429 # via # -c release/ray_release/byod/requirements_compiled.txt # botocore @@ -2739,9 +2792,9 @@ werkzeug==2.3.8 \ # flask # locust # tensorboard -wheel==0.42.0 \ - --hash=sha256:177f9c9b0d45c47873b619f5b650346d632cdc35fb5e4d25058e09c9e581433d \ - --hash=sha256:c45be39f7882c9d34243236f2d63cbd58039e360f85d0913425fbd7ceea617a8 +wheel==0.43.0 \ + --hash=sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85 \ + --hash=sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81 # via # -c release/ray_release/byod/requirements_compiled.txt # astunparse @@ -2829,107 +2882,109 @@ xarray==2023.12.0 \ --hash=sha256:3c22b6824681762b6c3fcad86dfd18960a617bccbc7f456ce21b43a20e455fb9 \ --hash=sha256:4565dbc890de47e278346c44d6b33bb07d3427383e077a7ca8ab6606196fd433 # via -r release/ray_release/byod/requirements_byod_3.9.in -xgboost==1.7.6 \ - --hash=sha256:127cf1f5e2ec25cd41429394c6719b87af1456ce583e89f0bffd35d02ad18bcb \ - --hash=sha256:1c527554a400445e0c38186039ba1a00425dcdb4e40b37eed0e74cb39a159c47 \ - --hash=sha256:281c3c6f4fbed2d36bf95cd02a641afa95e72e9abde70064056da5e76233e8df \ - --hash=sha256:4c34675b4d2678c624ddde5d45361e7e16046923e362e4e609b88353e6b87124 \ - --hash=sha256:59b4b366d2cafc7f645e87d897983a5b59be02876194b1d213bd8d8b811d8ce8 \ - --hash=sha256:b1d5db49b199152d62bd9217c98760207d3de86d2b9d243260c573ffe638f80a +xgboost==2.1.0 \ + --hash=sha256:19d145eb847b070c32342b1bf2d7331c102783e07a484f8b13b7d759d707c6b0 \ + --hash=sha256:43b16205689249d7509daf7a6ab00ad0e6c570b3a9c263cb32b26e39d9477bb3 \ + --hash=sha256:7144980923e76ce741c7b03a14d3bd7514db6de5c7cabe96ba95b229d274f5ca \ + --hash=sha256:73673c9bb85927db7fe2e3aed6df6d35dba708cfd6767cc63d4ea11dda2dede5 \ + --hash=sha256:74904b91c42524a6c32147fe5718569e78fb65911ff4499b053f81d0964514d4 \ + --hash=sha256:840a0c6e2119d8c8f260a5dace996ea064a267f62b301a25d7d452488a7ac860 \ + --hash=sha256:b2a456eb0f3d3e8fd8ab37e44ac288292bf8ea8744c294be9fd88713d27af810 \ + --hash=sha256:cedc2e386e686795735448fd4597533acacc5ba6fb47dd910c204c468b80bb96 # via # -c release/ray_release/byod/requirements_compiled.txt # -r release/ray_release/byod/requirements_byod_3.9.in -yarl==1.9.3 \ - --hash=sha256:09c19e5f4404574fcfb736efecf75844ffe8610606f3fccc35a1515b8b6712c4 \ - --hash=sha256:0ab5baaea8450f4a3e241ef17e3d129b2143e38a685036b075976b9c415ea3eb \ - --hash=sha256:0d155a092bf0ebf4a9f6f3b7a650dc5d9a5bbb585ef83a52ed36ba46f55cc39d \ - --hash=sha256:126638ab961633f0940a06e1c9d59919003ef212a15869708dcb7305f91a6732 \ - --hash=sha256:1a0a4f3aaa18580038cfa52a7183c8ffbbe7d727fe581300817efc1e96d1b0e9 \ - --hash=sha256:1d93461e2cf76c4796355494f15ffcb50a3c198cc2d601ad8d6a96219a10c363 \ - --hash=sha256:26a1a8443091c7fbc17b84a0d9f38de34b8423b459fb853e6c8cdfab0eacf613 \ - --hash=sha256:271d63396460b6607b588555ea27a1a02b717ca2e3f2cf53bdde4013d7790929 \ - --hash=sha256:28a108cb92ce6cf867690a962372996ca332d8cda0210c5ad487fe996e76b8bb \ - 
--hash=sha256:29beac86f33d6c7ab1d79bd0213aa7aed2d2f555386856bb3056d5fdd9dab279 \ - --hash=sha256:2c757f64afe53a422e45e3e399e1e3cf82b7a2f244796ce80d8ca53e16a49b9f \ - --hash=sha256:2dad8166d41ebd1f76ce107cf6a31e39801aee3844a54a90af23278b072f1ccf \ - --hash=sha256:2dc72e891672343b99db6d497024bf8b985537ad6c393359dc5227ef653b2f17 \ - --hash=sha256:2f3c8822bc8fb4a347a192dd6a28a25d7f0ea3262e826d7d4ef9cc99cd06d07e \ - --hash=sha256:32435d134414e01d937cd9d6cc56e8413a8d4741dea36af5840c7750f04d16ab \ - --hash=sha256:3cfa4dbe17b2e6fca1414e9c3bcc216f6930cb18ea7646e7d0d52792ac196808 \ - --hash=sha256:3d5434b34100b504aabae75f0622ebb85defffe7b64ad8f52b8b30ec6ef6e4b9 \ - --hash=sha256:4003f380dac50328c85e85416aca6985536812c082387255c35292cb4b41707e \ - --hash=sha256:44e91a669c43f03964f672c5a234ae0d7a4d49c9b85d1baa93dec28afa28ffbd \ - --hash=sha256:4a14907b597ec55740f63e52d7fee0e9ee09d5b9d57a4f399a7423268e457b57 \ - --hash=sha256:4ce77d289f8d40905c054b63f29851ecbfd026ef4ba5c371a158cfe6f623663e \ - --hash=sha256:4d6d74a97e898c1c2df80339aa423234ad9ea2052f66366cef1e80448798c13d \ - --hash=sha256:51382c72dd5377861b573bd55dcf680df54cea84147c8648b15ac507fbef984d \ - --hash=sha256:525cd69eff44833b01f8ef39aa33a9cc53a99ff7f9d76a6ef6a9fb758f54d0ff \ - --hash=sha256:53ec65f7eee8655bebb1f6f1607760d123c3c115a324b443df4f916383482a67 \ - --hash=sha256:5f74b015c99a5eac5ae589de27a1201418a5d9d460e89ccb3366015c6153e60a \ - --hash=sha256:6280353940f7e5e2efaaabd686193e61351e966cc02f401761c4d87f48c89ea4 \ - --hash=sha256:632c7aeb99df718765adf58eacb9acb9cbc555e075da849c1378ef4d18bf536a \ - --hash=sha256:6465d36381af057d0fab4e0f24ef0e80ba61f03fe43e6eeccbe0056e74aadc70 \ - --hash=sha256:66a6dbf6ca7d2db03cc61cafe1ee6be838ce0fbc97781881a22a58a7c5efef42 \ - --hash=sha256:6d350388ba1129bc867c6af1cd17da2b197dff0d2801036d2d7d83c2d771a682 \ - --hash=sha256:7217234b10c64b52cc39a8d82550342ae2e45be34f5bff02b890b8c452eb48d7 \ - --hash=sha256:721ee3fc292f0d069a04016ef2c3a25595d48c5b8ddc6029be46f6158d129c92 \ - --hash=sha256:72a57b41a0920b9a220125081c1e191b88a4cdec13bf9d0649e382a822705c65 \ - --hash=sha256:73cc83f918b69110813a7d95024266072d987b903a623ecae673d1e71579d566 \ - --hash=sha256:778df71c8d0c8c9f1b378624b26431ca80041660d7be7c3f724b2c7a6e65d0d6 \ - --hash=sha256:79e1df60f7c2b148722fb6cafebffe1acd95fd8b5fd77795f56247edaf326752 \ - --hash=sha256:7c86d0d0919952d05df880a1889a4f0aeb6868e98961c090e335671dea5c0361 \ - --hash=sha256:7eaf13af79950142ab2bbb8362f8d8d935be9aaf8df1df89c86c3231e4ff238a \ - --hash=sha256:828235a2a169160ee73a2fcfb8a000709edf09d7511fccf203465c3d5acc59e4 \ - --hash=sha256:8535e111a064f3bdd94c0ed443105934d6f005adad68dd13ce50a488a0ad1bf3 \ - --hash=sha256:88d2c3cc4b2f46d1ba73d81c51ec0e486f59cc51165ea4f789677f91a303a9a7 \ - --hash=sha256:8a2538806be846ea25e90c28786136932ec385c7ff3bc1148e45125984783dc6 \ - --hash=sha256:8dab30b21bd6fb17c3f4684868c7e6a9e8468078db00f599fb1c14e324b10fca \ - --hash=sha256:8f18a7832ff85dfcd77871fe677b169b1bc60c021978c90c3bb14f727596e0ae \ - --hash=sha256:946db4511b2d815979d733ac6a961f47e20a29c297be0d55b6d4b77ee4b298f6 \ - --hash=sha256:96758e56dceb8a70f8a5cff1e452daaeff07d1cc9f11e9b0c951330f0a2396a7 \ - --hash=sha256:9a172c3d5447b7da1680a1a2d6ecdf6f87a319d21d52729f45ec938a7006d5d8 \ - --hash=sha256:9a5211de242754b5e612557bca701f39f8b1a9408dff73c6db623f22d20f470e \ - --hash=sha256:9df9a0d4c5624790a0dea2e02e3b1b3c69aed14bcb8650e19606d9df3719e87d \ - --hash=sha256:aa4643635f26052401750bd54db911b6342eb1a9ac3e74f0f8b58a25d61dfe41 \ - 
--hash=sha256:aed37db837ecb5962469fad448aaae0f0ee94ffce2062cf2eb9aed13328b5196 \ - --hash=sha256:af52725c7c39b0ee655befbbab5b9a1b209e01bb39128dce0db226a10014aacc \ - --hash=sha256:b0b8c06afcf2bac5a50b37f64efbde978b7f9dc88842ce9729c020dc71fae4ce \ - --hash=sha256:b61e64b06c3640feab73fa4ff9cb64bd8182de52e5dc13038e01cfe674ebc321 \ - --hash=sha256:b7831566595fe88ba17ea80e4b61c0eb599f84c85acaa14bf04dd90319a45b90 \ - --hash=sha256:b8bc5b87a65a4e64bc83385c05145ea901b613d0d3a434d434b55511b6ab0067 \ - --hash=sha256:b8d51817cf4b8d545963ec65ff06c1b92e5765aa98831678d0e2240b6e9fd281 \ - --hash=sha256:b9f9cafaf031c34d95c1528c16b2fa07b710e6056b3c4e2e34e9317072da5d1a \ - --hash=sha256:bb72d2a94481e7dc7a0c522673db288f31849800d6ce2435317376a345728225 \ - --hash=sha256:c25ec06e4241e162f5d1f57c370f4078797ade95c9208bd0c60f484834f09c96 \ - --hash=sha256:c405d482c320a88ab53dcbd98d6d6f32ada074f2d965d6e9bf2d823158fa97de \ - --hash=sha256:c4472fe53ebf541113e533971bd8c32728debc4c6d8cc177f2bff31d011ec17e \ - --hash=sha256:c4b1efb11a8acd13246ffb0bee888dd0e8eb057f8bf30112e3e21e421eb82d4a \ - --hash=sha256:c5f3faeb8100a43adf3e7925d556801d14b5816a0ac9e75e22948e787feec642 \ - --hash=sha256:c6f034386e5550b5dc8ded90b5e2ff7db21f0f5c7de37b6efc5dac046eb19c10 \ - --hash=sha256:c99ddaddb2fbe04953b84d1651149a0d85214780e4d0ee824e610ab549d98d92 \ - --hash=sha256:ca6b66f69e30f6e180d52f14d91ac854b8119553b524e0e28d5291a724f0f423 \ - --hash=sha256:cccdc02e46d2bd7cb5f38f8cc3d9db0d24951abd082b2f242c9e9f59c0ab2af3 \ - --hash=sha256:cd49a908cb6d387fc26acee8b7d9fcc9bbf8e1aca890c0b2fdfd706057546080 \ - --hash=sha256:cf7a4e8de7f1092829caef66fd90eaf3710bc5efd322a816d5677b7664893c93 \ - --hash=sha256:cfd77e8e5cafba3fb584e0f4b935a59216f352b73d4987be3af51f43a862c403 \ - --hash=sha256:d34c4f80956227f2686ddea5b3585e109c2733e2d4ef12eb1b8b4e84f09a2ab6 \ - --hash=sha256:d61a0ca95503867d4d627517bcfdc28a8468c3f1b0b06c626f30dd759d3999fd \ - --hash=sha256:d81657b23e0edb84b37167e98aefb04ae16cbc5352770057893bd222cdc6e45f \ - --hash=sha256:d92d897cb4b4bf915fbeb5e604c7911021a8456f0964f3b8ebbe7f9188b9eabb \ - --hash=sha256:dd318e6b75ca80bff0b22b302f83a8ee41c62b8ac662ddb49f67ec97e799885d \ - --hash=sha256:dd952b9c64f3b21aedd09b8fe958e4931864dba69926d8a90c90d36ac4e28c9a \ - --hash=sha256:e0e7e83f31e23c5d00ff618045ddc5e916f9e613d33c5a5823bc0b0a0feb522f \ - --hash=sha256:e0f17d1df951336a02afc8270c03c0c6e60d1f9996fcbd43a4ce6be81de0bd9d \ - --hash=sha256:e2a16ef5fa2382af83bef4a18c1b3bcb4284c4732906aa69422cf09df9c59f1f \ - --hash=sha256:e36021db54b8a0475805acc1d6c4bca5d9f52c3825ad29ae2d398a9d530ddb88 \ - --hash=sha256:e73db54c967eb75037c178a54445c5a4e7461b5203b27c45ef656a81787c0c1b \ - --hash=sha256:e741bd48e6a417bdfbae02e088f60018286d6c141639359fb8df017a3b69415a \ - --hash=sha256:f7271d6bd8838c49ba8ae647fc06469137e1c161a7ef97d778b72904d9b68696 \ - --hash=sha256:fc391e3941045fd0987c77484b2799adffd08e4b6735c4ee5f054366a2e1551d \ - --hash=sha256:fc94441bcf9cb8c59f51f23193316afefbf3ff858460cb47b5758bf66a14d130 \ - --hash=sha256:fe34befb8c765b8ce562f0200afda3578f8abb159c76de3ab354c80b72244c41 \ - --hash=sha256:fe8080b4f25dfc44a86bedd14bc4f9d469dfc6456e6f3c5d9077e81a5fedfba7 \ - --hash=sha256:ff34cb09a332832d1cf38acd0f604c068665192c6107a439a92abfd8acf90fe2 +yarl==1.9.4 \ + --hash=sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51 \ + --hash=sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce \ + --hash=sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559 \ + 
--hash=sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0 \ + --hash=sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81 \ + --hash=sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc \ + --hash=sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4 \ + --hash=sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c \ + --hash=sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130 \ + --hash=sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136 \ + --hash=sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e \ + --hash=sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec \ + --hash=sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7 \ + --hash=sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1 \ + --hash=sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455 \ + --hash=sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099 \ + --hash=sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129 \ + --hash=sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10 \ + --hash=sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142 \ + --hash=sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98 \ + --hash=sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa \ + --hash=sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7 \ + --hash=sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525 \ + --hash=sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c \ + --hash=sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9 \ + --hash=sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c \ + --hash=sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8 \ + --hash=sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b \ + --hash=sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf \ + --hash=sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23 \ + --hash=sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd \ + --hash=sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27 \ + --hash=sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f \ + --hash=sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece \ + --hash=sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434 \ + --hash=sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec \ + --hash=sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff \ + --hash=sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78 \ + --hash=sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d \ + --hash=sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863 \ + --hash=sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53 \ + --hash=sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31 \ + --hash=sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15 \ + --hash=sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5 \ + --hash=sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b \ + 
--hash=sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57 \ + --hash=sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3 \ + --hash=sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1 \ + --hash=sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f \ + --hash=sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad \ + --hash=sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c \ + --hash=sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7 \ + --hash=sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2 \ + --hash=sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b \ + --hash=sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2 \ + --hash=sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b \ + --hash=sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9 \ + --hash=sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be \ + --hash=sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e \ + --hash=sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984 \ + --hash=sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4 \ + --hash=sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074 \ + --hash=sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2 \ + --hash=sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392 \ + --hash=sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91 \ + --hash=sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541 \ + --hash=sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf \ + --hash=sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572 \ + --hash=sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66 \ + --hash=sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575 \ + --hash=sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14 \ + --hash=sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5 \ + --hash=sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1 \ + --hash=sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e \ + --hash=sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551 \ + --hash=sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17 \ + --hash=sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead \ + --hash=sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0 \ + --hash=sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe \ + --hash=sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234 \ + --hash=sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0 \ + --hash=sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7 \ + --hash=sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34 \ + --hash=sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42 \ + --hash=sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385 \ + --hash=sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78 \ + --hash=sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be \ + 
--hash=sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958 \ + --hash=sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749 \ + --hash=sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec # via # -c release/ray_release/byod/requirements_compiled.txt # aiohttp @@ -2943,13 +2998,12 @@ zict==3.0.0 \ # via # -c release/ray_release/byod/requirements_compiled.txt # distributed -zipp==3.17.0 \ - --hash=sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 \ - --hash=sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0 +zipp==3.19.2 \ + --hash=sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19 \ + --hash=sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c # via # -c release/ray_release/byod/requirements_compiled.txt # importlib-metadata - # importlib-resources zope-event==5.0 \ --hash=sha256:2832e95014f4db26c47a13fdaef84cef2f4df37e66b59d8f1f4a8f319a632c26 \ --hash=sha256:bac440d8d9891b4068e2b5a2c5e2c9765a9df762944bda6955f96bb9b91e67cd @@ -2995,4 +3049,4 @@ zope-interface==6.2 \ # WARNING: The following packages were not pinned, but pip requires them to be # pinned when the requirements file includes hashes. Consider using the --allow-unsafe flag. -# setuptools +# setuptools \ No newline at end of file diff --git a/release/release_tests.yaml b/release/release_tests.yaml index 81681cff83d63..60d7c80485238 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -2716,7 +2716,7 @@ run: timeout: 43200 # 12h - script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ALE/Pong-v5 --num-gpus=1 --stop-reward=15.0 --as-release-test + script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ale_py:ALE/Pong-v5 --num-gpus=1 --stop-reward=15.0 --as-release-test alert: default @@ -2751,7 +2751,7 @@ run: timeout: 1200 - script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env=ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --stop-reward=20.0 --as-release-test + script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env=ale_py:ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --stop-reward=20.0 --as-release-test alert: default diff --git a/release/rllib_contrib/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml b/release/rllib_contrib/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml index c38c9f8fffb08..0ba5a759811f9 100644 --- a/release/rllib_contrib/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml +++ b/release/rllib_contrib/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml @@ -1,5 +1,5 @@ a2c-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: A2C # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_contrib/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml b/release/rllib_contrib/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml index 3ea52a7045250..fe6ffb7527292 100644 --- a/release/rllib_contrib/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml +++ b/release/rllib_contrib/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml @@ -1,5 +1,5 @@ a3c-pongdeterministic-v5: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: A3C # Minimum reward and total ts (in given time_total_s) to pass this test. 
pass_criteria: diff --git a/release/rllib_contrib/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml b/release/rllib_contrib/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml index 81c8fdd20e488..d825b7a3275e9 100644 --- a/release/rllib_contrib/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml +++ b/release/rllib_contrib/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml @@ -1,5 +1,5 @@ apex-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: APEX # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/appo/hybrid_stack/appo-pongnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/appo/hybrid_stack/appo-pongnoframeskip-v5.yaml index 741d5561ee362..9c6a82866f018 100644 --- a/release/rllib_tests/learning_tests/yaml_files/appo/hybrid_stack/appo-pongnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/appo/hybrid_stack/appo-pongnoframeskip-v5.yaml @@ -1,5 +1,5 @@ appo-pongnoframeskip-v5: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/appo/old_stack/appo-pongnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/appo/old_stack/appo-pongnoframeskip-v5.yaml index 9b5e5a84f9bc6..7930cf33df8c0 100644 --- a/release/rllib_tests/learning_tests/yaml_files/appo/old_stack/appo-pongnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/appo/old_stack/appo-pongnoframeskip-v5.yaml @@ -1,5 +1,5 @@ appo-pongnoframeskip-v5: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml index 2da9c8ac89ccb..61dea97452d05 100644 --- a/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml @@ -1,5 +1,5 @@ dqn-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: DQN # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml index 2a12ca0522563..80e9c8ed5e67b 100644 --- a/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml @@ -1,5 +1,5 @@ impala-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: IMPALA # Minimum reward and total ts (in given time_total_s) to pass this test. 
pass_criteria: diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_breakout.py b/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_breakout.py index 2209ac64ea198..20987e6a4c6a6 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_breakout.py +++ b/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_breakout.py @@ -20,7 +20,7 @@ def _make_learner_connector(input_observation_space, input_action_space): # We would like our frame stacking connector to do this job. def _env_creator(cfg): return wrap_atari_for_new_api_stack( - gym.make("ALE/Breakout-v5", **cfg, **{"render_mode": "rgb_array"}), + gym.make("ale_py:ALE/Breakout-v5", **cfg, **{"render_mode": "rgb_array"}), # Perform through ConnectorV2 API. framestack=None, ) diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_pong.py b/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_pong.py index 5619eb0246e6b..b727ebc73c798 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_pong.py +++ b/release/rllib_tests/learning_tests/yaml_files/ppo/new_stack/ppo_pong.py @@ -20,7 +20,7 @@ def _make_learner_connector(input_observation_space, input_action_space): # We would like our frame stacking connector to do this job. def _env_creator(cfg): return wrap_atari_for_new_api_stack( - gym.make("ALE/Pong-v5", **cfg, **{"render_mode": "rgb_array"}), + gym.make("ale_py:ALE/Pong-v5", **cfg, **{"render_mode": "rgb_array"}), # Perform through ConnectorV2 API. framestack=None, ) diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/old_stack/ppo-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/ppo/old_stack/ppo-breakoutnoframeskip-v5.yaml index 6e892c7c5142b..62de17ab28a2c 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ppo/old_stack/ppo-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/ppo/old_stack/ppo-breakoutnoframeskip-v5.yaml @@ -1,5 +1,5 @@ ppo-breakoutnoframeskip-v5: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: PPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: diff --git a/rllib/BUILD b/rllib/BUILD index 9854e95adc98a..26b2c4426d813 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -78,7 +78,6 @@ doctest( "utils/memory.py", "offline/off_policy_estimator.py", "offline/estimators/feature_importance.py", - "env/wrappers/recsim_wrapper.py", "env/remote_vector_env.py", # Missing imports "algorithms/dreamerv3/**", @@ -2543,8 +2542,8 @@ py_test( name = "examples/envs/env_rendering_and_recording", srcs = ["examples/envs/env_rendering_and_recording.py"], tags = ["team:rllib", "exclusive", "examples"], - size = "small", - args = ["--enable-new-api-stack", "--env=CartPole-v1", "--stop-iters=3"] + size = "medium", + args = ["--enable-new-api-stack", "--env=CartPole-v1", "--stop-iters=2"] ) #@OldAPIStack diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index 8161ea0b89efa..6b7bd8cea053f 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -3562,7 +3562,7 @@ def is_atari(self) -> bool: # Not yet determined, try to figure this out. if self._is_atari is None: # Atari envs are usually specified via a string like "PongNoFrameskip-v4" - # or "ALE/Breakout-v5". + # or "ale_py:ALE/Breakout-v5". # We do NOT attempt to auto-detect Atari env for other specified types like # a callable, to avoid running heavy logics in validate(). 
# For these cases, users can explicitly set `environment(atari=True)`. diff --git a/rllib/algorithms/dreamerv3/README.md b/rllib/algorithms/dreamerv3/README.md index a92918273f64d..13a773bb02dd1 100644 --- a/rllib/algorithms/dreamerv3/README.md +++ b/rllib/algorithms/dreamerv3/README.md @@ -49,7 +49,7 @@ in combination with the following scripts and command lines in order to run RLli ### [Atari100k](../../tuned_examples/dreamerv3/atari_100k.py) ```shell $ cd ray/rllib/tuned_examples/dreamerv3/ -$ python atari_100k.py --env ALE/Pong-v5 +$ python atari_100k.py --env ale_py:ALE/Pong-v5 ``` ### [DeepMind Control Suite (vision)](../../tuned_examples/dreamerv3/dm_control_suite_vision.py) diff --git a/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py b/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py index 7fbb8fd55c2ac..87c46e2a2eaca 100644 --- a/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py +++ b/rllib/algorithms/dreamerv3/tests/test_dreamerv3.py @@ -63,7 +63,7 @@ def test_dreamerv3_compilation(self): for env in [ "FrozenLake-v1", "CartPole-v1", - "ALE/MsPacman-v5", + "ale_py:ALE/MsPacman-v5", "Pendulum-v1", ]: print("Env={}".format(env)) diff --git a/rllib/algorithms/dreamerv3/utils/env_runner.py b/rllib/algorithms/dreamerv3/utils/env_runner.py index df725f39f4b22..19e906bdaaf9b 100644 --- a/rllib/algorithms/dreamerv3/utils/env_runner.py +++ b/rllib/algorithms/dreamerv3/utils/env_runner.py @@ -12,6 +12,7 @@ from typing import Collection, List, Optional, Tuple, Union import gymnasium as gym +from gymnasium.wrappers.vector import DictInfoToList import numpy as np import tree # pip install dm_tree @@ -75,7 +76,7 @@ def __init__( # Create the gym.vector.Env object. # Atari env. - if self.config.env.startswith("ALE/"): + if self.config.env.startswith("ale_py:ALE/"): # TODO (sven): This import currently causes a Tune test to fail. Either way, # we need to figure out how to properly setup the CI environment with # the correct versions of all gymnasium-related packages. @@ -114,17 +115,21 @@ def _entry_point(): gym.register("rllib-single-agent-env-v0", entry_point=_entry_point) - self.env = gym.vector.make( - "rllib-single-agent-env-v0", - num_envs=self.config.num_envs_per_env_runner, - asynchronous=self.config.remote_worker_envs, - wrappers=[ - partial(gym.wrappers.TimeLimit, max_episode_steps=108000), - partial(resize_v1, x_size=64, y_size=64), # resize to 64x64 - NormalizedImageEnv, - NoopResetEnv, - MaxAndSkipEnv, - ], + self.env = DictInfoToList( + gym.make_vec( + "rllib-single-agent-env-v0", + num_envs=self.config.num_envs_per_env_runner, + vectorization_mode=( + "async" if self.config.remote_worker_envs else "sync" + ), + wrappers=[ + partial(gym.wrappers.TimeLimit, max_episode_steps=108000), + partial(resize_v1, x_size=64, y_size=64), # resize to 64x64 + NormalizedImageEnv, + NoopResetEnv, + MaxAndSkipEnv, + ], + ) ) # DeepMind Control. 
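The hunk above replaces `gym.vector.make()` (removed in gymnasium 1.0) with `gym.make_vec()` and wraps the result in `DictInfoToList` so per-sub-env infos come back as a list of dicts. A minimal standalone sketch of the same pattern, with CartPole standing in for the RLlib-registered env and the Atari/resize wrappers omitted (an illustration only, not the patched code):

```python
# Minimal sketch of the gymnasium 1.0 vector-env pattern used above.
import gymnasium as gym
from gymnasium.wrappers.vector import DictInfoToList

env = DictInfoToList(
    gym.make_vec(
        "CartPole-v1",
        num_envs=4,
        # "sync" corresponds to the old `asynchronous=False`;
        # "async" runs sub-envs in separate processes.
        vectorization_mode="sync",
    )
)

obs, infos = env.reset(seed=0)  # infos is a list with one dict per sub-env
actions = env.action_space.sample()  # batched actions for all sub-envs
obs, rewards, terminateds, truncateds, infos = env.step(actions)
env.close()
```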
elif self.config.env.startswith("DMC/"): @@ -139,12 +144,16 @@ def _entry_point(): parts[1], parts[2], from_pixels=from_pixels, channels_first=False ), ) - self.env = gym.vector.make( - "dmc_env-v0", - wrappers=[ActionClip], - num_envs=self.config.num_envs_per_env_runner, - asynchronous=self.config.remote_worker_envs, - **dict(self.config.env_config), + self.env = DictInfoToList( + gym.make_vec( + "dmc_env-v0", + wrappers=[ActionClip], + num_envs=self.config.num_envs_per_env_runner, + vectorization_mode=( + "async" if self.config.remote_worker_envs else "sync" + ), + **dict(self.config.env_config), + ) ) # All other envs (gym or `tune.register_env()`'d by the user). else: @@ -162,11 +171,15 @@ def _entry_point(): env_descriptor=self.config.env, ), ) - # Create the vectorized gymnasium env. - self.env = gym.vector.make( - "dreamerv3-custom-env-v0", - num_envs=self.config.num_envs_per_env_runner, - asynchronous=False, # self.config.remote_worker_envs, + # Wrap into `DictInfoToList` wrapper to get infos as lists. + self.env = DictInfoToList( + gym.make_vec( + "dreamerv3-custom-env-v0", + num_envs=self.config.num_envs_per_env_runner, + vectorization_mode=( + "async" if self.config.remote_worker_envs else "sync" + ), + ) ) self.num_envs = self.env.num_envs assert self.num_envs == self.config.num_envs_per_env_runner @@ -185,6 +198,8 @@ def _entry_point(): # TODO (sven): DreamerV3 is currently single-agent only. self.module = self.multi_rl_module_spec.build()[DEFAULT_MODULE_ID] + self._cached_to_module = None + self.metrics = MetricsLogger() self._device = None @@ -258,7 +273,7 @@ def sample( # Sample n timesteps. if num_timesteps is not None: - return self._sample_timesteps( + return self._sample( num_timesteps=num_timesteps, explore=explore, random_actions=random_actions, @@ -269,7 +284,7 @@ def sample( # `_sample_episodes` returns only one list (with completed episodes) # return empty list for incomplete ones. return ( - self._sample_episodes( + self._sample( num_episodes=num_episodes, explore=explore, random_actions=random_actions, @@ -277,18 +292,18 @@ def sample( [], ) - def _sample_timesteps( + def _sample( self, - num_timesteps: int, + *, + num_timesteps: Optional[int] = None, + num_episodes: Optional[int] = None, explore: bool = True, random_actions: bool = False, force_reset: bool = False, ) -> List[SingleAgentEpisode]: - """Helper method to run n timesteps. + """Helper method to sample n timesteps or m episodes.""" - See docstring of self.sample() for more details. - """ - done_episodes_to_return = [] + done_episodes_to_return: List[SingleAgentEpisode] = [] # Get initial states for all `batch_size_B` rows in the forward batch. initial_states = tree.map_structure( @@ -297,193 +312,151 @@ def _sample_timesteps( ) # Have to reset the env (on all vector sub-envs). - if force_reset or self._needs_initial_reset: - obs, _ = self.env.reset() + if force_reset or num_episodes is not None or self._needs_initial_reset: + episodes = self._episodes = [None for _ in range(self.num_envs)] + self._reset_envs(episodes, initial_states) + # We just reset the env. Don't have to force this again in the next + # call to `self._sample()`. self._needs_initial_reset = False - self._episodes = [SingleAgentEpisode() for _ in range(self.num_envs)] - # Set initial obs and states in the episodes. for i in range(self.num_envs): - self._episodes[i].add_env_reset(observation=obs[i]) self._states[i] = None - - # Don't reset existing envs; continue in already started episodes. 
else: - # Pick up stored observations and states from previous timesteps. - obs = np.stack([eps.observations[-1] for eps in self._episodes]) + episodes = self._episodes - # Loop through env for n timesteps. + # Loop through `num_timesteps` timesteps or `num_episodes` episodes. ts = 0 - while ts < num_timesteps: + eps = 0 + while ( + (ts < num_timesteps) if num_timesteps is not None else (eps < num_episodes) + ): # Act randomly. if random_actions: actions = self.env.action_space.sample() - # Compute an action using our RLModule. + # Compute an action using the RLModule. else: - is_first = np.zeros((self.num_envs,)) - for i, eps in enumerate(self._episodes): - if self._states[i] is None: - is_first[i] = 1.0 - self._states[i] = {k: s[i] for k, s in initial_states.items()} - to_module = { - Columns.STATE_IN: tree.map_structure( - lambda s: self.convert_to_tensor(s), batch(self._states) - ), - Columns.OBS: self.convert_to_tensor(obs), - "is_first": self.convert_to_tensor(is_first), - } - # Explore or not. + # Env-to-module connector (already cached). + to_module = self._cached_to_module + assert to_module is not None + self._cached_to_module = None + + # RLModule forward pass: Explore or not. if explore: - outs = self.module.forward_exploration(to_module) + to_env = self.module.forward_exploration(to_module) else: - outs = self.module.forward_inference(to_module) + to_env = self.module.forward_inference(to_module) # Model outputs one-hot actions (if discrete). Convert to int actions # as well. - actions = convert_to_numpy(outs[Columns.ACTIONS]) + actions = convert_to_numpy(to_env[Columns.ACTIONS]) if isinstance(self.env.single_action_space, gym.spaces.Discrete): actions = np.argmax(actions, axis=-1) - self._states = unbatch(convert_to_numpy(outs[Columns.STATE_OUT])) + self._states = unbatch(convert_to_numpy(to_env[Columns.STATE_OUT])) - obs, rewards, terminateds, truncateds, infos = self.env.step(actions) - ts += self.num_envs + observations, rewards, terminateds, truncateds, infos = self.env.step( + actions + ) - for i in range(self.num_envs): - # The last entry in self.observations[i] is already the reset - # obs of the new episode. - if terminateds[i] or truncateds[i]: - # Finish the episode with the actual terminal observation stored in - # the info dict. - self._episodes[i].add_env_step( - observation=infos["final_observation"][i], - action=actions[i], - reward=rewards[i], - terminated=terminateds[i], - truncated=truncateds[i], + call_on_episode_start = set() + for env_index in range(self.num_envs): + # Episode has no data in it yet -> Was just reset and needs to be called + # with its `add_env_reset()` method. + if not episodes[env_index].is_reset: + episodes[env_index].add_env_reset( + observation=observations[env_index], + infos=infos[env_index], ) - self._states[i] = None - done_episodes_to_return.append(self._episodes[i]) - # Create a new episode object. - self._episodes[i] = SingleAgentEpisode(observations=[obs[i]]) + call_on_episode_start.add(env_index) + self._states[env_index] = None + + # Call `add_env_step()` method on episode. else: - self._episodes[i].add_env_step( - observation=obs[i], - action=actions[i], - reward=rewards[i], + # Only increase ts when we actually stepped (not reset'd as a reset + # does not count as a timestep). 
+ ts += 1 + episodes[env_index].add_env_step( + observation=observations[env_index], + action=actions[env_index], + reward=rewards[env_index], + infos=infos[env_index], + terminated=terminateds[env_index], + truncated=truncateds[env_index], ) - # Return done episodes ... - self._done_episodes_for_metrics.extend(done_episodes_to_return) - # ... and all ongoing episode chunks. Also, make sure, we return - # a copy and start new chunks so that callers of this function - # don't alter our ongoing and returned Episode objects. - ongoing_episodes = self._episodes - self._episodes = [eps.cut() for eps in self._episodes] - for eps in ongoing_episodes: - self._ongoing_episodes_for_metrics[eps.id_].append(eps) - - self._increase_sampled_metrics(ts) - - return done_episodes_to_return + ongoing_episodes - - def _sample_episodes( - self, - num_episodes: int, - explore: bool = True, - random_actions: bool = False, - ) -> List[SingleAgentEpisode]: - """Helper method to run n episodes. - - See docstring of `self.sample()` for more details. - """ - done_episodes_to_return = [] - - obs, _ = self.env.reset() - episodes = [SingleAgentEpisode() for _ in range(self.num_envs)] - - # Multiply states n times according to our vector env batch size (num_envs). - states = tree.map_structure( - lambda s: np.repeat(s, self.num_envs, axis=0), - convert_to_numpy(self.module.get_initial_state()), - ) - is_first = np.ones((self.num_envs,)) - - for i in range(self.num_envs): - episodes[i].add_env_reset(observation=obs[i]) - - eps = 0 - while eps < num_episodes: - if random_actions: - actions = self.env.action_space.sample() - else: - batch = { + # Cache results as we will do the RLModule forward pass only in the next + # `while`-iteration. + if self.module is not None: + is_first = np.zeros((self.num_envs,)) + for env_index, episode in enumerate(episodes): + if self._states[env_index] is None: + is_first[env_index] = 1.0 + self._states[env_index] = { + k: s[env_index] for k, s in initial_states.items() + } + self._cached_to_module = { Columns.STATE_IN: tree.map_structure( - lambda s: self.convert_to_tensor(s), states + lambda s: self.convert_to_tensor(s), batch(self._states) ), - Columns.OBS: self.convert_to_tensor(obs), + Columns.OBS: self.convert_to_tensor(observations), "is_first": self.convert_to_tensor(is_first), } - if explore: - outs = self.module.forward_exploration(batch) - else: - outs = self.module.forward_inference(batch) + for env_index in range(self.num_envs): + # Episode is not done. + if not episodes[env_index].is_done: + continue - actions = convert_to_numpy(outs[Columns.ACTIONS]) - if isinstance(self.env.single_action_space, gym.spaces.Discrete): - actions = np.argmax(actions, axis=-1) - states = convert_to_numpy(outs[Columns.STATE_OUT]) + eps += 1 - obs, rewards, terminateds, truncateds, infos = self.env.step(actions) + # Then finalize (numpy'ize) the episode. + done_episodes_to_return.append(episodes[env_index].finalize()) - for i in range(self.num_envs): - # The last entry in self.observations[i] is already the reset - # obs of the new episode. - if terminateds[i] or truncateds[i]: - eps += 1 - - episodes[i].add_env_step( - observation=infos["final_observation"][i], - action=actions[i], - reward=rewards[i], - terminated=terminateds[i], - truncated=truncateds[i], - ) - done_episodes_to_return.append(episodes[i]) - - # Also early-out if we reach the number of episodes within this - # for-loop. 
- if eps == num_episodes: - break - - # Reset h-states to the model's initial ones b/c we are starting a - # new episode. - for k, v in convert_to_numpy( - self.module.get_initial_state() - ).items(): - states[k][i] = v - is_first[i] = True - - episodes[i] = SingleAgentEpisode(observations=[obs[i]]) - else: - episodes[i].add_env_step( - observation=obs[i], - action=actions[i], - reward=rewards[i], - ) - is_first[i] = False + # Also early-out if we reach the number of episodes within this + # for-loop. + if eps == num_episodes: + break + + # Create a new episode object with no data in it and execute + # `on_episode_created` callback (before the `env.reset()` call). + episodes[env_index] = SingleAgentEpisode( + observation_space=self.env.single_observation_space, + action_space=self.env.single_action_space, + ) + # Return done episodes ... + # TODO (simon): Check, how much memory this attribute uses. self._done_episodes_for_metrics.extend(done_episodes_to_return) + # ... and all ongoing episode chunks. - # If user calls sample(num_timesteps=..) after this, we must reset again - # at the beginning. - self._needs_initial_reset = True + # Also, make sure we start new episode chunks (continuing the ongoing episodes + # from the to-be-returned chunks). + ongoing_episodes_to_return = [] + # Only if we are doing individual timesteps: We have to maybe cut an ongoing + # episode and continue building it on the next call to `sample()`. + if num_timesteps is not None: + ongoing_episodes_continuations = [ + episode.cut(len_lookback_buffer=self.config.episode_lookback_horizon) + for episode in episodes + ] + + for episode in episodes: + # Just started Episodes do not have to be returned. There is no data + # in them anyway. + if episode.t == 0: + continue + episode.validate() + self._ongoing_episodes_for_metrics[episode.id_].append(episode) + # Return finalized (numpy'ized) Episodes. + ongoing_episodes_to_return.append(episode.finalize()) + + # Continue collecting into the cut Episode chunks. + self._episodes = ongoing_episodes_continuations - ts = sum(map(len, done_episodes_to_return)) self._increase_sampled_metrics(ts) - return done_episodes_to_return + # Return collected episode data. + return done_episodes_to_return + ongoing_episodes_to_return def get_spaces(self): return { @@ -564,6 +537,51 @@ def stop(self): # Close our env object via gymnasium's API. self.env.close() + def _reset_envs(self, episodes, initial_states): + # Create n new episodes and make the `on_episode_created` callbacks. + for env_index in range(self.num_envs): + self._new_episode(env_index, episodes) + + # Erase all cached ongoing episodes (these will never be completed and + # would thus never be returned/cleaned by `get_metrics` and cause a memory + # leak). + self._ongoing_episodes_for_metrics.clear() + + observations, infos = self.env.reset() + observations = unbatch(observations) + + # Set initial obs and infos in the episodes. + for env_index in range(self.num_envs): + episodes[env_index].add_env_reset( + observation=observations[env_index], + infos=infos[env_index], + ) + + # Run the env-to-module connector to make sure the reset-obs/infos have + # properly been processed (if applicable). 
+ self._cached_to_module = None + if self.module: + is_first = np.zeros((self.num_envs,)) + for i, eps in enumerate(self._episodes): + if self._states[i] is None: + is_first[i] = 1.0 + self._states[i] = {k: s[i] for k, s in initial_states.items()} + self._cached_to_module = { + Columns.STATE_IN: tree.map_structure( + lambda s: self.convert_to_tensor(s), batch(self._states) + ), + Columns.OBS: self.convert_to_tensor(observations), + "is_first": self.convert_to_tensor(is_first), + } + # self._cached_to_module = TODO!! + + def _new_episode(self, env_index, episodes=None): + episodes = episodes if episodes is not None else self._episodes + episodes[env_index] = SingleAgentEpisode( + observation_space=self.env.single_observation_space, + action_space=self.env.single_action_space, + ) + def _increase_sampled_metrics(self, num_steps): # Per sample cycle stats. self.metrics.log_value( diff --git a/rllib/algorithms/ppo/tests/test_ppo.py b/rllib/algorithms/ppo/tests/test_ppo.py index ae51de75389dc..3febf97fb2cad 100644 --- a/rllib/algorithms/ppo/tests/test_ppo.py +++ b/rllib/algorithms/ppo/tests/test_ppo.py @@ -98,7 +98,7 @@ def test_ppo_compilation_and_schedule_mixins(self): # "CliffWalking-v0", "CartPole-v1", "Pendulum-v1", - ]: # "ALE/Breakout-v5"]: + ]: # "ale_py:ALE/Breakout-v5"]: print("Env={}".format(env)) for lstm in [False]: print("LSTM={}".format(lstm)) diff --git a/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py b/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py index 24453758f6f07..edb2b3b3122e1 100644 --- a/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py +++ b/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py @@ -155,7 +155,7 @@ def test_ppo_compilation_w_connectors(self): num_iterations = 2 - for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]: + for env in ["FrozenLake-v1", "ale_py:ALE/MsPacman-v5"]: print("Env={}".format(env)) for lstm in [False, True]: print("LSTM={}".format(lstm)) @@ -216,7 +216,7 @@ def test_ppo_compilation_and_schedule_mixins(self): num_iterations = 2 - for env in ["FrozenLake-v1", "ALE/MsPacman-v5"]: + for env in ["FrozenLake-v1", "ale_py:ALE/MsPacman-v5"]: print("Env={}".format(env)) for lstm in [False, True]: print("LSTM={}".format(lstm)) diff --git a/rllib/algorithms/ppo/tests/test_ppo_rl_module.py b/rllib/algorithms/ppo/tests/test_ppo_rl_module.py index de3d3f42f424b..2b1df1bf33e84 100644 --- a/rllib/algorithms/ppo/tests/test_ppo_rl_module.py +++ b/rllib/algorithms/ppo/tests/test_ppo_rl_module.py @@ -63,7 +63,7 @@ def tearDownClass(cls): def test_rollouts(self): # TODO: Add FrozenLake-v1 to cover LSTM case. - env_names = ["CartPole-v1", "Pendulum-v1", "ALE/Breakout-v5"] + env_names = ["CartPole-v1", "Pendulum-v1", "ale_py:ALE/Breakout-v5"] fwd_fns = ["forward_exploration", "forward_inference"] lstm = [True, False] config_combinations = [env_names, fwd_fns, lstm] @@ -98,7 +98,7 @@ def test_rollouts(self): def test_forward_train(self): # TODO: Add FrozenLake-v1 to cover LSTM case. 
- env_names = ["CartPole-v1", "Pendulum-v1", "ALE/Breakout-v5"] + env_names = ["CartPole-v1", "Pendulum-v1", "ale_py:ALE/Breakout-v5"] lstm = [False, True] config_combinations = [env_names, lstm] for config in itertools.product(*config_combinations): diff --git a/rllib/algorithms/tests/test_algorithm_config.py b/rllib/algorithms/tests/test_algorithm_config.py index 1d7a32e87a2ac..11d55a741be32 100644 --- a/rllib/algorithms/tests/test_algorithm_config.py +++ b/rllib/algorithms/tests/test_algorithm_config.py @@ -145,11 +145,11 @@ def test_rollout_fragment_length(self): def test_detect_atari_env(self): """Tests that we can properly detect Atari envs.""" config = AlgorithmConfig().environment( - env="ALE/Breakout-v5", env_config={"frameskip": 1} + env="ale_py:ALE/Breakout-v5", env_config={"frameskip": 1} ) self.assertTrue(config.is_atari) - config = AlgorithmConfig().environment(env="ALE/Pong-v5") + config = AlgorithmConfig().environment(env="ale_py:ALE/Pong-v5") self.assertTrue(config.is_atari) config = AlgorithmConfig().environment(env="CartPole-v1") @@ -158,7 +158,7 @@ def test_detect_atari_env(self): config = AlgorithmConfig().environment( env=lambda ctx: gym.make( - "ALE/Breakout-v5", + "ale_py:ALE/Breakout-v5", frameskip=1, ) ) diff --git a/rllib/algorithms/tests/test_callbacks_on_env_runner.py b/rllib/algorithms/tests/test_callbacks_on_env_runner.py index 42abf70918417..ae8443b5b811a 100644 --- a/rllib/algorithms/tests/test_callbacks_on_env_runner.py +++ b/rllib/algorithms/tests/test_callbacks_on_env_runner.py @@ -24,19 +24,19 @@ def on_environment_created(self, *args, env_runner, metrics_logger, env, **kwarg def on_episode_start(self, *args, env_runner, metrics_logger, env, **kwargs): assert isinstance(env_runner, EnvRunner) assert isinstance(metrics_logger, MetricsLogger) - assert isinstance(env, gym.Env) + assert isinstance(env, (gym.Env, gym.vector.VectorEnv)) self.counts.update({"start": 1}) def on_episode_step(self, *args, env_runner, metrics_logger, env, **kwargs): assert isinstance(env_runner, EnvRunner) assert isinstance(metrics_logger, MetricsLogger) - assert isinstance(env, gym.Env) + assert isinstance(env, (gym.Env, gym.vector.VectorEnv)) self.counts.update({"step": 1}) def on_episode_end(self, *args, env_runner, metrics_logger, env, **kwargs): assert isinstance(env_runner, EnvRunner) assert isinstance(metrics_logger, MetricsLogger) - assert isinstance(env, gym.Env) + assert isinstance(env, (gym.Env, gym.vector.VectorEnv)) self.counts.update({"end": 1}) def on_sample_end(self, *args, env_runner, metrics_logger, **kwargs): diff --git a/rllib/benchmarks/ppo/benchmark_atari_ppo.py b/rllib/benchmarks/ppo/benchmark_atari_ppo.py index 0b697ff4b9025..e434f2ac078fc 100644 --- a/rllib/benchmarks/ppo/benchmark_atari_ppo.py +++ b/rllib/benchmarks/ppo/benchmark_atari_ppo.py @@ -6,7 +6,7 @@ --num-gpus=4 --num-env-runners=95` In order to only run individual or lists of envs, you can provide a list of env-strings -under the `--env` arg, such as `--env ALE/Pong-v5,ALE/Breakout-v5`. +under the `--env` arg, such as `--env=ale_py:ALE/Pong-v5,ale_py:ALE/Breakout-v5`. For logging to your WandB account, use: `--wandb-key=[your WandB API key] --wandb-project=[some project name] @@ -34,60 +34,60 @@ # rainbow). # Note that for PPO, we simply run everything for 6M ts. 
benchmark_envs = { - "ALE/Alien-v5": (6022.9, 200000000), - "ALE/Amidar-v5": (202.8, 200000000), - "ALE/Assault-v5": (14491.7, 200000000), - "ALE/Asterix-v5": (280114.0, 200000000), - "ALE/Asteroids-v5": (2249.4, 200000000), - "ALE/Atlantis-v5": (814684.0, 200000000), - "ALE/BankHeist-v5": (826.0, 200000000), - "ALE/BattleZone-v5": (52040.0, 200000000), - "ALE/BeamRider-v5": (21768.5, 200000000), - "ALE/Berzerk-v5": (1793.4, 200000000), - "ALE/Bowling-v5": (39.4, 200000000), - "ALE/Boxing-v5": (54.9, 200000000), - "ALE/Breakout-v5": (379.5, 200000000), - "ALE/Centipede-v5": (7160.9, 200000000), - "ALE/ChopperCommand-v5": (10916.0, 200000000), - "ALE/CrazyClimber-v5": (143962.0, 200000000), - "ALE/Defender-v5": (47671.3, 200000000), - "ALE/DemonAttack-v5": (109670.7, 200000000), - "ALE/DoubleDunk-v5": (-0.6, 200000000), - "ALE/Enduro-v5": (2061.1, 200000000), - "ALE/FishingDerby-v5": (22.6, 200000000), - "ALE/Freeway-v5": (29.1, 200000000), - "ALE/Frostbite-v5": (4141.1, 200000000), - "ALE/Gopher-v5": (72595.7, 200000000), - "ALE/Gravitar-v5": (567.5, 200000000), - "ALE/Hero-v5": (50496.8, 200000000), - "ALE/IceHockey-v5": (-11685.8, 200000000), - "ALE/Kangaroo-v5": (10841.0, 200000000), - "ALE/Krull-v5": (6715.5, 200000000), - "ALE/KungFuMaster-v5": (28999.8, 200000000), - "ALE/MontezumaRevenge-v5": (154.0, 200000000), - "ALE/MsPacman-v5": (2570.2, 200000000), - "ALE/NameThisGame-v5": (11686.5, 200000000), - "ALE/Phoenix-v5": (103061.6, 200000000), - "ALE/Pitfall-v5": (-37.6, 200000000), - "ALE/Pong-v5": (19.0, 200000000), - "ALE/PrivateEye-v5": (1704.4, 200000000), - "ALE/Qbert-v5": (18397.6, 200000000), - "ALE/RoadRunner-v5": (54261.0, 200000000), - "ALE/Robotank-v5": (55.2, 200000000), - "ALE/Seaquest-v5": (19176.0, 200000000), - "ALE/Skiing-v5": (-11685.8, 200000000), - "ALE/Solaris-v5": (2860.7, 200000000), - "ALE/SpaceInvaders-v5": (12629.0, 200000000), - "ALE/StarGunner-v5": (123853.0, 200000000), - "ALE/Surround-v5": (7.0, 200000000), - "ALE/Tennis-v5": (-2.2, 200000000), - "ALE/TimePilot-v5": (11190.5, 200000000), - "ALE/Tutankham-v5": (126.9, 200000000), - "ALE/Venture-v5": (45.0, 200000000), - "ALE/VideoPinball-v5": (506817.2, 200000000), - "ALE/WizardOfWor-v5": (14631.5, 200000000), - "ALE/YarsRevenge-v5": (93007.9, 200000000), - "ALE/Zaxxon-v5": (19658.0, 200000000), + "ale_py:ALE/Alien-v5": (6022.9, 200000000), + "ale_py:ALE/Amidar-v5": (202.8, 200000000), + "ale_py:ALE/Assault-v5": (14491.7, 200000000), + "ale_py:ALE/Asterix-v5": (280114.0, 200000000), + "ale_py:ALE/Asteroids-v5": (2249.4, 200000000), + "ale_py:ALE/Atlantis-v5": (814684.0, 200000000), + "ale_py:ALE/BankHeist-v5": (826.0, 200000000), + "ale_py:ALE/BattleZone-v5": (52040.0, 200000000), + "ale_py:ALE/BeamRider-v5": (21768.5, 200000000), + "ale_py:ALE/Berzerk-v5": (1793.4, 200000000), + "ale_py:ALE/Bowling-v5": (39.4, 200000000), + "ale_py:ALE/Boxing-v5": (54.9, 200000000), + "ale_py:ALE/Breakout-v5": (379.5, 200000000), + "ale_py:ALE/Centipede-v5": (7160.9, 200000000), + "ale_py:ALE/ChopperCommand-v5": (10916.0, 200000000), + "ale_py:ALE/CrazyClimber-v5": (143962.0, 200000000), + "ale_py:ALE/Defender-v5": (47671.3, 200000000), + "ale_py:ALE/DemonAttack-v5": (109670.7, 200000000), + "ale_py:ALE/DoubleDunk-v5": (-0.6, 200000000), + "ale_py:ALE/Enduro-v5": (2061.1, 200000000), + "ale_py:ALE/FishingDerby-v5": (22.6, 200000000), + "ale_py:ALE/Freeway-v5": (29.1, 200000000), + "ale_py:ALE/Frostbite-v5": (4141.1, 200000000), + "ale_py:ALE/Gopher-v5": (72595.7, 200000000), + "ale_py:ALE/Gravitar-v5": (567.5, 
200000000), + "ale_py:ALE/Hero-v5": (50496.8, 200000000), + "ale_py:ALE/IceHockey-v5": (-11685.8, 200000000), + "ale_py:ALE/Kangaroo-v5": (10841.0, 200000000), + "ale_py:ALE/Krull-v5": (6715.5, 200000000), + "ale_py:ALE/KungFuMaster-v5": (28999.8, 200000000), + "ale_py:ALE/MontezumaRevenge-v5": (154.0, 200000000), + "ale_py:ALE/MsPacman-v5": (2570.2, 200000000), + "ale_py:ALE/NameThisGame-v5": (11686.5, 200000000), + "ale_py:ALE/Phoenix-v5": (103061.6, 200000000), + "ale_py:ALE/Pitfall-v5": (-37.6, 200000000), + "ale_py:ALE/Pong-v5": (19.0, 200000000), + "ale_py:ALE/PrivateEye-v5": (1704.4, 200000000), + "ale_py:ALE/Qbert-v5": (18397.6, 200000000), + "ale_py:ALE/RoadRunner-v5": (54261.0, 200000000), + "ale_py:ALE/Robotank-v5": (55.2, 200000000), + "ale_py:ALE/Seaquest-v5": (19176.0, 200000000), + "ale_py:ALE/Skiing-v5": (-11685.8, 200000000), + "ale_py:ALE/Solaris-v5": (2860.7, 200000000), + "ale_py:ALE/SpaceInvaders-v5": (12629.0, 200000000), + "ale_py:ALE/StarGunner-v5": (123853.0, 200000000), + "ale_py:ALE/Surround-v5": (7.0, 200000000), + "ale_py:ALE/Tennis-v5": (-2.2, 200000000), + "ale_py:ALE/TimePilot-v5": (11190.5, 200000000), + "ale_py:ALE/Tutankham-v5": (126.9, 200000000), + "ale_py:ALE/Venture-v5": (45.0, 200000000), + "ale_py:ALE/VideoPinball-v5": (506817.2, 200000000), + "ale_py:ALE/WizardOfWor-v5": (14631.5, 200000000), + "ale_py:ALE/YarsRevenge-v5": (93007.9, 200000000), + "ale_py:ALE/Zaxxon-v5": (19658.0, 200000000), } diff --git a/rllib/benchmarks/torch_compile/run_inference_bm.py b/rllib/benchmarks/torch_compile/run_inference_bm.py index a92e49b9cb504..e15b87be5965b 100644 --- a/rllib/benchmarks/torch_compile/run_inference_bm.py +++ b/rllib/benchmarks/torch_compile/run_inference_bm.py @@ -92,7 +92,7 @@ def main(pargs): json.dump(config, f) # Create the environment. - env = wrap_atari_for_new_api_stack(gym.make("ALE/Breakout-v5")) + env = wrap_atari_for_new_api_stack(gym.make("ale_py:ALE/Breakout-v5")) # setup RLModule model_cfg = MODEL_DEFAULTS.copy() diff --git a/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py b/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py index fa046b05285da..23c0cba796766 100644 --- a/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py +++ b/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py @@ -29,7 +29,7 @@ def main(pargs): config = ( PPOConfig() .environment( - "ALE/Breakout-v5", + "ale_py:ALE/Breakout-v5", clip_rewards=True, env_config={ "frameskip": 1, diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py index 8cc4c6e4e2df1..03b8105fbedb4 100644 --- a/rllib/env/multi_agent_env_runner.py +++ b/rllib/env/multi_agent_env_runner.py @@ -90,7 +90,9 @@ def __init__(self, config: AlgorithmConfig, **kwargs): self.make_env() # Create the env-to-module connector pipeline. - self._env_to_module = self.config.build_env_to_module_connector(self.env) + self._env_to_module = self.config.build_env_to_module_connector( + self.env.unwrapped + ) # Cached env-to-module results taken at the end of a `_sample_timesteps()` # call to make sure the final observation (before an episode cut) gets properly # processed (and maybe postprocessed and re-stored into the episode). @@ -104,7 +106,7 @@ def __init__(self, config: AlgorithmConfig, **kwargs): # Construct the MultiRLModule. 
try: module_spec: MultiRLModuleSpec = self.config.get_multi_rl_module_spec( - env=self.env, spaces=self.get_spaces(), inference_only=True + env=self.env.unwrapped, spaces=self.get_spaces(), inference_only=True ) # Build the module from its spec. self.module = module_spec.build() @@ -114,7 +116,9 @@ def __init__(self, config: AlgorithmConfig, **kwargs): self.module = None # Create the two connector pipelines: env-to-module and module-to-env. - self._module_to_env = self.config.build_module_to_env_connector(self.env) + self._module_to_env = self.config.build_module_to_env_connector( + self.env.unwrapped + ) self._needs_initial_reset: bool = True self._episode: Optional[MultiAgentEpisode] = None @@ -259,7 +263,7 @@ def _sample_timesteps( to_env = { Columns.ACTIONS: [ { - aid: self.env.get_action_space(aid).sample() + aid: self.env.unwrapped.get_action_space(aid).sample() for aid in self._episode.get_agents_to_act() } ] @@ -461,7 +465,7 @@ def _sample_episodes( to_env = { Columns.ACTIONS: [ { - aid: self.env.get_action_space(aid).sample() + aid: self.env.unwrapped.get_action_space(aid).sample() for aid in self._episode.get_agents_to_act() } ] @@ -869,7 +873,7 @@ def make_env(self): self._callbacks.on_environment_created( env_runner=self, metrics_logger=self.metrics, - env=self.env, + env=self.env.unwrapped, env_context=env_ctx, ) @@ -889,11 +893,12 @@ def _setup_metrics(self): def _new_episode(self): return MultiAgentEpisode( observation_space={ - aid: self.env.get_observation_space(aid) - for aid in self.env.possible_agents + aid: self.env.unwrapped.get_observation_space(aid) + for aid in self.env.unwrapped.possible_agents }, action_space={ - aid: self.env.get_action_space(aid) for aid in self.env.possible_agents + aid: self.env.unwrapped.get_action_space(aid) + for aid in self.env.unwrapped.possible_agents }, agent_to_module_mapping_fn=self.config.policy_mapping_fn, ) @@ -904,7 +909,7 @@ def _make_on_episode_callback(self, which: str, episode=None): episode=episode, env_runner=self, metrics_logger=self.metrics, - env=self.env, + env=self.env.unwrapped, rl_module=self.module, env_index=0, ) diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index 967d4ec174b3b..14bf1fd635b8e 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -1,10 +1,12 @@ -import time from collections import defaultdict from functools import partial import logging +import time from typing import Collection, DefaultDict, List, Optional, Union import gymnasium as gym +from gymnasium.wrappers.vector import DictInfoToList +from gymnasium.envs.registration import VectorizeMode from ray.rllib.algorithms.algorithm_config import AlgorithmConfig from ray.rllib.algorithms.callbacks import DefaultCallbacks @@ -81,7 +83,7 @@ def __init__(self, config: AlgorithmConfig, **kwargs): self._callbacks: DefaultCallbacks = self.config.callbacks_class() # Create the vectorized gymnasium env. - self.env: Optional[gym.Wrapper] = None + self.env: Optional[gym.vector.VectorEnvWrapper] = None self.num_envs: int = 0 self.make_env() @@ -100,7 +102,7 @@ def __init__(self, config: AlgorithmConfig, **kwargs): # Create the RLModule. try: module_spec: RLModuleSpec = self.config.get_rl_module_spec( - env=self.env, spaces=self.get_spaces(), inference_only=True + env=self.env.unwrapped, spaces=self.get_spaces(), inference_only=True ) # Build the module from its spec. self.module = module_spec.build() @@ -186,7 +188,7 @@ def sample( # Sample n timesteps. 
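The repeated `self.env` to `self.env.unwrapped` changes follow from gymnasium 1.0 dropping the implicit attribute forwarding that wrappers used to provide. A small, hypothetical sketch of the behavior change (the env class and attribute name are illustrative, not from this PR):

import gymnasium as gym

class CorridorEnv(gym.Env):
    # Hypothetical env exposing a custom attribute, analogous to RLlib's
    # multi-agent envs exposing get_action_space()/get_observation_space().
    observation_space = gym.spaces.Discrete(5)
    action_space = gym.spaces.Discrete(2)
    corridor_length = 5

    def reset(self, *, seed=None, options=None):
        return 0, {}

    def step(self, action):
        return 0, 0.0, False, False, {}

wrapped = gym.wrappers.TimeLimit(CorridorEnv(), max_episode_steps=10)
# In gymnasium 1.0, custom attributes are no longer proxied through wrappers,
# so `wrapped.corridor_length` fails; go through `.unwrapped` instead.
print(wrapped.unwrapped.corridor_length)  # -> 5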
if num_timesteps is not None: - samples = self._sample_timesteps( + samples = self._sample( num_timesteps=num_timesteps, explore=explore, random_actions=random_actions, @@ -194,19 +196,16 @@ def sample( ) # Sample m episodes. elif num_episodes is not None: - samples = self._sample_episodes( + samples = self._sample( num_episodes=num_episodes, explore=explore, random_actions=random_actions, ) - # For complete episodes mode, sample a single episode and - # leave coordination of sampling to `synchronous_parallel_sample`. - # TODO (simon, sven): The coordination will eventually move - # to `EnvRunnerGroup` in the future. So from the algorithm one - # would do `EnvRunnerGroup.sample()`. + # For complete episodes mode, sample as long as the number of timesteps + # done is smaller than the `train_batch_size`. else: - samples = self._sample_episodes( - num_episodes=1, + samples = self._sample( + num_episodes=self.num_envs, explore=explore, random_actions=random_actions, ) @@ -222,57 +221,40 @@ def sample( return samples - def _sample_timesteps( + def _sample( self, - num_timesteps: int, + *, + num_timesteps: Optional[int] = None, + num_episodes: Optional[int] = None, explore: bool, random_actions: bool = False, force_reset: bool = False, ) -> List[SingleAgentEpisode]: - """Helper method to sample n timesteps.""" + """Helper method to sample n timesteps or m episodes.""" done_episodes_to_return: List[SingleAgentEpisode] = [] # Have to reset the env (on all vector sub_envs). - if force_reset or self._needs_initial_reset: - # Create n new episodes. - # TODO (sven): Add callback `on_episode_created` as soon as - # `gymnasium-v1.0.0a2` PR is coming. - self._episodes = [] - for env_index in range(self.num_envs): - self._episodes.append(self._new_episode()) - self._shared_data = {} - - # Erase all cached ongoing episodes (these will never be completed and - # would thus never be returned/cleaned by `get_metrics` and cause a memory - # leak). - self._ongoing_episodes_for_metrics.clear() - - # Try resetting the environment. - # TODO (simon): Check, if we need here the seed from the config. - obs, infos = self._try_env_reset() - obs = unbatch(obs) - self._cached_to_module = None - - # Call `on_episode_start()` callbacks. - for env_index in range(self.num_envs): - self._make_on_episode_callback("on_episode_start", env_index) - + if force_reset or num_episodes is not None or self._needs_initial_reset: + episodes = self._episodes = [None for _ in range(self.num_envs)] + shared_data = self._shared_data = {} + self._reset_envs(episodes, shared_data, explore) # We just reset the env. Don't have to force this again in the next # call to `self._sample_timesteps()`. self._needs_initial_reset = False + else: + episodes = self._episodes + shared_data = self._shared_data - # Set initial obs and infos in the episodes. - for env_index in range(self.num_envs): - self._episodes[env_index].add_env_reset( - observation=obs[env_index], - infos=infos[env_index], - ) + if num_episodes is not None: + self._needs_initial_reset = True - # Loop through timesteps. + # Loop through `num_timesteps` timesteps or `num_episodes` episodes. ts = 0 - - while ts < num_timesteps: + eps = 0 + while ( + (ts < num_timesteps) if num_timesteps is not None else (eps < num_episodes) + ): # Act randomly. if random_actions: to_env = { @@ -280,13 +262,9 @@ def _sample_timesteps( } # Compute an action using the RLModule. else: - # Env-to-module connector. 
- to_module = self._cached_to_module or self._env_to_module( - rl_module=self.module, - episodes=self._episodes, - explore=explore, - shared_data=self._shared_data, - ) + # Env-to-module connector (already cached). + to_module = self._cached_to_module + assert to_module is not None self._cached_to_module = None # RLModule forward pass: Explore or not. @@ -305,9 +283,9 @@ def _sample_timesteps( to_env = self._module_to_env( rl_module=self.module, batch=to_env, - episodes=self._episodes, + episodes=episodes, explore=explore, - shared_data=self._shared_data, + shared_data=shared_data, ) # Extract the (vectorized) actions (to be sent to the env) from the @@ -320,264 +298,78 @@ def _sample_timesteps( # Try stepping the environment. results = self._try_env_step(actions_for_env) if results == ENV_STEP_FAILURE: - return self._sample_timesteps( + return self._sample( num_timesteps=num_timesteps, + num_episodes=num_episodes, explore=explore, random_actions=random_actions, force_reset=True, ) - obs, rewards, terminateds, truncateds, infos = results - obs, actions = unbatch(obs), unbatch(actions) - - ts += self.num_envs + observations, rewards, terminateds, truncateds, infos = results + observations, actions = unbatch(observations), unbatch(actions) + call_on_episode_start = set() for env_index in range(self.num_envs): - # TODO (simon): This might be unfortunate if a user needs to set a - # certain env parameter during different episodes (for example for - # benchmarking). extra_model_output = {k: v[env_index] for k, v in to_env.items()} extra_model_output[WEIGHTS_SEQ_NO] = self._weights_seq_no - # In inference, we have only the action logits. - if terminateds[env_index] or truncateds[env_index]: - # Finish the episode with the actual terminal observation stored in - # the info dict. - self._episodes[env_index].add_env_step( - # Gym vector env provides the `"final_observation"`. - # Pop these out of the infos dict so this information doesn't - # appear in the next episode as well (at index=0). - infos[env_index].pop("final_observation"), - actions[env_index], - rewards[env_index], - infos=infos[env_index].pop("final_info"), - terminated=terminateds[env_index], - truncated=truncateds[env_index], - extra_model_outputs=extra_model_output, - ) - # Make the `on_episode_step` and `on_episode_end` callbacks (before - # finalizing the episode object). - self._make_on_episode_callback("on_episode_step", env_index) - - # We have to perform an extra env-to-module pass here, just in case - # the user's connector pipeline performs (permanent) transforms - # on each observation (including this final one here). Without such - # a call and in case the structure of the observations change - # sufficiently, the following `finalize()` call on the episode will - # fail. - if self.module is not None: - self._env_to_module( - episodes=[self._episodes[env_index]], - explore=explore, - rl_module=self.module, - shared_data=self._shared_data, - ) - - self._make_on_episode_callback("on_episode_end", env_index) - - # Then finalize (numpy'ize) the episode. - done_episodes_to_return.append(self._episodes[env_index].finalize()) - - # Create a new episode object with already the reset data in it. - self._episodes[env_index] = SingleAgentEpisode( - observations=[obs[env_index]], - infos=[infos[env_index]], - observation_space=self.env.single_observation_space, - action_space=self.env.single_action_space, + # Episode has no data in it yet -> Was just reset and needs to be called + # with its `add_env_reset()` method. 
+ if not self._episodes[env_index].is_reset: + episodes[env_index].add_env_reset( + observation=observations[env_index], + infos=infos[env_index], ) + call_on_episode_start.add(env_index) - # Make the `on_episode_start` callback. - self._make_on_episode_callback("on_episode_start", env_index) - + # Call `add_env_step()` method on episode. else: - self._episodes[env_index].add_env_step( - obs[env_index], - actions[env_index], - rewards[env_index], + # Only increase ts when we actually stepped (not reset'd as a reset + # does not count as a timestep). + ts += 1 + episodes[env_index].add_env_step( + observation=observations[env_index], + action=actions[env_index], + reward=rewards[env_index], infos=infos[env_index], + terminated=terminateds[env_index], + truncated=truncateds[env_index], extra_model_outputs=extra_model_output, ) - # Make the `on_episode_step` callback. - self._make_on_episode_callback("on_episode_step", env_index) - - # Already perform env-to-module connector call for next call to - # `_sample_timesteps()`. See comment in c'tor for `self._cached_to_module`. - if self.module is not None: - self._cached_to_module = self._env_to_module( - rl_module=self.module, - episodes=self._episodes, - explore=explore, - shared_data=self._shared_data, - ) - - # Return done episodes ... - # TODO (simon): Check, how much memory this attribute uses. - self._done_episodes_for_metrics.extend(done_episodes_to_return) - # ... and all ongoing episode chunks. - - # Also, make sure we start new episode chunks (continuing the ongoing episodes - # from the to-be-returned chunks). - ongoing_episodes_continuations = [ - eps.cut(len_lookback_buffer=self.config.episode_lookback_horizon) - for eps in self._episodes - ] - - ongoing_episodes_to_return = [] - for eps in self._episodes: - # Just started Episodes do not have to be returned. There is no data - # in them anyway. - if eps.t == 0: - continue - eps.validate() - self._ongoing_episodes_for_metrics[eps.id_].append(eps) - # Return finalized (numpy'ized) Episodes. - ongoing_episodes_to_return.append(eps.finalize()) - - # Continue collecting into the cut Episode chunks. - self._episodes = ongoing_episodes_continuations - - self._increase_sampled_metrics(ts) - - # Return collected episode data. - return done_episodes_to_return + ongoing_episodes_to_return - - def _sample_episodes( - self, - num_episodes: int, - explore: bool, - random_actions: bool = False, - ) -> List[SingleAgentEpisode]: - """Helper method to run n episodes. - - See docstring of `self.sample()` for more details. - """ - # If user calls sample(num_timesteps=..) after this, we must reset again - # at the beginning. - self._needs_initial_reset = True - - done_episodes_to_return: List[SingleAgentEpisode] = [] - - episodes = [] - for env_index in range(self.num_envs): - episodes.append(self._new_episode()) - # TODO (sven): Add callback `on_episode_created` as soon as - # `gymnasium-v1.0.0a2` PR is coming. - _shared_data = {} - - # Try resetting the environment. - # TODO (simon): Check, if we need here the seed from the config. - obs, infos = self._try_env_reset() - for env_index in range(self.num_envs): - episodes[env_index].add_env_reset( - observation=unbatch(obs)[env_index], - infos=infos[env_index], - ) - self._make_on_episode_callback("on_episode_start", env_index, episodes) - - # Loop over episodes. - eps = 0 - ts = 0 - while eps < num_episodes: - # Act randomly. 
- if random_actions: - to_env = { - Columns.ACTIONS: self.env.action_space.sample(), - } - # Compute an action using the RLModule. - else: - # Env-to-module connector. - to_module = self._env_to_module( - rl_module=self.module, + # Env-to-module connector pass (cache results as we will do the RLModule + # forward pass only in the next `while`-iteration. + if self.module is not None: + self._cached_to_module = self._env_to_module( episodes=episodes, explore=explore, - shared_data=_shared_data, - ) - - # RLModule forward pass: Explore or not. - if explore: - env_steps_lifetime = ( - self.metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0) - + ts - ) - to_env = self.module.forward_exploration( - to_module, t=env_steps_lifetime - ) - else: - to_env = self.module.forward_inference(to_module) - - # Module-to-env connector. - to_env = self._module_to_env( rl_module=self.module, - batch=to_env, - episodes=episodes, - explore=explore, - shared_data=_shared_data, + shared_data=shared_data, ) - # Extract the (vectorized) actions (to be sent to the env) from the - # module/connector output. Note that these actions are fully ready (e.g. - # already unsquashed/clipped) to be sent to the environment) and might not - # be identical to the actions produced by the RLModule/distribution, which - # are the ones stored permanently in the episode objects. - actions = to_env.pop(Columns.ACTIONS) - actions_for_env = to_env.pop(Columns.ACTIONS_FOR_ENV, actions) - # Try stepping the environment. - results = self._try_env_step(actions_for_env) - if results == ENV_STEP_FAILURE: - return self._sample_episodes( - num_episodes=num_episodes, - explore=explore, - random_actions=random_actions, - ) - obs, rewards, terminateds, truncateds, infos = results - obs, actions = unbatch(obs), unbatch(actions) - ts += self.num_envs - for env_index in range(self.num_envs): - extra_model_output = {k: v[env_index] for k, v in to_env.items()} - extra_model_output[WEIGHTS_SEQ_NO] = self._weights_seq_no - - if terminateds[env_index] or truncateds[env_index]: - eps += 1 - - episodes[env_index].add_env_step( - infos[env_index].pop("final_observation"), - actions[env_index], - rewards[env_index], - infos=infos[env_index].pop("final_info"), - terminated=terminateds[env_index], - truncated=truncateds[env_index], - extra_model_outputs=extra_model_output, + # Call `on_episode_start()` callback (always after reset). + if env_index in call_on_episode_start: + self._make_on_episode_callback( + "on_episode_start", env_index, episodes ) - # Make `on_episode_step` and `on_episode_end` callbacks before - # finalizing the episode. + # Make the `on_episode_step` callbacks. + else: self._make_on_episode_callback( "on_episode_step", env_index, episodes ) - # We have to perform an extra env-to-module pass here, just in case - # the user's connector pipeline performs (permanent) transforms - # on each observation (including this final one here). Without such - # a call and in case the structure of the observations change - # sufficiently, the following `finalize()` call on the episode will - # fail. - if self.module is not None: - self._env_to_module( - episodes=[episodes[env_index]], - explore=explore, - rl_module=self.module, - shared_data=_shared_data, - ) - - # Make the `on_episode_end` callback (before finalizing the episode, - # but after(!) the last env-to-module connector call has been made. - # -> All obs (even the terminal one) should have been processed now - # (by the connector, if applicable). + # Episode is done. 
+ if episodes[env_index].is_done: + eps += 1 + + # Make the `on_episode_end` callbacks (before finalizing the episode + # object). self._make_on_episode_callback( "on_episode_end", env_index, episodes ) - # Finalize (numpy'ize) the episode. + # Then finalize (numpy'ize) the episode. done_episodes_to_return.append(episodes[env_index].finalize()) # Also early-out if we reach the number of episodes within this @@ -585,38 +377,46 @@ def _sample_episodes( if eps == num_episodes: break - # Create a new episode object. + # Create a new episode object with no data in it and execute + # `on_episode_created` callback (before the `env.reset()` call). episodes[env_index] = SingleAgentEpisode( - observations=[obs[env_index]], - infos=[infos[env_index]], observation_space=self.env.single_observation_space, action_space=self.env.single_action_space, ) - # Make `on_episode_start` callback. - self._make_on_episode_callback( - "on_episode_start", env_index, episodes - ) - else: - episodes[env_index].add_env_step( - obs[env_index], - actions[env_index], - rewards[env_index], - infos=infos[env_index], - extra_model_outputs=extra_model_output, - ) - # Make `on_episode_step` callback. - self._make_on_episode_callback( - "on_episode_step", env_index, episodes - ) + # Return done episodes ... + # TODO (simon): Check, how much memory this attribute uses. self._done_episodes_for_metrics.extend(done_episodes_to_return) + # ... and all ongoing episode chunks. - # Initialized episodes have to be removed as they lack `extra_model_outputs`. - samples = [episode for episode in done_episodes_to_return if episode.t > 0] + # Also, make sure we start new episode chunks (continuing the ongoing episodes + # from the to-be-returned chunks). + ongoing_episodes_to_return = [] + # Only if we are doing individual timesteps: We have to maybe cut an ongoing + # episode and continue building it on the next call to `sample()`. + if num_timesteps is not None: + ongoing_episodes_continuations = [ + eps.cut(len_lookback_buffer=self.config.episode_lookback_horizon) + for eps in self._episodes + ] + + for eps in self._episodes: + # Just started Episodes do not have to be returned. There is no data + # in them anyway. + if eps.t == 0: + continue + eps.validate() + self._ongoing_episodes_for_metrics[eps.id_].append(eps) + # Return finalized (numpy'ized) Episodes. + ongoing_episodes_to_return.append(eps.finalize()) + + # Continue collecting into the cut Episode chunks. + self._episodes = ongoing_episodes_continuations self._increase_sampled_metrics(ts) - return samples + # Return collected episode data. + return done_episodes_to_return + ongoing_episodes_to_return @override(EnvRunner) def get_spaces(self): @@ -820,12 +620,15 @@ def make_env(self) -> None: ) gym.register("rllib-single-agent-env-v0", entry_point=entry_point) - # Wrap into `VectorListInfo`` wrapper to get infos as lists. - self.env: gym.Wrapper = gym.wrappers.VectorListInfo( - gym.vector.make( + self.env = DictInfoToList( + gym.make_vec( "rllib-single-agent-env-v0", num_envs=self.config.num_envs_per_env_runner, - asynchronous=self.config.remote_worker_envs, + vectorization_mode=( + VectorizeMode.ASYNC + if self.config.remote_worker_envs + else VectorizeMode.SYNC + ), ) ) @@ -839,7 +642,7 @@ def make_env(self) -> None: self._callbacks.on_environment_created( env_runner=self, metrics_logger=self.metrics, - env=self.env, + env=self.env.unwrapped, env_context=env_ctx, ) @@ -848,19 +651,57 @@ def stop(self): # Close our env object via gymnasium's API. 
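The `make_env()` change above swaps the removed `gym.vector.make` / `VectorListInfo` pair for their gymnasium 1.0 equivalents. A minimal sketch of the new construction, using `CartPole-v1` as a stand-in for the `rllib-single-agent-env-v0` entry point registered in this file:

import gymnasium as gym
from gymnasium.wrappers.vector import DictInfoToList

# gym.make_vec() replaces gym.vector.make(); vectorization_mode takes
# "sync"/"async" (or the VectorizeMode enum) instead of asynchronous=True/False.
env = DictInfoToList(
    gym.make_vec("CartPole-v1", num_envs=2, vectorization_mode="sync")
)
# DictInfoToList converts the vector env's dict-of-arrays infos back into one
# info dict per sub-env, which is the format the EnvRunner iterates over.
obs, infos = env.reset(seed=0)
assert isinstance(infos, list) and len(infos) == 2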
self.env.close() - def _new_episode(self): - return SingleAgentEpisode( + def _reset_envs(self, episodes, shared_data, explore): + # Create n new episodes and make the `on_episode_created` callbacks. + for env_index in range(self.num_envs): + self._new_episode(env_index, episodes) + + # Erase all cached ongoing episodes (these will never be completed and + # would thus never be returned/cleaned by `get_metrics` and cause a memory + # leak). + self._ongoing_episodes_for_metrics.clear() + + # Try resetting the environment. + # TODO (simon): Check, if we need here the seed from the config. + observations, infos = self._try_env_reset() + observations = unbatch(observations) + + # Set initial obs and infos in the episodes. + for env_index in range(self.num_envs): + episodes[env_index].add_env_reset( + observation=observations[env_index], + infos=infos[env_index], + ) + + # Run the env-to-module connector to make sure the reset-obs/infos have + # properly been processed (if applicable). + self._cached_to_module = None + if self.module: + self._cached_to_module = self._env_to_module( + rl_module=self.module, + episodes=episodes, + explore=explore, + shared_data=shared_data, + ) + + # Call `on_episode_start()` callbacks (always after reset). + for env_index in range(self.num_envs): + self._make_on_episode_callback("on_episode_start", env_index, episodes) + + def _new_episode(self, env_index, episodes=None): + episodes = episodes if episodes is not None else self._episodes + episodes[env_index] = SingleAgentEpisode( observation_space=self.env.single_observation_space, action_space=self.env.single_action_space, ) + self._make_on_episode_callback("on_episode_created", env_index, episodes) - def _make_on_episode_callback(self, which: str, idx: int, episodes=None): - episodes = episodes if episodes is not None else self._episodes + def _make_on_episode_callback(self, which: str, idx: int, episodes): getattr(self._callbacks, which)( episode=episodes[idx], env_runner=self, metrics_logger=self.metrics, - env=self.env, + env=self.env.unwrapped, rl_module=self.module, env_index=idx, ) diff --git a/rllib/env/single_agent_episode.py b/rllib/env/single_agent_episode.py index dd4f480394705..b11cdd6783746 100644 --- a/rllib/env/single_agent_episode.py +++ b/rllib/env/single_agent_episode.py @@ -362,6 +362,7 @@ def add_env_reset( observation: The initial observation returned by `env.reset()`. infos: An (optional) info dict returned by `env.reset()`. """ + assert not self.is_reset assert not self.is_done assert len(self.observations) == 0 # Assume that this episode is completely empty and has not stepped yet. 
@@ -485,6 +486,11 @@ def validate(self) -> None: for k, v in self.extra_model_outputs.items(): assert len(v) == len(self.observations) - 1 + @property + def is_reset(self) -> bool: + """Returns True if `self.add_env_reset()` has already been called.""" + return len(self.observations) > 0 + @property def is_finalized(self) -> bool: """True, if the data in this episode is already stored as numpy arrays.""" diff --git a/rllib/env/tests/test_single_agent_env_runner.py b/rllib/env/tests/test_single_agent_env_runner.py index d6dbf7082985c..4d5f8808aa84c 100644 --- a/rllib/env/tests/test_single_agent_env_runner.py +++ b/rllib/env/tests/test_single_agent_env_runner.py @@ -9,6 +9,7 @@ from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner from ray.rllib.env.utils import _gym_env_creator from ray.rllib.examples.envs.classes.simple_corridor import SimpleCorridor +from ray.rllib.utils.test_utils import check class TestSingleAgentEnvRunner(unittest.TestCase): @@ -53,7 +54,7 @@ def test_sample(self): # Sample 10 episodes (5 per env) 100 times. for _ in range(100): episodes = env_runner.sample(num_episodes=10, random_actions=True) - self.assertTrue(len(episodes) == 10) + check(len(episodes), 10) # Since we sampled complete episodes, there should be no ongoing episodes # being returned. self.assertTrue(all(e.is_done for e in episodes)) @@ -61,20 +62,22 @@ def test_sample(self): # Sample 10 timesteps (5 per env) 100 times. for _ in range(100): episodes = env_runner.sample(num_timesteps=10, random_actions=True) - # Check, whether the sum of lengths of all episodes returned is 20 - self.assertTrue(sum(len(e) for e in episodes) == 10) + # Check the sum of lengths of all episodes returned. + sum_ = sum(map(len, episodes)) + self.assertTrue(sum_ in [10, 11]) # Sample (by default setting: rollout_fragment_length=64) 10 times. for _ in range(100): episodes = env_runner.sample(random_actions=True) # Check, whether the sum of lengths of all episodes returned is 128 # 2 (num_env_per_worker) * 64 (rollout_fragment_length). - self.assertTrue(sum(len(e) for e in episodes) == 128) + sum_ = sum(map(len, episodes)) + self.assertTrue(sum_ in [128, 129]) def test_async_vector_env(self): """Tests, whether SingleAgentGymEnvRunner can run with vector envs.""" - for env in ["TestEnv-v0", "CartPole-v1", SimpleCorridor, "tune-registered"]: + for env in ["CartPole-v1", SimpleCorridor, "tune-registered"]: config = ( AlgorithmConfig().environment(env) # Vectorize x5 and by default, rollout 64 timesteps per individual env. @@ -110,7 +113,7 @@ def test_distributed_env_runner(self): for env_spec in ["tune-registered", "CartPole-v1", SimpleCorridor]: config = ( AlgorithmConfig().environment(env_spec) - # Vectorize x5 and by default, rollout 64 timesteps per individual + # Vectorize x5 and by default, rollout 10 timesteps per individual # env. .env_runners( num_env_runners=5, @@ -129,9 +132,14 @@ def test_distributed_env_runner(self): # Loop over individual EnvRunner Actor's results and inspect each. for episodes in results: # Assert length of all fragments is `rollout_fragment_length`. 
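The new `is_reset` property lets the unified `_sample()` loop tell freshly created episodes apart from ones that already received their reset observation. A hypothetical illustration (spaces and values are made up for the example):

import gymnasium as gym
from ray.rllib.env.single_agent_episode import SingleAgentEpisode

episode = SingleAgentEpisode(
    observation_space=gym.spaces.Discrete(4),
    action_space=gym.spaces.Discrete(2),
)
assert not episode.is_reset             # no observation stored yet
episode.add_env_reset(observation=0, infos={})
assert episode.is_reset                 # first observation present
assert not episode.is_done              # not terminated/truncated yet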
- self.assertEqual( + self.assertIn( sum(len(e) for e in episodes), - config.num_envs_per_env_runner * config.rollout_fragment_length, + [ + config.num_envs_per_env_runner + * config.rollout_fragment_length + + i + for i in range(config.num_envs_per_env_runner) + ], ) diff --git a/rllib/env/utils/__init__.py b/rllib/env/utils/__init__.py index 67dc49efd76b3..09dfbe227e5a6 100644 --- a/rllib/env/utils/__init__.py +++ b/rllib/env/utils/__init__.py @@ -103,6 +103,13 @@ def _gym_env_creator( except (AttributeError, ModuleNotFoundError, ImportError): pass + # If env descriptor is a str, starting with "ale_py:ALE/", for now, register all ALE + # envs from ale_py. + if isinstance(env_descriptor, str) and env_descriptor.startswith("ale_py:ALE/"): + import ale_py + + gym.register_envs(ale_py) + # Try creating a gym env. If this fails we can output a # decent error message. try: diff --git a/rllib/env/wrappers/atari_wrappers.py b/rllib/env/wrappers/atari_wrappers.py index 2edefd58208b3..3bb0f3ff77196 100644 --- a/rllib/env/wrappers/atari_wrappers.py +++ b/rllib/env/wrappers/atari_wrappers.py @@ -13,7 +13,8 @@ def is_atari(env: Union[gym.Env, str]) -> bool: """Returns, whether a given env object or env descriptor (str) is an Atari env. Args: - env: The gym.Env object or a string descriptor of the env (e.g. "ALE/Pong-v5"). + env: The gym.Env object or a string descriptor of the env (for example, + "ale_py:ALE/Pong-v5"). Returns: Whether `env` is an Atari environment. @@ -28,9 +29,9 @@ def is_atari(env: Union[gym.Env, str]) -> bool: ): return False return "AtariEnv None: - """Initializes a Kaggle football environment. - - Args: - configuration (Optional[Dict[str, Any]]): configuration of the - football environment. For detailed information, see: - https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_\ - environments/envs/football/football.json - """ - super().__init__() - self.kaggle_env = kaggle_environments.make( - "football", configuration=configuration or {} - ) - self.last_cumulative_reward = None - - def reset( - self, - *, - seed: Optional[int] = None, - options: Optional[dict] = None, - ) -> Tuple[MultiAgentDict, MultiAgentDict]: - kaggle_state = self.kaggle_env.reset() - self.last_cumulative_reward = None - return { - f"agent{idx}": self._convert_obs(agent_state["observation"]) - for idx, agent_state in enumerate(kaggle_state) - if agent_state["status"] == "ACTIVE" - }, {} - - def step( - self, action_dict: Dict[AgentID, int] - ) -> Tuple[ - MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict - ]: - # Convert action_dict (used by RLlib) to a list of actions (used by - # kaggle_environments) - action_list = [None] * len(self.kaggle_env.state) - for idx, agent_state in enumerate(self.kaggle_env.state): - if agent_state["status"] == "ACTIVE": - action = action_dict[f"agent{idx}"] - action_list[idx] = [action] - self.kaggle_env.step(action_list) - - # Parse (obs, reward, terminated, truncated, info) from kaggle's "state" - # representation. 
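The `_gym_env_creator` addition above makes explicit what the `ale_py:` prefix does implicitly: it ensures the ALE environments are registered with gymnasium before `gym.make()` is attempted. A hedged sketch of the same idea in isolation (assumes `ale_py` 0.10.x):

import gymnasium as gym

env_descriptor = "ale_py:ALE/Pong-v5"

# Register all ALE envs with gymnasium if an ALE descriptor is requested.
if env_descriptor.startswith("ale_py:ALE/"):
    import ale_py

    gym.register_envs(ale_py)

env = gym.make(env_descriptor)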
- obs = {} - cumulative_reward = {} - terminated = {"__all__": self.kaggle_env.done} - truncated = {"__all__": False} - info = {} - for idx in range(len(self.kaggle_env.state)): - agent_state = self.kaggle_env.state[idx] - agent_name = f"agent{idx}" - if agent_state["status"] == "ACTIVE": - obs[agent_name] = self._convert_obs(agent_state["observation"]) - cumulative_reward[agent_name] = agent_state["reward"] - terminated[agent_name] = agent_state["status"] != "ACTIVE" - truncated[agent_name] = False - info[agent_name] = agent_state["info"] - # Compute the step rewards from the cumulative rewards - if self.last_cumulative_reward is not None: - reward = { - agent_id: agent_reward - self.last_cumulative_reward[agent_id] - for agent_id, agent_reward in cumulative_reward.items() - } - else: - reward = cumulative_reward - self.last_cumulative_reward = cumulative_reward - return obs, reward, terminated, truncated, info - - def _convert_obs(self, obs: Dict[str, Any]) -> Dict[str, Any]: - """Convert raw observations - - These conversions are necessary to make the observations fall into the - observation space defined below. - """ - new_obs = deepcopy(obs) - if new_obs["players_raw"][0]["ball_owned_team"] == -1: - new_obs["players_raw"][0]["ball_owned_team"] = 2 - if new_obs["players_raw"][0]["ball_owned_player"] == -1: - new_obs["players_raw"][0]["ball_owned_player"] = 11 - new_obs["players_raw"][0]["steps_left"] = [ - new_obs["players_raw"][0]["steps_left"] - ] - return new_obs - - def build_agent_spaces(self) -> Tuple[Space, Space]: - """Construct the action and observation spaces - - Description of actions and observations: - https://github.com/google-research/football/blob/master/gfootball/doc/ - observation.md - """ # noqa: E501 - action_space = Discrete(19) - # The football field's corners are [+-1., +-0.42]. However, the players - # and balls may get out of the field. Thus we multiply those limits by - # a factor of 2. 
- xlim = 1.0 * 2 - ylim = 0.42 * 2 - num_players: int = 11 - xy_space = Box( - np.array([-xlim, -ylim], dtype=np.float32), - np.array([xlim, ylim], dtype=np.float32), - ) - xyz_space = Box( - np.array([-xlim, -ylim, 0], dtype=np.float32), - np.array([xlim, ylim, np.inf], dtype=np.float32), - ) - observation_space = DictSpace( - { - "controlled_players": Discrete(2), - "players_raw": TupleSpace( - [ - DictSpace( - { - # ball information - "ball": xyz_space, - "ball_direction": Box(-np.inf, np.inf, (3,)), - "ball_rotation": Box(-np.inf, np.inf, (3,)), - "ball_owned_team": Discrete(3), - "ball_owned_player": Discrete(num_players + 1), - # left team - "left_team": TupleSpace([xy_space] * num_players), - "left_team_direction": TupleSpace( - [xy_space] * num_players - ), - "left_team_tired_factor": Box(0.0, 1.0, (num_players,)), - "left_team_yellow_card": MultiBinary(num_players), - "left_team_active": MultiBinary(num_players), - "left_team_roles": MultiDiscrete([10] * num_players), - # right team - "right_team": TupleSpace([xy_space] * num_players), - "right_team_direction": TupleSpace( - [xy_space] * num_players - ), - "right_team_tired_factor": Box( - 0.0, 1.0, (num_players,) - ), - "right_team_yellow_card": MultiBinary(num_players), - "right_team_active": MultiBinary(num_players), - "right_team_roles": MultiDiscrete([10] * num_players), - # controlled player information - "active": Discrete(num_players), - "designated": Discrete(num_players), - "sticky_actions": MultiBinary(10), - # match state - "score": Box(-np.inf, np.inf, (2,)), - "steps_left": Box(0, np.inf, (1,)), - "game_mode": Discrete(7), - } - ) - ] - ), - } - ) - return action_space, observation_space diff --git a/rllib/env/wrappers/model_vector_env.py b/rllib/env/wrappers/model_vector_env.py deleted file mode 100644 index 8facedab25e8c..0000000000000 --- a/rllib/env/wrappers/model_vector_env.py +++ /dev/null @@ -1,164 +0,0 @@ -import logging -from gymnasium.spaces import Discrete -import numpy as np - -from ray.rllib.utils.annotations import override -from ray.rllib.env.vector_env import VectorEnv -from ray.rllib.evaluation.rollout_worker import get_global_worker -from ray.rllib.env.base_env import BaseEnv, convert_to_base_env -from ray.rllib.utils.typing import EnvType - -logger = logging.getLogger(__name__) - - -def model_vector_env(env: EnvType) -> BaseEnv: - """Returns a VectorizedEnv wrapper around the given environment. - - To obtain worker configs, one can call get_global_worker(). - - Args: - env: The input environment (of any supported environment - type) to be convert to a _VectorizedModelGymEnv (wrapped as - an RLlib BaseEnv). - - Returns: - BaseEnv: The BaseEnv converted input `env`. - """ - worker = get_global_worker() - worker_index = worker.worker_index - if worker_index: - env = _VectorizedModelGymEnv( - make_env=worker.make_sub_env_fn, - existing_envs=[env], - num_envs=worker.config.num_envs_per_env_runner, - observation_space=env.observation_space, - action_space=env.action_space, - ) - return convert_to_base_env( - env, - make_env=worker.make_sub_env_fn, - num_envs=worker.config.num_envs_per_env_runner, - remote_envs=False, - remote_env_batch_wait_ms=0, - ) - - -class _VectorizedModelGymEnv(VectorEnv): - """Vectorized Environment Wrapper for MB-MPO. - - Primary change is in the `vector_step` method, which calls the dynamics - models for next_obs "calculation" (instead of the actual env). Also, the - actual envs need to have two extra methods implemented: `reward(obs)` and - (optionally) `done(obs)`. 
If `done` is not implemented, we will assume - that episodes in the env do not terminate, ever. - """ - - def __init__( - self, - make_env=None, - existing_envs=None, - num_envs=1, - *, - observation_space=None, - action_space=None, - env_config=None - ): - self.make_env = make_env - self.envs = existing_envs - self.num_envs = num_envs - while len(self.envs) < num_envs: - self.envs.append(self.make_env(len(self.envs))) - self._timesteps = [0 for _ in range(self.num_envs)] - self.cur_obs = [None for _ in range(self.num_envs)] - - super().__init__( - observation_space=observation_space or self.envs[0].observation_space, - action_space=action_space or self.envs[0].action_space, - num_envs=num_envs, - ) - worker = get_global_worker() - self.model, self.device = worker.foreach_policy( - lambda x, y: (x.dynamics_model, x.device) - )[0] - - @override(VectorEnv) - def vector_reset(self, *, seeds=None, options=None): - """Override parent to store actual env obs for upcoming predictions.""" - seeds = seeds or [None] * self.num_envs - options = options or [None] * self.num_envs - reset_results = [ - e.reset(seed=seeds[i], options=options[i]) for i, e in enumerate(self.envs) - ] - self.cur_obs = [io[0] for io in reset_results] - infos = [io[1] for io in reset_results] - self._timesteps = [0 for _ in range(self.num_envs)] - return self.cur_obs, infos - - @override(VectorEnv) - def reset_at(self, index, *, seed=None, options=None): - """Override parent to store actual env obs for upcoming predictions.""" - obs, infos = self.envs[index].reset(seed=seed, options=options) - self.cur_obs[index] = obs - self._timesteps[index] = 0 - return obs, infos - - @override(VectorEnv) - def vector_step(self, actions): - if self.cur_obs is None: - raise ValueError("Need to reset env first") - - for idx in range(self.num_envs): - self._timesteps[idx] += 1 - - # If discrete, need to one-hot actions - if isinstance(self.action_space, Discrete): - act = np.array(actions) - new_act = np.zeros((act.size, act.max() + 1)) - new_act[np.arange(act.size), act] = 1 - actions = new_act.astype("float32") - - # Batch the TD-model prediction. - obs_batch = np.stack(self.cur_obs, axis=0) - action_batch = np.stack(actions, axis=0) - # Predict the next observation, given previous a) real obs - # (after a reset), b) predicted obs (any other time). - next_obs_batch = self.model.predict_model_batches( - obs_batch, action_batch, device=self.device - ) - next_obs_batch = np.clip(next_obs_batch, -1000, 1000) - - # Call env's reward function. - # Note: Each actual env must implement one to output exact rewards. - rew_batch = self.envs[0].reward(obs_batch, action_batch, next_obs_batch) - - # If env has a `done` method, use it. - if hasattr(self.envs[0], "done"): - dones_batch = self.envs[0].done(next_obs_batch) - # Our sub-environments have timestep limits. - elif hasattr(self.envs[0], "_max_episode_steps"): - dones_batch = np.array( - [ - self._timesteps[idx] >= self.envs[0]._max_episode_steps - for idx in range(self.num_envs) - ] - ) - # Otherwise, assume the episode does not end. 
- else: - dones_batch = np.asarray([False for _ in range(self.num_envs)]) - truncateds_batch = [False for _ in range(self.num_envs)] - - info_batch = [{} for _ in range(self.num_envs)] - - self.cur_obs = next_obs_batch - - return ( - list(next_obs_batch), - list(rew_batch), - list(dones_batch), - truncateds_batch, - info_batch, - ) - - @override(VectorEnv) - def get_sub_environments(self): - return self.envs diff --git a/rllib/env/wrappers/recsim.py b/rllib/env/wrappers/recsim.py deleted file mode 100644 index b1d3e749e5144..0000000000000 --- a/rllib/env/wrappers/recsim.py +++ /dev/null @@ -1,270 +0,0 @@ -"""Tools and utils to create RLlib-ready recommender system envs using RecSim. - -For examples on how to generate a RecSim env class (usable in RLlib): -See ray.rllib.examples.envs.classes.recommender_system_envs_with_recsim.py - -For more information on google's RecSim itself: -https://github.com/google-research/recsim -""" - -from collections import OrderedDict -import gymnasium as gym -from gymnasium.spaces import Dict, Discrete, MultiDiscrete -from gymnasium.wrappers import EnvCompatibility -import numpy as np -from recsim.document import AbstractDocumentSampler -from recsim.simulator import environment, recsim_gym -from recsim.user import AbstractUserModel, AbstractResponse -from typing import Callable, List, Optional, Type - -from ray.rllib.env.env_context import EnvContext -from ray.rllib.utils.error import UnsupportedSpaceException -from ray.rllib.utils.spaces.space_utils import convert_element_to_space_type - - -class RecSimObservationSpaceWrapper(gym.ObservationWrapper): - """Fix RecSim environment's observation space - - In RecSim's observation spaces, the "doc" field is a dictionary keyed by - document IDs. Those IDs are changing every step, thus generating a - different observation space in each time. This causes issues for RLlib - because it expects the observation space to remain the same across steps. - - This environment wrapper fixes that by reindexing the documents by their - positions in the list. - """ - - def __init__(self, env: gym.Env): - super().__init__(env) - obs_space = self.env.observation_space - doc_space = Dict( - OrderedDict( - [ - (str(k), doc) - for k, (_, doc) in enumerate(obs_space["doc"].spaces.items()) - ] - ) - ) - self.observation_space = Dict( - OrderedDict( - [ - ("user", obs_space["user"]), - ("doc", doc_space), - ("response", obs_space["response"]), - ] - ) - ) - self._sampled_obs = self.observation_space.sample() - self.action_space = self.env.action_space - - def observation(self, obs): - new_obs = OrderedDict() - new_obs["user"] = obs["user"] - new_obs["doc"] = {str(k): v for k, (_, v) in enumerate(obs["doc"].items())} - new_obs["response"] = obs["response"] - new_obs = convert_element_to_space_type(new_obs, self._sampled_obs) - return new_obs - - -class RecSimObservationBanditWrapper(gym.ObservationWrapper): - """Fix RecSim environment's observation format - - RecSim's observations are keyed by document IDs, and nested under - "doc" key. - Our Bandits agent expects the observations to be flat 2D array - and under "item" key. - - This environment wrapper converts obs into the right format. 
- """ - - def __init__(self, env: gym.Env): - super().__init__(env) - obs_space = self.env.observation_space - - num_items = len(obs_space["doc"]) - embedding_dim = next(iter(obs_space["doc"].values())).shape[-1] - self.observation_space = Dict( - OrderedDict( - [ - ( - "item", - gym.spaces.Box( - low=-1.0, high=1.0, shape=(num_items, embedding_dim) - ), - ), - ] - ) - ) - self._sampled_obs = self.observation_space.sample() - self.action_space = self.env.action_space - - def observation(self, obs): - new_obs = OrderedDict() - new_obs["item"] = np.vstack(list(obs["doc"].values())) - new_obs = convert_element_to_space_type(new_obs, self._sampled_obs) - return new_obs - - -class RecSimResetWrapper(gym.Wrapper): - """Fix RecSim environment's reset() and close() function - - RecSim's reset() function returns an observation without the "response" - field, breaking RLlib's check. This wrapper fixes that by assigning a - random "response". - - RecSim's close() function raises NotImplementedError. We change the - behavior to doing nothing. - """ - - def __init__(self, env: gym.Env): - super().__init__(env) - self._sampled_obs = self.env.observation_space.sample() - - def reset(self, *, seed=None, options=None): - obs, info = super().reset() - obs["response"] = self.env.observation_space["response"].sample() - obs = convert_element_to_space_type(obs, self._sampled_obs) - return obs, info - - def close(self): - pass - - -class MultiDiscreteToDiscreteActionWrapper(gym.ActionWrapper): - """Convert the action space from MultiDiscrete to Discrete - - At this moment, RLlib's DQN algorithms only work on Discrete action space. - This wrapper allows us to apply DQN algorithms to the RecSim environment. - """ - - def __init__(self, env: gym.Env): - super().__init__(env) - - if not isinstance(env.action_space, MultiDiscrete): - raise UnsupportedSpaceException( - f"Action space {env.action_space} " - f"is not supported by {self.__class__.__name__}" - ) - self.action_space_dimensions = env.action_space.nvec - self.action_space = Discrete(np.prod(self.action_space_dimensions)) - - def action(self, action: int) -> List[int]: - """Convert a Discrete action to a MultiDiscrete action""" - multi_action = [None] * len(self.action_space_dimensions) - for idx, n in enumerate(self.action_space_dimensions): - action, dim_action = divmod(action, n) - multi_action[idx] = dim_action - return multi_action - - -def recsim_gym_wrapper( - recsim_gym_env: gym.Env, - convert_to_discrete_action_space: bool = False, - wrap_for_bandits: bool = False, -) -> gym.Env: - """Makes sure a RecSim gym.Env can ba handled by RLlib. - - In RecSim's observation spaces, the "doc" field is a dictionary keyed by - document IDs. Those IDs are changing every step, thus generating a - different observation space in each time. This causes issues for RLlib - because it expects the observation space to remain the same across steps. - - Also, RecSim's reset() function returns an observation without the - "response" field, breaking RLlib's check. This wrapper fixes that by - assigning a random "response". - - Args: - recsim_gym_env: The RecSim gym.Env instance. Usually resulting from a - raw RecSim env having been passed through RecSim's utility function: - `recsim.simulator.recsim_gym.RecSimGymEnv()`. - convert_to_discrete_action_space: Optional bool indicating, whether - the action space of the created env class should be Discrete - (rather than MultiDiscrete, even if slate size > 1). 
This is useful - for algorithms that don't support MultiDiscrete action spaces, - such as RLlib's DQN. If None, `convert_to_discrete_action_space` - may also be provided via the EnvContext (config) when creating an - actual env instance. - wrap_for_bandits: Bool indicating, whether this RecSim env should be - wrapped for use with our Bandits agent. - - Returns: - An RLlib-ready gym.Env instance. - """ - env = RecSimResetWrapper(recsim_gym_env) - env = RecSimObservationSpaceWrapper(env) - if convert_to_discrete_action_space: - env = MultiDiscreteToDiscreteActionWrapper(env) - if wrap_for_bandits: - env = RecSimObservationBanditWrapper(env) - return env - - -def make_recsim_env( - recsim_user_model_creator: Callable[[EnvContext], AbstractUserModel], - recsim_document_sampler_creator: Callable[[EnvContext], AbstractDocumentSampler], - reward_aggregator: Callable[[List[AbstractResponse]], float], -) -> Type[gym.Env]: - """Creates a RLlib-ready gym.Env class given RecSim user and doc models. - - See https://github.com/google-research/recsim for more information on how to - build the required components from scratch in python using RecSim. - - Args: - recsim_user_model_creator: A callable taking an EnvContext and returning - a RecSim AbstractUserModel instance to use. - recsim_document_sampler_creator: A callable taking an EnvContext and - returning a RecSim AbstractDocumentSampler - to use. This will include a AbstractDocument as well. - reward_aggregator: Callable taking a list of RecSim - AbstractResponse instances and returning a float (aggregated - reward). - - Returns: - An RLlib-ready gym.Env class to use inside an Algorithm. - """ - - class _RecSimEnv(gym.Wrapper): - def __init__(self, config: Optional[EnvContext] = None): - - # Override with default values, in case they are not set by the user. - default_config = { - "num_candidates": 10, - "slate_size": 2, - "resample_documents": True, - "seed": 0, - "convert_to_discrete_action_space": False, - "wrap_for_bandits": False, - } - if config is None or isinstance(config, dict): - config = EnvContext(config or default_config, worker_index=0) - config.set_defaults(default_config) - - # Create the RecSim user model instance. - recsim_user_model = recsim_user_model_creator(config) - # Create the RecSim document sampler instance. - recsim_document_sampler = recsim_document_sampler_creator(config) - - # Create a raw RecSim environment (not yet a gym.Env!). - raw_recsim_env = environment.SingleUserEnvironment( - recsim_user_model, - recsim_document_sampler, - config["num_candidates"], - config["slate_size"], - resample_documents=config["resample_documents"], - ) - # Convert raw RecSim env to a gym.Env. - gym_env = recsim_gym.RecSimGymEnv(raw_recsim_env, reward_aggregator) - # Wrap for the new gym API (RecSim does not support this). - gym_env = EnvCompatibility(gym_env) - - # Fix observation space and - if necessary - convert to discrete - # action space (from multi-discrete). - env = recsim_gym_wrapper( - gym_env, - config["convert_to_discrete_action_space"], - config["wrap_for_bandits"], - ) - # Call the super (Wrapper constructor) passing it the created env. - super().__init__(env=env) - - return _RecSimEnv diff --git a/rllib/env/wrappers/recsim_wrapper.py b/rllib/env/wrappers/recsim_wrapper.py deleted file mode 100644 index 3251ea1a3a3e7..0000000000000 --- a/rllib/env/wrappers/recsim_wrapper.py +++ /dev/null @@ -1,14 +0,0 @@ -# Deprecated module: Use ray.rllib.env.wrappers.recsim instead! 
-from ray.rllib.env.wrappers.recsim import ( # noqa: F401 - make_recsim_env, - MultiDiscreteToDiscreteActionWrapper, - RecSimObservationSpaceWrapper, - RecSimResetWrapper, -) -from ray.rllib.utils.deprecation import deprecation_warning - -deprecation_warning( - old="ray.rllib.env.wrappers.recsim_wrapper", - new="ray.rllib.env.wrappers.recsim", - error=True, -) diff --git a/rllib/env/wrappers/uncertainty_wrappers.py b/rllib/env/wrappers/uncertainty_wrappers.py deleted file mode 100644 index e8e2d1fa48337..0000000000000 --- a/rllib/env/wrappers/uncertainty_wrappers.py +++ /dev/null @@ -1,23 +0,0 @@ -########## -# Contribution by the Center on Long-Term Risk: -# https://github.com/longtermrisk/marltoolbox -########## -import numpy as np - - -def add_RewardUncertaintyEnvClassWrapper( - EnvClass, reward_uncertainty_std, reward_uncertainty_mean=0.0 -): - class RewardUncertaintyEnvClassWrapper(EnvClass): - def step(self, action): - observations, rewards, done, info = super().step(action) - return observations, self.reward_wrapper(rewards), done, info - - def reward_wrapper(self, reward_dict): - for k in reward_dict.keys(): - reward_dict[k] += np.random.normal( - loc=reward_uncertainty_mean, scale=reward_uncertainty_std, size=() - ) - return reward_dict - - return RewardUncertaintyEnvClassWrapper diff --git a/rllib/examples/_old_api_stack/custom_keras_model.py b/rllib/examples/_old_api_stack/custom_keras_model.py index cdf1f516ef329..e3ccad874b300 100644 --- a/rllib/examples/_old_api_stack/custom_keras_model.py +++ b/rllib/examples/_old_api_stack/custom_keras_model.py @@ -127,7 +127,9 @@ def on_train_result(self, *, algorithm, result, **kwargs): config = ( get_trainable_cls(args.run) .get_default_config() - .environment("ALE/Breakout-v5" if args.use_vision_network else "CartPole-v1") + .environment( + "ale_py:ALE/Breakout-v5" if args.use_vision_network else "CartPole-v1" + ) .framework("tf") .callbacks(MyCallbacks) .training( diff --git a/rllib/examples/connectors/frame_stacking.py b/rllib/examples/connectors/frame_stacking.py index 554bd1c8f20d3..103ae8de5f113 100644 --- a/rllib/examples/connectors/frame_stacking.py +++ b/rllib/examples/connectors/frame_stacking.py @@ -97,7 +97,7 @@ # Use Pong by default. parser.set_defaults( enable_new_api_stack=True, - env="ALE/Pong-v5", + env="ale_py:ALE/Pong-v5", ) parser.add_argument( "--num-frames", diff --git a/rllib/examples/curiosity/euclidian_distance_based_curiosity.py b/rllib/examples/curiosity/euclidian_distance_based_curiosity.py index 0d73c6b50c1f0..d471c17f18587 100644 --- a/rllib/examples/curiosity/euclidian_distance_based_curiosity.py +++ b/rllib/examples/curiosity/euclidian_distance_based_curiosity.py @@ -67,12 +67,11 @@ ) from ray.tune.registry import get_trainable_cls -# TODO (sven): SB3's PPO does seem to learn MountainCar-v0 until a reward of ~-110. -# We might have to play around some more with different initializations, more -# randomized SGD minibatching (we don't shuffle batch rn), etc.. to get to these -# results as well. +# TODO (sven): SB3's PPO learns MountainCar-v0 until a reward of ~-110. +# We might have to play around some more with different initializations, etc.. +# to get to these results as well. 
parser = add_rllib_example_script_args( - default_reward=-130.0, default_iters=2000, default_timesteps=1000000 + default_reward=-140.0, default_iters=2000, default_timesteps=1000000 ) parser.set_defaults( enable_new_api_stack=True, diff --git a/rllib/examples/curiosity/intrinsic_curiosity_model_based_curiosity.py b/rllib/examples/curiosity/intrinsic_curiosity_model_based_curiosity.py index 323bc20c8a582..b70cc89bdbe7d 100644 --- a/rllib/examples/curiosity/intrinsic_curiosity_model_based_curiosity.py +++ b/rllib/examples/curiosity/intrinsic_curiosity_model_based_curiosity.py @@ -73,6 +73,8 @@ """ from collections import defaultdict +import numpy as np + from ray import tune from ray.rllib.algorithms.algorithm_config import AlgorithmConfig from ray.rllib.algorithms.callbacks import DefaultCallbacks @@ -132,9 +134,9 @@ def on_episode_step( rl_module, **kwargs, ): - obs = episode.get_observations(-1) num_rows = env.envs[0].unwrapped.nrow num_cols = env.envs[0].unwrapped.ncol + obs = np.argmax(episode.get_observations(-1)) row = obs // num_cols col = obs % num_rows curr_dist = (row**2 + col**2) ** 0.5 @@ -298,7 +300,7 @@ def on_sample_end( success_key = f"{ENV_RUNNER_RESULTS}/max_dist_travelled_across_running_episodes" stop = { - success_key: 8.0, + success_key: 12.0, f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": args.stop_reward, NUM_ENV_STEPS_SAMPLED_LIFETIME: args.stop_timesteps, } diff --git a/rllib/examples/envs/env_rendering_and_recording.py b/rllib/examples/envs/env_rendering_and_recording.py index ba02f50b7f168..77669649e66c6 100644 --- a/rllib/examples/envs/env_rendering_and_recording.py +++ b/rllib/examples/envs/env_rendering_and_recording.py @@ -73,7 +73,10 @@ from ray import tune parser = add_rllib_example_script_args(default_reward=20.0) -parser.set_defaults(env="ALE/Pong-v5") +parser.set_defaults( + enable_new_api_stack=True, + env="ale_py:ALE/Pong-v5", +) class EnvRenderCallback(DefaultCallbacks): @@ -129,10 +132,10 @@ def on_episode_step( # If we have a vector env, only render the sub-env at index 0. if isinstance(env.unwrapped, gym.vector.VectorEnv): - image = env.envs[0].render() + image = env.unwrapped.envs[0].render() # Render the gym.Env. else: - image = env.render() + image = env.unwrapped.render() # Original render images for CartPole are 400x600 (hxw). We'll downsize here to # a very small dimension (to save space and bandwidth). @@ -239,14 +242,10 @@ def on_sample_end( if __name__ == "__main__": args = parser.parse_args() - assert ( - args.enable_new_api_stack - ), "Must set --enable-new-api-stack when running this script!" - # Register our environment with tune. def _env_creator(cfg): cfg.update({"render_mode": "rgb_array"}) - if args.env.startswith("ALE/"): + if args.env.startswith("ale_py:ALE/"): cfg.update( { # Make analogous to old v4 + NoFrameskip. diff --git a/rllib/examples/evaluation/custom_evaluation.py b/rllib/examples/evaluation/custom_evaluation.py index a6d4a1c3e029f..f4d05ea3bd26e 100644 --- a/rllib/examples/evaluation/custom_evaluation.py +++ b/rllib/examples/evaluation/custom_evaluation.py @@ -112,12 +112,12 @@ def custom_eval_function( # `set_corridor_length` method on these. 
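The `np.argmax` change in the curiosity callback above suggests that, on the new stack, the FrozenLake observation reaching the callback arrives one-hot encoded rather than as a plain integer index, so the state has to be recovered before deriving row and column. A small, hypothetical sketch of that recovery (map size assumed 4x4):

import numpy as np

num_rows, num_cols = 4, 4                       # assumed FrozenLake 4x4 map
one_hot_obs = np.eye(num_rows * num_cols)[9]    # state 9, one-hot encoded
state = int(np.argmax(one_hot_obs))             # -> 9
row, col = state // num_cols, state % num_cols  # -> (2, 1)
dist = (row ** 2 + col ** 2) ** 0.5             # distance from the start cell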
eval_workers.foreach_worker( func=lambda worker: ( - env.set_corridor_length( + env.unwrapped.set_corridor_length( args.corridor_length_eval_worker_1 if worker.worker_index == 1 else args.corridor_length_eval_worker_2 ) - for env in worker.env.envs + for env in worker.env.unwrapped.envs ) ) diff --git a/rllib/examples/metrics/custom_metrics_in_env_runners.py b/rllib/examples/metrics/custom_metrics_in_env_runners.py index 3b10ac4966417..cba86a50afb60 100644 --- a/rllib/examples/metrics/custom_metrics_in_env_runners.py +++ b/rllib/examples/metrics/custom_metrics_in_env_runners.py @@ -301,7 +301,7 @@ def _get_pacman_yx_pos(self, env): register_env( "env", lambda cfg: wrap_atari_for_new_api_stack( - gym.make("ALE/MsPacman-v5", **cfg, **{"render_mode": "rgb_array"}), + gym.make("ale_py:ALE/MsPacman-v5", **cfg, **{"render_mode": "rgb_array"}), framestack=4, ), ) diff --git a/rllib/examples/ray_tune/custom_experiment.py b/rllib/examples/ray_tune/custom_experiment.py index d0e424911d468..779c5c1fd0410 100644 --- a/rllib/examples/ray_tune/custom_experiment.py +++ b/rllib/examples/ray_tune/custom_experiment.py @@ -105,7 +105,7 @@ def my_experiment(config: Dict): # Extract the gymnasium env object from the created algo (its local # SingleAgentEnvRunner worker). Note that the env in this single-agent # case is a gymnasium vector env and that we get its first sub-env here. - env = local_env_runner.env.envs[0] + env = local_env_runner.env.unwrapped.envs[0] # The local worker (SingleAgentEnvRunner) rl_module = local_env_runner.module diff --git a/rllib/examples/rl_modules/custom_cnn_rl_module.py b/rllib/examples/rl_modules/custom_cnn_rl_module.py index a8aac2980530a..4001f3e21d6b8 100644 --- a/rllib/examples/rl_modules/custom_cnn_rl_module.py +++ b/rllib/examples/rl_modules/custom_cnn_rl_module.py @@ -66,7 +66,7 @@ parser = add_rllib_example_script_args(default_iters=100, default_timesteps=600000) parser.set_defaults( enable_new_api_stack=True, - env="ALE/Pong-v5", + env="ale_py:ALE/Pong-v5", ) diff --git a/rllib/models/tests/test_preprocessors.py b/rllib/models/tests/test_preprocessors.py index 51ad457dabe7e..03a344de32893 100644 --- a/rllib/models/tests/test_preprocessors.py +++ b/rllib/models/tests/test_preprocessors.py @@ -90,12 +90,12 @@ def test_gym_preprocessors(self): p2 = ModelCatalog.get_preprocessor(gym.make("FrozenLake-v1")) self.assertEqual(type(p2), OneHotPreprocessor) - p3 = ModelCatalog.get_preprocessor(gym.make("ALE/MsPacman-ram-v5")) + p3 = ModelCatalog.get_preprocessor(gym.make("ale_py:ALE/MsPacman-ram-v5")) self.assertEqual(type(p3), AtariRamPreprocessor) p4 = ModelCatalog.get_preprocessor( gym.make( - "ALE/MsPacman-v5", + "ale_py:ALE/MsPacman-v5", frameskip=1, ) ) diff --git a/rllib/tuned_examples/appo/pong-appo-w-rl-modules-and-learner.yaml b/rllib/tuned_examples/appo/pong-appo-w-rl-modules-and-learner.yaml index 94088ab67c29e..2c11e896744ed 100644 --- a/rllib/tuned_examples/appo/pong-appo-w-rl-modules-and-learner.yaml +++ b/rllib/tuned_examples/appo/pong-appo-w-rl-modules-and-learner.yaml @@ -2,7 +2,7 @@ # This can reach 18.0 reward in ~10 minutes on 4x M60 GPUs # with 30 rollout workers, 4 learning workers, and 8 envs per rollout worker. 
appo-pongnoframeskip-v5: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: APPO stop: env_runners/episode_return_mean: 18.0 diff --git a/rllib/tuned_examples/appo/pong-appo.yaml b/rllib/tuned_examples/appo/pong-appo.yaml index 837e0559a8f8f..3b1ecd9215cba 100644 --- a/rllib/tuned_examples/appo/pong-appo.yaml +++ b/rllib/tuned_examples/appo/pong-appo.yaml @@ -5,7 +5,7 @@ # APPO can also solve Pong in 2.5 million timesteps, which is # 2x more efficient than that of IMPALA. pong-appo: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: APPO stop: env_runners/episode_return_mean: 18.0 diff --git a/rllib/tuned_examples/bc/benchmark_atari_pong_bc.py b/rllib/tuned_examples/bc/benchmark_atari_pong_bc.py index f5d7727bb68a5..d084f61fb9f4c 100644 --- a/rllib/tuned_examples/bc/benchmark_atari_pong_bc.py +++ b/rllib/tuned_examples/bc/benchmark_atari_pong_bc.py @@ -128,7 +128,7 @@ def _make_learner_connector(observation_space, action_space): # in the collection of the `rl_unplugged` data. def _env_creator(cfg): return wrap_atari_for_new_api_stack( - gym.make("ALE/Pong-v5", **cfg), + gym.make("ale_py:ALE/Pong-v5", **cfg), # Perform frame-stacking through ConnectorV2 API. framestack=4, dim=84, diff --git a/rllib/tuned_examples/compact-regression-test.yaml b/rllib/tuned_examples/compact-regression-test.yaml index 21dbdb6d1be41..80003257ccb74 100644 --- a/rllib/tuned_examples/compact-regression-test.yaml +++ b/rllib/tuned_examples/compact-regression-test.yaml @@ -6,7 +6,7 @@ # You can find the reference results here: # https://github.com/ray-project/ray/tree/master/release/release_logs atari-impala: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: IMPALA num_samples: 4 stop: @@ -25,7 +25,7 @@ atari-impala: ] num_gpus: 1 atari-ppo-tf: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: PPO num_samples: 4 stop: @@ -51,7 +51,7 @@ atari-ppo-tf: vf_share_layers: true num_gpus: 1 atari-ppo-torch: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: PPO num_samples: 4 stop: @@ -78,7 +78,7 @@ atari-ppo-torch: vf_share_layers: true num_gpus: 1 apex: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: APEX num_samples: 4 stop: @@ -109,7 +109,7 @@ apex: target_network_update_freq: 50000 min_sample_timesteps_per_iteration: 25000 atari-a2c: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: A2C num_samples: 4 stop: @@ -127,7 +127,7 @@ atari-a2c: [20000000, 0.000000000001], ] atari-basic-dqn: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: DQN num_samples: 4 stop: diff --git a/rllib/tuned_examples/dqn/atari-dist-dqn.yaml b/rllib/tuned_examples/dqn/atari-dist-dqn.yaml index 1de99ce54f73e..53f72ca5bb851 100644 --- a/rllib/tuned_examples/dqn/atari-dist-dqn.yaml +++ b/rllib/tuned_examples/dqn/atari-dist-dqn.yaml @@ -2,10 +2,10 @@ atari-dist-dqn: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: DQN config: # Make analogous to old v4 + NoFrameskip. 
diff --git a/rllib/tuned_examples/dqn/atari-dqn.yaml b/rllib/tuned_examples/dqn/atari-dqn.yaml index 287446e232c4a..928820925756c 100644 --- a/rllib/tuned_examples/dqn/atari-dqn.yaml +++ b/rllib/tuned_examples/dqn/atari-dqn.yaml @@ -4,10 +4,10 @@ atari-basic-dqn: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: DQN config: # Works for both torch and tf. diff --git a/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml b/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml index dfa84c8a44667..84d96828da2d3 100644 --- a/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml +++ b/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml @@ -4,10 +4,10 @@ dueling-ddqn: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: DQN config: # Works for both torch and tf. diff --git a/rllib/tuned_examples/dqn/pong-dqn.yaml b/rllib/tuned_examples/dqn/pong-dqn.yaml index b6bb32cc7673a..08b51412aeae4 100644 --- a/rllib/tuned_examples/dqn/pong-dqn.yaml +++ b/rllib/tuned_examples/dqn/pong-dqn.yaml @@ -1,7 +1,7 @@ # @OldAPIStack # You can expect ~20 reward within 1.1m timesteps / 2.1 hours on a K80 GPU pong-deterministic-dqn: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: DQN stop: env_runners/episode_return_mean: 20 diff --git a/rllib/tuned_examples/dqn/pong-rainbow.yaml b/rllib/tuned_examples/dqn/pong-rainbow.yaml index 0a0c05299fe4f..58abda37344f9 100644 --- a/rllib/tuned_examples/dqn/pong-rainbow.yaml +++ b/rllib/tuned_examples/dqn/pong-rainbow.yaml @@ -1,6 +1,6 @@ # @OldAPIStack pong-deterministic-rainbow: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: DQN stop: env_runners/episode_return_mean: 20 diff --git a/rllib/tuned_examples/dreamerv3/atari_100k.py b/rllib/tuned_examples/dreamerv3/atari_100k.py index 443ce9b13d163..740da2840f68a 100644 --- a/rllib/tuned_examples/dreamerv3/atari_100k.py +++ b/rllib/tuned_examples/dreamerv3/atari_100k.py @@ -9,7 +9,7 @@ """ # Run with: -# python [this script name].py --env ALE/[gym ID e.g. Pong-v5] +# python [this script name].py --env ale_py:ALE/[gym ID e.g. Pong-v5] # To see all available options: # python [this script name].py --help diff --git a/rllib/tuned_examples/dreamerv3/atari_200M.py b/rllib/tuned_examples/dreamerv3/atari_200M.py index 2339d345d2f86..7cc69a0ab228f 100644 --- a/rllib/tuned_examples/dreamerv3/atari_200M.py +++ b/rllib/tuned_examples/dreamerv3/atari_200M.py @@ -9,7 +9,7 @@ """ # Run with: -# python [this script name].py --env ALE/[gym ID e.g. Pong-v5] +# python [this script name].py --env ale_py:ALE/[gym ID e.g. Pong-v5] # To see all available options: # python [this script name].py --help
diff --git a/rllib/tuned_examples/impala/atari-impala-large.yaml b/rllib/tuned_examples/impala/atari-impala-large.yaml index 71d8f4dc3de1f..0c4287801bd0b 100644 --- a/rllib/tuned_examples/impala/atari-impala-large.yaml +++ b/rllib/tuned_examples/impala/atari-impala-large.yaml @@ -4,10 +4,10 @@ atari-impala: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: IMPALA stop: timesteps_total: 3000000 diff --git a/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml b/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml index 7716eeb43830d..c97120008c31f 100644 --- a/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml +++ b/rllib/tuned_examples/impala/atari-impala-multi-gpu.yaml @@ -2,7 +2,7 @@ # Runs on a p2.8xlarge single head node machine. # Should reach ~400 reward in about 1h and after 15-20M ts. atari-impala: - env: ALE/Breakout-v5 + env: ale_py:ALE/Breakout-v5 run: IMPALA config: # Works for both torch and tf. diff --git a/rllib/tuned_examples/impala/atari-impala.yaml b/rllib/tuned_examples/impala/atari-impala.yaml index 09966556924e4..23ba57207b366 100644 --- a/rllib/tuned_examples/impala/atari-impala.yaml +++ b/rllib/tuned_examples/impala/atari-impala.yaml @@ -4,10 +4,10 @@ atari-impala: env: grid_search: - - ALE/Breakout-v5 - - ALE/BeamRider-v5 - - ALE/Qbert-v5 - - ALE/SpaceInvaders-v5 + - ale_py:ALE/Breakout-v5 + - ale_py:ALE/BeamRider-v5 + - ale_py:ALE/Qbert-v5 + - ale_py:ALE/SpaceInvaders-v5 run: IMPALA config: # Make analogous to old v4 + NoFrameskip. diff --git a/rllib/tuned_examples/impala/pong-impala-fast.yaml b/rllib/tuned_examples/impala/pong-impala-fast.yaml index f13e276c9744d..fca3a179527c9 100644 --- a/rllib/tuned_examples/impala/pong-impala-fast.yaml +++ b/rllib/tuned_examples/impala/pong-impala-fast.yaml @@ -5,7 +5,7 @@ # 32 workers -> 7 minutes # See also: pong-impala.yaml, pong-impala-vectorized.yaml pong-impala-fast: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: IMPALA config: # Make analogous to old v4 + NoFrameskip. diff --git a/rllib/tuned_examples/impala/pong-impala-vectorized.yaml b/rllib/tuned_examples/impala/pong-impala-vectorized.yaml index 5778848c194bf..1da8bebf68462 100644 --- a/rllib/tuned_examples/impala/pong-impala-vectorized.yaml +++ b/rllib/tuned_examples/impala/pong-impala-vectorized.yaml @@ -3,7 +3,7 @@ # with 32 workers and 10 envs per worker. This is more efficient than the non-vectorized # configuration which requires 128 workers to achieve the same performance. pong-impala-vectorized: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: IMPALA config: # Make analogous to old v4 + NoFrameskip. diff --git a/rllib/tuned_examples/impala/pong-impala.yaml b/rllib/tuned_examples/impala/pong-impala.yaml index ba6afa441554b..85d44f439b31a 100644 --- a/rllib/tuned_examples/impala/pong-impala.yaml +++ b/rllib/tuned_examples/impala/pong-impala.yaml @@ -5,7 +5,7 @@ # 16 workers -> 40 min+ # See also: pong-impala-fast.yaml, pong-impala-vectorized.yaml pong-impala: - env: ALE/Pong-v5 + env: ale_py:ALE/Pong-v5 run: IMPALA config: # Make analogous to old v4 + NoFrameskip.
diff --git a/rllib/tuned_examples/impala/pong_impala.py b/rllib/tuned_examples/impala/pong_impala.py index 8802abf6a3b23..3fe08f9c35eda 100644 --- a/rllib/tuned_examples/impala/pong_impala.py +++ b/rllib/tuned_examples/impala/pong_impala.py @@ -15,7 +15,7 @@ parser = add_rllib_example_script_args() parser.set_defaults( enable_new_api_stack=True, - env="ALE/Pong-v5", + env="ale_py:ALE/Pong-v5", ) parser.add_argument( "--use-tiny-cnn", diff --git a/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py b/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py index 2f7b100500c6d..ca331fe9a861c 100644 --- a/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py +++ b/rllib/tuned_examples/impala/pong_impala_pb2_hyperopt.py @@ -15,7 +15,7 @@ from ray import tune parser = add_rllib_example_script_args() -parser.set_defaults(env="ALE/Pong-v5") +parser.set_defaults(env="ale_py:ALE/Pong-v5") parser.add_argument( "--use-tiny-cnn", action="store_true", diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py index 7abcfdff245ef..ad298550e8a31 100644 --- a/rllib/tuned_examples/ppo/atari_ppo.py +++ b/rllib/tuned_examples/ppo/atari_ppo.py @@ -14,7 +14,10 @@ default_timesteps=3000000, default_iters=100000000000, ) -parser.set_defaults(enable_new_api_stack=True) +parser.set_defaults( + enable_new_api_stack=True, + env="ale_py:ALE/Pong-v5", +) # Use `parser` to add your own custom command line options to this script # and (if needed) use their values toset up `config` below. args = parser.parse_args() diff --git a/rllib/tuned_examples/sac/atari-sac.yaml b/rllib/tuned_examples/sac/atari-sac.yaml index 000a62d17e747..9626327d463fc 100644 --- a/rllib/tuned_examples/sac/atari-sac.yaml +++ b/rllib/tuned_examples/sac/atari-sac.yaml @@ -5,8 +5,8 @@ atari-sac-tf-and-torch: env: grid_search: - - ALE/MsPacman-v5 - - ALE/Pong-v5 + - ale_py:ALE/MsPacman-v5 + - ale_py:ALE/Pong-v5 run: SAC stop: timesteps_total: 20000000 diff --git a/rllib/tuned_examples/sac/mspacman-sac.yaml b/rllib/tuned_examples/sac/mspacman-sac.yaml index b2f6b5f80e2c5..16d23a4af22b5 100644 --- a/rllib/tuned_examples/sac/mspacman-sac.yaml +++ b/rllib/tuned_examples/sac/mspacman-sac.yaml @@ -3,7 +3,7 @@ # to ~750 reward in 40k timesteps. Run e.g. on a g3.4xlarge with `num_gpus=1`. # Uses the hyperparameters published in [2] (see rllib/agents/sac/README.md). mspacman-sac-tf: - env: ALE/MsPacman-v5 + env: ale_py:ALE/MsPacman-v5 run: SAC stop: env_runners/episode_return_mean: 800 diff --git a/rllib/utils/error.py b/rllib/utils/error.py index 5671abc10eef3..d2b9db4c351a3 100644 --- a/rllib/utils/error.py +++ b/rllib/utils/error.py @@ -67,7 +67,7 @@ class NotSerializable(Exception): 1) Run `pip install gymnasium` on your command line. 
2) Change all your import statements in your code from `import gym` -> `import gymnasium as gym` OR - `from gym.space import Discrete` -> `from gymnasium.spaces import Discrete` + `from gym.spaces import Discrete` -> `from gymnasium.spaces import Discrete` For your custom (single agent) gym.Env classes: 3.1) Either wrap your old Env class via the provided `from gymnasium.wrappers import diff --git a/rllib/utils/exploration/tests/test_curiosity.py b/rllib/utils/exploration/tests/test_curiosity.py index 4531154371f0b..bcc603171264b 100644 --- a/rllib/utils/exploration/tests/test_curiosity.py +++ b/rllib/utils/exploration/tests/test_curiosity.py @@ -1,23 +1,14 @@ -from collections import deque -import gymnasium as gym -import minigrid import numpy as np import sys import unittest import ray -from ray import air, tune -from ray.air.constants import TRAINING_ITERATION from ray.rllib.algorithms.callbacks import DefaultCallbacks import ray.rllib.algorithms.ppo as ppo -from ray.rllib.utils.test_utils import check_learning_achieved from ray.rllib.utils.metrics import ( ENV_RUNNER_RESULTS, EPISODE_RETURN_MAX, - EPISODE_RETURN_MEAN, ) -from ray.rllib.utils.numpy import one_hot -from ray.tune import register_env class MyCallBack(DefaultCallbacks): @@ -46,96 +37,6 @@ def on_sample_end(self, *, worker, samples, **kwargs): self.deltas = [] -class OneHotWrapper(gym.core.ObservationWrapper): - def __init__(self, env, vector_index, framestack): - super().__init__(env) - self.framestack = framestack - # 49=7x7 field of vision; 11=object types; 6=colors; 3=state types. - # +4: Direction. - self.single_frame_dim = 49 * (11 + 6 + 3) + 4 - self.init_x = None - self.init_y = None - self.x_positions = [] - self.y_positions = [] - self.x_y_delta_buffer = deque(maxlen=100) - self.vector_index = vector_index - self.frame_buffer = deque(maxlen=self.framestack) - for _ in range(self.framestack): - self.frame_buffer.append(np.zeros((self.single_frame_dim,))) - - self.observation_space = gym.spaces.Box( - 0.0, 1.0, shape=(self.single_frame_dim * self.framestack,), dtype=np.float32 - ) - - def observation(self, obs): - # Debug output: max-x/y positions to watch exploration progress. - if self.step_count == 0: - for _ in range(self.framestack): - self.frame_buffer.append(np.zeros((self.single_frame_dim,))) - if self.vector_index == 0: - if self.x_positions: - max_diff = max( - np.sqrt( - (np.array(self.x_positions) - self.init_x) ** 2 - + (np.array(self.y_positions) - self.init_y) ** 2 - ) - ) - self.x_y_delta_buffer.append(max_diff) - print( - "100-average dist travelled={}".format( - np.mean(self.x_y_delta_buffer) - ) - ) - self.x_positions = [] - self.y_positions = [] - self.init_x = self.agent_pos[0] - self.init_y = self.agent_pos[1] - - # Are we carrying the key? - # if self.carrying is not None: - # print("Carrying KEY!!") - - self.x_positions.append(self.agent_pos[0]) - self.y_positions.append(self.agent_pos[1]) - - # One-hot the last dim into 11, 6, 3 one-hot vectors, then flatten. - objects = one_hot(obs[:, :, 0], depth=11) - colors = one_hot(obs[:, :, 1], depth=6) - states = one_hot(obs[:, :, 2], depth=3) - # Is the door we see open? 
- # for x in range(7): - # for y in range(7): - # if objects[x, y, 4] == 1.0 and states[x, y, 0] == 1.0: - # print("Door OPEN!!") - - all_ = np.concatenate([objects, colors, states], -1) - all_flat = np.reshape(all_, (-1,)) - direction = one_hot(np.array(self.agent_dir), depth=4).astype(np.float32) - single_frame = np.concatenate([all_flat, direction]) - self.frame_buffer.append(single_frame) - return np.concatenate(self.frame_buffer) - - -def env_maker(config): - name = config.get("name", "MiniGrid-Empty-5x5-v0") - framestack = config.get("framestack", 4) - env = gym.make(name) - # Make it impossible to reach goal by chance. - env = gym.wrappers.TimeLimit(env, max_episode_steps=15) - # Only use image portion of observation (discard goal and direction). - env = minigrid.wrappers.ImgObsWrapper(env) - env = OneHotWrapper( - env, - config.vector_index if hasattr(config, "vector_index") else 0, - framestack=framestack, - ) - return env - - -register_env("mini-grid", env_maker) -CONV_FILTERS = [[16, [11, 11], 3], [32, [9, 9], 3], [64, [5, 5], 3]] - - class TestCuriosity(unittest.TestCase): @classmethod def setUpClass(cls): @@ -187,10 +88,7 @@ def test_curiosity_on_frozen_lake(self): "type": "StochasticSampling", }, }, - ) - # TODO (Kourosh): We need to provide examples on how we do curiosity with - # RLModule API - .training(lr=0.001) + ).training(lr=0.001) ) num_iterations = 10 @@ -207,106 +105,6 @@ def test_curiosity_on_frozen_lake(self): algo.stop() self.assertTrue(learnt) - # Disable this check for now. Add too much flakyness to test. - # if fw == "tf": - # # W/o Curiosity. Expect to learn nothing. - # print("Trying w/o curiosity (not expected to learn).") - # config["exploration_config"] = { - # "type": "StochasticSampling", - # } - # algo = ppo.PPO(config=config) - # rewards_wo = 0.0 - # for _ in range(num_iterations): - # result = algo.train() - # rewards_wo += result[ENV_RUNNER_RESULTS][EPISODE_RETURN_MEAN] - # print(result) - # algo.stop() - # self.assertTrue(rewards_wo == 0.0) - # print("Did not reach goal w/o curiosity!") - - def test_curiosity_on_partially_observable_domain(self): - config = ( - ppo.PPOConfig() - .environment( - "mini-grid", - env_config={ - # Also works with: - # - MiniGrid-MultiRoom-N4-S5-v0 - # - MiniGrid-MultiRoom-N2-S4-v0 - "name": "MiniGrid-Empty-8x8-v0", - "framestack": 1, # seems to work even w/o framestacking - }, - ) - .env_runners( - num_envs_per_env_runner=4, - num_env_runners=0, - exploration_config={ - "type": "Curiosity", - # For the feature NN, use a non-LSTM fcnet (same as the one - # in the policy model). - "eta": 0.1, - "lr": 0.0003, # 0.0003 or 0.0005 seem to work fine as well. - "feature_dim": 64, - # No actual feature net: map directly from observations to feature - # vector (linearly). 
- "feature_net_config": { - "fcnet_hiddens": [], - "fcnet_activation": "relu", - }, - "sub_exploration": { - "type": "StochasticSampling", - }, - }, - ) - .training( - model={ - "fcnet_hiddens": [256, 256], - "fcnet_activation": "relu", - }, - num_epochs=8, - ) - ) - - min_reward = 0.001 - stop = { - TRAINING_ITERATION: 25, - f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": min_reward, - } - # To replay: - # algo = ppo.PPO(config=config) - # algo.restore("[checkpoint file]") - # env = env_maker(config["env_config"]) - # obs, info = env.reset() - # for _ in range(10000): - # obs, reward, done, truncated, info = env.step( - # algo.compute_single_action(s) - # ) - # if done: - # obs, info = env.reset() - # env.render() - - results = tune.Tuner( - "PPO", - param_space=config, - run_config=air.RunConfig(stop=stop, verbose=1), - ).fit() - check_learning_achieved(results, min_reward) - iters = results.get_best_result().metrics[TRAINING_ITERATION] - print("Reached in {} iterations.".format(iters)) - - # config_wo = config.copy() - # config_wo["exploration_config"] = {"type": "StochasticSampling"} - # stop_wo = stop.copy() - # stop_wo[TRAINING_ITERATION] = iters - # results = tune.Tuner( - # "PPO", param_space=config_wo, stop=stop_wo, verbose=1).fit() - # try: - # check_learning_achieved(results, min_reward) - # except ValueError: - # print("Did not learn w/o curiosity (expected).") - # else: - # raise ValueError("Learnt w/o curiosity (not expected)!") - if __name__ == "__main__": import pytest diff --git a/rllib/utils/images.py b/rllib/utils/images.py index 91e6bc610843f..0716ea5c45b4b 100644 --- a/rllib/utils/images.py +++ b/rllib/utils/images.py @@ -15,31 +15,29 @@ except ImportError: cv2 = None -if cv2 is None: - try: - from skimage import color, io, transform - - logger.debug("CV2 not found for image processing, using Skimage.") - except ImportError: - raise ModuleNotFoundError("Either scikit-image or opencv is required") - @DeveloperAPI def resize(img: np.ndarray, height: int, width: int) -> np.ndarray: - if cv2: - return cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) - return transform.resize(img, (height, width)) + if not cv2: + raise ModuleNotFoundError( + "`opencv` not installed! Do `pip install opencv-python`" + ) + return cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) @DeveloperAPI def rgb2gray(img: np.ndarray) -> np.ndarray: - if cv2: - return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) - return color.rgb2gray(img) + if not cv2: + raise ModuleNotFoundError( + "`opencv` not installed! Do `pip install opencv-python`" + ) + return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) @DeveloperAPI def imread(img_file: str) -> np.ndarray: - if cv2: - return cv2.imread(img_file).astype(np.float32) - return io.imread(img_file).astype(np.float32) + if not cv2: + raise ModuleNotFoundError( + "`opencv` not installed! Do `pip install opencv-python`" + ) + return cv2.imread(img_file).astype(np.float32)
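The renamed environment IDs rely on gymnasium's `module:env_id` lookup: newer gymnasium releases no longer auto-register the ALE environments, so the `ale_py:` prefix tells gymnasium to import `ale_py` (which registers the `ALE/*` envs) before resolving the ID. A minimal sketch, assuming a recent gymnasium and ale_py install; the `gym.register_envs` call follows the ale_py docs and only makes the dependency explicit:

import gymnasium as gym

# Variant 1: the form used throughout these files. gymnasium imports `ale_py`
# before looking up the ID, so no explicit import is needed.
env = gym.make("ale_py:ALE/Pong-v5")

# Variant 2: import and register explicitly, then use the bare ID.
import ale_py
gym.register_envs(ale_py)  # keeps linters from flagging the "unused" import
env = gym.make("ALE/Pong-v5")

obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()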
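Several hunks switch from `env.envs[...]` to `env.unwrapped.envs[...]`: the vector env handed to callbacks and workers may itself be wrapped, and only the underlying `gym.vector.VectorEnv` exposes the `envs` list. A minimal sketch of that access pattern, mirroring the render callback changed above:

import gymnasium as gym

def render_first_sub_env(env):
    # Unwrap first: `env` may be a wrapper around the vector env.
    if isinstance(env.unwrapped, gym.vector.VectorEnv):
        # If we have a vector env, only render the sub-env at index 0.
        return env.unwrapped.envs[0].render()
    # Plain (non-vector) env: unwrap and render it directly.
    return env.unwrapped.render()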
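With the scikit-image fallback removed, the helpers in rllib/utils/images.py now require OpenCV, as the raised error messages suggest (`pip install opencv-python`). A minimal usage sketch, assuming opencv-python is installed; the array shapes are illustrative:

import numpy as np
from ray.rllib.utils.images import resize, rgb2gray

frame = np.random.randint(0, 255, size=(210, 160, 3), dtype=np.uint8)  # fake RGB Atari frame
gray = rgb2gray(frame)         # -> shape (210, 160)
small = resize(frame, 84, 84)  # -> shape (84, 84, 3), cv2.INTER_AREA under the hood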