From b95637225c0b920e1e91ee5847b2ca068157db9f Mon Sep 17 00:00:00 2001 From: dentiny Date: Sat, 16 Nov 2024 10:57:32 +0000 Subject: [PATCH 01/12] fix security issue for retry package Signed-off-by: dentiny --- python/ray/util/retry.py | 38 +++++++++++++++++++ release/BUILD.bazel | 1 - .../ray_release/cluster_manager/minimal.py | 4 +- release/requirements_buildkite.txt | 4 -- 4 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 python/ray/util/retry.py diff --git a/python/ray/util/retry.py b/python/ray/util/retry.py new file mode 100644 index 000000000000..c0ac7ff64ee0 --- /dev/null +++ b/python/ray/util/retry.py @@ -0,0 +1,38 @@ +"""Utils on retry.""" + +import time +from functools import wraps + +_RAY_DEFAULT_MAX_RETRY_COUNT = 10 +_RAY_DEFAULT_INIT_DELAY_SEC = 1 +_RAY_DEFAULT_MAX_DELAY_SEC = 30 +_RAY_DEFAULT_BACKOFF = 2 +_RAT_DEFAULT_JITTER_SEC = 1 + + +def retry( + max_retry_count=_RAY_DEFAULT_MAX_RETRY_COUNT, + init_delay_sec=_RAY_DEFAULT_INIT_DELAY_SEC, + max_delay_sec=_RAY_DEFAULT_MAX_DELAY_SEC, + backoff=_RAY_DEFAULT_BACKOFF, + jitter_sec=_RAT_DEFAULT_JITTER_SEC, +): + def wrapper(fn): + @wraps(fn) + def wrapped(*args, **kwargs): + for cur_retry_count in range(max_retry_count): + try: + return fn(*args, **kwargs) + except Exception: + if cur_retry_count + 1 == max_retry_count: + raise + else: + sleep_sec = min( + init_delay_sec * (backoff**cur_retry_count) + jitter_sec, + max_delay_sec, + ) + time.sleep(sleep_sec) + + return wrapped + + return wrapper diff --git a/release/BUILD.bazel b/release/BUILD.bazel index a09070d9b313..dd62a62b922a 100644 --- a/release/BUILD.bazel +++ b/release/BUILD.bazel @@ -309,7 +309,6 @@ py_library( bk_require("pybuildkite"), bk_require("pygithub"), bk_require("requests"), - bk_require("retry"), ], ) diff --git a/release/ray_release/cluster_manager/minimal.py b/release/ray_release/cluster_manager/minimal.py index 8b202c247753..16ddd6df4f07 100644 --- a/release/ray_release/cluster_manager/minimal.py +++ b/release/ray_release/cluster_manager/minimal.py @@ -9,7 +9,7 @@ from ray_release.logger import logger from ray_release.cluster_manager.cluster_manager import ClusterManager from ray_release.util import format_link, anyscale_cluster_env_build_url -from retry import retry +from ray.util import retry REPORT_S = 30.0 @@ -20,7 +20,7 @@ class MinimalClusterManager(ClusterManager): Builds app config and compute template but does not start or stop session. """ - @retry((ClusterEnvCreateError), delay=10, jitter=5, tries=2) + @retry(init_delay_sec=10, jitter_sec=5, max_retry_count=2) def create_cluster_env(self): assert self.cluster_env_id is None diff --git a/release/requirements_buildkite.txt b/release/requirements_buildkite.txt index 9bfaada37880..3601010ade98 100644 --- a/release/requirements_buildkite.txt +++ b/release/requirements_buildkite.txt @@ -1539,10 +1539,6 @@ requests-toolbelt==1.0.0 \ --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \ --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06 # via twine -retry==0.9.2 \ - --hash=sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606 \ - --hash=sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4 - # via -r release/requirements_buildkite.in rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c From 1bc8bd9a01f9a6af880fddfb75fece9f140695b4 Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 00:24:01 +0000 Subject: [PATCH 02/12] fix dependency file Signed-off-by: dentiny --- release/requirements_buildkite.in | 1 - release/requirements_buildkite.txt | 8 +------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/release/requirements_buildkite.in b/release/requirements_buildkite.in index 96d05e27bb96..0c20af4d9088 100644 --- a/release/requirements_buildkite.in +++ b/release/requirements_buildkite.in @@ -14,7 +14,6 @@ pyyaml pybuildkite PyGithub requests -retry twine == 5.0.0 docker >= 7.1.0 diff --git a/release/requirements_buildkite.txt b/release/requirements_buildkite.txt index 3601010ade98..659db4c5e8db 100644 --- a/release/requirements_buildkite.txt +++ b/release/requirements_buildkite.txt @@ -424,9 +424,7 @@ debugpy==1.8.2 \ decorator==5.1.1 \ --hash=sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330 \ --hash=sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186 - # via - # ipython - # retry + # via ipython deprecated==1.2.14 \ --hash=sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c \ --hash=sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3 @@ -1174,10 +1172,6 @@ pure-eval==0.2.3 \ --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \ --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42 # via stack-data -py==1.11.0 \ - --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ - --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 - # via retry pyasn1==0.6.0 \ --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 From c43a0fdf9dbaeda21c4605de75d9da29b2dc9d22 Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 01:10:26 +0000 Subject: [PATCH 03/12] Add unit test for retry Signed-off-by: dentiny --- python/ray/util/BUILD | 5 +++ python/ray/util/retry.py | 12 +++--- python/ray/util/tests/unit/BUILD | 10 +++++ python/ray/util/tests/unit/test_retry.py | 41 +++++++++++++++++++ .../ray_release/cluster_manager/minimal.py | 2 +- 5 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 python/ray/util/tests/unit/BUILD create mode 100644 python/ray/util/tests/unit/test_retry.py diff --git a/python/ray/util/BUILD b/python/ray/util/BUILD index f4d79eee0918..c8af93378a88 100644 --- a/python/ray/util/BUILD +++ b/python/ray/util/BUILD @@ -6,3 +6,8 @@ doctest( files = glob(["*.py"], exclude=["client_connect.py", "iter.py", "iter_metrics.py"]), tags = ["team:core"] ) + +py_library( + name = "retry", + srcs = ["retry.py"], +) diff --git a/python/ray/util/retry.py b/python/ray/util/retry.py index c0ac7ff64ee0..6d1ecf567eaf 100644 --- a/python/ray/util/retry.py +++ b/python/ray/util/retry.py @@ -26,12 +26,12 @@ def wrapped(*args, **kwargs): except Exception: if cur_retry_count + 1 == max_retry_count: raise - else: - sleep_sec = min( - init_delay_sec * (backoff**cur_retry_count) + jitter_sec, - max_delay_sec, - ) - time.sleep(sleep_sec) + + sleep_sec = min( + init_delay_sec * (backoff**cur_retry_count) + jitter_sec, + max_delay_sec, + ) + time.sleep(sleep_sec) return wrapped diff --git a/python/ray/util/tests/unit/BUILD b/python/ray/util/tests/unit/BUILD new file mode 100644 index 000000000000..d3578002d450 --- /dev/null +++ b/python/ray/util/tests/unit/BUILD @@ -0,0 +1,10 @@ +load("//bazel:python.bzl", "py_test_module_list") + +py_test_module_list( + files = [ + "test_retry.py", + ], + size = "small", + tags = ["team:core"], + deps = [], +) diff --git a/python/ray/util/tests/unit/test_retry.py b/python/ray/util/tests/unit/test_retry.py new file mode 100644 index 000000000000..a0ce348364bf --- /dev/null +++ b/python/ray/util/tests/unit/test_retry.py @@ -0,0 +1,41 @@ +from ray.util import retry + +import sys +import pytest + + +def test_retry_with_no_error(): + invocation_count = 0 + + # Function doesn't raise exception; use a dummy value to check invocation. + @retry.retry() + def no_error_func() -> int: + nonlocal invocation_count + invocation_count += 1 + return 1 + + assert no_error_func() == 1 + assert invocation_count == 1 + + +# Test senario: exception count is less than retry count. +def test_retry_with_limited_error(): + invocation_count = 0 + + # Function doesn't raise exception; use a dummy value to check invocation. + @retry.retry(init_delay_sec=1, jitter_sec=1) + def limited_error() -> int: + nonlocal invocation_count + + invocation_count += 1 + + if invocation_count == 1: + raise Exception("Manual exception") + return 1 + + assert limited_error() == 1 + assert invocation_count == 2 + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", __file__])) diff --git a/release/ray_release/cluster_manager/minimal.py b/release/ray_release/cluster_manager/minimal.py index 16ddd6df4f07..907250c86c49 100644 --- a/release/ray_release/cluster_manager/minimal.py +++ b/release/ray_release/cluster_manager/minimal.py @@ -20,7 +20,7 @@ class MinimalClusterManager(ClusterManager): Builds app config and compute template but does not start or stop session. """ - @retry(init_delay_sec=10, jitter_sec=5, max_retry_count=2) + @retry.retry(init_delay_sec=10, jitter_sec=5, max_retry_count=2) def create_cluster_env(self): assert self.cluster_env_id is None From 06c66d98b87012d45600a7b10aab6d5a0709869b Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 01:16:22 +0000 Subject: [PATCH 04/12] add test on retry failure Signed-off-by: dentiny --- python/ray/util/tests/unit/test_retry.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/python/ray/util/tests/unit/test_retry.py b/python/ray/util/tests/unit/test_retry.py index a0ce348364bf..85edf1f3c2ed 100644 --- a/python/ray/util/tests/unit/test_retry.py +++ b/python/ray/util/tests/unit/test_retry.py @@ -37,5 +37,21 @@ def limited_error() -> int: assert invocation_count == 2 +# Test senario: exception count exceeds retry count. +def test_retry_with_unlimited_error(): + invocation_count = 0 + + @retry.retry(init_delay_sec=1, jitter_sec=1, backoff=1, max_retry_count=3) + def unlimited_error() -> int: + nonlocal invocation_count + + invocation_count += 1 + raise Exception("Manual exception") + + with pytest.raises(Exception, match="Manual exception"): + unlimited_error() + assert invocation_count == 3 + + if __name__ == "__main__": sys.exit(pytest.main(["-sv", __file__])) From f336c57caf0ebb6284b421eb2c3819fdd942a5a9 Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 02:29:51 +0000 Subject: [PATCH 05/12] move retry file around Signed-off-by: dentiny --- python/ray/util/BUILD | 5 ----- python/ray/util/tests/unit/BUILD | 10 ---------- release/ray_release/cluster_manager/minimal.py | 2 +- {python/ray/util => release/ray_release}/retry.py | 0 .../unit => release/ray_release/tests}/test_retry.py | 2 +- 5 files changed, 2 insertions(+), 17 deletions(-) delete mode 100644 python/ray/util/tests/unit/BUILD rename {python/ray/util => release/ray_release}/retry.py (100%) rename {python/ray/util/tests/unit => release/ray_release/tests}/test_retry.py (97%) diff --git a/python/ray/util/BUILD b/python/ray/util/BUILD index c8af93378a88..f4d79eee0918 100644 --- a/python/ray/util/BUILD +++ b/python/ray/util/BUILD @@ -6,8 +6,3 @@ doctest( files = glob(["*.py"], exclude=["client_connect.py", "iter.py", "iter_metrics.py"]), tags = ["team:core"] ) - -py_library( - name = "retry", - srcs = ["retry.py"], -) diff --git a/python/ray/util/tests/unit/BUILD b/python/ray/util/tests/unit/BUILD deleted file mode 100644 index d3578002d450..000000000000 --- a/python/ray/util/tests/unit/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -load("//bazel:python.bzl", "py_test_module_list") - -py_test_module_list( - files = [ - "test_retry.py", - ], - size = "small", - tags = ["team:core"], - deps = [], -) diff --git a/release/ray_release/cluster_manager/minimal.py b/release/ray_release/cluster_manager/minimal.py index 907250c86c49..a3ea06c16467 100644 --- a/release/ray_release/cluster_manager/minimal.py +++ b/release/ray_release/cluster_manager/minimal.py @@ -9,7 +9,7 @@ from ray_release.logger import logger from ray_release.cluster_manager.cluster_manager import ClusterManager from ray_release.util import format_link, anyscale_cluster_env_build_url -from ray.util import retry +from ray_release import retry REPORT_S = 30.0 diff --git a/python/ray/util/retry.py b/release/ray_release/retry.py similarity index 100% rename from python/ray/util/retry.py rename to release/ray_release/retry.py diff --git a/python/ray/util/tests/unit/test_retry.py b/release/ray_release/tests/test_retry.py similarity index 97% rename from python/ray/util/tests/unit/test_retry.py rename to release/ray_release/tests/test_retry.py index 85edf1f3c2ed..983940873992 100644 --- a/python/ray/util/tests/unit/test_retry.py +++ b/release/ray_release/tests/test_retry.py @@ -1,4 +1,4 @@ -from ray.util import retry +from ray_release import retry import sys import pytest From c1d8ef6f42322ac0d1c62bd824b4c004cf98e2ea Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 03:43:11 +0000 Subject: [PATCH 06/12] Add test to build Signed-off-by: dentiny --- release/BUILD.bazel | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/release/BUILD.bazel b/release/BUILD.bazel index dd62a62b922a..f269add55138 100644 --- a/release/BUILD.bazel +++ b/release/BUILD.bazel @@ -623,3 +623,18 @@ py_test( bk_require("pytest"), ], ) + +py_test( + name = "test_retry", + size = "small", + srcs = ["ray_release/tests/test_retry.py"], + exec_compatible_with = ["//:hermetic_python"], + tags = [ + "release_unit", + "team:ci", + ], + deps = [ + ":ray_release", + bk_require("pytest"), + ], +) From 655d6022b1f349bc6829c117ba2ad32bc548b4bd Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 03:44:16 +0000 Subject: [PATCH 07/12] Update retry config name Signed-off-by: dentiny --- python/ray/util/retry.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 python/ray/util/retry.py diff --git a/python/ray/util/retry.py b/python/ray/util/retry.py new file mode 100644 index 000000000000..abe4ebd32961 --- /dev/null +++ b/python/ray/util/retry.py @@ -0,0 +1,39 @@ +"""Utils on retry.""" + +import time +from functools import wraps + +# Default configuration for retry. +_DEFAULT_MAX_RETRY_COUNT = 10 +_DEFAULT_INIT_DELAY_SEC = 1 +_DEFAULT_MAX_DELAY_SEC = 30 +_DEFAULT_BACKOFF = 2 +_DEFAULT_JITTER_SEC = 1 + + +def retry( + max_retry_count=_DEFAULT_MAX_RETRY_COUNT, + init_delay_sec=_DEFAULT_INIT_DELAY_SEC, + max_delay_sec=_DEFAULT_MAX_DELAY_SEC, + backoff=_DEFAULT_BACKOFF, + jitter_sec=_DEFAULT_JITTER_SEC, +): + def wrapper(fn): + @wraps(fn) + def wrapped(*args, **kwargs): + for cur_retry_count in range(max_retry_count): + try: + return fn(*args, **kwargs) + except Exception: + if cur_retry_count + 1 == max_retry_count: + raise + + sleep_sec = min( + init_delay_sec * (backoff**cur_retry_count) + jitter_sec, + max_delay_sec, + ) + time.sleep(sleep_sec) + + return wrapped + + return wrapper From c0136104d6f1beaf9313ae3ef3c5c6d86737638e Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 03:45:00 +0000 Subject: [PATCH 08/12] update import Signed-off-by: dentiny --- release/ray_release/cluster_manager/minimal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release/ray_release/cluster_manager/minimal.py b/release/ray_release/cluster_manager/minimal.py index a3ea06c16467..ea0842f52d10 100644 --- a/release/ray_release/cluster_manager/minimal.py +++ b/release/ray_release/cluster_manager/minimal.py @@ -9,7 +9,7 @@ from ray_release.logger import logger from ray_release.cluster_manager.cluster_manager import ClusterManager from ray_release.util import format_link, anyscale_cluster_env_build_url -from ray_release import retry +from ray_release.retry import retry REPORT_S = 30.0 @@ -20,7 +20,7 @@ class MinimalClusterManager(ClusterManager): Builds app config and compute template but does not start or stop session. """ - @retry.retry(init_delay_sec=10, jitter_sec=5, max_retry_count=2) + @retry(init_delay_sec=10, jitter_sec=5, max_retry_count=2) def create_cluster_env(self): assert self.cluster_env_id is None From 9a786c75205f08b60c1fc28d8c3102ceb263cae2 Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 03:50:54 +0000 Subject: [PATCH 09/12] Add param to add exceptions Signed-off-by: dentiny --- python/ray/util/retry.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/ray/util/retry.py b/python/ray/util/retry.py index abe4ebd32961..d9e5fafd14a8 100644 --- a/python/ray/util/retry.py +++ b/python/ray/util/retry.py @@ -9,6 +9,7 @@ _DEFAULT_MAX_DELAY_SEC = 30 _DEFAULT_BACKOFF = 2 _DEFAULT_JITTER_SEC = 1 +_DEFAULT_EXCEPTIONS = (Exception,) def retry( @@ -17,6 +18,7 @@ def retry( max_delay_sec=_DEFAULT_MAX_DELAY_SEC, backoff=_DEFAULT_BACKOFF, jitter_sec=_DEFAULT_JITTER_SEC, + exceptions=_DEFAULT_EXCEPTIONS, ): def wrapper(fn): @wraps(fn) @@ -24,7 +26,7 @@ def wrapped(*args, **kwargs): for cur_retry_count in range(max_retry_count): try: return fn(*args, **kwargs) - except Exception: + except exceptions: if cur_retry_count + 1 == max_retry_count: raise From 24383b9645632f55361c124b640fcff558cc07e0 Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 03:54:10 +0000 Subject: [PATCH 10/12] Add exceptions test Signed-off-by: dentiny --- python/ray/util/retry.py | 41 ------------------------- release/ray_release/retry.py | 25 ++++++++------- release/ray_release/tests/test_retry.py | 18 +++++++++++ 3 files changed, 32 insertions(+), 52 deletions(-) delete mode 100644 python/ray/util/retry.py diff --git a/python/ray/util/retry.py b/python/ray/util/retry.py deleted file mode 100644 index d9e5fafd14a8..000000000000 --- a/python/ray/util/retry.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Utils on retry.""" - -import time -from functools import wraps - -# Default configuration for retry. -_DEFAULT_MAX_RETRY_COUNT = 10 -_DEFAULT_INIT_DELAY_SEC = 1 -_DEFAULT_MAX_DELAY_SEC = 30 -_DEFAULT_BACKOFF = 2 -_DEFAULT_JITTER_SEC = 1 -_DEFAULT_EXCEPTIONS = (Exception,) - - -def retry( - max_retry_count=_DEFAULT_MAX_RETRY_COUNT, - init_delay_sec=_DEFAULT_INIT_DELAY_SEC, - max_delay_sec=_DEFAULT_MAX_DELAY_SEC, - backoff=_DEFAULT_BACKOFF, - jitter_sec=_DEFAULT_JITTER_SEC, - exceptions=_DEFAULT_EXCEPTIONS, -): - def wrapper(fn): - @wraps(fn) - def wrapped(*args, **kwargs): - for cur_retry_count in range(max_retry_count): - try: - return fn(*args, **kwargs) - except exceptions: - if cur_retry_count + 1 == max_retry_count: - raise - - sleep_sec = min( - init_delay_sec * (backoff**cur_retry_count) + jitter_sec, - max_delay_sec, - ) - time.sleep(sleep_sec) - - return wrapped - - return wrapper diff --git a/release/ray_release/retry.py b/release/ray_release/retry.py index 6d1ecf567eaf..d9e5fafd14a8 100644 --- a/release/ray_release/retry.py +++ b/release/ray_release/retry.py @@ -3,19 +3,22 @@ import time from functools import wraps -_RAY_DEFAULT_MAX_RETRY_COUNT = 10 -_RAY_DEFAULT_INIT_DELAY_SEC = 1 -_RAY_DEFAULT_MAX_DELAY_SEC = 30 -_RAY_DEFAULT_BACKOFF = 2 -_RAT_DEFAULT_JITTER_SEC = 1 +# Default configuration for retry. +_DEFAULT_MAX_RETRY_COUNT = 10 +_DEFAULT_INIT_DELAY_SEC = 1 +_DEFAULT_MAX_DELAY_SEC = 30 +_DEFAULT_BACKOFF = 2 +_DEFAULT_JITTER_SEC = 1 +_DEFAULT_EXCEPTIONS = (Exception,) def retry( - max_retry_count=_RAY_DEFAULT_MAX_RETRY_COUNT, - init_delay_sec=_RAY_DEFAULT_INIT_DELAY_SEC, - max_delay_sec=_RAY_DEFAULT_MAX_DELAY_SEC, - backoff=_RAY_DEFAULT_BACKOFF, - jitter_sec=_RAT_DEFAULT_JITTER_SEC, + max_retry_count=_DEFAULT_MAX_RETRY_COUNT, + init_delay_sec=_DEFAULT_INIT_DELAY_SEC, + max_delay_sec=_DEFAULT_MAX_DELAY_SEC, + backoff=_DEFAULT_BACKOFF, + jitter_sec=_DEFAULT_JITTER_SEC, + exceptions=_DEFAULT_EXCEPTIONS, ): def wrapper(fn): @wraps(fn) @@ -23,7 +26,7 @@ def wrapped(*args, **kwargs): for cur_retry_count in range(max_retry_count): try: return fn(*args, **kwargs) - except Exception: + except exceptions: if cur_retry_count + 1 == max_retry_count: raise diff --git a/release/ray_release/tests/test_retry.py b/release/ray_release/tests/test_retry.py index 983940873992..b630e19f2dd0 100644 --- a/release/ray_release/tests/test_retry.py +++ b/release/ray_release/tests/test_retry.py @@ -53,5 +53,23 @@ def unlimited_error() -> int: assert invocation_count == 3 +def test_retry_on_certain_errors(): + invocation_count = 0 + + # Function doesn't raise exception; use a dummy value to check invocation. + @retry.retry(init_delay_sec=1, jitter_sec=1, exceptions=(KeyError,)) + def limited_error() -> int: + nonlocal invocation_count + + invocation_count += 1 + + if invocation_count == 1: + raise KeyError("Manual exception") + return 1 + + assert limited_error() == 1 + assert invocation_count == 2 + + if __name__ == "__main__": sys.exit(pytest.main(["-sv", __file__])) From 0e91ab82cbc4563d08564da7309a4f0cbe249127 Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 03:55:38 +0000 Subject: [PATCH 11/12] only handle certain errors Signed-off-by: dentiny --- release/ray_release/cluster_manager/minimal.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/release/ray_release/cluster_manager/minimal.py b/release/ray_release/cluster_manager/minimal.py index ea0842f52d10..1cfe14c1e2f2 100644 --- a/release/ray_release/cluster_manager/minimal.py +++ b/release/ray_release/cluster_manager/minimal.py @@ -20,7 +20,12 @@ class MinimalClusterManager(ClusterManager): Builds app config and compute template but does not start or stop session. """ - @retry(init_delay_sec=10, jitter_sec=5, max_retry_count=2) + @retry( + init_delay_sec=10, + jitter_sec=5, + max_retry_count=2, + exceptions=(ClusterEnvCreateError,), + ) def create_cluster_env(self): assert self.cluster_env_id is None From ead3c85c4121553eaa3fa7625208a8bbb9024575 Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 17 Nov 2024 06:59:23 +0000 Subject: [PATCH 12/12] type annotation Signed-off-by: dentiny --- release/ray_release/retry.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/release/ray_release/retry.py b/release/ray_release/retry.py index d9e5fafd14a8..dec0bd9be925 100644 --- a/release/ray_release/retry.py +++ b/release/ray_release/retry.py @@ -2,23 +2,24 @@ import time from functools import wraps +from typing import Tuple # Default configuration for retry. -_DEFAULT_MAX_RETRY_COUNT = 10 -_DEFAULT_INIT_DELAY_SEC = 1 -_DEFAULT_MAX_DELAY_SEC = 30 -_DEFAULT_BACKOFF = 2 -_DEFAULT_JITTER_SEC = 1 -_DEFAULT_EXCEPTIONS = (Exception,) +_DEFAULT_MAX_RETRY_COUNT: int = 10 +_DEFAULT_INIT_DELAY_SEC: int = 1 +_DEFAULT_MAX_DELAY_SEC: int = 30 +_DEFAULT_BACKOFF: int = 2 +_DEFAULT_JITTER_SEC: int = 1 +_DEFAULT_EXCEPTIONS: Tuple[Exception] = (Exception,) def retry( - max_retry_count=_DEFAULT_MAX_RETRY_COUNT, - init_delay_sec=_DEFAULT_INIT_DELAY_SEC, - max_delay_sec=_DEFAULT_MAX_DELAY_SEC, - backoff=_DEFAULT_BACKOFF, - jitter_sec=_DEFAULT_JITTER_SEC, - exceptions=_DEFAULT_EXCEPTIONS, + max_retry_count: int = _DEFAULT_MAX_RETRY_COUNT, + init_delay_sec: int = _DEFAULT_INIT_DELAY_SEC, + max_delay_sec: int = _DEFAULT_MAX_DELAY_SEC, + backoff: int = _DEFAULT_BACKOFF, + jitter_sec: int = _DEFAULT_JITTER_SEC, + exceptions: Tuple[Exception] = _DEFAULT_EXCEPTIONS, ): def wrapper(fn): @wraps(fn)