diff --git a/release/BUILD.bazel b/release/BUILD.bazel index a09070d9b313..f269add55138 100644 --- a/release/BUILD.bazel +++ b/release/BUILD.bazel @@ -309,7 +309,6 @@ py_library( bk_require("pybuildkite"), bk_require("pygithub"), bk_require("requests"), - bk_require("retry"), ], ) @@ -624,3 +623,18 @@ py_test( bk_require("pytest"), ], ) + +py_test( + name = "test_retry", + size = "small", + srcs = ["ray_release/tests/test_retry.py"], + exec_compatible_with = ["//:hermetic_python"], + tags = [ + "release_unit", + "team:ci", + ], + deps = [ + ":ray_release", + bk_require("pytest"), + ], +) diff --git a/release/ray_release/cluster_manager/minimal.py b/release/ray_release/cluster_manager/minimal.py index 8b202c247753..1cfe14c1e2f2 100644 --- a/release/ray_release/cluster_manager/minimal.py +++ b/release/ray_release/cluster_manager/minimal.py @@ -9,7 +9,7 @@ from ray_release.logger import logger from ray_release.cluster_manager.cluster_manager import ClusterManager from ray_release.util import format_link, anyscale_cluster_env_build_url -from retry import retry +from ray_release.retry import retry REPORT_S = 30.0 @@ -20,7 +20,12 @@ class MinimalClusterManager(ClusterManager): Builds app config and compute template but does not start or stop session. """ - @retry((ClusterEnvCreateError), delay=10, jitter=5, tries=2) + @retry( + init_delay_sec=10, + jitter_sec=5, + max_retry_count=2, + exceptions=(ClusterEnvCreateError,), + ) def create_cluster_env(self): assert self.cluster_env_id is None diff --git a/release/ray_release/retry.py b/release/ray_release/retry.py new file mode 100644 index 000000000000..dec0bd9be925 --- /dev/null +++ b/release/ray_release/retry.py @@ -0,0 +1,42 @@ +"""Utils on retry.""" + +import time +from functools import wraps +from typing import Tuple + +# Default configuration for retry. +_DEFAULT_MAX_RETRY_COUNT: int = 10 +_DEFAULT_INIT_DELAY_SEC: int = 1 +_DEFAULT_MAX_DELAY_SEC: int = 30 +_DEFAULT_BACKOFF: int = 2 +_DEFAULT_JITTER_SEC: int = 1 +_DEFAULT_EXCEPTIONS: Tuple[Exception] = (Exception,) + + +def retry( + max_retry_count: int = _DEFAULT_MAX_RETRY_COUNT, + init_delay_sec: int = _DEFAULT_INIT_DELAY_SEC, + max_delay_sec: int = _DEFAULT_MAX_DELAY_SEC, + backoff: int = _DEFAULT_BACKOFF, + jitter_sec: int = _DEFAULT_JITTER_SEC, + exceptions: Tuple[Exception] = _DEFAULT_EXCEPTIONS, +): + def wrapper(fn): + @wraps(fn) + def wrapped(*args, **kwargs): + for cur_retry_count in range(max_retry_count): + try: + return fn(*args, **kwargs) + except exceptions: + if cur_retry_count + 1 == max_retry_count: + raise + + sleep_sec = min( + init_delay_sec * (backoff**cur_retry_count) + jitter_sec, + max_delay_sec, + ) + time.sleep(sleep_sec) + + return wrapped + + return wrapper diff --git a/release/ray_release/tests/test_retry.py b/release/ray_release/tests/test_retry.py new file mode 100644 index 000000000000..b630e19f2dd0 --- /dev/null +++ b/release/ray_release/tests/test_retry.py @@ -0,0 +1,75 @@ +from ray_release import retry + +import sys +import pytest + + +def test_retry_with_no_error(): + invocation_count = 0 + + # Function doesn't raise exception; use a dummy value to check invocation. + @retry.retry() + def no_error_func() -> int: + nonlocal invocation_count + invocation_count += 1 + return 1 + + assert no_error_func() == 1 + assert invocation_count == 1 + + +# Test senario: exception count is less than retry count. +def test_retry_with_limited_error(): + invocation_count = 0 + + # Function doesn't raise exception; use a dummy value to check invocation. + @retry.retry(init_delay_sec=1, jitter_sec=1) + def limited_error() -> int: + nonlocal invocation_count + + invocation_count += 1 + + if invocation_count == 1: + raise Exception("Manual exception") + return 1 + + assert limited_error() == 1 + assert invocation_count == 2 + + +# Test senario: exception count exceeds retry count. +def test_retry_with_unlimited_error(): + invocation_count = 0 + + @retry.retry(init_delay_sec=1, jitter_sec=1, backoff=1, max_retry_count=3) + def unlimited_error() -> int: + nonlocal invocation_count + + invocation_count += 1 + raise Exception("Manual exception") + + with pytest.raises(Exception, match="Manual exception"): + unlimited_error() + assert invocation_count == 3 + + +def test_retry_on_certain_errors(): + invocation_count = 0 + + # Function doesn't raise exception; use a dummy value to check invocation. + @retry.retry(init_delay_sec=1, jitter_sec=1, exceptions=(KeyError,)) + def limited_error() -> int: + nonlocal invocation_count + + invocation_count += 1 + + if invocation_count == 1: + raise KeyError("Manual exception") + return 1 + + assert limited_error() == 1 + assert invocation_count == 2 + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", __file__])) diff --git a/release/requirements_buildkite.in b/release/requirements_buildkite.in index 96d05e27bb96..0c20af4d9088 100644 --- a/release/requirements_buildkite.in +++ b/release/requirements_buildkite.in @@ -14,7 +14,6 @@ pyyaml pybuildkite PyGithub requests -retry twine == 5.0.0 docker >= 7.1.0 diff --git a/release/requirements_buildkite.txt b/release/requirements_buildkite.txt index 9bfaada37880..659db4c5e8db 100644 --- a/release/requirements_buildkite.txt +++ b/release/requirements_buildkite.txt @@ -424,9 +424,7 @@ debugpy==1.8.2 \ decorator==5.1.1 \ --hash=sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330 \ --hash=sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186 - # via - # ipython - # retry + # via ipython deprecated==1.2.14 \ --hash=sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c \ --hash=sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3 @@ -1174,10 +1172,6 @@ pure-eval==0.2.3 \ --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \ --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42 # via stack-data -py==1.11.0 \ - --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ - --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 - # via retry pyasn1==0.6.0 \ --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 @@ -1539,10 +1533,6 @@ requests-toolbelt==1.0.0 \ --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \ --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06 # via twine -retry==0.9.2 \ - --hash=sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606 \ - --hash=sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4 - # via -r release/requirements_buildkite.in rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c