Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions src/parallel/lib/_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@ def _raise_timeout(run_id: str, exc: Union[Exception, None]) -> NoReturn:
raise TimeoutError(f"Fetching task run result for run id {run_id} timed out.") from exc


def _is_retryable_error(status_code: int) -> bool:
"""Determine if an error is retryable.

We retry the following HTTP status codes within the SDK:
- 408 (Request Timeout): The server timed out waiting for the request
- 503 (Service Unavailable): The server is temporarily unable to handle the request
- 504 (Gateway Timeout): The gateway server timed out

These errors typically indicate temporary issues with the server or network
that may resolve upon retry. We don't include 429 (Too Many Requests) as this
indicates rate limiting, which requires backing off rather than immediate retries.

Note: This is a low-level retry mechanism within the SDK. Customers may want to
implement their own retry logic at the application level for other error types.
"""
return status_code in (408, 503, 504)


@contextlib.contextmanager
def timeout_retry_context(run_id: str, deadline: float) -> Iterator[None]:
"""Context manager for handling timeouts and retries when fetching task run results.
Expand All @@ -49,8 +67,7 @@ def timeout_retry_context(run_id: str, deadline: float) -> Iterator[None]:
exc = e
continue
except APIStatusError as e:
# retry on timeouts from the API
if e.status_code == 408:
if _is_retryable_error(e.status_code):
exc = e
continue
raise
Expand Down