Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(cli, studio): don't retry on non request/http errors #907

Merged
merged 1 commit into from
Feb 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/datachain/remote/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from urllib.parse import urlparse, urlunparse

import websockets
from requests.exceptions import HTTPError, Timeout

from datachain.config import Config
from datachain.error import DataChainError
Expand Down Expand Up @@ -104,8 +105,8 @@ def _get_team(self) -> str:
raise DataChainError(
"Studio team is not set. "
"Use `datachain auth team <team_name>` "
"or environment variable `DVC_STUDIO_TEAM` to set it."
"You can also set it in the config file as team under studio."
"or environment variable `DVC_STUDIO_TEAM` to set it. "
"You can also set `studio.team` in the config file."
)

return team
Expand Down Expand Up @@ -158,15 +159,14 @@ def _send_request_msgpack(
message = content.get("message", "")
return Response(response_data, ok, message)

@retry_with_backoff(retries=5)
@retry_with_backoff(retries=3, errors=(HTTPError, Timeout))
def _send_request(
self, route: str, data: dict[str, Any], method: Optional[str] = "POST"
) -> Response[Any]:
"""
Function that communicate Studio API.
It will raise an exception, and try to retry, if 5xx status code is
returned, or if ConnectionError or Timeout exceptions are thrown from
requests lib
returned, or if Timeout exceptions is thrown from the requests lib
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[C]: removed ConnectionError from the errors we retry on. It is usually caused by a proxy or SSL problem, and I don't think we can recover from those by retrying.

"""
import requests

Expand All @@ -188,7 +188,7 @@ def _send_request(
)
try:
response.raise_for_status()
except requests.exceptions.HTTPError:
except HTTPError:
if _is_server_error(response.status_code):
# going to retry
raise
Expand Down
14 changes: 12 additions & 2 deletions src/datachain/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import glob
import io
import json
import logging
import os
import os.path as osp
import random
Expand All @@ -25,6 +26,9 @@
import pandas as pd
from typing_extensions import Self


logger = logging.getLogger("datachain")

NUL = b"\0"
TIME_ZERO = datetime.fromtimestamp(0, tz=timezone.utc)

Expand Down Expand Up @@ -271,19 +275,25 @@ def flatten(items):
yield item


def retry_with_backoff(retries=5, backoff_sec=1):
def retry_with_backoff(retries=5, backoff_sec=1, errors=(Exception,)):
def retry(f):
def wrapper(*args, **kwargs):
num_tried = 0
while True:
try:
return f(*args, **kwargs)
except Exception:
except errors:
if num_tried == retries:
raise
sleep = (
backoff_sec * 2** num_tried + random.uniform(0, 1) # noqa: S311
)
logger.exception(
"Error in %s, retrying in %ds, attempt %d",
f.__name__,
sleep,
num_tried,
)
time.sleep(sleep)
num_tried += 1

Expand Down