Skip to content

Commit

Permalink
retry download given 429 or 503 status, resolves #268
Browse files Browse the repository at this point in the history
  • Loading branch information
RemyLau committed Oct 2, 2022
1 parent a245a4d commit 22fb594
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 9 deletions.
5 changes: 5 additions & 0 deletions src/nleval/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
from nleval.typing import Dict

__all__ = [
"DEFAULT_RETRY_DELAY",
"MAX_DOWNLOAD_RETRIES",
"NLEDATA_URL_DICT",
"NLEDATA_URL_DICT_DEV",
"NLEDATA_URL_DICT_STABLE",
]

# Fallback wait, in seconds, before re-attempting a download when the server's
# response does not carry a "Retry-after" header.
DEFAULT_RETRY_DELAY = 5
# Upper bound on the number of download attempts made before giving up.
MAX_DOWNLOAD_RETRIES = 10

NLEDATA_URL_DICT_STABLE: Dict[str, str] = {}
NLEDATA_URL_DICT_DEV: Dict[str, str] = {
"nledata-v1.0-test": "https://sandbox.zenodo.org/record/1096827/files/",
Expand Down
4 changes: 4 additions & 0 deletions src/nleval/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ class DataNotFoundError(Exception):
"""Raised when a particular version of archival data is unavailable."""


class ExceededMaxNumRetries(Exception):
    """Raised when the number of download retries exceeds the limit.

    Signals that a download was attempted the maximum allowed number of
    times without ever receiving a successful response.
    """


class IDNotExistError(Exception):
    """Raised when a queried ID does not exist."""

Expand Down
37 changes: 28 additions & 9 deletions src/nleval/util/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,31 @@ def download_unzip(url: str, root: str, *, logger: Optional[Logger] = None):
logger = logger or native_logger

logger.info(f"Downloading zip archive from {url}")
r = requests.get(url)
if not r.ok:
logger.error(f"Download filed: {r} {r.reason}")
raise requests.exceptions.RequestException(r)

logger.info("Download completed, start unpacking...")
zf = ZipFile(BytesIO(r.content))
zf.extractall(root)
logger.info("Done extracting")

num_tries = 0
while num_tries < MAX_DOWNLOAD_RETRIES:
num_tries += 1
r = requests.get(url)

if r.ok:
logger.info("Download completed, start unpacking...")
zf = ZipFile(BytesIO(r.content))
zf.extractall(root)
logger.info("Done extracting")
break
elif r.status_code in [429, 503]: # Retry later
t = r.headers.get("Retry-after", DEFAULT_RETRY_DELAY)
logger.warning(f"Server temporarily unavailable, waiting for {t} sec")
time.sleep(int(t))
elif r.status_code == 404:
reason = f"{url} is unavailable, try using a more recent data version"
logger.error(reason)
raise DataNotFoundError(reason)
else:
logger.error(f"Failed to download {url}: {r} {r.reason}")
raise requests.exceptions.RequestException(r)

else: # failed to download within the allowed number of retries
logger.error(f"Failed to download {url}")
reason = f"Max number of retries exceeded {MAX_DOWNLOAD_RETRIES=}"
raise ExceededMaxNumRetries(reason)

0 comments on commit 22fb594

Please sign in to comment.