Skip to content

Commit

Permalink
add catch for 429 error from CDSE
Browse files Browse the repository at this point in the history
also download in paralel

may work to help dbekaert/RAiDER#610
  • Loading branch information
scottstanie committed Nov 7, 2023
1 parent 070b4d5 commit 4ce9b9d
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 11 deletions.
8 changes: 8 additions & 0 deletions eof/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@
is_flag=True,
help="save credentials provided interactively in the ~/.netrc file if necessary",
)
@click.option(
"--max-workers",
type=int,
default=3,
help="Number of parallel downloads to run. Note that CDSE has a limit of 4",
)
def cli(
search_path: str,
save_dir: str,
Expand All @@ -107,6 +113,7 @@ def cli(
cdse_password: str = "",
ask_password: bool = False,
update_netrc: bool = False,
max_workers: int = 3,
):
"""Download Sentinel precise orbit files.
Expand Down Expand Up @@ -136,4 +143,5 @@ def cli(
asf_password=asf_password,
cdse_user=cdse_user,
cdse_password=cdse_password,
max_workers=max_workers,
)
44 changes: 36 additions & 8 deletions eof/dataspace_client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Client to get orbit files from dataspace.copernicus.eu ."""
from __future__ import annotations

from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional
Expand Down Expand Up @@ -164,13 +165,19 @@ def query_orbit_by_dt(
logger.warning("The following dates were not found: %s", remaining_dates)
return all_results

def download_all(self, query_results: list[dict], output_directory: Filename):
def download_all(
self,
query_results: list[dict],
output_directory: Filename,
max_workers: int = 3,
):
"""Download all the specified orbit products."""
return download_all(
query_results,
output_directory=output_directory,
username=self._username,
password=self._password,
max_workers=max_workers,
)


Expand Down Expand Up @@ -362,6 +369,7 @@ def download_orbit_file(
if chunk:
outfile.write(chunk)

logger.info(f"Orbit file downloaded to {output_orbit_file_path}")
return output_orbit_file_path


Expand All @@ -370,6 +378,7 @@ def download_all(
output_directory: Filename,
username: str = "",
password: str = "",
max_workers: int = 3,
) -> list[Path]:
"""Download all the specified orbit products.
Expand All @@ -383,6 +392,9 @@ def download_all(
CDSE username
password : str
CDSE password
max_workers : int, default = 3
Maximum parallel downloads from CDSE.
Note that >4 connections will result in a HTTP 429 Error
"""
downloaded_paths: list[Path] = []
Expand All @@ -391,22 +403,38 @@ def download_all(
# query_results, start_time, stop_time
# )
# Obtain an access token the download request from the provided credentials

access_token = get_access_token(username, password)
output_names = []
download_urls = []
for query_result in query_results:
orbit_file_name = query_result["Name"]
orbit_file_request_id = query_result["Id"]

# Construct the URL used to download the Orbit file
download_url = f"{DOWNLOAD_URL}({orbit_file_request_id})/$value"
download_urls.append(download_url)

logger.info(
orbit_file_name = query_result["Name"]
output_names.append(orbit_file_name)

logger.debug(
f"Downloading Orbit file {orbit_file_name} from service endpoint "
f"{download_url}"
)
output_orbit_file_path = download_orbit_file(
download_url, output_directory, orbit_file_name, access_token
)

logger.info(f"Orbit file downloaded to {output_orbit_file_path}")
downloaded_paths.append(output_orbit_file_path)
downloaded_paths = []
with ThreadPoolExecutor(max_workers=max_workers) as exc:
futures = [
exc.submit(
download_orbit_file,
request_url=u,
output_directory=output_directory,
orbit_file_name=n,
access_token=access_token,
)
for (u, n) in zip(download_urls, output_names)
]
for f in futures:
downloaded_paths.append(f.result())

return downloaded_paths
15 changes: 12 additions & 3 deletions eof/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pathlib import Path

from dateutil.parser import parse
from requests.exceptions import HTTPError

from .asf_client import ASFClient
from .dataspace_client import DataspaceClient
Expand Down Expand Up @@ -102,9 +103,17 @@ def download_eofs(

if query:
logger.info("Attempting download from SciHub")
results = client.download_all(query, output_directory=save_dir)
filenames.extend(results)
dataspace_successful = True
try:
results = client.download_all(
query, output_directory=save_dir, max_workers=max_workers
)
filenames.extend(results)
dataspace_successful = True
except HTTPError as e:
assert e.response is not None
if e.response.status_code == 429:
logger.warning(f"Failed due to too many requests: {e.args}")
# Dataspace failed -> try asf

# For failures from scihub, try ASF
if not dataspace_successful:
Expand Down

0 comments on commit 4ce9b9d

Please sign in to comment.