Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Raise exceptions more #351

Merged
merged 1 commit into from
Nov 22, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 31 additions & 45 deletions earthaccess/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,9 @@ def get_s3fs_session(
self.initial_ts = datetime.datetime.now()
return deepcopy(self.s3_fs)
else:
print(
raise ValueError(
"A valid Earthdata login instance is required to retrieve S3 credentials"
)
return None

@lru_cache
def get_fsspec_session(self) -> fsspec.AbstractFileSystem:
Expand Down Expand Up @@ -269,7 +268,7 @@ def open(
self,
granules: Union[List[str], List[DataGranule]],
provider: Optional[str] = None,
) -> Union[List[Any], None]:
) -> List[Any]:
"""Returns a list of fsspec file-like objects that can be used to access files
hosted on S3 or HTTPS by third party libraries like xarray.

Expand All @@ -280,15 +279,14 @@ def open(
"""
if len(granules):
return self._open(granules, provider)
print("The granules list is empty, moving on...")
return None
return []

@singledispatchmethod
def _open(
self,
granules: Union[List[str], List[DataGranule]],
provider: Optional[str] = None,
) -> Union[List[Any], None]:
) -> List[Any]:
"""Returns a list of fsspec file-like objects that can be used to access files
hosted on S3 or HTTPS by third party libraries like xarray.

Expand All @@ -305,17 +303,16 @@ def _open_granules(
granules: List[DataGranule],
provider: Optional[str] = None,
threads: Optional[int] = 8,
) -> Union[List[Any], None]:
) -> List[Any]:
fileset: List = []
data_links: List = []
total_size = round(sum([granule.size() for granule in granules]) / 1024, 2)
print(f" Opening {len(granules)} granules, approx size: {total_size} GB")
print(f"Opening {len(granules)} granules, approx size: {total_size} GB")

if self.auth is None:
print(
raise ValueError(
"A valid Earthdata login instance is required to retrieve credentials"
)
return None

if self.running_in_aws:
if granules[0].cloud_hosted:
Expand Down Expand Up @@ -347,13 +344,12 @@ def _open_granules(
fs=s3_fs,
threads=threads,
)
except Exception:
print(
"An exception occurred while trying to access remote files on S3: "
"This may be caused by trying to access the data outside the us-west-2 region"
except Exception as e:
raise RuntimeError(
"An exception occurred while trying to access remote files on S3. "
"This may be caused by trying to access the data outside the us-west-2 region."
f"Exception: {traceback.format_exc()}"
)
return None
) from e
else:
fileset = self._open_urls_https(data_links, granules, threads=threads)
return fileset
Expand All @@ -373,7 +369,7 @@ def _open_urls(
granules: List[str],
provider: Optional[str] = None,
threads: Optional[int] = 8,
) -> Union[List[Any], None]:
) -> List[Any]:
fileset: List = []
data_links: List = []

Expand All @@ -384,15 +380,13 @@ def _open_urls(
provider = provider
data_links = granules
else:
print(
raise ValueError(
f"Schema for {granules[0]} is not recognized, must be an HTTP or S3 URL"
)
return None
if self.auth is None:
print(
raise ValueError(
"A valid Earthdata login instance is required to retrieve S3 credentials"
)
return None

if self.running_in_aws and granules[0].startswith("s3"):
if provider is not None:
Expand All @@ -405,27 +399,24 @@ def _open_urls(
fs=s3_fs,
threads=threads,
)
except Exception:
print(
"An exception occurred while trying to access remote files on S3: "
"This may be caused by trying to access the data outside the us-west-2 region"
except Exception as e:
raise RuntimeError(
"An exception occurred while trying to access remote files on S3. "
"This may be caused by trying to access the data outside the us-west-2 region."
f"Exception: {traceback.format_exc()}"
)
return None
) from e
else:
print(f"Provider {provider} has no valid cloud credentials")
return fileset
else:
print(
raise ValueError(
"earthaccess cannot derive the DAAC provider from URLs only, a provider is needed e.g. POCLOUD"
)
return None
else:
if granules[0].startswith("s3"):
print(
raise ValueError(
"We cannot open S3 links when we are not in-region, try using HTTPS links"
)
return None
fileset = self._open_urls_https(data_links, granules, threads)
return fileset

Expand All @@ -435,7 +426,7 @@ def get(
local_path: Optional[str] = None,
provider: Optional[str] = None,
threads: int = 8,
) -> Union[None, List[str]]:
) -> List[str]:
"""Retrieves data granules from a remote storage system.

* If we run this in the cloud we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda)
Expand Down Expand Up @@ -463,8 +454,7 @@ def get(
files = self._get(granules, local_path, provider, threads)
return files
else:
print("List of URLs or DataGranule isntances expected")
return None
raise ValueError("List of URLs or DataGranule isntances expected")

@singledispatchmethod
def _get(
Expand All @@ -473,7 +463,7 @@ def _get(
local_path: str,
provider: Optional[str] = None,
threads: int = 8,
) -> Union[None, List[str]]:
) -> List[str]:
"""Retrieves data granules from a remote storage system.

* If we run this in the cloud we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda)
Expand All @@ -491,8 +481,7 @@ def _get(
Returns:
None
"""
print("List of URLs or DataGranule isntances expected")
return None
raise NotImplementedError(f"Cannot _get {granules}")

@_get.register
def _get_urls(
Expand All @@ -501,15 +490,14 @@ def _get_urls(
local_path: str,
provider: Optional[str] = None,
threads: int = 8,
) -> Union[None, List[str]]:
) -> List[str]:
data_links = granules
downloaded_files: List = []
if provider is None and self.running_in_aws and "cumulus" in data_links[0]:
print(
raise ValueError(
"earthaccess can't yet guess the provider for cloud collections, "
"we need to use one from earthaccess.list_cloud_providers()"
)
return None
if self.running_in_aws and data_links[0].startswith("s3"):
print(f"Accessing cloud dataset using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
Expand All @@ -532,7 +520,7 @@ def _get_granules(
local_path: str,
provider: Optional[str] = None,
threads: int = 8,
) -> Union[None, List[str]]:
) -> List[str]:
data_links: List = []
downloaded_files: List = []
provider = granules[0]["meta"]["provider-id"]
Expand Down Expand Up @@ -615,13 +603,11 @@ def _download_onprem_granules(
:returns: None
"""
if urls is None:
print("The granules didn't provide a valid GET DATA link")
return None
raise ValueError("The granules didn't provide a valid GET DATA link")
if self.auth is None:
print(
raise ValueError(
"We need to be logged into NASA EDL in order to download data granules"
)
return []
if not os.path.exists(directory):
os.makedirs(directory)

Expand Down