Skip to content

Commit

Permalink
Merge pull request #88 from andrewdelman/ecco_access_fsspec
Browse files Browse the repository at this point in the history
edits to function headers and arguments order
  • Loading branch information
andrewdelman authored Oct 8, 2024
2 parents ce55c17 + ae33a21 commit 789a9a4
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 33 deletions.
28 changes: 10 additions & 18 deletions ecco_access/ecco_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,14 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
Returns
-------
download_files: dict, with keys: ShortNames and values:
URLs (if in 'query' mode), or paths of files that can be
passed directly to xarray (open_dataset or open_mfdataset).
Values are of type str if query finds only one granule/file
for that ShortName; of type list if query finds
multiple granules in the same dataset;
or of type fsspec.mapping.FSMap if mode = 's3_open_fsspec'.
Only returned if return_granules=True (default).
granule_files: dict with ShortNames as keys; values are URLs or S3 paths
(if in 'query' mode), or paths of files that can be
passed directly to xarray (open_dataset or open_mfdataset).
Values are of type str if query finds only one granule/file
for that ShortName; of type list if query finds
multiple granules in the same dataset;
or of type fsspec.mapping.FSMap if mode = 's3_open_fsspec'.
Only returned if return_granules=True (default).
"""

Expand Down Expand Up @@ -272,6 +272,7 @@ def shortnames_find(query_list,grid,time_res):
###================================================================================================================



def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
StartDate=None,EndDate=None,snapshot_interval=None,\
mode='download_ifspace',download_root_dir=None,**kwargs):
Expand Down Expand Up @@ -325,10 +326,6 @@ def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
and 'monthly' otherwise.
mode: str, one of the following:
'ls' or 'query': Query dataset ShortNames and variable names/
descriptions only; no downloads.
's3_ls' or 's3_query': Query dataset ShortNames and variable names/
descriptions only; return paths on S3.
'download': Download datasets using NASA Earthdata URLs
'download_ifspace': Check storage availability before downloading.
Download only if storage footprint of downloads
Expand Down Expand Up @@ -375,16 +372,11 @@ def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
force_redownload: bool, if True, existing files will be redownloaded and replaced;
if False (default), existing files will not be replaced.
return_granules: bool, if True (default), str or list of queried or
downloaded granules/files (including ones that
were already on disk and not replaced) is returned.
if False, the function returns nothing.
Returns
-------
ds_out: xarray Dataset or dict of xarray Datasets (with ShortNames as keys),
containing all of the accessed datasets.
Does not work with the query modes: 'ls','query','s3_ls','s3_query'.
This function does not work with the query modes: 'ls','query','s3_ls','s3_query'.
"""

pass
Expand Down
14 changes: 7 additions & 7 deletions ecco_access/ecco_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,8 @@ def ecco_podaac_download(ShortName,StartDate,EndDate,snapshot_interval='monthly'
###================================================================================================================


def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5,snapshot_interval=None,\
download_root_dir=None,n_workers=6,force_redownload=False):
def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,snapshot_interval=None,\
download_root_dir=None,max_avail_frac=0.5,n_workers=6,force_redownload=False):

"""
Expand All @@ -405,11 +405,6 @@ def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0
ECCOv4r4 date range is '1992-01-01' to '2017-12-31'.
For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets
within the specified date range.
max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets.
If storing the datasets exceeds this fraction, an error is returned.
Valid range is [0,0.9]. If number provided is outside this range, it is replaced by the closer
endpoint of the range.
snapshot_interval: ('monthly', 'daily', or None), if snapshot datasets are included in ShortNames,
this determines whether snapshots are included for only the beginning/end of each month
Expand All @@ -420,6 +415,11 @@ def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0
download_root_dir: str, defines parent directory to download files to.
Files will be downloaded to directory download_root_dir/ShortName/.
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets.
If storing the datasets exceeds this fraction, an error is returned.
Valid range is [0,0.9]. If the number provided is outside this range, it is replaced by the closer
endpoint of the range.
n_workers: int, number of workers to use in concurrent downloads. Benefits typically taper off above 5-6.
Applies only if files are downloaded.
Expand Down
16 changes: 8 additions & 8 deletions ecco_access/ecco_s3_retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,8 @@ def ecco_podaac_s3_get(ShortName,StartDate,EndDate,snapshot_interval='monthly',d
###================================================================================================================


def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5,snapshot_interval=None,\
download_root_dir=None,n_workers=6,force_redownload=False):
def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,snapshot_interval=None,\
download_root_dir=None,max_avail_frac=0.5,n_workers=6,force_redownload=False):

"""
Expand All @@ -551,12 +551,7 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5
ECCOv4r4 date range is '1992-01-01' to '2017-12-31'.
For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets
within the specified date range.
max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets.
This determines whether the dataset files are stored on the current instance, or opened on S3.
Valid range is [0,0.9]. If number provided is outside this range, it is replaced by the closer
endpoint of the range.
snapshot_interval: ('monthly', 'daily', or None), if snapshot datasets are included in ShortNames,
this determines whether snapshots are included for only the beginning/end of each month
('monthly'), or for every day ('daily').
Expand All @@ -567,6 +562,11 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5
Files will be downloaded to directory download_root_dir/ShortName/.
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets.
This determines whether the dataset files are stored on the current instance, or opened on S3.
Valid range is [0,0.9]. If the number provided is outside this range, it is replaced by the closer
endpoint of the range.
n_workers: int, number of workers to use in concurrent downloads. Benefits typically taper off above 5-6.
Applies only if files are downloaded.
Expand Down

0 comments on commit 789a9a4

Please sign in to comment.