-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Gdp-update #282
Gdp-update #282
Changes from all commits
59b6e62
4b39d15
29da4ee
425ca31
4313187
53895a5
bdae72d
b02e499
d70048d
03c048c
5839b09
ec34676
de02a56
5ee5945
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,8 +12,6 @@ | |
import urllib.request | ||
import warnings | ||
|
||
GDP_VERSION = "2.00" | ||
|
||
GDP_COORDS = [ | ||
"ids", | ||
"time", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,12 +18,14 @@ | |
import warnings | ||
import xarray as xr | ||
|
||
GDP_VERSION = "2.01" | ||
|
||
GDP_DATA_URL = "https://www.aoml.noaa.gov/ftp/pub/phod/lumpkin/hourly/v2.00/netcdf/" | ||
GDP_DATA_URL = "https://www.aoml.noaa.gov/ftp/pub/phod/lumpkin/hourly/v2.01/netcdf/" | ||
GDP_DATA_URL_EXPERIMENTAL = ( | ||
"https://www.aoml.noaa.gov/ftp/pub/phod/lumpkin/hourly/experimental/" | ||
) | ||
GDP_TMP_PATH = os.path.join(tempfile.gettempdir(), "clouddrift", "gdp") | ||
GDP_TMP_PATH_EXPERIMENTAL = os.path.join(tempfile.gettempdir(), "clouddrift", "gdp_exp") | ||
GDP_DATA = [ | ||
"lon", | ||
"lat", | ||
|
@@ -51,7 +53,7 @@ def download( | |
drifter_ids: list = None, | ||
n_random_id: int = None, | ||
url: str = GDP_DATA_URL, | ||
tmp_path: str = GDP_TMP_PATH, | ||
tmp_path: str = None, | ||
): | ||
"""Download individual NetCDF files from the AOML server. | ||
|
||
|
@@ -70,17 +72,21 @@ def download( | |
Returns | ||
------- | ||
out : list | ||
List of retrived drifters | ||
List of retrieved drifters | ||
""" | ||
|
||
# adjust the tmp_path if using the experimental source | ||
if tmp_path is None: | ||
tmp_path = GDP_TMP_PATH if url == GDP_DATA_URL else GDP_TMP_PATH_EXPERIMENTAL | ||
|
||
print(f"Downloading GDP hourly data from {url} to {tmp_path}...") | ||
|
||
# Create a temporary directory if it doesn't already exist. | ||
os.makedirs(tmp_path, exist_ok=True) | ||
|
||
if url == GDP_DATA_URL: | ||
pattern = "drifter_[0-9]*.nc" | ||
filename_pattern = "drifter_{id}.nc" | ||
pattern = "drifter_hourly_[0-9]*.nc" | ||
filename_pattern = "drifter_hourly_{id}.nc" | ||
elif url == GDP_DATA_URL_EXPERIMENTAL: | ||
pattern = "drifter_hourly_[0-9]*.nc" | ||
filename_pattern = "drifter_hourly_{id}.nc" | ||
|
@@ -482,7 +488,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset: | |
# global attributes | ||
attrs = { | ||
"title": "Global Drifter Program hourly drifting buoy collection", | ||
"history": f"version {gdp.GDP_VERSION}. Metadata from dirall.dat and deplog.dat", | ||
"history": f"version {GDP_VERSION}. Metadata from dirall.dat and deplog.dat", | ||
"Conventions": "CF-1.6", | ||
"date_created": datetime.now().isoformat(), | ||
"publisher_name": "GDP Drifter DAC", | ||
|
@@ -520,7 +526,7 @@ def to_raggedarray( | |
drifter_ids: Optional[list[int]] = None, | ||
n_random_id: Optional[int] = None, | ||
url: Optional[str] = GDP_DATA_URL, | ||
tmp_path: Optional[str] = GDP_TMP_PATH, | ||
tmp_path: Optional[str] = None, | ||
) -> RaggedArray: | ||
"""Download and process individual GDP hourly files and return a RaggedArray | ||
instance with the data. | ||
|
@@ -547,7 +553,7 @@ def to_raggedarray( | |
-------- | ||
|
||
Invoke `to_raggedarray` without any arguments to download all drifter data | ||
from the 2.00 GDP feed: | ||
from the 2.01 GDP feed: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we make the docstring automatically include the GDP_VERSION variable here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think that's possible. The documentation is also generated from the source code, and the code would have to be interpreted to get the value. |
||
|
||
>>> from clouddrift.adapters.gdp1h import to_raggedarray | ||
>>> ra = to_raggedarray() | ||
|
@@ -582,10 +588,15 @@ def to_raggedarray( | |
>>> arr = ra.to_awkward() | ||
>>> arr.to_parquet("gdp1h.parquet") | ||
""" | ||
|
||
# adjust the tmp_path if using the experimental source | ||
if tmp_path is None: | ||
tmp_path = GDP_TMP_PATH if url == GDP_DATA_URL else GDP_TMP_PATH_EXPERIMENTAL | ||
|
||
ids = download(drifter_ids, n_random_id, url, tmp_path) | ||
|
||
if url == GDP_DATA_URL: | ||
filename_pattern = "drifter_{id}.nc" | ||
filename_pattern = "drifter_hourly_{id}.nc" | ||
elif url == GDP_DATA_URL_EXPERIMENTAL: | ||
filename_pattern = "drifter_hourly_{id}.nc" | ||
else: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not useful anymore but should we keep it to bring awareness of some possible changes upstream?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I kept it in case there is eventually a difference between the two datasets. If we are sure it is not going to change, I would remove it.