Skip to content

Commit

Permalink
Gdp-update (#282)
Browse files Browse the repository at this point in the history
* update datasets.gdp1h()

* lint

* move GDP_VERSION

* file pattern change

* fix the fix

* typo

* adjust path with experimental url

* actually I prefer this

* forgot the default value

---------

Co-authored-by: Philippe Miron <philippe.miron@dtn.com>
  • Loading branch information
selipot and Philippe Miron authored Oct 4, 2023
1 parent c71062b commit af51004
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 27 deletions.
2 changes: 0 additions & 2 deletions clouddrift/adapters/gdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
import urllib.request
import warnings

GDP_VERSION = "2.00"

GDP_COORDS = [
"ids",
"time",
Expand Down
29 changes: 20 additions & 9 deletions clouddrift/adapters/gdp1h.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
import warnings
import xarray as xr

GDP_VERSION = "2.01"

GDP_DATA_URL = "https://www.aoml.noaa.gov/ftp/pub/phod/lumpkin/hourly/v2.00/netcdf/"
GDP_DATA_URL = "https://www.aoml.noaa.gov/ftp/pub/phod/lumpkin/hourly/v2.01/netcdf/"
GDP_DATA_URL_EXPERIMENTAL = (
"https://www.aoml.noaa.gov/ftp/pub/phod/lumpkin/hourly/experimental/"
)
GDP_TMP_PATH = os.path.join(tempfile.gettempdir(), "clouddrift", "gdp")
GDP_TMP_PATH_EXPERIMENTAL = os.path.join(tempfile.gettempdir(), "clouddrift", "gdp_exp")
GDP_DATA = [
"lon",
"lat",
Expand Down Expand Up @@ -51,7 +53,7 @@ def download(
drifter_ids: list = None,
n_random_id: int = None,
url: str = GDP_DATA_URL,
tmp_path: str = GDP_TMP_PATH,
tmp_path: str = None,
):
"""Download individual NetCDF files from the AOML server.
Expand All @@ -70,17 +72,21 @@ def download(
Returns
-------
out : list
List of retrived drifters
List of retrieved drifters
"""

# adjust the tmp_path if using the experimental source
if tmp_path is None:
tmp_path = GDP_TMP_PATH if url == GDP_DATA_URL else GDP_TMP_PATH_EXPERIMENTAL

print(f"Downloading GDP hourly data from {url} to {tmp_path}...")

# Create a temporary directory if it doesn't already exist.
os.makedirs(tmp_path, exist_ok=True)

if url == GDP_DATA_URL:
pattern = "drifter_[0-9]*.nc"
filename_pattern = "drifter_{id}.nc"
pattern = "drifter_hourly_[0-9]*.nc"
filename_pattern = "drifter_hourly_{id}.nc"
elif url == GDP_DATA_URL_EXPERIMENTAL:
pattern = "drifter_hourly_[0-9]*.nc"
filename_pattern = "drifter_hourly_{id}.nc"
Expand Down Expand Up @@ -482,7 +488,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
# global attributes
attrs = {
"title": "Global Drifter Program hourly drifting buoy collection",
"history": f"version {gdp.GDP_VERSION}. Metadata from dirall.dat and deplog.dat",
"history": f"version {GDP_VERSION}. Metadata from dirall.dat and deplog.dat",
"Conventions": "CF-1.6",
"date_created": datetime.now().isoformat(),
"publisher_name": "GDP Drifter DAC",
Expand Down Expand Up @@ -520,7 +526,7 @@ def to_raggedarray(
drifter_ids: Optional[list[int]] = None,
n_random_id: Optional[int] = None,
url: Optional[str] = GDP_DATA_URL,
tmp_path: Optional[str] = GDP_TMP_PATH,
tmp_path: Optional[str] = None,
) -> RaggedArray:
"""Download and process individual GDP hourly files and return a RaggedArray
instance with the data.
Expand All @@ -547,7 +553,7 @@ def to_raggedarray(
--------
Invoke `to_raggedarray` without any arguments to download all drifter data
from the 2.00 GDP feed:
from the 2.01 GDP feed:
>>> from clouddrift.adapters.gdp1h import to_raggedarray
>>> ra = to_raggedarray()
Expand Down Expand Up @@ -582,10 +588,15 @@ def to_raggedarray(
>>> arr = ra.to_awkward()
>>> arr.to_parquet("gdp1h.parquet")
"""

# adjust the tmp_path if using the experimental source
if tmp_path is None:
tmp_path = GDP_TMP_PATH if url == GDP_DATA_URL else GDP_TMP_PATH_EXPERIMENTAL

ids = download(drifter_ids, n_random_id, url, tmp_path)

if url == GDP_DATA_URL:
filename_pattern = "drifter_{id}.nc"
filename_pattern = "drifter_hourly_{id}.nc"
elif url == GDP_DATA_URL_EXPERIMENTAL:
filename_pattern = "drifter_hourly_{id}.nc"
else:
Expand Down
28 changes: 12 additions & 16 deletions clouddrift/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@


def gdp1h() -> xr.Dataset:
"""Returns the NOAA Global Drifter Program (GDP) hourly dataset as an Xarray
dataset.
"""Returns the latest version of the NOAA Global Drifter Program (GDP) hourly
dataset as an Xarray dataset.
The data is accessed from a public AWS S3 bucket accessible at
https://registry.opendata.aws/noaa-oar-hourly-gdp/. This dataset includes
corrections and additional metadata since the original submission of the
dataset to NCEI (accessible via https://doi.org/10.25921/x46c-3620). We
recommend using this dataset over the one distributed via NCEI.
The data is accessed from a zarr archive hosted on a public AWS S3 bucket accessible at
https://registry.opendata.aws/noaa-oar-hourly-gdp/. The original data source from NOAA NCEI
is https://doi.org/10.25921/x46c-3620.
Returns
-------
Expand All @@ -28,23 +26,21 @@ def gdp1h() -> xr.Dataset:
>>> ds = gdp1h()
>>> ds
<xarray.Dataset>
Dimensions: (traj: 17324, obs: 165754333)
Dimensions: (traj: 19396, obs: 197214787)
Coordinates:
ids (obs) int64 ...
lat (obs) float32 ...
lon (obs) float32 ...
time (obs) datetime64[ns] ...
Dimensions without coordinates: traj, obs
Data variables: (12/55)
Data variables: (12/60)
BuoyTypeManufacturer (traj) |S20 ...
BuoyTypeSensorArray (traj) |S20 ...
CurrentProgram (traj) float64 ...
CurrentProgram (traj) float32 ...
DeployingCountry (traj) |S20 ...
DeployingShip (traj) |S20 ...
DeploymentComments (traj) |S20 ...
... ...
sst1 (obs) float64 ...
sst2 (obs) float64 ...
start_lat (traj) float32 ...
start_lon (traj) float32 ...
typebuoy (traj) |S10 ...
typedeath (traj) int8 ...
ve (obs) float32 ...
Expand All @@ -54,7 +50,7 @@ def gdp1h() -> xr.Dataset:
acknowledgement: Elipot, Shane; Sykulski, Adam; Lumpkin, Rick; Centurio...
contributor_name: NOAA Global Drifter Program
contributor_role: Data Acquisition Center
date_created: 2022-12-09T06:02:29.684949
date_created: 2023-09-08T17:05:12.130123
doi: 10.25921/x46c-3620
... ...
processing_level: Level 2 QC by GDP drifter DAC
Expand All @@ -68,7 +64,7 @@ def gdp1h() -> xr.Dataset:
--------
:func:`gdp6h`
"""
url = "https://noaa-oar-hourly-gdp-pds.s3.amazonaws.com/latest/gdp_v2.00.zarr"
url = "https://noaa-oar-hourly-gdp-pds.s3.amazonaws.com/latest/gdp-v2.01.zarr"
return xr.open_dataset(url, engine="zarr")


Expand Down

0 comments on commit af51004

Please sign in to comment.