Skip to content

Commit

Permalink
New opendap dataformat (#570)
Browse files Browse the repository at this point in the history
* New opendap dataformat

* New test cat for opendap - new test opendap - new test dep pydap

* Upd ESM spec and changelog
  • Loading branch information
aulemahal authored Feb 22, 2023
1 parent 5e854c7 commit 1086177
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 6 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

### New features added

- Add `opendap` as a possible data format. [#570](https://github.com/intake/intake-esm/pull/570) ([@aulemahal](https://github.com/aulemahal))

## v2022.9.18

([full changelog](https://github.com/intake/intake-esm/compare/v2021.8.17...3e959d126663f9b8415528bfcee575967c3ef0c1))
Expand Down
1 change: 1 addition & 0 deletions ci/environment-upstream-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies:
- pre-commit
- psutil
- pydantic>=1.9
- pydap
- pyproj
- pytest
- pytest-cov
Expand Down
1 change: 1 addition & 0 deletions ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies:
- pooch
- pre-commit
- pydantic>=1.9
- pydap
- pytest
- pytest-cov
- pytest-sugar
Expand Down
10 changes: 5 additions & 5 deletions docs/source/reference/esm-catalog-spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ The column names can optionally be associated with a controlled vocabulary, such

An assets object describes the columns in the CSV file relevant for opening the actual data files.

| Element | Type | Description |
| ------------------ | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| column_name | string | **REQUIRED.** The name of the column containing the path to the asset. Must be in the header of the CSV file. |
| format | string | The data format. Valid values are `netcdf`, `zarr`, or `reference` ([`kerchunk`](https://github.com/fsspec/kerchunk) reference files). If specified, it means that all data in the catalog is the same type. |
| format_column_name | string | The column name which contains the data format, allowing for variable data types in one catalog. Mutually exclusive with `format`. |
| Element | Type | Description |
| ------------------ | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| column_name | string | **REQUIRED.** The name of the column containing the path to the asset. Must be in the header of the CSV file. |
| format | string | The data format. Valid values are `netcdf`, `zarr`, `opendap`, or `reference` ([`kerchunk`](https://github.com/fsspec/kerchunk) reference files). If specified, it means that all data in the catalog is the same type. |
| format_column_name | string | The column name which contains the data format, allowing for variable data types in one catalog. Mutually exclusive with `format`. |

### Aggregation Control Object

Expand Down
1 change: 1 addition & 0 deletions intake_esm/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class DataFormat(str, enum.Enum):
netcdf = 'netcdf'
zarr = 'zarr'
reference = 'reference'
opendap = 'opendap'

class Config:
validate_all = True
Expand Down
2 changes: 1 addition & 1 deletion intake_esm/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def _open_dataset(
xarray_open_kwargs['backend_kwargs']['consolidated'] = False
urlpath = 'reference://'

if xarray_open_kwargs['engine'] == 'zarr':
if xarray_open_kwargs['engine'] in 'zarr' or data_format == 'opendap':
url = urlpath
elif fsspec.utils.can_be_local(urlpath):
url = fsspec.open_local(urlpath, **storage_options)
Expand Down
86 changes: 86 additions & 0 deletions tests/sample-catalogs/noaa-pathfinder-opendap.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{
"esmcat_version": "0.1.0",
"id": "noaa-pathfinder-opendap",
"description": "This is an ESM catalog for NOAA's Pathfinder data, publicly available on a test OPeNDAP server.",
"catalog_dict": [
{
"domain": "global",
"variable": "sst",
"first_swap": "2005001",
"last_swap": "2005008",
"scode": 482,
"path": "http://test.opendap.org/opendap/noaa/pathfinder/2005001-2005008.s0482pfv50-sst.hdf"
},
{
"domain": "global",
"variable": "sst",
"first_swap": "2005001",
"last_swap": "2005008",
"scode": 484,
"path": "http://test.opendap.org/opendap/noaa/pathfinder/2005001-2005008.s0484pfv50-sst.hdf"
},
{
"domain": "global",
"variable": "sst",
"first_swap": "2005009",
"last_swap": "2005016",
"scode": 482,
"path": "http://test.opendap.org/opendap/noaa/pathfinder/2005009-2005016.s0482pfv50-sst.hdf"
},
{
"domain": "global",
"variable": "sst",
"first_swap": "2005009",
"last_swap": "2005016",
"scode": 484,
"path": "http://test.opendap.org/opendap/noaa/pathfinder/2005009-2005016.s0484pfv50-sst.hdf"
},
{
"domain": "global",
"variable": "sst",
"first_swap": "2005017",
"last_swap": "2005024",
"scode": 482,
"path": "http://test.opendap.org/opendap/noaa/pathfinder/2005017-2005024.s0482pfv50-sst.hdf"
},
{
"domain": "global",
"variable": "sst",
"first_swap": "2005017",
"last_swap": "2005024",
"scode": 484,
"path": "http://test.opendap.org/opendap/noaa/pathfinder/2005017-2005024.s0484pfv50-sst.hdf"
}
],
"attributes": [
{
"column_name": "domain",
"vocabulary": ""
},
{
"column_name": "variable",
"vocabulary": ""
},
{
"column_name": "first_swap",
"vocabulary": ""
},
{
"column_name": "last_swap",
"vocabulary": ""
},
{
"column_name": "scode",
"vocabulary": ""
}
],
"assets": {
"column_name": "path",
"format": "opendap"
},
"aggregation_control": {
"variable_column_name": "variable",
"groupby_attrs": ["first_swap", "scode"],
"aggregations": []
}
}
9 changes: 9 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def func_multivar(ds):
cdf_cat_sample_cmip6,
mixed_cat_sample_cmip6,
multi_variable_cat,
opendap_cat_sample_noaa,
sample_df,
sample_esmcat_data,
zarr_cat_aws_cesm,
Expand Down Expand Up @@ -469,6 +470,14 @@ def funcs(ds):
)


def test_to_dask_opendap():
    """Check that an OPeNDAP-format catalog entry loads through ``to_dask``.

    The sample NOAA catalog is narrowed to the single ``sst`` asset with
    ``first_swap='2005001'`` and ``scode=482``, then opened with the
    ``pydap`` xarray engine. The resulting dataset must expose ``sst`` as a
    data variable and report at least one dask key.
    """
    datastore = intake.open_esm_datastore(opendap_cat_sample_noaa)
    subset = datastore.search(variable='sst', first_swap='2005001', scode=482)

    # The engine is passed explicitly: the test exercises the pydap backend.
    open_kwargs = {'engine': 'pydap'}
    dataset = subset.to_dask(xarray_open_kwargs=open_kwargs)

    assert 'sst' in dataset.data_vars
    # A non-empty dask key list means the data is backed by dask.
    assert len(dataset.__dask_keys__()) > 0


def test_subclassing_catalog():
class ChildCatalog(intake_esm.esm_datastore):
pass
Expand Down
1 change: 1 addition & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
cdf_cat_sample_cmip5 = os.path.join(here, 'sample-catalogs/cmip5-netcdf.json')
cdf_cat_sample_cesmle = os.path.join(here, 'sample-catalogs/cesm1-lens-netcdf.json')
catalog_dict_records = os.path.join(here, 'sample-catalogs/catalog-dict-records.json')
opendap_cat_sample_noaa = os.path.join(here, 'sample-catalogs/noaa-pathfinder-opendap.json')
zarr_cat_aws_cesm = (
'https://raw.githubusercontent.com/NCAR/cesm-lens-aws/master/intake-catalogs/aws-cesm1-le.json'
)
Expand Down

0 comments on commit 1086177

Please sign in to comment.