diff --git a/CHANGELOG.md b/CHANGELOG.md index a1cb5f1b..e4ad8461 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +### New features added + +- Add `opendap` as a possible data format.[#570](https://github.com/intake/intake-esm/pull/570) ([@aulemahal](https://github.com/aulemahal)) + ## v2022.9.18 ([full changelog](https://github.com/intake/intake-esm/compare/v2021.8.17...3e959d126663f9b8415528bfcee575967c3ef0c1)) diff --git a/ci/environment-upstream-dev.yml b/ci/environment-upstream-dev.yml index 711a301b..c0f05915 100644 --- a/ci/environment-upstream-dev.yml +++ b/ci/environment-upstream-dev.yml @@ -19,6 +19,7 @@ dependencies: - pre-commit - psutil - pydantic>=1.9 + - pydap - pyproj - pytest - pytest-cov diff --git a/ci/environment.yml b/ci/environment.yml index 7e3440e4..e58356b6 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -17,6 +17,7 @@ dependencies: - pooch - pre-commit - pydantic>=1.9 + - pydap - pytest - pytest-cov - pytest-sugar diff --git a/docs/source/reference/esm-catalog-spec.md b/docs/source/reference/esm-catalog-spec.md index 3c91b852..58b11ae6 100644 --- a/docs/source/reference/esm-catalog-spec.md +++ b/docs/source/reference/esm-catalog-spec.md @@ -85,11 +85,11 @@ The column names can optionally be associated with a controlled vocabulary, such An assets object describes the columns in the CSV file relevant for opening the actual data files. -| Element | Type | Description | -| ------------------ | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| column_name | string | **REQUIRED.** The name of the column containing the path to the asset. Must be in the header of the CSV file. | -| format | string | The data format. 
Valid values are `netcdf`, `zarr`, or `reference` ([`kerchunk`](https://github.com/fsspec/kerchunk) reference files). If specified, it means that all data in the catalog is the same type. | -| format_column_name | string | The column name which contains the data format, allowing for variable data types in one catalog. Mutually exclusive with `format`. | +| Element | Type | Description | +| ------------------ | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| column_name | string | **REQUIRED.** The name of the column containing the path to the asset. Must be in the header of the CSV file. | +| format | string | The data format. Valid values are `netcdf`, `zarr`, `opendap` or `reference` ([`kerchunk`](https://github.com/fsspec/kerchunk) reference files). If specified, it means that all data in the catalog is the same type. | +| format_column_name | string | The column name which contains the data format, allowing for variable data types in one catalog. Mutually exclusive with `format`. 
| ### Aggregation Control Object diff --git a/intake_esm/cat.py b/intake_esm/cat.py index 08b1fe7f..5ee793bb 100644 --- a/intake_esm/cat.py +++ b/intake_esm/cat.py @@ -49,6 +49,7 @@ class DataFormat(str, enum.Enum): netcdf = 'netcdf' zarr = 'zarr' reference = 'reference' + opendap = 'opendap' class Config: validate_all = True diff --git a/intake_esm/source.py b/intake_esm/source.py index d5861910..3730cc1d 100644 --- a/intake_esm/source.py +++ b/intake_esm/source.py @@ -55,7 +55,7 @@ def _open_dataset( xarray_open_kwargs['backend_kwargs']['consolidated'] = False urlpath = 'reference://' - if xarray_open_kwargs['engine'] == 'zarr': + if xarray_open_kwargs['engine'] == 'zarr' or data_format == 'opendap': url = urlpath elif fsspec.utils.can_be_local(urlpath): url = fsspec.open_local(urlpath, **storage_options) diff --git a/tests/sample-catalogs/noaa-pathfinder-opendap.json b/tests/sample-catalogs/noaa-pathfinder-opendap.json new file mode 100644 index 00000000..c0da7d8d --- /dev/null +++ b/tests/sample-catalogs/noaa-pathfinder-opendap.json @@ -0,0 +1,86 @@ +{ + "esmcat_version": "0.1.0", + "id": "noaa-pathfinder-opendap", + "description": "This is an ESM catalog for NOAA's pathfinder data publicly available on a test opendap server.", + "catalog_dict": [ + { + "domain": "global", + "variable": "sst", + "first_swap": "2005001", + "last_swap": "2005008", + "scode": 482, + "path": "http://test.opendap.org/opendap/noaa/pathfinder/2005001-2005008.s0482pfv50-sst.hdf" + }, + { + "domain": "global", + "variable": "sst", + "first_swap": "2005001", + "last_swap": "2005008", + "scode": 484, + "path": "http://test.opendap.org/opendap/noaa/pathfinder/2005001-2005008.s0484pfv50-sst.hdf" + }, + { + "domain": "global", + "variable": "sst", + "first_swap": "2005009", + "last_swap": "2005016", + "scode": 482, + "path": "http://test.opendap.org/opendap/noaa/pathfinder/2005009-2005016.s0482pfv50-sst.hdf" + }, + { + "domain": "global", + "variable": "sst", + "first_swap": "2005009", + 
"last_swap": "2005016", + "scode": 484, + "path": "http://test.opendap.org/opendap/noaa/pathfinder/2005009-2005016.s0484pfv50-sst.hdf" + }, + { + "domain": "global", + "variable": "sst", + "first_swap": "2005017", + "last_swap": "2005024", + "scode": 482, + "path": "http://test.opendap.org/opendap/noaa/pathfinder/2005017-2005024.s0482pfv50-sst.hdf" + }, + { + "domain": "global", + "variable": "sst", + "first_swap": "2005017", + "last_swap": "2005024", + "scode": 484, + "path": "http://test.opendap.org/opendap/noaa/pathfinder/2005017-2005024.s0484pfv50-sst.hdf" + } + ], + "attributes": [ + { + "column_name": "domain", + "vocabulary": "" + }, + { + "column_name": "variable", + "vocabulary": "" + }, + { + "column_name": "first_swap", + "vocabulary": "" + }, + { + "column_name": "last_swap", + "vocabulary": "" + }, + { + "column_name": "scode", + "vocabulary": "" + } + ], + "assets": { + "column_name": "path", + "format": "opendap" + }, + "aggregation_control": { + "variable_column_name": "variable", + "groupby_attrs": ["first_swap", "scode"], + "aggregations": [] + } +} diff --git a/tests/test_core.py b/tests/test_core.py index 50ea4ac5..217eec82 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -43,6 +43,7 @@ def func_multivar(ds): cdf_cat_sample_cmip6, mixed_cat_sample_cmip6, multi_variable_cat, + opendap_cat_sample_noaa, sample_df, sample_esmcat_data, zarr_cat_aws_cesm, @@ -469,6 +470,14 @@ def funcs(ds): ) +def test_to_dask_opendap(): + cat = intake.open_esm_datastore(opendap_cat_sample_noaa) + new_cat = cat.search(variable='sst', first_swap='2005001', scode=482) + ds = new_cat.to_dask(xarray_open_kwargs=dict(engine='pydap')) + assert 'sst' in ds.data_vars + assert len(ds.__dask_keys__()) > 0 + + def test_subclassing_catalog(): class ChildCatalog(intake_esm.esm_datastore): pass diff --git a/tests/utils.py b/tests/utils.py index 01380fb3..dd55668c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -9,6 +9,7 @@ cdf_cat_sample_cmip5 = os.path.join(here, 
'sample-catalogs/cmip5-netcdf.json') cdf_cat_sample_cesmle = os.path.join(here, 'sample-catalogs/cesm1-lens-netcdf.json') catalog_dict_records = os.path.join(here, 'sample-catalogs/catalog-dict-records.json') +opendap_cat_sample_noaa = os.path.join(here, 'sample-catalogs/noaa-pathfinder-opendap.json') zarr_cat_aws_cesm = ( 'https://raw.githubusercontent.com/NCAR/cesm-lens-aws/master/intake-catalogs/aws-cesm1-le.json' )