Skip to content

Commit

Permalink
Ensure global attributes added by intake-esm are compatible with netCDF and Zarr (#509)
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Aug 23, 2022
1 parent bb15984 commit ba4bb59
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 7 deletions.
1 change: 0 additions & 1 deletion intake_esm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,6 @@ def to_dataset_dict(
)

xarray_open_kwargs = xarray_open_kwargs or {}

xarray_combine_by_coords_kwargs = xarray_combine_by_coords_kwargs or {}
cdf_kwargs, zarr_kwargs = kwargs.get('cdf_kwargs'), kwargs.get('zarr_kwargs')

Expand Down
11 changes: 7 additions & 4 deletions intake_esm/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,18 @@ def _open_dataset(
ds.attrs[OPTIONS['vars_key']] = varname

ds = _expand_dims(expand_dims, ds)
ds = _update_attrs(additional_attrs, ds)
ds = _update_attrs(additional_attrs=additional_attrs, ds=ds)
return ds


def _update_attrs(*, additional_attrs, ds):
    """Merge extra metadata into ``ds.attrs`` as prefixed string attributes.

    Each key is stored as ``"<OPTIONS['attrs_prefix']>:<key>"``. Every value
    is converted to a string: strings and non-iterable values are formatted
    as-is, while any other iterable is flattened into a comma-separated
    string of its elements.

    Parameters
    ----------
    additional_attrs : dict or None
        Mapping of attribute name to value. ``None`` or an empty mapping
        adds nothing.
    ds : object exposing a dict-like ``attrs`` attribute
        The dataset to update. Its ``attrs`` is replaced with a new dict
        containing the existing entries plus the prefixed ones.

    Returns
    -------
    The same ``ds`` object that was passed in.
    """

    def _as_text(value):
        # Scalars (and strings, which are iterable but must not be split
        # into characters) are formatted directly.
        if isinstance(value, str) or not hasattr(value, '__iter__'):
            return f'{value}'
        # Stringify each element first: ``str.join`` raises TypeError on
        # non-string items such as ints or floats.
        return ','.join(str(item) for item in value)

    additional_attrs = additional_attrs or {}
    if additional_attrs:
        prefix = OPTIONS['attrs_prefix']
        additional_attrs = {
            f'{prefix}:{key}': _as_text(value) for key, value in additional_attrs.items()
        }
    # Prefixed entries win over existing attributes with the same name.
    ds.attrs = {**ds.attrs, **additional_attrs}
    return ds
Expand Down Expand Up @@ -229,7 +232,7 @@ def _open_dataset(self):
},
requested_variables=self.requested_variables,
data_format=record['_data_format_'],
additional_attrs=record.to_dict(),
additional_attrs=record[~record.isnull()].to_dict(),
)
for _, record in self.df.iterrows()
]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,4 +472,4 @@ def test_options():
_, ds = scat.to_dataset_dict(
xarray_open_kwargs={'backend_kwargs': {'storage_options': {'anon': True}}},
).popitem()
assert ds.attrs['myprefix/component'] == 'atm'
assert ds.attrs['myprefix:component'] == 'atm'
19 changes: 18 additions & 1 deletion tests/test_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
import xarray

from intake_esm.source import _get_xarray_open_kwargs, _open_dataset
from intake_esm.source import _get_xarray_open_kwargs, _open_dataset, _update_attrs

here = os.path.abspath(os.path.dirname(__file__))

Expand Down Expand Up @@ -54,3 +54,20 @@ def test_open_dataset_kerchunk(kerchunk_file=kerchunk_file):
xarray_open_kwargs=xarray_open_kwargs,
).compute()
assert isinstance(ds, xarray.Dataset)


@pytest.mark.parametrize('data_format', ['zarr', 'netcdf'])
@pytest.mark.parametrize('attrs', [{}, {'units': 'K'}, {'variables': ['foo', 'bar']}])
def test_update_attrs(tmp_path, data_format, attrs):
    """Check that attributes added by ``_update_attrs`` survive a write/read cycle.

    The dataset is written under ``tmp_path`` as netCDF or Zarr (depending on
    ``data_format``), reopened through ``_open_dataset``, and its global
    attributes are compared with those of the dataset that was written.
    """
    use_netcdf = data_format == 'netcdf'
    target = tmp_path / 'test.nc' if use_netcdf else tmp_path / 'test.zarr'
    fpath = str(target)

    ds = _update_attrs(ds=_common_open(f1), additional_attrs=attrs)

    # Pick the writer that matches the format under test.
    write = ds.to_netcdf if use_netcdf else ds.to_zarr
    write(fpath)

    open_kwargs = _get_xarray_open_kwargs(data_format=data_format)
    reopened = _open_dataset(fpath, 'tasmax', xarray_open_kwargs=open_kwargs).compute()
    assert reopened.attrs == ds.attrs

0 comments on commit ba4bb59

Please sign in to comment.