From d80a9843fa898727cc287a277ab267e3a13639aa Mon Sep 17 00:00:00 2001 From: Alex Kerney Date: Fri, 5 May 2023 10:56:40 -0400 Subject: [PATCH] Set default ZarrPlugin prefix to /zarr (#188) * Set default ZarrPlugin prefix to /zarr Closes #182 * Sort Zarr coordinates * Add error message for zmetadata test when coordinates are swapped * xfail on existing zmetadata test, and add a new test that sorts xarray zmeta --- tests/test_rest_api.py | 24 ++++++------ tests/test_zarr_compat.py | 61 ++++++++++++++++++++++++++++++- tests/utils.py | 5 ++- xpublish/plugins/included/zarr.py | 2 +- xpublish/utils/zarr.py | 2 +- 5 files changed, 77 insertions(+), 17 deletions(-) diff --git a/tests/test_rest_api.py b/tests/test_rest_api.py index da818a3d..b0ec263a 100644 --- a/tests/test_rest_api.py +++ b/tests/test_rest_api.py @@ -288,7 +288,7 @@ def test_repr(airtemp_ds, airtemp_app_client): def test_zmetadata(airtemp_ds, airtemp_app_client): - response = airtemp_app_client.get('/.zmetadata') + response = airtemp_app_client.get('/zarr/.zmetadata') assert response.status_code == 200 assert json.dumps(response.json()) == json.dumps( jsonify_zmetadata(airtemp_ds, create_zmetadata(airtemp_ds)) @@ -296,34 +296,34 @@ def test_zmetadata(airtemp_ds, airtemp_app_client): def test_bad_key(airtemp_app_client): - response = airtemp_app_client.get('/notakey') + response = airtemp_app_client.get('/zarr/notakey') assert response.status_code == 404 def test_zgroup(airtemp_app_client): - response = airtemp_app_client.get('/.zgroup') + response = airtemp_app_client.get('/zarr/.zgroup') assert response.status_code == 200 def test_zarray(airtemp_app_client): - response = airtemp_app_client.get('/air/.zarray') + response = airtemp_app_client.get('/zarr/air/.zarray') assert response.status_code == 200 def test_zattrs(airtemp_app_client): - response = airtemp_app_client.get('/air/.zattrs') + response = airtemp_app_client.get('/zarr/air/.zattrs') assert response.status_code == 200 - response = airtemp_app_client.get('/.zattrs') + response = airtemp_app_client.get('/zarr/.zattrs') assert response.status_code == 200 def test_get_chunk(airtemp_app_client): - response = airtemp_app_client.get('/air/0.0.0') + response = airtemp_app_client.get('/zarr/air/0.0.0') assert response.status_code == 200 def test_array_group_raises_404(airtemp_app_client): - response = airtemp_app_client.get('/air/.zgroup') + response = airtemp_app_client.get('/zarr/air/.zgroup') assert response.status_code == 404 @@ -333,12 +333,12 @@ def test_cache(airtemp_ds): client = TestClient(rest.app) - response1 = client.get('/air/0.0.0') + response1 = client.get('/zarr/air/0.0.0') assert response1.status_code == 200 assert '/air/0.0.0' in rest.cache # test that we can retrieve - response2 = client.get('/air/0.0.0') + response2 = client.get('/zarr/air/0.0.0') assert response2.status_code == 200 assert response1.content == response2.content @@ -377,7 +377,7 @@ def test_ds_dict_keys(ds_dict, ds_dict_app_client): assert response.status_code == 200 assert response.json() == list(ds_dict) - response = ds_dict_app_client.get('/datasets/not_in_dict') + response = ds_dict_app_client.get('/datasets/zarr/not_in_dict') assert response.status_code == 404 @@ -386,7 +386,7 @@ def test_ds_dict_cache(ds_dict): client = TestClient(rest.app) - response1 = client.get('/datasets/ds1/var/0') + response1 = client.get('/datasets/ds1/zarr/var/0') assert response1.status_code == 200 assert 'ds1/var/0' in rest.cache diff --git a/tests/test_zarr_compat.py b/tests/test_zarr_compat.py index 7e92dbcd..6616bdce 100644 --- a/tests/test_zarr_compat.py +++ b/tests/test_zarr_compat.py @@ -41,7 +41,19 @@ def test_zmetadata_identical(start, end, freq, nlats, nlons, var_const, calendar @pytest.mark.parametrize( 'start, end, freq, nlats, nlons, var_const, calendar, use_cftime', [ - ('2018-01-01', '2021-01-01', 'MS', 15, 30, True, 'standard', False), + pytest.param( + '2018-01-01', + '2021-01-01', + 'MS', + 15, + 30, + True, + 'standard', + False, + marks=pytest.mark.xfail( + reason="Xarray's ordering of metadata coords in a string can very" + ), + ), ], ) def test_zmetadata_identical_coords( @@ -65,6 +77,53 @@ def test_zmetadata_identical_coords( mapper = TestMapper(SingleDatasetRest(ds).app) actual = json.loads(mapper['.zmetadata'].decode()) expected = json.loads(zarr_dict['.zmetadata'].decode()) + assert ( + actual == expected + ), "Zarr metadata did not match, likely because array coordinates aren't sorted by Xarray" + + +@pytest.mark.parametrize( + 'start, end, freq, nlats, nlons, var_const, calendar, use_cftime', + [ + ( + '2018-01-01', + '2021-01-01', + 'MS', + 15, + 30, + True, + 'standard', + False, + ), + ], +) +def test_zmetadata_identical_coords_sorted( + start, end, freq, nlats, nlons, var_const, calendar, use_cftime +): + """Test that zmetadata passes when coords are explicitly sorted""" + ds = create_dataset( + start=start, + end=end, + freq=freq, + nlats=nlats, + nlons=nlons, + var_const=var_const, + use_cftime=use_cftime, + calendar=calendar, + use_xy_dim=True, + ) + + ds = ds.chunk(ds.dims) + zarr_dict = {} + ds.to_zarr(zarr_dict, consolidated=True) + mapper = TestMapper(SingleDatasetRest(ds).app) + actual = json.loads(mapper['.zmetadata'].decode()) + expected = json.loads(zarr_dict['.zmetadata'].decode()) + + for key in ['tmin/.zattrs', 'tmax/.zattrs']: + coords = expected['metadata'][key]['coordinates'] + expected['metadata'][key]['coordinates'] = ' '.join(sorted(coords.split(' '))) + assert actual == expected diff --git a/tests/utils.py b/tests/utils.py index e4fc14d0..3d3c2889 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -16,9 +16,10 @@ class TestMapper(TestClient, BaseStore): """ def __getitem__(self, key): - response = self.get(key) + zarr_key = f'/zarr/{key}' + response = self.get(zarr_key) if response.status_code != 200: - raise KeyError('{} not found. status_code = {}'.format(key, response.status_code)) + raise KeyError('{} not found. status_code = {}'.format(zarr_key, response.status_code)) return response.content def __delitem__(self, key): diff --git a/xpublish/plugins/included/zarr.py b/xpublish/plugins/included/zarr.py index 8e4ee243..86d97405 100644 --- a/xpublish/plugins/included/zarr.py +++ b/xpublish/plugins/included/zarr.py @@ -23,7 +23,7 @@ class ZarrPlugin(Plugin): name = 'zarr' - dataset_router_prefix: str = '' + dataset_router_prefix: str = '/zarr' dataset_router_tags: Sequence[str] = ['zarr'] @hookimpl diff --git a/xpublish/utils/zarr.py b/xpublish/utils/zarr.py index da6feef6..aaa8060f 100644 --- a/xpublish/utils/zarr.py +++ b/xpublish/utils/zarr.py @@ -58,7 +58,7 @@ def _extract_dataarray_coords(da, zattrs): nondim_coords = set(da.coords) - set(da.dims) if len(nondim_coords) > 0 and da.name not in nondim_coords: - coords = ' '.join(list(nondim_coords)) + coords = ' '.join(sorted(list(nondim_coords))) zattrs['coordinates'] = encode_zarr_attr_value(coords) return zattrs