diff --git a/docs/examples/grib_metadata_object.ipynb b/docs/examples/grib_metadata_object.ipynb index 1a6a9fc2..ab2e33d8 100644 --- a/docs/examples/grib_metadata_object.ipynb +++ b/docs/examples/grib_metadata_object.ipynb @@ -137,7 +137,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -250,7 +250,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -263,108 +263,6 @@ "md_copy" ] }, - { - "cell_type": "raw", - "id": "388d83a5-8165-4b3d-a121-2c7f2cb578e1", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [], - "vscode": { - "languageId": "raw" - } - }, - "source": [ - "By default :py:meth:`~data.readers.grib.metadata.GribMetadata.override` is called with ``headers_only_clone=True`` generating the new handle with all the data values (and some related information) removed. With this the resulting object can be significantly smaller, especially if the data section is large. The downside is that now the value related keys either cannot be accessed or give back wrong values. E.g when using the \"average\" key we get:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "568fe5c9-df84-4d49-aee1-ada0e6a15c28", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(279.70703560965404, 47485.4296875)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "md[\"average\"], md_copy[\"average\"]" - ] - }, - { - "cell_type": "markdown", - "id": "8796976f-a02d-4101-8cff-c14a7730d82c", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "To get a copy without shrinking the GRIB handle use ``headers_only_clone=False``." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "ba989ad8-e034-4168-bc29-1d73877edad2", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "md_copy_full = md.override(headers_only_clone=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "15d723c7-2f73-4c14-ab87-2a94de2379f8", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "279.70703560965404" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "md_copy_full[\"average\"]" - ] - }, { "cell_type": "markdown", "id": "3240cef2-baa9-4a87-a83a-dafa97b78e43", @@ -396,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "40c6d232-03de-402b-82bf-8647e8a7bece", "metadata": { "editable": true, @@ -412,7 +310,7 @@ "('z', 850)" ] }, - "execution_count": 9, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -438,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "id": "ef78a3ec-4ea2-4ff5-8c90-e60b5e07e77f", "metadata": { "editable": true, @@ -454,7 +352,7 @@ "('t', 1000)" ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -507,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "id": "cb59ad5f-c48b-4943-984d-3abdf48fda8d", "metadata": { "editable": true, @@ -516,15 +414,7 @@ }, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "array.shape[0]= 84 len(metadata)= 1\n" - ] - } - ], + "outputs": [], "source": [ "from earthkit.data import FieldList\n", "import numpy as np\n", @@ -537,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "id": "85c32bfb-c929-404f-add9-9adae40418d2", "metadata": { "editable": true, @@ -606,7 +496,7 @@ "0 an 0 regular_ll " ] }, - "execution_count": 12, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -630,12 +520,12 @@ } }, "source": [ - "Please note that the resulting :py:class:`~data.sources.array_list.ArrayFieldList` always contains a :py:class:`~data.readers.grib.metadata.RestrictedGribMetadata` object for each field. These objects possess their own GRIB handles, which is ensured by creating a copy with ``override(headers_only_clone=True)`` when needed. On top of that metadata access is limited to keys not related to data values. Getting metadata on any other keys will throw an exception. " + "Please note that the resulting :py:class:`~data.sources.array_list.ArrayFieldList` always contains a :py:class:`~data.readers.grib.metadata.RestrictedGribMetadata` object for each field. These objects possess their own GRIB handles, which is ensured by creating a copy with ``override()`` when needed. On top of that metadata access is limited to keys not related to data values. Getting metadata on any other keys will throw an exception. " ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "id": "c6fe87ed-ee88-4f4d-a2b6-9401b364e2df", "metadata": { "editable": true, @@ -648,10 +538,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 13, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -662,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "id": "27686ac4-9382-4916-ad0e-be96a649d034", "metadata": { "editable": true, @@ -678,7 +568,7 @@ "'Wind speed'" ] }, - "execution_count": 14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -689,7 +579,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "id": "dc28fa77-4020-431f-ad37-e480a69f9d7f", "metadata": { "editable": true, @@ -730,7 +620,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "id": "8eab3462-3661-4fc1-9d23-8be05dc99cd8", "metadata": { "editable": true, @@ -746,7 +636,7 @@ "7.450183054360252" ] }, - "execution_count": 16, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/release_notes/version_0.11_updates.rst b/docs/release_notes/version_0.11_updates.rst index 99a6ce6f..8570109c 100644 --- a/docs/release_notes/version_0.11_updates.rst +++ b/docs/release_notes/version_0.11_updates.rst @@ -2,6 +2,15 @@ Version 0.11 Updates ///////////////////////// +Version 0.11.4 +=============== + +Fixes +++++++ + +- Enforced the use of ``headers_only_clone=False`` when calling :meth:`GribMetadata.override() `. It was a necessary step to fix issues when writing :py:class:`ArrayField`\ containing :class:`~data.readers.grib.metadata.grib.GribMetadata` to disk. This is considered a temporary change until the issues with ``headers_only_clone`` are sorted out (:pr:`555`). + + Version 0.11.3 =============== @@ -13,8 +22,6 @@ Fixes - Increased the minimum version of ``multiurl`` to 0.3.3 - - Version 0.11.2 =============== diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index f7e9ce3c..557259c8 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -379,6 +379,10 @@ def _copy_key(self, target_handle, key): def override(self, *args, headers_only_clone=True, **kwargs): d = dict(*args, **kwargs) + # using headers_only_clone=True can cause problems when we want to write GRIB + # to disk or modify the generated handle. Until it is fixed, we use headers_only_clone=False. + headers_only_clone = False + new_value_size = None # extra = None gridspec = d.pop("gridspec", None) diff --git a/tests/array_fieldlist/test_numpy_fl_write.py b/tests/array_fieldlist/test_numpy_fl_write.py index 7059579e..48d7c108 100644 --- a/tests/array_fieldlist/test_numpy_fl_write.py +++ b/tests/array_fieldlist/test_numpy_fl_write.py @@ -22,6 +22,7 @@ from earthkit.data.testing import ARRAY_BACKENDS from earthkit.data.testing import check_array_type from earthkit.data.testing import earthkit_examples_file +from earthkit.data.testing import earthkit_test_data_file from earthkit.data.testing import get_array_namespace here = os.path.dirname(__file__) @@ -225,6 +226,63 @@ def test_array_fl_grib_write_bits_per_value(array_backend, _kwargs, expected_val assert ds1.metadata("bitsPerValue") == [expected_value] * len(ds) +@pytest.mark.parametrize( + "filename,shape", + [ + (earthkit_examples_file("test.grib"), (11, 19)), + (earthkit_test_data_file("O32_global.grib1"), (5248,)), + (earthkit_test_data_file("O32_global.grib2"), (5248,)), + ], +) +def test_array_fl_grib_single_write_to_path(filename, shape): + ds = from_source("file", filename) + + assert len(ds) >= 1 + v1 = ds[0].values + 1 + + md = ds[0].metadata() + md1 = md.override(shortName="msl") + r = FieldList.from_array(v1, md1) + assert r[0].shape == shape + + with temp_file() as tmp: + r.save(tmp) + assert os.path.exists(tmp) + r_tmp = from_source("file", tmp) + # r_tmp = r_tmp.to_fieldlist(array_backend=array_backend) + assert r_tmp[0].shape == shape + assert r_tmp[0].metadata("shortName") == "msl" + v_tmp = r_tmp[0].values + assert np.allclose(v1, v_tmp) + + +@pytest.mark.parametrize( + "filename,shape", + [ + (earthkit_examples_file("test.grib"), (11, 19)), + (earthkit_test_data_file("O32_global.grib1"), (5248,)), + (earthkit_test_data_file("O32_global.grib2"), (5248,)), + ], +) +@pytest.mark.parametrize( + "_kwargs,expected_value", + [({}, None), ({"bits_per_value": 8}, 8), ({"bits_per_value": None}, None)], +) +def test_array_fl_grib_single_write_bits_per_value(filename, shape, _kwargs, expected_value): + ds0 = from_source("file", filename) + + ds = ds0.from_fields([ds0[0].copy()]) + assert ds[0].shape == shape + + if expected_value is None: + expected_value = ds[0].metadata("bitsPerValue") + + with temp_file() as tmp: + ds.save(tmp, **_kwargs) + ds1 = from_source("file", tmp) + assert ds1.metadata("bitsPerValue") == [expected_value] * len(ds) + + if __name__ == "__main__": from earthkit.data.testing import main diff --git a/tests/core/test_metadata.py b/tests/core/test_metadata.py index 45344009..44f67351 100644 --- a/tests/core/test_metadata.py +++ b/tests/core/test_metadata.py @@ -263,6 +263,7 @@ def test_grib_metadata_override_invalid(): assert "EncodingError" in e.typename +@pytest.mark.skipif(True, reason="headers_only_clone has to be fixed") def test_grib_metadata_override_headers_only_true(): ds = from_source("file", earthkit_examples_file("test.grib")) ref_size = ds[0].metadata("totalLength") diff --git a/tests/data/O32_global.grib1 b/tests/data/O32_global.grib1 new file mode 100644 index 00000000..77386853 Binary files /dev/null and b/tests/data/O32_global.grib1 differ diff --git a/tests/data/O32_global.grib2 b/tests/data/O32_global.grib2 new file mode 100644 index 00000000..d3466d3b Binary files /dev/null and b/tests/data/O32_global.grib2 differ