Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue when cannot write GRIB based array field to disk #555

Merged
merged 2 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 19 additions & 129 deletions docs/examples/grib_metadata_object.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
{
"data": {
"text/plain": [
"<earthkit.data.readers.grib.metadata.GribFieldMetadata at 0x290e35390>"
"<earthkit.data.readers.grib.metadata.GribFieldMetadata at 0x294e37ee0>"
]
},
"execution_count": 3,
Expand Down Expand Up @@ -250,7 +250,7 @@
{
"data": {
"text/plain": [
"<earthkit.data.readers.grib.metadata.StandAloneGribMetadata at 0x290e36080>"
"<earthkit.data.readers.grib.metadata.StandAloneGribMetadata at 0x294e7cee0>"
]
},
"execution_count": 5,
Expand All @@ -263,108 +263,6 @@
"md_copy"
]
},
{
"cell_type": "raw",
"id": "388d83a5-8165-4b3d-a121-2c7f2cb578e1",
"metadata": {
"editable": true,
"raw_mimetype": "text/restructuredtext",
"slideshow": {
"slide_type": ""
},
"tags": [],
"vscode": {
"languageId": "raw"
}
},
"source": [
"By default :py:meth:`~data.readers.grib.metadata.GribMetadata.override` is called with ``headers_only_clone=True`` generating the new handle with all the data values (and some related information) removed. With this the resulting object can be significantly smaller, especially if the data section is large. The downside is that now the value related keys either cannot be accessed or give back wrong values. E.g when using the \"average\" key we get:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "568fe5c9-df84-4d49-aee1-ada0e6a15c28",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(279.70703560965404, 47485.4296875)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"md[\"average\"], md_copy[\"average\"]"
]
},
{
"cell_type": "markdown",
"id": "8796976f-a02d-4101-8cff-c14a7730d82c",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"To get a copy without shrinking the GRIB handle use ``headers_only_clone=False``."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ba989ad8-e034-4168-bc29-1d73877edad2",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"md_copy_full = md.override(headers_only_clone=False)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "15d723c7-2f73-4c14-ab87-2a94de2379f8",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"279.70703560965404"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"md_copy_full[\"average\"]"
]
},
{
"cell_type": "markdown",
"id": "3240cef2-baa9-4a87-a83a-dafa97b78e43",
Expand Down Expand Up @@ -396,7 +294,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"id": "40c6d232-03de-402b-82bf-8647e8a7bece",
"metadata": {
"editable": true,
Expand All @@ -412,7 +310,7 @@
"('z', 850)"
]
},
"execution_count": 9,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -438,7 +336,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"id": "ef78a3ec-4ea2-4ff5-8c90-e60b5e07e77f",
"metadata": {
"editable": true,
Expand All @@ -454,7 +352,7 @@
"('t', 1000)"
]
},
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -507,7 +405,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 8,
"id": "cb59ad5f-c48b-4943-984d-3abdf48fda8d",
"metadata": {
"editable": true,
Expand All @@ -516,15 +414,7 @@
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array.shape[0]= 84 len(metadata)= 1\n"
]
}
],
"outputs": [],
"source": [
"from earthkit.data import FieldList\n",
"import numpy as np\n",
Expand All @@ -537,7 +427,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 9,
"id": "85c32bfb-c929-404f-add9-9adae40418d2",
"metadata": {
"editable": true,
Expand Down Expand Up @@ -606,7 +496,7 @@
"0 an 0 regular_ll "
]
},
"execution_count": 12,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -630,12 +520,12 @@
}
},
"source": [
"Please note that the resulting :py:class:`~data.sources.array_list.ArrayFieldList` always contains a :py:class:`~data.readers.grib.metadata.RestrictedGribMetadata` object for each field. These objects possess their own GRIB handles, which is ensured by creating a copy with ``override(headers_only_clone=True)`` when needed. On top of that metadata access is limited to keys not related to data values. Getting metadata on any other keys will throw an exception. "
"Please note that the resulting :py:class:`~data.sources.array_list.ArrayFieldList` always contains a :py:class:`~data.readers.grib.metadata.RestrictedGribMetadata` object for each field. These objects possess their own GRIB handles, which is ensured by creating a copy with ``override()`` when needed. On top of that metadata access is limited to keys not related to data values. Getting metadata on any other keys will throw an exception. "
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 10,
"id": "c6fe87ed-ee88-4f4d-a2b6-9401b364e2df",
"metadata": {
"editable": true,
Expand All @@ -648,10 +538,10 @@
{
"data": {
"text/plain": [
"<earthkit.data.readers.grib.metadata.RestrictedGribMetadata at 0x290e7d990>"
"<earthkit.data.readers.grib.metadata.RestrictedGribMetadata at 0x294e7dc60>"
]
},
"execution_count": 13,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -662,7 +552,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 11,
"id": "27686ac4-9382-4916-ad0e-be96a649d034",
"metadata": {
"editable": true,
Expand All @@ -678,7 +568,7 @@
"'Wind speed'"
]
},
"execution_count": 14,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -689,7 +579,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 12,
"id": "dc28fa77-4020-431f-ad37-e480a69f9d7f",
"metadata": {
"editable": true,
Expand Down Expand Up @@ -730,7 +620,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 13,
"id": "8eab3462-3661-4fc1-9d23-8be05dc99cd8",
"metadata": {
"editable": true,
Expand All @@ -746,7 +636,7 @@
"7.450183054360252"
]
},
"execution_count": 16,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
11 changes: 9 additions & 2 deletions docs/release_notes/version_0.11_updates.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@ Version 0.11 Updates
/////////////////////////


Version 0.11.4
===============

Fixes
++++++

- Enforced the use of ``headers_only_clone=False`` when calling :meth:`GribMetadata.override() <data.readers.grib.metadata.GribMetadata.override>`. This was necessary to fix issues when writing an :py:class:`ArrayField` containing :class:`~data.readers.grib.metadata.GribMetadata` to disk. This is considered a temporary change until the issues with ``headers_only_clone`` are sorted out (:pr:`555`).


Version 0.11.3
===============

Expand All @@ -13,8 +22,6 @@ Fixes
- Increased the minimum version of ``multiurl`` to 0.3.3




Version 0.11.2
===============

Expand Down
4 changes: 4 additions & 0 deletions src/earthkit/data/readers/grib/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,10 @@ def _copy_key(self, target_handle, key):
def override(self, *args, headers_only_clone=True, **kwargs):
d = dict(*args, **kwargs)

# using headers_only_clone=True can cause problems when we want to write GRIB
# to disk or modify the generated handle. Until it is fixed, we use headers_only_clone=False.
headers_only_clone = False

new_value_size = None
# extra = None
gridspec = d.pop("gridspec", None)
Expand Down
58 changes: 58 additions & 0 deletions tests/array_fieldlist/test_numpy_fl_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from earthkit.data.testing import ARRAY_BACKENDS
from earthkit.data.testing import check_array_type
from earthkit.data.testing import earthkit_examples_file
from earthkit.data.testing import earthkit_test_data_file
from earthkit.data.testing import get_array_namespace

here = os.path.dirname(__file__)
Expand Down Expand Up @@ -225,6 +226,63 @@ def test_array_fl_grib_write_bits_per_value(array_backend, _kwargs, expected_val
assert ds1.metadata("bitsPerValue") == [expected_value] * len(ds)


@pytest.mark.parametrize(
    "filename,shape",
    [
        (earthkit_examples_file("test.grib"), (11, 19)),
        (earthkit_test_data_file("O32_global.grib1"), (5248,)),
        (earthkit_test_data_file("O32_global.grib2"), (5248,)),
    ],
)
def test_array_fl_grib_single_write_to_path(filename, shape):
    """Write a single array field built from overridden GRIB metadata to a file.

    The first field of ``filename`` is shifted by one, paired with a metadata
    copy whose ``shortName`` is overridden to ``"msl"``, saved to a temporary
    file, and read back. The reloaded field must have the expected ``shape``,
    the overridden ``shortName`` and values close to the ones written.
    """
    source = from_source("file", filename)
    assert len(source) >= 1

    # Values that are expected to survive the round trip through the file.
    shifted_values = source[0].values + 1

    source_md = source[0].metadata()
    overridden_md = source_md.override(shortName="msl")

    array_fl = FieldList.from_array(shifted_values, overridden_md)
    assert array_fl[0].shape == shape

    with temp_file() as tmp:
        array_fl.save(tmp)
        assert os.path.exists(tmp)

        reloaded = from_source("file", tmp)
        assert reloaded[0].shape == shape
        assert reloaded[0].metadata("shortName") == "msl"

        reloaded_values = reloaded[0].values
        assert np.allclose(shifted_values, reloaded_values)


@pytest.mark.parametrize(
    "filename,shape",
    [
        (earthkit_examples_file("test.grib"), (11, 19)),
        (earthkit_test_data_file("O32_global.grib1"), (5248,)),
        (earthkit_test_data_file("O32_global.grib2"), (5248,)),
    ],
)
@pytest.mark.parametrize(
    "_kwargs,expected_value",
    [({}, None), ({"bits_per_value": 8}, 8), ({"bits_per_value": None}, None)],
)
def test_array_fl_grib_single_write_bits_per_value(filename, shape, _kwargs, expected_value):
    """Check ``bitsPerValue`` after saving a single copied field.

    A one-field list is built from a copy of the first field of ``filename``
    and saved with ``_kwargs``. When ``expected_value`` is ``None`` the test
    expects the written file to report the same ``bitsPerValue`` as the
    copied field; otherwise it expects ``expected_value``.
    """
    original = from_source("file", filename)

    single = original.from_fields([original[0].copy()])
    assert single[0].shape == shape

    # With no explicit expectation, compare against the copied field's own value.
    wanted_bits = single[0].metadata("bitsPerValue") if expected_value is None else expected_value

    with temp_file() as tmp:
        single.save(tmp, **_kwargs)
        reloaded = from_source("file", tmp)
        assert reloaded.metadata("bitsPerValue") == [wanted_bits] * len(single)


if __name__ == "__main__":
from earthkit.data.testing import main

Expand Down
1 change: 1 addition & 0 deletions tests/core/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ def test_grib_metadata_override_invalid():
assert "EncodingError" in e.typename


@pytest.mark.skipif(True, reason="headers_only_clone has to be fixed")
def test_grib_metadata_override_headers_only_true():
ds = from_source("file", earthkit_examples_file("test.grib"))
ref_size = ds[0].metadata("totalLength")
Expand Down
Binary file added tests/data/O32_global.grib1
Binary file not shown.
Binary file added tests/data/O32_global.grib2
Binary file not shown.
Loading