From b8010b177ff16b784df2486846c8ce69d35fd4de Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 2 Jan 2024 15:29:18 +0000 Subject: [PATCH 1/2] Add release notes for verison 5.0 --- docs/release_notes/index.rst | 1 + docs/release_notes/version_0.5_updates.rst | 25 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 docs/release_notes/version_0.5_updates.rst diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index 31d83e5c..b8279079 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -4,6 +4,7 @@ Release notes .. toctree:: :maxdepth: 1 + version_0.5_updates version_0.4_updates version_0.3_updates version_0.2_updates diff --git a/docs/release_notes/version_0.5_updates.rst b/docs/release_notes/version_0.5_updates.rst new file mode 100644 index 00000000..15cabe09 --- /dev/null +++ b/docs/release_notes/version_0.5_updates.rst @@ -0,0 +1,25 @@ +Version 0.5 Updates +///////////////////////// + +Version 0.5.0 +=============== + +New features +++++++++++++++++ + +- changed the default :ref:`cache policy ` to :ref:`off `, which now enables the usage of remote sources like :ref:`data-sources-mars`. See the :ref:`/examples/cache.ipynb` notebook example. +- allowed defining source :ref:`plugins ` +- enabled reading :ref:`data-sources-url` sources as streams. See the :ref:`/examples/grib_url_stream.ipynb` notebook example +- added the :meth:`FieldList.to_fieldlist() ` method to convert to a new :class:`FieldList` based on a given backend +- added the :meth:`nearest_point_haversine` and :meth:`nearest_point_kdtree` methods to find the nearest point out of a set of locations. See the :ref:`/examples/grib_nearest_gridpoint.ipynb` and :ref:`/examples/grib_time_series.ipynb` notebook examples. 
+- enabled using dictionaries in the ``split_on`` request parameter for :ref:`data-sources-cds` retrievals +- added the experimental "constants" source type +- ensured consistent usage of ``pandas_read_csv_kwargs`` for :ref:`data-sources-file` and :ref:`data-sources-cds` sources + + +Fixes +++++++ +- fixed issue when slicing did not work on :class:`~data.core.fieldlist.FieldList` filtered with ``sel()`` +- fixed crash in :meth:`FieldList.to_xarray() ` when the ``filter_by_keys`` option in ``backend_kwargs`` was used +- fixed issue when list of dates could not be used in a :ref:`data-sources-cds` request +- fixed issue when some metadata keys of a :class:`~data.core.readers.numpy_list.NumpyFieldList` did not match the actual field values. These metadata keys are now not available on a ``NumpyFieldList``. From 0cbd8bdd212310af824dc40d1973a8d51ab92a7d Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 23 Jan 2024 19:20:53 +0000 Subject: [PATCH 2/2] Release notes for 5.0 --- docs/guide/sources.rst | 27 ++++++- docs/guide/split_on.rst | 85 ++++++++++++++++++++++ docs/release_notes/version_0.5_updates.rst | 18 +++-- earthkit/data/core/fieldlist.py | 4 + earthkit/data/sources/numpy_list.py | 22 ++++++ earthkit/data/writers/grib.py | 8 +- 6 files changed, 152 insertions(+), 12 deletions(-) create mode 100644 docs/guide/split_on.rst diff --git a/docs/guide/sources.rst b/docs/guide/sources.rst index e92bd638..4fb31cdb 100644 --- a/docs/guide/sources.rst +++ b/docs/guide/sources.rst @@ -385,13 +385,14 @@ cds .. py:function:: from_source("cds", dataset, *args, **kwargs) :noindex: - The ``cds`` source accesses the `Copernicus Climate Data Store`_ (CDS), using the cdsapi_ package. In addition to data retrieval, ``request`` also has post-processing options such as ``grid`` and ``area`` for regridding and sub-area extraction respectively. + The ``cds`` source accesses the `Copernicus Climate Data Store`_ (CDS), using the cdsapi_ package. 
+ In addition to data retrieval, the request has post-processing options such as ``grid`` and ``area`` for regridding and sub-area extraction respectively. It can + also contain the earthkit-data specific :ref:`split_on ` parameter. :param str dataset: the name of the CDS dataset - :param tuple *args: specify the request as a dict + :param tuple *args: specify the request as a dict. A sequence of dicts can be used to specify multiple requests. :param dict **kwargs: other keyword arguments specifying the request - The following example retrieves ERA5 reanalysis GRIB data for a subarea for 2 surface parameters: + The following example retrieves ERA5 reanalysis GRIB data for a subarea for 2 surface parameters. The request is specified using ``kwargs``: .. code-block:: python @@ -407,6 +408,26 @@ cds date="2012-05-10", ) + The same retrieval can be defined by passing the request as a positional argument: + + .. code-block:: python + + import earthkit.data + + req = dict( + variable=["2t", "msl"], + product_type="reanalysis", + area=[50, -10, 40, 10], # N,W,S,E + grid=[2, 2], + date="2012-05-10", + ) + + ds = earthkit.data.from_source( + "cds", + "reanalysis-era5-single-levels", + req, + ) + Data downloaded from the CDS is stored in the the :ref:`cache `. diff --git a/docs/guide/split_on.rst b/docs/guide/split_on.rst new file mode 100644 index 00000000..1c3a2a54 --- /dev/null +++ b/docs/guide/split_on.rst @@ -0,0 +1,85 @@ +.. _split_on: + +Using split_on in CDS retrievals +==================================== + +A :ref:`data-sources-cds` request can contain the ``split_on`` parameter. It does not specify the data but instructs :ref:`from_source ` to split the request into multiple parts, which are then executed independently and the resulting data will be stored in different files. However, the actual storage is hidden from the users and they can still work with the results as a single object. 
+ +Single key +----------- + +``split_on`` can be a single string referring to the key the request should be split on: + + .. code-block:: python + + import earthkit.data + + ds = earthkit.data.from_source( + "cds", + "reanalysis-era5-single-levels", + variable=["2t", "msl"], + product_type="reanalysis", + area=[50, -10, 40, 10], # N,W,S,E + grid=[2, 2], + date="2012-05-10", + time=[0, 12], + split_on="variable", + ) + +This will send 2 requests to the CDS: one for "2t" and another one for "msl". + +Sequence of keys +----------------- + +A sequence of keys can also be specified: + + .. code-block:: python + + import earthkit.data + + ds = earthkit.data.from_source( + "cds", + "reanalysis-era5-single-levels", + variable=["2t", "msl"], + product_type="reanalysis", + area=[50, -10, 40, 10], # N,W,S,E + grid=[2, 2], + date="2012-05-10", + time=[0, 12], + split_on=("variable", "time"), + ) + +This will send 4 requests to the CDS: + + - variable="2t", time=0 + - variable="2t", time=12 + - variable="msl", time=0 + - variable="msl", time=12 + +Dictionary of keys +---------------------- + +By specifying a dict we can define grouping per key for the splitting: + + .. 
code-block:: python + + import earthkit.data + + ds = earthkit.data.from_source( + "cds", + "reanalysis-era5-single-levels", + variable=["2t", "2d", "msl", "sstk"], + product_type="reanalysis", + area=[50, -10, 40, 10], # N,W,S,E + grid=[2, 2], + date="2012-05-10", + time=[0, 12], + split_on={"variable": 2, "time": 1}, + ) + +This will send 4 requests to the CDS: + + - variable=["2t", "2d"], time=0 + - variable=["2t", "2d"], time=12 + - variable=["msl", "sstk"], time=0 + - variable=["msl", "sstk"], time=12 diff --git a/docs/release_notes/version_0.5_updates.rst b/docs/release_notes/version_0.5_updates.rst index 15cabe09..e720a962 100644 --- a/docs/release_notes/version_0.5_updates.rst +++ b/docs/release_notes/version_0.5_updates.rst @@ -7,19 +7,25 @@ Version 0.5.0 New features ++++++++++++++++ -- changed the default :ref:`cache policy ` to :ref:`off `, which now enables the usage of remote sources like :ref:`data-sources-mars`. See the :ref:`/examples/cache.ipynb` notebook example. -- allowed defining source :ref:`plugins ` +- changed the default :ref:`cache policy ` to :ref:`off `. This mode now enables the usage of remote sources like :ref:`data-sources-mars`. See the :ref:`/examples/cache.ipynb` notebook example. +- allowed creating source :ref:`plugins ` - enabled reading :ref:`data-sources-url` sources as streams. See the :ref:`/examples/grib_url_stream.ipynb` notebook example -- added the :meth:`FieldList.to_fieldlist() ` method to convert to a new :class:`FieldList` based on a given backend +- added the :meth:`FieldList.to_fieldlist() ` method to convert to a new :class:`FieldList` based on a given backend - added the :meth:`nearest_point_haversine` and :meth:`nearest_point_kdtree` methods to find the nearest point out of a set of locations. See the :ref:`/examples/grib_nearest_gridpoint.ipynb` and :ref:`/examples/grib_time_series.ipynb` notebook examples. 
-- enabled using dictionaries in the ``split_on`` request parameter for :ref:`data-sources-cds` retrievals +- enabled using multiple keys and dictionaries in the :ref:`split_on ` request parameter for :ref:`data-sources-cds` retrievals +- enabled using list of requests in :ref:`data-sources-cds` retrievals - added the experimental "constants" source type - ensured consistent usage of ``pandas_read_csv_kwargs`` for :ref:`data-sources-file` and :ref:`data-sources-cds` sources - +- added the ``bits_per_value`` option to :meth:`NumpyFieldList.save() ` +- when a :class:`~data.sources.numpy_list.NumpyFieldList` is written to disk with :meth:`NumpyFieldList.save() ` the ``generatingProcessIdentifier`` GRIB key is not set implicitly to 255 any longer. Instead, users must set its value when calling :meth:`Metadata.override() `. +- significantly reduced field size in a :class:`~data.sources.numpy_list.NumpyFieldList`. Available with ecCodes >= 2.34.0 and eccodes-python >= 1.17.0 +- added experimental support for retrieving coverage json data from a :ref:`data-sources-polytope` source Fixes ++++++ - fixed issue when slicing did not work on :class:`~data.core.fieldlist.FieldList` filtered with ``sel()`` - fixed crash in :meth:`FieldList.to_xarray() ` when the ``filter_by_keys`` option in ``backend_kwargs`` was used - fixed issue when list of dates could not be used in a :ref:`data-sources-cds` request -- fixed issue when some metadata keys of a :class:`~data.core.readers.numpy_list.NumpyFieldList` did not match the actual field values. These metadata keys are now not available on a ``NumpyFieldList``. +- fixed issue when some metadata keys of a :class:`~data.sources.numpy_list.NumpyFieldList` did not match the actual field values. 
+ These metadata keys are now not available in a :class:`~data.sources.numpy_list.NumpyFieldList` +- fixed issue when NetCDF input containing a coordinate with string values caused a crash +- ensured compatibility with the changes in ecCodes version 2.34.0. diff --git a/earthkit/data/core/fieldlist.py b/earthkit/data/core/fieldlist.py index e55c1d62..e125a5aa 100644 --- a/earthkit/data/core/fieldlist.py +++ b/earthkit/data/core/fieldlist.py @@ -1174,6 +1174,8 @@ def save(self, filename, append=False, **kwargs): append: bool When it is true append data to the target file. Otherwise the target file be overwritten if already exists. + **kwargs: dict, optional + Other keyword arguments passed to :obj:`write`. """ flag = "wb" if not append else "ab" with open(filename, flag) as f: @@ -1186,6 +1188,8 @@ def write(self, f, **kwargs): ---------- f: file object The target file object. + **kwargs: dict, optional + Other keyword arguments passed to the underlying field implementation. """ for s in self: s.write(f, **kwargs) diff --git a/earthkit/data/sources/numpy_list.py b/earthkit/data/sources/numpy_list.py index 055caada..7a959942 100644 --- a/earthkit/data/sources/numpy_list.py +++ b/earthkit/data/sources/numpy_list.py @@ -133,6 +133,28 @@ def _to_numpy_fieldlist(self, **kwargs): else: return type(self)(self.to_numpy(**kwargs), self._metadata) + def save(self, filename, append=False, check_nans=True, bits_per_value=16): + r"""Write all the fields into a file. + + Parameters + ---------- + filename: str + The target file path. + append: bool + When it is true append data to the target file. Otherwise + the target file will be overwritten if it already exists. + check_nans: bool + Replace nans in the values with GRIB missing values when generating the output. + bits_per_value: int + Set the ``bitsPerValue`` GRIB key in the generated output. 
+ """ + super().save( + filename, + append=append, + check_nans=check_nans, + bits_per_value=bits_per_value, + ) + class MultiUnwindMerger: def __init__(self, sources): diff --git a/earthkit/data/writers/grib.py b/earthkit/data/writers/grib.py index f9ac7d3d..cb92a4a4 100644 --- a/earthkit/data/writers/grib.py +++ b/earthkit/data/writers/grib.py @@ -21,11 +21,13 @@ def write(self, f, values, metadata, check_nans=True, bits_per_value=16): f: file object The target file object. values: ndarray - Values of the GRIB field/message. - values: :class:`GribMetadata` - Metadata of the GRIB field/message. + Values of the GRIB field. + metadata: :class:`GribMetadata` + Metadata of the GRIB field. check_nans: bool Replace nans in ``values`` with GRIB missing values when writing to``f``. + bits_per_value: int + Set the ``bitsPerValue`` GRIB key in the generated GRIB message. """ handle = metadata._handle.clone()