ecmwf · sandorkertesz · Jan 24, 2024 · Jan 2, 2024 · Jan 16, 2024 · Jan 18, 2024
diff --git a/docs/guide/sources.rst b/docs/guide/sources.rst
@@ -385,13 +385,14 @@ cds
 .. py:function:: from_source("cds", dataset, *args, **kwargs)
   :noindex:
 
-  The ``cds`` source accesses the `Copernicus Climate Data Store`_ (CDS), using the cdsapi_ package. In addition to data retrieval, ``request`` also has post-processing options such as ``grid`` and ``area`` for regridding and sub-area extraction respectively.
+  The ``cds`` source accesses the `Copernicus Climate Data Store`_ (CDS), using the cdsapi_ package. In addition to data retrieval, the request has post-processing options such as ``grid`` and ``area`` for regridding and sub-area extraction respectively. It can
+  also contain the earthkit-data specific :ref:`split_on <split_on>` parameter.
 
   :param str dataset: the name of the CDS dataset
-  :param tuple *args: specify the request as a dict
+  :param tuple *args: specify the request as a dict. A sequence of dicts can be used to specify multiple requests.
   :param dict **kwargs: other keyword arguments specifying the request
 
-  The following example retrieves ERA5 reanalysis GRIB data for a subarea for 2 surface parameters:
+  The following example retrieves ERA5 reanalysis GRIB data for a subarea for 2 surface parameters. The request is specified using ``kwargs``:
 
   .. code-block:: python
 
@@ -407,6 +408,26 @@ cds
           date="2012-05-10",
       )
 
+  The same retrieval can be defined by passing the request as a positional argument:
+
+  .. code-block:: python
+
+      import earthkit.data
+
+      req = dict(
+          variable=["2t", "msl"],
+          product_type="reanalysis",
+          area=[50, -10, 40, 10],  # N,W,S,E
+          grid=[2, 2],
+          date="2012-05-10",
+      )
+
+      ds = earthkit.data.from_source(
+          "cds",
+          "reanalysis-era5-single-levels",
+          req,
+      )
+
 
   Data downloaded from the CDS is stored in the the :ref:`cache <caching>`.
 

diff --git a/docs/guide/split_on.rst b/docs/guide/split_on.rst
@@ -0,0 +1,85 @@
+.. _split_on:
+
+Using split_on in CDS retrievals
+====================================
+
+A :ref:`data-sources-cds` request can contain the ``split_on`` parameter. It does not specify the data but instructs :ref:`from_source <data-sources-cds>` to split the request into multiple parts. These parts are executed independently and the resulting data is stored in separate files. However, the actual storage is hidden from users, who can still work with the results as a single object.
+
+Single key
+-----------
+
+``split_on`` can be a single string referring to the key the request should be split on:
+
+  .. code-block:: python
+
+      import earthkit.data
+
+      ds = earthkit.data.from_source(
+          "cds",
+          "reanalysis-era5-single-levels",
+          variable=["2t", "msl"],
+          product_type="reanalysis",
+          area=[50, -10, 40, 10],  # N,W,S,E
+          grid=[2, 2],
+          date="2012-05-10",
+          time=[0, 12],
+          split_on="variable",
+      )
+
+This will send 2 requests to the CDS: one for "2t" and another one for "msl".
+
+Sequence of keys
+-----------------
+
+A sequence of keys can also be specified:
+
+  .. code-block:: python
+
+      import earthkit.data
+
+      ds = earthkit.data.from_source(
+          "cds",
+          "reanalysis-era5-single-levels",
+          variable=["2t", "msl"],
+          product_type="reanalysis",
+          area=[50, -10, 40, 10],  # N,W,S,E
+          grid=[2, 2],
+          date="2012-05-10",
+          time=[0, 12],
+          split_on=("variable", "time"),
+      )
+
+This will send 4 requests to the CDS:
+
+    - variable="2t",  time=0
+    - variable="2t",  time=12
+    - variable="msl", time=0
+    - variable="msl", time=12
+
+Dictionary of keys
+----------------------
+
+By specifying a dict we can define the group size per key for the splitting. Each value gives the number of items of that key that go into a single request:
+
+  .. code-block:: python
+
+      import earthkit.data
+
+      ds = earthkit.data.from_source(
+          "cds",
+          "reanalysis-era5-single-levels",
+          variable=["2t", "2d", "msl", "sstk"],
+          product_type="reanalysis",
+          area=[50, -10, 40, 10],  # N,W,S,E
+          grid=[2, 2],
+          date="2012-05-10",
+          time=[0, 12],
+          split_on={"variable": 2, "time": 1},
+      )
+
+This will send 4 requests to the CDS:
+
+    - variable=["2t", "2d"],  time=0
+    - variable=["2t", "2d"],  time=12
+    - variable=["msl", "sstk"], time=0
+    - variable=["msl", "sstk"], time=12
diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst
@@ -4,6 +4,7 @@ Release notes
 .. toctree::
     :maxdepth: 1
 
+    version_0.5_updates
     version_0.4_updates
     version_0.3_updates
     version_0.2_updates
diff --git a/docs/release_notes/version_0.5_updates.rst b/docs/release_notes/version_0.5_updates.rst
@@ -0,0 +1,31 @@
+Version 0.5 Updates
+/////////////////////////
+
+Version 0.5.0
+===============
+
+New features
+++++++++++++++++
+
+- changed the default :ref:`cache policy <cache_policies>` to :ref:`off <off_cache_policy>`. This mode now enables the usage of remote sources like :ref:`data-sources-mars`. See the :ref:`/examples/cache.ipynb` notebook example.
+- allowed creating source :ref:`plugins <plugin-overview>`
+- enabled reading :ref:`data-sources-url` sources as streams. See the :ref:`/examples/grib_url_stream.ipynb` notebook example
+- added the :meth:`FieldList.to_fieldlist() <data.core.fieldlist.FieldList.to_fieldlist>` method to convert to a new :class:`FieldList` based on a given backend
+- added the :meth:`nearest_point_haversine` and :meth:`nearest_point_kdtree` methods to find the nearest point out of a set of locations. See the :ref:`/examples/grib_nearest_gridpoint.ipynb` and :ref:`/examples/grib_time_series.ipynb` notebook examples.
+- enabled using multiple keys and dictionaries in the :ref:`split_on <split_on>` request parameter for :ref:`data-sources-cds` retrievals
+- enabled using a list of requests in :ref:`data-sources-cds` retrievals
+- added the experimental "constants" source type
+- ensured consistent usage of ``pandas_read_csv_kwargs`` for :ref:`data-sources-file` and :ref:`data-sources-cds` sources
+- added the ``bits_per_value`` option to :meth:`NumpyFieldList.save() <data.sources.numpy_list.NumpyFieldList.save>`
+- when a :class:`~data.sources.numpy_list.NumpyFieldList` is written to disk with :meth:`NumpyFieldList.save() <data.sources.numpy_list.NumpyFieldList.save>` the ``generatingProcessIdentifier`` GRIB key is not set implicitly to 255 any longer. Instead, users must set its value when calling :meth:`Metadata.override() <data.core.metadata.Metadata.override>`.
+- significantly reduced field size in a :class:`~data.sources.numpy_list.NumpyFieldList`. Available with ecCodes >= 2.34.0 and eccodes-python >= 1.17.0
+- added experimental support for retrieving coverage json data from a :ref:`data-sources-polytope` source
+
+Fixes
+++++++
+- fixed issue when slicing did not work on :class:`~data.core.fieldlist.FieldList` filtered with ``sel()``
+- fixed crash in :meth:`FieldList.to_xarray() <data.core.fieldlist.FieldList.to_xarray>` when the ``filter_by_keys`` option in ``backend_kwargs`` was used
+- fixed issue when list of dates could not be used in a :ref:`data-sources-cds` request
+- fixed issue when some metadata keys of a :class:`~data.sources.numpy_list.NumpyFieldList` did not match the actual field values. These metadata keys are now not available in a :class:`~data.sources.numpy_list.NumpyFieldList`
+- fixed issue when NetCDF input containing a coordinate with string values caused a crash
+- ensured compatibility with the changes in ecCodes version 2.34.0.
diff --git a/earthkit/data/core/fieldlist.py b/earthkit/data/core/fieldlist.py
@@ -1174,6 +1174,8 @@ def save(self, filename, append=False, **kwargs):
         append: bool
             When it is true append data to the target file. Otherwise
             the target file be overwritten if already exists.
+        **kwargs: dict, optional
+            Other keyword arguments passed to :obj:`write`.
         """
         flag = "wb" if not append else "ab"
         with open(filename, flag) as f:
@@ -1186,6 +1188,8 @@ def write(self, f, **kwargs):
         ----------
         f: file object
             The target file object.
+        **kwargs: dict, optional
+            Other keyword arguments passed to the underlying field implementation.
         """
         for s in self:
             s.write(f, **kwargs)

diff --git a/earthkit/data/sources/numpy_list.py b/earthkit/data/sources/numpy_list.py
@@ -133,6 +133,28 @@ def _to_numpy_fieldlist(self, **kwargs):
         else:
             return type(self)(self.to_numpy(**kwargs), self._metadata)
 
+    def save(self, filename, append=False, check_nans=True, bits_per_value=16):
+        r"""Write all the fields into a file.
+
+        Parameters
+        ----------
+        filename: str
+            The target file path.
+        append: bool
+            When it is true append data to the target file. Otherwise
+            the target file will be overwritten if it already exists.
+        check_nans: bool
+            Replace nans in the values with GRIB missing values when generating the output.
+        bits_per_value: int
+            Set the ``bitsPerValue`` GRIB key in the generated output.
+        """
+        super().save(
+            filename,
+            append=append,
+            check_nans=check_nans,
+            bits_per_value=bits_per_value,
+        )
+
 
 class MultiUnwindMerger:
     def __init__(self, sources):

diff --git a/earthkit/data/writers/grib.py b/earthkit/data/writers/grib.py
@@ -21,11 +21,13 @@ def write(self, f, values, metadata, check_nans=True, bits_per_value=16):
         f: file object
             The target file object.
         values: ndarray
-            Values of the GRIB field/message.
-        values: :class:`GribMetadata`
-            Metadata of the GRIB field/message.
+            Values of the GRIB field.
+        metadata: :class:`GribMetadata`
+            Metadata of the GRIB field.
         check_nans: bool
             Replace nans in ``values`` with GRIB missing values when writing to``f``.
+        bits_per_value: int
+            Set the ``bitsPerValue`` GRIB key in the generated GRIB message.
         """
         handle = metadata._handle.clone()