Addressing #420 (for CCI Toolbox)

forman · forman · commit 44196fbaf375 · 2021-03-10T11:41:17.000+01:00
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,5 +1,10 @@
 ## Changes in 0.7.1.dev1 (in development)
 
+* Slightly changed signature of `xcube.core.store.DataStore.get_data_ids()`
+  by adding a new keyword argument `include_attrs: Sequence[str] = None` that
+  can be used to obtain a minimum set of dataset attributes for each returned
+  dataset identifier. However, `include_attrs` is ignored so far in the "s3",
+  "memory", and "directory" data stores. (#420)
 * Added `s3fs`  requirement that has been removed by accident.
 * Added missing requirements `requests` and `urllib3`.
 
diff --git a/xcube/core/store/store.py b/xcube/core/store/store.py
@@ -20,7 +20,7 @@
 # SOFTWARE.
 
 from abc import abstractmethod, ABC
-from typing import Iterator, Tuple, Any, Optional, List, Type
+from typing import Iterator, Tuple, Any, Optional, List, Type, Sequence
 
 from xcube.constants import EXTENSION_POINT_DATA_STORES
 from xcube.util.extension import Extension
@@ -153,7 +153,10 @@ def get_type_specifiers_for_data(self, data_id: str) -> Tuple[str, ...]:
         """
 
     @abstractmethod
-    def get_data_ids(self, type_specifier: str = None, include_titles: bool = True) -> \
+    def get_data_ids(self,
+                     type_specifier: str = None,
+                     include_titles: bool = True,
+                     include_attrs: Sequence[str] = None) -> \
             Iterator[Tuple[str, Optional[str]]]:
         """
         Get an iterator over the data resource identifiers for the given type *type_specifier*.
@@ -162,13 +165,29 @@ def get_data_ids(self, type_specifier: str = None, include_titles: bool = True)
         If a store implementation supports only a single data type, it should verify that *type_specifier*
         is either None or compatible with the supported data type.
 
-        The returned iterator items are 2-tuples of the form (*data_id*, *title*), where *data_id*
-        is the actual data identifier and *title* is an optional, human-readable title for the data.
-        If *include_titles* is false, the second item of the result tuple will be None.
+        The returned iterator items are either 2-tuples of the form (*data_id*, *title*)
+        or (*data_id*, *attrs*), where *data_id* is the actual data identifier:
 
-        :param type_specifier: If given, only data identifiers that are available as this type are returned. If this is
-        omitted, all available data identifiers are returned.
+        * The first form is returned if *include_titles* is true.
+          Then *title* is an optional, human-readable title (of type ``str``) for the data.
+        * The second form is returned if *include_attrs* is provided and not empty.
+          Then *attrs* is a possibly empty mapping (of type ``dict``) of attribute names to values
+          with names given by *include_attrs*.
+
+        For data store backward compatibility, *include_titles* takes precedence over *include_attrs*, so if
+        *include_titles* is true (the default), a store should ignore *include_attrs*.
+        Clients should however prefer ``include_attrs=["title", ...]`` over ``include_titles=True``.
+
+        If neither *include_titles* nor *include_attrs* is provided, the second item of the result tuple
+        will be None.
+
+        :param type_specifier: If given, only data identifiers that are available as this type are returned.
+            If this is omitted, all available data identifiers are returned.
         :param include_titles: If true, the store will attempt to also provide a title.
+            In this case, the second tuple item is a ``str`` or None.
+        :param include_attrs: A sequence of names of attributes to be returned for each dataset identifier.
+            If given, the store will attempt to also provide the given set of dataset attributes.
+            In this case, the second tuple item is a ``dict`` or None. (added in 0.7.1)
         :return: An iterator over the identifiers and titles of data resources provided by this data store.
         :raise DataStoreError: If an error occurs.
         """
diff --git a/xcube/core/store/stores/directory.py b/xcube/core/store/stores/directory.py
@@ -21,7 +21,7 @@
 
 import os.path
 import uuid
-from typing import Optional, Iterator, Any, Tuple, List
+from typing import Optional, Iterator, Any, Tuple, List, Sequence
 
 import geopandas as gpd
 import xarray as xr
@@ -116,8 +116,13 @@ def get_type_specifiers_for_data(self, data_id: str) -> Tuple[str, ...]:
         actual_type_specifier, _, _ = self._get_accessor_id_parts(data_id)
         return actual_type_specifier,
 
-    def get_data_ids(self, type_specifier: str = None, include_titles: bool = True) -> \
+    def get_data_ids(self,
+                     type_specifier: str = None,
+                     include_titles: bool = True,
+                     include_attrs: Sequence[str] = None) -> \
             Iterator[Tuple[str, Optional[str]]]:
+        # TODO: do not ignore include_titles
+        # TODO: do not ignore include_attrs
         if type_specifier is not None:
             type_specifier = TypeSpecifier.normalize(type_specifier)
         # TODO: Use os.walk(), which provides a generator rather than a list
diff --git a/xcube/core/store/stores/memory.py b/xcube/core/store/stores/memory.py
@@ -20,7 +20,7 @@
 # SOFTWARE.
 
 import uuid
-from typing import Iterator, Dict, Any, Optional, Tuple, Mapping
+from typing import Iterator, Dict, Any, Optional, Tuple, Mapping, Sequence
 
 from xcube.core.store import DataDescriptor
 from xcube.core.store import DataStoreError
@@ -66,8 +66,13 @@ def get_type_specifiers_for_data(self, data_id: str) -> Tuple[str, ...]:
         type_specifier = get_type_specifier(self._data_dict[data_id])
         return str(type_specifier),
 
-    def get_data_ids(self, type_specifier: str = None, include_titles: bool = True) -> Iterator[
-        Tuple[str, Optional[str]]]:
+    def get_data_ids(self,
+                     type_specifier: str = None,
+                     include_titles: bool = True,
+                     include_attrs: Sequence[str] = None) \
+            -> Iterator[Tuple[str, Optional[str]]]:
+        # TODO: do not ignore include_titles
+        # TODO: do not ignore include_attrs
         if type_specifier is None:
             for data_id, data in self._data_dict.items():
                 yield data_id, None
diff --git a/xcube/core/store/stores/s3.py b/xcube/core/store/stores/s3.py
@@ -124,8 +124,13 @@ def get_type_specifiers_for_data(self, data_id: str) -> Tuple[str, ...]:
         data_type_specifier, _, _ = self._get_accessor_id_parts(data_id)
         return data_type_specifier,
 
-    def get_data_ids(self, type_specifier: str = None, include_titles=True) -> Iterator[Tuple[str, Optional[str]]]:
-        # todo do not ignore type_specifier
+    def get_data_ids(self,
+                     type_specifier: str = None,
+                     include_titles=True,
+                     include_attrs=None) -> Iterator[Tuple[str, Optional[str]]]:
+        # TODO: do not ignore type_specifier
+        # TODO: do not ignore include_titles
+        # TODO: do not ignore include_attrs
         prefix = self._bucket_name + '/'
         first_index = len(prefix)
         for item in self._s3.listdir(self._bucket_name, detail=False):