From 4e86c968256f4284796de297a06ebca3eeab71a1 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 29 May 2024 16:30:10 +0200 Subject: [PATCH 1/6] Allow passing roots to `xcube.webapi.viewer.Viewer` --- CHANGES.md | 6 +++ xcube/webapi/viewer/viewer.py | 87 ++++++++++++++++++++++++++--------- 2 files changed, 72 insertions(+), 21 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 1233c812f..718e73954 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -46,6 +46,12 @@ * xcube server can now read SNAP color palette definition files (`*.cpd`) with alpha values. (#932) +* The class `xcube.webapi.viewer.Viewer` now accepts root paths or URLs that + will each be scanned for datasets. The roots are passed as keyword argument + `roots` that takes an iterable of type `str`. A new keyword argument `max_depth` + defines the maximum subdirectory depths used to search for datasets in case + `roots` is given. It defaults to `2`. + ### Incompatible API changes * The `get_cmap()` method of `util.cmaps.ColormapProvider` now returns a diff --git a/xcube/webapi/viewer/viewer.py b/xcube/webapi/viewer/viewer.py index 52b244753..26559a3d0 100644 --- a/xcube/webapi/viewer/viewer.py +++ b/xcube/webapi/viewer/viewer.py @@ -9,9 +9,11 @@ from pathlib import Path from typing import Optional, Union, Mapping, Any, Tuple, Dict +import fsspec import tornado.ioloop import xarray as xr +from xcube.util.config import merge_config from xcube.constants import LOG from xcube.core.mldataset import MultiLevelDataset from xcube.server.server import Server @@ -29,33 +31,30 @@ _LAB_INFO_FILE = "~/.xcube/jupyterlab/lab-info.json" +_DEFAULT_MAX_DEPTH = 2 + + class Viewer: - """Experimental class that represents the xcube Viewer - in Jupyter Notebooks. + """xcube Viewer for Jupyter Notebooks. Args: - server_config: Server configuration. See "xcube serve --show + args: + server_config: server configuration. See "xcube serve --show configschema". + roots: paths or URLs that will each be scanned for datasets. 
+ max_depth: defines the maximum subdirectory depth used to + search for datasets in case roots is given. """ - def __init__(self, server_config: Optional[Mapping[str, Any]] = None): - server_config = dict(server_config or {}) - - port = server_config.get("port") - address = server_config.get("address") - - if port is None: - port = _find_port() - if address is None: - address = "0.0.0.0" - - server_config["port"] = port - server_config["address"] = address - - server_url, reverse_url_prefix = _get_server_url_and_rev_prefix(port) - server_config["reverse_url_prefix"] = reverse_url_prefix - - self._server_config = server_config + def __init__( + self, + server_config: Optional[Mapping[str, Any]] = None, + roots: Optional[Iterable[str]] = None, + max_depth: Optional[int] = None, + ): + self._server_config = _get_server_config( + server_config=server_config, roots=roots, max_depth=max_depth + ) # Got trick from # https://stackoverflow.com/questions/55201748/running-a-tornado-server-within-a-jupyter-notebook @@ -200,6 +199,36 @@ def _check_server_running(self): return self.is_server_running +def _get_server_config( + server_config: Optional[Mapping[str, Any]] = None, + roots: Optional[Iterable[str]] = None, + max_depth: Optional[int] = None, +) -> dict[str, Any]: + server_config = dict(server_config or {}) + max_depth = max_depth or _DEFAULT_MAX_DEPTH + + port = server_config.get("port") + address = server_config.get("address") + + if port is None: + port = _find_port() + if address is None: + address = "0.0.0.0" + + server_config["port"] = port + server_config["address"] = address + + server_url, reverse_url_prefix = _get_server_url_and_rev_prefix(port) + server_config["reverse_url_prefix"] = reverse_url_prefix + + if roots is not None: + config_stores = list(server_config.get("DataStores", [])) + root_stores = _get_data_stores_from_roots(roots, max_depth) + server_config["DataStores"] = config_stores + root_stores + + return server_config + + def 
_get_server_url_and_rev_prefix(port: int) -> Tuple[str, str]: lab_url = os.environ.get(_LAB_URL_ENV_VAR) or None has_proxy = lab_url is not None @@ -233,3 +262,19 @@ def _find_port(start: int = 8000, end: Optional[int] = None) -> int: if s.connect_ex(("localhost", port)) != 0: return port raise RuntimeError("No available port found") + + +def _get_data_stores_from_roots( + roots: tuple[str, ...], max_depth: int +) -> list[dict[str, dict]]: + extra_data_stores = [] + for index, root in enumerate(roots): + protocol, path = fsspec.core.split_protocol(root) + extra_data_stores.append( + { + "Identifier": f"_root_{index}", + "StoreId": protocol or "file", + "StoreParams": {"root": path, "max_depth": max_depth}, + } + ) + return extra_data_stores From 8ab162b100dafda438afbaf3bf5280cb2df8bc61 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 29 May 2024 18:59:54 +0200 Subject: [PATCH 2/6] fix --- xcube/webapi/viewer/viewer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xcube/webapi/viewer/viewer.py b/xcube/webapi/viewer/viewer.py index 26559a3d0..8e3fe68d7 100644 --- a/xcube/webapi/viewer/viewer.py +++ b/xcube/webapi/viewer/viewer.py @@ -7,7 +7,8 @@ import socket import threading from pathlib import Path -from typing import Optional, Union, Mapping, Any, Tuple, Dict +from typing import Optional, Union, Mapping, Any +from collections.abc import Iterable import fsspec import tornado.ioloop @@ -116,7 +117,7 @@ def add_dataset( ds_id: Optional[str] = None, title: Optional[str] = None, style: Optional[str] = None, - color_mappings: Dict[str, Dict[str, Any]] = None, + color_mappings: dict[str, dict[str, Any]] = None, ): """Add a dataset to this viewer. 
@@ -229,7 +230,7 @@ def _get_server_config( return server_config -def _get_server_url_and_rev_prefix(port: int) -> Tuple[str, str]: +def _get_server_url_and_rev_prefix(port: int) -> tuple[str, str]: lab_url = os.environ.get(_LAB_URL_ENV_VAR) or None has_proxy = lab_url is not None From 388f1a90bb8db19136492db43b35fe86f5d9d5a5 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 May 2024 09:51:40 +0200 Subject: [PATCH 3/6] Viewer now accepts root paths/URLs --- test/webapi/viewer/test_viewer.py | 75 ++++++++++++++++++++++++++----- xcube/webapi/viewer/viewer.py | 22 +++++---- 2 files changed, 75 insertions(+), 22 deletions(-) diff --git a/test/webapi/viewer/test_viewer.py b/test/webapi/viewer/test_viewer.py index 1c0dab2f7..f9abf783a 100644 --- a/test/webapi/viewer/test_viewer.py +++ b/test/webapi/viewer/test_viewer.py @@ -4,6 +4,7 @@ import os import unittest +from collections.abc import Iterable from typing import Optional, Mapping, Any import pytest @@ -33,8 +34,15 @@ def tearDown(self) -> None: if self.viewer is not None: self.viewer.stop_server() - def get_viewer(self, server_config: Optional[Mapping[str, Any]] = None) -> Viewer: - self.viewer = Viewer(server_config=server_config) + def get_viewer( + self, + server_config: Optional[Mapping[str, Any]] = None, + roots: Optional[Iterable[str]] = None, + max_depth: Optional[int] = None, + ) -> Viewer: + self.viewer = Viewer( + server_config=server_config, roots=roots, max_depth=max_depth + ) return self.viewer def test_start_and_stop_server(self): @@ -61,18 +69,65 @@ def test_show(self): def test_no_config(self): viewer = self.get_viewer() self.assertIsInstance(viewer.server_config, dict) - self.assertIn("port", viewer.server_config) + self.assertIsInstance(viewer.server_config.get("port"), int) self.assertIn("address", viewer.server_config) self.assertIn("reverse_url_prefix", viewer.server_config) + self.assertNotIn("DataStores", viewer.server_config) def test_with_config(self): - viewer = 
self.get_viewer(STYLES_CONFIG) - self.assertIsInstance(viewer.server_config, dict) - self.assertIn("port", viewer.server_config) - self.assertIn("address", viewer.server_config) - self.assertIn("reverse_url_prefix", viewer.server_config) - self.assertIn("Styles", viewer.server_config) - self.assertEqual(STYLES_CONFIG["Styles"], viewer.server_config["Styles"]) + viewer = self.get_viewer({"port": 8888, **STYLES_CONFIG}) + self.assertEqual( + { + "address": "0.0.0.0", + "port": 8888, + "reverse_url_prefix": "", + **STYLES_CONFIG, + }, + viewer.server_config, + ) + + def test_with_root(self): + viewer = self.get_viewer({"port": 8081}, roots="data") + self.assertEqual( + { + "address": "0.0.0.0", + "port": 8081, + "reverse_url_prefix": "", + "DataStores": [ + { + "Identifier": "_root_0", + "StoreId": "file", + "StoreParams": {"max_depth": 1, "root": "data"}, + } + ], + }, + viewer.server_config, + ) + + def test_with_roots(self): + viewer = self.get_viewer( + {"port": 8080}, roots=["data", "s3://xcube"], max_depth=2 + ) + self.assertEqual( + { + "address": "0.0.0.0", + "port": 8080, + "reverse_url_prefix": "", + "DataStores": [ + { + "Identifier": "_root_0", + "StoreId": "file", + "StoreParams": {"max_depth": 2, "root": "data"}, + }, + { + "Identifier": "_root_1", + "StoreId": "s3", + "StoreParams": {"max_depth": 2, "root": "xcube"}, + }, + ], + }, + viewer.server_config, + ) def test_urls(self): viewer = self.get_viewer() diff --git a/xcube/webapi/viewer/viewer.py b/xcube/webapi/viewer/viewer.py index 8e3fe68d7..16e7a41a0 100644 --- a/xcube/webapi/viewer/viewer.py +++ b/xcube/webapi/viewer/viewer.py @@ -14,7 +14,6 @@ import tornado.ioloop import xarray as xr -from xcube.util.config import merge_config from xcube.constants import LOG from xcube.core.mldataset import MultiLevelDataset from xcube.server.server import Server @@ -39,9 +38,8 @@ class Viewer: """xcube Viewer for Jupyter Notebooks. Args: - args: - server_config: server configuration. 
See "xcube serve --show - configschema". + server_config: server configuration. + See also output of ``$ xcube serve --show configschema``. roots: paths or URLs that will each be scanned for datasets. max_depth: defines the maximum subdirectory depth used to search for datasets in case roots is given. @@ -53,9 +51,11 @@ def __init__( roots: Optional[Iterable[str]] = None, max_depth: Optional[int] = None, ): - self._server_config = _get_server_config( + self._server_config, server_url = _get_server_config( server_config=server_config, roots=roots, max_depth=max_depth ) + self._server_url = server_url + self._viewer_url = f"{server_url}/viewer/?serverUrl={server_url}" # Got trick from # https://stackoverflow.com/questions/55201748/running-a-tornado-server-within-a-jupyter-notebook @@ -66,14 +66,11 @@ def __init__( self._server = Server( TornadoFramework(io_loop=self._io_loop, shared_io_loop=True), - config=server_config, + config=self._server_config, ) self._io_loop.add_callback(self._server.start) - self._server_url = server_url - self._viewer_url = f"{server_url}/viewer/?serverUrl={server_url}" - @property def server_config(self) -> Mapping[str, Any]: """The server configuration used by this viewer.""" @@ -170,6 +167,7 @@ def show(self, width: Union[int, str] = "100%", height: Union[str, int] = 800): height: The height of the viewer's iframe. 
""" try: + # noinspection PyPackageRequirements from IPython.core.display import HTML return HTML( @@ -204,7 +202,7 @@ def _get_server_config( server_config: Optional[Mapping[str, Any]] = None, roots: Optional[Iterable[str]] = None, max_depth: Optional[int] = None, -) -> dict[str, Any]: +) -> tuple[dict[str, Any], str]: server_config = dict(server_config or {}) max_depth = max_depth or _DEFAULT_MAX_DEPTH @@ -227,7 +225,7 @@ def _get_server_config( root_stores = _get_data_stores_from_roots(roots, max_depth) server_config["DataStores"] = config_stores + root_stores - return server_config + return server_config, server_url def _get_server_url_and_rev_prefix(port: int) -> tuple[str, str]: @@ -266,7 +264,7 @@ def _find_port(start: int = 8000, end: Optional[int] = None) -> int: def _get_data_stores_from_roots( - roots: tuple[str, ...], max_depth: int + roots: Iterable[str], max_depth: int ) -> list[dict[str, dict]]: extra_data_stores = [] for index, root in enumerate(roots): From c1508ea5954a06fd21205fc2d0c561c8fee1f42c Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 May 2024 10:39:39 +0200 Subject: [PATCH 4/6] fixed tests --- CHANGES.md | 6 +++--- test/webapi/viewer/test_viewer.py | 25 ++++++++++++++++--------- xcube/webapi/viewer/viewer.py | 7 ++++--- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4f0a3c200..243a9fbb0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -48,9 +48,9 @@ * The class `xcube.webapi.viewer.Viewer` now accepts root paths or URLs that will each be scanned for datasets. The roots are passed as keyword argument - `roots` that takes an iterable of type `str`. A new keyword argument `max_depth` - defines the maximum subdirectory depths used to search for datasets in case - `roots` is given. It defaults to `2`. + `roots` whose value is path or URL or an iterable of paths or URLs. 
+ A new keyword argument `max_depth` defines the maximum subdirectory depths + used to search for datasets in case `roots` is given. It defaults to `1`. ### Incompatible API changes diff --git a/test/webapi/viewer/test_viewer.py b/test/webapi/viewer/test_viewer.py index 4dcc5ec6c..56c26ece6 100644 --- a/test/webapi/viewer/test_viewer.py +++ b/test/webapi/viewer/test_viewer.py @@ -5,9 +5,8 @@ import os import unittest from collections.abc import Iterable -from typing import Optional, Mapping, Any -from typing import Optional, Any from collections.abc import Mapping +from typing import Optional, Any, Union import pytest @@ -39,7 +38,7 @@ def tearDown(self) -> None: def get_viewer( self, server_config: Optional[Mapping[str, Any]] = None, - roots: Optional[Iterable[str]] = None, + roots: Optional[Union[str, Iterable[str]]] = None, max_depth: Optional[int] = None, ) -> Viewer: self.viewer = Viewer( @@ -72,17 +71,19 @@ def test_no_config(self): viewer = self.get_viewer() self.assertIsInstance(viewer.server_config, dict) self.assertIsInstance(viewer.server_config.get("port"), int) - self.assertIn("address", viewer.server_config) - self.assertIn("reverse_url_prefix", viewer.server_config) - self.assertNotIn("DataStores", viewer.server_config) + self.assertIsInstance(viewer.server_config.get("address"), str) + self.assertIsInstance(viewer.server_config.get("reverse_url_prefix"), str) def test_with_config(self): viewer = self.get_viewer({"port": 8888, **STYLES_CONFIG}) + self.assertIsInstance(viewer.server_config, dict) + # Get rid of "reverse_url_prefix" as it depends on env vars + # noinspection PyUnresolvedReferences + self.assertIsInstance(viewer.server_config.pop("reverse_url_prefix", None), str) self.assertEqual( { "address": "0.0.0.0", "port": 8888, - "reverse_url_prefix": "", **STYLES_CONFIG, }, viewer.server_config, @@ -90,11 +91,14 @@ def test_with_config(self): def test_with_root(self): viewer = self.get_viewer({"port": 8081}, roots="data") + 
self.assertIsInstance(viewer.server_config, dict) + # Get rid of "reverse_url_prefix" as it depends on env vars + # noinspection PyUnresolvedReferences + self.assertIsInstance(viewer.server_config.pop("reverse_url_prefix", None), str) self.assertEqual( { "address": "0.0.0.0", "port": 8081, - "reverse_url_prefix": "", "DataStores": [ { "Identifier": "_root_0", @@ -110,11 +114,14 @@ def test_with_roots(self): viewer = self.get_viewer( {"port": 8080}, roots=["data", "s3://xcube"], max_depth=2 ) + self.assertIsInstance(viewer.server_config, dict) + # Get rid of "reverse_url_prefix" as it depends on env vars + # noinspection PyUnresolvedReferences + self.assertIsInstance(viewer.server_config.pop("reverse_url_prefix", None), str) self.assertEqual( { "address": "0.0.0.0", "port": 8080, - "reverse_url_prefix": "", "DataStores": [ { "Identifier": "_root_0", diff --git a/xcube/webapi/viewer/viewer.py b/xcube/webapi/viewer/viewer.py index 38f0fa245..788fd3531 100644 --- a/xcube/webapi/viewer/viewer.py +++ b/xcube/webapi/viewer/viewer.py @@ -32,7 +32,7 @@ _LAB_INFO_FILE = "~/.xcube/jupyterlab/lab-info.json" -_DEFAULT_MAX_DEPTH = 2 +_DEFAULT_MAX_DEPTH = 1 class Viewer: @@ -49,7 +49,7 @@ class Viewer: def __init__( self, server_config: Optional[Mapping[str, Any]] = None, - roots: Optional[Iterable[str]] = None, + roots: Optional[Union[str, Iterable[str]]] = None, max_depth: Optional[int] = None, ): self._server_config, server_url = _get_server_config( @@ -201,7 +201,7 @@ def _check_server_running(self): def _get_server_config( server_config: Optional[Mapping[str, Any]] = None, - roots: Optional[Iterable[str]] = None, + roots: Optional[Union[str, Iterable[str]]] = None, max_depth: Optional[int] = None, ) -> tuple[dict[str, Any], str]: server_config = dict(server_config or {}) @@ -222,6 +222,7 @@ def _get_server_config( server_config["reverse_url_prefix"] = reverse_url_prefix if roots is not None: + roots = [roots] if isinstance(roots, str) else roots config_stores = 
list(server_config.get("DataStores", [])) root_stores = _get_data_stores_from_roots(roots, max_depth) server_config["DataStores"] = config_stores + root_stores From 445ba0b52924dab3917b8de3961fc91882a2192d Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 May 2024 10:41:34 +0200 Subject: [PATCH 5/6] typo --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 243a9fbb0..2b534f666 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -48,7 +48,7 @@ * The class `xcube.webapi.viewer.Viewer` now accepts root paths or URLs that will each be scanned for datasets. The roots are passed as keyword argument - `roots` whose value is path or URL or an iterable of paths or URLs. + `roots` whose value is a path or URL or an iterable of paths or URLs. A new keyword argument `max_depth` defines the maximum subdirectory depths used to search for datasets in case `roots` is given. It defaults to `1`. From 6666dbb00f1163a0f650ef3338512f8295ca4d33 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 May 2024 14:42:54 +0200 Subject: [PATCH 6/6] more documentation --- docs/source/api.rst | 3 +++ xcube/webapi/viewer/viewer.py | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index 4a1140250..cf6efa724 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -208,6 +208,9 @@ Utilities .. autoclass:: xcube.core.schema.CubeSchema :members: +.. autoclass:: xcube.webapi.viewer.Viewer + :members: + .. autofunction:: xcube.util.dask.new_cluster Plugin Development diff --git a/xcube/webapi/viewer/viewer.py b/xcube/webapi/viewer/viewer.py index 788fd3531..2e45b5389 100644 --- a/xcube/webapi/viewer/viewer.py +++ b/xcube/webapi/viewer/viewer.py @@ -38,12 +38,40 @@ class Viewer: """xcube Viewer for Jupyter Notebooks. 
+ The viewer can be used to visualise and inspect datacubes + with at least one data variable with dimensions ``["time", "lat", "lon"]`` + or, if a grid mapping is present, with arbitrary ``"time"`` and + arbitrary x- and y-dimensions, e.g., ``["time", "y", "x"]``. + + Add datacubes from instances of ``xarray.Dataset``: + + ``` + viewer = Viewer() + viewer.add_dataset(dataset) # can set color styles here too, see doc below + viewer.show() + ``` + + Display all datasets of formats Zarr, NetCDF, COG/GeoTIFF found in the + given directories in the local filesystem or in a given S3 bucket: + + ``` + viewer = Viewer(roots=["/eodata/smos/l2", "s3://xcube/examples"]) + viewer.show() + ``` + + The `Viewer` class takes a xcube server configuration as first + argument. More details regarding configuration parameters are given in the + `server documentation `_. + The full configuration reference can be generated by executing CLI command + ``$ xcube serve --show configschema``. + Args: - server_config: server configuration. + server_config: Server configuration. See also output of ``$ xcube serve --show configschema``. - roots: paths or URLs that will each be scanned for datasets. + roots: A path or URL or an iterable of paths or URLs + that will each be scanned for datasets to be shown in the viewer. max_depth: defines the maximum subdirectory depth used to - search for datasets in case roots is given. + search for datasets in case *roots* is given. """ def __init__(