From f5956f5babf91497a482c7958b5d5cbe1bf6c03d Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Fri, 24 Nov 2023 00:33:26 +0800 Subject: [PATCH] cherry-pick https://github.com/fcollonval/jupyter_server/pull/1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Frédéric Collonval --- docs/source/developers/contents.rst | 80 +++--- jupyter_server/services/api/api.yaml | 8 +- .../services/contents/filecheckpoints.py | 4 +- jupyter_server/services/contents/fileio.py | 271 +++++++++++------- .../services/contents/filemanager.py | 58 ++-- jupyter_server/services/contents/handlers.py | 73 +++-- jupyter_server/services/contents/manager.py | 31 +- tests/services/contents/test_api.py | 10 +- tests/services/contents/test_fileio.py | 55 +++- tests/services/contents/test_manager.py | 31 +- .../services/contents/test_manager_no_hash.py | 44 +++ 11 files changed, 421 insertions(+), 244 deletions(-) create mode 100644 tests/services/contents/test_manager_no_hash.py diff --git a/docs/source/developers/contents.rst b/docs/source/developers/contents.rst index 48c329fa2a..f61e5d36fb 100644 --- a/docs/source/developers/contents.rst +++ b/docs/source/developers/contents.rst @@ -33,44 +33,46 @@ which we refer to as **models**. Models may contain the following entries: -+--------------------+-----------+------------------------------+ -| Key | Type |Info | -+====================+===========+==============================+ -|**name** |unicode |Basename of the entity. | -+--------------------+-----------+------------------------------+ -|**path** |unicode |Full | -| | |(:ref:`API-style`) | -| | |path to the entity. | -+--------------------+-----------+------------------------------+ -|**type** |unicode |The entity type. One of | -| | |``"notebook"``, ``"file"`` or | -| | |``"directory"``. | -+--------------------+-----------+------------------------------+ -|**created** |datetime |Creation date of the entity. | -+--------------------+-----------+------------------------------+ -|**last_modified** |datetime |Last modified date of the | -| | |entity. | -+--------------------+-----------+------------------------------+ -|**content** |variable |The "content" of the entity. | -| | |(:ref:`See | -| | |Below`) | -+--------------------+-----------+------------------------------+ -|**mimetype** |unicode or |The mimetype of ``content``, | -| |``None`` |if any. (:ref:`See | -| | |Below`) | -+--------------------+-----------+------------------------------+ -|**format** |unicode or |The format of ``content``, | -| |``None`` |if any. (:ref:`See | -| | |Below`) | -+--------------------+-----------+------------------------------+ -|**hash** |unicode or |The hash of the contents. | -| |``None`` | | -| | | | -+--------------------+-----------+------------------------------+ -|**hash_algorithm** |unicode |The algorithm used to compute | -| | |hash value. | -| | | | -+--------------------+-----------+------------------------------+ ++--------------------+------------+-----------------------------------------+ +| Key | Type | Info | ++====================+============+=========================================+ +| **name** | unicode | Basename of the entity. | ++--------------------+------------+-----------------------------------------+ +| **path** | unicode | Full | +| | | (:ref:`API-style`) | +| | | path to the entity. | ++--------------------+------------+-----------------------------------------+ +| **type** | unicode | The entity type. One of | +| | | ``"notebook"``, ``"file"`` or | +| | | ``"directory"``. | ++--------------------+------------+-----------------------------------------+ +| **created** | datetime | Creation date of the entity. | ++--------------------+------------+-----------------------------------------+ +| **last_modified** | datetime | Last modified date of the | +| | | entity. | ++--------------------+------------+-----------------------------------------+ +| **content** | variable | The "content" of the entity. | +| | | (:ref:`See | +| | | Below`) | ++--------------------+------------+-----------------------------------------+ +| **mimetype** | unicode or | The mimetype of ``content``, | +| | ``None`` | if any. (:ref:`See | +| | | Below`) | ++--------------------+------------+-----------------------------------------+ +| **format** | unicode or | The format of ``content``, | +| | ``None`` | if any. (:ref:`See | +| | | Below`) | ++--------------------+------------+-----------------------------------------+ +| [optional] | | | +| **hash** | unicode or | The hash of the contents. | +| | ``None`` | It cannot be null if ``hash_algorithm`` | +| | | is defined. | ++--------------------+------------+-----------------------------------------+ +| [optional] | | | +| **hash_algorithm** | unicode or | The algorithm used to compute | +| | ``None`` | hash value. | +| | | It cannot be null if hash is defined. | ++--------------------+------------+-----------------------------------------+ .. _modelcontent: @@ -122,7 +124,7 @@ model. There are three model types: **notebook**, **file**, and **directory**. .. code-block:: python - # Notebook Model with Content + # Notebook Model with Content and Hash { "content": { "metadata": {}, diff --git a/jupyter_server/services/api/api.yaml b/jupyter_server/services/api/api.yaml index 3b77a60314..5ee5c416bd 100644 --- a/jupyter_server/services/api/api.yaml +++ b/jupyter_server/services/api/api.yaml @@ -108,7 +108,7 @@ paths: type: integer - name: hash in: query - description: "Return hash hexdigest string of content (0 for no hash, 1 for return hash), when ContentsManager not support hash, it will be ignored." + description: "May return hash hexdigest string of content and the hash algorithm (0 for no hash - default, 1 for return hash). It may be ignored by the content manager." type: integer responses: 404: @@ -901,8 +901,6 @@ definitions: - mimetype - format - content - - hash - - hash_algorithm properties: name: type: string @@ -942,10 +940,10 @@ definitions: description: Format of content (one of null, 'text', 'base64', 'json') hash: type: string - description: "The hexdigest hash string of content, if requested (otherwise null)." + description: "[optional] The hexdigest hash string of content, if requested (otherwise null). It cannot be null if hash_algorithm is defined." hash_algorithm: type: string - ddescription: "The algorithm used to produce the hash." + description: "[optional] The algorithm used to produce the hash, if requested (otherwise null). It cannot be null if hash is defined." Checkpoints: description: A checkpoint object. type: object diff --git a/jupyter_server/services/contents/filecheckpoints.py b/jupyter_server/services/contents/filecheckpoints.py index d7c7e77103..f6d1ef44e7 100644 --- a/jupyter_server/services/contents/filecheckpoints.py +++ b/jupyter_server/services/contents/filecheckpoints.py @@ -252,7 +252,7 @@ def get_file_checkpoint(self, checkpoint_id, path): if not os.path.isfile(os_checkpoint_path): self.no_such_checkpoint(path, checkpoint_id) - content, format, _ = self._read_file(os_checkpoint_path, format=None) + content, format = self._read_file(os_checkpoint_path, format=None) return { "type": "file", "content": content, @@ -318,7 +318,7 @@ async def get_file_checkpoint(self, checkpoint_id, path): if not os.path.isfile(os_checkpoint_path): self.no_such_checkpoint(path, checkpoint_id) - content, format, _ = await self._read_file(os_checkpoint_path, format=None) + content, format = await self._read_file(os_checkpoint_path, format=None) return { "type": "file", "content": content, diff --git a/jupyter_server/services/contents/fileio.py b/jupyter_server/services/contents/fileio.py index 99debe8c7a..a404ee91b3 100644 --- a/jupyter_server/services/contents/fileio.py +++ b/jupyter_server/services/contents/fileio.py @@ -3,6 +3,9 @@ """ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. + +from __future__ import annotations + import errno import hashlib import os @@ -14,7 +17,7 @@ import nbformat from anyio.to_thread import run_sync from tornado.web import HTTPError -from traitlets import Bool, Unicode +from traitlets import Bool, Enum from traitlets.config import Configurable from traitlets.config.configurable import LoggingConfigurable @@ -193,10 +196,11 @@ class FileManagerMixin(LoggingConfigurable, Configurable): If set to False, the new notebook is written directly on the old one which could fail (eg: full filesystem or quota )""", ) - hash_algorithm = Unicode( - "sha256", + hash_algorithm = Enum( + hashlib.algorithms_available, + default_value="sha256", config=True, - help="hash algorithm to use for file content, support by hashlib", + help="Hash algorithm to use for file content, support by hashlib", ) @contextmanager @@ -270,37 +274,42 @@ def _get_os_path(self, path): raise HTTPError(404, "%s is outside root contents directory" % path) return os_path - def _read_notebook(self, os_path, as_version=4, capture_validation_error=None): + def _read_notebook( + self, os_path, as_version=4, capture_validation_error=None, raw: bool = False + ): """Read a notebook from an os path.""" - with self.open(os_path, "r", encoding="utf-8") as f: - try: - return nbformat.read( - f, - as_version=as_version, - capture_validation_error=capture_validation_error, - ) - except Exception as e: - e_orig = e - - # If use_atomic_writing is enabled, we'll guess that it was also - # enabled when this notebook was written and look for a valid - # atomic intermediate. - tmp_path = path_to_intermediate(os_path) - - if not self.use_atomic_writing or not os.path.exists(tmp_path): - raise HTTPError( - 400, - f"Unreadable Notebook: {os_path} {e_orig!r}", - ) + answer = self._read_file(os_path, "text", raw=raw) + + try: + nb = nbformat.reads( + answer[0], + as_version=as_version, + capture_validation_error=capture_validation_error, + ) + + return (nb, answer[2]) if raw else nb + except Exception as e: + e_orig = e + + # If use_atomic_writing is enabled, we'll guess that it was also + # enabled when this notebook was written and look for a valid + # atomic intermediate. + tmp_path = path_to_intermediate(os_path) - # Move the bad file aside, restore the intermediate, and try again. - invalid_file = path_to_invalid(os_path) - replace_file(os_path, invalid_file) - replace_file(tmp_path, os_path) - return self._read_notebook( - os_path, as_version, capture_validation_error=capture_validation_error + if not self.use_atomic_writing or not os.path.exists(tmp_path): + raise HTTPError( + 400, + f"Unreadable Notebook: {os_path} {e_orig!r}", ) + # Move the bad file aside, restore the intermediate, and try again. + invalid_file = path_to_invalid(os_path) + replace_file(os_path, invalid_file) + replace_file(tmp_path, os_path) + return self._read_notebook( + os_path, as_version, capture_validation_error=capture_validation_error, raw=raw + ) + def _save_notebook(self, os_path, nb, capture_validation_error=None): """Save a notebook to an os_path.""" with self.atomic_writing(os_path, encoding="utf-8") as f: @@ -311,26 +320,46 @@ def _save_notebook(self, os_path, nb, capture_validation_error=None): capture_validation_error=capture_validation_error, ) - def _get_hash_from_file(self, os_path): - _, _, h = self._read_file(os_path, "byte", require_hash=True) - return h + def _get_hash(self, byte_content: bytes) -> dict[str, str]: + """Compute the hash hexdigest for the provided bytes. - def _get_hash_from_content(self, bcontent: bytes): - h = hashlib.new(self.hash_algorithm) - h.update(bcontent) - return h.hexdigest() + The hash algorithm is provided by the `hash_algorithm` attribute. - def _read_file(self, os_path, format, require_hash=False): + Parameters + ---------- + byte_content : bytes + The bytes to hash + + Returns + ------- + A dictionary to be appended to a model {"hash": str, "hash_algorithm": str}. + """ + algorithm = self.hash_algorithm + h = hashlib.new(algorithm) + h.update(byte_content) + return {"hash": h.hexdigest(), "hash_algorithm": algorithm} + + def _read_file( + self, os_path: str, format: str, raw: bool = False + ) -> tuple[str, str] | tuple[str, str, bytes]: """Read a non-notebook file. - os_path: The path to be read. - format: - If 'text', the contents will be decoded as UTF-8. - If 'base64', the raw bytes contents will be encoded as base64. - If 'byte', the raw bytes contents will be returned. - If not specified, try to decode as UTF-8, and fall back to base64 - hash: - If require_hash is 'True', return the hash of the file contents as a hex string. + Parameters + ---------- + os_path: str + The path to be read. + format: str + If 'text', the contents will be decoded as UTF-8. + If 'base64', the raw bytes contents will be encoded as base64. + If 'byte', the raw bytes contents will be returned. + If not specified, try to decode as UTF-8, and fall back to base64 + raw: bool + [Optional] If True, will return as third argument the raw bytes content + + Returns + ------- + (content, format, byte_content) It returns the content in the given format + as well as the raw byte content. """ if not os.path.isfile(os_path): raise HTTPError(400, "Cannot read non-file %s" % os_path) @@ -338,18 +367,22 @@ def _read_file(self, os_path, format, require_hash=False): with self.open(os_path, "rb") as f: bcontent = f.read() - # Calculate hash value if required - hash_value = self._get_hash_from_content(bcontent) if require_hash else None - if format == "byte": # Not for http response but internal use - return bcontent, "byte", hash_value + return (bcontent, "byte", bcontent) if raw else (bcontent, "byte") if format is None or format == "text": # Try to interpret as unicode if format is unknown or if unicode # was explicitly requested. try: - return bcontent.decode("utf8"), "text", hash_value + return ( + (bcontent.decode("utf8"), "text", bcontent) + if raw + else ( + bcontent.decode("utf8"), + "text", + ) + ) except UnicodeError as e: if format == "text": raise HTTPError( @@ -357,7 +390,14 @@ def _read_file(self, os_path, format, require_hash=False): "%s is not UTF-8 encoded" % os_path, reason="bad format", ) from e - return encodebytes(bcontent).decode("ascii"), "base64", hash_value + return ( + (encodebytes(bcontent).decode("ascii"), "base64", bcontent) + if raw + else ( + encodebytes(bcontent).decode("ascii"), + "base64", + ) + ) def _save_file(self, os_path, content, format): """Save content of a generic file.""" @@ -391,40 +431,46 @@ async def _copy(self, src, dest): """ await async_copy2_safe(src, dest, log=self.log) - async def _read_notebook(self, os_path, as_version=4, capture_validation_error=None): + async def _read_notebook( + self, os_path, as_version=4, capture_validation_error=None, raw: bool = False + ): """Read a notebook from an os path.""" - with self.open(os_path, encoding="utf-8") as f: - try: - return await run_sync( - partial( - nbformat.read, - as_version=as_version, - capture_validation_error=capture_validation_error, - ), - f, - ) - except Exception as e: - e_orig = e - - # If use_atomic_writing is enabled, we'll guess that it was also - # enabled when this notebook was written and look for a valid - # atomic intermediate. - tmp_path = path_to_intermediate(os_path) - - if not self.use_atomic_writing or not os.path.exists(tmp_path): - raise HTTPError( - 400, - f"Unreadable Notebook: {os_path} {e_orig!r}", - ) + answer = await self._read_file(os_path, "text", raw) + + try: + nb = await run_sync( + partial( + nbformat.reads, + as_version=as_version, + capture_validation_error=capture_validation_error, + ), + answer[0], + ) + return (nb, answer[2]) if raw else nb + except Exception as e: + e_orig = e - # Move the bad file aside, restore the intermediate, and try again. - invalid_file = path_to_invalid(os_path) - await async_replace_file(os_path, invalid_file) - await async_replace_file(tmp_path, os_path) - return await self._read_notebook( - os_path, as_version, capture_validation_error=capture_validation_error + # If use_atomic_writing is enabled, we'll guess that it was also + # enabled when this notebook was written and look for a valid + # atomic intermediate. + tmp_path = path_to_intermediate(os_path) + + if not self.use_atomic_writing or not os.path.exists(tmp_path): + raise HTTPError( + 400, + f"Unreadable Notebook: {os_path} {e_orig!r}", ) + # Move the bad file aside, restore the intermediate, and try again. + invalid_file = path_to_invalid(os_path) + await async_replace_file(os_path, invalid_file) + await async_replace_file(tmp_path, os_path) + answer = await self._read_notebook( + os_path, as_version, capture_validation_error=capture_validation_error, raw=raw + ) + + return answer + async def _save_notebook(self, os_path, nb, capture_validation_error=None): """Save a notebook to an os_path.""" with self.atomic_writing(os_path, encoding="utf-8") as f: @@ -438,17 +484,27 @@ async def _save_notebook(self, os_path, nb, capture_validation_error=None): f, ) - async def _read_file(self, os_path, format, require_hash=False): + async def _read_file( + self, os_path: str, format: str, raw: bool = False + ) -> tuple[str, str] | tuple[str, str, bytes]: """Read a non-notebook file. - os_path: The path to be read. - format: - If 'text', the contents will be decoded as UTF-8. - If 'base64', the raw bytes contents will be encoded as base64. - If 'byte', the raw bytes contents will be returned. - If not specified, try to decode as UTF-8, and fall back to base64 - hash: - If require_hash is 'True', return the hash of the file contents as a hex string. + Parameters + ---------- + os_path: str + The path to be read. + format: str + If 'text', the contents will be decoded as UTF-8. + If 'base64', the raw bytes contents will be encoded as base64. + If 'byte', the raw bytes contents will be returned. + If not specified, try to decode as UTF-8, and fall back to base64 + raw: bool + [Optional] If True, will return as third argument the raw bytes content + + Returns + ------- + (content, format, byte_content) It returns the content in the given format + as well as the raw byte content. """ if not os.path.isfile(os_path): raise HTTPError(400, "Cannot read non-file %s" % os_path) @@ -456,20 +512,22 @@ async def _read_file(self, os_path, format, require_hash=False): with self.open(os_path, "rb") as f: bcontent = await run_sync(f.read) - if require_hash: - hash_value = await self._get_hash_from_content(bcontent) - else: - hash_value = None - if format == "byte": # Not for http response but internal use - return bcontent, "byte", hash_value + return (bcontent, "byte", bcontent) if raw else (bcontent, "byte") if format is None or format == "text": # Try to interpret as unicode if format is unknown or if unicode # was explicitly requested. try: - return bcontent.decode("utf8"), "text", hash_value + return ( + (bcontent.decode("utf8"), "text", bcontent) + if raw + else ( + bcontent.decode("utf8"), + "text", + ) + ) except UnicodeError as e: if format == "text": raise HTTPError( @@ -477,7 +535,11 @@ async def _read_file(self, os_path, format, require_hash=False): "%s is not UTF-8 encoded" % os_path, reason="bad format", ) from e - return encodebytes(bcontent).decode("ascii"), "base64", hash_value + return ( + (encodebytes(bcontent).decode("ascii"), "base64", bcontent) + if raw + else (encodebytes(bcontent).decode("ascii"), "base64") + ) async def _save_file(self, os_path, content, format): """Save content of a generic file.""" @@ -497,12 +559,3 @@ async def _save_file(self, os_path, content, format): with self.atomic_writing(os_path, text=False) as f: await run_sync(f.write, bcontent) - - async def _get_hash_from_content(self, bcontent: bytes): - h = hashlib.new(self.hash_algorithm) - await run_sync(h.update, bcontent) - return h.hexdigest() - - async def _get_hash_from_file(self, os_path): - _, _, h = await self._read_file(os_path, "byte", require_hash=True) - return h diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index f01d22620a..301de58106 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -48,7 +48,6 @@ class FileContentsManager(FileManagerMixin, ContentsManager): root_dir = Unicode(config=True) max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied") - support_hash = Bool(True, config=False, help="Support require_hash argument in `get`") @default("root_dir") def _default_root_dir(self): @@ -270,7 +269,7 @@ def _base_model(self, path): model["size"] = size model["writable"] = self.is_writable(path) model["hash"] = None - model["hash_algorithm"] = self.hash_algorithm + model["hash_algorithm"] = None return model @@ -356,11 +355,9 @@ def _file_model(self, path, content=True, format=None, require_hash=False): os_path = self._get_os_path(path) model["mimetype"] = mimetypes.guess_type(os_path)[0] - hash_value = None - if content or require_hash: - content, format, hash_value = self._read_file( - os_path, format, require_hash=require_hash - ) + bytes_content = None + if content: + content, format, bytes_content = self._read_file(os_path, format, raw=True) if model["mimetype"] is None: default_mime = { "text": "text/plain", @@ -371,9 +368,13 @@ def _file_model(self, path, content=True, format=None, require_hash=False): model.update( content=content, format=format, - hash=hash_value, ) + if require_hash: + if bytes_content is None: + bytes_content, _ = self._read_file(os_path, "byte") + model.update(**self._get_hash(bytes_content)) + return model def _notebook_model(self, path, content=True, require_hash=False): @@ -388,22 +389,25 @@ def _notebook_model(self, path, content=True, require_hash=False): model["type"] = "notebook" os_path = self._get_os_path(path) + bytes_content = None if content: validation_error: dict[str, t.Any] = {} - nb = self._read_notebook( - os_path, as_version=4, capture_validation_error=validation_error + nb, bytes_content = self._read_notebook( + os_path, as_version=4, capture_validation_error=validation_error, raw=True ) self.mark_trusted_cells(nb, path) model["content"] = nb model["format"] = "json" self.validate_notebook_model(model, validation_error) + if require_hash: - # FIXME: Here we may read file twice while content=True - model["hash"] = self._get_hash_from_file(os_path) + if bytes_content is None: + bytes_content, _ = self._read_file(os_path, "byte") + model.update(**self._get_hash(bytes_content)) return model - def get(self, path, content=True, type=None, format=None, require_hash=None): + def get(self, path, content=True, type=None, format=None, require_hash=False): """Takes a path for an entity and returns its model Parameters @@ -822,10 +826,9 @@ async def _file_model(self, path, content=True, format=None, require_hash=False) os_path = self._get_os_path(path) model["mimetype"] = mimetypes.guess_type(os_path)[0] - if content or require_hash: - content, format, hash_value = await self._read_file( - os_path, format, require_hash=require_hash - ) + bytes_content = None + if content: + content, format, bytes_content = await self._read_file(os_path, format, raw=True) if model["mimetype"] is None: default_mime = { "text": "text/plain", @@ -833,7 +836,15 @@ async def _file_model(self, path, content=True, format=None, require_hash=False) }[format] model["mimetype"] = default_mime - model.update(content=content, format=format, hash=hash_value) + model.update( + content=content, + format=format, + ) + + if require_hash: + if bytes_content is None: + bytes_content, _ = await self._read_file(os_path, "byte") + model.update(**self._get_hash(bytes_content)) return model @@ -847,18 +858,21 @@ async def _notebook_model(self, path, content=True, require_hash=False): model["type"] = "notebook" os_path = self._get_os_path(path) + bytes_content = None if content: validation_error: dict[str, t.Any] = {} - nb = await self._read_notebook( - os_path, as_version=4, capture_validation_error=validation_error + nb, bytes_content = await self._read_notebook( + os_path, as_version=4, capture_validation_error=validation_error, raw=True ) self.mark_trusted_cells(nb, path) model["content"] = nb model["format"] = "json" self.validate_notebook_model(model, validation_error) + if require_hash: - # FIXME: Here we may read file twice while content=True - model["hash"] = await self._get_hash_from_file(os_path) + if bytes_content is None: + bytes_content, _ = await self._read_file(os_path, "byte") + model.update(**(self._get_hash(bytes_content))) return model diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index cbd71a1b27..a7c7ffff17 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -23,20 +23,20 @@ AUTH_RESOURCE = "contents" -def _validate_in_or_not(expect_in_model: bool, model: Dict[str, Any], maybe_none_keys: List[str]): +def _validate_keys(expect_defined: bool, model: Dict[str, Any], keys: List[str]): """ - Validate that the keys in maybe_none_keys are None or not None + Validate that the keys are defined (i.e. not None) or not (i.e. None) """ - if expect_in_model: - errors = [key for key in maybe_none_keys if model[key] is None] + if expect_defined: + errors = [key for key in keys if model[key] is None] if errors: raise web.HTTPError( 500, f"Keys unexpectedly None: {errors}", ) else: - errors = {key: model[key] for key in maybe_none_keys if model[key] is not None} # type: ignore[assignment] + errors = {key: model[key] for key in keys if model[key] is not None} # type: ignore[assignment] if errors: raise web.HTTPError( 500, @@ -44,7 +44,7 @@ def _validate_in_or_not(expect_in_model: bool, model: Dict[str, Any], maybe_none ) -def validate_model(model, expect_content, expect_hash): +def validate_model(model, expect_content=False, expect_hash=False): """ Validate a model returned by a ContentsManager method. @@ -63,9 +63,9 @@ def validate_model(model, expect_content, expect_hash): "mimetype", "content", "format", - "hash", - "hash_algorithm", } + if expect_hash: + required_keys.update(["hash", "hash_algorithm"]) missing = required_keys - set(model.keys()) if missing: raise web.HTTPError( @@ -74,9 +74,9 @@ def validate_model(model, expect_content, expect_hash): ) content_keys = ["content", "format"] - hash_keys = ["hash"] - _validate_in_or_not(expect_content, model, content_keys) - _validate_in_or_not(expect_hash, model, hash_keys) + _validate_keys(expect_content, model, content_keys) + if expect_hash: + _validate_keys(expect_hash, model, ["hash", "hash_algorithm"]) class ContentsAPIHandler(APIHandler): @@ -139,28 +139,39 @@ async def get(self, path=""): hash_str = self.get_query_argument("hash", default="0") if hash_str not in {"0", "1"}: - raise web.HTTPError(400, "Content %r is invalid" % hash_str) - require_hash = int(hash_str or "") + raise web.HTTPError(400, f"Content {hash_str!r} is invalid") + require_hash = int(hash_str) if not cm.allow_hidden and await ensure_async(cm.is_hidden(path)): await self._finish_error( HTTPStatus.NOT_FOUND, f"file or directory {path!r} does not exist" ) - kwargs = { - "path": path, - "type": type, - "format": format, - "content": content, - } - - # See https://github.com/jupyter-server/jupyter_server/issues/1366 - # try not breaking `ContentManager.get` API while intro `hash` argument to ContentManager - if cm.support_hash: - kwargs["require_hash"] = require_hash try: - model = await ensure_async(self.contents_manager.get(**kwargs)) - validate_model(model, expect_content=content, expect_hash=require_hash) + expect_hash = require_hash + try: + model = await ensure_async( + self.contents_manager.get( + path=path, + type=type, + format=format, + content=content, + require_hash=require_hash, + ) + ) + except TypeError: + # Fallback for ContentsManager not handling the require_hash argument + # introduced in 2.11 + expect_hash = False + model = await ensure_async( + self.contents_manager.get( + path=path, + type=type, + format=format, + content=content, + ) + ) + validate_model(model, expect_content=content, expect_hash=expect_hash) self._finish_model(model, location=False) except web.HTTPError as exc: # 404 is okay in this context, catch exception and return 404 code to prevent stack trace on client @@ -190,7 +201,7 @@ async def patch(self, path=""): raise web.HTTPError(400, f"Cannot rename file or directory {path!r}") model = await ensure_async(cm.update(model, path)) - validate_model(model, expect_content=False, expect_hash=False) + validate_model(model) self._finish_model(model) async def _copy(self, copy_from, copy_to=None): @@ -203,7 +214,7 @@ async def _copy(self, copy_from, copy_to=None): ) model = await ensure_async(self.contents_manager.copy(copy_from, copy_to)) self.set_status(201) - validate_model(model, expect_content=False, expect_hash=False) + validate_model(model) self._finish_model(model) async def _upload(self, model, path): @@ -211,7 +222,7 @@ async def _upload(self, model, path): self.log.info("Uploading file to %s", path) model = await ensure_async(self.contents_manager.new(model, path)) self.set_status(201) - validate_model(model, expect_content=False, expect_hash=False) + validate_model(model) self._finish_model(model) async def _new_untitled(self, path, type="", ext=""): @@ -221,7 +232,7 @@ async def _new_untitled(self, path, type="", ext=""): self.contents_manager.new_untitled(path=path, type=type, ext=ext) ) self.set_status(201) - validate_model(model, expect_content=False, expect_hash=False) + validate_model(model) self._finish_model(model) async def _save(self, model, path): @@ -230,7 +241,7 @@ async def _save(self, model, path): if not chunk or chunk == -1: # Avoid tedious log information self.log.info("Saving file at %s", path) model = await ensure_async(self.contents_manager.save(model, path)) - validate_model(model, expect_content=False, expect_hash=False) + validate_model(model) self._finish_model(model) @web.authenticated diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 1c0455c4c9..b12a2055ec 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -111,7 +111,6 @@ def _validate_preferred_dir(self, proposal): return value allow_hidden = Bool(False, config=True, help="Allow access to hidden files") - support_hash = Bool(False, config=False, help="Support require_hash argument in `get`") notary = Instance(sign.NotebookNotary) @@ -448,14 +447,15 @@ def exists(self, path): """ return self.file_exists(path) or self.dir_exists(path) - def get(self, path, content=True, type=None, format=None): - """ - Get a file or directory model. + def get(self, path, content=True, type=None, format=None, require_hash=False): + """Get a file or directory model. + + Parameters + ---------- + require_hash : bool + Whether the file hash must be returned or not. - Support for hash: - If a ContentManager supports calculating the hash value of a file, - `ContentManager.support_hash` should be True and this function will accept an `require_hash` parameter, - will return a dict with `hash` and `hash_algorithm` key. + *Changed in version 2.11*: The *require_hash* parameter was added. """ raise NotImplementedError @@ -857,14 +857,15 @@ async def exists(self, path): self.dir_exists(path) ) - async def get(self, path, content=True, type=None, format=None): - """ - Get a file or directory model. + async def get(self, path, content=True, type=None, format=None, require_hash=False): + """Get a file or directory model. + + Parameters + ---------- + require_hash : bool + Whether the file hash must be returned or not. - Support for hash: - If a ContentManager supports calculating the hash value of a file, - `ContentManager.support_hash` should be True and this function will accept an `require_hash` parameter, - will return a dict with `hash` and `hash_algorithm` key. + *Changed in version 2.11*: The *require_hash* parameter was added. """ raise NotImplementedError diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index abab48445a..b74ee8f62a 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -97,9 +97,8 @@ async def test_get_nb_contents(jp_fetch, contents, path, name): assert model["path"] == nbpath assert model["type"] == "notebook" assert "content" in model - assert "hash" in model - assert model["hash"] == None - assert "hash_algorithm" in model + assert model["hash"] is None + assert model["hash_algorithm"] is None assert model["format"] == "json" assert "metadata" in model["content"] assert isinstance(model["content"]["metadata"], dict) @@ -114,9 +113,8 @@ async def test_get_nb_hash(jp_fetch, contents, path, name): assert model["name"] == nbname assert model["path"] == nbpath assert model["type"] == "notebook" - assert "hash" in model assert model["hash"] - assert "hash_algorithm" in model + assert model["hash_algorithm"] assert "metadata" in model["content"] assert isinstance(model["content"]["metadata"], dict) @@ -221,7 +219,7 @@ async def test_get_text_file_hash(jp_fetch, contents, path, name): assert model["path"] == txtpath assert "hash" in model assert model["hash"] - assert "hash_algorithm" in model + assert model["hash_algorithm"] assert model["format"] == "text" assert model["type"] == "file" diff --git a/tests/services/contents/test_fileio.py b/tests/services/contents/test_fileio.py index c31769f13c..12752ee810 100644 --- a/tests/services/contents/test_fileio.py +++ b/tests/services/contents/test_fileio.py @@ -137,16 +137,16 @@ def test_path_to_invalid(tmpdir): @pytest.mark.skipif(os.name == "nt", reason="test fails on Windows") -def test_file_manager_mixin(tmpdir): +def test_file_manager_mixin(tmp_path): mixin = FileManagerMixin() mixin.log = logging.getLogger() - bad_content = tmpdir / "bad_content.ipynb" + bad_content = tmp_path / "bad_content.ipynb" bad_content.write_text("{}", "utf8") # Same as `echo -n {} | sha256sum` - assert ( - mixin._get_hash_from_file(bad_content) - == "44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a" - ) + assert mixin._get_hash(bad_content.read_bytes()) == { + "hash": "44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a", + "hash_algorithm": "sha256", + } with pytest.raises(HTTPError): mixin._read_notebook(bad_content) other = path_to_intermediate(bad_content) @@ -157,10 +157,10 @@ def test_file_manager_mixin(tmpdir): validate(nb) with pytest.raises(HTTPError): - mixin._read_file(tmpdir, "text") + mixin._read_file(tmp_path, "text") with pytest.raises(HTTPError): - mixin._save_file(tmpdir / "foo", "foo", "bar") + mixin._save_file(tmp_path / "foo", "foo", "bar") @pytest.mark.skipif(os.name == "nt", reason="test fails on Windows") @@ -169,18 +169,18 @@ async def test_async_file_manager_mixin(tmpdir): mixin.log = logging.getLogger() bad_content = tmpdir / "bad_content.ipynb" bad_content.write_text("{}", "utf8") - # Same as `echo -n {} | sha256sum` - assert ( - await mixin._get_hash_from_file(bad_content) - == "44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a" - ) with pytest.raises(HTTPError): await mixin._read_notebook(bad_content) other = path_to_intermediate(bad_content) with open(other, "w") as fid: json.dump(new_notebook(), fid) mixin.use_atomic_writing = True - nb = await mixin._read_notebook(bad_content) + nb, bcontent = await mixin._read_notebook(bad_content, raw=True) + # Same as `echo -n {} | sha256sum` + assert mixin._get_hash(bcontent) == { + "hash": "4747f9680816e352a697d0fb69d82334457cdd1e46f053e800859833d3e6003e", + "hash_algorithm": "sha256", + } validate(nb) with pytest.raises(HTTPError): @@ -188,3 +188,30 @@ async def test_async_file_manager_mixin(tmpdir): with pytest.raises(HTTPError): await mixin._save_file(tmpdir / "foo", "foo", "bar") + + +async def test_AsyncFileManagerMixin_read_notebook_no_raw(tmpdir): + mixin = AsyncFileManagerMixin() + mixin.log = logging.getLogger() + bad_content = tmpdir / "bad_content.ipynb" + bad_content.write_text("{}", "utf8") + + other = path_to_intermediate(bad_content) + with open(other, "w") as fid: + json.dump(new_notebook(), fid) + mixin.use_atomic_writing = True + answer = await mixin._read_notebook(bad_content) + + assert not isinstance(answer, tuple) + + +async def test_AsyncFileManagerMixin_read_file_no_raw(tmpdir): + mixin = AsyncFileManagerMixin() + mixin.log = logging.getLogger() + file_path = tmpdir / "bad_content.text" + file_path.write_text("blablabla", "utf8") + + mixin.use_atomic_writing = True + answer = await mixin._read_file(file_path, "text") + + assert len(answer) == 2 diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 3c3b480132..e718036b0b 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -573,6 +573,14 @@ async def test_get(jp_contents_manager): nb_with_hash = await ensure_async(cm.get(path, require_hash=True)) assert nb_with_hash["hash"] + assert nb_with_hash["hash_algorithm"] + + # Get the hash without the content + nb_with_hash = await ensure_async(cm.get(path, content=False, require_hash=True)) + assert nb_with_hash["content"] is None + assert nb_with_hash["format"] is None + assert nb_with_hash["hash"] + assert nb_with_hash["hash_algorithm"] # Test in sub-directory sub_dir = "/foo/" @@ -597,13 +605,34 @@ async def test_get(jp_contents_manager): "path": "foo/untitled.txt", "type": "file", "writable": True, + "hash_algorithm": cm.hash_algorithm, + } + # Assert expected model is in file_model + for key, value in expected_model.items(): + assert file_model[key] == value + assert "created" in file_model + assert "last_modified" in file_model + assert file_model["hash"] + + # Get hash without content + file_model = await ensure_async(cm.get(file_model_path, content=False, require_hash=True)) + expected_model = { + "content": None, + "format": None, + "mimetype": "text/plain", + "name": "untitled.txt", + "path": "foo/untitled.txt", + "type": "file", + "writable": True, + "hash_algorithm": cm.hash_algorithm, } + # Assert expected model is in file_model for key, value in expected_model.items(): assert file_model[key] == value assert "created" in file_model assert "last_modified" in file_model - assert "hash" in file_model + assert file_model["hash"] # Create a sub-sub directory to test getting directory contents with a # subdir. diff --git a/tests/services/contents/test_manager_no_hash.py b/tests/services/contents/test_manager_no_hash.py new file mode 100644 index 0000000000..511a8d319b --- /dev/null +++ b/tests/services/contents/test_manager_no_hash.py @@ -0,0 +1,44 @@ +import json + +import pytest + +from jupyter_server.services.contents.filemanager import ( + AsyncFileContentsManager, +) + + +class NoHashFileManager(AsyncFileContentsManager): + """FileManager prior to 2.11 that introduce the ability to request file hash.""" + + def _base_model(self, path): + """Drop new attributes from model.""" + model = super()._base_model(path) + + del model["hash"] + del model["hash_algorithm"] + + return model + + async def get(self, path, content=True, type=None, format=None): + """Get without the new `require_hash` argument""" + model = await super().get(path, content=content, type=type, format=format) + return model + + +@pytest.fixture +def jp_server_config(jp_server_config): + jp_server_config["ServerApp"]["contents_manager_class"] = NoHashFileManager + return jp_server_config + + +async def test_manager_no_hash_support(tmp_path, jp_root_dir, jp_fetch): + # Create some content + path = "dummy.txt" + (jp_root_dir / path).write_text("blablabla", encoding="utf-8") + + response = await jp_fetch("api", "contents", path, method="GET", params=dict(hash="1")) + + model = json.loads(response.body) + + assert "hash" not in model + assert "hash_algorithm" not in model