From 37a6b1fbdfe77553d60d688e663fa9abbc130af3 Mon Sep 17 00:00:00 2001 From: david qiu Date: Tue, 25 Oct 2022 13:28:50 -0700 Subject: [PATCH] flesh out abstract and arbitrary contents managers (#1) * set default only when config doesn't specify file_id_manager_class * flesh out abstract and arbitrary file ID managers - make root_dir non-configurable - add get_handlers_by_action() method * edit docstring Co-authored-by: David Brochart * update help string Co-authored-by: David Brochart * update log Co-authored-by: David Brochart * update log Co-authored-by: David Brochart * Rename AbstractFileIdManager to BaseFileIdManager Co-authored-by: David Brochart --- jupyter_server_fileid/extension.py | 27 ++-- jupyter_server_fileid/manager.py | 201 +++++++++++++++++++------ jupyter_server_fileid/pytest_plugin.py | 13 +- tests/test_manager.py | 82 +++++----- 4 files changed, 224 insertions(+), 99 deletions(-) diff --git a/jupyter_server_fileid/extension.py b/jupyter_server_fileid/extension.py index d13cefd..b130731 100644 --- a/jupyter_server_fileid/extension.py +++ b/jupyter_server_fileid/extension.py @@ -4,7 +4,7 @@ from traitlets import Type from jupyter_server_fileid.manager import ( - AbstractFileIdManager, + BaseFileIdManager, ArbitraryFileIdManager, LocalFileIdManager, ) @@ -17,7 +17,7 @@ class FileIdExtension(ExtensionApp): name = "jupyter_server_fileid" file_id_manager_class = Type( - klass=AbstractFileIdManager, + klass=BaseFileIdManager, help="""File ID manager instance to use. Defaults to: @@ -25,19 +25,24 @@ class FileIdExtension(ExtensionApp): - ArbitraryFileIdManager otherwise. """, config=True, + default_value=None, + allow_none=True, ) def initialize_settings(self): - if self.file_id_manager_class == AbstractFileIdManager: + if self.file_id_manager_class is None: if isinstance(self.settings["contents_manager"], FileContentsManager): - self.log.debug( - "Contents manager is a FileContentsManager, defaulting to LocalFileIdManager." + self.log.info( + "Contents manager is a FileContentsManager. Defaulting to LocalFileIdManager." ) self.file_id_manager_class = LocalFileIdManager else: + self.log.info( + "Contents manager is not a FileContentsManager. Defaulting to ArbitraryFileIdManager." + ) self.file_id_manager_class = ArbitraryFileIdManager - self.log.debug(f"Configured File ID manager: {self.file_id_manager_class.__name__}") + self.log.info(f"Configured File ID manager: {self.file_id_manager_class.__name__}") file_id_manager = self.file_id_manager_class( log=self.log, root_dir=self.serverapp.root_dir, config=self.config ) @@ -49,15 +54,7 @@ def initialize_settings(self): def initialize_event_listeners(self): file_id_manager = self.settings["file_id_manager"] - - # define event handlers per contents manager action - handlers_by_action = { - "get": None, - "save": lambda data: file_id_manager.save(data["path"]), - "rename": lambda data: file_id_manager.move(data["source_path"], data["path"]), - "copy": lambda data: file_id_manager.copy(data["source_path"], data["path"]), - "delete": lambda data: file_id_manager.delete(data["path"]), - } + handlers_by_action = file_id_manager.get_handlers_by_action() async def cm_listener(logger: EventLogger, schema_id: str, data: dict) -> None: handler = handlers_by_action[data["action"]] diff --git a/jupyter_server_fileid/manager.py b/jupyter_server_fileid/manager.py index 6e6612d..39a1671 100644 --- a/jupyter_server_fileid/manager.py +++ b/jupyter_server_fileid/manager.py @@ -1,8 +1,7 @@ import os import sqlite3 import stat -from abc import ABC, abstractmethod -from typing import Optional +from typing import Any, Callable, Dict, Optional, Union from jupyter_core.paths import jupyter_data_dir from traitlets import TraitError, Unicode, validate @@ -37,41 +36,167 @@ def wrapped(self, *args, **kwargs): return decorator -class AbstractFileIdManager(ABC): - @abstractmethod - def get_id(self, path: str) -> str: - ... +class BaseFileIdManager(LoggingConfigurable): + """ + Base class for File ID manager implementations. All File ID + managers should inherit from this class. + """ - @abstractmethod - def get_path(self, id: str) -> str: - ... + root_dir = Unicode( + help=("The root directory being served by Jupyter server. Must be an absolute path."), config=False + ) + db_path = Unicode( + default_value=default_db_path, + help=( + "The path of the DB file used by File ID manager implementations. " + "Defaults to `jupyter_data_dir()/file_id_manager.db`." + ), + config=True, + ) + + @validate("root_dir", "db_path") + def _validate_abspath_traits(self, proposal): + if proposal["value"] is None: + raise TraitError(f"FileIdManager : {proposal['trait'].name} must not be None") + if not os.path.isabs(proposal["value"]): + raise TraitError( + f"FileIdManager : {proposal['trait'].name} must be an absolute path" + ) + return proposal["value"] -class ArbitraryFileIdManager(AbstractFileIdManager): def __init__(self, *args, **kwargs): - pass + super().__init__(*args, **kwargs) - def get_id(self, path: str) -> str: - return path + def index(self, path: str) -> Union[int, str, None]: + raise NotImplementedError("must be implemented by subclass") + + def get_id(self, path: str) -> Union[int, str, None]: + raise NotImplementedError("must be implemented by subclass") + + def get_path(self, id: Union[int, str]) -> Union[int, str, None]: + raise NotImplementedError("must be implemented by subclass") + + def move(self, old_path: str, new_path: str) -> Union[int, str, None]: + raise NotImplementedError("must be implemented by subclass") + + def copy(self, from_path: str, to_path: str) -> Union[int, str, None]: + raise NotImplementedError("must be implemented by subclass") + + def delete(self, path: str) -> None: + raise NotImplementedError("must be implemented by subclass") + + def save(self, path: str) -> None: + raise NotImplementedError("must be implemented by subclass") + + def get_handlers_by_action(self) -> Dict[str, Optional[Callable[[Dict[str, Any]], Any]]]: + """Returns a dictionary whose keys are contents manager event actions + and whose values are callables invoked upon receipt of an event of the + same action. The callable accepts the body of the event as its only + argument. To ignore an event action, set the value to `None`.""" + raise NotImplementedError("must be implemented by subclass") + + +class ArbitraryFileIdManager(BaseFileIdManager): + """ + File ID manager that works on arbitrary filesystems. Each file is assigned a + unique ID. The path is only updated upon calling `move()`, `copy()`, or + `delete()`, e.g. upon receipt of contents manager events emitted by Jupyter + Server 2. + """ + + def __init__(self, *args, **kwargs): + # pass args and kwargs to parent Configurable + super().__init__(*args, **kwargs) + # initialize instance attrs + self._update_cursor = False + # initialize connection with db + self.log.info(f"ArbitraryFileIdManager : Configured root dir: {self.root_dir}") + self.log.info(f"ArbitraryFileIdManager : Configured database path: {self.db_path}") + self.con = sqlite3.connect(self.db_path) + self.log.info("ArbitraryFileIdManager : Successfully connected to database file.") + self.log.info("ArbitraryFileIdManager : Creating File ID tables and indices.") + # do not allow reads to block writes. required when using multiple processes + self.con.execute("PRAGMA journal_mode = WAL") + self.con.execute( + "CREATE TABLE IF NOT EXISTS Files(" + "id INTEGER PRIMARY KEY AUTOINCREMENT, " + "path TEXT NOT NULL UNIQUE" + ")" + ) + self.con.execute("CREATE INDEX IF NOT EXISTS ix_Files_path ON Files (path)") + self.con.commit() + + def index(self, path: str) -> int: + row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone() + existing_id = row and row[0] + + if existing_id: + return existing_id + + # create new record + cursor = self.con.execute("INSERT INTO Files (path) VALUES (?)", (path,)) + self.con.commit() + return cursor.lastrowid # type:ignore + + def get_id(self, path: str) -> Optional[int]: + row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone() + return row and row[0] + + def get_path(self, id: Union[int, str]) -> Optional[int]: + row = self.con.execute("SELECT path FROM Files WHERE id = ?", (id,)).fetchone() + return row and row[0] - def get_path(self, id: str) -> str: + def move(self, old_path: str, new_path: str) -> None: + row = self.con.execute("SELECT id FROM Files WHERE path = ?", (old_path,)).fetchone() + id = row and row[0] + + if id: + self.con.execute("UPDATE Files SET path = ? WHERE path = ?", (new_path, old_path)) + else: + cursor = self.con.execute("INSERT INTO Files (path) VALUES (?)", (new_path,)) + id = cursor.lastrowid + + self.con.commit() return id + def copy(self, from_path: str, to_path: str) -> Optional[int]: + cursor = self.con.execute("INSERT INTO Files (path) VALUES (?)", (to_path,)) + self.con.commit() + return cursor.lastrowid -class LocalFileIdManagerMeta(type(AbstractFileIdManager), type(LoggingConfigurable)): # type: ignore - pass + def delete(self, path: str) -> None: + self.con.execute("DELETE FROM Files WHERE path = ?", (path,)) + self.con.commit() + def save(self, path: str) -> None: + return -class LocalFileIdManager( - AbstractFileIdManager, LoggingConfigurable, metaclass=LocalFileIdManagerMeta -): + def get_handlers_by_action(self) -> Dict[str, Optional[Callable[[Dict[str, Any]], Any]]]: + return { + "get": None, + "save": None, + "rename": lambda data: self.move(data["source_path"], data["path"]), + "copy": lambda data: self.copy(data["source_path"], data["path"]), + "delete": lambda data: self.delete(data["path"]), + } + + def __del__(self): + """Cleans up `ArbitraryFileIdManager` by committing any pending + transactions and closing the connection.""" + if hasattr(self, "con"): + self.con.commit() + self.con.close() + + +class LocalFileIdManager(BaseFileIdManager): """ - Manager that supports tracking files across their lifetime by associating - each with a unique file ID, which is maintained across filesystem operations. + File ID manager that supports tracking files in local filesystems by + associating each with a unique file ID, which is maintained across + filesystem operations. Notes ----- - All private helper methods prefixed with an underscore (except `__init__()`) do NOT commit their SQL statements in a transaction via `self.con.commit()`. This responsibility is delegated to the public method calling them to @@ -80,19 +205,6 @@ class LocalFileIdManager( performed during a method's procedure body. """ - root_dir = Unicode( - help=("The root being served by Jupyter server. Must be an absolute path."), config=True - ) - - db_path = Unicode( - default_value=default_db_path, - help=( - "The path of the DB file used by `LocalFileIdManager`. " - "Defaults to `jupyter_data_dir()/file_id_manager.db`." - ), - config=True, - ) - def __init__(self, *args, **kwargs): # pass args and kwargs to parent Configurable super().__init__(*args, **kwargs) @@ -124,16 +236,6 @@ def __init__(self, *args, **kwargs): self.con.execute("CREATE INDEX IF NOT EXISTS ix_Files_is_dir ON Files (is_dir)") self.con.commit() - @validate("root_dir", "db_path") - def _validate_abspath_traits(self, proposal): - if proposal["value"] is None: - raise TraitError(f"LocalFileIdManager : {proposal['trait'].name} must not be None") - if not os.path.isabs(proposal["value"]): - raise TraitError( - f"LocalFileIdManager : {proposal['trait'].name} must be an absolute path" - ) - return self._normalize_path(proposal["value"]) - def _index_all(self): """Recursively indexes all directories under the server root.""" self._index_dir_recursively(self.root_dir, self._stat(self.root_dir)) @@ -616,6 +718,15 @@ def save(self, path): self._update(id, stat_info) self.con.commit() + def get_handlers_by_action(self) -> Dict[str, Optional[Callable[[Dict[str, Any]], Any]]]: + return { + "get": None, + "save": lambda data: self.save(data["path"]), + "rename": lambda data: self.move(data["source_path"], data["path"]), + "copy": lambda data: self.copy(data["source_path"], data["path"]), + "delete": lambda data: self.delete(data["path"]), + } + def __del__(self): """Cleans up `LocalFileIdManager` by committing any pending transactions and closing the connection.""" diff --git a/jupyter_server_fileid/pytest_plugin.py b/jupyter_server_fileid/pytest_plugin.py index 3a8c97a..7a3492a 100644 --- a/jupyter_server_fileid/pytest_plugin.py +++ b/jupyter_server_fileid/pytest_plugin.py @@ -4,7 +4,7 @@ import pytest -from jupyter_server_fileid.manager import LocalFileIdManager +from jupyter_server_fileid.manager import ArbitraryFileIdManager, LocalFileIdManager @pytest.fixture @@ -40,6 +40,17 @@ def fid_manager(fid_db_path, jp_root_dir): return fid_manager +@pytest.fixture(params=["local", "arbitrary"]) +def any_fid_manager(request, fid_db_path, jp_root_dir): + """Parametrized fixture that runs the test with each of the default File ID + manager implementations.""" + class_by_param = {"local": LocalFileIdManager, "arbitrary": ArbitraryFileIdManager} + + fid_manager = class_by_param[request.param](db_path=fid_db_path, root_dir=str(jp_root_dir)) + fid_manager.con.execute("PRAGMA journal_mode = OFF") # type: ignore[attr-defined] + return fid_manager + + @pytest.fixture def fs_helpers(jp_root_dir): class FsHelpers: diff --git a/tests/test_manager.py b/tests/test_manager.py index c0114aa..9ec55e6 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -3,7 +3,7 @@ import pytest from traitlets import TraitError -from jupyter_server_fileid.manager import LocalFileIdManager +from jupyter_server_fileid.manager import ArbitraryFileIdManager, LocalFileIdManager @pytest.fixture @@ -79,21 +79,27 @@ def get_path_nosync(fid_manager, id): def test_validates_root_dir(fid_db_path): with pytest.raises(TraitError, match="must be an absolute path"): LocalFileIdManager(root_dir=os.path.join("some", "rel", "path"), db_path=fid_db_path) + with pytest.raises(TraitError, match="must be an absolute path"): + ArbitraryFileIdManager(root_dir=os.path.join("some", "rel", "path"), db_path=fid_db_path) def test_validates_db_path(jp_root_dir): with pytest.raises(TraitError, match="must be an absolute path"): LocalFileIdManager(root_dir=str(jp_root_dir), db_path=os.path.join("some", "rel", "path")) + with pytest.raises(TraitError, match="must be an absolute path"): + ArbitraryFileIdManager( + root_dir=str(jp_root_dir), db_path=os.path.join("some", "rel", "path") + ) -def test_index(fid_manager, test_path): - id = fid_manager.index(test_path) +def test_index(any_fid_manager, test_path): + id = any_fid_manager.index(test_path) assert id is not None -def test_index_already_indexed(fid_manager, test_path): - id = fid_manager.index(test_path) - assert id == fid_manager.index(test_path) +def test_index_already_indexed(any_fid_manager, test_path): + id = any_fid_manager.index(test_path) + assert id == any_fid_manager.index(test_path) def test_index_symlink(fid_manager, test_path): @@ -169,11 +175,11 @@ def test_index_crtime(fid_manager, test_path, stub_stat_crtime): assert fid_manager.index(test_path) == id -def test_getters_indexed(fid_manager, test_path): - id = fid_manager.index(test_path) +def test_getters_indexed(any_fid_manager, test_path): + id = any_fid_manager.index(test_path) - assert fid_manager.get_id(test_path) == id - assert fid_manager.get_path(id) == test_path + assert any_fid_manager.get_id(test_path) == id + assert any_fid_manager.get_path(id) == test_path def test_getters_nonnormalized(fid_manager, test_path, fs_helpers): @@ -197,8 +203,8 @@ def test_getters_oob_delete(fid_manager, test_path, fs_helpers): assert fid_manager.get_path(id) is None -def test_get_id_unindexed(fid_manager, test_path_child): - assert fid_manager.get_id(test_path_child) is None +def test_get_id_unindexed(any_fid_manager, test_path_child): + assert any_fid_manager.get_id(test_path_child) is None # test out-of-band move detection for FIM.get_id() @@ -313,36 +319,36 @@ def test_get_path_oob_move_deeply_nested( assert fid_manager.get_path(id) == new_test_path -def test_move_unindexed(fid_manager, old_path, new_path, fs_helpers): +def test_move_unindexed(any_fid_manager, old_path, new_path, fs_helpers): fs_helpers.move(old_path, new_path) - id = fid_manager.move(old_path, new_path) + id = any_fid_manager.move(old_path, new_path) assert id is not None - assert fid_manager.get_id(old_path) is None - assert fid_manager.get_id(new_path) is id - assert fid_manager.get_path(id) == new_path + assert any_fid_manager.get_id(old_path) is None + assert any_fid_manager.get_id(new_path) is id + assert any_fid_manager.get_path(id) == new_path -def test_move_indexed(fid_manager, old_path, new_path, fs_helpers): - old_id = fid_manager.index(old_path) +def test_move_indexed(any_fid_manager, old_path, new_path, fs_helpers): + old_id = any_fid_manager.index(old_path) fs_helpers.move(old_path, new_path) - new_id = fid_manager.move(old_path, new_path) + new_id = any_fid_manager.move(old_path, new_path) assert old_id == new_id - assert fid_manager.get_id(old_path) is None - assert fid_manager.get_id(new_path) == new_id - assert fid_manager.get_path(old_id) == new_path + assert any_fid_manager.get_id(old_path) is None + assert any_fid_manager.get_id(new_path) == new_id + assert any_fid_manager.get_path(old_id) == new_path # test for disjoint move handling # disjoint move: any out-of-band move that does not preserve stat info -def test_disjoint_move_indexed(fid_manager, old_path, new_path, fs_helpers): - old_id = fid_manager.index(old_path) +def test_disjoint_move_indexed(any_fid_manager, old_path, new_path, fs_helpers): + old_id = any_fid_manager.index(old_path) fs_helpers.delete(old_path) fs_helpers.touch(new_path, dir=True) - new_id = fid_manager.move(old_path, new_path) + new_id = any_fid_manager.move(old_path, new_path) assert old_id == new_id @@ -371,10 +377,10 @@ def test_move_recursive( assert get_id_nosync(fid_manager, new_path_grandchild) == grandchild_id -def test_copy(fid_manager, old_path, new_path, fs_helpers): +def test_copy(any_fid_manager, old_path, new_path, fs_helpers): + old_id = any_fid_manager.index(old_path) fs_helpers.copy(old_path, new_path) - new_id = fid_manager.copy(old_path, new_path) - old_id = fid_manager.get_id(old_path) + new_id = any_fid_manager.copy(old_path, new_path) assert old_id is not None assert new_id is not None @@ -403,14 +409,14 @@ def test_copy_recursive( assert fid_manager.get_id(new_path_grandchild) is not None -def test_delete(fid_manager, test_path, fs_helpers): - id = fid_manager.index(test_path) +def test_delete(any_fid_manager, test_path, fs_helpers): + id = any_fid_manager.index(test_path) fs_helpers.delete(test_path) - fid_manager.delete(test_path) + any_fid_manager.delete(test_path) - assert fid_manager.get_id(test_path) is None - assert fid_manager.get_path(id) is None + assert any_fid_manager.get_id(test_path) is None + assert any_fid_manager.get_path(id) is None def test_delete_recursive(fid_manager, test_path, test_path_child, fs_helpers): @@ -423,10 +429,10 @@ def test_delete_recursive(fid_manager, test_path, test_path_child, fs_helpers): assert fid_manager.get_id(test_path_child) is None -def test_save(fid_manager, test_path, fs_helpers): - id = fid_manager.index(test_path) +def test_save(any_fid_manager, test_path, fs_helpers): + id = any_fid_manager.index(test_path) fs_helpers.edit(test_path) - fid_manager.save(test_path) + any_fid_manager.save(test_path) - assert fid_manager.get_id(test_path) == id + assert any_fid_manager.get_id(test_path) == id