Skip to content

Commit

Permalink
flesh out abstract and arbitrary contents managers (#1)
Browse files Browse the repository at this point in the history
* set default only when config doesn't specify file_id_manager_class

* flesh out abstract and arbitrary file ID managers

- make root_dir non-configurable
- add get_handlers_by_action() method

* edit docstring

Co-authored-by: David Brochart <david.brochart@gmail.com>

* update help string

Co-authored-by: David Brochart <david.brochart@gmail.com>

* update log

Co-authored-by: David Brochart <david.brochart@gmail.com>

* update log

Co-authored-by: David Brochart <david.brochart@gmail.com>

* Rename AbstractFileIdManager to BaseFileIdManager

Co-authored-by: David Brochart <david.brochart@gmail.com>
  • Loading branch information
dlqqq and davidbrochart authored Oct 25, 2022
1 parent d21d05f commit 37a6b1f
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 99 deletions.
27 changes: 12 additions & 15 deletions jupyter_server_fileid/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from traitlets import Type

from jupyter_server_fileid.manager import (
AbstractFileIdManager,
BaseFileIdManager,
ArbitraryFileIdManager,
LocalFileIdManager,
)
Expand All @@ -17,27 +17,32 @@ class FileIdExtension(ExtensionApp):
name = "jupyter_server_fileid"

file_id_manager_class = Type(
klass=AbstractFileIdManager,
klass=BaseFileIdManager,
help="""File ID manager instance to use.
Defaults to:
- LocalFileIdManager if contents manager is a FileContentsManager,
- ArbitraryFileIdManager otherwise.
""",
config=True,
default_value=None,
allow_none=True,
)

def initialize_settings(self):
if self.file_id_manager_class == AbstractFileIdManager:
if self.file_id_manager_class is None:
if isinstance(self.settings["contents_manager"], FileContentsManager):
self.log.debug(
"Contents manager is a FileContentsManager, defaulting to LocalFileIdManager."
self.log.info(
"Contents manager is a FileContentsManager. Defaulting to LocalFileIdManager."
)
self.file_id_manager_class = LocalFileIdManager
else:
self.log.info(
"Contents manager is not a FileContentsManager. Defaulting to ArbitraryFileIdManager."
)
self.file_id_manager_class = ArbitraryFileIdManager

self.log.debug(f"Configured File ID manager: {self.file_id_manager_class.__name__}")
self.log.info(f"Configured File ID manager: {self.file_id_manager_class.__name__}")
file_id_manager = self.file_id_manager_class(
log=self.log, root_dir=self.serverapp.root_dir, config=self.config
)
Expand All @@ -49,15 +54,7 @@ def initialize_settings(self):

def initialize_event_listeners(self):
file_id_manager = self.settings["file_id_manager"]

# define event handlers per contents manager action
handlers_by_action = {
"get": None,
"save": lambda data: file_id_manager.save(data["path"]),
"rename": lambda data: file_id_manager.move(data["source_path"], data["path"]),
"copy": lambda data: file_id_manager.copy(data["source_path"], data["path"]),
"delete": lambda data: file_id_manager.delete(data["path"]),
}
handlers_by_action = file_id_manager.get_handlers_by_action()

async def cm_listener(logger: EventLogger, schema_id: str, data: dict) -> None:
handler = handlers_by_action[data["action"]]
Expand Down
201 changes: 156 additions & 45 deletions jupyter_server_fileid/manager.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import sqlite3
import stat
from abc import ABC, abstractmethod
from typing import Optional
from typing import Any, Callable, Dict, Optional, Union

from jupyter_core.paths import jupyter_data_dir
from traitlets import TraitError, Unicode, validate
Expand Down Expand Up @@ -37,41 +36,167 @@ def wrapped(self, *args, **kwargs):
return decorator


class AbstractFileIdManager(ABC):
@abstractmethod
def get_id(self, path: str) -> str:
...
class BaseFileIdManager(LoggingConfigurable):
"""
Base class for File ID manager implementations. All File ID
managers should inherit from this class.
"""

@abstractmethod
def get_path(self, id: str) -> str:
...
root_dir = Unicode(
help=("The root directory being served by Jupyter server. Must be an absolute path."), config=False
)

db_path = Unicode(
default_value=default_db_path,
help=(
"The path of the DB file used by File ID manager implementations. "
"Defaults to `jupyter_data_dir()/file_id_manager.db`."
),
config=True,
)

@validate("root_dir", "db_path")
def _validate_abspath_traits(self, proposal):
if proposal["value"] is None:
raise TraitError(f"FileIdManager : {proposal['trait'].name} must not be None")
if not os.path.isabs(proposal["value"]):
raise TraitError(
f"FileIdManager : {proposal['trait'].name} must be an absolute path"
)
return proposal["value"]

class ArbitraryFileIdManager(AbstractFileIdManager):
def __init__(self, *args, **kwargs):
pass
super().__init__(*args, **kwargs)

def get_id(self, path: str) -> str:
return path
def index(self, path: str) -> Union[int, str, None]:
raise NotImplementedError("must be implemented by subclass")

def get_id(self, path: str) -> Union[int, str, None]:
raise NotImplementedError("must be implemented by subclass")

def get_path(self, id: Union[int, str]) -> Union[int, str, None]:
raise NotImplementedError("must be implemented by subclass")

def move(self, old_path: str, new_path: str) -> Union[int, str, None]:
raise NotImplementedError("must be implemented by subclass")

def copy(self, from_path: str, to_path: str) -> Union[int, str, None]:
raise NotImplementedError("must be implemented by subclass")

def delete(self, path: str) -> None:
raise NotImplementedError("must be implemented by subclass")

def save(self, path: str) -> None:
raise NotImplementedError("must be implemented by subclass")

def get_handlers_by_action(self) -> Dict[str, Optional[Callable[[Dict[str, Any]], Any]]]:
"""Returns a dictionary whose keys are contents manager event actions
and whose values are callables invoked upon receipt of an event of the
same action. The callable accepts the body of the event as its only
argument. To ignore an event action, set the value to `None`."""
raise NotImplementedError("must be implemented by subclass")


class ArbitraryFileIdManager(BaseFileIdManager):
"""
File ID manager that works on arbitrary filesystems. Each file is assigned a
unique ID. The path is only updated upon calling `move()`, `copy()`, or
`delete()`, e.g. upon receipt of contents manager events emitted by Jupyter
Server 2.
"""

def __init__(self, *args, **kwargs):
# pass args and kwargs to parent Configurable
super().__init__(*args, **kwargs)
# initialize instance attrs
self._update_cursor = False
# initialize connection with db
self.log.info(f"ArbitraryFileIdManager : Configured root dir: {self.root_dir}")
self.log.info(f"ArbitraryFileIdManager : Configured database path: {self.db_path}")
self.con = sqlite3.connect(self.db_path)
self.log.info("ArbitraryFileIdManager : Successfully connected to database file.")
self.log.info("ArbitraryFileIdManager : Creating File ID tables and indices.")
# do not allow reads to block writes. required when using multiple processes
self.con.execute("PRAGMA journal_mode = WAL")
self.con.execute(
"CREATE TABLE IF NOT EXISTS Files("
"id INTEGER PRIMARY KEY AUTOINCREMENT, "
"path TEXT NOT NULL UNIQUE"
")"
)
self.con.execute("CREATE INDEX IF NOT EXISTS ix_Files_path ON Files (path)")
self.con.commit()

def index(self, path: str) -> int:
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone()
existing_id = row and row[0]

if existing_id:
return existing_id

# create new record
cursor = self.con.execute("INSERT INTO Files (path) VALUES (?)", (path,))
self.con.commit()
return cursor.lastrowid # type:ignore

def get_id(self, path: str) -> Optional[int]:
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone()
return row and row[0]

def get_path(self, id: Union[int, str]) -> Optional[int]:
row = self.con.execute("SELECT path FROM Files WHERE id = ?", (id,)).fetchone()
return row and row[0]

def get_path(self, id: str) -> str:
def move(self, old_path: str, new_path: str) -> None:
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (old_path,)).fetchone()
id = row and row[0]

if id:
self.con.execute("UPDATE Files SET path = ? WHERE path = ?", (new_path, old_path))
else:
cursor = self.con.execute("INSERT INTO Files (path) VALUES (?)", (new_path,))
id = cursor.lastrowid

self.con.commit()
return id

def copy(self, from_path: str, to_path: str) -> Optional[int]:
cursor = self.con.execute("INSERT INTO Files (path) VALUES (?)", (to_path,))
self.con.commit()
return cursor.lastrowid

class LocalFileIdManagerMeta(type(AbstractFileIdManager), type(LoggingConfigurable)): # type: ignore
pass
def delete(self, path: str) -> None:
self.con.execute("DELETE FROM Files WHERE path = ?", (path,))
self.con.commit()

def save(self, path: str) -> None:
return

class LocalFileIdManager(
AbstractFileIdManager, LoggingConfigurable, metaclass=LocalFileIdManagerMeta
):
def get_handlers_by_action(self) -> Dict[str, Optional[Callable[[Dict[str, Any]], Any]]]:
return {
"get": None,
"save": None,
"rename": lambda data: self.move(data["source_path"], data["path"]),
"copy": lambda data: self.copy(data["source_path"], data["path"]),
"delete": lambda data: self.delete(data["path"]),
}

def __del__(self):
"""Cleans up `ArbitraryFileIdManager` by committing any pending
transactions and closing the connection."""
if hasattr(self, "con"):
self.con.commit()
self.con.close()


class LocalFileIdManager(BaseFileIdManager):
"""
Manager that supports tracking files across their lifetime by associating
each with a unique file ID, which is maintained across filesystem operations.
File ID manager that supports tracking files in local filesystems by
associating each with a unique file ID, which is maintained across
filesystem operations.
Notes
-----
All private helper methods prefixed with an underscore (except `__init__()`)
do NOT commit their SQL statements in a transaction via `self.con.commit()`.
This responsibility is delegated to the public method calling them to
Expand All @@ -80,19 +205,6 @@ class LocalFileIdManager(
performed during a method's procedure body.
"""

root_dir = Unicode(
help=("The root being served by Jupyter server. Must be an absolute path."), config=True
)

db_path = Unicode(
default_value=default_db_path,
help=(
"The path of the DB file used by `LocalFileIdManager`. "
"Defaults to `jupyter_data_dir()/file_id_manager.db`."
),
config=True,
)

def __init__(self, *args, **kwargs):
# pass args and kwargs to parent Configurable
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -124,16 +236,6 @@ def __init__(self, *args, **kwargs):
self.con.execute("CREATE INDEX IF NOT EXISTS ix_Files_is_dir ON Files (is_dir)")
self.con.commit()

@validate("root_dir", "db_path")
def _validate_abspath_traits(self, proposal):
if proposal["value"] is None:
raise TraitError(f"LocalFileIdManager : {proposal['trait'].name} must not be None")
if not os.path.isabs(proposal["value"]):
raise TraitError(
f"LocalFileIdManager : {proposal['trait'].name} must be an absolute path"
)
return self._normalize_path(proposal["value"])

def _index_all(self):
"""Recursively indexes all directories under the server root."""
self._index_dir_recursively(self.root_dir, self._stat(self.root_dir))
Expand Down Expand Up @@ -616,6 +718,15 @@ def save(self, path):
self._update(id, stat_info)
self.con.commit()

def get_handlers_by_action(self) -> Dict[str, Optional[Callable[[Dict[str, Any]], Any]]]:
return {
"get": None,
"save": lambda data: self.save(data["path"]),
"rename": lambda data: self.move(data["source_path"], data["path"]),
"copy": lambda data: self.copy(data["source_path"], data["path"]),
"delete": lambda data: self.delete(data["path"]),
}

def __del__(self):
"""Cleans up `LocalFileIdManager` by committing any pending transactions and
closing the connection."""
Expand Down
13 changes: 12 additions & 1 deletion jupyter_server_fileid/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from jupyter_server_fileid.manager import LocalFileIdManager
from jupyter_server_fileid.manager import ArbitraryFileIdManager, LocalFileIdManager


@pytest.fixture
Expand Down Expand Up @@ -40,6 +40,17 @@ def fid_manager(fid_db_path, jp_root_dir):
return fid_manager


@pytest.fixture(params=["local", "arbitrary"])
def any_fid_manager(request, fid_db_path, jp_root_dir):
"""Parametrized fixture that runs the test with each of the default File ID
manager implementations."""
class_by_param = {"local": LocalFileIdManager, "arbitrary": ArbitraryFileIdManager}

fid_manager = class_by_param[request.param](db_path=fid_db_path, root_dir=str(jp_root_dir))
fid_manager.con.execute("PRAGMA journal_mode = OFF") # type: ignore[attr-defined]
return fid_manager


@pytest.fixture
def fs_helpers(jp_root_dir):
class FsHelpers:
Expand Down
Loading

0 comments on commit 37a6b1f

Please sign in to comment.