diff --git a/.gitignore b/.gitignore index 36aac677..9450daf1 100644 --- a/.gitignore +++ b/.gitignore @@ -137,7 +137,7 @@ develop-eggs/ downloads/ eggs/ .eggs/ -lib/ +/lib/ lib64/ parts/ sdist/ diff --git a/MANIFEST.in b/MANIFEST.in index 1181ca8f..dcc520df 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ include hydroshare_jupyter_sync/assets/* +include hydroshare_jupyter_sync/labextension/* diff --git a/hydroshare_jupyter_sync/__init__.py b/hydroshare_jupyter_sync/__init__.py index 7c99b763..9b59c674 100644 --- a/hydroshare_jupyter_sync/__init__.py +++ b/hydroshare_jupyter_sync/__init__.py @@ -1,47 +1,58 @@ -''' -This file sets up the jupyter server extension to launch our backend -server when jupyter is launched. +""" +Setup jupyterlab server and lab extensions. Add tornado HTTP handlers to jupyter session. +""" +import json +from pathlib import Path -Author: 2019-20 CUAHSI Olin SCOPE Team -Vicky McDermott, Kyle Combes, Emily Lepert, and Charlie Weiss -''' -# !/usr/bin/python -# -*- coding: utf-8 +# local imports +from .config_setup import ConfigFile +from .handlers import get_route_handlers -import logging -from hydroshare_jupyter_sync.config_reader_writer import get_config_values -from hydroshare_jupyter_sync.index_html import (set_backend_url, - set_frontend_url) -from .server import get_route_handlers -from notebook.utils import url_path_join +# Constants +FRONTEND_PATH = "/sync" +BACKEND_PATH = "/syncApi" + +MODULE_NAME = "hydroshare_jupyter_sync" +EXTENSION_DIRNAME = "labextension" + +PARENT_DIR = Path(__file__).parent.resolve() +EXTENSION_METADATA_PATH = PARENT_DIR / f"{EXTENSION_DIRNAME}/package.json" + +# read metadata from js extension package metadata file, `package.json` +extension_metadata = json.loads(EXTENSION_METADATA_PATH.read_text()) + + +def _jupyter_labextension_paths(): + return [{"src": EXTENSION_DIRNAME, "dest": extension_metadata["name"]}] def _jupyter_server_extension_paths(): - """Creates the path to load the jupyter server extension. - """ - return [{ - "module": "hydroshare_jupyter_sync" - }] + return [{"module": MODULE_NAME}] -def load_jupyter_server_extension(nb_server_app): - """Sets up logging to a specific file, sets frontend & backend urls, - and loads up the server extension. +def _load_jupyter_server_extension(server_app): + """Registers the API handler to receive HTTP requests from the frontend extension. + + Parameters + ---------- + server_app: jupyterlab.labapp.LabApp + JupyterLab application instance """ - nb_server_app.log.info("Successfully loaded hydroshare_jupyter_sync server " - "extension.") - - config = get_config_values(['logPath']) - log_file_path = None - if config: - log_file_path = config.get('logPath') - logging.basicConfig(level=logging.DEBUG, filename=log_file_path) - - web_app = nb_server_app.web_app - - frontend_base_url = url_path_join(web_app.settings['base_url'], 'sync') - backend_base_url = url_path_join(web_app.settings['base_url'], 'syncApi') - set_backend_url(backend_base_url) - set_frontend_url(frontend_base_url) - handlers = get_route_handlers(frontend_base_url, backend_base_url) - web_app.add_handlers('.*$', handlers) + handlers = get_route_handlers(FRONTEND_PATH, BACKEND_PATH) + + # `cookie_secret` inherited from `server_app` + server_app.web_app.add_handlers(".*$", handlers) + server_app.log.info(f"Registered {MODULE_NAME} extension") + + # parse config file. if env variables present, they take precedence. + # looks for config in following order: + # 1. "~/.config/hydroshare_jupyter_sync/config" + # 2. 
"~/.hydroshare_jupyter_sync_config" + config = ConfigFile() + + # pass config file settings to Tornado Application (web app) + server_app.web_app.settings.update(config.dict()) + + +# For backward compatibility with the classical notebook +load_jupyter_server_extension = _load_jupyter_server_extension diff --git a/hydroshare_jupyter_sync/__main__.py b/hydroshare_jupyter_sync/__main__.py new file mode 100644 index 00000000..51fdad94 --- /dev/null +++ b/hydroshare_jupyter_sync/__main__.py @@ -0,0 +1,72 @@ +import argparse +import logging +import signal +import sys + +from tornado import options +from tornado import ioloop +from tornado.web import Application +from pathlib import Path +from .handlers import get_route_handlers +from .cli import parse +from .config_setup import ConfigFile + + +class TestApp(Application): + """Class for setting up the server & making sure it can exit cleanly""" + + is_closing = False + + def signal_handler(self, signum, frame): + logging.info("Shutting down") + self.is_closing = True + + def try_exit(self): + if self.is_closing: + ioloop.IOLoop.instance().stop() + logging.info("Exit successful") + + +def get_test_app(**settings) -> Application: + """Thin wrapper returning web.Application instance. Written in this way for use in + unit tests. + """ + return TestApp( + get_route_handlers("/", "/syncApi"), + cookie_secret="__TODO:_GENERATE_YOUR_OWN_RANDOM_VALUE_HERE__", + login_url="/syncApi/login", + template_path=Path(__file__).resolve().parent / "templates", + **settings, + ) + + +def main(parser: argparse.Namespace): + # clear argv. options parse command line somehow sets up logging + # tornados logging setup is pretty broken. Do not want to pass any command line args + # here + sys.argv = [] + options.parse_command_line() + debug_enabled = not parser.no_debug + # TODO: write logs to file in config.log + + # parse config file + config = ( + ConfigFile() if parser.config is None else ConfigFile(_env_file=parser.config) + ) + + app = get_test_app( + default_hostname=parser.hostname, debug=debug_enabled, **config.dict() + ) + + logging.info(f"Server starting on {parser.hostname}:{parser.port}") + logging.info(f"Debugging mode {'enabled' if debug_enabled else 'disabled'}") + + signal.signal(signal.SIGINT, app.signal_handler) + app.listen(parser.port) + ioloop.PeriodicCallback(app.try_exit, 100).start() + ioloop.IOLoop.instance().start() + + +if __name__ == "__main__": + parser = parse() + main(parser) diff --git a/hydroshare_jupyter_sync/cli.py b/hydroshare_jupyter_sync/cli.py new file mode 100644 index 00000000..c9fb0e0e --- /dev/null +++ b/hydroshare_jupyter_sync/cli.py @@ -0,0 +1,73 @@ +import argparse +from pathlib import Path +from typing import Union +from .utilities.pathlib_utils import expand_and_resolve + + +def parse() -> Union[argparse.Namespace, None]: + parser = argparse.ArgumentParser( + prog="hydroshare_jupyter_sync", + description=( + """HydroShare Jupyter Sync:\n\t + A Jupyter server extension enabling management of HydroShare resource + files within Jupyter. Open HydroShare resources, work on their files, + and then sync those changes back to HydroShare using a drag-and-drop + interface. + + Note: + Debug mode is enabled by default when starting the server via this CLI. 
+ """ + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, # This adds defaults to help page + ) + + parser.add_argument( + "-p", + "--port", + type=int, + nargs="?", + help="Port number to listen on", + default=8080, + ) + + parser.add_argument( + "-n", + "--hostname", + type=str, + nargs="?", + help="HTTP Server hostname", + default="127.0.0.1", # localhost + ) + + parser.add_argument( + "-d", + "--no-debug", + action="store_true", + default=False, + help="Disable debugging mode", + ) + + parser.add_argument( + "-c", + "--config", + nargs="?", + type=absolute_file_path, + help="Path to configuration file. By default read from ~/.config/hydroshare_jupyter_sync/config then ~/.hydroshare_jupyter_sync_config if either exist.", + required=False, + ) + + return parser.parse_args() + + +def is_file_and_exists(f: Union[str, Path]) -> bool: + """Expand and resolve path and return if it is a file.""" + f = expand_and_resolve(f) + return f.is_file() and f.exists() + + +def absolute_file_path(f: Union[str, Path]) -> str: + """Return absolute path to file, if exists.""" + f = expand_and_resolve(f) + if is_file_and_exists(f): + return str(f) + raise FileNotFoundError(f"File: {f}, does not exist.") diff --git a/hydroshare_jupyter_sync/config_setup.py b/hydroshare_jupyter_sync/config_setup.py new file mode 100644 index 00000000..5bbf5f04 --- /dev/null +++ b/hydroshare_jupyter_sync/config_setup.py @@ -0,0 +1,40 @@ +from pydantic import BaseSettings, Field, root_validator +from pathlib import Path +from typing import Union +from .utilities.pathlib_utils import first_existing_file, expand_and_resolve + +_DEFAULT_CONFIG_FILE_LOCATIONS = ( + "~/.config/hydroshare_jupyter_sync/config", + "~/.hydroshare_jupyter_sync_config", +) + +_DEFAULT_DATA_PATH = expand_and_resolve("~/hydroshare") +_DEFAULT_LOG_PATH = expand_and_resolve("~/hydroshare/logs") + + +class FileNotDirectoryError(Exception): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class ConfigFile(BaseSettings): + # case-insensitive alias values DATA and LOG + data_path: Path = Field(_DEFAULT_DATA_PATH, env="data") + log_path: Path = Field(_DEFAULT_LOG_PATH, env="log") + + class Config: + env_file: Union[str, None] = first_existing_file(_DEFAULT_CONFIG_FILE_LOCATIONS) + env_file_encoding = "utf-8" + + @root_validator + def create_paths_if_do_not_exist(cls, values: dict): + for key, path in values.items(): + path = expand_and_resolve(path) + if path.is_file(): + raise FileNotDirectoryError( + f"Configuration setting: {key}={str(path)} is a file not a directory." 
+ ) + elif not path.exists(): + path.mkdir() + + return values diff --git a/hydroshare_jupyter_sync/fs_event_handler.py b/hydroshare_jupyter_sync/fs_event_handler.py new file mode 100644 index 00000000..67b0389f --- /dev/null +++ b/hydroshare_jupyter_sync/fs_event_handler.py @@ -0,0 +1,93 @@ +from watchdog.events import ( + FileSystemEventHandler, + PatternMatchingEventHandler, + FileCreatedEvent, + FileModifiedEvent, + FileDeletedEvent, + FileMovedEvent, + FileClosedEvent, +) + +from .fs_events import Events +from .lib.filesystem.fs_resource_map import LocalFSResourceMap +from .lib.events.event_broker import EventBroker + +from functools import wraps +import logging + +# module level log +logger = logging.getLogger(__name__) + + +def log_event(fn): + @wraps(fn) + def wrapper(self, event): + logger.debug(event) + return fn(self, event) + + return wrapper + + +def fs_event_handler_factory(event_broker: EventBroker) -> FileSystemEventHandler: + """Wrap FSEventHandler in event_broker context. There should be only one resource per + FSEventHandler instance.""" + + class FSEventHandler(PatternMatchingEventHandler): + def __init__(self, local_fs_map: LocalFSResourceMap): + # TODO: use pattern kwarg to ignore certain files/file extensions. it would be nice if + # this were a configurable. + super().__init__(ignore_directories=True) + + # dependency inject local filesystem map + self._res_map = local_fs_map + + @log_event + def on_any_event(self, event): + # log all events + ... + + def on_created(self, event: FileCreatedEvent) -> None: + # add file to local fs map + self._res_map.add_file(event.src_path) + + # dispatch new state + event_broker.dispatch(Events.STATUS, self.resource_id) + + def on_modified(self, event: FileModifiedEvent) -> None: + # update file in local fs map + self._res_map.update_file(event.src_path) + + # dispatch new state + event_broker.dispatch(Events.STATUS, self.resource_id) + + def on_deleted(self, event: FileDeletedEvent) -> None: + # remove file from local fs map + self._res_map.delete_file(event.src_path) + + # dispatch new state + event_broker.dispatch(Events.STATUS, self.resource_id) + + def on_moved(self, event: FileMovedEvent) -> None: + # update/add file in local fs map, remove file from local fs map + self._res_map.delete_file(event.src_path) + + # NOTE: change in the future. Right now, this covers all cases. + self._res_map.add_file(event.dest_path) + self._res_map.update_file(event.dest_path) + + # dispatch new state + event_broker.dispatch(Events.STATUS, self.resource_id) + + def on_closed(self, event: FileClosedEvent) -> None: + # update file in local fs map + self._res_map.update_file(event.src_path) + + # dispatch new state + event_broker.dispatch(Events.STATUS, self.resource_id) + + # properties + @property + def resource_id(self) -> str: + return self._res_map.resource_id + + return FSEventHandler diff --git a/hydroshare_jupyter_sync/fs_events.py b/hydroshare_jupyter_sync/fs_events.py new file mode 100644 index 00000000..33d2cb17 --- /dev/null +++ b/hydroshare_jupyter_sync/fs_events.py @@ -0,0 +1,21 @@ +from enum import Enum, auto + +# type hint imports +from typing import Callable, Union +from pathlib import Path +from hsclient import HydroShare + +# local imports +from .lib.filesystem.types import ResourceId + +NOOP = lambda _: ... 
+ + +class Events(Enum): + LOGIN_SUCCESSFUL = auto() # Callable[[Union[Path, str], HydroShare], None] + STATUS = auto() # Callable[[ResourceId], None] + RESOURCE_DOWNLOADED = auto() # Callable[[ResourceId], None] + RESOURCE_ENTITY_DOWNLOADED = auto() # Callable[[ResourceId], None] + RESOURCE_ENTITY_UPLOADED = auto() # Callable[[ResourceId], None] + # TODO: implement below. + LOGOUT = auto() # NOOP diff --git a/hydroshare_jupyter_sync/handlers/__init__.py b/hydroshare_jupyter_sync/handlers/__init__.py new file mode 100644 index 00000000..6358df9b --- /dev/null +++ b/hydroshare_jupyter_sync/handlers/__init__.py @@ -0,0 +1,71 @@ +from pathlib import Path +from notebook.utils import url_path_join +import tornado +from ..websocket_handler import FileSystemEventWebSocketHandler +from ..server import ( + DataDirectoryHandler, + LoginHandler, + UserInfoHandler, + ListUserHydroShareResources, + ListHydroShareResourceFiles, + HydroShareResourceHandler, + LocalResourceEntityHandler, + HydroShareResourceEntityHandler, + WebAppHandler, +) + + +def get_route_handlers(frontend_url, backend_url): + # TODO: These are constants, so change case + # also, this should be moved to the config setup + mod_path = Path(__file__).absolute().parent.parent + assets_path = mod_path / "assets" + data_path = Path("~").expanduser() / "hydroshare" / "local_hs_resources" + + # routes look like they need to be updated to remove .* + return [ + # "frontend" + ( + url_path_join(frontend_url, r"/assets/(.*)"), + tornado.web.StaticFileHandler, + {"path": str(assets_path)}, + ), + # "backend" + ( + url_path_join(backend_url, r"/ws"), + FileSystemEventWebSocketHandler, + ), + ( + url_path_join(backend_url, r"/data_directory"), + DataDirectoryHandler, + ), + ( + url_path_join(backend_url, r"/download/(.*)"), + tornado.web.StaticFileHandler, + {"path": str(data_path)}, + ), + (url_path_join(backend_url, "/login"), LoginHandler), + (url_path_join(backend_url, r"/user"), UserInfoHandler), + (url_path_join(backend_url, r"/resources"), ListUserHydroShareResources), + # (url_path_join(backend_url, r"/resources/([^/]+)"), ResourceHandler), + ( + url_path_join(backend_url, r"/resources/([^/]+)"), + ListHydroShareResourceFiles, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/download"), + HydroShareResourceHandler, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/upload"), + LocalResourceEntityHandler, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/download/(.+)"), + HydroShareResourceEntityHandler, + ), + # Put this last to catch everything else + # order does matter + # Host patterns are processed sequentially in the order they were added. All matching patterns will be considered. 
+ (frontend_url + r".*", WebAppHandler), + ] diff --git a/hydroshare_jupyter_sync/hydroshare_resource_cache.py b/hydroshare_jupyter_sync/hydroshare_resource_cache.py new file mode 100644 index 00000000..0e7e0db4 --- /dev/null +++ b/hydroshare_jupyter_sync/hydroshare_resource_cache.py @@ -0,0 +1,21 @@ +from hsclient import HydroShare, Resource + + +class HydroShareWithResourceCache(HydroShare): + """Extends hsclient.HydroShare to include a dictionary of cached resources""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._resource_dict = dict() + + def resource(self, resource_id: str, validate: bool = True) -> Resource: + """Add Resource object caching""" + # Returned if already in cached resources + if resource_id in self._resource_dict: + return self._resource_dict[resource_id] + + res = super().resource(resource_id, validate=validate) + + # Put Resource in cache + self._resource_dict[resource_id] = res + return res diff --git a/hydroshare_jupyter_sync/lib/__init__.py b/hydroshare_jupyter_sync/lib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hydroshare_jupyter_sync/lib/events/__init__.py b/hydroshare_jupyter_sync/lib/events/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hydroshare_jupyter_sync/lib/events/event_broker.py b/hydroshare_jupyter_sync/lib/events/event_broker.py new file mode 100644 index 00000000..fed2eac3 --- /dev/null +++ b/hydroshare_jupyter_sync/lib/events/event_broker.py @@ -0,0 +1,48 @@ +from typing import Callable, Dict, List +from enum import Enum + + +class EventBroker: + def __init__(self, event_types: Enum) -> None: + # enum name (not value) as event type name + self.event_listeners: Dict[str, List[Callable]] = { + event_name.name: list() for event_name in event_types + } + + self._event_types = event_types + + def subscribe(self, event_name: str, fn) -> None: + event_name = self._parse_enum(event_name) + + if event_name in self.event_listeners: + self.event_listeners[event_name].append(fn) + + def unsubscribe(self, event_name: str, fn) -> None: + event_name = self._parse_enum(event_name) + + if event_name in self.event_listeners: + listeners = self.event_listeners[event_name] + for idx, f in enumerate(listeners): + if f == fn: + listeners.pop(idx) + + def dispatch(self, event_name: str, *args, **kwargs) -> None: + event_name = self._parse_enum(event_name) + + if event_name in self.event_listeners: + for fn in self.event_listeners[event_name]: + fn(*args, **kwargs) + + def unsubscribe_all(self) -> None: + for listeners in self.event_listeners.values(): + # clear event listeners + listeners.clear() + + @property + def events_types(self): + return list(self.event_listeners.keys()) + + def _parse_enum(self, event_name: str) -> str: + if isinstance(event_name, self._event_types): + return event_name.name + return event_name diff --git a/hydroshare_jupyter_sync/lib/filesystem/__init__.py b/hydroshare_jupyter_sync/lib/filesystem/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hydroshare_jupyter_sync/lib/filesystem/aggregate_fs_map.py b/hydroshare_jupyter_sync/lib/filesystem/aggregate_fs_map.py new file mode 100644 index 00000000..e06ed785 --- /dev/null +++ b/hydroshare_jupyter_sync/lib/filesystem/aggregate_fs_map.py @@ -0,0 +1,104 @@ +from dataclasses import dataclass +from typing import Tuple, Union +from pathlib import Path +from hsclient import HydroShare + +from .fs_map import IFSMap, IEntityFSMap, LocalFSMap, RemoteFSMap +from .types import ResourceId, T +from 
.exceptions import AggregateFSMapResourceMembershipError +from .aggregate_fs_resource_map_sync_state import ( + AggregateFSResourceMapSyncState, + AggregateFSResourceMapSyncStateCollection, +) + + +@dataclass +class AggregateFSMap(IFSMap, IEntityFSMap): + local_map: LocalFSMap + remote_map: RemoteFSMap + + # IFSMap implementations + + @classmethod + def create_map( + cls, fs_root: Union[Path, str], hydroshare: HydroShare + ) -> "AggregateFSMap": + # create local and remote map instances + remote_map = RemoteFSMap.create_map(fs_root, hydroshare) + local_map = LocalFSMap(fs_root) + + # `remote_map` only contains resources that user owns and are local in fs_root. + # Add those resources to local map + for res in remote_map.keys(): + local_map.add_resource(res) + + return cls(local_map=local_map, remote_map=remote_map) + + def add_resource(self, resource_id: ResourceId) -> None: + """Add resource to local and remote map""" + self._map_fn(lambda o: o.add_resource(resource_id)) + + def delete_resource(self, resource_id: ResourceId) -> None: + """Remove resource from local and remote FSMap instances""" + self._map_fn(lambda o: o.delete_resource(resource_id)) + + def update_resource(self, resource_id: ResourceId) -> None: + """Update a local and remote resource""" + self._map_fn(lambda o: o.update_resource(resource_id)) + + # IEntityFSMap implementations + + def add_resource_file( + self, resource_id: ResourceId, relative_resource_file: Union[Path, str] + ) -> None: + """Add resource file to local map. Does not interact with the remote FS map.""" + self.local_map.add_resource_file(resource_id, relative_resource_file) + + def delete_resource_file( + self, resource_id: ResourceId, relative_resource_file: Union[Path, str] + ) -> None: + """Remove resource file from local map. Does not interact with the remote FS map.""" + self.local_map.delete_resource_file(resource_id, relative_resource_file) + + def update_resource_file( + self, resource_id: ResourceId, relative_resource_file: Union[Path, str] + ): + """Update resource file in local map. Does not interact with the remote FS map.""" + self.local_map.update_resource_file(resource_id, relative_resource_file) + + # other methods + + def get_sync_state(self) -> AggregateFSResourceMapSyncStateCollection: + """Get sync status of all resources in local and remote maps.""" + return AggregateFSResourceMapSyncStateCollection.from_aggregate_map( + aggregate_fs_map=self + ) + + def get_resource_sync_state( + self, resource_id: ResourceId + ) -> AggregateFSResourceMapSyncState: + """Get sync status for a given resource between the local and remote map.""" + if resource_id in self.local_map and resource_id in self.remote_map: + local_resource = self.local_map[resource_id] + remote_resource = self.remote_map[resource_id] + return AggregateFSResourceMapSyncState.from_resource_maps( + local_resource_map=local_resource, remote_resource_map=remote_resource + ) + + error_message = ( + f"ResourceID: {resource_id}, does not exist in local_map or remote_map.\n" + f"{self.local_map.keys()=}\n" + f"{self.remote_map.keys()=}" + ) + raise AggregateFSMapResourceMembershipError(error_message) + + # helper methods + def _map_fn(self, fn, *args, **kwargs) -> Tuple[T]: + """Map a function call over the LocalFSMap and RemoteFSMap. 
Results returned in that order.""" + res = [] + for o in [self.local_map, self.remote_map]: + # append result + res.append(fn(o, *args, **kwargs)) + + # cast to tuple for in adherence with standard pythonic practices + return tuple(res) diff --git a/hydroshare_jupyter_sync/lib/filesystem/aggregate_fs_resource_map_sync_state.py b/hydroshare_jupyter_sync/lib/filesystem/aggregate_fs_resource_map_sync_state.py new file mode 100644 index 00000000..a5a9dfc7 --- /dev/null +++ b/hydroshare_jupyter_sync/lib/filesystem/aggregate_fs_resource_map_sync_state.py @@ -0,0 +1,84 @@ +from __future__ import annotations +from pydantic import BaseModel +from pathlib import Path +from typing import Set, List, TYPE_CHECKING +from .fs_resource_map import LocalFSResourceMap, RemoteFSResourceMap + +# Avoid cyclic import +if TYPE_CHECKING: + from .aggregate_fs_map import AggregateFSMap + + +class AggregateFSResourceMapSyncState(BaseModel): + """Data structure that describes the filesystem sync state between a local and + remote FSResourceMap i.e. file exists locally, not remotely and vice versa. Or, + local and remote file md5 checksums do not agree. + + Notes: + only_local => local - (local n remote) + so: local - remote + only_remote => remote - (local n remote) + out_of_sync => ( (set(local.value) u set(remote.value)) - (set(local.value) n set(remote.value)) ) | (local n remote) + symmetric difference + """ + + resource_id: str + only_local: Set[Path] + only_remote: Set[Path] + out_of_sync: Set[Path] + in_sync: Set[Path] + + @classmethod + def from_resource_maps( + cls, + *, + local_resource_map: LocalFSResourceMap, + remote_resource_map: RemoteFSResourceMap + ) -> "AggregateFSResourceMapSyncState": + # verify local and remote instances track the same resource + assert local_resource_map.resource_id == remote_resource_map.resource_id + resource_id = local_resource_map.resource_id + + local = set(local_resource_map.keys()) + remote = set(remote_resource_map.keys()) + + only_local = local - remote + only_remote = remote - local + + intersection = local & remote + out_of_sync = { + item + for item in intersection + if local_resource_map[item] != remote_resource_map[item] + } + in_sync = intersection - out_of_sync + + return cls( + resource_id=resource_id, + in_sync=in_sync, + out_of_sync=out_of_sync, + only_local=only_local, + only_remote=only_remote, + ) + + +class AggregateFSResourceMapSyncStateCollection(BaseModel): + __root__: List[AggregateFSResourceMapSyncState] + + @classmethod + def from_aggregate_map( + cls, *, aggregate_fs_map: AggregateFSMap + ) -> "AggregateFSResourceMapSyncStateCollection": + lm = aggregate_fs_map.local_map + rm = aggregate_fs_map.remote_map + + res_intersection = set(lm) & set(rm) + + return cls.parse_obj( + [ + AggregateFSResourceMapSyncState.from_resource_maps( + local_resource_map=lm[res_id], remote_resource_map=rm[res_id] + ) + for res_id in res_intersection + ] + ) diff --git a/hydroshare_jupyter_sync/lib/filesystem/exceptions.py b/hydroshare_jupyter_sync/lib/filesystem/exceptions.py new file mode 100644 index 00000000..e8b5b37c --- /dev/null +++ b/hydroshare_jupyter_sync/lib/filesystem/exceptions.py @@ -0,0 +1,2 @@ +class AggregateFSMapResourceMembershipError(Exception): + pass diff --git a/hydroshare_jupyter_sync/lib/filesystem/fs_map.py b/hydroshare_jupyter_sync/lib/filesystem/fs_map.py new file mode 100644 index 00000000..405cb681 --- /dev/null +++ b/hydroshare_jupyter_sync/lib/filesystem/fs_map.py @@ -0,0 +1,214 @@ +from abc import ABC, abstractmethod +from collections import 
UserDict
+from pathlib import Path
+from typing import List, Union
+from hsclient import HydroShare
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from .fs_resource_map import (
+    RemoteFSResourceMap,
+    LocalFSResourceMap,
+)
+
+from .types import ResourceId
+
+
+# Abstract Interfaces
+
+
+class IFSMap(ABC):
+    """Abstract interface describing an FSMap's verbs (methods)"""
+
+    @classmethod
+    @abstractmethod
+    def create_map(cls) -> "FSMap":
+        """Create a new file system map instance from a provided root location.
+        Note: subclasses override this method with their own signatures."""
+
+    @abstractmethod
+    def add_resource(self, resource_id: ResourceId) -> None:
+        """Create a new FSResourceMap and add it to the FSMap instance"""
+
+    @abstractmethod
+    def delete_resource(self, resource_id: ResourceId) -> None:
+        """Remove an FSResourceMap instance from the FSMap instance"""
+
+    @abstractmethod
+    def update_resource(self, resource_id: ResourceId) -> None:
+        """Update an FSResourceMap instance in the FSMap instance"""
+
+
+class IEntityFSMap(ABC):
+    """Abstract interface describing FSMap verbs (methods) that operate on FS entities."""
+
+    @abstractmethod
+    def add_resource_file(
+        self, resource_id: ResourceId, relative_resource_file: Union[Path, str]
+    ) -> None:
+        """Add a file to the FSResourceMap for a given resource id."""
+
+    @abstractmethod
+    def delete_resource_file(
+        self, resource_id: ResourceId, relative_resource_file: Union[Path, str]
+    ) -> None:
+        """Remove a file from the FSResourceMap for a given resource id."""
+
+    @abstractmethod
+    def update_resource_file(
+        self, resource_id: ResourceId, relative_resource_file: Union[Path, str]
+    ) -> None:
+        """Update a file in the FSResourceMap for a given resource id."""
+
+
+# Semi-concrete implementations
+
+
+class FSMap(UserDict, IFSMap, ABC):
+    """Class representing the relationship between HydroShare resources, resource files, and
+    resource MD5 hashes."""
+
+    def delete_resource(self, resource_id: ResourceId) -> None:
+        """Remove FSResourceMap instance from FSMap instance"""
+        if resource_id in self.data:
+            del self.data[resource_id]
+
+    def update_resource(self, resource_id: ResourceId) -> None:
+        """Update the FSResourceMap for a given resource id."""
+        if resource_id in self.data:
+            self.data[resource_id].update_resource()
+
+    @property
+    def resources(self) -> List[str]:
+        """Return list of resource ids."""
+        return list(self.data.keys())
+
+    def _get_resource_ids(self) -> List[ResourceId]:
+        """Get list of resource ids. Resource ids are parsed from directory names and are assumed to
+        be direct children of the `fs_root` directory. Valid resource id names are also assumed to
+        be 32 characters long (the length of an md5 hash) per the bagit specification
+        (https://tools.ietf.org/html/draft-kunze-bagit-13).
HydroShare uses md5 for creating + resource id's.""" + return ( + [d.name for d in self.fs_root.glob("*") if len(d.name) == 32 and d.is_dir()] + if self.fs_root + else [] + ) + + +# Concrete implementations + + +class LocalFSMap(FSMap, IEntityFSMap): + """Class representing the relationship between *local* HydroShare resources', resource files, + and resource MD5 Hashes.""" + + def __init__(self, fs_root: Union[str, Path]) -> None: + super().__init__() + self.fs_root = Path(fs_root).expanduser().resolve() + + # override + @classmethod + def create_map(cls, fs_root: Union[Path, str]) -> "LocalFSMap": + # create class instance + fs_map = cls(fs_root) + + for resource in fs_map._get_resource_ids(): + # create new instance of LocalFSResourceMap + fs_map[resource] = LocalFSResourceMap(fs_map.fs_root / resource) + + return fs_map + + def add_resource(self, resource_id: ResourceId) -> None: + """Create new LocalFSResourceMap and add to LocalFSMap instance. `resource_id` provided must + be direct child directory of `fs_root`.""" + if resource_id not in self.data: + # create new local resource map + r_map = LocalFSResourceMap.from_resource_path(self.fs_root / resource_id) + + # add local resource map to dictionary + self.data[resource_id] = r_map + + def add_resource_file( + self, resource_id: ResourceId, relative_resource_file: Union[Path, str] + ) -> None: + if resource_id in self.data: + self.data[resource_id].add_file(relative_resource_file) + + def update_resource_file( + self, resource_id: ResourceId, relative_resource_file: Union[Path, str] + ) -> None: + """Update a file in a LocalFSResourceMap instance for a given resource id.""" + if resource_id in self.data: + self.data[resource_id].update_file(relative_resource_file) + + def delete_resource_file( + self, resource_id: ResourceId, relative_resource_file: Union[Path, str] + ) -> None: + """Remove a file in a LocalFSResourceMap instance for a given resource id.""" + if resource_id in self.data: + self.data[resource_id].delete_file(relative_resource_file) + + +class RemoteFSMap(FSMap): + """Class representing the relationship between remote HydroShare resource's, resource files, and + resource MD5 Hashes.""" + + def __init__(self, fs_root: Union[str, Path], hydroshare: HydroShare) -> None: + super().__init__() + self.fs_root = Path(fs_root).expanduser().resolve() + self._hydroshare = hydroshare + + # override + @classmethod + def create_map( + cls, fs_root: Union[Path, str], hydroshare: HydroShare + ) -> "RemoteFSMap": + # create class instance + fs_map = cls(fs_root, hydroshare) + + # NOTE: assumes user if logged in and using standard username, pass auth. Likely should + # guard in future. + # get username + username = hydroshare._hs_session._session.auth[0] + + # NOTE: assumes only resources desired for tracking are owned. may not be desirable. + # get resources the user can edit + remote_resources = { + res.resource_id for res in hydroshare.search(edit_permission=True) + } + # naively get local resources based on fs_root location and directory name length + naive_local_resources = set(fs_map._get_resource_ids()) + + # resources s.t. user is owner and some files from res are local + users_local_resources = naive_local_resources & remote_resources + + res_objs = [ + fs_map._hydroshare.resource(res_id) for res_id in users_local_resources + ] + + # NOTE: should probably move this to a method and/or strategy pattern. 
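+        # Fetching each resource's `manifest-md5.txt` is network-bound, so the
+        # thread pool below issues the per-resource manifest requests concurrently.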
+ # see https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor + # ThreadPoolExecutor max_workers + with ThreadPoolExecutor(max_workers=None) as executor: + # create RemoteFSResourceMap instances for each resource and populate checksums + futures = { + # key: Future, value: HydroShare resource id + executor.submit(RemoteFSResourceMap.from_resource, res): res.resource_id + for res in res_objs + } + + for fut_ref in as_completed(futures): + res_id = futures[fut_ref] + res_map = fut_ref.result() + + fs_map.data[res_id] = res_map + + return fs_map + + def add_resource(self, resource_id: ResourceId) -> None: + """Create new RemoteFSResourceMap and add to the FSMap instance""" + if resource_id not in self.data: + res = self._hydroshare.resource(resource_id) + res_map = RemoteFSResourceMap.from_resource(res) + + self.data[resource_id] = res_map diff --git a/hydroshare_jupyter_sync/lib/filesystem/fs_resource_map.py b/hydroshare_jupyter_sync/lib/filesystem/fs_resource_map.py new file mode 100644 index 00000000..fae64f9e --- /dev/null +++ b/hydroshare_jupyter_sync/lib/filesystem/fs_resource_map.py @@ -0,0 +1,197 @@ +from abc import ABC, abstractmethod +from collections import UserDict +from typing import Dict, List, Union +from hsclient import Resource +import hashlib +from pathlib import Path + + +def compute_file_md5_hexdigest(file: Union[str, Path]) -> str: + """Compute a file's md5 hexdigest. Read file as chunks to conserve memory usage.""" + with open(file, "rb") as f: + hash = hashlib.md5() + chunk = f.read(8192) + while chunk: + hash.update(chunk) + chunk = f.read(8192) + + return hash.hexdigest() + + +# abstract interfaces + + +class IFSResourceMap(ABC): + """Class representing the relationship between a file (no directory) path to the file's MD5 Hash.""" + + @abstractmethod + def update_resource(self) -> None: + """Update entire resource map. 
MD5 hashes are updated, non-existent files removed, and new files inserted."""
+
+
+class IEntityFSResourceMap(ABC):
+    """Abstract interface for file-level add/delete/update operations on an FSResourceMap."""
+
+    @abstractmethod
+    def add_file(self, relative_resource_file: Union[Path, str]) -> None:
+        """Add file to FSResource map."""
+
+    @abstractmethod
+    def delete_file(self, relative_resource_file: Union[Path, str]) -> None:
+        """Remove file from FSResource map."""
+
+    @abstractmethod
+    def update_file(self, relative_resource_file: Union[Path, str]) -> None:
+        """Update file in FSResource map."""
+
+
+# semi-concrete implementations
+
+
+class FSResourceMap(UserDict, IFSResourceMap):
+    @property
+    def files(self) -> List[Path]:
+        """Return list of files in the resource."""
+        return list(self.data.keys())
+
+
+# concrete implementations
+
+
+class LocalFSResourceMap(FSResourceMap, IEntityFSResourceMap):
+    """Concrete class mapping a local file (not directory) path to the file's MD5 hash."""
+
+    def __init__(self, resource_path: Union[Path, str]) -> None:
+        super().__init__()
+        self.resource_path = Path(resource_path).expanduser().resolve()
+        # NOTE: use the resolved Path; `resource_path` may be a str (which has no `.name`)
+        self.resource_id = self.resource_path.name
+
+    @classmethod
+    def from_resource_path(
+        cls, resource_path: Union[Path, str]
+    ) -> "LocalFSResourceMap":
+        # create class instance and populate it from the local filesystem
+        fsresource_map = cls(resource_path)
+
+        fsresource_map.update_resource()
+        return fsresource_map
+
+    def add_file(self, relative_resource_file: Union[Path, str]) -> None:
+        if self._valid_resource_file(relative_resource_file) and not self._is_member(
+            relative_resource_file
+        ):
+            self._insert(relative_resource_file)
+
+    def update_file(self, relative_resource_file: Union[Path, str]) -> None:
+        if self._is_member(relative_resource_file):
+            self._insert(relative_resource_file)
+
+    def delete_file(self, relative_resource_file: Union[Path, str]) -> None:
+        if self._is_member(relative_resource_file):
+            relative_resource_file = self._as_child_of_base_directory(
+                relative_resource_file
+            )
+            del self.data[relative_resource_file]
+
+    def update_resource(self) -> None:
+        # clear data dictionary
+        self.data = dict()
+        for resource_file in self.contents_path.glob("**/*"):
+            # insert path (relative to the base directory) and its md5 digest
+            self._insert(resource_file)
+
+    @property
+    def base_directory(self) -> Path:
+        """Return assumed path to base directory (i.e. `/some/path/{resource_id}/{resource_id}`).
+
+        The property name, `base_directory`, was chosen based on naming conventions found in any
+        HydroShare resource's `readme.txt`."""
+        return self.resource_path / self.resource_id
+
+    @property
+    def contents_path(self) -> Path:
+        """Return assumed path to resource data (i.e.
`/some/path/{resource_id}/{resource_id}/data/contents`)""" + return self.base_directory / "data" / "contents" + + # Helper methods + def _abs_path(self, resource_file: Union[Path, str]) -> Path: + resource_file = Path(resource_file) + return ( + resource_file + if resource_file.is_absolute() + else self.base_directory / resource_file + ).resolve() + + def _is_member(self, resource_file: Union[Path, str]) -> bool: + if self._is_child_of_contents_path(resource_file): + return self._as_child_of_base_directory(resource_file) in self.data + return False + + def _valid_resource_file(self, resource_file: Union[Path, str]) -> bool: + abs_path = self._abs_path(resource_file) + + return ( + abs_path.is_file() + and not abs_path.is_symlink() + and self._is_child_of_contents_path(abs_path) + ) + + def _as_child_of( + self, resource_file: Union[Path, str], parents: Union[Path, str] + ) -> Path: + abs_path = self._abs_path(resource_file) + return abs_path.relative_to(parents) + + def _as_child_of_contents_path(self, resource_file: Union[Path, str]) -> Path: + return self._as_child_of(resource_file, self.contents_path) + + def _as_child_of_base_directory(self, resource_file: Union[Path, str]) -> Path: + return self._as_child_of(resource_file, self.base_directory) + + def _is_child_of_contents_path(self, resource_file: Union[Path, str]) -> bool: + try: + # ensure is child of contents_path + self._as_child_of_contents_path(resource_file) + return True + except ValueError: + return False + + def _insert(self, resource_file: Union[Path, str]) -> None: + abs_path = self._abs_path(resource_file) + + if self._valid_resource_file(abs_path): + # path relative to resource base directory. For comparison, this is how files are listed + # in any resource's `manifest-md5.txt` file. 
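+            # e.g. (illustrative file name):
+            #   {base_directory}/data/contents/notebook.ipynb -> "data/contents/notebook.ipynb"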
+ truncated_path = abs_path.relative_to(self.base_directory) + # compute file md5 hex digest + digest = compute_file_md5_hexdigest(abs_path) + # insert into collection + self.data[truncated_path] = digest + + +class RemoteFSResourceMap(FSResourceMap): + def __init__(self, resource: Resource) -> None: + super().__init__() + self.resource = resource + self.resource_id = resource.resource_id + + @classmethod + def from_resource(cls, resource: Resource) -> "FSResourceMap": + # create class instance + fsresource_map = cls(resource) + + fsresource_map.update_resource() + return fsresource_map + + def update_resource(self) -> None: + # clear instance data dictionary + self.data = dict() + + # force resource to re-fetch manifest-md5.txt from hs + self.resource._parsed_checksums = None + + self.data = { + Path(k): v + for k, v in self.resource._checksums.items() + if k.startswith("data/contents/") + } diff --git a/hydroshare_jupyter_sync/lib/filesystem/types.py b/hydroshare_jupyter_sync/lib/filesystem/types.py new file mode 100644 index 00000000..66406650 --- /dev/null +++ b/hydroshare_jupyter_sync/lib/filesystem/types.py @@ -0,0 +1,7 @@ +from typing import NewType, TypeVar + +MD5Hash = NewType("MD5Hash", str) +ResourceId = NewType("ResourceId", str) + +# Generic type +T = TypeVar("T") diff --git a/hydroshare_jupyter_sync/lib/resource_factories.py b/hydroshare_jupyter_sync/lib/resource_factories.py new file mode 100644 index 00000000..d76911cf --- /dev/null +++ b/hydroshare_jupyter_sync/lib/resource_factories.py @@ -0,0 +1,40 @@ +from enum import Enum, auto +from hsclient import Resource +from .resource_strategies import ( + HydroShareFileDownloadStrategy, + HydroShareFolderDownloadStrategy, +) + + +class InvalidEntityTypeException(Exception): + """Invalid EntityTypeEnum Exception""" + + def __init__(self, o: object) -> None: + message = f"{o} is not a EntityTypeEnum member" + super().__init__(message) + + +class EntityTypeEnum(Enum): + FILE = auto() + FOLDER = auto() + + +class HydroShareEntityDownloadFactory: + _CHOICES = { + EntityTypeEnum.FILE: HydroShareFileDownloadStrategy, + EntityTypeEnum.FOLDER: HydroShareFolderDownloadStrategy, + } + + @staticmethod + def download( + entity_type: EntityTypeEnum, + resource: Resource, + data_path: str, + path: str, + ): + cls = HydroShareEntityDownloadFactory._CHOICES.get(entity_type, None) + if cls is None: + raise InvalidEntityTypeException(entity_type) + + downloader = cls(resource, data_path) + return downloader.download(path) diff --git a/hydroshare_jupyter_sync/lib/resource_strategies.py b/hydroshare_jupyter_sync/lib/resource_strategies.py new file mode 100644 index 00000000..7edd905b --- /dev/null +++ b/hydroshare_jupyter_sync/lib/resource_strategies.py @@ -0,0 +1,57 @@ +from abc import ABC, abstractmethod +import shutil +from pathlib import Path +from tempfile import TemporaryDirectory +from hsclient import Resource +from zipfile import ZipFile + + +class AbstractHydroShareEntityDownloadStrategy(ABC): + def __init__(self, resource: Resource, data_path: str): + self.resource = resource + self.data_path = data_path + + @abstractmethod + def download(self, path: str): + """Interface for HydroShare file system entity download""" + + def create_intermediary_directories(self, path: Path) -> Path: + """Handles the creation of {data_path}/{resource_id}/{resource_id}/data/contents""" + contents_path = ( + Path(self.data_path) / self.resource.resource_id / self.resource.resource_id / "data/contents" / path + ) + contents_path.mkdir(parents=True, 
exist_ok=True) + return contents_path + + +# Concrete strategies + + +class HydroShareFileDownloadStrategy(AbstractHydroShareEntityDownloadStrategy): + def download(self, path: str): + """Download file from HydroShare and move to {data_path}/{resource_id}/data/contents""" + + with TemporaryDirectory() as temp_dir: + downloaded_file = self.resource.file_download( + path, save_path=temp_dir, zipped=False + ) + path = Path(path) + fn = path.name + parent_dir = path.parent + contents_path = self.create_intermediary_directories(parent_dir) + + shutil.move(downloaded_file, contents_path / fn) + + +class HydroShareFolderDownloadStrategy(AbstractHydroShareEntityDownloadStrategy): + def download(self, path: str): + """Download folder from HydroShare""" + + with TemporaryDirectory() as temp_dir: + downloaded_folder = self.resource.folder_download(path, save_path=temp_dir) + parent_dir = Path(path).parent + contents_path = self.create_intermediary_directories(parent_dir) + + # unzip resource + with ZipFile(downloaded_folder, "r") as zr: + zr.extractall(contents_path / parent_dir) diff --git a/hydroshare_jupyter_sync/models/__init__.py b/hydroshare_jupyter_sync/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hydroshare_jupyter_sync/models/api_models.py b/hydroshare_jupyter_sync/models/api_models.py new file mode 100644 index 00000000..21aa81ec --- /dev/null +++ b/hydroshare_jupyter_sync/models/api_models.py @@ -0,0 +1,74 @@ +from hsmodels.schemas import resource +from pydantic import ( + BaseModel, + Field, + StrictStr, + StrictBool, + conlist, + constr, + ConstrainedList, + validator, +) +from typing import List +from .resource_type_enum import ResourceTypeEnum + + +class Boolean(BaseModel): + value: bool + + @classmethod + def get_value(cls, value: bool) -> bool: + return cls(value=value).value + + +class Credentials(BaseModel): + username: StrictStr = Field(...) + password: StrictStr = Field(...) + + +class Success(BaseModel): + success: StrictBool = Field(...) + + +class ResourceMetadata(BaseModel): + resource_type: str = Field(...) + resource_title: str = Field(...) + resource_id: str = Field(...) + immutable: bool = Field(...) + resource_url: str = Field(...) + date_created: str = Field(...) + date_last_updated: str = Field(...) + creator: str = Field(...) + authors: List[str] = Field(...) + + # NOTE: remove once https://github.com/hydroshare/hsclient/issues/23 has been resolved + @validator("authors", pre=True, always=True) + def handle_null_author(cls, v): + return v or [] + + +class CollectionOfResourceMetadata(BaseModel): + # from https://github.com/samuelcolvin/pydantic/issues/675#issuecomment-513029543 + __root__: List[ResourceMetadata] + + +class ResourceCreationRequest(BaseModel): + title: str + metadata: str + extra_metadata: str + edit_users: str + edit_groups: str + view_users: str + view_groups: str + keywords: List[str] + abstract: str + resource_type: ResourceTypeEnum + + +class ResourceFiles(BaseModel): + # str in list cannot contain .. or ~ + files: List[constr(regex=r"^((?!~|\.{2}).)*$")] = Field(...) + + +class DataDir(BaseModel): + data_directory: str = Field(...) 
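A minimal usage sketch (not part of the patch) of how these request models validate payloads; the literal values are placeholders:

```python
from pydantic import ValidationError

from hydroshare_jupyter_sync.models.api_models import Credentials, ResourceFiles

# StrictStr fields: both keys are required and must be strings
creds = Credentials.parse_obj({"username": "jane", "password": "s3cret"})

# the `files` regex rejects entries containing "~" or "..", blocking
# path traversal out of a resource's contents directory
ResourceFiles.parse_obj({"files": ["data/contents/notebook.ipynb"]})  # accepted
try:
    ResourceFiles.parse_obj({"files": ["../escape.txt"]})
except ValidationError as err:
    print(err)  # 1 validation error for ResourceFiles
```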
diff --git a/hydroshare_jupyter_sync/models/resource_type_enum.py b/hydroshare_jupyter_sync/models/resource_type_enum.py new file mode 100644 index 00000000..6af87c44 --- /dev/null +++ b/hydroshare_jupyter_sync/models/resource_type_enum.py @@ -0,0 +1,18 @@ +from enum import Enum + + +class ResourceTypeEnum(Enum): + GenericResource = "GenericResource" + RasterResource = "RasterResource" + RefTimeSeriesResource = "RefTimeSeriesResource" + TimeSeriesResource = "TimeSeriesResource" + NetcdfResource = "NetcdfResource" + ModelProgramResource = "ModelProgramResource" + ModelInstanceResource = "ModelInstanceResource" + ToolResource = "ToolResource" + SWATModelInstanceResource = "SWATModelInstanceResource" + GeographicFeatureResource = "GeographicFeatureResource" + ScriptResource = "ScriptResource" + CollectionResource = "CollectionResource" + MODFLOWModelInstanceResource = "MODFLOWModelInstanceResource" + CompositeResource = "CompositeResource" diff --git a/hydroshare_jupyter_sync/resource_manager.py b/hydroshare_jupyter_sync/resource_manager.py index 29ecbc48..f4b5f39b 100644 --- a/hydroshare_jupyter_sync/resource_manager.py +++ b/hydroshare_jupyter_sync/resource_manager.py @@ -5,138 +5,144 @@ Author: 2019-20 CUAHSI Olin SCOPE Team Vicky McDermott, Kyle Combes, Emily Lepert, and Charlie Weiss """ -# !/usr/bin/python # -*- coding: utf-8 -*- import base64 import json import logging import shutil +import zipfile from pathlib import Path +from typing import Union from hs_restclient import HydroShare, HydroShareAuthBasic from hs_restclient.exceptions import HydroShareHTTPException -from hydroshare_jupyter_sync.config_reader_writer import (get_config_values) -from hydroshare_jupyter_sync.credential_reader_writer import ( - get_credential_values, set_credential_values) +from hsclient import HydroShare + +# local imports +from .config_reader_writer import get_config_values +from .credential_reader_writer import ( + get_credential_values, + set_credential_values, +) +from .utilities.pathlib_utils import expand_and_resolve + +# module logger instance +_log = logging.getLogger(__name__) HYDROSHARE_AUTHENTICATION_ERROR = { - 'type': 'HydroShareAuthenticationError', - 'message': 'Invalid HydroShare username or password.', + "type": "HydroShareAuthenticationError", + "message": "Invalid HydroShare username or password.", } PERMISSION_ERROR = { - 'type': 'PermissionDeniedError', - 'message': 'Cannot write to Directory, check you have write permissions' + "type": "PermissionDeniedError", + "message": "Cannot write to Directory, check you have write permissions", } -defaultPath = Path.home() / 'hydroshare' / 'local_hs_resources' -data_log = Path.home() / 'hydroshare' / 'sync.log' +# TODO: should be moved else where +defaultPath = Path.home() / "hydroshare" / "local_hs_resources" +data_log = Path.home() / "hydroshare" / "sync.log" class ResourceManager: - """ Class that defines a handler for working with all of a user's resources. + """Class that defines a handler for working with all of a user's resources. This is where they will be able to delete a resource, create a new one, or just get the list of a user's resources in hydroshare or jupyterhub. """ - def __init__(self): + + def __init__( + self, + *, + data_path: Union[str, Path] = Path("~/hydroshare"), + ) -> None: """Makes an output folder for storing HS files locally, if none exists, and sets up authentication on hydroshare API. 
""" - config = get_config_values(['dataPath', 'hydroShareHostname']) - self.hs_hostname = 'www.hydroshare.org' - # TODO: Rename to hydroshare_resource_data (https://github.com/hydroshare/hydroshare_jupyter_sync/issues/380) - self.output_folder = Path(defaultPath) - self.data_log_folder = Path(data_log) - if config: - if 'hydroShareHostname' in config: - self.hs_hostname = config['hydroShareHostname'] - if 'dataPath' in config: - self.output_folder = Path(config['dataPath']) - if 'logPath' in config: - self.data_log_folder = Path(config['logPath']) - - if not self.output_folder.is_dir(): - # Let any exceptions that occur bubble up - self.output_folder.mkdir(parents=True) - - if not self.data_log_folder.is_dir(): - # Let any exceptions that occur bubble up - self.data_log_folder.mkdir(parents=True) - - self.hs_api_conn = None # Use 'authenticate' to initialize - self.username = None + self._data_path = expand_and_resolve(data_path) + + # Create directories if they don't already exist + self._data_path.mkdir(parents=True, exist_ok=True) + + self._session = None # Use 'authenticate' to initialize + self._archive_message = None - def authenticate(self, username=None, password=None, save=False): - """ Attempts to authenticate with HydroShare. - - :param username: the user's HydroShare username - :type username: str - :param password: the user's HydroShare password - :type password: str - :param save: whether or not to save the credentials to the config - file if the authentication succeeds - :type save: bool - :return: the user's information (name, email, etc) from HydroShare if - successful, None otherwise + def authenticate( + self, + username: str = None, + password: str = None, + host: str = HydroShare.default_host, + protocol: str = HydroShare.default_protocol, + port: int = HydroShare.default_port, + client_id: str = None, + token: str = None, + ) -> dict: + """Create and authenticate request session with HydroShare. 
+ + Parameters + ---------- + username : str, optional + HydroShare username, by default None + password : str, optional + HydroShare password, by default None + host : str, optional + hostname of hydroshare instance, by default "www.hydroshare.org" + protocol : str, optional + request protocol, by default "https" + port : int, optional + request port, by default 443 + client_id : str, optional + client id associated with OAuth2 token, by default None + token : str, optional + OAuth2 token, by default None + + Returns + ------- + dict + Dictionary of user info """ - if not username or not password: - # Check the config file for a username and password - credentials = get_credential_values(['u', 'p']) - if not credentials or 'u' not in credentials or 'p' not in credentials: - # No passed credentials and no saved credentials -- - # can't authenticate - return None - username = credentials['u'] - password = base64.b64decode(credentials['p']).decode('utf-8') - - # Try to authenticate - auth = HydroShareAuthBasic(username=username, password=password) - self.hs_api_conn = HydroShare(auth=auth, hostname=self.hs_hostname) - try: - user_info = self.hs_api_conn.getUserInfo() - self.username = user_info.get('username') - except HydroShareHTTPException as e: - if e.status_code == 401: # Unauthorized - return None # Authentication failed - raise e # Some other error -- bubble it up - - # Authentication succeeded - # if save: - # Save the username and password - saved_successfully = set_credential_values({ - 'u': - username, - 'p': - str(base64.b64encode(password.encode('utf-8')).decode('utf-8')), - }) - if saved_successfully: - logging.info('Successfully saved HydroShare credentials to ' - 'config file.') - - return user_info # Authenticated successfully - - def is_authenticated(self): - if self.hs_api_conn is None: - self.authenticate() - return self.hs_api_conn is not None - - def save_resource_locally(self, res_id): - """ Downloads a resource to the local filesystem if a copy does - not already exist locally - - :param res_id: the resource ID - :type res_id: str + # TODO: verify that username and password's of None can't be passed to create a non-login session + # instantiate authenticated session + self._session = HydroShare( + username=username, + password=password, + host=host, + protocol=protocol, + port=port, + client_id=client_id, + token=token, + ) + + # Retrieve user info + return self._session.my_user_info() + + def is_authenticated(self) -> bool: + """Return the status of the HydroShare authenticated requests session. 
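+
+        True once `authenticate` has created a session; note this does not
+        re-verify the credentials against HydroShare.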
+ + Returns + ------- + bool """ + return self._session is not None + + def save_resource_locally(self, res_id: str): # Get resource from HS if it doesn't already exist locally - config = get_config_values(['dataPath', 'hydroShareHostname']) - self.output_folder = Path(config['dataPath']) - if not (self.output_folder / res_id).exists(): - logging.info(f"Downloading resource {res_id} from HydroShare...") + res_path = self._data_path / res_id + if not (res_path).exists(): + _log.info(f"Downloading resource {res_id} from HydroShare.") + + res = self._session.resource(res_id) + res.download(save_path=self._data_path) + + res_zip = res_path.with_suffix(".zip") + _log.debug(f"Unzipping resource {res_zip}.") + + with zipfile.ZipFile(res_zip, "r") as zip_ref: + zip_ref.extractall(res_path) - self.hs_api_conn.getResource(res_id, - destination=self.output_folder, - unzip=True) + _log.debug(f"Removing zipped bag resource {res_zip}.") + # delete zip bag archive + res_zip.unlink() def save_file_locally(self, res_id, item_path): """ Downloads a file from HydroShare to the local filesystem if a copy does diff --git a/hydroshare_jupyter_sync/server.py b/hydroshare_jupyter_sync/server.py index f8491130..b979cc06 100644 --- a/hydroshare_jupyter_sync/server.py +++ b/hydroshare_jupyter_sync/server.py @@ -5,15 +5,18 @@ Author: 2019-20 CUAHSI Olin SCOPE Team Vicky McDermott, Kyle Combes, Emily Lepert, and Charlie Weiss """ -# !/usr/bin/python # -*- coding: utf-8 -*- -import itertools import json import logging import os import signal import sys from pathlib import Path +from http import HTTPStatus +import secrets +import re +import shutil +from pydantic import ValidationError import requests import datetime @@ -23,262 +26,708 @@ from hs_restclient import exceptions as HSExceptions from notebook.base.handlers import IPythonHandler from notebook.utils import url_path_join +from typing import Union, List, Optional +from tempfile import TemporaryDirectory +from zipfile import ZipFile -from hydroshare_jupyter_sync.config_reader_writer import (get_config_values, - set_config_values) -from hydroshare_jupyter_sync.credential_reader_writer import credential_path -from hydroshare_jupyter_sync.index_html import get_index_html -from hydroshare_jupyter_sync.resource_hydroshare_data import ( - ResourceHydroShareData, HS_PREFIX) -from hydroshare_jupyter_sync.resource_local_data import (ResourceLocalData, - LOCAL_PREFIX) -from hydroshare_jupyter_sync.resource_manager import ( +from hsclient import HydroShare +from .hydroshare_resource_cache import HydroShareWithResourceCache + +from .config_reader_writer import ( + get_config_values, + set_config_values, +) +from .credential_reader_writer import credential_path +from .index_html import get_index_html +from .resource_hydroshare_data import ( + ResourceHydroShareData, + HS_PREFIX, +) +from .resource_local_data import ResourceLocalData, LOCAL_PREFIX +from .resource_manager import ( ResourceManager, HYDROSHARE_AUTHENTICATION_ERROR, ) -# Global resource handler variable +from .models.api_models import ( + Boolean, + Credentials, + DataDir, + Success, + CollectionOfResourceMetadata, + ResourceFiles, +) +from .session_struct import SessionStruct +from .session import session_sync_struct + +# from .websocket_handler import FileSystemEventWebSocketHandler +from .lib.resource_factories import HydroShareEntityDownloadFactory, EntityTypeEnum + + +# Global singleton session wrapper. 
Contains: +# - hs_client.HydroShare instance +# - deciphered user cookie +# - last user activity time +# NOTE: This should be a bound composition element or collection in the future. +# The current state does not support multiple connected users. +# activity initialized at -1, ergo no connection made +SESSION = SessionStruct(session=None, cookie=None, id=None, username=None) + resource_manager = ResourceManager() +_log = logging.getLogger(__name__) + WORKSPACE_FILES_ERROR = { - 'type': 'WorkspaceFileError', - 'message': 'HydroShare files not present in Workspace', + "type": "WorkspaceFileError", + "message": "HydroShare files not present in Workspace", } -assets_path = Path(__file__).absolute().parent / 'assets' -data_path = Path.cwd() / 'hydroshare' / 'local_hs_resources' +# TODO: These are constants, so change case +# also, this should be moved to the config setup +assets_path = Path(__file__).absolute().parent / "assets" +data_path = Path.cwd() / "hydroshare" / "local_hs_resources" + isFile = False # If we're running this file directly with Python, we'll be firing up a full # Tornado web server, so use Tornado's RequestHandler as our base class. # Otherwise (i.e. if this is being run by a Jupyter notebook server) we want to # use IPythonHandler as our base class. (See the code at the bottom of this # file for the server launching.) -BaseHandler = (tornado.web.RequestHandler - if __name__ == '__main__' else IPythonHandler) +# NOTE: Given that IPythonHandler is a subclass of tornado.web.RequestHandler, I don't +# think it is an issue to use it as the base class. This may come up in the future and +# need to be changed. +# BaseHandler = tornado.web.RequestHandler if __name__ == "__main__" else IPythonHandler + + +class SessionMixIn: + """MixIn with methods for reading the state of the current session.""" + + session_cookie_key = "user" + + def get_current_user(self): + # @overrides BaseRequestHandler.get_current_user() + return self.validate_session() + + def get_session(self) -> SessionStruct: + return SESSION + + def get_hs_session(self) -> HydroShare: + return SESSION.session + + def get_session_id(self) -> Union[int, None]: + return SESSION.id + + def get_client_cookie(self) -> Union[bytes, None]: + """Get deciphered cookie value from client request""" + return self.get_secure_cookie(self.session_cookie_key) + def get_server_cookie(self) -> Union[bytes, None]: + """Get deciphered cookie value stored on server side""" + return self.get_session().cookie -class BaseRequestHandler(BaseHandler): - """ Sets the headers for all the request handlers that extend - this class """ + def get_client_server_cookie_status(self) -> bool: + """Return if client and server cookies match or if server cookies are outdated.""" + # server side session + session = self.get_session() + client_cookie = self.get_client_cookie() + + # server side cookie != client request cookie or server side cookie is outdated + return session == client_cookie + + def validate_session(self) -> Union[bytes, None]: + if self.get_client_server_cookie_status(): + return self.get_client_cookie() + + return None + + +class BaseRequestHandler(SessionMixIn, IPythonHandler): # TODO: will need to change + """Sets the headers for all the request handlers that extend + this class""" def set_default_headers(self): # TODO: change from * (any server) to our specific url (https://github.com/hydroshare/hydroshare_jupyter_sync/issues/40) - self.set_header("Access-Control-Allow-Origin", "*") - self.set_header("Access-Control-Allow-Headers", 
"x-requested-with, " - "content-type, x-xsrftoken") + self.set_header("Access-Control-Allow-Origin", "localhost") + self.set_header( + "Access-Control-Allow-Headers", + "x-requested-with, content-type, x-xsrftoken, cookie", + ) + + def get_login_url(self) -> str: + # NOTE: hardcoded to login path, may want to change in the future + return "/syncApi/login" + + def prepare(self): + # NOTE: See: https://www.tornadoweb.org/en/stable/guide/security.html#user-authentication for a potential alternative solution + super().prepare() + if not self.get_client_server_cookie_status(): + self.set_status(HTTPStatus.FOUND) # 302 + # append requested uri as `next` Location parameter + uri = self.request.uri + self.redirect(f"{self.get_login_url()}?next={uri}") def options(self, _=None): # web browsers make an OPTIONS request to check what methods (line 31) # are allowed at/for an endpoint. - self.set_status(204) # No content + self.set_status(HTTPStatus.NO_CONTENT) self.finish() + def get_template_path(self) -> Optional[str]: + """Override template path and set application specific template path. Only applies to sub-classes.""" + return Path(__file__).parent.resolve() / "templates" -class WebAppHandler(BaseRequestHandler): - """ Serves up the HTML for the React web app """ + @property + def data_path(self) -> Path: + """Local HydroShare resources file system location.""" + return Path(self.settings.get("data_path")) + +class HeadersMixIn: def set_default_headers(self): BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', 'GET, OPTIONS') + # [(header, value tuples), ...] + for header, value in self._custom_headers: # implement in child class + self.set_header(header, value) - def get(self): - running_in_dev_mode = __name__ == '__main__' - self.write(get_index_html(running_in_dev_mode)) +class MutateSessionMixIn: + """MixIn allowing mutation current session.""" -class LoginHandler(BaseRequestHandler): - """ Handles authenticating the user with HydroShare """ + def set_session(self, session: SessionStruct) -> None: + global SESSION + SESSION = session - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', - 'OPTIONS, POST,DELETE ') - def delete(self): - if os.path.isfile(credential_path): - logging.info( - 'Deleting the credential file which contains user information') - os.remove(credential_path) - resource_manager.hs_api_conn = None - s = requests.Session() +class WebAppHandler(HeadersMixIn, BaseRequestHandler): + """Serves up the HTML for the React web app""" - s.cookies.clear_session_cookies + _custom_headers = [("Access-Control-Allow-Methods", "GET, OPTIONS")] - def post(self): - isFile = False - credentials = json.loads(self.request.body.decode('utf-8')) - do_save = credentials.get('remember', False) - user_info = resource_manager.authenticate(credentials['username'], - credentials['password'], - do_save) - dirdetails = Path(Path.home() / 'hydroshare' / 'dirinfo.json') - if dirdetails.exists(): - isFile = True + def prepare(self): + # NOTE: Bypass base request prepare. 
This should change in the future + pass - self.write({ - 'success': user_info is not None, - 'userInfo': user_info, - 'isFile': isFile, - }) + def get(self): + debug = "dev" if self.settings.get("debug", False) else "dist" + template_kwargs = { + "frontend_path": "/sync", + "backend_path": "/syncApi", + "bundle_suffix": debug, + "getting_started_notebook_path": "", + "notebook_url_path_prefix": None, + } + # NOTE: This may need to change to accommodate multiple template directories + # when integrating with Jupyter. + self.render("root.html", **template_kwargs) -class Localmd5Handler(BaseRequestHandler): - """ Handles calculation of local md5 values """ - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', - 'OPTIONS, POST,DELETE,GET ') +class DataDirectoryHandler(HeadersMixIn, BaseRequestHandler): + """Return absolute path to directory configured to store HydroShare data locally.""" - def get(self, res_id): - local_data = ResourceLocalData(res_id) + _custom_headers = [("Access-Control-Allow-Methods", "GET, OPTIONS")] - local_data.get_md5(res_id) + def prepare(self): + # NOTE: Bypass base request prepare. This should change in the future + pass - self.write({ - 'success': 'success', - 'userInfo': '', - }) + def get(self): + self.write(DataDir(data_directory=str(self.data_path)).dict()) + + +class LoginHandler(MutateSessionMixIn, HeadersMixIn, BaseRequestHandler): + """Handles authenticating the user with HydroShare. + + HTTP Request type: + DELETE: + Action: + logout user + POST: + Action: + Login user. A cookie with key "user" is sent in response header. The + cookie maintains the login state. The cookie is generated using an + encrypted token consisting of the HydroShare id of the user + some + 16-bit salt. The decrypted version is stored in memory for reference. + Body: + Content-Type: application/json + Schema: {"username": str, "password": str} + """ + _custom_headers = [("Access-Control-Allow-Methods", "OPTIONS,POST,DELETE")] + # instance flag indicating if a request concluded in a successful login. + # switched in `post`. 
used in `on_finish` to instantiate local and remote FSMaps +    successful_login = False -  -class Hsmd5Handler(BaseRequestHandler): -    """ Handles calculation of local md5 values """ +    def prepare(self): +        if self.request.headers.get("Content-Type", None) != "application/json": +            return self.set_status(HTTPStatus.UNSUPPORTED_MEDIA_TYPE) -    def set_default_headers(self): -        BaseRequestHandler.set_default_headers(self) -        self.set_header('Access-Control-Allow-Methods', -                        'OPTIONS, POST,DELETE,GET ') +    def delete(self): +        # Ensure the user is signed in before destroying the session +        if self.get_client_server_cookie_status(): +            self._destroy_session() +        else: +            self.set_status(HTTPStatus.UNAUTHORIZED)  # 401 -    def get(self, res_id): -        diff_overall = True -        if not resource_manager.is_authenticated(): -            self.write({ -                'success': False, -                'error': HYDROSHARE_AUTHENTICATION_ERROR, -            }) -            return +    def post(self): +        # NOTE: A user can login and then try to login to another account, but if they +        # do not use the DELETE method first, they will still be signed into the first +        # account because of the `user` cookie +        credentials = Credentials.parse_raw(self.request.body.decode("utf-8")) + +        self.successful_login = True +        # client and server cookies don't match or are out of date +        if not self.get_client_server_cookie_status(): +            try: +                self._create_session(credentials) -        self.write({'success': diff_overall}) +            except Exception as e: +                self.successful_login = False +                _log.exception(e) +                if "401" in str(e): +                    self.set_status(HTTPStatus.UNAUTHORIZED)  # 401 +                else: +                    self.set_status(HTTPStatus.INTERNAL_SERVER_ERROR)  # 500 + +        # self.successful_login defaults to False (class attribute) +        self.write(Success(success=self.successful_login).dict()) + +    def _create_session(self, credentials: Credentials) -> None: +        hs = HydroShareWithResourceCache( +            username=credentials.username, password=credentials.password +        ) +        user_info = hs.my_user_info() +        user_id = int(user_info["id"]) +        username = user_info["username"] + +        # salt the user id and create salted cookie +        salt = secrets.randbits(16) +        salted_token = f"{user_id}{salt}".encode() + +        # cookie is tied to session, meaning it has no expire date +        self.set_secure_cookie(self.session_cookie_key, salted_token, expires_days=None) + +        self.set_session( +            SessionStruct( +                session=hs, cookie=salted_token, id=user_id, username=username +            ) +        ) + +    def on_finish(self) -> None: +        if self.successful_login and session_sync_struct.is_empty: + +            # it is possible for a user who does not send cookies with their request to "login" +            # multiple times. this is unlikely, but it is possible. additionally, logins from +            # different browser tabs may also result in multiple successful logins. +            # note: here, it is assumed and implicitly enforced that a user cannot be logged into +            # multiple accounts at once. in the future, this may be desirable, but at the moment +            # this is not possible. the session cookie retains the login state. even if a user +            # logs in successfully, gets a cookie, and then logs in with a different account (but +            # sends the first session cookie) the first account will remain logged in. + +            # only create a new sync session if the current one is empty +            # NOTE: based on the way `is_empty` is implemented, it is possible for some attrs of the +            # _SessionSyncSingleton to be empty/None and some to be present while True is returned. +            # this may come up in the future as a place where the session is corrupted. 
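+            # hand the authenticated hsclient session to the sync singleton so it +            # can stand up the local and remote FS maps rooted at the data path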
+ hs_session = self.get_hs_session() + session_sync_struct.new_sync_session(self.data_path, hs_session) + + def _destroy_session(self): + # handle logout logic + self.clear_cookie(self.session_cookie_key) + self.set_session( + SessionStruct(session=None, cookie=None, id=None, username=None) + ) + # shutdown previous resources and reset session sync to initial state + session_sync_struct.reset_session() + + +# NOTE: deprecated +class Localmd5Handler(HeadersMixIn, BaseRequestHandler): + """Handles calculation of local md5 values""" + + _custom_headers = [("Access-Control-Allow-Methods", "GET")] + def get(self, res_id): + # TODO: Add schema. Is this a string, bytes, int? + local_data = ResourceLocalData(res_id) -class ResourcesRootHandler(BaseRequestHandler): - """ Handles /resources. Gets a user's resources (with metadata) and - creates new resources. """ + local_data.get_md5(res_id) - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', 'GET, OPTIONS, POST') + # TODO: add output schema. Something like: Might can use subset of output schema from LoginHandler + # { "success" : "string", + # "userInfo" : "string", + # } + self.write( + { + "success": "success", + "userInfo": "", + } + ) - def options(self, _=None): - # web browsers make an OPTIONS request to check what methods (line 31) - # are allowed at/for an endpoint. - # We just need to respond with the header set on line 31. - self.set_status(204) # No content - self.finish() - def get(self): - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return +# NOTE: deprecated +class Hsmd5Handler(HeadersMixIn, BaseRequestHandler): + """Handles calculation of local md5 values""" - resources, error = resource_manager.get_list_of_user_resources() - archive_message = resource_manager.get_archive_message() + _custom_headers = [("Access-Control-Allow-Methods", "GET")] - self.write({ - 'resources': resources, - 'archive_message': archive_message, - 'success': error is None, - 'error': error - }) + # NOTE: How is this different from Localmd5Handler? - def post(self): - """ - Makes a new resource with the bare minimum amount of information + def get(self, res_id): + # NOTE: This is not implemented. Should it be? + # TODO: Add schema. Is this a string, bytes, int? + diff_overall = True - Expects body: - {"resource title": string - "creators": list of strings} - """ - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return + # TODO: add output schema. 
Something like: + self.write({"success": diff_overall}) - body = json.loads(self.request.body.decode('utf-8')) - resource_title = body.get("title") - creators = body.get("creators") # list of names (strings) - abstract = body.get("abstract") - privacy = body.get("privacy", 'Private') # Public or private - - if resource_title is None or creators is None: - self.set_status(400) - self.write({ - 'success': False, - 'error': { - 'type': - 'InvalidRequest', - 'message': ('The request body must specify ' - '"title" and "creators".'), - }, - }) - else: - resource_id, error = resource_manager.create_HS_resource( - resource_title, creators, abstract, privacy) - self.write({ - 'resource_id': resource_id, - 'success': error is None, - 'error': error, - }) +class ListUserHydroShareResources(HeadersMixIn, BaseRequestHandler): + """List the HydroShare resources a user has edit permission of.""" -class ResourceHandler(BaseRequestHandler): - """ Handles resource-specific requests made to /resources/ """ + _custom_headers = [("Access-Control-Allow-Methods", "GET")] - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', 'DELETE, OPTIONS') + def get(self): + session = self.get_session() + + resources = list(session.session.search(edit_permission=True)) + + # Marshall hsclient representation into CollectionOfResourceMetadata + self.write(CollectionOfResourceMetadata.parse_obj(resources).json()) + + # # TODO: This should be moved to its own endpoint + # def post(self): + # """ + # Makes a new resource with the bare minimum amount of information + + # Expects body: + # {"resource title": string + # "creators": list of strings} + # """ + # # TODO: IMO, this endpoint is not needed for the MVP + # # TODO: Add schema validation + # # { + # # "title": {"type": "string"}, + # # "creators": {"type": "list", "schema": "string"} + # # + # # + # # } + # body = json.loads(self.request.body.decode("utf-8")) + # resource_title = body.get("title") + # creators = body.get("creators") # list of names (strings) + # abstract = body.get("abstract") + # privacy = body.get("privacy", "Private") # Public or private + + # if resource_title is None or creators is None: + # self.set_status(400) + # self.write( + # { + # "success": False, + # "error": { + # "type": "InvalidRequest", + # "message": ( + # "The request body must specify " '"title" and "creators".' + # ), + # }, + # } + # ) + # else: + # resource_id, error = resource_manager.create_HS_resource( + # resource_title, creators, abstract, privacy + # ) + # self.write( + # { + # "resource_id": resource_id, + # "success": error is None, + # "error": error, + # } + # ) + + +class ListHydroShareResourceFiles(HeadersMixIn, BaseRequestHandler): + """List the files in a HydroShare resource.""" + + _custom_headers = [("Access-Control-Allow-Methods", "GET")] + + def get(self, resource_id: str): + # NOTE: May want to sanitize input in future. i.e. 
require it be a min/certain length + session = self.get_hs_session() + + resource = session.resource(resource_id) + files = [ + file + # The file names and checksums are implicitly cached by the resource + for file in resource._checksums.keys() + if file.startswith("data/contents/") + ] + + # Marshall hsclient representation into CollectionOfResourceMetadata + self.write(ResourceFiles(files=files).json()) + + +class HydroShareResourceHandler(HeadersMixIn, BaseRequestHandler): + """Download HydroShare resource to local file system.""" + + resource_id: str + _custom_headers = [("Access-Control-Allow-Methods", "GET")] + + def get(self, resource_id: str): + # NOTE: May want to sanitize input in future. i.e. require it be a min/certain length + session = self.get_hs_session() + resource = session.resource(resource_id) + + # download resource to temp directory + with TemporaryDirectory() as temp_dir: + downloaded_zip = resource.download(temp_dir) + # unzip resource + with ZipFile(downloaded_zip, "r") as zr: + zr.extractall(self.data_path) + + # set instance variable for `on_finish` + self.resource_id = resource_id + self.set_status(HTTPStatus.CREATED) # 201 + + def on_finish(self) -> None: + if self.get_status() == HTTPStatus.CREATED: + # dispatch resource downloaded event with resource_id + session_sync_struct.event_broker.dispatch( + "RESOURCE_DOWNLOADED", self.resource_id + ) + + +class HydroShareResourceEntityHandler(HeadersMixIn, BaseRequestHandler): + """Download file or folder from HydroShare resource to local file system.""" + + BAGGIT_PREFIX_RE = r"^/?data/contents/?" + BAGGIT_PREFIX_MATCHER = re.compile(BAGGIT_PREFIX_RE) + resource_id: str + + _custom_headers = [("Access-Control-Allow-Methods", "GET")] + + def get(self, resource_id: str, path: str): + """Use query param, `folder` with a boolean value (True, true, 1 | False, false, + 0) to indicate downloading a folder versus a file.""" + # NOTE: May want to sanitize input in future. i.e. require it be a min/certain length + session = self.get_hs_session() + resource = session.resource(resource_id) + + path = self._truncate_baggit_prefix(path) + + is_folder_entity = Boolean.get_value(self.get_query_argument("folder", False)) + + entity_type = EntityTypeEnum.FILE + if is_folder_entity: + entity_type = EntityTypeEnum.FOLDER + + HydroShareEntityDownloadFactory.download( + entity_type, resource, self.data_path, path + ) + + # set instance variable for `on_finish` + self.resource_id = resource_id + self.set_status(HTTPStatus.CREATED) # 201 + + def on_finish(self) -> None: + if self.get_status() == HTTPStatus.CREATED: + # dispatch resource entity downloaded event with resource_id + session_sync_struct.event_broker.dispatch( + "RESOURCE_ENTITY_DOWNLOADED", self.resource_id + ) + + @staticmethod + def _truncate_baggit_prefix(file_path: str): + baggit_prefix_match = ( + HydroShareResourceEntityHandler.BAGGIT_PREFIX_MATCHER.match(file_path) + ) + + if baggit_prefix_match is not None: + # left-truncate baggit prefix path + file_path = file_path[baggit_prefix_match.end() :] + + return file_path + + +class LocalResourceEntityHandler(HeadersMixIn, BaseRequestHandler): + """Upload file or folder from local file system to existing HydroShare resource.""" + + BAGGIT_PREFIX_RE = r"^/?data/contents/?" 
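+    # the compiled matcher strips an optional leading slash and the BagIt payload +    # prefix, so "file.txt", "data/contents/file.txt", and "/data/contents/file.txt" +    # all normalize to the same resource-relative path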
+ BAGGIT_PREFIX_MATCHER = re.compile(BAGGIT_PREFIX_RE) + BAGGIT_PREFIX = "data/contents/" + _ZIP_FILENAME = "__zip.zip" + resource_id: str + + _custom_headers = [("Access-Control-Allow-Methods", "POST")] + + def prepare(self): + super().prepare() + if self.request.headers["Content-Type"] != "application/json": + self.set_status(HTTPStatus.UNSUPPORTED_MEDIA_TYPE) + + def post(self, resource_id: str): + """Upload one or more local entities (files or dirs) to existing HydroShare resource. + File paths are passed in the request body and must reside within a downloaded HS + resource directory inside the configured `data` directory. + (i.e. an entity inside: ~/hydroshare//data/contents/) + + The method adds the prefix `/data/contents` to each file path. If it already + exists, it is ignored. Thus, the following are equivalent: + - { "files": ["some-file"] } + - { "files": ["/data/contents/some-file"] } + + HTTP Request type: + POST: + Action: + Upload local entities to existing hydroshare resource. + Body: + Content-Type: application/json + Schema: {"files": [str] } + Schema Notes: List members should be relative to `data` configuration directory (i.e. "/data/contents/file"). + However, `~` and `..` are not allowed in provided paths and return 403 status. + (i.e. "data/contents/../../" is not allowed) + """ + # TODO: Add the ability to version data + try: + # Marshall request body + files = ResourceFiles.parse_raw(self.request.body.decode("utf-8")).files + + except ValidationError: + # fail fast + return self.set_status(HTTPStatus.FORBIDDEN) + + session = self.get_hs_session() + # create resource object. Will fail if invalid/user does not have access. + resource = session.resource(resource_id) + + files = self._add_baggit_prefix_and_drop_nonexistant_files(resource_id, files) + resource_path_prefix = self.data_path / f"{resource_id}/{resource_id}" + + with TemporaryDirectory() as temp_dir: + zip_file = Path(temp_dir) / self._ZIP_FILENAME + + # create zip archive + with ZipFile(zip_file, "w") as zipped: + for file in files: + zipped.write( + file, + # maintain file system structure relative to where data is stored in baggit (/data/contents/) + # Example: `/data/contents/dir1/some-file.txt` will be archived in the zip at, `/dir1/some-file.txt` + file.relative_to(resource_path_prefix / self.BAGGIT_PREFIX), + ) + + # upload zip to HydroShare + resource.file_upload(zip_file) + self._unpack_zip_on_hydroshare(resource, zip_file.name) + + # set instance variable for `on_finish` + self.resource_id = resource_id + self.set_status(HTTPStatus.CREATED) # 201 + + def on_finish(self) -> None: + if self.get_status() == HTTPStatus.CREATED: + # dispatch resource uploaded event with resource_id + session_sync_struct.event_broker.dispatch( + "RESOURCE_ENTITY_UPLOADED", self.resource_id + ) + + def _unpack_zip_on_hydroshare( + self, resource, filename: str, location: str = "" + ) -> None: + # unpack and delete zip + unzip_path = url_path_join( + resource._hsapi_path, + "functions", + "unzip", + "data", + "contents", + location, + filename, + ) + # post job to HydroShare + resource._hs_session.post( + unzip_path, + status_code=200, + data={"overwrite": "true", "ingest_metadata": "true"}, + ) + + def _add_baggit_prefix_and_drop_nonexistant_files( + self, resource_id: str, files: List[str] + ) -> List[Path]: + # prepend baggit prefix, files/dirs that don't exist are dropped + resource_path_prefix = self.data_path / f"{resource_id}/{resource_id}" + return [ + resource_path_prefix / self._prepend_baggit_prefix(f) + for f 
in files + if (resource_path_prefix / self._prepend_baggit_prefix(f)).exists() + ] + + @staticmethod + def _truncate_baggit_prefix(file_path: str) -> str: + baggit_prefix_match = LocalResourceEntityHandler.BAGGIT_PREFIX_MATCHER.match( + file_path + ) + + if baggit_prefix_match is not None: + # left-truncate baggit prefix path + file_path = file_path[baggit_prefix_match.end() :] + + return file_path + + @staticmethod + def _prepend_baggit_prefix(file_path: str) -> str: + # remove existing prefix to sanitize the input + left_truncated_path = LocalResourceEntityHandler._truncate_baggit_prefix( + file_path + ) + + return f"{LocalResourceEntityHandler.BAGGIT_PREFIX}{left_truncated_path}" + + +# NOTE: deprecated +class ResourceHandler(HeadersMixIn, BaseRequestHandler): + """Handles resource-specific requests made to /resources/""" + + _custom_headers = [("Access-Control-Allow-Methods", "DELETE, OPTIONS")] + + # TODO: Change name to be more representative + # NOTE: Does this mean delete it locally or on HS? + # NOTE: I am curious if it's possible to leverage jupyter's file browser here to do + # the lifting? + # TODO: Split into two endpoints if we want to support deleting on hydroshare and locally def delete(self, res_id): - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return - - body = json.loads(self.request.body.decode('utf-8')) + # TODO: Needs a schema + body = json.loads(self.request.body.decode("utf-8")) del_locally_only = body.get("locallyOnly", True) error = resource_manager.delete_resource_locally(res_id) if not error and not del_locally_only: error = resource_manager.delete_resource_from_hs(res_id) - self.write({ - 'success': error is None, - 'error': error, - }) + self.write( + { + "success": error is None, + "error": error, + } + ) -class DirectorySelectorHandler(BaseRequestHandler): - """ Handles downloading of hydroshare data in user selected directory """ +# NOTE: deprecated +class DirectorySelectorHandler(HeadersMixIn, BaseRequestHandler): + """Handles downloading of hydroshare data in user selected directory""" - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', - 'DELETE, OPTIONS, GET,POST') + _custom_headers = [("Access-Control-Allow-Methods", "POST")] def post(self): returnValue = "" isFile = False - dirpathinfo = json.loads(self.request.body.decode('utf-8')) + dirpathinfo = json.loads(self.request.body.decode("utf-8")) + # NOTE: Ensure that path is descendent of specified root dir in config. Meaning /home/hydroshare, + # here, you could not go outside of the hydroshare directory. directoryPath = dirpathinfo["dirpath"] choice = dirpathinfo["choice"] - self.dirdetails = Path(Path.home() / 'hydroshare' / 'dirinfo.json') + # NOTE: what does this do? 
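+        # (apparently a marker file: dirinfo.json is created once a data directory +        # has been chosen, and the login flow reports its existence as `isFile`)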
+ self.dirdetails = Path(Path.home() / "hydroshare" / "dirinfo.json") ## ##if not self.directoryPath.is_dir(): # Let any exceptions that occur bubble up @@ -289,7 +738,8 @@ def post(self): try: if choice == "No": - hydroshare = 'hydroshare' + hydroshare = "hydroshare" + # NOTE: Should not be an instance method returnValue = self.createDirectory(Path.home() / hydroshare) self.dirdetails.mkdir(parents=True) isFile = True @@ -298,7 +748,9 @@ def post(self): dpath = Path(directoryPath) if not dpath.exists(): - returnValue = 'Directory path is not valid, please check if directory exists' + returnValue = ( + "Directory path is not valid, please check if directory exists" + ) elif os.access(dpath, os.W_OK): returnValue = self.createDirectory(dpath) @@ -307,80 +759,96 @@ def post(self): else: returnValue = "Permission Denied" + + # TODO: Make more specific except Exception as e: - returnValue = 'Error while setting the file path ' - config = get_config_values(['dataPath']) - if 'dataPath' in config: - config_data_path = str(config['dataPath']) - config_new_path = config_data_path.replace(str(Path.home()), '') + returnValue = "Error while setting the file path " + + # TODO: don't read again. These should be in the context somehow + config = get_config_values(["dataPath"]) - notebook_url_path_prefix = url_path_join('/tree', config_new_path) + if "dataPath" in config: + config_data_path = str(config["dataPath"]) + config_new_path = config_data_path.replace(str(Path.home()), "") + + notebook_url_path_prefix = url_path_join("/tree", config_new_path) + + # TODO: fail faster and include output schema if returnValue != "": - self.write({ - 'error': returnValue, - }) + self.write( + { + "error": returnValue, + } + ) else: - self.write({ - 'success': "Configuration saved successfully.", - 'isFile': isFile, - 'configDataPath': notebook_url_path_prefix, - }) + self.write( + { + "success": "Configuration saved successfully.", + "isFile": isFile, + "configDataPath": notebook_url_path_prefix, + } + ) def createDirectory(self, defaultPath): - returnValue = '' - localhsResources = 'local_hs_resources' - logpath = Path.home() / 'hydroshare' / 'sync.log' - saved_successfully = set_config_values({ - "dataPath": - str(defaultPath / localhsResources), - "logPath": - str(logpath) - }) + # NOTE: This method should be removed or moved to a non-instance method. There is no + # need in it being an instance method. + returnValue = "" + localhsResources = "local_hs_resources" + + # TODO: remove hard coded path's. This should be handled by the global logger + logpath = Path.home() / "hydroshare" / "sync.log" + saved_successfully = set_config_values( + {"dataPath": str(defaultPath / localhsResources), "logPath": str(logpath)} + ) if saved_successfully: + # TODO: Remove. Not used. 
resource_manager = ResourceManager() else: - returnValue = 'Cannot set data Path values in config file' + returnValue = "Cannot set data Path values in config file" return returnValue -class ResourceLocalFilesRequestHandler(BaseRequestHandler): - logging.info('Resource local Handler Class is called') - """ Facilitates getting, deleting, and uploading to the files contained in - a resource on the local disk """ +# NOTE: deprecated +class ResourceLocalFilesRequestHandler(HeadersMixIn, BaseRequestHandler): + """Facilitates getting, deleting, and uploading to the files contained in + a resource on the local disk""" - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', ('DELETE, GET,' - 'OPTIONS, POST, PUT')) + _custom_headers = [("Access-Control-Allow-Methods", "DELETE, GET, POST, PUT")] + + # TODO: use module level logging + logging.info("Resource local Handler Class is called") def get(self, res_id): + # TODO: add input schema # Handling authentication first to ensure local data if not present is downloaded from Hydroshare - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return - + # NOTE: Seems like a static method could be used to check if a resource exists or not local_data = ResourceLocalData(res_id) if not local_data.is_downloaded(): resource_manager.save_resource_locally(res_id) - self.write({ - 'readMe': local_data.get_readme(), - 'rootDir': local_data.get_files_and_folders(), - }) + + # TODO: add output schema + self.write( + { + "readMe": local_data.get_readme(), + "rootDir": local_data.get_files_and_folders(), + } + ) # TODO: move some of the logic here outside this file and deduplicate # code (https://github.com/hydroshare/hydroshare_jupyter_sync/issues/41) def delete(self, res_id): - body = json.loads(self.request.body.decode('utf-8')) - file_and_folder_paths = body.get('files') + # NOTE: is this a local deletion with no update to hydroshare? 
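+        # (local only: ResourceLocalData.delete_file_or_folder removes the files +        # from the local copy; the HydroShare copy is not modified)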
+ # TODO: input schema + + body = json.loads(self.request.body.decode("utf-8")) + file_and_folder_paths = body.get("files") + if file_and_folder_paths is None: self.set_status(400) # Bad Request self.write('Could not find "files" in request body.') @@ -398,7 +866,7 @@ def delete(self, res_id): results = [] for item_path in file_and_folder_paths: # Remove any leading / - if item_path.startswith('/'): + if item_path.startswith("/"): item_path = item_path[1:] try: for deleted_folder in deleted_folders: @@ -406,107 +874,113 @@ def delete(self, res_id): # slash is appended to ensure that a file in, # say, '/My data 2' is not skipped because '/My data' was # deleted) - if item_path.startswith(deleted_folder + '/'): + if item_path.startswith(deleted_folder + "/"): # We can skip deleting this file because it was already # deleted with its parent folder break else: # Only runs if the break statement above is never hit # (yes, the indentation is right here) # Try to delete this item - deleted_type = local_folder.delete_file_or_folder( - item_path) - if deleted_type == 'folder': + deleted_type = local_folder.delete_file_or_folder(item_path) + if deleted_type == "folder": deleted_folders.append(item_path) success_count += 1 - results.append({'success': True}) + results.append({"success": True}) except Exception as e: logging.error(e) - results.append({ - 'success': False, - 'error': { - 'type': - 'UnknownError', - 'message': (f'An unknown error occurred when ' - f'attempting to delete {item_path}.') + results.append( + { + "success": False, + "error": { + "type": "UnknownError", + "message": ( + f"An unknown error occurred when " + f"attempting to delete {item_path}." + ), + }, } - }) + ) failure_count += 1 - self.write({ - 'results': results, - 'successCount': success_count, - 'failureCount': failure_count, - }) + # NOTE: Needs output schema + self.write( + { + "results": results, + "successCount": success_count, + "failureCount": failure_count, + } + ) def put(self, res_id): - """ Creates a new file in the local copy of the resource - - :param res_id: the resource ID - :type res_id: str - """ - body = json.loads(self.request.body.decode('utf-8')) - item_type = body.get('type') - name = body.get('name') + """Creates a new file in the local copy of the resource + + :param res_id: the resource ID + :type res_id: str + """ + # TODO: input schema + + body = json.loads(self.request.body.decode("utf-8")) + item_type = body.get("type") + name = body.get("name") error_msg = None if item_type is None or name is None: - error_msg = ('Request must include both "type" and "name" ' - 'attributes.') - if not error_msg and not (item_type == 'file' - or item_type == 'folder'): + error_msg = 'Request must include both "type" and "name" ' "attributes." + if not error_msg and not (item_type == "file" or item_type == "folder"): error_msg = '"type" attribute must be either "file" or "folder".' 
if error_msg: self.set_status(400) # Bad Request - self.write({ - 'success': False, - 'error': { - 'type': 'InvalidRequest', - 'message': error_msg, - }, - }) + self.write( + { + "success": False, + "error": { + "type": "InvalidRequest", + "message": error_msg, + }, + } + ) return local_data = ResourceLocalData(res_id) - if item_type == 'file': + if item_type == "file": local_data.create_file(name) - elif item_type == 'folder': + elif item_type == "folder": local_data.create_local_folder(name) - self.write({ - 'success': True, - }) + # TODO: Output schema + self.write( + { + "success": True, + } + ) def post(self, res_id): - """ Uploads a file from the user's computer to the local filesystem + """Uploads a file from the user's computer to the local filesystem + + :param res_id: the resource ID + :type res_id: str + """ + # TODO: input schema - :param res_id: the resource ID - :type res_id: str - """ local_data = ResourceLocalData(res_id) for field_name, files in self.request.files.items(): for info in files: - with open(str(local_data.data_path / info['filename']), - "wb") as f: - f.write(info['body']) - self.write({ - 'success': True, - }) + with open(str(local_data.data_path / info["filename"]), "wb") as f: + f.write(info["body"]) + # TODO: output schema + self.write( + { + "success": True, + } + ) -class ResourceHydroShareFilesRequestHandler(BaseRequestHandler): - """ Handles getting and deleting the files in a HydroShare resource """ +# NOTE: deprecated +class ResourceHydroShareFilesRequestHandler(HeadersMixIn, BaseRequestHandler): + """Handles getting and deleting the files in a HydroShare resource""" - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', 'DELETE, GET, OPTIONS') + _custom_headers = [("Access-Control-Allow-Methods", "DELETE, GET")] def get(self, res_id): - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return - hs_data = ResourceHydroShareData(resource_manager.hs_api_conn, res_id) # Code for updating the latest files on Root Dir object @@ -515,20 +989,13 @@ def get(self, res_id): hydroshare_data = hs_data.get_files() checkHydroShareSyncStatus(hydroshare_data, res_id, False) - self.write({'rootDir': hydroshare_data}) + self.write({"rootDir": hydroshare_data}) # TODO: Move the bulk of this function out of this file and # deduplicate code (https://github.com/hydroshare/hydroshare_jupyter_sync/issues/41) def delete(self, res_id): - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return - - data = json.loads(self.request.body.decode('utf-8')) - file_and_folder_paths = data.get('files') + data = json.loads(self.request.body.decode("utf-8")) + file_and_folder_paths = data.get("files") if file_and_folder_paths is None: self.set_status(400) # Bad Request self.write('Could not find "files" in request body.') @@ -546,7 +1013,7 @@ def delete(self, res_id): results = [] for item_path in file_and_folder_paths: # Remove any leading / - if item_path.startswith('/'): + if item_path.startswith("/"): item_path = item_path[1:] try: for deleted_folder in deleted_folders: @@ -554,7 +1021,7 @@ def delete(self, res_id): # slash is appended to ensure that a file in, # say, '/My data 2' is not skipped because '/My data' # was deleted) - if item_path.startswith(deleted_folder + '/'): + if item_path.startswith(deleted_folder + "/"): # We can skip deleting this 
file because it was already                     # deleted with its parent folder break @@ -562,71 +1029,74 @@ # (yes, the indentation is right here) # Try to delete this item deleted_type = hs_data.delete_file_or_folder(item_path) - if deleted_type == 'folder': + if deleted_type == "folder": deleted_folders.append(item_path) success_count += 1 - results.append({'success': True}) + results.append({"success": True}) except HSExceptions.HydroShareNotFound: - results.append({ - 'success': False, - 'error': { - 'type': 'NotFoundError', - 'message': f'Could not find {item_path} in ' - 'HydroShare.', - }, - }) + results.append( + { + "success": False, + "error": { + "type": "NotFoundError", + "message": f"Could not find {item_path} in " "HydroShare.", + }, + } + ) except HSExceptions.HydroShareNotAuthorized: - results.append({ - 'success': False, - 'error': { - 'type': - 'NotAuthorizedError', - 'message': (f'Could not delete {item_path}. Do you ' - 'have write access to the resource?'), - }, - }) + results.append( + { + "success": False, + "error": { + "type": "NotAuthorizedError", + "message": ( + f"Could not delete {item_path}. Do you " + "have write access to the resource?" + ), + }, + } + ) except Exception as e: + # TODO: move to module level logging logging.error(e) - results.append({ - 'success': False, - 'error': { - 'type': - 'UnknownError', - 'message': (f'An unknown error occurred when' - ' attempting to delete {item_path}.') + results.append( + { + "success": False, + "error": { + "type": "UnknownError", + "message": ( + f"An unknown error occurred when" + f" attempting to delete {item_path}." + ), + }, } - }) + ) failure_count += 1 - self.write({ - 'results': results, - 'successCount': success_count, - 'failureCount': failure_count, - }) + self.write( + { + "results": results, + "successCount": success_count, + "failureCount": failure_count, + } + ) -MOVE = 'move' -COPY = 'copy' +MOVE = "move" +COPY = "copy" -class DownloadHydroShareFilesRequestHandler(BaseRequestHandler): - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', - 'DELETE, GET, OPTIONS, POST') +# NOTE: deprecated +class DownloadHydroShareFilesRequestHandler(HeadersMixIn, BaseRequestHandler): + _custom_headers = [("Access-Control-Allow-Methods", "POST")] def post(self, res_id): - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return hs_data = ResourceHydroShareData(resource_manager.hs_api_conn, res_id) - data = json.loads(self.request.body.decode('utf-8')) + data = json.loads(self.request.body.decode("utf-8")) - file_and_folder_paths = data.get('files') - filesChanged = 'sync' + file_and_folder_paths = data.get("files") + # TODO: unused + filesChanged = "sync" if file_and_folder_paths is None: self.set_status(400) # Bad Request @@ -634,75 +1104,77 @@ def post(self, res_id): return for item_path in file_and_folder_paths: # Remove any leading / - if item_path.startswith('/'): + if item_path.startswith("/"): item_path = item_path[1:] local_data = ResourceLocalData(res_id) # resource_manager.save_file_locally(res_id, item_path) hs_data.download_to_local(local_data, Path(item_path), Path(item_path)) - self.write({ - 'readMe': local_data.get_readme(), - 'rootDir': local_data.get_files_and_folders(), - }) + self.write( + { + "readMe": local_data.get_readme(), + "rootDir": local_data.get_files_and_folders(), + } + ) +# NOTE: deprecated def 
checkFileSyncStatus(temporaryRoot, res_id): - serverIsLatest = 'HydroShare is latest' - localIsLatest = 'Local is Latest' - localSyncServer = 'In Sync' - isfileExists = '' + serverIsLatest = "HydroShare is latest" + localIsLatest = "Local is Latest" + localSyncServer = "In Sync" + isfileExists = "" local_data = ResourceLocalData(res_id) hs_data = ResourceHydroShareData(resource_manager.hs_api_conn, res_id) # find where are the files and its properties in temporaryRoot - contents = temporaryRoot['contents'] + contents = temporaryRoot["contents"] for file in contents: - modified_time_local = file['modifiedTime'] + modified_time_local = file["modifiedTime"] checksum_local = file["checksum"] - checksum_hs, modified_time_hs = hs_data.get_modified_time_hs( - file['name']) + checksum_hs, modified_time_hs = hs_data.get_modified_time_hs(file["name"]) if checksum_hs == None or modified_time_hs == None: syncStatus = " " isfileExists = "File doesn't exist in HydroShare" - file.update({ - "fileChanged": isfileExists, - "syncStatus": syncStatus - }) + file.update({"fileChanged": isfileExists, "syncStatus": syncStatus}) else: if checksum_local != checksum_hs: syncStatus = "Out of Sync" if modified_time_hs < modified_time_local: # add fileChanged value - file.update({ - "fileChanged": localIsLatest, - "syncStatus": syncStatus - }) + file.update( + {"fileChanged": localIsLatest, "syncStatus": syncStatus} + ) elif modified_time_hs > modified_time_local: - file.update({ - "fileChanged": serverIsLatest, - "syncStatus": syncStatus - }) + file.update( + {"fileChanged": serverIsLatest, "syncStatus": syncStatus} + ) elif checksum_local == checksum_hs: syncStatus = "In Sync" - file.update({ - "fileChanged": "Local and HydroShare are synced", - "syncStatus": syncStatus - }) + file.update( + { + "fileChanged": "Local and HydroShare are synced", + "syncStatus": syncStatus, + } + ) - temporaryRoot = sorted(contents, key=lambda x: x['syncStatus'] == ' ') + temporaryRoot = sorted(contents, key=lambda x: x["syncStatus"] == " ") +# NOTE: deprecated def checkHydroShareSyncStatus(local_or_hs_file_data, res_id, is_local_data): - serverIsLatest = 'HydroShare is latest' - localIsLatest = 'Local is Latest' - localSyncServer = 'In Sync' - isFileExist = '' + serverIsLatest = "HydroShare is latest" + localIsLatest = "Local is Latest" + # TODO: unused + localSyncServer = "In Sync" + isFileExist = "" + # TODO: move docstring """ if its localdata then get hydroshare data for the res_id else if hydrosharedata then get local data for the res_id @@ -712,58 +1184,78 @@ def checkHydroShareSyncStatus(local_or_hs_file_data, res_id, is_local_data): else: data_to_compare = ResourceLocalData(res_id) - data_to_check_sync_status = local_or_hs_file_data['contents'] + data_to_check_sync_status = local_or_hs_file_data["contents"] for data in data_to_check_sync_status: - addParameters(data, data_to_compare, localIsLatest, serverIsLatest, res_id, is_local_data) + addParameters( + data, data_to_compare, localIsLatest, serverIsLatest, res_id, is_local_data + ) -def addParameters(data, data_to_compare, localIsLatest, serverIsLatest, res_id, is_local_data): +# NOTE: deprecated +def addParameters( + data, data_to_compare, localIsLatest, serverIsLatest, res_id, is_local_data +): # First iterate through folders, and then recrusively call the same method for each file. 
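+    # e.g. a folder entry {"type": "folder", "contents": [...]} recurses into +    # each child; file entries fall through to the checksum comparison below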
- if data['type'] == 'folder': + if data["type"] == "folder": for k, v in data.items(): - if k == 'contents': + if k == "contents": for j in v: - addParameters(j, data_to_compare, localIsLatest, serverIsLatest, res_id, is_local_data) + addParameters( + j, + data_to_compare, + localIsLatest, + serverIsLatest, + res_id, + is_local_data, + ) else: """ - TODO: Soumya - If checksum present for - local file - then local file exist - hydroshare file - then file exist in Hydroshare server + TODO: Soumya + If checksum present for + local file - then local file exist + hydroshare file - then file exist in Hydroshare server - If checksum matches - Then both files are in sync - Else - If they are not in sync, then check their last update time and identify which is latest. + If checksum matches + Then both files are in sync + Else + If they are not in sync, then check their last update time and identify which is latest. - Sync status is dependent upon checksum. So, if checksum is present for both, then the file exist in both HS and local. - if local file doesnt have checksum the file is no + Sync status is dependent upon checksum. So, if checksum is present for both, then the file exist in both HS and local. + if local file doesnt have checksum the file is no """ # Identify if its Hydroshare file or local file - if data['path'].startswith('hs'): - file_name = data['path'][4:] + if data["path"].startswith("hs"): + file_name = data["path"][4:] else: - file_name = data['path'][7:] + file_name = data["path"][7:] # Get checksum for both Hydroshare and local files if is_local_data: - item_path = str(ResourceLocalData(res_id).data_path) + '/' + file_name + item_path = str(ResourceLocalData(res_id).data_path) + "/" + file_name checksum_local = ResourceLocalData(res_id).get_md5_files(item_path) - checksum_hs = data_to_compare.checksum_hs(file_name.partition('.')[0], file_name.partition('.')[2]) - modified_time_local = str(datetime.datetime.fromtimestamp(Path(item_path).stat().st_mtime)) - modified_time_hs = data_to_compare.modified_time_hs(file_name.partition('.')[0], file_name.partition('.')[2]) + checksum_hs = data_to_compare.checksum_hs( + file_name.partition(".")[0], file_name.partition(".")[2] + ) + modified_time_local = str( + datetime.datetime.fromtimestamp(Path(item_path).stat().st_mtime) + ) + modified_time_hs = data_to_compare.modified_time_hs( + file_name.partition(".")[0], file_name.partition(".")[2] + ) else: - item_path = str(data_to_compare.data_path) + '/' + file_name + item_path = str(data_to_compare.data_path) + "/" + file_name checksum_local = data_to_compare.get_md5_files(item_path) modified_time_local = None if Path(item_path).exists(): - modified_time_local = str(datetime.datetime.fromtimestamp(Path(item_path).stat().st_mtime)) - checksum_hs = data['checksum'] - modified_time_hs = data['modifiedTime'] + modified_time_local = str( + datetime.datetime.fromtimestamp(Path(item_path).stat().st_mtime) + ) + checksum_hs = data["checksum"] + modified_time_hs = data["modifiedTime"] syncStatus = " " @@ -776,39 +1268,38 @@ def addParameters(data, data_to_compare, localIsLatest, serverIsLatest, res_id, else: if checksum_local != checksum_hs: - syncStatus = 'Out of Sync' + syncStatus = "Out of Sync" if modified_time_hs < modified_time_local: # add fileChanged value - data.update({"fileChanged": localIsLatest, "syncStatus": syncStatus}) + data.update( + {"fileChanged": localIsLatest, "syncStatus": syncStatus} + ) elif modified_time_hs > modified_time_local: - data.update({"fileChanged": serverIsLatest, 
"syncStatus": syncStatus}) + data.update( + {"fileChanged": serverIsLatest, "syncStatus": syncStatus} + ) else: - syncStatus = 'In Sync' - data.update({"fileChanged": "Local and HydroShare are synced", "syncStatus": syncStatus}) - + syncStatus = "In Sync" + data.update( + { + "fileChanged": "Local and HydroShare are synced", + "syncStatus": syncStatus, + } + ) -class CheckSyncStatusFilesRequestHandler(BaseRequestHandler): - filesChanged = 'sync' - modified_time_local = '' - modified_time_hs = '' - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', - 'DELETE, GET, OPTIONS, POST') +# NOTE: deprecated +class CheckSyncStatusFilesRequestHandler(HeadersMixIn, BaseRequestHandler): + _custom_headers = [("Access-Control-Allow-Methods", "POST")] + filesChanged = "sync" + modified_time_local = "" + modified_time_hs = "" def post(self, res_id): - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return + data = json.loads(self.request.body.decode("utf-8")) - data = json.loads(self.request.body.decode('utf-8')) - - file_and_folder_paths = data.get('files') + file_and_folder_paths = data.get("files") myList = [] @@ -819,128 +1310,131 @@ def post(self, res_id): for item_path in file_and_folder_paths: # file_path = item_path # Remove any leading / - if item_path.startswith('/'): + if item_path.startswith("/"): file_name = item_path[1:] local_data = ResourceLocalData(res_id) - CheckSyncStatusFilesRequestHandler.modified_time_local = local_data.get_md5_files( - file_name) + CheckSyncStatusFilesRequestHandler.modified_time_local = ( + local_data.get_md5_files(file_name) + ) # appending local modified time to list - hs_data = ResourceHydroShareData(resource_manager.hs_api_conn, - res_id) - CheckSyncStatusFilesRequestHandler.modified_time_hs = hs_data.get_md5_files( - res_id, file_name) + hs_data = ResourceHydroShareData(resource_manager.hs_api_conn, res_id) + CheckSyncStatusFilesRequestHandler.modified_time_hs = ( + hs_data.get_md5_files(res_id, file_name) + ) - if CheckSyncStatusFilesRequestHandler.modified_time_hs < CheckSyncStatusFilesRequestHandler.modified_time_local: + if ( + CheckSyncStatusFilesRequestHandler.modified_time_hs + < CheckSyncStatusFilesRequestHandler.modified_time_local + ): - CheckSyncStatusFilesRequestHandler.filesChanged = 'local' + CheckSyncStatusFilesRequestHandler.filesChanged = "local" myDict = { - 'resourceId': res_id, - 'filesChanged': - CheckSyncStatusFilesRequestHandler.filesChanged, - 'modified_time_local': - CheckSyncStatusFilesRequestHandler.modified_time_local, - 'file_name': file_name, - 'file_path': item_path + "resourceId": res_id, + "filesChanged": CheckSyncStatusFilesRequestHandler.filesChanged, + "modified_time_local": CheckSyncStatusFilesRequestHandler.modified_time_local, + "file_name": file_name, + "file_path": item_path, } myList.append(myDict) myJson = json.dumps(myList) - elif CheckSyncStatusFilesRequestHandler.modified_time_hs > CheckSyncStatusFilesRequestHandler.modified_time_local: + elif ( + CheckSyncStatusFilesRequestHandler.modified_time_hs + > CheckSyncStatusFilesRequestHandler.modified_time_local + ): myDict = { - 'resourceId': res_id, - 'filesChanged': - CheckSyncStatusFilesRequestHandler.filesChanged, - 'modified_time_local': - CheckSyncStatusFilesRequestHandler.modified_time_hs, - 'file_name': file_name, - 'file_path': item_path + "resourceId": res_id, + "filesChanged": 
CheckSyncStatusFilesRequestHandler.filesChanged, + "modified_time_local": CheckSyncStatusFilesRequestHandler.modified_time_hs, + "file_name": file_name, + "file_path": item_path, } myList.append(myDict) myJson = json.dumps(myList) temporaryRoot = local_data.get_files_and_folders() - self.write({ - 'readMe': local_data.get_readme(), - 'rootDir': temporaryRoot, - 'myJson': myJson - }) + self.write( + { + "readMe": local_data.get_readme(), + "rootDir": temporaryRoot, + "myJson": myJson, + } + ) -class DownloadedLocalFilesRequestHandler(BaseRequestHandler): - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', - 'DELETE, GET, OPTIONS, POST') +# NOTE: deprecated +class DownloadedLocalFilesRequestHandler(HeadersMixIn, BaseRequestHandler): + _custom_headers = [("Access-Control-Allow-Methods", "GET")] def get(self, res_id): - if not resource_manager.is_authenticated(): - self.write({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) - return local_data = ResourceLocalData(res_id) if not local_data.is_downloaded(): - self.write({ - 'success': False, - 'error': WORKSPACE_FILES_ERROR, - # 'error' : 'HydroShare files not present in Workspace', - }) + self.write( + { + "success": False, + "error": WORKSPACE_FILES_ERROR, + # 'error' : 'HydroShare files not present in Workspace', + } + ) return else: local_file_data = local_data.get_files_and_folders() # checkFileSyncStatus(temporaryRoot, res_id) checkHydroShareSyncStatus(local_file_data, res_id, True) - self.write({ - 'readMe': local_data.get_readme(), - 'rootDir': local_file_data, - }) + self.write( + { + "readMe": local_data.get_readme(), + "rootDir": local_file_data, + } + ) -class MoveCopyFiles(BaseRequestHandler): - """ Handles moving (or renaming) files within the local filesystem, - on HydroShare, and between the two. 
""" +# NOTE: deprecated +class MoveCopyFiles(HeadersMixIn, BaseRequestHandler): + """Handles moving (or renaming) files within the local filesystem, + on HydroShare, and between the two.""" - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', 'PATCH, OPTIONS') + _custom_headers = [("Access-Control-Allow-Methods", "PATCH")] def patch(self, res_id): - body = json.loads(self.request.body.decode('utf-8')) + body = json.loads(self.request.body.decode("utf-8")) local_data = ResourceLocalData(res_id) hs_data = ResourceHydroShareData(resource_manager.hs_api_conn, res_id) - file_operations = body['operations'] + file_operations = body["operations"] results = [] success_count = 0 failure_count = 0 for operation in file_operations: - method = operation['method'] # 'copy' or 'move' - src_uri = operation['source'] - dest_uri = operation['destination'] + method = operation["method"] # 'copy' or 'move' + src_uri = operation["source"] + dest_uri = operation["destination"] # Split paths into filesystem prefix ('hs' or 'local') and path # relative to the resource root on # that filesystem - src_fs, src_path = src_uri.split(':') - dest_fs, dest_path = dest_uri.split(':') + src_fs, src_path = src_uri.split(":") + dest_fs, dest_path = dest_uri.split(":") # If this operation involves HydroShare, make sure we're # authenticated - if ((src_path == HS_PREFIX or dest_fs == HS_PREFIX) - and not resource_manager.is_authenticated()): - results.append({ - 'success': False, - 'error': HYDROSHARE_AUTHENTICATION_ERROR, - }) + if ( + src_path == HS_PREFIX or dest_fs == HS_PREFIX + ) and not resource_manager.is_authenticated(): + results.append( + { + "success": False, + "error": HYDROSHARE_AUTHENTICATION_ERROR, + } + ) failure_count += 1 continue @@ -954,51 +1448,56 @@ def patch(self, res_id): if src_fs == HS_PREFIX and dest_fs == HS_PREFIX: if method == MOVE: # Move or rename try: - hs_data.rename_or_move_file(Path(src_path), - Path(dest_path)) - results.append({'success': True}) + hs_data.rename_or_move_file(Path(src_path), Path(dest_path)) + results.append({"success": True}) success_count += 1 except FileExistsError: - results.append({ - 'success': False, - 'error': { - 'type': - 'FileOrFolderExists', - 'message': (f'The file {dest_path} already ' - 'exists in HydroShare.'), - }, - }) + results.append( + { + "success": False, + "error": { + "type": "FileOrFolderExists", + "message": ( + f"The file {dest_path} already " + "exists in HydroShare." 
+ ), + }, + } + ) failure_count += 1 else: # TODO: Copy (https://github.com/hydroshare/hydroshare_jupyter_sync/issues/42) # The frontend never requests this, but if one were to # add such functionality, you'd handle it here - raise NotImplementedError('Copy within HydroShare ' - 'not implemented') + raise NotImplementedError( + "Copy within HydroShare " "not implemented" + ) # Move/copy within the local filesystem elif src_fs == LOCAL_PREFIX and dest_fs == LOCAL_PREFIX: if method == MOVE: # Move or rename - ResourceLocalData(res_id).rename_or_move_item( - src_path, dest_path) - results.append({'success': True}) + ResourceLocalData(res_id).rename_or_move_item(src_path, dest_path) + results.append({"success": True}) success_count += 1 else: # TODO: Copy (https://github.com/hydroshare/hydroshare_jupyter_sync/issues/42) # The frontend never requests this, but if one were to # add such functionality, you'd handle it here - raise NotImplementedError('Copy within the local ' - 'filesystem not implemented yet') + raise NotImplementedError( + "Copy within the local " "filesystem not implemented yet" + ) # Move/copy from the local filesystem to HydroShare elif src_fs == LOCAL_PREFIX and dest_fs == HS_PREFIX: # Transfer the file regardless of if we're moving or copying - error = hs_data.upload_from_local(local_data, Path(src_path), - Path(dest_path)) + error = hs_data.upload_from_local( + local_data, Path(src_path), Path(dest_path) + ) if not error and method == MOVE: # Delete the local copy of the file - error = (ResourceLocalData(res_id).delete_file_or_folder( - src_path)) - results.append({ - 'success': error is None, - 'error': error, - }) + error = ResourceLocalData(res_id).delete_file_or_folder(src_path) + results.append( + { + "success": error is None, + "error": error, + } + ) if error: failure_count += 1 else: @@ -1006,130 +1505,118 @@ def patch(self, res_id): # Move/copy from HydroShare to the local filesystem elif src_fs == HS_PREFIX and dest_fs == LOCAL_PREFIX: # Transfer the file regardless of if we're moving or copying - hs_data.download_to_local(local_data, Path(src_path), - Path(dest_path)) + hs_data.download_to_local(local_data, Path(src_path), Path(dest_path)) if method == MOVE: # Delete the HS copy of the file hs_data.delete_file_or_folder(src_path) - results.append({'success': True}) + results.append({"success": True}) success_count += 1 else: - msg = f'"source" prefix "{src_fs}" and/or destination ' \ - f'prefix "{dest_fs} not recognized. Valid options' \ - f' are "hs" and "local"' + msg = ( + f'"source" prefix "{src_fs}" and/or destination ' + f'prefix "{dest_fs} not recognized. Valid options' + f' are "hs" and "local"' + ) logging.warning(msg) - results.append({ - 'success': False, - 'error': 'UnrecognizedPathPrefix', - 'message': msg, - }) + results.append( + { + "success": False, + "error": "UnrecognizedPathPrefix", + "message": msg, + } + ) failure_count += 1 - self.write({ - 'results': results, - 'successCount': success_count, - 'failureCount': failure_count, - }) + self.write( + { + "results": results, + "successCount": success_count, + "failureCount": failure_count, + } + ) -class UserInfoHandler(BaseRequestHandler): - """ Handles getting the user's information (name, email, etc) - from HydroShare and storing the user's - HydroShare credentials. 
- """ +class UserInfoHandler(HeadersMixIn, BaseRequestHandler): + """Get user information from HydroShare""" - def set_default_headers(self): - BaseRequestHandler.set_default_headers(self) - self.set_header('Access-Control-Allow-Methods', 'GET, OPTIONS') + _custom_headers = [("Access-Control-Allow-Methods", "GET, OPTIONS")] def get(self): - """ Gets the user's information (name, email, etc) from HydroShare """ - isFile = False - if not resource_manager.is_authenticated(): - data, error = None, HYDROSHARE_AUTHENTICATION_ERROR - else: - data, error = resource_manager.get_user_info() - dirdetails = Path(Path.home() / 'hydroshare' / 'dirinfo.json') - if dirdetails.exists(): - isFile = True - self.write({ - 'data': data, - 'success': error is None, - 'isFile': isFile, - 'error': error - }) - - -class TestApp(tornado.web.Application): - """ Class for setting up the server & making sure it can exit cleanly """ - - is_closing = False - - def signal_handler(self, signum, frame): - logging.info('exiting...') - self.is_closing = True - - def try_exit(self): - if self.is_closing: - tornado.ioloop.IOLoop.instance().stop() - logging.info('exit success') + """Gets the user's information (name, email, etc) from HydroShare""" + session = self.get_session() + user = session.session.user(session.id).dict() + self.write(user) +# NOTE: deprecated def get_route_handlers(frontend_url, backend_url): + # routes look like they need to be updated to remove .* return [ - (url_path_join(frontend_url, - r"/assets/(.*)"), tornado.web.StaticFileHandler, { - 'path': str(assets_path) - }), - (url_path_join(backend_url, - r"/download/(.*)"), tornado.web.StaticFileHandler, { - 'path': str(data_path) - }), + # "frontend" + ( + url_path_join(frontend_url, r"/assets/(.*)"), + tornado.web.StaticFileHandler, + {"path": str(assets_path)}, + ), + # "backend" + # ( + # url_path_join(backend_url, r"/ws"), + # FileSystemEventWebSocketHandler, + # ), + ( + url_path_join(backend_url, r"/download/(.*)"), + tornado.web.StaticFileHandler, + {"path": str(data_path)}, + ), (url_path_join(backend_url, "/login"), LoginHandler), (url_path_join(backend_url, r"/user"), UserInfoHandler), - (url_path_join(backend_url, r"/resources"), ResourcesRootHandler), - (url_path_join(backend_url, r"/resources/([^/]+)"), ResourceHandler), - (url_path_join(backend_url, r"/resources/([^/]+)/hs-files"), - ResourceHydroShareFilesRequestHandler), - (url_path_join(backend_url, r"/resources/([^/]+)/download-hs-files"), - DownloadHydroShareFilesRequestHandler), - (url_path_join(backend_url, r"/resources/([^/]+)/check-sync-files"), - CheckSyncStatusFilesRequestHandler), - (url_path_join(backend_url, r"/resources/([^/]+)/local-files"), - ResourceLocalFilesRequestHandler), - (url_path_join(backend_url, - r"/resources/([^/]+)/downloaded-local-files"), - DownloadedLocalFilesRequestHandler), + (url_path_join(backend_url, r"/resources"), ListUserHydroShareResources), + # (url_path_join(backend_url, r"/resources/([^/]+)"), ResourceHandler), + ( + url_path_join(backend_url, r"/resources/([^/]+)"), + ListHydroShareResourceFiles, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/download"), + HydroShareResourceHandler, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/upload"), + LocalResourceEntityHandler, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/download/(.+)"), + HydroShareResourceEntityHandler, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/hs-files"), + ResourceHydroShareFilesRequestHandler, + ), + ( + 
url_path_join(backend_url, r"/resources/([^/]+)/download-hs-files"), + DownloadHydroShareFilesRequestHandler, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/check-sync-files"), + CheckSyncStatusFilesRequestHandler, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/local-files"), + ResourceLocalFilesRequestHandler, + ), + ( + url_path_join(backend_url, r"/resources/([^/]+)/downloaded-local-files"), + DownloadedLocalFilesRequestHandler, + ), (url_path_join(backend_url, "/selectdir"), DirectorySelectorHandler), - (url_path_join(backend_url, - r"/resources/([^/]+)/localmd5"), Localmd5Handler), - (url_path_join(backend_url, - r"/resources/([^/]+)/hsmd5"), Hsmd5Handler), - (url_path_join(backend_url, - r"/resources/([^/]+)/move-copy-files"), MoveCopyFiles), + (url_path_join(backend_url, r"/resources/([^/]+)/localmd5"), Localmd5Handler), + (url_path_join(backend_url, r"/resources/([^/]+)/hsmd5"), Hsmd5Handler), + ( + url_path_join(backend_url, r"/resources/([^/]+)/move-copy-files"), + MoveCopyFiles, + ), # Put this last to catch everything else + # order does matter + # Host patterns are processed sequentially in the order they were added. All matching patterns will be considered. (frontend_url + r".*", WebAppHandler), ] - - -if __name__ == '__main__': - LEVELS = { - 'debug': logging.DEBUG, - 'info': logging.INFO, - 'warning': logging.WARNING, - 'error': logging.ERROR, - 'critical': logging.CRITICAL - } - - if len(sys.argv) > 1: - level_name = sys.argv[1] - level = LEVELS.get(level_name, logging.NOTSET) - logging.basicConfig(level=level) - - app = TestApp(get_route_handlers('/', '/syncApi')) - print("Starting server at localhost:8080") - tornado.options.parse_command_line() - signal.signal(signal.SIGINT, app.signal_handler) - app.listen(8080) - tornado.ioloop.PeriodicCallback(app.try_exit, 100).start() - tornado.ioloop.IOLoop.instance().start() diff --git a/hydroshare_jupyter_sync/session.py b/hydroshare_jupyter_sync/session.py new file mode 100644 index 00000000..94e92816 --- /dev/null +++ b/hydroshare_jupyter_sync/session.py @@ -0,0 +1,53 @@ +from typing import Callable, Union +from pathlib import Path +from hsclient import HydroShare +from functools import partial + +from .session_struct import SessionSyncStruct + + +class _SessionSyncSingleton: + def __init__(self) -> None: + + # Create empty session sync struct + self.reset_session() + + def new_sync_session( + self, fs_root: Union[Path, str], hydroshare: HydroShare + ) -> None: + new_session = partial(SessionSyncStruct.create_sync_struct, fs_root, hydroshare) + self._handle_session(new_session) + + def reset_session(self) -> None: + # create empty session sync struct + self._handle_session(SessionSyncStruct) + + @property + def is_empty(self) -> bool: + # ignore shutdown fn when verifying if any attrs are not empty + return not any(v for k, v in self.__dict__.items() if k != "shutdown") + + # helpers + def _handle_session(self, session: Callable[[], SessionSyncStruct]): + # shutdown any open previous resources + try: + self.shutdown() + except AttributeError: + # on __init__, shutdown attribute will not be set + ... 
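+
+        # NOTE: `session` is a zero-argument callable (the `SessionSyncStruct`
+        # class itself via `reset_session`, or the `partial` built by
+        # `new_sync_session`), so the new struct is only constructed after the
+        # previous session's resources have been shut down.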
+
+        # create session instance
+        session = session()
+        # re-bind shutdown method to singleton instance
+        self.shutdown = session.shutdown
+
+        self._update_attrs(session)
+
+    def _update_attrs(self, o: object) -> None:
+        attrs = o.__dict__
+        for attr, value in attrs.items():
+            setattr(self, attr, value)
+
+
+# create empty session
+session_sync_struct = _SessionSyncSingleton()
diff --git a/hydroshare_jupyter_sync/session_struct.py b/hydroshare_jupyter_sync/session_struct.py
new file mode 100644
index 00000000..0200450c
--- /dev/null
+++ b/hydroshare_jupyter_sync/session_struct.py
@@ -0,0 +1,109 @@
+from dataclasses import dataclass
+from hsclient import HydroShare
+from watchdog.observers import Observer
+
+# type hint imports
+from typing import Optional, Union
+from pathlib import Path
+
+# lib imports
+from .lib.filesystem.aggregate_fs_map import AggregateFSMap
+from .lib.events.event_broker import EventBroker
+
+# local imports
+from .fs_event_handler import fs_event_handler_factory
+from .fs_events import Events
+from .session_struct_interface import ISessionSyncStruct
+from .session_sync_event_listeners import SessionSyncEventListeners
+
+
+@dataclass
+class SessionStruct:
+    session: Optional[HydroShare] = None
+    cookie: Optional[bytes] = None
+    id: Optional[int] = None
+    username: Optional[str] = None
+
+    @classmethod
+    def create_empty(cls):
+        return cls()
+
+    def __eq__(self, o: Union[bytes, "SessionStruct"]) -> bool:
+        """Check if self.cookie is equal to o. If self.cookie is None, returns False."""
+        if self.cookie is None:
+            return False
+        if isinstance(o, SessionStruct):
+            return self.cookie == o.cookie
+        if not isinstance(o, bytes):
+            return False
+        return self.cookie == o
+
+
+@dataclass
+class SessionSyncStruct(ISessionSyncStruct):
+    @classmethod
+    def create_sync_struct(
+        cls, fs_root: Union[Path, str], hydroshare: HydroShare
+    ) -> "SessionSyncStruct":
+        # instantiate and populate local and remote FSMaps
+        agg_map = AggregateFSMap.create_map(fs_root, hydroshare)
+
+        event_broker = EventBroker(Events)
+        # `event_broker` context given to each factory object
+        _event_handler_factory = fs_event_handler_factory(event_broker)
+
+        # create and start observer thread
+        observer = Observer()
+        observer.start()
+
+        # mapping of resource_id to application specific watchdog FileSystemEventHandler instance
+        fs_observers = dict()
+        for res_id, res in agg_map.local_map.items():
+            # create a resource specific event handler
+            event_handler = _event_handler_factory(res)
+
+            # bind handler to observer
+            watcher = observer.schedule(
+                event_handler, res.contents_path, recursive=True
+            )
+
+            fs_observers[res_id] = watcher
+
+        # setup event listeners
+        SessionSyncEventListeners(
+            aggregate_fs_map=agg_map,
+            event_broker=event_broker,
+            observer=observer,
+            fs_observers=fs_observers,
+            event_handler_factory=_event_handler_factory,
+        ).setup_event_listeners()
+
+        return cls(
+            aggregate_fs_map=agg_map,
+            event_broker=event_broker,
+            observer=observer,
+            fs_observers=fs_observers,
+            event_handler_factory=_event_handler_factory,
+        )
+
+    def shutdown(self) -> None:
+        # unsubscribe from all events
+        self._cleanup_event_broker()
+
+        # cleanup observer: unschedule, stop, and rejoin thread
+        self._cleanup_observer()
+
+    def _cleanup_event_broker(self) -> None:
+        """event broker cleanup logic"""
+        if self.event_broker is not None:
+            self.event_broker.unsubscribe_all()
+
+    def _cleanup_observer(self) -> None:
+        """observer cleanup logic"""
+        if self.observer is not None:
+            # unschedule all observers
+            self.observer.unschedule_all()
+
+            # stop the observer, then join its thread
+            self.observer.stop()
+            self.observer.join()
diff --git a/hydroshare_jupyter_sync/session_struct_interface.py b/hydroshare_jupyter_sync/session_struct_interface.py
new file mode 100644
index 00000000..2b9112ba
--- /dev/null
+++ b/hydroshare_jupyter_sync/session_struct_interface.py
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+
+# type hint imports
+from typing import Callable, Dict, Optional
+from watchdog.events import FileSystemEventHandler
+from watchdog.observers import Observer
+from .lib.events.event_broker import EventBroker
+from .lib.filesystem.types import ResourceId
+from .lib.filesystem.aggregate_fs_map import AggregateFSMap
+from .lib.filesystem.fs_resource_map import LocalFSResourceMap
+
+
+@dataclass
+class ISessionSyncStruct:
+    aggregate_fs_map: Optional[AggregateFSMap] = None
+    event_broker: Optional[EventBroker] = None
+    observer: Optional[Observer] = None
+    fs_observers: Optional[Dict[ResourceId, FileSystemEventHandler]] = None
+    event_handler_factory: Optional[
+        Callable[[LocalFSResourceMap], FileSystemEventHandler]
+    ] = None
diff --git a/hydroshare_jupyter_sync/session_sync_event_listeners.py b/hydroshare_jupyter_sync/session_sync_event_listeners.py
new file mode 100644
index 00000000..21d2915b
--- /dev/null
+++ b/hydroshare_jupyter_sync/session_sync_event_listeners.py
@@ -0,0 +1,62 @@
+from dataclasses import dataclass
+
+# local imports
+from .fs_events import Events
+from .session_struct_interface import ISessionSyncStruct
+
+
+@dataclass
+class SessionSyncEventListeners(ISessionSyncStruct):
+    """Shim that encapsulates session sync struct event listener logic."""
+
+    def setup_event_listeners(self):
+        # event listeners
+        listeners = [
+            (Events.RESOURCE_DOWNLOADED, self.resource_downloaded),
+            (Events.RESOURCE_ENTITY_DOWNLOADED, self.resource_entity_downloaded),
+            (Events.RESOURCE_ENTITY_UPLOADED, self.resource_uploaded),
+        ]
+        for event, listener in listeners:
+            self.event_broker.subscribe(event, listener)
+
+    def resource_uploaded(self, resource_id) -> None:
+        # pull updated md5 checksums from HydroShare
+        self.aggregate_fs_map.remote_map.update_resource(resource_id)
+
+    def resource_downloaded(self, resource_id) -> None:
+        # if resource already in agg map, just update resource in local map
+        if resource_id in self.aggregate_fs_map.local_map:
+            self.aggregate_fs_map.local_map.update_resource(resource_id)
+
+        else:
+            self._add_resource_to_agg_map_and_create_watcher(resource_id)
+
+    def resource_entity_downloaded(self, resource_id) -> None:
+        # if resource already in agg map, add resource file
+        if resource_id in self.aggregate_fs_map.local_map:
+            # TODO: `add_resource_file` is not a method on `AggregateFSMap`. For now,
+            # both local and remote resources stored in the `AggregateFSMap` instance
+            # are updated. This is computationally burdensome and not necessary, but
+            # has desirable guarantees. A proper solution should be implemented in
+            # the future.
+ self.aggregate_fs_map.update_resource(resource_id) + # self.aggregate_fs_map.add_resource_file(resource_id) + + else: + self._add_resource_to_agg_map_and_create_watcher(resource_id) + + def _add_resource_to_agg_map_and_create_watcher(self, resource_id): + self.aggregate_fs_map.add_resource(resource_id) + + if resource_id not in self.fs_observers: + # get local resource object + res = self.aggregate_fs_map.local_map[resource_id] + + # create a resource specific event handler + event_handler = self.event_handler_factory(res) + + # bind handler to observer + watcher = self.observer.schedule( + event_handler, res.contents_path, recursive=True + ) + + self.fs_observers[resource_id] = watcher diff --git a/hydroshare_jupyter_sync/templates/root.html b/hydroshare_jupyter_sync/templates/root.html new file mode 100644 index 00000000..9a4afc6c --- /dev/null +++ b/hydroshare_jupyter_sync/templates/root.html @@ -0,0 +1,34 @@ +{% from notebook.utils import url_path_join %} + + + + + + + + + {% block title %} CUAHSI Compute Sync {% end %} + + + +
{% comment React app is rendered from here %}
+ + + + diff --git a/hydroshare_jupyter_sync/utilities/__init__.py b/hydroshare_jupyter_sync/utilities/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hydroshare_jupyter_sync/utilities/pathlib_utils.py b/hydroshare_jupyter_sync/utilities/pathlib_utils.py new file mode 100644 index 00000000..04a5e533 --- /dev/null +++ b/hydroshare_jupyter_sync/utilities/pathlib_utils.py @@ -0,0 +1,78 @@ +from pathlib import Path + +# type hints imports +from typing import Union, List, Tuple + + +def expand_and_resolve(p: Union[str, Path]) -> Path: + """Given a string or Path, expand the user (~) and resolve the absolute path. + + Parameters + ---------- + p : Union[str, Path] + string or path + + Returns + ------- + Path + """ + return Path(p).expanduser().resolve() + + +def expand_and_resolve_path_to_posix(p: Union[str, Path]) -> str: + """Given a string or Path, expand the user (~) and resolve the absolute path and + return it's POSIX string representation. + + Parameters + ---------- + p : Union[str, Path] + string or path + + Returns + ------- + str + POSIX absolute path + """ + return expand_and_resolve(p).as_posix() + + +def is_descendant(child: Union[str, Path], parent: Union[str, Path]) -> bool: + """Determine if a child path is descendant of some parent path. + + Parameters + ---------- + child : Union[str, Path] + parent : Union[str, Path] + + Returns + ------- + bool + Is child of parent? + """ + child = expand_and_resolve_path_to_posix(child) + parent = expand_and_resolve_path_to_posix(parent) + + return child.startswith(parent) + + +def first_existing_file(file_paths: Union[List, Tuple]) -> Union[str, None]: + """Given a list or tuple of file paths, return the absolute path of the first file + (element wise) that exists; None if no files in the passed collection exist. Paths + are tilde expanded and resolved to their absolute form. 
+
+    Parameters
+    ----------
+    file_paths : Union[List, Tuple]
+        List of relative or absolute file paths
+
+    Returns
+    -------
+    Union[str, None]
+        File absolute path or None
+    """
+    for path in file_paths:
+        resolved_path = expand_and_resolve(path)
+        if resolved_path.exists():
+            return str(resolved_path)
+
+    return None
diff --git a/hydroshare_jupyter_sync/websocket_handler.py b/hydroshare_jupyter_sync/websocket_handler.py
new file mode 100644
index 00000000..aa4b4aa6
--- /dev/null
+++ b/hydroshare_jupyter_sync/websocket_handler.py
@@ -0,0 +1,79 @@
+from functools import partial
+from tornado.websocket import WebSocketHandler
+from http import HTTPStatus
+import logging
+import asyncio
+
+from .server import SessionMixIn
+
+# event types
+from .fs_events import Events
+
+# from .session import event_broker, session_sync_struct as session
+from .session import session_sync_struct as session
+
+
+class FileSystemEventWebSocketHandler(SessionMixIn, WebSocketHandler):
+    def prepare(self):
+        # get current running event loop in main thread
+        self.loop = asyncio.get_event_loop()
+
+        if not self.get_client_server_cookie_status():
+            self.set_status(HTTPStatus.FOUND)  # 302
+            # append requested uri as `next` Location parameter
+            uri = self.request.uri
+            self.redirect(f"{self.get_login_url()}?next={uri}")
+
+    def open(self, *args, **kwargs):
+        # ignore args and kwargs
+
+        # send initial state/status
+        message = session.aggregate_fs_map.get_sync_state().json()
+        logging.debug(message)
+        self.write_message(message)
+
+        # subscribe to FSEvents
+        logging.debug("subscribing to events")
+        self._subscribe_to_events()
+
+    def on_message(self, message):
+        # message handler
+        logging.debug(message)
+
+    def on_close(self):
+        # unsubscribe from FSEvents
+        logging.debug("unsubscribing from events")
+        self._unsubscribe_from_events()
+
+    def _subscribe_to_events(self):
+        session.event_broker.subscribe(Events.STATUS, self._get_resource_status)
+        session.event_broker.subscribe(
+            Events.RESOURCE_ENTITY_UPLOADED, self._get_resource_status
+        )
+        session.event_broker.subscribe(
+            Events.RESOURCE_DOWNLOADED, self._get_resource_status
+        )
+        session.event_broker.subscribe(
+            Events.RESOURCE_ENTITY_DOWNLOADED, self._get_resource_status
+        )
+
+    def _unsubscribe_from_events(self):
+        session.event_broker.unsubscribe(Events.STATUS, self._get_resource_status)
+        session.event_broker.unsubscribe(
+            Events.RESOURCE_ENTITY_UPLOADED, self._get_resource_status
+        )
+        session.event_broker.unsubscribe(
+            Events.RESOURCE_DOWNLOADED, self._get_resource_status
+        )
+        session.event_broker.unsubscribe(
+            Events.RESOURCE_ENTITY_DOWNLOADED, self._get_resource_status
+        )
+
+    def _get_resource_status(self, res_id: str) -> None:
+        """Write json stringified resource sync state"""
+        # NOTE: `aggregate_fs_map` can be None if the user has not logged in.
+        # That state should not occur here, since `prepare` redirects
+        # unauthenticated requests to the login endpoint.
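+        # `write_message` is not thread-safe and these events may fire from
+        # the watchdog observer thread, so the write is scheduled onto the
+        # main event loop (captured in `prepare`) via `call_soon_threadsafe`.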
+ message = session.aggregate_fs_map.get_resource_sync_state(res_id).json() + logging.info(message) + call = partial(self.write_message, message) + self.loop.call_soon_threadsafe(call) diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..6dd49a68 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +log_cli = True +log_cli_level = INFO +log_cli_format = %(levelname)s %(message)s diff --git a/setup.py b/setup.py index 9901ad31..9d6d7b13 100644 --- a/setup.py +++ b/setup.py @@ -36,14 +36,17 @@ # Package dependency requirements REQUIREMENTS = [ - "hs-restclient", + "hs-restclient", # TODO: remove hs-restclient + "hsclient", "jupyterlab", "notebook", "requests", + "pydantic[dotenv]", + "watchdog", ] # Development requirements -DEVELOPMENT_REQUIREMENTS = ["pytest"] +DEVELOPMENT_REQUIREMENTS = ["pytest", "pytest-tornado"] SHORT_DESCRIPTION = "A web app and server for syncing files between the local filesystem and HydroShare. Can run as a Jupyter notebook extension." diff --git a/tests/test_aggregate_fs_map.py b/tests/test_aggregate_fs_map.py new file mode 100644 index 00000000..869cf58c --- /dev/null +++ b/tests/test_aggregate_fs_map.py @@ -0,0 +1,39 @@ +import pytest +import os +from hsclient import HydroShare +from pathlib import Path +from pydantic import BaseSettings, Field + +from hydroshare_jupyter_sync.lib.filesystem.aggregate_fs_map import AggregateFSMap + + +def get_env_file_path() -> Path: + env_file = Path(__file__).resolve().parent.parent + return env_file / ".env" + + +class HydroShareCreds(BaseSettings): + username: str = Field(..., env="HYDRO_USERNAME") + password: str = Field(..., env="HYDRO_PASSWORD") + + class Config: + env_file = get_env_file_path() + env_file_encoding = "utf-8" + + +@pytest.fixture +def hydroshare(): + creds = HydroShareCreds() + hs = HydroShare(**creds.dict()) + return hs + + +@pytest.fixture +def test_dir(): + return Path(__file__).parent.resolve() + + +def test_create_aggregate_fs_map(hydroshare, test_dir): + fs_root = test_dir + agg_map = AggregateFSMap.create_map(fs_root, hydroshare) + print(agg_map.get_sync_state()) diff --git a/tests/test_api_models.py b/tests/test_api_models.py new file mode 100644 index 00000000..beaf446d --- /dev/null +++ b/tests/test_api_models.py @@ -0,0 +1,62 @@ +import pytest +import pydantic +from hydroshare_jupyter_sync.models import api_models as m + + +@pytest.fixture +def resource_metadata(): + return m.ResourceMetadata( + resource_type="resource", + resource_title="title", + resource_id="42", + immutable=True, + resource_url="www.fake.org", + date_created="2021-01-01", + date_last_updated="2021-01-02", + ) + + +def test_collection_of_resource_metadata(resource_metadata): + metadata = resource_metadata + metadata_dict = metadata.dict() + + assert m.CollectionOfResourceMetadata.parse_obj([metadata, metadata, metadata]) + assert m.CollectionOfResourceMetadata.parse_obj([metadata]) + # test as dictionaries + assert m.CollectionOfResourceMetadata.parse_obj( + [metadata_dict, metadata_dict, metadata_dict] + ) + assert m.CollectionOfResourceMetadata.parse_obj([metadata_dict]) + + +def test_collection_of_resource_metadata_raises(resource_metadata): + metadata = resource_metadata + + metadata_subset = { + "resource_type": "resource", + "resource_title": "title", + } + + with pytest.raises(pydantic.error_wrappers.ValidationError): + assert m.CollectionOfResourceMetadata.parse_obj([metadata, metadata_subset]) + + +def test_resource_files_should_pass(): + data = {"files": ["/data", "/data/contents", ""]} + 
m.ResourceFiles(**data) + + +RESOURCE_FILES_SHOULD_FAIL_CASES = [ + (["~"]), + (""), + (["../"]), + (["/fake/file/../../"]), + (["/something~"]), +] + + +@pytest.mark.parametrize("test_data", RESOURCE_FILES_SHOULD_FAIL_CASES) +def test_resource_files_should_fail(test_data): + data = {"files": test_data} + with pytest.raises(pydantic.ValidationError): + m.ResourceFiles(**data) diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 00000000..748ec145 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,46 @@ +import pytest +from hydroshare_jupyter_sync.config_setup import ConfigFile, FileNotDirectoryError +from tempfile import TemporaryDirectory +from pathlib import Path + + +def test_config_file(): + with TemporaryDirectory() as temp: + log = Path(temp) / "logs" + ConfigFile(data_path=temp, log_path=log) + + +def test_config_log_is_file(): + with pytest.raises(FileNotDirectoryError): + with TemporaryDirectory() as temp: + log = Path(temp) / "logs" + log.touch() + ConfigFile(data_path=temp, log_path=log) + + +def test_config_using_env_vars(monkeypatch): + """Test configuration using environment variables""" + with TemporaryDirectory() as temp: + log = Path(temp) / "logs" + monkeypatch.setenv("DATA", str(temp)) + monkeypatch.setenv("LOG", str(log)) + c = ConfigFile() + assert str(c.data_path) == str(temp) + assert str(c.log_path) == str(log) + + +def test_config_using_env_file(): + """Test configuration using environment variables""" + with TemporaryDirectory() as temp: + log = Path(temp) / "logs" + env_contents = f""" + DATA={temp} + LOG={log} + """ + env_file = Path(temp) / "env" + with open(env_file, "w") as f: + f.write(env_contents) + + c = ConfigFile(_env_file=env_file) + assert str(c.data_path) == str(temp) + assert str(c.log_path) == str(log) diff --git a/tests/test_fs_resource_map.py b/tests/test_fs_resource_map.py new file mode 100644 index 00000000..4f2985e9 --- /dev/null +++ b/tests/test_fs_resource_map.py @@ -0,0 +1,137 @@ +from tempfile import TemporaryDirectory +from pathlib import Path +import random +import string +from hashlib import md5 +import pytest +from typing import Tuple, NewType + +from hydroshare_jupyter_sync.lib.filesystem.fs_resource_map import LocalFSResourceMap + +# type declarations +ResourcePath = NewType("ResourcePath", Path) +DataDir = NewType("DataDir", Path) + + +# helper functions +def random_string(size: int = 12, chars: str = string.ascii_letters + string.digits): + # from https://stackoverflow.com/a/2257449 + return "".join(random.choice(chars) for _ in range(size)) + + +def create_resource_id(random_string_size: int = 12) -> str: + rand_bytes = random_string(size=random_string_size).encode() + return md5(rand_bytes).hexdigest() + + +# fixtures +@pytest.fixture +def resource_mock() -> Tuple[ResourcePath, DataDir]: + resource_name = create_resource_id() + + with TemporaryDirectory() as temp_dir: + # resolve symlinks. without, causes issues on mac b.c. 
/var is symlinked to /private/var + temp_dir = Path(temp_dir).resolve() + + # create intermediate data dir following bagit layout + contents_dir = temp_dir / resource_name / resource_name / "data" / "contents" + contents_dir.mkdir(parents=True) + + yield temp_dir / resource_name, contents_dir + + +# tests +def test_local_fs_resource_map(resource_mock): + rdir, contents_path = resource_mock + fsmap = LocalFSResourceMap(rdir) + + assert fsmap.base_directory == rdir / rdir.name + assert fsmap.contents_path == contents_path + + +def test_local_fs_resource_map_add_file(resource_mock): + rdir, data_dir = resource_mock + fsmap = LocalFSResourceMap(rdir) + + # create test file + fn = "test" + test_file = data_dir / fn + test_file.touch() + + relative_location = f"data/contents/{fn}" + + assert Path(relative_location) not in fsmap + fsmap.add_file(test_file) + assert relative_location not in fsmap + assert Path(relative_location) in fsmap + + +def test_local_fs_resource_map_delete_file(resource_mock): + rdir, data_dir = resource_mock + fsmap = LocalFSResourceMap(rdir) + + # create test file + fn = "test" + test_file = data_dir / fn + test_file.touch() + + relative_location = f"data/contents/{fn}" + + fsmap.add_file(test_file) + assert Path(relative_location) in fsmap + fsmap.delete_file(relative_location) + assert Path(relative_location) not in fsmap + + +def test_local_fs_resource_map_update_file(resource_mock): + rdir, data_dir = resource_mock + fsmap = LocalFSResourceMap(rdir) + + # create test file + fn = "test" + test_file = data_dir / fn + test_file.touch() + + relative_location = f"data/contents/{fn}" + p_relative_location = Path(relative_location) + + fsmap.add_file(test_file) + assert p_relative_location in fsmap + + # current md5 + file_hash = fsmap[p_relative_location] + + # write new stuff to file + test_file.write_text("some test data") + + # try to update the file using incorrect relative location + fsmap.update_file(fn) + assert file_hash == fsmap[p_relative_location] + + # try to update the file using incorrect absolute location + fsmap.update_file(f"/{fn}") + assert file_hash == fsmap[p_relative_location] + + fsmap.update_file(relative_location) + new_file_hash = fsmap[p_relative_location] + assert file_hash != new_file_hash + + +def test_local_fs_resource_map_update_resource(resource_mock): + rdir, data_dir = resource_mock + + # create test file + files = [] + for i in range(10): + fn = f"test_{i}" + # Create test file + test_file = data_dir / fn + test_file.touch() + files.append(test_file) + + fsmap = LocalFSResourceMap.from_resource_path(rdir) + for i in range(5): + fsmap.delete_file(files[i]) + files.pop(i) + + assert len(fsmap.files) == len(files) diff --git a/tests/test_pathlib_utils.py b/tests/test_pathlib_utils.py new file mode 100644 index 00000000..656c79ee --- /dev/null +++ b/tests/test_pathlib_utils.py @@ -0,0 +1,42 @@ +import pytest +import os + +from hydroshare_jupyter_sync.utilities import pathlib_utils + +TEST_EXPAND_AND_RESOLVE_CASES = [ + ("~/test", "/test_user/test", "/test_user"), + ("~/test/another/test", "/test_user/test/another/test", "/test_user"), +] + + +@pytest.mark.parametrize("test,validation,user", TEST_EXPAND_AND_RESOLVE_CASES) +def test_expand_and_resolve(test, validation, user): + # mock user. See below documentation for cross-platform support + # https://docs.python.org/3/library/os.path.html#os.path.expanduser + + # unix-like os use `HOME`. 
Windows use `USERPROFILE` + os.environ["HOME"] = os.environ["USERPROFILE"] = user + + assert str(pathlib_utils.expand_and_resolve(test)) == validation + + +TEST_IS_DESCENDANT = [ + # child, parent, validation + ("~/test/some-file", "~/test", True), + ("/test/some-file", "/test", True), + ("~/test/../tests/some-file", "~/test", True), + ("/test/../tests/some-file/..", "/test", True), + ("/test/../some-file/..", "/test", False), + ("~/test/../", "~/test", False), + ("/test/../", "/test", False), + ("~/../test/../", "~", False), +] + + +@pytest.mark.parametrize("child,parent,validation", TEST_IS_DESCENDANT) +def test_is_descendant(child, parent, validation): + # See test_expand_and_resolve for explanation + os.environ["HOME"] = os.environ["USERPROFILE"] = "/user" + + result = pathlib_utils.is_descendant(child, parent) + assert result is validation diff --git a/tests/test_server.py b/tests/test_server.py index 2be38e26..01cf54c5 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -1,40 +1,192 @@ -""" -This file sets up tornado unit tests to test the hydroshare server -Author: 2019-20 CUAHSI Olin SCOPE Team -""" -from hydroshare_jupyter_sync.server import TestApp, get_route_handlers -from tornado.testing import AsyncHTTPTestCase, gen_test -import unittest - -import nest_asyncio -nest_asyncio.apply() - - -class TestHSServer(AsyncHTTPTestCase): - """ Create an instance of the server for running tests """ - def get_app(self): - return TestApp(get_route_handlers('/', '/syncApi')) - - @gen_test - def test_user(self): - """ Tests get user info functionality """ - response = self.fetch(r"/syncApi/user") - self.assertEqual(response.code, 200) - # make sure the string "username" is in the response - un = 'username' - un = un.encode('ascii') - self.assertIn(un, response.body) - - @gen_test - def test_resources(self): - """ Tests get user resources functionality """ - response = self.fetch(r"/syncApi/resources") - self.assertEqual(response.code, 200) - # make sure the string "resources" is in the response - res = 'resources' - res = res.encode('ascii') - self.assertIn(res, response.body) - - -if __name__ == '__main__': - unittest.main() +from tornado.httpclient import HTTPRequest, HTTPResponse +from tornado.httputil import HTTPHeaders +from hsclient import HydroShare +from hydroshare_jupyter_sync.__main__ import get_test_app +from hydroshare_jupyter_sync.hydroshare_resource_cache import ( + HydroShareWithResourceCache, +) +from hydroshare_jupyter_sync.server import LocalResourceEntityHandler +from hydroshare_jupyter_sync.session import _SessionSyncSingleton +import json +import pytest +from dataclasses import dataclass +from http.cookies import SimpleCookie, Morsel +from typing import Union + + +def my_user_info_mock(*args, **kwargs): + return {"id": 42, "username": "test"} + + +def get_user_cookie(headers: HTTPHeaders) -> Union[Morsel, None]: + cookies = headers.get_list("set-cookie") + cookie = SimpleCookie() + if cookies: + cookie.load(cookies[0]) + user_cookie = cookie.get("user") + if user_cookie is not None: + return f"{user_cookie.key}={user_cookie.value};" + return None + + +def get_user_cookie_from_http_response(response: HTTPResponse): + response_cookie = get_user_cookie(response.headers) + return response_cookie + + +@pytest.fixture +def app(): + return get_test_app() + + +@pytest.fixture +async def mocked_login_session(http_client, base_url, monkeypatch): + body = {"username": "test", "password": "test"} + monkeypatch.setattr(HydroShare, "my_user_info", my_user_info_mock) + 
monkeypatch.setattr(HydroShareWithResourceCache, "my_user_info", my_user_info_mock) + monkeypatch.setattr(_SessionSyncSingleton, "is_empty", False) + + req = HTTPRequest( + base_url + "/syncApi/login", + method="POST", + body=json.dumps(body), + headers={"content-type": "application/json"}, + ) + response = await http_client.fetch(req) + return response + + +@pytest.mark.gen_test +async def test_login(mocked_login_session): + response = await mocked_login_session + assert json.loads(response.body.decode("utf-8")) == {"success": True} + + +@pytest.mark.gen_test +async def test_login_then_login_with_another_account( + mocked_login_session, http_client, base_url +): + response = await mocked_login_session # type: HTTPResponse + assert json.loads(response.body.decode("utf-8")) == {"success": True} + + # first login was successful, so subsequent calls to login ignore passed credentials + body = {"username": "", "password": ""} + response_cookie = get_user_cookie_from_http_response(response) + wrapper_cookie = {"Cookie": response_cookie, "content-type": "application/json"} + + req = HTTPRequest( + base_url + "/syncApi/login", + method="POST", + body=json.dumps(body), + headers=wrapper_cookie, + ) + + response = await http_client.fetch(req) + assert json.loads(response.body.decode("utf-8")) == {"success": True} + + # should not get a response header if user is already signed in + assert get_user_cookie_from_http_response(response) == None + + +@pytest.mark.gen_test +async def test_valid_logout(mocked_login_session, http_client, base_url): + response = await mocked_login_session # type: HTTPResponse + response_cookie = get_user_cookie_from_http_response(response) + wrapper_cookie = {"Cookie": response_cookie, "content-type": "application/json"} + + req = HTTPRequest( + base_url + "/syncApi/login", + method="DELETE", + headers=wrapper_cookie, + ) + response = await http_client.fetch(req) + # assert successfully logged out + assert response.code == 200 + + body = {"username": "", "password": ""} + + req = HTTPRequest( + base_url + "/syncApi/login", + method="POST", + body=json.dumps(body), + headers=wrapper_cookie, + ) + response = await http_client.fetch(req) + + second_response_cookie = get_user_cookie_from_http_response(response) + + # assert request after logout created new cookie + assert response_cookie != second_response_cookie + + +@pytest.mark.gen_test +async def test_invalid_logout(http_client, base_url): + req = HTTPRequest( + base_url + "/syncApi/login", + method="DELETE", + ) + response = await http_client.fetch(req, raise_error=False) + # assert an UNAUTHORIZED status + assert response.code == 401 + + +@pytest.mark.gen_test +async def test_redirect_to_login_if_not_logged_in(http_client, base_url): + # In theory, each endpoint that is not the login endpoint should be checked + login_url = "/syncApi/login" + uri = "/syncApi/user" + req = HTTPRequest( + base_url + uri, + method="GET", + # disable redirects + follow_redirects=False, + ) + response = await http_client.fetch(req, raise_error=False) # type: HTTPResponse + + # assert FOUND status code + assert response.code == 302 + # assert redirect to login prepended to uri request + assert response.headers["location"] == f"{login_url}?next={uri}" + + +TRUNCATE_BAGGIT_PREFIX_TEST_DATA = [ + # Tuple[test: str, validation: str] + ("data/contents/file", "file"), + ("/data/contents/file", "file"), + # TODO: Fix BAGGIT_PREFIX_RE regex to cover this case. 
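+    # (arguably the prefix should only match a complete "data/contents/"
+    # path segment, leaving "/data/contentsfile" unchanged; the expected
+    # value below documents the current behavior)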
+ ("/data/contentsfile", "file"), + ("/data/contents/dir/file", "dir/file"), +] + + +@pytest.mark.parametrize("test,validation", TRUNCATE_BAGGIT_PREFIX_TEST_DATA) +def test__truncate_baggit_prefix(test, validation): + + assert LocalResourceEntityHandler._truncate_baggit_prefix(test) == validation + + +BAGGIT_PREFIX = LocalResourceEntityHandler.BAGGIT_PREFIX +TRUNCATE_BAGGIT_PREPEND_TEST_DATA = [ + # Tuple[test: str, validation: str] + ( + f"{BAGGIT_PREFIX}file", + f"{BAGGIT_PREFIX}file", + ), + ( + f"{BAGGIT_PREFIX}dir/file", + f"{BAGGIT_PREFIX}dir/file", + ), + ( + "file", + f"{BAGGIT_PREFIX}file", + ), + ( + "dir/file", + f"{BAGGIT_PREFIX}dir/file", + ), +] + + +@pytest.mark.parametrize("test,validation", TRUNCATE_BAGGIT_PREPEND_TEST_DATA) +def test__prepend_baggit_prefix(test, validation): + assert LocalResourceEntityHandler._prepend_baggit_prefix(test) == validation diff --git a/tests/test_session_struct.py b/tests/test_session_struct.py new file mode 100644 index 00000000..d40aa236 --- /dev/null +++ b/tests/test_session_struct.py @@ -0,0 +1,21 @@ +import pytest +from hydroshare_jupyter_sync.session_struct import SessionStruct + + +@pytest.fixture +def empty_session_struct(): + return SessionStruct(session=None, cookie=None, id=None, username=None) + + +@pytest.fixture +def empty_session_with_cookie_struct(): + return SessionStruct(session=None, cookie=b"test", id=42, username="test") + + +def test_eq_empty(empty_session_struct): + assert empty_session_struct != empty_session_struct + + +def test_eq(empty_session_with_cookie_struct): + assert empty_session_with_cookie_struct == empty_session_with_cookie_struct + assert empty_session_with_cookie_struct == b"test"
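+
+
+def test_eq_other_operands(empty_session_with_cookie_struct):
+    # illustrative example: `__eq__` only recognizes `bytes` and
+    # `SessionStruct` operands, so any other type, or a mismatched
+    # cookie, compares unequal
+    assert empty_session_with_cookie_struct != 42
+    assert empty_session_with_cookie_struct != b"other"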