Skip to content

Commit

Permalink
Emit events from the Contents Service (#954)
Browse files Browse the repository at this point in the history
* emit events from the contents manager

* remove unused imports

* return default event_logger in contents manager

* remove unused actions
  • Loading branch information
Zsailer authored Aug 31, 2022
1 parent cc7e6c4 commit 6dc7d53
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 2 deletions.
3 changes: 3 additions & 0 deletions jupyter_server/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""The Jupyter Server"""
import os
import pathlib
import subprocess
import sys

Expand All @@ -10,6 +11,8 @@
]

# Port number constant exported for use by the rest of the package.
DEFAULT_JUPYTER_SERVER_PORT = 8888
# Base URI under which Jupyter Server's core event schema IDs are namespaced.
JUPYTER_SERVER_EVENTS_URI = "https://events.jupyter.org/jupyter_server"
# Directory, next to this module, that holds the bundled event schema files.
DEFAULT_EVENTS_SCHEMA_PATH = pathlib.Path(__file__).parent / "event_schemas"

del os

Expand Down
73 changes: 73 additions & 0 deletions jupyter_server/event_schemas/contents_service/v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"$id": https://events.jupyter.org/jupyter_server/contents_service/v1
version: 1
title: Contents Manager activities
personal-data: true
description: |
Record actions on files via the ContentsManager.
The notebook ContentsManager REST API is used by all frontends to retrieve,
save, list, delete and perform other actions on notebooks, directories,
and other files through the UI. This is pluggable - the default acts on
the file system, but can be replaced with a different ContentsManager
implementation - to work on S3, Postgres, other object stores, etc.
The events get recorded regardless of the ContentsManager implementation
being used.
Limitations:
1. This does not record all filesystem access, just the ones that happen
explicitly via the notebook server's REST API. Users can (and often do)
trivially access the filesystem in many other ways (such as `open()` calls
in their code), so this is usually never a complete record.
2. As with all events recorded by the notebook server, users most likely
have the ability to modify the code of the notebook server. Unless other
security measures are in place, these events should be treated as user
controlled and not used in high security areas.
3. Events are only recorded when an action succeeds.
type: object
required:
- action
- path
properties:
action:
enum:
- get
- create
- save
- upload
- rename
- copy
- delete
description: |
Action performed by the ContentsManager API.
This is a required field.
Possible values:
1. get
Get contents of a particular file, or list contents of a directory.
2. save
Save a file at path with contents from the client
3. rename
Rename a file or directory from value in source_path to
value in path.
4. copy
Copy a file or directory from value in source_path to
value in path.
5. delete
Delete a file or empty directory at given path
path:
type: string
description: |
Logical path on which the operation was performed.
This is a required field.
source_path:
type: string
description: |
Source path of an operation when action is 'copy' or 'rename'
15 changes: 15 additions & 0 deletions jupyter_server/serverapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,11 @@
from traitlets.config.application import boolean_flag, catch_config_error

from jupyter_server import (
DEFAULT_EVENTS_SCHEMA_PATH,
DEFAULT_JUPYTER_SERVER_PORT,
DEFAULT_STATIC_FILES_PATH,
DEFAULT_TEMPLATE_PATH_LIST,
JUPYTER_SERVER_EVENTS_URI,
__version__,
)
from jupyter_server._sysinfo import get_sys_info
Expand Down Expand Up @@ -1951,6 +1953,19 @@ def init_logging(self):
def init_event_logger(self):
    """Initialize the Event Bus.

    Creates the application's ``EventLogger`` and registers the core
    Jupyter Server event schemas with it. Each schema file is located
    by removing the ``JUPYTER_SERVER_EVENTS_URI`` prefix from the schema
    ID and resolving the remainder (plus ``.yaml``) against
    ``DEFAULT_EVENTS_SCHEMA_PATH``.

    Raises
    ------
    ValueError
        If a core schema ID does not start with
        ``JUPYTER_SERVER_EVENTS_URI``.
    """
    self.event_logger = EventLogger(parent=self)
    # Load the core Jupyter Server event schemas.
    # All event schemas must start with Jupyter Server's
    # events URI, `JUPYTER_SERVER_EVENTS_URI`.
    schema_ids = [
        "https://events.jupyter.org/jupyter_server/contents_service/v1",
    ]
    uri_prefix = JUPYTER_SERVER_EVENTS_URI + "/"
    for schema_id in schema_ids:
        if not schema_id.startswith(uri_prefix):
            raise ValueError(
                "Event schema ID %r does not start with %r" % (schema_id, uri_prefix)
            )
        # Slice the prefix off explicitly. `str.lstrip` would treat the URI
        # as a set of characters and could also eat leading characters of
        # the schema's own relative path.
        rel_schema_path = schema_id[len(uri_prefix) :] + ".yaml"
        schema_path = DEFAULT_EVENTS_SCHEMA_PATH / rel_schema_path
        # Register the schema from its on-disk path.
        self.event_logger.register_event_schema(schema_path)

def init_webapp(self):
"""initialize tornado webapp"""
Expand Down
6 changes: 4 additions & 2 deletions jupyter_server/services/contents/filemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ def get(self, path, content=True, type=None, format=None):
if type == "directory":
raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
model = self._file_model(path, content=content, format=format)
self.emit(data={"action": "get", "path": path})
return model

def _save_directory(self, os_path, model, path=""):
Expand Down Expand Up @@ -459,7 +460,7 @@ def save(self, model, path=""):
model["message"] = validation_message

self.run_post_save_hooks(model=model, os_path=os_path)

self.emit(data={"action": "save", "path": path})
return model

def delete_file(self, path):
Expand Down Expand Up @@ -735,6 +736,7 @@ async def get(self, path, content=True, type=None, format=None):
if type == "directory":
raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
model = await self._file_model(path, content=content, format=format)
self.emit(data={"action": "get", "path": path})
return model

async def _save_directory(self, os_path, model, path=""):
Expand Down Expand Up @@ -795,7 +797,7 @@ async def save(self, model, path=""):
model["message"] = validation_message

self.run_post_save_hooks(model=model, os_path=os_path)

self.emit(data={"action": "save", "path": path})
return model

async def delete_file(self, path):
Expand Down
3 changes: 3 additions & 0 deletions jupyter_server/services/contents/largefilemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def save(self, model, path=""):
# Last chunk
if chunk == -1:
self.run_post_save_hooks(model=model, os_path=os_path)
self.emit(data={"action": "save", "path": path})
return model
else:
return super().save(model, path)
Expand Down Expand Up @@ -125,6 +126,8 @@ async def save(self, model, path=""):
# Last chunk
if chunk == -1:
self.run_post_save_hooks(model=model, os_path=os_path)

self.emit(data={"action": "save", "path": path})
return model
else:
return await super().save(model, path)
Expand Down
26 changes: 26 additions & 0 deletions jupyter_server/services/contents/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import warnings
from fnmatch import fnmatch

from jupyter_events import EventLogger
from nbformat import ValidationError, sign
from nbformat import validate as validate_nb
from nbformat.v4 import new_notebook
Expand All @@ -25,6 +26,7 @@
)
from traitlets.config.configurable import LoggingConfigurable

from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI
from jupyter_server.transutils import _i18n
from jupyter_server.utils import ensure_async, import_item

Expand Down Expand Up @@ -53,6 +55,24 @@ class ContentsManager(LoggingConfigurable):
"""

event_schema_id = JUPYTER_SERVER_EVENTS_URI + "/contents_service/v1"
event_logger = Instance(EventLogger).tag(config=True)

@default("event_logger")
def _default_event_logger(self):
    """Provide the default value for the ``event_logger`` trait.

    Reuses the parent's ``event_logger`` when a parent exists and has
    one. Otherwise builds a new ``EventLogger`` with the contents
    service schema registered.
    """
    owner = self.parent
    if not owner or not hasattr(owner, "event_logger"):
        # No usable logger on the parent: create one and register the
        # contents service schema.
        standalone = EventLogger()
        standalone.register_event_schema(
            DEFAULT_EVENTS_SCHEMA_PATH / "contents_service" / "v1.yaml"
        )
        return standalone
    return owner.event_logger

def emit(self, data):
    """Emit ``data`` as an event under this manager's ``event_schema_id``.

    The call is forwarded to ``self.event_logger.emit``.
    """
    bus = self.event_logger
    schema = self.event_schema_id
    bus.emit(schema_id=schema, data=data)

root_dir = Unicode("/", config=True)

allow_hidden = Bool(False, config=True, help="Allow access to hidden files")
Expand Down Expand Up @@ -416,11 +436,13 @@ def delete(self, path):
raise HTTPError(400, "Can't delete root")
self.delete_file(path)
self.checkpoints.delete_all_checkpoints(path)
self.emit(data={"action": "delete", "path": path})

def rename(self, old_path, new_path):
    """Rename a file together with its checkpoints, then emit a 'rename' event.

    The emitted event records ``new_path`` as ``path`` and ``old_path``
    as ``source_path``.
    """
    paths = (old_path, new_path)
    self.rename_file(*paths)
    self.checkpoints.rename_all_checkpoints(*paths)
    event = {"action": "rename", "path": new_path, "source_path": old_path}
    self.emit(data=event)

def update(self, model, path):
"""Update the file's path
Expand Down Expand Up @@ -616,6 +638,7 @@ def copy(self, from_path, to_path=None):
raise HTTPError(404, "No such directory: %s" % to_path)

model = self.save(model, to_path)
self.emit(data={"action": "copy", "path": to_path, "source_path": from_path})
return model

def log_info(self):
Expand Down Expand Up @@ -819,11 +842,13 @@ async def delete(self, path):

await self.delete_file(path)
await self.checkpoints.delete_all_checkpoints(path)
self.emit(data={"action": "delete", "path": path})

async def rename(self, old_path, new_path):
    """Rename a file together with its checkpoints, then emit a 'rename' event.

    Awaits the file rename and then the checkpoint rename. The emitted
    event records ``new_path`` as ``path`` and ``old_path`` as
    ``source_path``.
    """
    paths = (old_path, new_path)
    await self.rename_file(*paths)
    await self.checkpoints.rename_all_checkpoints(*paths)
    event = {"action": "rename", "path": new_path, "source_path": old_path}
    self.emit(data=event)

async def update(self, model, path):
"""Update the file's path
Expand Down Expand Up @@ -985,6 +1010,7 @@ async def copy(self, from_path, to_path=None):
raise HTTPError(404, "No such directory: %s" % to_path)

model = await self.save(model, to_path)
self.emit(data={"action": "copy", "path": to_path, "source_path": from_path})
return model

async def trust_notebook(self, path):
Expand Down

0 comments on commit 6dc7d53

Please sign in to comment.