Skip to content

Commit

Permalink
Run all notebooks in all dirs, add exclude option
Browse files Browse the repository at this point in the history
Remove magic notebook exclude dir
fajpunk committed May 20, 2024
1 parent dfa2141 commit 26a6535
Showing 10 changed files with 684 additions and 23 deletions.
6 changes: 6 additions & 0 deletions changelog.d/20240520_113001_danfuchs_notebook_directories.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Delete the sections that don't apply -->

### Backwards-incompatible changes

- NotebookRunner business now runs all notebooks in a repo, at tht root and in all subdirs recursively, by default.
- Add `exclude_dirs` option to NotebookRunner business to list directories in which notebooks will not be run.
12 changes: 12 additions & 0 deletions src/mobu/models/business/notebookrunner.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@

from __future__ import annotations

from pathlib import Path
from typing import Literal

from pydantic import Field
@@ -48,6 +49,17 @@ class NotebookRunnerOptions(NubladoBusinessOptions):
description="Only used by the NotebookRunner",
)

exclude_dirs: set[Path] = Field(
set(),
title="Any notebooks in these directories will not be run",
description=(
" These directories are relative to the repo root. Any notebooks"
" in child directories of these directories will also be excluded."
" Only used by the NotebookRunner."
),
examples=["some-dir", "some-dir/some-other-dir"],
)


class NotebookRunnerConfig(BusinessConfig):
"""Configuration specialization for NotebookRunner."""
12 changes: 11 additions & 1 deletion src/mobu/services/business/notebookrunner.py
Original file line number Diff line number Diff line change
@@ -57,6 +57,7 @@ def __init__(
self._notebook: Path | None = None
self._notebook_paths: list[Path] | None = None
self._repo_dir: Path | None = None
self._exclude_paths: set[Path] = set()
self._running_code: str | None = None
self._git = Git(logger=logger)

@@ -72,6 +73,9 @@ async def startup(self) -> None:
if self._repo_dir is None:
self._repo_dir = Path(TemporaryDirectory().name)
await self.clone_repo()
self._exclude_paths = {
(self._repo_dir / path) for path in self.options.exclude_dirs
}
self._notebook_paths = self.find_notebooks()
self.logger.info("Repository cloned and ready")
await super().startup()
@@ -87,14 +91,20 @@ async def clone_repo(self) -> None:
with self.timings.start("clone_repo"):
await self._git.clone("-b", branch, url, str(self._repo_dir))

def is_excluded(self, notebook: Path) -> bool:
# A notebook is excluded if any of its parent directories are excluded
return bool(set(notebook.parents) & self._exclude_paths)

def find_notebooks(self) -> list[Path]:
with self.timings.start("find_notebooks"):
if self._repo_dir is None:
raise NotebookRepositoryError(
"Repository directory must be set", self.user.username
)
notebooks = [
p for p in self._repo_dir.iterdir() if p.suffix == ".ipynb"
n
for n in self._repo_dir.glob("**/*.ipynb")
if not self.is_excluded(n)
]
if not notebooks:
msg = "No notebooks found in {self._repo_dir}"
238 changes: 216 additions & 22 deletions tests/business/notebookrunner_test.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,18 @@
from ..support.util import wait_for_business


async def setup_git_repo(repo_path: Path) -> None:
"""Initialize and populate a git repo at `repo_path`."""
git = Git(repo=repo_path)
await git.init("--initial-branch=main")
await git.config("user.email", "gituser@example.com")
await git.config("user.name", "Git User")
for path in repo_path.iterdir():
if not path.name.startswith("."):
await git.add(str(path))
await git.commit("-m", "Initial commit")


@pytest.mark.asyncio
async def test_run(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
@@ -30,18 +42,9 @@ async def test_run(
repo_path = tmp_path / "notebooks"

shutil.copytree(str(source_path), str(repo_path))
# Remove exception notebook
(repo_path / "exception.ipynb").unlink()

# Set up git client
git = Git(repo=repo_path)
await git.init("--initial-branch=main")
await git.config("user.email", "gituser@example.com")
await git.config("user.name", "Git User")
for path in repo_path.iterdir():
if not path.name.startswith("."):
await git.add(str(path))
await git.commit("-m", "Initial commit")
# Set up git repo
await setup_git_repo(repo_path)

# Start a monkey. We have to do this in a try/finally block since the
# runner will change working directories, which because working
@@ -93,11 +96,210 @@ async def test_run(
# Get the log and check the cell output.
r = await client.get("/mobu/flocks/test/monkeys/testuser1/log")
assert r.status_code == 200

# Root notebook
assert "This is a test" in r.text
assert "This is another test" in r.text
assert "Final test" in r.text

# Exceptions
assert "Exception thrown" not in r.text

# Make sure mobu ran all of the notebooks it thinks it should have
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_run_recursive(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
) -> None:
mock_gafaelfawr(respx_mock)
cwd = Path.cwd()

# Set up a notebook repository.
source_path = Path(__file__).parent.parent / "notebooks_recursive"
repo_path = tmp_path / "notebooks"

shutil.copytree(str(source_path), str(repo_path))
# Remove exception notebook
(repo_path / "exception.ipynb").unlink()

# Set up git repo
await setup_git_repo(repo_path)

# Start a monkey. We have to do this in a try/finally block since the
# runner will change working directories, which because working
# directories are process-global may mess up future tests.
try:
r = await client.put(
"/mobu/flocks",
json={
"name": "test",
"count": 1,
"user_spec": {"username_prefix": "testuser"},
"scopes": ["exec:notebook"],
"business": {
"type": "NotebookRunner",
"options": {
"spawn_settle_time": 0,
"execution_idle_time": 0,
"max_executions": 4,
"repo_url": str(repo_path),
"repo_branch": "main",
"working_directory": str(repo_path),
},
},
},
)
assert r.status_code == 201

# Wait until we've finished one loop and check the results.
data = await wait_for_business(client, "testuser1")
assert data == {
"name": "testuser1",
"business": {
"failure_count": 0,
"name": "NotebookRunner",
"notebook": ANY,
"success_count": 1,
"timings": ANY,
},
"state": "RUNNING",
"user": {
"scopes": ["exec:notebook"],
"token": ANY,
"username": "testuser1",
},
}
finally:
os.chdir(cwd)

# Get the log and check the cell output.
r = await client.get("/mobu/flocks/test/monkeys/testuser1/log")
assert r.status_code == 200

# Root notebook
assert "This is a test" in r.text
assert "This is another test" in r.text
assert "Final test" in r.text

# some-dir notebook
assert "Test some-dir" in r.text
assert "Another test some-dir" in r.text
assert "Final test some-dir" in r.text

# some-other-dir notebook
assert "Test some-other-dir" in r.text
assert "Another test some-other-dir" in r.text
assert "Final test some-other-dir" in r.text

# double-nested-dir notebook
assert "Test double-nested-dir" in r.text
assert "Another test double-nested-dir" in r.text
assert "Final test double-nested-dir" in r.text

# Exceptions
assert "Exception thrown" not in r.text

# Make sure mobu ran all of the notebooks it thinks it should have
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_exclude_dirs(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
) -> None:
mock_gafaelfawr(respx_mock)
cwd = Path.cwd()

# Set up a notebook repository.
source_path = Path(__file__).parent.parent / "notebooks_recursive"
repo_path = tmp_path / "notebooks"

shutil.copytree(str(source_path), str(repo_path))
# Remove exception notebook
(repo_path / "exception.ipynb").unlink()

# Set up git repo
await setup_git_repo(repo_path)

# Start a monkey. We have to do this in a try/finally block since the
# runner will change working directories, which because working
# directories are process-global may mess up future tests.
try:
r = await client.put(
"/mobu/flocks",
json={
"name": "test",
"count": 1,
"user_spec": {"username_prefix": "testuser"},
"scopes": ["exec:notebook"],
"business": {
"type": "NotebookRunner",
"options": {
"spawn_settle_time": 0,
"execution_idle_time": 0,
"max_executions": 2,
"repo_url": str(repo_path),
"repo_branch": "main",
"working_directory": str(repo_path),
"exclude_dirs": [
"some-other-dir/nested-dir",
"some-dir",
],
},
},
},
)
assert r.status_code == 201

# Wait until we've finished one loop and check the results.
data = await wait_for_business(client, "testuser1")
assert data == {
"name": "testuser1",
"business": {
"failure_count": 0,
"name": "NotebookRunner",
"notebook": ANY,
"success_count": 1,
"timings": ANY,
},
"state": "RUNNING",
"user": {
"scopes": ["exec:notebook"],
"token": ANY,
"username": "testuser1",
},
}
finally:
os.chdir(cwd)

# Get the log and check the cell output.
r = await client.get("/mobu/flocks/test/monkeys/testuser1/log")
assert r.status_code == 200

# Root notebook
assert "This is a test" in r.text
assert "This is another test" in r.text
assert "Final test" in r.text

# some-other-dir notebook
assert "Test some-other-dir" in r.text
assert "Another test some-other-dir" in r.text
assert "Final test some-other-dir" in r.text

# some-dir notebook
assert "Test some-dir" not in r.text

# nested-dir notebook
assert "Test double-nested-dir" not in r.text

# Exceptions
assert "Exception thrown" not in r.text

# Make sure mobu ran all of the notebooks it thinks it should have
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_alert(
@@ -109,21 +311,13 @@ async def test_alert(
mock_gafaelfawr(respx_mock)

# Set up a notebook repository with the exception notebook.
source_path = Path(__file__).parent.parent / "notebooks"
source_path = Path(__file__).parent.parent / "notebooks_recursive"
repo_path = tmp_path / "notebooks"
repo_path.mkdir()
shutil.copy(str(source_path / "exception.ipynb"), str(repo_path))

# Set up git client
git = Git(repo=repo_path)
await git.init("--initial-branch=main")
await git.config("--local", "user.email", "gituser@example.com")
await git.config("--local", "user.name", "Git User")

for path in repo_path.iterdir():
if not path.name.startswith("."):
await git.add(str(path))
await git.commit("-m", "Initial commit")
# Set up git repo
await setup_git_repo(repo_path)

# The bad code run by the exception test.
bad_code = 'foo = {"bar": "baz"}\nfoo["nothing"]'
File renamed without changes.
File renamed without changes.
116 changes: 116 additions & 0 deletions tests/notebooks_recursive/some-dir/test-some-dir-notebook.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5cb3e0b7",
"metadata": {},
"source": "This is a test notebook to check the NotebookRunner business. It should run even though it's in a directory, unless this directory is excluded..."
},
{
"cell_type": "code",
"id": "f84f0959",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:37:58.826140Z",
"start_time": "2024-05-17T20:37:58.823537Z"
}
},
"source": "print(\"Test some-dir\")",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test some-dir\n"
]
}
],
"execution_count": 1
},
{
"cell_type": "code",
"id": "44ada997",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:38:00.590204Z",
"start_time": "2024-05-17T20:38:00.587773Z"
}
},
"source": [
"import os\n",
"print(\"Another test some-dir\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Another test some-dir\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "markdown",
"id": "d5cea3a7",
"metadata": {},
"source": [
"Another Markdown cell"
]
},
{
"cell_type": "markdown",
"id": "541caea0",
"metadata": {},
"source": [
"Yet another Markdown cell. This one has a list.\n",
"- one\n",
"- two\n",
"- three"
]
},
{
"cell_type": "code",
"id": "53a941a4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:38:02.105516Z",
"start_time": "2024-05-17T20:38:02.103211Z"
}
},
"source": "print(\"Final test some-dir\")",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Final test some-dir\n"
]
}
],
"execution_count": 3
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5cb3e0b7",
"metadata": {},
"source": " This is a test notebook to check the NotebookRunner business. It should run even though it's in a directory, unless this directory is excluded..."
},
{
"cell_type": "code",
"id": "f84f0959",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:39:57.123503Z",
"start_time": "2024-05-17T20:39:57.121412Z"
}
},
"source": "print(\"Test double-nested-dir\")",
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"id": "44ada997",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:39:57.150633Z",
"start_time": "2024-05-17T20:39:57.124284Z"
}
},
"source": [
"import os\n",
"print(\"Another test double-nested-dir\")"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"id": "d5cea3a7",
"metadata": {},
"source": [
"Another Markdown cell"
]
},
{
"cell_type": "markdown",
"id": "541caea0",
"metadata": {},
"source": [
"Yet another Markdown cell. This one has a list.\n",
"- one\n",
"- two\n",
"- three"
]
},
{
"cell_type": "code",
"id": "53a941a4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:39:57.176580Z",
"start_time": "2024-05-17T20:39:57.151166Z"
}
},
"source": "print(\"Final test double-nested-dir\")",
"execution_count": 3,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
116 changes: 116 additions & 0 deletions tests/notebooks_recursive/some-other-dir/test-some-other-dir.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5cb3e0b7",
"metadata": {},
"source": "This is a test notebook to check the NotebookRunner business. It should run even though it's in a directory, unless this directory is excluded..."
},
{
"cell_type": "code",
"id": "f84f0959",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:39:13.319752Z",
"start_time": "2024-05-17T20:39:13.317012Z"
}
},
"source": "print(\"Test some-other-dir\")",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test some-other-dir\n"
]
}
],
"execution_count": 1
},
{
"cell_type": "code",
"id": "44ada997",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:39:13.345548Z",
"start_time": "2024-05-17T20:39:13.320800Z"
}
},
"source": [
"import os\n",
"print(\"Another test some-other-dir\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Another test some-other-dir\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "markdown",
"id": "d5cea3a7",
"metadata": {},
"source": [
"Another Markdown cell"
]
},
{
"cell_type": "markdown",
"id": "541caea0",
"metadata": {},
"source": [
"Yet another Markdown cell. This one has a list.\n",
"- one\n",
"- two\n",
"- three"
]
},
{
"cell_type": "code",
"id": "53a941a4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-17T20:39:13.371583Z",
"start_time": "2024-05-17T20:39:13.346126Z"
}
},
"source": "print(\"Final test some-other-dir\")",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Final test some-other-dir\n"
]
}
],
"execution_count": 3
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
115 changes: 115 additions & 0 deletions tests/notebooks_recursive/test-notebook.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5cb3e0b7",
"metadata": {},
"source": [
"This is a test notebook to check the NotebookRunner business. It contains some Markdown cells and some code cells. Only the code cells should run."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f84f0959",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This is a test\n"
]
}
],
"source": [
"print(\"This is a test\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44ada997",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This is another test\n"
]
}
],
"source": [
"import os\n",
"print(\"This is another test\")"
]
},
{
"cell_type": "markdown",
"id": "d5cea3a7",
"metadata": {},
"source": [
"Another Markdown cell"
]
},
{
"cell_type": "markdown",
"id": "541caea0",
"metadata": {},
"source": [
"Yet another Markdown cell. This one has a list.\n",
"- one\n",
"- two\n",
"- three"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "53a941a4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Final test\n"
]
}
],
"source": [
"print(\"Final test\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "823560c6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 26a6535

Please sign in to comment.