Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
/tests/test_data/notebook1/output
/tests/test_data/notebook2/output
35 changes: 33 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ millrun --help
│ * notebook_dir_or_file TEXT Path to a notebook file or a directory containing notebooks. │
│ [default: None] │
│ [required] │
│ * params TEXT JSON file that contains parameters for notebook execution. Can │
│ * notebook_params TEXT JSON file that contains parameters for notebook execution. Can │
│ either be a 'list of dict' or 'dict of list'. │
│ [default: None] │
│ [required] │
Expand Down Expand Up @@ -140,6 +140,38 @@ Where each notebook given to millrun will execute against each dictionary in the
This format is offered as a convenience format. Internally, it is converted into "Format 1" prior to execution.


## CLI Profile execution

As of v0.2.0, `millrun` supports a "profiles" YAML file, which removes the need to type very long commands on the command line, especially when the commands for a particular project are always going to be the same.

YAML format:

The format basically describes the kwargs required to execute the command.

The top level keys can be arbitrarily named but they represent one command execution.
The values underneath each top level key are the kwargs of the command.

The only required values are `notebook_dir_or_file` and `notebook_params`. All other params are optional.

```yaml
notebook1: # This is the name of the profile. A profile is equal to one command on the command line
  notebook_dir_or_file: ./notebook1/notebook1.ipynb # Req'd
  notebook_params: ./notebook1/notebook1_params.json # Req'd
  output_dir: ./notebook1/output # Optional
  prepend: # Optional
    - name
    - design
  append: # Optional
    - executed

notebook2: # This profile will be executed immediately after the first profile. It's like running the command again.
  notebook_dir_or_file: ./notebook2
  notebook_params: ./notebook2/notebook2_params.json
  output_dir: ./notebook2/output
  prepend:
    - tester
```

## CLI parallel execution

Since millrun iterates over two dimensions (each notebook and then each dict of parameters in the list), there are two ways of parallelizing:
Expand All @@ -153,7 +185,6 @@ However, this method becomes inefficient if you have MANY notebooks and only 1-3

If you need this use case then feel free to raise an issue and/or contribute a PR to implement it as an option for execution.


## Troubleshooting

There seems to be an un-planned-for behaviour (by me) with the parallel execution where if there is an error in the execution process, that iteration is simply skipped. I don't have any `try`/`except` in the code that causes this.
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ requires-python = ">=3.10"
dependencies = [
"papermill>=2.6.0",
"rich",
"ruamel-yaml>=0.18.15",
"typer>=0.16.0",
]

Expand All @@ -21,5 +22,7 @@ build-backend = "flit_core.buildapi"

[dependency-groups]
dev = [
"black>=25.1.0",
"ipykernel>=6.29.5",
"pytest>=8.4.2",
]
4 changes: 2 additions & 2 deletions src/millrun/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Millrun: A Python library and CLI tool to automate the execution of notebooks
with papermill.
with papermill.
"""

__version__ = "0.1.1"

from .millrun import execute_run
from .millrun import execute_run
126 changes: 79 additions & 47 deletions src/millrun/cli.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
import json
from ruamel.yaml import YAML
from typing import Optional, Any
from typing_extensions import Annotated
import pathlib

import typer
from .millrun import execute_run
from .millrun import execute_run, execute_profile


def _parse_json(filepath: str) -> dict:
    """
    Returns the decoded contents of the JSON file at 'filepath'.

    'filepath': path to the JSON file to read.

    The file is opened explicitly as UTF-8 (the standard encoding for JSON)
    so that non-ASCII text is decoded the same way on every platform instead
    of depending on the locale's default encoding.

    The value returned is whatever top-level value the file holds, as decoded
    by json.load (e.g. a dict for a JSON object, a list for a JSON array).

    Raises OSError if the file cannot be opened and json.JSONDecodeError if
    the contents are not valid JSON.
    """
    with open(filepath, "r", encoding="utf-8") as file:
        return json.load(file)

APP_INTRO = typer.style(
"""
AISC sections database W-section selection tool (2023-05-28)
"""
Executes a notebook or directory of notebooks using the provided bulk parameters JSON file
""",
fg=typer.colors.BRIGHT_YELLOW,
bold=True,
Expand All @@ -23,43 +21,69 @@ def _parse_json(filepath: str) -> dict:
add_completion=False,
no_args_is_help=True,
help=APP_INTRO,
# pretty_exceptions_enable=False,
pretty_exceptions_show_locals=False
pretty_exceptions_show_locals=False,
)


@app.command(
name='run',
name="run",
help="Executes a notebook or directory of notebooks using the provided bulk parameters JSON file",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
def run(
notebook_dir_or_file: Annotated[str, typer.Argument(
help="Path to a notebook file or a directory containing notebooks.")
],
params: Annotated[str, typer.Argument(
help=("JSON file that contains parameters for notebook execution. "
"Can either be a 'list of dict' or 'dict of list'."),
callback=lambda value: _parse_json(value),
)
],
output_dir: Annotated[Optional[str], typer.Option(
help=("Directory to place output files into. If not provided"
" the file directory will be used."),
)
notebook_dir_or_file: Annotated[
Optional[str],
typer.Argument(
help="Path to a notebook file or a directory containing notebooks.",
),
] = None,
prepend: Annotated[Optional[str], typer.Option(
help=("Prepend components to use on output filename."
"Can use dict keys from 'params' which will be evaluated."
"(Comma-separated values)."),
callback=lambda x: x.split(",") if x else None
)
notebook_params: Annotated[
Optional[str],
typer.Argument(
help=(
"JSON file that contains parameters for notebook execution. "
"Can either be a 'list of dict' or 'dict of list'."
),
),
] = None,
append: Annotated[Optional[str], typer.Option(
help=("Append components to use on output filename."
"Can use dict keys from 'params' which will be evaluated."
"(Comma-separated values)."),
callback=lambda x: x.split(",") if x else None
)
profile: Annotated[
Optional[str],
typer.Argument(
help=(
"A millrun YAML profile file that specifies the notebook_dir_or_file and notebook_params (along with additional options) instead of providing them directly."
),
),
] = None,
output_dir: Annotated[
Optional[str],
typer.Option(
help=(
"Directory to place output files into. If not provided"
" the current working directory will be used."
),
),
] = None,
prepend: Annotated[
Optional[str],
typer.Option(
help=(
"Prepend components to use on output filename."
"Can use dict keys from 'params' which will be evaluated."
"(Comma-separated values)."
),
callback=lambda x: x.split(",") if x else None,
),
] = None,
append: Annotated[
Optional[str],
typer.Option(
help=(
"Append components to use on output filename."
"Can use dict keys from 'params' which will be evaluated."
"(Comma-separated values)."
),
callback=lambda x: x.split(",") if x else None,
),
] = None,
recursive: bool = False,
exclude_glob_pattern: Optional[str] = None,
Expand All @@ -69,19 +93,27 @@ def run(
output_dir = pathlib.Path(output_dir)
else:
output_dir = pathlib.Path.cwd()
execute_run(
notebook_dir_or_file,
params,
output_dir,
prepend,
append,
recursive,
exclude_glob_pattern,
include_glob_pattern,
use_multiprocessing=True
# **kwargs
)

# Automated profile execution
if profile is not None:
profile_file = pathlib.Path.cwd() / pathlib.Path(profile)
execute_profile(profile_file)

# Typical execution
elif None not in [notebook_dir_or_file, notebook_params]:
execute_run(
notebook_dir_or_file,
notebook_params,
output_dir,
prepend,
append,
recursive,
exclude_glob_pattern,
include_glob_pattern,
use_multiprocessing=True,
# **kwargs
)


if __name__ == "__main__":
app()
app()
Loading