Skip to content

Commit 84e2866

Browse files
Merge pull request #4 from StructuralPython/features/profile_execution
Features/profile execution
2 parents d4ad96b + 3a64822 commit 84e2866

File tree

14 files changed

+759
-142
lines changed

14 files changed

+759
-142
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,3 +150,5 @@ cython_debug/
150150
# and can be added to the global gitignore or merged into this file. For a more nuclear
151151
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
152152
#.idea/
153+
/tests/test_data/notebook1/output
154+
/tests/test_data/notebook2/output

README.md

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ millrun --help
4949
│ * notebook_dir_or_file TEXT Path to a notebook file or a directory containing notebooks. │
5050
│ [default: None] │
5151
│ [required] │
52-
│ * params TEXT JSON file that contains parameters for notebook execution. Can │
52+
│ * notebook_params TEXT JSON file that contains parameters for notebook execution. Can │
5353
│ either be a 'list of dict' or 'dict of list'. │
5454
│ [default: None] │
5555
│ [required] │
@@ -140,6 +140,38 @@ Where each notebook given to millrun will execute against each dictionary in the
140140
This format is offered as a convenience format. Internally, it is converted into "Format 1" prior to execution.
141141

142142

143+
## CLI Profile execution
144+
145+
As of v0.2.0, `millrun` supports a "profiles" YAML file, which saves you from typing very long commands on the command line, especially when the commands for a particular project are always going to be the same.
146+
147+
YAML format:
148+
149+
The format basically describes the kwargs required to execute the command.
150+
151+
The top-level keys can be arbitrarily named; each one represents one command execution.
152+
The values underneath each top level key are the kwargs of the command.
153+
154+
The only required values are `notebook_dir_or_file` and `notebook_params`. All other params are optional.
155+
156+
```yaml
157+
notebook1: # This is the name of the profile. A profile is equal to one command on the command line
158+
notebook_dir_or_file: ./notebook1/notebook1.ipynb # Req'd
159+
notebook_params: ./notebook1/notebook1_params.json # Req'd
160+
output_dir: ./notebook1/output # Optional
161+
prepend: # Optional
162+
- name
163+
- design
164+
append: # Optional
165+
- executed
166+
167+
notebook2: # This profile will be executed immediately after the first profile. It's like running the command again.
168+
notebook_dir_or_file: ./notebook2
169+
notebook_params: ./notebook2/notebook2_params.json
170+
output_dir: ./notebook2/output
171+
prepend:
172+
- tester
173+
```
174+
143175
## CLI parallel execution
144176
145177
Since millrun iterates over two dimensions (each notebook and then each dict of parameters in the list), there are two ways of parallelizing:
@@ -153,7 +185,6 @@ However, this method becomes inefficient if you have MANY notebooks and only 1-3
153185
154186
If you need this use case then feel free to raise an issue and/or contribute a PR to implement it as an option for execution.
155187
156-
157188
## Troubleshooting
158189
159190
There seems to be an un-planned-for behaviour (by me) with the parallel execution where if there is an error in the execution process, that iteration is simply skipped. I don't have any `try`/`except` in the code that causes this.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ requires-python = ">=3.10"
99
dependencies = [
1010
"papermill>=2.6.0",
1111
"rich",
12+
"ruamel-yaml>=0.18.15",
1213
"typer>=0.16.0",
1314
]
1415

@@ -21,5 +22,7 @@ build-backend = "flit_core.buildapi"
2122

2223
[dependency-groups]
2324
dev = [
25+
"black>=25.1.0",
2426
"ipykernel>=6.29.5",
27+
"pytest>=8.4.2",
2528
]

src/millrun/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""
22
Millrun: A Python library and CLI tool to automate the execution of notebooks
3-
with papermill.
3+
with papermill.
44
"""
55

66
__version__ = "0.1.1"
77

8-
from .millrun import execute_run
8+
from .millrun import execute_run

src/millrun/cli.py

Lines changed: 79 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
import json
2+
from ruamel.yaml import YAML
23
from typing import Optional, Any
34
from typing_extensions import Annotated
45
import pathlib
56

67
import typer
7-
from .millrun import execute_run
8+
from .millrun import execute_run, execute_profile
89

910

10-
def _parse_json(filepath: str) -> dict:
11-
with open(filepath, 'r') as file:
12-
return json.load(file)
1311

1412
APP_INTRO = typer.style(
15-
"""
16-
AISC sections database W-section selection tool (2023-05-28)
13+
"""
14+
Executes a notebook or directory of notebooks using the provided bulk parameters JSON file
1715
""",
1816
fg=typer.colors.BRIGHT_YELLOW,
1917
bold=True,
@@ -23,43 +21,69 @@ def _parse_json(filepath: str) -> dict:
2321
add_completion=False,
2422
no_args_is_help=True,
2523
help=APP_INTRO,
26-
# pretty_exceptions_enable=False,
27-
pretty_exceptions_show_locals=False
24+
pretty_exceptions_show_locals=False,
2825
)
2926

27+
3028
@app.command(
31-
name='run',
29+
name="run",
3230
help="Executes a notebook or directory of notebooks using the provided bulk parameters JSON file",
3331
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
3432
)
3533
def run(
36-
notebook_dir_or_file: Annotated[str, typer.Argument(
37-
help="Path to a notebook file or a directory containing notebooks.")
38-
],
39-
params: Annotated[str, typer.Argument(
40-
help=("JSON file that contains parameters for notebook execution. "
41-
"Can either be a 'list of dict' or 'dict of list'."),
42-
callback=lambda value: _parse_json(value),
43-
)
44-
],
45-
output_dir: Annotated[Optional[str], typer.Option(
46-
help=("Directory to place output files into. If not provided"
47-
" the file directory will be used."),
48-
)
34+
notebook_dir_or_file: Annotated[
35+
Optional[str],
36+
typer.Argument(
37+
help="Path to a notebook file or a directory containing notebooks.",
38+
),
4939
] = None,
50-
prepend: Annotated[Optional[str], typer.Option(
51-
help=("Prepend components to use on output filename."
52-
"Can use dict keys from 'params' which will be evaluated."
53-
"(Comma-separated values)."),
54-
callback=lambda x: x.split(",") if x else None
55-
)
40+
notebook_params: Annotated[
41+
Optional[str],
42+
typer.Argument(
43+
help=(
44+
"JSON file that contains parameters for notebook execution. "
45+
"Can either be a 'list of dict' or 'dict of list'."
46+
),
47+
),
5648
] = None,
57-
append: Annotated[Optional[str], typer.Option(
58-
help=("Append components to use on output filename."
59-
"Can use dict keys from 'params' which will be evaluated."
60-
"(Comma-separated values)."),
61-
callback=lambda x: x.split(",") if x else None
62-
)
49+
profile: Annotated[
50+
Optional[str],
51+
typer.Argument(
52+
help=(
53+
"A millrun YAML profile file that specifies the notebook_dir_or_file and notebook_params (along with additional options) instead of providing them directly."
54+
),
55+
),
56+
] = None,
57+
output_dir: Annotated[
58+
Optional[str],
59+
typer.Option(
60+
help=(
61+
"Directory to place output files into. If not provided"
62+
" the current working directory will be used."
63+
),
64+
),
65+
] = None,
66+
prepend: Annotated[
67+
Optional[str],
68+
typer.Option(
69+
help=(
70+
"Prepend components to use on output filename."
71+
"Can use dict keys from 'params' which will be evaluated."
72+
"(Comma-separated values)."
73+
),
74+
callback=lambda x: x.split(",") if x else None,
75+
),
76+
] = None,
77+
append: Annotated[
78+
Optional[str],
79+
typer.Option(
80+
help=(
81+
"Append components to use on output filename."
82+
"Can use dict keys from 'params' which will be evaluated."
83+
"(Comma-separated values)."
84+
),
85+
callback=lambda x: x.split(",") if x else None,
86+
),
6387
] = None,
6488
recursive: bool = False,
6589
exclude_glob_pattern: Optional[str] = None,
@@ -69,19 +93,27 @@ def run(
6993
output_dir = pathlib.Path(output_dir)
7094
else:
7195
output_dir = pathlib.Path.cwd()
72-
execute_run(
73-
notebook_dir_or_file,
74-
params,
75-
output_dir,
76-
prepend,
77-
append,
78-
recursive,
79-
exclude_glob_pattern,
80-
include_glob_pattern,
81-
use_multiprocessing=True
82-
# **kwargs
83-
)
96+
97+
# Automated profile execution
98+
if profile is not None:
99+
profile_file = pathlib.Path.cwd() / pathlib.Path(profile)
100+
execute_profile(profile_file)
101+
102+
# Typical execution
103+
elif None not in [notebook_dir_or_file, notebook_params]:
104+
execute_run(
105+
notebook_dir_or_file,
106+
notebook_params,
107+
output_dir,
108+
prepend,
109+
append,
110+
recursive,
111+
exclude_glob_pattern,
112+
include_glob_pattern,
113+
use_multiprocessing=True,
114+
# **kwargs
115+
)
84116

85117

86118
if __name__ == "__main__":
87-
app()
119+
app()

0 commit comments

Comments
 (0)