Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds --verify #195

Merged
merged 15 commits into from
Nov 3, 2024
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ Do a dry run and only list which files would have been stripped:

nbstripout --dry-run FILE.ipynb [FILE2.ipynb ...]

or

Do a verification run, which works like dry run but will fail
if any files would have been stripped:

nbstripout --verify FILE.ipynb [FILE2.ipynb ...]

Operate on all `.ipynb` files in the current directory and subdirectories
recursively:

Expand Down
33 changes: 28 additions & 5 deletions nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@

from argparse import ArgumentParser, RawDescriptionHelpFormatter
import collections
import copy
import io
import json
from os import devnull, environ, makedirs, path
Expand Down Expand Up @@ -331,28 +332,38 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False):
return 1

def process_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'):
any_change = False
if args.mode == 'zeppelin':
nb = json.load(input_stream, object_pairs_hook=collections.OrderedDict)
nb_orig = copy.deepcopy(nb)
nb_stripped = strip_zeppelin_output(nb)
if nb_orig != nb_stripped:
any_change = True

if args.dry_run:
output_stream.write(f'Dry run: would have stripped {filename}\n')
return
return any_change
if output_stream.seekable():
output_stream.seek(0)
output_stream.truncate()
json.dump(nb_stripped, output_stream, indent=2)
output_stream.write('\n')
output_stream.flush()
return
return any_change

with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
nb = nbformat.read(input_stream, as_version=nbformat.NO_CONVERT)

nb_orig = copy.deepcopy(nb)
nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id,
extra_keys, args.drop_empty_cells,
args.drop_tagged_cells.split(), args.strip_init_cells,
_parse_size(args.max_size))

if nb_orig != nb:
any_change = True

if args.dry_run:
output_stream.write(f'Dry run: would have stripped {filename}\n')
else:
Expand All @@ -363,7 +374,7 @@ def process_notebook(input_stream, output_stream, args, extra_keys, filename='in
warnings.simplefilter("ignore", category=UserWarning)
nbformat.write(nb, output_stream)
output_stream.flush()

return any_change

def main():
parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter)
Expand All @@ -383,6 +394,8 @@ def main():
'repository and configuration summary if installed')
task.add_argument('--version', action='store_true',
help='Print version')
parser.add_argument("--verify", action="store_true",
help="Return a non-zero exit code if any files were changed, Implies --dry-run")
parser.add_argument('--keep-count', action='store_true',
help='Do not strip the execution count/prompt number')
parser.add_argument('--keep-output', action='store_true',
Expand Down Expand Up @@ -428,6 +441,9 @@ def main():
args = parser.parse_args()
git_config = ['git', 'config']

if args.verify and not args.dry_run:
args.dry_run = True

if args._system:
git_config.append('--system')
install_location = INSTALL_LOCATION_SYSTEM
Expand Down Expand Up @@ -483,14 +499,17 @@ def main():
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') if sys.stdin else None
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='')

any_change = False
for filename in args.files:
if not (args.force or filename.endswith('.ipynb') or filename.endswith('.zpln')):
continue

try:
with io.open(filename, 'r+', encoding='utf8', newline='') as f:
out = output_stream if args.textconv or args.dry_run else f
process_notebook(f, out, args, extra_keys, filename)
if process_notebook(f, out, args, extra_keys, filename):
any_change = True

except nbformat.reader.NotJSONError:
print(f"No valid notebook detected in '{filename}'", file=sys.stderr)
raise SystemExit(1)
Expand All @@ -504,7 +523,11 @@ def main():

if not args.files and input_stream:
try:
process_notebook(input_stream, output_stream, args, extra_keys)
if process_notebook(input_stream, output_stream, args, extra_keys):
any_change = True
except nbformat.reader.NotJSONError:
print('No valid notebook detected on stdin', file=sys.stderr)
raise SystemExit(1)

if args.verify and any_change:
raise SystemExit(1)
65 changes: 54 additions & 11 deletions tests/test_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,49 +53,92 @@ def nbstripout_exe():


@pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES)
def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str]):
@pytest.mark.parametrize("verify", (True, False))
def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str], verify: bool):
with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f:
expected = f.read()

with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
pc = run([nbstripout_exe()] + args, stdin=f, stdout=PIPE, universal_newlines=True)
input_ = f.read()

with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
args = [nbstripout_exe()] + args
if verify:
args.append("--verify")
pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True)
output = pc.stdout

assert output == expected
if verify:
# When using stin, the dry run flag is disregarded.
assert pc.returncode == (1 if input_ != expected else 0)
else:
assert output == expected
assert pc.returncode == 0


@pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES)
def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path):
@pytest.mark.parametrize("verify", (True, False))
def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path, verify: bool):
with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f:
expected = f.read()

p = tmp_path / input_file
with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
p.write_text(f.read())
pc = run([nbstripout_exe(), p] + args, stdout=PIPE, universal_newlines=True)

assert not pc.stdout and p.read_text() == expected
with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
input_ = f.read()

args = [nbstripout_exe(), p] + args
if verify:
args.append("--verify")
pc = run(args, stdout=PIPE, universal_newlines=True)

output = pc.stdout.strip()
if verify:
if expected != input_:
assert "Dry run: would have stripped" in output
assert pc.returncode == 1

# Since verify implies --dry-run, we make sure the file is not modified
with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
output_ = f.read()

assert output_ == input_
else:
assert pc.returncode == 0
assert not pc.stdout and p.read_text() == expected


@pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES)
def test_dry_run_stdin(input_file: str, extra_args: List[str]):
@pytest.mark.parametrize("verify", (True, False))
def test_dry_run_stdin(input_file: str, extra_args: List[str], verify: bool):
expected = "Dry run: would have stripped input from stdin\n"

with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
pc = run([nbstripout_exe(), "--dry-run"] + extra_args, stdin=f, stdout=PIPE, universal_newlines=True)
args = [nbstripout_exe(), "--dry-run"] + extra_args
if verify:
args.append("--verify")
pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True)
output = pc.stdout

assert output == expected
assert pc.returncode == (1 if verify else 0)


@pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES)
def test_dry_run_args(input_file: str, extra_args: List[str]):
@pytest.mark.parametrize("verify", (True, False))
def test_dry_run_args(input_file: str, extra_args: List[str], verify: bool):
expected_regex = re.compile(f"Dry run: would have stripped .*[/\\\\]{input_file}\n")

pc = run([nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args, stdout=PIPE, universal_newlines=True)
args = [nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args
if verify:
args.append("--verify")
pc = run(args, stdout=PIPE, universal_newlines=True)
output = pc.stdout

assert expected_regex.match(output)
if verify:
assert pc.returncode == 1


@pytest.mark.parametrize("input_file, expected_errs, extra_args", ERR_OUTPUT_CASES)
Expand Down
Loading