diff --git a/README.md b/README.md index d73e9ba..04bba0a 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,13 @@ Do a dry run and only list which files would have been stripped: nbstripout --dry-run FILE.ipynb [FILE2.ipynb ...] +or + +Do a verification run, which works like dry run but will fail +if any files would have been stripped: + + nbstripout --verify FILE.ipynb [FILE2.ipynb ...] + Operate on all `.ipynb` files in the current directory and subdirectories recursively: diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 77e7944..5b1cf41 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -111,6 +111,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter import collections +import copy import io import json from os import devnull, environ, makedirs, path @@ -331,28 +332,38 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False): return 1 def process_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'): + any_change = False if args.mode == 'zeppelin': nb = json.load(input_stream, object_pairs_hook=collections.OrderedDict) + nb_orig = copy.deepcopy(nb) nb_stripped = strip_zeppelin_output(nb) + if nb_orig != nb_stripped: + any_change = True + if args.dry_run: output_stream.write(f'Dry run: would have stripped {filename}\n') - return + return any_change if output_stream.seekable(): output_stream.seek(0) output_stream.truncate() json.dump(nb_stripped, output_stream, indent=2) output_stream.write('\n') output_stream.flush() - return + return any_change + with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) nb = nbformat.read(input_stream, as_version=nbformat.NO_CONVERT) + nb_orig = copy.deepcopy(nb) nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells, args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size)) + if nb_orig != nb: + any_change 
= True + if args.dry_run: output_stream.write(f'Dry run: would have stripped {filename}\n') else: @@ -363,7 +374,7 @@ def process_notebook(input_stream, output_stream, args, extra_keys, filename='in warnings.simplefilter("ignore", category=UserWarning) nbformat.write(nb, output_stream) output_stream.flush() - + return any_change def main(): parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter) @@ -383,6 +394,8 @@ def main(): 'repository and configuration summary if installed') task.add_argument('--version', action='store_true', help='Print version') + parser.add_argument("--verify", action="store_true", + help="Return a non-zero exit code if any files were changed, Implies --dry-run") parser.add_argument('--keep-count', action='store_true', help='Do not strip the execution count/prompt number') parser.add_argument('--keep-output', action='store_true', @@ -428,6 +441,9 @@ def main(): args = parser.parse_args() git_config = ['git', 'config'] + if args.verify and not args.dry_run: + args.dry_run = True + if args._system: git_config.append('--system') install_location = INSTALL_LOCATION_SYSTEM @@ -483,6 +499,7 @@ def main(): input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') if sys.stdin else None output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='') + any_change = False for filename in args.files: if not (args.force or filename.endswith('.ipynb') or filename.endswith('.zpln')): continue @@ -490,7 +507,9 @@ def main(): try: with io.open(filename, 'r+', encoding='utf8', newline='') as f: out = output_stream if args.textconv or args.dry_run else f - process_notebook(f, out, args, extra_keys, filename) + if process_notebook(f, out, args, extra_keys, filename): + any_change = True + except nbformat.reader.NotJSONError: print(f"No valid notebook detected in '{filename}'", file=sys.stderr) raise SystemExit(1) @@ -504,7 +523,11 @@ def main(): if not args.files and input_stream: try: - 
process_notebook(input_stream, output_stream, args, extra_keys) + if process_notebook(input_stream, output_stream, args, extra_keys): + any_change = True except nbformat.reader.NotJSONError: print('No valid notebook detected on stdin', file=sys.stderr) raise SystemExit(1) + + if args.verify and any_change: + raise SystemExit(1) diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index 725c53d..5210134 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -53,49 +53,92 @@ def nbstripout_exe(): @pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES) -def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str]): +@pytest.mark.parametrize("verify", (True, False)) +def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str], verify: bool): with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f: expected = f.read() with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: - pc = run([nbstripout_exe()] + args, stdin=f, stdout=PIPE, universal_newlines=True) + input_ = f.read() + + with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + args = [nbstripout_exe()] + args + if verify: + args.append("--verify") + pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True) output = pc.stdout - assert output == expected + if verify: + # --verify implies --dry-run, so stdout holds the dry-run message rather than the stripped notebook; only the exit code is checked. 
+ assert pc.returncode == (1 if input_ != expected else 0) + else: + assert output == expected + assert pc.returncode == 0 @pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES) -def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path): +@pytest.mark.parametrize("verify", (True, False)) +def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path, verify: bool): with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f: expected = f.read() p = tmp_path / input_file with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: p.write_text(f.read()) - pc = run([nbstripout_exe(), p] + args, stdout=PIPE, universal_newlines=True) - assert not pc.stdout and p.read_text() == expected + with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + input_ = f.read() + + args = [nbstripout_exe(), p] + args + if verify: + args.append("--verify") + pc = run(args, stdout=PIPE, universal_newlines=True) + + output = pc.stdout.strip() + if verify: + if expected != input_: + assert "Dry run: would have stripped" in output + assert pc.returncode == 1 + + # Since verify implies --dry-run, we make sure the file is not modified + with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + output_ = f.read() + + assert output_ == input_ + else: + assert pc.returncode == 0 + assert not pc.stdout and p.read_text() == expected @pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES) -def test_dry_run_stdin(input_file: str, extra_args: List[str]): +@pytest.mark.parametrize("verify", (True, False)) +def test_dry_run_stdin(input_file: str, extra_args: List[str], verify: bool): expected = "Dry run: would have stripped input from stdin\n" with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: - pc = run([nbstripout_exe(), "--dry-run"] + extra_args, stdin=f, stdout=PIPE, universal_newlines=True) + args = [nbstripout_exe(), "--dry-run"] + extra_args + if verify: + args.append("--verify") + pc = run(args, stdin=f, 
stdout=PIPE, universal_newlines=True) output = pc.stdout assert output == expected + assert pc.returncode == (1 if verify else 0) @pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES) -def test_dry_run_args(input_file: str, extra_args: List[str]): +@pytest.mark.parametrize("verify", (True, False)) +def test_dry_run_args(input_file: str, extra_args: List[str], verify: bool): expected_regex = re.compile(f"Dry run: would have stripped .*[/\\\\]{input_file}\n") - - pc = run([nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args, stdout=PIPE, universal_newlines=True) + args = [nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args + if verify: + args.append("--verify") + pc = run(args, stdout=PIPE, universal_newlines=True) output = pc.stdout assert expected_regex.match(output) + if verify: + assert pc.returncode == 1 @pytest.mark.parametrize("input_file, expected_errs, extra_args", ERR_OUTPUT_CASES)