diff --git a/LICENSE.txt b/LICENSE.txt index fc6d123..ecc5369 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,5 @@ Copyright (c) 2015 Min RK, Florian Rathgeber, Michael McNeil Forbes +2019 Casper da Costa-Luis Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/nbstripout/__init__.py b/nbstripout/__init__.py new file mode 100644 index 0000000..4c1bbab --- /dev/null +++ b/nbstripout/__init__.py @@ -0,0 +1,4 @@ +from ._nbstripout import install, uninstall, status, main +from ._utils import pop_recursive, strip_output +__all__ = ["install", "uninstall", "status", "main", + "pop_recursive", "strip_output"] diff --git a/nbstripout.py b/nbstripout/_nbstripout.py similarity index 77% rename from nbstripout.py rename to nbstripout/_nbstripout.py index b8e063a..4fb2dc7 100755 --- a/nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -85,11 +85,12 @@ *.ipynb diff=ipynb """ - from __future__ import print_function from argparse import ArgumentParser, RawDescriptionHelpFormatter import io import sys +from nbstripout._utils import strip_output +__all__ = ["install", "uninstall", "status", "main"] input_stream = None if sys.version_info < (3, 0): @@ -132,106 +133,6 @@ def write(nb, f): return current.write(nb, f, 'json') -def _cells(nb): - """Yield all cells in an nbformat-insensitive manner""" - if nb.nbformat < 4: - for ws in nb.worksheets: - for cell in ws.cells: - yield cell - else: - for cell in nb.cells: - yield cell - - -def pop_recursive(d, key, default=None): - """dict.pop(key) where `key` is a `.`-delimited list of nested keys. - - >>> d = {'a': {'b': 1, 'c': 2}} - >>> pop_recursive(d, 'a.c') - 2 - >>> d - {'a': {'b': 1}} - """ - nested = key.split('.') - current = d - for k in nested[:-1]: - if hasattr(current, 'get'): - current = current.get(k, {}) - else: - return default - if not hasattr(current, 'pop'): - return default - return current.pop(nested[-1], default) - - -def strip_output(nb, keep_output, keep_count, extra_keys=''): - """ - Strip the outputs, execution count/prompt number and miscellaneous - metadata from a notebook object, unless specified to keep either the outputs - or counts. - - `extra_keys` could be 'metadata.foo cell.metadata.bar metadata.baz' - """ - extra_keys = extra_keys.split() - keys = {'metadata': [], 'cell': {'metadata': []}} - for key in extra_keys: - if key.startswith('metadata.'): - keys['metadata'].append(key[len('metadata.'):]) - elif key.startswith('cell.metadata.'): - keys['cell']['metadata'].append(key[len('cell.metadata.'):]) - else: - sys.stderr.write('ignoring extra key `%s`' % key) - - nb.metadata.pop('signature', None) - nb.metadata.pop('widgets', None) - for field in keys['metadata']: - pop_recursive(nb.metadata, field) - - for cell in _cells(nb): - - keep_output_this_cell = keep_output - - # Keep the output for these cells, but strip count and metadata - if cell.metadata.get('init_cell') or cell.metadata.get('keep_output'): - keep_output_this_cell = True - - # Remove the outputs, unless directed otherwise - if 'outputs' in cell: - - # Default behavior strips outputs. With all outputs stripped, - # there are no counts to keep and keep_count is ignored. - if not keep_output_this_cell: - cell['outputs'] = [] - - # If keep_output_this_cell, but not keep_count, strip the counts - # from the output. - if keep_output_this_cell and not keep_count: - for output in cell['outputs']: - if 'execution_count' in output: - output['execution_count'] = None - - # If keep_output_this_cell and keep_count, do nothing. - - # Remove the prompt_number/execution_count, unless directed otherwise - if 'prompt_number' in cell and not keep_count: - cell['prompt_number'] = None - if 'execution_count' in cell and not keep_count: - cell['execution_count'] = None - - # Always remove this metadata - for output_style in ['collapsed', 'scrolled']: - if output_style in cell.metadata: - cell.metadata[output_style] = False - if 'metadata' in cell: - for field in ['collapsed', 'scrolled', 'ExecuteTime']: - cell.metadata.pop(field, None) - for (extra, fields) in keys['cell'].items(): - if extra in cell: - for field in fields: - pop_recursive(getattr(cell, extra), field) - return nb - - def install(attrfile=None): """Install the git filter and set the git attributes.""" from os import name, path diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py new file mode 100644 index 0000000..82d36cf --- /dev/null +++ b/nbstripout/_utils.py @@ -0,0 +1,102 @@ +import sys + +__all__ = ["pop_recursive", "strip_output"] + + +def pop_recursive(d, key, default=None): + """dict.pop(key) where `key` is a `.`-delimited list of nested keys. + + >>> d = {'a': {'b': 1, 'c': 2}} + >>> pop_recursive(d, 'a.c') + 2 + >>> d + {'a': {'b': 1}} + """ + nested = key.split('.') + current = d + for k in nested[:-1]: + if hasattr(current, 'get'): + current = current.get(k, {}) + else: + return default + if not hasattr(current, 'pop'): + return default + return current.pop(nested[-1], default) + + +def _cells(nb): + """Yield all cells in an nbformat-insensitive manner""" + if nb.nbformat < 4: + for ws in nb.worksheets: + for cell in ws.cells: + yield cell + else: + for cell in nb.cells: + yield cell + + +def strip_output(nb, keep_output, keep_count, extra_keys=''): + """ + Strip the outputs, execution count/prompt number and miscellaneous + metadata from a notebook object, unless specified to keep either the outputs + or counts. + + `extra_keys` could be 'metadata.foo cell.metadata.bar metadata.baz' + """ + extra_keys = extra_keys.split() + keys = {'metadata': [], 'cell': {'metadata': []}} + for key in extra_keys: + if key.startswith('metadata.'): + keys['metadata'].append(key[len('metadata.'):]) + elif key.startswith('cell.metadata.'): + keys['cell']['metadata'].append(key[len('cell.metadata.'):]) + else: + sys.stderr.write('ignoring extra key `%s`' % key) + + nb.metadata.pop('signature', None) + nb.metadata.pop('widgets', None) + for field in keys['metadata']: + pop_recursive(nb.metadata, field) + + for cell in _cells(nb): + keep_output_this_cell = keep_output + + # Keep the output for these cells, but strip count and metadata + if cell.metadata.get('init_cell') or cell.metadata.get('keep_output'): + keep_output_this_cell = True + + # Remove the outputs, unless directed otherwise + if 'outputs' in cell: + + # Default behavior strips outputs. With all outputs stripped, + # there are no counts to keep and keep_count is ignored. + if not keep_output_this_cell: + cell['outputs'] = [] + + # If keep_output_this_cell, but not keep_count, strip the counts + # from the output. + if keep_output_this_cell and not keep_count: + for output in cell['outputs']: + if 'execution_count' in output: + output['execution_count'] = None + + # If keep_output_this_cell and keep_count, do nothing. + + # Remove the prompt_number/execution_count, unless directed otherwise + if 'prompt_number' in cell and not keep_count: + cell['prompt_number'] = None + if 'execution_count' in cell and not keep_count: + cell['execution_count'] = None + + # Always remove this metadata + for output_style in ['collapsed', 'scrolled']: + if output_style in cell.metadata: + cell.metadata[output_style] = False + if 'metadata' in cell: + for field in ['collapsed', 'scrolled', 'ExecuteTime']: + cell.metadata.pop(field, None) + for (extra, fields) in keys['cell'].items(): + if extra in cell: + for field in fields: + pop_recursive(getattr(cell, extra), field) + return nb diff --git a/setup.py b/setup.py index c30568b..6b6f208 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup +from setuptools import setup, find_packages with open('README.rst') as f: long_description = f.read() @@ -32,10 +32,11 @@ description='Strips outputs from Jupyter and IPython notebooks', long_description=long_description, - py_modules=['nbstripout'], + packages=find_packages(), + provides=['nbstripout'], entry_points={ 'console_scripts': [ - 'nbstripout = nbstripout:main' + 'nbstripout = nbstripout._nbstripout:main' ], }, diff --git a/tests/test_functions.py b/tests/test_functions.py deleted file mode 100644 index 6a3c2b4..0000000 --- a/tests/test_functions.py +++ /dev/null @@ -1,7 +0,0 @@ -import nbstripout - - -def test_pos_recursive(): - d = {'a': {'b': 1, 'c': 2}} - assert nbstripout.pop_recursive(d, 'a.c') == 2 - assert d == {'a': {'b': 1}} diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..a2daf75 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,7 @@ +from nbstripout._utils import pop_recursive + + +def test_pop_recursive(): + d = {'a': {'b': 1, 'c': 2}} + assert pop_recursive(d, 'a.c') == 2 + assert d == {'a': {'b': 1}}