Skip to content

Commit

Permalink
attempt to modularise
Browse files Browse the repository at this point in the history
  • Loading branch information
casperdcl committed Feb 27, 2019
1 parent bc39a51 commit 4dfe997
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 111 deletions.
1 change: 1 addition & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Copyright (c) 2015 Min RK, Florian Rathgeber, Michael McNeil Forbes
2019 Casper da Costa-Luis

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
Expand Down
4 changes: 4 additions & 0 deletions nbstripout/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._nbstripout import install, uninstall, status, main
from ._utils import pop_recursive, strip_output
__all__ = ["install", "uninstall", "status", "main",
"pop_recursive", "strip_output"]
103 changes: 2 additions & 101 deletions nbstripout.py → nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,12 @@
*.ipynb diff=ipynb
"""

from __future__ import print_function
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import io
import sys
from nbstripout._utils import strip_output
__all__ = ["install", "uninstall", "status", "main"]

input_stream = None
if sys.version_info < (3, 0):
Expand Down Expand Up @@ -132,106 +133,6 @@ def write(nb, f):
return current.write(nb, f, 'json')


def _cells(nb):
    """Iterate over every cell of `nb`, whatever its nbformat version.

    Notebooks with `nbformat` below 4 keep their cells inside
    `nb.worksheets`; later formats expose them directly as `nb.cells`.
    """
    # Treat a v4+ notebook as a single "worksheet" so one loop serves both.
    containers = nb.worksheets if nb.nbformat < 4 else [nb]
    for container in containers:
        for cell in container.cells:
            yield cell


def pop_recursive(d, key, default=None):
    """dict.pop(key) where `key` is a `.`-delimited list of nested keys.

    Walks `d` along every component of `key` except the last, then pops
    the last component from the mapping that was reached. `default` is
    returned when the path is missing or runs into a value that is not
    dict-like (e.g. a list or a string).

    >>> d = {'a': {'b': 1, 'c': 2}}
    >>> pop_recursive(d, 'a.c')
    2
    >>> d
    {'a': {'b': 1}}
    >>> pop_recursive({'a': [1, 2]}, 'a.c') is None
    True
    """
    nested = key.split('.')
    current = d
    for k in nested[:-1]:
        if not hasattr(current, 'get'):
            return default
        # A missing intermediate key yields an empty dict, so the final
        # pop below simply falls through to `default`.
        current = current.get(k, {})
    # Lists and sets also have a `pop` method, but with a different
    # signature; requiring `get` as well restricts the pop to mappings and
    # avoids a TypeError on such values.
    if not (hasattr(current, 'get') and hasattr(current, 'pop')):
        return default
    return current.pop(nested[-1], default)


def strip_output(nb, keep_output, keep_count, extra_keys=''):
    """
    Strip the outputs, execution count/prompt number and miscellaneous
    metadata from a notebook object, unless specified to keep either the
    outputs or counts.

    `nb` is modified in place and is also returned.
    `keep_output`: when true, cell outputs are left in place. Cells whose
    metadata has a truthy `init_cell` or `keep_output` entry always keep
    their outputs.
    `keep_count`: when true, prompt numbers / execution counts are kept.
    `extra_keys` could be 'metadata.foo cell.metadata.bar metadata.baz'
    """
    keys = {'metadata': [], 'cell': {'metadata': []}}
    for key in extra_keys.split():
        if key.startswith('metadata.'):
            keys['metadata'].append(key[len('metadata.'):])
        elif key.startswith('cell.metadata.'):
            keys['cell']['metadata'].append(key[len('cell.metadata.'):])
        else:
            # Newline-terminated so successive warnings do not run together.
            sys.stderr.write('ignoring extra key `%s`\n' % key)

    nb.metadata.pop('signature', None)
    nb.metadata.pop('widgets', None)
    for field in keys['metadata']:
        pop_recursive(nb.metadata, field)

    for cell in _cells(nb):
        # Cells without a metadata entry are treated as having none, in
        # line with the `'metadata' in cell` guard further down.
        cell_metadata = cell.metadata if 'metadata' in cell else {}

        keep_output_this_cell = keep_output

        # Keep the output for these cells, but strip count and metadata
        if cell_metadata.get('init_cell') or cell_metadata.get('keep_output'):
            keep_output_this_cell = True

        # Remove the outputs, unless directed otherwise
        if 'outputs' in cell:
            if not keep_output_this_cell:
                # Default behavior strips outputs. With all outputs
                # stripped, there are no counts to keep and keep_count is
                # ignored.
                cell['outputs'] = []
            elif not keep_count:
                # Outputs are kept but counts are not: blank the counts
                # stored on the individual outputs.
                for output in cell['outputs']:
                    if 'execution_count' in output:
                        output['execution_count'] = None
            # If keep_output_this_cell and keep_count, do nothing.

        # Remove the prompt_number/execution_count, unless directed otherwise
        if not keep_count:
            for counter in ('prompt_number', 'execution_count'):
                if counter in cell:
                    cell[counter] = None

        # Always remove this metadata
        if 'metadata' in cell:
            for field in ['collapsed', 'scrolled', 'ExecuteTime']:
                cell.metadata.pop(field, None)
        for (extra, fields) in keys['cell'].items():
            if extra in cell:
                for field in fields:
                    pop_recursive(getattr(cell, extra), field)
    return nb


def install(attrfile=None):
"""Install the git filter and set the git attributes."""
from os import name, path
Expand Down
102 changes: 102 additions & 0 deletions nbstripout/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import sys

__all__ = ["pop_recursive", "strip_output"]


def pop_recursive(d, key, default=None):
    """dict.pop(key) where `key` is a `.`-delimited list of nested keys.

    Walks `d` along every component of `key` except the last, then pops
    the last component from the mapping that was reached. `default` is
    returned when the path is missing or runs into a value that is not
    dict-like (e.g. a list or a string).

    >>> d = {'a': {'b': 1, 'c': 2}}
    >>> pop_recursive(d, 'a.c')
    2
    >>> d
    {'a': {'b': 1}}
    >>> pop_recursive({'a': [1, 2]}, 'a.c') is None
    True
    """
    nested = key.split('.')
    current = d
    for k in nested[:-1]:
        if not hasattr(current, 'get'):
            return default
        # A missing intermediate key yields an empty dict, so the final
        # pop below simply falls through to `default`.
        current = current.get(k, {})
    # Lists and sets also have a `pop` method, but with a different
    # signature; requiring `get` as well restricts the pop to mappings and
    # avoids a TypeError on such values.
    if not (hasattr(current, 'get') and hasattr(current, 'pop')):
        return default
    return current.pop(nested[-1], default)


def _cells(nb):
    """Iterate over every cell of `nb`, whatever its nbformat version.

    Notebooks with `nbformat` below 4 keep their cells inside
    `nb.worksheets`; later formats expose them directly as `nb.cells`.
    """
    # Treat a v4+ notebook as a single "worksheet" so one loop serves both.
    containers = nb.worksheets if nb.nbformat < 4 else [nb]
    for container in containers:
        for cell in container.cells:
            yield cell


def strip_output(nb, keep_output, keep_count, extra_keys=''):
    """
    Strip the outputs, execution count/prompt number and miscellaneous
    metadata from a notebook object, unless specified to keep either the
    outputs or counts.

    `nb` is modified in place and is also returned.
    `keep_output`: when true, cell outputs are left in place. Cells whose
    metadata has a truthy `init_cell` or `keep_output` entry always keep
    their outputs.
    `keep_count`: when true, prompt numbers / execution counts are kept.
    `extra_keys` could be 'metadata.foo cell.metadata.bar metadata.baz'
    """
    keys = {'metadata': [], 'cell': {'metadata': []}}
    for key in extra_keys.split():
        if key.startswith('metadata.'):
            keys['metadata'].append(key[len('metadata.'):])
        elif key.startswith('cell.metadata.'):
            keys['cell']['metadata'].append(key[len('cell.metadata.'):])
        else:
            # Newline-terminated so successive warnings do not run together.
            sys.stderr.write('ignoring extra key `%s`\n' % key)

    nb.metadata.pop('signature', None)
    nb.metadata.pop('widgets', None)
    for field in keys['metadata']:
        pop_recursive(nb.metadata, field)

    for cell in _cells(nb):
        # Cells without a metadata entry are treated as having none, in
        # line with the `'metadata' in cell` guard further down.
        cell_metadata = cell.metadata if 'metadata' in cell else {}

        keep_output_this_cell = keep_output

        # Keep the output for these cells, but strip count and metadata
        if cell_metadata.get('init_cell') or cell_metadata.get('keep_output'):
            keep_output_this_cell = True

        # Remove the outputs, unless directed otherwise
        if 'outputs' in cell:
            if not keep_output_this_cell:
                # Default behavior strips outputs. With all outputs
                # stripped, there are no counts to keep and keep_count is
                # ignored.
                cell['outputs'] = []
            elif not keep_count:
                # Outputs are kept but counts are not: blank the counts
                # stored on the individual outputs.
                for output in cell['outputs']:
                    if 'execution_count' in output:
                        output['execution_count'] = None
            # If keep_output_this_cell and keep_count, do nothing.

        # Remove the prompt_number/execution_count, unless directed otherwise
        if not keep_count:
            for counter in ('prompt_number', 'execution_count'):
                if counter in cell:
                    cell[counter] = None

        # Always remove this metadata
        if 'metadata' in cell:
            for field in ['collapsed', 'scrolled', 'ExecuteTime']:
                cell.metadata.pop(field, None)
        for (extra, fields) in keys['cell'].items():
            if extra in cell:
                for field in fields:
                    pop_recursive(getattr(cell, extra), field)
    return nb
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from setuptools import setup
from setuptools import setup, find_packages

with open('README.rst') as f:
long_description = f.read()
Expand Down Expand Up @@ -32,10 +32,11 @@

description='Strips outputs from Jupyter and IPython notebooks',
long_description=long_description,
py_modules=['nbstripout'],
packages=find_packages(),
provides=['nbstripout'],
entry_points={
'console_scripts': [
'nbstripout = nbstripout:main'
'nbstripout = nbstripout._nbstripout:main'
],
},

Expand Down
7 changes: 0 additions & 7 deletions tests/test_functions.py

This file was deleted.

7 changes: 7 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from nbstripout._utils import pop_recursive


def test_pop_recursive():
    """Popping a dotted key returns the nested value and removes only it."""
    nested = {'a': {'b': 1, 'c': 2}}
    popped = pop_recursive(nested, 'a.c')
    assert popped == 2
    assert nested == {'a': {'b': 1}}

0 comments on commit 4dfe997

Please sign in to comment.