Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

custom blacklist metadata fields #92

Merged
merged 15 commits into from
Mar 26, 2019
1 change: 1 addition & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Copyright (c) 2015 Min RK, Florian Rathgeber, Michael McNeil Forbes
2019 Casper da Costa-Luis

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
Expand Down
14 changes: 14 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,20 @@ This is the same metadata used by the `init_cell nbextension`__.

__ https://github.com/ipython-contrib/jupyter_contrib_nbextensions/tree/master/src/jupyter_contrib_nbextensions/nbextensions/init_cell

Stripping metadata
++++++++++++++++++

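Additional metadata fields to strip can be configured via ``git config (--global) filter.nbstripout.extrakeys``, given as a whitespace-separated list of keys prefixed with ``metadata.`` (notebook-level metadata) or ``cell.metadata.`` (per-cell metadata).
An example would be: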

git config --global filter.nbstripout.extrakeys '
metadata.celltoolbar metadata.kernelspec.display_name
metadata.kernelspec.name metadata.language_info.codemirror_mode.version
metadata.language_info.pygments_lexer metadata.language_info.version
metadata.toc metadata.notify_time metadata.varInspector
cell.metadata.heading_collapsed cell.metadata.hidden
cell.metadata.code_folding cell.metadata.tags cell.metadata.init_cell'

Manual filter installation
==========================

Expand Down
4 changes: 4 additions & 0 deletions nbstripout/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._nbstripout import install, uninstall, status, main
from ._utils import pop_recursive, strip_output
__all__ = ["install", "uninstall", "status", "main",
"pop_recursive", "strip_output"]
113 changes: 31 additions & 82 deletions nbstripout.py → nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,11 @@

*.ipynb diff=ipynb
"""

from __future__ import print_function
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import io
import sys

# Module-level stream setup: wrap stdin/stdout so that notebook text is
# decoded/encoded as UTF-8. `input_stream` stays None when `sys.stdin` is
# falsy (i.e. no stdin is available).
# NOTE(review): indentation is not visible here; `output_stream` is presumably
# assigned unconditionally in both branches -- confirm against the real file.
input_stream = None
if sys.version_info < (3, 0):
import codecs
# Use UTF8 reader/writer for stdin/stdout
# http://stackoverflow.com/a/1169209
if sys.stdin:
input_stream = codecs.getreader('utf8')(sys.stdin)
output_stream = codecs.getwriter('utf8')(sys.stdout)
else:
# Wrap input/output stream in UTF-8 encoded text wrapper
# https://stackoverflow.com/a/16549381
if sys.stdin:
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

__version__ = '0.3.3'

from nbstripout._utils import strip_output
try:
# Jupyter >= 4
from nbformat import read, write, NO_CONVERT
Expand All @@ -131,67 +113,8 @@ def read(f, as_version):
def write(nb, f):
return current.write(nb, f, 'json')


def _cells(nb):
"""Yield all cells in an nbformat-insensitive manner"""
if nb.nbformat < 4:
for ws in nb.worksheets:
for cell in ws.cells:
yield cell
else:
for cell in nb.cells:
yield cell


def strip_output(nb, keep_output, keep_count):
    """
    Remove outputs, execution counts/prompt numbers and assorted metadata
    from the notebook object `nb` and return it.

    `keep_output` retains cell outputs; `keep_count` retains the
    execution counts/prompt numbers.  Cells whose metadata has a truthy
    `init_cell` or `keep_output` entry always keep their outputs.
    """
    for stale in ('signature', 'widgets'):
        nb.metadata.pop(stale, None)

    for cell in _cells(nb):
        # Per-cell metadata can force the output to be kept; counts and
        # metadata are still stripped for such cells.
        pinned = cell.metadata.get('init_cell') or cell.metadata.get('keep_output')
        retain = True if pinned else keep_output

        if 'outputs' in cell:
            if not retain:
                # Nothing is kept, so there are no counts left to consider.
                cell['outputs'] = []
            elif not keep_count:
                # Outputs stay, but their execution counts are blanked.
                for out in cell['outputs']:
                    if 'execution_count' in out:
                        out['execution_count'] = None

        # Blank the cell-level counter (name depends on the nbformat).
        if not keep_count:
            for counter in ('prompt_number', 'execution_count'):
                if counter in cell:
                    cell[counter] = None

        # This metadata is always removed.
        for style in ('collapsed', 'scrolled'):
            if style in cell.metadata:
                cell.metadata[style] = False
        if 'metadata' in cell:
            for field in ('collapsed', 'scrolled', 'ExecuteTime'):
                cell.metadata.pop(field, None)
    return nb
__all__ = ["install", "uninstall", "status", "main"]
__version__ = '0.3.3'


def install(attrfile=None):
Expand Down Expand Up @@ -273,6 +196,10 @@ def status(verbose=False):
diff = check_output(['git', 'config', 'diff.ipynb.textconv']).strip()
attributes = check_output(['git', 'check-attr', 'filter', '--', '*.ipynb']).strip()
diff_attributes = check_output(['git', 'check-attr', 'diff', '--', '*.ipynb']).strip()
try:
extra_keys = check_output(['git', 'config', 'filter.nbstripout.extrakeys']).strip()
except CalledProcessError:
extra_keys = ''
if attributes.endswith(b'unspecified'):
if verbose:
print('nbstripout is not installed in repository', git_dir)
Expand All @@ -284,6 +211,7 @@ def status(verbose=False):
print(' smudge =', smudge)
print(' required =', required)
print(' diff=', diff)
print(' extrakeys=', extra_keys)
print('\nAttributes:\n ', attributes)
print('\nDiff Attributes:\n ', diff_attributes)
return 0
Expand All @@ -294,6 +222,7 @@ def status(verbose=False):


def main():
from subprocess import check_output, CalledProcessError
parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter)
task = parser.add_mutually_exclusive_group()
task.add_argument('--install', action='store_true',
Expand Down Expand Up @@ -336,13 +265,33 @@ def main():
print(__version__)
sys.exit(0)

try:
extra_keys = check_output(['git', 'config', 'filter.nbstripout.extrakeys']).strip()
except CalledProcessError:
extra_keys = ''

input_stream = None
if sys.version_info < (3, 0):
import codecs
# Use UTF8 reader/writer for stdin/stdout
# http://stackoverflow.com/a/1169209
if sys.stdin:
input_stream = codecs.getreader('utf8')(sys.stdin)
output_stream = codecs.getwriter('utf8')(sys.stdout)
else:
# Wrap input/output stream in UTF-8 encoded text wrapper
# https://stackoverflow.com/a/16549381
if sys.stdin:
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

for filename in args.files:
if not (args.force or filename.endswith('.ipynb')):
continue
try:
with io.open(filename, 'r', encoding='utf8') as f:
nb = read(f, as_version=NO_CONVERT)
nb = strip_output(nb, args.keep_output, args.keep_count)
nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys)
if args.textconv:
write(nb, output_stream)
output_stream.flush()
Expand All @@ -360,7 +309,7 @@ def main():
if not args.files and input_stream:
try:
nb = strip_output(read(input_stream, as_version=NO_CONVERT),
args.keep_output, args.keep_count)
args.keep_output, args.keep_count, extra_keys)
write(nb, output_stream)
output_stream.flush()
except NotJSONError:
Expand Down
104 changes: 104 additions & 0 deletions nbstripout/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import sys

__all__ = ["pop_recursive", "strip_output"]


def pop_recursive(d, key, default=None):
    """dict.pop(key) where `key` is a `.`-delimited list of nested keys.

    Walks `d` along every component of `key` except the last one, then pops
    the last component from the innermost mapping (modifying `d` in place).

    :param d: (possibly nested) dict-like object.
    :param key: `.`-delimited path such as ``'a.b.c'``.
    :param default: returned when a component of the path is missing or a
        value on the path is not dict-like.
    :return: the popped value, or `default`.

    >>> d = {'a': {'b': 1, 'c': 2}}
    >>> pop_recursive(d, 'a.c')
    2
    >>> d
    {'a': {'b': 1}}
    >>> pop_recursive({'a': [1, 2]}, 'a.b') is None
    True
    """
    nested = key.split('.')
    current = d
    for k in nested[:-1]:
        if not hasattr(current, 'get'):
            return default
        # A missing intermediate key yields an empty dict, so the final
        # pop below simply returns `default`.
        current = current.get(k, {})
    # Lists and sets also have `pop`, but not with a (key, default)
    # signature; requiring `get` as well restricts this to mappings.
    if not (hasattr(current, 'get') and hasattr(current, 'pop')):
        return default
    return current.pop(nested[-1], default)


def _cells(nb):
"""Yield all cells in an nbformat-insensitive manner"""
if nb.nbformat < 4:
for ws in nb.worksheets:
for cell in ws.cells:
yield cell
else:
for cell in nb.cells:
yield cell


def strip_output(nb, keep_output, keep_count, extra_keys=''):
    """
    Strip the outputs, execution count/prompt number and miscellaneous
    metadata from a notebook object, unless specified to keep either the outputs
    or counts.

    :param nb: notebook object; modified in place and returned.
    :param keep_output: if true, cell outputs are kept.
    :param keep_count: if true, execution counts/prompt numbers are kept.
    :param extra_keys: whitespace-separated (text or bytes) list of extra
        metadata keys to remove.  Keys must start with `metadata.`
        (notebook-level) or `cell.metadata.` (per cell); anything else is
        reported on stderr and ignored.

    `extra_keys` could be 'metadata.foo cell.metadata.bar metadata.baz'
    """
    # `git config` output arrives as bytes; decode explicitly as UTF-8 rather
    # than relying on the interpreter's default codec.
    if isinstance(extra_keys, (bytes, bytearray)):
        extra_keys = extra_keys.decode('utf-8')

    nb_fields = []
    cell_fields = []
    for key in extra_keys.split():
        if key.startswith('metadata.'):
            nb_fields.append(key[len('metadata.'):])
        elif key.startswith('cell.metadata.'):
            cell_fields.append(key[len('cell.metadata.'):])
        else:
            # Newline-terminated so consecutive warnings do not run together.
            sys.stderr.write('ignoring extra key `%s`\n' % key)

    nb.metadata.pop('signature', None)
    nb.metadata.pop('widgets', None)
    for field in nb_fields:
        pop_recursive(nb.metadata, field)

    for cell in _cells(nb):
        # Keep the output for these cells, but strip count and metadata
        keep_output_this_cell = bool(
            keep_output
            or cell.metadata.get('init_cell')
            or cell.metadata.get('keep_output'))

        # Remove the outputs, unless directed otherwise
        if 'outputs' in cell:
            if not keep_output_this_cell:
                # Default behavior strips outputs. With all outputs stripped,
                # there are no counts to keep and keep_count is ignored.
                cell['outputs'] = []
            elif not keep_count:
                # Outputs are kept but their counts are not.
                for output in cell['outputs']:
                    if 'execution_count' in output:
                        output['execution_count'] = None
            # If keep_output_this_cell and keep_count, do nothing.

        # Remove the prompt_number/execution_count, unless directed otherwise
        if not keep_count:
            for counter in ('prompt_number', 'execution_count'):
                if counter in cell:
                    cell[counter] = None

        # Always remove this metadata, plus any user-configured cell keys.
        # (Popping is sufficient; resetting the values first is redundant.)
        if 'metadata' in cell:
            for field in ('collapsed', 'scrolled', 'ExecuteTime'):
                cell.metadata.pop(field, None)
            for field in cell_fields:
                pop_recursive(cell.metadata, field)
    return nb
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from setuptools import setup
from setuptools import setup, find_packages

with open('README.rst') as f:
long_description = f.read()
Expand Down Expand Up @@ -32,10 +32,11 @@

description='Strips outputs from Jupyter and IPython notebooks',
long_description=long_description,
py_modules=['nbstripout'],
packages=find_packages(),
provides=['nbstripout'],
entry_points={
'console_scripts': [
'nbstripout = nbstripout:main'
'nbstripout = nbstripout._nbstripout:main'
],
},

Expand Down
30 changes: 28 additions & 2 deletions tests/test-git.t
Original file line number Diff line number Diff line change
@@ -1,17 +1,43 @@
$ git init foobar
Initialized empty Git repository in .* (re)
$ cd foobar
$ git config --local filter.nbstripout.extrakeys ' '
$ echo -n "*.txt text" >> .git/info/attributes
$ ${NBSTRIPOUT_EXE:-nbstripout} --is-installed
[1]
$ ${NBSTRIPOUT_EXE:-nbstripout} --install
$ ${NBSTRIPOUT_EXE:-nbstripout} --is-installed
$ git diff --no-index --no-ext-diff --unified=0 --exit-code -a --no-prefix ${TESTDIR}/test_diff.ipynb ${TESTDIR}/test_diff_output.ipynb
$ git diff --no-index ${TESTDIR}/test_diff.ipynb ${TESTDIR}/test_diff_different.ipynb
$ git diff --no-index ${TESTDIR}/test_diff.ipynb ${TESTDIR}/test_diff_different_extrakeys.ipynb
(diff --git.*) (re)
(index .*) (re)
(--- .*test_diff.ipynb) (re)
(\+\+\+ .*test_diff_different.ipynb) (re)
(\+\+\+ .*test_diff_different_extrakeys.ipynb) (re)
@@ -6,15 +6,14 @@
"metadata": {},
"outputs": [],
"source": [
- "print(\"aou\")"
+ "print(\"aou now it is different\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
- "language": "python",
- "name": "python2"
+ "language": "python"
},
"language_info": {
"codemirror_mode": {
[1]
$ git config --local filter.nbstripout.extrakeys 'cell.metadata.collapsed metadata.kernelspec.name'
$ git diff --no-index ${TESTDIR}/test_diff.ipynb ${TESTDIR}/test_diff_different_extrakeys.ipynb
(diff --git.*) (re)
(index .*) (re)
(--- .*test_diff.ipynb) (re)
(\+\+\+ .*test_diff_different_extrakeys.ipynb) (re)
@@ -6,7 +6,7 @@
"metadata": {},
"outputs": [],
Expand Down
Loading