Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
roy-ht committed Nov 14, 2019
0 parents commit 2152c73
Show file tree
Hide file tree
Showing 7 changed files with 167 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.ipynb_checkpoints
.mypy_cache
.vscode
.pytest_cache
__pycache__
*.egg-info
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
default_stages: [commit, push]
fail_fast: true
repos:
- repo: local
hooks:
- id: jupyter-notebook-cleanup
name: jupyter-notebook-cleanup
language: python
entry: jupyter-notebook-cleanup
files: \.ipynb$
types: ["file"]

- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.8
hooks:
- id: flake8
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.740
hooks:
- id: mypy
8 changes: 8 additions & 0 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
- id: jupyter-notebook-cleanup
name: jupyter-notebook-cleanup
description: "Remove cell output for better security"
entry: jupyter-notebook-cleanup
language: python
language_version: python3
files: \.ipynb$
types: ["file"]
Empty file.
54 changes: 54 additions & 0 deletions jupyter_notebook_cleanup/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import argparse
import copy
import difflib
import json
from collections import OrderedDict


def main():
    """Command-line entry point: clean every notebook named on the command line.

    The parsed ``--dry-run`` flag is forwarded as ``preview`` so that, when it
    is set, each notebook is only diffed instead of being rewritten.
    """
    options = parse_args()
    dry_run = options.dry_run
    for notebook_path in options.files:
        remove_output(notebook_path, preview=dry_run)


def parse_args():
    """Parse the process command line.

    Returns:
        An ``argparse.Namespace`` with ``files`` (list of positional notebook
        paths, possibly empty) and ``dry_run`` (True when ``--dry-run`` was
        given, False otherwise).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "files",
        nargs="*",
        help="ipynb files",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
    )
    namespace = parser.parse_args()
    return namespace


def check_if_unremovable(source):
    """Report whether a cell's source carries a ``[pin]`` comment annotation.

    Args:
        source: iterable of source-line strings belonging to one cell.

    Returns:
        True if at least one line, after stripping surrounding whitespace,
        starts with ``#`` and contains ``[pin]``; False otherwise. Every line
        is examined, not only the first one.
    """
    stripped_lines = (line.strip() for line in source)
    return any(
        text.startswith("#") and "[pin]" in text
        for text in stripped_lines
    )


def remove_output(path, preview):
    """Clear the outputs of every unpinned cell in the notebook at ``path``.

    A cell is processed only when it has both an ``outputs`` and a ``source``
    entry; cells for which ``check_if_unremovable`` returns True keep their
    outputs.

    Args:
        path: path of the ``.ipynb`` file to clean.
        preview: if true, do not overwrite ``path``; only print a unified diff
            between the current and the cleaned JSON. If false, the cleaned
            notebook is written back to ``path``.
    """
    # Read with the same encoding that is used for writing below; relying on
    # the locale default breaks non-ASCII notebooks on non-UTF-8 platforms.
    with open(path, "rt", encoding="utf-8") as f:
        data = json.load(f, object_pairs_hook=OrderedDict)

    # Work on a copy so the original stays available for the preview diff.
    new_data = copy.deepcopy(data)
    for cell in new_data["cells"]:
        if "outputs" not in cell or "source" not in cell:
            continue
        source = cell["source"]
        if isinstance(source, str):
            # The notebook format permits a multi-line source stored as one
            # string; split it for the pin check only (the cell's source is
            # left untouched) so such cells are cleaned as well.
            source = source.splitlines(keepends=True)
        if not isinstance(source, list):
            continue
        if check_if_unremovable(source):
            continue
        cell["outputs"] = []

    dump_args = {"ensure_ascii": False, "separators": (",", ": "), "indent": 1}
    if preview:
        before_j = json.dumps(data, **dump_args).splitlines()
        after_j = json.dumps(new_data, **dump_args).splitlines()
        print("\n".join(difflib.unified_diff(before_j, after_j, fromfile="before", tofile="after")))
    else:
        with open(path, "wt", encoding="utf-8") as fo:
            json.dump(new_data, fo, **dump_args)


# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()
66 changes: 66 additions & 0 deletions notebooks/test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test for output sanitization"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"a = 1\n",
"b = 2\n",
"a + b"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "日本語\n"
}
],
"source": [
"# [pin] -- do not remove output\n",
"print('日本語')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
13 changes: 13 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from setuptools import find_packages, setup

# Distribution metadata for the jupyter-notebook-cleanup package.
setup(
    name="jupyter-notebook-cleanup",
    version="1.0.0",
    description="Automagically remove notebook outputs for better security",
    author="Hiroyuki Tanaka",
    author_email="aflc0x@gmail.com",
    packages=find_packages(),
    python_requires=">=3",
    # No runtime dependencies are declared.
    install_requires=[],
    # Exposes a `jupyter-notebook-cleanup` console command bound to
    # jupyter_notebook_cleanup.cli:main.
    entry_points={"console_scripts": ["jupyter-notebook-cleanup=jupyter_notebook_cleanup.cli:main"]},
)

0 comments on commit 2152c73

Please sign in to comment.