From 7b9b2154fcbe8b68f8faeadc68f15b2796d743ec Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Mon, 15 Nov 2021 17:15:36 -0800 Subject: [PATCH 01/23] Start working on mutation issues in validate. We change the validation logic and separate the normalisation from the validation step. We make sure that if a notebook is normalized, it emits a warning. In the future we will turn the warning in to an Error. We add test for the current and an xfail test for the future behavior --- nbformat/json_compat.py | 2 + nbformat/validator.py | 117 +++++++++++++++++++++------- nbformat/warnings.py | 32 ++++++++ tests/test4.5.ipynb | 164 ++++++++++++++++++++++++++++++++++++++++ tests/test_validator.py | 57 +++++++++++++- 5 files changed, 342 insertions(+), 30 deletions(-) create mode 100644 nbformat/warnings.py create mode 100644 tests/test4.5.ipynb diff --git a/nbformat/json_compat.py b/nbformat/json_compat.py index fbab24ba..737a9111 100644 --- a/nbformat/json_compat.py +++ b/nbformat/json_compat.py @@ -88,6 +88,8 @@ def _validator_for_name(validator_name): for (name, module, validator_cls) in _VALIDATOR_MAP: if module and validator_name == name: return validator_cls + # we always return something. + raise ValueError(f"Missing validator for {repr(validator_name)}") def get_current_validator(): diff --git a/nbformat/validator.py b/nbformat/validator.py index 888013b2..aa22c2d4 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -5,6 +5,9 @@ import json import os import pprint +import sys +import warnings +from copy import deepcopy from traitlets.log import get_logger @@ -12,6 +15,7 @@ from .corpus.words import generate_corpus_id from .json_compat import ValidationError, _validator_for_name, get_current_validator from .reader import get_version +from .warnings import DuplicateCellId, MissingIDFieldWarning validators = {} @@ -246,6 +250,82 @@ def better_validation_error(error, version, version_minor): return NotebookValidationError(error, ref) +def normalize(nbdict, version, version_minor): + """ + EXPERIMENTAL + + normalise a notebook prior to validation. + + This tries to implement a couple of normalisation steps to standardise + notebooks and make validation easier. + + You should in general not rely on this function and make sure the notebooks + that reach nbformat are already in a normal form. + + Parameters + ---------- + nbdict : dict + notebook document + version : int + version_minor : int + + Returns + ------- + changes : int + number of changes in the notebooks + notebook : dict + deep-copy of the original object with relevant changes. + + """ + nbdict = deepcopy(nbdict) + return _normalize(nbdict) + + +def _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids): + changes = 0 + + if version >= 4 and version_minor >= 5: + # if we support cell ids ensure default ids are provided + for cell in nbdict["cells"]: + if "id" not in cell: + warnings.warn( + "Code cell is missing an id field, this will become" + " a hard error in future nbformat versions. You may want" + " to use `normalize()` on your notebooks before validations" + " (available since nbformat 5.1.4). Previous of nbformat" + " are also mutating their arguments, and will stop to do so" + " in the future.", + MissingIDFieldWarning, + stacklevel=3, + ) + # Generate cell ids if any are missing + if repair_duplicate_cell_ids: + cell["id"] = generate_corpus_id() + changes += 1 + + # if we support cell ids check for uniqueness when validating the whole notebook + seen_ids = set() + for cell in nbdict["cells"]: + if "id" not in cell: + continue + cell_id = cell["id"] + if cell_id in seen_ids: + # Best effort to repair if we find a duplicate id + if repair_duplicate_cell_ids: + new_id = generate_corpus_id() + cell["id"] = new_id + changes += 1 + warnings.warn( + f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.", + DuplicateCellId, + stacklevel=3, + ) + else: + raise ValidationError(f"Non-unique cell id '{cell_id}' detected.") + seen_ids.add(cell_id) + return changes, nbdict + + def validate( nbdict=None, ref=None, @@ -256,13 +336,18 @@ def validate( repair_duplicate_cell_ids=True, strip_invalid_metadata=False, ): + """Checks whether the given notebook dict-like object conforms to the relevant notebook format schema. - + Parameters + ---------- + ref : optional, str + reference to the subset of the schema we want to validate against. + for example ``"markdown_cell"``, `"code_cell"` .... Raises ValidationError if not valid. """ - + assert isinstance(ref, str) or ref is None # backwards compatibility for nbjson argument if nbdict is not None: pass @@ -283,13 +368,8 @@ def validate( if version is None: version, version_minor = 1, 0 - notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5 - if notebook_supports_cell_ids and repair_duplicate_cell_ids: - # Auto-generate cell ids for cells that are missing them. - for cell in nbdict["cells"]: - if "id" not in cell: - # Generate cell ids if any are missing - cell["id"] = generate_corpus_id() + if ref is None: + _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids) for error in iter_validate( nbdict, @@ -299,25 +379,8 @@ def validate( relax_add_props=relax_add_props, strip_invalid_metadata=strip_invalid_metadata, ): - raise error - if notebook_supports_cell_ids: - # if we support cell ids check for uniqueness when validating the whole notebook - seen_ids = set() - for cell in nbdict["cells"]: - cell_id = cell["id"] - if cell_id in seen_ids: - if repair_duplicate_cell_ids: - # Best effort to repair if we find a duplicate id - cell["id"] = generate_corpus_id() - get_logger().warning( - "Non-unique cell id '{}' detected. Corrected to '{}'.".format( - cell_id, cell["id"] - ) - ) - else: - raise ValidationError(f"Non-unique cell id '{cell_id}' detected.") - seen_ids.add(cell_id) + raise error def iter_validate( diff --git a/nbformat/warnings.py b/nbformat/warnings.py new file mode 100644 index 00000000..45156c43 --- /dev/null +++ b/nbformat/warnings.py @@ -0,0 +1,32 @@ +""" +Warnings that can be emitted by nbformat. +""" + + +class MissingIDFieldWarning(FutureWarning): + """ + + This warning is emitted in the validation step of nbformat as we used to + mutate the structure which is cause signature issues. + + This will be turned into an error at later point. + + We subclass FutureWarning as we will change the behavior in the future. + + """ + + pass + + +class DuplicateCellId(FutureWarning): + """ + + This warning is emitted in the validation step of nbformat as we used to + mutate the structure which is cause signature issues. + + This will be turned into an error at later point. + + We subclass FutureWarning as we will change the behavior in the future. + """ + + pass diff --git a/tests/test4.5.ipynb b/tests/test4.5.ipynb new file mode 100644 index 00000000..ba09ecbd --- /dev/null +++ b/tests/test4.5.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2fcdfa53", + "metadata": {}, + "source": [ + "# nbconvert latex test" + ] + }, + { + "cell_type": "markdown", + "id": "0bc81532", + "metadata": {}, + "source": [ + "**Lorem ipsum** dolor sit amet, consectetur adipiscing elit. Nunc luctus bibendum felis dictum sodales. Ut suscipit, orci ut interdum imperdiet, purus ligula mollis *justo*, non malesuada nisl augue eget lorem. Donec bibendum, erat sit amet porttitor aliquam, urna lorem ornare libero, in vehicula diam diam ut ante. Nam non urna rhoncus, accumsan elit sit amet, mollis tellus. Vestibulum nec tellus metus. Vestibulum tempor, ligula et vehicula rhoncus, sapien turpis faucibus lorem, id dapibus turpis mauris ac orci. Sed volutpat vestibulum venenatis." + ] + }, + { + "cell_type": "markdown", + "id": "bb687f78", + "metadata": {}, + "source": [ + "## Printed Using Python" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "38f37a24", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hello\n" + ] + } + ], + "source": [ + "print(\"hello\")" + ] + }, + { + "cell_type": "markdown", + "id": "a1f70963", + "metadata": {}, + "source": [ + "## Pyout" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8206b3b9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "HTML\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import HTML\n", + "HTML(\"\"\"\n", + "\n", + "HTML\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "88d8965b", + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "console.log(\"hi\");" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%javascript\n", + "console.log(\"hi\");" + ] + }, + { + "cell_type": "markdown", + "id": "34334c4f", + "metadata": {}, + "source": [ + "### Image" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8b414a68", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAggAAABDCAYAAAD5/P3lAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAH3AAAB9wBYvxo6AAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAACAASURB\nVHic7Z15uBxF1bjfugkJhCWBsCSAJGACNg4QCI3RT1lEAVE+UEBNOmwCDcjHT1wQgU+WD3dFxA1o\nCAikAZFFVlnCjizpsCUjHQjBIAkQlpCFJGS79fvjdGf69vTsc2fuza33eeaZmeqq6jM9vZw6dc4p\nBUwC+tE+fqW1fqmRDpRSHjCggS40sBxYDCxKvL8KzNBaL21EPoPB0DPIWVY/4NlE0ffzYfhgu+Qx\nGHoy/YFjaK+CcB3QkIIAHAWs3wRZsuhUSs0CXgQeBm7UWi/spn0Z+jA5yxpEfYruqnwYllRic5a1\nMaWv8U5gaT4M19Sx396IAnZLfB/SLkEMhp5O/3YL0AvoAHaKXl8HLlZK3QZcpbWe0lbJDOsaHuDU\n0e4u4JAy2wPk/C1JzrKWArOQ0fUtwH35MOysQxaDwbCO0NFuAXoh6wPjgQeUUvcqpUa0WyCDoQls\nCIwBjgfuAV7KWdY+7RWpmJxlXZezrEdylvXxdstiMKzrGAtCYxwI/EspdZbW+g/tFsbQ67kQuBHY\nFNgseh9FV6vCbUAeWBC9PgBeq2EfS6J2MQOBrRDTe5KdgAdzlvW1fBjeUUP/3UbOsoYBE6OvG7VT\nFoOhL9Af+BUwFLkZpV+DaY6V4UPkRpb1+ncT+m8nGwK/V0oN01qf025hDL2XfBi+DLycLMtZVo6u\nCsKfGnSq8/NheEpqHwOBEcDBwJnAsGhTP2ByzrJG5cPwnQb22Sy+0G4BDIa+RH+t9dmlNiqlFKIk\nJJWGi+jq5JPmq8BbJJQArfXqpkncczlbKbVQa/3rdgtiMNRCPgxXAK8Ar+Qs63LgXmDvaPPGwPeA\nH7VJvCRfbLcABkNfouwUg9ZaAwuj178BlFLvVejzgR4WFviM1npcuQpKqf6IyXIjxLS7GzAWuUnu\nXsO+fqWUellr3ZBJdq/jr9+BDn1uve07O9Rz0y6f8PtGZGgWe53oT6SBkZ/q1/nHZy47aloTRTKU\nIR+Gy3OWNR6Zxtg0Kv4KRkEwGPocxgcBiCwcsSI0F5iOhF+ilPok8C3gVGS+thK/VErdrbWuO2ys\ns/+aLZTuOKbe9krrIUCPUBB0B+PQ1P1bdKe6EzAKQgvJh+GbOct6gkJkxM45y+qXDIWMHBhjBWJe\nPgyDWvaRs6zPIVObAG/nw/DpEvUGAp8E9gGGJzbtl7Os7cvs4skqp0V0Yl8jgcOBjyMDhbmIZeWl\nfBg+UUVfReQsayhwELAnsAXi6/E28BxwTz4MP6iyn92RaSCA+/NhuCwqXx9R4MYhU0MfRTK/AjyW\nD8MFGd0ZDFVhFIQKaK3/BXxfKXUlklTq0xWafAI4Driyu2UzGLqRlygoCArYHJif2H4gcFb0+Z2c\nZW2bD8NV1XScs6yNgH8g/jsAPwCeTmzfFPgjYsnbiez71MUVdnMQcF8V4nyUs6whwB8QX4+0s2Ys\n0yPAt/NhGFbRZ/wbzgO+DaxXotqqnGX9GbigCkXhf5CBCsDngYdzljURGQhsWqLN+znL+iFwdT4M\ndYk6BkNJTJhjlWitQ2Bf4P4qqv848t8wGHor6Yd9+ruHJFkC2BI4rIa+D6egHKwmstYlGAxMQCwH\nrRjEPI5ER5S7ZvcFXsxZ1phKneUsawSi8HyH0soB0bbvAM9Ebaplt5xlnYkct1LKAYiFZhJwSQ19\nGwxrMRaEGtBar1RKfRX4JxIzXortou3PN1mE+YgJsSwaeoLHOQCqUy3QSr9eqZ6G/gq2aYVMhqrY\nOfF5FeJwvJZ8GM7JWdY/gC9HRS7wtyr7Pjrx+e6MqYC3KLbU7Qhck/h+FJIKvRRVjfSREXicU8EH\npgAvIIqLBZwGfC7avl5Uf29KkLOsTZCMq8npj9sQx89no37HIlaAODplNPBIzrJ2z4dhNVlaT0HC\nXwFmIkrAC4if2PaIz8/3KCgn385Z1pX5MJxeRd8Gw1qMglAjWutlSqnTgUcqVP0SzVYQtP5mcMXE\nSvvtUUy9YsK5QEWHy7EnTB6lOtSsFohkqEDOsgYAdqJoagkT9Z8pKAj75yzr4/kwnF2h748ho/GY\nq9J1oqiKLj4JOctKK8Yz8mH4Yrl9VcnHkXVYTsyHoZ8WJWdZNyPThbF5/3M5yzowH4alpi9+T0E5\nWA18Nx+Gf0zVeRG4KmdZ90R9bwCMRKwyX69C5h2j91uA4/JhuCSxbTYwJWdZtwNPIFbifsAFSISZ\nwVA1ZoqhDrTWjyIjjXIc3ApZDIZu4ELgY4nvt5Wody8wJ/qsgBOr6HsihfvOfCRrY7v5dYZyAECk\nGP0ISEZmZYZ55yxrB8SyEXNxhnKQ7Pt64H8TRUfmLGuXKmWeC4xPKQfJvp9CLCJlZTYYymEUhPq5\ntcL2XVsihcHQJHKWtU3Osi5GnAZj5iKWgiKitRouTxQdl7OscnPu0HV64dp8GLY7R8pyxEGxJPkw\nfBcZ9ceUSvN8IoV76upK/UZcgawcG3NKqYopfleFU+gDic/b5SzLWIwNNWFOmPqp5CG9sVJqPa11\nVZ7dBkOL2D1nWcmcBkOR8MFtgM/QdTXJZcCR+TBcXqa/SYj5egAFZ8VMX4ScZe2FRPnEXF2z9M3n\n3nwYVsrtAmK6/0z0uVR4ZXLtivvzYfhGpU7zYbgkZ1k3ACdHRQdWIQsUO3ZmkUzB3Q/xjaolLbeh\nj2MUhDrRWr+mlFpJ+eV5hyIxz4YWs98Fj/Rf8uZbozo0/ZYt7D8rf9ORK9stUw/hU9GrEnMAp1R+\ngph8GL4bzdNPiIpOorSzYtJ68FS1IYPdTLWp3hcnPm+Q3pizrA7E+TCmFn+aZN0dcpY1LB+G5e4b\ny6rM8bA49X39GmQyGMwUQ4NUGnkMrbDd0A3sdeLk4z6cN+89pTtDTWd+gyErF+7pTv5eu+XqJbyK\nTDHsmg/DJ6tsc2ni8+dzljUqXSGaevhmoqjIObFNVBzlV8kQug4W5tbQNl13WGatAv+poW+DoW6M\nBaExPgC2LrO9nHWhpSilDqI4NPMhrfXUJvS9M/DfqeJXtdY3N9p3rex50uQ9lFKT6BrTvoFCXbTX\nyZNfmnrZxHtbLVMP4xng74nvK5DzeD7wfIWRayb5MHwiZ1kzgF0oOCuemar2ZQoK8zLgr7Xup5t4\ns0n9DEl9b0RBSPeV5q0a+jYY6sYoCI1RacnZ91siRXUMAH6eKnsYicdulDOAY1NlpzWh35pRqG9R\nIuGN7uw4AfG878s8nw/DX3RDv5dScGY8NmdZP86HYXJaJzm9cHMp7/s2UHdK9BTpKaxBNbRN163k\nt9Rux05DH8FMMTTGZhW2v9sSKarjbopNk/sqpUY30qlSahCSGS/JCuD6RvqtF6UpMm/HaHTJbYaG\nmQzED/0umRVzlrUZhXwJ0HOmF5pJOlXyxzJrZbNt6rtZP8HQIzAKQp0opTZAlsItxTKtdTnv75YS\nLR7lpYqrjV0vx2EUH4fbtdZtucnpMqOrDjPy6jYii8DkRFHSYnAEhem22cBjrZKrVeTDcCldTf/p\nh345ksrEGprnF2EwNIRREOrnMxW2z2uJFLVxJcXmy2OVUo34ShydUda+EaIq7T2u0SZTY/eSdFY8\nMGdZm0efk86J6/LCQUnFp5pIkZjkcvQz8mH4YZPkMRgawigI9VNp7v7BlkhRA1rr+RQneNqC2hba\nWYtSajiS9z3JXLomaGktq/VllLIUdKqSWe0MjZMPwxlIel8Q/6Zv5CxrGIX8AJ10XU+hFtIRQ+UW\nKWoXyYyTu+Qsa79KDXKWNRpJyx5zZ9OlMhjqxCgIdaCU6g98o0K1npBCNotLM8rcOvuagCRgSXKN\n1rozq3IrCCZNfFkrfRjotWsCaJinUBODK51/tkuuPkTy/DoYOIDCfeb+fBjW4t2/lqhdcmRdbUri\nVnILXS2HZ1WRvfAcCk61K4A/dYdgBkM9GAWhPr5F6XSrIBf6Qy2SpSaidSReShV/XilV7veUIj29\noOkB2fGmXT7x7sCbOGpFf7VZx4A1m0/znG2nehMyc+0bms7NFJxzxwH7J7Y1OvWUPG9/mLOsLRvs\nr6lEaaOT0TtfBB5ITLWsJWdZg3KWdRNwTKL4wnwYzu9mMQ2GqjFhjjWilBqBpJYtx51a66UV6rST\nS+maJz52VvxRdvVilFK7UbzexGNa67Kr+bWS6X+ekPYs79HkLGt34JOI+Xyz6D2d1vfMnGUdini6\nL0C851/Oh2HD+SyaQT4MV+YsaxJyLm1Gwf9gAXBHg93/JNHHtsArOcuajCztPBDYCkkytBXg5sOw\n5QmF8mF4W86yLgK+HxXtC8zKWVaALMm8CslHsicS7RFzL8VhyAZDWzEKQg0opbYE7qd8prPVdF2h\nrSdyLfALYMNE2XFKqR/XsHbEURll62L4Wiv5PuBUqPPF6JXkLuCQbpGoPi4HfohYKGMHWD9axrlu\n8mF4Z7RuwfioaDBwaonqRemQW0U+DH+Qs6xFwHnIFNwQsv+3mMnA8dHiVwZDj8FMMVSJUuow4DkK\na7GX4gqt9cstEKlutNaL6boULMho5tBq2iul+lH8IFuCmJcNfZx8GM6hOCFVU5THfBhOQHxfylkH\n3gY+asb+6iUfhhcCewC3l5BlFbJk/P75MDwqlVTKYOgRKK1rizhSSk2h67ximo1abV5XSi2n9EIk\nz2itx5XYVqnfQcjI7DiqW2XtfeCTUbRA3ex50nWfUrqjeJEcrfcLrpj4SCN9xyilxgDPp4of0Fof\nUEXbg4B/pIqv1FrXnVNh7AmTR3V0qIwwRH1E4E28pd5+De0hZ1m/Bb4bfX0+H4Z7dMM+hgGjkDwC\nS5FpjFk9bR4/Z1mDkGmF4VHR20g4Y3oxJYOhR9EXphg6lFLlVjFbH0mZvDGwCTAayCFe0ntTOZ1y\nzDLgkEaVg1ahtX5BKfUU8OlE8ReUUjtorSstCduzch8YehSR5/6ERFG3nBvRuhE9frXUfBguA6pd\n+Mpg6DH0BQXBBro7o+Ea4Bta66e6eT/N5lK6KggKOAE4u1QDpdTGFOdNmNkLf7uh+zgYcRQEMa+3\nJe22wWBoDOOD0DhLgYla67vaLUgd3ETxglLHRXkeSnEExQ5gbQ9tNPQokis5TsqHoVlbwGDohRgF\noTECYHet9Y3tFqQetNYrKDb/DqN46eYk6emF1UhUhMFAzrImUEhDvgr4VRvFMRgMDWAUhPpYAvwf\n8Bmte31+/8uQBEdJMjMrKqW2o5A2N+YfWusePw9s6F5yltWRs6zxwKRE8RXtyEVgMBiaQ1/wQWgm\neWTe/jqtdU9Zz74htNavKaXuAw5KFB+glBqptZ6Tqj6RQlrYGDO90AfJWdY5wNeQFQwHIAmetk5U\neZFCsiCDwdALMQpCed5AphEC4NF12BHvUroqCAoJ7TwvVS+d++BdJEmPoe+xKRLnn0UeODwfhm3N\nRWAwGBqjLygIbwN/LbNdI1MGH6ReL/eWkMUmcDeSeGa7RNlRSqnzdZQoQym1C7Bzqt11NWReNKxb\nzEMU6GHAesBiYCaSLOviaF0Cg8HQi+kLCsLrWuvT2y1ET0ZrvUYp5SG57mO2Bz4LPB59/2ZRQ5P7\noM+SD8OLgYvbLYfBYOg+jJOiIeZKxOs8STJiIb28daC1/lf3imQwGAyGdmEUBAMA0XTKraniI5VS\nA6O0zOnloI31wGAwGNZhjIJgSHJp6vtgJBNlehW65cANLZHIYDAYDG3BKAiGtWitHwVeShV/muLF\nuW7VWi9qjVQGg8FgaAd9wUnRUBuXAn9IfN8f+FyqTo/OfbDnSX8brDpXnqEUe2ropzQvdtDx66ev\nGN9XolIMPQDb9T8LrBd4zsPtlsXQe7Bd/0BgQeA5QbtlMQqCIc21wC+ADaPv6WWu5wAPtVKgWtjt\n6Os2XG/9jhdQjIzTQ2rFF9bQecy4E2/I9UQlwXb9LYDDK1R7K/Cc21shj6FxbNcfDjwGKNv1Rwae\n83q7ZWo2tusPBb6ELGW9BbAICX99Gngs8Jx0hlZDBWzXHwvcC6ywXX9o4DlL2ymPURAMXdBaL1ZK\n+ZRItwz8Jc6N0BMZMFB9GxiZsWnzTjrPAH7QWomqYgTF/h9pngC6RUGwXf+XwC2B50ztjv57M7br\nXwJMCjxneo1NP0SWgAfJq7LOYLv+esAFwOkUL9wWM912/d0Dz+lsnWQ9A9v1BwEXAT8PPKfWVOML\nkPVt3kNWQm0rxgfBkEWph5UG/tJCOWqnQ40ttUkrvWcrRamWwHOmAZsguSfGAi9Hmy5AUhgPAz7f\nHfu2XX8k8ENgx+7ovzdju/4uwP9D/peaCDxnCbANsF3gOYubLVu7sF1/AHAHcBaiHDwI/C+ywNsE\n4KfA68BdfVE5iNgbOBmxqtRE4Dn/BoYDnwg8Z02zBasVY0EwFKG1fkEp9RTioJjkIa11zzaVarYq\nvVFt2TpBaiN6oCwB5tiu/2FUPCvwnLTTaLM5oJv77800dGwCz1kXHXkvRNKydwI/Cjzn1+kKtuuf\ni2TX7Ks0et681yxBGsUoCIZSBBQrCL0h98EbdW7rddiuPwoYFJu/bdffFNgL2BZ4DZgWKR5ZbRWS\n2+KIqGiE7fpjUtXmlrtZRdaHscBAYDowM/CckimWbdffFfgw8JzXou/9kfUccojV5MXAcz4s0XYw\nsCsymu8PzAVmBJ7zVqn9pdoPRVKF7wSsAN4EgqzRve36HcAoZDEqgO0zjs3rged8kGo3gOJ05ADT\ns0bTkan+k9HXGaVGjNFxykVf81nH2Hb9Ich/MRJJeT291H9fL7brj6CwANfPspQDgOi3rijRx/rI\nb8kB7wPPBZ4zL6Ne/JvfCDzn/WhufhvgvsBzVkR1dgN2AR4JPGduom38P7wXeM7c6FzfCfgU4iMR\nlFLebNfPIefXzMBzikz8tusPQyx676bljmTeCfhyVLST7frp//TV9Dluu/6GwOhUvTWB58zIkjFq\nsykyNfmfwHMW2K7fLzoWeyDTFPnAc14t1T7qYwNgT+Rc/wi5ZyT/N20UBEMRSqn+wNdTxQspTqTU\n41BaP6yVOipzGzzSYnG6m6uBz0YPv7OQm3dytc35tuuflHZutF3/BuArwEaJ4p/QNdU2wGnAH9M7\njRSTG5CbS5LQdv2joymTLKYBzwHjbNc/DomW2TCxfbXt+sMCz3k/sa8RwM+Qh/X6qf5W2q4/CTit\nzMN1OPB7CopQktW2658YeM5fEvXvRKZzBiXqZaWUPha4JlW2NfB8Rt0hiANfmjWIuf5jiLPfvVm/\nAfmvbgNmB54zKrkheuD+Bjg11Wap7fpnBJ5TybelFk4E+iE+Fb+ptbHt+scg//nGqfJbgeMDz1mY\nKN4UOZYX2q7fSWHhuNdt198ZOBc4MypbbLv+5wPPeTb6PiJqe5ft+ichx3WXRN8rbdc/OfCcrGis\nR4ChiHKSlSn2f4BzkOvitMRvCKJ9DEzU9TPafwGZlkkyBvExSrKUrtdnmoOBycA5tus/iCyat3li\nu7Zd/0rk2ihS1mzXPwT4E3LulaLTKAiGLL6EaMlJbtBat91pphIjFw289t9DVh4N7Jva9EKnWnpJ\nG0RqBXcjCa08YCqy/PJE4L8A33b9HQPPeTNR/0bgvujzGchoywPSq5U+nd6R7fp7IDfRjYDrEE99\nDeyHrPb5lO364xI36zTb2q4/AUnt/SSyLHQHMvJZklQOIhYChyCLid2FWBoGIQrDfwGnAP8Gskzd\nVvSbBgPvIMdpJjLHuxdikXgg1ewa4Jbo84+BHRAFI/3gT9/QQZa+/iIy9zwccVQrSeA5nbbrX4s8\ncI6htIIQK7xdFJLIAvEEYjmYBlyP/E4LeXj92Xb94YHnnFtOjhrYJ3q/vtbpE9v1fwqcjYxUL0GO\n51bI//g1YIzt+mNTSgJIivfNEIXgBOThfx0ySv8Nct7vgzgfj0+1HQf8E5iPKM/vI+vLHA9cZbs+\nJZSEevgDBZ++3yIKzgVI1FeSrCnD6ci0zebAJxCfjmoZjxzXPPBL5By0gW8jCt3sqHwtkYL1N0RB\n/R2ymOG2yHE5CLFAHAu8ahQEQxbfyijrDdML3HTTkWvUBRfsb88bPb6TzjEK+oHKL184YHL+Jmdl\nu+XrJsYBhwaec0dcYLu+hzw0dkcu/AvjbUmLgu36DqIgPB54zuQq9nURMgI8LjnyBibZrj8z2s/l\ntuvvVcJJbWvkXDoi8JzbKu0s8JxFtut/IqXgAPzOdv0/IiPnb5KhICAjpMGIEjAhPV1iu35HWsbA\nc25ObD8ZURAeqibENBqpTYnark8FBSHiakRBOMx2/cHpB29kSv4KooSlLRYnIcrBHcBXk7/Fdv0b\ngReAM23Xvz7wnJlVyFIJK3qfXUsj2/U/jiiiq4B9ktEytuv/Fhlpfx2xEnw31XxHYLfAc6bbrv8k\ncny/Bnwz8Jy/2q6/DTLd9F8Zu94ceXAeEHhOvM7MNbbrT0UU4vNs15+c2FY3gedcm/hNP0EUhDvL\nKMrJtkuIFPboWNWiIOSAO4HDE7/Dj67FSxEn21+m2pyOWDpuCDxn7fG2Xf8e4F1EIVsceE5oohgM\nXVBKjURuSEke11qXMhv3OPR553VO9Sb407yJZwTexO8FnnNV/qYj11XlAOCfSeUA1s4D/y36mp7f\nrAvb9fdGLDMzU8pBzMXIg2wsMhLKQiFhgxWVg5gM5SDm+uh9VHqD7fr7IlaNFcAJWb4UPcHLPvCc\n2YgVZn3gyIwq30AsQg8lQ+aiefUfR1/PzlB08sD9Udusfmsi2t+Q6GutjspnIE6L16dDaSN/irMR\np8dTbddPOxK/nwgxTZr8747e30SsEkNL7PvXGQrAVYgvwggK/gK9mXMyfuON0fvWkY9Dkp2i97uT\nhYHnLKNgURsDxknRUMz5FJ8XP22DHIbqSc9pxsSOW8ObtJ89ovdXbNcvpQC8j4zcdiTbnAoy4q2b\n6Ia3CYV5/Y0zqsXOf4/WEYveaq5GQuOOQaZekhydqJNkW2BLZF2UzhL/R+xE2XAIa+A52nb9lUho\nY63hd7GD5d1ZGwPPmW27/iuIUrkLXc/n9xP13rZd/yNgVezoF8n1NjAyyyKETGGl97fGdv1/IlaL\n3h7e+06WM2PgOQtt11+GTMcNo6vVJ1aWsyK+4nvFQjAKgiGBUmoshfnOmGe11vdl1Tf0GOaUKI9v\nlqrE9lqJb6b/Hb3KsU2Zba/VslPb9bdDfA0ORLz0N62iWWxVqMkc3iZuRuawP2u7/g6JKI9RSCTR\nYoodhOP/YgNKK2Ix2zZJzjnINMN2NbaL/4uiaIUE/0EUhB3pqiCkMwl2IscjXZZFJ/B2iW1xRtWR\nZWTqDcwps63U9f8Q0TSN7fp/iK0PtuvviPjmrCHyR1qrICilNkTmHjZDLsDke/JzOtwnzY1KqXcR\nR4cFiBab9XlRT87I19dQSo1GNPz0tJOxHvR8mhrOVobB0XuAOBiWo1zmwaqdXW3X3x+4BzGVv4SM\npN9AnPEg21McxMIArTs2dRN4zoe26/8NOA6xGJwfbYqV9b8GnrM81Sz+Lz5A0qOXo2y4Ww3MoT4F\nIY4+KTfNF58TaXN4VthstVNDitLKcdxvOjKmEj0tv0M953fs87E3Eul0B2JliBflOzfwnFcA+iul\n5iEmwQFNEBaK569L0amUWggcqrXO8gg2FKHG2CdW4Uem9XvBlUflu7RUaiByU3lPa92ZKN8cSav8\nfUQBTHKr1rrqueIsxp18/eg1azrLjSYB6NfRsY3G6Is9nDjDYxh4zundvbMotvtm5N50duA5P09t\nT0faJIkfirU+zNrF1YiC4FBQECZE73/JqB//F+u14r+ImIVEOB1iu/6ZNfhwzEamp7YuU2e7RN1m\noZBnW5YVIfZ1qNWfotw51yuIph++hET0bAkcikwpTAEuCjxnSly3PzIP0a8NcnYgD6SBlSoaIhQX\nV2UtVup24LBU6S7IyG+NUuodZP52awojrTSvIjeshlij9XdQKh2jXYRRDtpGfOCruQfEpmzbdn0V\ndP9iPLsgjnEryI67Lzd/PCt6/5Tt+v3LJXAqQ/z7ut2ZO/Ccx23XfxUYZbt+7D8xCngl8Jwsa80s\nZBS8ke36O7cg4ybA5UgegJ0QE/XN5auvZRaiIMQRF12wXX8TCv9ls6eERpOtIMR+EXNS5YsRh8dS\nTo/V+CzUck21i6uR5++4wHNeKFXJRDH0PfoR5fqmtHKwDDhCa73O5JA3lCSeF04v6Z3FPRTMzBO7\nS6AE8Q12PbomgYn5Xpm29yMPhu2RUK96iKMn9q6zfa38JXo/NHoly7oQeM5K4Iro60+jKINuJVJC\nYu/439uuX805A4VkWyfbrp+V/MdFnOmeCmpfFKsSRYMc2/U/DeyG3OfSjpOx5WmfVHmcuXFcFfus\n5ZpqObbrb45EtswqpxyAcVI0FDMbOFxrXeT9a+heopvnEArzolvashT0wmbEapdgGpIU5XDb9R9F\nYqrXQyyL8wPPeTeuGHjOMtv1T0VuqldH6W//jigNmyHOcAcBgwPPcZog20xkRLcJ8DPb9S9CRqM7\nI7kDvoDE1hfdxwLPWWy7/plI7oCLbNffHXm4zUQeRtsjGRP/EXhOKSfcABkpj49i5+9G/putgHmB\n5yxIN4iSF21C14V6Rtiu/yYSW15uHv4a4P8oKAedlPcvOAv4KmItfCTKKfAS8v8NR1ILHwnsl5GA\nqF7ORdYaGA48HGWyfBqYgViDRwCfQR72PkDgOU9E2TvHI4m0TgeeRczb30DyH2iKcyA0ymrgWNv1\nFyDK1NvIQ3tStN3LCH+9HUl29UPb9echFo8BUbtLEKfJtJ9EmgA59ifbrj8bCR3cGDlvZqdTLcPa\n9NCbUMhs2GFLKvPFSAKxZl7/CxEL8pgoA+QMxD+kE3HenAHcHnjOGmNB6Dt8iGjHWSFKK4HHkcQr\nOxvloLXYrr+77fqrEIejNyiE6P0WccZbabv+lFLtG+Ry5AY/BHkYfRDtR9M79QAAA3FJREFUcwYS\nNdCFwHPuQR6a7wHfAR5GMhk+i9xcT6G6KIOKBJ6zFBn9r0GUmBlIWN9ziHf/5yjO/phsfy2yqt4i\nxOJxF3INTI9k/Q7ZoV4xv0PC5LZCci4sQm6g08kYHdquvxy5lt4DwsSmF5EENCts1//Idv3M9LbR\negJTkEx4NvBA1joFifqLIjkeR6wcfwdeQfIFTEEcjHNU79RXkShvw95Ixs5+yOj/KuSh+ATiAHcq\nxb4fxwOXRfJMQc6zlxGF6B3g4MBznmmWnBFzEUfP0xDFcCGiAG+JHKushESXIdanjRBF4l3EInAj\n8vuOqWK/5yNRGaOQFNkfIhkOX6CQgwAA2/W3jkI3V0T7ejjatAFyXb2PXP/LbVnroWGi6bbzo697\nIlaWk5Br93wkk+jztusP7o94Lna7eaoMZU0cVXIAped7eqGZfP2ZqmPFl+ptrVf3n19UpvVMYLRS\nagBywxuEjLwWAe9qrTMXV2mUzs7OP/Xrp+6qt33Hmn5Zue3XNeZTOVoky5nqKiQkrNT883Qk3WvJ\nsMLAc1bbrv9Z5AH6KWRkOB+5wRWlWo7a3Ga7/mOIomAho/GFyI30YeDREru7ELlOq07TG3jONbbr\nT0Nu9KOQm+i/gFsDz3nTdv2fI2FbpdpfHnlpH4LcnHdAlIz5yLErqXgFnvOR7fo28lDYE7lu3kKO\nTdZ9K52xrhTl7knnUVB6SqVeTsr4apQU6lDEbG4hCsFbROsRBE1ebjrwnNB2/XGIGf5gRBkYhPyv\n7yDpjR9MtVkOnGK7/vWIgrFrVPcF4O8ZKbaXIuduWkH6KfL/JbkEsWClfWK2CDzHt10/jzhXjkGO\nyzNIZEiRD00ga3ocaLv+kUh2xo8hSuVURKmIUyiXVGYCWVzKQlJD7xrJNg85b9LX8RLgF6X6SpFU\n9Cpe28gaJgORqEEAbNffDLlvHIQoAndR8NEYilwjExD/nwuUiTQ0GAwGw7qC7fqjEUvKqsBzmhWd\nt05gu/5pyNoifw48J9N5PForxQeeNFMMBoPBYDD0DWL/llvK1In9jt4zCoLBYDAYDH2DePo5MwrJ\ndv0hFPwTnjBRDAaDwWAw9A3+hPgOHRPl25iK+FhsiuR4OARx0Lwf+J1REAwGg8Fg6AMEnvNklL78\nHMRRca/E5hVINNIVwI2B56z6/3ExLRI31pXNAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Image\n", + "Image(\"http://ipython.org/_static/IPy_header.png\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/test_validator.py b/tests/test_validator.py index 09d8d302..6408f5bb 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -6,6 +6,7 @@ import json import os import re +from copy import deepcopy import pytest from jsonschema import ValidationError @@ -14,9 +15,12 @@ from nbformat import read from nbformat.json_compat import VALIDATORS from nbformat.validator import isvalid, iter_validate, validate +from nbformat.warnings import DuplicateCellId, MissingIDFieldWarning from .base import TestsBase +nb4 = ("test4.ipynb", "test4.5.ipynb") + # Fixtures @pytest.fixture(autouse=True) @@ -32,6 +36,49 @@ def set_validator(validator_name): os.environ["NBFORMAT_VALIDATOR"] = validator_name +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_should_warn(validator_name): + """Test that a v4 notebook witout id emit a warning""" + set_validator(validator_name) + with TestsBase.fopen("test4.5.ipynb", "r") as f: + nb = read(f, as_version=4) + + del nb.cells[3]["id"] + assert nb.cells[3].get("id") is None + assert nb.cells[3]["cell_type"] == "code" + + nb_copy = deepcopy(nb) + + with pytest.warns(MissingIDFieldWarning): + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.xfail(reason="In the future we want to stop warning, and raise an error") +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_should_not_mutate(validator_name): + """Test that a v4 notebook without id raise an error and does/not mutate + + Probably should be 2 test. To enable in the future. + """ + set_validator(validator_name) + with TestsBase.fopen("test4.5.ipynb", "r") as f: + nb = read(f, as_version=4) + + del nb.cells[3]["id"] + assert nb.cells[3].get("id") is None + assert nb.cells[3]["cell_type"] == "code" + + nb_deep_copy = deepcopy(nb) + + with (pytest.raises(MissingIDFieldWarning), pytest.warns(None)): + validate(nb) + + assert nb == nb_deep_copy + + assert isvalid(nb) == True + + @pytest.mark.parametrize("validator_name", VALIDATORS) def test_nb2(validator_name): """Test that a v2 notebook converted to current passes validation""" @@ -53,10 +100,11 @@ def test_nb3(validator_name): @pytest.mark.parametrize("validator_name", VALIDATORS) -def test_nb4(validator_name): +@pytest.mark.parametrize("nbfile", nb4) +def test_nb4(validator_name, nbfile): """Test that a v4 notebook passes validation""" set_validator(validator_name) - with TestsBase.fopen("test4.ipynb", "r") as f: + with TestsBase.fopen(nbfile, "r") as f: nb = read(f, as_version=4) validate(nb) assert isvalid(nb) @@ -237,10 +285,12 @@ def test_repair_non_unique_cell_ids(): with TestsBase.fopen("invalid_unique_cell_id.ipynb", "r") as f: # Avoids validate call from `.read` nb = nbformat.from_dict(json.load(f)) - validate(nb) + with pytest.warns(DuplicateCellId): + validate(nb) assert isvalid(nb) +@pytest.mark.filterwarnings(MissingIDFieldWarning) def test_no_cell_ids(): """Test that a cell without a cell ID does not pass validation""" import nbformat @@ -255,6 +305,7 @@ def test_no_cell_ids(): validate(nb, repair_duplicate_cell_ids=False) +@pytest.mark.filterwarnings(MissingIDFieldWarning) def test_repair_no_cell_ids(): """Test that we will repair cells without ids if asked during validation""" import nbformat From e967f44f3f14fbb0bc317b3b35127b1231e8453e Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 8 Jun 2022 10:26:51 +0200 Subject: [PATCH 02/23] please precommit --- nbformat/validator.py | 3 --- tests/test_validator.py | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index aa22c2d4..6bc05aa4 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -5,12 +5,9 @@ import json import os import pprint -import sys import warnings from copy import deepcopy -from traitlets.log import get_logger - from ._imports import import_item from .corpus.words import generate_corpus_id from .json_compat import ValidationError, _validator_for_name, get_current_validator diff --git a/tests/test_validator.py b/tests/test_validator.py index 6408f5bb..c5e5bd73 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -290,7 +290,7 @@ def test_repair_non_unique_cell_ids(): assert isvalid(nb) -@pytest.mark.filterwarnings(MissingIDFieldWarning) +@pytest.mark.filterwarnings("ignore::nbformat.warnings.MissingIDFieldWarning") def test_no_cell_ids(): """Test that a cell without a cell ID does not pass validation""" import nbformat @@ -305,7 +305,7 @@ def test_no_cell_ids(): validate(nb, repair_duplicate_cell_ids=False) -@pytest.mark.filterwarnings(MissingIDFieldWarning) +@pytest.mark.filterwarnings("ignore::nbformat.warnings.MissingIDFieldWarning") def test_repair_no_cell_ids(): """Test that we will repair cells without ids if asked during validation""" import nbformat From 9a67bde0a40481f091e931132d69428a6ecfecee Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 8 Jun 2022 10:27:42 +0200 Subject: [PATCH 03/23] more warnings --- nbformat/validator.py | 2 +- tests/test_validator.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 6bc05aa4..47cd8205 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -275,7 +275,7 @@ def normalize(nbdict, version, version_minor): """ nbdict = deepcopy(nbdict) - return _normalize(nbdict) + return _normalize(nbdict, version, version_minor, True) def _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids): diff --git a/tests/test_validator.py b/tests/test_validator.py index c5e5bd73..13210983 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -51,7 +51,7 @@ def test_should_warn(validator_name): with pytest.warns(MissingIDFieldWarning): validate(nb) - assert isvalid(nb) == True + assert isvalid(nb) is True @pytest.mark.xfail(reason="In the future we want to stop warning, and raise an error") @@ -70,13 +70,13 @@ def test_should_not_mutate(validator_name): assert nb.cells[3]["cell_type"] == "code" nb_deep_copy = deepcopy(nb) - - with (pytest.raises(MissingIDFieldWarning), pytest.warns(None)): - validate(nb) + with pytest.warns(None): + with pytest.raises(MissingIDFieldWarning): + validate(nb) assert nb == nb_deep_copy - assert isvalid(nb) == True + assert isvalid(nb) is True @pytest.mark.parametrize("validator_name", VALIDATORS) From 8b55731d1f116a9c678cb71a22876f238adbf693 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 8 Jun 2022 10:35:08 +0200 Subject: [PATCH 04/23] no warn none --- tests/test_validator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_validator.py b/tests/test_validator.py index 13210983..af28c8fb 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -70,9 +70,8 @@ def test_should_not_mutate(validator_name): assert nb.cells[3]["cell_type"] == "code" nb_deep_copy = deepcopy(nb) - with pytest.warns(None): - with pytest.raises(MissingIDFieldWarning): - validate(nb) + with pytest.raises(MissingIDFieldWarning): + validate(nb) assert nb == nb_deep_copy From fee6c3c93b536b12dadc6dd99bf3f93126bf51aa Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 8 Jun 2022 15:16:26 +0200 Subject: [PATCH 05/23] try to extract logic --- nbformat/validator.py | 155 ++++++++++++++++++++++++++++-------------- 1 file changed, 105 insertions(+), 50 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 47cd8205..6fac7590 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -247,11 +247,13 @@ def better_validation_error(error, version, version_minor): return NotebookValidationError(error, ref) -def normalize(nbdict, version, version_minor): +def normalize(nbdict, version=None, version_minor=None, *, relax_add_props: bool = False): """ - EXPERIMENTAL + Normalise a notebook prior to validation. - normalise a notebook prior to validation. + .. note:: + + Experimental This tries to implement a couple of normalisation steps to standardise notebooks and make validation easier. @@ -265,6 +267,9 @@ def normalize(nbdict, version, version_minor): notebook document version : int version_minor : int + relax_add_props : bool + Wether to allow extra property in the Json schema validating the + notebook. Returns ------- @@ -275,10 +280,23 @@ def normalize(nbdict, version, version_minor): """ nbdict = deepcopy(nbdict) - return _normalize(nbdict, version, version_minor, True) + nbdict_version, nbdict_version_minor = get_version(nbdict) + if version is None: + version = nbdict_version + if version_minor is None: + version_minor = nbdict_version_minor + return _normalize(nbdict, version, version_minor, True, relax_add_props=relax_add_props) + + +def _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids, relax_add_props): + """ + Private normalisation routine. -def _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids): + This attempt to mormalize teh + + + """ changes = 0 if version >= 4 and version_minor >= 5: @@ -320,6 +338,8 @@ def _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids): else: raise ValidationError(f"Non-unique cell id '{cell_id}' detected.") seen_ids.add(cell_id) + + changes += _try_fix_error(nbdict, version, version_minor, relax_add_props=relax_add_props) return changes, nbdict @@ -366,7 +386,13 @@ def validate( version, version_minor = 1, 0 if ref is None: - _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids) + _normalize( + nbdict, + version, + version_minor, + repair_duplicate_cell_ids, + relax_add_props=relax_add_props, + ) for error in iter_validate( nbdict, @@ -380,6 +406,63 @@ def validate( raise error +def _try_fix_error(nbdict, version, version_minor, relax_add_props): + """ + This function try to extract errors from the validator + and fix them if necessary. + + """ + validator = get_validator(version, version_minor, relax_add_props=relax_add_props) + if not validator: + raise ValidationError(f"No schema for validating v{version}.{version_minor} notebooks") + errors = [e for e in validator.iter_errors(nbdict)] + + if len(errors) > 0: + if validator.name == "fastjsonschema": + validator = get_validator( + version, + version_minor, + relax_add_props=relax_add_props, + name="jsonschema", + ) + errors = [e for e in validator.iter_errors(nbdict)] + + error_tree = validator.error_tree(errors) + changes = 0 + if "metadata" in error_tree: + for key in error_tree["metadata"]: + nbdict["metadata"].pop(key, None) + changes += 1 + + if "cells" in error_tree: + number_of_cells = len(nbdict.get("cells", 0)) + for cell_idx in range(number_of_cells): + # Cells don't report individual metadata keys as having failed validation + # Instead it reports that it failed to validate against each cell-type definition. + # We have to delve into why those definitions failed to uncover which metadata + # keys are misbehaving. + if "oneOf" in error_tree["cells"][cell_idx].errors: + intended_cell_type = nbdict["cells"][cell_idx]["cell_type"] + schemas_by_index = [ + ref["$ref"] + for ref in error_tree["cells"][cell_idx].errors["oneOf"].schema["oneOf"] + ] + cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell" + if cell_type_definition_name in schemas_by_index: + schema_index = schemas_by_index.index(cell_type_definition_name) + for error in error_tree["cells"][cell_idx].errors["oneOf"].context: + rel_path = error.relative_path + error_for_intended_schema = error.schema_path[0] == schema_index + is_top_level_metadata_key = ( + len(rel_path) == 2 and rel_path[0] == "metadata" + ) + if error_for_intended_schema and is_top_level_metadata_key: + nbdict["cells"][cell_idx]["metadata"].pop(rel_path[1], None) + changes += 1 + + return 0 + + def iter_validate( nbdict=None, ref=None, @@ -393,6 +476,14 @@ def iter_validate( relevant notebook format schema. Returns a generator of all ValidationErrors if not valid. + + + Notes + ----- + To fix: For security reason this should not ever mutate it's arguments, and + should not ever try to validate a mutated or modified version of it's notebook. + + """ # backwards compatibility for nbjson argument if nbdict is not None: @@ -415,50 +506,14 @@ def iter_validate( if ref: errors = validator.iter_errors(nbdict, {"$ref": "#/definitions/%s" % ref}) else: - errors = [e for e in validator.iter_errors(nbdict)] - - if len(errors) > 0 and strip_invalid_metadata: - if validator.name == "fastjsonschema": - validator = get_validator( - version, version_minor, relax_add_props=relax_add_props, name="jsonschema" - ) - errors = [e for e in validator.iter_errors(nbdict)] - - error_tree = validator.error_tree(errors) - if "metadata" in error_tree: - for key in error_tree["metadata"]: - nbdict["metadata"].pop(key, None) - - if "cells" in error_tree: - number_of_cells = len(nbdict.get("cells", 0)) - for cell_idx in range(number_of_cells): - # Cells don't report individual metadata keys as having failed validation - # Instead it reports that it failed to validate against each cell-type definition. - # We have to delve into why those definitions failed to uncover which metadata - # keys are misbehaving. - if "oneOf" in error_tree["cells"][cell_idx].errors: - intended_cell_type = nbdict["cells"][cell_idx]["cell_type"] - schemas_by_index = [ - ref["$ref"] - for ref in error_tree["cells"][cell_idx].errors["oneOf"].schema["oneOf"] - ] - cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell" - if cell_type_definition_name in schemas_by_index: - schema_index = schemas_by_index.index(cell_type_definition_name) - for error in error_tree["cells"][cell_idx].errors["oneOf"].context: - rel_path = error.relative_path - error_for_intended_schema = error.schema_path[0] == schema_index - is_top_level_metadata_key = ( - len(rel_path) == 2 and rel_path[0] == "metadata" - ) - if error_for_intended_schema and is_top_level_metadata_key: - nbdict["cells"][cell_idx]["metadata"].pop(rel_path[1], None) - - # Validate one more time to ensure that us removing metadata - # didn't cause another complex validation issue in the schema. - # Also to ensure that higher-level errors produced by individual metadata validation - # failures are removed. - errors = validator.iter_errors(nbdict) + if strip_invalid_metadata: + _try_fix_error(nbdict, version, version_minor, relax_add_props) + + # Validate one more time to ensure that us removing metadata + # didn't cause another complex validation issue in the schema. + # Also to ensure that higher-level errors produced by individual metadata validation + # failures are removed. + errors = validator.iter_errors(nbdict) for error in errors: yield better_validation_error(error, version, version_minor) From fbc85381a94c2715e9afbecffbbd66ef4518eb92 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 8 Jun 2022 15:25:39 +0200 Subject: [PATCH 06/23] typing --- nbformat/validator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 6fac7590..02e44050 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -406,7 +406,7 @@ def validate( raise error -def _try_fix_error(nbdict, version, version_minor, relax_add_props): +def _try_fix_error(nbdict, version: int, version_minor: int, relax_add_props: bool) -> int: """ This function try to extract errors from the validator and fix them if necessary. @@ -417,6 +417,7 @@ def _try_fix_error(nbdict, version, version_minor, relax_add_props): raise ValidationError(f"No schema for validating v{version}.{version_minor} notebooks") errors = [e for e in validator.iter_errors(nbdict)] + changes = 0 if len(errors) > 0: if validator.name == "fastjsonschema": validator = get_validator( @@ -428,7 +429,6 @@ def _try_fix_error(nbdict, version, version_minor, relax_add_props): errors = [e for e in validator.iter_errors(nbdict)] error_tree = validator.error_tree(errors) - changes = 0 if "metadata" in error_tree: for key in error_tree["metadata"]: nbdict["metadata"].pop(key, None) @@ -460,7 +460,7 @@ def _try_fix_error(nbdict, version, version_minor, relax_add_props): nbdict["cells"][cell_idx]["metadata"].pop(rel_path[1], None) changes += 1 - return 0 + return changes def iter_validate( From c106df86064a9399444fff1a76b44c8f0f8f7e89 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 8 Jun 2022 15:28:14 +0200 Subject: [PATCH 07/23] pass test --- nbformat/validator.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 02e44050..48c9ccc6 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -288,7 +288,14 @@ def normalize(nbdict, version=None, version_minor=None, *, relax_add_props: bool return _normalize(nbdict, version, version_minor, True, relax_add_props=relax_add_props) -def _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids, relax_add_props): +def _normalize( + nbdict, + version, + version_minor, + repair_duplicate_cell_ids, + relax_add_props, + strip_invalid_metadata=False, +): """ Private normalisation routine. @@ -338,8 +345,8 @@ def _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids, relax_ else: raise ValidationError(f"Non-unique cell id '{cell_id}' detected.") seen_ids.add(cell_id) - - changes += _try_fix_error(nbdict, version, version_minor, relax_add_props=relax_add_props) + if strip_invalid_metadata: + changes += _try_fix_error(nbdict, version, version_minor, relax_add_props=relax_add_props) return changes, nbdict @@ -392,6 +399,7 @@ def validate( version_minor, repair_duplicate_cell_ids, relax_add_props=relax_add_props, + strip_invalid_metadata=strip_invalid_metadata, ) for error in iter_validate( From 3656974684a5162b5a38aaa80c654d63dafc7e79 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Thu, 9 Jun 2022 08:20:51 +0200 Subject: [PATCH 08/23] type annotations --- nbformat/validator.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 48c9ccc6..581dd325 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -7,6 +7,7 @@ import pprint import warnings from copy import deepcopy +from typing import Any, Optional from ._imports import import_item from .corpus.words import generate_corpus_id @@ -247,7 +248,13 @@ def better_validation_error(error, version, version_minor): return NotebookValidationError(error, ref) -def normalize(nbdict, version=None, version_minor=None, *, relax_add_props: bool = False): +def normalize( + nbdict: Any, + version: Optional[int] = None, + version_minor: Optional[int] = None, + *, + relax_add_props: bool = False, +) -> Any: """ Normalise a notebook prior to validation. @@ -414,7 +421,7 @@ def validate( raise error -def _try_fix_error(nbdict, version: int, version_minor: int, relax_add_props: bool) -> int: +def _try_fix_error(nbdict: Any, version: int, version_minor: int, relax_add_props: bool) -> int: """ This function try to extract errors from the validator and fix them if necessary. From f15413a2ef54439acc671e6ede36cbaf19f761a9 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 10:26:00 +0200 Subject: [PATCH 09/23] add deprecation --- nbformat/validator.py | 81 +++++++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 581dd325..8d7b2b01 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -7,7 +7,7 @@ import pprint import warnings from copy import deepcopy -from typing import Any, Optional +from typing import Any, Optional, Tuple from ._imports import import_item from .corpus.words import generate_corpus_id @@ -16,6 +16,7 @@ from .warnings import DuplicateCellId, MissingIDFieldWarning validators = {} +_deprecated = object() def _relax_additional_properties(obj): @@ -254,7 +255,7 @@ def normalize( version_minor: Optional[int] = None, *, relax_add_props: bool = False, -) -> Any: +) -> Tuple[Any, int]: """ Normalise a notebook prior to validation. @@ -280,10 +281,10 @@ def normalize( Returns ------- - changes : int - number of changes in the notebooks notebook : dict deep-copy of the original object with relevant changes. + changes : int + number of changes in the notebooks """ nbdict = deepcopy(nbdict) @@ -296,19 +297,21 @@ def normalize( def _normalize( - nbdict, - version, - version_minor, - repair_duplicate_cell_ids, - relax_add_props, - strip_invalid_metadata=False, -): + nbdict: Any, + version: int, + version_minor: int, + repair_duplicate_cell_ids: bool, + relax_add_props: bool, + strip_invalid_metadata: bool = False, +) -> Tuple[Any, int]: """ Private normalisation routine. - This attempt to mormalize teh - + This attempt to normalize le nbdict passed to it. As this is currently used + both in `validate()` for historical reasons, and in the `normalize` public + function it does currently mutates it's argument. Ideally once removed from + the `validate()` function, it should stop mutating it's arguments. """ changes = 0 @@ -357,16 +360,29 @@ def _normalize( return changes, nbdict +def _dep_warn(field): + warnings.warn( + f"""`{field}` kwargs of validate has been deprecated for security reason, and will be removed soon. + + Please explicitly use the `new_notebook,n_changes = nbformat.validator.normalize(old_notebook, ...)` if you wish to + normalise your notebook. `normalize` is available since nbformat 5.5.0 + + """, + DeprecationWarning, + stacklevel=3, + ) + + def validate( - nbdict=None, - ref=None, - version=None, - version_minor=None, - relax_add_props=False, - nbjson=None, - repair_duplicate_cell_ids=True, - strip_invalid_metadata=False, -): + nbdict: Any = None, + ref: Optional[str] = None, + version: Optional[int] = None, + version_minor: Optional[int] = None, + relax_add_props: bool = _deprecated, # type: ignore + nbjson: Any = None, + repair_duplicate_cell_ids: bool = _deprecated, # type: ignore + strip_invalid_metadata: bool = _deprecated, # type: ignore +) -> None: """Checks whether the given notebook dict-like object conforms to the relevant notebook format schema. @@ -379,6 +395,24 @@ def validate( Raises ValidationError if not valid. """ assert isinstance(ref, str) or ref is None + + if relax_add_props is _deprecated: + relax_add_props = False + else: + _dep_warn("relax_add_props") + + if strip_invalid_metadata is _deprecated: + strip_invalid_metadata = False + else: + _dep_warn("strip_invalid_metadata") + pass + + if repair_duplicate_cell_ids is _deprecated: + repair_duplicate_cell_ids = True + else: + _dep_warn("repair_duplicate_cell_ids") + pass + # backwards compatibility for nbjson argument if nbdict is not None: pass @@ -399,6 +433,9 @@ def validate( if version is None: version, version_minor = 1, 0 + assert isinstance(version, int) + assert isinstance(version_minor, int) + if ref is None: _normalize( nbdict, From cdf580067359c7c50e7efd9b17fa26bef0aa2589 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 10:36:01 +0200 Subject: [PATCH 10/23] Fix the tests --- nbformat/validator.py | 20 ++++++++++---------- tests/test_validator.py | 15 ++++++++++----- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 8d7b2b01..84a3c5c9 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -7,6 +7,7 @@ import pprint import warnings from copy import deepcopy +from textwrap import dedent from typing import Any, Optional, Tuple from ._imports import import_item @@ -259,15 +260,12 @@ def normalize( """ Normalise a notebook prior to validation. - .. note:: - - Experimental - This tries to implement a couple of normalisation steps to standardise notebooks and make validation easier. You should in general not rely on this function and make sure the notebooks - that reach nbformat are already in a normal form. + that reach nbformat are already in a normal form. If not you likely have a bug, + and may have security issues. Parameters ---------- @@ -362,12 +360,15 @@ def _normalize( def _dep_warn(field): warnings.warn( - f"""`{field}` kwargs of validate has been deprecated for security reason, and will be removed soon. + dedent( + f"""`{field}` kwargs of validate has been deprecated for security + reasons, and will be removed soon. Please explicitly use the `new_notebook,n_changes = nbformat.validator.normalize(old_notebook, ...)` if you wish to normalise your notebook. `normalize` is available since nbformat 5.5.0 - """, + """ + ), DeprecationWarning, stacklevel=3, ) @@ -433,10 +434,9 @@ def validate( if version is None: version, version_minor = 1, 0 - assert isinstance(version, int) - assert isinstance(version_minor, int) - if ref is None: + assert isinstance(version, int) + assert isinstance(version_minor, int) _normalize( nbdict, version, diff --git a/tests/test_validator.py b/tests/test_validator.py index af28c8fb..7709791c 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -271,10 +271,12 @@ def test_non_unique_cell_ids(): # Avoids validate call from `.read` nb = nbformat.from_dict(json.load(f)) with pytest.raises(ValidationError): - validate(nb, repair_duplicate_cell_ids=False) + with pytest.warns(DeprecationWarning): + validate(nb, repair_duplicate_cell_ids=False) # try again to verify that we didn't modify the content with pytest.raises(ValidationError): - validate(nb, repair_duplicate_cell_ids=False) + with pytest.warns(DeprecationWarning): + validate(nb, repair_duplicate_cell_ids=False) def test_repair_non_unique_cell_ids(): @@ -298,10 +300,12 @@ def test_no_cell_ids(): # Avoids validate call from `.read` nb = nbformat.from_dict(json.load(f)) with pytest.raises(ValidationError): - validate(nb, repair_duplicate_cell_ids=False) + with pytest.warns(DeprecationWarning): + validate(nb, repair_duplicate_cell_ids=False) # try again to verify that we didn't modify the content with pytest.raises(ValidationError): - validate(nb, repair_duplicate_cell_ids=False) + with pytest.warns(DeprecationWarning): + validate(nb, repair_duplicate_cell_ids=False) @pytest.mark.filterwarnings("ignore::nbformat.warnings.MissingIDFieldWarning") @@ -340,5 +344,6 @@ def test_strip_invalid_metadata(): with TestsBase.fopen("v4_5_invalid_metadata.ipynb", "r") as f: nb = nbformat.from_dict(json.load(f)) assert not isvalid(nb) - validate(nb, strip_invalid_metadata=True) + with pytest.warns(DeprecationWarning): + validate(nb, strip_invalid_metadata=True) assert isvalid(nb) From ab5f945a8344af49f8fcfde33ad905babc9a003f Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 10:37:05 +0200 Subject: [PATCH 11/23] remove useless local imports --- tests/test_validator.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_validator.py b/tests/test_validator.py index 7709791c..7b4e242b 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -254,8 +254,6 @@ def test_fallback_validator_with_iter_errors_using_ref(recwarn): Test that when creating a standalone object (code_cell etc) the default validator is used as fallback. """ - import nbformat - set_validator("fastjsonschema") nbformat.v4.new_code_cell() nbformat.v4.new_markdown_cell() @@ -265,8 +263,6 @@ def test_fallback_validator_with_iter_errors_using_ref(recwarn): def test_non_unique_cell_ids(): """Test than a non-unique cell id does not pass validation""" - import nbformat - with TestsBase.fopen("invalid_unique_cell_id.ipynb", "r") as f: # Avoids validate call from `.read` nb = nbformat.from_dict(json.load(f)) @@ -281,7 +277,6 @@ def test_non_unique_cell_ids(): def test_repair_non_unique_cell_ids(): """Test that we will repair non-unique cell ids if asked during validation""" - import nbformat with TestsBase.fopen("invalid_unique_cell_id.ipynb", "r") as f: # Avoids validate call from `.read` @@ -294,7 +289,6 @@ def test_repair_non_unique_cell_ids(): @pytest.mark.filterwarnings("ignore::nbformat.warnings.MissingIDFieldWarning") def test_no_cell_ids(): """Test that a cell without a cell ID does not pass validation""" - import nbformat with TestsBase.fopen("v4_5_no_cell_id.ipynb", "r") as f: # Avoids validate call from `.read` @@ -311,7 +305,6 @@ def test_no_cell_ids(): @pytest.mark.filterwarnings("ignore::nbformat.warnings.MissingIDFieldWarning") def test_repair_no_cell_ids(): """Test that we will repair cells without ids if asked during validation""" - import nbformat with TestsBase.fopen("v4_5_no_cell_id.ipynb", "r") as f: # Avoids validate call from `.read` From 7842c8bc0c627ff189d0bc4251bee573cd765ac4 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 10:44:33 +0200 Subject: [PATCH 12/23] update changelog --- docs/changelog.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index fc7a2f28..f2124099 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,18 @@ Changes in nbformat In Development ============== +The biggest change in nbformat 5.5.0 is the deprecation of arguments to +``validate()`` that try to fix notebooks errors during validation. + +``validate()`` is a function that is core to the security model of Jupyter, +and is assumed in a number of places to not mutate it's argument, or try to fix +notebooks passed to it. + +Auto fixing of notebook in validate can also hide subtle bugs, and will +therefore be updated in a near future to not take any of the argument related to +auto-fixing, and fail instead of silently modifying its parameters on invalid +notebooks. + 5.4.0 ===== * Add project URLs to ``setup.py`` From 16ff6ed47020e7812d03a1cb30748835269ab900 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 14:08:47 +0200 Subject: [PATCH 13/23] update deprecations --- nbformat/current.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/nbformat/current.py b/nbformat/current.py index fb2db279..9f6688d4 100644 --- a/nbformat/current.py +++ b/nbformat/current.py @@ -84,7 +84,7 @@ class NBFormatError(ValueError): def _warn_format(): warnings.warn( - """Non-JSON file support in nbformat is deprecated. + """Non-JSON file support in nbformat is deprecated since nbformat 1.0. Use nbconvert to create files of other formats.""" ) @@ -107,13 +107,21 @@ def parse_py(s, **kwargs): def reads_json(nbjson, **kwargs): """DEPRECATED, use reads""" - warnings.warn("reads_json is deprecated, use reads") + warnings.warn( + "reads_json is deprecated since nbformat 3.0, use reads", + DeprecationWarning, + stacklevel=2, + ) return reads(nbjson) def writes_json(nb, **kwargs): """DEPRECATED, use writes""" - warnings.warn("writes_json is deprecated, use writes") + warnings.warn( + "writes_json is deprecated since nbformat 3.0, use writes", + DeprecationWarning, + stacklevel=2, + ) return writes(nb, **kwargs) From 4697e7fe564e946e6b44620ee7dfb80f4178700f Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 14:23:20 +0200 Subject: [PATCH 14/23] write strict by default --- nbformat/current.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nbformat/current.py b/nbformat/current.py index 9f6688d4..09701a31 100644 --- a/nbformat/current.py +++ b/nbformat/current.py @@ -166,7 +166,9 @@ def reads(s, format="DEPRECATED", version=current_nbformat, **kwargs): nb = reader_reads(s, **kwargs) nb = convert(nb, version) try: - validate(nb) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + validate(nb, repair_duplicate_cell_ids=False) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return nb @@ -194,7 +196,9 @@ def writes(nb, format="DEPRECATED", version=current_nbformat, **kwargs): _warn_format() nb = convert(nb, version) try: - validate(nb) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + validate(nb, repair_duplicate_cell_ids=False) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return versions[version].writes_json(nb, **kwargs) From d404679a12fa8cef5fa3f880144074745a3c5d39 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 14:42:56 +0200 Subject: [PATCH 15/23] rely on isvalid --- nbformat/current.py | 15 ++++----------- nbformat/validator.py | 7 ++++++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/nbformat/current.py b/nbformat/current.py index 09701a31..b1766760 100644 --- a/nbformat/current.py +++ b/nbformat/current.py @@ -42,7 +42,7 @@ from . import versions from .converter import convert from .reader import reads as reader_reads -from .validator import ValidationError, validate +from .validator import ValidationError, isvalid, validate __all__ = [ "NotebookNode", @@ -61,6 +61,7 @@ "to_notebook_json", "convert", "validate", + "isvalid", "NBFormatError", "parse_py", "reads_json", @@ -165,11 +166,7 @@ def reads(s, format="DEPRECATED", version=current_nbformat, **kwargs): _warn_format() nb = reader_reads(s, **kwargs) nb = convert(nb, version) - try: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - validate(nb, repair_duplicate_cell_ids=False) - except ValidationError as e: + if not isvalid(nb): get_logger().error("Notebook JSON is invalid: %s", e) return nb @@ -195,11 +192,7 @@ def writes(nb, format="DEPRECATED", version=current_nbformat, **kwargs): if format not in {"DEPRECATED", "json"}: _warn_format() nb = convert(nb, version) - try: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - validate(nb, repair_duplicate_cell_ids=False) - except ValidationError as e: + if not isvalid(nb): get_logger().error("Notebook JSON is invalid: %s", e) return versions[version].writes_json(nb, **kwargs) diff --git a/nbformat/validator.py b/nbformat/validator.py index 84a3c5c9..bda762de 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -114,12 +114,17 @@ def isvalid(nbjson, ref=None, version=None, version_minor=None): To see the individual errors that were encountered, please use the `validate` function instead. """ + orig = deepcopy(nbjson) try: - validate(nbjson, ref, version, version_minor) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False) except ValidationError: return False else: return True + finally: + assert nbjson == orig def _format_as_index(indices): From 630530c136167929740847fc795a79d0f92e8e47 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 05:56:14 -0700 Subject: [PATCH 16/23] Apply suggestions from code review Co-authored-by: Joachim Jablon --- nbformat/validator.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index bda762de..6a140d79 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -311,15 +311,18 @@ def _normalize( Private normalisation routine. - This attempt to normalize le nbdict passed to it. As this is currently used - both in `validate()` for historical reasons, and in the `normalize` public - function it does currently mutates it's argument. Ideally once removed from - the `validate()` function, it should stop mutating it's arguments. + This function attempts to normalize the `nbdict` passed to it. + + As `_normalize()` is currently used both in `validate()` (for + historical reasons), and in the `normalize()` public function, + `_normalize()` does currently mutate `nbdict`. + Ideally, once `validate()` stops calling `_normalize()`, `_normalize()` + may stop mutating `nbdict`. """ changes = 0 - if version >= 4 and version_minor >= 5: + if (version, version_minor) >= (4, 5): # if we support cell ids ensure default ids are provided for cell in nbdict["cells"]: if "id" not in cell: @@ -327,8 +330,8 @@ def _normalize( "Code cell is missing an id field, this will become" " a hard error in future nbformat versions. You may want" " to use `normalize()` on your notebooks before validations" - " (available since nbformat 5.1.4). Previous of nbformat" - " are also mutating their arguments, and will stop to do so" + " (available since nbformat 5.1.4). Previous versions of nbformat" + " are fixing this issue transparently, and will stop doing so" " in the future.", MissingIDFieldWarning, stacklevel=3, @@ -465,7 +468,7 @@ def validate( def _try_fix_error(nbdict: Any, version: int, version_minor: int, relax_add_props: bool) -> int: """ - This function try to extract errors from the validator + This function tries to extract errors from the validator and fix them if necessary. """ @@ -478,12 +481,12 @@ def _try_fix_error(nbdict: Any, version: int, version_minor: int, relax_add_prop if len(errors) > 0: if validator.name == "fastjsonschema": validator = get_validator( - version, - version_minor, + version=version, + version_minor=version_minor, relax_add_props=relax_add_props, name="jsonschema", ) - errors = [e for e in validator.iter_errors(nbdict)] + errors = list(validator.iter_errors(nbdict)) error_tree = validator.error_tree(errors) if "metadata" in error_tree: @@ -537,8 +540,8 @@ def iter_validate( Notes ----- - To fix: For security reason this should not ever mutate it's arguments, and - should not ever try to validate a mutated or modified version of it's notebook. + To fix: For security reasons, this function should *never* mutate its `nbdict` argument, and + should *never* try to validate a mutated or modified version of its notebook. """ From 7c86058dca22b413fbf9f76a18a1b6c73d070960 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Jun 2022 12:57:28 +0000 Subject: [PATCH 17/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- nbformat/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 6a140d79..81b961e4 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -312,7 +312,7 @@ def _normalize( This function attempts to normalize the `nbdict` passed to it. - + As `_normalize()` is currently used both in `validate()` (for historical reasons), and in the `normalize()` public function, `_normalize()` does currently mutate `nbdict`. From 733941cf8a778ce11527a3e3f5d855a50e3ef601 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 22 Jun 2022 15:02:35 +0200 Subject: [PATCH 18/23] isvalid does not return errors --- nbformat/current.py | 15 +++++++++++---- nbformat/validator.py | 11 +++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/nbformat/current.py b/nbformat/current.py index b1766760..09701a31 100644 --- a/nbformat/current.py +++ b/nbformat/current.py @@ -42,7 +42,7 @@ from . import versions from .converter import convert from .reader import reads as reader_reads -from .validator import ValidationError, isvalid, validate +from .validator import ValidationError, validate __all__ = [ "NotebookNode", @@ -61,7 +61,6 @@ "to_notebook_json", "convert", "validate", - "isvalid", "NBFormatError", "parse_py", "reads_json", @@ -166,7 +165,11 @@ def reads(s, format="DEPRECATED", version=current_nbformat, **kwargs): _warn_format() nb = reader_reads(s, **kwargs) nb = convert(nb, version) - if not isvalid(nb): + try: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + validate(nb, repair_duplicate_cell_ids=False) + except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return nb @@ -192,7 +195,11 @@ def writes(nb, format="DEPRECATED", version=current_nbformat, **kwargs): if format not in {"DEPRECATED", "json"}: _warn_format() nb = convert(nb, version) - if not isvalid(nb): + try: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + validate(nb, repair_duplicate_cell_ids=False) + except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return versions[version].writes_json(nb, **kwargs) diff --git a/nbformat/validator.py b/nbformat/validator.py index bda762de..ad652f3b 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -296,7 +296,14 @@ def normalize( version = nbdict_version if version_minor is None: version_minor = nbdict_version_minor - return _normalize(nbdict, version, version_minor, True, relax_add_props=relax_add_props) + return _normalize( + nbdict, + version, + version_minor, + True, + relax_add_props=relax_add_props, + strip_invalid_metadata=False, + ) def _normalize( @@ -305,7 +312,7 @@ def _normalize( version_minor: int, repair_duplicate_cell_ids: bool, relax_add_props: bool, - strip_invalid_metadata: bool = False, + strip_invalid_metadata: bool, ) -> Tuple[Any, int]: """ Private normalisation routine. From 4445c69affef1b48d468658d217db69f56ce3840 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 3 Aug 2022 10:19:34 +0200 Subject: [PATCH 19/23] take reviews into account --- nbformat/validator.py | 32 +++++++++++++++++++++++++++----- tests/test_validator.py | 5 ++--- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index cd9844ce..fc4144cb 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -369,7 +369,9 @@ def _normalize( raise ValidationError(f"Non-unique cell id '{cell_id}' detected.") seen_ids.add(cell_id) if strip_invalid_metadata: - changes += _try_fix_error(nbdict, version, version_minor, relax_add_props=relax_add_props) + changes += _strip_invalida_metadata( + nbdict, version, version_minor, relax_add_props=relax_add_props + ) return changes, nbdict @@ -473,10 +475,30 @@ def validate( raise error -def _try_fix_error(nbdict: Any, version: int, version_minor: int, relax_add_props: bool) -> int: +def _strip_invalida_metadata( + nbdict: Any, version: int, version_minor: int, relax_add_props: bool +) -> int: """ - This function tries to extract errors from the validator - and fix them if necessary. + + This function tries to extract metadata errors from the validator and fix + them if necessary. This mostly mean stripping unknown keys from metadata + fields, or removing metadata fields altogether. + + Parameters + ---------- + nbdict : dict + notebook document + version : int + version_minor : int + relax_add_props : bool + Wether to allow extra property in the Json schema validating the + notebook. + + Return + ------ + int : + number of modifications + """ validator = get_validator(version, version_minor, relax_add_props=relax_add_props) @@ -574,7 +596,7 @@ def iter_validate( errors = validator.iter_errors(nbdict, {"$ref": "#/definitions/%s" % ref}) else: if strip_invalid_metadata: - _try_fix_error(nbdict, version, version_minor, relax_add_props) + _strip_invalida_metadata(nbdict, version, version_minor, relax_add_props) # Validate one more time to ensure that us removing metadata # didn't cause another complex validation issue in the schema. diff --git a/tests/test_validator.py b/tests/test_validator.py index 7b4e242b..f8f8e7b4 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -44,7 +44,6 @@ def test_should_warn(validator_name): nb = read(f, as_version=4) del nb.cells[3]["id"] - assert nb.cells[3].get("id") is None assert nb.cells[3]["cell_type"] == "code" nb_copy = deepcopy(nb) @@ -59,7 +58,7 @@ def test_should_warn(validator_name): def test_should_not_mutate(validator_name): """Test that a v4 notebook without id raise an error and does/not mutate - Probably should be 2 test. To enable in the future. + Probably should be 2 distinct tests. To enable in the future. """ set_validator(validator_name) with TestsBase.fopen("test4.5.ipynb", "r") as f: @@ -75,7 +74,7 @@ def test_should_not_mutate(validator_name): assert nb == nb_deep_copy - assert isvalid(nb) is True + assert isvalid(nb) is False @pytest.mark.parametrize("validator_name", VALIDATORS) From 449b6502174e998a20c360011342cfad7e965dc2 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 3 Aug 2022 10:27:32 +0200 Subject: [PATCH 20/23] docs --- nbformat/validator.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index fc4144cb..944d0fca 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -407,10 +407,31 @@ def validate( Parameters ---------- + nbdict : dict + notebook document ref : optional, str reference to the subset of the schema we want to validate against. for example ``"markdown_cell"``, `"code_cell"` .... - Raises ValidationError if not valid. + version : int + version_minor : int + relax_add_props : bool + Deprecated since 5.5.0 – will be removed in the future. + Wether to allow extra property in the Json schema validating the + notebook. + nbjson : + repair_duplicate_cell_ids : boolny + Deprecated since 5.5.0 – will be removed in the future. + strip_invalid_metadata : bool + Deprecated since 5.5.0 – will be removed in the future. + + Returns + ------- + None + + + Raises + ------ + ValidationError if not valid. """ assert isinstance(ref, str) or ref is None From 1c625fddef3d2e8f63c64d8b84f71096fbe7a8e6 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 3 Aug 2022 10:28:50 +0200 Subject: [PATCH 21/23] cleanup documentation with veln --- nbformat/validator.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 944d0fca..bbc85fb6 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -317,7 +317,6 @@ def _normalize( """ Private normalisation routine. - This function attempts to normalize the `nbdict` passed to it. As `_normalize()` is currently used both in `validate()` (for @@ -418,7 +417,7 @@ def validate( Deprecated since 5.5.0 – will be removed in the future. Wether to allow extra property in the Json schema validating the notebook. - nbjson : + nbjson repair_duplicate_cell_ids : boolny Deprecated since 5.5.0 – will be removed in the future. strip_invalid_metadata : bool @@ -428,7 +427,6 @@ def validate( ------- None - Raises ------ ValidationError if not valid. @@ -500,7 +498,6 @@ def _strip_invalida_metadata( nbdict: Any, version: int, version_minor: int, relax_add_props: bool ) -> int: """ - This function tries to extract metadata errors from the validator and fix them if necessary. This mostly mean stripping unknown keys from metadata fields, or removing metadata fields altogether. @@ -515,12 +512,11 @@ def _strip_invalida_metadata( Wether to allow extra property in the Json schema validating the notebook. - Return - ------ - int : + Returns + ------- + int number of modifications - """ validator = get_validator(version, version_minor, relax_add_props=relax_add_props) if not validator: @@ -587,13 +583,11 @@ def iter_validate( Returns a generator of all ValidationErrors if not valid. - Notes ----- To fix: For security reasons, this function should *never* mutate its `nbdict` argument, and should *never* try to validate a mutated or modified version of its notebook. - """ # backwards compatibility for nbjson argument if nbdict is not None: From 898cd15015e7b43a77110555948c95286cef4640 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 3 Aug 2022 10:31:20 +0200 Subject: [PATCH 22/23] note on deprecation --- nbformat/validator.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/nbformat/validator.py b/nbformat/validator.py index bbc85fb6..0124bae7 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -430,6 +430,16 @@ def validate( Raises ------ ValidationError if not valid. + + + Notes + ----- + + Prior to Nbformat 5.5.0 the `validate` and `isvalid` method would silently + try to fix invalid notebook and mutate arguments. This behavior is deprecated + and will be removed in a near future. + + Please explicitly call `normalize` if you need to normalize notebooks. """ assert isinstance(ref, str) or ref is None From fc45fed51440b6f2d38578171777a83adafed2f5 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 3 Aug 2022 10:56:02 +0200 Subject: [PATCH 23/23] test that in_valid does note mutate or autofix --- nbformat/validator.py | 1 + tests/test_validator.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/nbformat/validator.py b/nbformat/validator.py index 0124bae7..f63c841a 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -118,6 +118,7 @@ def isvalid(nbjson, ref=None, version=None, version_minor=None): try: with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) + warnings.filterwarnings("ignore", category=MissingIDFieldWarning) validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False) except ValidationError: return False diff --git a/tests/test_validator.py b/tests/test_validator.py index f8f8e7b4..7edf8555 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -65,7 +65,6 @@ def test_should_not_mutate(validator_name): nb = read(f, as_version=4) del nb.cells[3]["id"] - assert nb.cells[3].get("id") is None assert nb.cells[3]["cell_type"] == "code" nb_deep_copy = deepcopy(nb) @@ -77,6 +76,36 @@ def test_should_not_mutate(validator_name): assert isvalid(nb) is False +def _invalidator_1(nb): + del nb.cells[3]["id"] + + +def _invalidator_3(nb): + nb.cells[3]["id"] = "hey" + nb.cells[2]["id"] = "hey" + + +def _invalidator_2(nb): + nb.cells[3]["id"] = nb.cells[2]["id"] + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +@pytest.mark.parametrize("invalidator", [_invalidator_1, _invalidator_2]) +def test_is_valid_should_not_mutate(validator_name, invalidator): + """Test that a v4 notebook does not mutate in is_valid, and does note autofix.""" + set_validator(validator_name) + with TestsBase.fopen("test4.5.ipynb", "r") as f: + nb = read(f, as_version=4) + + invalidator(nb) + assert nb.cells[3]["cell_type"] == "code" + + nb_deep_copy = deepcopy(nb) + assert isvalid(nb) is False + + assert nb == nb_deep_copy + + @pytest.mark.parametrize("validator_name", VALIDATORS) def test_nb2(validator_name): """Test that a v2 notebook converted to current passes validation"""