Merge pull request #1654 from mathbunnyru/asalikhov/pre_commit_notebooks
Pre-commit hooks added and run for Jupyter notebooks: nbstripout, nbqa-pyupgrade, nbqa-black, nbqa-flake8
mathbunnyru authored Mar 16, 2022
2 parents 442e703 + 9346d39 commit 25add57
Showing 7 changed files with 189 additions and 313 deletions.
19 changes: 19 additions & 0 deletions .pre-commit-config.yaml
@@ -99,6 +99,25 @@ repos:
- id: markdownlint
args: ["--fix"]

# Run tools on Jupyter notebooks

# strip output from Jupyter notebooks
- repo: https://github.com/kynan/nbstripout
rev: 0.5.0
hooks:
- id: nbstripout

# nbQA provides tools from the Python ecosystem like
# pyupgrade, black, and flake8, adjusted for notebooks.
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.3.1
hooks:
- id: nbqa-pyupgrade
args: [--py39-plus]
- id: nbqa-black
args: [--target-version=py39]
- id: nbqa-flake8

# Docker hooks do not work in pre-commit.ci
# See: <https://github.com/pre-commit-ci/issues/issues/11>
ci:
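The three notebook hooks above explain most of the diffs that follow: nbstripout deletes recorded outputs and resets every `execution_count` to `null`, while the nbQA hooks apply pyupgrade, black, and flake8 to the code cells in place. Once the config is committed, the hooks run on `git commit` after `pre-commit install`, or across the whole repository with `pre-commit run --all-files`. A minimal sketch of what nbstripout-style output stripping amounts to, assuming `nbformat` is installed (the filename here is hypothetical, and the real hook also scrubs selected metadata):

import nbformat

# Hypothetical path; substitute any notebook under version control.
path = "example.ipynb"

nb = nbformat.read(path, as_version=4)
for cell in nb.cells:
    if cell.cell_type == "code":
        cell.outputs = []            # drop stdout, rich output, and errors
        cell.execution_count = None  # serialized as null in the JSON
nbformat.write(nb, path)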
11 changes: 7 additions & 4 deletions binder/README.ipynb
@@ -18,7 +18,10 @@
"outputs": [],
"source": [
"import os\n",
"print(f'This container is using tag {os.environ[\"TAG\"]} of the jupyter/base-notebook image')"
"\n",
"print(\n",
" f'This container is using tag {os.environ[\"TAG\"]} of the jupyter/base-notebook image'\n",
")"
]
},
{
@@ -111,7 +114,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -125,9 +128,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
"version": "3.9.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
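Beyond black's line wrapping shown above, the kernel display name changes to "Python 3 (ipykernel)" and `language_info.version` moves to 3.9.10, which lines up with the `--py39-plus` and `--target-version=py39` arguments in the config. As an illustration of the kind of rewrite `pyupgrade --py39-plus` performs (hypothetical code, not from this repository):

# Before (pre-3.9 typing imports):
#   from typing import Dict, List
#   def tag_counts(tags: List[str]) -> Dict[str, int]: ...
#
# After: PEP 585 builtin generics, valid on Python 3.9 and later.
def tag_counts(tags: list[str]) -> dict[str, int]:
    counts: dict[str, int] = {}
    for tag in tags:
        counts[tag] = counts.get(tag, 0) + 1
    return counts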
29 changes: 9 additions & 20 deletions tests/all-spark-notebook/data/issue_1168.ipynb
@@ -9,48 +9,37 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession\n",
"from pyspark.sql.functions import pandas_udf\n",
"\n",
"# Spark session & context\n",
"spark = SparkSession.builder.master('local').getOrCreate()"
"spark = SparkSession.builder.master(\"local\").getOrCreate()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---+---+\n",
"| id|age|\n",
"+---+---+\n",
"| 1| 21|\n",
"+---+---+\n",
"\n"
]
}
],
"outputs": [],
"source": [
"df = spark.createDataFrame([(1, 21), (2, 30)], (\"id\", \"age\"))\n",
"\n",
"\n",
"def filter_func(iterator):\n",
" for pdf in iterator:\n",
" yield pdf[pdf.id == 1]\n",
"\n",
"\n",
"df.mapInPandas(filter_func, df.schema).show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -64,7 +53,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"version": "3.9.10"
}
},
"nbformat": 4,
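With the recorded stdout now stripped, it is worth noting what this test notebook exercises: `DataFrame.mapInPandas` streams the DataFrame to a Python function as an iterator of pandas DataFrames, one per batch. Reassembled from the cells above as a standalone script, assuming pyspark, pandas, and pyarrow are installed (the notebook's unused `pandas_udf` import is dropped here):

from pyspark.sql import SparkSession

# Spark session & context
spark = SparkSession.builder.master("local").getOrCreate()

df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age"))


def filter_func(iterator):
    # Each element is a pandas DataFrame holding one batch of rows.
    for pdf in iterator:
        yield pdf[pdf.id == 1]


df.mapInPandas(filter_func, df.schema).show()
# The output deleted by nbstripout showed the single surviving row: id=1, age=21.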
27 changes: 5 additions & 22 deletions tests/all-spark-notebook/data/local_pyspark.ipynb
@@ -2,31 +2,14 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "Error",
"evalue": "Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
"traceback": [
"Error: Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
"at b.startServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:270430)",
"at async b.createServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:269873)",
"at async connect (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:397876)",
"at async w.ensureConnectionAndNotebookImpl (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556625)",
"at async w.ensureConnectionAndNotebook (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556303)",
"at async w.clearResult (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:552346)",
"at async w.reexecuteCell (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:540374)",
"at async w.reexecuteCells (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:537541)"
]
}
],
"outputs": [],
"source": [
"from pyspark.sql import SparkSession\n",
"\n",
"# Spark session & context\n",
"spark = SparkSession.builder.master('local').getOrCreate()\n",
"spark = SparkSession.builder.master(\"local\").getOrCreate()\n",
"sc = spark.sparkContext\n",
"\n",
"# Sum of the first 100 whole numbers\n",
@@ -38,7 +21,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -52,7 +35,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.9.10"
}
},
"nbformat": 4,
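The body elided from this notebook's first cell sums the first 100 whole numbers on a local Spark context; the Scala notebook below does the same and had recorded `res4: Double = 5050.0` before its output was stripped. A minimal sketch of that pattern, assuming pyspark is installed (not the notebook's exact elided code):

from pyspark.sql import SparkSession

# Spark session & context
spark = SparkSession.builder.master("local").getOrCreate()
sc = spark.sparkContext

# Sum of the first 100 whole numbers: 0 + 1 + ... + 100
rdd = sc.parallelize(range(101))
print(rdd.sum())  # 5050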
18 changes: 3 additions & 15 deletions tests/all-spark-notebook/data/local_spylon.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -14,21 +14,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[8] at parallelize at <console>:28\n",
"res4: Double = 5050.0\n"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"// Sum of the first 100 whole numbers\n",
"val rdd = sc.parallelize(0 to 100)\n",