Merge pull request #1654 from mathbunnyru/asalikhov/pre_commit_notebooks
Pre-commit hooks added and run for Jupyter notebooks: nbstripout, nbqa-pyupgrade, nbqa-black, nbqa-flake8
mathbunnyru authored Mar 16, 2022
2 parents 442e703 + 9346d39 commit 25add57
Showing 7 changed files with 189 additions and 313 deletions.
19 changes: 19 additions & 0 deletions .pre-commit-config.yaml
@@ -99,6 +99,25 @@ repos:
- id: markdownlint
args: ["--fix"]

# Run tools on Jupyter notebooks

# strip output from Jupyter notebooks
- repo: https://github.com/kynan/nbstripout
rev: 0.5.0
hooks:
- id: nbstripout

# nbQA provides tools from the Python ecosystem like
# pyupgrade, black, and flake8, adjusted for notebooks.
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.3.1
hooks:
- id: nbqa-pyupgrade
args: [--py39-plus]
- id: nbqa-black
args: [--target-version=py39]
- id: nbqa-flake8

# Docker hooks do not work in pre-commit.ci
# See: <https://github.com/pre-commit-ci/issues/issues/11>
ci:
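The three notebook hooks above explain most of the diffs that follow: nbstripout deletes recorded outputs and resets every `execution_count` to `null`, while the nbQA hooks apply pyupgrade, black, and flake8 to the code cells in place. Once the config is committed, the hooks run on `git commit` after `pre-commit install`, or across the whole repository with `pre-commit run --all-files`. A minimal sketch of what nbstripout-style output stripping amounts to, assuming `nbformat` is installed (the filename here is hypothetical, and the real hook also scrubs selected metadata):

import nbformat

# Hypothetical path; substitute any notebook under version control.
path = "example.ipynb"

nb = nbformat.read(path, as_version=4)
for cell in nb.cells:
    if cell.cell_type == "code":
        cell.outputs = []            # drop stdout, rich output, and errors
        cell.execution_count = None  # serialized as null in the JSON
nbformat.write(nb, path)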
11 changes: 7 additions & 4 deletions binder/README.ipynb
@@ -18,7 +18,10 @@
"outputs": [],
"source": [
"import os\n",
"print(f'This container is using tag {os.environ[\"TAG\"]} of the jupyter/base-notebook image')"
"\n",
"print(\n",
" f'This container is using tag {os.environ[\"TAG\"]} of the jupyter/base-notebook image'\n",
")"
]
},
{
@@ -111,7 +114,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -125,9 +128,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
"version": "3.9.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
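Beyond black's line wrapping shown above, the kernel display name changes to "Python 3 (ipykernel)" and `language_info.version` moves to 3.9.10, which lines up with the `--py39-plus` and `--target-version=py39` arguments in the config. As an illustration of the kind of rewrite `pyupgrade --py39-plus` performs (hypothetical code, not from this repository):

# Before (pre-3.9 typing imports):
#   from typing import Dict, List
#   def tag_counts(tags: List[str]) -> Dict[str, int]: ...
#
# After: PEP 585 builtin generics, valid on Python 3.9 and later.
def tag_counts(tags: list[str]) -> dict[str, int]:
    counts: dict[str, int] = {}
    for tag in tags:
        counts[tag] = counts.get(tag, 0) + 1
    return counts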
29 changes: 9 additions & 20 deletions tests/all-spark-notebook/data/issue_1168.ipynb
@@ -9,48 +9,37 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession\n",
"from pyspark.sql.functions import pandas_udf\n",
"\n",
"# Spark session & context\n",
"spark = SparkSession.builder.master('local').getOrCreate()"
"spark = SparkSession.builder.master(\"local\").getOrCreate()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---+---+\n",
"| id|age|\n",
"+---+---+\n",
"| 1| 21|\n",
"+---+---+\n",
"\n"
]
}
],
"outputs": [],
"source": [
"df = spark.createDataFrame([(1, 21), (2, 30)], (\"id\", \"age\"))\n",
"\n",
"\n",
"def filter_func(iterator):\n",
" for pdf in iterator:\n",
" yield pdf[pdf.id == 1]\n",
"\n",
"\n",
"df.mapInPandas(filter_func, df.schema).show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -64,7 +53,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"version": "3.9.10"
}
},
"nbformat": 4,
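With the recorded stdout now stripped, it is worth noting what this test notebook exercises: `DataFrame.mapInPandas` streams the DataFrame to a Python function as an iterator of pandas DataFrames, one per batch. Reassembled from the cells above as a standalone script, assuming pyspark, pandas, and pyarrow are installed (the notebook's unused `pandas_udf` import is dropped here):

from pyspark.sql import SparkSession

# Spark session & context
spark = SparkSession.builder.master("local").getOrCreate()

df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age"))


def filter_func(iterator):
    # Each element is a pandas DataFrame holding one batch of rows.
    for pdf in iterator:
        yield pdf[pdf.id == 1]


df.mapInPandas(filter_func, df.schema).show()
# The output deleted by nbstripout showed the single surviving row: id=1, age=21.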
27 changes: 5 additions & 22 deletions tests/all-spark-notebook/data/local_pyspark.ipynb
@@ -2,31 +2,14 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "Error",
"evalue": "Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
"traceback": [
"Error: Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
"at b.startServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:270430)",
"at async b.createServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:269873)",
"at async connect (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:397876)",
"at async w.ensureConnectionAndNotebookImpl (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556625)",
"at async w.ensureConnectionAndNotebook (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556303)",
"at async w.clearResult (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:552346)",
"at async w.reexecuteCell (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:540374)",
"at async w.reexecuteCells (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:537541)"
]
}
],
"outputs": [],
"source": [
"from pyspark.sql import SparkSession\n",
"\n",
"# Spark session & context\n",
"spark = SparkSession.builder.master('local').getOrCreate()\n",
"spark = SparkSession.builder.master(\"local\").getOrCreate()\n",
"sc = spark.sparkContext\n",
"\n",
"# Sum of the first 100 whole numbers\n",
@@ -38,7 +21,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -52,7 +35,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.9.10"
}
},
"nbformat": 4,
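The body elided from this notebook's first cell sums the first 100 whole numbers on a local Spark context; the Scala notebook below does the same and had recorded `res4: Double = 5050.0` before its output was stripped. A minimal sketch of that pattern, assuming pyspark is installed (not the notebook's exact elided code):

from pyspark.sql import SparkSession

# Spark session & context
spark = SparkSession.builder.master("local").getOrCreate()
sc = spark.sparkContext

# Sum of the first 100 whole numbers: 0 + 1 + ... + 100
rdd = sc.parallelize(range(101))
print(rdd.sum())  # 5050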
18 changes: 3 additions & 15 deletions tests/all-spark-notebook/data/local_spylon.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -14,21 +14,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[8] at parallelize at <console>:28\n",
"res4: Double = 5050.0\n"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"// Sum of the first 100 whole numbers\n",
"val rdd = sc.parallelize(0 to 100)\n",