RHOAIENG-16587: fix(test): ensure papermill tests run successfully for all supported notebooks #834

Open · wants to merge 1 commit into base: main
1 change: 1 addition & 0 deletions .gitignore
@@ -128,6 +128,7 @@ venv/
ENV/
env.bak/
venv.bak/
.DS_store

# Spyder project settings
.spyderproject
71 changes: 10 additions & 61 deletions Makefile
@@ -28,13 +28,15 @@ BUILD_DEPENDENT_IMAGES ?= yes
PUSH_IMAGES ?= yes

# OS dependant: Generate date, select appropriate cmd to locate container engine
ifeq ($(OS), Windows_NT)
DATE ?= $(shell powershell -Command "Get-Date -Format 'yyyyMMdd'")
WHERE_WHICH ?= where
else
DATE ?= $(shell date +'%Y%m%d')
WHERE_WHICH ?= which
ifdef OS
ifeq ($(OS), Windows_NT)
DATE ?= $(shell powershell -Command "Get-Date -Format 'yyyyMMdd'")
WHERE_WHICH ?= where
endif
endif
DATE ?= $(shell date +'%Y%m%d')
WHERE_WHICH ?= which


# linux/amd64 or darwin/arm64
OS_ARCH=$(shell go env GOOS)/$(shell go env GOARCH)
@@ -340,64 +342,11 @@ undeploy-c9s-%: bin/kubectl
$(info # Undeploying notebook from $(NOTEBOOK_DIR) directory...)
$(KUBECTL_BIN) delete -k $(NOTEBOOK_DIR)

# Function for testing a notebook with papermill
# ARG 1: Notebook name
# ARG 1: UBI flavor
# ARG 1: Python kernel
define test_with_papermill
$(eval PREFIX_NAME := $(subst /,-,$(1)_$(2)))
$(KUBECTL_BIN) exec $(FULL_NOTEBOOK_NAME) -- /bin/sh -c "python3 -m pip install papermill"
if ! $(KUBECTL_BIN) exec $(FULL_NOTEBOOK_NAME) -- /bin/sh -c "wget ${NOTEBOOK_REPO_BRANCH_BASE}/jupyter/$(1)/$(2)-$(3)/test/test_notebook.ipynb -O test_notebook.ipynb && python3 -m papermill test_notebook.ipynb $(PREFIX_NAME)_output.ipynb --kernel python3 --stderr-file $(PREFIX_NAME)_error.txt" ; then
echo "ERROR: The $(1) $(2) notebook encountered a failure. To investigate the issue, you can review the logs located in the ocp-ci cluster on 'artifacts/notebooks-e2e-tests/jupyter-$(1)-$(2)-$(3)-test-e2e' directory or run 'cat $(PREFIX_NAME)_error.txt' within your container. The make process has been aborted."
exit 1
fi
if $(KUBECTL_BIN) exec $(FULL_NOTEBOOK_NAME) -- /bin/sh -c "cat $(PREFIX_NAME)_error.txt | grep --quiet FAILED" ; then
echo "ERROR: The $(1) $(2) notebook encountered a failure. The make process has been aborted."
$(KUBECTL_BIN) exec $(FULL_NOTEBOOK_NAME) -- /bin/sh -c "cat $(PREFIX_NAME)_error.txt"
exit 1
fi
endef

# Verify the notebook's readiness by pinging the /api endpoint and executing the corresponding test_notebook.ipynb file in accordance with the build chain logic.
.PHONY: test
test-%: bin/kubectl
# Verify the notebook's readiness by pinging the /api endpoint
$(eval NOTEBOOK_NAME := $(subst .,-,$(subst cuda-,,$*)))
$(eval PYTHON_VERSION := $(shell echo $* | sed 's/.*-python-//'))
$(info # Running tests for $(NOTEBOOK_NAME) notebook...)
$(KUBECTL_BIN) wait --for=condition=ready pod -l app=$(NOTEBOOK_NAME) --timeout=600s
$(KUBECTL_BIN) port-forward svc/$(NOTEBOOK_NAME)-notebook 8888:8888 & curl --retry 5 --retry-delay 5 --retry-connrefused http://localhost:8888/notebook/opendatahub/jovyan/api ; EXIT_CODE=$$?; echo && pkill --full "^$(KUBECTL_BIN).*port-forward.*"
$(eval FULL_NOTEBOOK_NAME = $(shell ($(KUBECTL_BIN) get pods -l app=$(NOTEBOOK_NAME) -o custom-columns=":metadata.name" | tr -d '\n')))

# Tests notebook's functionalities
if echo "$(FULL_NOTEBOOK_NAME)" | grep -q "minimal-ubi9"; then
$(call test_with_papermill,minimal,ubi9,python-$(PYTHON_VERSION))
elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "intel-tensorflow-ubi9"; then
$(call test_with_papermill,intel/tensorflow,ubi9,python-$(PYTHON_VERSION))
elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "intel-pytorch-ubi9"; then
$(call test_with_papermill,intel/pytorch,ubi9,python-$(PYTHON_VERSION))
elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "datascience-ubi9"; then
$(MAKE) validate-ubi9-datascience PYTHON_VERSION=$(PYTHON_VERSION) -e FULL_NOTEBOOK_NAME=$(FULL_NOTEBOOK_NAME)
elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "pytorch-ubi9"; then
$(MAKE) validate-ubi9-datascience PYTHON_VERSION=$(PYTHON_VERSION) -e FULL_NOTEBOOK_NAME=$(FULL_NOTEBOOK_NAME)
$(call test_with_papermill,pytorch,ubi9,python-$(PYTHON_VERSION))
elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "tensorflow-ubi9"; then
$(MAKE) validate-ubi9-datascience PYTHON_VERSION=$(PYTHON_VERSION) -e FULL_NOTEBOOK_NAME=$(FULL_NOTEBOOK_NAME)
$(call test_with_papermill,tensorflow,ubi9,python-$(PYTHON_VERSION))
elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "intel-ml-ubi9"; then
$(call test_with_papermill,intel/ml,ubi9,python-$(PYTHON_VERSION))
elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "trustyai-ubi9"; then
$(call test_with_papermill,trustyai,ubi9,python-$(PYTHON_VERSION))
elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "anaconda"; then
echo "There is no test notebook implemented yet for Anaconda Notebook...."
else
echo "No matching condition found for $(FULL_NOTEBOOK_NAME)."
fi

.PHONY: validate-ubi9-datascience
validate-ubi9-datascience:
$(call test_with_papermill,minimal,ubi9,python-$(PYTHON_VERSION))
$(call test_with_papermill,datascience,ubi9,python-$(PYTHON_VERSION))
$(info # Running tests for $* notebook...)
@./scripts/test_jupyter_with_papermill.sh $*

# Validate that runtime image meets minimum criteria
# This validation is created from subset of https://github.com/elyra-ai/elyra/blob/9c417d2adc9d9f972de5f98fd37f6945e0357ab9/Makefile#L325
37 changes: 27 additions & 10 deletions jupyter/datascience/ubi9-python-3.11/test/test_notebook.ipynb
@@ -7,6 +7,9 @@
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import json\n",
"import re\n",
"import unittest\n",
"from unittest import mock\n",
"from platform import python_version\n",
@@ -24,22 +27,35 @@
"import kafka\n",
"from kafka import KafkaConsumer, KafkaProducer, TopicPartition\n",
"from kafka.producer.buffer import SimpleBufferPool\n",
"from kafka import KafkaConsumer\n",
"from kafka.errors import KafkaConfigurationError\n",
"import boto3\n",
"\n",
"def get_major_minor(s):\n",
" return '.'.join(s.split('.')[:2])\n",
"\n",
"def load_expected_versions() -> dict:\n",
" lock_file = Path('./expected_versions.json')\n",
" data = {}\n",
"\n",
" with open(lock_file, 'r') as file:\n",
" data = json.load(file)\n",
"\n",
" return data \n",
"\n",
"def get_expected_version(dependency_name: str) -> str:\n",
" raw_value = expected_versions.get(dependency_name)\n",
" raw_version = re.sub(r'^\\D+', '', raw_value)\n",
" return get_major_minor(raw_version)\n",
"\n",
"class TestPythonVersion(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '3.11'\n",
" expected_major_minor = expected_major_minor = get_expected_version('Python')\n",
" actual_major_minor = get_major_minor(python_version())\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
"class TestPandas(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '2.2'\n",
" expected_major_minor = get_expected_version('Pandas')\n",
" actual_major_minor = get_major_minor(pd.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -94,7 +110,7 @@
"\n",
"class TestNumpy(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '2.1'\n",
" expected_major_minor = get_expected_version('Numpy')\n",
" actual_major_minor = get_major_minor(np.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -119,7 +135,7 @@
"\n",
"class TestScipy(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '1.14'\n",
" expected_major_minor = get_expected_version('Scipy')\n",
" actual_major_minor = get_major_minor(scipy.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -136,7 +152,7 @@
"\n",
"class TestSKLearn(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '1.5'\n",
" expected_major_minor = get_expected_version('Scikit-learn')\n",
" actual_major_minor = get_major_minor(sklearn.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -160,7 +176,7 @@
"class TestMatplotlib(unittest.TestCase):\n",
"\n",
" def test_version(self):\n",
" expected_major_minor = '3.9'\n",
" expected_major_minor = get_expected_version('Matplotlib')\n",
" actual_major_minor = get_major_minor(matplotlib.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -171,7 +187,7 @@
"class TestKafkaPython(unittest.TestCase):\n",
"\n",
" def test_version(self):\n",
" expected_major_minor = '2.2'\n",
" expected_major_minor = get_expected_version('Kafka-Python-ng')\n",
" actual_major_minor = get_major_minor(kafka.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -193,7 +209,7 @@
"class TestBoto3(unittest.TestCase):\n",
"\n",
" def test_version(self):\n",
" expected_major_minor = '1.35'\n",
" expected_major_minor = get_expected_version('Boto3')\n",
" actual_major_minor = get_major_minor(boto3.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -212,6 +228,7 @@
"\n",
" self.assertEqual(boto3.DEFAULT_SESSION, session)\n",
"\n",
"expected_versions = load_expected_versions()\n",
"unittest.main(argv=[''], verbosity=2, exit=False)"
]
}
@@ -223,5 +240,5 @@
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 5
}
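
The notebook changes above replace each hard-coded `expected_major_minor = '…'` literal with a lookup into an `expected_versions.json` file read from the notebook's working directory. A minimal, self-contained sketch of that lookup is below; the sample pins are hypothetical (the real `expected_versions.json` is generated elsewhere in the build and is not shown in this diff), and the helpers here take the loaded dict as a parameter instead of the notebook-level global used above, purely to keep the sketch runnable on its own.

```python
# Sketch of the version-expectation helpers introduced in the test notebooks.
# The sample data below is hypothetical; the real expected_versions.json is
# produced elsewhere in the build and is not part of this diff.
import json
import re
from pathlib import Path


def get_major_minor(s: str) -> str:
    # "2.2.3" -> "2.2"
    return '.'.join(s.split('.')[:2])


def load_expected_versions(lock_file: Path = Path('./expected_versions.json')) -> dict:
    with open(lock_file, 'r') as file:
        return json.load(file)


def get_expected_version(expected_versions: dict, dependency_name: str) -> str:
    # Strip any non-digit specifier prefix (e.g. "~=", "==", ">=") before
    # truncating to major.minor, so pinned values compare cleanly against
    # the module's __version__ string.
    raw_value = expected_versions.get(dependency_name)
    raw_version = re.sub(r'^\D+', '', raw_value)
    return get_major_minor(raw_version)


if __name__ == '__main__':
    # Hypothetical pins, matching the shape the notebook code expects.
    sample = {'Python': '3.11', 'Pandas': '~=2.2.0', 'Boto3': '==1.35.16'}
    assert get_expected_version(sample, 'Pandas') == '2.2'
    assert get_expected_version(sample, 'Boto3') == '1.35'
    print({name: get_expected_version(sample, name) for name in sample})
```
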
@@ -36,17 +36,19 @@ spec:
livenessProbe:
tcpSocket:
port: notebook-port
initialDelaySeconds: 5
periodSeconds: 5
initialDelaySeconds: 15
periodSeconds: 10
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 3
readinessProbe:
httpGet:
path: /notebook/opendatahub/jovyan/api
port: notebook-port
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 5
initialDelaySeconds: 15
periodSeconds: 10
timeoutSeconds: 5
Comment on lines +39 to +51
@jiridanek (Member) commented on Jan 18, 2025:

in production, these timeouts come from dashboard; maybe we should use what dashboard sets? or possibly be stricter; and not more generous here

https://github.com/opendatahub-io/odh-dashboard/blob/440d8fef243d9ae78ed5a3a4c51002d655521cb6/frontend/src/api/k8s/notebooks.ts#L160-L176

@andyatmiami (Contributor, Author) commented on Jan 18, 2025:

Dashboard also takes a user-supplied configured value for resources:

And at some level, the amount of compute resources available to the workload is going to influence the liveness/readiness probe performance.

I don't personally believe it's a good apples-to-apples comparison.

That being said - I didn't spend time determining which "lever" of the probes was the problem - I suspect it's actually the timeoutSeconds - as compute-heavy workloads like TF, PYT, and Intel-ML (whatever that is 😎) do a lot of initial churning when firing up...

timeoutSeconds: 1 is the default behavior, so I fear following your recommendation would simply "revert" my change, and I have plenty of evidence from running these tests to know that without any changes these workloads WILL DIE, leading to flaky tests.

However, if the desire is to keep these properties as close to Dashboard as possible, I can spend a little more time profiling to perhaps land on something like timeoutSeconds: 3 | 4 (with all other values unchanged) that will keep things consistently passing... I do think some deviation from the Dashboard probe settings is unavoidable, though (just to set expectations).

successThreshold: 1
failureThreshold: 3
resources:
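
For context on the probe discussion above, a rough back-of-the-envelope comparison of the old and new settings: assuming standard Kubernetes probe semantics (the kubelet restarts a container only after failureThreshold consecutive failed liveness probes), the sketch below estimates how long a slow-starting notebook gets before a restart is triggered. The old timeoutSeconds of 1 is the default mentioned in the thread; the arithmetic is approximate and not taken from the PR itself.

```python
# Approximate liveness window before a restart, assuming standard Kubernetes
# probe semantics (restart after failureThreshold consecutive failed probes).
# Old values come from the removed lines plus the default timeoutSeconds of 1
# mentioned in the review thread; exact timing depends on probe scheduling.
old = {"initialDelaySeconds": 5, "periodSeconds": 5, "timeoutSeconds": 1, "failureThreshold": 3}
new = {"initialDelaySeconds": 15, "periodSeconds": 10, "timeoutSeconds": 5, "failureThreshold": 3}


def liveness_window(p: dict) -> int:
    # Rough upper bound: initial delay, then (failureThreshold - 1) further probe
    # periods, plus one probe timeout for the final failed attempt.
    return (p["initialDelaySeconds"]
            + (p["failureThreshold"] - 1) * p["periodSeconds"]
            + p["timeoutSeconds"])


print("old:", liveness_window(old), "s")  # ~16 s
print("new:", liveness_window(new), "s")  # ~40 s
```
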
52 changes: 33 additions & 19 deletions jupyter/intel/ml/ubi9-python-3.11/test/test_notebook.ipynb
@@ -3,11 +3,15 @@
{
"cell_type": "code",
"execution_count": null,
"id": "bac7ee1b-65c4-4515-8006-7ba01e843906",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import json\n",
"import re\n",
"import unittest\n",
"from unittest import mock\n",
"from platform import python_version\n",
@@ -27,7 +31,6 @@
"from kafka.producer.buffer import SimpleBufferPool\n",
"from kafka import KafkaConsumer\n",
"from kafka.errors import KafkaConfigurationError\n",
"import boto3\n",
"import kfp_tekton\n",
"import kfp\n",
"from kfp import LocalClient, run_pipeline_func_locally\n",
@@ -37,15 +40,29 @@
"def get_major_minor(s):\n",
" return '.'.join(s.split('.')[:2])\n",
"\n",
"def load_expected_versions() -> dict:\n",
" lock_file = Path('./expected_versions.json')\n",
" data = {}\n",
"\n",
" with open(lock_file, 'r') as file:\n",
" data = json.load(file)\n",
"\n",
" return data \n",
"\n",
"def get_expected_version(dependency_name: str) -> str:\n",
" raw_value = expected_versions.get(dependency_name)\n",
" raw_version = re.sub(r'^\\D+', '', raw_value)\n",
" return get_major_minor(raw_version)\n",
" \n",
"class TestPythonVersion(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '3.11'\n",
" expected_major_minor = get_expected_version('Python')\n",
" actual_major_minor = get_major_minor(python_version())\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
"class TestModin(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '0.24'\n",
" expected_major_minor = get_expected_version('Modin')\n",
" actual_major_minor = get_major_minor(pdm.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -61,7 +78,7 @@
"\n",
"class TestPandas(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '2.1'\n",
" expected_major_minor = get_expected_version('Pandas')\n",
" actual_major_minor = get_major_minor(pd.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -116,7 +133,7 @@
"\n",
"class TestNumpy(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '1.24'\n",
" expected_major_minor = get_expected_version('Numpy')\n",
" actual_major_minor = get_major_minor(np.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -141,7 +158,7 @@
"\n",
"class TestScipy(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '1.11'\n",
" expected_major_minor = get_expected_version('Scipy')\n",
" actual_major_minor = get_major_minor(scipy.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -158,7 +175,7 @@
"\n",
"class TestSKLearn(unittest.TestCase):\n",
" def test_version(self):\n",
" expected_major_minor = '1.3'\n",
" expected_major_minor = get_expected_version('Scikit-learn')\n",
" actual_major_minor = get_major_minor(sklearn.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -182,7 +199,7 @@
"class TestMatplotlib(unittest.TestCase):\n",
"\n",
" def test_version(self):\n",
" expected_major_minor = '3.6'\n",
" expected_major_minor = get_expected_version('Matplotlib')\n",
" actual_major_minor = get_major_minor(matplotlib.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -193,7 +210,7 @@
"class TestKafkaPython(unittest.TestCase):\n",
"\n",
" def test_version(self):\n",
" expected_major_minor = '2.0'\n",
" expected_major_minor = get_expected_version('Kafka-Python')\n",
" actual_major_minor = get_major_minor(kafka.__version__)\n",
" self.assertEqual(actual_major_minor, expected_major_minor, \"incorrect version\")\n",
"\n",
@@ -215,19 +232,16 @@
"class TestKFPTekton(unittest.TestCase):\n",
"\n",
" def test_version(self):\n",
" expected_major_minor = '1.6.0'\n",
" unsupported_version = '1.6'\n",
"\n",
" self.assertLess(kfp_tekton.__version__, expected_major_minor)\n",
" expected_major_minor = get_expected_version('KFP-Tekton')\n",
"\n",
" self.assertLess(expected_major_minor, unsupported_version)\n",
" self.assertLess(kfp_tekton.__version__, unsupported_version)\n",
"\n",
"expected_versions = load_expected_versions()\n",
"unittest.main(argv=[''], verbosity=2, exit=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -250,5 +264,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 5
}
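
One detail of the TestKFPTekton change above: both assertLess calls compare version strings, so the check relies on Python's lexicographic string ordering rather than numeric version ordering. A small illustration follows; the version values are hypothetical examples, not read from the image.

```python
# Illustration of the plain string comparisons used by TestKFPTekton above.
# Python compares strings lexicographically, character by character.
unsupported_version = '1.6'

# The intended case: a 1.5.x release sorts below the '1.6' marker.
assert '1.5.1' < unsupported_version

# Caveat of string ordering: a hypothetical '1.10.0' also sorts below '1.6'
# ('1' < '6' at the third character), even though 1.10 > 1.6 numerically.
assert '1.10.0' < unsupported_version
```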