From da60e9d9073734519e26a45fdf266633ccd98ebf Mon Sep 17 00:00:00 2001
From: Ramesh Sampath <1437573+sampathweb@users.noreply.github.com>
Date: Fri, 8 Dec 2023 16:39:58 -0600
Subject: [PATCH] Adds Kokoro GPU Tests (#2224)

* Adds Kokoro tests

* Add Kokoro Tests
---
Review notes (text in this section is dropped from the commit message by
`git am`):

 * build.sh: coverage now targets the importable package name (keras_cv);
   "keras-cv" is neither a module nor a directory in the checkout.
 * build.sh: LD_LIBRARY_PATH is extended without leaving empty entries
   (the dynamic loader treats an empty entry as the current directory).
 * NOTE(review): conftest.py calls pytest.fail() from pytest_configure;
   confirm the failure message is actually printed at that stage, otherwise
   consider pytest.exit(..., returncode=1).
 * NOTE(review): build.sh has no fallback branch for an unrecognised
   KERAS_BACKEND value; confirm whether failing fast is wanted there.

 .kokoro/README.md                             |  1 +
 .kokoro/github/ubuntu/gpu/build.sh            | 83 +++++++++++++++++++
 .kokoro/github/ubuntu/gpu/jax/continuous.cfg  | 18 ++++
 .kokoro/github/ubuntu/gpu/jax/presubmit.cfg   | 16 ++++
 .../github/ubuntu/gpu/keras2/continuous.cfg   | 18 ++++
 .../github/ubuntu/gpu/keras2/presubmit.cfg    | 16 ++++
 .../ubuntu/gpu/tensorflow/continuous.cfg      | 18 ++++
 .../ubuntu/gpu/tensorflow/presubmit.cfg       | 16 ++++
 .../github/ubuntu/gpu/torch/continuous.cfg    | 18 ++++
 .kokoro/github/ubuntu/gpu/torch/presubmit.cfg | 16 ++++
 keras_cv/conftest.py                          | 27 ++++++
 11 files changed, 247 insertions(+)
 create mode 100644 .kokoro/README.md
 create mode 100644 .kokoro/github/ubuntu/gpu/build.sh
 create mode 100644 .kokoro/github/ubuntu/gpu/jax/continuous.cfg
 create mode 100644 .kokoro/github/ubuntu/gpu/jax/presubmit.cfg
 create mode 100644 .kokoro/github/ubuntu/gpu/keras2/continuous.cfg
 create mode 100644 .kokoro/github/ubuntu/gpu/keras2/presubmit.cfg
 create mode 100644 .kokoro/github/ubuntu/gpu/tensorflow/continuous.cfg
 create mode 100644 .kokoro/github/ubuntu/gpu/tensorflow/presubmit.cfg
 create mode 100644 .kokoro/github/ubuntu/gpu/torch/continuous.cfg
 create mode 100644 .kokoro/github/ubuntu/gpu/torch/presubmit.cfg

diff --git a/.kokoro/README.md b/.kokoro/README.md
new file mode 100644
index 0000000000..2c7724d988
--- /dev/null
+++ b/.kokoro/README.md
@@ -0,0 +1 @@
+CI that runs on pull requests and on merges to master.
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/build.sh b/.kokoro/github/ubuntu/gpu/build.sh
new file mode 100644
index 0000000000..e8fc6f5d75
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/build.sh
@@ -0,0 +1,83 @@
+set -e
+set -x
+
+cd "${KOKORO_ROOT}/"
+
+sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
+
+PYTHON_BINARY="/usr/bin/python3.9"
+
+"${PYTHON_BINARY}" -m venv venv
+source venv/bin/activate
+# Check the python version
+python --version
+python3 --version
+
+export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+# Check cuda
+nvidia-smi
+nvcc --version
+
+cd "src/github/keras-cv"
+pip install -U pip setuptools
+
+if [ "${KERAS2:-0}" == "1" ]
+then
+   echo "Keras2 detected."
+   pip install -r requirements-common.txt --progress-bar off
+   pip install tensorflow~=2.14
+   pip install --extra-index-url https://download.pytorch.org/whl/cpu torch==2.1.0+cpu
+   pip install torchvision~=0.16.0
+   pip install "jax[cpu]"
+
+elif [ "$KERAS_BACKEND" == "tensorflow" ]
+then
+   echo "TensorFlow backend detected."
+   pip install -r requirements-tensorflow-cuda.txt --progress-bar off
+
+elif [ "$KERAS_BACKEND" == "jax" ]
+then
+   echo "JAX backend detected."
+   pip install -r requirements-jax-cuda.txt --progress-bar off
+
+elif [ "$KERAS_BACKEND" == "torch" ]
+then
+   echo "PyTorch backend detected."
+   pip install -r requirements-torch-cuda.txt --progress-bar off
+fi
+
+pip install --no-deps -e "." --progress-bar off
+
+# Run Extra Large Tests for Continuous builds
+if [ "${RUN_XLARGE:-0}" == "1" ]
+then
+   pytest --check_gpu --run_large --run_extra_large --durations 0 \
+      keras_cv/bounding_box \
+      keras_cv/callbacks \
+      keras_cv/losses \
+      keras_cv/layers/object_detection \
+      keras_cv/layers/preprocessing \
+      keras_cv/models/backbones \
+      keras_cv/models/classification \
+      keras_cv/models/object_detection/retinanet \
+      keras_cv/models/object_detection/yolo_v8 \
+      keras_cv/models/object_detection_3d \
+      keras_cv/models/segmentation \
+      keras_cv/models/stable_diffusion \
+      --cov=keras_cv
+else
+   pytest --check_gpu --run_large --durations 0 \
+      keras_cv/bounding_box \
+      keras_cv/callbacks \
+      keras_cv/losses \
+      keras_cv/layers/object_detection \
+      keras_cv/layers/preprocessing \
+      keras_cv/models/backbones \
+      keras_cv/models/classification \
+      keras_cv/models/object_detection/retinanet \
+      keras_cv/models/object_detection/yolo_v8 \
+      keras_cv/models/object_detection_3d \
+      keras_cv/models/segmentation \
+      keras_cv/models/stable_diffusion \
+      --cov=keras_cv
+fi
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/jax/continuous.cfg b/.kokoro/github/ubuntu/gpu/jax/continuous.cfg
new file mode 100644
index 0000000000..350fd02108
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/jax/continuous.cfg
@@ -0,0 +1,18 @@
+build_file: "keras-cv/.kokoro/github/ubuntu/gpu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.log"
+    regex: "**/sponge_log.xml"
+  }
+}
+
+env_vars: {
+  key: "KERAS_BACKEND"
+  value: "jax"
+}
+
+env_vars: {
+  key: "RUN_XLARGE"
+  value: "1"
+}
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/jax/presubmit.cfg b/.kokoro/github/ubuntu/gpu/jax/presubmit.cfg
new file mode 100644
index 0000000000..2aca2e95ff
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/jax/presubmit.cfg
@@ -0,0 +1,16 @@
+build_file: "keras-cv/.kokoro/github/ubuntu/gpu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.log"
+    regex: "**/sponge_log.xml"
+  }
+}
+
+env_vars: {
+  key: "KERAS_BACKEND"
+  value: "jax"
+}
+
+# Set timeout to 60 mins from default 180 mins
+timeout_mins: 60
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/keras2/continuous.cfg b/.kokoro/github/ubuntu/gpu/keras2/continuous.cfg
new file mode 100644
index 0000000000..361e35235b
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/keras2/continuous.cfg
@@ -0,0 +1,18 @@
+build_file: "keras-cv/.kokoro/github/ubuntu/gpu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.log"
+    regex: "**/sponge_log.xml"
+  }
+}
+
+env_vars: {
+  key: "KERAS2"
+  value: "1"
+}
+
+env_vars: {
+  key: "RUN_XLARGE"
+  value: "1"
+}
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/keras2/presubmit.cfg b/.kokoro/github/ubuntu/gpu/keras2/presubmit.cfg
new file mode 100644
index 0000000000..d5caba18f7
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/keras2/presubmit.cfg
@@ -0,0 +1,16 @@
+build_file: "keras-cv/.kokoro/github/ubuntu/gpu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.log"
+    regex: "**/sponge_log.xml"
+  }
+}
+
+env_vars: {
+  key: "KERAS2"
+  value: "1"
+}
+
+# Set timeout to 60 mins from default 180 mins
+timeout_mins: 60
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/tensorflow/continuous.cfg b/.kokoro/github/ubuntu/gpu/tensorflow/continuous.cfg
new file mode 100644
index 0000000000..9ed8200e71
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/tensorflow/continuous.cfg
@@ -0,0 +1,18 @@
+build_file: "keras-cv/.kokoro/github/ubuntu/gpu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.log"
+    regex: "**/sponge_log.xml"
+  }
+}
+
+env_vars: {
+  key: "KERAS_BACKEND"
+  value: "tensorflow"
+}
+
+env_vars: {
+  key: "RUN_XLARGE"
+  value: "1"
+}
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/tensorflow/presubmit.cfg b/.kokoro/github/ubuntu/gpu/tensorflow/presubmit.cfg
new file mode 100644
index 0000000000..f7e02e6efa
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/tensorflow/presubmit.cfg
@@ -0,0 +1,16 @@
+build_file: "keras-cv/.kokoro/github/ubuntu/gpu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.log"
+    regex: "**/sponge_log.xml"
+  }
+}
+
+env_vars: {
+  key: "KERAS_BACKEND"
+  value: "tensorflow"
+}
+
+# Set timeout to 60 mins from default 180 mins
+timeout_mins: 60
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/torch/continuous.cfg b/.kokoro/github/ubuntu/gpu/torch/continuous.cfg
new file mode 100644
index 0000000000..c3e118a6ef
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/torch/continuous.cfg
@@ -0,0 +1,18 @@
+build_file: "keras-cv/.kokoro/github/ubuntu/gpu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.log"
+    regex: "**/sponge_log.xml"
+  }
+}
+
+env_vars: {
+  key: "KERAS_BACKEND"
+  value: "torch"
+}
+
+env_vars: {
+  key: "RUN_XLARGE"
+  value: "1"
+}
\ No newline at end of file
diff --git a/.kokoro/github/ubuntu/gpu/torch/presubmit.cfg b/.kokoro/github/ubuntu/gpu/torch/presubmit.cfg
new file mode 100644
index 0000000000..a96e865152
--- /dev/null
+++ b/.kokoro/github/ubuntu/gpu/torch/presubmit.cfg
@@ -0,0 +1,16 @@
+build_file: "keras-cv/.kokoro/github/ubuntu/gpu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.log"
+    regex: "**/sponge_log.xml"
+  }
+}
+
+env_vars: {
+  key: "KERAS_BACKEND"
+  value: "torch"
+}
+
+# Set timeout to 60 mins from default 180 mins
+timeout_mins: 60
\ No newline at end of file
diff --git a/keras_cv/conftest.py b/keras_cv/conftest.py
index b8be780c39..eaee5024b9 100644
--- a/keras_cv/conftest.py
+++ b/keras_cv/conftest.py
@@ -17,6 +17,7 @@
 import tensorflow as tf
 from packaging import version
 
+from keras_cv.backend import config as backend_config
 from keras_cv.backend.config import keras_3
 
 
@@ -33,9 +34,35 @@ def pytest_addoption(parser):
         default=False,
         help="run extra_large tests",
     )
+    parser.addoption(
+        "--check_gpu",
+        action="store_true",
+        default=False,
+        help="fail if a gpu is not present",
+    )
 
 
 def pytest_configure(config):
+    # Verify that the device has a GPU and that the backend detects it
+    if config.getoption("--check_gpu"):
+        found_gpu = False
+        backend = backend_config.backend()
+        if backend == "jax":
+            import jax
+
+            try:
+                found_gpu = bool(jax.devices("gpu"))
+            except RuntimeError:
+                found_gpu = False
+        elif backend == "tensorflow":
+            found_gpu = bool(tf.config.list_logical_devices("GPU"))
+        elif backend == "torch":
+            import torch
+
+            found_gpu = bool(torch.cuda.device_count())
+        if not found_gpu:
+            pytest.fail(f"No GPUs discovered on the {backend} backend.")
+
     config.addinivalue_line(
         "markers", "large: mark test as being slow or requiring a network"
     )