From 843130f6ccbf6777685d1e9ab41a5b1d9498bb65 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Tue, 22 Apr 2025 12:12:31 -0500 Subject: [PATCH 01/22] CI: Add compute-sanitizer paths to linux test environment --- .github/workflows/test-wheel-linux.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 322f859e3..a8a7e147b 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -74,6 +74,13 @@ jobs: fi fi + COMPUTE_SANITIZER=${CUDA_HOME}/bin/compute-sanitizer + COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') + SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck" + if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then + SANITIZER_CMD="${SANITIZER_CMD} --padding=32" + fi + # make outputs from the previous job as env vars CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}" echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $GITHUB_ENV @@ -86,6 +93,8 @@ jobs: echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV echo "SKIP_CUDA_CORE_CYTHON_TEST=${SKIP_CUDA_CORE_CYTHON_TEST}" >> $GITHUB_ENV + echo "COMPUTE_SANITIZER_VERSION=${COMPUTE_SANITIZER_VERSION}" >> $GITHUB_ENV + echo "SANITIZER_CMD=${SANITIZER_CMD}" >> $GITHUB_ENV - name: Install dependencies uses: ./.github/actions/install_unix_deps @@ -202,9 +211,9 @@ jobs: if [[ "${{ inputs.host-platform }}" == linux* ]]; then bash tests/cython/build_tests.sh elif [[ "${{ inputs.host-platform }}" == win* ]]; then - # TODO: enable this once win-64 runners are up + # TODO: enable this once win-64 runners are up exit 1 - fi + fi pytest -rxXs -v tests/cython fi popd From 
c8df0dc2b86bacbe141eab1d64b67dc6b07f5731 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Tue, 22 Apr 2025 12:17:11 -0500 Subject: [PATCH 02/22] CI: Run pytest in the context of compute-sanitizer --- .github/workflows/test-wheel-linux.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index a8a7e147b..30e2f7524 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -203,7 +203,7 @@ jobs: pushd ./cuda_bindings pip install -r requirements.txt - pytest -rxXs -v tests/ + ${SANITIZER_CMD} pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. @@ -214,7 +214,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs -v tests/cython + ${SANITIZER_CMD} pytest -rxXs -v tests/cython fi popd @@ -238,7 +238,7 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs -v tests/ + ${SANITIZER_CMD} pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. 
Also, currently our CI always installs the @@ -252,7 +252,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs -v tests/cython + ${SANITIZER_CMD} pytest -rxXs -v tests/cython fi popd From 4ac79d054dd6be3880010fd2a2facb677852acb1 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Tue, 22 Apr 2025 12:23:01 -0500 Subject: [PATCH 03/22] CI: Only use compute-sanitizer for tests of one python version --- .github/workflows/test-wheel-linux.yml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 30e2f7524..6427f8098 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -74,11 +74,16 @@ jobs: fi fi - COMPUTE_SANITIZER=${CUDA_HOME}/bin/compute-sanitizer - COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') - SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck" - if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then - SANITIZER_CMD="${SANITIZER_CMD} --padding=32" + if [[ "${{ inputs.python-version }}" == "3.12" ]]; then + COMPUTE_SANITIZER=${CUDA_HOME}/bin/compute-sanitizer + COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') + SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck" + if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then + SANITIZER_CMD="${SANITIZER_CMD} --padding=32" + fi + else + COMPUTE_SANITIZER_VERSION="None" + SANITIZER_CMD="" fi # make outputs from the previous job as env vars From be912c9f73ade81937c404ca42d24e579a99d905 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Tue, 22 Apr 2025 12:51:42 -0500 Subject: [PATCH 04/22] CI: Add non-zero exitcode to compute-sanitizer --- .github/workflows/test-wheel-linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 6427f8098..87a754502 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -77,7 +77,7 @@ jobs: if [[ "${{ inputs.python-version }}" == "3.12" ]]; then COMPUTE_SANITIZER=${CUDA_HOME}/bin/compute-sanitizer COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') - SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck" + SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1" if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then SANITIZER_CMD="${SANITIZER_CMD} --padding=32" fi From 865eeb5ef51ae93154924d57c0a7fe2b1e09e832 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Tue, 22 Apr 2025 17:06:47 -0500 Subject: [PATCH 05/22] CI: Only run compute-sanitizer when testing against local ctk There is no compute-sanitizer wheel, so we can only run when the ctk is installed system-wide --- .github/workflows/test-wheel-linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 87a754502..e896e7401 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -74,7 +74,7 @@ jobs: fi fi - if [[ "${{ inputs.python-version }}" == "3.12" ]]; then + if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.local-ctk }}" == 1 ]]; then COMPUTE_SANITIZER=${CUDA_HOME}/bin/compute-sanitizer COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1" From ddd071484daf7e8c4ee7e96ac58c92023afd4540 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Wed, 23 Apr 2025 12:32:06 
-0500 Subject: [PATCH 06/22] CI: Delay CUDA_HOME variable expansion --- .github/workflows/test-wheel-linux.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index e896e7401..4eca49da3 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -75,7 +75,8 @@ jobs: fi if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.local-ctk }}" == 1 ]]; then - COMPUTE_SANITIZER=${CUDA_HOME}/bin/compute-sanitizer + # Use single quotes to delay expansion of $CUDA_HOME until after mini-CTK step is complete + COMPUTE_SANITIZER='${CUDA_HOME}/bin/compute-sanitizer' COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1" if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then From 507960590bbdfa6e50eaff0ed87be00e2be4c706 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Wed, 23 Apr 2025 14:25:06 -0500 Subject: [PATCH 07/22] CI: Move compute-sanitizer setup into own step after CTK setup Because the sanitizer commands depend on the version of the sanitizer we need to be able to run the sanitizer to set the sanitizer cmd. Thus, we need to set up the sanitizer after it is installed. 
--- .github/workflows/test-wheel-linux.yml | 31 +++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 4eca49da3..630f38dc7 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -74,19 +74,6 @@ jobs: fi fi - if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.local-ctk }}" == 1 ]]; then - # Use single quotes to delay expansion of $CUDA_HOME until after mini-CTK step is complete - COMPUTE_SANITIZER='${CUDA_HOME}/bin/compute-sanitizer' - COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') - SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1" - if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then - SANITIZER_CMD="${SANITIZER_CMD} --padding=32" - fi - else - COMPUTE_SANITIZER_VERSION="None" - SANITIZER_CMD="" - fi - # make outputs from the previous job as env vars CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}" echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $GITHUB_ENV @@ -99,8 +86,6 @@ jobs: echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV echo "SKIP_CUDA_CORE_CYTHON_TEST=${SKIP_CUDA_CORE_CYTHON_TEST}" >> $GITHUB_ENV - echo "COMPUTE_SANITIZER_VERSION=${COMPUTE_SANITIZER_VERSION}" >> $GITHUB_ENV - echo "SANITIZER_CMD=${SANITIZER_CMD}" >> $GITHUB_ENV - name: Install dependencies uses: ./.github/actions/install_unix_deps @@ -195,6 +180,22 @@ jobs: host-platform: ${{ inputs.host-platform }} cuda-version: ${{ inputs.cuda-version }} + - name: Set up compute-saniziter + run: | + if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.local-ctk }}" == 1 ]]; then + 
COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer" + COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') + SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1" + if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then + SANITIZER_CMD="${SANITIZER_CMD} --padding=32" + fi + else + COMPUTE_SANITIZER_VERSION="None" + SANITIZER_CMD="" + fi + echo "COMPUTE_SANITIZER_VERSION=${COMPUTE_SANITIZER_VERSION}" >> $GITHUB_ENV + echo "SANITIZER_CMD=${SANITIZER_CMD}" >> $GITHUB_ENV + - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} run: | From 4a034656cfcc1686dd63965d61d01c5131882003 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Thu, 24 Apr 2025 10:13:16 -0500 Subject: [PATCH 08/22] CI: Optionally skip tests that raise CUDA API errors --- cuda_bindings/tests/test_cuda.py | 26 ++++++++++++++++++++++++++ cuda_bindings/tests/test_cudart.py | 5 +++++ cuda_core/tests/test_cuda_utils.py | 8 ++++++++ cuda_core/tests/test_event.py | 8 +++++++- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py index 7f92aefe2..0c5233f1a 100644 --- a/cuda_bindings/tests/test_cuda.py +++ b/cuda_bindings/tests/test_cuda.py @@ -5,6 +5,7 @@ # this software. Any use, reproduction, disclosure, or distribution of # this software and related documentation outside the terms of the EULA # is strictly prohibited. 
+import os import platform import shutil import textwrap @@ -83,6 +84,10 @@ def test_cuda_memcpy(): assert err == cuda.CUresult.CUDA_SUCCESS +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitzer is running, and this test intentionally causes an API error.", +) def test_cuda_array(): (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -236,6 +241,10 @@ def test_cuda_uuid_list_access(): assert err == cuda.CUresult.CUDA_SUCCESS +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="FIXME: This test causes an API error.", +) def test_cuda_cuModuleLoadDataEx(): (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -251,6 +260,7 @@ def test_cuda_cuModuleLoadDataEx(): cuda.CUjit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, cuda.CUjit_option.CU_JIT_LOG_VERBOSE, ] + # FIXME: This function call raises CUDA_ERROR_INVALID_VALUE err, mod = cuda.cuModuleLoadDataEx(0, 0, option_keys, []) (err,) = cuda.cuCtxDestroy(ctx) @@ -622,6 +632,10 @@ def test_cuda_coredump_attr(): assert err == cuda.CUresult.CUDA_SUCCESS +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitzer is running, and this test intentionally causes an API error.", +) def test_get_error_name_and_string(): (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -950,6 +964,10 @@ def test_CUmemDecompressParams_st(): assert int(desc.dstActBytes) == 0 +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitzer is running, and this test intentionally causes an API error.", +) def test_all_CUresult_codes(): max_code = int(max(cuda.CUresult)) # Smoke test. CUDA_ERROR_UNKNOWN = 999, but intentionally using literal value. 
@@ -982,12 +1000,20 @@ def test_all_CUresult_codes(): assert num_good >= 76 # CTK 11.0.3_450.51.06 +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitzer is running, and this test intentionally causes an API error.", +) def test_cuKernelGetName_failure(): err, name = cuda.cuKernelGetName(0) assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE assert name is None +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitzer is running, and this test intentionally causes an API error.", +) def test_cuFuncGetName_failure(): err, name = cuda.cuFuncGetName(0) assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py index 88f1b968a..f7b73ab12 100644 --- a/cuda_bindings/tests/test_cudart.py +++ b/cuda_bindings/tests/test_cudart.py @@ -7,6 +7,7 @@ # is strictly prohibited. import ctypes import math +import os import numpy as np import pytest @@ -70,6 +71,10 @@ def test_cudart_memcpy(): assertSuccess(err) +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitzer is running, and this test intentionally causes an API error.", +) def test_cudart_hostRegister(): # Use hostRegister API to check for correct enum return values page_size = 80 diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index 5f94e545f..bcd49b868 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -1,6 +1,7 @@ # Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import os import pytest @@ -40,6 +41,13 @@ def test_runtime_cuda_error_explanations_health(): assert not extra_expl +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason=( + "The compute-sanitzer is running, and this test causes an API error " + "when the driver is too old to know about all of the error codes." + ), +) def test_check_driver_error(): num_unexpected = 0 for error in driver.CUresult: diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index 108b8b140..ae04c7335 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -20,7 +20,13 @@ def test_event_init_disabled(): cuda.core.experimental._event.Event() # Ensure back door is locked. -@pytest.mark.parametrize("enable_timing", [True, False, None]) +@pytest.mark.parametrize( + "enable_timing", + [ + True, + ] + + ([False, None] if os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") != "1" else []), +) def test_timing(init_cuda, enable_timing): options = EventOptions(enable_timing=enable_timing) stream = Device().create_stream() From bd88039becc41dad8860a05d2dbe9452825ab8d5 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Thu, 24 Apr 2025 13:20:48 -0500 Subject: [PATCH 09/22] CI: Add sanitizer skip environment variable to CI --- .github/workflows/test-wheel-linux.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 630f38dc7..8b5abfcef 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -189,6 +189,7 @@ jobs: if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then SANITIZER_CMD="${SANITIZER_CMD} --padding=32" fi + echo "CUDA_PYTHON_SANITIZER_RUNNING=1" >> $GITHUB_ENV else COMPUTE_SANITIZER_VERSION="None" SANITIZER_CMD="" From 0430930f0cda867ff5ef62da7099e9a426ef2484 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Thu, 24 
Apr 2025 15:24:57 -0500 Subject: [PATCH 10/22] DOC: Fix spelling of CI step name --- .github/workflows/test-wheel-linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 8b5abfcef..5c037b572 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -180,7 +180,7 @@ jobs: host-platform: ${{ inputs.host-platform }} cuda-version: ${{ inputs.cuda-version }} - - name: Set up compute-saniziter + - name: Set up compute-sanitizer run: | if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.local-ctk }}" == 1 ]]; then COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer" From 95c391481741f8a95551b05694b8e92f7a678123 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Fri, 25 Apr 2025 11:23:28 -0500 Subject: [PATCH 11/22] CI: Skip state failure test when running sanitizer --- cuda_bindings/tests/test_cuda.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py index 0c5233f1a..e445f3b6f 100644 --- a/cuda_bindings/tests/test_cuda.py +++ b/cuda_bindings/tests/test_cuda.py @@ -1020,6 +1020,10 @@ def test_cuFuncGetName_failure(): assert name is None +@pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitzer is running, and this test intentionally causes an API error.", +) @pytest.mark.skipif( driverVersionLessThan(12080) or not supportsCudaAPI("cuCheckpointProcessGetState"), reason="When API was introduced", From b091340046d757a47db139a25cc9dbf831d0503f Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Fri, 25 Apr 2025 12:44:52 -0500 Subject: [PATCH 12/22] CI: Skip linker error log test when sanitizer is running --- cuda_core/tests/test_linker.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index 78195c2dc..aab2dc5dc 100644 --- 
a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -1,6 +1,7 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import os import pytest @@ -140,6 +141,10 @@ def test_linker_link_invalid_target_type(compile_ptx_functions): linker.link("invalid_target") +@pytest.mark.skipif( + is_culink_backend and os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitzer is running, and this test causes an API error using the culink API.", +) def test_linker_get_error_log(compile_ptx_functions): options = LinkerOptions(arch=ARCH) From 10d4c7a9b52512d03b1da078ae238f6e96719c2e Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Fri, 25 Apr 2025 12:58:13 -0500 Subject: [PATCH 13/22] CI: Add note explaining test skip --- cuda_core/tests/test_event.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index ae04c7335..380aef30a 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -25,6 +25,7 @@ def test_event_init_disabled(): [ True, ] + # The compute-sanitzer is running, and this test intentionally causes an API error. 
+ ([False, None] if os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") != "1" else []), ) def test_timing(init_cuda, enable_timing): From bdfb547c6dc57c0bfdeeb4fa36859a28a00dc624 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Fri, 25 Apr 2025 13:53:03 -0500 Subject: [PATCH 14/22] DOC: Document CUDA_PYTHON_SANITIZER_RUNNING --- cuda_bindings/docs/source/environment_variables.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cuda_bindings/docs/source/environment_variables.md b/cuda_bindings/docs/source/environment_variables.md index 7329e582c..9787c5f01 100644 --- a/cuda_bindings/docs/source/environment_variables.md +++ b/cuda_bindings/docs/source/environment_variables.md @@ -11,3 +11,8 @@ ## Runtime Environment Variables - `CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM` : When set to 1, the default stream is the per-thread default stream. When set to 0, the default stream is the legacy default stream. This defaults to 0, for the legacy default stream. See [Stream Synchronization Behavior](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) for an explanation of the legacy and per-thread default streams. + + +## Test-Time Environment Variables + +- `CUDA_PYTHON_SANTIZER_RUNNING` : When set to 1, tests are skipped that would cause [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) to raise an error. 
From 9c2591056fb8a82fc7ac3296af60ae2d6a2cd727 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Fri, 25 Apr 2025 15:26:07 -0500 Subject: [PATCH 15/22] CI: Skip compute-sanitizer on CTK11 --- .github/workflows/test-wheel-linux.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 5c037b572..666932331 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -182,7 +182,10 @@ jobs: - name: Set up compute-sanitizer run: | - if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.local-ctk }}" == 1 ]]; then + # We don't test compute-sanitizer on CTK<12 because backporting fixes is too much effort + # We only test compute-sanitizer on python 3.12 arbitrarily; we don't need to use sanitizer on the entire matrix + # Only local ctk installs have compute-sanitizer; there is not wheel for it + if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.cuda-verion }}" != "11.8.0" && "${{ inputs.local-ctk }}" == 1 ]]; then COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer" COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1" From 7fb013e88f33ebedb3d16222b684607d3d1eca59 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Fri, 25 Apr 2025 16:46:51 -0500 Subject: [PATCH 16/22] BUG: Correctly spell "version" --- .github/workflows/test-wheel-linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 666932331..58649202b 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -185,7 +185,7 @@ jobs: # We don't test compute-sanitizer on CTK<12 because backporting fixes is too much effort # We only test 
compute-sanitizer on python 3.12 arbitrarily; we don't need to use sanitizer on the entire matrix # Only local ctk installs have compute-sanitizer; there is not wheel for it - if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.cuda-verion }}" != "11.8.0" && "${{ inputs.local-ctk }}" == 1 ]]; then + if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.cuda-version }}" != "11.8.0" && "${{ inputs.local-ctk }}" == 1 ]]; then COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer" COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g') SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1" From 2632bbc38694f5355e64e1d135c8c897c4796bb3 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Mon, 28 Apr 2025 10:39:59 -0500 Subject: [PATCH 17/22] DOC: Fix spelling of sanitizer --- cuda_bindings/docs/source/environment_variables.md | 2 +- cuda_bindings/tests/test_cuda.py | 12 ++++++------ cuda_bindings/tests/test_cudart.py | 2 +- cuda_core/tests/test_cuda_utils.py | 2 +- cuda_core/tests/test_event.py | 2 +- cuda_core/tests/test_linker.py | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cuda_bindings/docs/source/environment_variables.md b/cuda_bindings/docs/source/environment_variables.md index 9787c5f01..58f84996d 100644 --- a/cuda_bindings/docs/source/environment_variables.md +++ b/cuda_bindings/docs/source/environment_variables.md @@ -15,4 +15,4 @@ ## Test-Time Environment Variables -- `CUDA_PYTHON_SANTIZER_RUNNING` : When set to 1, tests are skipped that would cause [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) to raise an error. +- `CUDA_PYTHON_SANITIZER_RUNNING` : When set to 1, tests are skipped that would cause [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) to raise an error. 
diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py index e445f3b6f..613a81881 100644 --- a/cuda_bindings/tests/test_cuda.py +++ b/cuda_bindings/tests/test_cuda.py @@ -86,7 +86,7 @@ def test_cuda_memcpy(): @pytest.mark.skipif( os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitzer is running, and this test intentionally causes an API error.", + reason="The compute-sanitizer is running, and this test intentionally causes an API error.", ) def test_cuda_array(): (err,) = cuda.cuInit(0) @@ -634,7 +634,7 @@ def test_cuda_coredump_attr(): @pytest.mark.skipif( os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitzer is running, and this test intentionally causes an API error.", + reason="The compute-sanitizer is running, and this test intentionally causes an API error.", ) def test_get_error_name_and_string(): (err,) = cuda.cuInit(0) @@ -966,7 +966,7 @@ def test_CUmemDecompressParams_st(): @pytest.mark.skipif( os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitzer is running, and this test intentionally causes an API error.", + reason="The compute-sanitizer is running, and this test intentionally causes an API error.", ) def test_all_CUresult_codes(): max_code = int(max(cuda.CUresult)) @@ -1002,7 +1002,7 @@ def test_all_CUresult_codes(): @pytest.mark.skipif( os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitzer is running, and this test intentionally causes an API error.", + reason="The compute-sanitizer is running, and this test intentionally causes an API error.", ) def test_cuKernelGetName_failure(): err, name = cuda.cuKernelGetName(0) @@ -1012,7 +1012,7 @@ def test_cuKernelGetName_failure(): @pytest.mark.skipif( os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitzer is running, and this test intentionally causes an API error.", + reason="The 
compute-sanitizer is running, and this test intentionally causes an API error.", ) def test_cuFuncGetName_failure(): err, name = cuda.cuFuncGetName(0) @@ -1022,7 +1022,7 @@ def test_cuFuncGetName_failure(): @pytest.mark.skipif( os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitzer is running, and this test intentionally causes an API error.", + reason="The compute-sanitizer is running, and this test intentionally causes an API error.", ) @pytest.mark.skipif( driverVersionLessThan(12080) or not supportsCudaAPI("cuCheckpointProcessGetState"), diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py index f7b73ab12..41bed1f54 100644 --- a/cuda_bindings/tests/test_cudart.py +++ b/cuda_bindings/tests/test_cudart.py @@ -73,7 +73,7 @@ def test_cudart_memcpy(): @pytest.mark.skipif( os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitzer is running, and this test intentionally causes an API error.", + reason="The compute-sanitizer is running, and this test intentionally causes an API error.", ) def test_cudart_hostRegister(): # Use hostRegister API to check for correct enum return values diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index bcd49b868..097ead8f6 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -44,7 +44,7 @@ def test_runtime_cuda_error_explanations_health(): @pytest.mark.skipif( os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", reason=( - "The compute-sanitzer is running, and this test causes an API error " + "The compute-sanitizer is running, and this test causes an API error " "when the driver is too old to know about all of the error codes." 
), ) diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index 380aef30a..bb01a5979 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -25,7 +25,7 @@ def test_event_init_disabled(): [ True, ] - # The compute-sanitzer is running, and this test intentionally causes an API error. + # The compute-sanitizer is running, and this test intentionally causes an API error. + ([False, None] if os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") != "1" else []), ) def test_timing(init_cuda, enable_timing): diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index aab2dc5dc..12d63f46e 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -143,7 +143,7 @@ def test_linker_link_invalid_target_type(compile_ptx_functions): @pytest.mark.skipif( is_culink_backend and os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitzer is running, and this test causes an API error using the culink API.", + reason="The compute-sanitizer is running, and this test causes an API error using the culink API.", ) def test_linker_get_error_log(compile_ptx_functions): options = LinkerOptions(arch=ARCH) From 2bfd402b6b116901bd2fa2c367ba796079c0126a Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Mon, 28 Apr 2025 11:09:29 -0500 Subject: [PATCH 18/22] TST: Define new test skip in conftest instead of copy-paste --- cuda_bindings/tests/conftest.py | 8 +++++++ cuda_bindings/tests/test_cuda.py | 37 +++++++----------------------- cuda_bindings/tests/test_cudart.py | 7 ++---- cuda_core/tests/conftest.py | 6 +++++ cuda_core/tests/test_cuda_utils.py | 11 +++------ cuda_core/tests/test_linker.py | 8 +++---- 6 files changed, 30 insertions(+), 47 deletions(-) create mode 100644 cuda_bindings/tests/conftest.py diff --git a/cuda_bindings/tests/conftest.py b/cuda_bindings/tests/conftest.py new file mode 100644 index 000000000..be0136aa5 --- /dev/null +++ 
b/cuda_bindings/tests/conftest.py @@ -0,0 +1,8 @@ +import os + +import pytest + +skipif_compute_sanitizer_is_running = pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitizer is running, and this test causes an API error.", +) diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py index 613a81881..a543b32c1 100644 --- a/cuda_bindings/tests/test_cuda.py +++ b/cuda_bindings/tests/test_cuda.py @@ -5,13 +5,13 @@ # this software. Any use, reproduction, disclosure, or distribution of # this software and related documentation outside the terms of the EULA # is strictly prohibited. -import os import platform import shutil import textwrap import numpy as np import pytest +from conftest import skipif_compute_sanitizer_is_running import cuda.cuda as cuda import cuda.cudart as cudart @@ -84,10 +84,7 @@ def test_cuda_memcpy(): assert err == cuda.CUresult.CUDA_SUCCESS -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitizer is running, and this test intentionally causes an API error.", -) +@skipif_compute_sanitizer_is_running def test_cuda_array(): (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -241,10 +238,7 @@ def test_cuda_uuid_list_access(): assert err == cuda.CUresult.CUDA_SUCCESS -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="FIXME: This test causes an API error.", -) +@skipif_compute_sanitizer_is_running def test_cuda_cuModuleLoadDataEx(): (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -632,10 +626,7 @@ def test_cuda_coredump_attr(): assert err == cuda.CUresult.CUDA_SUCCESS -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitizer is running, and this test intentionally causes an API error.", -) +@skipif_compute_sanitizer_is_running def test_get_error_name_and_string(): (err,) = 
cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -964,10 +955,7 @@ def test_CUmemDecompressParams_st(): assert int(desc.dstActBytes) == 0 -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitizer is running, and this test intentionally causes an API error.", -) +@skipif_compute_sanitizer_is_running def test_all_CUresult_codes(): max_code = int(max(cuda.CUresult)) # Smoke test. CUDA_ERROR_UNKNOWN = 999, but intentionally using literal value. @@ -1000,30 +988,21 @@ def test_all_CUresult_codes(): assert num_good >= 76 # CTK 11.0.3_450.51.06 -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitizer is running, and this test intentionally causes an API error.", -) +@skipif_compute_sanitizer_is_running def test_cuKernelGetName_failure(): err, name = cuda.cuKernelGetName(0) assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE assert name is None -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitizer is running, and this test intentionally causes an API error.", -) +@skipif_compute_sanitizer_is_running def test_cuFuncGetName_failure(): err, name = cuda.cuFuncGetName(0) assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE assert name is None -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitizer is running, and this test intentionally causes an API error.", -) +@skipif_compute_sanitizer_is_running @pytest.mark.skipif( driverVersionLessThan(12080) or not supportsCudaAPI("cuCheckpointProcessGetState"), reason="When API was introduced", diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py index 41bed1f54..ae4a936a0 100644 --- a/cuda_bindings/tests/test_cudart.py +++ b/cuda_bindings/tests/test_cudart.py @@ -7,10 +7,10 @@ # is strictly prohibited. 
import ctypes import math -import os import numpy as np import pytest +from conftest import skipif_compute_sanitizer_is_running import cuda.cuda as cuda import cuda.cudart as cudart @@ -71,10 +71,7 @@ def test_cudart_memcpy(): assertSuccess(err) -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitizer is running, and this test intentionally causes an API error.", -) +@skipif_compute_sanitizer_is_running def test_cudart_hostRegister(): # Use hostRegister API to check for correct enum return values page_size = 80 diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index 889372417..a5c089c1f 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -64,3 +64,9 @@ def clean_up_cffi_files(): os.remove(f) except FileNotFoundError: pass # noqa: SIM105 + + +skipif_compute_sanitizer_is_running = pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", + reason="The compute-sanitizer is running, and this test causes an API error.", +) diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index 097ead8f6..6f945cc33 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -1,9 +1,9 @@ # Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import os import pytest +from conftest import skipif_compute_sanitizer_is_running from cuda.bindings import driver, runtime from cuda.core.experimental._utils import cuda_utils @@ -41,13 +41,8 @@ def test_runtime_cuda_error_explanations_health(): assert not extra_expl -@pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason=( - "The compute-sanitizer is running, and this test causes an API error " - "when the driver is too old to know about all of the error codes." 
- ), -) +# this test causes an API error when the driver is too old to know about all of the error codes +@skipif_compute_sanitizer_is_running def test_check_driver_error(): num_unexpected = 0 for error in driver.CUresult: diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index 12d63f46e..40ef4b69f 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -1,9 +1,9 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import os import pytest +from conftest import skipif_compute_sanitizer_is_running from cuda.core.experimental import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker from cuda.core.experimental._module import ObjectCode @@ -141,10 +141,8 @@ def test_linker_link_invalid_target_type(compile_ptx_functions): linker.link("invalid_target") -@pytest.mark.skipif( - is_culink_backend and os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", - reason="The compute-sanitizer is running, and this test causes an API error using the culink API.", -) +# this test causes an API error when using the culink API +@skipif_compute_sanitizer_is_running def test_linker_get_error_log(compile_ptx_functions): options = LinkerOptions(arch=ARCH) From 02559a33aa3e21ced8ff2c3f2717e0f2a0008444 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Mon, 28 Apr 2025 11:23:53 -0500 Subject: [PATCH 19/22] TST: Cleanup post-merge --- cuda_core/tests/test_event.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index 4895c0a67..6f6240642 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -12,6 +12,7 @@ import numpy as np import pytest +from conftest import skipif_compute_sanitizer_is_running import cuda.core.experimental from cuda.core.experimental import Device, EventOptions, LaunchConfig, Program, ProgramOptions, launch @@ -75,6 
+76,7 @@ def test_is_done(init_cuda): assert event.is_done in (True, False) +@skipif_compute_sanitizer_is_running def test_error_timing_disabled(): device = Device() device.set_current() @@ -97,6 +99,7 @@ def test_error_timing_disabled(): event2 - event1 +@skipif_compute_sanitizer_is_running def test_error_timing_recorded(): device = Device() device.set_current() From 675c41fb77e6311f7765b7c7f48533aadacd7c59 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Mon, 28 Apr 2025 11:27:07 -0500 Subject: [PATCH 20/22] CI: Remove COMPUTE_SANITIZER_VERSION variable --- .github/workflows/test-wheel-linux.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 58649202b..a86b5e35a 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -194,10 +194,8 @@ jobs: fi echo "CUDA_PYTHON_SANITIZER_RUNNING=1" >> $GITHUB_ENV else - COMPUTE_SANITIZER_VERSION="None" SANITIZER_CMD="" fi - echo "COMPUTE_SANITIZER_VERSION=${COMPUTE_SANITIZER_VERSION}" >> $GITHUB_ENV echo "SANITIZER_CMD=${SANITIZER_CMD}" >> $GITHUB_ENV - name: Run cuda.bindings tests From 5abd6e3c86a006dc7d29c7925d1b8994f0c11d74 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Mon, 28 Apr 2025 12:41:37 -0500 Subject: [PATCH 21/22] TST: Skip another post-merge error --- cuda_core/tests/test_event.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index 6f6240642..7e79ec934 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -120,6 +120,7 @@ def test_error_timing_recorded(): # TODO: improve this once path finder can find headers +@skipif_compute_sanitizer_is_running @pytest.mark.skipif(os.environ.get("CUDA_PATH") is None, reason="need libcu++ header") @pytest.mark.skipif(tuple(int(i) for i in np.__version__.split(".")[:2]) < (2, 1), reason="need numpy 2.1.0+") def test_error_timing_incomplete(): From 
53a01cb14c8869eca5ec97a6f0c29bb3a3186b07 Mon Sep 17 00:00:00 2001 From: Daniel Ching Date: Mon, 28 Apr 2025 15:57:48 -0500 Subject: [PATCH 22/22] TST: Use consistent name for environment variable and test skip function --- .github/workflows/test-wheel-linux.yml | 2 +- .../docs/source/environment_variables.md | 2 +- cuda_bindings/tests/conftest.py | 4 ++-- cuda_bindings/tests/test_cuda.py | 16 ++++++++-------- cuda_bindings/tests/test_cudart.py | 4 ++-- cuda_core/tests/conftest.py | 4 ++-- cuda_core/tests/test_cuda_utils.py | 4 ++-- cuda_core/tests/test_event.py | 8 ++++---- cuda_core/tests/test_linker.py | 4 ++-- 9 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index a86b5e35a..0fd694ee5 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -192,7 +192,7 @@ jobs: if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then SANITIZER_CMD="${SANITIZER_CMD} --padding=32" fi - echo "CUDA_PYTHON_SANITIZER_RUNNING=1" >> $GITHUB_ENV + echo "CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER=1" >> $GITHUB_ENV else SANITIZER_CMD="" fi diff --git a/cuda_bindings/docs/source/environment_variables.md b/cuda_bindings/docs/source/environment_variables.md index 58f84996d..67b52b4dd 100644 --- a/cuda_bindings/docs/source/environment_variables.md +++ b/cuda_bindings/docs/source/environment_variables.md @@ -15,4 +15,4 @@ ## Test-Time Environment Variables -- `CUDA_PYTHON_SANITIZER_RUNNING` : When set to 1, tests are skipped that would cause [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) to raise an error. +- `CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER` : When set to 1, tests are skipped that would cause [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) to raise an error. 
diff --git a/cuda_bindings/tests/conftest.py b/cuda_bindings/tests/conftest.py index be0136aa5..45767fb78 100644 --- a/cuda_bindings/tests/conftest.py +++ b/cuda_bindings/tests/conftest.py @@ -2,7 +2,7 @@ import pytest -skipif_compute_sanitizer_is_running = pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", +skipif_testing_with_compute_sanitizer = pytest.mark.skipif( + os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1", reason="The compute-sanitizer is running, and this test causes an API error.", ) diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py index 835f4ef44..612401f3e 100644 --- a/cuda_bindings/tests/test_cuda.py +++ b/cuda_bindings/tests/test_cuda.py @@ -11,7 +11,7 @@ import numpy as np import pytest -from conftest import skipif_compute_sanitizer_is_running +from conftest import skipif_testing_with_compute_sanitizer import cuda.cuda as cuda import cuda.cudart as cudart @@ -84,7 +84,7 @@ def test_cuda_memcpy(): assert err == cuda.CUresult.CUDA_SUCCESS -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_cuda_array(): (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -238,7 +238,7 @@ def test_cuda_uuid_list_access(): assert err == cuda.CUresult.CUDA_SUCCESS -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_cuda_cuModuleLoadDataEx(): (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -626,7 +626,7 @@ def test_cuda_coredump_attr(): assert err == cuda.CUresult.CUDA_SUCCESS -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_get_error_name_and_string(): (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -956,7 +956,7 @@ def test_CUmemDecompressParams_st(): assert int(desc.dstActBytes) == 0 -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_all_CUresult_codes(): max_code = 
int(max(cuda.CUresult)) # Smoke test. CUDA_ERROR_UNKNOWN = 999, but intentionally using literal value. @@ -989,21 +989,21 @@ def test_all_CUresult_codes(): assert num_good >= 76 # CTK 11.0.3_450.51.06 -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_cuKernelGetName_failure(): err, name = cuda.cuKernelGetName(0) assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE assert name is None -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_cuFuncGetName_failure(): err, name = cuda.cuFuncGetName(0) assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE assert name is None -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer @pytest.mark.skipif( driverVersionLessThan(12080) or not supportsCudaAPI("cuCheckpointProcessGetState"), reason="When API was introduced", diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py index ae4a936a0..f7eb1abb9 100644 --- a/cuda_bindings/tests/test_cudart.py +++ b/cuda_bindings/tests/test_cudart.py @@ -10,7 +10,7 @@ import numpy as np import pytest -from conftest import skipif_compute_sanitizer_is_running +from conftest import skipif_testing_with_compute_sanitizer import cuda.cuda as cuda import cuda.cudart as cudart @@ -71,7 +71,7 @@ def test_cudart_memcpy(): assertSuccess(err) -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_cudart_hostRegister(): # Use hostRegister API to check for correct enum return values page_size = 80 diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index a5c089c1f..23bb0274e 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -66,7 +66,7 @@ def clean_up_cffi_files(): pass # noqa: SIM105 -skipif_compute_sanitizer_is_running = pytest.mark.skipif( - os.environ.get("CUDA_PYTHON_SANITIZER_RUNNING", "0") == "1", +skipif_testing_with_compute_sanitizer = pytest.mark.skipif( + 
os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1", reason="The compute-sanitizer is running, and this test causes an API error.", ) diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index 6f945cc33..e96d904eb 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import pytest -from conftest import skipif_compute_sanitizer_is_running +from conftest import skipif_testing_with_compute_sanitizer from cuda.bindings import driver, runtime from cuda.core.experimental._utils import cuda_utils @@ -42,7 +42,7 @@ def test_runtime_cuda_error_explanations_health(): # this test causes an API error when the driver is too old to know about all of the error codes -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_check_driver_error(): num_unexpected = 0 for error in driver.CUresult: diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index 7e79ec934..f568ecdba 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -12,7 +12,7 @@ import numpy as np import pytest -from conftest import skipif_compute_sanitizer_is_running +from conftest import skipif_testing_with_compute_sanitizer import cuda.core.experimental from cuda.core.experimental import Device, EventOptions, LaunchConfig, Program, ProgramOptions, launch @@ -76,7 +76,7 @@ def test_is_done(init_cuda): assert event.is_done in (True, False) -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_error_timing_disabled(): device = Device() device.set_current() @@ -99,7 +99,7 @@ def test_error_timing_disabled(): event2 - event1 -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_error_timing_recorded(): device = Device() device.set_current() @@ -120,7 +120,7 @@ def test_error_timing_recorded(): # TODO: improve this 
once path finder can find headers -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer @pytest.mark.skipif(os.environ.get("CUDA_PATH") is None, reason="need libcu++ header") @pytest.mark.skipif(tuple(int(i) for i in np.__version__.split(".")[:2]) < (2, 1), reason="need numpy 2.1.0+") def test_error_timing_incomplete(): diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index 40ef4b69f..f15e98a42 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import pytest -from conftest import skipif_compute_sanitizer_is_running +from conftest import skipif_testing_with_compute_sanitizer from cuda.core.experimental import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker from cuda.core.experimental._module import ObjectCode @@ -142,7 +142,7 @@ def test_linker_link_invalid_target_type(compile_ptx_functions): # this test causes an API error when using the culink API -@skipif_compute_sanitizer_is_running +@skipif_testing_with_compute_sanitizer def test_linker_get_error_log(compile_ptx_functions): options = LinkerOptions(arch=ARCH)