From c626b956bc7ca1cc963b89bceafc3dfc3b0f84aa Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Mon, 23 Sep 2024 18:03:30 +0530 Subject: [PATCH 01/56] Adding support for CI testing. --- .github/actions/build/action.yml | 2 +- .github/actions/test/action.yml | 37 ++++++++++ .github/workflows/gh-build-and-test.yml | 23 ++++++- .github/workflows/gh-test.yml | 80 ++++++++++++++++++++++ continuous_integration/scripts/setup-utils | 23 +++++++ continuous_integration/scripts/test | 33 +++++++++ 6 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 .github/actions/test/action.yml create mode 100644 .github/workflows/gh-test.yml create mode 100755 continuous_integration/scripts/test diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index 952fb9cd..b6741343 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -54,7 +54,7 @@ runs: --rm "${{ inputs.docker-image }}" \ /bin/bash -c "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint ${{ env.REPO_DIR }}/continuous_integration/scripts/build ${{ inputs.build-type}} ${{ inputs.target-device }}" - - if: ${{ !inputs.use-container }} + - if: ${{ !inputs.use-container && steps.cache-build.outputs.cache-hit != 'true'}} name: Build (without container) shell: bash --noprofile --norc -xeuo pipefail {0} run: | diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml new file mode 100644 index 00000000..a11a9938 --- /dev/null +++ b/.github/actions/test/action.yml @@ -0,0 +1,37 @@ +name: test + +description: Run tests in specified project + +inputs: + test-options: + required: true + type: string + runner-has-gpu: + required: true + type: boolean + description: "The runner has GPU(s)." + +runs: + using: composite + steps: + - if: ${{ inputs.runner-has-gpu == true }} + name: Run nvidia-smi to make sure GPU is working + shell: bash --noprofile --norc -xeuo pipefail {0} + run: nvidia-smi + + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ env.ARTIFACTS_DIR }} + + - name: Display structure of downloaded artifacts + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + pwd + ls -lahR $ARTIFACTS_DIR + + - name: Run test / analysis + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint" "${{ env.REPO_DIR }}/continuous_integration/scripts/test" ${{ inputs.test-options }} diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 430fbf5b..1df308ab 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -27,9 +27,28 @@ jobs: with: client-repo: ${{ github.event.repository.name }} target-device: ${{ inputs.target-device }} - runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu16') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} + runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu8') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} + build-type: ${{ inputs.build-type }} + use-container: false + host-platform: ${{ inputs.host-platform }} + dependencies-file: "" + build-mode: ${{ inputs.build-mode }} + upload-enabled: ${{ inputs.upload-enabled }} + secrets: inherit + + test: + if: ${{ github.repository_owner == 'nvidia' }} + needs: + - build + uses: + ./.github/workflows/gh-test.yml + with: + client-repo: ${{ github.event.repository.name }} + target-device: ${{ inputs.target-device }} + test-options: ${{ inputs.build-type }} + runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} + runner-has-gpu: ${{ inputs.host-platform == 'linux-x64' }} build-type: ${{ inputs.build-type }} - use-container: ${{ inputs.host-platform == 'linux-x64' || inputs.host-platform == 'linux-aarch64'}} host-platform: ${{ inputs.host-platform }} dependencies-file: "" build-mode: ${{ inputs.build-mode }} diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml new file mode 100644 index 00000000..74f1c520 --- /dev/null +++ b/.github/workflows/gh-test.yml @@ -0,0 +1,80 @@ +name: Test + +on: + workflow_call: + inputs: + client-repo: + required: true + type: string + target-device: + required: true + type: string + test-options: + required: true + type: string + runs-on: + required: true + type: string + runner-has-gpu: + required: true + type: boolean + description: "The runner has GPU(s)." + build-type: + required: true + type: string + description: One of ci / release + host-platform: + required: true + type: string + dependencies-file: + required: true + type: string + description: path to versions.json relative to the target repo dir + build-mode: + required: true + type: string + upload-enabled: + required: true + type: boolean + python-version: + required: false + type: string + +jobs: + build: + name: Test (${{ inputs.host-platform }}, ${{ inputs.target-device }}, ${{ inputs.build-type }}, CMake build-mode=${{ inputs.build-mode }}, Python "${{ inputs.python-version }}", Use container=${{ inputs.use-container }} ) + + permissions: + id-token: write # This is required for configure-aws-credentials + contents: read # This is required for actions/checkout + + runs-on: ${{ inputs.runs-on }} + + container: + options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g + image: condaforge/miniforge3:latest + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + + steps: + - name: Checkout ${{ inputs.client-repo }} + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup + uses: ./.github/actions/setup + with: + client-repo: ${{ inputs.client-repo }} + build-type: ${{ inputs.build-type }} + target-device: "${{ inputs.target-device }}" + host-platform: ${{ inputs.host-platform }} + build-mode: ${{ inputs.build-mode }} + upload-enabled: ${{ inputs.upload-enabled }} + python-version: ${{ inputs.python-version }} + + - name: Call test action + uses: ./.github/actions/test + with: + test-options: ${{ inputs.test-options }} + runner-has-gpu: ${{ inputs.runner-has-gpu }} diff --git a/continuous_integration/scripts/setup-utils b/continuous_integration/scripts/setup-utils index 62579e63..f8faefa4 100755 --- a/continuous_integration/scripts/setup-utils +++ b/continuous_integration/scripts/setup-utils @@ -151,6 +151,29 @@ init_build_env() { make-conda-env "$BUILD_TYPE"; + activate_conda_env; + conda_info; +} + +init_test_env() { + set -x; + + . conda-utils; + + export TEST_TYPE=$1 + + set -xeuo pipefail; + + set_base_defs; + + cd "$PREBUILD_DIR" + + # setup_test_env; + + cd "$REPO_DIR"; + + make-conda-env "$TEST_TYPE"; + activate_conda_env; conda_info; } \ No newline at end of file diff --git a/continuous_integration/scripts/test b/continuous_integration/scripts/test new file mode 100755 index 00000000..e8c56c52 --- /dev/null +++ b/continuous_integration/scripts/test @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +test_ci() { + set -xeou pipefail + + cd "${ARTIFACTS_DIR}" + + activate_conda_env; + + pip install *.whl + + cd "${REPO_DIR}" + + python -m pytest +} + +test_project() { + set -xeou pipefail + + export PYTHONUNBUFFERED=1 + + . setup-utils; + init_test_env "$@"; + + git config --global --add safe.directory "$REPO_DIR/.git" + + case "${TEST_TYPE}" in + ci) test_ci;; + *) return 1;; + esac +} + +(test_project "$@"); From 5467b5284c3467f8f3a41570a7df108049389f42 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Wed, 27 Nov 2024 04:18:20 +0530 Subject: [PATCH 02/56] Supply python-version. --- .github/workflows/gh-build-and-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 1df308ab..adf8477a 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -34,6 +34,7 @@ jobs: dependencies-file: "" build-mode: ${{ inputs.build-mode }} upload-enabled: ${{ inputs.upload-enabled }} + python-version: ${{ inputs.python-version }} secrets: inherit test: From c78ebfdcaa92ea40ade3014a3da952e5a11dc8e6 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Wed, 27 Nov 2024 04:36:17 +0530 Subject: [PATCH 03/56] Update test driver to test bindings and core separately. --- .github/actions/test/action.yml | 22 +++++++++++++++++----- continuous_integration/scripts/test | 13 +++++++++---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index a11a9938..018db9aa 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -19,17 +19,29 @@ runs: shell: bash --noprofile --norc -xeuo pipefail {0} run: nvidia-smi - - name: Download build artifacts + - name: Download bindings build artifacts uses: actions/download-artifact@v4 with: - name: ${{ env.ARTIFACT_NAME }} - path: ${{ env.ARTIFACTS_DIR }} + name: ${{ env.BINDINGS_ARTIFACT_NAME }} + path: ${{ env.BINDINGS_ARTIFACTS_DIR }} - - name: Display structure of downloaded artifacts + - name: Display structure of downloaded bindings artifacts shell: bash --noprofile --norc -xeuo pipefail {0} run: | pwd - ls -lahR $ARTIFACTS_DIR + ls -lahR $BINDINGS_ARTIFACTS_DIR + + - name: Download core build artifacts + uses: actions/download-artifact@v4 + with: + name: ${{ env.CORE_ARTIFACT_NAME }} + path: ${{ env.CORE_ARTIFACTS_DIR }} + + - name: Display structure of downloaded core build artifacts + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + pwd + ls -lahR $CORE_ARTIFACTS_DIR - name: Run test / analysis shell: bash --noprofile --norc -xeuo pipefail {0} diff --git a/continuous_integration/scripts/test b/continuous_integration/scripts/test index e8c56c52..96bdf8d5 100755 --- a/continuous_integration/scripts/test +++ b/continuous_integration/scripts/test @@ -3,15 +3,20 @@ test_ci() { set -xeou pipefail - cd "${ARTIFACTS_DIR}" - activate_conda_env; + cd "${BINDINGS_ARTIFACTS_DIR}" + pip install *.whl + + cd "${CORE_ARTIFACTS_DIR}" pip install *.whl - cd "${REPO_DIR}" + cd "${REPO_DIR}/cuda_python/cuda_bindings" + python -m pytest tests/ + + cd "${REPO_DIR}/cuda_python/cuda_core" + python -m pytest tests/ - python -m pytest } test_project() { From e5bf104ddf6ec94ec36ed74b68d148003fe8b6da Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Mon, 23 Sep 2024 18:03:30 +0530 Subject: [PATCH 04/56] Adding support for CI testing. --- .github/workflows/gh-build-and-test.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index adf8477a..acf7e509 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -56,3 +56,21 @@ jobs: upload-enabled: ${{ inputs.upload-enabled }} python-version: ${{ inputs.python-version }} secrets: inherit + + test: + if: ${{ github.repository_owner == 'nvidia' }} + uses: + ./.github/workflows/gh-build.yml + with: + client-repo: ${{ github.event.repository.name }} + target-device: ${{ inputs.target-device }} + test-options: ${{ inputs.build-type }} + runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} + runner-has-gpu: ${{ inputs.host-platform == 'linux-x64' }} + build-type: ${{ inputs.build-type }} + use-container: false + host-platform: ${{ inputs.host-platform }} + dependencies-file: "" + build-mode: ${{ inputs.build-mode }} + upload-enabled: ${{ inputs.upload-enabled }} + secrets: inherit From 360e1b2d23f064eec19e3cd0c87d5bd823a41901 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Mon, 23 Sep 2024 18:05:07 +0530 Subject: [PATCH 05/56] Adding support for CI testing. Attempt 2. --- .github/workflows/gh-build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index acf7e509..1d9bb4ea 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -60,7 +60,7 @@ jobs: test: if: ${{ github.repository_owner == 'nvidia' }} uses: - ./.github/workflows/gh-build.yml + ./.github/workflows/gh-test.yml with: client-repo: ${{ github.event.repository.name }} target-device: ${{ inputs.target-device }} From 67b7aed7ad1efbbbf017c221a5ef8223bed0c032 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Mon, 23 Sep 2024 18:06:10 +0530 Subject: [PATCH 06/56] Adding support for CI testing. Attempt 3. --- .github/workflows/gh-build-and-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 1d9bb4ea..65a4a72a 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -59,6 +59,8 @@ jobs: test: if: ${{ github.repository_owner == 'nvidia' }} + needs: + - build uses: ./.github/workflows/gh-test.yml with: From 6fab977584c4f4a5a5cf2f1f1cef3719fe8ed4d5 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Mon, 23 Sep 2024 18:15:07 +0530 Subject: [PATCH 07/56] Use container for tests on the GPU runner. --- .github/workflows/gh-build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 65a4a72a..167a5546 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -70,7 +70,7 @@ jobs: runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} runner-has-gpu: ${{ inputs.host-platform == 'linux-x64' }} build-type: ${{ inputs.build-type }} - use-container: false + use-container: ${{ inputs.host-platform == 'linux-x64' }} host-platform: ${{ inputs.host-platform }} dependencies-file: "" build-mode: ${{ inputs.build-mode }} From f2a0939aadf87d92c59e84584ded24f6a77b077a Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Mon, 23 Sep 2024 18:27:47 +0530 Subject: [PATCH 08/56] Use container for tests on the GPU runner. Attempt 2. --- .github/workflows/gh-build-and-test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 167a5546..185ede7a 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -70,7 +70,6 @@ jobs: runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} runner-has-gpu: ${{ inputs.host-platform == 'linux-x64' }} build-type: ${{ inputs.build-type }} - use-container: ${{ inputs.host-platform == 'linux-x64' }} host-platform: ${{ inputs.host-platform }} dependencies-file: "" build-mode: ${{ inputs.build-mode }} From 508a83c072b2ec46750d174926d32684a3207092 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Fri, 15 Nov 2024 20:23:05 +0530 Subject: [PATCH 09/56] Remove build caching. --- .github/actions/build/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index b6741343..952fb9cd 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -54,7 +54,7 @@ runs: --rm "${{ inputs.docker-image }}" \ /bin/bash -c "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint ${{ env.REPO_DIR }}/continuous_integration/scripts/build ${{ inputs.build-type}} ${{ inputs.target-device }}" - - if: ${{ !inputs.use-container && steps.cache-build.outputs.cache-hit != 'true'}} + - if: ${{ !inputs.use-container }} name: Build (without container) shell: bash --noprofile --norc -xeuo pipefail {0} run: | From 72062aa1ef77f6c76e6f6be8ac1bfa480d9abe4b Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Fri, 15 Nov 2024 20:32:58 +0530 Subject: [PATCH 10/56] Hard select Build (without container). --- .github/actions/build/action.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index 952fb9cd..583f5775 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -54,10 +54,11 @@ runs: --rm "${{ inputs.docker-image }}" \ /bin/bash -c "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint ${{ env.REPO_DIR }}/continuous_integration/scripts/build ${{ inputs.build-type}} ${{ inputs.target-device }}" - - if: ${{ !inputs.use-container }} - name: Build (without container) + #- if: ${{ inputs.use-container == false }} + - name: Build (without container) shell: bash --noprofile --norc -xeuo pipefail {0} run: | + echo "inputs.use-container=${{ inputs.use-container }}" "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint" "${{ env.REPO_DIR }}/continuous_integration/scripts/build" "${{ inputs.build-type}}" "${{ inputs.target-device }}" - name: Display structure of the bindings artifacts folder (post build) From 32ca908e36e261008ad8cec93b094e0db3b5a8cd Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Fri, 15 Nov 2024 20:41:03 +0530 Subject: [PATCH 11/56] Use container with preinstalled conda for build. --- .github/workflows/gh-build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 185ede7a..e6e40624 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -29,7 +29,7 @@ jobs: target-device: ${{ inputs.target-device }} runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu8') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} build-type: ${{ inputs.build-type }} - use-container: false + use-container: ${{ inputs.host-platform == 'linux-x64' || inputs.host-platform == 'linux-aarch64'}} host-platform: ${{ inputs.host-platform }} dependencies-file: "" build-mode: ${{ inputs.build-mode }} From 970a8e5c4b43be8c6331904993674487b039c3eb Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Fri, 15 Nov 2024 20:42:04 +0530 Subject: [PATCH 12/56] Use container with preinstalled conda for build. Attempt 2. --- .github/actions/build/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index 583f5775..7a09ed14 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -54,8 +54,8 @@ runs: --rm "${{ inputs.docker-image }}" \ /bin/bash -c "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint ${{ env.REPO_DIR }}/continuous_integration/scripts/build ${{ inputs.build-type}} ${{ inputs.target-device }}" - #- if: ${{ inputs.use-container == false }} - - name: Build (without container) + - if: ${{ inputs.use-container == false }} + name: Build (without container) shell: bash --noprofile --norc -xeuo pipefail {0} run: | echo "inputs.use-container=${{ inputs.use-container }}" From be969e595ad6b66d16f9d32e96f6df550890bd70 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Fri, 15 Nov 2024 20:44:07 +0530 Subject: [PATCH 13/56] Use container with preinstalled conda for build. Attempt 3. --- .github/actions/build/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index 7a09ed14..e5f67202 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -54,7 +54,7 @@ runs: --rm "${{ inputs.docker-image }}" \ /bin/bash -c "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint ${{ env.REPO_DIR }}/continuous_integration/scripts/build ${{ inputs.build-type}} ${{ inputs.target-device }}" - - if: ${{ inputs.use-container == false }} + - if: ${{ !inputs.use-container }} name: Build (without container) shell: bash --noprofile --norc -xeuo pipefail {0} run: | From 3382d68b42b17b39535dcbe3b5e58e54b4695f11 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Fri, 29 Nov 2024 09:12:32 +0530 Subject: [PATCH 14/56] Updated paths. --- continuous_integration/scripts/test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/continuous_integration/scripts/test b/continuous_integration/scripts/test index 96bdf8d5..cbee6998 100755 --- a/continuous_integration/scripts/test +++ b/continuous_integration/scripts/test @@ -11,10 +11,10 @@ test_ci() { cd "${CORE_ARTIFACTS_DIR}" pip install *.whl - cd "${REPO_DIR}/cuda_python/cuda_bindings" + cd "${REPO_DIR}/cuda_bindings" python -m pytest tests/ - cd "${REPO_DIR}/cuda_python/cuda_core" + cd "${REPO_DIR}/cuda_core" python -m pytest tests/ } From a9ed0c6a038bbcb605f5016df584aff081d378cf Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Fri, 29 Nov 2024 09:16:00 +0530 Subject: [PATCH 15/56] Removed duplicate tests section. --- .github/workflows/gh-build-and-test.yml | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index e6e40624..9b414a22 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -56,22 +56,3 @@ jobs: upload-enabled: ${{ inputs.upload-enabled }} python-version: ${{ inputs.python-version }} secrets: inherit - - test: - if: ${{ github.repository_owner == 'nvidia' }} - needs: - - build - uses: - ./.github/workflows/gh-test.yml - with: - client-repo: ${{ github.event.repository.name }} - target-device: ${{ inputs.target-device }} - test-options: ${{ inputs.build-type }} - runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} - runner-has-gpu: ${{ inputs.host-platform == 'linux-x64' }} - build-type: ${{ inputs.build-type }} - host-platform: ${{ inputs.host-platform }} - dependencies-file: "" - build-mode: ${{ inputs.build-mode }} - upload-enabled: ${{ inputs.upload-enabled }} - secrets: inherit From 84124b590f62c089503b5ab44a9608a83a151ac4 Mon Sep 17 00:00:00 2001 From: sandeepd-nv Date: Wed, 4 Dec 2024 09:49:20 +0530 Subject: [PATCH 16/56] Run cuda_core tests before cuda_binding. --- continuous_integration/scripts/test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/continuous_integration/scripts/test b/continuous_integration/scripts/test index cbee6998..3a705c3c 100755 --- a/continuous_integration/scripts/test +++ b/continuous_integration/scripts/test @@ -11,10 +11,10 @@ test_ci() { cd "${CORE_ARTIFACTS_DIR}" pip install *.whl - cd "${REPO_DIR}/cuda_bindings" + cd "${REPO_DIR}/cuda_core" python -m pytest tests/ - cd "${REPO_DIR}/cuda_core" + cd "${REPO_DIR}/cuda_bindings" python -m pytest tests/ } From aeebaf757808cad15ed9c241e011457a9bfa97e4 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 15:11:53 -0500 Subject: [PATCH 17/56] skip testing on win; remove mac --- .github/workflows/gh-build-and-test.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 913b17fd..2bd7ec97 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -45,7 +45,8 @@ jobs: secrets: inherit test: - if: ${{ github.repository_owner == 'nvidia' }} + if: ${{ (github.repository_owner == 'nvidia') && + !startsWith(inputs.host-platform, 'win) }} needs: - build uses: @@ -54,7 +55,8 @@ jobs: client-repo: ${{ github.event.repository.name }} target-device: ${{ inputs.target-device }} test-options: ${{ inputs.build-type }} - runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }} + runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || + (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') }} runner-has-gpu: ${{ inputs.host-platform == 'linux-x64' }} build-type: ${{ inputs.build-type }} host-platform: ${{ inputs.host-platform }} From 76a8822fb5df845bb3a0e15c8407c651dc9a2e89 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 15:15:06 -0500 Subject: [PATCH 18/56] fix typo --- .github/workflows/gh-build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 2bd7ec97..ab32a62a 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -46,7 +46,7 @@ jobs: test: if: ${{ (github.repository_owner == 'nvidia') && - !startsWith(inputs.host-platform, 'win) }} + !startsWith(inputs.host-platform, 'win') }} needs: - build uses: From c23467f24badb0424648b6c9ee89c5913bdd570a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 15:52:45 -0500 Subject: [PATCH 19/56] skip setup if build stage was called --- .github/workflows/gh-test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml index 74f1c520..01eae085 100644 --- a/.github/workflows/gh-test.yml +++ b/.github/workflows/gh-test.yml @@ -39,6 +39,10 @@ on: python-version: required: false type: string + has-built: + required: false + type: boolean + description: whether the built stage was launched (and passed) jobs: build: @@ -63,6 +67,7 @@ jobs: fetch-depth: 0 - name: Setup + if: ${{ !inputs.has-built }} uses: ./.github/actions/setup with: client-repo: ${{ inputs.client-repo }} From 3d892ad9546187edf8cbe23386e1cdf1ac16ac48 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 16:05:25 -0500 Subject: [PATCH 20/56] set build output --- .github/actions/build/action.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index e1552ae8..48c4e50a 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -16,6 +16,10 @@ inputs: upload-enabled: required: true type: boolean +outputs: + has-built: + value: true # TODO: we might need to check the job success here + description: whether the built stage was launched (and passed) runs: using: composite From ae0a994416edb6d63f58fce1c2e41ac32a6c44ea Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 16:09:35 -0500 Subject: [PATCH 21/56] pass output from build to test --- .github/workflows/gh-build-and-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index ab32a62a..f9606ba7 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -64,4 +64,5 @@ jobs: build-mode: ${{ inputs.build-mode }} upload-enabled: ${{ inputs.upload-enabled }} python-version: ${{ inputs.python-version }} + has-built: ${{ needs.build.outputs.has-built }} secrets: inherit From c3fe6a14881d4bc1b968e1394ca6e05ca4009380 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 16:25:03 -0500 Subject: [PATCH 22/56] wrong place --- .github/actions/build/action.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index 48c4e50a..e1552ae8 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -16,10 +16,6 @@ inputs: upload-enabled: required: true type: boolean -outputs: - has-built: - value: true # TODO: we might need to check the job success here - description: whether the built stage was launched (and passed) runs: using: composite From 0bbc706c5fe61e59bda8bfd0091fd29484203b38 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 16:26:18 -0500 Subject: [PATCH 23/56] it's the build workflow, not action, that should have outputs --- .github/workflows/gh-build.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml index 7a9f03ce..c83fb00a 100644 --- a/.github/workflows/gh-build.yml +++ b/.github/workflows/gh-build.yml @@ -35,6 +35,10 @@ on: cuda-version: required: true type: string + outputs: + has-built: + value: true # TODO: we might need to check the job success here + description: whether the built stage was launched (and passed) jobs: build: From ba0bbdedcdaed83b182af9f62489cd84226e1f5c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 16:28:47 -0500 Subject: [PATCH 24/56] fix indentation --- .github/workflows/gh-build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml index c83fb00a..86b77516 100644 --- a/.github/workflows/gh-build.yml +++ b/.github/workflows/gh-build.yml @@ -35,10 +35,10 @@ on: cuda-version: required: true type: string - outputs: - has-built: - value: true # TODO: we might need to check the job success here - description: whether the built stage was launched (and passed) + outputs: + has-built: + value: true # TODO: we might need to check the job success here + description: whether the built stage was launched (and passed) jobs: build: From aed5bb6b1aa04c15633d6f68c841ee98b0294162 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 18:02:09 -0500 Subject: [PATCH 25/56] try to take output as a string --- .github/workflows/gh-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml index 01eae085..b0b94514 100644 --- a/.github/workflows/gh-test.yml +++ b/.github/workflows/gh-test.yml @@ -41,7 +41,7 @@ on: type: string has-built: required: false - type: boolean + type: string description: whether the built stage was launched (and passed) jobs: @@ -67,7 +67,7 @@ jobs: fetch-depth: 0 - name: Setup - if: ${{ !inputs.has-built }} + if: ${{ inputs.has-built == 'true' }} uses: ./.github/actions/setup with: client-repo: ${{ inputs.client-repo }} From 330251d90c003df945ac4725df1cedb6b3f257e2 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 18:12:52 -0500 Subject: [PATCH 26/56] fix logic --- .github/workflows/gh-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml index b0b94514..575ccada 100644 --- a/.github/workflows/gh-test.yml +++ b/.github/workflows/gh-test.yml @@ -67,7 +67,7 @@ jobs: fetch-depth: 0 - name: Setup - if: ${{ inputs.has-built == 'true' }} + if: ${{ inputs.has-built != 'true' }} uses: ./.github/actions/setup with: client-repo: ${{ inputs.client-repo }} From 076104bbb52e6c3b06c5eb3ac00cfb8ecf6235c7 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 23:36:56 +0000 Subject: [PATCH 27/56] multiple fixes - fix artifact env vars - runner must have GPUs for testing - shorten workflow names --- .github/actions/test/action.yml | 19 +++++++------------ .github/workflows/ci-gh.yml | 3 +-- .github/workflows/gh-build-and-test.yml | 9 +++++---- .github/workflows/gh-build.yml | 2 +- .github/workflows/gh-test.yml | 7 +------ 5 files changed, 15 insertions(+), 25 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 018db9aa..675263fb 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -6,42 +6,37 @@ inputs: test-options: required: true type: string - runner-has-gpu: - required: true - type: boolean - description: "The runner has GPU(s)." runs: using: composite steps: - - if: ${{ inputs.runner-has-gpu == true }} - name: Run nvidia-smi to make sure GPU is working + - name: Run nvidia-smi to make sure GPU is working shell: bash --noprofile --norc -xeuo pipefail {0} run: nvidia-smi - name: Download bindings build artifacts uses: actions/download-artifact@v4 with: - name: ${{ env.BINDINGS_ARTIFACT_NAME }} - path: ${{ env.BINDINGS_ARTIFACTS_DIR }} + name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} + path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} - name: Display structure of downloaded bindings artifacts shell: bash --noprofile --norc -xeuo pipefail {0} run: | pwd - ls -lahR $BINDINGS_ARTIFACTS_DIR + ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR - name: Download core build artifacts uses: actions/download-artifact@v4 with: - name: ${{ env.CORE_ARTIFACT_NAME }} - path: ${{ env.CORE_ARTIFACTS_DIR }} + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} + path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} - name: Display structure of downloaded core build artifacts shell: bash --noprofile --norc -xeuo pipefail {0} run: | pwd - ls -lahR $CORE_ARTIFACTS_DIR + ls -lahR $CUDA_CORE_ARTIFACTS_DIR - name: Run test / analysis shell: bash --noprofile --norc -xeuo pipefail {0} diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index 1975c3b5..7c493505 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -11,8 +11,7 @@ on: - "main" jobs: - build-and-test: - name: Build and test (${{ matrix.host-platform }}, ${{ matrix.target-device }}, ${{ matrix.build-mode }}) + ci: strategy: fail-fast: false matrix: diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index f9606ba7..0f10fcff 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -25,6 +25,7 @@ on: jobs: build: + name: Build wheels if: ${{ github.repository_owner == 'nvidia' }} uses: ./.github/workflows/gh-build.yml @@ -45,8 +46,10 @@ jobs: secrets: inherit test: + name: Test against wheels + # TODO: enable testing once linux-aarch64 & win-64 GPU runners are up if: ${{ (github.repository_owner == 'nvidia') && - !startsWith(inputs.host-platform, 'win') }} + startsWith(inputs.host-platform, 'linux-x64') }} needs: - build uses: @@ -55,9 +58,7 @@ jobs: client-repo: ${{ github.event.repository.name }} target-device: ${{ inputs.target-device }} test-options: ${{ inputs.build-type }} - runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || - (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') }} - runner-has-gpu: ${{ inputs.host-platform == 'linux-x64' }} + runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }} build-type: ${{ inputs.build-type }} host-platform: ${{ inputs.host-platform }} dependencies-file: "" diff --git a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml index 86b77516..46026ba9 100644 --- a/.github/workflows/gh-build.yml +++ b/.github/workflows/gh-build.yml @@ -42,7 +42,7 @@ on: jobs: build: - name: Build (${{ inputs.host-platform }}, ${{ inputs.build-type }}, ${{ inputs.build-mode }}, Python "${{ inputs.python-version }}") + name: Build (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}") permissions: id-token: write # This is required for configure-aws-credentials diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml index 575ccada..8216ce10 100644 --- a/.github/workflows/gh-test.yml +++ b/.github/workflows/gh-test.yml @@ -15,10 +15,6 @@ on: runs-on: required: true type: string - runner-has-gpu: - required: true - type: boolean - description: "The runner has GPU(s)." build-type: required: true type: string @@ -46,7 +42,7 @@ on: jobs: build: - name: Test (${{ inputs.host-platform }}, ${{ inputs.target-device }}, ${{ inputs.build-type }}, CMake build-mode=${{ inputs.build-mode }}, Python "${{ inputs.python-version }}", Use container=${{ inputs.use-container }} ) + name: Test (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}", Use container=${{ inputs.use-container }} ) permissions: id-token: write # This is required for configure-aws-credentials @@ -82,4 +78,3 @@ jobs: uses: ./.github/actions/test with: test-options: ${{ inputs.test-options }} - runner-has-gpu: ${{ inputs.runner-has-gpu }} From 7c6fba04dc68313cca9f84cece8b588db166200a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 00:41:48 +0000 Subject: [PATCH 28/56] merge build & test workflows to allow passing env vars; further simplify job names --- .github/workflows/ci-gh.yml | 3 +- .github/workflows/gh-build-and-test.yml | 101 ++++++++++++++++-------- .github/workflows/gh-build.yml | 77 ------------------ .github/workflows/gh-test.yml | 80 ------------------- 4 files changed, 69 insertions(+), 192 deletions(-) delete mode 100644 .github/workflows/gh-build.yml delete mode 100644 .github/workflows/gh-test.yml diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index 7c493505..31446beb 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -1,5 +1,3 @@ -name: Build and test - concurrency: group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }} cancel-in-progress: true @@ -12,6 +10,7 @@ on: jobs: ci: + name: "CI" strategy: fail-fast: false matrix: diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 0f10fcff..f7296823 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -25,45 +25,80 @@ on: jobs: build: - name: Build wheels + name: Build (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}") if: ${{ github.repository_owner == 'nvidia' }} - uses: - ./.github/workflows/gh-build.yml - with: - client-repo: ${{ github.event.repository.name }} - target-device: ${{ inputs.target-device }} - runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu8') || - (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') || - (inputs.host-platform == 'win-x64' && 'windows-2019') }} - # (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }} - build-type: ${{ inputs.build-type }} - host-platform: ${{ inputs.host-platform }} - build-mode: ${{ inputs.build-mode }} - upload-enabled: ${{ inputs.upload-enabled }} - python-version: ${{ inputs.python-version }} - cuda-version: ${{ inputs.cuda-version }} - dependencies-file: "" + permissions: + id-token: write # This is required for configure-aws-credentials + contents: read # This is required for actions/checkout + runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu8') || + (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') || + (inputs.host-platform == 'win-x64' && 'windows-2019') }} + # (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }} secrets: inherit + steps: + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up build environment + uses: ./.github/actions/setup + with: + client-repo: ${{ github.event.repository.name }} + build-type: ${{ inputs.build-type }} + target-device: "${{ inputs.target-device }}" + host-platform: ${{ inputs.host-platform }} + build-mode: ${{ inputs.build-mode }} + upload-enabled: ${{ inputs.upload-enabled }} + python-version: ${{ inputs.python-version }} + cuda-version: ${{ inputs.cuda-version }} + + - name: Call build action + uses: ./.github/actions/build + with: + build-type: ${{ inputs.build-type }} + target-device: "${{ inputs.target-device }}" + host-platform: ${{ inputs.host-platform }} + upload-enabled: ${{ inputs.upload-enabled }} test: - name: Test against wheels + # TODO: improve the name once a separate test matrix is defined + name: Test (CUDA ${{ inputs.cuda-version }}, Use container=${{ inputs.use-container }}) # TODO: enable testing once linux-aarch64 & win-64 GPU runners are up if: ${{ (github.repository_owner == 'nvidia') && startsWith(inputs.host-platform, 'linux-x64') }} + permissions: + id-token: write # This is required for configure-aws-credentials + contents: read # This is required for actions/checkout + runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }} + secrets: inherit + container: + options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g + image: condaforge/miniforge3:latest + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} needs: - build - uses: - ./.github/workflows/gh-test.yml - with: - client-repo: ${{ github.event.repository.name }} - target-device: ${{ inputs.target-device }} - test-options: ${{ inputs.build-type }} - runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }} - build-type: ${{ inputs.build-type }} - host-platform: ${{ inputs.host-platform }} - dependencies-file: "" - build-mode: ${{ inputs.build-mode }} - upload-enabled: ${{ inputs.upload-enabled }} - python-version: ${{ inputs.python-version }} - has-built: ${{ needs.build.outputs.has-built }} - secrets: inherit + steps: + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # TODO: we probably don't need this? + # - name: Setup + # if: ${{ inputs.has-built != 'true' }} + # uses: ./.github/actions/setup + # with: + # client-repo: ${{ github.event.repository.name }} + # build-type: ${{ inputs.build-type }} + # target-device: "${{ inputs.target-device }}" + # host-platform: ${{ inputs.host-platform }} + # build-mode: ${{ inputs.build-mode }} + # upload-enabled: ${{ inputs.upload-enabled }} + # python-version: ${{ inputs.python-version }} + + - name: Call test action + uses: ./.github/actions/test + with: + test-options: ${{ inputs.build-type }} diff --git a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml deleted file mode 100644 index 46026ba9..00000000 --- a/.github/workflows/gh-build.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Build - -on: - workflow_call: - inputs: - client-repo: - required: true - type: string - target-device: - required: true - type: string - runs-on: - required: true - type: string - build-type: - required: true - type: string - description: One of ci / release - host-platform: - required: true - type: string - dependencies-file: - required: true - type: string - description: path to versions.json relative to the target repo dir - build-mode: - required: true - type: string - upload-enabled: - required: true - type: boolean - python-version: - required: true - type: string - cuda-version: - required: true - type: string - outputs: - has-built: - value: true # TODO: we might need to check the job success here - description: whether the built stage was launched (and passed) - -jobs: - build: - name: Build (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}") - - permissions: - id-token: write # This is required for configure-aws-credentials - contents: read # This is required for actions/checkout - - runs-on: ${{ inputs.runs-on }} - - steps: - - name: Checkout ${{ inputs.client-repo }} - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up build environment - uses: ./.github/actions/setup - with: - client-repo: ${{ inputs.client-repo }} - build-type: ${{ inputs.build-type }} - target-device: "${{ inputs.target-device }}" - host-platform: ${{ inputs.host-platform }} - build-mode: ${{ inputs.build-mode }} - upload-enabled: ${{ inputs.upload-enabled }} - python-version: ${{ inputs.python-version }} - cuda-version: ${{ inputs.cuda-version }} - - - name: Call build action - uses: ./.github/actions/build - with: - build-type: ${{ inputs.build-type }} - target-device: "${{ inputs.target-device }}" - host-platform: ${{ inputs.host-platform }} - upload-enabled: ${{ inputs.upload-enabled }} diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml deleted file mode 100644 index 8216ce10..00000000 --- a/.github/workflows/gh-test.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: Test - -on: - workflow_call: - inputs: - client-repo: - required: true - type: string - target-device: - required: true - type: string - test-options: - required: true - type: string - runs-on: - required: true - type: string - build-type: - required: true - type: string - description: One of ci / release - host-platform: - required: true - type: string - dependencies-file: - required: true - type: string - description: path to versions.json relative to the target repo dir - build-mode: - required: true - type: string - upload-enabled: - required: true - type: boolean - python-version: - required: false - type: string - has-built: - required: false - type: string - description: whether the built stage was launched (and passed) - -jobs: - build: - name: Test (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}", Use container=${{ inputs.use-container }} ) - - permissions: - id-token: write # This is required for configure-aws-credentials - contents: read # This is required for actions/checkout - - runs-on: ${{ inputs.runs-on }} - - container: - options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g - image: condaforge/miniforge3:latest - env: - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} - - steps: - - name: Checkout ${{ inputs.client-repo }} - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Setup - if: ${{ inputs.has-built != 'true' }} - uses: ./.github/actions/setup - with: - client-repo: ${{ inputs.client-repo }} - build-type: ${{ inputs.build-type }} - target-device: "${{ inputs.target-device }}" - host-platform: ${{ inputs.host-platform }} - build-mode: ${{ inputs.build-mode }} - upload-enabled: ${{ inputs.upload-enabled }} - python-version: ${{ inputs.python-version }} - - - name: Call test action - uses: ./.github/actions/test - with: - test-options: ${{ inputs.test-options }} From ca7b437189395454d8bc09ca5bae79b7862dff5c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 00:49:41 +0000 Subject: [PATCH 29/56] no need to pass secrets as we don't have reusable workflows anymore --- .github/workflows/gh-build-and-test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index f7296823..c9247f4e 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -34,7 +34,6 @@ jobs: (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') || (inputs.host-platform == 'win-x64' && 'windows-2019') }} # (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }} - secrets: inherit steps: - name: Checkout ${{ github.event.repository.name }} uses: actions/checkout@v4 @@ -71,7 +70,6 @@ jobs: id-token: write # This is required for configure-aws-credentials contents: read # This is required for actions/checkout runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }} - secrets: inherit container: options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g image: condaforge/miniforge3:latest From 5d0b014250d5313d2dffc40a5aba0ece12e705d2 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 01:13:24 +0000 Subject: [PATCH 30/56] pass job outputs explicitly... --- .github/workflows/ci-gh.yml | 3 ++- .github/workflows/gh-build-and-test.yml | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index 31446beb..189258eb 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -1,3 +1,5 @@ +name: "CI" + concurrency: group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }} cancel-in-progress: true @@ -10,7 +12,6 @@ on: jobs: ci: - name: "CI" strategy: fail-fast: false matrix: diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index c9247f4e..a7d2919a 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -60,6 +60,19 @@ jobs: host-platform: ${{ inputs.host-platform }} upload-enabled: ${{ inputs.upload-enabled }} + - name: Pass environment variables + id: pass_env + run: | + echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_NAME}" >> $GITHUB_OUTPUT + echo "CUDA_CORE_ARTIFACTS_DIR=${CUDA_CORE_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT + echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_NAME}" >> $GITHUB_OUTPUT + echo "CUDA_BINDINGS_ARTIFACTS_DIR=${CUDA_CORE_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT + outputs: + CUDA_CORE_ARTIFACT_NAME=${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACT_NAME }} + CUDA_CORE_ARTIFACTS_DIR=${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACTS_DIR }} + CUDA_BINDINGS_ARTIFACT_NAME=${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} + CUDA_BINDINGS_ARTIFACTS_DIR=${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} + test: # TODO: improve the name once a separate test matrix is defined name: Test (CUDA ${{ inputs.cuda-version }}, Use container=${{ inputs.use-container }}) @@ -100,3 +113,8 @@ jobs: uses: ./.github/actions/test with: test-options: ${{ inputs.build-type }} + env: + CUDA_CORE_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_CORE_ARTIFACT_NAME }} + CUDA_CORE_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }} + CUDA_BINDINGS_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} + CUDA_BINDINGS_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} From 7350965130c2172d59657515c7a1e834e859198c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 01:16:35 +0000 Subject: [PATCH 31/56] try changing the order --- .github/workflows/gh-build-and-test.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index a7d2919a..5c1f1f29 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -34,6 +34,11 @@ jobs: (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') || (inputs.host-platform == 'win-x64' && 'windows-2019') }} # (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }} + outputs: + CUDA_CORE_ARTIFACT_NAME=${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACT_NAME }} + CUDA_CORE_ARTIFACTS_DIR=${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACTS_DIR }} + CUDA_BINDINGS_ARTIFACT_NAME=${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} + CUDA_BINDINGS_ARTIFACTS_DIR=${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} steps: - name: Checkout ${{ github.event.repository.name }} uses: actions/checkout@v4 @@ -65,13 +70,8 @@ jobs: run: | echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_NAME}" >> $GITHUB_OUTPUT echo "CUDA_CORE_ARTIFACTS_DIR=${CUDA_CORE_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT - echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_NAME}" >> $GITHUB_OUTPUT - echo "CUDA_BINDINGS_ARTIFACTS_DIR=${CUDA_CORE_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT - outputs: - CUDA_CORE_ARTIFACT_NAME=${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACT_NAME }} - CUDA_CORE_ARTIFACTS_DIR=${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACTS_DIR }} - CUDA_BINDINGS_ARTIFACT_NAME=${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} - CUDA_BINDINGS_ARTIFACTS_DIR=${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} + echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_NAME}" >> $GITHUB_OUTPUT + echo "CUDA_BINDINGS_ARTIFACTS_DIR=${CUDA_BINDINGS_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT test: # TODO: improve the name once a separate test matrix is defined From eedffd9776bfe63c3967f089803e480d3017c37d Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 01:22:14 +0000 Subject: [PATCH 32/56] fix syntax --- .github/workflows/gh-build-and-test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 5c1f1f29..e7659a6f 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -35,10 +35,10 @@ jobs: (inputs.host-platform == 'win-x64' && 'windows-2019') }} # (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }} outputs: - CUDA_CORE_ARTIFACT_NAME=${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACT_NAME }} - CUDA_CORE_ARTIFACTS_DIR=${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACTS_DIR }} - CUDA_BINDINGS_ARTIFACT_NAME=${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} - CUDA_BINDINGS_ARTIFACTS_DIR=${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} + CUDA_CORE_ARTIFACT_NAME: ${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACT_NAME }} + CUDA_CORE_ARTIFACTS_DIR: ${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACTS_DIR }} + CUDA_BINDINGS_ARTIFACT_NAME: ${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} + CUDA_BINDINGS_ARTIFACTS_DIR: ${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} steps: - name: Checkout ${{ github.event.repository.name }} uses: actions/checkout@v4 From c7d3e0322c2c9a077d8a657de2cf946345b0cd4e Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 01:37:29 +0000 Subject: [PATCH 33/56] fix workflow merge error --- .github/actions/test/action.yml | 2 +- .github/workflows/ci-gh.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 675263fb..edfcee3b 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -41,4 +41,4 @@ runs: - name: Run test / analysis shell: bash --noprofile --norc -xeuo pipefail {0} run: | - "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint" "${{ env.REPO_DIR }}/continuous_integration/scripts/test" ${{ inputs.test-options }} + "./continuous_integration/scripts/entrypoint" "./continuous_integration/scripts/test" ${{ inputs.test-options }} diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index 189258eb..cb27c879 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -34,6 +34,7 @@ jobs: # Note: this is for build-time only; the test-time matrix needs to be # defined separately. - "12.6.2" + name: "CI" uses: ./.github/workflows/gh-build-and-test.yml with: From 35c244f3833d9e749d414e22710e2c091330832c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 03:00:52 +0000 Subject: [PATCH 34/56] allow mini-ctk to be cached & reused in tests --- .github/actions/setup/action.yml | 40 ++++++++++++++++++++ .github/actions/test/action.yml | 49 ++++++++++++++++++++++++- .github/workflows/gh-build-and-test.yml | 2 + 3 files changed, 90 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index e00cf27f..bed73c57 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -47,7 +47,21 @@ runs: run: | env + - name: Set up CTK cache variable + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.xz" >> $GITHUB_ENV + + - name: Download CTK cache + id: ctk-get-cache + uses: actions/download-artifact@v4 + continue-on-error: true + with: + name: ${{ env.CTK_CACHE_FILENAME }} + path: . + - name: Get CUDA components + if: ${{ steps.ctk-get-cache.outcome == 'failure' }} shell: bash --noprofile --norc -xeuo pipefail {0} run: | CUDA_PATH="./cuda_toolkit" @@ -90,15 +104,41 @@ runs: } # Get headers and shared libraries in place + # Note: the existing artifact would need to be manually deleted (ex: through web UI) + # if this list is changed, as the artifact actions do not offer any option for us to + # invalidate the artifact. populate_cuda_path cuda_nvcc populate_cuda_path cuda_cudart populate_cuda_path cuda_nvrtc populate_cuda_path cuda_profiler_api ls -l $CUDA_PATH + # Prepare the cache + tar cf - $CUDA_PATH | xz -z -T0 - > $CTK_CACHE_FILENAME + # Note: the headers will be copied into the cibuildwheel manylinux container, # so setting the CUDA_PATH env var here is meaningless. + - name: Upload CTK cache + if: ${{ steps.ctk-get-cache.outcome == 'failure' }} + uses: actions/upload-artifact@v4 + with: + pattern: ${{ env.CTK_CACHE_FILENAME }} + path: . + if-no-files-found: error + + - name: Restore CTK cache + if: ${{ steps.ctk-get-cache.outcome == 'success' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + CUDA_PATH="./cuda_toolkit" + mkdir $CUDA_PATH + tar -xvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 + ls -l $CUDA_PATH + if [ ! -d "$CUDA_PATH/include" ]; then + exit 1 + fi + - name: Set environment variables shell: bash --noprofile --norc -xeuo pipefail {0} run: | diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index edfcee3b..0881b645 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -38,7 +38,54 @@ runs: pwd ls -lahR $CUDA_CORE_ARTIFACTS_DIR + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Set up CTK cache variable + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.xz" >> $GITHUB_ENV + + - name: Download CTK cache + id: ctk-get-cache + uses: actions/download-artifact@v4 + continue-on-error: true + with: + name: ${{ env.CTK_CACHE_FILENAME }} + path: . + + - name: Restore CTK cache + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + CUDA_PATH="./cuda_toolkit" + mkdir $CUDA_PATH + tar -xvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 + ls -l $CUDA_PATH + if [ ! -d "$CUDA_PATH/include" ]; then + exit 1 + fi + + # TODO: check if we really need these for tests? + echo "CUDA_PATH=$CUDA_PATH" >> $GITHUB_ENV + echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_PATH/lib" >> $GITHUB_ENV + - name: Run test / analysis shell: bash --noprofile --norc -xeuo pipefail {0} run: | - "./continuous_integration/scripts/entrypoint" "./continuous_integration/scripts/test" ${{ inputs.test-options }} + REPO_DIR=$(pwd) + + cd "${CUDA_BINDINGS_ARTIFACTS_DIR}" + pip install *.whl + + cd "${CUDA_CORE_ARTIFACTS_DIR}" + pip install *.whl + + cd "${REPO_DIR}/cuda_bindings" + pytest tests/ + #pytest tests/cython + + cd "${REPO_DIR}/cuda_core" + pytest tests/ diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index e7659a6f..23663aad 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -83,6 +83,7 @@ jobs: id-token: write # This is required for configure-aws-credentials contents: read # This is required for actions/checkout runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }} + # TODO: use a different (nvidia?) container, or just run on bare image container: options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g image: condaforge/miniforge3:latest @@ -118,3 +119,4 @@ jobs: CUDA_CORE_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }} CUDA_BINDINGS_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} CUDA_BINDINGS_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} + PYTHON_VERSION: ${{ inputs-python-version }} From 5615d5429ddc0f1ba752260c432d12dddd8babb6 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 03:02:39 +0000 Subject: [PATCH 35/56] fix typo --- .github/workflows/gh-build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 23663aad..d8dce525 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -119,4 +119,4 @@ jobs: CUDA_CORE_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }} CUDA_BINDINGS_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} CUDA_BINDINGS_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} - PYTHON_VERSION: ${{ inputs-python-version }} + PYTHON_VERSION: ${{ inputs.python-version }} From bb5fed32339d2e1679c97728ecc774af181bb6ff Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 03:17:22 +0000 Subject: [PATCH 36/56] try to escape | and > ... --- .github/actions/setup/action.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index bed73c57..567b2d25 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -114,7 +114,8 @@ runs: ls -l $CUDA_PATH # Prepare the cache - tar cf - $CUDA_PATH | xz -z -T0 - > $CTK_CACHE_FILENAME + # Note: try to escape | and > ... + echo "$(tar cf - ${CUDA_PATH} | xz -z -T0 - > ${CTK_CACHE_FILENAME})" # Note: the headers will be copied into the cibuildwheel manylinux container, # so setting the CUDA_PATH env var here is meaningless. From 9d6e69bae192998abe56b52f43511c87a42fdce0 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 03:36:25 +0000 Subject: [PATCH 37/56] switch to gz for simplicity --- .github/actions/setup/action.yml | 6 +++--- .github/actions/test/action.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 567b2d25..2908399f 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -50,7 +50,7 @@ runs: - name: Set up CTK cache variable shell: bash --noprofile --norc -xeuo pipefail {0} run: | - echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.xz" >> $GITHUB_ENV + echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.gz" >> $GITHUB_ENV - name: Download CTK cache id: ctk-get-cache @@ -115,7 +115,7 @@ runs: # Prepare the cache # Note: try to escape | and > ... - echo "$(tar cf - ${CUDA_PATH} | xz -z -T0 - > ${CTK_CACHE_FILENAME})" + tar -czvf ${CTK_CACHE_FILENAME} ${CUDA_PATH} # Note: the headers will be copied into the cibuildwheel manylinux container, # so setting the CUDA_PATH env var here is meaningless. @@ -134,7 +134,7 @@ runs: run: | CUDA_PATH="./cuda_toolkit" mkdir $CUDA_PATH - tar -xvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 + tar -xzvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 ls -l $CUDA_PATH if [ ! -d "$CUDA_PATH/include" ]; then exit 1 diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 0881b645..559d05fe 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -46,7 +46,7 @@ runs: - name: Set up CTK cache variable shell: bash --noprofile --norc -xeuo pipefail {0} run: | - echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.xz" >> $GITHUB_ENV + echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.gz" >> $GITHUB_ENV - name: Download CTK cache id: ctk-get-cache @@ -61,7 +61,7 @@ runs: run: | CUDA_PATH="./cuda_toolkit" mkdir $CUDA_PATH - tar -xvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 + tar -xzvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 ls -l $CUDA_PATH if [ ! -d "$CUDA_PATH/include" ]; then exit 1 From 8ec609063e8c69966c9f06410f1b06e4692091d5 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 03:47:36 +0000 Subject: [PATCH 38/56] fix artifact parallel upload & lack of cache key --- .github/actions/setup/action.yml | 10 ++++++---- .github/actions/test/action.yml | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 2908399f..6c84d4c7 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -50,6 +50,7 @@ runs: - name: Set up CTK cache variable shell: bash --noprofile --norc -xeuo pipefail {0} run: | + echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}" >> $GITHUB_ENV echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.gz" >> $GITHUB_ENV - name: Download CTK cache @@ -57,8 +58,8 @@ runs: uses: actions/download-artifact@v4 continue-on-error: true with: - name: ${{ env.CTK_CACHE_FILENAME }} - path: . + name: ${{ env.CTK_CACHE_KEY }} + path: ./${{ env.CTK_CACHE_FILENAME }} - name: Get CUDA components if: ${{ steps.ctk-get-cache.outcome == 'failure' }} @@ -123,9 +124,10 @@ runs: - name: Upload CTK cache if: ${{ steps.ctk-get-cache.outcome == 'failure' }} uses: actions/upload-artifact@v4 + continue-on-error: true with: - pattern: ${{ env.CTK_CACHE_FILENAME }} - path: . + name: ${{ env.CTK_CACHE_KEY }} + path: ./${{ env.CTK_CACHE_FILENAME }} if-no-files-found: error - name: Restore CTK cache diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 559d05fe..4fab178f 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -46,6 +46,7 @@ runs: - name: Set up CTK cache variable shell: bash --noprofile --norc -xeuo pipefail {0} run: | + echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}" >> $GITHUB_ENV echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.gz" >> $GITHUB_ENV - name: Download CTK cache @@ -53,8 +54,8 @@ runs: uses: actions/download-artifact@v4 continue-on-error: true with: - name: ${{ env.CTK_CACHE_FILENAME }} - path: . + name: ${{ env.CTK_CACHE_KEY }} + path: ./${{ env.CTK_CACHE_FILENAME }} - name: Restore CTK cache shell: bash --noprofile --norc -xeuo pipefail {0} From 94561772a8ab10df6f8daac424eff8b085ae6b08 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 03:51:50 +0000 Subject: [PATCH 39/56] fix download path --- .github/actions/setup/action.yml | 3 ++- .github/actions/test/action.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 6c84d4c7..2c27ce94 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -59,7 +59,7 @@ runs: continue-on-error: true with: name: ${{ env.CTK_CACHE_KEY }} - path: ./${{ env.CTK_CACHE_FILENAME }} + path: ./ - name: Get CUDA components if: ${{ steps.ctk-get-cache.outcome == 'failure' }} @@ -136,6 +136,7 @@ runs: run: | CUDA_PATH="./cuda_toolkit" mkdir $CUDA_PATH + ls -l tar -xzvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 ls -l $CUDA_PATH if [ ! -d "$CUDA_PATH/include" ]; then diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 4fab178f..a1480810 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -55,13 +55,14 @@ runs: continue-on-error: true with: name: ${{ env.CTK_CACHE_KEY }} - path: ./${{ env.CTK_CACHE_FILENAME }} + path: ./ - name: Restore CTK cache shell: bash --noprofile --norc -xeuo pipefail {0} run: | CUDA_PATH="./cuda_toolkit" mkdir $CUDA_PATH + ls -l tar -xzvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 ls -l $CUDA_PATH if [ ! -d "$CUDA_PATH/include" ]; then From 2f2046d1e5f7f149fa756e7dbcc6468c81cfe98f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 04:02:19 +0000 Subject: [PATCH 40/56] fix extract --- .github/actions/setup/action.yml | 5 ++--- .github/actions/test/action.yml | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 2c27ce94..22cd8121 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -134,10 +134,9 @@ runs: if: ${{ steps.ctk-get-cache.outcome == 'success' }} shell: bash --noprofile --norc -xeuo pipefail {0} run: | - CUDA_PATH="./cuda_toolkit" - mkdir $CUDA_PATH ls -l - tar -xzvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 + CUDA_PATH="./cuda_toolkit" + tar -xzvf $CTK_CACHE_FILENAME ls -l $CUDA_PATH if [ ! -d "$CUDA_PATH/include" ]; then exit 1 diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index a1480810..e013b1d2 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -60,10 +60,9 @@ runs: - name: Restore CTK cache shell: bash --noprofile --norc -xeuo pipefail {0} run: | - CUDA_PATH="./cuda_toolkit" - mkdir $CUDA_PATH ls -l - tar -xzvf $CTK_CACHE_FILENAME -C $CUDA_PATH --strip-components=1 + CUDA_PATH="./cuda_toolkit" + tar -xzvf $CTK_CACHE_FILENAME ls -l $CUDA_PATH if [ ! -d "$CUDA_PATH/include" ]; then exit 1 From c361ad8f752af8a6e0a30b3299937b7a86c3244a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 04:13:46 +0000 Subject: [PATCH 41/56] propagate cuda-version --- .github/actions/test/action.yml | 4 ++-- .github/workflows/gh-build-and-test.yml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index e013b1d2..2703af16 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -46,8 +46,8 @@ runs: - name: Set up CTK cache variable shell: bash --noprofile --norc -xeuo pipefail {0} run: | - echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}" >> $GITHUB_ENV - echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.gz" >> $GITHUB_ENV + echo "CTK_CACHE_KEY=mini-ctk-${CTK_BUILD_VER}" >> $GITHUB_ENV + echo "CTK_CACHE_FILENAME=mini-ctk-${CTK_BUILD_VER}.tar.gz" >> $GITHUB_ENV - name: Download CTK cache id: ctk-get-cache diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index d8dce525..556c47f2 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -120,3 +120,4 @@ jobs: CUDA_BINDINGS_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }} CUDA_BINDINGS_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} PYTHON_VERSION: ${{ inputs.python-version }} + CTK_BUILD_VER: ${{ inputs.cuda-version }} From 37a3bb15f92adc6365545a72e429a4388caaba79 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 23:26:36 -0500 Subject: [PATCH 42/56] install binding test deps --- .github/actions/test/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 2703af16..1328cbf2 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -85,6 +85,7 @@ runs: pip install *.whl cd "${REPO_DIR}/cuda_bindings" + pip install -r requirements.txt pytest tests/ #pytest tests/cython From 54707dcadbd55d263f2f9300f48d642989470434 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 23:41:12 -0500 Subject: [PATCH 43/56] fix paths --- .github/actions/test/action.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 1328cbf2..a04e9ccf 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -61,14 +61,13 @@ runs: shell: bash --noprofile --norc -xeuo pipefail {0} run: | ls -l - CUDA_PATH="./cuda_toolkit" + CUDA_PATH="$(pwd)/cuda_toolkit" tar -xzvf $CTK_CACHE_FILENAME ls -l $CUDA_PATH if [ ! -d "$CUDA_PATH/include" ]; then exit 1 fi - # TODO: check if we really need these for tests? echo "CUDA_PATH=$CUDA_PATH" >> $GITHUB_ENV echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_PATH/lib" >> $GITHUB_ENV From 3faf8a3da359d8af3483226b5dee33eb330f0afd Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 7 Dec 2024 23:56:05 -0500 Subject: [PATCH 44/56] include nvjitlink to mini CTK for testing --- .github/actions/setup/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 22cd8121..bed9a63d 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -112,6 +112,7 @@ runs: populate_cuda_path cuda_cudart populate_cuda_path cuda_nvrtc populate_cuda_path cuda_profiler_api + populate_cuda_path libnvjitlink ls -l $CUDA_PATH # Prepare the cache From e35706aa13c699c91e2b6076310ba75e4dc2228b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 05:19:14 +0000 Subject: [PATCH 45/56] ensure cupy is an optional test dependency --- cuda_core/tests/example_tests/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cuda_core/tests/example_tests/utils.py b/cuda_core/tests/example_tests/utils.py index 3d218a91..81479903 100644 --- a/cuda_core/tests/example_tests/utils.py +++ b/cuda_core/tests/example_tests/utils.py @@ -10,7 +10,6 @@ import os import sys -import cupy as cp import pytest @@ -53,4 +52,3 @@ def run_example(samples_path, filename, env=None): sys.argv = old_argv # further reduce the memory watermark gc.collect() - cp.get_default_memory_pool().free_all_blocks() From e0c610e08ec20fa94f3d6c2fa39964c5597fa15a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 05:31:07 +0000 Subject: [PATCH 46/56] per arch ctk --- .github/actions/setup/action.yml | 4 ++-- .github/actions/test/action.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index bed9a63d..2416fcf4 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -50,8 +50,8 @@ runs: - name: Set up CTK cache variable shell: bash --noprofile --norc -xeuo pipefail {0} run: | - echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}" >> $GITHUB_ENV - echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}.tar.gz" >> $GITHUB_ENV + echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}" >> $GITHUB_ENV + echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}.tar.gz" >> $GITHUB_ENV - name: Download CTK cache id: ctk-get-cache diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index a04e9ccf..0a1e621a 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -46,8 +46,8 @@ runs: - name: Set up CTK cache variable shell: bash --noprofile --norc -xeuo pipefail {0} run: | - echo "CTK_CACHE_KEY=mini-ctk-${CTK_BUILD_VER}" >> $GITHUB_ENV - echo "CTK_CACHE_FILENAME=mini-ctk-${CTK_BUILD_VER}.tar.gz" >> $GITHUB_ENV + echo "CTK_CACHE_KEY=mini-ctk-${CTK_BUILD_VER}-${{ inputs.host-platform }}" >> $GITHUB_ENV + echo "CTK_CACHE_FILENAME=mini-ctk-${CTK_BUILD_VER}-${{ inputs.host-platform }}.tar.gz" >> $GITHUB_ENV - name: Download CTK cache id: ctk-get-cache From fb487d0fedb9b91ef2d73024d407a9384aa657d3 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 05:41:57 +0000 Subject: [PATCH 47/56] fix arg passing --- .github/actions/test/action.yml | 4 ++-- .github/workflows/gh-build-and-test.yml | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 0a1e621a..c82a8450 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -46,8 +46,8 @@ runs: - name: Set up CTK cache variable shell: bash --noprofile --norc -xeuo pipefail {0} run: | - echo "CTK_CACHE_KEY=mini-ctk-${CTK_BUILD_VER}-${{ inputs.host-platform }}" >> $GITHUB_ENV - echo "CTK_CACHE_FILENAME=mini-ctk-${CTK_BUILD_VER}-${{ inputs.host-platform }}.tar.gz" >> $GITHUB_ENV + echo "CTK_CACHE_KEY=mini-ctk-${CTK_BUILD_VER}-${HOST_PLATFORM}" >> $GITHUB_ENV + echo "CTK_CACHE_FILENAME=mini-ctk-${CTK_BUILD_VER}-${HOST_PLATFORM}.tar.gz" >> $GITHUB_ENV - name: Download CTK cache id: ctk-get-cache diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 556c47f2..06f6a168 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -75,7 +75,7 @@ jobs: test: # TODO: improve the name once a separate test matrix is defined - name: Test (CUDA ${{ inputs.cuda-version }}, Use container=${{ inputs.use-container }}) + name: Test (CUDA ${{ inputs.cuda-version }}) # TODO: enable testing once linux-aarch64 & win-64 GPU runners are up if: ${{ (github.repository_owner == 'nvidia') && startsWith(inputs.host-platform, 'linux-x64') }} @@ -121,3 +121,4 @@ jobs: CUDA_BINDINGS_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }} PYTHON_VERSION: ${{ inputs.python-version }} CTK_BUILD_VER: ${{ inputs.cuda-version }} + HOST_PLATFORM: ${{ inputs.host-platform }} From c01e015c26d8d7f0e775865559d69e8aa3d7e823 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 05:55:48 +0000 Subject: [PATCH 48/56] fix invalid context during test teardown --- cuda_core/tests/conftest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index 59e5883f..b67eeec2 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -30,6 +30,10 @@ def init_cuda(): def _device_unset_current(): + ctx = handle_return(driver.cuCtxGetCurrent()) + if int(ctx) == 0: + # no active context, do nothing + return handle_return(driver.cuCtxPopCurrent()) with _device._tls_lock: del _device._tls.devices From f1d0e4027231adfbd44a6e70070d939e5e129d97 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 06:09:32 +0000 Subject: [PATCH 49/56] WAR: mark PTX test xfail due to CI condition --- cuda_core/tests/test_program.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 95c4d377..562f89de 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -8,10 +8,20 @@ import pytest +from cuda import cuda, nvrtc from cuda.core.experimental import Program from cuda.core.experimental._module import Kernel, ObjectCode +@pytest.fixture +def can_load_generated_ptx(): + _, driver_ver = cuda.cuDriverGetVersion() + _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion() + if nvrtc_major * 1000 + nvrtc_minor * 10 > driver_ver: + return False + return True + + def test_program_init_valid_code_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") @@ -31,6 +41,8 @@ def test_program_init_invalid_code_format(): Program(code, "c++") +# TODO: incorporate this check in Program +@pytest.mark.xfail(not can_load_generated_ptx, reason="PTX version too new") def test_program_compile_valid_target_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") From a08fbc94bb811298a3d9a190bd7d8cc6c72c1fbc Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 01:34:16 -0500 Subject: [PATCH 50/56] debug --- .github/actions/test/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index c82a8450..78572917 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -89,4 +89,4 @@ runs: #pytest tests/cython cd "${REPO_DIR}/cuda_core" - pytest tests/ + pytest -rxXs tests/ From f36393e65c80d667f9eeed311d52038f0026da7c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 15:15:13 +0000 Subject: [PATCH 51/56] also detect if CUDA is ever initialized --- cuda_core/tests/conftest.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index b67eeec2..9c8ed52b 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -18,7 +18,7 @@ import pytest from cuda.core.experimental import Device, _device -from cuda.core.experimental._utils import handle_return +from cuda.core.experimental._utils import CUDAError, handle_return @pytest.fixture(scope="function") @@ -30,10 +30,15 @@ def init_cuda(): def _device_unset_current(): - ctx = handle_return(driver.cuCtxGetCurrent()) - if int(ctx) == 0: - # no active context, do nothing - return + try: + ctx = handle_return(driver.cuCtxGetCurrent()) + except CUDAError as e: + if "CUDA_ERROR_NOT_INITIALIZED" in str(e): + return + else: + if int(ctx) == 0: + # no active context, do nothing + return handle_return(driver.cuCtxPopCurrent()) with _device._tls_lock: del _device._tls.devices From f3cc6bde2b575bb9f1a4bfe785e345cf57a575a1 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 16:26:34 +0000 Subject: [PATCH 52/56] ensure CUDA is init'd at test start time --- cuda_core/tests/conftest.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index 9c8ed52b..58cc7cef 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -21,8 +21,14 @@ from cuda.core.experimental._utils import CUDAError, handle_return +@pytest.fixture(scope="session", autouse=True) +def always_init_cuda(): + handle_return(driver.cuInit(0)) + + @pytest.fixture(scope="function") def init_cuda(): + # TODO: rename this to e.g. init_context device = Device() device.set_current() yield @@ -30,15 +36,10 @@ def init_cuda(): def _device_unset_current(): - try: - ctx = handle_return(driver.cuCtxGetCurrent()) - except CUDAError as e: - if "CUDA_ERROR_NOT_INITIALIZED" in str(e): - return - else: - if int(ctx) == 0: - # no active context, do nothing - return + ctx = handle_return(driver.cuCtxGetCurrent()) + if int(ctx) == 0: + # no active context, do nothing + return handle_return(driver.cuCtxPopCurrent()) with _device._tls_lock: del _device._tls.devices @@ -46,6 +47,7 @@ def _device_unset_current(): @pytest.fixture(scope="function") def deinit_cuda(): + # TODO: rename this to e.g. deinit_context yield _device_unset_current() From b1f07a38f60c5aa70cd71048b79b7efe6b934094 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 16:41:15 +0000 Subject: [PATCH 53/56] enforce the right CC is passed to NVRTC --- cuda_core/tests/conftest.py | 2 +- cuda_core/tests/test_program.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index 58cc7cef..fe755738 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -18,7 +18,7 @@ import pytest from cuda.core.experimental import Device, _device -from cuda.core.experimental._utils import CUDAError, handle_return +from cuda.core.experimental._utils import handle_return @pytest.fixture(scope="session", autouse=True) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 562f89de..10789856 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -9,7 +9,7 @@ import pytest from cuda import cuda, nvrtc -from cuda.core.experimental import Program +from cuda.core.experimental import Device, Program from cuda.core.experimental._module import Kernel, ObjectCode @@ -46,7 +46,9 @@ def test_program_init_invalid_code_format(): def test_program_compile_valid_target_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - object_code = program.compile("ptx") + arch = "".join(str(i) for i in Device().compute_capability) + object_code = program.compile("ptx", options=(f"-arch=compute_{arch}",)) + print(object_code._module.decode()) kernel = object_code.get_kernel("my_kernel") assert isinstance(object_code, ObjectCode) assert isinstance(kernel, Kernel) From 8a6738b3aa3d265933a68f58b3b891ee8b7c196a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 12:10:57 -0500 Subject: [PATCH 54/56] fix xfail mark --- cuda_core/tests/test_program.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 10789856..f1c24b3e 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -13,7 +13,6 @@ from cuda.core.experimental._module import Kernel, ObjectCode -@pytest.fixture def can_load_generated_ptx(): _, driver_ver = cuda.cuDriverGetVersion() _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion() @@ -42,7 +41,7 @@ def test_program_init_invalid_code_format(): # TODO: incorporate this check in Program -@pytest.mark.xfail(not can_load_generated_ptx, reason="PTX version too new") +@pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new") def test_program_compile_valid_target_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") From ed0386a33d9273e766063813c85828b0b5ffda54 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 22:42:25 +0000 Subject: [PATCH 55/56] switch to use github cache to improve reuse --- .github/actions/setup/action.yml | 19 +++++++++---------- .github/actions/test/action.yml | 7 ++++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 2416fcf4..084f4c2f 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -55,14 +55,14 @@ runs: - name: Download CTK cache id: ctk-get-cache - uses: actions/download-artifact@v4 + uses: actions/cache/restore@v4 continue-on-error: true with: - name: ${{ env.CTK_CACHE_KEY }} - path: ./ + key: ${{ env.CTK_CACHE_KEY }} + path: ./${{ env.CTK_CACHE_FILENAME }} - name: Get CUDA components - if: ${{ steps.ctk-get-cache.outcome == 'failure' }} + if: ${{ steps.ctk-get-cache.outputs.cache-hit != 'true' }} shell: bash --noprofile --norc -xeuo pipefail {0} run: | CUDA_PATH="./cuda_toolkit" @@ -123,16 +123,15 @@ runs: # so setting the CUDA_PATH env var here is meaningless. - name: Upload CTK cache - if: ${{ steps.ctk-get-cache.outcome == 'failure' }} - uses: actions/upload-artifact@v4 - continue-on-error: true + if: ${{ always() && + steps.ctk-get-cache.outputs.cache-hit != 'true' }} + uses: actions/cache/save@v4 with: - name: ${{ env.CTK_CACHE_KEY }} + key: ${{ env.CTK_CACHE_KEY }} path: ./${{ env.CTK_CACHE_FILENAME }} - if-no-files-found: error - name: Restore CTK cache - if: ${{ steps.ctk-get-cache.outcome == 'success' }} + if: ${{ steps.ctk-get-cache.outputs.cache-hit == 'true' }} shell: bash --noprofile --norc -xeuo pipefail {0} run: | ls -l diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 78572917..66468bd1 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -51,11 +51,12 @@ runs: - name: Download CTK cache id: ctk-get-cache - uses: actions/download-artifact@v4 + uses: actions/cache/restore@v4 continue-on-error: true with: - name: ${{ env.CTK_CACHE_KEY }} - path: ./ + key: ${{ env.CTK_CACHE_KEY }} + path: ./${{ env.CTK_CACHE_FILENAME }} + fail-on-cache-miss: true - name: Restore CTK cache shell: bash --noprofile --norc -xeuo pipefail {0} From 7b074f03794bf3cb2b6cabdf2d77fd7860cbcb86 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 8 Dec 2024 22:54:45 +0000 Subject: [PATCH 56/56] clean up legacy CI scripts --- continuous_integration/environment.yml | 24 --- continuous_integration/scripts/build | 32 ---- continuous_integration/scripts/conda-utils | 16 -- continuous_integration/scripts/entrypoint | 20 -- .../scripts/generate-environment | 36 ---- continuous_integration/scripts/make-conda-env | 27 --- continuous_integration/scripts/setup-utils | 179 ------------------ continuous_integration/scripts/test | 38 ---- 8 files changed, 372 deletions(-) delete mode 100644 continuous_integration/environment.yml delete mode 100755 continuous_integration/scripts/build delete mode 100755 continuous_integration/scripts/conda-utils delete mode 100755 continuous_integration/scripts/entrypoint delete mode 100755 continuous_integration/scripts/generate-environment delete mode 100755 continuous_integration/scripts/make-conda-env delete mode 100755 continuous_integration/scripts/setup-utils delete mode 100755 continuous_integration/scripts/test diff --git a/continuous_integration/environment.yml b/continuous_integration/environment.yml deleted file mode 100644 index 6d922d43..00000000 --- a/continuous_integration/environment.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: cuda_python -channels: - - defaults -dependencies: - - python>=3.10 - - cython>=3.0.0 - - pytest>=6.2.4 - - numpy>=1.21.1 - - setuptools - - wheel - - pip - - cuda-version=12.6 - - cuda-cudart-static - - cuda-driver-dev - - cuda-cudart-dev - - cuda-profiler-api - - cuda-nvrtc-dev - - cuda-nvcc - - pip: - - pytest-benchmark>=3.4.1 - - pyclibrary>=0.1.7 - - versioneer==0.29 - - tomli; python_version < "3.11" - - pywin32; sys_platform == 'win32' diff --git a/continuous_integration/scripts/build b/continuous_integration/scripts/build deleted file mode 100755 index 618edd5f..00000000 --- a/continuous_integration/scripts/build +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -build_ci() { - set -xeou pipefail - - export CUDA_HOME="${CONDA_PREFIX}/targets/x86_64-linux" - export PARALLEL_LEVEL=$(nproc --ignore 1) - - cd "${REPO_DIR}/cuda_bindings" - python setup.py bdist_wheel - - cd "${REPO_DIR}/cuda_core" - python setup.py bdist_wheel -} - -build_project() { - set -xeou pipefail - - export PYTHONUNBUFFERED=1 - - . setup-utils; - init_build_env "$@"; - - git config --global --add safe.directory "$REPO_DIR/.git" - - case "${BUILD_TYPE}" in - ci) build_ci;; - *) return 1;; - esac -} - -(build_project "$@"); diff --git a/continuous_integration/scripts/conda-utils b/continuous_integration/scripts/conda-utils deleted file mode 100755 index e0dd32ca..00000000 --- a/continuous_integration/scripts/conda-utils +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -activate_conda_env() { - set +xu - eval "$(conda shell.bash hook)" - conda activate "${CONDA_ENV}"; - set -xu - : ${PYTHON_VERSION:=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")} - export PYTHON_VERSION -} - -conda_info() { - set +x - conda info - set -x -} diff --git a/continuous_integration/scripts/entrypoint b/continuous_integration/scripts/entrypoint deleted file mode 100755 index fe4f5cea..00000000 --- a/continuous_integration/scripts/entrypoint +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash - -set_initial_env() { - set -xeuo pipefail - - export PATH="${PATH}:${REPO_DIR}/continuous_integration/scripts" -} - -entrypoint() { - set -xeuo pipefail - set_initial_env; - - git config --global --add safe.directory "$REPO_DIR/.git" - - cd "${REPO_DIR}" - - exec "$@"; -} - -entrypoint "$@"; diff --git a/continuous_integration/scripts/generate-environment b/continuous_integration/scripts/generate-environment deleted file mode 100755 index 8bf2c38d..00000000 --- a/continuous_integration/scripts/generate-environment +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash - -# Function to generate environment.yml -generate_environment_yml() { - local python_version=$1 - local cuda_version=$2 - local output_path=$3 - - cat < "${output_path}/environment.yml" -name: cuda_python -channels: - - defaults - - conda-forge -dependencies: - - python=${python_version} - - cython - - pytest - - numpy - - setuptools - - wheel - - pip - - cuda-version=${cuda_version} - - cuda-cudart-static - - cuda-driver-dev - - cuda-cudart-dev - - cuda-profiler-api - - cuda-nvrtc-dev - - cuda-nvcc - - pip: - - pytest-benchmark - - pyclibrary - - versioneer==0.29 - - tomli; python_version < "3.11" - - pywin32; sys_platform == 'win32' -EOF -} \ No newline at end of file diff --git a/continuous_integration/scripts/make-conda-env b/continuous_integration/scripts/make-conda-env deleted file mode 100755 index 37539b37..00000000 --- a/continuous_integration/scripts/make-conda-env +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -set -x - -make_ci_env() { - #TODO wire cuda version as a top level matrix argument - generate_environment_yml "${PYTHON_VERSION}" 12.6 . - mamba env create -n "${CONDA_ENV}" -f ./environment.yml -} - -make_conda_env() { - set -xeuo pipefail - - . setup-utils; - . generate-environment - set_base_defs; - - case "$1" in - ci) make_ci_env;; - test) make_test_env;; - *) return 1;; - esac - - return 0; -} - -(make_conda_env "$@"); diff --git a/continuous_integration/scripts/setup-utils b/continuous_integration/scripts/setup-utils deleted file mode 100755 index f8faefa4..00000000 --- a/continuous_integration/scripts/setup-utils +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env bash - -install_from_apt() { - set -xeuo pipefail - - export DEBIAN_FRONTEND=non-interactive - - # Run package updates and install packages - apt-get -q update - apt-get -q install -y wget curl jq sudo ninja-build vim numactl rsync -} - -install_cmake() { - set -xeuo pipefail - - wget -q https://github.com/Kitware/CMake/releases/download/v3.26.5/cmake-3.26.5-linux-x86_64.tar.gz - - tar -xzf cmake-3.26.5-linux-x86_64.tar.gz -} - -setup_linux_build_env() { - set -xeuo pipefail - export OS_SHORT_NAME=linux - export PATH="${PATH}:${PREBUILD_DIR}/cmake-3.26.5-linux-x86_64/bin" - - mkdir -p /tmp/out /tmp/env_yaml -} - -install_linux_tools() { - set -xeuo pipefail - - export SED=sed - export READLINK=readlink - - install_from_apt; - install_cmake; - - mkdir -p /tmp/out /tmp/env_yaml -} - -install_linux_test_tools() { - set -xeuo pipefail - - export SED=sed - export READLINK=readlink - - # Run package updates and install packages - apt-get -q update - apt-get -q install -y numactl -} - -set_base_defs() { - set -xeuo pipefail - - export CONDA_ENV=cuda_python - - CONDA_PLATFORM=$(conda info | grep 'platform' | awk -F ' : ' '{print $2}') - export CONDA_PLATFORM - - export PREBUILD_DIR=/tmp/prebuild - mkdir -p "$PREBUILD_DIR" - - export BUILD_DIR="$REPO_DIR/build" - - # Get the machine architecture - ARCH=$(uname -m) - - if [ "$ARCH" == "aarch64" ]; then - # Use the gcc march value used by aarch64 Ubuntu. - BUILD_MARCH=armv8-a - else - # Use uname -m otherwise - BUILD_MARCH=$(uname -m | tr '_' '-') - fi - - export BUILD_MARCH - - export CUDA_VERSION=12.2.2 - - export MAX_LIBSANITIZER_VERSION=11.4 - - export USE_OPENMP=ON -} - -# ----------------------------------------------------------------------------- - -prep_git() { - # Temporarily disable exit on error - set +e - git config --global user.email > /dev/null - local email_exit_status=$? - git config --global user.name > /dev/null - local name_exit_status=$? - # Re-enable exit on error - set -e - - if [ $email_exit_status -ne 0 ]; then - git config --global --add user.email "users.noreply.github.com" - echo "git user.email was not set. It's now set to users.noreply.github.com" - else - echo "Note: git user.email is already set." - fi - - if [ $name_exit_status -ne 0 ]; then - git config --global --add user.name "anon" - echo "git user.name was not set. It's now set to anon" - else - echo "Note: git user.name is already set." - fi - - # Fix "fatal: detected dubious ownership in repository at '/tmp/legate.core'" - # during local builds. - git config --global --add safe.directory "$REPO_DIR" -} - - -setup_build_env() { - set -xeuo pipefail - - install_linux_tools; - - setup_linux_build_env; - - rm -rf "$PREBUILD_DIR" - mkdir -p "$PREBUILD_DIR" - cd $PREBUILD_DIR - - prep_git; -} - -init_build_env() { - set -x; - - . conda-utils; - - export BUILD_TYPE=$1 - - set -xeuo pipefail; - - set_base_defs; - - cd "$PREBUILD_DIR" - - setup_build_env; - - cd "$REPO_DIR"; - - if [[ -d "${BUILD_DIR}" ]]; then - rm -rf "${BUILD_DIR}" - fi - - make-conda-env "$BUILD_TYPE"; - - activate_conda_env; - conda_info; -} - -init_test_env() { - set -x; - - . conda-utils; - - export TEST_TYPE=$1 - - set -xeuo pipefail; - - set_base_defs; - - cd "$PREBUILD_DIR" - - # setup_test_env; - - cd "$REPO_DIR"; - - make-conda-env "$TEST_TYPE"; - - activate_conda_env; - conda_info; -} \ No newline at end of file diff --git a/continuous_integration/scripts/test b/continuous_integration/scripts/test deleted file mode 100755 index 3a705c3c..00000000 --- a/continuous_integration/scripts/test +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -test_ci() { - set -xeou pipefail - - activate_conda_env; - - cd "${BINDINGS_ARTIFACTS_DIR}" - pip install *.whl - - cd "${CORE_ARTIFACTS_DIR}" - pip install *.whl - - cd "${REPO_DIR}/cuda_core" - python -m pytest tests/ - - cd "${REPO_DIR}/cuda_bindings" - python -m pytest tests/ - -} - -test_project() { - set -xeou pipefail - - export PYTHONUNBUFFERED=1 - - . setup-utils; - init_test_env "$@"; - - git config --global --add safe.directory "$REPO_DIR/.git" - - case "${TEST_TYPE}" in - ci) test_ci;; - *) return 1;; - esac -} - -(test_project "$@");