# nightly-build-and-test #154
# Workflow header: name, triggers, default shell and token permissions.
name: nightly-build-and-test

on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  # Scheduled trigger.
  schedule:
    - cron: "30 5 * * *"  # Daily 5:30 AM UTC / 12:30 AM EST

# Every `run:` step is executed with bash unless the step says otherwise.
defaults:
  run:
    shell: bash

# The workflow token only gets read access to the repository contents.
permissions:
  contents: read
jobs:
# Job: configure, build and test the SDK found in `sdk/` inside each container
# of the matrix, build the sanitizer/fuzz presets, run the sanitizer sample
# tests, then package and upload the SDK tarball.
# This mapping is one entry of the workflow's top-level `jobs:` key.
sdk-build-and-test:
  name: Build and Test PTI Lib
  # The job only runs when the repository variable PTI_RUN_TESTS equals 1.
  if: vars.PTI_RUN_TESTS == 1
  #
  # The runner labels select self-hosted machines with Intel Ponte Vecchio,
  # officially Intel(R) Data Center GPU Max 1100.
  #
  runs-on: [self-hosted, Linux, pti-2gpu]
  # A failing matrix entry does not fail the workflow run.
  continue-on-error: true
  strategy:
    # Matrix entries are executed one at a time.
    max-parallel: 1
    matrix:
      # One entry per container image. `uploadname` is used in the artifact
      # name and to detect oneAPI 2025.0.0 images in the Test step; `preset`
      # is the CMake preset used to configure, build, test and package.
      include:
        - container: ${{ vars.PTI_DOCKER_RHEL_9_ONEAPI_24_2_1 }}
          uploadname: "rhel-9.2024.2.1"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_SLES_15_3_ONEAPI_24_2_1 }}
          uploadname: "sles-15.3.2024.2.1"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_UBUNTU_24_4_ONEAPI_24_2_1 }}
          uploadname: "ubuntu-24.04.2024.2.1"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_ROCKY_8_ONEAPI_24_2_1 }}
          uploadname: "rocky-8.2024.2.1"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_RHEL_9_ONEAPI_25_0_0 }}
          uploadname: "rhel-9.2025.0.0"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_SLES_15_3_ONEAPI_25_0_0 }}
          uploadname: "sles-15.3.2025.0.0"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_UBUNTU_24_4_ONEAPI_25_0_0 }}
          uploadname: "ubuntu-24.04.2025.0.0"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_ROCKY_8_ONEAPI_25_0_0 }}
          uploadname: "rocky-8.2025.0.0"
          preset: linux-icpx-release
  container:
    image: ${{ matrix.container }}
    # Pass the DRI device nodes into the container and grant CAP_PERFMON;
    # extra options come from the PTI_DOCKER_PROXIES variable.
    options: --device=/dev/dri --cap-add CAP_PERFMON ${{ vars.PTI_DOCKER_PROXIES }}
  steps:
    - name: Clean-up
      # Empties the working directory (`*` does not match dotfiles).
      run: rm -rf *
    - name: Checkout
      uses: actions/checkout@v4
    - name: Build
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake --preset ${{ matrix.preset }}
        cmake --build --preset ${{ matrix.preset }} -j $(($(nproc)/2))
    - name: Test
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        ONEAPI_VER="${{ matrix.uploadname }}"
        # On oneAPI 2025.0.0 images exclude the tests labelled "performance".
        # The arguments are collected in an array so that nothing at all is
        # appended on the other images (the former `${VAR/pattern/repl}`
        # expansion passed the unchanged upload name to ctest there).
        EXTRA_CTEST_ARGS=()
        if [[ "${ONEAPI_VER}" == *2025.0.0* ]]; then
          EXTRA_CTEST_ARGS+=(-LE performance)
        fi
        ctest --output-on-failure --preset ${{ matrix.preset }} \
          "${EXTRA_CTEST_ARGS[@]}"
    - name: Build AddressSanitizer
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake --preset linux-asan
        cmake --build --preset linux-asan --parallel $(($(nproc)/2))
    - name: Build ThreadSanitizer
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake --preset linux-tsan
        cmake --build --preset linux-tsan --parallel $(($(nproc)/2))
    - name: Build libFuzzer
      working-directory: sdk
      run: |
        # To ensure it still builds, run build for fuzz targets until we have
        # proper fuzz testing infrastructure in place.
        source /opt/intel/oneapi/setvars.sh
        cmake --preset linux-fuzz
        cmake --build --preset linux-fuzz --parallel $(($(nproc)/2))
    #
    # Rocky8 and Red Hat have a problem with this test.
    #
    - name: Test AddressSanitizer
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        # /etc/redhat-release is absent on the SLES and Ubuntu images, so it
        # is only read when present; an empty string matches neither regex.
        REDHAT_RELEASE=""
        if [ -f /etc/redhat-release ]; then
          REDHAT_RELEASE="$(cat /etc/redhat-release)"
        fi
        #
        # Must skip hosts with more than one Level Zero GPU, Rocky 8 and
        # Red Hat 9 because of an unexpected failure as documented in PTI-74
        #
        if [ "$(sycl-ls | grep -c 'ext_oneapi_level_zero:gpu:')" -gt 1 ] || \
           [[ ${REDHAT_RELEASE} =~ Rocky.*8 ]] || \
           [[ ${REDHAT_RELEASE} =~ Red\ Hat.*9 ]]; then
          # Log which distribution triggered the skip, when known.
          if [ -f /etc/redhat-release ]; then
            cat /etc/redhat-release
          fi
          echo "Skipping Test AddressSanitizer"
          exit 0
        fi
        ctest --preset linux-asan --output-on-failure -L samples
    - name: Test ThreadSanitizer
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        ctest --preset linux-tsan --output-on-failure -L samples
    - name: Package SDK
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        cpack -G TGZ --config build-${{ matrix.preset }}/CPackConfig.cmake
    - name: Upload SDK
      uses: actions/upload-artifact@v4
      with:
        # Preset plus upload name keeps the artifact name unique per entry.
        name: "PTI-SDK-PKG-${{ matrix.preset }}-${{ matrix.uploadname }}"
        # The second pattern also matches names with a suffix after .tar.gz.
        path: |
          sdk/pti*.tar.gz
          sdk/pti*.tar.gz*
# Job: run `tests/run.py` for each PTI tool inside every container of the
# matrix. Each tool has its own step so failures are reported per tool.
# This mapping is one entry of the workflow's top-level `jobs:` key.
pti-tools-build-and-test:
  name: Build and Test PTI Tools
  # The job only runs when the repository variable PTI_RUN_TESTS equals 1.
  if: vars.PTI_RUN_TESTS == 1
  runs-on: [Linux, pti]
  # A failing matrix entry does not fail the workflow run.
  continue-on-error: true
  strategy:
    # At most two containers are tested at the same time.
    max-parallel: 2
    matrix:
      include:
        - container: ${{ vars.PTI_DOCKER_IMAGE_2024_1_1 }}
        - container: ${{ vars.PTI_DOCKER_IMAGE_2024_1_0 }}
        - container: ${{ vars.PTI_DOCKER_IMAGE_ROCKY8_2024_1_1 }}
  container:
    image: ${{ matrix.container }}
    # Pass the DRI device nodes into the container and grant CAP_PERFMON;
    # extra options come from the PTI_DOCKER_PROXIES variable.
    options: --device=/dev/dri --cap-add CAP_PERFMON ${{ vars.PTI_DOCKER_PROXIES }}
  steps:
    - name: Clean-up
      # Empties the working directory (`*` does not match dotfiles).
      run: rm -rf *
    - name: Checkout
      uses: actions/checkout@v4
    - name: Build-and-test-unitrace
      run: |
        source /opt/intel/oneapi/setvars.sh
        python3 ./tests/run.py -s unitrace
    - name: Build-and-test-onetrace
      run: |
        source /opt/intel/oneapi/setvars.sh
        python3 ./tests/run.py -s onetrace
    - name: Build-and-test-oneprof
      run: |
        source /opt/intel/oneapi/setvars.sh
        #
        # Must skip hosts with more than one Level Zero GPU because of
        # random failures as documented in PTI-75
        #
        if [ "$(sycl-ls | grep -c 'ext_oneapi_level_zero:gpu:')" -gt 1 ]; then
          # Leave a trace in the log so a skipped run is not mistaken
          # for a passing one.
          echo "Skipping Build-and-test-oneprof"
          exit 0
        fi
        python3 ./tests/run.py -s oneprof
    - name: Build-and-test-sysmon
      run: |
        source /opt/intel/oneapi/setvars.sh
        python3 ./tests/run.py -s sysmon
    - name: Build-and-test-cl_gpu_metrics
      run: |
        source /opt/intel/oneapi/setvars.sh
        python3 ./tests/run.py -s cl_gpu_metrics
    - name: Build-and-test-cl_debug_info
      run: |
        source /opt/intel/oneapi/setvars.sh
        python3 ./tests/run.py -s cl_debug_info
    #
    # FIXME:
    # Not run on the Rocky 8 image: a problem with GLIBCXX_3.4.26 as noted
    # in VASP-30704
    #
    - name: Build-and-test-instcount
      if: vars.PTI_DOCKER_IMAGE_ROCKY8_2024_1_1 != matrix.container
      run: |
        source /opt/intel/oneapi/setvars.sh
        python3 ./tests/run.py -s instcount
    #
    # TODO:
    # Add code to ze_debug_info to handle std::experimental::filesystem case.
    # Until then the step is not run on the Rocky 8 image.
    #
    - name: Build-and-test-ze_debug_info
      if: vars.PTI_DOCKER_IMAGE_ROCKY8_2024_1_1 != matrix.container
      run: |
        source /opt/intel/oneapi/setvars.sh
        python3 ./tests/run.py -s ze_debug_info