From 76a34b405e6b4c8aea04226a62109082fa13b6ce Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Thu, 15 Jan 2026 06:37:35 +0000 Subject: [PATCH 1/5] CI: add manual aiter prebuilt upload flow Add a workflow_dispatch GHA to build/upload aiter prebuilts using a chosen image and GPU arch list, reusing the shell script. Make ci/aiter_upload.sh handle build + package + upload with optional env-based upload, respecting GPU_ARCHS input and defaulting to gfx942;gfx950. Strip upload/packaging logic out of the CMake helper so normal builds only download/use prebuilts. --- .github/workflows/aiter-prebuilt-upload.yml | 92 +++++++++++++++++++ ci/aiter_upload.sh | 81 ++++++++++++++++ .../common/ck_fused_attn/aiter_prebuilt.cmake | 43 --------- 3 files changed, 173 insertions(+), 43 deletions(-) create mode 100644 .github/workflows/aiter-prebuilt-upload.yml create mode 100755 ci/aiter_upload.sh diff --git a/.github/workflows/aiter-prebuilt-upload.yml b/.github/workflows/aiter-prebuilt-upload.yml new file mode 100644 index 000000000..467140453 --- /dev/null +++ b/.github/workflows/aiter-prebuilt-upload.yml @@ -0,0 +1,92 @@ +name: AITER Prebuilt Upload + +on: + workflow_dispatch: + inputs: + gpu_archs: + description: "GPU arch list for aiter build" + required: true + default: "gfx942;gfx950" + docker_image: + description: "Docker image" + required: false + default: "" + +jobs: + upload: + runs-on: linux-mi325-8 + steps: + - name: Checkout source + uses: actions/checkout@v4 + with: + ref: ${{ github.ref }} + submodules: recursive + fetch-depth: 0 + + - name: Host Diagnostics (upload) + run: | + echo "::group::Host Diagnostics" + echo ">>> Active Containers:" + docker ps -a + echo ">>> ROCm Installation:" + ls -d /opt/rocm* || echo "No /opt/rocm found" + echo ">>> GPU info:" + ls -l /dev/dri + ls -l /dev/kfd + rocm-smi || true + echo "::endgroup::" + + - name: Resolve docker image + id: cfg + run: | + IMAGE="${{ inputs.docker_image }}" + if [ -z "$IMAGE" ]; then + 
IMAGE="${{ vars.DEV_DOCKER_IMAGE }}" + fi + if [ -z "$IMAGE" ]; then + echo "No docker image provided and vars.DEV_DOCKER_IMAGE is empty." >&2 + exit 1 + fi + echo "image=${IMAGE}" >> $GITHUB_OUTPUT + + - name: Pull docker image + run: docker pull ${{ steps.cfg.outputs.image }} + + - name: Run container + run: | + docker run -dt \ + --name te-aiter-upload \ + --network=host \ + --device=/dev/dri --device=/dev/kfd \ + --shm-size=16G \ + --pid=host \ + --group-add $(getent group render | cut -d: -f3) \ + --group-add $(getent group video | cut -d: -f3) \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + ${{ steps.cfg.outputs.image }} + + - name: Build and upload aiter prebuilt + env: + NVTE_AITER_PREBUILT_BASE_URL: https://compute-artifactory.amd.com:5000/artifactory/rocm-generic-local/te-ci/aiter-prebuilts + NVTE_AITER_PREBUILT_UPLOAD_TOKEN: ${{ secrets.AITER_ARTIFACTORY_TOKEN }} + run: | + if [ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" ]; then + echo "Missing secrets.AITER_ARTIFACTORY_TOKEN" >&2 + exit 1 + fi + + docker exec \ + -e NVTE_AITER_PREBUILT_BASE_URL=${NVTE_AITER_PREBUILT_BASE_URL} \ + -e NVTE_AITER_PREBUILT_UPLOAD_TOKEN=${NVTE_AITER_PREBUILT_UPLOAD_TOKEN} \ + -e GPU_ARCHS_INPUT="${{ inputs.gpu_archs }}" \ + te-aiter-upload bash -c 'set -ex + export HIP_PATH="" + export GPU_ARCHS="$GPU_ARCHS_INPUT" + ci/aiter_upload.sh --build + ' + + - name: Cleanup container + if: always() + run: docker rm -f te-aiter-upload || true + diff --git a/ci/aiter_upload.sh b/ci/aiter_upload.sh new file mode 100755 index 000000000..92b5ec281 --- /dev/null +++ b/ci/aiter_upload.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Inputs for upload (optional): +# NVTE_AITER_PREBUILT_BASE_URL - base URL for prebuilts +# NVTE_AITER_PREBUILT_UPLOAD_TOKEN - bearer token for Artifactory +# Optional flag: +# --build : build aiter libs before packaging/uploading; default is package-only. 
+ +# Derive ROCm version and aiter commit -> cache key +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +ROCM_PATH="${ROCM_PATH:-/opt/rocm}" +ROCM_VER="$(head -n1 "${ROCM_PATH}/.info/version" | sed -n 's/^\([0-9]\+\.[0-9]\+\).*/\1/p')" + +AITER_DIR="${ROOT_DIR}/3rdparty/aiter" +git -C "${AITER_DIR}" config --global --add safe.directory "${AITER_DIR}" >/dev/null +AITER_SHA="$(git -C "${AITER_DIR}" rev-parse HEAD)" + +KEY="rocm-${ROCM_VER}_aiter-${AITER_SHA}" +CACHE_ROOT="${ROOT_DIR}/build/aiter-prebuilts" +EXTRACT_DIR="${CACHE_ROOT}/${KEY}" +OUTPUT_TGZ="/tmp/${KEY}.tar.gz" + +HAS_UPLOAD=0 +if [[ -n "${NVTE_AITER_PREBUILT_BASE_URL:-}" && -n "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN:-}" ]]; then + HAS_UPLOAD=1 +fi + +# Skip early when remote prebuilt already exists +REMOTE_URL="" +if [[ ${HAS_UPLOAD} -eq 1 ]]; then + REMOTE_URL="${NVTE_AITER_PREBUILT_BASE_URL}/${KEY}.tar.gz" + if curl -sIf "${REMOTE_URL}" >/dev/null; then + echo "[aiter-upload] Remote prebuilt already present at ${REMOTE_URL}; nothing to do." + exit 0 + fi +fi + +# Optional build stage (uses GPU_ARCHS if set, else gfx942;gfx950) +if [[ "${1:-}" == "--build" ]]; then + shift + ARCHS="${GPU_ARCHS:-gfx942;gfx950}" + echo "[AITER-PREBUILT] Building aiter libs for ${ARCHS} ..." + rm -rf "${AITER_DIR}/aiter/jit/build" + AITER_LOG_MORE=1 \ + GPU_ARCHS="${ARCHS}" \ + python3 "${ROOT_DIR}/3rdparty/aiter/op_tests/cpp/mha/compile.py" + mkdir -p "${EXTRACT_DIR}" + cp "${ROOT_DIR}/3rdparty/aiter/op_tests/cpp/mha/libmha_fwd.so" "${EXTRACT_DIR}/" + cp "${ROOT_DIR}/3rdparty/aiter/op_tests/cpp/mha/libmha_bwd.so" "${EXTRACT_DIR}/" +fi + +# Ensure built libs exist +if [[ ! -f "${EXTRACT_DIR}/libmha_fwd.so" ]]; then + echo "[AITER-PREBUILT] Missing libmha_fwd.so in ${EXTRACT_DIR}" >&2 + exit 1 +fi +if [[ ! 
-f "${EXTRACT_DIR}/libmha_bwd.so" ]]; then + echo "[AITER-PREBUILT] Missing libmha_bwd.so in ${EXTRACT_DIR}" >&2 + exit 1 +fi + +echo "[AITER-PREBUILT] Packaging ${EXTRACT_DIR} -> ${OUTPUT_TGZ}" +tar -C "${CACHE_ROOT}" -czf "${OUTPUT_TGZ}" "${KEY}" + +if [[ ${HAS_UPLOAD} -eq 1 ]]; then + echo "[AITER-PREBUILT] Uploading..." + COLUMNS=50 curl --progress-bar --fail -X PUT \ + -H "Authorization: Bearer ${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" \ + -T "${OUTPUT_TGZ}" \ + "${REMOTE_URL}" \ + -o /dev/null + echo "[AITER-PREBUILT] Uploaded tgz to ${REMOTE_URL}" +fi + +echo "[AITER-PREBUILT] Artifacts:" +echo " tgz: ${OUTPUT_TGZ}" +if [[ ${HAS_UPLOAD} -eq 0 ]]; then + echo "[AITER-PREBUILT] To upload, set NVTE_AITER_PREBUILT_BASE_URL and NVTE_AITER_PREBUILT_UPLOAD_TOKEN." +fi + diff --git a/transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake b/transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake index a4bc9a6be..275e30705 100644 --- a/transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake +++ b/transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake @@ -79,46 +79,3 @@ function(download_aiter_prebuilt DOWNLOAD_SUCCESS) message(STATUS "[AITER-PREBUILT] Successfully downloaded.") set(${DOWNLOAD_SUCCESS} TRUE PARENT_SCOPE) endfunction() - -# Create prebuilt tgz file to upload -function(create_upload_files) - # Locate .so files - if (NOT EXISTS "${EXTRACT_DIR}/libmha_fwd.so") - message(FATAL_ERROR "[AITER-PREBUILT] Missing libmha_fwd.so") - endif() - if (NOT EXISTS "${EXTRACT_DIR}/libmha_bwd.so") - message(FATAL_ERROR "[AITER-PREBUILT] Missing libmha_bwd.so") - endif() - - # Output paths - set(OUTPUT_TGZ "/tmp/${KEY}.tar.gz") - set(OUTPUT_SHA "/tmp/${KEY}.tar.gz.sha256") - - message(STATUS "[AITER-PREBUILT] Creating prebuilt files...") - # Create archive - file(ARCHIVE_CREATE - OUTPUT "${OUTPUT_TGZ}" - PATHS "${KEY}" - WORKING_DIRECTORY "${CACHE_ROOT}" - FORMAT "gnutar" - COMPRESSION "GZip") - - # Compute SHA256 - file(SHA256 "${OUTPUT_TGZ}" ARCHIVE_HASH) 
- file(WRITE "${OUTPUT_SHA}" "${ARCHIVE_HASH}") - message(STATUS "[AITER-PREBUILT] tgz and sha256 files generated successfully:") - message(STATUS " ${OUTPUT_TGZ}") - message(STATUS " ${OUTPUT_SHA}") -endfunction() - -# ------------------------------------------------------ -# Script-mode entry point (to create upload files) -# Usage: cmake -DACTION=upload -P /path/to/aiter_prebuilt.cmake -# ------------------------------------------------------ -if (CMAKE_SCRIPT_MODE_FILE) - if (DEFINED ACTION AND ACTION STREQUAL "upload") - create_upload_files() - else() - message(FATAL_ERROR "[AITER-PREBUILT] Invalid ACTION=${ACTION}. Use upload.") - endif() -endif() \ No newline at end of file From 3fa47e366d3920131d7bd477058d41dfe433a447 Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Sun, 18 Jan 2026 03:58:50 +0000 Subject: [PATCH 2/5] Addressed reviews Move aiter upload helper to .github/scripts, add copyright header, and use a temp gitconfig for safe.directory/commit lookup set CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT, added functionality to verify remote SHA after upload Trim workflow diagnostics/cleanup, use --rm container, pass GPU_ARCHS input directly --- {ci => .github/scripts}/aiter_upload.sh | 31 ++++++++++++++++-- .github/workflows/aiter-prebuilt-upload.yml | 36 ++++++--------------- 2 files changed, 38 insertions(+), 29 deletions(-) rename {ci => .github/scripts}/aiter_upload.sh (67%) diff --git a/ci/aiter_upload.sh b/.github/scripts/aiter_upload.sh similarity index 67% rename from ci/aiter_upload.sh rename to .github/scripts/aiter_upload.sh index 92b5ec281..dd66449b2 100755 --- a/ci/aiter_upload.sh +++ b/.github/scripts/aiter_upload.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved. +# +# See LICENSE for license information. set -euo pipefail # Inputs for upload (optional): @@ -8,18 +11,21 @@ set -euo pipefail # --build : build aiter libs before packaging/uploading; default is package-only. 
# Derive ROCm version and aiter commit -> cache key -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" ROCM_PATH="${ROCM_PATH:-/opt/rocm}" ROCM_VER="$(head -n1 "${ROCM_PATH}/.info/version" | sed -n 's/^\([0-9]\+\.[0-9]\+\).*/\1/p')" AITER_DIR="${ROOT_DIR}/3rdparty/aiter" -git -C "${AITER_DIR}" config --global --add safe.directory "${AITER_DIR}" >/dev/null -AITER_SHA="$(git -C "${AITER_DIR}" rev-parse HEAD)" +GIT_CONFIG_GLOBAL="$(mktemp /tmp/gitconfig.XXXXXX)" +trap 'rm -f "${GIT_CONFIG_GLOBAL}"' EXIT +git config --global --add safe.directory "${AITER_DIR}" --file "${GIT_CONFIG_GLOBAL}" >/dev/null 2>&1 || true +AITER_SHA="$(GIT_CONFIG_GLOBAL=${GIT_CONFIG_GLOBAL} git -C "${AITER_DIR}" rev-parse HEAD)" KEY="rocm-${ROCM_VER}_aiter-${AITER_SHA}" CACHE_ROOT="${ROOT_DIR}/build/aiter-prebuilts" EXTRACT_DIR="${CACHE_ROOT}/${KEY}" OUTPUT_TGZ="/tmp/${KEY}.tar.gz" +OUTPUT_SHA="/tmp/${KEY}.tar.gz.sha256" HAS_UPLOAD=0 if [[ -n "${NVTE_AITER_PREBUILT_BASE_URL:-}" && -n "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN:-}" ]]; then @@ -43,6 +49,7 @@ if [[ "${1:-}" == "--build" ]]; then echo "[AITER-PREBUILT] Building aiter libs for ${ARCHS} ..." rm -rf "${AITER_DIR}/aiter/jit/build" AITER_LOG_MORE=1 \ + CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=3 \ GPU_ARCHS="${ARCHS}" \ python3 "${ROOT_DIR}/3rdparty/aiter/op_tests/cpp/mha/compile.py" mkdir -p "${EXTRACT_DIR}" @@ -62,6 +69,7 @@ fi echo "[AITER-PREBUILT] Packaging ${EXTRACT_DIR} -> ${OUTPUT_TGZ}" tar -C "${CACHE_ROOT}" -czf "${OUTPUT_TGZ}" "${KEY}" +sha256sum "${OUTPUT_TGZ}" | awk '{print $1}' > "${OUTPUT_SHA}" if [[ ${HAS_UPLOAD} -eq 1 ]]; then echo "[AITER-PREBUILT] Uploading..." 
@@ -71,10 +79,27 @@ if [[ ${HAS_UPLOAD} -eq 1 ]]; then "${REMOTE_URL}" \ -o /dev/null echo "[AITER-PREBUILT] Uploaded tgz to ${REMOTE_URL}" + + # Verify remote SHA256 matches local + REMOTE_SHA_TMP="$(mktemp /tmp/aiter_remote_sha.XXXXXX)" + trap 'rm -f "${REMOTE_SHA_TMP}"' EXIT + if curl -fsSL "${REMOTE_URL}.sha256" -o "${REMOTE_SHA_TMP}"; then + REMOTE_SHA_VAL="$(awk '{print $1}' "${REMOTE_SHA_TMP}")" + LOCAL_SHA_VAL="$(cat "${OUTPUT_SHA}")" + if [[ "${REMOTE_SHA_VAL}" != "${LOCAL_SHA_VAL}" ]]; then + echo "[AITER-PREBUILT] Remote SHA256 mismatch!" + exit 1 + else + echo "[AITER-PREBUILT] Remote SHA256 verified." + fi + else + echo "[AITER-PREBUILT] Warning: failed to download remote .sha256 for verification." >&2 + fi fi echo "[AITER-PREBUILT] Artifacts:" echo " tgz: ${OUTPUT_TGZ}" +echo " sha: ${OUTPUT_SHA}" if [[ ${HAS_UPLOAD} -eq 0 ]]; then echo "[AITER-PREBUILT] To upload, set NVTE_AITER_PREBUILT_BASE_URL and NVTE_AITER_PREBUILT_UPLOAD_TOKEN." fi diff --git a/.github/workflows/aiter-prebuilt-upload.yml b/.github/workflows/aiter-prebuilt-upload.yml index 467140453..d9d9b856c 100644 --- a/.github/workflows/aiter-prebuilt-upload.yml +++ b/.github/workflows/aiter-prebuilt-upload.yml @@ -1,3 +1,6 @@ +# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved. +# +# See LICENSE for license information. 
name: AITER Prebuilt Upload on: @@ -23,19 +26,6 @@ jobs: submodules: recursive fetch-depth: 0 - - name: Host Diagnostics (upload) - run: | - echo "::group::Host Diagnostics" - echo ">>> Active Containers:" - docker ps -a - echo ">>> ROCm Installation:" - ls -d /opt/rocm* || echo "No /opt/rocm found" - echo ">>> GPU info:" - ls -l /dev/dri - ls -l /dev/kfd - rocm-smi || true - echo "::endgroup::" - - name: Resolve docker image id: cfg run: | @@ -55,6 +45,7 @@ jobs: - name: Run container run: | docker run -dt \ + --rm \ --name te-aiter-upload \ --network=host \ --device=/dev/dri --device=/dev/kfd \ @@ -71,22 +62,15 @@ jobs: NVTE_AITER_PREBUILT_BASE_URL: https://compute-artifactory.amd.com:5000/artifactory/rocm-generic-local/te-ci/aiter-prebuilts NVTE_AITER_PREBUILT_UPLOAD_TOKEN: ${{ secrets.AITER_ARTIFACTORY_TOKEN }} run: | - if [ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" ]; then - echo "Missing secrets.AITER_ARTIFACTORY_TOKEN" >&2 - exit 1 - fi - docker exec \ -e NVTE_AITER_PREBUILT_BASE_URL=${NVTE_AITER_PREBUILT_BASE_URL} \ -e NVTE_AITER_PREBUILT_UPLOAD_TOKEN=${NVTE_AITER_PREBUILT_UPLOAD_TOKEN} \ - -e GPU_ARCHS_INPUT="${{ inputs.gpu_archs }}" \ + -e GPU_ARCHS="${{ inputs.gpu_archs }}" \ te-aiter-upload bash -c 'set -ex + if [ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" ]; then + echo "Missing secrets.AITER_ARTIFACTORY_TOKEN" >&2 + exit 1 + fi export HIP_PATH="" - export GPU_ARCHS="$GPU_ARCHS_INPUT" - ci/aiter_upload.sh --build + .github/scripts/aiter_upload.sh --build ' - - - name: Cleanup container - if: always() - run: docker rm -f te-aiter-upload || true - From 0e380589f763802db3b900c3ad4eebbf2c3f27f4 Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Thu, 22 Jan 2026 18:52:16 +0000 Subject: [PATCH 3/5] Addressed comments --- ...ter_upload.sh => aiter_prebuild_upload.sh} | 17 ++++--- .github/workflows/aiter-prebuilt-upload.yml | 7 +-- .../common/ck_fused_attn/CMakeLists.txt | 12 +++-- .../common/ck_fused_attn/aiter_build.sh | 44 +++++++++++++++++++ 4 
files changed, 58 insertions(+), 22 deletions(-) rename .github/scripts/{aiter_upload.sh => aiter_prebuild_upload.sh} (88%) create mode 100644 transformer_engine/common/ck_fused_attn/aiter_build.sh diff --git a/.github/scripts/aiter_upload.sh b/.github/scripts/aiter_prebuild_upload.sh similarity index 88% rename from .github/scripts/aiter_upload.sh rename to .github/scripts/aiter_prebuild_upload.sh index dd66449b2..ca5d14cb9 100755 --- a/.github/scripts/aiter_upload.sh +++ b/.github/scripts/aiter_prebuild_upload.sh @@ -37,21 +37,20 @@ REMOTE_URL="" if [[ ${HAS_UPLOAD} -eq 1 ]]; then REMOTE_URL="${NVTE_AITER_PREBUILT_BASE_URL}/${KEY}.tar.gz" if curl -sIf "${REMOTE_URL}" >/dev/null; then - echo "[aiter-upload] Remote prebuilt already present at ${REMOTE_URL}; nothing to do." + echo "[aiter-upload] Remote prebuilt already exists at ${REMOTE_URL}; nothing to do." exit 0 fi fi -# Optional build stage (uses GPU_ARCHS if set, else gfx942;gfx950) +# Optional build stage if [[ "${1:-}" == "--build" ]]; then shift - ARCHS="${GPU_ARCHS:-gfx942;gfx950}" - echo "[AITER-PREBUILT] Building aiter libs for ${ARCHS} ..." - rm -rf "${AITER_DIR}/aiter/jit/build" - AITER_LOG_MORE=1 \ - CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=3 \ - GPU_ARCHS="${ARCHS}" \ - python3 "${ROOT_DIR}/3rdparty/aiter/op_tests/cpp/mha/compile.py" + GPU_ARCHS="gfx942;gfx950" + echo "[AITER-PREBUILT] Building aiter libs for ${GPU_ARCHS} ..." 
+ bash "${ROOT_DIR}/transformer_engine/common/ck_fused_attn/aiter_build.sh" \ + --aiter-dir "${ROOT_DIR}/3rdparty/aiter" \ + --aiter-test-dir "${ROOT_DIR}/3rdparty/aiter/op_tests/cpp/mha" \ + --gpu-archs "${GPU_ARCHS}" mkdir -p "${EXTRACT_DIR}" cp "${ROOT_DIR}/3rdparty/aiter/op_tests/cpp/mha/libmha_fwd.so" "${EXTRACT_DIR}/" cp "${ROOT_DIR}/3rdparty/aiter/op_tests/cpp/mha/libmha_bwd.so" "${EXTRACT_DIR}/" diff --git a/.github/workflows/aiter-prebuilt-upload.yml b/.github/workflows/aiter-prebuilt-upload.yml index d9d9b856c..123e580dc 100644 --- a/.github/workflows/aiter-prebuilt-upload.yml +++ b/.github/workflows/aiter-prebuilt-upload.yml @@ -6,10 +6,6 @@ name: AITER Prebuilt Upload on: workflow_dispatch: inputs: - gpu_archs: - description: "GPU arch list for aiter build" - required: true - default: "gfx942;gfx950" docker_image: description: "Docker image" required: false @@ -65,12 +61,11 @@ jobs: docker exec \ -e NVTE_AITER_PREBUILT_BASE_URL=${NVTE_AITER_PREBUILT_BASE_URL} \ -e NVTE_AITER_PREBUILT_UPLOAD_TOKEN=${NVTE_AITER_PREBUILT_UPLOAD_TOKEN} \ - -e GPU_ARCHS="${{ inputs.gpu_archs }}" \ te-aiter-upload bash -c 'set -ex if [ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" ]; then echo "Missing secrets.AITER_ARTIFACTORY_TOKEN" >&2 exit 1 fi export HIP_PATH="" - .github/scripts/aiter_upload.sh --build + .github/scripts/aiter_prebuild_upload.sh --build ' diff --git a/transformer_engine/common/ck_fused_attn/CMakeLists.txt b/transformer_engine/common/ck_fused_attn/CMakeLists.txt index 02b8ef687..6418e1195 100644 --- a/transformer_engine/common/ck_fused_attn/CMakeLists.txt +++ b/transformer_engine/common/ck_fused_attn/CMakeLists.txt @@ -51,14 +51,12 @@ else() # If not downloaded, Fallback: Build from source if(NOT AITER_PREBUILT_DOWNLOAD_SUCCESS) message(STATUS " [AITER-PREBUILT] Building aiter from source.") - # delete the existing aiter/jit/build dir for a clean build - file(REMOVE_RECURSE "${__AITER_SOURCE_DIR}/aiter/jit/build") - # compile the libmha_fwd.so and 
libmha_bwd.so - set(ENV{AITER_LOG_MORE} 1) - # fp32 to bf16 cvt env still required for MI300X - set(ENV{CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT} ${CK_FUSED_ATTN_FLOAT_TO_BFLOAT16_DEFAULT}) execute_process( - COMMAND python3 ${__AITER_TEST_DIR}/compile.py + COMMAND bash ${CMAKE_CURRENT_LIST_DIR}/aiter_build.sh + --aiter-dir ${__AITER_SOURCE_DIR} + --aiter-test-dir ${__AITER_TEST_DIR} + --gpu-archs ${V3_ASM_ARCHS_STR} + --ck-tile-bf16 ${CK_FUSED_ATTN_FLOAT_TO_BFLOAT16_DEFAULT} ) # libmha_fwd.so and libmha_bwd.so will be under 3rdparty/aiter/op_tests/cpp/mha cache_local_aiter_build(${__AITER_TEST_DIR}) diff --git a/transformer_engine/common/ck_fused_attn/aiter_build.sh b/transformer_engine/common/ck_fused_attn/aiter_build.sh new file mode 100644 index 000000000..3ccf2979c --- /dev/null +++ b/transformer_engine/common/ck_fused_attn/aiter_build.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved. +# +# See LICENSE for license information. + +# Helper to build aiter libs +# Options: +# --aiter-dir Path to aiter root (required) +# --aiter-test-dir Path to aiter test dir containing compile.py (required) +# --gpu-archs GPU arches (required) +# --ck-tile-bf16 CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT, default: 3 +set -euo pipefail + +AITER_DIR="" +AITER_TEST_DIR="" +GPU_ARCHS_VAL="" +CK_TILE_BF16_DEFAULT="${CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT:-3}" + +while [[ $# -gt 0 ]]; do + case "$1" in + --aiter-dir) + AITER_DIR="$2"; shift 2;; + --aiter-test-dir) + AITER_TEST_DIR="$2"; shift 2;; + --gpu-archs) + GPU_ARCHS_VAL="$2"; shift 2;; + --ck-tile-bf16) + CK_TILE_BF16_DEFAULT="$2"; shift 2;; + *) + echo "Unknown option: $1" >&2; exit 1;; + esac +done + +if [[ -z "${AITER_DIR}" || -z "${AITER_TEST_DIR}" || -z "${GPU_ARCHS_VAL}" ]]; then + echo "[AITER-PREBUILT] --aiter-dir, --aiter-test-dir, and --gpu-archs are required." 
>&2 + exit 1 +fi + +rm -rf "${AITER_DIR}/aiter/jit/build" +AITER_LOG_MORE=1 \ +CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT="${CK_TILE_BF16_DEFAULT}" \ +GPU_ARCHS="${GPU_ARCHS_VAL}" \ +python3 "${AITER_TEST_DIR}/compile.py" + From 906dd6aa9defe01ca64d95a743e96d95408cc6d1 Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Fri, 23 Jan 2026 21:02:44 +0000 Subject: [PATCH 4/5] Copyright update --- transformer_engine/common/ck_fused_attn/CMakeLists.txt | 2 +- transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/transformer_engine/common/ck_fused_attn/CMakeLists.txt b/transformer_engine/common/ck_fused_attn/CMakeLists.txt index 6418e1195..bc4b5d598 100644 --- a/transformer_engine/common/ck_fused_attn/CMakeLists.txt +++ b/transformer_engine/common/ck_fused_attn/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. # SPDX-License-Identifier: MIT cmake_minimum_required(VERSION 3.21) diff --git a/transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake b/transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake index 275e30705..a59605e00 100644 --- a/transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake +++ b/transformer_engine/common/ck_fused_attn/aiter_prebuilt.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2025-2026, Advanced Micro Devices, Inc. All rights reserved. # # See LICENSE for license information. 
From c53372196a44c54418a57e9041b0f9892b2b7598 Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Mon, 26 Jan 2026 05:30:36 +0000 Subject: [PATCH 5/5] run in bash mode --- .github/workflows/aiter-prebuilt-upload.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/aiter-prebuilt-upload.yml b/.github/workflows/aiter-prebuilt-upload.yml index 123e580dc..acd9c58b4 100644 --- a/.github/workflows/aiter-prebuilt-upload.yml +++ b/.github/workflows/aiter-prebuilt-upload.yml @@ -67,5 +67,5 @@ jobs: exit 1 fi export HIP_PATH="" - .github/scripts/aiter_prebuild_upload.sh --build + bash .github/scripts/aiter_prebuild_upload.sh --build '