Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CI] Extract torch build as a standalone job #1271

Merged
merged 3 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 181 additions & 0 deletions .github/workflows/_linux_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# Reusable workflow (workflow_call) that builds a PyTorch XPU wheel on a
# self-hosted runner and uploads the wheel and the build log as artifacts.
#
# Flow of the single `build` job:
#   1. Check out torch-xpu-ops and clone stock PyTorch next to it.
#   2. Build the wheel at the requested ref. On success the commit recorded in
#      the tracking issue (`commit_issue`) is replaced with the current one; on
#      failure a comment is posted on that issue and the build is retried at
#      the commit recorded there.
#   3. Print the torch configuration and publish version information both as
#      step outputs and as environment variables.
#   4. Upload the wheel and the build log.
name: Linux PyTorch XPU Build

on:
  workflow_call:
    inputs:
      pytorch:
        required: false
        type: string
        default: 'main'
        description: Pytorch branch/commit
      keep_torch_xpu_ops:
        required: false
        type: string
        default: 'false'
        description: Keep torch-xpu-ops pin. `true` means use pined commit
      abi:
        required: false
        type: string
        # Quoted so the default matches the declared `string` type.
        default: '1'
        description: ABI version. Default abi as 1.
      python:
        required: false
        type: string
        default: '3.10'
        description: Python version
      runner:
        # NOTE(review): the input is required, so callers always supply a value
        # and the default below looks unused - confirm before relying on it.
        required: true
        type: string
        default: 'linux.idc.xpu'
        description: Runner label
      driver:
        required: false
        type: string
        default: 'lts'
        description: Driver lts/rolling
    outputs:
      # Workflow outputs must reference the job by its id, which is `build`.
      whl_name:
        description: The name of the wheel file
        value: ${{ jobs.build.outputs.whl_name }}
      torch_commit_id:
        description: The commit id of the torch build
        value: ${{ jobs.build.outputs.TORCH_COMMIT_ID }}

permissions:
  issues: write

jobs:
  build:
    # NOTE(review): the comparison sits outside `${{ }}`, so this evaluates to a
    # non-empty string and is always truthy; the job therefore also runs for
    # 'nightly_wheel', which the steps below handle explicitly. Left unchanged
    # on purpose: turning it into a real condition would skip this job, and
    # every job that `needs` it, for 'nightly_wheel' runs.
    if: ${{ inputs.pytorch }} != 'nightly_wheel'
    runs-on: ${{ inputs.runner }}
    outputs:
      TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }}
      whl_name: ${{ steps.build_version.outputs.whl_name }}
    timeout-minutes: 900
    env:
      # Issue number whose body records the last commit that built successfully.
      commit_issue: 1280
      GH_TOKEN: ${{ github.token }}
      NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
      DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
    steps:
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
      - name: Prepare Stock Pytorch
        run: |
          pwd
          which conda && conda clean -ay
          conda remove --all -y -n xpu_build || \
                rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_build
          conda create -n xpu_build python=${{ inputs.python }} cmake ninja -y
          source activate xpu_build
          cd ../ && rm -rf pytorch
          pip install requests
          git clone https://github.com/pytorch/pytorch pytorch
          if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
            cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
            # apply PRs for stock pytorch
            python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
            git status && git show -s
            git submodule sync && git submodule update --init --recursive
            # Quoted so an empty input compares as a string instead of
            # producing a bash syntax error.
            if [[ "${{ inputs.keep_torch_xpu_ops }}" == 'true' ]]; then
              echo "Don't replace torch-xpu-ops!"
            else
              rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
              # Workaround for torch-xpu-ops ci test
              sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
            fi
          fi
      - name: Build Pytorch XPU
        run: |
          source activate xpu_build
          source .github/scripts/env.sh ${{ inputs.pytorch }}
          pip install mkl-static==2025.0.1 mkl-include==2025.0.1
          # Quoted so an empty input falls through to the ABI=1 branch instead
          # of producing a bash syntax error.
          if [[ "${{ inputs.abi }}" == '0' ]]; then
            export _GLIBCXX_USE_CXX11_ABI=0
          else
            export _GLIBCXX_USE_CXX11_ABI=1
          fi
          if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
            build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
            repo="${{ github.repository }}"
            # Last known-good commit for this branch, read from the issue body.
            last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1)
            cd ../pytorch
            current_commit=$(git rev-parse HEAD)
            echo ">>>>>>>>>>>>branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}"

            export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
            pip install -r requirements.txt
            WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log

            # Success is judged by the presence of a wheel, not by the exit
            # status of the piped build command above.
            if [ -f dist/torch*.whl ]; then
              echo "Wheel build successful, update last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280"
              gh --repo $repo issue view $commit_issue --json body -q .body | sed "s;${last_commit};${current_commit};" > new_body.txt
              gh --repo $repo issue edit $commit_issue --body-file new_body.txt
            else
              echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280"
              gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25"
              git clean -df .
              git checkout $last_commit
              # apply PRs for stock pytorch
              python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
              git status && git show -s
              git submodule sync && git submodule update --init --recursive
              if [[ "${{ inputs.keep_torch_xpu_ops }}" == 'true' ]]; then
                echo "Don't replace torch-xpu-ops!"
              else
                rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
                # Workaround for torch-xpu-ops ci test
                sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
              fi
              WERROR=1 python setup.py bdist_wheel
            fi
            pip install --force-reinstall dist/*.whl
            # Copy the artifacts into the workspace so the upload steps find them.
            cp dist/*.whl ${{ github.workspace }}/
            cp pytorch_${current_commit}_build.log ${{ github.workspace }}/
          else
            pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
            TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
            cd ../pytorch
            git reset --hard && git checkout ${TORCH_COMMIT_ID}
            TORCH_XPU_OPS_COMMIT=$(<third_party/xpu.txt)
            rm -rf third_party/torch-xpu-ops
            git clone https://github.com/intel/torch-xpu-ops.git third_party/torch-xpu-ops
            cd third_party/torch-xpu-ops
            git checkout ${TORCH_XPU_OPS_COMMIT}
            cd ../..
          fi
      - name: Torch Config
        run: |
          source activate xpu_build
          source .github/scripts/env.sh ${{ inputs.pytorch }}
          python -c "import torch; print(torch.__config__.show())"
          python -c "import torch; print(torch.__config__.parallel_info())"
          python -c "import torch; print(torch.__config__.torch.xpu.device_count())"

          cd ..
          python pytorch/torch/utils/collect_env.py
      - name: Identify Build version
        id: build_version
        run: |
          source .github/scripts/env.sh
          cd ../pytorch
          # Each value is written to both GITHUB_OUTPUT (step outputs) and
          # GITHUB_ENV (environment of later steps).
          echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          . /etc/os-release
          echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo ${GITHUB_ENV}
          # Backs the `whl_name` workflow output: file name of the wheel copied
          # into the workspace by the build step (empty when none was copied,
          # e.g. for 'nightly_wheel').
          whl_path="$(find "${GITHUB_WORKSPACE}" -maxdepth 1 -name 'torch*.whl' -print -quit)"
          echo "whl_name=${whl_path##*/}" >> "${GITHUB_OUTPUT}"
      - name: Upload Torch XPU Wheel
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}
          path: ${{ github.workspace }}/torch*.whl
      - name: Upload Build Log
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: Torch-XPU-Build-Log-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}
          path: ${{ github.workspace }}/pytorch_*.log
13 changes: 9 additions & 4 deletions .github/workflows/_linux_ut.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ on:
permissions: read-all

jobs:
Torch-XPU-UT-Tests:
ut_test:
runs-on: ${{ inputs.runner }}
timeout-minutes: 900
env:
Expand Down Expand Up @@ -95,7 +95,13 @@ jobs:
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
fi
- name: Build Pytorch XPU
- name: Download Pytorch wheel
if: ${{ inputs.pytorch }} != 'nightly_wheel'
uses: actions/download-artifact@v4
with:
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}
path: ${{ github.workspace }}
- name: Install Pytorch XPU
run: |
source activate xpu_op_${ZE_AFFINITY_MASK}
source .github/scripts/env.sh ${{ inputs.pytorch }}
Expand All @@ -109,8 +115,7 @@ jobs:
cd ../pytorch
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
pip install -r requirements.txt
WERROR=1 python setup.py bdist_wheel
pip install --force-reinstall dist/*.whl
pip install --force-reinstall ${{ github.workspace }}/torch*.whl
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
else
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
Expand Down
70 changes: 53 additions & 17 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,36 +66,41 @@ concurrency:
cancel-in-progress: ${{ github.event_name != 'schedule' }}

jobs:
Linux-Nightly-Ondemand-UT-Tests:
if: github.event_name == 'schedule' || ${{ inputs.ut_suite }}
uses: ./.github/workflows/_linux_ut.yml
Linux-Nightly-Ondemand-Build:
if: always()
name: linux-nightly-ondemand
permissions:
issues: write
uses: ./.github/workflows/_linux_build.yml
with:
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }}
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
abi: 1
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
runner: linux.idc.xpu
runner: pvc_e2e

Linux-Weekly-UT-Tests-ABI-0:
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
Linux-Nightly-Ondemand-UT-Tests:
if: github.event_name == 'schedule' || ${{ inputs.ut_suite }}
name: linux-nightly-ondemand
needs: Linux-Nightly-Ondemand-Build
uses: ./.github/workflows/_linux_ut.yml
with:
abi: 0
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: op_regression,op_regression_dev1,op_extended,op_ut
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
runner: linux.idc.xpu

Linux-Nightly-Ondemand-E2E-Tests:
runs-on: pvc_e2e
name: linux-nightly-ondemand / e2e_test
# Don't run on forked repos
if: github.repository_owner == 'intel'
needs: Linux-Nightly-Ondemand-Build
timeout-minutes: 3600
env:
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
Expand Down Expand Up @@ -176,15 +181,19 @@ jobs:
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
- name: Build Pytorch XPU
- name: Download Pytorch wheel
if: ${{ inputs.pytorch }} != 'nightly_wheel'
uses: actions/download-artifact@v4
with:
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}
path: ${{ github.workspace }}
- name: Install Pytorch XPU
run: |
source activate e2e_ci
source .github/scripts/env.sh
cd ../pytorch
pip install -r requirements.txt
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
python setup.py bdist_wheel
pip install --force-reinstall dist/*.whl
pip install --force-reinstall ${{ github.workspace }}/torch*.whl
- name: Show GITHUB_ENV
run: |
echo "$GITHUB_ENV"
Expand Down Expand Up @@ -294,6 +303,33 @@ jobs:
name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
path: ${{ github.workspace }}/upload_files

# ABI=0 wheel build. Runs only for the schedule trigger whose cron expression
# is '0 16 * * 5', and delegates the build to the reusable _linux_build.yml
# workflow.
Linux-Nightly-Ondemand-Build-ABI-0:
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
name: linux-nightly-ondemand-abi0
# Grants the called workflow write access to issues.
permissions:
issues: write
uses: ./.github/workflows/_linux_build.yml
with:
# The job-level `if` restricts this job to schedule events, so each
# `github.event_name == 'schedule' && <value> || inputs.*` expression below
# resolves to its schedule-side value.
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
abi: 0
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
runner: pvc_e2e

# ABI=0 unit tests. Gated on the same schedule trigger (cron '0 16 * * 5') as
# the ABI=0 build job it depends on, and delegates to the reusable
# _linux_ut.yml workflow.
Linux-Weekly-UT-Tests-ABI-0:
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
name: linux-nightly-ondemand-abi0
needs: Linux-Nightly-Ondemand-Build-ABI-0
uses: ./.github/workflows/_linux_ut.yml
with:
abi: 0
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: op_regression,op_regression_dev1,op_extended,op_ut
# Passes the `torch_commit_id` output of the ABI=0 build job as the pytorch ref.
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.torch_commit_id }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
# `'' || inputs.triton` always yields inputs.triton because the empty string
# is falsy in GitHub expressions.
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
runner: linux.idc.xpu

Tests-Failure-And-Report:
if: ${{ ! cancelled() }}
runs-on: [ self-hosted, Linux ]
Expand Down
Loading
Loading