[CI] Extract torch build as a standalone job #4483
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: pull | |
on: | |
pull_request: | |
types: | |
- opened | |
- synchronize | |
- reopened | |
- converted_to_draft | |
- ready_for_review | |
- labeled | |
branches: | |
- main | |
- release/* | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} | |
cancel-in-progress: true | |
permissions: read-all | |
jobs: | |
preci-linux-build: | |
# Don't run on forked repos and draft PRs | |
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }} | |
permissions: | |
issues: write | |
uses: ./.github/workflows/_linux_build.yml | |
with: | |
runner: pvc_e2e | |
preci-ut: | |
# Don't run on forked repos and draft PRs | |
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }} | |
needs: preci-linux-build | |
uses: ./.github/workflows/_linux_ut.yml | |
with: | |
pytorch: ${{ needs.preci-linux-build.outputs.torch_commit_id }} | |
ut: op_regression,op_regression_dev1,op_extended,op_ut | |
runner: linux.idc.xpu | |
Inductor-XPU-E2E-CI-Tests: | |
needs: preci-linux-build | |
runs-on: pvc_e2e | |
# Don't run on forked repos and draft PRs | |
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }} | |
timeout-minutes: 900 | |
steps: | |
- name: Checkout torch-xpu-ops | |
uses: actions/checkout@v4 | |
- name: Prepare Conda ENV | |
run: | | |
which conda && conda clean -ay | |
conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci | |
conda create -n e2e_ci python=3.10 cmake ninja -y | |
source activate e2e_ci | |
pip install mkl-static mkl-include | |
pip install pandas scipy tqdm | |
- name: Prepare Stock Pytorch | |
run: | | |
pwd | |
cd ../ && rm -rf pytorch | |
source activate e2e_ci | |
git clone https://github.com/pytorch/pytorch pytorch | |
cd pytorch && git checkout ${{ needs.preci-linux-build.outputs.torch_commit_id }} | |
# apply PRs for stock pytorch | |
pip install requests | |
# https://github.com/mengfei25/pytorch/pull/18 internal use only for subset model list | |
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/mengfei25/pytorch/pull/18 | |
git status && git show -s | |
git submodule sync && git submodule update --init --recursive | |
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ | |
# Workaround for torch-xpu-ops ci test | |
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt | |
- name: Triton Installation | |
run: | | |
source activate e2e_ci | |
cd ../pytorch | |
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton" | |
TRITON_PINNED_COMMIT=$(cat .ci/docker/ci_commit_pins/triton-xpu.txt) | |
echo ${TRITON_REPO}@${TRITON_PINNED_COMMIT} | |
pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python" | |
- name: Download Pytorch wheel | |
if: ${{ inputs.pytorch }} != 'nightly_wheel' | |
uses: actions/download-artifact@v4 | |
with: | |
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }} | |
path: ${{ github.workspace }} | |
- name: Install Pytorch XPU | |
run: | | |
source activate e2e_ci | |
source .github/scripts/env.sh | |
cd ../pytorch | |
pip install -r requirements.txt | |
pip install --force-reinstall ${{ github.workspace }}/torch*.whl | |
- name: Identify pinned versions | |
run: | | |
cd ../pytorch | |
echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" >> "${GITHUB_ENV}" | |
echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" >> "${GITHUB_ENV}" | |
echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" >> "${GITHUB_ENV}" | |
echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" >> "${GITHUB_ENV}" | |
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" >> "${GITHUB_ENV}" | |
echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" >> "${GITHUB_ENV}" | |
- name: Torch Config | |
run: | | |
echo "$GITHUB_ENV" | |
rm -rf ../pytorch/inductor_log | |
rm -rf /tmp/torchinductor_* | |
cd .. | |
source activate e2e_ci | |
python -c "import triton; print(triton.__version__)" | |
python pytorch/torch/utils/collect_env.py | |
- name: Huggingface BF16 Training Accuracy Test | |
uses: ./.github/actions/inductor-xpu-e2e-test | |
with: | |
suite: huggingface | |
dt: bfloat16 | |
mode: training | |
scenario: accuracy | |
env_prepare: true | |
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} | |
- name: Huggingface FP16 Training Accuracy Test | |
uses: ./.github/actions/inductor-xpu-e2e-test | |
with: | |
suite: huggingface | |
dt: float16 | |
mode: training | |
scenario: accuracy | |
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} | |
- name: Timm_models BF16 Training Accuracy Test | |
uses: ./.github/actions/inductor-xpu-e2e-test | |
with: | |
suite: timm_models | |
dt: bfloat16 | |
mode: training | |
scenario: accuracy | |
env_prepare: true | |
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} | |
- name: Torchbench BF16 Training Accuracy Test | |
uses: ./.github/actions/inductor-xpu-e2e-test | |
with: | |
suite: torchbench | |
dt: bfloat16 | |
mode: training | |
scenario: accuracy | |
env_prepare: true | |
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} | |
- name: Summarize archieve files | |
if: ${{ ! cancelled() }} | |
run: | | |
rm -rf ${{ github.workspace }}/upload_files | |
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files | |
failed_case=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true) | |
if [ ${failed_case} -ne 0 ];then | |
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log | |
exit 1 | |
fi | |
- name: Upload Inductor XPU E2E Data | |
if: ${{ ! cancelled() }} | |
uses: actions/upload-artifact@v4 | |
with: | |
name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} | |
path: ${{ github.workspace }}/upload_files | |
preci-linux-build-abi-0: | |
# Don't run on forked repos and draft PRs | |
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }} | |
permissions: | |
issues: write | |
uses: ./.github/workflows/_linux_build.yml | |
with: | |
abi: 0 | |
runner: pvc_e2e | |
preci-ut-abi-0: | |
# Don't run on forked repos and draft PRs | |
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }} | |
needs: preci-linux-build-abi-0 | |
uses: ./.github/workflows/_linux_ut.yml | |
with: | |
abi: 0 | |
pytorch: ${{ needs.preci-linux-build-abi-0.outputs.torch_commit_id }} | |
ut: op_extended | |
runner: linux.idc.xpu | |
preci-windows: | |
# Don't run on forked repos and draft PRs | |
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) && contains(github.event.pull_request.labels.*.name, 'windows_ci') }} | |
uses: ./.github/workflows/_windows_ut.yml | |
with: | |
ut: op_extended | |
runner: Windows_CI |