# E2E Nightly_OnDemand Tests #294
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow triggers: a daily schedule plus a manual dispatch whose scope is
# selected through the inputs declared below.
name: 'E2E Nightly_OnDemand Tests'
on:
  schedule:
    # 13:00 UTC every day, i.e. 21:00 in GMT+8
    - cron: "0 13 * * *"
  workflow_dispatch:
    inputs:
      python:
        description: 'Specify python version'
        type: string
        required: false
        # Quoted so the version stays the string 3.10 rather than the float 3.1
        default: "3.10"
      triton:
        description: 'Specify triton commit, use pytorch pined commit by default'
        type: string
        required: false
        default: ""
      suite:
        description: 'Dynamo benchmarks test suite. huggingface,timm_models,torchbench. Delimiter is comma'
        type: string
        required: true
        default: "huggingface"
      dt:
        description: 'Data precision of the test.float32,bfloat16,float16,amp_bf16,amp_fp16. Delimiter is comma'
        type: string
        required: true
        default: "float32"
      mode:
        description: 'inference,training. Delimiter is comma'
        type: string
        required: true
        default: "inference"
      scenario:
        description: 'accuracy,performance. Delimiter is comma'
        type: string
        required: true
        default: "accuracy"
      model:
        description: 'If set, will only launch this one'
        type: string
        required: false
        default: ""
# Default token scope for every job; a job may override it with its own
# `permissions` mapping.
permissions: read-all
# Runs that resolve to the same group string cancel the one already in
# progress. The group is built from the workflow name, the PR number or ref
# name, the commit SHA (branch refs only), the trigger type and every
# dispatch input.
concurrency:
  cancel-in-progress: true
  # Folded scalar: each line break falls on a single space inside an
  # expression, so the folded value is one unbroken line.
  group: >-
    ${{ github.workflow }}-${{ github.event.pull_request.number ||
    github.ref_name }}-${{ github.ref_type == 'branch' &&
    github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{
    github.event_name == 'schedule' }}-${{ inputs.suite }}-${{
    inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{
    inputs.triton }}-${{ inputs.model }}
jobs:
  # Builds PyTorch with XPU enabled from source, installs Triton, then runs the
  # inductor-xpu-e2e-test action either as the fixed nightly matrix (no `suite`
  # input) or once with the dispatch inputs (on-demand).
  Inductor-XPU-E2E-Nightly-Tests:
    runs-on: pvc_e2e
    # Don't run on forked repos
    if: github.repository_owner == 'intel'
    timeout-minutes: 900
    # Values recorded by the `pinned` step, re-exported so the reporting job
    # can read them through `needs.<job>.outputs`.
    outputs:
      TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }}
      TORCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCH_COMMIT_ID }}
      DRIVER_VERSION: ${{ steps.pinned.outputs.DRIVER_VERSION }}
      BUNDLE_VERSION: ${{ steps.pinned.outputs.BUNDLE_VERSION }}
      OS_PRETTY_NAME: ${{ steps.pinned.outputs.OS_PRETTY_NAME }}
      GCC_VERSION: ${{ steps.pinned.outputs.GCC_VERSION }}
      TORCHBENCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCHBENCH_COMMIT_ID }}
      TORCHVISION_COMMIT_ID: ${{ steps.pinned.outputs.TORCHVISION_COMMIT_ID }}
      TORCHAUDIO_COMMIT_ID: ${{ steps.pinned.outputs.TORCHAUDIO_COMMIT_ID }}
      # TORCHTEXT_COMMIT_ID: ${{ steps.pinned.outputs.TORCHTEXT_COMMIT_ID }}
      TRANSFORMERS_VERSION: ${{ steps.pinned.outputs.TRANSFORMERS_VERSION }}
      TIMM_COMMIT_ID: ${{ steps.pinned.outputs.TIMM_COMMIT_ID }}
      TRITON_COMMIT_ID: ${{ steps.pinned.outputs.TRITON_COMMIT_ID }}
    steps:
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
      # Recreate the `e2e_ci` conda environment from scratch on every run.
      - name: Prepare Conda ENV
        run: |
          which conda && conda clean -ay
          conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci
          # `inputs` is empty on scheduled runs; fall back to the default that
          # workflow_dispatch declares so the spec is never a bare `python=`.
          conda create -n e2e_ci python=${{ inputs.python || '3.10' }} cmake ninja -y
          source activate e2e_ci
          conda install -c intel mkl-static mkl-include -y
          pip install pandas scipy tqdm
      # Clone pytorch main next to the workspace, apply the PR patches and
      # replace its bundled torch-xpu-ops with this checkout.
      - name: Prepare Stock Pytorch
        run: |
          pwd
          cd ../ && rm -rf pytorch
          source activate e2e_ci
          git clone -b main https://github.com/pytorch/pytorch pytorch
          cd pytorch
          # apply PRs for stock pytorch
          pip install requests
          python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
          git status && git show -s
          git submodule sync && git submodule update --init --recursive
          rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
          # Workaround for torch-xpu-ops ci test
          sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
      # Each value is written to both GITHUB_OUTPUT (job outputs above) and
      # GITHUB_ENV (later steps of this job).
      - name: Identify pinned versions
        id: pinned
        run: |
          cd ../pytorch
          # Quoted so the test stays well-formed whether or not a commit was given.
          if [ -z "${{ inputs.triton }}" ]; then
            echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          else
            echo "TRITON_COMMIT_ID=${{ inputs.triton }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          fi
          echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "TORCHBENCH_COMMIT_ID=$(<third_party/torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          # echo "TORCHTEXT_COMMIT_ID=$(<.github/ci_commit_pins/text.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          source /opt/intel/oneapi/compiler/latest/env/vars.sh
          echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          . /etc/os-release
          echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
          echo ${GITHUB_ENV}
      # TRITON_COMMIT_ID comes from GITHUB_ENV, written by the previous step.
      - name: Triton Installation
        run: |
          source activate e2e_ci
          cd ../pytorch
          TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
          echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
          pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
      - name: Build Pytorch XPU
        run: |
          source activate e2e_ci
          cd ../pytorch
          pip install -r requirements.txt
          export USE_XPU=1
          source /opt/intel/oneapi/compiler/latest/env/vars.sh
          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
          python setup.py bdist_wheel
          pip install --force-reinstall dist/*.whl
      # Also clears logs and inductor caches left over from earlier runs.
      - name: Show GITHUB_ENV
        run: |
          echo "$GITHUB_ENV"
          rm -rf ../pytorch/inductor_log
          rm -rf /tmp/torchinductor_*
      # Nightly matrix: these steps run only when `inputs.suite` is empty.
      - name: Nightly Huggingface FP32 Inference Accuracy Test
        if: ${{ !inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: huggingface
          env_prepare: true
          dt: float32
          mode: inference
          scenario: accuracy
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      - name: Nightly Huggingface BF16 Inference Accuracy Test
        if: ${{ !inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: huggingface
          dt: bfloat16
          mode: inference
          scenario: accuracy
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      - name: Nightly Huggingface FP16 Inference Accuracy Test
        if: ${{ !inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: huggingface
          dt: float16
          mode: inference
          scenario: accuracy
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      - name: Nightly Huggingface FP32 Training Accuracy Test
        if: ${{ !inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: huggingface
          dt: float32
          mode: training
          scenario: accuracy
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      - name: Nightly Huggingface BF16 Training Accuracy Test
        if: ${{ !inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: huggingface
          dt: bfloat16
          mode: training
          scenario: accuracy
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      - name: Nightly Huggingface FP16 Training Accuracy Test
        if: ${{ !inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: huggingface
          dt: float16
          mode: training
          scenario: accuracy
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      - name: Nightly Torchbench BF16 Training Accuracy Test
        if: ${{ !inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: torchbench
          dt: bfloat16
          mode: training
          scenario: accuracy
          env_prepare: true
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      - name: Nightly Timm_models FP16 Training Accuracy Test
        if: ${{ !inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: timm_models
          dt: float16
          mode: training
          scenario: accuracy
          env_prepare: true
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      # On-demand run: a single invocation driven by the dispatch inputs.
      - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
        if: ${{ inputs.suite }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: ${{ inputs.suite }}
          env_prepare: true
          dt: ${{ inputs.dt }}
          mode: ${{ inputs.mode }}
          scenario: ${{ inputs.scenario }}
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      # Copies the logs into upload_files and fails the job when the accuracy
      # summary reports at least one failed model.
      - name: Summarize archieve files
        if: always()
        run: |
          rm -rf ${{ github.workspace }}/upload_files
          cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
          # Number of summary lines reporting a non-zero failed-model count.
          failed_case=$(grep "Real failed: models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
          if [ ${failed_case} -ne 0 ];then
            # Print from the same copied log that was counted above, with the
            # same failure pattern, before failing the step.
            grep -E "Real failed: models: *[1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log
            exit 1
          fi
      - name: Upload Inductor XPU E2E Data
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
          path: ${{ github.workspace }}/upload_files
  # Posts a summary comment (status, component versions, environment) on a
  # tracking issue once the test job has finished with success or failure.
  Tests-Failure-And-Report:
    if: always()
    runs-on: pvc_e2e
    permissions:
      issues: write
    env:
      GH_TOKEN: ${{ github.token }}
    needs: Inductor-XPU-E2E-Nightly-Tests
    steps:
      - name: Report github issue for XPU OPS nightly
        if: github.repository_owner == 'intel'
        run: |
          set -xe
          # Test env
          build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          repo="${{ github.repository }}"
          TORCH_BRANCH_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCH_BRANCH_ID }}"
          TORCH_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCH_COMMIT_ID }}"
          DRIVER_VERSION="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.DRIVER_VERSION }}"
          BUNDLE_VERSION="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.BUNDLE_VERSION }}"
          OS_PRETTY_NAME="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.OS_PRETTY_NAME }}"
          GCC_VERSION="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.GCC_VERSION }}"
          TORCHBENCH_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCHBENCH_COMMIT_ID }}"
          TORCHVISION_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCHVISION_COMMIT_ID }}"
          TORCHAUDIO_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCHAUDIO_COMMIT_ID }}"
          # TORCHTEXT_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCHTEXT_COMMIT_ID }}"
          TRANSFORMERS_VERSION="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TRANSFORMERS_VERSION }}"
          TIMM_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TIMM_COMMIT_ID }}"
          TRITON_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TRITON_COMMIT_ID }}"
          # Test status
          if [ "${{ needs.Inductor-XPU-E2E-Nightly-Tests.result }}" == "success" ];then
            test_status=Success
          elif [ "${{ needs.Inductor-XPU-E2E-Nightly-Tests.result }}" == "failure" ];then
            test_status=Failure
            cc_comment="CC ${{ secrets.NIGHTLY_EMAIL_LIST }}"
          else
            # Any other result (e.g. skipped or cancelled): nothing to report.
            test_status=None
            exit 0
          fi
          # Test Type
          if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then
            test_type="On-demand"
            test_issue_id=426
            cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}"
          else
            test_type="Nightly"
            test_issue_id=432
          fi
          # Test report
          echo -e "$cc_comment\n**${test_status}** $test_type Test on $(date +'%F'), See: $build_url\n" > ${{ github.workspace }}/report.txt
          printf "Torch-xpu-ops | PyTorch | Triton\n--- | --- | ---\n${GITHUB_WORKFLOW_SHA:0:7} on ${GITHUB_REF_NAME} | " >> ${{ github.workspace }}/report.txt
          printf "[${TORCH_COMMIT_ID:0:7}](https://github.com/pytorch/pytorch/commit/${TORCH_COMMIT_ID:0:7}) on $TORCH_BRANCH_ID | " >> ${{ github.workspace }}/report.txt
          echo -e "[${TRITON_COMMIT_ID:0:7}](https://github.com/intel/intel-xpu-backend-for-triton/commit/${TRITON_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt
          printf "Transformers | Timm | Torchbench | Torchvision | Torchaudio\n--- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt
          printf "[${TRANSFORMERS_VERSION:0:7}](https://github.com/huggingface/transformers/commit/${TRANSFORMERS_VERSION:0:7}) | " >> ${{ github.workspace }}/report.txt
          printf "[${TIMM_COMMIT_ID:0:7}](https://github.com/huggingface/pytorch-image-models/commit/${TIMM_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt
          printf "[${TORCHBENCH_COMMIT_ID:0:7}](https://github.com/pytorch/benchmark/commit/${TORCHBENCH_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt
          printf "[${TORCHVISION_COMMIT_ID:0:7}](https://github.com/pytorch/vision/commit/${TORCHVISION_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt
          echo -e "[${TORCHAUDIO_COMMIT_ID:0:7}](https://github.com/pytorch/audio/commit/${TORCHAUDIO_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt
          printf "Device | OS | GCC | Python | Driver(DKMS) | Bundle(DPCPP)\n--- | --- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt
          # `inputs.python` is empty on scheduled runs; report the same
          # fallback version the test job installs.
          echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ inputs.python || '3.10' }} | $DRIVER_VERSION| $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt
          if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then
            test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}"
            if [ "${{ inputs.triton }}" != "" ];then
              test_scope+="; triton=${{ inputs.triton }}"
            fi
            if [ "${{ inputs.model }}" != "" ];then
              test_scope+="; model=${{ inputs.model }}"
            fi
            echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt
          fi
          # Report
          report_txt=$(cat ${{ github.workspace }}/report.txt)
          gh --repo $repo issue comment $test_issue_id --body "$report_txt"