E2E Nightly_OnDemand Tests #294

Workflow file for this run

.github/workflows/inductor_xpu_e2e_nightly.yml at b84b21f

	# Workflow display name shown in the Actions UI.
	name: E2E Nightly_OnDemand Tests

	# Triggers: one scheduled (nightly) run and a manually dispatched run whose
	# inputs select what the on-demand test step executes.
	on:
	  schedule:
	    # GMT+8 21:00 every day
	    - cron: '0 13 * * *'
	  workflow_dispatch:
	    inputs:
	      # Python version used when the conda test environment is created.
	      python:
	        required: false
	        type: string
	        default: '3.10'
	        description: Specify python version
	      # Triton commit to install; when empty the commit pinned by pytorch is used.
	      triton:
	        required: false
	        type: string
	        default: ''
	        description: Specify triton commit, use pytorch pined commit by default
	      # Forwarded as the `suite` argument of the on-demand test step.
	      suite:
	        required: true
	        type: string
	        default: 'huggingface'
	        description: Dynamo benchmarks test suite. huggingface,timm_models,torchbench. Delimiter is comma
	      # Forwarded as the `dt` argument of the on-demand test step.
	      dt:
	        required: true
	        type: string
	        default: 'float32'
	        description: Data precision of the test.float32,bfloat16,float16,amp_bf16,amp_fp16. Delimiter is comma
	      # Forwarded as the `mode` argument of the on-demand test step.
	      mode:
	        required: true
	        type: string
	        default: 'inference'
	        description: inference,training. Delimiter is comma
	      # Forwarded as the `scenario` argument of the on-demand test step.
	      scenario:
	        required: true
	        type: string
	        default: 'accuracy'
	        description: accuracy,performance. Delimiter is comma
	      # Exported as MODEL_ONLY_NAME by the "Identify pinned versions" step.
	      model:
	        required: false
	        type: string
	        default: ''
	        description: If set, will only launch this one


	# Give the default token read-only access to every permission scope.
	permissions: read-all

	# Runs sharing the same group key are serialized; a newer run cancels the
	# one in progress. The key combines workflow, ref/PR, trigger type and all
	# dispatch inputs, so runs with different inputs do not cancel each other.
	concurrency:
	  # `||` must be unescaped to be a valid expression operator.
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.triton }}-${{ inputs.model }}
	  cancel-in-progress: true

	jobs:
	  # Builds PyTorch with XPU support and runs the inductor E2E suites.
	  Inductor-XPU-E2E-Nightly-Tests:
	    runs-on: pvc_e2e
	    # Don't run on forked repos
	    if: github.repository_owner == 'intel'
	    timeout-minutes: 900
	    # Values recorded by the `pinned` step, re-exported as job outputs so the
	    # reporting job can read them through `needs.<job>.outputs`.
	    outputs:
	      TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }}
	      TORCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCH_COMMIT_ID }}
	      DRIVER_VERSION: ${{ steps.pinned.outputs.DRIVER_VERSION }}
	      BUNDLE_VERSION: ${{ steps.pinned.outputs.BUNDLE_VERSION }}
	      OS_PRETTY_NAME: ${{ steps.pinned.outputs.OS_PRETTY_NAME }}
	      GCC_VERSION: ${{ steps.pinned.outputs.GCC_VERSION }}
	      TORCHBENCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCHBENCH_COMMIT_ID }}
	      TORCHVISION_COMMIT_ID: ${{ steps.pinned.outputs.TORCHVISION_COMMIT_ID }}
	      TORCHAUDIO_COMMIT_ID: ${{ steps.pinned.outputs.TORCHAUDIO_COMMIT_ID }}
	      # TORCHTEXT_COMMIT_ID: ${{ steps.pinned.outputs.TORCHTEXT_COMMIT_ID }}
	      TRANSFORMERS_VERSION: ${{ steps.pinned.outputs.TRANSFORMERS_VERSION }}
	      TIMM_COMMIT_ID: ${{ steps.pinned.outputs.TIMM_COMMIT_ID }}
	      TRITON_COMMIT_ID: ${{ steps.pinned.outputs.TRITON_COMMIT_ID }}
	    steps:
	# Fetch this repository into the workspace; later steps copy it into the
	# pytorch tree and use its local action and scripts.
	- name: Checkout torch-xpu-ops
	  uses: actions/checkout@v4
	# Recreate the `e2e_ci` conda environment from scratch for every run.
	- name: Prepare Conda ENV
	  run: |
	    which conda && conda clean -ay
	    # Remove a stale env; if conda cannot, delete its directory directly.
	    conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci
	    # `inputs` is empty on scheduled runs, so fall back to the dispatch
	    # default instead of passing a bare `python=` to conda.
	    conda create -n e2e_ci python=${{ inputs.python || '3.10' }} cmake ninja -y
	    source activate e2e_ci
	    conda install -c intel mkl-static mkl-include -y
	    pip install pandas scipy tqdm
	# Clone upstream pytorch next to the workspace and place this checkout of
	# torch-xpu-ops inside it as third_party/torch-xpu-ops.
	- name: Prepare Stock Pytorch
	  run: |
	    pwd
	    cd ../ && rm -rf pytorch
	    source activate e2e_ci
	    git clone -b main https://github.com/pytorch/pytorch pytorch
	    cd pytorch
	    # apply PRs for stock pytorch
	    pip install requests
	    python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
	    git status && git show -s
	    git submodule sync && git submodule update --init --recursive
	    # Replace the submodule copy with the code under test.
	    rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
	    # Workaround for torch-xpu-ops ci test: rewrite the pinned-commit checkout
	    # in caffe2/CMakeLists.txt into a harmless `log -n 1`
	    # (presumably so the build keeps the copied tree - confirm).
	    sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
	# Record the commits/versions of every component under test. Each value is
	# appended to GITHUB_OUTPUT (read by the job `outputs`) and to GITHUB_ENV
	# (visible to the following steps of this job).
	- name: Identify pinned versions
	  id: pinned
	  run: |
	    cd ../pytorch
	    # Quote the input: it expands to nothing when no triton commit is given.
	    if [ -z "${{ inputs.triton }}" ]; then
	      echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    else
	      echo "TRITON_COMMIT_ID=${{ inputs.triton }}" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    fi
	    echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo "TORCHBENCH_COMMIT_ID=$(<third_party/torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    # echo "TORCHTEXT_COMMIT_ID=$(<.github/ci_commit_pins/text.txt)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo "MODEL_ONLY_NAME=${{ inputs.model }}" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    source /opt/intel/oneapi/compiler/latest/env/vars.sh
	    # Keep only the text between the last '/' and the first ',' of the dkms line.
	    echo "DRIVER_VERSION=$(dkms status 2>&1 | grep 'intel-i915-dkms' | sed 's/.*\///;s/,.*//')" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    # Keep only the text between the last '(' and the following ')'.
	    echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 | grep 'DPC++/C++' | sed 's/.*(//;s/).*//')" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    # /etc/os-release defines PRETTY_NAME.
	    . /etc/os-release
	    echo "OS_PRETTY_NAME=${PRETTY_NAME}" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo "GCC_VERSION=$(gcc -dumpversion)" | tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	    echo ${GITHUB_ENV}
	# Install the XPU triton backend at the commit exported as TRITON_COMMIT_ID
	# by the "Identify pinned versions" step.
	- name: Triton Installation
	  run: |
	    source activate e2e_ci
	    cd ../pytorch
	    TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
	    echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
	    pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
	# Build a PyTorch wheel with XPU enabled and install it into the test env.
	- name: Build Pytorch XPU
	  run: |
	    source activate e2e_ci
	    cd ../pytorch
	    pip install -r requirements.txt
	    export USE_XPU=1
	    source /opt/intel/oneapi/compiler/latest/env/vars.sh
	    # Use the active conda prefix, or derive one from the conda binary location.
	    export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
	    python setup.py bdist_wheel
	    pip install --force-reinstall dist/*.whl
	# Print the env-file path and clear leftovers of earlier runs: the inductor
	# log directory and the /tmp/torchinductor_* directories.
	- name: Show GITHUB_ENV
	  run: |
	    echo "$GITHUB_ENV"
	    rm -rf ../pytorch/inductor_log
	    rm -rf /tmp/torchinductor_*
	# Nightly Huggingface accuracy matrix: {float32, bfloat16, float16} x
	# {inference, training}. Every step is skipped when a `suite` input is
	# present, i.e. on manually dispatched runs.

	# Only the first step of the suite sets `env_prepare`.
	- name: Nightly Huggingface FP32 Inference Accuracy Test
	  if: ${{ !inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: huggingface
	    env_prepare: true
	    dt: float32
	    mode: inference
	    scenario: accuracy
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

	- name: Nightly Huggingface BF16 Inference Accuracy Test
	  if: ${{ !inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: huggingface
	    dt: bfloat16
	    mode: inference
	    scenario: accuracy
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

	- name: Nightly Huggingface FP16 Inference Accuracy Test
	  if: ${{ !inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: huggingface
	    dt: float16
	    mode: inference
	    scenario: accuracy
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

	- name: Nightly Huggingface FP32 Training Accuracy Test
	  if: ${{ !inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: huggingface
	    dt: float32
	    mode: training
	    scenario: accuracy
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

	- name: Nightly Huggingface BF16 Training Accuracy Test
	  if: ${{ !inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: huggingface
	    dt: bfloat16
	    mode: training
	    scenario: accuracy
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

	- name: Nightly Huggingface FP16 Training Accuracy Test
	  if: ${{ !inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: huggingface
	    dt: float16
	    mode: training
	    scenario: accuracy
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	# Nightly Torchbench coverage: bfloat16 training accuracy only. Skipped when
	# a `suite` input is present; sets `env_prepare` because it is the first
	# (and only) step for this suite.
	- name: Nightly Torchbench BF16 Training Accuracy Test
	  if: ${{ !inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: torchbench
	    dt: bfloat16
	    mode: training
	    scenario: accuracy
	    env_prepare: true
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	# Nightly Timm_models coverage: float16 training accuracy only. Skipped when
	# a `suite` input is present; sets `env_prepare` because it is the first
	# (and only) step for this suite.
	- name: Nightly Timm_models FP16 Training Accuracy Test
	  if: ${{ !inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: timm_models
	    dt: float16
	    mode: training
	    scenario: accuracy
	    env_prepare: true
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	# On-demand counterpart of the nightly steps: runs only when a `suite` input
	# is present and forwards the dispatch inputs to the local test action.
	- name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
	  if: ${{ inputs.suite }}
	  uses: ./.github/actions/inductor-xpu-e2e-test
	  with:
	    suite: ${{ inputs.suite }}
	    env_prepare: true
	    dt: ${{ inputs.dt }}
	    mode: ${{ inputs.mode }}
	    scenario: ${{ inputs.scenario }}
	    hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	# Copy the inductor logs into the upload directory and fail the job when the
	# accuracy summary reports at least one real model failure. Runs even when
	# earlier steps failed.
	- name: Summarize archieve files
	  if: always()
	  run: |
	    rm -rf ${{ github.workspace }}/upload_files
	    cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
	    # Number of summary lines with a non-zero "Real failed" model count.
	    failed_case=$(grep "Real failed: models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log | wc -l || true)
	    if [ "${failed_case}" -ne 0 ]; then
	      # Print the failing summaries from the same copied log counted above;
	      # the log lives under upload_files/, not in the workspace root.
	      grep -E "Real failed: models: *[1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log
	      exit 1
	    fi
	# Publish the collected logs as a workflow artifact, even after failures.
	- name: Upload Inductor XPU E2E Data
	  if: always()
	  uses: actions/upload-artifact@v4
	  with:
	    # Artifact name ends in the PR number when there is one, else the commit SHA.
	    name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
	    path: ${{ github.workspace }}/upload_files

	# Follow-up job that comments the test outcome on a tracking issue.
	Tests-Failure-And-Report:
	  # Wait for the test job, then run regardless of how it ended.
	  needs: Inductor-XPU-E2E-Nightly-Tests
	  if: always()
	  runs-on: pvc_e2e
	  # The job token needs write access to issues to post the comment.
	  permissions:
	    issues: write
	  env:
	    # Token read by the `gh` CLI.
	    GH_TOKEN: ${{ github.token }}
	  steps:
	# Build a markdown report (status line plus version tables) from the test
	# job's outputs and post it as a comment on the tracking issue: issue 426
	# for on-demand runs, issue 432 for nightly runs.
	- name: Report github issue for XPU OPS nightly
	  if: github.repository_owner == 'intel'
	  run: |
	    set -xe
	    # Test env
	    build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	    repo="${{ github.repository }}"
	    report_file="${{ github.workspace }}/report.txt"
	    TORCH_BRANCH_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCH_BRANCH_ID }}"
	    TORCH_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCH_COMMIT_ID }}"
	    DRIVER_VERSION="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.DRIVER_VERSION }}"
	    BUNDLE_VERSION="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.BUNDLE_VERSION }}"
	    OS_PRETTY_NAME="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.OS_PRETTY_NAME }}"
	    GCC_VERSION="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.GCC_VERSION }}"
	    TORCHBENCH_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCHBENCH_COMMIT_ID }}"
	    TORCHVISION_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCHVISION_COMMIT_ID }}"
	    TORCHAUDIO_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCHAUDIO_COMMIT_ID }}"
	    # TORCHTEXT_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TORCHTEXT_COMMIT_ID }}"
	    TRANSFORMERS_VERSION="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TRANSFORMERS_VERSION }}"
	    TIMM_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TIMM_COMMIT_ID }}"
	    TRITON_COMMIT_ID="${{ needs.Inductor-XPU-E2E-Nightly-Tests.outputs.TRITON_COMMIT_ID }}"
	    # Test status: any result other than success/failure posts no comment.
	    if [ "${{ needs.Inductor-XPU-E2E-Nightly-Tests.result }}" == "success" ];then
	      test_status=Success
	    elif [ "${{ needs.Inductor-XPU-E2E-Nightly-Tests.result }}" == "failure" ];then
	      test_status=Failure
	      cc_comment="CC ${{ secrets.NIGHTLY_EMAIL_LIST }}"
	    else
	      test_status=None
	      exit 0
	    fi
	    # Test Type: on-demand runs always CC the triggering user, replacing the
	    # failure CC list set above.
	    if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then
	      test_type="On-demand"
	      test_issue_id=426
	      cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}"
	    else
	      test_type="Nightly"
	      test_issue_id=432
	    fi
	    # Test report: the first write truncates the file, the rest append.
	    # Table cells are separated by plain `|` so the markdown tables render.
	    echo -e "$cc_comment\n${test_status} $test_type Test on $(date +'%F'), See: $build_url\n" > "$report_file"
	    printf "Torch-xpu-ops | PyTorch | Triton\n--- | --- | ---\n${GITHUB_WORKFLOW_SHA:0:7} on ${GITHUB_REF_NAME} | " >> "$report_file"
	    printf "[${TORCH_COMMIT_ID:0:7}](https://github.com/pytorch/pytorch/commit/${TORCH_COMMIT_ID:0:7}) on $TORCH_BRANCH_ID | " >> "$report_file"
	    echo -e "[${TRITON_COMMIT_ID:0:7}](https://github.com/intel/intel-xpu-backend-for-triton/commit/${TRITON_COMMIT_ID:0:7}) \n" >> "$report_file"
	    printf "Transformers | Timm | Torchbench | Torchvision | Torchaudio\n--- | --- | --- | --- | ---\n" >> "$report_file"
	    printf "[${TRANSFORMERS_VERSION:0:7}](https://github.com/huggingface/transformers/commit/${TRANSFORMERS_VERSION:0:7}) | " >> "$report_file"
	    printf "[${TIMM_COMMIT_ID:0:7}](https://github.com/huggingface/pytorch-image-models/commit/${TIMM_COMMIT_ID:0:7}) | " >> "$report_file"
	    printf "[${TORCHBENCH_COMMIT_ID:0:7}](https://github.com/pytorch/benchmark/commit/${TORCHBENCH_COMMIT_ID:0:7}) | " >> "$report_file"
	    printf "[${TORCHVISION_COMMIT_ID:0:7}](https://github.com/pytorch/vision/commit/${TORCHVISION_COMMIT_ID:0:7}) | " >> "$report_file"
	    echo -e "[${TORCHAUDIO_COMMIT_ID:0:7}](https://github.com/pytorch/audio/commit/${TORCHAUDIO_COMMIT_ID:0:7}) \n" >> "$report_file"
	    printf "Device | OS | GCC | Python | Driver(DKMS) | Bundle(DPCPP)\n--- | --- | --- | --- | --- | ---\n" >> "$report_file"
	    echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ inputs.python }} | $DRIVER_VERSION| $BUNDLE_VERSION \n" >> "$report_file"
	    # On-demand runs also list the inputs that selected the test scope.
	    if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then
	      test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}"
	      if [ "${{ inputs.triton }}" != "" ];then
	        test_scope+="; triton=${{ inputs.triton }}"
	      fi
	      if [ "${{ inputs.model }}" != "" ];then
	        test_scope+="; model=${{ inputs.model }}"
	      fi
	      echo -e "Inputs | $test_scope\n--- | --- \n" >> "$report_file"
	    fi
	    # Report
	    report_txt=$(cat "$report_file")
	    gh --repo $repo issue comment $test_issue_id --body "$report_txt"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

E2E Nightly_OnDemand Tests #294

Workflow file

E2E Nightly_OnDemand Tests #294

Jobs

Run details

Workflow file for this run