Torch Nightly WHL Tests #154

Workflow file for this run

.github/workflows/nightly_ondemand_whl.yml at 68cb739

	# Workflow: nightly/weekly scheduled runs plus manual (on-demand) runs
	# against the PyTorch nightly XPU wheels.
	name: Torch Nightly WHL Tests

	on:
	  schedule:
	    # GitHub evaluates cron expressions in UTC.
	    # 14:00 UTC Sunday-Thursday (22:00 GMT+8 the same day)
	    - cron: '0 14 * * 0-4'
	    # 17:00 UTC Friday (01:00 GMT+8 Saturday)
	    - cron: '0 17 * * 5'
	  workflow_dispatch:
	    # Inputs for manual runs; list-valued inputs are comma-delimited strings.
	    inputs:
	      pytorch:
	        required: false
	        type: string
	        default: 'nightly'
	        description: Pytorch branch/commit
	      ut:
	        required: false
	        type: string
	        default: 'torch_xpu'
	        description: UT scope. `op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu`. Delimiter is comma
	      suite:
	        required: true
	        type: string
	        default: 'huggingface'
	        description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench`. Delimiter is comma
	      dt:
	        required: true
	        type: string
	        default: 'float32'
	        description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma
	      mode:
	        required: true
	        type: string
	        default: 'inference'
	        description: Test mode. `inference,training`. Delimiter is comma
	      scenario:
	        required: true
	        type: string
	        default: 'accuracy'
	        description: Test scenario. `accuracy,performance`. Delimiter is comma
	      model:
	        required: false
	        type: string
	        default: ''
	        description: Model. Will only run this one model if set
	      python:
	        required: false
	        type: string
	        # Quoted so the version is not parsed as the float 3.1.
	        default: '3.10'
	        description: Python version

	# Grant the workflow token read-only access to every scope.
	permissions: read-all

	# One concurrency group per (workflow, commit, trigger, dispatch inputs) tuple.
	# A new run in the same group cancels the run that is still in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.ut }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }}
	  cancel-in-progress: true

	jobs:
	  # Unit tests, delegated to the reusable _linux_ut.yml workflow.
	  Linux-Nightly-Ondemand-UT-WHL-Tests:
	    uses: ./.github/workflows/_linux_ut.yml
	    with:
	      # Scheduled runs use the fixed full scope; manual runs use the dispatch inputs.
	      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
	      python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
	      pytorch: nightly_wheel
	      runner: e2e_internal

	  # End-to-end inductor benchmarks run against the nightly XPU wheels.
	  Linux-Nightly-Ondemand-E2E-WHL-Tests:
	    runs-on: e2e_internal
	    # Don't run on forked repos
	    if: github.repository_owner == 'intel'
	    # NOTE(review): 36000 minutes is 25 days; confirm this does not exceed
	    # the maximum job execution time the runner allows.
	    timeout-minutes: 36000
	    env:
	      # Scheduled runs use fixed values; manual runs use the dispatch inputs.
	      pytorch: ${{ github.event_name == 'schedule' && 'nightly' || inputs.pytorch }}
	      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
	      python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
	    outputs:
	      # Written by the `installed` step.
	      TORCH_BRANCH_ID: ${{ steps.installed.outputs.TORCH_BRANCH_ID }}
	      TORCH_COMMIT_ID: ${{ steps.installed.outputs.TORCH_COMMIT_ID }}
	      TORCH_XPU_OPS_COMMIT: ${{ steps.installed.outputs.TORCH_XPU_OPS_COMMIT }}
	      # Written by the `pinned` step.
	      TORCHBENCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCHBENCH_COMMIT_ID }}
	      TORCHVISION_COMMIT_ID: ${{ steps.pinned.outputs.TORCHVISION_COMMIT_ID }}
	      TORCHAUDIO_COMMIT_ID: ${{ steps.pinned.outputs.TORCHAUDIO_COMMIT_ID }}
	      TRANSFORMERS_VERSION: ${{ steps.pinned.outputs.TRANSFORMERS_VERSION }}
	      TIMM_COMMIT_ID: ${{ steps.pinned.outputs.TIMM_COMMIT_ID }}
	      TRITON_COMMIT_ID: ${{ steps.pinned.outputs.TRITON_COMMIT_ID }}
	      DRIVER_VERSION: ${{ steps.pinned.outputs.DRIVER_VERSION }}
	      KERNEL_VERSION: ${{ steps.pinned.outputs.KERNEL_VERSION }}
	      BUNDLE_VERSION: ${{ steps.pinned.outputs.BUNDLE_VERSION }}
	      OS_PRETTY_NAME: ${{ steps.pinned.outputs.OS_PRETTY_NAME }}
	      GCC_VERSION: ${{ steps.pinned.outputs.GCC_VERSION }}
	      # NOTE(review): the `summary` step in this file does not write
	      # TIMEOUT_MODELS to GITHUB_OUTPUT - confirm where this value is produced.
	      TIMEOUT_MODELS: ${{ steps.summary.outputs.TIMEOUT_MODELS }}
	    steps:
	      - name: Checkout torch-xpu-ops
	        uses: actions/checkout@v4
	      # Recreates a Python 3.10 virtualenv from scratch (venv, despite the step name).
	      - name: Prepare Conda ENV
	        run: |
	          rm -rf ${HOME}/mengfeil/myvenv-e2e
	          /usr/bin/python3.10 -m venv ${HOME}/mengfeil/myvenv-e2e
	          source ${HOME}/mengfeil/myvenv-e2e/bin/activate
	          # pip install mkl-static==2025.0.1 mkl-include==2025.0.1
	          pip install pandas scipy tqdm
	      # Installs the nightly XPU wheels, then checks out the matching pytorch
	      # source tree and the torch-xpu-ops commit pinned in third_party/xpu.txt.
	      - name: Prepare Stock Pytorch
	        id: installed
	        run: |
	          pwd
	          source ${HOME}/mengfeil/myvenv-e2e/bin/activate
	          pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
	          # Each value is appended to both GITHUB_OUTPUT (tee -a) and GITHUB_ENV (>>).
	          echo "TORCH_BRANCH_ID=$(python -c 'import torch; print(torch.__version__)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
	          echo "TORCH_COMMIT_ID=${TORCH_COMMIT_ID}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          cd ../ && rm -rf pytorch
	          git clone https://github.com/pytorch/pytorch pytorch
	          cd pytorch && git checkout ${TORCH_COMMIT_ID}
	          # apply PRs for stock pytorch
	          pip install requests
	          python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
	          git status && git show -s
	          pip install -r requirements.txt
	          TORCH_XPU_OPS_COMMIT=$(<third_party/xpu.txt)
	          echo "TORCH_XPU_OPS_COMMIT=${TORCH_XPU_OPS_COMMIT}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          rm -rf third_party/torch-xpu-ops
	          git clone https://github.com/intel/torch-xpu-ops.git third_party/torch-xpu-ops
	          cd third_party/torch-xpu-ops
	          git checkout ${TORCH_XPU_OPS_COMMIT}
	      # Records library, pin-file and host versions as step outputs and env vars.
	      - name: Identify pinned versions
	        id: pinned
	        env:
	          # Passed through the environment so the dispatch input is never
	          # interpolated into the shell script text.
	          MODEL_ONLY_NAME_INPUT: ${{ inputs.model }}
	        run: |
	          source ${HOME}/mengfeil/myvenv-e2e/bin/activate
	          echo "TORCHVISION_COMMIT_ID=$(python -c 'import torchvision; print(torchvision.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo "TORCHAUDIO_COMMIT_ID=$(python -c 'import torchaudio; print(torchaudio.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo "TRITON_COMMIT_ID=$(python -c 'import triton; print(triton.__version__)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          cd ../pytorch
	          echo "TORCHBENCH_COMMIT_ID=$(<third_party/torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo "MODEL_ONLY_NAME=${MODEL_ONLY_NAME_INPUT}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          # sed keeps only the text after the last '/' and before the first ','.
	          echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo "BUNDLE_VERSION=$(pip list |grep cmplr |head -n 1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          . /etc/os-release
	          echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
	          echo ${GITHUB_ENV}
	      # Prints the GITHUB_ENV file path and removes logs/caches left by earlier runs.
	      - name: Show GITHUB_ENV
	        run: |
	          echo "$GITHUB_ENV"
	          rm -rf ../pytorch/inductor_log
	          rm -rf /tmp/torchinductor_*

	      # Nightly launch
	      # The schedule string in each `if` must match the cron entry under `on.schedule` exactly.
	      - name: Nightly Huggingface FP32/BF16/FP16 Inference & Training Accuracy Test
	        if: github.event_name == 'schedule' && github.event.schedule == '0 14 * * 0-4'
	        uses: ./.github/actions/inductor-xpu-e2e-test
	        with:
	          suite: huggingface
	          env_prepare: true
	          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
	          mode: inference,training
	          scenario: accuracy
	          pytorch: nightly_wheel
	          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      - name: Nightly Torchbench BF16 Training Accuracy Test
	        if: github.event_name == 'schedule' && github.event.schedule == '0 14 * * 0-4'
	        uses: ./.github/actions/inductor-xpu-e2e-test
	        with:
	          suite: torchbench
	          dt: bfloat16
	          mode: training
	          scenario: accuracy
	          pytorch: nightly_wheel
	          env_prepare: true
	          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      - name: Nightly Timm_models FP16 Training Accuracy Test
	        if: github.event_name == 'schedule' && github.event.schedule == '0 14 * * 0-4'
	        uses: ./.github/actions/inductor-xpu-e2e-test
	        with:
	          suite: timm_models
	          dt: float16
	          mode: training
	          scenario: accuracy
	          pytorch: nightly_wheel
	          env_prepare: true
	          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      # Weekly launch
	      - name: Weekly Huggingface Full Test
	        if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5'
	        uses: ./.github/actions/inductor-xpu-e2e-test
	        with:
	          suite: huggingface
	          env_prepare: true
	          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
	          mode: inference,training
	          scenario: accuracy,performance
	          pytorch: nightly_wheel
	          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      - name: Weekly Torchbench Full Test
	        if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5'
	        uses: ./.github/actions/inductor-xpu-e2e-test
	        with:
	          suite: torchbench
	          env_prepare: true
	          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
	          mode: inference,training
	          scenario: accuracy,performance
	          pytorch: nightly_wheel
	          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      - name: Weekly Timm_models Full Test
	        if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5'
	        uses: ./.github/actions/inductor-xpu-e2e-test
	        with:
	          suite: timm_models
	          env_prepare: true
	          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
	          mode: inference,training
	          scenario: accuracy,performance
	          pytorch: nightly_wheel
	          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      # On-demand launch
	      - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
	        if: github.event_name != 'schedule'
	        uses: ./.github/actions/inductor-xpu-e2e-test
	        with:
	          suite: ${{ inputs.suite }}
	          env_prepare: true
	          dt: ${{ inputs.dt }}
	          mode: ${{ inputs.mode }}
	          scenario: ${{ inputs.scenario }}
	          pytorch: nightly_wheel
	          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

	      # Runs unless the run was cancelled, so logs of failed tests are still collected.
	      - name: Summarize archieve files
	        id: summary
	        if: ${{ ! cancelled() }}
	        run: |
	          rm -rf ${{ github.workspace }}/upload_files
	          cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
	          mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
	          find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
	          tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
	      - name: Upload Inductor XPU E2E Data
	        if: ${{ ! cancelled() }}
	        uses: actions/upload-artifact@v4
	        with:
	          # Falls back to the commit SHA when the event carries no pull request number.
	          name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
	          path: ${{ github.workspace }}/upload_files

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Torch Nightly WHL Tests #154

Workflow file

Torch Nightly WHL Tests #154

Jobs

Run details

Workflow file for this run