# Inductor E2E Nightly Tests, run #35
#
# Workflow file for this run:
#
# .github/workflows/triton_xpu_backend_e2e_nightly.yml at commit adb2af9

	name: Inductor E2E Nightly Tests

	# Two triggers: a manual dispatch whose form lets the operator override the
	# PyTorch / IPEX / oneAPI pins, and a nightly cron. The `default:` values
	# below only pre-fill the dispatch form; on `schedule` runs
	# `github.event.inputs` is empty, so anything that consumes these inputs
	# needs its own fallback value.
	on:
	  workflow_dispatch:
	    inputs:
	      torchrepo:
	        description: 'torchrepo'
	        required: true
	        default: 'https://github.com/pytorch/pytorch.git'
	      torchbranch:
	        description: 'torchbranch'
	        required: true
	        default: 'v2.0.1'
	      torchcommit:
	        description: 'torchcommit'
	        required: true
	        default: 'e9ebda29d87ce0916ab08c06ab26fd3766a870e5'
	      ipexrepo:
	        description: 'ipexrepo'
	        required: true
	        default: 'https://github.com/intel/intel-extension-for-pytorch.git'
	      ipexbranch:
	        description: 'ipexbranch'
	        required: true
	        default: 'xpu-master'
	      ipexcommit:
	        description: 'ipexcommit'
	        required: true
	        default: '4af80f77740ed939be78eba28ae36951823f335c'
	      oneapi:
	        description: 'oneAPI basekit version'
	        required: true
	        default: '2023.2.0'
	  schedule:
	    - cron: "0 14 * * *"  # run at 2 PM UTC

	# Component pins used by the jobs below. The dispatch inputs are only
	# populated for manual runs; on the nightly `schedule` trigger they expand to
	# empty strings, so each value falls back to the same pin the dispatch form
	# declares as its default. The scripts read these as environment variables
	# instead of having the values pasted into the script text, which keeps an
	# input value from ever being parsed as shell syntax.
	env:
	  E2E_TORCH_REPO: ${{ github.event.inputs.torchrepo || 'https://github.com/pytorch/pytorch.git' }}
	  E2E_TORCH_BRANCH: ${{ github.event.inputs.torchbranch || 'v2.0.1' }}
	  E2E_TORCH_COMMIT: ${{ github.event.inputs.torchcommit || 'e9ebda29d87ce0916ab08c06ab26fd3766a870e5' }}
	  E2E_IPEX_REPO: ${{ github.event.inputs.ipexrepo || 'https://github.com/intel/intel-extension-for-pytorch.git' }}
	  E2E_IPEX_BRANCH: ${{ github.event.inputs.ipexbranch || 'xpu-master' }}
	  E2E_IPEX_COMMIT: ${{ github.event.inputs.ipexcommit || '4af80f77740ed939be78eba28ae36951823f335c' }}
	  E2E_ONEAPI_VERSION: ${{ github.event.inputs.oneapi || '2023.2.0' }}

	# The three jobs run strictly in sequence (each `needs` the previous one) on
	# the self-hosted PVC_E2E runner and pass state to each other through
	# directories under ${HOME} (triton-nightly/, env_triton.sh), not through
	# artifacts.
	# NOTE(review): this presumes every job lands on the same machine - confirm
	# that only one runner carries the PVC_E2E label.
	jobs:

	  # Stage 1: install prerequisites, fetch Triton plus the Intel XPU backend,
	  # run env_prepare.sh with the PyTorch/IPEX/oneAPI pins, build the Triton
	  # wheel and stage the benchmark scripts.
	  Tests-Env-Prepare:

	    runs-on: [self-hosted, PVC_E2E]

	    steps:

	      # Despite the step name, the conda env `triton-nightly-test` must
	      # already exist: it is activated here, not created.
	      - name: Create conda environment
	        run: |
	          source ${HOME}/miniconda3/bin/activate triton-nightly-test
	          conda install -y astunparse numpy ninja pyyaml setuptools cmake cffi \
	            typing_extensions future six requests dataclasses mkl mkl-include
	          conda install -y -c conda-forge libstdcxx-ng

	      # Fresh clone of Triton at its default branch head; the resolved commit
	      # is written to sw_info.log, which the accuracy job later copies next
	      # to the test logs. The XPU backend submodule is then moved to the tip
	      # of its `main` branch.
	      - name: Triton source code prepare
	        run: |
	          source ${HOME}/miniconda3/bin/activate triton-nightly-test
	          cd ${HOME}/triton-nightly
	          rm -rf triton
	          git clone https://github.com/openai/triton triton
	          cd triton
	          triton_commit=$(git rev-parse HEAD)
	          echo "triton_commit: ${triton_commit}" | tee sw_info.log
	          git submodule sync
	          git submodule update --init --recursive --jobs 0
	          cd third_party/intel_xpu_backend
	          git checkout main && git pull

	      - name: Install Dependency
	        run: |
	          # Print the interpreter before and after activation to make the
	          # environment switch visible in the log.
	          python --version
	          source ${HOME}/miniconda3/bin/activate triton-nightly-test
	          python --version
	          pip install setuptools cython numpy wheel scikit-build scipy
	          pip install psutil cpuid
	          cd ${HOME}/triton-nightly
	          cp triton/third_party/intel_xpu_backend/.github/scripts/env_prepare.sh .
	          cp triton/third_party/intel_xpu_backend/.github/scripts/env_triton.sh ${HOME}/
	          cp -r triton/third_party/intel_xpu_backend/.github/patches/ .
	          bash env_prepare.sh triton-nightly \
	            "${E2E_TORCH_REPO}" \
	            "${E2E_TORCH_BRANCH}" \
	            "${E2E_TORCH_COMMIT}" \
	            "${E2E_IPEX_REPO}" \
	            "${E2E_IPEX_BRANCH}" \
	            "${E2E_IPEX_COMMIT}" \
	            "${E2E_ONEAPI_VERSION}"
	          source ${HOME}/env_triton.sh "${E2E_ONEAPI_VERSION}"
	          # The step shell runs with `-e`, so the import has to be tested
	          # inside the `if` itself; checked afterwards, a failure would abort
	          # the script before the message below could be printed.
	          if ! python -c "import torch;import intel_extension_for_pytorch"; then
	            echo -e "[ERROR] Public-torch or IPEX BUILD FAIL"
	            exit 1
	          fi

	      - name: Build Triton
	        shell: bash
	        run: |
	          source ${HOME}/miniconda3/bin/activate triton-nightly-test
	          pip uninstall -y triton
	          sudo update-ca-certificates --fresh
	          export SSL_CERT_DIR=/etc/ssl/certs
	          pip install pybind11
	          cd ${HOME}/triton-nightly/triton/python
	          python setup.py clean
	          TRITON_CODEGEN_INTEL_XPU_BACKEND=1 python setup.py bdist_wheel
	          pip install dist/*.whl
	          cd ${HOME}/triton-nightly
	          source ${HOME}/env_triton.sh "${E2E_ONEAPI_VERSION}"
	          # Tested inside the `if` for the same reason as in the previous
	          # step: this shell exits on the first failing command.
	          if ! python -c "import triton"; then
	            echo -e "[ERROR] Triton BUILD FAIL"
	            exit 1
	          fi

	      # NOTE(review): this step does not activate `triton-nightly-test`, so
	      # `pip` resolves to whatever is first on the runner's PATH - confirm
	      # that is the interpreter the perf-summary step is meant to use.
	      - name: Prepare Benchmark
	        run: |
	          scripts_dir=${HOME}/triton-nightly/triton/third_party/intel_xpu_backend/.github/scripts
	          cp ${scripts_dir}/inductor_xpu_test.sh ${HOME}/triton-nightly/pytorch
	          cp ${scripts_dir}/inductor_perf_summary.py ${HOME}/triton-nightly/pytorch
	          pip install styleFrame scipy pandas

	  # Stage 2: run the HuggingFace accuracy suite, summarize pass counts,
	  # upload the logs, then gate on minimum pass counts.
	  Accuracy-Test:

	    needs: Tests-Env-Prepare

	    runs-on: [self-hosted, PVC_E2E]

	    steps:

	      - name: ACC Test for triton on PVC
	        run: |
	          echo -e "[ INFO ] Run E2E Acc test on Node $(hostname)"
	          source ${HOME}/miniconda3/bin/activate triton-nightly-test
	          source ${HOME}/env_triton.sh "${E2E_ONEAPI_VERSION}"
	          cd ${HOME}/triton-nightly
	          # NOTE(review): run with `bash`, so anything set_proxy.sh exports
	          # stays in its own child shell - confirm that is intended.
	          bash set_proxy.sh
	          cd ${HOME}/triton-nightly/pytorch
	          rm -rf inductor_log
	          # The four precision/mode combinations run concurrently; the last
	          # argument differs per run (presumably the device index - verify
	          # against inductor_xpu_test.sh). A bare `wait` returns 0 whatever
	          # the background jobs returned, so failures surface only through
	          # the result checks in the later steps.
	          bash inductor_xpu_test.sh huggingface amp_bf16 inference accuracy xpu 0 &
	          bash inductor_xpu_test.sh huggingface amp_bf16 training accuracy xpu 1 &
	          bash inductor_xpu_test.sh huggingface amp_fp16 inference accuracy xpu 2 &
	          bash inductor_xpu_test.sh huggingface amp_fp16 training accuracy xpu 3 &
	          wait
	          cp ${HOME}/triton-nightly/triton/sw_info.log inductor_log/

	      # Writes <precision>/e2e_summary.log with one "key: value" line per
	      # metric. The "num_passed_<precision>_<inf|tra>:" keys are parsed again
	      # by the "Test Results Check" step, so their spelling must not change.
	      - name: ACC Test Results Overview
	        run: |
	          cd ${HOME}/triton-nightly/pytorch/inductor_log/huggingface

	          # Print one summary line and append it to the log in the current
	          # directory.
	          report() {
	            echo "$1" | tee -a ./e2e_summary.log
	          }

	          # summarize <precision> <mode> <tag> <total>
	          # Counts CSV lines containing "pass" for one precision/mode and
	          # reports passed, failed and pass rate. Plain $(( )) arithmetic is
	          # used because `let` returns a failing status when the result is
	          # 0, which would abort this `-e` shell exactly when nothing failed.
	          summarize() {
	            local precision=$1 mode=$2 tag=$3 total=$4
	            local csv="inductor_huggingface_${precision}_${mode}_xpu_accuracy.csv"
	            local passed failed rate
	            passed=$(grep "pass" "${csv}" | wc -l)
	            failed=$(( total - passed ))
	            # Guard the division so a header-only or missing CSV yields
	            # "N/A" instead of an awk division-by-zero error.
	            rate=$(awk -v p="${passed}" -v t="${total}" \
	              'BEGIN { if (t > 0) printf "%.2f%%", (p / t) * 100; else printf "N/A" }')
	            report "num_passed_${precision}_${tag}: ${passed}"
	            report "num_failed_${precision}_${tag}: ${failed}"
	            report "${precision}_${tag}_acc_pass_rate: ${rate}"
	          }

	          for precision in amp_bf16 amp_fp16; do
	            cd "${precision}"
	            report "============ Acc Check for HF ${precision} ============"
	            # Total = inference CSV line count minus the header line; the
	            # same total is reused for training, as in the original script.
	            csv_lines=$(cat "inductor_huggingface_${precision}_inference_xpu_accuracy.csv" | wc -l)
	            total=$(( csv_lines - 1 ))
	            report "num_total_${precision}: ${total}"
	            summarize "${precision}" inference inf "${total}"
	            summarize "${precision}" training tra "${total}"
	            cd ..
	          done

	      # Uploaded before the threshold gate so the logs are kept even when
	      # the gate fails the job.
	      # NOTE(review): the path hard-codes /home/gta while every script step
	      # uses ${HOME} - confirm they are the same directory on the runner.
	      # NOTE(review): upload-artifact v3 has been announced as deprecated;
	      # v4 refuses a second upload under an existing artifact name, and the
	      # Performance-Test job reuses this name, so an upgrade needs distinct
	      # names or `overwrite: true`.
	      - name: Upload Triton Inductor E2E Nightly Data
	        uses: actions/upload-artifact@v3
	        with:
	          name: Triton-Inductor-E2E-Nightly-Data
	          path: /home/gta/triton-nightly/pytorch/inductor_log/

	      # Gate: minimum number of passing models per precision/mode.
	      - name: Test Results Check
	        run: |
	          cd ${HOME}/triton-nightly/pytorch/inductor_log/huggingface

	          # check_threshold <precision> <tag> <mode> <minimum>
	          # Reads the latest "num_passed_<precision>_<tag>:" entry from the
	          # precision's summary log and fails when it is below <minimum>.
	          # A missing or non-numeric value is an explicit failure: fed to
	          # `[ ... -lt ... ]` unchecked it would make the test itself error
	          # out, which `if` treats as "not below threshold".
	          check_threshold() {
	            local precision=$1 tag=$2 mode=$3 minimum=$4
	            local passed
	            passed=$(grep "num_passed_${precision}_${tag}:" "${precision}/e2e_summary.log" \
	              | tail -n 1 | sed -e 's/.*://;s/[^0-9]//g')
	            if ! [[ "${passed}" =~ ^[0-9]+$ ]]; then
	              echo -e "[ERROR] Inductor E2E Nightly test for HF ${precision} ${mode} passed_num not found in ${precision}/e2e_summary.log"
	              return 1
	            fi
	            if [ "${passed}" -lt "${minimum}" ]; then
	              echo -e "[ERROR] Inductor E2E Nightly test for HF ${precision} ${mode} passed_num < ${minimum}"
	              return 1
	            fi
	          }

	          # Run every check so one log shows all regressions, then fail the
	          # step if any of them failed.
	          status=0
	          check_threshold amp_bf16 inf inference 45 || status=1
	          check_threshold amp_bf16 tra training 42 || status=1
	          check_threshold amp_fp16 inf inference 45 || status=1
	          check_threshold amp_fp16 tra training 42 || status=1
	          exit ${status}

	  # Stage 3: run the HuggingFace performance suite (only after the accuracy
	  # gate has passed), build the summary and upload the logs.
	  Performance-Test:

	    needs: Accuracy-Test

	    runs-on: [self-hosted, PVC_E2E]

	    steps:

	      - name: Perf Test for triton on PVC
	        run: |
	          echo -e "[ INFO ] Run E2E Perf test on Node $(hostname)"
	          source ${HOME}/miniconda3/bin/activate triton-nightly-test
	          # Pass the oneAPI version like the other two jobs do, so a
	          # dispatch-time override also applies to the perf run.
	          source ${HOME}/env_triton.sh "${E2E_ONEAPI_VERSION}"
	          cd ${HOME}/triton-nightly
	          # NOTE(review): run with `bash`, so anything set_proxy.sh exports
	          # stays in its own child shell - confirm that is intended.
	          bash set_proxy.sh
	          cd ${HOME}/triton-nightly/pytorch
	          # Same fan-out as the accuracy job; inductor_log is not cleared
	          # here, so the accuracy logs stay in place next to the perf logs.
	          bash inductor_xpu_test.sh huggingface amp_bf16 inference performance xpu 0 &
	          bash inductor_xpu_test.sh huggingface amp_bf16 training performance xpu 1 &
	          bash inductor_xpu_test.sh huggingface amp_fp16 inference performance xpu 2 &
	          bash inductor_xpu_test.sh huggingface amp_fp16 training performance xpu 3 &
	          wait

	      # NOTE(review): no conda activation in this step, so `python` is the
	      # runner's default interpreter - confirm it has the packages installed
	      # by the "Prepare Benchmark" step.
	      - name: Perf Test Results Generate and Overview
	        run: |
	          cd ${HOME}/triton-nightly/pytorch
	          python inductor_perf_summary.py -s huggingface -p amp_bf16 amp_fp16

	      # Same artifact name and path as the accuracy job's upload.
	      # NOTE(review): the path hard-codes /home/gta while every script step
	      # uses ${HOME} - confirm they are the same directory on the runner.
	      # NOTE(review): upload-artifact v3 has been announced as deprecated;
	      # v4 refuses a second upload under an existing artifact name, so an
	      # upgrade needs a distinct name or `overwrite: true` here.
	      - name: Upload Triton Inductor E2E Nightly Data
	        uses: actions/upload-artifact@v3
	        with:
	          name: Triton-Inductor-E2E-Nightly-Data
	          path: /home/gta/triton-nightly/pytorch/inductor_log/

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Inductor E2E Nightly Tests #35

Workflow file

Inductor E2E Nightly Tests #35

Jobs

Run details

Workflow file for this run