-
Notifications
You must be signed in to change notification settings - Fork 25
202 lines (192 loc) · 7.68 KB
/
pull.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
name: pull
on:
pull_request:
types:
- opened
- synchronize
- reopened
- converted_to_draft
- ready_for_review
- labeled
branches:
- main
- release/*
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true
permissions: read-all
jobs:
preci-linux-build:
# Don't run on forked repos and draft PRs
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
name: preci-linux
permissions:
issues: write
uses: ./.github/workflows/_linux_build.yml
with:
pytorch: main
runner: pvc_e2e
preci-ut:
# Don't run on forked repos and draft PRs
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
name: preci-linux
needs: preci-linux-build
uses: ./.github/workflows/_linux_ut.yml
with:
pytorch: ${{ needs.preci-linux-build.outputs.torch_commit_id }}
ut: op_regression,op_regression_dev1,op_extended,op_ut
runner: linux.idc.xpu
Inductor-XPU-E2E-CI-Tests:
name: preci-linux / e2e_test
needs: preci-linux-build
runs-on: pvc_e2e
# Don't run on forked repos and draft PRs
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
timeout-minutes: 900
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
- name: Prepare Conda ENV
run: |
which conda && conda clean -ay
conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci
conda create -n e2e_ci python=3.10 cmake ninja -y
source activate e2e_ci
pip install mkl-static mkl-include
pip install pandas scipy tqdm
- name: Prepare Stock Pytorch
run: |
pwd
cd ../ && rm -rf pytorch
source activate e2e_ci
git clone https://github.com/pytorch/pytorch pytorch
cd pytorch && git checkout ${{ needs.preci-linux-build.outputs.torch_commit_id }}
# apply PRs for stock pytorch
pip install requests
# https://github.com/mengfei25/pytorch/pull/18 internal use only for subset model list
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/mengfei25/pytorch/pull/18
git status && git show -s
git submodule sync && git submodule update --init --recursive
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
- name: Triton Installation
run: |
source activate e2e_ci
cd ../pytorch
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
TRITON_PINNED_COMMIT=$(cat .ci/docker/ci_commit_pins/triton-xpu.txt)
echo ${TRITON_REPO}@${TRITON_PINNED_COMMIT}
pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python"
- name: Download Pytorch wheel
if: ${{ inputs.pytorch }} != 'nightly_wheel'
uses: actions/download-artifact@v4
with:
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1
path: ${{ github.workspace }}
- name: Install Pytorch XPU
run: |
source activate e2e_ci
source .github/scripts/env.sh
cd ../pytorch
pip install -r requirements.txt
pip install --force-reinstall ${{ github.workspace }}/torch*.whl
- name: Identify pinned versions
run: |
cd ../pytorch
echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" >> "${GITHUB_ENV}"
echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" >> "${GITHUB_ENV}"
echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" >> "${GITHUB_ENV}"
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" >> "${GITHUB_ENV}"
echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" >> "${GITHUB_ENV}"
- name: Torch Config
run: |
echo "$GITHUB_ENV"
rm -rf ../pytorch/inductor_log
rm -rf /tmp/torchinductor_*
cd ..
source activate e2e_ci
python -c "import triton; print(triton.__version__)"
python pytorch/torch/utils/collect_env.py
- name: Huggingface BF16 Training Accuracy Test
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: huggingface
dt: bfloat16
mode: training
scenario: accuracy
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Huggingface FP16 Training Accuracy Test
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: huggingface
dt: float16
mode: training
scenario: accuracy
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Timm_models BF16 Training Accuracy Test
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: timm_models
dt: bfloat16
mode: training
scenario: accuracy
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Torchbench BF16 Training Accuracy Test
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: torchbench
dt: bfloat16
mode: training
scenario: accuracy
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Summarize archieve files
if: ${{ ! cancelled() }}
run: |
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
failed_case=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
if [ ${failed_case} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log
exit 1
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
with:
name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
path: ${{ github.workspace }}/upload_files
preci-linux-build-abi-0:
# Don't run on forked repos and draft PRs
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
name: preci-linux-abi-0
permissions:
issues: write
uses: ./.github/workflows/_linux_build.yml
with:
pytorch: main
abi: 0
runner: pvc_e2e
preci-ut-abi-0:
# Don't run on forked repos and draft PRs
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
name: preci-linux-abi-0
needs: preci-linux-build-abi-0
uses: ./.github/workflows/_linux_ut.yml
with:
abi: 0
pytorch: ${{ needs.preci-linux-build-abi-0.outputs.torch_commit_id }}
ut: op_extended
runner: linux.idc.xpu
preci-windows:
# Don't run on forked repos and draft PRs
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) && contains(github.event.pull_request.labels.*.name, 'windows_ci') }}
name: preci-windows
uses: ./.github/workflows/_windows_ut.yml
with:
ut: op_extended
runner: Windows_CI