Skip to content

Commit e19b49c

Browse files
tianshuo78520a authored and Manfredss committed
Add formers ci (PaddlePaddle#76162)
* Add formers ci * Add formers ci * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * update * update * update * update * update * update * update * update * update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update
1 parent 62bd71c commit e19b49c

File tree

5 files changed

+423
-133
lines changed

5 files changed

+423
-133
lines changed

.github/workflows/CI.yml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,25 @@ jobs:
7474
docker_npu_image: ${{ needs.build-docker.outputs.docker_npu_image }}
7575

7676
# Calls the reusable _Distribute-stable.yml workflow once the clone and
# build-docker jobs have finished. This commit changes the display name from
# "Distribute-stable" to "Distribute-stable-build".
distribute:
  name: Distribute-stable-build
  uses: ./.github/workflows/_Distribute-stable.yml
  needs:
    - clone
    - build-docker
  with:
    # Image reference published as an output of the build-docker job.
    docker_distribute_image: ${{ needs.build-docker.outputs.docker_distribute_image }}
    # Forwarded so the called workflow can skip its jobs when clone says so.
    clone-can-skip: ${{ needs.clone.outputs.can-skip }}
83+
84+
# Calls the reusable _Distribute-stable-Test.yml workflow. It waits for the
# `distribute` job in addition to clone and build-docker.
distribute-test:
  name: Distribute-stable-test
  uses: ./.github/workflows/_Distribute-stable-Test.yml
  needs:
    - clone
    - build-docker
    - distribute
  with:
    # Image reference published as an output of the build-docker job.
    docker_distribute_image: ${{ needs.build-docker.outputs.docker_distribute_image }}
    # Forwarded so the called workflow can skip its job when clone says so.
    clone-can-skip: ${{ needs.clone.outputs.can-skip }}
91+
92+
# Calls the reusable _Distribute-stable-Formers.yml workflow. It waits for the
# `distribute` job in addition to clone and build-docker.
distribute-formers:
  name: Distribute-stable-formers
  uses: ./.github/workflows/_Distribute-stable-Formers.yml
  needs:
    - clone
    - build-docker
    - distribute
  with:
    # Image reference published as an output of the build-docker job.
    docker_distribute_image: ${{ needs.build-docker.outputs.docker_distribute_image }}
    # Forwarded so the called workflow can skip its job when clone says so.
    clone-can-skip: ${{ needs.clone.outputs.can-skip }}
.github/workflows/_Distribute-stable-Formers.yml

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
# Reusable workflow (workflow_call). On a runner from the "Distribute" group it
# starts a GPU container, unpacks the Paddle.tar.gz archive of the pull request
# into /workspace, runs ci/formers_test.sh, uploads the PaddleFormers log
# directories and finally removes the container.
name: Distribute-stable-Formers

on:
  workflow_call:
    inputs:
      docker_distribute_image:
        type: string
        required: true
      # String flag: the job is skipped when this equals "true".
      clone-can-skip:
        type: string
        required: false
        default: "false"

env:
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  work_dir: /paddle
  PADDLE_ROOT: /paddle
  # Prefix of the container name created in the first step.
  TASK: paddle-CI-${{ github.event.pull_request.number }}-distribute-formers
  ci_scripts: /paddle/ci
  BRANCH: ${{ github.event.pull_request.base.ref }}
  CI_name: distribute
  no_proxy: bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn,paddlepaddle.org.cn
  # NOTE(review): no step in this workflow references docker_image; the
  # container is started from the step-level `formers_docker` image instead.
  docker_image: ${{ inputs.docker_distribute_image }}

defaults:
  run:
    shell: bash

jobs:
  formers-test:
    name: formers-Test
    if: ${{ inputs.clone-can-skip != 'true' }}
    runs-on:
      group: Distribute
    steps:
      - name: Check docker image and run container
        env:
          # Number-looking values are quoted so they stay strings in YAML.
          FLAGS_fraction_of_gpu_memory_to_use: "0.15"
          CTEST_OUTPUT_ON_FAILURE: "1"
          CTEST_PARALLEL_LEVEL: "4"
          WITH_GPU: "ON"
          WITH_AVX: "ON"
          WITH_DISTRIBUTE: "ON"
          WITH_TESTING: "ON"
          WITH_COVERAGE: "OFF"
          CMAKE_BUILD_TYPE: Release
          PADDLE_FRACTION_GPU_MEMORY_TO_USE: "0.15"
          PRECISION_TEST: "OFF"
          WITH_UNITY_BUILD: "ON"
          AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
          AGILE_REVISION: ${{ github.event.pull_request.head.sha }}
          WITH_INCREMENTAL_COVERAGE: "OFF"
          WITH_ONNXRUNTIME: "OFF"
          COVERALLS_UPLOAD: "ON"
          PADDLE_VERSION: "0.0.0"
          GIT_PR_ID: ${{ github.event.pull_request.number }}
          PY_VERSION: "3.10"
          CUDA_ARCH_NAME: Auto
          WITH_CUDNN_FRONTEND: "ON"
          FLAGS_enable_cudnn_frontend: "1"
          CACHE_DIR: /root/.cache/build
          CCACHE_DIR: /root/.ccache/formers
          CFS_DIR: /home/data/cfs
          # Forwarded into the container; presumably read by
          # ci/formers_test.sh (not visible here).
          paddle_whl: /workspace/dist/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
          # Image the container is actually started from.
          formers_docker: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda126-dev-latest
        run: |
          # Build "-v path:path" arguments for the host CUDA/NVIDIA libraries
          # and device nodes. They are expanded unquoted below on purpose so
          # that each "-v" pair becomes separate docker arguments.
          export CUDA_SO="$(\ls -d /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls -d /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
          export DEVICES="$(\ls -d /dev/nvidia* | xargs -I{} echo "-v {}:{}") $(\ls /dev/nvidia-caps/* | xargs -I{} echo "-v {}:{}")"
          export SMI="-v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi"
          container_name=${TASK}-test-$(date +%Y%m%d-%H%M%S)
          # Publish the name so later steps can read it as env.container_name.
          echo "container_name=${container_name}" >> "$GITHUB_ENV"
          docker run -d -t --name ${container_name} ${CUDA_SO} ${DEVICES} ${SMI} --runtime=nvidia --shm-size=32G --privileged \
            -v "/home/data/cfs:/home/data/cfs" \
            -v "/home/data/cfs/.cache/:/root/.cache" \
            -v "/home/data/cfs/.ccache:/root/.ccache" \
            -v "/ssd1/models:/home/models" \
            -v "/ssd1/root:/root" \
            -v "/dev/shm:/dev/shm" \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/workspace \
            -e BRANCH \
            -e PR_ID \
            -e COMMIT_ID \
            -e work_dir \
            -e PADDLE_ROOT \
            -e ci_scripts \
            -e CI_name \
            -e PF_HOME=/home/models \
            -e FLAGS_fraction_of_gpu_memory_to_use \
            -e CTEST_OUTPUT_ON_FAILURE \
            -e CTEST_PARALLEL_LEVEL \
            -e WITH_GPU \
            -e WITH_AVX \
            -e WITH_DISTRIBUTE \
            -e WITH_TESTING \
            -e WITH_COVERAGE \
            -e CMAKE_BUILD_TYPE \
            -e PADDLE_FRACTION_GPU_MEMORY_TO_USE \
            -e PRECISION_TEST \
            -e WITH_UNITY_BUILD \
            -e AGILE_COMPILE_BRANCH \
            -e AGILE_REVISION \
            -e WITH_INCREMENTAL_COVERAGE \
            -e WITH_ONNXRUNTIME \
            -e COVERALLS_UPLOAD \
            -e PADDLE_VERSION \
            -e GIT_PR_ID \
            -e PY_VERSION \
            -e CUDA_ARCH_NAME \
            -e WITH_CUDNN_FRONTEND \
            -e FLAGS_enable_cudnn_frontend \
            -e CACHE_DIR \
            -e CCACHE_DIR \
            -e CFS_DIR \
            -e paddle_whl \
            -e no_proxy \
            -w /workspace --network host ${formers_docker}

      - name: Download paddle.tar.gz and merge target branch
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          # Empty the working directory (/workspace), dotfiles included.
          rm -rf * .[^.]*
          echo "Downloading Paddle.tar.gz"
          wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/gpups/${{ env.PR_ID }}/${{ env.COMMIT_ID }}/Paddle.tar.gz --no-check-certificate
          echo "Extracting Paddle.tar.gz"
          # The archive is unpacked with pzstd despite its .tar.gz name.
          tar --use-compress-program="pzstd" -xf Paddle.tar.gz --strip-components=1
          rm Paddle.tar.gz
          git config --global --add safe.directory /workspace
          # NOTE(review): presumably the branch prepared by the clone job; confirm.
          git checkout test
          '

      - name: Test
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          # Both files live on the host three levels above the workspace and
          # are visible in the container through the identical-path bind mount.
          source ${{ github.workspace }}/../../../proxy
          source ${{ github.workspace }}/../../../AISTUDIO_ACCESS_TOKEN
          set -ex
          bash /workspace/ci/formers_test.sh
          '

      - name: Upload and display logs
        if: always()
        env:
          home_path: ${{ github.workspace }}/..
          bos_file: ${{ github.workspace }}/../bos_retry/BosClient.py
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          export AK=paddle
          export SK=paddle
          # Fetch the BOS upload client only when it is not already unpacked.
          if [ ! -f "${{ env.bos_file }}" ]; then
            wget -q --no-proxy -O ${{ env.home_path }}/bos_retry.tar.gz https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate
            # -p: do not fail when the directory is left over from an earlier run.
            mkdir -p ${{ env.home_path }}/bos_retry
            tar xf ${{ env.home_path }}/bos_retry.tar.gz -C ${{ env.home_path }}/bos_retry
          fi
          # Runs with a PR number upload under PR_ID/COMMIT_ID, others under a date.
          if [ -n "$PR_ID" ] && [ "$PR_ID" != "0" ]; then
            bos_prefix="${PR_ID}/${COMMIT_ID}"
          else
            bos_prefix="schedule/$(date +%Y%m%d)"
          fi
          # upload_logs <log_dir> <bos_subdir>
          # Uploads every entry of <log_dir> and prints its public URL. This
          # step runs under always(), so the directory may not exist when an
          # earlier step failed; in that case the upload is skipped instead of
          # trying to upload a literal "*" from the wrong directory.
          upload_logs() {
            local log_dir="$1" bos_subdir="$2" path file
            if [ ! -d "$log_dir" ]; then
              echo "No log directory ${log_dir}; skipping upload"
              return 0
            fi
            cd "$log_dir"
            for path in "$log_dir"/*; do
              # An unmatched glob yields the pattern itself; ignore it.
              [ -e "$path" ] || continue
              file=$(basename "$path")
              python ${{ env.bos_file }} "$file" "paddle-github-action/PR/PaddleFormers/${bos_subdir}/${bos_prefix}/logs"
              echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleFormers/${bos_subdir}/${bos_prefix}/logs/$file"
            done
          }
          # api test logs
          upload_logs /workspace/PaddleFormers/unittest_logs unittest-gpu
          # models test logs
          upload_logs /workspace/PaddleFormers/model_unittest_logs model-unittest-gpu
          '

      - name: Terminate and delete the container
        if: always()
        run: |
          # Best effort: keep going even if the container is already gone.
          set +e
          docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*'
          docker rm -f ${{ env.container_name }}
.github/workflows/_Distribute-stable-Test.yml

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
# Reusable workflow (workflow_call). On a runner from the "Distribute" group it
# starts a GPU container from the distribute image, unpacks the Paddle.tar.gz
# archive of the pull request into /paddle, runs ci/distribute_test.sh, uploads
# the files found in /case_logs and finally removes the container.
name: Distribute-stable-Test

on:
  workflow_call:
    inputs:
      docker_distribute_image:
        type: string
        required: true
      # String flag: the job is skipped when this equals "true".
      clone-can-skip:
        type: string
        required: false
        default: "false"

env:
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  work_dir: /paddle
  PADDLE_ROOT: /paddle
  # Prefix of the container name created in the first step.
  TASK: paddle-CI-${{ github.event.pull_request.number }}-distribute-test
  ci_scripts: /paddle/ci
  BRANCH: ${{ github.event.pull_request.base.ref }}
  CI_name: distribute
  no_proxy: bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn,paddlepaddle.org.cn
  # Image the container is started from (see the docker run command below).
  docker_image: ${{ inputs.docker_distribute_image }}

defaults:
  run:
    shell: bash

jobs:
  test:
    name: Test
    if: ${{ inputs.clone-can-skip != 'true' }}
    runs-on:
      group: Distribute
    steps:
      - name: Check docker image and run container
        env:
          # Number-looking values are quoted so they stay strings in YAML.
          FLAGS_fraction_of_gpu_memory_to_use: "0.15"
          CTEST_OUTPUT_ON_FAILURE: "1"
          CTEST_PARALLEL_LEVEL: "4"
          WITH_GPU: "ON"
          WITH_AVX: "ON"
          WITH_DISTRIBUTE: "ON"
          WITH_TESTING: "ON"
          WITH_COVERAGE: "OFF"
          CMAKE_BUILD_TYPE: Release
          PADDLE_FRACTION_GPU_MEMORY_TO_USE: "0.15"
          PRECISION_TEST: "OFF"
          WITH_UNITY_BUILD: "ON"
          AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
          AGILE_REVISION: ${{ github.event.pull_request.head.sha }}
          WITH_INCREMENTAL_COVERAGE: "OFF"
          WITH_ONNXRUNTIME: "OFF"
          COVERALLS_UPLOAD: "ON"
          PADDLE_VERSION: "0.0.0"
          GIT_PR_ID: ${{ github.event.pull_request.number }}
          PY_VERSION: "3.10"
          CUDA_ARCH_NAME: Auto
          WITH_CUDNN_FRONTEND: "ON"
          FLAGS_enable_cudnn_frontend: "1"
          CACHE_DIR: /root/.cache/build
          CCACHE_DIR: /root/.ccache/gpubox
        run: |
          # Build "-v path:path" arguments for the host CUDA/NVIDIA libraries
          # and device nodes. They are expanded unquoted below on purpose so
          # that each "-v" pair becomes separate docker arguments.
          export CUDA_SO="$(\ls -d /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls -d /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
          export DEVICES="$(\ls -d /dev/nvidia* | xargs -I{} echo "-v {}:{}") $(\ls /dev/nvidia-caps/* | xargs -I{} echo "-v {}:{}")"
          export SMI="-v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi"
          container_name=${TASK}-test-$(date +%Y%m%d-%H%M%S)
          # Publish the name so later steps can read it as env.container_name.
          echo "container_name=${container_name}" >> "$GITHUB_ENV"
          docker run -d -t --name ${container_name} ${CUDA_SO} ${DEVICES} ${SMI} --runtime=nvidia --shm-size=32G \
            -v "/home/data/cfs:/home/data/cfs" \
            -v "/home/data/cfs/.cache/:/root/.cache" \
            -v "/home/data/cfs/.ccache:/root/.ccache" \
            -v "/ssd1/root:/root" \
            -v "/dev/shm:/dev/shm" \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/paddle \
            -e BRANCH \
            -e PR_ID \
            -e COMMIT_ID \
            -e work_dir \
            -e PADDLE_ROOT \
            -e ci_scripts \
            -e CI_name \
            -e FLAGS_fraction_of_gpu_memory_to_use \
            -e CTEST_OUTPUT_ON_FAILURE \
            -e CTEST_PARALLEL_LEVEL \
            -e WITH_GPU \
            -e WITH_AVX \
            -e WITH_DISTRIBUTE \
            -e WITH_TESTING \
            -e WITH_COVERAGE \
            -e CMAKE_BUILD_TYPE \
            -e PADDLE_FRACTION_GPU_MEMORY_TO_USE \
            -e PRECISION_TEST \
            -e WITH_UNITY_BUILD \
            -e AGILE_COMPILE_BRANCH \
            -e AGILE_REVISION \
            -e WITH_INCREMENTAL_COVERAGE \
            -e WITH_ONNXRUNTIME \
            -e COVERALLS_UPLOAD \
            -e PADDLE_VERSION \
            -e GIT_PR_ID \
            -e PY_VERSION \
            -e CUDA_ARCH_NAME \
            -e WITH_CUDNN_FRONTEND \
            -e FLAGS_enable_cudnn_frontend \
            -e CACHE_DIR \
            -e CCACHE_DIR \
            -e no_proxy \
            -w /paddle --network host ${docker_image}

      - name: Download paddle.tar.gz and merge target branch
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          # Empty the working directory (/paddle), dotfiles included.
          rm -rf * .[^.]*
          echo "Downloading Paddle.tar.gz"
          wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/gpups/${{ env.PR_ID }}/${{ env.COMMIT_ID }}/Paddle.tar.gz --no-check-certificate
          echo "Extracting Paddle.tar.gz"
          # The archive is unpacked with pzstd despite its .tar.gz name.
          tar --use-compress-program="pzstd" -xf Paddle.tar.gz --strip-components=1
          rm Paddle.tar.gz
          # NOTE(review): presumably the branch prepared by the clone job; confirm.
          git checkout test
          '

      - name: Test
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          # The proxy file lives on the host three levels above the workspace
          # and is visible in the container through the identical-path bind mount.
          source ${{ github.workspace }}/../../../proxy
          bash ${ci_scripts}/distribute_test.sh
          '

      - name: Upload and display logs
        if: always()
        env:
          home_path: ${{ github.workspace }}/..
          bos_file: ${{ github.workspace }}/../bos_retry/BosClient.py
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          export AK=paddle
          export SK=paddle
          # Fetch the BOS upload client only when it is not already unpacked.
          if [ ! -f "${{ env.bos_file }}" ]; then
            wget -q --no-proxy -O ${{ env.home_path }}/bos_retry.tar.gz https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate
            # -p: do not fail when the directory is left over from an earlier run.
            mkdir -p ${{ env.home_path }}/bos_retry
            tar xf ${{ env.home_path }}/bos_retry.tar.gz -C ${{ env.home_path }}/bos_retry
          fi
          # This step runs under always(), so /case_logs may not exist when an
          # earlier step failed; skip the upload instead of trying to upload a
          # literal "*" from the wrong directory.
          if [ -d /case_logs ]; then
            cd /case_logs
            for FILE in /case_logs/*; do
              # An unmatched glob yields the pattern itself; ignore it.
              [ -e "$FILE" ] || continue
              file=$(basename "$FILE")
              python ${{ env.bos_file }} "$file" "paddle-github-action/PR/Distribute-Stable/${PR_ID}/${COMMIT_ID}/logs"
              echo "$file: https://paddle-github-action.bj.bcebos.com/PR/Distribute-Stable/${PR_ID}/${COMMIT_ID}/logs/$file"
            done
          else
            echo "No /case_logs directory; skipping upload"
          fi
          '

      - name: Terminate and delete the container
        if: always()
        run: |
          # Best effort: keep going even if the container is already gone.
          set +e
          docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*'
          docker rm -f ${{ env.container_name }}

0 commit comments

Comments
 (0)