Skip to content

Commit

Permalink
[NPU] fix NPU ci scripts, test=develop
Browse files Browse the repository at this point in the history
  • Loading branch information
qili93 committed Aug 24, 2021
1 parent cb28753 commit 9f2b4f9
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 17 deletions.
59 changes: 42 additions & 17 deletions paddle/scripts/paddle_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ function get_quickly_disable_ut() {

function card_test() {
set -m
CTEST_PARALLEL_LEVEL=2

case_count $1 $2
ut_startTime_s=`date +%s`

Expand Down Expand Up @@ -1725,16 +1725,22 @@ set +x
single_card_tests="$single_card_tests|^$testcase$"
fi
done <<< "$test_cases";
card_test "$single_card_tests" 1

ut_actual_total_startTime_s=`date +%s`

card_test "$single_card_tests" 1 # run cases 1 job each time with single GPU
collect_failed_tests

# add unit test retry for NPU
rm -f $tmp_dir/*
exec_times=0
retry_unittests_record=''
retry_time=3
exec_time_array=('first' 'second' 'third')
retry_time=4
exec_time_array=('first' 'second' 'third' 'fourth')
parallel_failed_tests_exec_retry_threshold=80
exec_retry_threshold=10
is_retry_execuate=0
rerun_ut_startTime_s=`date +%s`
if [ -n "$failed_test_lists" ];then
if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then
bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest
Expand All @@ -1743,14 +1749,30 @@ set +x
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
while ( [ $exec_times -lt $retry_time ] )
do
while ( [ $exec_times -lt $retry_time ] )
do
if [[ "${exec_times}" == "0" ]] ;then
if [ $need_retry_ut_count -lt $parallel_failed_tests_exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
elif [[ "${exec_times}" == "1" ]] ;then
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
fi
if [[ "$is_retry_execuate" == "0" ]];then
set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'`
set -e
if [[ "${exec_times}" == "1" ]];then
if [[ "${exec_times}" == "1" ]] || [[ "${exec_times}" == "3" ]];then
if [[ "${failed_test_lists}" == "" ]];then
break
else
Expand All @@ -1761,35 +1783,38 @@ set +x
echo "This is the ${exec_time_array[$exec_times]} time to re-run"
echo "========================================="
echo "The following unittest will be re-run:"
echo "${retry_unittests}"

echo "${retry_unittests}"
for line in ${retry_unittests[@]} ;
do
read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )"

if [[ "$tmp_one_tmp" != "" ]]; then
if [[ "$one_card_retry" == "" ]]; then
one_card_retry="^$line$"
else
one_card_retry="$one_card_retry|^$line$"
fi
fi

done

if [[ "$one_card_retry" != "" ]]; then
card_test "$one_card_retry" 1
card_test "$one_card_retry" 1 # run cases 1 job each time with single GPU
fi

exec_times=$[$exec_times+1]
failed_test_lists=''
collect_failed_tests
rm -f $tmp_dir/*
one_card_retry=''
done
else
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate=1
fi
fi
done
fi

rerun_ut_endTime_s=`date +%s`

echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
ut_actual_total_endTime_s=`date +%s`
echo "ipipe_log_param_actual_TestCases_Total_Time: $[ $ut_actual_total_endTime_s - $ut_actual_total_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
if [[ "$EXIT_CODE" != "0" ]]; then
show_ut_retry_result
fi
Expand Down
24 changes: 24 additions & 0 deletions tools/coverage/paddle_coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,30 @@ function gen_full_html_report_xpu() {
mv -f coverage-full.tmp coverage-full.info
}

function gen_full_html_report_npu() {
lcov --extract coverage.info \
'/paddle/paddle/fluid/operators/*npu*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0

mv -f coverage-full.tmp coverage-full.info

lcov --remove coverage-full.info \
'/paddle/paddle/fluid/framework/*_test*' \
'/paddle/paddle/fluid/*/*test*' \
'/paddle/paddle/fluid/*/*/*test*' \
'/paddle/paddle/fluid/inference/tests/*' \
'/paddle/paddle/fluid/inference/api/demo_ci/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0

mv -f coverage-full.tmp coverage-full.info
}

if [ ${WITH_XPU:-OFF} == "ON" ]; then
gen_full_html_report_xpu || true
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
gen_full_html_report_npu || true
else
gen_full_html_report || true
fi
Expand Down Expand Up @@ -183,6 +205,8 @@ echo "Assert Python Diff Coverage"

if [ ${WITH_XPU:-OFF} == "ON" ]; then
echo "XPU has no python coverage!"
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
echo "NPU has no python coverage!"
else
if [[ "${NO_PYTHON_COVERAGE_DATA}" != "1" ]];then
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1
Expand Down

1 comment on commit 9f2b4f9

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Congratulation! Your pull request passed all required CI. You could ask reviewer(s) to approve and merge. 🎉

Please sign in to comment.