Skip to content

Commit

Permalink
[NPU] fix NPU ci scripts, test=develop (#35095)
Browse files Browse the repository at this point in the history
  • Loading branch information
qili93 authored Aug 24, 2021
1 parent de64515 commit a332352
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 17 deletions.
59 changes: 42 additions & 17 deletions paddle/scripts/paddle_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ function get_quickly_disable_ut() {

function card_test() {
set -m
CTEST_PARALLEL_LEVEL=2

case_count $1 $2
ut_startTime_s=`date +%s`

Expand Down Expand Up @@ -1725,16 +1725,22 @@ set +x
single_card_tests="$single_card_tests|^$testcase$"
fi
done <<< "$test_cases";
card_test "$single_card_tests" 1

ut_actual_total_startTime_s=`date +%s`

card_test "$single_card_tests" 1 # run cases 1 job each time with single GPU
collect_failed_tests

# add unit test retry for NPU
rm -f $tmp_dir/*
exec_times=0
retry_unittests_record=''
retry_time=3
exec_time_array=('first' 'second' 'third')
retry_time=4
exec_time_array=('first' 'second' 'third' 'fourth')
parallel_failed_tests_exec_retry_threshold=80
exec_retry_threshold=10
is_retry_execuate=0
rerun_ut_startTime_s=`date +%s`
if [ -n "$failed_test_lists" ];then
if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then
bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest
Expand All @@ -1743,14 +1749,30 @@ set +x
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
while ( [ $exec_times -lt $retry_time ] )
do
while ( [ $exec_times -lt $retry_time ] )
do
if [[ "${exec_times}" == "0" ]] ;then
if [ $need_retry_ut_count -lt $parallel_failed_tests_exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
elif [[ "${exec_times}" == "1" ]] ;then
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
fi
if [[ "$is_retry_execuate" == "0" ]];then
set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'`
set -e
if [[ "${exec_times}" == "1" ]];then
if [[ "${exec_times}" == "1" ]] || [[ "${exec_times}" == "3" ]];then
if [[ "${failed_test_lists}" == "" ]];then
break
else
Expand All @@ -1761,35 +1783,38 @@ set +x
echo "This is the ${exec_time_array[$exec_times]} time to re-run"
echo "========================================="
echo "The following unittest will be re-run:"
echo "${retry_unittests}"

echo "${retry_unittests}"
for line in ${retry_unittests[@]} ;
do
read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )"

if [[ "$tmp_one_tmp" != "" ]]; then
if [[ "$one_card_retry" == "" ]]; then
one_card_retry="^$line$"
else
one_card_retry="$one_card_retry|^$line$"
fi
fi

done

if [[ "$one_card_retry" != "" ]]; then
card_test "$one_card_retry" 1
card_test "$one_card_retry" 1 # run cases 1 job each time with single GPU
fi

exec_times=$[$exec_times+1]
failed_test_lists=''
collect_failed_tests
rm -f $tmp_dir/*
one_card_retry=''
done
else
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate=1
fi
fi
done
fi

rerun_ut_endTime_s=`date +%s`

echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
ut_actual_total_endTime_s=`date +%s`
echo "ipipe_log_param_actual_TestCases_Total_Time: $[ $ut_actual_total_endTime_s - $ut_actual_total_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
if [[ "$EXIT_CODE" != "0" ]]; then
show_ut_retry_result
fi
Expand Down
24 changes: 24 additions & 0 deletions tools/coverage/paddle_coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,30 @@ function gen_full_html_report_xpu() {
mv -f coverage-full.tmp coverage-full.info
}

function gen_full_html_report_npu() {
lcov --extract coverage.info \
'/paddle/paddle/fluid/operators/*npu*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0

mv -f coverage-full.tmp coverage-full.info

lcov --remove coverage-full.info \
'/paddle/paddle/fluid/framework/*_test*' \
'/paddle/paddle/fluid/*/*test*' \
'/paddle/paddle/fluid/*/*/*test*' \
'/paddle/paddle/fluid/inference/tests/*' \
'/paddle/paddle/fluid/inference/api/demo_ci/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0

mv -f coverage-full.tmp coverage-full.info
}

if [ ${WITH_XPU:-OFF} == "ON" ]; then
gen_full_html_report_xpu || true
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
gen_full_html_report_npu || true
else
gen_full_html_report || true
fi
Expand Down Expand Up @@ -183,6 +205,8 @@ echo "Assert Python Diff Coverage"

if [ ${WITH_XPU:-OFF} == "ON" ]; then
echo "XPU has no python coverage!"
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
echo "NPU has no python coverage!"
else
if [[ "${NO_PYTHON_COVERAGE_DATA}" != "1" ]];then
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1
Expand Down

0 comments on commit a332352

Please sign in to comment.