
Commit 5aa3c27

Merge remote-tracking branch 'origin/develop' into all
2 parents: 1875fc1 + 7731d1c

375 files changed: +5234 / -4260 lines changed


.github/workflows/Api-Benchmark-baseline.yml
Lines changed: 15 additions & 39 deletions

@@ -18,7 +18,6 @@ on:
       - others
   schedule:
     - cron: '0 21 * * *'
-    - cron: '0 22 * * 3'

 permissions: read-all

@@ -52,7 +51,7 @@ jobs:
       is_pr: 'false'

   api-benchmark-baseline-schedule:
-    name: Api benchmark baseline
+    name: Api benchmark baseline with schedule
     strategy:
       matrix:
         run-labels: [api-bm-20, api-bm-27]
@@ -63,49 +62,26 @@ jobs:
       baseline: 'true'
       run-labels: ${{ matrix.run-labels }}

-  api-benchmark-baseline-pr:
-    name: Api benchmark baseline
+  api-benchmark-baseline-pr-20:
+    name: Api benchmark baseline with PR on 20
     if: github.event_name == 'workflow_dispatch' && github.event.inputs.job-name == 'api-benchmark'
-    strategy:
-      matrix:
-        run-labels: [api-bm-20, api-bm-27]
     uses: ./.github/workflows/_Api-Benchmark.yml
     needs: [clone, build-docker]
     with:
       docker_build_image: ${{ needs.build-docker.outputs.docker_build_image }}
       baseline: 'true'
       MANUALLY_PR_ID: ${{ inputs.PR_ID }}
       MANUALLY_COMMIT_ID: ${{ inputs.COMMIT_ID }}
-      run-labels: ${{ matrix.run-labels }}
+      run-labels: api-bm-20

-  test1:
-    runs-on: ubuntu-latest
-    if: github.event.schedule == '0 0 * * *'
-    steps:
-      - name: Test
-        run: |
-          echo "test1"
-
-  test2:
-    runs-on: ubuntu-latest
-    if: github.event.schedule == '0 21 * * *'
-    steps:
-      - name: Test
-        run: |
-          echo "test2"
-
-  test3:
-    runs-on: ubuntu-latest
-    if: github.event.schedule == '0 22 * * 3'
-    steps:
-      - name: Test
-        run: |
-          echo "test3"
-
-  test4:
-    runs-on: ubuntu-latest
-    if: github.event.schedule == '0 21 * * 1'
-    steps:
-      - name: Test
-        run: |
-          echo "test4"
+  api-benchmark-baseline-pr-27:
+    name: Api benchmark baseline with PR on 27
+    if: github.event_name == 'workflow_dispatch' && github.event.inputs.job-name == 'api-benchmark'
+    uses: ./.github/workflows/_Api-Benchmark.yml
+    needs: [clone, build-docker]
+    with:
+      docker_build_image: ${{ needs.build-docker.outputs.docker_build_image }}
+      baseline: 'true'
+      MANUALLY_PR_ID: ${{ inputs.PR_ID }}
+      MANUALLY_COMMIT_ID: ${{ inputs.COMMIT_ID }}
+      run-labels: api-bm-27

.github/workflows/_Api-Benchmark.yml
Lines changed: 5 additions & 4 deletions

@@ -31,7 +31,7 @@ env:
   PADDLE_ROOT: /paddle
   TASK: paddle-CI-${{ github.event.pull_request.number }}-api-benchmark
   ci_scripts: /paddle/ci
-  BRANCH: ${{ github.event.pull_request.base.ref }}
+  BRANCH: ${{ github.event.pull_request.base.ref || github.ref_name }}
   CI_name: api-benchmark
   no_proxy: "bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"

@@ -76,7 +76,7 @@ jobs:
       - name: Check docker image and run container
         env:
           python: "python3.10"
-          GIT_PR_ID: ${{ github.event.pull_request.number }}
+          GIT_PR_ID: ${{ github.event.pull_request.number || '0' }}
           GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           RUN_ID: ${{ github.run_id }}
           wheel_link: https://paddle-github-action.bj.bcebos.com/PR/build/${{ github.event.pull_request.number }}/${{ github.event.pull_request.head.sha }}/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
@@ -134,13 +134,14 @@ jobs:
           cp /paddle/PTSTools/Uploader/apibm_config.yml .
           source ${{ github.workspace }}/../../../proxy
           if [[ "${{ inputs.baseline }}" == "true" ]];then
+            set -e
             if [[ "${{ inputs.MANUALLY_PR_ID }}" == "" ]]; then
-              export pr_wheel_link=https://paddle-github-action.bj.bcebos.com/PR/build/${{ github.event.pull_request.number }}/${{ github.event.pull_request.head.sha }}/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
+              export pr_wheel_link=https://paddle-github-action.bj.bcebos.com/PR/build/$PR_ID/$COMMIT_ID/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
             else
               export pr_wheel_link=https://paddle-github-action.bj.bcebos.com/PR/build/${{ inputs.MANUALLY_PR_ID }}/${{ inputs.MANUALLY_COMMIT_ID }}/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
             fi
             ${python} -m pip install $pr_wheel_link
-            ${python} runner_ci_action.py --yaml ../yaml/api_benchmark_fp32.yml --baseline_whl_link $pr_wheel_link
+            ${python} runner_ci_multipro_action.py --yaml ../yaml/sort_api_benchmark_fp32.yml --core_index ${core_index} --baseline_whl_link $pr_wheel_link
             exit 0
           fi
           ${python} -m pip install $wheel_link

.pre-commit-config.yaml
Lines changed: 22 additions & 22 deletions

@@ -81,33 +81,33 @@ repos:

       | python/paddle/distributed/[b-e].+

-    # | python/paddle/distributed/f.+
+      | python/paddle/distributed/f.+

-    # | python/paddle/distributed/[g-z].+
+      | python/paddle/distributed/[g-z].+

-    # | python/paddle/[e-i].+
+      | python/paddle/[e-i].+

-    # | python/paddle/j.+
+      | python/paddle/j.+

-    # | python/paddle/[k-n].+
+      | python/paddle/[k-n].+

-    # | python/paddle/[o-t].+
+      | python/paddle/[o-t].+

-    # | python/paddle/[u-z].+
+      | python/paddle/[u-z].+

-    # | python/_.+
+      | python/_.+

     # | test/a.+

     # | test/[b-h].+

-    # | test/[i-k].+
+      | test/[i-k].+

     # | test/l.+

-    # | test/[m-z].+
+      | test/[m-z].+

-    # | tools/.+
+      | tools/.+
     )$
 - repo: https://github.com/astral-sh/ruff-pre-commit
   rev: v0.12.0
@@ -137,33 +137,33 @@ repos:

     # | python/paddle/distributed/[b-e].+

-      | python/paddle/distributed/f.+
+    # | python/paddle/distributed/f.+

-      | python/paddle/distributed/[g-z].+
+    # | python/paddle/distributed/[g-z].+

-      | python/paddle/[e-i].+
+    # | python/paddle/[e-i].+

-      | python/paddle/j.+
+    # | python/paddle/j.+

-      | python/paddle/[k-n].+
+    # | python/paddle/[k-n].+

-      | python/paddle/[o-t].+
+    # | python/paddle/[o-t].+

-      | python/paddle/[u-z].+
+    # | python/paddle/[u-z].+

-      | python/_.+
+    # | python/_.+

       | test/a.+

       | test/[b-h].+

-      | test/[i-k].+
+    # | test/[i-k].+

       | test/l.+

-      | test/[m-z].+
+    # | test/[m-z].+

-      | tools/.+
+    # | tools/.+
     )$
     # For C++ files
 - repo: local

ci/auto_parallel/ci_auto_parallel.sh
Lines changed: 9 additions & 0 deletions

@@ -77,6 +77,7 @@ get_diff_TO_case(){
     case_list[${#case_list[*]}]=llama_auto
     case_list[${#case_list[*]}]=gpt-3_auto
     case_list[${#case_list[*]}]=gpt-3_dygraph
+    case_list[${#case_list[*]}]=deepseek_auto
 }

 print_info(){
@@ -258,6 +259,14 @@ if [[ ${#case_list[*]} -ne 0 ]];then
         execute_func_list $cmd gpt-3_dygraph
         let case_num++
         clean_file ${work_dir}/../PaddleNLP/llm
+    elif [[ ${case} == "deepseek_auto" ]];then
+        cmd=${work_dir}/../PaddleNLP/scripts/distribute/ci_case_auto.sh
+        timeout 5m bash $cmd prepare_case deepseek_case_list_auto $FLAGS_install_deps $FLAGS_download_data
+        execute_func_list $cmd deepseek_auto
+        export FLAGS_install_deps=1
+        export FLAGS_download_data="deepseek ""$FLAGS_download_data"
+        let case_num++
+        clean_file ${work_dir}/../PaddleNLP/llm/auto_parallel/deepseek-v3
     else
         echo -e "\033[31m ---- no ${case} \033"
         let case_num++

ci/coverage_test.sh
Lines changed: 6 additions & 2 deletions

@@ -24,10 +24,14 @@ function is_run_distribute_in_op_test() {
             echo "export FLAGS_COVERAGE_RUN_AUTO_PARALLEL_IN_OP_TEST=1" >> "$HOME/.bashrc"
         fi
     done
-    ALL_CHANGE_FILES=`git diff --numstat upstream/$BRANCH | awk '{print $3}' | grep ".py"|| true`
+    ALL_CHANGE_FILES=$(git diff --name-only upstream/$BRANCH | grep ".py"|| true)
     echo ${ALL_CHANGE_FILES}
     for CHANGE_FILE in ${ALL_CHANGE_FILES}; do
-        ALL_OPTEST_BAN_AUTO_PARALLEL_TEST=`git diff -U0 upstream/$BRANCH ${PADDLE_ROOT}/${CHANGE_FILE} | grep "+" | grep "check_auto_parallel=" || true`
+        TARGET_FILE="${PADDLE_ROOT}/${CHANGE_FILE}"
+        if [ ! -f "$TARGET_FILE" ]; then
+            continue
+        fi
+        ALL_OPTEST_BAN_AUTO_PARALLEL_TEST=`git diff -U0 upstream/$BRANCH "TARGET_FILE" | grep "+" | grep "check_auto_parallel=" || true`
         if [ "${ALL_OPTEST_BAN_AUTO_PARALLEL_TEST}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
             export FLAGS_COVERAGE_RUN_AUTO_PARALLEL_IN_OP_TEST=1
             echo "export FLAGS_COVERAGE_RUN_AUTO_PARALLEL_IN_OP_TEST=1" >> "$HOME/.bashrc"

paddle/fluid/eager/to_static/run_program_impl.cc
Lines changed: 1 addition & 1 deletion

@@ -573,7 +573,7 @@ std::vector<paddle::Tensor> RunProgramImpl(
 #endif

   auto passed_kernel_program = paddle::framework::ApplyIrPass(
-      forward_program.get(), place, no_need_buffer_name_set);
+      program.get(), place, no_need_buffer_name_set);
   const auto &new_block = passed_kernel_program->block();
   passed_kernel_program = paddle::framework::ApplyRemoveShadowFeedPass(
       std::move(passed_kernel_program), new_block, place, global_inner_scope);

paddle/fluid/pybind/eager_utils.h
Lines changed: 1 addition & 1 deletion

@@ -479,7 +479,7 @@ class TensorListBufferAllocator {
     bool is_available;
     std::vector<paddle::Tensor> buffer;
     TensorListBuffer() = default;
-    explicit TensorListBuffer(ssize_t len) : buffer(len), is_available(true) {}
+    explicit TensorListBuffer(ssize_t len) : is_available(true), buffer(len) {}
   };

 using MapType =

paddle/fluid/pybind/slice_utils.h
Lines changed: 76 additions & 0 deletions

@@ -820,6 +820,43 @@ static paddle::Tensor getValueForBoolTensor(const paddle::Tensor& tensor,
     indices_int64.push_back(indice);
   }

+  // AMP Logic
+  if (egr::Controller::Instance().GetAMPLevel() !=
+      paddle::imperative::AmpLevel::O0) {
+    auto op_name = phi::TransToFluidOpName("index_elementwise_get");
+    paddle::small_vector<std::vector<paddle::Tensor>,
+                         egr::kSlotSmallVectorSize>
+        amp_tensors_vector = {{self_tensor}};
+
+    auto amp_dst_dtype =
+        paddle::imperative::GetAmpDestDtype(op_name, amp_tensors_vector);
+
+    auto new_self_tensor = paddle::imperative::AmpAutoCast(
+        "self_tensor", self_tensor, amp_dst_dtype, op_name);
+    auto new_tensor = paddle::imperative::AmpAutoCast(
+        "tensor", tensor, amp_dst_dtype, op_name);
+
+    {
+      paddle::imperative::AutoCastGuard guard(
+          egr::Controller::Instance().GetCurrentAmpAttrs(),
+          paddle::imperative::AmpLevel::O0);
+
+      AdvancedIndex ad = AdvancedIndex(new_tensor, indices_int64);
+      const bool is_combined = false;
+      const bool accumulate = false;
+
+      return index_elementwise_get_ad_func(new_self_tensor,
+                                           ad.indices,
+                                           ad.src_sizes,
+                                           ad.src_strides,
+                                           ad.indexed_sizes,
+                                           ad.indexed_strides,
+                                           slice_offset,
+                                           accumulate,
+                                           is_combined);
+    }
+  }
+
   AdvancedIndex ad = AdvancedIndex(tensor, indices_int64);
   const bool is_combined = false;
   const bool accumulate = false;
@@ -1287,6 +1324,45 @@ static void ApplyGetitem(const int index_size,
                          transed_tensor,
                          &transed_index_int64);

+  // AMP Logic
+  if (egr::Controller::Instance().GetAMPLevel() !=
+      paddle::imperative::AmpLevel::O0) {
+    auto op_name = phi::TransToFluidOpName("index_elementwise_get");
+    paddle::small_vector<std::vector<paddle::Tensor>,
+                         egr::kSlotSmallVectorSize>
+        amp_tensors_vector = {{*self_tensor}};
+
+    auto amp_dst_dtype =
+        paddle::imperative::GetAmpDestDtype(op_name, amp_tensors_vector);
+
+    auto new_self_tensor = paddle::imperative::AmpAutoCast(
+        "self_tensor", *self_tensor, amp_dst_dtype, op_name);
+    auto new_transed_tensor = paddle::imperative::AmpAutoCast(
+        "transed_tensor", *transed_tensor, amp_dst_dtype, op_name);
+
+    {
+      paddle::imperative::AutoCastGuard guard(
+          egr::Controller::Instance().GetCurrentAmpAttrs(),
+          paddle::imperative::AmpLevel::O0);
+
+      AdvancedIndex ad =
+          AdvancedIndex(new_transed_tensor, transed_index_int64);
+
+      const bool is_combined = (index_size == 1) ? false : true;
+      const bool accumulate = true;
+      *out = index_elementwise_get_ad_func(new_self_tensor,
+                                           ad.indices,
+                                           ad.src_sizes,
+                                           ad.src_strides,
+                                           ad.indexed_sizes,
+                                           ad.indexed_strides,
+                                           slice_offset,
+                                           accumulate,
+                                           is_combined);
+    }
+    return;
+  }
+
   AdvancedIndex ad = AdvancedIndex(*transed_tensor, transed_index_int64);
   // is_combined:
   // Distinguishes between regular indexing (single index) and combined
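
To make the intent of these new AMP branches concrete, a minimal Python sketch of the user-facing path they cover — boolean-mask getitem under auto-cast. This is illustrative only (the tensor, mask, and level are made-up assumptions, and whether a cast actually happens depends on the device and AMP settings), not part of this commit:

import paddle

x = paddle.rand([4, 4])   # float32 source tensor
mask = x > 0.5            # bool index tensor

# With AMP enabled (any level other than O0), __getitem__ on a bool mask
# now auto-casts the source tensor before dispatching to index_elementwise_get.
with paddle.amp.auto_cast(level='O1'):
    y = x[mask]

print(y.shape, y.dtype)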

paddle/phi/kernels/cpu/cum_grad_kernel.cc
Lines changed: 2 additions & 0 deletions

@@ -54,6 +54,8 @@ PD_REGISTER_KERNEL(cumsum_grad,
                    phi::CumsumGradKernel,
                    float,
                    double,
+                   uint8_t,
+                   int8_t,
                    int16_t,
                    int,
                    int64_t,

paddle/phi/kernels/cpu/cum_kernel.cc
Lines changed: 2 additions & 0 deletions

@@ -273,6 +273,8 @@ PD_REGISTER_KERNEL(cumsum,
                    phi::CumsumKernel,
                    float,
                    double,
+                   uint8_t,
+                   int8_t,
                    int16_t,
                    int,
                    int64_t,
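
With uint8_t and int8_t added to the CPU cumsum/cumsum_grad registrations, the Python API accepts these dtypes directly on CPU. A small illustrative check, assuming a build that includes this change (values shown are the expected running sums):

import paddle

paddle.set_device('cpu')

x = paddle.to_tensor([1, 2, 3, 4], dtype='uint8')
print(paddle.cumsum(x))   # expected: [1, 3, 6, 10], dtype uint8

z = paddle.to_tensor([-1, 2, -3], dtype='int8')
print(paddle.cumsum(z))   # expected: [-1, 1, -2], dtype int8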
