Skip to content

Commit 5145f13

Browse files
authored
Merge branch 'PaddlePaddle:develop' into feat/add-swanlab-logger
2 parents dc68aac + eb43056 commit 5145f13

File tree

348 files changed

+3965
-1196
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

348 files changed

+3965
-1196
lines changed

csrc/gpu/moe/fused_moe/cutlass_kernels/moe_gemm/fused_moe_gemm_kernels_template.h

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ void generic_moe_gemm_kernelLauncher(const T* A,
6969
cudaStream_t stream,
7070
int* kernel_occupancy = nullptr) {
7171
if (gemm_config.split_k_style != SplitKStyle::NO_SPLIT_K) {
72-
PADDLE_FATAL("[MoeGemm] Grouped gemm does not support split-k");
72+
throw std::runtime_error(
73+
"[MoeGemm] Grouped gemm does not support split-k");
7374
}
7475

7576
#ifdef PADDLE_CUDA_BF16
@@ -169,7 +170,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
169170
int occupancy = std::min(2, GemmGrouped::maximum_active_blocks());
170171

171172
if (occupancy == 0) {
172-
PADDLE_FATAL(
173+
throw std::runtime_error(
173174
"[MoE Runner] GPU lacks the shared memory resources to run "
174175
"GroupedGEMM kernel");
175176
}
@@ -197,7 +198,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
197198
if (can_implement != cutlass::Status::kSuccess) {
198199
std::string err_msg = "MoEFC kernel will fail for params. Error: " +
199200
std::string(cutlassGetStatusString(can_implement));
200-
PADDLE_FATAL("[MoE Runner] " + err_msg);
201+
throw std::runtime_error("[MoE Runner] " + err_msg);
201202
}
202203

203204
auto init_status = gemm.initialize(args);
@@ -243,7 +244,7 @@ struct dispatch_stages {
243244
std::string err_msg = "Cutlass fpA_intB gemm. Not instantiates for arch " +
244245
std::to_string(arch::kMinComputeCapability) +
245246
" with stages set to " + std::to_string(Stages);
246-
PADDLE_FATAL("[dispatch_stages::dispatch] " + err_msg);
247+
throw std::runtime_error("[dispatch_stages::dispatch] " + err_msg);
247248
}
248249
};
249250

@@ -394,7 +395,8 @@ void dispatch_gemm_config(const T* A,
394395
default:
395396
std::string err_msg = "dispatch_gemm_config does not support stages " +
396397
std::to_string(gemm_config.stages);
397-
PADDLE_FATAL("[MoE][dispatch_gemm_config] " + err_msg);
398+
throw std::runtime_error(
399+
"[MoE][dispatch_gemm_config] " + err_msg);
398400
break;
399401
}
400402
}
@@ -452,15 +454,16 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
452454
dispatch_gemm_config_macro(64, 128, 64, 32, 64, 64);
453455
dispatch_gemm_config_macro(128, 128, 64, 64, 32, 64);
454456
case CutlassTileConfig::Undefined:
455-
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
457+
throw std::runtime_error(
458+
"[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
456459
break;
457460
case CutlassTileConfig::ChooseWithHeuristic:
458-
PADDLE_FATAL(
461+
throw std::runtime_error(
459462
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
460463
"already been set by heuristic.");
461464
break;
462465
default:
463-
PADDLE_FATAL(
466+
throw std::runtime_error(
464467
"[dispatch_moe_gemm_to_cutlass] Config is invalid for same "
465468
"type MoE tensorop GEMM.");
466469
break;
@@ -497,38 +500,44 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
497500
dispatch_gemm_config_macro(32, 128, 64, 32, 32, 64);
498501
dispatch_gemm_config_macro(64, 128, 64, 64, 64, 64);
499502
case CutlassTileConfig::Undefined:
500-
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
503+
throw std::runtime_error(
504+
"[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
501505
break;
502506
case CutlassTileConfig::ChooseWithHeuristic:
503-
PADDLE_FATAL(
507+
throw std::runtime_error(
504508
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
505509
"already been set by heuristic.");
506510
break;
507511
default:
508-
PADDLE_FATAL(
512+
throw std::runtime_error(
509513
"[dispatch_moe_gemm_to_cutlass] Config is invalid for "
510514
"mixed type tensorop GEMM.");
511515
break;
512516
}
513517
} else {
514518
switch (gemm_config.tile_config) {
515519
dispatch_gemm_config_macro(16, 128, 64, 16, 32, 64);
520+
dispatch_gemm_config_macro(16, 256, 64, 16, 64, 64);
521+
dispatch_gemm_config_macro(64, 64, 64, 32, 32, 64);
516522
dispatch_gemm_config_macro(32, 128, 64, 32, 32, 64);
523+
dispatch_gemm_config_macro(128, 64, 64, 64, 32, 64);
517524
dispatch_gemm_config_macro(64, 128, 64, 64, 64, 64);
518525
dispatch_gemm_config_macro(128, 128, 64, 64, 64, 64);
519526
dispatch_gemm_config_macro(128, 128, 64, 128, 32, 64);
520527
dispatch_gemm_config_macro(128, 256, 64, 64, 64, 64);
521528
dispatch_gemm_config_macro(64, 128, 64, 64, 32, 64);
529+
dispatch_gemm_config_macro(256, 128, 64, 64, 64, 64);
522530
case CutlassTileConfig::Undefined:
523-
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
531+
throw std::runtime_error(
532+
"[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
524533
break;
525534
case CutlassTileConfig::ChooseWithHeuristic:
526-
PADDLE_FATAL(
535+
throw std::runtime_error(
527536
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
528537
"already been set by heuristic.");
529538
break;
530539
default:
531-
PADDLE_FATAL(
540+
throw std::runtime_error(
532541
"[dispatch_moe_gemm_to_cutlass] Config is invalid for "
533542
"mixed type tensorop GEMM.");
534543
break;
@@ -561,17 +570,17 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
561570
switch (gemm_config.tile_config) {
562571
dispatch_gemm_config_macro(128, 128, 8, 64, 64, 8);
563572
case CutlassTileConfig::Undefined:
564-
PADDLE_FATAL(
573+
throw std::runtime_error(
565574
"[dispatch_moe_gemm_to_cutlass][SIMT] gemm config "
566575
"undefined.");
567576
break;
568577
case CutlassTileConfig::ChooseWithHeuristic:
569-
PADDLE_FATAL(
578+
throw std::runtime_error(
570579
"[dispatch_moe_gemm_to_cutlass][SIMT] gemm config should "
571580
"have already been set by heuristic.");
572581
break;
573582
default:
574-
PADDLE_FATAL(
583+
throw std::runtime_error(
575584
"[dispatch_moe_gemm_to_cutlass][SIMT] Unsupported config "
576585
"for float MoE gemm.");
577586
break;

csrc/gpu/update_inputs_v2.cu

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,10 @@ __global__ void update_inputs_kernel_v2(
4242
const int bsz,
4343
const int max_bsz,
4444
const int input_ids_stride,
45-
const int end_length) {
45+
const int end_length,
46+
const int Flag_truncated_return_eos) {
4647
int thread_idx = threadIdx.x;
48+
bool output_len_truncated = false;
4749
// update step_idx and stop_flags
4850
if (thread_idx < max_bsz) {
4951
bool stop_flag = stop_flags[thread_idx];
@@ -52,6 +54,7 @@ __global__ void update_inputs_kernel_v2(
5254
}
5355
if (step_idx[thread_idx] >= max_dec_len[thread_idx]) {
5456
stop_flags[thread_idx] = true;
57+
output_len_truncated = true;
5558
}
5659
}
5760
__syncthreads();
@@ -61,8 +64,13 @@ __global__ void update_inputs_kernel_v2(
6164
if (seq_lens_this_time[thread_idx] == 0) {
6265
next_tokens[thread_idx] = -1;
6366
} else {
64-
next_tokens[thread_idx] = end_ids[0];
65-
kwargs_next_tokens[thread_idx] = end_ids[0];
67+
if (!Flag_truncated_return_eos && output_len_truncated) {
68+
// When the output was cut off by max_dec_len and Flag_truncated_return_eos is 0, keep the existing next token instead of returning EOS (used for RL).
69+
kwargs_next_tokens[thread_idx] = next_tokens[thread_idx];
70+
}else{
71+
next_tokens[thread_idx] = end_ids[0];
72+
kwargs_next_tokens[thread_idx] = end_ids[0];
73+
}
6674
}
6775
} else {
6876
kwargs_next_tokens[thread_idx] = next_tokens[thread_idx];
@@ -127,6 +135,15 @@ void UpdateInputesV2(const paddle::Tensor& stop_flags,
127135
const int end_length = end_ids.shape()[0];
128136

129137
auto not_need_stop_gpu = not_need_stop.copy_to(stop_flags.place(), false);
138+
int Flag_truncated_return_eos = 1;
139+
if (const char* inference_truncated_return_eos_env_p =
140+
std::getenv("INFERENCE_TRUNCATED_RETURN_EOS")) {
141+
std::string inference_truncated_return_eos_env_str(
142+
inference_truncated_return_eos_env_p);
143+
int inference_truncated_return_eos_from_env =
144+
std::stoi(inference_truncated_return_eos_env_str);
145+
Flag_truncated_return_eos = inference_truncated_return_eos_from_env;
146+
}
130147

131148
update_inputs_kernel_v2<1024><<<1, 1024, 0, input_ids.stream()>>>(
132149
const_cast<bool*>(not_need_stop_gpu.data<bool>()),
@@ -145,7 +162,8 @@ void UpdateInputesV2(const paddle::Tensor& stop_flags,
145162
now_bsz,
146163
max_bsz,
147164
input_ids_stride,
148-
end_length
165+
end_length,
166+
Flag_truncated_return_eos
149167
);
150168

151169
auto not_need_stop_cpu = not_need_stop_gpu.copy_to(not_need_stop.place(), false);

docs/zh/index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@
128128
:caption: 实践教程
129129

130130
AI Studio Notebook <tutorials/overview>
131+
大模型预训练新手指南 <llm/docs/pretrain_tutorial.md>
132+
大模型精调新手指南 <llm/docs/finetune_tutorial.md>
133+
大模型对齐新手指南 <llm/docs/alignment_tutorial.md>
134+
大模型量化新手指南 <llm/docs/quantization_tutorial.md>
131135

132136
.. toctree::
133137
:maxdepth: 1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../../../llm/devices/intel_hpu/tests/README.md
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../llm/docs/alignment_tutorial.md
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../llm/docs/finetune_tutorial.md

docs/zh/llm/docs/pretrain.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../llm/docs/pretrain.md
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../llm/docs/pretrain_tutorial.md
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../llm/docs/quantization_tutorial.md

docs/zh/locale/en/LC_MESSAGES/source/paddlenlp.taskflow.utils.po

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ msgid "word of current node."
276276
msgstr ""
277277

278278
#: of paddlenlp.taskflow.utils.BurkhardKellerTree:1
279-
msgid "Implementataion of BK-Tree"
279+
msgid "Implementation of BK-Tree"
280280
msgstr ""
281281

282282
#: of paddlenlp.taskflow.utils.BurkhardKellerTree.add:1
@@ -300,7 +300,7 @@ msgid "similar words."
300300
msgstr ""
301301

302302
#: of paddlenlp.taskflow.utils.TriedTree:1
303-
msgid "Implementataion of TriedTree"
303+
msgid "Implementation of TriedTree"
304304
msgstr ""
305305

306306
#: of paddlenlp.taskflow.utils.TriedTree.add_word:1

0 commit comments

Comments
 (0)