From b5928ba9d536911ba076770e2688be4a6b644ab2 Mon Sep 17 00:00:00 2001
From: laipaang
Date: Mon, 11 Dec 2023 11:00:15 +0800
Subject: [PATCH] beam support 20/30 and fused_multi_transformer_int8 keep fp32

---
 paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu      | 2 ++
 python/paddle/fluid/contrib/mixed_precision/fp16_utils.py | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu b/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu
index cea15b1e3a6ab..4a0a63bfd4407 100644
--- a/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu
+++ b/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu
@@ -840,6 +840,8 @@ void invokeTopkSoftMax(const Context &dev_ctx,
     CASE_K(14);
     CASE_K(15);
     CASE_K(16);
+    CASE_K(20);
+    CASE_K(30);
     CASE_K(50);
     default:
       PADDLE_THROW(paddle::platform::errors::Unimplemented(
diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
index 8884caca96ba6..61ee2e57dfa64 100644
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
@@ -110,8 +110,9 @@ def _keep_fp32_input(op, in_name):
         return in_name in {
             'LnScale', 'LnBias', 'Ln2Scale', 'Ln2Bias', "Ln1Scale", "Ln1Bias"
         }
-    if op_type in ['fused_multi_transformer', 'fused_multi_transformer_moe']:
-        return in_name in {'LnScale', 'LnBias', 'FFNLnScale', 'FFNLnBias'}
+    if op_type in ['fused_multi_transformer', 'fused_multi_transformer_int8', 'fused_multi_transformer_moe']:
+        return in_name in {'LnScale', 'LnBias', 'FFNLnScale', 'FFNLnBias', 'QKVOutScale',
+                           'OutLinearOutScale', 'FFN1OutScale', 'FFN2OutScale'}
     return False