diff --git a/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu b/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu index cea15b1e3a6ab..4a0a63bfd4407 100644 --- a/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu +++ b/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu @@ -840,6 +840,8 @@ void invokeTopkSoftMax(const Context &dev_ctx, CASE_K(14); CASE_K(15); CASE_K(16); + CASE_K(20); + CASE_K(30); CASE_K(50); default: PADDLE_THROW(paddle::platform::errors::Unimplemented( diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py index 8884caca96ba6..61ee2e57dfa64 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py @@ -110,8 +110,9 @@ def _keep_fp32_input(op, in_name): return in_name in { 'LnScale', 'LnBias', 'Ln2Scale', 'Ln2Bias', "Ln1Scale", "Ln1Bias" } - if op_type in ['fused_multi_transformer', 'fused_multi_transformer_moe']: - return in_name in {'LnScale', 'LnBias', 'FFNLnScale', 'FFNLnBias'} + if op_type in ['fused_multi_transformer', 'fused_multi_transformer_int8', 'fused_multi_transformer_moe']: + return in_name in {'LnScale', 'LnBias', 'FFNLnScale', 'FFNLnBias', 'QKVOutScale', + 'OutLinearOutScale', 'FFN1OutScale', 'FFN2OutScale'} return False