qingshui · laipaang · Dec 11, 2023 · Dec 11, 2023
diff --git a/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu b/paddle/phi/kernels/fusion/gpu/beam_search_softmax.cu
@@ -840,6 +840,8 @@ void invokeTopkSoftMax(const Context &dev_ctx,
     CASE_K(14);
     CASE_K(15);
     CASE_K(16);
+    CASE_K(20);
+    CASE_K(30);
     CASE_K(50);
     default:
       PADDLE_THROW(paddle::platform::errors::Unimplemented(

diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
@@ -110,8 +110,9 @@ def _keep_fp32_input(op, in_name):
         return in_name in {
             'LnScale', 'LnBias', 'Ln2Scale', 'Ln2Bias', "Ln1Scale", "Ln1Bias"
         }
-    if op_type in ['fused_multi_transformer', 'fused_multi_transformer_moe']:
-        return in_name in {'LnScale', 'LnBias', 'FFNLnScale', 'FFNLnBias'}
+    if op_type in ['fused_multi_transformer', 'fused_multi_transformer_int8', 'fused_multi_transformer_moe']:
+        return in_name in {'LnScale', 'LnBias', 'FFNLnScale', 'FFNLnBias', 'QKVOutScale',
+            'OutLinearOutScale', 'FFN1OutScale', 'FFN2OutScale'}
     return False