From 5dd3ca83351b5ef3607388a24d36cc0b39161c85 Mon Sep 17 00:00:00 2001 From: zobinHuang Date: Wed, 22 Feb 2023 21:10:14 +0800 Subject: [PATCH] fix: scale input bug inside AddBiasResidualLayerNorm kernel --- src/fastertransformer/kernels/layernorm_kernels.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fastertransformer/kernels/layernorm_kernels.cu b/src/fastertransformer/kernels/layernorm_kernels.cu index f832d9104..369030b37 100644 --- a/src/fastertransformer/kernels/layernorm_kernels.cu +++ b/src/fastertransformer/kernels/layernorm_kernels.cu @@ -78,13 +78,13 @@ __global__ void generalAddBiasResidualLayerNormOpt(T* normed_output, if (IS_OUTPUT) { T in_val; if (scale_input) { - in_val = input[index]; - } - else { in_val = cuda_cast(cuda_cast(reinterpret_cast(input)[index]) * scale_from_int); } - val = hadd2(val, input[index]); + else { + in_val = input[index]; + } + val = hadd2(val, in_val); } shmem[i] = val; output[index] = val;