Commit

…to main
byshiue committed Mar 6, 2023
2 parents f7a4418 + 8bf96eb commit 303e052
Showing 4 changed files with 169 additions and 169 deletions.
8 changes: 4 additions & 4 deletions src/fastertransformer/kernels/layernorm_kernels.cu
@@ -78,13 +78,13 @@ __global__ void generalAddBiasResidualLayerNormOpt(T* normed_output,
         if (IS_OUTPUT) {
             T in_val;
             if (scale_input) {
-                in_val = input[index];
-            }
-            else {
                 in_val = cuda_cast<T>(cuda_cast<Float_Packed_T>(reinterpret_cast<const Int32_Packed_T*>(input)[index])
                                       * scale_from_int);
             }
-            val = hadd2(val, input[index]);
+            else {
+                in_val = input[index];
+            }
+            val = hadd2(val, in_val);
         }
         shmem[i] = val;
         output[index] = val;
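The change above routes the int32 GEMM output through the dequantization path when scale_input is set, and accumulates the dequantized in_val instead of the raw input[index]. Below is a minimal, standalone sketch of that idea with plain float types; the kernel name add_residual_dequant and its signature are illustrative only, not the FasterTransformer kernel (the real kernel works on packed half-precision values via cuda_cast and hadd2 and goes on to apply the layer norm).

// Sketch only: dequantize an int32 GEMM output with a per-tensor scale
// before adding it to the residual, mirroring the scale_input branch above.
#include <cstdint>
#include <cuda_runtime.h>

__global__ void add_residual_dequant(float*         out,
                                     const int32_t* gemm_out,       // int32 accumulators from an INT8 GEMM
                                     const float*   residual,
                                     float          scale_from_int, // dequantization scale
                                     bool           scale_input,
                                     int            n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) {
        return;
    }
    // When the input is quantized, convert and rescale it; otherwise the same
    // buffer already holds floats and can be reinterpreted directly.
    const float in_val = scale_input
                             ? static_cast<float>(gemm_out[idx]) * scale_from_int
                             : reinterpret_cast<const float*>(gemm_out)[idx];
    out[idx] = residual[idx] + in_val;
}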
@@ -139,7 +139,7 @@ void SelfAttentionFP8Layer<T1, T2>::forward(TensorMap* ou
     float attn_scale_2 =
         attention_weights->query_weight.output_h_scale[0]
         * attention_weights->attention_output_weight.input_h_scale_inv[0];  // v and output scale
-    dispatcher_fp8->setScaleList(attn_scale_1, 0.0f, attn_scale_2);
+    dispatcher_fp8->setScaleList(attn_scale_1, 1.0f, attn_scale_2);
     // For example, if a query is like
     // [[S_0, P_0], [S_1, P_1]], where S_i is real tokens and P_i is padded tokens.
     // In zero pad case, we remove the padding and the input looks like [S_0, S_1].
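The context comment explains the padding handling: given a batch [[S_0, P_0], [S_1, P_1]] of real tokens S_i and padded tokens P_i, the zero-pad path drops the padding so the packed input becomes [S_0, S_1]. A small host-side sketch of that packing step, using a hypothetical remove_padding helper rather than the FasterTransformer API:

// Sketch only: gather the real tokens [S_0, S_1] out of a padded batch
// [[S_0, P_0], [S_1, P_1]], as described in the comment above.
#include <vector>

// rows:        padded input, batch_size * max_seq_len token vectors
// seq_lengths: number of real tokens per sequence
std::vector<std::vector<float>> remove_padding(const std::vector<std::vector<float>>& rows,
                                               const std::vector<int>&                seq_lengths,
                                               int                                    max_seq_len)
{
    std::vector<std::vector<float>> packed;
    for (size_t b = 0; b < seq_lengths.size(); ++b) {
        for (int t = 0; t < seq_lengths[b]; ++t) {        // keep real tokens only
            packed.push_back(rows[b * max_seq_len + t]);  // drop the padded tail P_b
        }
    }
    return packed;  // concatenated [S_0, S_1] with no padding rows
}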

0 comments on commit 303e052
