1 parent b3aba04 commit 221bf72
vllm/model_executor/layers/batch_invariant.py
@@ -134,10 +134,7 @@ def matmul_kernel_persistent(
             bias_ptrs = bias_ptr + offs_cn
             bias = tl.load(bias_ptrs, mask=offs_cn < N, other=0.0).to(tl.float32)
             accumulator += bias
-        if c_ptr.dtype.element_ty == tl.float8e4nv:
-            c = accumulator.to(tl.float8e4nv)
-        else:
-            c = accumulator.to(tl.float16)
+        c = accumulator.to(c_ptr.dtype.element_ty)
         tl.store(c_ptrs, c, mask=c_mask)
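For context, a minimal sketch (not the vLLM kernel itself) of why the new cast generalizes: in Triton, a pointer argument exposes its element type as ptr.dtype.element_ty, so the store can follow whatever dtype the output tensor was allocated with instead of hard-coding tl.float16 or tl.float8e4nv. The kernel and variable names below (copy_cast_kernel, x, out) are illustrative, not from the patched file.

import torch
import triton
import triton.language as tl


@triton.jit
def copy_cast_kernel(x_ptr, out_ptr, n_elements, BLOCK: tl.constexpr):
    offs = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
    mask = offs < n_elements
    # Accumulate in fp32, as the matmul kernel does.
    acc = tl.load(x_ptr + offs, mask=mask, other=0.0).to(tl.float32)
    # Cast to the output pointer's element type rather than branching
    # on a hard-coded list of dtypes.
    tl.store(out_ptr + offs, acc.to(out_ptr.dtype.element_ty), mask=mask)


x = torch.randn(1024, device="cuda")
for out_dtype in (torch.float16, torch.bfloat16):
    out = torch.empty_like(x, dtype=out_dtype)
    grid = (triton.cdiv(x.numel(), 256),)
    copy_cast_kernel[grid](x, out, x.numel(), BLOCK=256)

The same single code path then covers fp8 outputs on hardware that supports them, which is the point of the one-line replacement in the diff above.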