We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 23a04e0 · commit 8b8c209 (copy full SHA for 8b8c209)
vllm/_custom_ops.py
@@ -1276,7 +1276,7 @@ def scaled_fp8_quant(
1276
torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
1277
else:
1278
# num_token_padding not implemented for this case
1279
- assert (scale.numel() == 1 or num_token_padding is None)
+ assert (scale.numel() == 1 and num_token_padding is None)
1280
torch.ops._C.static_scaled_fp8_quant(output, input, scale)
1281
1282
return output, scale
0 commit comments