Commit 8b8c209

static_scaled_fp8_quant should not run when scale.numel is not 1 (#20076)
1 parent: 23a04e0

1 file changed: +1 −1 lines

vllm/_custom_ops.py (1 addition & 1 deletion)
@@ -1276,7 +1276,7 @@ def scaled_fp8_quant(
         torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
     else:
         # num_token_padding not implemented for this case
-        assert (scale.numel() == 1 or num_token_padding is None)
+        assert (scale.numel() == 1 and num_token_padding is None)
         torch.ops._C.static_scaled_fp8_quant(output, input, scale)

     return output, scale
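The intent of the fix can be illustrated without torch. The static FP8 quantization kernel supports neither multi-element scales nor `num_token_padding`, but under the old `or` guard a multi-element `scale` slipped past the assertion whenever `num_token_padding` happened to be `None`. A minimal sketch of the boolean logic (the `static_path_allowed` helper is hypothetical, standing in for the assertion guarding the static branch):

```python
def static_path_allowed(scale_numel: int, num_token_padding, use_and: bool) -> bool:
    """Return True if the guard admits this call to the static kernel.

    Mirrors the assertion in vllm/_custom_ops.py: the static path
    requires a single-element scale AND no token padding.
    """
    if use_and:
        # fixed guard: both conditions must hold
        return scale_numel == 1 and num_token_padding is None
    # old, buggy guard: either condition alone was enough
    return scale_numel == 1 or num_token_padding is None

# A per-channel scale (numel > 1) with no padding requested:
old = static_path_allowed(scale_numel=128, num_token_padding=None, use_and=False)
new = static_path_allowed(scale_numel=128, num_token_padding=None, use_and=True)
print(old, new)  # old guard admits the unsupported call; fixed guard rejects it
```

With the fix, such a call trips the assertion instead of reaching `torch.ops._C.static_scaled_fp8_quant` with a scale tensor the kernel cannot handle.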
