vllm/model_executor/layers/quantization/fp8.py: 4 changes (2 additions, 2 deletions)
@@ -448,7 +448,7 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
         self.layer = layer
         self.quant_config = quant_config
         self.weight_block_size = self.quant_config.weight_block_size
-        self.block_quant = self.weight_block_size is not None
+        self.block_quant: bool = self.weight_block_size is not None
 
         self.fused_experts: Optional[
             mk.FusedMoEModularKernel] = None  # type: ignore
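
A brief aside (illustrative, not part of the PR): the new annotation documents that `block_quant` is always a plain bool, because `x is not None` evaluates to True or False, never to None. A minimal sketch:

# Minimal sketch (illustrative, not from the PR): `x is not None`
# always evaluates to a bool, so `block_quant` can never be None.
weight_block_size = None                        # block quantization disabled
block_quant: bool = weight_block_size is not None
assert block_quant is False                     # the comparison yields False
assert block_quant is not None                  # holds for any config value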
@@ -1069,7 +1069,7 @@ def apply(
                 expert_map=expert_map,
             )
         elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS:
-            assert self.block_quant is None
+            assert not self.block_quant
             assert (not renormalize and custom_routing_function is not None)
             assert activation == 'silu', (
                 f"Expected 'silu' activation but got {activation}")
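
Why the assertion change is needed (an illustrative sketch, not from the PR): since `block_quant` is always a bool, `self.block_quant is None` is always False, so the old assert would fire even when block quantization was disabled. `assert not self.block_quant` expresses the intended check:

# Illustrative sketch (not from the PR): the old check always failed.
block_quant = False                   # block quantization disabled
# assert block_quant is None         # raises AssertionError: False is not None
assert not block_quant                # passes exactly when block quant is off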