File tree: 1 file changed, +2 −2 lines changed.
vllm/model_executor/layers/quantization: 1 file changed, +2 −2 lines changed.

@@ -50,7 +50,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
50  50          # We prefer to use separate k_scale and v_scale if present
51  51          k_scale = layer.k_scale.to("cpu").tolist()
52  52          v_scale = layer.v_scale.to("cpu").tolist()
53     -        if current_platform.is_rocm():
    53 +        if current_platform.is_fp8_fnuz():
54  54              k_scale *= 2
55  55              v_scale *= 2
56  56          elif layer.k_scale < 0.0 and layer.v_scale < 0.0:
@@ -66,7 +66,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
66  66              scale_to_duplicate = max(layer.k_scale, layer.v_scale)
67  67              k_scale = scale_to_duplicate.to("cpu").tolist()
68  68              v_scale = scale_to_duplicate.to("cpu").tolist()
69     -            if current_platform.is_rocm():
    69 +            if current_platform.is_fp8_fnuz():
70  70                  k_scale *= 2
71  71                  v_scale *= 2
72  72
7272
You can’t perform that action at this time.
0 commit comments