Skip to content

Commit 0b754e0

Browse files
author
jingyu
committed
Qwen FP8 Support
Signed-off-by: jingyu <jingyu@omniml.ai>
1 parent 458e74e commit 0b754e0

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

vllm/config/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,8 +1066,9 @@ def _parse_quant_hf_config(self):
         # Set quant_method for ModelOpt models.
         producer_name = quant_cfg.get("producer", {}).get("name")
         if producer_name == "modelopt":
-            quant_algo = quant_cfg.get("quantization",
-                                       {}).get("quant_algo")
+            quant_algo = (quant_cfg.get("quantization",
+                                        {}).get("quant_algo")
+                          or quant_cfg.get("quant_algo"))
             if quant_algo == "FP8":
                 quant_cfg["quant_method"] = "modelopt"
             elif quant_algo == "NVFP4":

vllm/model_executor/models/qwen3_moe.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,12 @@ def load_weights(self, weights: Iterable[tuple[str,
                 # Skip non-stacked layers and experts (experts handled below).
                 if weight_name not in name:
                     continue
+                if name.endswith("scale"):
+                    remapped_name = maybe_remap_kv_scale_name(
+                        name, params_dict)
+                    if remapped_name is None:
+                        continue
+                    name = remapped_name
                 # We have mlp.experts[0].gate_proj in the checkpoint.
                 # Since we handle the experts below in expert_params_mapping,
                 # we need to skip here BEFORE we update the name, otherwise
@@ -475,8 +481,11 @@ def load_weights(self, weights: Iterable[tuple[str,
                 if name.endswith("scale"):
                     # Remapping the name of FP8 kv-scale.
                     name = maybe_remap_kv_scale_name(name, params_dict)
-                    if name is None:
+                    remapped_name = maybe_remap_kv_scale_name(
+                        name, params_dict)
+                    if remapped_name is None:
                         continue
+                    name = remapped_name
                     if name not in params_dict:
                         continue

0 commit comments

Comments (0)