From 0b754e09b02fc05a02f257df81867ed12d4dc239 Mon Sep 17 00:00:00 2001
From: jingyu
Date: Mon, 11 Aug 2025 20:28:03 +0000
Subject: [PATCH] Qwen FP8 Support

Support ModelOpt FP8 checkpoints for Qwen3 MoE: fall back to a
top-level "quant_algo" key when hf_quant_config.json does not nest it
under "quantization", and remap FP8 kv-scale names while loading
weights so the scales resolve to their attention parameters instead of
being dropped.

Signed-off-by: jingyu
---
 vllm/config/__init__.py                 |  5 +++--
 vllm/model_executor/models/qwen3_moe.py | 12 ++++++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index 03ab034c62c1..9ad85edef669 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -1066,8 +1066,9 @@ def _parse_quant_hf_config(self):
         # Set quant_method for ModelOpt models.
         producer_name = quant_cfg.get("producer", {}).get("name")
         if producer_name == "modelopt":
-            quant_algo = quant_cfg.get("quantization",
-                                       {}).get("quant_algo")
+            quant_algo = (quant_cfg.get("quantization",
+                                        {}).get("quant_algo")
+                          or quant_cfg.get("quant_algo"))
             if quant_algo == "FP8":
                 quant_cfg["quant_method"] = "modelopt"
             elif quant_algo == "NVFP4":
diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py
index 9b49952f3724..05b15bbb50c0 100644
--- a/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm/model_executor/models/qwen3_moe.py
@@ -455,6 +455,12 @@ def load_weights(self, weights: Iterable[tuple[str,
                 # Skip non-stacked layers and experts (experts handled below).
                 if weight_name not in name:
                     continue
+                if name.endswith("scale"):
+                    remapped_name = maybe_remap_kv_scale_name(
+                        name, params_dict)
+                    if remapped_name is None:
+                        continue
+                    name = remapped_name
                 # We have mlp.experts[0].gate_proj in the checkpoint.
                 # Since we handle the experts below in expert_params_mapping,
                 # we need to skip here BEFORE we update the name, otherwise
@@ -475,7 +481,9 @@ def load_weights(self, weights: Iterable[tuple[str,
                 if name.endswith("scale"):
                     # Remapping the name of FP8 kv-scale.
-                    name = maybe_remap_kv_scale_name(name, params_dict)
-                    if name is None:
+                    remapped_name = maybe_remap_kv_scale_name(
+                        name, params_dict)
+                    if remapped_name is None:
                         continue
+                    name = remapped_name
                 if name not in params_dict:
                     continue
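
Reviewer notes (not part of the commit):

The config change covers ModelOpt quant configs that carry "quant_algo"
at the top level of hf_quant_config.json rather than nesting it under
"quantization"; the fallback added above handles that flat layout. A
minimal sketch of the lookup follows. The JSON values and the
resolve_quant_algo name are illustrative, not taken from a real
checkpoint:

    # Nested layout: quant_algo lives under "quantization".
    nested_cfg = {
        "producer": {"name": "modelopt"},
        "quantization": {"quant_algo": "FP8"},
    }
    # Flat layout that the fallback adds support for.
    flat_cfg = {
        "producer": {"name": "modelopt"},
        "quant_algo": "FP8",
    }

    def resolve_quant_algo(quant_cfg: dict) -> str | None:
        # Same expression as the patched _parse_quant_hf_config:
        # prefer the nested key, then fall back to the flat one.
        return (quant_cfg.get("quantization", {}).get("quant_algo")
                or quant_cfg.get("quant_algo"))

    assert resolve_quant_algo(nested_cfg) == "FP8"
    assert resolve_quant_algo(flat_cfg) == "FP8"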
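The load_weights hunks matter because FP8 checkpoints store kv-cache
scales under projection names such as "...self_attn.k_proj.k_scale",
while the parameter is registered on the attention module. The remap in
the first hunk has to run before the stacked-parameter rename: k_proj
matches the qkv_proj stacked mapping, so without it the scale would be
rewritten to a qkv_proj name that does not exist in params_dict and
could never load. Below is a simplified, hypothetical stand-in for
maybe_remap_kv_scale_name; the real helper lives in
vllm/model_executor/models/utils.py and handles more checkpoint layouts
than this sketch:

    def maybe_remap_kv_scale_name_sketch(name, params_dict):
        # Map ModelOpt-style kv-scale names onto attention parameters.
        for ckpt_suffix, param_suffix in (
                (".self_attn.k_proj.k_scale", ".self_attn.attn.k_scale"),
                (".self_attn.v_proj.v_scale", ".self_attn.attn.v_scale"),
        ):
            if name.endswith(ckpt_suffix):
                remapped = name.replace(ckpt_suffix, param_suffix)
                # None tells the caller to skip scales with no target.
                return remapped if remapped in params_dict else None
        return name  # not a kv-scale name; leave unchanged

    params_dict = {"model.layers.0.self_attn.attn.k_scale": object()}
    assert (maybe_remap_kv_scale_name_sketch(
        "model.layers.0.self_attn.k_proj.k_scale", params_dict)
        == "model.layers.0.self_attn.attn.k_scale")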