
Commit b9a4e95

[BugFix] Fix ascend config check
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent: 908a851

File tree: 4 files changed, +80 −7 lines changed

docs/source/user_guide/additional_config.md

Lines changed: 4 additions & 2 deletions
```diff
@@ -29,6 +29,7 @@ The following table lists the additional configuration options available in vLLM
 | `torchair_graph_config` | dict | `{}` | The config options for torchair graph mode |
 | `ascend_scheduler_config` | dict | `{}` | The config options for ascend scheduler |
 | `expert_tensor_parallel_size` | str | `1` | Expert tensor parallel size the model to use. |
+| `refresh` | bool | `false` | Whether to refresh the global ascend config content. This value is usually used in RLHF cases. |
 
 The details of each config option are as follows:
 
@@ -59,12 +60,13 @@ A full example of additional configuration is as follows:
         "enabled": true,
         "use_cached_graph": true,
         "graph_batch_sizes": [1, 2, 4, 8],
-        "graph_batch_sizes_init": true
+        "graph_batch_sizes_init": false
     },
     "ascend_scheduler_config": {
         "enabled": true,
         "chunked_prefill_enabled": true,
     },
-    "expert_tensor_parallel_size": 1
+    "expert_tensor_parallel_size": 1,
+    "refresh": false,
 }
```
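For readers wiring this up, a minimal usage sketch of the new `refresh` option in offline inference follows. It assumes vLLM's `LLM` entry point accepts and forwards `additional_config`; the model name and option values are illustrative only, not part of this commit:

```python
from vllm import LLM

# Sketch only: pass `refresh: True` through additional_config so that
# init_ascend_config() rebuilds the cached global AscendConfig instead of
# returning the existing singleton. This matters in flows (e.g. RLHF) that
# re-create the engine inside one long-lived process.
llm = LLM(
    model="facebook/opt-125m",  # model used by the tests; any supported model works
    additional_config={
        "torchair_graph_config": {"enabled": False},
        "ascend_scheduler_config": {"enabled": True},
        "expert_tensor_parallel_size": 1,
        "refresh": True,  # force re-initialization of the global ascend config
    },
)
print(llm.generate("Hello, my name is"))
```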

tests/singlecard/test_ascend_config.py

Lines changed: 70 additions & 1 deletion
```diff
@@ -16,7 +16,7 @@
 import pytest
 
 from tests.conftest import VllmRunner
-from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config
+from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config, init_ascend_config
 
 
 def _clean_up_ascend_config(func):
@@ -59,7 +59,25 @@ def test_run_with_ascend_config():
         },
         "expert_tensor_parallel_size": 1
     }
+
+    # check passed with eager mode
+    with VllmRunner("facebook/opt-125m",
+                    additional_config=input_additional_config):
+        ascend_config = get_ascend_config()
+
+        assert not ascend_config.torchair_graph_config.enabled
+        assert ascend_config.torchair_graph_config.use_cached_graph
+        assert ascend_config.torchair_graph_config.graph_batch_sizes == [
+            1, 2, 4, 8
+        ]
+        assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
+        assert ascend_config.ascend_scheduler_config.enabled
+        assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
+        assert ascend_config.expert_tensor_parallel_size == 1
+
+    # check passed with aclgraph mode
     with VllmRunner("facebook/opt-125m",
+                    enforce_eager=False,
                     additional_config=input_additional_config):
         ascend_config = get_ascend_config()
 
@@ -114,5 +132,56 @@ def test_ascend_config_load_error():
         },
     }
     with VllmRunner("facebook/opt-125m",
+                    enforce_eager=False,
                     additional_config=input_additional_config_fake_2):
         pass
+
+    # torchair graph should not be enabled with eager mode
+    with pytest.raises(RuntimeError):
+        input_additional_config_fake_1 = {
+            "torchair_graph_config": {
+                "enabled": True,
+            },
+        }
+        with VllmRunner("facebook/opt-125m",
+                        enforce_eager=True,
+                        additional_config=input_additional_config_fake_1):
+            pass
+
+
+@_clean_up_ascend_config
+def test_ascend_config_refresh():
+    from vllm.config import get_current_vllm_config
+    vllm_config = get_current_vllm_config()
+    # set additional_config with none
+    init_ascend_config(vllm_config)
+
+    input_additional_config = {
+        "torchair_graph_config": {
+            "enabled": False,
+            "use_cached_graph": True,
+            "graph_batch_sizes": [1, 2, 4, 8],
+            "graph_batch_sizes_init": False,
+        },
+        "ascend_scheduler_config": {
+            "enabled": True,
+            "enable_chunked_prefill": True,
+        },
+        "expert_tensor_parallel_size": 1,
+        "refresh": True,
+    }
+
+    # refresh ascend config
+    with VllmRunner("facebook/opt-125m",
+                    additional_config=input_additional_config):
+        ascend_config = get_ascend_config()
+
+        assert not ascend_config.torchair_graph_config.enabled
+        assert ascend_config.torchair_graph_config.use_cached_graph
+        assert ascend_config.torchair_graph_config.graph_batch_sizes == [
+            1, 2, 4, 8
+        ]
+        assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
+        assert ascend_config.ascend_scheduler_config.enabled
+        assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
+        assert ascend_config.expert_tensor_parallel_size == 1
```
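These tests lean on the `_clean_up_ascend_config` decorator, whose body sits outside the hunks above. A plausible sketch, assuming it only resets the global singleton around each test via the `clear_ascend_config` helper the module already imports (this is a hypothetical reconstruction, not the committed code):

```python
import functools

from vllm_ascend.ascend_config import clear_ascend_config


def _clean_up_ascend_config(func):
    # Hypothetical reconstruction: drop the cached global AscendConfig before
    # and after the wrapped test so no state leaks between test cases.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        clear_ascend_config()
        try:
            return func(*args, **kwargs)
        finally:
            clear_ascend_config()

    return wrapper
```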

vllm_ascend/ascend_config.py

Lines changed: 5 additions & 3 deletions
```diff
@@ -80,8 +80,10 @@ def __init__(self, ascend_scheduler_config: dict):
 
 
 def init_ascend_config(vllm_config):
+    additional_config = vllm_config.additional_config if vllm_config.additional_config is not None else {}
+    refresh = additional_config.get("refresh", False) if additional_config else False
     global _ASCEND_CONFIG
-    if _ASCEND_CONFIG is not None:
+    if _ASCEND_CONFIG is not None and not refresh:
         return _ASCEND_CONFIG
     _ASCEND_CONFIG = AscendConfig(vllm_config)
     return _ASCEND_CONFIG
@@ -105,7 +107,7 @@ def check_ascend_config(vllm_config, enforce_eager):
     ascend_config = get_ascend_config()
 
     # Both for V0 and V1 Engine, torchair_graph cannot be enabled with eager mode.
-    if ascend_config.torchair_graph_config.enabled and not enforce_eager:
+    if ascend_config.torchair_graph_config.enabled and enforce_eager:
         raise RuntimeError(
             "Can't enable graph mode and eager mode at the same time. Please set `enforce_eager=False` if you attempt to enable NPU graph mode."
         )
@@ -124,7 +126,7 @@ def check_ascend_config(vllm_config, enforce_eager):
                 "Torchair graph mode only works with deepseek model.")
 
     # for V1 Engine, aclgraph doesn't work with deepseek model and only qwen model is well tested.
-    if envs.VLLM_USE_V1 and vllm_config.model_config is not None and not enforce_eager:
+    if envs.VLLM_USE_V1 and vllm_config.model_config is not None and not enforce_eager and not ascend_config.torchair_graph_config.enabled:
         model_type = vllm_config.model_config.hf_config.model_type
         if "deepseek" in model_type:
             raise NotImplementedError(
```
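The `refresh` handling above is a refresh-aware singleton: reuse the cached config unless the caller explicitly asks for a rebuild. A self-contained sketch of the pattern, with `AscendConfig` and the vllm config stubbed out for illustration (the stub classes are not the real ones; only the `init_ascend_config` logic mirrors the diff):

```python
_ASCEND_CONFIG = None


class AscendConfig:
    """Stub standing in for the real vllm_ascend AscendConfig."""

    def __init__(self, vllm_config):
        self.vllm_config = vllm_config


def init_ascend_config(vllm_config):
    # Reuse the cached config unless the caller explicitly asks to refresh,
    # e.g. an RLHF flow that re-creates the engine within one process.
    additional_config = vllm_config.additional_config or {}
    refresh = additional_config.get("refresh", False)
    global _ASCEND_CONFIG
    if _ASCEND_CONFIG is not None and not refresh:
        return _ASCEND_CONFIG
    _ASCEND_CONFIG = AscendConfig(vllm_config)
    return _ASCEND_CONFIG


class _FakeVllmConfig:
    """Illustrative stand-in for vllm.config.VllmConfig."""

    def __init__(self, additional_config=None):
        self.additional_config = additional_config


first = init_ascend_config(_FakeVllmConfig())
cached = init_ascend_config(_FakeVllmConfig())
rebuilt = init_ascend_config(_FakeVllmConfig({"refresh": True}))
assert first is cached       # singleton is reused while refresh is unset
assert rebuilt is not first  # refresh forces a rebuild
```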

vllm_ascend/worker/model_runner_v1.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -323,7 +323,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
 
         ascend_config = get_ascend_config()
         self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled and self.vllm_config.model_config.use_mla
-        self.torchair_graph_use_cached_npu_graph = ascend_config.torchair_graph_config.use_cached_graph
+        self.use_cached_npu_graph = ascend_config.torchair_graph_config.use_cached_graph
         self.torchair_graph_batch_sizes = ascend_config.torchair_graph_config.graph_batch_sizes
 
         if ascend_config.torchair_graph_config.graph_batch_sizes_init:
```
