|
17 | 17 | # Adapted from vllm-project/vllm/vllm/worker/worker.py |
18 | 18 | # |
19 | 19 |
|
| 20 | +import contextlib |
20 | 21 | import math |
21 | 22 | from typing import TYPE_CHECKING |
22 | 23 |
|
23 | 24 | import torch |
| 25 | +import torchair # type: ignore # noqa: F401 |
24 | 26 | from packaging.version import InvalidVersion, Version |
25 | 27 | from vllm.logger import logger |
26 | 28 |
|
27 | 29 | import vllm_ascend.envs as envs |
28 | 30 |
|
| 31 | +try: |
| 32 | + from torchair.scope import \ |
| 33 | + npu_stream_switch as _npu_stream_switch # type: ignore |
| 34 | + from torchair.scope import \ |
| 35 | + npu_wait_tensor as _npu_wait_tensor # type: ignore |
| 36 | +except ImportError: |
| 37 | + from torchair.ops import \ |
| 38 | + NpuStreamSwitch as _npu_stream_switch # type: ignore |
| 39 | + from torchair.ops import \ |
| 40 | + npu_wait_tensor as _npu_wait_tensor # type: ignore |
| 41 | + |
29 | 42 | if TYPE_CHECKING: |
30 | 43 | from vllm.config import VllmConfig |
31 | 44 | else: |
@@ -173,3 +186,14 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None: |
173 | 186 |
|
def dispose_tensor(x: torch.Tensor):
    """Free *x*'s backing storage eagerly.

    Re-points ``x`` at a fresh zero-element tensor (same device and dtype),
    dropping the reference to the original storage so it can be reclaimed
    without waiting for ``x`` itself to go out of scope.
    """
    placeholder = torch.empty((0, ), device=x.device, dtype=x.dtype)
    x.set_(placeholder)
def npu_stream_switch(tag: str, priority: int = 0, enabled: bool = True):
    """Return a context manager that switches to the NPU stream *tag*.

    When ``enabled`` is False the switch is skipped entirely and a no-op
    ``contextlib.nullcontext`` is returned, so call sites can keep a single
    ``with`` statement regardless of whether multi-stream mode is active.
    """
    if not enabled:
        return contextlib.nullcontext()
    return _npu_stream_switch(tag, priority)
def npu_wait_tensor(self: torch.Tensor,
                    dependency: torch.Tensor,
                    enabled: bool = True):
    """Make *self* wait on *dependency* across NPU streams.

    When ``enabled`` is False no dependency edge is recorded and ``self``
    is returned unchanged, mirroring ``npu_stream_switch``'s opt-out path.
    """
    if not enabled:
        return self
    return _npu_wait_tensor(self, dependency)
0 commit comments