
Commit d795617

Commit message: update

This commit renames the bare `envs` import alias across eight files: `vllm.envs` becomes `envs_vllm` and `vllm_ascend.envs` becomes `envs_ascend`, so the two environment-variable modules can no longer be mistaken for each other.

Signed-off-by: shen-shanshan <467638484@qq.com>
1 parent 3152bf0 · commit d795617

File tree: 8 files changed (+25, -25 lines)

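Before the per-file diffs, a quick illustration of the convention the commit adopts (a sketch, not a file from this repository): with distinct aliases, both environment modules can be imported side by side without shadowing each other.

# Hypothetical module mixing both env namespaces under the new aliases.
import vllm.envs as envs_vllm            # upstream vLLM env flags
import vllm_ascend.envs as envs_ascend   # vllm-ascend specific env flags

def debug_dbo_enabled() -> bool:
    # Each flag now names the module it actually lives in, so a reader
    # never has to guess which `envs` a bare alias meant.
    return (envs_vllm.VLLM_LOGGING_LEVEL == "DEBUG"
            and envs_ascend.VLLM_ASCEND_ENABLE_DBO)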

vllm_ascend/ascend_forward_context.py

Lines changed: 3 additions & 3 deletions
@@ -8,7 +8,7 @@
 from vllm.distributed import get_dp_group, get_ep_group, get_tp_group
 from vllm.forward_context import get_forward_context, set_forward_context

-import vllm_ascend.envs as envs
+import vllm_ascend.envs as envs_ascend
 from vllm_ascend.platform import NPUPlatform


@@ -26,15 +26,15 @@ def _get_fused_moe_state(ep_size: int, with_prefill: bool,
                          is_deepseek_v3_r1: bool):
     # the fusion operator torch_npu.npu_grouped_matmul_finalize_routing called by allgather ep
     # only supports deepseek v3/r1
-    if (envs.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP and ep_size > 1
+    if (envs_ascend.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP and ep_size > 1
             and is_deepseek_v3_r1):
         return FusedMoEState.AllGatherEP
     elif ep_size == 1:
         if with_prefill:
             return FusedMoEState.NaiveMulticast
         else:
             return FusedMoEState.AllGather
-    elif envs.VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ:
+    elif envs_ascend.VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ:
         # MC2 Dispatch/Combine performs better than alltoall_seq in decoding stage.
         return (FusedMoEState.All2AllSeq if
                 (ep_size < 16 or with_prefill) else FusedMoEState.MC2)
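Read as a decision table, the branch order in `_get_fused_moe_state` is easiest to see in isolation. A minimal sketch with a stand-in enum and plain booleans in place of the env flags; branches past this hunk are not shown in the diff and are elided here:

import enum

class FusedMoEState(enum.Enum):      # stand-in for the real enum
    AllGatherEP = enum.auto()
    NaiveMulticast = enum.auto()
    AllGather = enum.auto()
    All2AllSeq = enum.auto()
    MC2 = enum.auto()

def pick_state(ep_size, with_prefill, is_deepseek_v3_r1,
               allgather_ep, all2all_seq):
    # Fused allgather-EP kernel only supports DeepSeek v3/r1.
    if allgather_ep and ep_size > 1 and is_deepseek_v3_r1:
        return FusedMoEState.AllGatherEP
    if ep_size == 1:
        return (FusedMoEState.NaiveMulticast if with_prefill
                else FusedMoEState.AllGather)
    if all2all_seq:
        # MC2 dispatch/combine beats alltoall_seq in the decode stage.
        return (FusedMoEState.All2AllSeq
                if (ep_size < 16 or with_prefill) else FusedMoEState.MC2)
    ...  # remaining branches fall outside the hunk shown above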

vllm_ascend/compilation/piecewise_backend.py

Lines changed: 2 additions & 2 deletions
@@ -23,7 +23,7 @@

 import torch
 import torch.fx as fx
-import vllm.envs as envs
+import vllm.envs as envs_vllm
 from vllm.compilation.backends import VllmBackend
 from vllm.compilation.counter import compilation_counter
 from vllm.compilation.monitor import end_monitoring_torch_compile
@@ -93,7 +93,7 @@ def __init__(self, graph: fx.GraphModule, vllm_config: VllmConfig,

         self.sym_shape_indices = sym_shape_indices

-        self.is_debugging_mode = envs.VLLM_LOGGING_LEVEL == "DEBUG"
+        self.is_debugging_mode = envs_vllm.VLLM_LOGGING_LEVEL == "DEBUG"

         # the entries for different shapes that we need to either
         # compile or capture aclgraph
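For reference, the guard above only turns on debug-path bookkeeping when the logging level is explicitly raised. A standalone sketch of the same check, reading the environment directly (the "INFO" default is an assumption about upstream vLLM's setting):

import os

# Mirrors the guard above without importing vllm.envs.
is_debugging_mode = os.environ.get("VLLM_LOGGING_LEVEL", "INFO") == "DEBUG"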

vllm_ascend/models/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -1,6 +1,6 @@
 from vllm import ModelRegistry

-import vllm_ascend.envs as envs
+import vllm_ascend.envs as envs_ascend


 def register_model():
@@ -21,7 +21,7 @@ def register_model():
         "Qwen2VLForConditionalGeneration",
         "vllm_ascend.models.qwen2_vl:AscendQwen2VLForConditionalGeneration")

-    if envs.USE_OPTIMIZED_MODEL:
+    if envs_ascend.USE_OPTIMIZED_MODEL:
         ModelRegistry.register_model(
             "Qwen2_5_VLForConditionalGeneration",
             "vllm_ascend.models.qwen2_5_vl:AscendQwen2_5_VLForConditionalGeneration"
@@ -32,7 +32,7 @@ def register_model():
             "vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen2_5_VLForConditionalGeneration_Without_Padding"
         )

-    if envs.VLLM_ASCEND_ENABLE_DBO:
+    if envs_ascend.VLLM_ASCEND_ENABLE_DBO:
         ModelRegistry.register_model(
             "DeepseekV2ForCausalLM",
             "vllm_ascend.models.deepseek_dbo:CustomDeepseekDBOForCausalLM")

vllm_ascend/patch/platform/patch_common/patch_distributed.py

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@
 # This file is a part of the vllm-ascend project.

 import torch
-import vllm.envs as envs
+import vllm.envs as envs_vllm
 from vllm.config import ParallelConfig

 from vllm_ascend.utils import is_310p
@@ -37,7 +37,7 @@ def parallel_config_get_dp_port(self) -> int:
     self.data_parallel_master_port += 1

     # NOTE: Get port from envs directly when using torchrun
-    port = envs.VLLM_DP_MASTER_PORT if envs.VLLM_DP_MASTER_PORT else answer
+    port = envs_vllm.VLLM_DP_MASTER_PORT if envs_vllm.VLLM_DP_MASTER_PORT else answer
     return port

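The patched method prefers an explicit `VLLM_DP_MASTER_PORT` (the torchrun case) over the auto-incremented fallback. The same precedence, sketched as a free function reading the environment directly:

import os

def resolve_dp_port(fallback_port: int) -> int:
    # An explicitly set VLLM_DP_MASTER_PORT wins, as under torchrun;
    # otherwise use the caller's auto-incremented port.
    env_port = os.environ.get("VLLM_DP_MASTER_PORT")
    return int(env_port) if env_port else fallback_port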

vllm_ascend/platform.py

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@
 from typing import TYPE_CHECKING, Optional, Tuple

 import torch
-import vllm.envs as envs
+import vllm.envs as envs_vllm
 from torch.distributed import ProcessGroup
 from torch.distributed.distributed_c10d import PrefixStore
 from vllm.logger import logger
@@ -116,7 +116,7 @@ def clear_npu_memory(cls):

     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
-        if not envs.VLLM_USE_V1:
+        if not envs_vllm.VLLM_USE_V1:
             raise ValueError("vLLM Ascend does not support V0 engine.")
         # initialize ascend config from vllm additional_config
         ascend_config = init_ascend_config(vllm_config)

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 2 additions & 2 deletions
@@ -23,7 +23,7 @@
 from vllm.distributed import GroupCoordinator, get_ep_group
 from vllm.forward_context import get_forward_context

-import vllm_ascend.envs as envs
+import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ascend_forward_context import FusedMoEState
 from vllm_ascend.distributed.parallel_state import get_mc2_group
@@ -1019,7 +1019,7 @@ def process_weights_after_loading(self, layer):
             1, 2).contiguous()
         layer.w2_weight.data = layer.w2_weight.data.transpose(
             1, 2).contiguous()
-        if envs.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP:
+        if envs_ascend.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP:
             torch_npu.npu_format_cast_(layer.w2_weight, ACL_FORMAT_FRACTAL_NZ)
         layer.w13_weight_scale.data = layer.w13_weight_scale.data.view(
             layer.w13_weight_scale.data.shape[0], -1)
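This hunk sits in weight post-processing: the expert weights are first transposed to the layout the fused kernel expects, and only then, behind the flag, cast to the FRACTAL_NZ format. The transpose step alone can be checked without an NPU; a toy shape illustration (shapes are made up):

import torch

w2 = torch.empty(8, 16, 32)              # e.g. (num_experts, dim_in, dim_out)
w2_t = w2.transpose(1, 2).contiguous()   # swap the last two dims, re-pack memory
assert w2_t.shape == (8, 32, 16) and w2_t.is_contiguous()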

vllm_ascend/utils.py

Lines changed: 8 additions & 8 deletions
@@ -31,7 +31,7 @@
 from torch_npu.npu.streams import Event
 from vllm.logger import logger

-import vllm_ascend.envs as envs
+import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config

 if TYPE_CHECKING:
@@ -232,7 +232,7 @@ def find_hccl_library() -> str:
     After importing `torch`, `libhccl.so` can be
     found by `ctypes` automatically.
     """
-    so_file = envs.HCCL_SO_PATH
+    so_file = envs_ascend.HCCL_SO_PATH

     # manually load the hccl library
     if so_file:
@@ -273,8 +273,8 @@ def adapt_patch(is_global_patch: bool = False):

 @functools.cache
 def vllm_version_is(target_vllm_version: str):
-    if envs.VLLM_VERSION is not None:
-        vllm_version = envs.VLLM_VERSION
+    if envs_ascend.VLLM_VERSION is not None:
+        vllm_version = envs_ascend.VLLM_VERSION
     else:
         import vllm
         vllm_version = vllm.__version__
@@ -385,7 +385,7 @@ def destroy(self):

     @contextmanager
     def capture_async(self, duration_tag: str):
-        if not envs.VLLM_ASCEND_MODEL_EXECUTE_TIME_OBSERVE:
+        if not envs_ascend.VLLM_ASCEND_MODEL_EXECUTE_TIME_OBSERVE:
             yield
             return

@@ -403,7 +403,7 @@ def capture_async(self, duration_tag: str):
     def pop_captured_sync(self) -> dict:
         """Pop and synchronize all events in the observation list"""
         durations: dict[str, float] = {}
-        if not envs.VLLM_ASCEND_MODEL_EXECUTE_TIME_OBSERVE:
+        if not envs_ascend.VLLM_ASCEND_MODEL_EXECUTE_TIME_OBSERVE:
             return durations

         while self._observations:
@@ -437,7 +437,7 @@ def get_rm_router_logits_state(ep_size: int, dp_size: int,
     # the fusion operator torch_npu.npu_grouped_matmul_finalize_routing called by allgather ep
     # only supports deepseek v3/r1
     if dp_size > 1:
-        if (envs.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP and ep_size > 1
+        if (envs_ascend.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP and ep_size > 1
                 and is_deepseek_v3_r1):
             return True
         elif ep_size == 1 and is_deepseek_v3_r1:
@@ -451,7 +451,7 @@ def get_rm_router_logits_state(ep_size: int, dp_size: int,
 def get_all_reduce_merge_state(ep_size: int, is_deepseek_v3_r1: bool):
     # the fusion operator torch_npu.npu_grouped_matmul_finalize_routing called by allgather ep
     # only supports deepseek v3/r1
-    if (envs.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP and ep_size > 1
+    if (envs_ascend.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP and ep_size > 1
             and is_deepseek_v3_r1):
         return True
     elif ep_size == 1 and is_deepseek_v3_r1:
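Among these hunks, `vllm_version_is` shows the general pattern: an explicit `VLLM_VERSION` override beats the installed package's version. A standalone rendition (reading the override from the environment here, rather than via `envs_ascend`):

import functools
import os

@functools.cache
def vllm_version_is_sketch(target_vllm_version: str) -> bool:
    version = os.environ.get("VLLM_VERSION")   # explicit override, if any
    if version is None:
        import vllm                            # fall back to the installed package
        version = vllm.__version__
    return version == target_vllm_version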

vllm_ascend/worker/worker_v1.py

Lines changed: 3 additions & 3 deletions
@@ -24,7 +24,7 @@
 import torch.nn as nn
 import torch_npu
 from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
-from vllm import envs
+import vllm.envs as envs_vllm
 from vllm.config import VllmConfig
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
@@ -317,8 +317,8 @@ def _init_worker_distributed_environment(self) -> None:
     def _init_profiler(self):
         # Torch profiler. Enabled and configured through env vars:
         # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
-        if envs.VLLM_TORCH_PROFILER_DIR:
-            torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
+        if envs_vllm.VLLM_TORCH_PROFILER_DIR:
+            torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR
             logger.info("Profiling enabled. Traces will be saved to: %s",
                         torch_profiler_trace_dir)
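`VLLM_TORCH_PROFILER_DIR` is the lone switch here: setting it both enables profiling and names the trace directory. A sketch of how such a switch typically wires up a torch profiler (an illustration, not the worker's actual code):

import os
from torch.profiler import ProfilerActivity, profile, tensorboard_trace_handler

trace_dir = os.environ.get("VLLM_TORCH_PROFILER_DIR")
if trace_dir:
    # Traces land in trace_dir in a TensorBoard-readable layout.
    profiler = profile(activities=[ProfilerActivity.CPU],
                       on_trace_ready=tensorboard_trace_handler(trace_dir))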
