
Commit 28ce6ee

[Fix] Correct minor formatting issues
Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
Parent commit: ba0cd81

7 files changed: 38 additions, 37 deletions

Dockerfile

Lines changed: 2 additions & 1 deletion

@@ -51,7 +51,8 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
     export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib:$LD_LIBRARY_PATH && \
     export LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/lib64:$LIBRARY_PATH && \
-    python3 -m pip install -v /workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/
+    python3 -m pip install -r /workspace/vllm-ascend/requirements.txt && \
+    python3 -m pip install -v --no-build-isolation /workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/
 
 # Install modelscope (for fast download) and ray (for multinode)
 RUN python3 -m pip install modelscope ray
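
Note on the ordering: with --no-build-isolation, pip builds vllm-ascend against the packages already installed in the image instead of a throwaway isolated build environment, so every build-time dependency must be present up front. That is presumably why the new requirements.txt install step now precedes the package build, rather than letting pip resolve build dependencies on the fly.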

Dockerfile.openEuler

Lines changed: 2 additions & 1 deletion

@@ -50,7 +50,8 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
     export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib:$LD_LIBRARY_PATH && \
     export LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/lib64:$LIBRARY_PATH && \
-    python3 -m pip install -v /workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/
+    python3 -m pip install -r /workspace/vllm-ascend/requirements.txt && \
+    python3 -m pip install --no-build-isolation -v /workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/
 
 # Install modelscope (for fast download) and ray (for multinode)
 RUN python3 -m pip install modelscope ray

tests/compile/test_simple.py

Lines changed: 0 additions & 1 deletion

@@ -13,7 +13,6 @@
                          set_current_vllm_config)
 from vllm.utils import direct_register_custom_op
 
-
 global_counter = 0
 
 # create a library to hold the custom op

tests/conftest.py

Lines changed: 3 additions & 1 deletion

@@ -36,9 +36,11 @@
 # TODO: remove this part after the patch merged into vllm, if
 # we not explicitly patch here, some of them might be effectiveless
 # in pytest scenario
-from vllm_ascend.utils import adapt_patch  # noqa E402
+from vllm_ascend.utils import (adapt_patch,  # noqa E402
+                               register_dummy_fusion_op)
 
 adapt_patch(True)
+register_dummy_fusion_op()
 
 from vllm.distributed.parallel_state import (  # noqa E402
     destroy_distributed_environment, destroy_model_parallel)
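
A plausible reading of this hunk: pytest drives vLLM without necessarily passing through the platform's pre_register_and_update hook (see the vllm_ascend/platform.py change below), so the conftest now calls register_dummy_fusion_op() itself to guarantee the placeholder torch.ops._C entries exist before any test touches vLLM's fusion passes.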

vllm_ascend/ops/__init__.py

Lines changed: 0 additions & 30 deletions

@@ -15,38 +15,8 @@
 # This file is a part of the vllm-ascend project.
 #
 
-import torch
-import torch_npu
-
 import vllm_ascend.ops.activation  # noqa
 import vllm_ascend.ops.fused_moe  # noqa
 import vllm_ascend.ops.layernorm  # noqa
 import vllm_ascend.ops.rotary_embedding  # noqa
 import vllm_ascend.ops.vocab_parallel_embedding  # noqa
-
-
-class dummyFusionOp:
-    default = None
-
-    def __init__(self, name=""):
-        self.name = name
-
-
-def register_dummy_fusion_op() -> None:
-    torch.cuda.CUDAGraph = torch_npu.npu.NPUGraph
-    torch.ops._C.rms_norm = dummyFusionOp(name="rms_norm")
-    torch.ops._C.fused_add_rms_norm = dummyFusionOp(name="fused_add_rms_norm")
-    torch.ops._C.static_scaled_fp8_quant = dummyFusionOp(
-        name="static_scaled_fp8_quant")
-    torch.ops._C.dynamic_scaled_fp8_quant = dummyFusionOp(
-        name="dynamic_scaled_fp8_quant")
-    torch.ops._C.dynamic_per_token_scaled_fp8_quant = dummyFusionOp(
-        name="dynamic_per_token_scaled_fp8_quant")
-    torch.ops._C.rms_norm_static_fp8_quant = dummyFusionOp(
-        name="rms_norm_static_fp8_quant")
-    torch.ops._C.fused_add_rms_norm_static_fp8_quant = dummyFusionOp(
-        name="fused_add_rms_norm_static_fp8_quant")
-    torch.ops._C.rms_norm_dynamic_per_token_quant = dummyFusionOp(
-        name="rms_norm_dynamic_per_token_quant")
-    torch.ops._C.rms_norm_dynamic_per_token_quant = dummyFusionOp(
-        name="rms_norm_dynamic_per_token_quant")

vllm_ascend/platform.py

Lines changed: 6 additions & 3 deletions

@@ -22,10 +22,11 @@
 import torch
 import torch_npu  # noqa: F401
 import vllm.envs as envs
+from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
 from vllm.logger import logger
 from vllm.platforms import Platform, PlatformEnum
-from vllm_ascend.ops import register_dummy_fusion_op
-from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
+
+from vllm_ascend.utils import register_dummy_fusion_op
 
 CUSTOM_OP_ENABLED = False
 try:

@@ -76,6 +77,8 @@ def pre_register_and_update(cls,
         from vllm_ascend.utils import adapt_patch
         adapt_patch(is_global_patch=True)
 
+        register_dummy_fusion_op()
+
         from vllm_ascend.quantization.quant_config import \
             AscendQuantConfig  # noqa: F401
 

@@ -115,7 +118,7 @@ def mem_get_info(cls) -> Tuple[int, int]:
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         from vllm.config import CompilationLevel  # noqa: E402
         compilation_config = vllm_config.compilation_config
-        register_dummy_fusion_op()
+
         enforce_eager_flag = False
         # Check whether the eager mode is configured
         try:
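
Net effect of the three hunks: the dummy-op registration moves from check_and_update_config, which vLLM can call more than once and comparatively late, into pre_register_and_update, which runs once during early platform bring-up. Assuming vLLM's usual hook ordering, the placeholder ops are therefore in place before any compilation config is examined. The import changes are the mechanical follow-on: register_dummy_fusion_op now comes from vllm_ascend.utils instead of vllm_ascend.ops, and the _register_atb_extensions import is regrouped with the other torch_npu imports.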

vllm_ascend/utils.py

Lines changed: 25 additions & 0 deletions

@@ -28,6 +28,31 @@
 VLLM_ENABLE_GRAPH_MODE = os.environ.get('VLLM_ENABLE_GRAPH_MODE', '0')
 
 
+class dummyFusionOp:
+    default = None
+
+    def __init__(self, name=""):
+        self.name = name
+
+
+def register_dummy_fusion_op() -> None:
+    torch.cuda.CUDAGraph = torch_npu.npu.NPUGraph
+    torch.ops._C.rms_norm = dummyFusionOp(name="rms_norm")
+    torch.ops._C.fused_add_rms_norm = dummyFusionOp(name="fused_add_rms_norm")
+    torch.ops._C.static_scaled_fp8_quant = dummyFusionOp(
+        name="static_scaled_fp8_quant")
+    torch.ops._C.dynamic_scaled_fp8_quant = dummyFusionOp(
+        name="dynamic_scaled_fp8_quant")
+    torch.ops._C.dynamic_per_token_scaled_fp8_quant = dummyFusionOp(
+        name="dynamic_per_token_scaled_fp8_quant")
+    torch.ops._C.rms_norm_static_fp8_quant = dummyFusionOp(
+        name="rms_norm_static_fp8_quant")
+    torch.ops._C.fused_add_rms_norm_static_fp8_quant = dummyFusionOp(
+        name="fused_add_rms_norm_static_fp8_quant")
+    torch.ops._C.rms_norm_dynamic_per_token_quant = dummyFusionOp(
+        name="rms_norm_dynamic_per_token_quant")
+
+
 def try_register_lib(lib_name: str, lib_info: str = ""):
     import importlib
     import importlib.util
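
Note that the move also drops the duplicated rms_norm_dynamic_per_token_quant assignment from the old ops/__init__.py copy. For readers skimming the diff, the helper exists to paper over vLLM's custom CUDA fusion ops, which are never compiled on Ascend: vLLM's fusion passes probe attributes such as torch.ops._C.rms_norm.default, and without placeholders those lookups raise AttributeError. Below is a minimal, self-contained sketch of the same pattern; the SimpleNamespace stand-in for torch.ops._C and the loop are illustrative simplifications, not project code (the real function also aliases torch.cuda.CUDAGraph to torch_npu.npu.NPUGraph), so it runs anywhere Python does:

# Sketch only: _C stands in for torch.ops._C on a build without CUDA ops.
from types import SimpleNamespace

_C = SimpleNamespace()


class dummyFusionOp:
    default = None  # probes for <op>.default resolve to None, not an error

    def __init__(self, name=""):
        self.name = name


def register_dummy_fusion_op(namespace=_C) -> None:
    # Register a placeholder for each fusion op the compiler may probe.
    for name in ("rms_norm", "fused_add_rms_norm",
                 "rms_norm_dynamic_per_token_quant"):
        setattr(namespace, name, dummyFusionOp(name=name))


register_dummy_fusion_op()
assert _C.rms_norm.default is None  # mirrors vLLM's lookup pattern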
