Skip to content

Commit c465d53

Browse files
committed
[Misc] format patch to make the code clear
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent e564470 commit c465d53

File tree

12 files changed

+25
-35
lines changed

12 files changed

+25
-35
lines changed

tests/singlecard/spec_decode/test_spec_decode_worker.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -589,7 +589,6 @@ def test_empty_input_batch(k: int, batch_size: int,
589589

590590
@pytest.mark.parametrize("acceptance_sampler_method",
591591
["rejection_sampler", "typical_acceptance_sampler"])
592-
@pytest.mark.skip_global_cleanup
593592
def test_init_device(acceptance_sampler_method: str):
594593
"""Verify SpecDecodeWorker invokes proposer/scorer worker init_device, as
595594
well as other GPU initialization.
@@ -646,7 +645,6 @@ def test_initialize_cache(acceptance_sampler_method):
646645
@pytest.mark.parametrize('draft_kv_size_bytes', [0, 2 * 2 * 768, 2 * 2 * 4096])
647646
@pytest.mark.parametrize("acceptance_sampler_method",
648647
["rejection_sampler", "typical_acceptance_sampler"])
649-
@pytest.mark.skip_global_cleanup
650648
def test_determine_num_available_blocks(available_gpu_blocks: int,
651649
available_cpu_blocks: int,
652650
target_cache_block_size_bytes: int,
@@ -685,7 +683,6 @@ def test_determine_num_available_blocks(available_gpu_blocks: int,
685683
@pytest.mark.parametrize('target_cache_block_size_bytes',
686684
[2 * 2 * 4096, 2 * 2 * 8192])
687685
@pytest.mark.parametrize('draft_kv_size_bytes', [0, 2 * 2 * 768, 2 * 2 * 4096])
688-
@pytest.mark.skip_global_cleanup
689686
def test_split_num_cache_blocks_evenly(available_gpu_blocks: int,
690687
target_cache_block_size_bytes: int,
691688
draft_kv_size_bytes: int):

vllm_ascend/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,9 @@
1818

1919
def register():
2020
"""Register the NPU platform."""
21-
2221
return "vllm_ascend.platform.NPUPlatform"
2322

2423

2524
def register_model():
26-
from .models import register_model
25+
from vllm_ascend.models import register_model
2726
register_model()

vllm_ascend/models/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22

33

44
def register_model():
5-
from .deepseek_mtp import CustomDeepSeekMTP # noqa: F401
6-
from .deepseek_v2 import CustomDeepseekV2ForCausalLM # noqa: F401
7-
from .deepseek_v2 import CustomDeepseekV3ForCausalLM # noqa: F401
8-
from .qwen2_5_vl import \
9-
AscendQwen2_5_VLForConditionalGeneration # noqa: F401
10-
from .qwen2_vl import AscendQwen2VLForConditionalGeneration # noqa: F401
5+
from vllm_ascend.models.deepseek_mtp import CustomDeepSeekMTP # noqa: F401
6+
from vllm_ascend.models.deepseek_v2 import CustomDeepseekV2ForCausalLM # noqa: F401
7+
from vllm_ascend.models.deepseek_v2 import CustomDeepseekV3ForCausalLM # noqa: F401
8+
from vllm_ascend.models.qwen2_5_vl import \
9+
AscendQwen2_5_VLForConditionalGeneration  # noqa: F401
10+
from vllm_ascend.models.qwen2_vl import AscendQwen2VLForConditionalGeneration # noqa: F401
1111

1212
ModelRegistry.register_model(
1313
"DeepSeekMTPModel",

vllm_ascend/models/deepseek_mtp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from vllm.model_executor.sampling_metadata import SamplingMetadata
3838
from vllm.sequence import IntermediateTensors
3939

40-
from .deepseek_v2 import CustomDeepseekV2DecoderLayer
40+
from vllm_ascend.models.deepseek_v2 import CustomDeepseekV2DecoderLayer
4141

4242

4343
class CustomDeepSeekMultiTokenPredictorLayer(DeepSeekMultiTokenPredictorLayer):

vllm_ascend/patch/__init__.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,14 @@
9090
# ===============
9191
# ** File: worker/patch_common/patch_metrics.py **
9292
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
93-
# 1. `vllm.spec_decode.metrics.AsyncMetricsCollector.maybe_collect_rejsample_metrics`
93+
# 1. `vllm.spec_decode.metrics.AsyncMetricsCollector._copy_rejsample_metrics_async`
9494
# Why:
9595
# There are cuda hard code (current_platform.is_cuda_alike()) in
96-
# `AsyncMetricsCollector.maybe_collect_rejsample_metrics`
96+
# `AsyncMetricsCollector._copy_rejsample_metrics_async`
9797
# How:
9898
# Change to use `current_platform.Event` to determine whether to return None
99-
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
100-
# https://github.com/vllm-project/vllm/pull/14411
99+
# Related PR (if no, explain why):
100+
# Need a PR to vllm to fix the issue.
101101
# Future Plan:
102102
# Revert it when the related pr is merged in vllm.
103103
#
@@ -110,7 +110,7 @@
110110
# However float32 is not supported in cann rope op, thus we keep this patch
111111
# How:
112112
# Removed the dtype convert operations in forward
113-
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
113+
# Related PR (if no, explain why):
114114
# NO, only for npu due to rope op.
115115
# Future Plan:
116116
# Keep this patch in vllm-ascend.
@@ -126,7 +126,7 @@
126126
# - support attention metadata register to the set supported spec decode
127127
# - offer a api in platform to determine whether spec decode is supported,
128128
# and deprecate is_cuda_alike in it.
129-
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
129+
# Related PR (if no, explain why):
130130
# - https://github.com/vllm-project/vllm/pull/15195
131131
# - https://github.com/vllm-project/vllm-ascend/pull/395
132132
# Future Plan:
@@ -138,7 +138,7 @@
138138
# vLLM `Remove Sampler from Model Code` so vllm-ascend needs adapt to this change.
139139
# How:
140140
# Use vLLM 0.8.4 method to patch it.
141-
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
141+
# Related PR (if no, explain why):
142142
# - https://github.com/vllm-project/vllm/pull/15195
143143
# - https://github.com/vllm-project/vllm-ascend/pull/395
144144
# Future Plan:
@@ -153,7 +153,7 @@
153153
# `FlashAttentionMetadata`
154154
# How:
155155
# ditto
156-
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
156+
# Related PR (if no, explain why):
157157
# - https://github.com/vllm-project/vllm/pull/15195
158158
# - https://github.com/vllm-project/vllm-ascend/pull/395
159159
# Future Plan:

vllm_ascend/patch/worker/patch_common/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# limitations under the License.
1616
#
1717

18+
import vllm_ascend.patch.worker.patch_common.patch_cache_engine # noqa
1819
import vllm_ascend.patch.worker.patch_common.patch_metrics # noqa
1920
import vllm_ascend.patch.worker.patch_common.patch_minicpm # noqa
2021
import vllm_ascend.patch.worker.patch_common.patch_multi_step_worker # noqa

vllm_ascend/worker/cache_engine.py renamed to vllm_ascend/patch/worker/patch_common/patch_cache_engine.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
#
22
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3-
# This file is a part of the vllm-ascend project.
4-
# Adapted from vllm-project/vllm/vllm/worker/model_runner.py
53
# Copyright 2023 The vLLM team.
64
#
75
# Licensed under the Apache License, Version 2.0 (the "License");

vllm_ascend/patch/worker/patch_common/patch_metrics.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,9 @@
1515
# limitations under the License.
1616
#
1717

18-
from typing import Callable
19-
2018
import torch
2119
from vllm.spec_decode.metrics import AsyncMetricsCollector
2220

23-
Timer = Callable[[], float]
24-
2521

2622
def _copy_rejsample_metrics_async(self) -> torch.npu.Event:
2723
"""

vllm_ascend/quantization/quant_config.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@
3434
from vllm.model_executor.utils import set_weight_attrs
3535

3636
from vllm_ascend.ops.fused_moe import AscendUnquantizedFusedMoEMethod
37-
38-
from .quantizer import AscendQuantizer
37+
from vllm_ascend.quantization.quantizer import AscendQuantizer
3938

4039

4140
@register_quantization_config("ascend")

vllm_ascend/quantization/quantizer.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,12 @@
2222

2323
from vllm.logger import logger
2424

25-
from .func_wrapper import (wrapper_load_model, wrapper_rmsnorm_forward_oot,
26-
wrapper_rmsnorm_init)
27-
from .w8a8 import AscendW8A8LinearMethod
28-
from .w8a8_dynamic import (AscendW8A8DynamicFusedMoEMethod,
29-
AscendW8A8DynamicLinearMethod)
25+
from vllm_ascend.quantization.func_wrapper import (wrapper_load_model,
26+
wrapper_rmsnorm_forward_oot,
27+
wrapper_rmsnorm_init)
28+
from vllm_ascend.quantization.w8a8 import AscendW8A8LinearMethod
29+
from vllm_ascend.quantization.w8a8_dynamic import (
30+
AscendW8A8DynamicFusedMoEMethod, AscendW8A8DynamicLinearMethod)
3031

3132
CUSTOMIZED_QUANTIZER_TYPE: List[str] = []
3233

0 commit comments

Comments (0)