Commit 02f89d1

[CI] Update vllm version to 20250922(5aeb925) (#3091)
### What this PR does / why we need it?

This PR bumps the vLLM commit hash to vllm-project/vllm@5aeb925, picking up fixes for the following issues:

1. vllm-project/vllm#25345, which removed the V0 metadata.
2. vllm-project/vllm#25332
3. vllm-project/vllm#25334
4. vllm-project/vllm#23558. Note that this vLLM commit updates the model registration logic: it now checks that every registered model resolves under the `vllm.model_executor.models` path, which breaks our custom registration of the deepseek_v3 model (that module doesn't exist under the vLLM model path). As a temporary fix, the deepseek_v3 model registration is moved into deepseek_v2.

### How was this patch tested?

- vLLM version: v0.10.2
- vLLM main: vllm-project/vllm@9607d5e

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
1 parent 1c9f0fe · commit 02f89d1

21 files changed (+58 −92 lines)
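The deepseek_v3 workaround described above works by exposing the V3 class from a module name the new registry check accepts. A minimal sketch of the idea, with hypothetical stand-in class bodies (the real classes live in vllm-ascend and are far larger):

```python
# Hypothetical sketch of the temporary workaround: the V3 class is
# defined (or aliased) inside the deepseek_v2 module so that the
# registered "module:Class" path keeps resolving under the new
# registry check. Class bodies here are stand-ins, not the real code.

# vllm_ascend/models/deepseek_v2.py
class CustomDeepseekV2ForCausalLM:
    """Stand-in for the real DeepSeek V2 model implementation."""


class CustomDeepseekV3ForCausalLM(CustomDeepseekV2ForCausalLM):
    """V3 variant reusing the V2 implementation; defining it here keeps
    "vllm_ascend.models.deepseek_v2:CustomDeepseekV3ForCausalLM" importable.
    """
```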

.github/workflows/format_pr_body.yaml
Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ jobs:

       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=9607d5eb449711b349d4c2bee0a9c94afcc7ed14
+          VLLM_COMMIT=5aeb9254521023f97aca292b3478aa7ff485ffb2
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV

       - name: Checkout repository
.github/workflows/vllm_ascend_test.yaml
Lines changed: 3 additions & 3 deletions

@@ -42,7 +42,7 @@ jobs:
   lint:
     uses: ./.github/workflows/pre-commit.yml
     with:
-      vllm: 9607d5eb449711b349d4c2bee0a9c94afcc7ed14
+      vllm: 5aeb9254521023f97aca292b3478aa7ff485ffb2

   changes:
     runs-on: ubuntu-latest
@@ -83,7 +83,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [9607d5eb449711b349d4c2bee0a9c94afcc7ed14, v0.10.2]
+        vllm_version: [5aeb9254521023f97aca292b3478aa7ff485ffb2, v0.10.2]
     steps:
       - name: Install packages
         run: |
@@ -138,7 +138,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [9607d5eb449711b349d4c2bee0a9c94afcc7ed14, v0.10.2]
+        vllm_version: [5aeb9254521023f97aca292b3478aa7ff485ffb2, v0.10.2]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.
.github/workflows/vllm_ascend_test_full.yaml
Lines changed: 1 addition & 1 deletion

@@ -68,7 +68,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [9607d5eb449711b349d4c2bee0a9c94afcc7ed14, v0.10.2]
+        vllm_version: [5aeb9254521023f97aca292b3478aa7ff485ffb2, v0.10.2]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml

docs/source/developer_guide/modeling/adding_a_new_model.md
Lines changed: 0 additions & 1 deletion

@@ -61,7 +61,6 @@ from torch import nn
 from vllm.attention import Attention
 from vllm.config import VllmConfig
 from vllm.sequence import IntermediateTensors
-from vllm.model_executor.sampling_metadata import SamplingMetadata

 class CustomAttention(nn.Module):
     def __init__(self, vllm_config: VllmConfig, prefix: str):
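(This matches item 1 of the description: vllm-project/vllm#25345 removed the V0 metadata, so the documentation example no longer imports `SamplingMetadata`.)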

tests/e2e/model_utils.py
Lines changed: 6 additions & 1 deletion

@@ -19,7 +19,12 @@

 from typing import Dict, List, Optional, Sequence, Tuple, Union

-from vllm.sequence import PromptLogprobs, SampleLogprobs
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.10.2"):
+    from vllm.sequence import PromptLogprobs, SampleLogprobs
+else:
+    from vllm.logprobs import PromptLogprobs, SampleLogprobs

 TokensText = Tuple[List[int], str]
tests/ut/core/test_schedule_config.py
Lines changed: 0 additions & 16 deletions

@@ -27,7 +27,6 @@ def setUp(self):
             max_model_len=8192,
             is_multimodal_model=False,
             send_delta_data=False,
-            scheduler_delay_factor=0,
         )

     def test_initialize_from_config_with_default(self):
@@ -90,21 +89,6 @@ def test_not_implemented_send_delta_data(self):
             str(context.exception),
         )

-    def test_not_implemented_delay_factor(self):
-        with self.assertRaises(NotImplementedError) as context:
-            AscendSchedulerConfig.initialize_from_config(
-                self.basic_scheduler_config,
-                AscendSchedulerConfig(
-                    delay_factor=1,
-                    max_num_batched_tokens=2048,
-                    max_model_len=2048,
-                ),
-            )
-        self.assertIn(
-            "currently AscendScheduler doesn't support scheduler_delay_factor",
-            str(context.exception),
-        )
-
     def test_no_override(self):
         ascend_config = AscendSchedulerConfig.initialize_from_config(
             self.basic_scheduler_config, {})

tests/ut/models/test_deepseek_mtp.py
Lines changed: 0 additions & 2 deletions

@@ -168,8 +168,6 @@ def setup_mtp(self, mocker: MockerFixture):
         mocker.patch(
             "vllm_ascend.models.deepseek_mtp.CustomDeepSeekMultiTokenPredictorLayer.__call__",
             return_value=None)
-        mocker.patch("vllm.model_executor.layers.sampler.get_sampler",
-                     return_value=None)
         mocker.patch(
             "vllm_ascend.ops.vocab_parallel_embedding.AscendVocabParallelEmbedding.__init__",
             return_value=None)

tests/ut/torchair/models/test_torchair_deepseek_mtp.py
Lines changed: 0 additions & 2 deletions

@@ -165,8 +165,6 @@ def setup_mtp(self, mocker: MockerFixture):
         mocker.patch(
             "vllm_ascend.torchair.models.torchair_deepseek_mtp.TorchairDeepSeekMultiTokenPredictorLayer.__call__",
             return_value=None)
-        mocker.patch("vllm.model_executor.layers.sampler.get_sampler",
-                     return_value=None)
         mocker.patch(
             "vllm_ascend.ops.vocab_parallel_embedding.AscendVocabParallelEmbedding.__init__",
             return_value=None)
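Both MTP test suites drop the patch on `vllm.model_executor.layers.sampler.get_sampler`; that sampler module appears to have gone away with the V0 removals referenced in the description, so there is nothing left to mock.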

vllm_ascend/core/schedule_config.py
Lines changed: 1 addition & 1 deletion

@@ -74,7 +74,7 @@ def __post_init__(self) -> None:
         if self.send_delta_data:
             raise NotImplementedError(
                 "currently AscendScheduler doesn't support send_delta_data.")
-        if self.delay_factor > 0:
+        if getattr(self, "scheduler_delay_factor", 0) > 0:
             raise NotImplementedError(
                 "currently AscendScheduler doesn't support scheduler_delay_factor."
             )
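`scheduler_delay_factor` no longer exists on newer vLLM scheduler configs, so the check now reads it defensively. A self-contained illustration of the `getattr` pattern, using hypothetical config classes:

```python
# Illustration of the getattr guard with hypothetical config classes:
# the field exists on older configs but was removed upstream.
class OldSchedulerConfig:
    scheduler_delay_factor = 0.0


class NewSchedulerConfig:
    pass  # field removed in newer vLLM


for cfg in (OldSchedulerConfig(), NewSchedulerConfig()):
    # A default of 0 turns the check into a no-op when the attribute
    # is gone, instead of raising AttributeError.
    if getattr(cfg, "scheduler_delay_factor", 0) > 0:
        raise NotImplementedError(
            "currently AscendScheduler doesn't support scheduler_delay_factor.")
    print(type(cfg).__name__, "passes the delay-factor check")
```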

vllm_ascend/models/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@ def register_model():

     ModelRegistry.register_model(
         "DeepseekV3ForCausalLM",
-        "vllm_ascend.models.deepseek_v3:CustomDeepseekV3ForCausalLM")
+        "vllm_ascend.models.deepseek_v2:CustomDeepseekV3ForCausalLM")

     ModelRegistry.register_model(
         "DeepSeekMTPModel",
