From 5cb546a7c8b38b10fe4842e4d7b61e2c227e6ccf Mon Sep 17 00:00:00 2001
From: Yikun Jiang
Date: Sat, 20 Sep 2025 21:46:29 +0800
Subject: [PATCH 1/2] Bump main

Signed-off-by: Yikun Jiang
---
 .github/workflows/format_pr_body.yaml        | 2 +-
 .github/workflows/vllm_ascend_test.yaml      | 6 +++---
 .github/workflows/vllm_ascend_test_full.yaml | 4 ++--
 tests/e2e/conftest.py                        | 9 ++++++++-
 vllm_ascend/sample/sampler.py                | 7 +++----
 5 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/format_pr_body.yaml b/.github/workflows/format_pr_body.yaml
index 407ce228ab..8b0661a9bf 100644
--- a/.github/workflows/format_pr_body.yaml
+++ b/.github/workflows/format_pr_body.yaml
@@ -36,7 +36,7 @@ jobs:
 
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=6d8246aaffff3ebec84767e373212a7b8da328e2
+          VLLM_COMMIT=c60e6137f0bf2034853919b3a9d705d7e06b93cf
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
 
       - name: Checkout repository
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 7ffff02bea..c406907ebd 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -82,7 +82,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     steps:
       - name: Install packages
         run: |
@@ -140,7 +140,7 @@
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: singlecard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
@@ -206,7 +206,7 @@
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: multicard e2e test - light
     runs-on: ${{ matrix.os }}
    container:
diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml
index ab9992f43d..0c389a58a1 100644
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@@ -72,7 +72,7 @@
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: singlecard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
@@ -156,7 +156,7 @@
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: multicard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 430153ae9d..d0f1b762f9 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -32,7 +32,14 @@
                           BatchEncoding, BatchFeature)
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
 from vllm import LLM, SamplingParams
-from vllm.config import TaskOption, _get_and_verify_dtype
+
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.10.2"):
+    from vllm.config import TaskOption, _get_and_verify_dtype
+else:
+    from vllm.config.model import TaskOption, _get_and_verify_dtype
+
 from vllm.inputs import TextPrompt
 from vllm.outputs import RequestOutput
 from vllm.transformers_utils.utils import maybe_model_redirect
diff --git a/vllm_ascend/sample/sampler.py b/vllm_ascend/sample/sampler.py
index e009e4cd56..6e5b38444d 100644
--- a/vllm_ascend/sample/sampler.py
+++ b/vllm_ascend/sample/sampler.py
@@ -1,12 +1,11 @@
 import torch
 import torch_npu
-from vllm.config import LogprobsMode
 from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
 from vllm.v1.sample.sampler import Sampler
 
 from vllm_ascend.utils import is_310p
 
-DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
+DEFAULT_LOGPROBS_MODE = "raw_logprobs"
 
 
 class AscendSampler(Sampler):
@@ -65,9 +64,9 @@ def forward_native(self, logits, generators, k, p):
         """Override pytorch native implementation to torch_npu"""
         logits = self._apply_top_k_top_p(logits, k, p)
         logits_to_return = None
-        if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
+        if self.logprobs_mode == "processed_logits":
             logits_to_return = logits
-        elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
+        elif self.logprobs_mode == "processed_logprobs":
             logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32)
 
         probs = logits.softmax(dim=-1, dtype=torch.float32)

From 230d695081701235032c97b0c9ea579c70a062e0 Mon Sep 17 00:00:00 2001
From: Yikun Jiang
Date: Sat, 20 Sep 2025 23:53:37 +0800
Subject: [PATCH 2/2] Fix UnboundLocalError

Signed-off-by: Yikun Jiang
---
 vllm_ascend/sample/sampler.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/vllm_ascend/sample/sampler.py b/vllm_ascend/sample/sampler.py
index 6e5b38444d..6a5c130ffe 100644
--- a/vllm_ascend/sample/sampler.py
+++ b/vllm_ascend/sample/sampler.py
@@ -3,9 +3,13 @@
 from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
 from vllm.v1.sample.sampler import Sampler
 
-from vllm_ascend.utils import is_310p
+from vllm_ascend.utils import is_310p, vllm_version_is
 
-DEFAULT_LOGPROBS_MODE = "raw_logprobs"
+if vllm_version_is("0.10.2"):
+    from vllm.config import LogprobsMode
+    DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
+else:
+    DEFAULT_LOGPROBS_MODE = "raw_logprobs"
 
 
 class AscendSampler(Sampler):
@@ -64,10 +68,18 @@ def forward_native(self, logits, generators, k, p):
         """Override pytorch native implementation to torch_npu"""
         logits = self._apply_top_k_top_p(logits, k, p)
         logits_to_return = None
-        if self.logprobs_mode == "processed_logits":
-            logits_to_return = logits
-        elif self.logprobs_mode == "processed_logprobs":
-            logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32)
+        if vllm_version_is("0.10.2"):
+            if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
+                logits_to_return = logits
+            elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
+                logits_to_return = logits.log_softmax(dim=-1,
+                                                      dtype=torch.float32)
+        else:
+            if self.logprobs_mode == "processed_logits":
+                logits_to_return = logits
+            elif self.logprobs_mode == "processed_logprobs":
+                logits_to_return = logits.log_softmax(dim=-1,
+                                                      dtype=torch.float32)
 
         probs = logits.softmax(dim=-1, dtype=torch.float32)
         return random_sample(probs, generators), logits_to_return