diff --git a/vllm_ascend/__init__.py b/vllm_ascend/__init__.py index 7588e70ed9..2ff8772574 100644 --- a/vllm_ascend/__init__.py +++ b/vllm_ascend/__init__.py @@ -23,5 +23,9 @@ def register(): def register_model(): + # TODO: fixme when TritonPlaceholder fixed + from vllm_ascend.utils import vllm_version_is + if not (vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1")): + import vllm_ascend.patch.worker.patch_main.patch_tritonplaceholder # noqa from .models import register_model register_model() diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py index 445a167b08..40b97da112 100644 --- a/vllm_ascend/patch/__init__.py +++ b/vllm_ascend/patch/__init__.py @@ -158,4 +158,15 @@ # - https://github.com/vllm-project/vllm-ascend/pull/395 # Future Plan: # Revert it when the related pr is merged in vllm and vllm-ascend. -# \ No newline at end of file +# +# ** File: worker/patch_main/patch_tritonplaceholder.py ** +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# 1. 
`triton` Module +# Why: +# Triton is not currently supported on NPU, so importing triton breaks vllm-ascend +# How: +# Register a placeholder `triton` module in sys.modules when triton is not installed +# Related PR (if no, explain why): vllm does not support this yet; +# TritonPlaceholder is only available in vllm>0.8.5.post1 +# Future Plan: +# https://github.com/vllm-project/vllm/pull/17446 diff --git a/vllm_ascend/patch/platform/__init__.py b/vllm_ascend/patch/platform/__init__.py index dc217f9641..6f13e40843 100644 --- a/vllm_ascend/patch/platform/__init__.py +++ b/vllm_ascend/patch/platform/__init__.py @@ -17,7 +17,7 @@ from vllm_ascend.utils import vllm_version_is # Import specific patches for different versions -if vllm_version_is("0.8.5"): +if vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1"): from vllm_ascend.patch.platform import patch_0_8_5 # noqa: F401 from vllm_ascend.patch.platform import patch_common # noqa: F401 else: diff --git a/vllm_ascend/patch/worker/__init__.py b/vllm_ascend/patch/worker/__init__.py index 3c9c0b8aa1..24bbae97e6 100644 --- a/vllm_ascend/patch/worker/__init__.py +++ b/vllm_ascend/patch/worker/__init__.py @@ -18,9 +18,9 @@ from vllm_ascend.utils import vllm_version_is # Import specific patches for different versions -if vllm_version_is("0.8.5"): +if vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1"): from vllm_ascend.patch.worker import patch_0_8_5 # noqa: F401 from vllm_ascend.patch.worker import patch_common # noqa: F401 else: + from vllm_ascend.patch.worker import patch_main # noqa: F401 # isort:skip from vllm_ascend.patch.worker import patch_common # noqa: F401 - from vllm_ascend.patch.worker import patch_main # noqa: F401 diff --git a/vllm_ascend/patch/worker/patch_main/__init__.py b/vllm_ascend/patch/worker/patch_main/__init__.py index 2ed088b746..e87639245d 100644 --- a/vllm_ascend/patch/worker/patch_main/__init__.py +++ b/vllm_ascend/patch/worker/patch_main/__init__.py @@ -13,4 +13,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# \ No newline at end of file +# +import vllm_ascend.patch.worker.patch_main.patch_tritonplaceholder # noqa diff --git a/vllm_ascend/patch/worker/patch_main/patch_tritonplaceholder.py b/vllm_ascend/patch/worker/patch_main/patch_tritonplaceholder.py new file mode 100644 index 0000000000..6271753bd1 --- /dev/null +++ b/vllm_ascend/patch/worker/patch_main/patch_tritonplaceholder.py @@ -0,0 +1,71 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# Copyright 2023 The vLLM team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. 
# Adapted from vllm/triton_utils/importing.py
#
"""Register a placeholder ``triton`` module when Triton cannot be found.

When neither ``triton`` nor ``pytorch-triton-xpu`` can be located with
``importlib.util.find_spec``, this module installs lightweight stand-ins
for ``triton`` and ``triton.language`` in ``sys.modules``. Code that does
``import triton`` and decorates functions with ``@triton.jit``,
``@triton.autotune(...)`` or ``@triton.heuristics(...)`` can then be
imported without Triton; the decorators simply return the function
unchanged.
"""

import importlib
import importlib.machinery  # explicit: ``import importlib`` alone does not guarantee the submodule is loaded
import sys
import types
from importlib.util import find_spec

from vllm.logger import logger

# NOTE(review): "pytorch-triton-xpu" looks like a distribution name rather
# than an importable module name, so this second lookup presumably never
# succeeds -- kept as-is to mirror the upstream vllm logic; confirm.
HAS_TRITON = (
    find_spec("triton") is not None
    or find_spec("pytorch-triton-xpu") is not None  # Not compatible
)

if not HAS_TRITON:
    logger.info("Triton not installed or not compatible; certain GPU-related"
                " functions will not be available.")

    class TritonLanguagePlaceholder(types.ModuleType):
        """Stand-in for ``triton.language`` exposing inert attributes."""

        def __init__(self):
            super().__init__("triton.language")
            # Only the attributes referenced at import time are provided.
            self.constexpr = None
            self.dtype = None

    class TritonPlaceholder(types.ModuleType):
        """Stand-in for the top-level ``triton`` module.

        ``jit``, ``autotune`` and ``heuristics`` are no-op decorators and
        ``language`` is a :class:`TritonLanguagePlaceholder` instance.
        """

        def __init__(self):
            super().__init__("triton")
            self.jit = self._dummy_decorator("jit")
            self.autotune = self._dummy_decorator("autotune")
            self.heuristics = self._dummy_decorator("heuristics")
            self.language = TritonLanguagePlaceholder()
            # importlib.util.find_spec raises ValueError for a module in
            # sys.modules whose __spec__ is None, so give it a real spec.
            self.__spec__ = importlib.machinery.ModuleSpec(
                name="triton", loader=None, origin="placeholder")
            logger.warning_once(
                "Triton is not installed. Using dummy decorators. "
                "Install it via `pip install triton` to enable kernel"
                " compilation.")

        def _dummy_decorator(self, name):
            """Return a no-op decorator usable with or without arguments.

            Args:
                name: Name of the Triton decorator being replaced. It is
                    not used by the returned decorator.

            Returns:
                A callable supporting both ``@deco`` (returns the decorated
                function unchanged) and ``@deco(...)`` (returns a
                pass-through decorator).
            """

            def decorator(func=None, *args, **kwargs):
                # Bare usage: ``@triton.jit`` passes the function directly.
                if callable(func):
                    return func
                # Parameterised usage, including a non-callable positional
                # argument such as ``@triton.heuristics({...})``: hand back
                # a decorator instead of the argument itself.
                return lambda f: f

            return decorator

    _triton_placeholder = TritonPlaceholder()
    sys.modules['triton'] = _triton_placeholder
    # Share one instance so ``import triton.language as tl`` and
    # ``triton.language`` refer to the same object.
    sys.modules['triton.language'] = _triton_placeholder.language

    # Logged only when the replacement actually happened; a genuine Triton
    # module already present in sys.modules must not trigger this message.
    logger.info("Triton module has been replaced with a placeholder.")