 from vllm.engine.llm_engine import LLMEngine, SchedulerOutputState
 from vllm.engine.metrics_types import StatLoggerBase
 from vllm.engine.protocol import EngineClient
-from vllm.executor.executor_base import ExecutorAsyncBase
-from vllm.executor.gpu_executor import GPUExecutorAsync
-from vllm.executor.ray_utils import initialize_ray_cluster
+from vllm.executor.executor_base import ExecutorBase
 from vllm.inputs import PromptType
 from vllm.inputs.preprocess import InputPreprocessor
 from vllm.logger import init_logger
@@ -620,69 +618,9 @@ def __del__(self):
             rt.new_requests_event.set()
 
     @classmethod
-    def _get_executor_cls(
-            cls, engine_config: VllmConfig) -> Type[ExecutorAsyncBase]:
-        distributed_executor_backend = (
-            engine_config.parallel_config.distributed_executor_backend)
-        if isinstance(distributed_executor_backend, type):
-            if not issubclass(distributed_executor_backend, ExecutorAsyncBase):
-                raise TypeError(
-                    "distributed_executor_backend must be a subclass of "
-                    f"ExecutorAsyncBase. Got {distributed_executor_backend}.")
-            executor_class = distributed_executor_backend
-        elif engine_config.device_config.device_type == "neuron":
-            from vllm.executor.neuron_executor import NeuronExecutorAsync
-            executor_class = NeuronExecutorAsync
-        elif engine_config.device_config.device_type == "tpu":
-            if distributed_executor_backend == "ray":
-                from vllm.executor.ray_tpu_executor import RayTPUExecutorAsync
-                executor_class = RayTPUExecutorAsync
-            else:
-                assert distributed_executor_backend is None
-                from vllm.executor.tpu_executor import TPUExecutorAsync
-                executor_class = TPUExecutorAsync
-        elif engine_config.device_config.device_type == "cpu":
-            from vllm.executor.cpu_executor import CPUExecutorAsync
-            executor_class = CPUExecutorAsync
-        elif engine_config.device_config.device_type == "hpu":
-            if distributed_executor_backend == "ray":
-                initialize_ray_cluster(engine_config.parallel_config)
-                from vllm.executor.ray_hpu_executor import RayHPUExecutorAsync
-                executor_class = RayHPUExecutorAsync
-            else:
-                from vllm.executor.hpu_executor import HPUExecutorAsync
-                executor_class = HPUExecutorAsync
-        elif engine_config.device_config.device_type == "openvino":
-            assert distributed_executor_backend is None, (
-                "Distributed execution is not supported with "
-                "the OpenVINO backend.")
-            from vllm.executor.openvino_executor import OpenVINOExecutorAsync
-            executor_class = OpenVINOExecutorAsync
-        elif engine_config.device_config.device_type == "xpu":
-            if distributed_executor_backend is None:
-                from vllm.executor.xpu_executor import XPUExecutorAsync
-                executor_class = XPUExecutorAsync
-            elif distributed_executor_backend == "ray":
-                from vllm.executor.ray_xpu_executor import RayXPUExecutorAsync
-                executor_class = RayXPUExecutorAsync
-            elif distributed_executor_backend == "mp":
-                from vllm.executor.multiproc_xpu_executor import (
-                    MultiprocessingXPUExecutorAsync)
-                executor_class = MultiprocessingXPUExecutorAsync
-            else:
-                raise RuntimeError(
-                    "Not supported distributed execution model on XPU device.")
-        elif distributed_executor_backend == "ray":
-            from vllm.executor.ray_gpu_executor import RayGPUExecutorAsync
-            executor_class = RayGPUExecutorAsync
-        elif distributed_executor_backend == "mp":
-            from vllm.executor.multiproc_gpu_executor import (
-                MultiprocessingGPUExecutorAsync)
-            executor_class = MultiprocessingGPUExecutorAsync
-        else:
-            from vllm.executor.gpu_executor import GPUExecutorAsync
-            executor_class = GPUExecutorAsync
-        return executor_class
+    def _get_executor_cls(cls,
+                          engine_config: VllmConfig) -> Type[ExecutorBase]:
+        return LLMEngine._get_executor_cls(engine_config)
 
     @classmethod
     def from_engine_args(
@@ -700,9 +638,6 @@ def from_engine_args(
 
         executor_class = cls._get_executor_cls(engine_config)
 
-        if executor_class.uses_ray:
-            initialize_ray_cluster(engine_config.parallel_config)
-
         # Create the async LLM engine.
         engine = cls(
             vllm_config=engine_config,
@@ -1242,23 +1177,12 @@ def remove_logger(self, logger_name: str) -> None:
         self.engine.remove_logger(logger_name=logger_name)
 
     async def start_profile(self) -> None:
-        # using type instead of isinstance to check to avoid capturing
-        # inherited classes
-        if type(self.engine.model_executor) == GPUExecutorAsync:  # noqa: E721
-            self.engine.model_executor.start_profile()
-        else:
-            self.engine.model_executor._run_workers("start_profile")
+        self.engine.start_profile()
 
     async def stop_profile(self) -> None:
-        # using type instead of isinstance to check to avoid capturing
-        # inherited classes
-        if type(self.engine.model_executor) == GPUExecutorAsync:  # noqa: E721
-            self.engine.model_executor.stop_profile()
-        else:
-            self.engine.model_executor._run_workers("stop_profile")
+        self.engine.stop_profile()
 
     async def add_lora(self, lora_request: LoRARequest) -> None:
-        """Load a new LoRA adapter into the engine for future requests."""
         self.engine.add_lora(lora_request)
 
 
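For context, a minimal sketch of the delegation pattern this commit applies. All names here (SyncEngine, AsyncEngine, RayExecutor, the backend strings) are hypothetical stand-ins, not the vLLM API: the async engine stops maintaining its own device/backend dispatch ladder and its own executor type checks, and instead forwards executor selection and profiling to the sync engine.

# Illustrative sketch only -- hypothetical names, not the vLLM API.
# It mirrors the shape of this commit: delete the duplicated if/elif
# executor ladder in the async engine and delegate to the sync
# engine's single dispatch table.
import asyncio
from typing import Optional, Type


class ExecutorBase:
    """Stand-in for an executor base class."""
    uses_ray: bool = False


class RayExecutor(ExecutorBase):
    uses_ray = True


class MultiprocExecutor(ExecutorBase):
    pass


class SyncEngine:
    """Stand-in for the synchronous engine: owns the one dispatch table."""

    @classmethod
    def _get_executor_cls(cls, backend: Optional[str]) -> Type[ExecutorBase]:
        if backend == "ray":
            return RayExecutor
        if backend == "mp" or backend is None:
            return MultiprocExecutor
        raise ValueError(f"unsupported backend: {backend!r}")

    def start_profile(self) -> None:
        print("profiling started")

    def stop_profile(self) -> None:
        print("profiling stopped")


class AsyncEngine:
    """Stand-in for the async engine: delegates instead of duplicating."""

    def __init__(self, backend: Optional[str] = None) -> None:
        self.engine = SyncEngine()
        self.executor_class = self._get_executor_cls(backend)

    @classmethod
    def _get_executor_cls(cls, backend: Optional[str]) -> Type[ExecutorBase]:
        # Before this commit: a ~60-line copy of the sync engine's
        # device/backend branching. After: a one-line delegation.
        return SyncEngine._get_executor_cls(backend)

    async def start_profile(self) -> None:
        # Before: type-check the concrete executor and call into it (or
        # _run_workers) directly. After: forward to the sync engine.
        self.engine.start_profile()

    async def stop_profile(self) -> None:
        self.engine.stop_profile()


if __name__ == "__main__":
    engine = AsyncEngine(backend="ray")
    assert engine.executor_class is RayExecutor
    asyncio.run(engine.start_profile())
    asyncio.run(engine.stop_profile())

The dropped uses_ray / initialize_ray_cluster check in from_engine_args is consistent with the same consolidation: once selection lives in one place, Ray setup can be handled by the shared path rather than re-detected by each caller (not shown in this sketch).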