
Commit 0cee750

njhill authored and 0xrushi committed
[V0 Deprecation] Remove V0 executors (vllm-project#27142)
Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: 0xrushi <6279035+0xrushi@users.noreply.github.com>
1 parent 6e15cd1 commit 0cee750

File tree

31 files changed (+424, -1042 lines)


tests/basic_correctness/test_basic_correctness.py

Lines changed: 2 additions & 4 deletions
@@ -157,11 +157,9 @@ def test_models_distributed(
         and distributed_executor_backend == "ray"
         and attention_backend == ""
         and test_suite == "L4"
+        and enable_prompt_embeds
     ): # noqa
-        if enable_prompt_embeds:
-            pytest.skip("enable_prompt_embeds does not work with ray compiled dag.")
-        monkeypatch_context.setenv("VLLM_USE_RAY_SPMD_WORKER", "1")
-        monkeypatch_context.setenv("VLLM_USE_RAY_COMPILED_DAG", "1")
+        pytest.skip("enable_prompt_embeds does not work with ray compiled dag.")

     if attention_backend:
         monkeypatch_context.setenv(

tests/distributed/test_multi_node_assignment.py

Lines changed: 1 addition & 1 deletion
@@ -18,8 +18,8 @@

 from vllm import initialize_ray_cluster
 from vllm.config import ParallelConfig
-from vllm.executor.ray_utils import _wait_until_pg_removed
 from vllm.utils.network_utils import get_ip
+from vllm.v1.executor.ray_utils import _wait_until_pg_removed

 VLLM_MULTI_NODE = os.getenv("VLLM_MULTI_NODE", "0") == "1"

tests/distributed/test_pipeline_parallel.py

Lines changed: 1 addition & 3 deletions
@@ -305,10 +305,8 @@ def _compare_tp(
         common_args.extend(["--max-num-seqs", f"{max_num_seqs}"])

     if distributed_backend == "ray":
-        # For V1, test Ray Compiled Graph for all the tests
+        # Test Ray Compiled Graph for all the tests
         pp_env = {
-            "VLLM_USE_RAY_COMPILED_DAG": "1",
-            "VLLM_USE_RAY_SPMD_WORKER": "1",
             "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL": "1",
         }
         # Temporary. Currently when zeromq + SPMD is used, it does not properly

tests/model_executor/model_loader/tensorizer_loader/conftest.py

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 from vllm.model_executor.model_loader import tensorizer as tensorizer_mod
 from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
 from vllm.utils.network_utils import get_distributed_init_method, get_ip, get_open_port
-from vllm.v1.executor.abstract import UniProcExecutor
+from vllm.v1.executor import UniProcExecutor
 from vllm.v1.worker.worker_base import WorkerWrapperBase

 MODEL_REF = "facebook/opt-125m"
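
Both import styles for the V1 executors now coexist: this conftest pulls UniProcExecutor from the package root, while test_engine_core.py below imports from the concrete modules. A minimal sketch of the package-level re-exports that would make the shorter form resolve (hypothetical contents; the real vllm/v1/executor/__init__.py may differ):

# Hypothetical sketch of vllm/v1/executor/__init__.py re-exports that make
# `from vllm.v1.executor import UniProcExecutor` work; illustrative only.
from vllm.v1.executor.abstract import Executor
from vllm.v1.executor.uniproc_executor import UniProcExecutor

__all__ = ["Executor", "UniProcExecutor"]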

tests/v1/engine/test_engine_core.py

Lines changed: 2 additions & 1 deletion
@@ -15,7 +15,8 @@
 from vllm.utils.torch_utils import set_default_torch_num_threads
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core import EngineCore
-from vllm.v1.executor.abstract import Executor, UniProcExecutor
+from vllm.v1.executor.abstract import Executor
+from vllm.v1.executor.uniproc_executor import UniProcExecutor
 from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.outputs import ModelRunnerOutput

tools/pre_commit/check_pickle_imports.py

Lines changed: 4 additions & 4 deletions
@@ -17,8 +17,6 @@
 # add to this list if absolutely necessary and after careful security review.
 ALLOWED_FILES = {
     # pickle
-    "vllm/v1/serial_utils.py",
-    "vllm/v1/executor/multiproc_executor.py",
     "vllm/multimodal/hasher.py",
     "vllm/transformers_utils/config.py",
     "vllm/model_executor/models/registry.py",
@@ -38,11 +36,13 @@
     "benchmarks/cutlass_benchmarks/w8a8_benchmarks.py",
     "benchmarks/cutlass_benchmarks/sparse_benchmarks.py",
     # cloudpickle
-    "vllm/executor/mp_distributed_executor.py",
-    "vllm/executor/ray_distributed_executor.py",
+    "vllm/v1/executor/multiproc_executor.py",
+    "vllm/v1/executor/ray_executor.py",
     "vllm/entrypoints/llm.py",
     "vllm/utils/__init__.py",
     "tests/utils.py",
+    # pickle and cloudpickle
+    "vllm/v1/serial_utils.py",
 }

 PICKLE_RE = re.compile(
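
For context, a check like this typically greps each tracked file for pickle/cloudpickle imports and fails unless the file is allowlisted. A minimal sketch of that pattern, assuming an illustrative regex and CLI (not the verbatim contents of check_pickle_imports.py):

import re
import sys

# Illustrative pattern; the real PICKLE_RE in the tool may differ.
PICKLE_RE = re.compile(r"^\s*(?:import|from)\s+(?:pickle|cloudpickle)\b", re.MULTILINE)

ALLOWED_FILES = {"vllm/v1/serial_utils.py"}  # excerpt of the allowlist above

def scan(paths):
    # Return files that import pickle/cloudpickle without being allowlisted.
    offenders = []
    for path in paths:
        with open(path, encoding="utf-8") as f:
            if PICKLE_RE.search(f.read()) and path not in ALLOWED_FILES:
                offenders.append(path)
    return offenders

if __name__ == "__main__":
    bad = scan(sys.argv[1:])
    for path in bad:
        print(f"{path}: pickle import not in ALLOWED_FILES")
    sys.exit(1 if bad else 0)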

vllm/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -21,7 +21,7 @@
     "AsyncLLMEngine": ".engine.async_llm_engine:AsyncLLMEngine",
     "LLMEngine": ".engine.llm_engine:LLMEngine",
     "LLM": ".entrypoints.llm:LLM",
-    "initialize_ray_cluster": ".executor.ray_utils:initialize_ray_cluster",
+    "initialize_ray_cluster": ".v1.executor.ray_utils:initialize_ray_cluster",
     "PromptType": ".inputs:PromptType",
     "TextPrompt": ".inputs:TextPrompt",
     "TokensPrompt": ".inputs:TokensPrompt",
@@ -45,7 +45,6 @@
     from vllm.engine.async_llm_engine import AsyncLLMEngine
     from vllm.engine.llm_engine import LLMEngine
     from vllm.entrypoints.llm import LLM
-    from vllm.executor.ray_utils import initialize_ray_cluster
     from vllm.inputs import PromptType, TextPrompt, TokensPrompt
     from vllm.model_executor.models import ModelRegistry
     from vllm.outputs import (
@@ -62,6 +61,7 @@
     )
     from vllm.pooling_params import PoolingParams
     from vllm.sampling_params import SamplingParams
+    from vllm.v1.executor.ray_utils import initialize_ray_cluster

     from ._bc_linter import bc_linter_include, bc_linter_skip
 else:
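
The public name vllm.initialize_ray_cluster is unchanged here; only the target behind the lazy-import table moves to the V1 package. A minimal sketch of the PEP 562 module-level __getattr__ pattern such a table feeds into (names illustrative, not the verbatim vllm/__init__.py):

import importlib

# Maps a public attribute to "<relative module>:<attribute>"; after this
# commit the entry points at the V1 location.
_LAZY = {
    "initialize_ray_cluster": ".v1.executor.ray_utils:initialize_ray_cluster",
}

def __getattr__(name: str):
    # Called only when `name` is not found by normal lookup (PEP 562).
    try:
        target = _LAZY[name]
    except KeyError:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None
    module_path, _, attr = target.partition(":")
    return getattr(importlib.import_module(module_path, package=__package__), attr)

Callers therefore keep writing `from vllm import initialize_ray_cluster` with no change.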

vllm/config/parallel.py

Lines changed: 14 additions & 8 deletions
@@ -25,11 +25,11 @@
     from ray.runtime_env import RuntimeEnv
     from ray.util.placement_group import PlacementGroup

-    from vllm.executor.executor_base import ExecutorBase
+    from vllm.v1.executor import Executor
 else:
     RuntimeEnv = Any
     PlacementGroup = Any
-    ExecutorBase = Any
+    Executor = Any

 logger = init_logger(__name__)

@@ -189,7 +189,7 @@ class ParallelConfig:
     """ray distributed model workers placement group."""

     distributed_executor_backend: (
-        str | DistributedExecutorBackend | type[ExecutorBase] | None
+        str | DistributedExecutorBackend | type[Executor] | None
     ) = None
     """Backend to use for distributed model
     workers, either "ray" or "mp" (multiprocessing). If the product
@@ -511,7 +511,7 @@ def __post_init__(self) -> None:
         # We use multiprocessing by default if world_size fits on the
         # current node and we aren't in a ray placement group.

-        from vllm.executor import ray_utils
+        from vllm.v1.executor import ray_utils

         backend: DistributedExecutorBackend = "mp"
         ray_found = ray_utils.ray_is_available()
@@ -553,6 +553,12 @@ def __post_init__(self) -> None:
         if self.distributed_executor_backend is None and self.world_size == 1:
             self.distributed_executor_backend = "uni"

+        if self.max_parallel_loading_workers is not None:
+            logger.warning(
+                "max_parallel_loading_workers is currently "
+                "not supported and will be ignored."
+            )
+
     @property
     def use_ray(self) -> bool:
         return self.distributed_executor_backend == "ray" or (
@@ -563,7 +569,7 @@ def use_ray(self) -> bool:
     @model_validator(mode="after")
     def _verify_args(self) -> Self:
         # Lazy import to avoid circular import
-        from vllm.executor.executor_base import ExecutorBase
+        from vllm.v1.executor import Executor

         # Enable batch invariance settings if requested
         if vllm_is_batch_invariant():
@@ -574,17 +580,17 @@ def _verify_args(self) -> Self:
             and not isinstance(self.distributed_executor_backend, str)
             and not (
                 isinstance(self.distributed_executor_backend, type)
-                and issubclass(self.distributed_executor_backend, ExecutorBase)
+                and issubclass(self.distributed_executor_backend, Executor)
             )
         ):
             raise ValueError(
                 "Unrecognized distributed executor backend "
                 f"{self.distributed_executor_backend}. Supported "
                 "values are 'ray', 'mp' 'uni', 'external_launcher', "
-                " custom ExecutorBase subclass or its import path."
+                " custom Executor subclass or its import path."
             )
         if self.use_ray:
-            from vllm.executor import ray_utils
+            from vllm.v1.executor import ray_utils

             ray_utils.assert_ray_available()
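
The net effect on configuration: the string backends are unchanged, but a class-valued distributed_executor_backend must now subclass the V1 Executor rather than the removed ExecutorBase. A simplified sketch of the validation above (assuming only that vllm.v1.executor.Executor is importable; not the verbatim ParallelConfig code):

from vllm.v1.executor import Executor

def check_backend(backend) -> None:
    # Mirrors the _verify_args logic above, simplified for illustration.
    if backend is None or isinstance(backend, str):
        return  # "ray", "mp", "uni", "external_launcher", or an import path
    if isinstance(backend, type) and issubclass(backend, Executor):
        return  # a custom V1 Executor subclass is accepted
    raise ValueError(f"Unrecognized distributed executor backend {backend!r}")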

vllm/config/scheduler.py

Lines changed: 0 additions & 6 deletions
@@ -107,12 +107,6 @@ class SchedulerConfig:
     NOTE: This is not currently configurable. It will be overridden by
     max_num_batched_tokens in case max multimodal embedding size is larger."""

-    send_delta_data: bool = False
-    """Private API. If used, scheduler sends delta data to
-    workers instead of an entire data. It should be enabled only
-    when SPMD worker architecture is enabled. I.e.,
-    VLLM_USE_RAY_SPMD_WORKER=1"""
-
     policy: SchedulerPolicy = "fcfs"
     """The scheduling policy to use:\n
     - "fcfs" means first come first served, i.e. requests are handled in order

vllm/distributed/device_communicators/tpu_communicator.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@
 )

 if USE_RAY:
-    from vllm.executor import ray_utils
+    from vllm.v1.executor import ray_utils


 class TpuCommunicator(DeviceCommunicatorBase):
