Skip to content

Commit

Permalink
[misc] remove engine_use_ray (vllm-project#8126)
Browse files Browse the repository at this point in the history
  • Loading branch information
youkaichao authored and Jeffwan committed Sep 19, 2024
1 parent d0143f3 commit 73b246d
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 197 deletions.
18 changes: 4 additions & 14 deletions tests/async_engine/test_api_server.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import subprocess
import sys
import time
Expand Down Expand Up @@ -26,8 +25,7 @@ def _query_server_long(prompt: str) -> dict:


@pytest.fixture
def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
worker_use_ray: bool):
def api_server(tokenizer_pool_size: int, worker_use_ray: bool):
script_path = Path(__file__).parent.joinpath(
"api_server_async_engine.py").absolute()
commands = [
Expand All @@ -37,25 +35,17 @@ def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
str(tokenizer_pool_size)
]

# Copy the environment variables and append `VLLM_ALLOW_ENGINE_USE_RAY=1`
# to prevent `--engine-use-ray` from raising an exception due to its deprecation
env_vars = os.environ.copy()
env_vars["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"

if engine_use_ray:
commands.append("--engine-use-ray")
if worker_use_ray:
commands.append("--worker-use-ray")
uvicorn_process = subprocess.Popen(commands, env=env_vars)
uvicorn_process = subprocess.Popen(commands)
yield
uvicorn_process.terminate()


@pytest.mark.parametrize("tokenizer_pool_size", [0, 2])
@pytest.mark.parametrize("worker_use_ray", [False, True])
@pytest.mark.parametrize("engine_use_ray", [False, True])
def test_api_server(api_server, tokenizer_pool_size: int, worker_use_ray: bool,
engine_use_ray: bool):
def test_api_server(api_server, tokenizer_pool_size: int,
worker_use_ray: bool):
"""
Run the API server and test it.
Expand Down
14 changes: 3 additions & 11 deletions tests/async_engine/test_async_llm_engine.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import asyncio
import os
from asyncio import CancelledError
from dataclasses import dataclass
from typing import Optional
Expand Down Expand Up @@ -72,14 +71,12 @@ def has_unfinished_requests_for_virtual_engine(self, virtual_engine):


class MockAsyncLLMEngine(AsyncLLMEngine):

def _init_engine(self, *args, **kwargs):
return MockEngine()
_engine_class = MockEngine


@pytest.mark.asyncio
async def test_new_requests_event():
engine = MockAsyncLLMEngine(worker_use_ray=False, engine_use_ray=False)
engine = MockAsyncLLMEngine(worker_use_ray=False)
engine.start_background_loop()
await asyncio.sleep(0.01)
assert engine.engine.step_calls == 0
Expand Down Expand Up @@ -112,16 +109,11 @@ async def test_new_requests_event():
assert engine.engine.add_request_calls == 3
assert engine.engine.step_calls == old_step_calls + 1

# Allow the deprecated engine_use_ray so that it does not raise an exception
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"

engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
engine = MockAsyncLLMEngine(worker_use_ray=True)
assert engine.get_model_config() is not None
assert engine.get_tokenizer() is not None
assert engine.get_decoding_config() is not None

os.environ.pop("VLLM_ALLOW_ENGINE_USE_RAY")


def start_engine():
wait_for_gpu_memory_to_clear(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,11 @@ def server():
"--max-model-len",
"2048",
"--enforce-eager",
"--engine-use-ray",
"--chat-template",
str(chatml_jinja_path),
]

# Allow `--engine-use-ray`; otherwise launching the server throws
# an error for trying to use a deprecated feature
env_dict = {"VLLM_ALLOW_ENGINE_USE_RAY": "1"}
with RemoteOpenAIServer(MODEL_NAME, args,
env_dict=env_dict) as remote_server:
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server


Expand Down
11 changes: 0 additions & 11 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,24 +1035,13 @@ def create_engine_config(self) -> EngineConfig:
@dataclass
class AsyncEngineArgs(EngineArgs):
"""Arguments for asynchronous vLLM engine."""
engine_use_ray: bool = False
disable_log_requests: bool = False

@staticmethod
def add_cli_args(parser: FlexibleArgumentParser,
async_args_only: bool = False) -> FlexibleArgumentParser:
if not async_args_only:
parser = EngineArgs.add_cli_args(parser)
parser.add_argument('--engine-use-ray',
action='store_true',
help='Use Ray to start the LLM engine in a '
'separate process as the server process.'
'(DEPRECATED. This argument is deprecated '
'and will be removed in a future update. '
'Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to force '
'use it. See '
'https://github.com/vllm-project/vllm/issues/7045.'
')')
parser.add_argument('--disable-log-requests',
action='store_true',
help='Disable logging requests.')
Expand Down
Loading

0 comments on commit 73b246d

Please sign in to comment.