Commit eb6dd74

Author: Andrew Xia (committed)

make a helper fct

Signed-off-by: Andrew Xia <axia@fb.com>

1 parent bf20956 · commit eb6dd74

File tree: 2 files changed, +39 -29 lines changed

tests/entrypoints/openai/test_response_api_with_harmony.py

Lines changed: 19 additions & 17 deletions
@@ -3,6 +3,7 @@

 import json
 import os
+import textwrap
 import time

 import pytest
@@ -771,43 +772,44 @@ async def test_output_messages_enabled(client: OpenAI, model_name: str,


 @pytest.fixture(scope="module")
-def server_with_mock(monkeypatch_module: pytest.MonkeyPatch, tmp_path_factory):
-    import textwrap
+def server_with_mock_render_for_completion(
+        monkeypatch_module: pytest.MonkeyPatch, tmp_path_factory):

     args = ["--enforce-eager", "--tool-server", "demo"]

     # Create a sitecustomize.py that patches render_for_completion
     # Python automatically imports sitecustomize on startup if it's in sys.path
     tmp_dir = tmp_path_factory.mktemp("test_setup")
     sitecustomize = tmp_dir / "sitecustomize.py"
-    sitecustomize.write_text(textwrap.dedent("""
+    sitecustomize.write_text(
+        textwrap.dedent("""
 import os
-if os.environ.get('VLLM_TEST_MOCK_LARGE_PROMPT') == '1':
-    from unittest.mock import patch
+from unittest.mock import patch

-    # Mock render_for_completion to return a large token list
-    def mock_render_for_completion(messages):
-        return list(range(1000000))  # Return 1M tokens for testing
+# Mock render_for_completion to return a large token list
+def mock_render_for_completion(messages):
+    return list(range(1000000))  # Return 1M tokens for testing

-    # Patch it at module level before it's imported
-    patch('vllm.entrypoints.harmony_utils.render_for_completion',
-          mock_render_for_completion).start()
+# Patch it at module level before it's imported
+patch('vllm.entrypoints.harmony_utils.render_for_completion',
+      mock_render_for_completion).start()
 """))

     with monkeypatch_module.context() as m:
         m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1")
-        m.setenv("VLLM_TEST_MOCK_LARGE_PROMPT", "1")
         # Add tmp_dir to PYTHONPATH so sitecustomize.py is found
-        current_pythonpath = os.environ.get("PYTHONPATH", "")
-        new_pythonpath = f"{tmp_dir}:{current_pythonpath}" if current_pythonpath else str(tmp_dir)
+        curr_path = os.environ.get("PYTHONPATH", "")
+        new_pythonpath = f"{tmp_dir}:{curr_path}" if curr_path else str(
+            tmp_dir)
         m.setenv("PYTHONPATH", new_pythonpath)
         with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
             yield remote_server


 @pytest_asyncio.fixture
-async def client_with_mock(server_with_mock):
-    async with server_with_mock.get_async_client() as async_client:
+async def client_with_mock(server_with_mock_render_for_completion):
+    async with server_with_mock_render_for_completion.get_async_client(
+    ) as async_client:
         yield async_client


@@ -822,7 +824,7 @@ async def test_prompt_length_exceeds_max_model_len(client_with_mock: OpenAI,
         input="hello",
     )

-    # Verify the error message matches what's expected from lines 287-294
+    # Verify the error message matches what's expected
     error = exc_info.value
     assert "'The engine prompt length" in str(error)
     assert "Please reduce prompt" in str(error)

vllm/entrypoints/openai/serving_responses.py

Lines changed: 20 additions & 12 deletions
@@ -189,6 +189,23 @@ def __init__(

         self.tool_server = tool_server

+    def _validate_generator_input(
+            self,
+            engine_prompt: EngineTokensPrompt) -> Optional[ErrorResponse]:
+        """Add validations to the input to the generator here."""
+        if self.max_model_len <= len(engine_prompt["prompt_token_ids"]):
+            error_message = (
+                "The engine prompt length"
+                f" {len(engine_prompt['prompt_token_ids'])} "
+                f"exceeds the max_model_len {self.max_model_len}. "
+                "Please reduce prompt.")
+            return self.create_error_response(
+                err_type="invalid_request_error",
+                message=error_message,
+                status_code=HTTPStatus.BAD_REQUEST,
+            )
+        return None
+
     async def create_responses(
         self,
         request: ResponsesRequest,
@@ -284,19 +301,10 @@ async def create_responses(
         available_tools = []
         try:
             for i, engine_prompt in enumerate(engine_prompts):
+                maybe_error = self._validate_generator_input(engine_prompt)
+                if maybe_error is not None:
+                    return maybe_error

-                if self.max_model_len <= len(
-                        engine_prompt["prompt_token_ids"]):
-                    error_message = (
-                        "The engine prompt length"
-                        f" {len(engine_prompt['prompt_token_ids'])} "
-                        f"exceeds the max_model_len {self.max_model_len}. "
-                        "Please reduce prompt.")
-                    return self.create_error_response(
-                        err_type="invalid_request_error",
-                        message=error_message,
-                        status_code=HTTPStatus.BAD_REQUEST,
-                    )
                 default_max_tokens = self.max_model_len - len(
                     engine_prompt["prompt_token_ids"])

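The new _validate_generator_input helper keeps create_responses readable: it returns an ErrorResponse when the tokenized prompt cannot fit in max_model_len and None otherwise, and the caller short-circuits on the first failure. A simplified, self-contained sketch of that return-error-or-None pattern follows; the ErrorResponse dataclass and ResponsesServing class are stand-ins for the real vLLM types, not the actual implementation.

# sketch: hypothetical stand-ins for the vLLM serving classes.
from dataclasses import dataclass
from http import HTTPStatus
from typing import Optional

@dataclass
class ErrorResponse:
    err_type: str
    message: str
    status_code: int

class ResponsesServing:
    def __init__(self, max_model_len: int):
        self.max_model_len = max_model_len

    def _validate_generator_input(
            self, prompt_token_ids: list[int]) -> Optional[ErrorResponse]:
        # Return an error if the prompt cannot fit in the model context.
        if self.max_model_len <= len(prompt_token_ids):
            return ErrorResponse(
                err_type="invalid_request_error",
                message=(f"The engine prompt length {len(prompt_token_ids)} "
                         f"exceeds the max_model_len {self.max_model_len}. "
                         "Please reduce prompt."),
                status_code=HTTPStatus.BAD_REQUEST,
            )
        return None

    def create_responses(self, engine_prompts: list[list[int]]):
        for prompt_token_ids in engine_prompts:
            # Short-circuit on the first invalid prompt, as in the diff.
            maybe_error = self._validate_generator_input(prompt_token_ids)
            if maybe_error is not None:
                return maybe_error
            # ... generation for valid prompts would continue here ...
        return "ok"

# A 5-token prompt against a 4-token context limit triggers the error path.
serving = ResponsesServing(max_model_len=4)
assert isinstance(serving.create_responses([[1, 2, 3, 4, 5]]), ErrorResponse)

Returning an optional error instead of raising keeps the caller's try block focused on engine failures and makes the length check easy to unit test on its own, which is what the fixture and test in the first file exercise end to end.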