@@ -3,6 +3,7 @@
 
 import json
 import os
+import textwrap
 import time
 
 import pytest
@@ -771,43 +772,44 @@ async def test_output_messages_enabled(client: OpenAI, model_name: str,
 
 
 @pytest.fixture(scope="module")
-def server_with_mock(monkeypatch_module: pytest.MonkeyPatch, tmp_path_factory):
-    import textwrap
+def server_with_mock_render_for_completion(
+        monkeypatch_module: pytest.MonkeyPatch, tmp_path_factory):
 
     args = ["--enforce-eager", "--tool-server", "demo"]
 
     # Create a sitecustomize.py that patches render_for_completion
     # Python automatically imports sitecustomize on startup if it's in sys.path
     tmp_dir = tmp_path_factory.mktemp("test_setup")
     sitecustomize = tmp_dir / "sitecustomize.py"
-    sitecustomize.write_text(textwrap.dedent("""
+    sitecustomize.write_text(
+        textwrap.dedent("""
         import os
-        if os.environ.get('VLLM_TEST_MOCK_LARGE_PROMPT') == '1':
-            from unittest.mock import patch
+        from unittest.mock import patch
 
-            # Mock render_for_completion to return a large token list
-            def mock_render_for_completion(messages):
-                return list(range(1000000))  # Return 1M tokens for testing
+        # Mock render_for_completion to return a large token list
+        def mock_render_for_completion(messages):
+            return list(range(1000000))  # Return 1M tokens for testing
 
-            # Patch it at module level before it's imported
-            patch('vllm.entrypoints.harmony_utils.render_for_completion',
-                  mock_render_for_completion).start()
+        # Patch it at module level before it's imported
+        patch('vllm.entrypoints.harmony_utils.render_for_completion',
+              mock_render_for_completion).start()
     """))
 
     with monkeypatch_module.context() as m:
         m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1")
-        m.setenv("VLLM_TEST_MOCK_LARGE_PROMPT", "1")
         # Add tmp_dir to PYTHONPATH so sitecustomize.py is found
-        current_pythonpath = os.environ.get("PYTHONPATH", "")
-        new_pythonpath = f"{tmp_dir}:{current_pythonpath}" if current_pythonpath else str(tmp_dir)
+        curr_path = os.environ.get("PYTHONPATH", "")
+        new_pythonpath = f"{tmp_dir}:{curr_path}" if curr_path else str(
+            tmp_dir)
         m.setenv("PYTHONPATH", new_pythonpath)
         with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
             yield remote_server
 
 
 @pytest_asyncio.fixture
-async def client_with_mock(server_with_mock):
-    async with server_with_mock.get_async_client() as async_client:
+async def client_with_mock(server_with_mock_render_for_completion):
+    async with server_with_mock_render_for_completion.get_async_client(
+    ) as async_client:
         yield async_client
 
 
@@ -822,7 +824,7 @@ async def test_prompt_length_exceeds_max_model_len(client_with_mock: OpenAI,
         input="hello",
     )
 
-    # Verify the error message matches what's expected from lines 287-294
+    # Verify the error message matches what's expected
     error = exc_info.value
     assert "'The engine prompt length" in str(error)
     assert "Please reduce prompt" in str(error)
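Aside: a minimal, self-contained sketch of the sitecustomize mechanism the new fixture relies on, using only the standard library; the demo script and its printed messages are illustrative assumptions, not part of this change. Python's site machinery imports a module named sitecustomize at interpreter startup if its directory is on sys.path, so a temp dir prepended to PYTHONPATH lets the test process apply a patch inside the spawned server process before any application module is imported.

# Illustrative sketch (assumed demo, not from the diff): a sitecustomize.py on
# PYTHONPATH is auto-imported by any Python subprocess before its own code runs.
import os
import subprocess
import sys
import tempfile
import textwrap

with tempfile.TemporaryDirectory() as tmp_dir:
    with open(os.path.join(tmp_dir, "sitecustomize.py"), "w") as f:
        f.write(textwrap.dedent("""
            print("sitecustomize ran before the child's main code")
        """))

    env = dict(os.environ)
    # Prepend tmp_dir so the child interpreter finds sitecustomize.py at startup.
    env["PYTHONPATH"] = tmp_dir + os.pathsep + env.get("PYTHONPATH", "")
    # Prints the sitecustomize message first, then the child's own output.
    subprocess.run([sys.executable, "-c", "print('child main')"],
                   env=env, check=True)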