33import asyncio
44import time
55import uuid
6- from typing import Dict , List
6+ from contextlib import ExitStack
7+ from typing import Dict , List , Optional
78
89import pytest
910from transformers import AutoTokenizer
1415from vllm .platforms import current_platform
1516from vllm .usage .usage_lib import UsageContext
1617from vllm .v1 .engine import EngineCoreRequest
17- from vllm .v1 .engine .core_client import EngineCoreClient
18+ from vllm .v1 .engine .core import EngineCore
19+ from vllm .v1 .engine .core_client import (AsyncMPClient , EngineCoreClient ,
20+ SyncMPClient )
1821from vllm .v1 .executor .abstract import Executor
1922
2023if not current_platform .is_cuda ():
@@ -63,7 +66,7 @@ def loop_until_done(client: EngineCoreClient, outputs: Dict):
6366async def loop_until_done_async (client : EngineCoreClient , outputs : Dict ):
6467
6568 while True :
66- engine_core_outputs = await client .get_output_async ().outputs
69+ engine_core_outputs = ( await client .get_output_async () ).outputs
6770
6871 if len (engine_core_outputs ) == 0 :
6972 break
@@ -78,14 +81,25 @@ async def loop_until_done_async(client: EngineCoreClient, outputs: Dict):
7881 break
7982
8083
# Dummy utility function to monkey-patch into engine core.
def echo(self, msg: str, err_msg: Optional[str] = None) -> str:
    """Return ``msg`` unchanged, or raise if an error message is supplied.

    Args:
        self: Receiver slot so the function can be installed as a method;
            it is not read.
        msg: Value handed back to the caller on success.
        err_msg: When not ``None``, a ``ValueError`` carrying this text is
            raised instead of returning.

    Returns:
        ``msg``.

    Raises:
        ValueError: If ``err_msg`` is not ``None``.
    """
    # Log every invocation before deciding the outcome.
    print(f"echo util function called: { msg } , { err_msg } ")
    if err_msg is not None:
        raise ValueError(err_msg)
    return msg
90+
91+
8192@fork_new_process_for_each_test
8293@pytest .mark .parametrize ("multiprocessing_mode" , [True , False ])
8394def test_engine_core_client (monkeypatch , multiprocessing_mode : bool ):
8495
8596 with monkeypatch .context () as m :
8697 m .setenv ("VLLM_USE_V1" , "1" )
8798
88- engine_args = EngineArgs (model = MODEL_NAME , compilation_config = 3 )
99+ # Monkey-patch core engine utility function to test.
100+ m .setattr (EngineCore , "echo" , echo , raising = False )
101+
102+ engine_args = EngineArgs (model = MODEL_NAME , enforce_eager = True )
89103 vllm_config = engine_args .create_engine_config (
90104 UsageContext .UNKNOWN_CONTEXT )
91105 executor_class = Executor .get_class (vllm_config )
@@ -147,15 +161,30 @@ def test_engine_core_client(monkeypatch, multiprocessing_mode: bool):
147161
148162 client .abort_requests ([request .request_id ])
149163
164+ if multiprocessing_mode :
165+ """Utility method invocation"""
150166
151- @fork_new_process_for_each_test
152- @pytest .mark .asyncio
167+ core_client : SyncMPClient = client
168+
169+ result = core_client ._call_utility ("echo" , "testarg" )
170+ assert result == "testarg"
171+
172+ with pytest .raises (Exception ) as e_info :
173+ core_client ._call_utility ("echo" , None , "help!" )
174+
175+ assert str (e_info .value ) == "Call to echo method failed: help!"
176+
177+
178+ @pytest .mark .asyncio (loop_scope = "function" )
153179async def test_engine_core_client_asyncio (monkeypatch ):
154180
155- with monkeypatch .context () as m :
181+ with monkeypatch .context () as m , ExitStack () as after :
156182 m .setenv ("VLLM_USE_V1" , "1" )
157183
158- engine_args = EngineArgs (model = MODEL_NAME )
184+ # Monkey-patch core engine utility function to test.
185+ m .setattr (EngineCore , "echo" , echo , raising = False )
186+
187+ engine_args = EngineArgs (model = MODEL_NAME , enforce_eager = True )
159188 vllm_config = engine_args .create_engine_config (
160189 usage_context = UsageContext .UNKNOWN_CONTEXT )
161190 executor_class = Executor .get_class (vllm_config )
@@ -166,6 +195,7 @@ async def test_engine_core_client_asyncio(monkeypatch):
166195 executor_class = executor_class ,
167196 log_stats = True ,
168197 )
198+ after .callback (client .shutdown )
169199
170200 MAX_TOKENS = 20
171201 params = SamplingParams (max_tokens = MAX_TOKENS )
@@ -204,3 +234,14 @@ async def test_engine_core_client_asyncio(monkeypatch):
204234 else :
205235 assert len (outputs [req_id ]) == MAX_TOKENS , (
206236 f"{ len (outputs [req_id ])= } , { MAX_TOKENS = } " )
237+ """Utility method invocation"""
238+
239+ core_client : AsyncMPClient = client
240+
241+ result = await core_client ._call_utility_async ("echo" , "testarg" )
242+ assert result == "testarg"
243+
244+ with pytest .raises (Exception ) as e_info :
245+ await core_client ._call_utility_async ("echo" , None , "help!" )
246+
247+ assert str (e_info .value ) == "Call to echo method failed: help!"
0 commit comments