| 
12 | 12 | 
 
  | 
13 | 13 | from ....conftest import HfRunner, VllmRunner  | 
14 | 14 | from ....utils import RemoteOpenAIServer  | 
 | 15 | +from ...registry import HF_EXAMPLE_MODELS  | 
15 | 16 | from ...utils import check_logprobs_close  | 
16 | 17 | 
 
  | 
17 | 18 | MODEL_NAME = "fixie-ai/ultravox-v0_5-llama-3_2-1b"  | 
@@ -55,7 +56,10 @@ def server(request, audio_assets):  | 
55 | 56 |         for key, value in request.param.items()  | 
56 | 57 |     ]  | 
57 | 58 | 
 
  | 
58 |  | -    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:  | 
 | 59 | +    with RemoteOpenAIServer(MODEL_NAME,  | 
 | 60 | +                            args,  | 
 | 61 | +                            env_dict={"VLLM_AUDIO_FETCH_TIMEOUT":  | 
 | 62 | +                                      "30"}) as remote_server:  | 
59 | 63 |         yield remote_server  | 
60 | 64 | 
 
  | 
61 | 65 | 
 
  | 
@@ -106,6 +110,10 @@ def run_test(  | 
106 | 110 |     **kwargs,  | 
107 | 111 | ):  | 
108 | 112 |     """Inference result should be the same between hf and vllm."""  | 
 | 113 | +    model_info = HF_EXAMPLE_MODELS.find_hf_info(model)  | 
 | 114 | +    model_info.check_available_online(on_fail="skip")  | 
 | 115 | +    model_info.check_transformers_version(on_fail="skip")  | 
 | 116 | + | 
109 | 117 |     # NOTE: take care of the order. run vLLM first, and then run HF.  | 
110 | 118 |     # vLLM needs a fresh new process without cuda initialization.  | 
111 | 119 |     # if we run HF first, the cuda initialization will be done and it  | 
@@ -156,6 +164,10 @@ def run_multi_audio_test(  | 
156 | 164 |     num_logprobs: int,  | 
157 | 165 |     **kwargs,  | 
158 | 166 | ):  | 
 | 167 | +    model_info = HF_EXAMPLE_MODELS.find_hf_info(model)  | 
 | 168 | +    model_info.check_available_online(on_fail="skip")  | 
 | 169 | +    model_info.check_transformers_version(on_fail="skip")  | 
 | 170 | + | 
159 | 171 |     with vllm_runner(model,  | 
160 | 172 |                      dtype=dtype,  | 
161 | 173 |                      enforce_eager=True,  | 
 | 
0 commit comments