File tree: 2 files changed, +16 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -12,4 +12,4 @@ remove_docker_container
1212# For HF_TOKEN.
1313source /etc/environment
1414# Run a simple end-to-end example.
15- docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c " python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
15+ docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c " python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
Original file line number | Diff line number | Diff line change
1010import lm_eval
1111import pytest
1212
13+ from vllm .platforms import current_platform
14+
1315from ...utils import RemoteOpenAIServer
1416
1517MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
1820FILTER = "exact_match,strict-match"
1921RTOL = 0.03
2022EXPECTED_VALUE = 0.58
21- DEFAULT_ARGS = ["--max-model-len" , "4096 " , "--disable-log-requests" ]
23+ DEFAULT_ARGS = ["--max-model-len" , "2048 " , "--disable-log-requests" ]
2224MORE_ARGS_LIST = [
25+ [], # Default
2326 ["--enable-chunked-prefill" ], # Chunked
2427 ["--num-scheduler-steps" , "8" ], # MS
2528 ["--num-scheduler-steps" , "8" , "--multi-step-stream-outputs" ] # MS+Stream
2629]
30+ MAX_WAIT_SECONDS = None
31+
32+ if current_platform .is_tpu ():
33+ MORE_ARGS_LIST = [
34+ [], # Default
35+ # ["--num-scheduler-steps", "8"], # Multi-step << currently fails
36+ ]
37+ MAX_WAIT_SECONDS = 600
2738
2839
2940@pytest .mark .parametrize ("more_args" , MORE_ARGS_LIST )
@@ -33,7 +44,9 @@ def test_lm_eval_accuracy(more_args):
3344
3445 print (f"Running with: { args } " )
3546
36- with RemoteOpenAIServer (MODEL_NAME , args ) as remote_server :
47+ with RemoteOpenAIServer (
48+ MODEL_NAME , args ,
49+ max_wait_seconds = MAX_WAIT_SECONDS ) as remote_server :
3750 url = f"{ remote_server .url_for ('v1' )} /completions"
3851
3952 model_args = (
You can’t perform that action at this time.
0 commit comments