
Commit 1c45f4c

[CI] Basic Integration Test For TPU (#9968)
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
1 parent 603a661 commit 1c45f4c

File tree

2 files changed: +16 -3 lines changed


.buildkite/run-tpu-test.sh

Lines changed: 1 addition & 1 deletion
@@ -12,4 +12,4 @@ remove_docker_container
 # For HF_TOKEN.
 source /etc/environment
 # Run a simple end-to-end example.
-docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
+docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"

tests/entrypoints/openai/test_accuracy.py

Lines changed: 15 additions & 2 deletions
@@ -10,6 +10,8 @@
 import lm_eval
 import pytest
 
+from vllm.platforms import current_platform
+
 from ...utils import RemoteOpenAIServer
 
 MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
@@ -18,12 +20,21 @@
 FILTER = "exact_match,strict-match"
 RTOL = 0.03
 EXPECTED_VALUE = 0.58
-DEFAULT_ARGS = ["--max-model-len", "4096", "--disable-log-requests"]
+DEFAULT_ARGS = ["--max-model-len", "2048", "--disable-log-requests"]
 MORE_ARGS_LIST = [
+    [],  # Default
     ["--enable-chunked-prefill"],  # Chunked
     ["--num-scheduler-steps", "8"],  # MS
     ["--num-scheduler-steps", "8", "--multi-step-stream-outputs"]  # MS+Stream
 ]
+MAX_WAIT_SECONDS = None
+
+if current_platform.is_tpu():
+    MORE_ARGS_LIST = [
+        [],  # Default
+        # ["--num-scheduler-steps", "8"],  # Multi-step << currently fails
+    ]
+    MAX_WAIT_SECONDS = 600
 
 
 @pytest.mark.parametrize("more_args", MORE_ARGS_LIST)
@@ -33,7 +44,9 @@ def test_lm_eval_accuracy(more_args):
 
     print(f"Running with: {args}")
 
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+    with RemoteOpenAIServer(
+            MODEL_NAME, args,
+            max_wait_seconds=MAX_WAIT_SECONDS) as remote_server:
         url = f"{remote_server.url_for('v1')}/completions"
 
         model_args = (
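
For context, a minimal sketch of how these module-level constants feed the accuracy check, assuming a GSM8K task (suggested by the "exact_match,strict-match" filter) and lm_eval's local-completions backend; the task name, model_args string, and helper name are illustrative assumptions, not the test's actual body:

import lm_eval

def check_accuracy_sketch(url: str) -> None:
    # Hypothetical helper: "gsm8k" and the model_args format are assumptions.
    # url is the server's completions endpoint, e.g. http://host:port/v1/completions.
    results = lm_eval.simple_evaluate(
        model="local-completions",
        model_args=f"model={MODEL_NAME},base_url={url}",
        tasks=["gsm8k"],
    )
    measured_value = results["results"]["gsm8k"][FILTER]
    # The run passes if the measured score lands within RTOL of EXPECTED_VALUE.
    assert EXPECTED_VALUE - RTOL <= measured_value <= EXPECTED_VALUE + RTOL

The TPU branch only trims MORE_ARGS_LIST and raises MAX_WAIT_SECONDS, which is passed through to RemoteOpenAIServer, presumably to allow for slower TPU startup and compilation; the accuracy assertion itself is unchanged.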
