4 changes: 3 additions & 1 deletion .buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
@@ -17,7 +17,7 @@ source /etc/environment
 docker run --privileged --net host --shm-size=16G -it \
     -e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
     vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
-    && python3 -m pip install pytest tpu-info \
+    && python3 -m pip install pytest pytest-asyncio tpu-info \
     && python3 -m pip install lm_eval[api]==0.4.4 \
     && export VLLM_USE_V1=1 \
     && export VLLM_XLA_CHECK_RECOMPILATION=1 \
@@ -42,6 +42,8 @@ docker run --privileged --net host --shm-size=16G -it \
     && echo TEST_8 \
     && pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py \
     && echo TEST_9 \
+    && pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py \
+    && echo TEST_10 \
     && pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py" \


91 changes: 91 additions & 0 deletions tests/v1/tpu/test_multimodal.py
@@ -0,0 +1,91 @@
# SPDX-License-Identifier: Apache-2.0

import openai
import pytest

from vllm import envs
from vllm.multimodal.utils import encode_image_base64, fetch_image
from vllm.platforms import current_platform

from ...entrypoints.openai.test_vision import TEST_IMAGE_URLS
from ...utils import RemoteOpenAIServer

if not envs.VLLM_USE_V1:
    pytest.skip(
        "Skipping V1 tests. Rerun with `VLLM_USE_V1=1` to test.",
        allow_module_level=True,
    )


@pytest.fixture(scope="session")
def base64_encoded_image() -> dict[str, str]:
    return {
        image_url: encode_image_base64(fetch_image(image_url))
        for image_url in TEST_IMAGE_URLS
    }


@pytest.mark.asyncio
@pytest.mark.skipif(not current_platform.is_tpu(),
                    reason="This test needs a TPU")
@pytest.mark.parametrize("model_name", ["llava-hf/llava-1.5-7b-hf"])
async def test_basic_vision(model_name: str,
                            base64_encoded_image: dict[str, str]):

    def whats_in_this_image_msg(b64):
        return [{
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What's in this image?"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{b64}"
                    },
                },
            ],
        }]

    server_args = [
        "--max-model-len",
        "1024",
        "--max-num-seqs",
        "16",
        "--gpu-memory-utilization",
        "0.95",
        "--trust-remote-code",
        "--max-num-batched-tokens",
        "576",
        # NOTE: max-num-batched-tokens >= mm_item_size
        "--disable_chunked_mm_input",
        "--chat-template",
        "examples/template_llava.jinja",
    ]

    # Server will pre-compile on first startup (takes a long time).
    with RemoteOpenAIServer(model_name, server_args,
                            max_wait_seconds=600) as remote_server:
        client: openai.AsyncOpenAI = remote_server.get_async_client()

        # Subsequent requests should be much faster.
        for image_url in TEST_IMAGE_URLS:
            image_base64 = base64_encoded_image[image_url]
            chat_completion_from_base64 = await client.chat.completions.create(
                model=model_name,
                messages=whats_in_this_image_msg(image_base64),
                max_completion_tokens=24,
                temperature=0.0)
            result = chat_completion_from_base64
            assert result
            choice = result.choices[0]
            # Generation should stop by hitting max_completion_tokens.
            assert choice.finish_reason == "length"

            message = choice.message
            assert message.content is not None and len(message.content) >= 10
            assert message.role == "assistant"
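
For reference, the new CI step boils down to the following local invocation on a TPU host — a sketch that assumes the same /workspace/vllm layout and environment the buildkite script sets up:

    # Mirrors the CI entry above; adjust the path to your checkout.
    export VLLM_USE_V1=1
    python3 -m pip install pytest pytest-asyncio
    pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py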