From 960ffd821b02ca8f32024fcbddb1d8fe77d34832 Mon Sep 17 00:00:00 2001 From: Roger Wang <136131678+ywang96@users.noreply.github.com> Date: Mon, 10 Jun 2024 02:13:39 -0700 Subject: [PATCH] [Frontend][Misc] Enforce Pixel Values as Input Type for VLMs in API Server (#5374) --- vllm/entrypoints/openai/api_server.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 95417718b51fe..e7503b9655830 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -183,6 +183,16 @@ async def authentication(request: Request, call_next): served_model_names = [args.model] engine_args = AsyncEngineArgs.from_cli_args(args) + + # Enforce pixel values as image input type for vision language models + # when serving with API server + if engine_args.image_input_type is not None and \ + engine_args.image_input_type.upper() != "PIXEL_VALUES": + raise ValueError( + f"Invalid image_input_type: {engine_args.image_input_type}. " + "Only --image-input-type 'pixel_values' is supported for serving " + "vision language models with the vLLM API server.") + engine = AsyncLLMEngine.from_engine_args( engine_args, usage_context=UsageContext.OPENAI_API_SERVER)