diff --git a/examples/multimodal/components/frontend.py b/examples/multimodal/components/frontend.py index f73b577154..8c6d681e23 100644 --- a/examples/multimodal/components/frontend.py +++ b/examples/multimodal/components/frontend.py @@ -18,8 +18,9 @@ from components.processor import Processor from fastapi import FastAPI -from fastapi.responses import StreamingResponse +from fastapi.responses import JSONResponse, StreamingResponse from utils.protocol import MultiModalRequest +from utils.vllm import parse_vllm_args from dynamo.sdk import DYNAMO_IMAGE, api, depends, service @@ -38,8 +39,18 @@ class Frontend: processor = depends(Processor) + def __init__(self): + class_name = self.__class__.__name__ + self.engine_args = parse_vllm_args(class_name, "") + @api(name="v1/chat/completions") async def generate(self, request: MultiModalRequest): + if self.engine_args.model != request.model: + return JSONResponse( + {"error": f"Model '{request.model}' not found"}, + status_code=404, + ) + async def content_generator(): async for response in self.processor.generate(request.model_dump_json()): try: diff --git a/examples/multimodal/components/video_frontend.py b/examples/multimodal/components/video_frontend.py index 73b7279f05..dda35cd754 100644 --- a/examples/multimodal/components/video_frontend.py +++ b/examples/multimodal/components/video_frontend.py @@ -18,8 +18,9 @@ from components.video_processor import Processor from fastapi import FastAPI -from fastapi.responses import StreamingResponse +from fastapi.responses import JSONResponse, StreamingResponse from utils.protocol import MultiModalRequest +from utils.vllm import parse_vllm_args from dynamo.sdk import DYNAMO_IMAGE, api, depends, service @@ -38,8 +39,18 @@ class Frontend: processor = depends(Processor) + def __init__(self): + class_name = self.__class__.__name__ + self.engine_args = parse_vllm_args(class_name, "") + @api(name="v1/chat/completions") async def generate(self, request: MultiModalRequest): + if self.engine_args.model != request.model: + return JSONResponse( + {"error": f"Model '{request.model}' not found"}, + status_code=404, + ) + async def content_generator(): async for response in self.processor.generate(request.model_dump_json()): try: diff --git a/examples/multimodal/configs/agg-llava.yaml b/examples/multimodal/configs/agg-llava.yaml index c0c7346525..8edb247856 100644 --- a/examples/multimodal/configs/agg-llava.yaml +++ b/examples/multimodal/configs/agg-llava.yaml @@ -17,6 +17,9 @@ Common: block-size: 64 max-model-len: 4096 +Frontend: + common-configs: [model] + Processor: router: round-robin prompt-template: "USER: \n ASSISTANT:" diff --git a/examples/multimodal/configs/agg-phi3v.yaml b/examples/multimodal/configs/agg-phi3v.yaml index bc794ae546..8fc77ae0c4 100644 --- a/examples/multimodal/configs/agg-phi3v.yaml +++ b/examples/multimodal/configs/agg-phi3v.yaml @@ -18,6 +18,9 @@ Common: max-model-len: 4096 trust-remote-code: true +Frontend: + common-configs: [model] + Processor: router: round-robin prompt-template: "<|user|>\n<|image_1|>\n<|end|>\n<|assistant|>\n" diff --git a/examples/multimodal/configs/agg-qwen.yaml b/examples/multimodal/configs/agg-qwen.yaml index 324a4ffc57..346b501730 100644 --- a/examples/multimodal/configs/agg-qwen.yaml +++ b/examples/multimodal/configs/agg-qwen.yaml @@ -17,6 +17,9 @@ Common: block-size: 64 max-model-len: 4096 +Frontend: + common-configs: [model] + Processor: router: round-robin prompt-template: "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|><|im_end|>\n<|im_start|>assistant\n" diff --git a/examples/multimodal/configs/agg_video.yaml b/examples/multimodal/configs/agg_video.yaml index f8185c2217..30c1da20e4 100644 --- a/examples/multimodal/configs/agg_video.yaml +++ b/examples/multimodal/configs/agg_video.yaml @@ -24,6 +24,9 @@ Common: video-token-id: 32000 dummy-tokens-per-frame: 144 +Frontend: + common-configs: [model] + Processor: router: round-robin common-configs: [model, block-size, max-model-len] diff --git a/examples/multimodal/configs/disagg.yaml b/examples/multimodal/configs/disagg.yaml index bbfd13ac02..171e89e5be 100644 --- a/examples/multimodal/configs/disagg.yaml +++ b/examples/multimodal/configs/disagg.yaml @@ -20,6 +20,9 @@ Common: num-patches: 576 kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}' +Frontend: + common-configs: [model] + Processor: router: round-robin prompt-template: "USER: \n ASSISTANT:" diff --git a/examples/multimodal/configs/disagg_video.yaml b/examples/multimodal/configs/disagg_video.yaml index 81d9e8f966..7ef223c0df 100644 --- a/examples/multimodal/configs/disagg_video.yaml +++ b/examples/multimodal/configs/disagg_video.yaml @@ -25,6 +25,9 @@ Common: video-token-id: 32000 dummy-tokens-per-frame: 144 +Frontend: + common-configs: [model] + Processor: router: round-robin common-configs: [model, block-size]