check multimodal backend prior to dataset instantiation

NickLucche · NickLucche · commit 9f13aff21ff4 · 2025-04-18T09:44:20.000Z
Signed-off-by: NickLucche <nlucches@redhat.com>
diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
@@ -64,6 +64,7 @@ class SampleRequest:
 
 class BenchmarkDataset(ABC):
     DEFAULT_SEED = 0
+    IS_MULTIMODAL = False
 
     def __init__(
         self,
@@ -621,6 +622,7 @@ class ConversationDataset(HuggingFaceDataset):
     SUPPORTED_DATASET_PATHS = {
         'lmms-lab/LLaVA-OneVision-Data', 'Aeala/ShareGPT_Vicuna_unfiltered'
     }
+    IS_MULTIMODAL = True
 
     def sample(self,
                tokenizer: PreTrainedTokenizerBase,
@@ -685,6 +687,7 @@ class VisionArenaDataset(HuggingFaceDataset):
         "lmarena-ai/vision-arena-bench-v0.1":
         lambda x: x["turns"][0][0]["content"]
     }
+    IS_MULTIMODAL = True
 
     def sample(
         self,
@@ -846,6 +849,8 @@ class ASRDataset(HuggingFaceDataset):
     }
 
     DEFAULT_OUTPUT_LEN = 128
+    IS_MULTIMODAL = True
+
     # TODO Whisper-specific. Abstract interface when more models are supported.
     TRANSCRIPTION_PREAMBLE = "<|startoftranscript|><|en|><|transcribe|>"\
                               "<|notimestamps|>"
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
@@ -274,12 +274,6 @@ async def benchmark(
         input_requests[0].expected_output_len, \
             input_requests[0].multi_modal_data
 
-    if (test_mm_content is not None and backend not in \
-        ["openai-chat", "openai-audio"]):
-        # multi-modal benchmark is only available on OpenAI Chat backend.
-        raise ValueError(
-            "Multi-modal content is only supported on 'openai-chat' and " \
-            "'openai-audio' backend.")
     assert test_mm_content is None or isinstance(test_mm_content, dict)
     test_input = RequestFuncInput(
         model=model_id,
@@ -620,6 +614,13 @@ def main(args: argparse.Namespace):
                 f" from one of following: {supported_datasets}. "
                 "Please consider contributing if you would "
                 "like to add support for additional dataset formats.")
+
+        if (dataset_class.IS_MULTIMODAL and backend not in \
+            ["openai-chat", "openai-audio"]):
+            # multi-modal benchmarks require an OpenAI chat or audio backend.
+            raise ValueError(
+                "Multi-modal content is only supported on 'openai-chat' and " \
+                "'openai-audio' backend.")
         input_requests = dataset_class(
             dataset_path=args.dataset_path,
             dataset_subset=args.hf_subset,