docs/source/models/supported_models.md (20 additions, 0 deletions)

@@ -890,6 +890,26 @@ For more details, please see: <gh-pr:4087#issuecomment-2250397630>
To use Qwen2.5-VL series models, you have to install the Hugging Face `transformers` library from source via `pip install git+https://github.com/huggingface/transformers`.
:::

#### Transcription (`--task transcription`)

Speech2Text models trained specifically for Automatic Speech Recognition.

:::{list-table}
:widths: 25 25 25 5 5
:header-rows: 1

- * Architecture
  * Models
  * Example HF Models
  * [LoRA](#lora-adapter)
  * [PP](#distributed-serving)
- * `Whisper`
  * Whisper-based
  * `openai/whisper-large-v3-turbo`
  * 🚧
  * 🚧
:::
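
Once a server is launched for one of these models (for example, `vllm serve openai/whisper-large-v3-turbo --task transcription`), transcription requests can be sent through the OpenAI-compatible client. A minimal sketch, assuming the server exposes the standard `/v1/audio/transcriptions` endpoint; the endpoint, port, and local file name are assumptions here, not confirmed by this diff:

```python
# Minimal sketch, assuming a vLLM server started with:
#   vllm serve openai/whisper-large-v3-turbo --task transcription
# and assuming it exposes the OpenAI-compatible
# /v1/audio/transcriptions endpoint.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Send a local audio file for transcription.
with open("sample.wav", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="openai/whisper-large-v3-turbo",
        file=audio_file,
    )
print(transcription.text)
```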

### Pooling Models

See [this page](pooling-models) for more information on how to use pooling models.
vllm/entrypoints/llm.py (1 addition, 1 deletion)

@@ -421,7 +421,7 @@ def generate(
         instead pass them via the ``inputs`` parameter.
         """
         runner_type = self.llm_engine.model_config.runner_type
-        if runner_type != "generate":
+        if runner_type not in ["generate", "transcription"]:
             messages = [
                 "LLM.generate() is only supported for (conditional) generation "
                 "models (XForCausalLM, XForConditionalGeneration).",
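
With the relaxed check, transcription models can also run offline through `LLM.generate()`. Below is a minimal sketch; the `task` constructor argument, the `<|startoftranscript|>` prompt, and the `(waveform, sampling_rate)` audio tuple follow vLLM's multi-modal example conventions and are assumptions here, not confirmed by this diff:

```python
# Minimal sketch of offline transcription via LLM.generate().
# Assumptions (not confirmed by this diff): the `task` argument
# accepts "transcription", and Whisper prompts use the
# <|startoftranscript|> token with a (waveform, sampling_rate)
# tuple under multi_modal_data.
import librosa
from vllm import LLM, SamplingParams

llm = LLM(model="openai/whisper-large-v3-turbo", task="transcription")

# Whisper expects 16 kHz mono audio.
waveform, sampling_rate = librosa.load("sample.wav", sr=16000)

outputs = llm.generate(
    {
        "prompt": "<|startoftranscript|>",
        "multi_modal_data": {"audio": (waveform, sampling_rate)},
    },
    SamplingParams(temperature=0, max_tokens=200),
)
print(outputs[0].outputs[0].text)
```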