diff --git a/examples/online_serving/openai_transcription_client.py b/examples/online_serving/openai_transcription_client.py index 12d45de3c81b..ae43cb5da790 100644 --- a/examples/online_serving/openai_transcription_client.py +++ b/examples/online_serving/openai_transcription_client.py @@ -1,5 +1,23 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +This script demonstrates how to use the vLLM API server to perform audio +transcription with the `openai/whisper-large-v3` model. + +Before running this script, you must start the vLLM server with the following command: + + vllm serve openai/whisper-large-v3 + +Requirements: +- vLLM with audio support +- openai Python SDK +- httpx for streaming support + +The script performs: +1. Synchronous transcription using OpenAI-compatible API. +2. Streaming transcription using raw HTTP request to the vLLM server. +""" + import asyncio import json @@ -21,6 +39,9 @@ def sync_openai(): + """ + Perform synchronous transcription using OpenAI-compatible API. + """ with open(str(mary_had_lamb), "rb") as f: transcription = client.audio.transcriptions.create( file=f, @@ -37,11 +58,11 @@ def sync_openai(): print("transcription result:", transcription.text) -sync_openai() - - # OpenAI Transcription API client does not support streaming. async def stream_openai_response(): + """ + Perform streaming transcription using vLLM's raw HTTP streaming API. + """ data = { "language": "en", "stream": True, @@ -68,7 +89,15 @@ async def stream_openai_response(): # Extract and print the content content = chunk["choices"][0].get("delta", {}).get("content") print(content, end="") + print() # Final newline after stream ends + + +def main(): + sync_openai() + + # Run the asynchronous function + asyncio.run(stream_openai_response()) -# Run the asynchronous function -asyncio.run(stream_openai_response()) +if __name__ == "__main__": + main()