From 297c2b81552d10ef69b2b000535832e263dc7de8 Mon Sep 17 00:00:00 2001
From: reidliu41 <reid201711@gmail.com>
Date: Thu, 19 Jun 2025 18:28:53 +0800
Subject: [PATCH] [Misc] refactor example - openai_transcription_client

Signed-off-by: reidliu41 <reid201711@gmail.com>
---
 .../openai_transcription_client.py            | 39 ++++++++++++++++---
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/examples/online_serving/openai_transcription_client.py b/examples/online_serving/openai_transcription_client.py
index 12d45de3c81b..ae43cb5da790 100644
--- a/examples/online_serving/openai_transcription_client.py
+++ b/examples/online_serving/openai_transcription_client.py
@@ -1,5 +1,23 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+This script demonstrates how to use the vLLM API server to perform audio
+transcription with the `openai/whisper-large-v3` model.
+
+Before running this script, you must start the vLLM server with the following command:
+
+    vllm serve openai/whisper-large-v3
+
+Requirements:
+- vLLM with audio support
+- openai Python SDK
+- httpx for streaming support
+
+The script performs:
+1. Synchronous transcription using the OpenAI-compatible API.
+2. Streaming transcription using a raw HTTP request to the vLLM server.
+"""
+
 import asyncio
 import json
 
@@ -21,6 +39,9 @@
 
 
 def sync_openai():
+    """
+    Perform synchronous transcription using the OpenAI-compatible API.
+    """
     with open(str(mary_had_lamb), "rb") as f:
         transcription = client.audio.transcriptions.create(
             file=f,
@@ -37,11 +58,11 @@ def sync_openai():
         print("transcription result:", transcription.text)
 
 
-sync_openai()
-
-
 # OpenAI Transcription API client does not support streaming.
 async def stream_openai_response():
+    """
+    Perform streaming transcription using vLLM's raw HTTP streaming API.
+    """
     data = {
         "language": "en",
         "stream": True,
@@ -68,7 +89,15 @@ async def stream_openai_response():
                         # Extract and print the content
                         content = chunk["choices"][0].get("delta", {}).get("content")
                         print(content, end="")
+    print()  # Final newline after stream ends
+
+
+def main():
+    sync_openai()
+
+    # stream_openai_response() is a coroutine, so drive it with asyncio.run()
+    asyncio.run(stream_openai_response())
 
 
-# Run the asynchronous function
-asyncio.run(stream_openai_response())
+if __name__ == "__main__":
+    main()