8 changes: 8 additions & 0 deletions clients/aws-sdk-transcribe-streaming/README.md
@@ -11,3 +11,11 @@
Pages can be built into portable HTML files for the time being. You can
follow the instructions in the docs [README.md](https://github.com/awslabs/aws-sdk-python/blob/main/clients/aws-sdk-transcribe-streaming/docs/README.md).

For high-level documentation, you can view the [`dev-guide`](https://github.com/awslabs/aws-sdk-python/tree/main/dev-guide) at the top level of this repo.

### Examples

The `examples` directory contains the following scripts to help you get started.
You can run each one with `uv run <file_name>`; uv sets up an environment with a
supported Python version and the required dependencies. A condensed sketch of the
flow both scripts share appears after the list.

- `simple_mic.py` - Stream audio from your microphone in real-time and receive transcription results as you speak.
- `simple_file.py` - Transcribe a pre-recorded audio file with simulated real-time streaming and rate limiting.
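
For a quick reference, here is a condensed sketch of the flow both scripts share. It uses only the client API that appears in the scripts themselves; the `transcribe` function name, the region value, and the plain `list[bytes]` input are illustrative placeholders, not SDK surface:

```python
import asyncio

from smithy_aws_core.identity import EnvironmentCredentialsResolver

from aws_sdk_transcribe_streaming.client import (
    StartStreamTranscriptionInput,
    TranscribeStreamingClient,
)
from aws_sdk_transcribe_streaming.config import Config
from aws_sdk_transcribe_streaming.models import (
    AudioEvent,
    AudioStreamAudioEvent,
    TranscriptEvent,
)

REGION = "us-west-2"  # placeholder; use your own region


async def transcribe(chunks: list[bytes]) -> None:
    """Send PCM audio chunks to Transcribe Streaming and print transcripts."""
    client = TranscribeStreamingClient(
        config=Config(
            endpoint_uri=f"https://transcribestreaming.{REGION}.amazonaws.com",
            region=REGION,
            aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
        )
    )
    stream = await client.start_stream_transcription(
        input=StartStreamTranscriptionInput(
            language_code="en-US",
            media_sample_rate_hertz=16000,
            media_encoding="pcm",
        )
    )
    _, output = await stream.await_output()

    async def send() -> None:
        for chunk in chunks:
            await stream.input_stream.send(
                AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
            )
        # An empty chunk signals end of input (as simple_file.py does).
        await stream.input_stream.send(
            AudioStreamAudioEvent(value=AudioEvent(audio_chunk=b""))
        )
        await asyncio.sleep(0.4)  # give the empty frame time to flush before closing
        await stream.input_stream.close()

    async def receive() -> None:
        async for event in output:
            if isinstance(event.value, TranscriptEvent) and event.value.transcript:
                for result in event.value.transcript.results or []:
                    if result.alternatives:
                        print(result.alternatives[0].transcript)

    await asyncio.gather(send(), receive())
```

Both scripts below follow this shape, differing only in where the audio bytes come from (microphone vs. file) and how sends are paced.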
155 changes: 155 additions & 0 deletions clients/aws-sdk-transcribe-streaming/examples/simple_file.py
@@ -0,0 +1,155 @@
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "aiofile~=3.9.0",
# "aws-sdk-transcribe-streaming",
# ]
#
# [tool.uv.sources]
# aws-sdk-transcribe-streaming = { path = "../" }
# ///
"""
Audio file transcription example using AWS Transcribe Streaming.

This example demonstrates how to:
- Read audio from a pre-recorded file
- Stream audio to AWS Transcribe Streaming service with rate limiting
- Receive and display transcription results as they arrive

Prerequisites:
- AWS credentials configured (via environment variables)
- An audio file (default: test.wav in PCM format)
- [uv](https://docs.astral.sh/uv/getting-started/installation/) installed

Usage:
- `uv run simple_file.py`
"""

import asyncio
import time
from pathlib import Path

import aiofile
from smithy_aws_core.identity import EnvironmentCredentialsResolver
from smithy_core.aio.interfaces.eventstream import EventPublisher, EventReceiver

from aws_sdk_transcribe_streaming.client import (
StartStreamTranscriptionInput,
TranscribeStreamingClient,
)
from aws_sdk_transcribe_streaming.config import Config
from aws_sdk_transcribe_streaming.models import (
AudioEvent,
AudioStream,
AudioStreamAudioEvent,
TranscriptEvent,
TranscriptResultStream,
)

AWS_REGION = "us-west-2"
ENDPOINT_URI = f"https://transcribestreaming.{AWS_REGION}.amazonaws.com"

SAMPLE_RATE = 16000
BYTES_PER_SAMPLE = 2
CHANNEL_NUMS = 1
AUDIO_PATH = Path(__file__).parent / "test.wav"
CHUNK_SIZE = 1024 * 8
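# apply_realtime_delay() below paces sends at the realtime byte rate implied by
# these constants: SAMPLE_RATE * BYTES_PER_SAMPLE * CHANNEL_NUMS
#   = 16000 * 2 * 1 = 32000 bytes of audio per second,
# so each 8 KiB chunk (8192 bytes) corresponds to 8192 / 32000 = 0.256 s of audio.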


async def apply_realtime_delay(
audio_stream: EventPublisher[AudioStream],
    reader: aiofile.Reader,
bytes_per_sample: int,
sample_rate: float,
channel_nums: int,
) -> None:
"""Applies a delay when reading an audio file stream to simulate a real-time delay."""
start_time = time.time()
elapsed_audio_time = 0.0
async for chunk in reader:
await audio_stream.send(
AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
)
elapsed_audio_time += len(chunk) / (
bytes_per_sample * sample_rate * channel_nums
)
# sleep to simulate real-time streaming
wait_time = start_time + elapsed_audio_time - time.time()
await asyncio.sleep(wait_time)


class TranscriptResultStreamHandler:
def __init__(self, stream: EventReceiver[TranscriptResultStream]):
self.stream = stream

async def handle_events(self):
# Continuously receives events from the stream and delegates
# to appropriate handlers based on event type.
async for event in self.stream:
if isinstance(event.value, TranscriptEvent):
await self.handle_transcript_event(event.value)

async def handle_transcript_event(self, event: TranscriptEvent):
# This handler can be implemented to handle transcriptions as needed.
# Here's an example to get started.
if not event.transcript or not event.transcript.results:
return

results = event.transcript.results
for result in results:
if result.alternatives:
for alt in result.alternatives:
print(alt.transcript)
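
    # A possible refinement (a sketch, not part of the original example): the
    # Transcribe service marks in-progress results as partial (IsPartial), and
    # this generated SDK is assumed to expose that flag as `result.is_partial`.
    # Printing only finalized results would then look roughly like:
    #
    #     for result in results:
    #         if not result.is_partial and result.alternatives:
    #             print(result.alternatives[0].transcript)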


async def write_chunks(audio_stream: EventPublisher[AudioStream]):
# NOTE: For pre-recorded files longer than 5 minutes, the sent audio
# chunks should be rate limited to match the realtime bitrate of the
# audio stream to avoid signing issues.
async with aiofile.AIOFile(AUDIO_PATH, "rb") as afp:
reader = aiofile.Reader(afp, chunk_size=CHUNK_SIZE)
await apply_realtime_delay(
audio_stream, reader, BYTES_PER_SAMPLE, SAMPLE_RATE, CHANNEL_NUMS
)

# Send an empty audio event to signal end of input
await audio_stream.send(AudioStreamAudioEvent(value=AudioEvent(audio_chunk=b"")))
# Small delay to ensure empty frame is sent before close
await asyncio.sleep(0.4)
await audio_stream.close()


async def main():
# Initialize the Transcribe Streaming client
client = TranscribeStreamingClient(
config=Config(
endpoint_uri=ENDPOINT_URI,
region=AWS_REGION,
aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
)
)

# Start a streaming transcription session
stream = await client.start_stream_transcription(
input=StartStreamTranscriptionInput(
language_code="en-US",
media_sample_rate_hertz=SAMPLE_RATE,
media_encoding="pcm",
)
)

# Get the output stream for receiving transcription results
_, output_stream = await stream.await_output()

# Set up the handler for processing transcription events
handler = TranscriptResultStreamHandler(output_stream)

print("Transcribing audio from file...")
print("===============================")

# Run audio streaming and transcription handling concurrently
await asyncio.gather(write_chunks(stream.input_stream), handler.handle_events())


if __name__ == "__main__":
asyncio.run(main())
154 changes: 154 additions & 0 deletions clients/aws-sdk-transcribe-streaming/examples/simple_mic.py
@@ -0,0 +1,154 @@
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "aws-sdk-transcribe-streaming",
# "sounddevice~=0.5.3",
# ]
#
# [tool.uv.sources]
# aws-sdk-transcribe-streaming = { path = "../" }
# ///
"""
Real-time audio transcription example using AWS Transcribe Streaming.

This example demonstrates how to:
- Stream audio from your microphone in real-time
- Send audio to AWS Transcribe Streaming service
- Receive and display transcription results as they arrive

Prerequisites:
- AWS credentials configured (via environment variables)
- A working microphone
- [uv](https://docs.astral.sh/uv/getting-started/installation/) installed

Usage:
- `uv run simple_mic.py`
"""

import asyncio
import sys
from typing import Any, AsyncGenerator, Tuple

import sounddevice
from smithy_aws_core.identity import EnvironmentCredentialsResolver
from smithy_core.aio.interfaces.eventstream import EventPublisher, EventReceiver

from aws_sdk_transcribe_streaming.client import (
StartStreamTranscriptionInput,
TranscribeStreamingClient,
)
from aws_sdk_transcribe_streaming.config import Config
from aws_sdk_transcribe_streaming.models import (
AudioEvent,
AudioStream,
AudioStreamAudioEvent,
TranscriptEvent,
TranscriptResultStream,
)

# Configuration
AWS_REGION = "us-west-2"
ENDPOINT_URI = f"https://transcribestreaming.{AWS_REGION}.amazonaws.com"
SAMPLE_RATE = 16000


async def mic_stream() -> AsyncGenerator[Tuple[bytes, Any], None]:
    # This function wraps the raw input stream from the microphone, forwarding
    # its audio blocks to an asyncio.Queue so they can be consumed asynchronously.
    loop = asyncio.get_running_loop()
input_queue: asyncio.Queue = asyncio.Queue()

def callback(indata, frame_count, time_info, status):
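        # sounddevice invokes this callback on a separate (PortAudio) audio
        # thread, so call_soon_threadsafe is used to hand each block to the
        # asyncio event loop rather than touching the queue from that thread.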
loop.call_soon_threadsafe(input_queue.put_nowait, (bytes(indata), status))

# Be sure to use the correct parameters for the audio stream that matches
# the audio formats described for the source language you'll be using:
# https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
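    # With dtype="int16" (2 bytes per sample), one channel, and blocksize=2048
    # frames, each callback delivers 2048 * 2 = 4096 bytes, i.e. roughly
    # 2048 / 16000 = 0.128 s of audio per block at SAMPLE_RATE = 16000.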
stream = sounddevice.RawInputStream(
channels=1,
samplerate=SAMPLE_RATE,
callback=callback,
blocksize=1024 * 2,
dtype="int16",
)

# Initiate the audio stream and asynchronously yield the audio chunks
# as they become available.
with stream:
while True:
indata, status = await input_queue.get()
yield indata, status


class TranscriptResultStreamHandler:
def __init__(self, stream: EventReceiver[TranscriptResultStream]):
self.stream = stream

async def handle_events(self):
# Continuously receives events from the stream and delegates
# to appropriate handlers based on event type.
async for event in self.stream:
if isinstance(event.value, TranscriptEvent):
await self.handle_transcript_event(event.value)

async def handle_transcript_event(self, event: TranscriptEvent):
# This handler can be implemented to handle transcriptions as needed.
# Here's an example to get started.
if not event.transcript or not event.transcript.results:
return

results = event.transcript.results
for result in results:
if result.alternatives:
for alt in result.alternatives:
print(alt.transcript)


async def write_chunks(audio_stream: EventPublisher[AudioStream]):
    # Consume the raw audio chunks produced by mic_stream() and pass them
    # along to the transcription stream.
async for chunk, _ in mic_stream():
await audio_stream.send(
AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
)


async def main():
# Initialize the Transcribe Streaming client
client = TranscribeStreamingClient(
config=Config(
endpoint_uri=ENDPOINT_URI,
region=AWS_REGION,
aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
)
)

# Start a streaming transcription session
stream = await client.start_stream_transcription(
input=StartStreamTranscriptionInput(
language_code="en-US",
media_sample_rate_hertz=SAMPLE_RATE,
media_encoding="pcm",
)
)

# Get the output stream for receiving transcription results
_, output_stream = await stream.await_output()

# Set up the handler for processing transcription events
handler = TranscriptResultStreamHandler(output_stream)

print("Start talking to see transcription!")
print("(Press Ctrl+C to stop)")
print("===================================")

# Run audio streaming and transcription handling concurrently
await asyncio.gather(write_chunks(stream.input_stream), handler.handle_events())


if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
print("\nExiting.")
sys.exit(0)
Binary file not shown.