diff --git a/clients/aws-sdk-transcribe-streaming/README.md b/clients/aws-sdk-transcribe-streaming/README.md
index 6b1cbfa..bb8f06f 100644
--- a/clients/aws-sdk-transcribe-streaming/README.md
+++ b/clients/aws-sdk-transcribe-streaming/README.md
@@ -11,3 +11,15 @@
 Pages can be built into portable HTML files for the time being. You can follow the instructions
 in the docs [README.md](https://github.com/awslabs/aws-sdk-python/blob/main/clients/aws-sdk-transcribe-streaming/docs/README.md).
 For high-level documentation, you can view the [`dev-guide`](https://github.com/awslabs/aws-sdk-python/tree/main/dev-guide) at the top level of this repo.
+
+### Examples
+
+The `examples` directory contains the following scripts to help you get started.
+You can run each one by calling `uv run <script>`. This will set up an
+environment for you with a supported Python version and the required dependencies.
+- `simple_mic.py` - Stream audio from your microphone in real time and receive transcription results as you speak.
+- `simple_file.py` - Transcribe a pre-recorded audio file with simulated real-time streaming and rate limiting.
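+
+Both examples resolve AWS credentials from environment variables, so make sure
+`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and (if you are using temporary
+credentials) `AWS_SESSION_TOKEN` are set in your shell before running them.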
diff --git a/clients/aws-sdk-transcribe-streaming/examples/simple_file.py b/clients/aws-sdk-transcribe-streaming/examples/simple_file.py
new file mode 100644
index 0000000..45020e1
--- /dev/null
+++ b/clients/aws-sdk-transcribe-streaming/examples/simple_file.py
@@ -0,0 +1,159 @@
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "aiofile~=3.9.0",
+#     "aws-sdk-transcribe-streaming",
+# ]
+#
+# [tool.uv.sources]
+# aws-sdk-transcribe-streaming = { path = "../" }
+# ///
+"""
+Audio file transcription example using AWS Transcribe Streaming.
+
+This example demonstrates how to:
+- Read audio from a pre-recorded file
+- Stream audio to the AWS Transcribe Streaming service with rate limiting
+- Receive and display transcription results as they arrive
+
+Prerequisites:
+- AWS credentials configured (via environment variables)
+- An audio file (default: test.wav in PCM format)
+- [uv](https://docs.astral.sh/uv/getting-started/installation/) installed
+
+Usage:
+- `uv run simple_file.py`
+"""
+
+import asyncio
+import time
+from pathlib import Path
+
+import aiofile
+from smithy_aws_core.identity import EnvironmentCredentialsResolver
+from smithy_core.aio.interfaces.eventstream import EventPublisher, EventReceiver
+
+from aws_sdk_transcribe_streaming.client import (
+    StartStreamTranscriptionInput,
+    TranscribeStreamingClient,
+)
+from aws_sdk_transcribe_streaming.config import Config
+from aws_sdk_transcribe_streaming.models import (
+    AudioEvent,
+    AudioStream,
+    AudioStreamAudioEvent,
+    TranscriptEvent,
+    TranscriptResultStream,
+)
+
+AWS_REGION = "us-west-2"
+ENDPOINT_URI = f"https://transcribestreaming.{AWS_REGION}.amazonaws.com"
+
+SAMPLE_RATE = 16000
+BYTES_PER_SAMPLE = 2
+CHANNEL_NUMS = 1
+AUDIO_PATH = Path(__file__).parent / "test.wav"
+CHUNK_SIZE = 1024 * 8
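+# For this audio format (16 kHz, 2-byte samples, mono), each 8 KiB chunk
+# carries 8192 / (2 * 16000 * 1) = 0.256 seconds of audio, which is the
+# pacing arithmetic that apply_realtime_delay below relies on.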
+
+
+async def apply_realtime_delay(
+    audio_stream: EventPublisher[AudioStream],
+    reader: aiofile.Reader,
+    bytes_per_sample: int,
+    sample_rate: float,
+    channel_nums: int,
+) -> None:
+    """Applies a delay while streaming an audio file to simulate real-time input."""
+    start_time = time.time()
+    elapsed_audio_time = 0.0
+    async for chunk in reader:
+        await audio_stream.send(
+            AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
+        )
+        elapsed_audio_time += len(chunk) / (
+            bytes_per_sample * sample_rate * channel_nums
+        )
+        # Sleep so that sending keeps pace with real-time playback
+        wait_time = start_time + elapsed_audio_time - time.time()
+        await asyncio.sleep(max(0.0, wait_time))
+
+
+class TranscriptResultStreamHandler:
+    def __init__(self, stream: EventReceiver[TranscriptResultStream]):
+        self.stream = stream
+
+    async def handle_events(self):
+        # Continuously receives events from the stream and delegates
+        # to appropriate handlers based on event type.
+        async for event in self.stream:
+            if isinstance(event.value, TranscriptEvent):
+                await self.handle_transcript_event(event.value)
+
+    async def handle_transcript_event(self, event: TranscriptEvent):
+        # This handler can be implemented to handle transcriptions as needed.
+        # Here's an example to get started. Note: the service emits partial
+        # results that are refined over time, so a phrase may print repeatedly.
+        if not event.transcript or not event.transcript.results:
+            return
+
+        results = event.transcript.results
+        for result in results:
+            if result.alternatives:
+                for alt in result.alternatives:
+                    print(alt.transcript)
+
+
+async def write_chunks(audio_stream: EventPublisher[AudioStream]):
+    # NOTE: For pre-recorded files longer than 5 minutes, the sent audio
+    # chunks should be rate limited to match the real-time bitrate of the
+    # audio stream to avoid signing issues.
+    async with aiofile.AIOFile(AUDIO_PATH, "rb") as afp:
+        reader = aiofile.Reader(afp, chunk_size=CHUNK_SIZE)
+        await apply_realtime_delay(
+            audio_stream, reader, BYTES_PER_SAMPLE, SAMPLE_RATE, CHANNEL_NUMS
+        )
+
+    # Send an empty audio event to signal end of input
+    await audio_stream.send(AudioStreamAudioEvent(value=AudioEvent(audio_chunk=b"")))
+    # Small delay to ensure the empty frame is sent before close
+    await asyncio.sleep(0.4)
+    await audio_stream.close()
+
+
+async def main():
+    # Initialize the Transcribe Streaming client
+    client = TranscribeStreamingClient(
+        config=Config(
+            endpoint_uri=ENDPOINT_URI,
+            region=AWS_REGION,
+            aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
+        )
+    )
+
+    # Start a streaming transcription session
+    stream = await client.start_stream_transcription(
+        input=StartStreamTranscriptionInput(
+            language_code="en-US",
+            media_sample_rate_hertz=SAMPLE_RATE,
+            media_encoding="pcm",
+        )
+    )
+
+    # Get the output stream for receiving transcription results
+    _, output_stream = await stream.await_output()
+
+    # Set up the handler for processing transcription events
+    handler = TranscriptResultStreamHandler(output_stream)
+
+    print("Transcribing audio from file...")
+    print("===============================")
+
+    # Run audio streaming and transcription handling concurrently
+    await asyncio.gather(write_chunks(stream.input_stream), handler.handle_events())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/clients/aws-sdk-transcribe-streaming/examples/simple_mic.py b/clients/aws-sdk-transcribe-streaming/examples/simple_mic.py
new file mode 100644
index 0000000..3dca9fd
--- /dev/null
+++ b/clients/aws-sdk-transcribe-streaming/examples/simple_mic.py
@@ -0,0 +1,158 @@
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "aws-sdk-transcribe-streaming",
+#     "sounddevice~=0.5.3",
+# ]
+#
+# [tool.uv.sources]
+# aws-sdk-transcribe-streaming = { path = "../" }
+# ///
+"""
+Real-time audio transcription example using AWS Transcribe Streaming.
+
+This example demonstrates how to:
+- Stream audio from your microphone in real time
+- Send audio to the AWS Transcribe Streaming service
+- Receive and display transcription results as they arrive
+
+Prerequisites:
+- AWS credentials configured (via environment variables)
+- A working microphone
+- [uv](https://docs.astral.sh/uv/getting-started/installation/) installed
+
+Usage:
+- `uv run simple_mic.py`
+"""
+
+import asyncio
+import sys
+from collections.abc import AsyncGenerator
+from typing import Any
+
+import sounddevice
+from smithy_aws_core.identity import EnvironmentCredentialsResolver
+from smithy_core.aio.interfaces.eventstream import EventPublisher, EventReceiver
+
+from aws_sdk_transcribe_streaming.client import (
+    StartStreamTranscriptionInput,
+    TranscribeStreamingClient,
+)
+from aws_sdk_transcribe_streaming.config import Config
+from aws_sdk_transcribe_streaming.models import (
+    AudioEvent,
+    AudioStream,
+    AudioStreamAudioEvent,
+    TranscriptEvent,
+    TranscriptResultStream,
+)
+
+# Configuration
+AWS_REGION = "us-west-2"
+ENDPOINT_URI = f"https://transcribestreaming.{AWS_REGION}.amazonaws.com"
+SAMPLE_RATE = 16000
+
+
+async def mic_stream() -> AsyncGenerator[tuple[bytes, Any], None]:
+    # This function wraps the raw input stream from the microphone,
+    # forwarding the audio blocks to an asyncio.Queue.
+    loop = asyncio.get_running_loop()
+    input_queue: asyncio.Queue[tuple[bytes, Any]] = asyncio.Queue()
+
+    def callback(indata, frame_count, time_info, status):
+        loop.call_soon_threadsafe(input_queue.put_nowait, (bytes(indata), status))
+
+    # Be sure to configure the audio stream with parameters that match
+    # the audio formats described for the source language you'll be using:
+    # https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
+    stream = sounddevice.RawInputStream(
+        channels=1,
+        samplerate=SAMPLE_RATE,
+        callback=callback,
+        blocksize=1024 * 2,
+        dtype="int16",
+    )
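+    # With blocksize=2048 frames of mono int16 audio at 16 kHz, each callback
+    # delivers 4096 bytes, i.e. 2048 / 16000 = 0.128 seconds of audio.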
+
+    # Initiate the audio stream and asynchronously yield the audio chunks
+    # as they become available.
+    with stream:
+        while True:
+            indata, status = await input_queue.get()
+            yield indata, status
+
+
+class TranscriptResultStreamHandler:
+    def __init__(self, stream: EventReceiver[TranscriptResultStream]):
+        self.stream = stream
+
+    async def handle_events(self):
+        # Continuously receives events from the stream and delegates
+        # to appropriate handlers based on event type.
+        async for event in self.stream:
+            if isinstance(event.value, TranscriptEvent):
+                await self.handle_transcript_event(event.value)
+
+    async def handle_transcript_event(self, event: TranscriptEvent):
+        # This handler can be implemented to handle transcriptions as needed.
+        # Here's an example to get started. Note: the service emits partial
+        # results that are refined over time, so a phrase may print repeatedly.
+        if not event.transcript or not event.transcript.results:
+            return
+
+        results = event.transcript.results
+        for result in results:
+            if result.alternatives:
+                for alt in result.alternatives:
+                    print(alt.transcript)
+
+
+async def write_chunks(audio_stream: EventPublisher[AudioStream]):
+    # This connects the generator of raw audio chunks coming from the
+    # microphone and passes them along to the transcription stream.
+    async for chunk, _ in mic_stream():
+        await audio_stream.send(
+            AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
+        )
+
+
+async def main():
+    # Initialize the Transcribe Streaming client
+    client = TranscribeStreamingClient(
+        config=Config(
+            endpoint_uri=ENDPOINT_URI,
+            region=AWS_REGION,
+            aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
+        )
+    )
+
+    # Start a streaming transcription session
+    stream = await client.start_stream_transcription(
+        input=StartStreamTranscriptionInput(
+            language_code="en-US",
+            media_sample_rate_hertz=SAMPLE_RATE,
+            media_encoding="pcm",
+        )
+    )
+
+    # Get the output stream for receiving transcription results
+    _, output_stream = await stream.await_output()
+
+    # Set up the handler for processing transcription events
+    handler = TranscriptResultStreamHandler(output_stream)
+
+    print("Start talking to see transcription!")
+    print("(Press Ctrl+C to stop)")
+    print("===================================")
+
+    # Run audio streaming and transcription handling concurrently
+    await asyncio.gather(write_chunks(stream.input_stream), handler.handle_events())
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        print("\nExiting.")
+        sys.exit(0)
diff --git a/clients/aws-sdk-transcribe-streaming/examples/test.wav b/clients/aws-sdk-transcribe-streaming/examples/test.wav
new file mode 100644
index 0000000..5f6ca02
Binary files /dev/null and b/clients/aws-sdk-transcribe-streaming/examples/test.wav differ