|
import asyncio

import pytest
from dotenv import load_dotenv

from vision_agents.core.llm.events import RealtimeAudioOutputEvent
from vision_agents.plugins.openai import Realtime

# Load environment variables (e.g. the OpenAI API key) before any client is built.
load_dotenv()
| 10 | + |
| 11 | + |
class TestOpenAIRealtime:
    """Integration tests for the OpenAI Realtime API.

    Each test subscribes to ``RealtimeAudioOutputEvent``, prompts the model
    (text, audio, or video), and asserts that at least one audio event comes
    back. These tests hit the live API and require credentials loaded from
    the environment (see ``load_dotenv`` at module import).
    """

    @staticmethod
    async def _wait_for_events(events: list, timeout: float, poll: float = 0.25) -> None:
        """Poll until *events* is non-empty or *timeout* seconds elapse.

        Replaces fixed-length sleeps before the assertions: the test finishes
        as soon as the first audio chunk arrives instead of always waiting the
        worst-case duration, and tolerates a slower API up to *timeout*.

        Args:
            events: The list the event subscriber appends to.
            timeout: Maximum total seconds to wait.
            poll: Seconds between checks.
        """
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while not events and loop.time() < deadline:
            await asyncio.sleep(poll)

    @pytest.fixture
    async def realtime(self):
        """Create a Realtime client and guarantee it is closed after the test."""
        realtime = Realtime(
            model="gpt-realtime",
            voice="alloy",
        )
        try:
            yield realtime
        finally:
            # Always release the connection, even if the test body raised.
            await realtime.close()

    @pytest.mark.integration
    async def test_simple_response_flow(self, realtime):
        """Send a simple text message and verify an audio response arrives."""
        events: list[RealtimeAudioOutputEvent] = []

        @realtime.events.subscribe
        async def on_audio(event: RealtimeAudioOutputEvent):
            events.append(event)

        # Yield once so the subscription is registered before connecting.
        await asyncio.sleep(0.01)
        await realtime.connect()
        await realtime.simple_response("Hello, can you hear me?")

        # Bounded poll instead of a fixed 3s sleep: returns as soon as the
        # first audio event lands, tolerates up to 15s of API latency.
        await self._wait_for_events(events, timeout=15.0)
        assert len(events) > 0

    @pytest.mark.integration
    async def test_audio_sending_flow(self, realtime, mia_audio_16khz):
        """Send real audio data and verify the connection yields a response."""
        events: list[RealtimeAudioOutputEvent] = []

        @realtime.events.subscribe
        async def on_audio(event: RealtimeAudioOutputEvent):
            events.append(event)

        await asyncio.sleep(0.01)
        await realtime.connect()

        # Give the session time to be fully established before streaming audio.
        await asyncio.sleep(2.0)

        # Resample the fixture from 16kHz to 48kHz.
        # NOTE(review): this assumes the plugin expects 48kHz PCM input —
        # confirm against the Realtime plugin's audio contract.
        import numpy as np
        from scipy import signal
        from vision_agents.core.edge.types import PcmData

        samples_16k = mia_audio_16khz.samples
        num_samples_48k = int(len(samples_16k) * 48000 / 16000)
        samples_48k = signal.resample(samples_16k, num_samples_48k).astype(np.int16)

        audio_48khz = PcmData(
            samples=samples_48k,
            sample_rate=48000,
            format="s16",
        )

        # Prime the model, then pause so the prompt is processed before the audio.
        await realtime.simple_response("Listen to the following audio and tell me what you hear")
        await asyncio.sleep(5.0)

        # Send the resampled audio and wait (bounded) for a response instead
        # of a fixed 10s sleep.
        await realtime.simple_audio_response(audio_48khz)
        await self._wait_for_events(events, timeout=20.0)
        assert len(events) > 0

    @pytest.mark.integration
    async def test_video_sending_flow(self, realtime, bunny_video_track):
        """Send real video data and verify the connection yields a response."""
        events: list[RealtimeAudioOutputEvent] = []

        @realtime.events.subscribe
        async def on_audio(event: RealtimeAudioOutputEvent):
            events.append(event)

        await asyncio.sleep(0.01)
        await realtime.connect()
        await realtime.simple_response("Describe what you see in this video please")
        await asyncio.sleep(10.0)

        # Start the video sender (low FPS to avoid overwhelming the connection)
        # and let it run for a fixed window.
        await realtime._watch_video_track(bunny_video_track)
        await asyncio.sleep(10.0)
        await realtime._stop_watching_video_track()

        # Allow a short grace period for in-flight audio events before asserting.
        await self._wait_for_events(events, timeout=5.0)
        assert len(events) > 0
| 110 | + |