From 09e711debe23b2e76c023c0ab2598c6390e9e287 Mon Sep 17 00:00:00 2001 From: Ian Kelk Date: Fri, 31 May 2024 12:12:51 -0400 Subject: [PATCH] Add raw python API example (#60) --- README.md | 3 +- evi-python-api-example/.gitignore | 4 + evi-python-api-example/LICENSE | 21 ++ evi-python-api-example/README.md | 68 ++++++ evi-python-api-example/requirements_linux.txt | 21 ++ evi-python-api-example/requirements_mac.txt | 174 +++++++++++++++ evi-python-api-example/src/authenticator.py | 71 ++++++ evi-python-api-example/src/connection.py | 205 ++++++++++++++++++ evi-python-api-example/src/devices.py | 81 +++++++ evi-python-api-example/src/main.py | 109 ++++++++++ 10 files changed, 756 insertions(+), 1 deletion(-) create mode 100644 evi-python-api-example/.gitignore create mode 100644 evi-python-api-example/LICENSE create mode 100644 evi-python-api-example/README.md create mode 100644 evi-python-api-example/requirements_linux.txt create mode 100644 evi-python-api-example/requirements_mac.txt create mode 100644 evi-python-api-example/src/authenticator.py create mode 100644 evi-python-api-example/src/connection.py create mode 100644 evi-python-api-example/src/devices.py create mode 100644 evi-python-api-example/src/main.py diff --git a/README.md b/README.md index 32b815e..356c971 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ This repository contains examples of how to use the [Hume API](https://docs.hume | [`evi-next-js-pages-router`](/evi-next-js-pages-router/README.md) | Typescript | Next.js | | [`evi-typescript-example`](/evi-typescript-example/README.md) | Typescript | | | [`evi-embed-vue`](/evi-embed-vue/README.md) | Typescript | Vue | -| [`evi-python-example`](/evi-python-example/README.md) | Python | | +| [`evi-python-example`](/evi-python-example/README.md) | Python | Hume Python SDK | +| [`evi-python-api-example`](/evi-python-api-example/README.md) | Python | | | [`meld`](/meld/README.md) (`evi-react-example`) | Typescript | React | ## [Expression 
Measurement API](https://dev.hume.ai/docs/expression-measurement-api/overview) diff --git a/evi-python-api-example/.gitignore b/evi-python-api-example/.gitignore new file mode 100644 index 0000000..d0edad6 --- /dev/null +++ b/evi-python-api-example/.gitignore @@ -0,0 +1,4 @@ +.env +.DS_store +*.pyc +__pycache__/ \ No newline at end of file diff --git a/evi-python-api-example/LICENSE b/evi-python-api-example/LICENSE new file mode 100644 index 0000000..aa7bcd4 --- /dev/null +++ b/evi-python-api-example/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Hume AI + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/evi-python-api-example/README.md b/evi-python-api-example/README.md new file mode 100644 index 0000000..8924b4d --- /dev/null +++ b/evi-python-api-example/README.md @@ -0,0 +1,68 @@ +
+ +

Empathic Voice Interface | Sample Implementation

+

+ Jumpstart your development with Hume's Empathic Voice Interface! +

+
+ +## Overview + +This project features a sample implementation of Hume's [Empathic Voice Interface](https://hume.docs.buildwithfern.com/docs/empathic-voice-interface-evi/overview) using Hume's API with Python in a terminal window. + +## Setting up a virtual environment (optional) + +Before you install the dependencies, you might want to create a virtual environment to isolate your package installations. To create a virtual environment, run the following commands in your terminal: + +```bash +# Create a virtual environment in the directory 'evi-env' +python -m venv evi-env + +# Activate the virtual environment +# On Mac/Linux: +source evi-env/bin/activate +``` + +After activating the virtual environment, you can proceed with the installation of dependencies as described below. + +## Dependencies + +To run the example, install the requirements file for your platform using `pip`: + +### Mac + +```bash +pip install -r requirements_mac.txt +``` + +### Linux + +```bash +pip install -r requirements_linux.txt +``` + +## Environment variables + +Either create a `.env` file or set environment variables for HUME_API_KEY and HUME_CLIENT_SECRET.
+ +Example `.env` file: + +```bash +HUME_API_KEY="autj04acqK3jpfERQJpkg7829AATGUEvlAR7SYmmOB5ZZVbZD" +HUME_CLIENT_SECRET="gnxftyfpcagTyB1hgffGis2zHgkUiQZN3Fo46Tj9vMchBBupgAXhM8pamd2Aw9Qd" +``` + +Example terminal commands to set environment variables manually: +```bash +export HUME_API_KEY="autj04acqK3jpfERQJpkg7829AATGUEvlAR7SYmmOB5ZZVbZD" +export HUME_CLIENT_SECRET="gnxftyfpcagTyB1hgffGis2zHgkUiQZN3Fo46Tj9vMchBBupgAXhM8pamd2Aw9Qd" +``` + +## Usage + +```bash +cd src +python main.py +``` + + diff --git a/evi-python-api-example/requirements_linux.txt b/evi-python-api-example/requirements_linux.txt new file mode 100644 index 0000000..2e2521a --- /dev/null +++ b/evi-python-api-example/requirements_linux.txt @@ -0,0 +1,21 @@ +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +gevent==24.2.1 +greenlet==3.0.3 +idna==3.6 +numpy==1.26.4 +playsound==1.3.0 +PyAudio==0.2.14 +pycparser==2.21 +requests==2.31.0 +setuptools==69.1.1 +simpleaudio==1.0.4 +sounddevice==0.4.6 +soundfile +urllib3==2.2.1 +websockets==12.0 +wheel==0.42.0 +zope.event==5.0 +zope.interface==6.2 +python-dotenv diff --git a/evi-python-api-example/requirements_mac.txt b/evi-python-api-example/requirements_mac.txt new file mode 100644 index 0000000..612af3f --- /dev/null +++ b/evi-python-api-example/requirements_mac.txt @@ -0,0 +1,174 @@ +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +gevent==24.2.1 +greenlet==3.0.3 +idna==3.6 +numpy==1.26.4 +playsound==1.3.0 +PyAudio==0.2.14 +pycparser==2.21 +pyobjc==10.1 +pyobjc-core==10.1 +pyobjc-framework-Accessibility==10.1 +pyobjc-framework-Accounts==10.1 +pyobjc-framework-AddressBook==10.1 +pyobjc-framework-AdServices==10.1 +pyobjc-framework-AdSupport==10.1 +pyobjc-framework-AppleScriptKit==10.1 +pyobjc-framework-AppleScriptObjC==10.1 +pyobjc-framework-ApplicationServices==10.1 +pyobjc-framework-AppTrackingTransparency==10.1 +pyobjc-framework-AudioVideoBridging==10.1 +pyobjc-framework-AuthenticationServices==10.1 
+pyobjc-framework-AutomaticAssessmentConfiguration==10.1 +pyobjc-framework-Automator==10.1 +pyobjc-framework-AVFoundation==10.1 +pyobjc-framework-AVKit==10.1 +pyobjc-framework-AVRouting==10.1 +pyobjc-framework-BackgroundAssets==10.1 +pyobjc-framework-BusinessChat==10.1 +pyobjc-framework-CalendarStore==10.1 +pyobjc-framework-CallKit==10.1 +pyobjc-framework-CFNetwork==10.1 +pyobjc-framework-Cinematic==10.1 +pyobjc-framework-ClassKit==10.1 +pyobjc-framework-CloudKit==10.1 +pyobjc-framework-Cocoa==10.1 +pyobjc-framework-Collaboration==10.1 +pyobjc-framework-ColorSync==10.1 +pyobjc-framework-Contacts==10.1 +pyobjc-framework-ContactsUI==10.1 +pyobjc-framework-CoreAudio==10.1 +pyobjc-framework-CoreAudioKit==10.1 +pyobjc-framework-CoreBluetooth==10.1 +pyobjc-framework-CoreData==10.1 +pyobjc-framework-CoreHaptics==10.1 +pyobjc-framework-CoreLocation==10.1 +pyobjc-framework-CoreMedia==10.1 +pyobjc-framework-CoreMediaIO==10.1 +pyobjc-framework-CoreMIDI==10.1 +pyobjc-framework-CoreML==10.1 +pyobjc-framework-CoreMotion==10.1 +pyobjc-framework-CoreServices==10.1 +pyobjc-framework-CoreSpotlight==10.1 +pyobjc-framework-CoreText==10.1 +pyobjc-framework-CoreWLAN==10.1 +pyobjc-framework-CryptoTokenKit==10.1 +pyobjc-framework-DataDetection==10.1 +pyobjc-framework-DeviceCheck==10.1 +pyobjc-framework-DictionaryServices==10.1 +pyobjc-framework-DiscRecording==10.1 +pyobjc-framework-DiscRecordingUI==10.1 +pyobjc-framework-DiskArbitration==10.1 +pyobjc-framework-DVDPlayback==10.1 +pyobjc-framework-EventKit==10.1 +pyobjc-framework-ExceptionHandling==10.1 +pyobjc-framework-ExecutionPolicy==10.1 +pyobjc-framework-ExtensionKit==10.1 +pyobjc-framework-ExternalAccessory==10.1 +pyobjc-framework-FileProvider==10.1 +pyobjc-framework-FileProviderUI==10.1 +pyobjc-framework-FinderSync==10.1 +pyobjc-framework-FSEvents==10.1 +pyobjc-framework-GameCenter==10.1 +pyobjc-framework-GameController==10.1 +pyobjc-framework-GameKit==10.1 +pyobjc-framework-GameplayKit==10.1 +pyobjc-framework-HealthKit==10.1 
+pyobjc-framework-ImageCaptureCore==10.1 +pyobjc-framework-InputMethodKit==10.1 +pyobjc-framework-InstallerPlugins==10.1 +pyobjc-framework-InstantMessage==10.1 +pyobjc-framework-Intents==10.1 +pyobjc-framework-IntentsUI==10.1 +pyobjc-framework-IOBluetooth==10.1 +pyobjc-framework-IOBluetoothUI==10.1 +pyobjc-framework-IOSurface==10.1 +pyobjc-framework-iTunesLibrary==10.1 +pyobjc-framework-KernelManagement==10.1 +pyobjc-framework-LatentSemanticMapping==10.1 +pyobjc-framework-LaunchServices==10.1 +pyobjc-framework-libdispatch==10.1 +pyobjc-framework-libxpc==10.1 +pyobjc-framework-LinkPresentation==10.1 +pyobjc-framework-LocalAuthentication==10.1 +pyobjc-framework-LocalAuthenticationEmbeddedUI==10.1 +pyobjc-framework-MailKit==10.1 +pyobjc-framework-MapKit==10.1 +pyobjc-framework-MediaAccessibility==10.1 +pyobjc-framework-MediaLibrary==10.1 +pyobjc-framework-MediaPlayer==10.1 +pyobjc-framework-MediaToolbox==10.1 +pyobjc-framework-Metal==10.1 +pyobjc-framework-MetalFX==10.1 +pyobjc-framework-MetalKit==10.1 +pyobjc-framework-MetalPerformanceShaders==10.1 +pyobjc-framework-MetalPerformanceShadersGraph==10.1 +pyobjc-framework-MetricKit==10.1 +pyobjc-framework-MLCompute==10.1 +pyobjc-framework-ModelIO==10.1 +pyobjc-framework-MultipeerConnectivity==10.1 +pyobjc-framework-NaturalLanguage==10.1 +pyobjc-framework-NetFS==10.1 +pyobjc-framework-Network==10.1 +pyobjc-framework-NetworkExtension==10.1 +pyobjc-framework-NotificationCenter==10.1 +pyobjc-framework-OpenDirectory==10.1 +pyobjc-framework-OSAKit==10.1 +pyobjc-framework-OSLog==10.1 +pyobjc-framework-PassKit==10.1 +pyobjc-framework-PencilKit==10.1 +pyobjc-framework-PHASE==10.1 +pyobjc-framework-Photos==10.1 +pyobjc-framework-PhotosUI==10.1 +pyobjc-framework-PreferencePanes==10.1 +pyobjc-framework-PushKit==10.1 +pyobjc-framework-Quartz==10.1 +pyobjc-framework-QuickLookThumbnailing==10.1 +pyobjc-framework-ReplayKit==10.1 +pyobjc-framework-SafariServices==10.1 +pyobjc-framework-SafetyKit==10.1 +pyobjc-framework-SceneKit==10.1 
class Authenticator:
    """
    Obtain OAuth2 access tokens from Hume AI via the client-credentials grant.

    Attributes:
        api_key (str): The API key provided by Hume AI.
        client_secret (str): The client secret provided by Hume AI.
        host (str): The host name of the API (default is "test-api.hume.ai").
        timeout (float): Seconds to wait for the token endpoint before giving up.
    """

    def __init__(
        self,
        api_key: str,
        client_secret: str,
        host: str = "test-api.hume.ai",
        timeout: float = 10.0,
    ):
        """
        Store the credentials and connection settings; no network call is made here.

        Args:
            api_key (str): The API key provided by Hume AI.
            client_secret (str): The client secret provided by Hume AI.
            host (str, optional): The host name of the API. Defaults to "test-api.hume.ai".
            timeout (float, optional): Timeout in seconds passed to the HTTP request.
                Defaults to 10.0.
        """
        self.api_key = api_key
        self.client_secret = client_secret
        self.host = host
        self.timeout = timeout

    def fetch_access_token(self) -> str:
        """
        Fetch an access token from the OAuth2 token endpoint of ``self.host``.

        Sends a POST to ``https://<host>/oauth2-cc/token`` with HTTP Basic
        credentials built from the API key and client secret and a
        ``grant_type=client_credentials`` form body, then returns the
        ``access_token`` field of the JSON response.

        Returns:
            str: The access token.

        Raises:
            ValueError: If the response is not JSON, is not a JSON object, or
                does not contain an "access_token" field.
        """
        # HTTP Basic credentials: base64("<api_key>:<client_secret>")
        credentials = f"{self.api_key}:{self.client_secret}"
        encoded = base64.b64encode(credentials.encode()).decode()

        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Authorization": f"Basic {encoded}",
        }

        # Without a timeout, requests waits indefinitely on a stalled server
        response = requests.post(
            f"https://{self.host}/oauth2-cc/token",
            headers=headers,
            data={"grant_type": "client_credentials"},
            timeout=self.timeout,
        )

        try:
            payload = response.json()
        except ValueError as err:
            raise ValueError(
                f"Token endpoint returned a non-JSON response (HTTP {response.status_code})"
            ) from err

        # Include the status code so rejected credentials are distinguishable
        if not isinstance(payload, dict) or "access_token" not in payload:
            raise ValueError(
                f"Access token not found in response (HTTP {response.status_code})"
            )

        return payload["access_token"]
# Single worker thread used to run the blocking PyAudio read off the event loop
executor = ThreadPoolExecutor(max_workers=1)

# Configure logging
# NOTE(review): this sets the root logger to DEBUG at import time, which also
# affects third-party libraries' loggers - confirm that verbosity is intended.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.DEBUG
)


class Connection:
    """
    Stream microphone audio to a WebSocket server and play back audio it returns.
    """

    @classmethod
    async def connect(
        cls,
        socket_url: str,
        audio_stream: "PyAudioStream",
        sample_rate: int,
        sample_width: int,
        num_channels: int,
        chunk_size: int,
    ):
        """
        Connect to the WebSocket server and keep reconnecting after failures.

        For each connection a sender task and a receiver task are started and
        awaited together. Any exception (including a closed connection) is
        printed and followed by a 5 second pause before the next attempt, so
        this coroutine loops indefinitely and does not return normally.

        Args:
            socket_url (str): The URL of the WebSocket server.
            audio_stream (PyAudioStream): The PyAudio stream to read audio data from.
            sample_rate (int): The sample rate of the audio data.
            sample_width (int): The sample width of the audio data in bytes.
            num_channels (int): The number of audio channels in the stream.
            chunk_size (int): The number of frames requested per read.
        """
        while True:
            try:
                async with websockets.connect(socket_url) as socket:
                    print("Connected to WebSocket")
                    # Create tasks for sending and receiving audio data
                    send_task = asyncio.create_task(
                        cls._send_audio_data(
                            socket,
                            audio_stream,
                            sample_rate,
                            sample_width,
                            num_channels,
                            chunk_size,
                        )
                    )
                    receive_task = asyncio.create_task(cls._receive_audio_data(socket))
                    # Wait for both tasks to complete
                    await asyncio.gather(receive_task, send_task)
            except websockets.exceptions.ConnectionClosed:
                print(
                    "WebSocket connection closed. Attempting to reconnect in 5 seconds..."
                )
                await asyncio.sleep(5)
            except Exception as e:
                print(
                    f"An error occurred: {e}. Attempting to reconnect in 5 seconds..."
                )
                await asyncio.sleep(5)

    @classmethod
    async def _receive_audio_data(cls, socket):
        """
        Receive messages from the WebSocket and play any audio they carry.

        Each message is parsed as JSON and printed. When its "type" is
        "audio_output", the base64 "data" field is decoded, written to a
        temporary ".wav" file and played with playsound.

        Errors are printed rather than raised: per-message ValueError and
        KeyError are reported and the loop continues; any other exception
        ends the loop after being printed.

        Args:
            socket (WebSocketClientProtocol): The WebSocket connection.
        """
        try:
            async for message in socket:
                try:
                    json_message = json.loads(message)
                    print("Received JSON message:", json_message)

                    if json_message.get("type") == "audio_output":
                        audio_data = base64.b64decode(json_message["data"])

                        with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as tmpfile:
                            tmpfile.write(audio_data)
                            tmpfile.flush()  # Ensure all data is written to disk
                            # NOTE(review): playsound is called without awaiting or
                            # offloading, so the event loop (and the sender task)
                            # waits until it returns - confirm this is intended.
                            playsound(tmpfile.name)
                            print("Audio played")

                except ValueError as e:
                    print(f"Failed to parse JSON, error: {e}")
                except KeyError as e:
                    print(f"Key error in JSON data: {e}")

        except Exception as e:
            print(f"An error occurred while receiving audio: {e}")

    @classmethod
    async def _read_audio_stream_non_blocking(cls, audio_stream, chunk_size):
        """
        Read one chunk from the PyAudio stream without blocking the event loop.

        The blocking ``audio_stream.read(chunk_size, False)`` call is run on the
        module-level single-thread executor and awaited.

        Args:
            audio_stream (PyAudioStream): The PyAudio stream to read audio data from.
            chunk_size (int): The number of frames to read.

        Returns:
            bytes: The audio data read from the stream.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            executor, audio_stream.read, chunk_size, False
        )

    @staticmethod
    def _wav_header(num_channels: int, sample_width: int, sample_rate: int) -> bytes:
        """Return a WAV header (with an empty data chunk) for the given format."""
        buffer = io.BytesIO()
        with wave.open(buffer, "wb") as wf:
            wf.setnchannels(num_channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(sample_rate)
            wf.writeframes(b"")
        return buffer.getvalue()

    @classmethod
    async def _send_audio_data(
        cls,
        socket,
        audio_stream: "PyAudioStream",
        sample_rate: int,
        sample_width: int,
        num_channels: int,
        chunk_size: int,
    ):
        """
        Read audio from the PyAudio stream and send it to the WebSocket server.

        Each chunk is sent as a JSON message ``{"type": "audio_input", "data":
        <base64>}``. The first message is prefixed with a WAV header; later
        messages carry raw PCM bytes only. Two-channel input is averaged down
        to one channel, treating the samples as 16-bit integers. The loop has
        no exit condition and ends only when a read or send raises.

        Args:
            socket (WebSocketClientProtocol): The WebSocket connection.
            audio_stream (PyAudioStream): The PyAudio stream to read audio data from.
            sample_rate (int): The sample rate of the audio data.
            sample_width (int): The sample width of the audio data in bytes.
            num_channels (int): The number of audio channels in the stream.
            chunk_size (int): The number of frames requested per read.
        """
        downmix = num_channels == 2
        # The header must describe what is actually transmitted: downmixed
        # stereo goes out as a single channel.
        sent_channels = 1 if downmix else num_channels
        header_sent = False

        while True:
            data = await cls._read_audio_stream_non_blocking(audio_stream, chunk_size)

            if downmix:
                # Widen to int32 before adding: summing two int16 samples
                # directly wraps around for loud signals.
                interleaved = np.frombuffer(data, dtype=np.int16).astype(np.int32)
                mono = ((interleaved[0::2] + interleaved[1::2]) / 2).astype(np.int16)
                data = mono.tobytes()

            payload = data
            if not header_sent:
                payload = cls._wav_header(sent_channels, sample_width, sample_rate) + payload
                header_sent = True

            # Encode audio data to base64 and send as JSON message
            encoded_audio = base64.b64encode(payload).decode("utf-8")
            json_message = json.dumps({"type": "audio_input", "data": encoded_audio})
            await socket.send(json_message)
class AudioDevices:
    """
    Helpers to enumerate PyAudio devices and let the user pick one interactively.
    """

    @classmethod
    def list_audio_devices(
        cls, pyaudio: "PyAudio"
    ) -> Tuple[List[Tuple[int, str, int]], List[Tuple[int, str, dict]]]:
        """
        List the audio input and output devices of host API 0.

        Args:
            pyaudio (PyAudio): An instance of PyAudio to interact with the audio system.

        Returns:
            Tuple[List[Tuple[int, str, int]], List[Tuple[int, str, dict]]]: Two lists:
            - Input devices as (device index, name, default sample rate as int).
            - Output devices as (device index, name, full device info mapping).
            A device with both input and output channels appears in both lists.
        """
        host_api = pyaudio.get_host_api_info_by_index(0)
        device_count = host_api.get("deviceCount")

        input_devices = []
        output_devices = []

        for i in range(device_count):
            device = pyaudio.get_device_info_by_host_api_device_index(0, i)
            name = device.get("name")
            if device.get("maxInputChannels") > 0:
                input_devices.append((i, name, int(device.get("defaultSampleRate"))))
            if device.get("maxOutputChannels") > 0:
                output_devices.append((i, name, device))

        return input_devices, output_devices

    @classmethod
    def choose_device(cls, devices, device_type="input"):
        """
        Prompt the user to select one of the given devices by index.

        The devices are printed as "<index>: <name>" and the prompt repeats
        until a listed index is entered.

        Args:
            devices (List[Tuple[int, str, Any]]): 3-tuples as produced by
                list_audio_devices; the first item is the selectable index.
            device_type (str, optional): The type of device to choose ('input' or 'output').
                Defaults to 'input'.

        Returns:
            Tuple[int, int] or int or None: For 'input', a tuple of the chosen
            device index and the third field of that same device (its sample
            rate). For any other device_type, the chosen device index. None if
            `devices` is empty.
        """
        if not devices:
            print(f"No {device_type} devices found.")
            return None

        # Display available devices
        print(f"Available {device_type} devices:")
        for device_index, name, _ in devices:
            print(f"{device_index}: {name}")

        # Index -> third tuple field, so the value returned belongs to the
        # device the user picked rather than to the last one printed.
        third_field_by_index = {entry[0]: entry[2] for entry in devices}

        # Prompt the user to select a device by index
        while True:
            try:
                choice = int(input(f"Select {device_type} device by index: "))
            except ValueError:
                print("Please enter a numerical index.")
                continue

            if choice not in third_field_by_index:
                print(
                    f"Invalid selection. Please choose a valid {device_type} device index."
                )
                continue

            if device_type == "input":
                return choice, third_field_by_index[choice]
            return choice
async def main():
    """
    Set up audio devices, authenticate, and stream audio over the Hume AI websocket.

    The user is prompted for an input and an output device, a PyAudio stream is
    opened on them, an access token is fetched, and Connection.connect is
    awaited. The stream and the PyAudio instance are released when this
    coroutine exits, whether normally or through an exception.

    Raises:
        SystemExit: If no audio input device is available, or if the API
            credentials are missing (raised by get_access_token).
    """
    # Initialize PyAudio instance
    pyaudio = PyAudio()
    audio_stream = None

    try:
        # List available audio input and output devices
        input_devices, output_devices = AudioDevices.list_audio_devices(pyaudio)

        # choose_device returns None when the list is empty; unpacking that
        # would fail with an unhelpful TypeError.
        input_choice = AudioDevices.choose_device(input_devices, "input")
        if input_choice is None:
            raise SystemExit("Error: an audio input device is required.")
        input_device_index, input_device_sample_rate = input_choice

        # Choose the audio output device
        output_device_index = AudioDevices.choose_device(output_devices, "output")

        # Open the audio stream with the selected parameters
        audio_stream = pyaudio.open(
            format=FORMAT,
            channels=CHANNELS,
            frames_per_buffer=CHUNK_SIZE,
            rate=input_device_sample_rate,
            input=True,
            output=True,
            input_device_index=input_device_index,
            output_device_index=output_device_index,
        )

        # Fetch the access token for authentication
        access_token = get_access_token()

        # Construct the websocket URL with the access token
        socket_url = (
            "wss://api.hume.ai/v0/assistant/chat?"
            f"access_token={access_token}"
        )

        # Connect to the websocket and start the audio stream
        await Connection.connect(
            socket_url,
            audio_stream,
            input_device_sample_rate,
            SAMPLE_WIDTH,
            CHANNELS,
            CHUNK_SIZE,
        )
    finally:
        # Release audio resources on every exit path
        if audio_stream is not None:
            audio_stream.stop_stream()
            audio_stream.close()
        pyaudio.terminate()


def get_access_token() -> str:
    """
    Load API credentials from the environment and fetch an access token.

    A `.env` file is loaded first via load_dotenv, then HUME_API_KEY and
    HUME_CLIENT_SECRET are read from the environment.

    Returns:
        str: The access token.

    Raises:
        SystemExit: If the API key or client secret is unset or empty. The
            message is passed to SystemExit, giving a non-zero exit status.
    """
    load_dotenv()

    api_key = os.getenv("HUME_API_KEY")
    client_secret = os.getenv("HUME_CLIENT_SECRET")

    # An empty value is as unusable as a missing one
    if not api_key or not client_secret:
        raise SystemExit(
            "Error: HUME_API_KEY and HUME_CLIENT_SECRET must be set either in a .env file or as environment variables."
        )

    # Create an Authenticator with the credentials and fetch the token
    authenticator = Authenticator(api_key, client_secret)
    return authenticator.fetch_access_token()


if __name__ == "__main__":
    # Entry point for the script. Runs the main asynchronous function.
    asyncio.run(main())