Cleans up microphone interface example

HumeAI · Sep 12, 2024 · 12648b8 · 12648b8
1 parent d2ab16e
commit 12648b8
Showing 1 changed file with 53 additions and 28 deletions.
diff --git a/evi-python-example/quickstart.py b/evi-python-example/quickstart.py
@@ -6,7 +6,7 @@
 from hume.client import AsyncHumeClient
 from hume.empathic_voice.chat.socket_client import ChatConnectOptions, ChatWebsocketConnection
 from hume.empathic_voice.chat.types import SubscribeEvent
-from hume.empathic_voice.types import UserInput, UserMessage, AssistantMessage
+from hume.empathic_voice.types import UserInput
 from hume.core.api_error import ApiError
 from hume import MicrophoneInterface, Stream
 
@@ -33,14 +33,7 @@ def set_socket(self, socket: ChatWebsocketConnection):
         self.socket = socket
 
     async def on_open(self):
-        """Handle the event when the WebSocket connection is opened.
-        
-        This asynchronous method is a callback that gets triggered when 
-        the WebSocket connection is successfully established. It can be 
-        used to perform any setup tasks required immediately after opening 
-        the connection, such as logging the event or initializing certain 
-        variables.
-        """
+        """Logic invoked when the WebSocket connection is opened."""
         print("WebSocket connection opened.")
 
     async def on_message(self, message: SubscribeEvent):
@@ -61,6 +54,9 @@ async def on_message(self, message: SubscribeEvent):
             data (SubscribeEvent): This represents any type of message that is received through the EVI WebSocket, formatted in JSON. See the full list of messages in the API Reference [here](https://dev.hume.ai/reference/empathic-voice-interface-evi/chat/chat#receive).
         """
 
+        # Create an empty dictionary to store expression inference scores
+        scores = {}
+
         if message.type == "chat_metadata":
             message_type = message.type.upper()
             chat_id = message.chat_id
@@ -70,6 +66,8 @@ async def on_message(self, message: SubscribeEvent):
             role = message.message.role.upper()
             message_text = message.message.content
             text = f"{role}: {message_text}"
+            if message.from_text is False:
+                scores = message.models.prosody.scores
         elif message.type == "audio_output":
             message_str: str = message.data
             message_bytes = base64.b64decode(message_str.encode("utf-8"))
@@ -83,37 +81,30 @@ async def on_message(self, message: SubscribeEvent):
             message_type = message.type.upper()
             text = f"<{message_type}>"
 
-        # Print the formatted message to the terminal
+        # Print the formatted message
         self._print_prompt(text)
+
+        # Extract and print the top 3 emotions inferred from user and assistant expressions
+        if len(scores) > 0:
+            top_3_emotions = self._extract_top_n_emotions(scores, 3)
+            self._print_emotion_scores(top_3_emotions)
 
     async def on_close(self):
-        """Logic invoked when the WebSocket connection is closed.
-        
-        This callback function can be used to perform any cleanup tasks required
-        after closing the connection, such as logging the event or resetting certain variables.
-        """
+        """Logic invoked when the WebSocket connection is closed."""
         print("WebSocket connection closed.")
 
     async def on_error(self, error):
         """Logic invoked when an error occurs in the WebSocket connection.
         
-        It logs the error message, and can be extended to perform additional 
-        error handling such as attempting to reconnect or notifying the user.
-        
         See the full list of errors [here](https://dev.hume.ai/docs/resources/errors).
 
         Args:
             error (Exception): The error that occurred during the WebSocket communication.
         """
         print(f"Error: {error}")
-    
+
     def _print_prompt(self, text: str) -> None:
         """Print a formatted message with a timestamp.
-        
-        This helper method prints the provided text message with a 
-        timestamp. It can be used for logging or displaying messages 
-        received through the WebSocket. The timestamp is in UTC and 
-        formatted as HH:MM:SS.
 
         Args:
             text (str): The message text to be printed.
@@ -122,10 +113,43 @@ def _print_prompt(self, text: str) -> None:
         now_str = now.strftime("%H:%M:%S")
         print(f"[{now_str}] {text}")
 
+    def _extract_top_n_emotions(emotion_scores: dict, n: int) -> dict:
+        """
+        Extract the top N emotions based on confidence scores.
+
+        Args:
+            emotion_scores (dict): A dictionary of emotions and their corresponding confidence scores.
+            n (int): The number of top emotions to extract.
+
+        Returns:
+            dict: A dictionary containing the top N emotions as keys and their raw scores as values.
+        """
+        # Convert the dictionary into a list of tuples and sort by the score in descending order
+        sorted_emotions = sorted(emotion_scores.items(), key=lambda item: item[1], reverse=True)
+
+        # Extract the top N emotions
+        top_n_emotions = {emotion: score for emotion, score in sorted_emotions[:n]}
+
+        return top_n_emotions
+
+    def _print_emotion_scores(emotion_scores: dict) -> None:
+        """
+        Print the emotions and their scores in a formatted, single-line manner.
+
+        Args:
+            emotion_scores (dict): A dictionary of emotions and their corresponding confidence scores.
+        """
+        # Format the output string
+        formatted_emotions = ' | '.join([f"{emotion} ({score:.2f})" for emotion, score in emotion_scores.items()])
+
+        # Print the formatted string
+        print(f"|{formatted_emotions}|")
+
+
 async def sending_handler(socket: ChatWebsocketConnection):
     """Handle sending a message over the socket.
 
-    This method sends a UserInput message, which takes a `text` parameter as input.
+    This method waits 3 seconds and sends a UserInput message, which takes a `text` parameter as input.
     - https://dev.hume.ai/reference/empathic-voice-interface-evi/chat/chat#send.User%20Input.type
     
     See the full list of messages to send [here](https://dev.hume.ai/reference/empathic-voice-interface-evi/chat/chat#send).
@@ -146,12 +170,12 @@ async def main() -> None:
     # Retrieve any environment variables stored in the .env file
     load_dotenv()
 
-    # Retrieve the Hume API key from the environment variables
+    # Retrieve the API key, Secret key, and EVI config id from the environment variables
     HUME_API_KEY = os.getenv("HUME_API_KEY")
     HUME_SECRET_KEY = os.getenv("HUME_SECRET_KEY")
     HUME_CONFIG_ID = os.getenv("HUME_CONFIG_ID")
 
-    # Connect and authenticate with Hume
+    # Initialize the asynchronous client, authenticating with your API key
     client = AsyncHumeClient(api_key=HUME_API_KEY)
 
     # Define options for the WebSocket connection, such as an EVI config id and a secret key for token authentication
@@ -173,7 +197,7 @@ async def main() -> None:
         # Set the socket instance in the handler
         websocket_interface.set_socket(socket)
 
-        # Create an asynchronous task to continuously detect and process input from the microphone
+        # Create an asynchronous task to continuously detect and process input from the microphone, as well as play audio
         microphone_task = asyncio.create_task(MicrophoneInterface.start(socket, allow_user_interrupt=True, byte_stream=websocket_interface.byte_strs))
 
         # Create an asynchronous task to send messages over the WebSocket connection
@@ -182,4 +206,5 @@ async def main() -> None:
         # Schedule the coroutines to occur simultaneously
         await asyncio.gather(microphone_task, message_sending_task)
 
+# Execute the main asynchronous function using asyncio's event loop
 asyncio.run(main())