feat(voice_hmi.py): add split_and_publish method to handle message segmentation for improved message processing

fix(voice_hmi.py): replace direct message publishing with split_and_publish to ensure messages are sent as individual sentences
feat(tts_clients.py): enhance ElevenLabsClient to include voice settings and validate voice existence during initialization
fix(tts_clients.py): correct typo in logging message for synthesizing speech error handling
RobotecAI · Sep 26, 2024 · 3a7a09f · 3a7a09f
1 parent 9e9d269
commit 3a7a09f
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 3 deletions.
diff --git a/src/rai_hmi/rai_hmi/voice_hmi.py b/src/rai_hmi/rai_hmi/voice_hmi.py
@@ -14,6 +14,7 @@
 #
 
 import logging
+import re
 import threading
 import time
 from queue import Queue
@@ -103,6 +104,12 @@ def __init__(
     def set_agent(self, agent):
         self.agent = agent
 
+    def split_and_publish(self, message: str):
+        sentences = re.split(r"(?<=\.)\s|[:!]", message)
+        for sentence in sentences:
+            if sentence:
+                self.hmi_publisher.publish(String(data=sentence))
+
     def handle_human_message(self, msg: String):
         self.processing = True
         self.get_logger().info("Processing started")
@@ -118,7 +125,7 @@ def handle_human_message(self, msg: String):
                     self.get_logger().info(
                         f'Sending message to human: "{last_message}"'
                     )
-                    self.hmi_publisher.publish(String(data=last_message))
+                    self.split_and_publish(last_message)
 
         self.get_logger().info("Processing finished")
         self.processing = False

diff --git a/src/rai_tts/rai_tts/tts_clients.py b/src/rai_tts/rai_tts/tts_clients.py
@@ -21,6 +21,8 @@
 
 import requests
 from elevenlabs.client import ElevenLabs
+from elevenlabs.types import Voice
+from elevenlabs.types.voice_settings import VoiceSettings
 
 logger = logging.getLogger(__name__)
 
@@ -46,10 +48,19 @@ def save_audio_to_file(audio_data: bytes, suffix: str) -> str:
 class ElevenLabsClient(TTSClient):
     def __init__(self, voice: str, base_url: Optional[str] = None):
         self.base_url = base_url
-        self.voice = voice
         api_key = os.getenv(key="ELEVENLABS_API_KEY")
         self.client = ElevenLabs(base_url=None, api_key=api_key)
 
+        self.voice_settings = VoiceSettings(
+            stability=0.7,
+            similarity_boost=0.5,
+        )
+        voices = self.client.voices.get_all().voices
+        voice_id = next((v.voice_id for v in voices if v.name == voice), None)
+        if voice_id is None:
+            raise ValueError(f"Voice {voice} not found")
+        self.voice = Voice(voice_id=voice_id, settings=self.voice_settings)
+
     def synthesize_speech_to_file(self, text: str) -> str:
         tries = 0
         while tries < TTS_TRIES:
@@ -62,7 +73,7 @@ def synthesize_speech_to_file(self, text: str) -> str:
                 audio_data = b"".join(response)
                 return self.save_audio_to_file(audio_data, suffix=".mp3")
             except Exception as e:
-                logger.warn(f"Error occurred during sythesizing speech: {e}.")  # type: ignore
+                logger.warn(f"Error occurred during synthesizing speech: {e}.")  # type: ignore
                 tries += 1
         audio_data = b"".join(response)
         return self.save_audio_to_file(audio_data, suffix=".mp3")