Skip to content

Commit

Permalink
feat(voice_hmi.py): add split_and_publish method to handle message se…
Browse files Browse the repository at this point in the history
…gmentation for improved message processing

fix(voice_hmi.py): replace direct message publishing with split_and_publish to ensure messages are sent as individual sentences
feat(tts_clients.py): enhance ElevenLabsClient to include voice settings and validate voice existence during initialization
fix(tts_clients.py): correct typo in logging message for synthesizing speech error handling
  • Loading branch information
maciejmajek committed Sep 26, 2024
1 parent 9e9d269 commit 3a7a09f
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
9 changes: 8 additions & 1 deletion src/rai_hmi/rai_hmi/voice_hmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#

import logging
import re
import threading
import time
from queue import Queue
Expand Down Expand Up @@ -103,6 +104,12 @@ def __init__(
def set_agent(self, agent):
    """Store ``agent`` on this instance as ``self.agent``.

    Args:
        agent: Object to keep as the ``agent`` attribute of this instance.
    """
    self.agent = agent

def split_and_publish(self, message: str):
    """Publish ``message`` through ``self.hmi_publisher`` in sentence-sized pieces.

    The text is split after a period that is followed by a whitespace
    character (the period stays with its sentence) and on every ``:`` or
    ``!`` (those delimiter characters are dropped). Each piece is stripped
    of surrounding whitespace, and pieces that are empty after stripping
    are skipped, so blank messages are never published.

    Args:
        message: Full text to split and publish.
    """
    for fragment in re.split(r"(?<=\.)\s|[:!]", message):
        # Splitting on ":" / "!" leaves the following space attached to the
        # next fragment (and can leave a whitespace-only fragment at the
        # end), so normalise before deciding whether to publish.
        sentence = fragment.strip()
        if sentence:
            self.hmi_publisher.publish(String(data=sentence))

def handle_human_message(self, msg: String):
self.processing = True
self.get_logger().info("Processing started")
Expand All @@ -118,7 +125,7 @@ def handle_human_message(self, msg: String):
self.get_logger().info(
f'Sending message to human: "{last_message}"'
)
self.hmi_publisher.publish(String(data=last_message))
self.split_and_publish(last_message)

self.get_logger().info("Processing finished")
self.processing = False
Expand Down
15 changes: 13 additions & 2 deletions src/rai_tts/rai_tts/tts_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

import requests
from elevenlabs.client import ElevenLabs
from elevenlabs.types import Voice
from elevenlabs.types.voice_settings import VoiceSettings

logger = logging.getLogger(__name__)

Expand All @@ -46,10 +48,19 @@ def save_audio_to_file(audio_data: bytes, suffix: str) -> str:
class ElevenLabsClient(TTSClient):
def __init__(self, voice: str, base_url: Optional[str] = None):
    """Create an ElevenLabs client and resolve ``voice`` to a voice ID.

    Args:
        voice: Name of the voice to use; compared for exact equality
            against the names of the voices returned by the client.
        base_url: Stored on the instance as ``self.base_url``.

    Raises:
        ValueError: If none of the returned voices is named ``voice``.
    """
    # NOTE(review): base_url is only stored; the ElevenLabs client below is
    # always constructed with base_url=None -- confirm this is intentional.
    self.base_url = base_url
    api_key = os.getenv(key="ELEVENLABS_API_KEY")
    self.client = ElevenLabs(base_url=None, api_key=api_key)

    self.voice_settings = VoiceSettings(
        stability=0.7,
        similarity_boost=0.5,
    )

    # Look the voice up by name during construction so that an unknown
    # name fails here rather than on the first synthesis request.
    voices = self.client.voices.get_all().voices
    voice_id = next((v.voice_id for v in voices if v.name == voice), None)
    if voice_id is None:
        raise ValueError(f"Voice {voice} not found")

    # self.voice is assigned exactly once, as a Voice object carrying the
    # resolved ID and the settings above (it is never left as the raw name).
    self.voice = Voice(voice_id=voice_id, settings=self.voice_settings)

def synthesize_speech_to_file(self, text: str) -> str:
tries = 0
while tries < TTS_TRIES:
Expand All @@ -62,7 +73,7 @@ def synthesize_speech_to_file(self, text: str) -> str:
audio_data = b"".join(response)
return self.save_audio_to_file(audio_data, suffix=".mp3")
except Exception as e:
logger.warn(f"Error occurred during sythesizing speech: {e}.") # type: ignore
logger.warn(f"Error occurred during synthesizing speech: {e}.") # type: ignore
tries += 1
audio_data = b"".join(response)
return self.save_audio_to_file(audio_data, suffix=".mp3")
Expand Down

0 comments on commit 3a7a09f

Please sign in to comment.