Some fixes to audio playing. Minor UI fixes.

paulovcmedeiros · Mar 8, 2024 · commit 747291c
2 parents: c07208d + e4f9e64
Showing 8 changed files with 37 additions and 32 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@
   license = "MIT"
   name = "pyrobbot"
   readme = "README.md"
-  version = "0.7.5"
+  version = "0.7.6"
 
 [build-system]
   build-backend = "poetry.core.masonry.api"

diff --git a/pyrobbot/app/app_page_templates.py b/pyrobbot/app/app_page_templates.py
@@ -160,7 +160,7 @@ def render_custom_audio_player(
         b64 = base64.b64encode(data).decode()
         md = f"""
                 <audio controls {autoplay} {hidden} preload="metadata">
-                <source src="data:audio/mp3;base64,{b64}#" type="audio/mp3">
+                <source src="data:audio/mp3;base64,{b64}#" type="audio/mpeg">
                 </audio>
                 """
         parent_element = parent_element or st
@@ -259,7 +259,10 @@ def render_chat_history(self):
                 continue
             with st.chat_message(role, avatar=self.avatars.get(role)):
                 with contextlib.suppress(KeyError):
-                    st.caption(f"{message['chat_model']}, {message['timestamp']}")
+                    if role == "assistant":
+                        st.caption(message["chat_model"])
+                    else:
+                        st.caption(message["timestamp"])
                 st.markdown(message["content"])
                 with contextlib.suppress(KeyError):
                     if audio := message.get("reply_audio_file_path"):
@@ -285,7 +288,7 @@ def voice_output(self) -> bool:
         """Return the state of the voice output toggle."""
         return st.session_state.get("toggle_voice_output", False)
 
-    def play_chime(self, chime_type: str = "correct-answer-tone", parent_element=None):
+    def play_chime(self, chime_type: str = "success", parent_element=None):
         """Sound a chime to send notifications to the user."""
         chime = load_chime(chime_type)
         self.render_custom_audio_player(
@@ -294,13 +297,12 @@ def play_chime(self, chime_type: str = "correct-answer-tone", parent_element=Non
 
     def render_title(self):
         """Render the title of the chatbot page."""
-        with st.container(height=70, border=False):
+        with st.container(height=145, border=False):
             self.title_container = st.empty()
-        with st.container(height=50, border=False):
+            self.title_container.subheader(self.title, divider="rainbow")
             left, _ = st.columns([0.7, 0.3])
             with left:
                 self.status_msg_container = st.empty()
-        self.title_container.subheader(self.title, divider="rainbow")
 
     @property
     def direct_text_prompt(self):
@@ -310,13 +312,14 @@ def direct_text_prompt(self):
         )
         text_from_manual_audio_recorder = ""
         with st.container():
-            left, right = st.columns([0.95, 0.05])
+            left, right = st.columns([0.9, 0.1])
             with left:
                 text_from_chat_input_widget = st.chat_input(placeholder=placeholder)
             with right:
                 if not st.session_state.get("toggle_continuous_voice_input"):
                     audio = self.manual_switch_mic_recorder()
                     text_from_manual_audio_recorder = self.chat_obj.stt(audio).text
+
         return text_from_chat_input_widget or text_from_manual_audio_recorder
 
     @property
@@ -334,7 +337,7 @@ def continuous_text_prompt(self):
 
         logger.debug("Running on continuous audio prompt. Waiting user input...")
         with self.status_msg_container:
-            self.play_chime()
+            self.play_chime(chime_type="warning")
             with st.spinner(f"{self.chat_obj.assistant_name} is listening..."):
                 while True:
                     with self.parent.text_prompt_queue.mutex:
@@ -361,7 +364,7 @@ def _render_chatbot_page(self):  # noqa: PLR0915
         self.chat_obj.reply_only_as_text = not self.voice_output
 
         self.render_title()
-        chat_msgs_container = st.container(height=600, border=False)
+        chat_msgs_container = st.container(height=550, border=False)
         with chat_msgs_container:
             self.render_chat_history()
 
@@ -377,7 +380,7 @@ def _render_chatbot_page(self):  # noqa: PLR0915
             self.parent.reply_ongoing.set()
 
             if continuous_stt_prompt:
-                self.play_chime("option-select")
+                self.play_chime("success")
                 self.status_msg_container.success("Got your message!")
                 time.sleep(0.5)
         elif continuous_stt_prompt:
@@ -403,7 +406,6 @@ def _render_chatbot_page(self):  # noqa: PLR0915
                             "name": self.chat_obj.username,
                             "content": prompt,
                             "timestamp": time_now,
-                            "chat_model": self.chat_obj.model,
                         }
                     )
 
@@ -418,6 +420,7 @@ def _render_chatbot_page(self):  # noqa: PLR0915
                                 "name": self.chat_obj.assistant_name,
                                 "content": reply["text"],
                                 "reply_audio_file_path": reply["audio"],
+                                "chat_model": self.chat_obj.model,
                             }
                         )
 

diff --git a/pyrobbot/app/app_utils.py b/pyrobbot/app/app_utils.py
@@ -215,12 +215,6 @@ def get_avatar_images():
 @st.cache_data
 def load_chime(chime_type: str) -> AudioSegment:
     """Load a chime sound from the data directory."""
-    type2filename = {
-        "correct-answer-tone": "mixkit-correct-answer-tone-2870.wav",
-        "option-select": "mixkit-interface-option-select-2573.wav",
-    }
-
     return AudioSegment.from_file(
-        GeneralDefinitions.APP_DIR / "data" / type2filename[chime_type],
-        format="wav",
+        GeneralDefinitions.APP_DIR / "data" / f"{chime_type}.wav", format="wav"
     )
diff --git a/pyrobbot/app/data/mixkit-correct-answer-tone-2870.wav b/pyrobbot/app/data/mixkit-correct-answer-tone-2870.wav
diff --git a/pyrobbot/app/data/mixkit-interface-option-select-2573.wav b/pyrobbot/app/data/mixkit-interface-option-select-2573.wav
diff --git a/pyrobbot/app/data/success.wav b/pyrobbot/app/data/success.wav
diff --git a/pyrobbot/app/data/warning.wav b/pyrobbot/app/data/warning.wav
diff --git a/pyrobbot/voice_chat.py b/pyrobbot/voice_chat.py
@@ -57,18 +57,6 @@ def __init__(self, configs: VoiceChatConfigs = default_configs, **kwargs):
 
         self.block_size = int((self.sample_rate * self.frame_duration) / 1000)
 
-        self.mixer = pygame.mixer
-        try:
-            self.mixer.init(
-                frequency=self.sample_rate, channels=1, buffer=self.block_size
-            )
-        except pygame.error as error:
-            logger.exception(error)
-            logger.error(
-                "Can't initialize the mixer. Please check your system's audio settings."
-            )
-            logger.warning("Voice chat may not be available or may not work as expected.")
-
         self.vad = webrtcvad.Vad(2)
 
         self.default_chime_theme = "big-sur"
@@ -113,6 +101,26 @@ def __init__(self, configs: VoiceChatConfigs = default_configs, **kwargs):
             daemon=True,
         )
 
+    @property
+    def mixer(self):
+        """Return the mixer object."""
+        mixer = getattr(self, "_mixer", None)
+        if mixer is not None:
+            return mixer
+
+        self._mixer = pygame.mixer
+        try:
+            self.mixer.init(
+                frequency=self.sample_rate, channels=1, buffer=self.block_size
+            )
+        except pygame.error as error:
+            logger.exception(error)
+            logger.error(
+                "Can't initialize the mixer. Please check your system's audio settings."
+            )
+            logger.warning("Voice chat may not be available or may not work as expected.")
+        return self._mixer
+
     def start(self):
         """Start the chat."""
         # ruff: noqa: T201
@@ -524,7 +532,7 @@ def _assistant_still_replying(self):
 def _check_needed_imports():
     """Check if the needed modules are available."""
     if not _sounddevice_imported:
-        logger.error(
+        logger.warning(
             "Module `sounddevice`, needed for local audio recording, is not available."
         )