From 7060df05a44c30e541eb09afd0d090d002214d85 Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Sat, 3 Aug 2024 21:53:31 -0400
Subject: [PATCH 1/9] added speech recognition

---
 .../terminal_interface/utils/cli_input.py     | 85 ++++++++++++++++---
 1 file changed, 71 insertions(+), 14 deletions(-)

diff --git a/interpreter/terminal_interface/utils/cli_input.py b/interpreter/terminal_interface/utils/cli_input.py
index 891c1c8ab7..e7b44ad451 100644
--- a/interpreter/terminal_interface/utils/cli_input.py
+++ b/interpreter/terminal_interface/utils/cli_input.py
@@ -1,17 +1,74 @@
+import speech_recognition as sr
+import time
+
+speech_mode = False
+
+
+class SpeechRecognizer:
+    def __init__(self, wake_word="jarvis"):
+        self.r = sr.Recognizer()
+        self.mic = sr.Microphone()
+        self.wake_word = wake_word
+
+    def listen_for_speech(self):
+        """Listens for speech and returns the transcribed text."""
+        with self.mic as source:
+            print("Listening...")
+            #            self.r.adjust_for_ambient_noise(source)
+            audio = self.r.listen(source)
+
+        try:
+            text = self.r.recognize_google(audio)
+            text = text[text.find(" ") + 1 :] if " " in text else text # Get rid of activation word
+            print(f"You said: {text}")
+            return text
+        except sr.UnknownValueError:
+            print("Could not understand audio")
+            return ""
+        except sr.RequestError as e:
+            print(
+                f"Could not request results from Google Speech Recognition service; {e}"
+            )
+            return ""
+
+
 def cli_input(prompt: str = "") -> str:
+    global speech_mode
+
     start_marker = '"""'
     end_marker = '"""'
-    message = input(prompt)
-
-    # Multi-line input mode
-    if start_marker in message:
-        lines = [message]
-        while True:
-            line = input()
-            lines.append(line)
-            if end_marker in line:
-                break
-        return "\n".join(lines)
-
-    # Single-line input mode
-    return message
+
+    while True:
+        if speech_mode:
+            recognizer = SpeechRecognizer()
+            text = recognizer.listen_for_speech()
+            if text == "exit":
+                print("Exiting speech recognition mode.")
+                speech_mode = False
+            elif text:
+                return text
+        else:
+            message = input(prompt)
+            # Speech recognition trigger
+            if message == ">":
+                speech_mode = True
+                continue  # Go back to the beginning of the loop for speech input
+
+            # Multi-line input mode
+            if start_marker in message:
+                lines = [message]
+                while True:
+                    line = input()
+                    lines.append(line)
+                    if end_marker in line:
+                        break
+                return "\n".join(lines)
+
+            # Single-line input mode
+            return message
+
+
+if __name__ == "__main__":
+    while True:
+        user_input = cli_input("Enter text or '>' for speech input: ")
+        print(f"You entered: {user_input}")

From 90f331d16a3fa6b8ddeca4b78d90cfdcba40a699 Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Sun, 4 Aug 2024 11:51:55 -0400
Subject: [PATCH 2/9] add vox to y/n code execution

---
 .../terminal_interface/terminal_interface.py  | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/interpreter/terminal_interface/terminal_interface.py b/interpreter/terminal_interface/terminal_interface.py
index 9b9f6fd7e3..6ae1ce6b1d 100644
--- a/interpreter/terminal_interface/terminal_interface.py
+++ b/interpreter/terminal_interface/terminal_interface.py
@@ -213,9 +213,22 @@ def terminal_interface(interpreter, message):
                                 "Would you like to run this code? (y/n)\n\n"
                             )
                         else:
-                            response = input(
-                                "  Would you like to run this code? (y/n)\n\n  "
-                            )
+                            print("  Would you like to run this code? (yes/no)\n\n  ")
+                            # response = input(
+                            #     "  666Would you like to run this code? (y/n)\n\n  "
+                            # )
+                            response = cli_input().strip().lower()
+
+                        print(f"-->{response}<--")
+
+                        if response == 'yes':
+                            response = 'y'
+                        if response == 'edit':
+                            response = 'e'
+                        if response == 'no':
+                            response = 'n'
+                            
+
                         print("")  # <- Aesthetic choice
 
                         if response.strip().lower() == "y":

From 579022cd71f6c6ac13eefdb4f0f287a8a670cefe Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Sun, 4 Aug 2024 11:52:48 -0400
Subject: [PATCH 3/9] catch speech library import error

---
 .../terminal_interface/utils/cli_input.py       | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/interpreter/terminal_interface/utils/cli_input.py b/interpreter/terminal_interface/utils/cli_input.py
index e7b44ad451..3fa2915843 100644
--- a/interpreter/terminal_interface/utils/cli_input.py
+++ b/interpreter/terminal_interface/utils/cli_input.py
@@ -1,19 +1,34 @@
-import speech_recognition as sr
+#import speech_recognition as sr
 import time
+import sys
 
 speech_mode = False
 
 
 class SpeechRecognizer:
     def __init__(self, wake_word="jarvis"):
+        global sr
+        self.import_library()
         self.r = sr.Recognizer()
         self.mic = sr.Microphone()
         self.wake_word = wake_word
 
+    def import_library(self):
+        """Check if the required libraries are installed."""
+        global sr
+        try:
+            import speech_recognition as sr
+        except ImportError:
+            print("Please install the SpeechRecognition and pyaudio libraries.")
+            if sys.platform == "darwin":
+                print("On Mac, install portaudio with: brew install portaudio") 
+            print("pip install SpeechRecognition pyaudio")
+
     def listen_for_speech(self):
         """Listens for speech and returns the transcribed text."""
         with self.mic as source:
             print("Listening...")
+            # This might be good. More testing needed. Seemed to work better without it.
             #            self.r.adjust_for_ambient_noise(source)
             audio = self.r.listen(source)
 

From cffcd9a725823a70b0fe032397ad5f3199f09c2c Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Mon, 5 Aug 2024 07:25:56 -0400
Subject: [PATCH 4/9] code/namespace cleanup. added documentation

---
 .../terminal_interface/terminal_interface.py  | 27 ++++---
 .../terminal_interface/utils/cli_input.py     | 76 +++++++++++++------
 2 files changed, 67 insertions(+), 36 deletions(-)

diff --git a/interpreter/terminal_interface/terminal_interface.py b/interpreter/terminal_interface/terminal_interface.py
index 6ae1ce6b1d..866f68859b 100644
--- a/interpreter/terminal_interface/terminal_interface.py
+++ b/interpreter/terminal_interface/terminal_interface.py
@@ -213,21 +213,24 @@ def terminal_interface(interpreter, message):
                                 "Would you like to run this code? (y/n)\n\n"
                             )
                         else:
-                            print("  Would you like to run this code? (yes/no)\n\n  ")
+                            # print("  Would you like to run this code? (yes/no)\n\n  ")
                             # response = input(
                             #     "  666Would you like to run this code? (y/n)\n\n  "
                             # )
-                            response = cli_input().strip().lower()
-
-                        print(f"-->{response}<--")
-
-                        if response == 'yes':
-                            response = 'y'
-                        if response == 'edit':
-                            response = 'e'
-                        if response == 'no':
-                            response = 'n'
-                            
+                            # edit isn't in original prompt, but there's a branch for it just below here. Wonder what it's for.
+                            response = (
+                                cli_input(
+                                    "  Would you like to run this code? (yes/no)\n\n  "
+                                )
+                                .strip()
+                                .lower()
+                            )
+                        if response == "yes":
+                            response = "y"
+                        if response == "edit":
+                            response = "e"
+                        if response == "no":
+                            response = "n"
 
                         print("")  # <- Aesthetic choice
 
diff --git a/interpreter/terminal_interface/utils/cli_input.py b/interpreter/terminal_interface/utils/cli_input.py
index 3fa2915843..02b66e163b 100644
--- a/interpreter/terminal_interface/utils/cli_input.py
+++ b/interpreter/terminal_interface/utils/cli_input.py
@@ -1,72 +1,100 @@
-#import speech_recognition as sr
-import time
 import sys
 
-speech_mode = False
+"""Return input from keyboard or speech recognition."""
 
 
 class SpeechRecognizer:
+    """Handle speech recognition using google. You must enable the API in the google cloud console."""
+
+    """Join chroma-dev group when logged into gmail: https://groups.google.com/a/chromium.org/g/chromium-dev"""
+    """Create project. Go to APIs and services -> Library. Search for speech. Enable Speech API."""
+    """Go to API manager -> Credentials and create an API key."""
+
     def __init__(self, wake_word="jarvis"):
-        global sr
-        self.import_library()
-        self.r = sr.Recognizer()
-        self.mic = sr.Microphone()
+        self.speech_mode = False
+        self.imported = False
         self.wake_word = wake_word
 
-    def import_library(self):
-        """Check if the required libraries are installed."""
-        global sr
+    def speak(self, val=None) -> bool:
+        """Set speech mode. Called with no argument, return current value. Called with an argument, sets value."""
+        if val == None:
+            return self.speech_mode
+        self.speech_mode = val
+        return self.speech_mode
+
+    def import_library(self) -> bool:
+        """Check if the required libraries are installed, if not, load them and return loaded status."""
+        if self.imported:
+            return True
         try:
             import speech_recognition as sr
+
+            self.sr = sr
+            self.r = sr.Recognizer()
+            self.mic = sr.Microphone()
+            self.imported = True
+            return True
         except ImportError:
-            print("Please install the SpeechRecognition and pyaudio libraries.")
+            print(
+                "Please install the SpeechRecognition and pyaudio libraries by executing the following commands:"
+            )
             if sys.platform == "darwin":
-                print("On Mac, install portaudio with: brew install portaudio") 
+                print("brew install portaudio")
             print("pip install SpeechRecognition pyaudio")
+            return False
 
-    def listen_for_speech(self):
+    def listen_for_speech(self) -> str:
         """Listens for speech and returns the transcribed text."""
         with self.mic as source:
             print("Listening...")
-            # This might be good. More testing needed. Seemed to work better without it.
-            #            self.r.adjust_for_ambient_noise(source)
+            # This might be good. More testing needed. Might work better without it.
+            self.r.adjust_for_ambient_noise(source)
             audio = self.r.listen(source)
 
         try:
             text = self.r.recognize_google(audio)
-            text = text[text.find(" ") + 1 :] if " " in text else text # Get rid of activation word
+            text = (
+                text[text.find(" ") + 1 :] if " " in text else text
+            )  # Get rid of activation word
             print(f"You said: {text}")
             return text
-        except sr.UnknownValueError:
+        except self.sr.UnknownValueError:
             print("Could not understand audio")
             return ""
-        except sr.RequestError as e:
+        except self.sr.RequestError as e:
             print(
                 f"Could not request results from Google Speech Recognition service; {e}"
             )
             return ""
 
 
-def cli_input(prompt: str = "") -> str:
-    global speech_mode
+recognizer = SpeechRecognizer()
 
+
+def cli_input(prompt: str = "") -> str:
+    """Return user input from keyboard or speech."""
+    global recognizer
     start_marker = '"""'
     end_marker = '"""'
 
     while True:
-        if speech_mode:
-            recognizer = SpeechRecognizer()
+        if recognizer.speak():
+            print(prompt)
             text = recognizer.listen_for_speech()
             if text == "exit":
                 print("Exiting speech recognition mode.")
-                speech_mode = False
+                recognizer.speak(False)
             elif text:
                 return text
         else:
             message = input(prompt)
             # Speech recognition trigger
             if message == ">":
-                speech_mode = True
+                # Enter speech mode only if the speech libraries loaded. On
+                # ImportError, import_library() has printed install steps and
+                # self.mic is unset, so stay in keyboard mode and prompt again.
+                if recognizer.import_library():
+                    recognizer.speak(True)
+                continue  # Restart the loop in whichever input mode is active
 
             # Multi-line input mode

From 3b9b77eeae9eafb10bc62e24c7ebcb6a72818d6e Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Mon, 5 Aug 2024 09:01:28 -0400
Subject: [PATCH 5/9] code cleanup; push string cleaning up execution chain.

---
 interpreter/terminal_interface/terminal_interface.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/interpreter/terminal_interface/terminal_interface.py b/interpreter/terminal_interface/terminal_interface.py
index 866f68859b..66b6c278f9 100644
--- a/interpreter/terminal_interface/terminal_interface.py
+++ b/interpreter/terminal_interface/terminal_interface.py
@@ -212,11 +212,8 @@ def terminal_interface(interpreter, message):
                             response = input(
                                 "Would you like to run this code? (y/n)\n\n"
                             )
+                            response = response.strip().lower()
                         else:
-                            # print("  Would you like to run this code? (yes/no)\n\n  ")
-                            # response = input(
-                            #     "  666Would you like to run this code? (y/n)\n\n  "
-                            # )
                             # edit isn't in original prompt, but there's a branch for it just below here. Wonder what it's for.
                             response = (
                                 cli_input(
@@ -234,14 +231,14 @@ def terminal_interface(interpreter, message):
 
                         print("")  # <- Aesthetic choice
 
-                        if response.strip().lower() == "y":
+                        if response == "y":
                             # Create a new, identical block where the code will actually be run
                             # Conveniently, the chunk includes everything we need to do this:
                             active_block = CodeBlock(interpreter)
                             active_block.margin_top = False  # <- Aesthetic choice
                             active_block.language = language
                             active_block.code = code
-                        elif response.strip().lower() == "e":
+                        elif response == "e":
                             # Edit
 
                             # Create a temporary file

From b8a4eab58f63574c60fc34e4bb6a9ce02ce81448 Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Mon, 5 Aug 2024 10:30:55 -0400
Subject: [PATCH 6/9] Informational messages such as speech not understood will
 stay on one line instead of continuously scrolling the display

---
 .../terminal_interface/terminal_interface.py  |  2 +-
 .../terminal_interface/utils/cli_input.py     | 25 +++++++++++++------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/interpreter/terminal_interface/terminal_interface.py b/interpreter/terminal_interface/terminal_interface.py
index 66b6c278f9..e336f7f672 100644
--- a/interpreter/terminal_interface/terminal_interface.py
+++ b/interpreter/terminal_interface/terminal_interface.py
@@ -217,7 +217,7 @@ def terminal_interface(interpreter, message):
                             # edit isn't in original prompt, but there's a branch for it just below here. Wonder what it's for.
                             response = (
                                 cli_input(
-                                    "  Would you like to run this code? (yes/no)\n\n  "
+                                    "  Would you like to run this code? (yes/no)  "
                                 )
                                 .strip()
                                 .lower()
diff --git a/interpreter/terminal_interface/utils/cli_input.py b/interpreter/terminal_interface/utils/cli_input.py
index 02b66e163b..00169365ee 100644
--- a/interpreter/terminal_interface/utils/cli_input.py
+++ b/interpreter/terminal_interface/utils/cli_input.py
@@ -1,4 +1,5 @@
 import sys
+import time
 
 """Return input from keyboard or speech recognition."""
 
@@ -40,13 +41,17 @@ def import_library(self) -> bool:
             )
             if sys.platform == "darwin":
                 print("brew install portaudio")
+            if sys.platform == "linux":
+                print("sudo apt install python3-pyaudio")
+                print("If that doesn't work, you may need to install portaudio19 from source:")
+                print("https://www.portaudio.com/ then ./configure && make && make install.")
             print("pip install SpeechRecognition pyaudio")
             return False
 
-    def listen_for_speech(self) -> str:
+    def listen(self) -> str:
         """Listens for speech and returns the transcribed text."""
         with self.mic as source:
-            print("Listening...")
+            print("Listening...", end='', flush=True)
             # This might be good. More testing needed. Might work better without it.
             self.r.adjust_for_ambient_noise(source)
             audio = self.r.listen(source)
@@ -56,14 +61,16 @@ def listen_for_speech(self) -> str:
             text = (
                 text[text.find(" ") + 1 :] if " " in text else text
             )  # Get rid of activation word
-            print(f"You said: {text}")
+            print(f"\rYou said: {text}")
             return text
         except self.sr.UnknownValueError:
-            print("Could not understand audio")
+            print("\rCould not understand audio." + " " * 30 + "\r", end='', flush=True)
+            time.sleep(2)
+            print("\r" + " " * 30 + "\r", end='', flush=True)  # Clear the line
             return ""
         except self.sr.RequestError as e:
             print(
-                f"Could not request results from Google Speech Recognition service; {e}"
+                f"\rCould not request results from Google Speech Recognition service; {e}"
             )
             return ""
 
@@ -79,13 +86,15 @@ def cli_input(prompt: str = "") -> str:
 
     while True:
         if recognizer.speak():
-            print(prompt)
-            text = recognizer.listen_for_speech()
+            print(prompt, end='', flush=True)
+            text = recognizer.listen()
             if text == "exit":
-                print("Exiting speech recognition mode.")
+                print("\rExiting speech recognition mode.")
                 recognizer.speak(False)
             elif text:
                 return text
+            else:
+                print("\r" + " " * 30 + "\r", end='', flush=True)  # Clear the line
         else:
             message = input(prompt)
             # Speech recognition trigger

From 4f4204a1949def53ddce71d3c11b1585d58f9fa2 Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Mon, 5 Aug 2024 13:49:00 -0400
Subject: [PATCH 7/9] cleanup

---
 interpreter/terminal_interface/utils/cli_input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/interpreter/terminal_interface/utils/cli_input.py b/interpreter/terminal_interface/utils/cli_input.py
index 00169365ee..ad186fbba2 100644
--- a/interpreter/terminal_interface/utils/cli_input.py
+++ b/interpreter/terminal_interface/utils/cli_input.py
@@ -61,7 +61,7 @@ def listen(self) -> str:
             text = (
                 text[text.find(" ") + 1 :] if " " in text else text
             )  # Get rid of activation word
-            print(f"\rYou said: {text}")
+            print(f"\rYou said: {text}" + " " * 30)
             return text
         except self.sr.UnknownValueError:
             print("\rCould not understand audio." + " " * 30 + "\r", end='', flush=True)

From dd650f339a07f87c9f34407d9dde6d18123ca1c0 Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Tue, 6 Aug 2024 08:44:37 -0400
Subject: [PATCH 8/9] got rid of wake word

---
 interpreter/terminal_interface/utils/cli_input.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/interpreter/terminal_interface/utils/cli_input.py b/interpreter/terminal_interface/utils/cli_input.py
index ad186fbba2..49fad921ca 100644
--- a/interpreter/terminal_interface/utils/cli_input.py
+++ b/interpreter/terminal_interface/utils/cli_input.py
@@ -11,10 +11,9 @@ class SpeechRecognizer:
     """Create project. Go to APIs and services -> Library. Search for speech. Enable Speech API."""
     """Go to API manager -> Credentials and create an API key."""
 
-    def __init__(self, wake_word="jarvis"):
+    def __init__(self):
         self.speech_mode = False
         self.imported = False
-        self.wake_word = wake_word
 
     def speak(self, val=None) -> bool:
         """Set speech mode. Called with no argument, return current value. Called with an argument, sets value."""
@@ -58,9 +57,6 @@ def listen(self) -> str:
 
         try:
             text = self.r.recognize_google(audio)
-            text = (
-                text[text.find(" ") + 1 :] if " " in text else text
-            )  # Get rid of activation word
             print(f"\rYou said: {text}" + " " * 30)
             return text
         except self.sr.UnknownValueError:

From a0758fc4a9c6baada511a3dcad969b80b45cad88 Mon Sep 17 00:00:00 2001
From: kyle gaspar <kyleg.job@gmail.com>
Date: Tue, 6 Aug 2024 09:17:44 -0400
Subject: [PATCH 9/9] added > to enable speech recognition

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 37651c7133..6a39ecfaec 100644
--- a/README.md
+++ b/README.md
@@ -284,6 +284,7 @@ In the interactive mode, you can use the below commands to enhance your experien
 - `%undo`: Removes the previous user message and the AI's response from the message history.
 - `%tokens [prompt]`: (_Experimental_) Calculate the tokens that will be sent with the next prompt as context and estimate their cost. Optionally calculate the tokens and estimated cost of a `prompt` if one is provided. Relies on [LiteLLM's `cost_per_token()` method](https://docs.litellm.ai/docs/completion/token_usage#2-cost_per_token) for estimated costs.
 - `%help`: Show the help message.
+- `>`: Start speech recognition mode using Google's speech recognition API. Saying `exit` returns to text mode. Requires an internet connection and enabling the [Google Speech API](https://nerdvittles.com/creating-an-api-key-for-google-speech-recognition/).
 
 ### Configuration / Profiles