Commit: [DEMOS][TESTS]

Kye committed Dec 24, 2023
1 parent f39d722 commit fd58cfa
Showing 10 changed files with 108 additions and 352 deletions.
96 changes: 96 additions & 0 deletions playground/demos/personal_assistant/better_communication.py
@@ -0,0 +1,96 @@
import time
import os

import pygame
import speech_recognition as sr
from dotenv import load_dotenv
from playsound import playsound

from swarms import OpenAIChat, OpenAITTS

# Load the environment variables
load_dotenv()

# Get the API key from the environment
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Initialize the language model
llm = OpenAIChat(
    openai_api_key=openai_api_key,
)

# Initialize the text-to-speech model
tts = OpenAITTS(
    model_name="tts-1-1106",
    voice="onyx",
    openai_api_key=openai_api_key,
    saved_filepath="runs/tts_speech.wav",
)

# Initialize the speech recognizer
r = sr.Recognizer()


def play_audio(file_path):
    # Check if the file exists
    if not os.path.isfile(file_path):
        print(f"Audio file {file_path} not found.")
        return

    # Initialize the mixer module
    pygame.mixer.init()

    try:
        # Load the audio file
        pygame.mixer.music.load(file_path)

        # Play the audio file
        pygame.mixer.music.play()

        # Wait for the audio to finish playing
        while pygame.mixer.music.get_busy():
            pygame.time.Clock().tick(10)
    except pygame.error as e:
        print(f"Couldn't play {file_path}: {e}")
    finally:
        # Stop the mixer module and free resources
        pygame.mixer.quit()


while True:
    # Listen for user speech
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)

    # Convert speech to text
    try:
        print("Recognizing...")
        task = r.recognize_google(audio)
        print(f"User said: {task}")
    except sr.UnknownValueError:
        print("Could not understand audio")
        continue
    except Exception as e:
        print(f"Error: {e}")
        continue

    # Run the language model on the task
    print("Running GPT-4 model...")
    out = llm(task)
    print(f"GPT-4 output: {out}")

    # Convert the model output to speech
    print("Running text-to-speech model...")
    out = tts.run_and_save(out)
    print(f"Text-to-speech output: {out}")

    # Ask the user if they want to play the audio
    # answer = input("Do you want to play the audio? (yes/no): ")
    # if answer.lower() == "yes":
    #     play_audio("runs/tts_speech.wav")

    time.sleep(5)

    playsound("runs/tts_speech.wav")
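Since the commit is tagged [TESTS], a natural follow-up is making the loop body testable without a microphone or API key. This is a minimal sketch under the assumption that llm and tts behave as constructed above; interact_once and the fake classes are hypothetical names, not part of this commit:

# Hypothetical refactor of one recognize-respond-speak cycle;
# llm and tts are the objects built in the demo above.
def interact_once(task, llm, tts):
    response = llm(task)
    # In the demo's usage, run_and_save returns the saved audio path
    return tts.run_and_save(response)


# Test doubles that avoid real OpenAI calls:
class FakeLLM:
    def __call__(self, task):
        return f"echo: {task}"


class FakeTTS:
    def run_and_save(self, text):
        return "runs/tts_speech.wav"


assert interact_once("hello", FakeLLM(), FakeTTS()) == "runs/tts_speech.wav"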
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "swarms"
-version = "2.3.0"
+version = "2.3.8"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez <kye@apac.ai>"]
2 changes: 1 addition & 1 deletion swarms/memory/weaviate_db.py
@@ -50,7 +50,7 @@ def __init__(
         grpc_secure: Optional[bool] = None,
         auth_client_secret: Optional[Any] = None,
         additional_headers: Optional[Dict[str, str]] = None,
-        additional_config: Optional[weaviate.AdditionalConfig] = None,
+        additional_config: Optional[Any] = None,
         connection_params: Dict[str, Any] = None,
         *args,
         **kwargs,
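The annotation change above swaps weaviate.AdditionalConfig for Any, so the weaviate name no longer has to resolve when the annotation is evaluated. A common alternative that keeps the precise type for static checkers only is a TYPE_CHECKING guard; this is a sketch of that pattern, not what the commit does:

from __future__ import annotations

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Imported only while type checking; no runtime weaviate dependency
    import weaviate


class WeaviateDB:
    # Sketch: the annotation stays precise but is never evaluated at runtime
    def __init__(
        self,
        additional_config: Optional[weaviate.AdditionalConfig] = None,
    ):
        self.additional_config = additional_config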
6 changes: 5 additions & 1 deletion swarms/models/base_multimodal_model.py
@@ -108,7 +108,11 @@ def run(
         pass

     def __call__(
-        self, task: str = None, img: str = None, *args, **kwargs
+        self,
+        task: Optional[str] = None,
+        img: Optional[str] = None,
+        *args,
+        **kwargs,
     ):
         """Call the model
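The widened __call__ signature makes both arguments explicitly optional (and assumes Optional is imported from typing in that module). A stand-in sketch of the calling pattern, not the swarms class itself:

from typing import Optional


class EchoMultiModal:
    # Stand-in mirroring BaseMultiModalModel.__call__'s new signature
    def __call__(
        self,
        task: Optional[str] = None,
        img: Optional[str] = None,
        *args,
        **kwargs,
    ):
        return {"task": task, "img": img}


model = EchoMultiModal()
print(model())  # both arguments omitted
print(model(task="Describe the image", img="photo.jpg"))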
File renamed without changes.
6 changes: 1 addition & 5 deletions swarms/models/fastvit.py
@@ -39,14 +39,11 @@ class FastViT:
     Returns:
         ClassificationResult: a pydantic BaseModel containing the class ids and confidences of the model's predictions
-    Example:
-        >>> fastvit = FastViT()
-        >>> result = fastvit(img="path_to_image.jpg", confidence_threshold=0.5)
     To use, create a json file called: fast_vit_classes.json
     """

     def __init__(self):
@@ -62,7 +59,7 @@ def __init__(self):
     def __call__(
         self, img: str, confidence_threshold: float = 0.5
     ) -> ClassificationResult:
-        """classifies the input image and returns the top k classes and their probabilities"""
+        """Classifies the input image and returns the top k classes and their probabilities"""
         img = Image.open(img).convert("RGB")
         img_tensor = self.transforms(img).unsqueeze(0).to(DEVICE)
         with torch.no_grad():
@@ -81,7 +78,6 @@ def __call__(
         # Convert to Python lists and map class indices to labels if needed
         top_probs = top_probs.cpu().numpy().tolist()
         top_classes = top_classes.cpu().numpy().tolist()
-        # top_class_labels = [FASTVIT_IMAGENET_1K_CLASSES[i] for i in top_classes]  # Uncomment if class labels are needed

         return ClassificationResult(
             class_id=top_classes, confidence=top_probs
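The doctest removed above still matches the __call__ signature shown in the diff; assuming a fast_vit_classes.json file is present as the docstring requires, usage would look like:

from swarms.models.fastvit import FastViT

# Requires fast_vit_classes.json, per the class docstring
fastvit = FastViT()
result = fastvit(img="path_to_image.jpg", confidence_threshold=0.5)

# ClassificationResult fields as constructed in the diff above
print(result.class_id, result.confidence)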
4 changes: 2 additions & 2 deletions swarms/utils/device_checker_cuda.py
@@ -66,5 +66,5 @@ def check_device(
     return devices


-devices = check_device()
-logging.info(f"Using device(s): {devices}")
+# devices = check_device()
+# logging.info(f"Using device(s): {devices}")
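With the module-level call commented out, device detection no longer runs as an import side effect, so callers would invoke it explicitly. A sketch, assuming check_device stays importable from this module:

import logging

from swarms.utils.device_checker_cuda import check_device

logging.basicConfig(level=logging.INFO)

# Explicit call at application startup, replacing the old import-time side effect
devices = check_device()
logging.info(f"Using device(s): {devices}")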
