Commit: [DEMOS][TESTS]

Kye committed Dec 24, 2023
1 parent f39d722 commit fd58cfa
Showing 10 changed files with 108 additions and 352 deletions.
96 changes: 96 additions & 0 deletions playground/demos/personal_assistant/better_communication.py
@@ -0,0 +1,96 @@
import time
import os

import pygame
import speech_recognition as sr
from dotenv import load_dotenv
from playsound import playsound

from swarms import OpenAIChat, OpenAITTS

# Load the environment variables
load_dotenv()

# Get the API key from the environment
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Initialize the language model
llm = OpenAIChat(
    openai_api_key=openai_api_key,
)

# Initialize the text-to-speech model
tts = OpenAITTS(
    model_name="tts-1-1106",
    voice="onyx",
    openai_api_key=openai_api_key,
    saved_filepath="runs/tts_speech.wav",
)

# Initialize the speech recognizer
r = sr.Recognizer()


def play_audio(file_path):
    # Check if the file exists
    if not os.path.isfile(file_path):
        print(f"Audio file {file_path} not found.")
        return

    # Initialize the mixer module
    pygame.mixer.init()

    try:
        # Load the audio file
        pygame.mixer.music.load(file_path)

        # Play the audio file
        pygame.mixer.music.play()

        # Wait for the audio to finish playing
        while pygame.mixer.music.get_busy():
            pygame.time.Clock().tick(10)
    except pygame.error as e:
        print(f"Couldn't play {file_path}: {e}")
    finally:
        # Stop the mixer module and free resources
        pygame.mixer.quit()


while True:
    # Listen for user speech
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)

    # Convert speech to text
    try:
        print("Recognizing...")
        task = r.recognize_google(audio)
        print(f"User said: {task}")
    except sr.UnknownValueError:
        print("Could not understand audio")
        continue
    except Exception as e:
        print(f"Error: {e}")
        continue

    # Run the language model on the task
    print("Running GPT-4 model...")
    out = llm(task)
    print(f"GPT-4 output: {out}")

    # Convert the model output to speech
    print("Running text-to-speech model...")
    out = tts.run_and_save(out)
    print(f"Text-to-speech output: {out}")

    # Ask the user if they want to play the audio
    # answer = input("Do you want to play the audio? (yes/no): ")
    # if answer.lower() == "yes":
    #     play_audio("runs/tts_speech.wav")

    time.sleep(5)

    playsound("runs/tts_speech.wav")
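Since the commit is tagged [TESTS], a natural follow-up is making the loop body testable without a microphone or API key. This is a minimal sketch under the assumption that llm and tts behave as constructed above; interact_once and the fake classes are hypothetical names, not part of this commit:

# Hypothetical refactor of one recognize-respond-speak cycle;
# llm and tts are the objects built in the demo above.
def interact_once(task, llm, tts):
    response = llm(task)
    # In the demo's usage, run_and_save returns the saved audio path
    return tts.run_and_save(response)


# Test doubles that avoid real OpenAI calls:
class FakeLLM:
    def __call__(self, task):
        return f"echo: {task}"


class FakeTTS:
    def run_and_save(self, text):
        return "runs/tts_speech.wav"


assert interact_once("hello", FakeLLM(), FakeTTS()) == "runs/tts_speech.wav"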
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "swarms"
-version = "2.3.0"
+version = "2.3.8"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez <kye@apac.ai>"]
2 changes: 1 addition & 1 deletion swarms/memory/weaviate_db.py
@@ -50,7 +50,7 @@ def __init__(
         grpc_secure: Optional[bool] = None,
         auth_client_secret: Optional[Any] = None,
         additional_headers: Optional[Dict[str, str]] = None,
-        additional_config: Optional[weaviate.AdditionalConfig] = None,
+        additional_config: Optional[Any] = None,
         connection_params: Dict[str, Any] = None,
         *args,
         **kwargs,
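The annotation change above swaps weaviate.AdditionalConfig for Any, so the weaviate name no longer has to resolve when the annotation is evaluated. A common alternative that keeps the precise type for static checkers only is a TYPE_CHECKING guard; this is a sketch of that pattern, not what the commit does:

from __future__ import annotations

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Imported only while type checking; no runtime weaviate dependency
    import weaviate


class WeaviateDB:
    # Sketch: the annotation stays precise but is never evaluated at runtime
    def __init__(
        self,
        additional_config: Optional[weaviate.AdditionalConfig] = None,
    ):
        self.additional_config = additional_config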
6 changes: 5 additions & 1 deletion swarms/models/base_multimodal_model.py
@@ -108,7 +108,11 @@ def run(
         pass

     def __call__(
-        self, task: str = None, img: str = None, *args, **kwargs
+        self,
+        task: Optional[str] = None,
+        img: Optional[str] = None,
+        *args,
+        **kwargs,
     ):
         """Call the model
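The widened __call__ signature makes both arguments explicitly optional (and assumes Optional is imported from typing in that module). A stand-in sketch of the calling pattern, not the swarms class itself:

from typing import Optional


class EchoMultiModal:
    # Stand-in mirroring BaseMultiModalModel.__call__'s new signature
    def __call__(
        self,
        task: Optional[str] = None,
        img: Optional[str] = None,
        *args,
        **kwargs,
    ):
        return {"task": task, "img": img}


model = EchoMultiModal()
print(model())  # both arguments omitted
print(model(task="Describe the image", img="photo.jpg"))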
File renamed without changes.
6 changes: 1 addition & 5 deletions swarms/models/fastvit.py
@@ -39,14 +39,11 @@ class FastViT:
     Returns:
         ClassificationResult: a pydantic BaseModel containing the class ids and confidences of the model's predictions
-    Example:
-        >>> fastvit = FastViT()
-        >>> result = fastvit(img="path_to_image.jpg", confidence_threshold=0.5)
     To use, create a json file called: fast_vit_classes.json
     """

     def __init__(self):
@@ -62,7 +59,7 @@ def __init__(self):
     def __call__(
         self, img: str, confidence_threshold: float = 0.5
     ) -> ClassificationResult:
-        """classifies the input image and returns the top k classes and their probabilities"""
+        """Classifies the input image and returns the top k classes and their probabilities"""
         img = Image.open(img).convert("RGB")
         img_tensor = self.transforms(img).unsqueeze(0).to(DEVICE)
         with torch.no_grad():
@@ -81,7 +78,6 @@ def __call__(
         # Convert to Python lists and map class indices to labels if needed
         top_probs = top_probs.cpu().numpy().tolist()
         top_classes = top_classes.cpu().numpy().tolist()
-        # top_class_labels = [FASTVIT_IMAGENET_1K_CLASSES[i] for i in top_classes]  # Uncomment if class labels are needed

         return ClassificationResult(
             class_id=top_classes, confidence=top_probs
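The doctest removed above still matches the __call__ signature shown in the diff; assuming a fast_vit_classes.json file is present as the docstring requires, usage would look like:

from swarms.models.fastvit import FastViT

# Requires fast_vit_classes.json, per the class docstring
fastvit = FastViT()
result = fastvit(img="path_to_image.jpg", confidence_threshold=0.5)

# ClassificationResult fields as constructed in the diff above
print(result.class_id, result.confidence)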
4 changes: 2 additions & 2 deletions swarms/utils/device_checker_cuda.py
@@ -66,5 +66,5 @@ def check_device(
     return devices


-devices = check_device()
-logging.info(f"Using device(s): {devices}")
+# devices = check_device()
+# logging.info(f"Using device(s): {devices}")
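With the module-level call commented out, device detection no longer runs as an import side effect, so callers would invoke it explicitly. A sketch, assuming check_device stays importable from this module:

import logging

from swarms.utils.device_checker_cuda import check_device

logging.basicConfig(level=logging.INFO)

# Explicit call at application startup, replacing the old import-time side effect
devices = check_device()
logging.info(f"Using device(s): {devices}")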
