-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathmain.py
98 lines (72 loc) · 3.03 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import keyboard
import os
import tempfile
import numpy as np
import openai
import sounddevice as sd
import soundfile as sf
import tweepy
from elevenlabs import generate, play, set_api_key
from langchain.agents import initialize_agent, load_tools
from langchain.agents.agent_toolkits import ZapierToolkit
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.tools import BaseTool
from langchain.utilities.zapier import ZapierNLAWrapper
# Configure third-party credentials (placeholders — supply real keys before running).
set_api_key("<11LABS_API_KEY>")
openai.api_key = "<OPENAI_API_KEY>"
# Set recording parameters
duration = 5 # duration of each recording in seconds
fs = 44100 # sample rate, in Hz
channels = 1 # number of channels (mono capture)
def record_audio(duration, fs, channels):
    """Capture audio from the default microphone.

    Args:
        duration: length of the clip in seconds.
        fs: sample rate in Hz.
        channels: number of input channels.

    Returns:
        The recorded samples as a NumPy array (sounddevice's buffer).
    """
    print("Recording...")
    frame_count = int(duration * fs)
    clip = sd.rec(frame_count, samplerate=fs, channels=channels)
    sd.wait()  # block until the capture buffer is full
    print("Finished recording.")
    return clip
def transcribe_audio(recording, fs):
    """Transcribe a recorded clip with OpenAI Whisper.

    Writes `recording` to a temporary WAV file at sample rate `fs`,
    sends it to the `whisper-1` transcription endpoint, and returns
    the transcript text with surrounding whitespace stripped.

    Fix over the original: the temp file was only deleted on the
    success path, so a failed transcription leaked it; the cleanup
    now lives in a `finally`. The redundant explicit `close()` inside
    the `with` block is gone — the context manager closes the handle.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        sf.write(temp_audio.name, recording, fs)
    # File is closed here (with-exit) but kept on disk (delete=False),
    # so it can be reopened for reading — required on Windows.
    try:
        with open(temp_audio.name, "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file)
    finally:
        os.remove(temp_audio.name)  # never leak the temp WAV
    return transcript["text"].strip()
def play_generated_audio(text, voice="Bella", model="eleven_monolingual_v1"):
    """Synthesize `text` with ElevenLabs and play it through the speakers."""
    spoken = generate(text=text, voice=voice, model=model)
    play(spoken)
# Replace with your API keys
# Twitter/X OAuth 1.0a credentials (placeholders — do not commit real keys).
consumer_key = "<CONSUMER_KEY>"
consumer_secret = "<CONSUMER_SECRET>"
access_token = "<ACCESS_TOKEN>"
access_token_secret = "<ACCESS_TOKEN_SECRET>"
# Module-level tweepy client, used by TweeterPostTool below.
client = tweepy.Client(
consumer_key=consumer_key, consumer_secret=consumer_secret,
access_token=access_token, access_token_secret=access_token_secret
)
class TweeterPostTool(BaseTool):
    """LangChain tool that publishes its input text as a tweet."""

    # Fields read by BaseTool / the agent prompt — keep text unchanged.
    name = "Twitter Post Tweet"
    description = "Use this tool to post a tweet to twitter."

    def _run(self, text: str) -> str:
        """Post `text` through the module-level tweepy client."""
        response = client.create_tweet(text=text)
        return response

    async def _arun(self, query: str) -> str:
        """Async execution is not implemented for this tool."""
        raise NotImplementedError("This tool does not support async")
if __name__ == '__main__':
    # Assemble the conversational agent: LLM + chat memory + Zapier/human tools.
    language_model = OpenAI(temperature=0)
    chat_memory = ConversationBufferMemory(memory_key="chat_history")
    zapier_nla = ZapierNLAWrapper(zapier_nla_api_key="<ZAPIER_NLA_API_KEY>")
    zapier_toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier_nla)
    # tools = [TweeterPostTool()] + toolkit.get_tools() + load_tools(["human"])
    agent_tools = zapier_toolkit.get_tools() + load_tools(["human"])
    conversation_agent = initialize_agent(
        agent_tools,
        language_model,
        memory=chat_memory,
        agent="conversational-react-description",
        verbose=True,
    )
    # Push-to-talk loop: record, transcribe, ask the agent, speak the reply.
    while True:
        print("Press spacebar to start recording.")
        keyboard.wait("space")  # wait for spacebar to be pressed
        clip = record_audio(duration, fs, channels)
        user_text = transcribe_audio(clip, fs)
        print(f"You: {user_text}")
        reply = conversation_agent.run(user_text)
        play_generated_audio(reply)