Bug fixes to default mac behavior, better audio quality on mac, better recollection tool use, more predictable completion detection assessments
Elias Weston-Farber authored and Elias Weston-Farber committed Dec 11, 2023
1 parent f6e692e commit 26c7a65
Showing 10 changed files with 102 additions and 55 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "jarvis_conversationalist"
version = "0.4.3"
version = "0.4.4"
authors = [{name="Elias Weston-Farber", email="eweston4@jhu.edu"}]
description = "A voice assistant for the command line"
readme = "README.md"
2 changes: 1 addition & 1 deletion src/jarvis_conversationalist/__init__.py
@@ -2,5 +2,5 @@
if sys.platform == 'linux':
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
__version__ = '0.4.3'
__version__ = '0.4.4'
# Path: src/jarvis_conversationalist/conversationalist.py
4 changes: 3 additions & 1 deletion src/jarvis_conversationalist/__main__.py
@@ -52,6 +52,8 @@
if os.path.exists(lock_unknown_speakers):
os.remove(lock_unknown_speakers)

print("\033[KSummarizing previous conversations... Please Wait...\033[K", end='\r')

from .conversationalist import converse
import warnings

@@ -81,10 +83,10 @@ def main():
if args.no_speaker_detection:
config['speakers'] = False

save_config(config, key)
set_speakers_active(config.get('speakers', sys.platform != 'darwin'))
set_openai_key(config.get('key', None))
set_user(config.get('user', 'User'))
save_config(config, key)

if get_openai_key() is None:
print("Please set your OpenAI API key using the --key argument once to cache your key.")
18 changes: 9 additions & 9 deletions src/jarvis_conversationalist/audio_player.py
@@ -86,16 +86,16 @@ def _play_audio_file_blocking(file_path: str, stop_event: threading.Event, loops
for loop in range(loops):
if not stop_event.is_set() or (added_stop_event and not added_stop_event.is_set()):
data, fs = sf.read(file_path)
sd.play(data, fs)
while sd.get_stream().active:
if stop_event.is_set() or (added_stop_event and added_stop_event.is_set()):
sd.stop()
break
if added_stop_event:
added_stop_event.wait(timeout=.02)
else:
stop_event.wait(timeout=.02)
sd.play(data, fs, latency=.25)
while sd.get_stream().active:
if stop_event.is_set() or (added_stop_event and added_stop_event.is_set()):
sd.stop()
break
if added_stop_event:
added_stop_event.wait(timeout=.02)
else:
stop_event.wait(timeout=.02)
sd.stop()
# Destroy the file if needed
if destroy:
os.remove(file_path)
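Note on the change above: passing latency=.25 asks sounddevice/PortAudio for a higher suggested output latency, trading a quarter second of extra buffering for fewer underruns (audible clicks) on the mac output path. A minimal standalone sketch of the same pattern, assuming sounddevice and soundfile are installed and that "example.wav" is a placeholder path:

import threading

import sounddevice as sd
import soundfile as sf


def play_with_latency(path: str, stop_event: threading.Event, latency: float = 0.25) -> None:
    # Read the whole file, then hand it to PortAudio with an explicit output latency.
    data, fs = sf.read(path)
    sd.play(data, fs, latency=latency)  # extra keyword args are forwarded to the output stream
    while sd.get_stream().active:
        if stop_event.is_set():
            sd.stop()
            break
        stop_event.wait(timeout=0.02)  # poll roughly every 20 ms, like the player above
    sd.stop()


if __name__ == "__main__":
    play_with_latency("example.wav", threading.Event())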
2 changes: 1 addition & 1 deletion src/jarvis_conversationalist/config.py
@@ -13,7 +13,7 @@
USER = "User"
KEY = None
file = {}
if sys.platform == "darwin":
if sys.platform != "darwin":
SPEAKERS = True
else:
SPEAKERS = False
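This is the "default mac behavior" fix from the commit message: the old condition enabled speaker detection only on macOS, where it should be off by default, and the new condition enables it everywhere else. An equivalent one-line default, shown only as a sketch of the intent rather than the project's code:

import sys

# Speaker detection defaults to on everywhere except macOS.
SPEAKERS = sys.platform != "darwin"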
5 changes: 3 additions & 2 deletions src/jarvis_conversationalist/conversationalist.py
@@ -10,7 +10,8 @@
import importlib.resources as pkg_resources

from .openai_utility_functions import check_for_directed_at_me, check_for_completion, extract_query
from .openai_interface import stream_response, resolve_response, use_tools, schedule_refresh_assistant
from .openai_interface import stream_response, resolve_response, use_tools, schedule_refresh_assistant, \
get_speaker_detection
from .streaming_response_audio import stream_audio_response, set_rt_text_queue
from .audio_player import play_audio_file
from .audio_listener import audio_capture_process
@@ -201,7 +202,7 @@ def converse(memory, interrupt_event, start_event, stop_event):
threading.Event().wait(0.3)
speaking.set()
beeps_stop_event = play_audio_file(core_path+"/beeps.wav", loops=7, blocking=False)
extracted_query = extract_query(transcript)
extracted_query = extract_query(transcript, speaker_detection=get_speaker_detection())
logger.info("Query extracted: " + extracted_query)
new_history = None
if not interrupt_event.is_set():
102 changes: 65 additions & 37 deletions src/jarvis_conversationalist/openai_interface.py
@@ -52,7 +52,9 @@
tools_list = get_function_list() + get_speaker_function_list()
function_info = get_function_info()
speaker_info = get_speaker_function_info()
speaker_detection = False
for speaker_info_key, speaker_info_value in speaker_info.items():
speaker_detection = True
function_info[speaker_info_key] = speaker_info_value

# Setup background task system
@@ -61,6 +63,17 @@
atexit.register(executor.shutdown, wait=True)


def get_speaker_detection():
"""
Get whether speaker detection is enabled or not.
:return: Whether speaker detection is enabled or not.
:rtype: bool
"""
global speaker_detection
return speaker_detection


def summarizer(input_list):
"""
Summarize a conversation by sending a query to the OpenAI API.
@@ -93,39 +106,40 @@ def summarizer(input_list):
return {"role": "system", "content": output}


def recollect(question="", search_query="", mode=""):
def recollect(question="", query="", mode=""):
"""
Search the conversation history for a query.
:param question: The question to answer.
:type question: str
:param search_query: The query to search for.
:type search_query: str
:param query: The query input.
:type query: str
:param mode: The mode to search in.
Can be 'search_full', 'search_details', 'similarity_full', or 'similarity_details'.
Can be 'search_exact_text_full', 'search_exact_text_summaries', 'vector_similarity_full', or
'vector_similarity_summaries'.
:type mode: str
:return: The AI Assistant's response.
:rtype: str
"""
global models
description = ""
if mode == "search_full":
description = "search for the literal string '" + search_query + \
if mode == "search_exact_text_summaries":
description = "search for the literal string '" + query + \
"' in a collection of summaries of conversations"
results = history_access.summaries.get(where_document={"$contains": search_query},
results = history_access.summaries.get(where_document={"$contains": query},
include=["metadatas", "documents"])
if mode == "search_details":
description = "search for the literal string '" + search_query + "' in a collection of conversations"
results = history_access.history.get(where_document={"$contains": search_query},
if mode == "search_exact_text_full":
description = "search for the literal string '" + query + "' in a collection of conversations"
results = history_access.history.get(where_document={"$contains": query},
include=["metadatas", "documents"])
if mode == "similarity_full":
description = "search for the most similar string to '" + search_query + \
if mode == "vector_similarity_summaries":
description = "search for the most similar string to '" + query + \
"' in a collection of summaries of conversations"
results = history_access.summaries.query(query_texts=[search_query], n_results=20,
results = history_access.summaries.query(query_texts=[query], n_results=20,
include=["metadatas", "documents"])
if mode == "similarity_details":
description = "search for the most similar string to '" + search_query + "' in a collection of conversations"
results = history_access.history.query(query_texts=[search_query], n_results=20,
if mode == "vector_similarity_full":
description = "search for the most similar string to '" + query + "' in a collection of conversations"
results = history_access.history.query(query_texts=[query], n_results=20,
include=["metadatas", "documents"])
if mode == "schema":
schema = {"type": "function",
@@ -139,36 +153,38 @@ def recollect(question="", search_query="", mode=""):
"type": "string",
"description": "The question to answer.",
},
"search_query": {
"query": {
"type": "string",
"description": "The query to search for. If mode is 'search_full' or 'search_details' "
"this is the literal string to search for so keep it short or you "
"will get no results. If mode is 'similarity_full' "
"or 'similarity_details' this is the string to find the most similar "
"description": "The query to search for. If mode is 'search_exact_text_full' or "
"'search_exact_text_summaries' this is the literal string to search "
"for so keep it short or you will get no results. If mode is "
"'vector_similarity_full' or 'vector_similarity_summaries' this "
"is the string to find the most similar "
"string to so you can make it longer.",
},
"mode": {
"type": "string",
"description": "The mode to search in. Can be 'search_full', 'search_details', "
"'similarity_full', or 'similarity_details'. 'search_full' searches "
"description": "The mode to search in. Can be 'search_exact_text_full', "
"'search_exact_text_summaries', 'vector_similarity_full', or "
"'vector_similarity_summaries'. 'search_exact_text_full' searches "
"for the literal string in a collection of summaries of conversations. "
"'search_details' searches for the literal string in a collection of "
"conversations. 'similarity_full' searches for the most similar "
"string to the query in a collection of summaries of conversations. "
"'similarity_details' searches for the most similar string to the "
"query in a collection of conversations.",
"'search_exact_text_summaries' searches for the literal string in a "
"collection of conversations. 'vector_similarity_full' searches for "
"the most similar string to the query in a collection of summaries of "
"conversations. 'vector_similarity_summaries' searches for the most "
"similar string to the query in a collection of conversations.",
},
},
"required": ["question", "search_query", "mode"],
"required": ["question", "query", "mode"],
},
}
}
return schema
if mode == "examples":
examples = 'Examples:\n {"function_name": "recollect", "parameters": {"question": "What is the name of the' \
'user\'s dog?", "search_query": "dog", "mode": "search_full"}}\n {"function_name": "recollect", ' \
'parameters": {"question": "What is the town the user grew up in?", "search_query": "I was born in' \
' and grew up in ", "mode": "similarity_details"}}\n'
examples = 'Examples:\n{"function_name": "recollect", "parameters": {"question": "What is the name of the ' \
'user\'s dog?", "query": "dog", "mode": "search_exact_text_full"}}\n{"function_name": "recollect", ' \
'"parameters": {"question": "What is the town the user grew up in?", "query": "I was born in' \
' and grew up in ", "mode": "vector_similarity_summaries"}}\n'
return examples
if description == "":
raise Exception("Invalid mode")
@@ -178,16 +194,28 @@ def recollect(question="", search_query="", mode=""):

input_list = []
for i in range(len(results['ids'])):
input_list.append({"role": results["metadatas"][i]["role"], "content": results["documents"][i] + "\n" +
" took place on: " + convert_utc_to_local(results["metadatas"][i]["utc_time"])})
print(results)
if mode.startswith("vector_similarity"):
input_list.append({"role": results["metadatas"][0][i]["role"],
"content": results["documents"][0][i] +
"\n" + " took place on: " +
convert_utc_to_local(results["metadatas"][0][i]["utc_time"])})
else:
input_list.append({"role": results["metadatas"][i]["role"], "content": results["documents"][i] + "\n" +
" took place on: " + convert_utc_to_local(results["metadatas"][i]["utc_time"])})
input_list = history_access.truncate_input_context(input_list)

system_mem = [{"role": "system", "content": "You help an AI remember things by receiving a context based on a " +
description + "\n Please help it answer the following question:" +
"\n\n" + question}]
"\n\n" + question + "\n\nNOTE: If the current conversation " +
"context does not contain the answer to the question, " +
"make sure to tell the AI to modify either modify the query and if"
"the recollection process fails to the answer the question after"
"multiple query modifications, to consider the possibility that "
"what it is trying to remember 'is not in our memories'."}]
response = client.chat.completions.create(model=models["primary"]['name'],
messages=system_mem,
temperature=models["primary"]["temperature"],
temperature=.1,
max_tokens=models["primary"]["max_message"],
top_p=models["primary"]["top_p"],
frequency_penalty=models["primary"]["frequency_penalty"],
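The renamed recollect modes map onto two different Chroma lookup styles, and the new branch in the results loop reflects that Collection.query returns nested lists while Collection.get returns flat ones. A self-contained sketch of both styles, assuming chromadb with illustrative collection names and its default embedding function (downloaded on first use), not the project's actual history_access wrapper:

import chromadb

client = chromadb.Client()
summaries = client.get_or_create_collection("summaries")
history = client.get_or_create_collection("history")
history.add(ids=["h1"], documents=["User: my dog is named Waffles."])
summaries.add(ids=["s1"], documents=["The user talked about the town they grew up in."])

# search_exact_text_* modes: literal substring match via a where_document filter.
exact_hits = history.get(where_document={"$contains": "dog"}, include=["documents"])
# Collection.get returns flat lists: exact_hits["documents"][i] pairs with exact_hits["ids"][i].

# vector_similarity_* modes: embedding search via query_texts.
similar_hits = summaries.query(query_texts=["I was born in and grew up in"], n_results=1,
                               include=["documents"])
# Collection.query nests results per query text: similar_hits["documents"][0][i] is the i-th hit
# for the first query, which is why the new branch in recollect() indexes results["documents"][0][i].
print(exact_hits["documents"], similar_hits["documents"][0])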
17 changes: 16 additions & 1 deletion src/jarvis_conversationalist/openai_utility_functions.py
@@ -45,6 +45,7 @@ def check_for_directed_at_me(transcript, n=1):
" to " + name + " directly."

response = client.chat.completions.create(model="gpt-3.5-turbo",
temperature=0.1,
messages=[{"role": "system", "content": system_message},
{"role": "user", "content": "\n".join(transcript)}],
functions=functions,
@@ -98,6 +99,7 @@ def check_for_completion(transcript, n=1):
" if the user is done speaking by analyzing the text below and seeing if the user has completed their thought."

response = client.chat.completions.create(model="gpt-4",
temperature=0.1,
messages=[{"role": "system", "content": system_message},
{"role": "user", "content": "\n".join(transcript)}],
functions=functions,
@@ -111,7 +113,7 @@
return probabilities


def extract_query(transcript):
def extract_query(transcript, speaker_detection=True):
"""
Extracts the query from the user's speech.
@@ -156,6 +158,19 @@ def extract_query(transcript):
"sure to include the " \
"speaker annotation for " \
"each subsection."
if not speaker_detection:
system_message = "You are seeing a live transcription of what is being said in a room. It is your job to " \
"determine the query the user is asking by analyzing the text below and extracting word " \
"for word the section of the transcript that is the query. Ignore the rest of the unrelated " \
"transcript. Keep in mind that sometimes context from earlier parts of the conversation are " \
"critical to understanding a query - make sure to include all the context needed to complete" \
" the query well. There may be multiple people in the room or people on the phone. It's your" \
" job to determine which part of the transcript is the query. The query should be a question" \
" or a command or a statement directed at or highly related to " + \
name + ". It is ok if there are multiple parts of the query, or if multiple people appear " \
"to be asking questions to" + name + ", it is ok to include all of those subsections " \
"in the query - just make sure to include the " \
"speaker annotation for each subsection."

response = client.chat.completions.create(model="gpt-4",
messages=[{"role": "system", "content": system_message},
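Both utility calls above now pin temperature=0.1, which is what makes the completion-detection assessments more repeatable from run to run. A minimal, hypothetical sketch of that pattern with the openai 1.x client (the function name and schema here are illustrative only, and an OPENAI_API_KEY is assumed to be set in the environment):

from openai import OpenAI

client = OpenAI()

# Illustrative schema; the project defines its own function lists elsewhere.
functions = [{
    "name": "completion_assessment",
    "description": "Report whether the speaker appears to have finished their thought.",
    "parameters": {
        "type": "object",
        "properties": {
            "complete": {"type": "boolean", "description": "True if the thought is complete."},
        },
        "required": ["complete"],
    },
}]

response = client.chat.completions.create(
    model="gpt-4",
    temperature=0.1,  # the change in this commit: a low, fixed temperature for repeatable judgements
    messages=[
        {"role": "system", "content": "Decide whether the user has finished speaking."},
        {"role": "user", "content": "So what I was wondering is"},
    ],
    functions=functions,
    function_call={"name": "completion_assessment"},
)
print(response.choices[0].message.function_call.arguments)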
3 changes: 2 additions & 1 deletion src/jarvis_conversationalist/streaming_response_audio.py
@@ -106,7 +108,8 @@ def _play_audio(self, stop_other_audio: threading.Event = None,
self.playing = True

chunk_played = False
with sd.OutputStream(samplerate=sample_rate, channels=CHANNELS, dtype='int16') as stream:
with sd.OutputStream(samplerate=sample_rate, latency=.25,
channels=CHANNELS, dtype='int16') as stream:
for chunk in generator():
if skip and skip.is_set():
self.stop()
2 changes: 1 addition & 1 deletion src/jarvis_conversationalist/text_speech.py
@@ -207,7 +207,7 @@ def text_to_speech(text: str, model="gpt-4", stream=False):
first_word = fixed_text.split(" ")[0]
rest_of_text = fixed_text.replace(first_word, "")
fixed_text = "[[rate 175]] " + first_word + "[[rate 200]] " + rest_of_text
text_cmd = f'[[pbas {pitch}]] [[slnc 300]]{fixed_text}[[slnc 200]]'
text_cmd = f'[[pbas {pitch}]] [[slnc 100]]{fixed_text}[[slnc 100]]'
output_file = os.path.join(audio_folder, str(uuid.uuid4()) + ".wav")
result = subprocess.run(['say']+vflag+[text_cmd, "-o", output_file, '--data-format=LEI16@22050'],
capture_output=True)
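The shorter [[slnc 100]] markers trim the silence that macOS's say command inserts before and after each synthesized chunk, tightening the perceived response time. A macOS-only sketch of the same say invocation, with an illustrative pitch value and output path:

import os
import subprocess
import uuid

# [[pbas]] sets the pitch base, [[rate]] the speaking rate, [[slnc]] a silence in milliseconds.
text_cmd = "[[pbas 45]] [[slnc 100]][[rate 175]] Hello[[rate 200]] there, this is a test.[[slnc 100]]"
output_file = os.path.join("/tmp", str(uuid.uuid4()) + ".wav")
result = subprocess.run(["say", text_cmd, "-o", output_file, "--data-format=LEI16@22050"],
                        capture_output=True)
print(result.returncode, output_file)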
