# This file was originally named db_loading.py in the article
# Ariel - I've changed it significantly and renamed it to better match the new functionality
from langchain import PromptTemplate, LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings import LlamaCppEmbeddings # Alpaca Embeddings
from langchain.vectorstores.faiss import FAISS # Vector similarity search
gpt4all_path = './models/gpt4all-converted.bin'
llama_path = './models/ggml-model-q4_0.bin'
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# Loading the two models below prints verbose llama.cpp diagnostics; that output can't be suppressed
embeddings = LlamaCppEmbeddings(model_path=llama_path)
llm = GPT4All(model=gpt4all_path, callback_manager=callback_manager, verbose=True)
# Load our local index vector. The retrieval flow:
# 1. Embeddings of the source document were created ahead of time (see the sketch below)
# 2. FAISS searches for similarity between the query and the index
# 3. The top-k most similar chunks (their actual text) are passed to the model as context
index = FAISS.load_local("my_faiss_index-description-all-rows", embeddings)
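# For reference, a minimal sketch of how an index like this can be built
# (not run here; the CSV loader and source path are hypothetical, not from this repo):
#
#   from langchain.document_loaders import CSVLoader
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#
#   raw_docs = CSVLoader('./docs/descriptions.csv').load()  # hypothetical source file
#   chunks = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32).split_documents(raw_docs)
#   FAISS.from_documents(chunks, embeddings).save_local("my_faiss_index-description-all-rows")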
def run_query(query):
    """
    Take a user query, find the most similar chunks in the index, and print the model's answer using them as context
    """
    # Retrieve the 5 best-matching chunks (k=5)
    matched_docs = index.similarity_search(query, k=5)
    print(f"\n\nThe question is: {query}")
    # print("\nHere is the most similar result, based solely on embedding similarity:")
    # print(matched_docs[0].page_content)
    print("\n\n------------------------------------------------------------------")
    print("GPT4All results, using the top-k embeddings as context\n\n")
    # Prompt template
    template = """
Context: {context}
---
Question: {question}
Answer: """  # Let's NOT think step by step.
    # Join the matched chunks into a single context string
    context = " ".join([doc.page_content for doc in matched_docs])
    # Instantiate the prompt template and the GPT4All chain, pre-filling the context
    prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    # The answer is streamed to stdout by the callback handler
    llm_chain.run(query)
# test_query = ["What was the root cause of the damage to the dust cups?",
#               "Where is The Progesterone bulk product received from?",
#               "What was the root cause of the damage to the dust cups?"]
# for q in test_query:
#     run_query(q)
while True:
    try:
        query = input("Your question (Ctrl + C to exit): ")
        run_query(query)
    except KeyboardInterrupt:
        print("\nLoop terminated by Ctrl + C")
        break