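"""qa.py: a Chainlit app for conversational QA over uploaded documents.

Uploaded .txt and .pdf files are split into chunks, embedded with
BAAI/bge-small-en, and indexed in a Chroma vector store; a local
llama.cpp Vicuna model then answers questions through a LangChain
ConversationalRetrievalChain with conversation memory.
"""
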
import os
from io import BytesIO
from typing import List

import chainlit as cl
import PyPDF2
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import ConversationalRetrievalChain
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.llms import LlamaCpp
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

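# Split documents into 1000-character chunks with 100 characters of overlap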
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
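
# Path to a local GGUF-quantized Vicuna-7B model loaded via llama.cpp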
llama_model_path = "../src/vicuna-7b-v1.5.Q4_K_M.gguf"


@cl.on_chat_start
async def on_chat_start():
    files = None
    # Wait for the user to upload at least one file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a text or PDF file to begin!",
            accept=["application/pdf", "text/plain"],
            max_size_mb=100,
            timeout=180,
            max_files=5,
        ).send()
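
    # Collect text chunks from every uploaded file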
    chunks = []
    for file in files:
        _, ext = os.path.splitext(file.name)
        msg = cl.Message(
            content=f"Processing `{file.name}`...", disable_human_feedback=True
        )
        await msg.send()

        if ext == ".txt":
            text = file.content.decode("utf-8")
            chunks.extend(text_splitter.split_text(text))
        elif ext == ".pdf":
            pdf_file = PyPDF2.PdfReader(BytesIO(file.content))
            # Extract the text of each page and split it into chunks
            for page in pdf_file.pages:
                chunks.extend(text_splitter.split_text(page.extract_text()))
    print(f"chunks size: {len(chunks)}")

    # Create a metadata entry for each chunk so sources can be cited later
    metadatas = [{"source": f"{i}-pl"} for i in range(len(chunks))]

    # BGE embeddings computed on CPU
    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": False},
    )

    # Build the Chroma vector store; cl.make_async runs the blocking
    # indexing call in a worker thread so the event loop stays responsive
    docsearch = await cl.make_async(Chroma.from_texts)(
        chunks, embeddings, metadatas=metadatas
    )
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Stream tokens to stdout as the model generates them
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = LlamaCpp(
        model_path=llama_model_path,
        n_gpu_layers=1,
        n_batch=512,
        n_ctx=4096,
        f16_kv=True,  # must be True, otherwise generation breaks after a few calls
        callback_manager=callback_manager,
        verbose=True,  # verbose is required when passing a callback manager
    )
    # Create a chain that answers questions against the Chroma vector store
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)

@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    text_elements = []  # type: List[cl.Text]
    if source_documents:
        for source_idx, source_doc in enumerate(source_documents):
            source_name = f"source_{source_idx}"
            # Create a text element for each source so it can be
            # referenced from the answer message
            text_elements.append(
                cl.Text(content=source_doc.page_content, name=source_name)
            )
        source_names = [text_el.name for text_el in text_elements]
        if source_names:
            answer += f"\nSources: {', '.join(source_names)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
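
# To try the app locally (assuming chainlit is installed and the GGUF model
# file referenced by llama_model_path exists):
#   chainlit run qa.py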