-
Notifications
You must be signed in to change notification settings - Fork 0
/
03_query.py
61 lines (47 loc) · 1.69 KB
/
03_query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Ask holistic questions about the text
# Step 1: Initialize Pinecone
# `initialize_pinecone` is imported from the `vectordb` module and called once
# with the index name "bible-rag". The result is kept in the module-level
# `index`, which Step 3 passes to `query` for every lookup.
# NOTE(review): presumably this returns a handle to an already-populated
# Pinecone index — confirm against vectordb.
from vectordb import initialize_pinecone
index = initialize_pinecone("bible-rag")
import os
# Step 2: Fetch unique cluster ids
import pandas as pd

# data.csv is looked up relative to the current working directory. Check for
# a regular file specifically, so a directory that happens to be named
# "data.csv" is not mistaken for the dataset.
if not os.path.isfile("data.csv"):
    # Raising SystemExit with a message prints it to stderr and exits with a
    # non-zero status, so shells and pipelines can detect the failure. The
    # `exit()` helper is injected by the `site` module and is not guaranteed
    # to exist (e.g. under `python -S`).
    raise SystemExit("Data not found. Please run step 2 first.")

# The CSV must provide at least a "cluster" column (used here) and an "id"
# column (used in Step 3).
clusters = pd.read_csv("data.csv")

# Distinct cluster labels, in order of first appearance in the file.
cluster_ids = clusters["cluster"].unique()
# Step 3: Query the index for each cluster
from vectordb import query

# Upper bound on how many rows are sampled from each cluster.
set_sample_size = 10

# Maps str(cluster_id) -> newline-joined text of the rows sampled from it.
sample_set = {}
for cluster_id in cluster_ids:
    cluster_rows = clusters[clusters["cluster"] == cluster_id]

    # DataFrame.sample draws without replacement by default and raises
    # ValueError when asked for more rows than exist, so cap the request at
    # the cluster's size. Small clusters are then used in full.
    sampled_rows = cluster_rows.sample(min(set_sample_size, len(cluster_rows)))

    # Look each sampled row up in the index by its id and keep the text stored
    # in the first match's metadata.
    # NOTE(review): assumes every id yields at least one match — confirm
    # against vectordb.query; an empty "matches" list raises IndexError here.
    cluster_texts = [
        query(index, str(row_id))["matches"][0]["metadata"]["text"]
        for row_id in sampled_rows["id"].values
    ]

    sample_set[str(cluster_id)] = "\n".join(cluster_texts)
# Step 4: Ask the holistic question once per cluster, using that cluster's
# sampled text as the context.
from phi.assistant import Assistant
from phi.llm.openai import OpenAIChat

# A single assistant instance is shared by every per-cluster call below and
# by the final call in Step 5.
assistant = Assistant(
    llm=OpenAIChat(model="gpt-4o-mini"),
    instructions=["Answer the users question given the context provided."],
)

holistic_question = "User Question:\nlist all the relationships and show connections"

# One non-streamed answer per cluster, in the same order as `cluster_ids`.
# Each prompt is the question followed by "\nContext:\n" and the cluster text.
answers = [
    assistant.run(
        "".join((holistic_question, "\nContext:\n", sample_set[str(cluster_id)])),
        stream=False,
    )
    for cluster_id in cluster_ids
]
# Step 5: Print the answers
# Combine the per-cluster answers into one context and ask the holistic
# question a final time over all of them.
full_answers = "\n".join(answers)

# Use the same "\nContext:\n" separator as the per-cluster prompts in Step 4,
# so the question and the context are not run together on one line.
full_question = holistic_question + "\nContext:\n" + full_answers

# Send the combined prompt (question + context). Previously only the joined
# answers were sent, so the model never saw the question it was meant to
# answer and `full_question` was dead code.
result = assistant.run(full_question, stream=False)
print(result)