-
Notifications
You must be signed in to change notification settings - Fork 481
/
Copy pathAzureSearchHandler.py
74 lines (62 loc) · 2.4 KB
/
AzureSearchHandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from .SearchHandlerBase import SearchHandlerBase
from ..helpers.AzureSearchHelper import AzureSearchHelper
from ..common.SourceDocument import SourceDocument
import json
class AzureSearchHandler(SearchHandlerBase):
    """Search handler that talks to an Azure AI Search index.

    The methods below read ``self.search_client`` and ``self.env_helper``;
    neither is assigned in this class, so they are presumably provided by
    ``SearchHandlerBase`` (NOTE(review): confirm against the base class).
    """

    def create_search_client(self):
        """Return the ``client`` attribute of the vector store built by ``AzureSearchHelper``."""
        vector_store_helper = AzureSearchHelper()
        return vector_store_helper.get_vector_store().client

    def perform_search(self, filename):
        """Search the index for every document whose ``title`` equals *filename*.

        Only the ``title``, ``content`` and ``metadata`` fields are selected.

        Args:
            filename: Title to match exactly. It is embedded in an OData
                filter as a string literal.

        Returns:
            Whatever ``self.search_client.search`` returns for the query.
        """
        # OData string literals are delimited by single quotes; an embedded
        # quote must be doubled, otherwise a title such as "O'Reilly.pdf"
        # yields an invalid (or attacker-controlled) filter expression.
        escaped_filename = str(filename).replace("'", "''")
        return self.search_client.search(
            "*",
            select="title, content, metadata",
            filter=f"title eq '{escaped_filename}'",
        )

    def process_results(self, results):
        """Convert search results into ``[chunk, content]`` pairs.

        Args:
            results: Iterable of mappings with a JSON-encoded ``"metadata"``
                string (containing a ``"chunk"`` key) and a ``"content"`` value.

        Returns:
            A list with one ``[chunk, content]`` list per result, in
            iteration order.
        """
        return [
            [json.loads(result["metadata"])["chunk"], result["content"]]
            for result in results
        ]

    def get_files(self):
        """Search for all documents, selecting only ``id`` and ``title``.

        The total count is requested alongside the results.
        """
        return self.search_client.search(
            "*", select="id, title", include_total_count=True
        )

    def output_results(self, results):
        """Group document ids by title.

        Args:
            results: Iterable of mappings with ``"id"`` and ``"title"`` keys.

        Returns:
            A dict mapping each title to the list of ids seen for it, in
            iteration order.
        """
        files = {}
        for result in results:
            files.setdefault(result["title"], []).append(result["id"])
        return files

    def delete_files(self, files):
        """Delete every document id listed in *files* from the index.

        Args:
            files: Mapping of filename to an iterable of document ids.

        Returns:
            The filenames from *files*, joined with ``", "`` (an empty string
            when *files* is empty).
        """
        ids_to_delete = [
            {"id": doc_id} for ids in files.values() for doc_id in ids
        ]

        # Nothing to delete: skip the service round-trip rather than sending
        # an empty batch (NOTE(review): the service is expected to reject
        # batches with no actions - confirm).
        if ids_to_delete:
            self.search_client.delete_documents(ids_to_delete)

        return ", ".join(files)

    def query_search(self, question):
        """Run a similarity search for *question* against the vector store.

        The result count and filter come from
        ``self.env_helper.AZURE_SEARCH_TOP_K`` and
        ``self.env_helper.AZURE_SEARCH_FILTER``.
        """
        vector_store = AzureSearchHelper().get_vector_store()
        return vector_store.similarity_search(
            query=question,
            k=self.env_helper.AZURE_SEARCH_TOP_K,
            filters=self.env_helper.AZURE_SEARCH_FILTER,
        )

    def return_answer_source_documents(self, search_results):
        """Wrap each search result in a ``SourceDocument``.

        Args:
            search_results: Iterable of objects exposing ``page_content`` and
                a ``metadata`` mapping with the keys ``id``, ``title``,
                ``source``, ``chunk``, ``offset`` and ``page_number``.

        Returns:
            A list of ``SourceDocument`` instances in iteration order.
        """
        return [
            SourceDocument(
                id=source.metadata["id"],
                content=source.page_content,
                title=source.metadata["title"],
                source=source.metadata["source"],
                chunk=source.metadata["chunk"],
                offset=source.metadata["offset"],
                page_number=source.metadata["page_number"],
            )
            for source in search_results
        ]