issue #14: add llamaindex search
ibrahim-kabir committed Apr 3, 2024
1 parent e7fe32f commit 4b3c473
Showing 5 changed files with 41 additions and 40 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -41,10 +41,13 @@ keys/
flask_session/

# Ignore local QnA json files
QnA
QnA/

# Ignore output of api-test and from the scripts
output/

# Ignore input of the scripts
input/

# Ignore the generated files from cache
cache/
2 changes: 1 addition & 1 deletion finesse/accuracy_functions.py
@@ -210,7 +210,7 @@ def update_dict_bing_data(test_data: dict):
load_dotenv()
endpoint = os.getenv("BING_ENDPOINT")
subscription_key = os.getenv("BING_SEARCH_KEY")
search_engine = BingSearch(endpoint, subscription_key)
search_engine = BingSearch(endpoint, subscription_key, "finesse/cache/")
count = 1
for key, value in copy_data.items():
question = value.get("question")
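For context, here is a minimal sketch of how the updated call site in accuracy_functions.py fits together, assuming python-dotenv and the BingSearch class from finesse/bing_search.py. Only the third argument ("finesse/cache/") is new in this commit; it is the directory joblib uses to persist cached Bing responses, matching the new cache/ entry in .gitignore.

```python
# Sketch of the updated call site; only the cache directory argument is new.
import os

from dotenv import load_dotenv

from finesse.bing_search import BingSearch

load_dotenv()
search_engine = BingSearch(
    os.getenv("BING_ENDPOINT"),
    os.getenv("BING_SEARCH_KEY"),
    "finesse/cache/",  # joblib cache directory introduced in this commit
)
```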
8 changes: 7 additions & 1 deletion finesse/bing_search.py
@@ -2,16 +2,22 @@
from msrest.authentication import CognitiveServicesCredentials
import time
import statistics
from joblib import Memory


class BingSearch():
"""
A class for performing web searches using the Bing Search API.
"""

def __init__(self, endpoint, subscription_key):
def __init__(self, endpoint, subscription_key, cache_dir):
self.endpoint = endpoint
self.subscription_key = subscription_key
self.client = WebSearchClient(endpoint=self.endpoint, credentials=CognitiveServicesCredentials(self.subscription_key))
self.client.config.base_url = '{Endpoint}/v7.0' # Temporary change to fix the error. Issue opened https://github.com/Azure/azure-sdk-for-python/issues/34917
self.cache_dir = cache_dir
self.memory = Memory(cache_dir, verbose=0)
self.search_urls = self.memory.cache(self.search_urls, ignore=['self'])

def search_urls(self, query: str, num_results: int = 100) -> tuple[list[str], float]:
"""
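The constructor change above wires joblib's on-disk cache around search_urls. Below is a minimal sketch of the same pattern in isolation; CachedSearch and its method body are illustrative stand-ins, not code from the repository.

```python
from joblib import Memory


class CachedSearch:
    """Illustrative stand-in for BingSearch's caching setup."""

    def __init__(self, cache_dir: str):
        self.memory = Memory(cache_dir, verbose=0)
        # Rebind the method to its memoized wrapper; ignore=['self'] keeps the
        # instance out of the cache key, so identical queries share results.
        self.search_urls = self.memory.cache(self.search_urls, ignore=["self"])

    def search_urls(self, query: str, num_results: int = 10) -> list[str]:
        print(f"cache miss for {query!r}")  # only runs when not cached
        return [f"https://example.com/{query}/{i}" for i in range(num_results)]


if __name__ == "__main__":
    engine = CachedSearch("cache/")
    engine.search_urls("llamaindex")  # executes and stores the result on disk
    engine.search_urls("llamaindex")  # served from the cache, no print this time
```

One side effect worth noting: since the real search_urls, per its tuple[list[str], float] signature, also returns the elapsed time of the request, a cache hit will presumably replay the originally recorded timing rather than measure a fresh call, which is worth keeping in mind when reading the reported latencies.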
65 changes: 28 additions & 37 deletions finesse/finesse_test.py
@@ -31,57 +31,48 @@ class FinesseUser(HttpUser):
def search_accuracy(self):
try:
json_data = next(self.qna_reader)
<<<<<<< HEAD
while json_data.get("skip") is True:
=======
while json_data.get("skip") == True:
>>>>>>> 678518b (issue #7: Removed punction mark on md files, added total number of 0, better rounding, sorted json files)
json_data = next(self.qna_reader)
except StopIteration:
if not self.once:
# Reset variables
self.on_start()
json_data = next(self.qna_reader)
<<<<<<< HEAD
while json_data.get("skip") is True:
=======
while json_data.get("skip") == True:
>>>>>>> 678518b (issue #7: Removed punction mark on md files, added total number of 0, better rounding, sorted json files)
json_data = next(self.qna_reader)
print("Restarting the running test")
else:
print("Stopping the running test")
self.environment.runner.quit()

if self.engine in ["ai-lab", "azure", "static"]:
question = json_data.get("question")
expected_url = json_data.get("url")
file_name = self.qna_reader.file_name
response_url : list[str] = []
search_url = f"{self.host}/search/{self.engine}?top={self.top}"
data = json.dumps({'query': f'{question}'})
headers = { "Content-Type": "application/json" }
response = self.client.post(search_url, data=data, headers=headers)
question = json_data.get("question")
expected_url = json_data.get("url")
file_name = self.qna_reader.file_name
response_url : list[str] = []
search_url = f"{self.host}/search/{self.engine}?top={self.top}"
data = json.dumps({'query': f'{question}'})
headers = { "Content-Type": "application/json" }
response = self.client.post(search_url, data=data, headers=headers)

if response.status_code == 200:
response_pages = response.json()
for page in response_pages:
response_url.append(page.get("url"))
accuracy_result = calculate_accuracy(response_url, expected_url)
time_taken = round(response.elapsed.total_seconds()*1000,3)
expected_page = json_data.copy()
del expected_page['question']
del expected_page['answer']
global_test_data[file_name] = {
"question": question,
"expected_page": expected_page,
"response_pages": response_pages,
"position": accuracy_result.position,
"total_pages": accuracy_result.total_pages,
"accuracy": accuracy_result.score,
"time": time_taken,
"top": self.top,
}
if response.status_code == 200:
response_pages = response.json()
for page in response_pages:
response_url.append(page.get("url"))
accuracy_result = calculate_accuracy(response_url, expected_url)
time_taken = round(response.elapsed.total_seconds()*1000,3)
expected_page = json_data.copy()
del expected_page['question']
del expected_page['answer']
global_test_data[file_name] = {
"question": question,
"expected_page": expected_page,
"response_pages": response_pages,
"position": accuracy_result.position,
"total_pages": accuracy_result.total_pages,
"accuracy": accuracy_result.score,
"time": time_taken,
"top": self.top,
}

def on_start(self):
self.qna_reader = JSONReader(self.path)
@@ -110,6 +101,6 @@ def quit(**_kwargs):
print("Starting bing search test")
update_dict_bing_data(global_test_data)
if settings.get("format") == "md":
save_to_markdown(global_test_data, "azure")
save_to_markdown(global_test_data, settings.get("engine"))
elif settings.get("format") == "csv":
save_to_csv(global_test_data, settings.get("engine"))
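Two behavioural points in this file are easy to miss in the diff: the resolved merge conflict keeps the stricter `is True` check when skipping flagged Q&A entries, and the report is now saved under whichever engine was configured (settings.get("engine")) instead of a hardcoded "azure". A small self-contained sketch of the skip logic follows; next_unskipped is a hypothetical helper and the plain iterator stands in for the project's JSONReader.

```python
def next_unskipped(qna_reader):
    """Advance the reader past entries flagged with "skip": true."""
    json_data = next(qna_reader)
    # `is True` only skips entries whose "skip" field is literally the boolean
    # True; merely truthy values such as the string "yes" are not skipped.
    while json_data.get("skip") is True:
        json_data = next(qna_reader)
    return json_data


if __name__ == "__main__":
    entries = iter([
        {"question": "Q1", "skip": True},
        {"question": "Q2"},
    ])
    print(next_unskipped(entries))  # -> {'question': 'Q2'}
```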
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -5,3 +5,4 @@ azure-cognitiveservices-search-websearch
msrest
openpyxl
natsort
joblib
