issue #14: add llamaindex search
ibrahim-kabir committed Apr 3, 2024
1 parent e7fe32f commit 4b3c473
Showing 5 changed files with 41 additions and 40 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -41,10 +41,13 @@ keys/
flask_session/

# Ignore local QnA json files
QnA
QnA/

# Ignore output of api-test and from the scripts
output/

# Ignore input of the scripts
input/

# Ignore the generated files from cache
cache/
2 changes: 1 addition & 1 deletion finesse/accuracy_functions.py
@@ -210,7 +210,7 @@ def update_dict_bing_data(test_data: dict):
load_dotenv()
endpoint = os.getenv("BING_ENDPOINT")
subscription_key = os.getenv("BING_SEARCH_KEY")
search_engine = BingSearch(endpoint, subscription_key)
search_engine = BingSearch(endpoint, subscription_key, "finesse/cache/")
count = 1
for key, value in copy_data.items():
question = value.get("question")
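For context, here is a minimal sketch of how the updated call site in accuracy_functions.py fits together, assuming python-dotenv and the BingSearch class from finesse/bing_search.py. Only the third argument ("finesse/cache/") is new in this commit; it is the directory joblib uses to persist cached Bing responses, matching the new cache/ entry in .gitignore.

```python
# Sketch of the updated call site; only the cache directory argument is new.
import os

from dotenv import load_dotenv

from finesse.bing_search import BingSearch

load_dotenv()
search_engine = BingSearch(
    os.getenv("BING_ENDPOINT"),
    os.getenv("BING_SEARCH_KEY"),
    "finesse/cache/",  # joblib cache directory introduced in this commit
)
```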
8 changes: 7 additions & 1 deletion finesse/bing_search.py
@@ -2,16 +2,22 @@
from msrest.authentication import CognitiveServicesCredentials
import time
import statistics
from joblib import Memory


class BingSearch():
"""
A class for performing web searches using the Bing Search API.
"""

def __init__(self, endpoint, subscription_key):
def __init__(self, endpoint, subscription_key, cache_dir):
self.endpoint = endpoint
self.subscription_key = subscription_key
self.client = WebSearchClient(endpoint=self.endpoint, credentials=CognitiveServicesCredentials(self.subscription_key))
self.client.config.base_url = '{Endpoint}/v7.0' # Temporary change to fix the error. Issue opened https://github.com/Azure/azure-sdk-for-python/issues/34917
self.cache_dir = cache_dir
self.memory = Memory(cache_dir, verbose=0)
self.search_urls = self.memory.cache(self.search_urls, ignore=['self'])

def search_urls(self, query: str, num_results: int = 100) -> tuple[list[str], float]:
"""
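The constructor change above wires joblib's on-disk cache around search_urls. Below is a minimal sketch of the same pattern in isolation; CachedSearch and its method body are illustrative stand-ins, not code from the repository.

```python
from joblib import Memory


class CachedSearch:
    """Illustrative stand-in for BingSearch's caching setup."""

    def __init__(self, cache_dir: str):
        self.memory = Memory(cache_dir, verbose=0)
        # Rebind the method to its memoized wrapper; ignore=['self'] keeps the
        # instance out of the cache key, so identical queries share results.
        self.search_urls = self.memory.cache(self.search_urls, ignore=["self"])

    def search_urls(self, query: str, num_results: int = 10) -> list[str]:
        print(f"cache miss for {query!r}")  # only runs when not cached
        return [f"https://example.com/{query}/{i}" for i in range(num_results)]


if __name__ == "__main__":
    engine = CachedSearch("cache/")
    engine.search_urls("llamaindex")  # executes and stores the result on disk
    engine.search_urls("llamaindex")  # served from the cache, no print this time
```

One side effect worth noting: since the real search_urls, per its tuple[list[str], float] signature, also returns the elapsed time of the request, a cache hit will presumably replay the originally recorded timing rather than measure a fresh call, which is worth keeping in mind when reading the reported latencies.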
65 changes: 28 additions & 37 deletions finesse/finesse_test.py
@@ -31,57 +31,48 @@ class FinesseUser(HttpUser):
def search_accuracy(self):
try:
json_data = next(self.qna_reader)
<<<<<<< HEAD
while json_data.get("skip") is True:
=======
while json_data.get("skip") == True:
>>>>>>> 678518b (issue #7: Removed punction mark on md files, added total number of 0, better rounding, sorted json files)
json_data = next(self.qna_reader)
except StopIteration:
if not self.once:
# Reset variables
self.on_start()
json_data = next(self.qna_reader)
<<<<<<< HEAD
while json_data.get("skip") is True:
=======
while json_data.get("skip") == True:
>>>>>>> 678518b (issue #7: Removed punction mark on md files, added total number of 0, better rounding, sorted json files)
json_data = next(self.qna_reader)
print("Restarting the running test")
else:
print("Stopping the running test")
self.environment.runner.quit()

if self.engine in ["ai-lab", "azure", "static"]:
question = json_data.get("question")
expected_url = json_data.get("url")
file_name = self.qna_reader.file_name
response_url : list[str] = []
search_url = f"{self.host}/search/{self.engine}?top={self.top}"
data = json.dumps({'query': f'{question}'})
headers = { "Content-Type": "application/json" }
response = self.client.post(search_url, data=data, headers=headers)
question = json_data.get("question")
expected_url = json_data.get("url")
file_name = self.qna_reader.file_name
response_url : list[str] = []
search_url = f"{self.host}/search/{self.engine}?top={self.top}"
data = json.dumps({'query': f'{question}'})
headers = { "Content-Type": "application/json" }
response = self.client.post(search_url, data=data, headers=headers)

if response.status_code == 200:
response_pages = response.json()
for page in response_pages:
response_url.append(page.get("url"))
accuracy_result = calculate_accuracy(response_url, expected_url)
time_taken = round(response.elapsed.total_seconds()*1000,3)
expected_page = json_data.copy()
del expected_page['question']
del expected_page['answer']
global_test_data[file_name] = {
"question": question,
"expected_page": expected_page,
"response_pages": response_pages,
"position": accuracy_result.position,
"total_pages": accuracy_result.total_pages,
"accuracy": accuracy_result.score,
"time": time_taken,
"top": self.top,
}
if response.status_code == 200:
response_pages = response.json()
for page in response_pages:
response_url.append(page.get("url"))
accuracy_result = calculate_accuracy(response_url, expected_url)
time_taken = round(response.elapsed.total_seconds()*1000,3)
expected_page = json_data.copy()
del expected_page['question']
del expected_page['answer']
global_test_data[file_name] = {
"question": question,
"expected_page": expected_page,
"response_pages": response_pages,
"position": accuracy_result.position,
"total_pages": accuracy_result.total_pages,
"accuracy": accuracy_result.score,
"time": time_taken,
"top": self.top,
}

def on_start(self):
self.qna_reader = JSONReader(self.path)
@@ -110,6 +101,6 @@ def quit(**_kwargs):
print("Starting bing search test")
update_dict_bing_data(global_test_data)
if settings.get("format") == "md":
save_to_markdown(global_test_data, "azure")
save_to_markdown(global_test_data, settings.get("engine"))
elif settings.get("format") == "csv":
save_to_csv(global_test_data, settings.get("engine"))
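Two behavioural points in this file are easy to miss in the diff: the resolved merge conflict keeps the stricter `is True` check when skipping flagged Q&A entries, and the report is now saved under whichever engine was configured (settings.get("engine")) instead of a hardcoded "azure". A small self-contained sketch of the skip logic follows; next_unskipped is a hypothetical helper and the plain iterator stands in for the project's JSONReader.

```python
def next_unskipped(qna_reader):
    """Advance the reader past entries flagged with "skip": true."""
    json_data = next(qna_reader)
    # `is True` only skips entries whose "skip" field is literally the boolean
    # True; merely truthy values such as the string "yes" are not skipped.
    while json_data.get("skip") is True:
        json_data = next(qna_reader)
    return json_data


if __name__ == "__main__":
    entries = iter([
        {"question": "Q1", "skip": True},
        {"question": "Q2"},
    ])
    print(next_unskipped(entries))  # -> {'question': 'Q2'}
```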
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -5,3 +5,4 @@ azure-cognitiveservices-search-websearch
msrest
openpyxl
natsort
joblib
