Fix and improve offline chat actor, director tests

- Use the updated references schema, where each reference is a dict with a `compiled` key
- Enable director tests that are now expected to pass and that do pass (with Gemma 2 at least)
khoj-ai · Jul 17, 2024 · 08956a4 · 08956a4
1 parent d36250f
commit 08956a4
Show file tree

Hide file tree

Showing 2 changed files with 51 additions and 34 deletions.
diff --git a/tests/test_offline_chat_actors.py b/tests/test_offline_chat_actors.py
@@ -286,7 +286,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(loaded_model):
     # Act
     response_gen = converse_offline(
         references=[
-            "Testatron was born on 1st April 1984 in Testville."
+            {"compiled": "Testatron was born on 1st April 1984 in Testville."}
         ],  # Assume context retrieved from notes for the user_query
         user_query="Where was I born?",
         conversation_log=populate_chat_history(message_list),
@@ -341,14 +341,22 @@ def test_answer_requires_current_date_awareness(loaded_model):
     "Chat actor should be able to answer questions relative to current date using provided notes"
     # Arrange
     context = [
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
-Expenses:Food:Dining  10.00 USD""",
-        f"""2020-04-01 "SuperMercado" "Bananas"
-Expenses:Food:Groceries  10.00 USD""",
-        f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-04-01 "SuperMercado" "Bananas"
+Expenses:Food:Groceries  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
     ]
 
     # Act
@@ -373,14 +381,22 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(loaded_mod
     "Chat actor should be able to answer questions that require date aware aggregation across multiple notes"
     # Arrange
     context = [
-        f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
-Expenses:Food:Dining  10.00 USD""",
-        f"""2020-04-01 "SuperMercado" "Bananas"
-Expenses:Food:Groceries  10.00 USD""",
-        f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
+        {
+            "compiled": f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-04-01 "SuperMercado" "Bananas"
+Expenses:Food:Groceries  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
     ]
 
     # Act
@@ -430,12 +446,18 @@ def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model):
     "Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"
     # Arrange
     context = [
-        f"""# Ramya
-My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""",
-        f"""# Fang
-My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""",
-        f"""# Aiyla
-My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""",
+        {
+            "compiled": f"""# Ramya
+My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani."""
+        },
+        {
+            "compiled": f"""# Fang
+My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li."""
+        },
+        {
+            "compiled": f"""# Aiyla
+My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet."""
+        },
     ]
 
     # Act
@@ -459,9 +481,9 @@ def test_agent_prompt_should_be_used(loaded_model, offline_agent):
     "Chat actor should ask be tuned to think like an accountant based on the agent definition"
     # Arrange
     context = [
-        f"""I went to the store and bought some bananas for 2.20""",
-        f"""I went to the store and bought some apples for 1.30""",
-        f"""I went to the store and bought some oranges for 6.00""",
+        {"compiled": f"""I went to the store and bought some bananas for 2.20"""},
+        {"compiled": f"""I went to the store and bought some apples for 1.30"""},
+        {"compiled": f"""I went to the store and bought some oranges for 6.00"""},
     ]
 
     # Act
@@ -499,7 +521,7 @@ def test_chat_does_not_exceed_prompt_size(loaded_model):
     "Ensure chat context and response together do not exceed max prompt size for the model"
     # Arrange
     prompt_size_exceeded_error = "ERROR: The prompt size exceeds the context window size and cannot be processed"
-    context = [" ".join([f"{number}" for number in range(2043)])]
+    context = [{"compiled": " ".join([f"{number}" for number in range(2043)])}]
 
     # Act
     response_gen = converse_offline(
@@ -530,15 +552,15 @@ def test_filter_questions():
 # ----------------------------------------------------------------------------------------------------
 @pytest.mark.anyio
 @pytest.mark.django_db(transaction=True)
-async def test_use_default_response_mode(client_offline_chat):
+async def test_use_text_response_mode(client_offline_chat):
     # Arrange
     user_query = "What's the latest in the Israel/Palestine conflict?"
 
     # Act
     mode = await aget_relevant_output_modes(user_query, {})
 
     # Assert
-    assert mode.value == "default"
+    assert mode.value == "text"
 
 
 # ----------------------------------------------------------------------------------------------------

diff --git a/tests/test_offline_chat_director.py b/tests/test_offline_chat_director.py
@@ -45,7 +45,6 @@ def create_conversation(message_list, user, agent=None):
 
 # Tests
 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
 @pytest.mark.chatquality
 @pytest.mark.django_db(transaction=True)
 def test_offline_chat_with_no_chat_history_or_retrieved_content(client_offline_chat):
@@ -179,10 +178,6 @@ def test_answer_from_chat_history_and_previously_retrieved_content(client_offlin
 
 
 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(
-    AssertionError,
-    reason="Chat director not capable of answering this question yet because it requires extract_questions",
-)
 @pytest.mark.chatquality
 @pytest.mark.django_db(transaction=True)
 def test_answer_from_chat_history_and_currently_retrieved_content(client_offline_chat, default_user2):