From 97e4ec5b1a075a5c0c27db8e368b56ab5533cc13 Mon Sep 17 00:00:00 2001
From: Shreya Shankar <ss.shankar505@gmail.com>
Date: Thu, 3 Oct 2024 00:10:50 -0700
Subject: [PATCH 1/2] feat: print out LLM message history and tools when
 there's an InvalidOutputError

---
 docetl/operations/utils.py | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/docetl/operations/utils.py b/docetl/operations/utils.py
index 157e1e14..cf06b13e 100644
--- a/docetl/operations/utils.py
+++ b/docetl/operations/utils.py
@@ -449,16 +449,34 @@ class InvalidOutputError(Exception):
     Attributes:
         message (str): Explanation of the error.
         output (str): The invalid output that caused the exception.
+        expected_schema (Dict[str, Any]): The expected schema for the output.
+        messages (List[Dict[str, str]]): The messages sent to the LLM.
+        tools (Optional[List[Dict[str, str]]]): The tools passed to the LLM.
     """
 
-    def __init__(self, message: str, output: str, expected_schema: Dict[str, Any]):
+    def __init__(
+        self,
+        message: str,
+        output: str,
+        expected_schema: Dict[str, Any],
+        messages: List[Dict[str, str]],
+        tools: Optional[List[Dict[str, str]]] = None,
+    ):
         self.message = message
         self.output = output
         self.expected_schema = expected_schema
+        self.messages = messages
+        self.tools = tools
         super().__init__(self.message)
 
     def __str__(self):
-        return f"{self.message}\nInvalid output: {self.output}\nExpected schema: {self.expected_schema}"
+        return (
+            f"{self.message}\n"
+            f"Invalid output: {self.output}\n"
+            f"Expected schema: {self.expected_schema}\n"
+            f"Messages sent to LLM: {self.messages}\n"
+            f"Tools passed to LLM: {self.tools}"
+        )
 
 
 def timeout(seconds):
@@ -707,7 +725,7 @@ def call_llm_with_gleaning(
     cost = 0.0
 
     # Parse the response
-    parsed_response = parse_llm_response(response, output_schema)
+    parsed_response = parse_llm_response(response, output_schema, messages=messages)
     output = parsed_response[0]
 
     messages = (
@@ -865,7 +883,7 @@ def parse_llm_response_helper(
         InvalidOutputError: If the response is not valid.
     """
     if not response:
-        raise InvalidOutputError("No response from LLM", [{}], schema)
+        raise InvalidOutputError("No response from LLM", [{}], schema, [], [])
 
     # Parse the response based on the provided tools
     if tools:
@@ -894,7 +912,9 @@ def parse_llm_response_helper(
             tool_calls = response.choices[0].message.tool_calls
 
             if not tool_calls:
-                raise InvalidOutputError("No tool calls in LLM response", [{}], schema)
+                raise InvalidOutputError(
+                    "No tool calls in LLM response", [{}], schema, response.choices, []
+                )
 
             outputs = []
             for tool_call in tool_calls:
@@ -920,12 +940,16 @@ def parse_llm_response_helper(
                         "Could not decode LLM JSON response",
                         [tool_call.function.arguments],
                         schema,
+                        response.choices,
+                        tools,
                     )
                 except Exception as e:
                     raise InvalidOutputError(
                         f"Error parsing LLM response: {e}",
                         [tool_call.function.arguments],
                         schema,
+                        response.choices,
+                        tools,
                     )
             return outputs
 

From ac486838b2acbc7762f74a9f87fc0cb30872afa6 Mon Sep 17 00:00:00 2001
From: Shreya Shankar <ss.shankar505@gmail.com>
Date: Thu, 3 Oct 2024 00:31:00 -0700
Subject: [PATCH 2/2] fix: describe InvalidOutputError tools as the tool calls
 generated by the LLM

---
 docetl/operations/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docetl/operations/utils.py b/docetl/operations/utils.py
index cf06b13e..f75d171d 100644
--- a/docetl/operations/utils.py
+++ b/docetl/operations/utils.py
@@ -451,7 +451,7 @@ class InvalidOutputError(Exception):
         output (str): The invalid output that caused the exception.
         expected_schema (Dict[str, Any]): The expected schema for the output.
         messages (List[Dict[str, str]]): The messages sent to the LLM.
-        tools (Optional[List[Dict[str, str]]]): The tools passed to the LLM.
+        tools (Optional[List[Dict[str, str]]]): The tool calls generated by the LLM.
     """
 
     def __init__(
@@ -475,7 +475,7 @@ def __str__(self):
             f"Invalid output: {self.output}\n"
             f"Expected schema: {self.expected_schema}\n"
             f"Messages sent to LLM: {self.messages}\n"
-            f"Tools passed to LLM: {self.tools}"
+            f"Tool calls generated by LLM: {self.tools}"
         )