feat(playground): plumb through tools on spans to playground (#5203)

* fix(playground): make input/output content scrollable * plumb through span tools to playground * add tests * fix comment * fix python test
Arize-ai · Oct 28, 2024 · be1a103 · be1a103
1 parent 7f41824
commit be1a103
Show file tree

Hide file tree

Showing 16 changed files with 262 additions and 1,528 deletions.
diff --git a/app/src/pages/playground/Playground.tsx b/app/src/pages/playground/Playground.tsx
@@ -142,10 +142,8 @@ const playgroundPromptPanelContentCSS = css`
 `;
 
 const playgroundInputOutputPanelContentCSS = css`
-  display: flex;
-  flex-direction: column;
   height: 100%;
-  flex: 1 1 auto;
+  overflow: auto;
 `;
 
 function PlaygroundContent() {
@@ -205,7 +203,7 @@ function PlaygroundContent() {
                 ) : null
               }
             >
-              <View padding="size-200">
+              <View padding="size-200" height={"100%"}>
                 <PlaygroundInput />
               </View>
             </AccordionItem>

diff --git a/app/src/pages/playground/__tests__/fixtures.ts b/app/src/pages/playground/__tests__/fixtures.ts
@@ -44,3 +44,40 @@ export const spanAttributesWithInputMessages = {
   },
   openinference: { span: { kind: "LLM" } },
 } as const;
+
+export const testSpanToolCall = {
+  tool_call: {
+    id: "1",
+    function: {
+      name: "functionName",
+      arguments: JSON.stringify({ arg1: "value1" }),
+    },
+  },
+};
+
+export const expectedTestToolCall = {
+  id: "1",
+  function: {
+    name: "functionName",
+    arguments: JSON.stringify({ arg1: "value1" }),
+  },
+};
+
+export const testSpanToolJsonSchema = {
+  type: "function",
+  function: {
+    name: "get_weather",
+    parameters: {
+      type: "object",
+      properties: {
+        city: { type: "string" },
+      },
+    },
+  },
+};
+
+export const testSpanTool = {
+  tool: {
+    json_schema: JSON.stringify(testSpanToolJsonSchema),
+  },
+};
diff --git a/app/src/pages/playground/__tests__/playgroundUtils.test.ts b/app/src/pages/playground/__tests__/playgroundUtils.test.ts
@@ -13,6 +13,7 @@ import {
   OUTPUT_MESSAGES_PARSING_ERROR,
   OUTPUT_VALUE_PARSING_ERROR,
   SPAN_ATTRIBUTES_PARSING_ERROR,
+  TOOLS_PARSING_ERROR,
 } from "../constants";
 import {
   extractVariablesFromInstances,
@@ -21,13 +22,18 @@ import {
   getModelProviderFromModelName,
   getOutputFromAttributes,
   getTemplateMessagesFromAttributes,
+  getToolsFromAttributes,
   processAttributeToolCalls,
   transformSpanAttributesToPlaygroundInstance,
 } from "../playgroundUtils";
 
 import {
   basePlaygroundSpan,
+  expectedTestToolCall,
   spanAttributesWithInputMessages,
+  testSpanTool,
+  testSpanToolCall,
+  testSpanToolJsonSchema,
 } from "./fixtures";
 
 const baseTestPlaygroundInstance: PlaygroundInstance = {
@@ -200,7 +206,7 @@ describe("transformSpanAttributesToPlaygroundInstance", () => {
     });
   });
 
-  it("should normalize message roles in input and output messages", () => {
+  it("should normalize message roles, content, and toolCalls in input and output messages", () => {
     const span = {
       ...basePlaygroundSpan,
       attributes: JSON.stringify({
@@ -211,6 +217,7 @@ describe("transformSpanAttributesToPlaygroundInstance", () => {
               message: {
                 role: "human",
                 content: "You are a chatbot",
+                tool_calls: [testSpanToolCall],
               },
             },
           ],
@@ -240,6 +247,7 @@ describe("transformSpanAttributesToPlaygroundInstance", () => {
               id: 2,
               role: "user",
               content: "You are a chatbot",
+              toolCalls: [expectedTestToolCall],
             },
           ],
         },
@@ -249,6 +257,53 @@ describe("transformSpanAttributesToPlaygroundInstance", () => {
     });
   });
 
+  it("should correctly parse llm.tools", () => {
+    const span = {
+      ...basePlaygroundSpan,
+      attributes: JSON.stringify({
+        llm: {
+          model_name: "gpt-4o",
+          tools: [testSpanTool],
+          input_messages: [
+            { message: { content: "You are a chatbot", role: "system" } },
+            {
+              message: {
+                role: "human",
+                content: "hello?",
+              },
+            },
+          ],
+          output_messages: [
+            {
+              message: {
+                role: "assistant",
+                content: "This is an AI Answer",
+              },
+            },
+          ],
+        },
+      }),
+    };
+    expect(transformSpanAttributesToPlaygroundInstance(span)).toEqual({
+      playgroundInstance: {
+        ...expectedPlaygroundInstanceWithIO,
+        model: {
+          ...expectedPlaygroundInstanceWithIO.model,
+          provider: "OPENAI",
+          modelName: "gpt-4o",
+        },
+        tools: [
+          {
+            id: expect.any(Number),
+            definition: testSpanToolJsonSchema,
+          },
+        ],
+        output: [{ id: 4, content: "This is an AI Answer", role: "ai" }],
+      },
+      parsingErrors: [],
+    });
+  });
+
   it("should correctly parse the model name and infer the provider", () => {
     const openAiAttributes = JSON.stringify({
       ...spanAttributesWithInputMessages,
@@ -453,23 +508,6 @@ describe("getModelProviderFromModelName", () => {
   });
 });
 
-const testSpanToolCall = {
-  tool_call: {
-    id: "1",
-    function: {
-      name: "functionName",
-      arguments: JSON.stringify({ arg1: "value1" }),
-    },
-  },
-};
-
-const expectedTestToolCall = {
-  id: "1",
-  function: {
-    name: "functionName",
-    arguments: JSON.stringify({ arg1: "value1" }),
-  },
-};
 describe("processAttributeToolCalls", () => {
   it("should transform tool calls correctly", () => {
     const toolCalls = [testSpanToolCall];
@@ -708,3 +746,41 @@ describe("extractVariablesFromInstances", () => {
     ).toEqual(["name", "age"]);
   });
 });
+
+describe("getToolsFromAttributes", () => {
+  it("should return tools and no parsing errors if tools are valid", () => {
+    const parsedAttributes = {
+      llm: {
+        tools: [testSpanTool],
+      },
+    };
+    const result = getToolsFromAttributes(parsedAttributes);
+    expect(result).toEqual({
+      tools: [
+        {
+          id: expect.any(Number),
+          definition: testSpanToolJsonSchema,
+        },
+      ],
+      parsingErrors: [],
+    });
+  });
+
+  it("should return null tools and parsing errors if tools are invalid", () => {
+    const parsedAttributes = { llm: { tools: "invalid" } };
+    const result = getToolsFromAttributes(parsedAttributes);
+    expect(result).toEqual({
+      tools: null,
+      parsingErrors: [TOOLS_PARSING_ERROR],
+    });
+  });
+
+  it("should return null tools and no parsing errors if tools are not present", () => {
+    const parsedAttributes = { llm: {} };
+    const result = getToolsFromAttributes(parsedAttributes);
+    expect(result).toEqual({
+      tools: null,
+      parsingErrors: [],
+    });
+  });
+});
diff --git a/app/src/pages/playground/constants.tsx b/app/src/pages/playground/constants.tsx
@@ -26,6 +26,9 @@ export const MODEL_CONFIG_PARSING_ERROR =
   "Unable to parse model config, expected llm.model_name to be present.";
 export const MODEL_CONFIG_WITH_INVOCATION_PARAMETERS_PARSING_ERROR =
   "Unable to parse model config, expected llm.invocation_parameters json string to be present.";
+// TODO(parker / apowell) - adjust this error message with anthropic support https://github.com/Arize-ai/phoenix/issues/5100
+export const TOOLS_PARSING_ERROR =
+  "Unable to parse tools, expected tools to be an array of valid OpenAI tools.";
 
 export const modelProviderToModelPrefixMap: Record<ModelProvider, string[]> = {
   AZURE_OPENAI: [],

diff --git a/app/src/pages/playground/playgroundUtils.ts b/app/src/pages/playground/playgroundUtils.ts
@@ -4,11 +4,14 @@ import {
   DEFAULT_CHAT_ROLE,
   DEFAULT_MODEL_PROVIDER,
 } from "@phoenix/constants/generativeConstants";
-import { ModelConfig, PlaygroundInstance } from "@phoenix/store";
 import {
   ChatMessage,
   createPlaygroundInstance,
   generateMessageId,
+  generateToolId,
+  ModelConfig,
+  OpenAITool,
+  PlaygroundInstance,
 } from "@phoenix/store";
 import { assertUnreachable } from "@phoenix/typeUtils";
 import { safelyParseJSON } from "@phoenix/utils/jsonUtils";
@@ -22,12 +25,15 @@ import {
   OUTPUT_MESSAGES_PARSING_ERROR,
   OUTPUT_VALUE_PARSING_ERROR,
   SPAN_ATTRIBUTES_PARSING_ERROR,
+  TOOLS_PARSING_ERROR,
 } from "./constants";
 import {
   chatMessageRolesSchema,
   chatMessagesSchema,
   llmInputMessageSchema,
   llmOutputMessageSchema,
+  LlmToolSchema,
+  llmToolSchema,
   MessageSchema,
   modelConfigSchema,
   modelConfigWithInvocationParametersSchema,
@@ -229,6 +235,49 @@ export function getModelConfigFromAttributes(parsedAttributes: unknown): {
   return { modelConfig: null, parsingErrors: [MODEL_CONFIG_PARSING_ERROR] };
 }
 
+/**
+ * Converts the `llm.tools` entries of span attributes (typed as LlmToolSchema) into OpenAI tools to be used in the playground, skipping entries that have no `tool` field
+ * @param tools the span attributes object; the tool entries are read from `llm.tools`, and a missing `llm` or `llm.tools` is treated as an empty list
+ * @returns playground OpenAI tools, one per kept entry, each with an id from generateToolId() and the entry's `tool.json_schema` as its definition
+ */
+function processAttributeTools(tools: LlmToolSchema): OpenAITool[] {
+  return (tools?.llm?.tools ?? [])
+    .map((tool) => {
+      if (tool?.tool == null) {
+        return null;
+      }
+      return {
+        id: generateToolId(),
+        definition: tool.tool.json_schema,
+      };
+    })
+    .filter((tool): tool is NonNullable<typeof tool> => tool != null);
+}
+
+/**
+ * Attempts to get llm.tools from the span attributes by validating them with llmToolSchema.safeParse.
+ * @param parsedAttributes the JSON parsed span attributes
+ * @returns the processed tools and no parsing errors when `llm.tools` is present and valid; null tools and no parsing errors when `llm` or `llm.tools` is missing or null; null tools and TOOLS_PARSING_ERROR when the attributes fail schema validation
+ *
+ * NB: Only exported for testing
+ */
+export function getToolsFromAttributes(
+  parsedAttributes: unknown
+):
+  | { tools: OpenAITool[]; parsingErrors: never[] }
+  | { tools: null; parsingErrors: string[] } {
+  const { data, success } = llmToolSchema.safeParse(parsedAttributes);
+
+  if (!success) {
+    return { tools: null, parsingErrors: [TOOLS_PARSING_ERROR] };
+  }
+  // If there are no tools or llm attributes, we don't return parsing errors; it just means the span didn't have tools
+  if (data?.llm?.tools == null) {
+    return { tools: null, parsingErrors: [] };
+  }
+  return { tools: processAttributeTools(data), parsingErrors: [] };
+}
+
 /**
  * Takes a {@link PlaygroundSpan|Span} and attempts to transform its attributes into various fields on a {@link PlaygroundInstance}.
  * @param span the {@link PlaygroundSpan|Span} to transform into a playground instance
@@ -267,7 +316,10 @@ export function transformSpanAttributesToPlaygroundInstance(
   const { modelConfig, parsingErrors: modelConfigParsingErrors } =
     getModelConfigFromAttributes(parsedAttributes);
 
-  // TODO(parker): add support for tools, variables, and input / output variants
+  const { tools, parsingErrors: toolsParsingErrors } =
+    getToolsFromAttributes(parsedAttributes);
+
+  // TODO(parker): add support for prompt template variables
   // https://github.com/Arize-ai/phoenix/issues/4886
   return {
     playgroundInstance: {
@@ -282,11 +334,13 @@ export function transformSpanAttributesToPlaygroundInstance(
           : basePlaygroundInstance.template,
       output,
       spanId: span.id,
+      tools: tools ?? basePlaygroundInstance.tools,
     },
     parsingErrors: [
       ...messageParsingErrors,
       ...outputParsingErrors,
       ...modelConfigParsingErrors,
+      ...toolsParsingErrors,
     ],
   };
 }