From 69188bfc1fe1b914d137d92222aabcc05be82f4a Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Tue, 26 Nov 2024 19:47:45 -0800 Subject: [PATCH 1/3] feat: add python code tool over http * Keep the existing PythonTool over gRPC for now and add a similar tool to go over HTTP when we switch. * Add tests (written for gRPC but adapted for the new one). * The gRPC/HTTP versions coexist for now while our consumers (e.g. bee-api) make the switch. Signed-off-by: Mark Sturdevant --- .env.template | 2 +- src/tools/python/python_http.test.ts | 75 ++++++++ src/tools/python/python_http.ts | 212 +++++++++++++++++++++ test_dir/test_file.txt | 3 + tests/e2e/tools/python/python_http.test.ts | 71 +++++++ 5 files changed, 362 insertions(+), 1 deletion(-) create mode 100644 src/tools/python/python_http.test.ts create mode 100644 src/tools/python/python_http.ts create mode 100644 test_dir/test_file.txt create mode 100644 tests/e2e/tools/python/python_http.test.ts diff --git a/.env.template b/.env.template index 6ed8e403..b060cdac 100644 --- a/.env.template +++ b/.env.template @@ -50,7 +50,7 @@ BEE_FRAMEWORK_LOG_SINGLE_LINE="false" # GCP_VERTEXAI_LOCATION="" # Tools -# CODE_INTERPRETER_URL="http://127.0.0.1:50051" +# CODE_INTERPRETER_URL="http://127.0.0.1:50081" # For Google Search Tool # GOOGLE_API_KEY="your-google-api-key" diff --git a/src/tools/python/python_http.test.ts b/src/tools/python/python_http.test.ts new file mode 100644 index 00000000..3d64086d --- /dev/null +++ b/src/tools/python/python_http.test.ts @@ -0,0 +1,75 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { describe, it, expect } from "vitest"; +import { PythonHttpTool } from "@/tools/python/python_http.js"; +import { verifyDeserialization } from "@tests/e2e/utils.js"; +import { LocalPythonStorage } from "@/tools/python/storage.js"; + +const codeInterpreterUrl = process.env.CODE_INTERPRETER_URL || "http://localhost:50081"; + +const getPythonTool = () => + new PythonHttpTool({ + codeInterpreter: { url: codeInterpreterUrl }, + storage: new LocalPythonStorage({ + interpreterWorkingDir: "/tmp/code-interpreter-storage", + localWorkingDir: "./test_dir/", + }), + }); + +describe("PythonHttpTool", () => { + it("Is the expected tool", () => { + const tool = getPythonTool(); + expect(tool).toBeInstanceOf(PythonHttpTool); + expect(PythonHttpTool.isTool(tool)).toBe(true); + expect(tool.name).toBe("Python"); + expect(tool.description).toMatch("Run Python and/or shell code"); + }); + + it("Throws input validation error for wrong language", async () => { + await expect( + getPythonTool().run({ + // @ts-ignore + language: "PL/1", + + code: "# won't get this far because we don't support PL/1 yet", + inputFiles: [], + }), + ).rejects.toThrow("The received tool input does not match the expected schema."); + }); + + it("Throws input validation error for missing file", async () => { + const sourceCode = ` + with open("test_file.txt", 'r') as f: + print(f.read()) + `; + + await expect( + getPythonTool().run({ + language: "python", + code: sourceCode, + inputFiles: ["bogus_file.txt"], + }), + ).rejects.toThrow("The received tool input does not match the expected schema."); + }); + + it("serializes", async () => { + const tool = getPythonTool(); + const serialized = tool.serialize(); + const deserializedTool = PythonHttpTool.fromSerialized(serialized); + verifyDeserialization(tool, deserializedTool); + }); +}); diff --git a/src/tools/python/python_http.ts b/src/tools/python/python_http.ts new file mode 100644 index 00000000..8e73c293 --- /dev/null +++ b/src/tools/python/python_http.ts @@ -0,0 +1,212 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { BaseToolOptions, BaseToolRunOptions, Tool, ToolError, ToolInput } from "@/tools/base.js"; +import { z } from "zod"; +import { BaseLLMOutput } from "@/llms/base.js"; +import { LLM } from "@/llms/llm.js"; +import { PromptTemplate } from "@/template.js"; +import { filter, isIncludedIn, isTruthy, map, pipe, unique, uniqueBy } from "remeda"; +import { PythonFile, PythonStorage } from "@/tools/python/storage.js"; +import { PythonToolOutput } from "@/tools/python/output.js"; +import { ValidationError } from "ajv"; +import { ConnectionOptions } from "node:tls"; +import { RunContext } from "@/context.js"; +import { hasMinLength } from "@/internals/helpers/array.js"; + +export interface CodeInterpreterOptions { + url: string; + connectionOptions?: ConnectionOptions; +} + +export interface PythonToolOptions extends BaseToolOptions { + codeInterpreter: CodeInterpreterOptions; + executorId?: string; + preprocess?: { + llm: LLM; + promptTemplate: PromptTemplate.infer<{ input: string }>; + }; + storage: PythonStorage; +} + +export class PythonHttpTool extends Tool { + name = "Python"; + description = [ + "Run Python and/or shell code and return the console output. Use for isolated calculations, computations, data or file manipulation.", + "Files provided by the user, or created in a previous run, will be accesible if and only if they are specified in the input. It is necessary to always print() results.", + "The following shell commands are available:", + "Use ffmpeg to convert videos.", + "Use yt-dlp to download videos, and unless specified otherwise use `-S vcodec:h264,res,acodec:m4a` for video and `-x --audio-format mp3` for audio-only.", + "Use pandoc to convert documents between formats (like MD, DOC, DOCX, PDF) -- and don't forget that you can create PDFs by writing markdown and then converting.", + "In Python, the following modules are available:", + "Use numpy, pandas, scipy and sympy for working with data.", + "Use matplotlib to plot charts.", + "Use pillow (import PIL) to create and manipulate images.", + "Use moviepy for complex manipulations with videos.", + "Use PyPDF2, pikepdf, or fitz to manipulate PDFs.", + "Use pdf2image to convert PDF to images.", + "Other Python libraries are also available -- however, prefer using the ones above.", + "Prefer using qualified imports -- `import library; library.thing()` instead of `import thing from library`.", + "Do not attempt to install libraries manually -- it will not work.", + "Each invocation of Python runs in a completely fresh VM -- it will not remember anything from before.", + "Do not use this tool multiple times in a row, always write the full code you want to run in a single invocation.", + ].join(" "); + + public readonly storage: PythonStorage; + protected files: PythonFile[] = []; + + async inputSchema() { + this.files = await this.storage.list(); + const fileNames = unique(map(this.files, ({ filename }) => filename)); + return z.object({ + language: z.enum(["python", "shell"]).describe("Use shell for ffmpeg, pandoc, yt-dlp"), + code: z.string().describe("full source code file that will be executed"), + ...(hasMinLength(fileNames, 1) + ? { + inputFiles: z + .array(z.enum(fileNames)) + .describe( + "To access an existing file, you must specify it; otherwise, the file will not be accessible. IMPORTANT: If the file is not provided in the input, it will not be accessible.", + ), + } + : {}), + }); + } + + protected readonly preprocess; + + public constructor(options: PythonToolOptions) { + super(options); + if (!options.codeInterpreter.url) { + throw new ValidationError([ + { + message: "Property must be a valid URL!", + data: options, + propertyName: "codeInterpreter.url", + }, + ]); + } + this.preprocess = options.preprocess; + this.storage = options.storage; + } + + static { + this.register(); + } + + protected async _run( + input: ToolInput, + _options: BaseToolRunOptions | undefined, + run: RunContext, + ) { + const inputFiles = await pipe( + this.files ?? (await this.storage.list()), + uniqueBy((f) => f.filename), + filter((file) => isIncludedIn(file.filename, (input.inputFiles ?? []) as string[])), + (files) => this.storage.upload(files), + ); + + // replace relative paths in "files" with absolute paths by prepending "/workspace" + const getSourceCode = async () => { + if (this.preprocess) { + const { llm, promptTemplate } = this.preprocess; + const response = await llm.generate(promptTemplate.render({ input: input.code }), { + signal: run.signal, + stream: false, + }); + return response.getTextContent().trim(); + } + return input.code; + }; + + const prefix = "/workspace/"; + + let response; + const httpUrl = this.options.codeInterpreter.url + "/v1/execute"; + try { + response = await fetch(httpUrl, { + method: "POST", + headers: { + "Accept": "application/json", + "Content-Type": "application/json", + }, + body: JSON.stringify({ + source_code: await getSourceCode(), + executorId: this.options.executorId ?? "default", + files: Object.fromEntries( + inputFiles.map((file) => [`${prefix}${file.filename}`, file.pythonId]), + ), + }), + }); + } catch (error) { + if (error.cause.name == "HTTPParserError") { + throw new ToolError("Python tool over HTTP failed -- not using HTTP endpoint!", [error]); + } else { + throw new ToolError("Python tool over HTTP failed!", [error]); + } + } + + if (!response?.ok) { + throw new ToolError("HTTP request failed!", [new Error(await response.text())]); + } + + const result = await response.json(); + + // replace absolute paths in "files" with relative paths by removing "/workspace/" + // skip files that are not in "/workspace" + // skip entries that are also entries in filesInput + const filesOutput = await this.storage.download( + // @ts-ignore + Object.entries(result.files) + .map(([k, v]) => { + const file = { path: k, pythonId: v }; + if (!file.path.startsWith(prefix)) { + return; + } + + const filename = file.path.slice(prefix.length); + if ( + inputFiles.some( + (input) => input.filename === filename && input.pythonId === file.pythonId, + ) + ) { + return; + } + + return { + pythonId: file.pythonId, + filename, + }; + }) + .filter(isTruthy), + ); + + return new PythonToolOutput(result.stdout, result.stderr, result.exit_code, filesOutput); + } + + createSnapshot() { + return { + ...super.createSnapshot(), + files: this.files, + storage: this.storage, + preprocess: this.preprocess, + }; + } + + loadSnapshot(snapshot: ReturnType): void { + super.loadSnapshot(snapshot); + } +} diff --git a/test_dir/test_file.txt b/test_dir/test_file.txt new file mode 100644 index 00000000..f2b37bd5 --- /dev/null +++ b/test_dir/test_file.txt @@ -0,0 +1,3 @@ +This is a test. +This is only a test. + diff --git a/tests/e2e/tools/python/python_http.test.ts b/tests/e2e/tools/python/python_http.test.ts new file mode 100644 index 00000000..f18eef4f --- /dev/null +++ b/tests/e2e/tools/python/python_http.test.ts @@ -0,0 +1,71 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { expect } from "vitest"; +import { PythonHttpTool } from "@/tools/python/python_http.js"; +import { LocalPythonStorage } from "@/tools/python/storage.js"; + +import { ToolError } from "@/tools/base.js"; + +const codeInterpreterUrl = process.env.CODE_INTERPRETER_URL || "http://localhost:50081"; + +const getPythonTool = () => + new PythonHttpTool({ + codeInterpreter: { url: codeInterpreterUrl }, + storage: new LocalPythonStorage({ + interpreterWorkingDir: "/tmp/code-interpreter-storage", + localWorkingDir: "./test_dir/", + }), + }); + +describe("PythonTool", () => { + it("Returns zero exitCode and stdout results", async () => { + const result = await getPythonTool().run({ + language: "python", + code: "print('hello')", + inputFiles: [], + }); + + expect(result.exitCode).toBe(0); + expect(result.stdout).toBe("hello\n"); + }); + + it("Returns non-zero exitCode and stderr for bad python", async () => { + const result = await getPythonTool().run({ + language: "python", + code: "PUT LIST (((ARR(I,J) DO I = 1 TO 5) DO J = 1 TO 5))", + inputFiles: [], + }); + + expect(result.exitCode).toBe(1); + expect(result.stderr).toMatch("SyntaxError"); + }); + + it("Throws tool error for code exceptions", async () => { + const sourceCode = ` + with open("wrong_file_here.txt", 'r') as f: + print(f.read()) + `; + + await expect( + getPythonTool().run({ + language: "python", + code: sourceCode, + inputFiles: ["test_file.txt"], + }), + ).rejects.toThrowError(ToolError); + }); +}); From 8d45d6e744acc3f447fc04d4da51793a608d5119 Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Fri, 13 Dec 2024 15:20:27 -0800 Subject: [PATCH 2/3] fix: update python http tool after rebase Signed-off-by: Mark Sturdevant --- src/tools/python/python_http.ts | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/tools/python/python_http.ts b/src/tools/python/python_http.ts index 8e73c293..7dee4329 100644 --- a/src/tools/python/python_http.ts +++ b/src/tools/python/python_http.ts @@ -14,7 +14,14 @@ * limitations under the License. */ -import { BaseToolOptions, BaseToolRunOptions, Tool, ToolError, ToolInput } from "@/tools/base.js"; +import { + BaseToolOptions, + BaseToolRunOptions, + ToolEmitter, + Tool, + ToolError, + ToolInput, +} from "@/tools/base.js"; import { z } from "zod"; import { BaseLLMOutput } from "@/llms/base.js"; import { LLM } from "@/llms/llm.js"; @@ -26,6 +33,7 @@ import { ValidationError } from "ajv"; import { ConnectionOptions } from "node:tls"; import { RunContext } from "@/context.js"; import { hasMinLength } from "@/internals/helpers/array.js"; +import { Emitter } from "@/emitter/emitter.js"; export interface CodeInterpreterOptions { url: string; @@ -45,8 +53,8 @@ export interface PythonToolOptions extends BaseToolOptions { export class PythonHttpTool extends Tool { name = "Python"; description = [ - "Run Python and/or shell code and return the console output. Use for isolated calculations, computations, data or file manipulation.", - "Files provided by the user, or created in a previous run, will be accesible if and only if they are specified in the input. It is necessary to always print() results.", + "Run Python and/or shell code and return the console output. Use for isolated calculations, computations, data or file manipulation but still prefer assistant's capabilities (IMPORTANT: Do not use for text analysis or summarization).", + "Files provided by the user, or created in a previous run, will be accessible if and only if they are specified in the input. It is necessary to always print() results.", "The following shell commands are available:", "Use ffmpeg to convert videos.", "Use yt-dlp to download videos, and unless specified otherwise use `-S vcodec:h264,res,acodec:m4a` for video and `-x --audio-format mp3` for audio-only.", @@ -68,6 +76,11 @@ export class PythonHttpTool extends Tool { public readonly storage: PythonStorage; protected files: PythonFile[] = []; + public readonly emitter: ToolEmitter, PythonToolOutput> = Emitter.root.child({ + namespace: ["tool", "python"], + creator: this, + }); + async inputSchema() { this.files = await this.storage.list(); const fileNames = unique(map(this.files, ({ filename }) => filename)); @@ -109,7 +122,7 @@ export class PythonHttpTool extends Tool { protected async _run( input: ToolInput, - _options: BaseToolRunOptions | undefined, + _options: Partial, run: RunContext, ) { const inputFiles = await pipe( From 0143054a496df3c6c6a24b7edfe90600dcc72718 Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Fri, 13 Dec 2024 15:34:58 -0800 Subject: [PATCH 3/3] feat!: use python code tool over http * Modify the existing PythonTool over gRPC to go over HTTP. * Add tests (written for gRPC but adapted for the new one). * Requires CODE_INTERPRETER_URL env var to point to exposed HTTP port (50081). BREAKING CHANGE: Requires exposed port and updated CODE_INTERPRETER_URL. Signed-off-by: Mark Sturdevant --- .../{python_http.test.ts => python.test.ts} | 12 +- src/tools/python/python.ts | 70 +++--- src/tools/python/python_http.ts | 225 ------------------ .../{python_http.test.ts => python.test.ts} | 4 +- 4 files changed, 49 insertions(+), 262 deletions(-) rename src/tools/python/{python_http.test.ts => python.test.ts} (88%) delete mode 100644 src/tools/python/python_http.ts rename tests/e2e/tools/python/{python_http.test.ts => python.test.ts} (95%) diff --git a/src/tools/python/python_http.test.ts b/src/tools/python/python.test.ts similarity index 88% rename from src/tools/python/python_http.test.ts rename to src/tools/python/python.test.ts index 3d64086d..05b9c4c9 100644 --- a/src/tools/python/python_http.test.ts +++ b/src/tools/python/python.test.ts @@ -15,14 +15,14 @@ */ import { describe, it, expect } from "vitest"; -import { PythonHttpTool } from "@/tools/python/python_http.js"; +import { PythonTool } from "@/tools/python/python.js"; import { verifyDeserialization } from "@tests/e2e/utils.js"; import { LocalPythonStorage } from "@/tools/python/storage.js"; const codeInterpreterUrl = process.env.CODE_INTERPRETER_URL || "http://localhost:50081"; const getPythonTool = () => - new PythonHttpTool({ + new PythonTool({ codeInterpreter: { url: codeInterpreterUrl }, storage: new LocalPythonStorage({ interpreterWorkingDir: "/tmp/code-interpreter-storage", @@ -30,11 +30,11 @@ const getPythonTool = () => }), }); -describe("PythonHttpTool", () => { +describe("PythonTool", () => { it("Is the expected tool", () => { const tool = getPythonTool(); - expect(tool).toBeInstanceOf(PythonHttpTool); - expect(PythonHttpTool.isTool(tool)).toBe(true); + expect(tool).toBeInstanceOf(PythonTool); + expect(PythonTool.isTool(tool)).toBe(true); expect(tool.name).toBe("Python"); expect(tool.description).toMatch("Run Python and/or shell code"); }); @@ -69,7 +69,7 @@ describe("PythonHttpTool", () => { it("serializes", async () => { const tool = getPythonTool(); const serialized = tool.serialize(); - const deserializedTool = PythonHttpTool.fromSerialized(serialized); + const deserializedTool = PythonTool.fromSerialized(serialized); verifyDeserialization(tool, deserializedTool); }); }); diff --git a/src/tools/python/python.ts b/src/tools/python/python.ts index 2969e160..d8cc16dd 100644 --- a/src/tools/python/python.ts +++ b/src/tools/python/python.ts @@ -14,10 +14,14 @@ * limitations under the License. */ -import { BaseToolOptions, BaseToolRunOptions, ToolEmitter, Tool, ToolInput } from "@/tools/base.js"; -import { createGrpcTransport } from "@connectrpc/connect-node"; -import { PromiseClient, createPromiseClient } from "@connectrpc/connect"; -import { CodeInterpreterService } from "bee-proto/code_interpreter/v1/code_interpreter_service_connect"; +import { + BaseToolOptions, + BaseToolRunOptions, + ToolEmitter, + Tool, + ToolError, + ToolInput, +} from "@/tools/base.js"; import { z } from "zod"; import { BaseLLMOutput } from "@/llms/base.js"; import { LLM } from "@/llms/llm.js"; @@ -95,7 +99,6 @@ export class PythonTool extends Tool { }); } - protected readonly client: PromiseClient; protected readonly preprocess; public constructor(options: PythonToolOptions) { @@ -109,7 +112,6 @@ export class PythonTool extends Tool { }, ]); } - this.client = this._createClient(); this.preprocess = options.preprocess; this.storage = options.storage; } @@ -118,17 +120,6 @@ export class PythonTool extends Tool { this.register(); } - protected _createClient(): PromiseClient { - return createPromiseClient( - CodeInterpreterService, - createGrpcTransport({ - baseUrl: this.options.codeInterpreter.url, - httpVersion: "2", - nodeOptions: this.options.codeInterpreter.connectionOptions, - }), - ); - } - protected async _run( input: ToolInput, _options: Partial, @@ -156,21 +147,42 @@ export class PythonTool extends Tool { const prefix = "/workspace/"; - const result = await this.client.execute( - { - sourceCode: await getSourceCode(), - executorId: this.options.executorId ?? "default", - files: Object.fromEntries( - inputFiles.map((file) => [`${prefix}${file.filename}`, file.pythonId]), - ), - }, - { signal: run.signal }, - ); + let response; + const httpUrl = this.options.codeInterpreter.url + "/v1/execute"; + try { + response = await fetch(httpUrl, { + method: "POST", + headers: { + "Accept": "application/json", + "Content-Type": "application/json", + }, + body: JSON.stringify({ + source_code: await getSourceCode(), + executorId: this.options.executorId ?? "default", + files: Object.fromEntries( + inputFiles.map((file) => [`${prefix}${file.filename}`, file.pythonId]), + ), + }), + }); + } catch (error) { + if (error.cause.name == "HTTPParserError") { + throw new ToolError("Python tool over HTTP failed -- not using HTTP endpoint!", [error]); + } else { + throw new ToolError("Python tool over HTTP failed!", [error]); + } + } + + if (!response?.ok) { + throw new ToolError("HTTP request failed!", [new Error(await response.text())]); + } + + const result = await response.json(); // replace absolute paths in "files" with relative paths by removing "/workspace/" // skip files that are not in "/workspace" // skip entries that are also entries in filesInput const filesOutput = await this.storage.download( + // @ts-ignore Object.entries(result.files) .map(([k, v]) => { const file = { path: k, pythonId: v }; @@ -194,12 +206,13 @@ export class PythonTool extends Tool { }) .filter(isTruthy), ); - return new PythonToolOutput(result.stdout, result.stderr, result.exitCode, filesOutput); + return new PythonToolOutput(result.stdout, result.stderr, result.exit_code, filesOutput); } createSnapshot() { return { ...super.createSnapshot(), + files: this.files, storage: this.storage, preprocess: this.preprocess, }; @@ -207,6 +220,5 @@ export class PythonTool extends Tool { loadSnapshot(snapshot: ReturnType): void { super.loadSnapshot(snapshot); - Object.assign(this, { client: this._createClient() }); } } diff --git a/src/tools/python/python_http.ts b/src/tools/python/python_http.ts deleted file mode 100644 index 7dee4329..00000000 --- a/src/tools/python/python_http.ts +++ /dev/null @@ -1,225 +0,0 @@ -/** - * Copyright 2024 IBM Corp. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import { - BaseToolOptions, - BaseToolRunOptions, - ToolEmitter, - Tool, - ToolError, - ToolInput, -} from "@/tools/base.js"; -import { z } from "zod"; -import { BaseLLMOutput } from "@/llms/base.js"; -import { LLM } from "@/llms/llm.js"; -import { PromptTemplate } from "@/template.js"; -import { filter, isIncludedIn, isTruthy, map, pipe, unique, uniqueBy } from "remeda"; -import { PythonFile, PythonStorage } from "@/tools/python/storage.js"; -import { PythonToolOutput } from "@/tools/python/output.js"; -import { ValidationError } from "ajv"; -import { ConnectionOptions } from "node:tls"; -import { RunContext } from "@/context.js"; -import { hasMinLength } from "@/internals/helpers/array.js"; -import { Emitter } from "@/emitter/emitter.js"; - -export interface CodeInterpreterOptions { - url: string; - connectionOptions?: ConnectionOptions; -} - -export interface PythonToolOptions extends BaseToolOptions { - codeInterpreter: CodeInterpreterOptions; - executorId?: string; - preprocess?: { - llm: LLM; - promptTemplate: PromptTemplate.infer<{ input: string }>; - }; - storage: PythonStorage; -} - -export class PythonHttpTool extends Tool { - name = "Python"; - description = [ - "Run Python and/or shell code and return the console output. Use for isolated calculations, computations, data or file manipulation but still prefer assistant's capabilities (IMPORTANT: Do not use for text analysis or summarization).", - "Files provided by the user, or created in a previous run, will be accessible if and only if they are specified in the input. It is necessary to always print() results.", - "The following shell commands are available:", - "Use ffmpeg to convert videos.", - "Use yt-dlp to download videos, and unless specified otherwise use `-S vcodec:h264,res,acodec:m4a` for video and `-x --audio-format mp3` for audio-only.", - "Use pandoc to convert documents between formats (like MD, DOC, DOCX, PDF) -- and don't forget that you can create PDFs by writing markdown and then converting.", - "In Python, the following modules are available:", - "Use numpy, pandas, scipy and sympy for working with data.", - "Use matplotlib to plot charts.", - "Use pillow (import PIL) to create and manipulate images.", - "Use moviepy for complex manipulations with videos.", - "Use PyPDF2, pikepdf, or fitz to manipulate PDFs.", - "Use pdf2image to convert PDF to images.", - "Other Python libraries are also available -- however, prefer using the ones above.", - "Prefer using qualified imports -- `import library; library.thing()` instead of `import thing from library`.", - "Do not attempt to install libraries manually -- it will not work.", - "Each invocation of Python runs in a completely fresh VM -- it will not remember anything from before.", - "Do not use this tool multiple times in a row, always write the full code you want to run in a single invocation.", - ].join(" "); - - public readonly storage: PythonStorage; - protected files: PythonFile[] = []; - - public readonly emitter: ToolEmitter, PythonToolOutput> = Emitter.root.child({ - namespace: ["tool", "python"], - creator: this, - }); - - async inputSchema() { - this.files = await this.storage.list(); - const fileNames = unique(map(this.files, ({ filename }) => filename)); - return z.object({ - language: z.enum(["python", "shell"]).describe("Use shell for ffmpeg, pandoc, yt-dlp"), - code: z.string().describe("full source code file that will be executed"), - ...(hasMinLength(fileNames, 1) - ? { - inputFiles: z - .array(z.enum(fileNames)) - .describe( - "To access an existing file, you must specify it; otherwise, the file will not be accessible. IMPORTANT: If the file is not provided in the input, it will not be accessible.", - ), - } - : {}), - }); - } - - protected readonly preprocess; - - public constructor(options: PythonToolOptions) { - super(options); - if (!options.codeInterpreter.url) { - throw new ValidationError([ - { - message: "Property must be a valid URL!", - data: options, - propertyName: "codeInterpreter.url", - }, - ]); - } - this.preprocess = options.preprocess; - this.storage = options.storage; - } - - static { - this.register(); - } - - protected async _run( - input: ToolInput, - _options: Partial, - run: RunContext, - ) { - const inputFiles = await pipe( - this.files ?? (await this.storage.list()), - uniqueBy((f) => f.filename), - filter((file) => isIncludedIn(file.filename, (input.inputFiles ?? []) as string[])), - (files) => this.storage.upload(files), - ); - - // replace relative paths in "files" with absolute paths by prepending "/workspace" - const getSourceCode = async () => { - if (this.preprocess) { - const { llm, promptTemplate } = this.preprocess; - const response = await llm.generate(promptTemplate.render({ input: input.code }), { - signal: run.signal, - stream: false, - }); - return response.getTextContent().trim(); - } - return input.code; - }; - - const prefix = "/workspace/"; - - let response; - const httpUrl = this.options.codeInterpreter.url + "/v1/execute"; - try { - response = await fetch(httpUrl, { - method: "POST", - headers: { - "Accept": "application/json", - "Content-Type": "application/json", - }, - body: JSON.stringify({ - source_code: await getSourceCode(), - executorId: this.options.executorId ?? "default", - files: Object.fromEntries( - inputFiles.map((file) => [`${prefix}${file.filename}`, file.pythonId]), - ), - }), - }); - } catch (error) { - if (error.cause.name == "HTTPParserError") { - throw new ToolError("Python tool over HTTP failed -- not using HTTP endpoint!", [error]); - } else { - throw new ToolError("Python tool over HTTP failed!", [error]); - } - } - - if (!response?.ok) { - throw new ToolError("HTTP request failed!", [new Error(await response.text())]); - } - - const result = await response.json(); - - // replace absolute paths in "files" with relative paths by removing "/workspace/" - // skip files that are not in "/workspace" - // skip entries that are also entries in filesInput - const filesOutput = await this.storage.download( - // @ts-ignore - Object.entries(result.files) - .map(([k, v]) => { - const file = { path: k, pythonId: v }; - if (!file.path.startsWith(prefix)) { - return; - } - - const filename = file.path.slice(prefix.length); - if ( - inputFiles.some( - (input) => input.filename === filename && input.pythonId === file.pythonId, - ) - ) { - return; - } - - return { - pythonId: file.pythonId, - filename, - }; - }) - .filter(isTruthy), - ); - - return new PythonToolOutput(result.stdout, result.stderr, result.exit_code, filesOutput); - } - - createSnapshot() { - return { - ...super.createSnapshot(), - files: this.files, - storage: this.storage, - preprocess: this.preprocess, - }; - } - - loadSnapshot(snapshot: ReturnType): void { - super.loadSnapshot(snapshot); - } -} diff --git a/tests/e2e/tools/python/python_http.test.ts b/tests/e2e/tools/python/python.test.ts similarity index 95% rename from tests/e2e/tools/python/python_http.test.ts rename to tests/e2e/tools/python/python.test.ts index f18eef4f..838dc0da 100644 --- a/tests/e2e/tools/python/python_http.test.ts +++ b/tests/e2e/tools/python/python.test.ts @@ -15,7 +15,7 @@ */ import { expect } from "vitest"; -import { PythonHttpTool } from "@/tools/python/python_http.js"; +import { PythonTool } from "@/tools/python/python.js"; import { LocalPythonStorage } from "@/tools/python/storage.js"; import { ToolError } from "@/tools/base.js"; @@ -23,7 +23,7 @@ import { ToolError } from "@/tools/base.js"; const codeInterpreterUrl = process.env.CODE_INTERPRETER_URL || "http://localhost:50081"; const getPythonTool = () => - new PythonHttpTool({ + new PythonTool({ codeInterpreter: { url: codeInterpreterUrl }, storage: new LocalPythonStorage({ interpreterWorkingDir: "/tmp/code-interpreter-storage",