From 2140fa0ad3bb058685bd7d209c35fec442abadc8 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Mon, 21 Apr 2025 04:07:24 +0000 Subject: [PATCH 1/2] adding fetch tool --- .vscode/settings.json | 5 + docs/genaisrc/tsconfig.json | 5 +- docs/src/components/BuiltinTools.mdx | 1 + .../content/docs/reference/scripts/system.mdx | 106 +++++++++++++++++- eval/extrism/genaisrc/tsconfig.json | 5 +- genaisrc/tsconfig.json | 5 +- packages/auto/tsconfig.json | 5 +- packages/cli/genaisrc/system.fetch.genai.mts | 90 +++++++++++++++ packages/core/bundleprompts.js | 3 +- packages/core/src/fetchtext.ts | 4 +- packages/core/src/types/prompt_template.d.ts | 2 +- packages/core/src/vars.ts | 21 +++- packages/sample/genaisrc/blog/tsconfig.json | 5 +- .../sample/genaisrc/fetch-tools.genai.mts | 11 ++ packages/sample/genaisrc/tsconfig.json | 5 +- packages/vscode/genaisrc/tsconfig.json | 5 +- 16 files changed, 247 insertions(+), 31 deletions(-) create mode 100644 packages/cli/genaisrc/system.fetch.genai.mts create mode 100644 packages/sample/genaisrc/fetch-tools.genai.mts diff --git a/.vscode/settings.json b/.vscode/settings.json index 22e62cc893..855ca6f5db 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -65,6 +65,7 @@ "echomodel", "emojify", "Entra", + "evalprompt", "Evals", "execa", "extname", @@ -92,6 +93,7 @@ "htmlescape", "huggingface", "icontains", + "importprompt", "jaegertracing", "Jamba", "JSONLLM", @@ -163,12 +165,14 @@ "previ", "PRICINGS", "priompt", + "promptcontext", "promptdom", "promptfoo", "promptfooconfig", "promptjson", "promptrunner", "prompty", + "proxify", "pyodide", "quoteify", "qwen", @@ -210,6 +214,7 @@ "tvly", "typecheck", "unfence", + "unmarkdown", "unthink", "unwrappers", "urllib", diff --git a/docs/genaisrc/tsconfig.json b/docs/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/docs/genaisrc/tsconfig.json +++ b/docs/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/docs/src/components/BuiltinTools.mdx b/docs/src/components/BuiltinTools.mdx index dd26159003..d5edc9ccef 100644 --- a/docs/src/components/BuiltinTools.mdx +++ b/docs/src/components/BuiltinTools.mdx @@ -6,6 +6,7 @@ import { LinkCard } from '@astrojs/starlight/components'; ### Builtin tools + diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx index 3f4dbb8e1d..faf23c2dbc 100644 --- a/docs/src/content/docs/reference/scripts/system.mdx +++ b/docs/src/content/docs/reference/scripts/system.mdx @@ -60,8 +60,7 @@ system({ ..., parameters: { model: { type: "string", - description: "LLM model to use", - default: "gpt-35-turbo", + description: "LLM model to use" }, }, }) @@ -1184,6 +1183,109 @@ export default function (ctx: ChatGenerationContext) { ````` +### `system.fetch` + +A tool that can fetch data from a URL + + + +- tool `fetch`: Fetch data from a URL from allowed domains. + +`````js wrap title="system.fetch" +system({ + title: "A tool that can fetch data from a URL", + parameters: { + domains: { + type: "array", + items: { + type: "string", + description: "A list of allowed domains to fetch data from.", + }, + }, + }, +}) + +export default function (ctx: ChatGenerationContext) { + const { defTool, env } = ctx + + const dbg = host.logger(`system:fetch`) + const domains = env.vars["system.fetch.domains"] || [] + dbg(`allowed domains: %o`, domains) + + defTool( + "fetch", + "Fetch data from a URL from allowed domains.", + { + url: { + type: "string", + description: "The URL to fetch data from.", + required: true, + }, + convert: { + type: "string", + description: "Converts HTML to Markdown or plain text.", + required: false, + enum: ["markdown", "text"], + }, + ask: { + type: "string", + description: + "A LLM query to process and summarize the data content.", + required: false, + }, + }, + async ({ context, ...args }) => { + const { url, convert, ask } = args as { + url: string + convert: FetchTextOptions["convert"] + ask: string + } + const method = "GET" + const uri = new URL(url) + const domain = uri.hostname + if (!domains.includes(domain)) + return `error: domain ${domain} is not allowed.` + + dbg(`${method} ${url}`) + const res = await host.fetchText(url, { convert }) + dbg(`response: %d`, res.status) + if (!res.ok) return `error: ${res.status}` + if (!res.text) return res.file + + let result = res.text + if (ask) { + if (!convert) result = await HTML.convertToMarkdown(result) + const resAsk = await runPrompt( + (_) => { + const askVar = _.def("QUESTION", ask) + const contentVar = _.def("CONTENT", result) + _.$`Analyze the content of ${contentVar} and generate a respond for the question in ${askVar}. + Your response is the output of a LLM tool. + - Use information from ${contentVar} exclusively to answer. + - If you cannot find the information in ${contentVar}, respond with 'I do not have enough information to answer the question.'`.role( + "system" + ) + }, + { + model: "summarize", + responseType: "text", + systemSafety: true, + label: `asking fetched data`, + } + ) + if (!resAsk.error) result = resAsk.text + } + return result + }, + { + detectPromptInjection: "available", + } + ) +} + +````` + + ### `system.files` File generation diff --git a/eval/extrism/genaisrc/tsconfig.json b/eval/extrism/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/eval/extrism/genaisrc/tsconfig.json +++ b/eval/extrism/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/genaisrc/tsconfig.json b/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/genaisrc/tsconfig.json +++ b/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/packages/auto/tsconfig.json b/packages/auto/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/packages/auto/tsconfig.json +++ b/packages/auto/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/packages/cli/genaisrc/system.fetch.genai.mts b/packages/cli/genaisrc/system.fetch.genai.mts new file mode 100644 index 0000000000..112c6aa346 --- /dev/null +++ b/packages/cli/genaisrc/system.fetch.genai.mts @@ -0,0 +1,90 @@ +system({ + title: "A tool that can fetch data from a URL", + parameters: { + domains: { + type: "array", + items: { + type: "string", + description: "A list of allowed domains to fetch data from.", + }, + }, + }, +}) + +export default function (ctx: ChatGenerationContext) { + const { defTool, env } = ctx + + const dbg = host.logger(`system:fetch`) + const domains = env.vars["system.fetch.domains"] || [] + dbg(`allowed domains: %o`, domains) + + defTool( + "fetch", + "Fetch data from a URL from allowed domains.", + { + url: { + type: "string", + description: "The URL to fetch data from.", + required: true, + }, + convert: { + type: "string", + description: "Converts HTML to Markdown or plain text.", + required: false, + enum: ["markdown", "text"], + }, + ask: { + type: "string", + description: + "A LLM query to process and summarize the data content.", + required: false, + }, + }, + async ({ context, ...args }) => { + const { url, convert, ask } = args as { + url: string + convert: FetchTextOptions["convert"] + ask: string + } + const method = "GET" + const uri = new URL(url) + const domain = uri.hostname + if (!domains.includes(domain)) + return `error: domain ${domain} is not allowed.` + + dbg(`${method} ${url}`) + const res = await host.fetchText(url, { convert }) + dbg(`response: %d`, res.status) + if (!res.ok) return `error: ${res.status}` + if (!res.text) return res.file + + let result = res.text + if (ask) { + if (!convert) result = await HTML.convertToMarkdown(result) + const resAsk = await runPrompt( + (_) => { + const askVar = _.def("QUESTION", ask) + const contentVar = _.def("CONTENT", result) + _.$`Analyze the content of ${contentVar} and generate a respond for the question in ${askVar}. + Your response is the output of a LLM tool. + - Use information from ${contentVar} exclusively to answer. + - If you cannot find the information in ${contentVar}, respond with 'I do not have enough information to answer the question.'`.role( + "system" + ) + }, + { + model: "summarize", + responseType: "text", + systemSafety: true, + label: `asking fetched data`, + } + ) + if (!resAsk.error) result = resAsk.text + } + return result + }, + { + detectPromptInjection: "available", + } + ) +} diff --git a/packages/core/bundleprompts.js b/packages/core/bundleprompts.js index 1421274328..a007988739 100644 --- a/packages/core/bundleprompts.js +++ b/packages/core/bundleprompts.js @@ -218,8 +218,7 @@ system({ ..., parameters: { model: { type: "string", - description: "LLM model to use", - default: "gpt-35-turbo", + description: "LLM model to use" }, }, }) diff --git a/packages/core/src/fetchtext.ts b/packages/core/src/fetchtext.ts index 86a65eed12..68a69a7069 100644 --- a/packages/core/src/fetchtext.ts +++ b/packages/core/src/fetchtext.ts @@ -9,7 +9,7 @@ import { deleteUndefinedValues } from "./cleaners" import { prettyBytes } from "./pretty" import debug from "debug" import { uriRedact } from "./url" -import { HTMLToMarkdown, HTMLToText } from "./html" +import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html" import { createFetch } from "./fetch" const dbg = debug("genaiscript:fetch:text") @@ -110,6 +110,8 @@ export async function fetchText( }) else if (convert === "text") content = await HTMLToText(content, { trace, cancellationToken }) + else if (convert === "tables") + content = JSON.stringify(await HTMLTablesToJSON(content)) } ok = true const file: WorkspaceFile = deleteUndefinedValues({ diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index c69a81b18b..bc0f3d2f3f 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -5695,7 +5695,7 @@ type FetchOptions = RequestInit & { } type FetchTextOptions = Omit & { - convert?: "markdown" | "text" + convert?: "markdown" | "text" | "tables" } interface PythonRuntimeOptions { diff --git a/packages/core/src/vars.ts b/packages/core/src/vars.ts index e7437be924..8a672c304f 100644 --- a/packages/core/src/vars.ts +++ b/packages/core/src/vars.ts @@ -8,6 +8,11 @@ import { promptParameterTypeToJSONSchema, } from "./parameters" import { normalizeFloat, normalizeInt, normalizeVarKey } from "./cleaners" +import { genaiscriptDebug } from "./debug" +const dbg = genaiscriptDebug("vars") +const dbgSchema = dbg.extend("schema") +const dbgSystem = dbg.extend("system") +dbgSchema.enabled = false /** * Resolves and generates a JSON schema object representing the parameters schema @@ -28,7 +33,7 @@ export function resolveScriptParametersSchema( properties: {}, } const schema = promptParametersSchemaToJSONSchema(script.parameters) - res.properties["script"] = schema + if (schema) res.properties["script"] = schema for (const system of resolveSystems(prj, script) .map((s) => resolveScript(prj, s)) .filter((t) => t?.parameters)) { @@ -38,6 +43,7 @@ export function resolveScriptParametersSchema( ) }) } + dbgSchema(`%s: %O`, script.id, res.properties) return res } @@ -131,6 +137,8 @@ export function parsePromptParameters( delete res[key] } } + + dbg(`%s: %O`, script.id, res) return Object.freeze(res) } @@ -145,7 +153,7 @@ export function parsePromptParameters( * - Keys are normalized using `normalizeVarKey`. * - The proxy supports fetching keys, enumerating own keys, and retrieving property descriptors. * - The `Object.prototype.toString` method is overridden to return a YAML stringified version - * of the proxified parameters. + * of the proxify-ed parameters. * - The proxy allows access to parameter values using normalized keys. */ export function proxifyEnvVars(res: PromptParameters) { @@ -203,7 +211,10 @@ export function mergeEnvVarsWithSystem( system: SystemPromptInstance ): ExpansionVariables { const { parameters, vars } = system - if (!parameters && !vars) return ev + if (!parameters && !vars) { + dbgSystem(`%s: no vars`, system.id) + return ev + } const { vars: envVars, ...rest } = ev const parameterVars = Object.fromEntries( @@ -214,7 +225,9 @@ export function mergeEnvVarsWithSystem( ) const newVars = { ...envVars, ...parameterVars, ...(vars || {}) } - return { vars: newVars, ...rest } + const res = { vars: newVars, ...rest } + dbgSystem(`%s: %O`, system.id, res.vars) + return res } /** diff --git a/packages/sample/genaisrc/blog/tsconfig.json b/packages/sample/genaisrc/blog/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/packages/sample/genaisrc/blog/tsconfig.json +++ b/packages/sample/genaisrc/blog/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/packages/sample/genaisrc/fetch-tools.genai.mts b/packages/sample/genaisrc/fetch-tools.genai.mts new file mode 100644 index 0000000000..a04cea3ab1 --- /dev/null +++ b/packages/sample/genaisrc/fetch-tools.genai.mts @@ -0,0 +1,11 @@ +script({ + system: [ + "system", + { + id: "system.fetch", + parameters: { domains: ["azure.microsoft.com"] }, + }, + ], +}) + +$`Summarize the LLM prices at https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service` diff --git a/packages/sample/genaisrc/tsconfig.json b/packages/sample/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/packages/sample/genaisrc/tsconfig.json +++ b/packages/sample/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/packages/vscode/genaisrc/tsconfig.json b/packages/vscode/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/packages/vscode/genaisrc/tsconfig.json +++ b/packages/vscode/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file From ce0140ed517d7a9ee72638b28d2a4c9096f39bb9 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Mon, 21 Apr 2025 04:20:08 +0000 Subject: [PATCH 2/2] basic fetch working --- .../content/docs/reference/scripts/system.mdx | 37 +++++-------------- packages/cli/genaisrc/system.fetch.genai.mts | 37 +++++-------------- packages/core/src/chatrenderterminal.ts | 19 ++++++---- .../sample/genaisrc/fetch-tools.genai.mts | 3 +- 4 files changed, 33 insertions(+), 63 deletions(-) diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx index faf23c2dbc..625635238c 100644 --- a/docs/src/content/docs/reference/scripts/system.mdx +++ b/docs/src/content/docs/reference/scripts/system.mdx @@ -1227,18 +1227,17 @@ export default function (ctx: ChatGenerationContext) { required: false, enum: ["markdown", "text"], }, - ask: { + skipToContent: { type: "string", - description: - "A LLM query to process and summarize the data content.", + description: "Skip to a specific string in the content.", required: false, }, }, async ({ context, ...args }) => { - const { url, convert, ask } = args as { + const { url, convert, skipToContent } = args as { url: string convert: FetchTextOptions["convert"] - ask: string + skipToContent: string } const method = "GET" const uri = new URL(url) @@ -1250,30 +1249,14 @@ export default function (ctx: ChatGenerationContext) { const res = await host.fetchText(url, { convert }) dbg(`response: %d`, res.status) if (!res.ok) return `error: ${res.status}` - if (!res.text) return res.file + if (!res.text) return res.file ?? res.status let result = res.text - if (ask) { - if (!convert) result = await HTML.convertToMarkdown(result) - const resAsk = await runPrompt( - (_) => { - const askVar = _.def("QUESTION", ask) - const contentVar = _.def("CONTENT", result) - _.$`Analyze the content of ${contentVar} and generate a respond for the question in ${askVar}. - Your response is the output of a LLM tool. - - Use information from ${contentVar} exclusively to answer. - - If you cannot find the information in ${contentVar}, respond with 'I do not have enough information to answer the question.'`.role( - "system" - ) - }, - { - model: "summarize", - responseType: "text", - systemSafety: true, - label: `asking fetched data`, - } - ) - if (!resAsk.error) result = resAsk.text + if (skipToContent) { + const index = result.indexOf(skipToContent) + if (index === -1) + return `error: skipTo '${skipToContent}' not found.` + result = result.slice(index + skipToContent.length) } return result }, diff --git a/packages/cli/genaisrc/system.fetch.genai.mts b/packages/cli/genaisrc/system.fetch.genai.mts index 112c6aa346..07bd5ce3a3 100644 --- a/packages/cli/genaisrc/system.fetch.genai.mts +++ b/packages/cli/genaisrc/system.fetch.genai.mts @@ -33,18 +33,17 @@ export default function (ctx: ChatGenerationContext) { required: false, enum: ["markdown", "text"], }, - ask: { + skipToContent: { type: "string", - description: - "A LLM query to process and summarize the data content.", + description: "Skip to a specific string in the content.", required: false, }, }, async ({ context, ...args }) => { - const { url, convert, ask } = args as { + const { url, convert, skipToContent } = args as { url: string convert: FetchTextOptions["convert"] - ask: string + skipToContent: string } const method = "GET" const uri = new URL(url) @@ -56,30 +55,14 @@ export default function (ctx: ChatGenerationContext) { const res = await host.fetchText(url, { convert }) dbg(`response: %d`, res.status) if (!res.ok) return `error: ${res.status}` - if (!res.text) return res.file + if (!res.text) return res.file ?? res.status let result = res.text - if (ask) { - if (!convert) result = await HTML.convertToMarkdown(result) - const resAsk = await runPrompt( - (_) => { - const askVar = _.def("QUESTION", ask) - const contentVar = _.def("CONTENT", result) - _.$`Analyze the content of ${contentVar} and generate a respond for the question in ${askVar}. - Your response is the output of a LLM tool. - - Use information from ${contentVar} exclusively to answer. - - If you cannot find the information in ${contentVar}, respond with 'I do not have enough information to answer the question.'`.role( - "system" - ) - }, - { - model: "summarize", - responseType: "text", - systemSafety: true, - label: `asking fetched data`, - } - ) - if (!resAsk.error) result = resAsk.text + if (skipToContent) { + const index = result.indexOf(skipToContent) + if (index === -1) + return `error: skipTo '${skipToContent}' not found.` + result = result.slice(index + skipToContent.length) } return result }, diff --git a/packages/core/src/chatrenderterminal.ts b/packages/core/src/chatrenderterminal.ts index cb797c2398..2f99f11b7c 100644 --- a/packages/core/src/chatrenderterminal.ts +++ b/packages/core/src/chatrenderterminal.ts @@ -30,14 +30,17 @@ const dbg = genaiscriptDebug("chat:render") function renderTrimmed(s: string, rows: number, width: number) { const lines = s.split(/\n/g).filter((l) => !!l) - const head = Math.min(rows >> 1, lines.length - 1) - const tail = rows - head - const trimmed = lines.slice(0, head) - if (tail) { - const hidden = lines.length - head - tail - if (hidden === 1) trimmed.push(lines.at(-tail - 1)) - else if (hidden > 0) trimmed.push(`... (${hidden} lines)`) - trimmed.push(...lines.slice(-tail)) + let trimmed = lines.slice(0) + if (lines.length > rows) { + const head = Math.min(rows >> 1, lines.length - 1) + const tail = rows - head + trimmed = lines.slice(0, head) + if (tail) { + const hidden = lines.length - head - tail + if (hidden === 1) trimmed.push(lines.at(-tail - 1)) + else if (hidden > 0) trimmed.push(`... (${hidden} lines)`) + trimmed.push(...lines.slice(-tail)) + } } const res = trimmed.map((l) => wrapColor(CONSOLE_COLOR_DEBUG, "│" + ellipse(l, width) + "\n") diff --git a/packages/sample/genaisrc/fetch-tools.genai.mts b/packages/sample/genaisrc/fetch-tools.genai.mts index a04cea3ab1..14253b29ce 100644 --- a/packages/sample/genaisrc/fetch-tools.genai.mts +++ b/packages/sample/genaisrc/fetch-tools.genai.mts @@ -8,4 +8,5 @@ script({ ], }) -$`Summarize the LLM prices at https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service` +$`Generate a pricing table of the LLM prices at https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service . +Use today's prices.`