diff --git a/.vscode/settings.json b/.vscode/settings.json index 22e62cc893..855ca6f5db 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -65,6 +65,7 @@ "echomodel", "emojify", "Entra", + "evalprompt", "Evals", "execa", "extname", @@ -92,6 +93,7 @@ "htmlescape", "huggingface", "icontains", + "importprompt", "jaegertracing", "Jamba", "JSONLLM", @@ -163,12 +165,14 @@ "previ", "PRICINGS", "priompt", + "promptcontext", "promptdom", "promptfoo", "promptfooconfig", "promptjson", "promptrunner", "prompty", + "proxify", "pyodide", "quoteify", "qwen", @@ -210,6 +214,7 @@ "tvly", "typecheck", "unfence", + "unmarkdown", "unthink", "unwrappers", "urllib", diff --git a/docs/genaisrc/tsconfig.json b/docs/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/docs/genaisrc/tsconfig.json +++ b/docs/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/docs/src/components/BuiltinTools.mdx b/docs/src/components/BuiltinTools.mdx index dd26159003..d5edc9ccef 100644 --- a/docs/src/components/BuiltinTools.mdx +++ b/docs/src/components/BuiltinTools.mdx @@ -6,6 +6,7 @@ import { LinkCard } from '@astrojs/starlight/components'; ### Builtin tools + diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx index 3f4dbb8e1d..625635238c 100644 --- a/docs/src/content/docs/reference/scripts/system.mdx +++ b/docs/src/content/docs/reference/scripts/system.mdx @@ -60,8 +60,7 @@ system({ ..., parameters: { model: { type: "string", - description: "LLM model to use", - default: "gpt-35-turbo", + description: "LLM model to use" }, }, }) @@ -1184,6 +1183,92 @@ export default function (ctx: ChatGenerationContext) { ````` +### `system.fetch` + +A tool that can fetch data from a URL + + + +- tool `fetch`: Fetch data from a URL from allowed domains. + +`````js wrap title="system.fetch" +system({ + title: "A tool that can fetch data from a URL", + parameters: { + domains: { + type: "array", + items: { + type: "string", + description: "A list of allowed domains to fetch data from.", + }, + }, + }, +}) + +export default function (ctx: ChatGenerationContext) { + const { defTool, env } = ctx + + const dbg = host.logger(`system:fetch`) + const domains = env.vars["system.fetch.domains"] || [] + dbg(`allowed domains: %o`, domains) + + defTool( + "fetch", + "Fetch data from a URL from allowed domains.", + { + url: { + type: "string", + description: "The URL to fetch data from.", + required: true, + }, + convert: { + type: "string", + description: "Converts HTML to Markdown or plain text.", + required: false, + enum: ["markdown", "text"], + }, + skipToContent: { + type: "string", + description: "Skip to a specific string in the content.", + required: false, + }, + }, + async ({ context, ...args }) => { + const { url, convert, skipToContent } = args as { + url: string + convert: FetchTextOptions["convert"] + skipToContent: string + } + const method = "GET" + const uri = new URL(url) + const domain = uri.hostname + if (!domains.includes(domain)) + return `error: domain ${domain} is not allowed.` + + dbg(`${method} ${url}`) + const res = await host.fetchText(url, { convert }) + dbg(`response: %d`, res.status) + if (!res.ok) return `error: ${res.status}` + if (!res.text) return res.file ?? res.status + + let result = res.text + if (skipToContent) { + const index = result.indexOf(skipToContent) + if (index === -1) + return `error: skipTo '${skipToContent}' not found.` + result = result.slice(index + skipToContent.length) + } + return result + }, + { + detectPromptInjection: "available", + } + ) +} + +````` + + ### `system.files` File generation diff --git a/eval/extrism/genaisrc/tsconfig.json b/eval/extrism/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/eval/extrism/genaisrc/tsconfig.json +++ b/eval/extrism/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/genaisrc/tsconfig.json b/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/genaisrc/tsconfig.json +++ b/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/packages/auto/tsconfig.json b/packages/auto/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/packages/auto/tsconfig.json +++ b/packages/auto/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/packages/cli/genaisrc/system.fetch.genai.mts b/packages/cli/genaisrc/system.fetch.genai.mts new file mode 100644 index 0000000000..07bd5ce3a3 --- /dev/null +++ b/packages/cli/genaisrc/system.fetch.genai.mts @@ -0,0 +1,73 @@ +system({ + title: "A tool that can fetch data from a URL", + parameters: { + domains: { + type: "array", + items: { + type: "string", + description: "A list of allowed domains to fetch data from.", + }, + }, + }, +}) + +export default function (ctx: ChatGenerationContext) { + const { defTool, env } = ctx + + const dbg = host.logger(`system:fetch`) + const domains = env.vars["system.fetch.domains"] || [] + dbg(`allowed domains: %o`, domains) + + defTool( + "fetch", + "Fetch data from a URL from allowed domains.", + { + url: { + type: "string", + description: "The URL to fetch data from.", + required: true, + }, + convert: { + type: "string", + description: "Converts HTML to Markdown or plain text.", + required: false, + enum: ["markdown", "text"], + }, + skipToContent: { + type: "string", + description: "Skip to a specific string in the content.", + required: false, + }, + }, + async ({ context, ...args }) => { + const { url, convert, skipToContent } = args as { + url: string + convert: FetchTextOptions["convert"] + skipToContent: string + } + const method = "GET" + const uri = new URL(url) + const domain = uri.hostname + if (!domains.includes(domain)) + return `error: domain ${domain} is not allowed.` + + dbg(`${method} ${url}`) + const res = await host.fetchText(url, { convert }) + dbg(`response: %d`, res.status) + if (!res.ok) return `error: ${res.status}` + if (!res.text) return res.file ?? res.status + + let result = res.text + if (skipToContent) { + const index = result.indexOf(skipToContent) + if (index === -1) + return `error: skipTo '${skipToContent}' not found.` + result = result.slice(index + skipToContent.length) + } + return result + }, + { + detectPromptInjection: "available", + } + ) +} diff --git a/packages/core/bundleprompts.js b/packages/core/bundleprompts.js index 1421274328..a007988739 100644 --- a/packages/core/bundleprompts.js +++ b/packages/core/bundleprompts.js @@ -218,8 +218,7 @@ system({ ..., parameters: { model: { type: "string", - description: "LLM model to use", - default: "gpt-35-turbo", + description: "LLM model to use" }, }, }) diff --git a/packages/core/src/chatrenderterminal.ts b/packages/core/src/chatrenderterminal.ts index cb797c2398..2f99f11b7c 100644 --- a/packages/core/src/chatrenderterminal.ts +++ b/packages/core/src/chatrenderterminal.ts @@ -30,14 +30,17 @@ const dbg = genaiscriptDebug("chat:render") function renderTrimmed(s: string, rows: number, width: number) { const lines = s.split(/\n/g).filter((l) => !!l) - const head = Math.min(rows >> 1, lines.length - 1) - const tail = rows - head - const trimmed = lines.slice(0, head) - if (tail) { - const hidden = lines.length - head - tail - if (hidden === 1) trimmed.push(lines.at(-tail - 1)) - else if (hidden > 0) trimmed.push(`... (${hidden} lines)`) - trimmed.push(...lines.slice(-tail)) + let trimmed = lines.slice(0) + if (lines.length > rows) { + const head = Math.min(rows >> 1, lines.length - 1) + const tail = rows - head + trimmed = lines.slice(0, head) + if (tail) { + const hidden = lines.length - head - tail + if (hidden === 1) trimmed.push(lines.at(-tail - 1)) + else if (hidden > 0) trimmed.push(`... (${hidden} lines)`) + trimmed.push(...lines.slice(-tail)) + } } const res = trimmed.map((l) => wrapColor(CONSOLE_COLOR_DEBUG, "│" + ellipse(l, width) + "\n") diff --git a/packages/core/src/fetchtext.ts b/packages/core/src/fetchtext.ts index 86a65eed12..68a69a7069 100644 --- a/packages/core/src/fetchtext.ts +++ b/packages/core/src/fetchtext.ts @@ -9,7 +9,7 @@ import { deleteUndefinedValues } from "./cleaners" import { prettyBytes } from "./pretty" import debug from "debug" import { uriRedact } from "./url" -import { HTMLToMarkdown, HTMLToText } from "./html" +import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html" import { createFetch } from "./fetch" const dbg = debug("genaiscript:fetch:text") @@ -110,6 +110,8 @@ export async function fetchText( }) else if (convert === "text") content = await HTMLToText(content, { trace, cancellationToken }) + else if (convert === "tables") + content = JSON.stringify(await HTMLTablesToJSON(content)) } ok = true const file: WorkspaceFile = deleteUndefinedValues({ diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index c69a81b18b..bc0f3d2f3f 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -5695,7 +5695,7 @@ type FetchOptions = RequestInit & { } type FetchTextOptions = Omit & { - convert?: "markdown" | "text" + convert?: "markdown" | "text" | "tables" } interface PythonRuntimeOptions { diff --git a/packages/core/src/vars.ts b/packages/core/src/vars.ts index e7437be924..8a672c304f 100644 --- a/packages/core/src/vars.ts +++ b/packages/core/src/vars.ts @@ -8,6 +8,11 @@ import { promptParameterTypeToJSONSchema, } from "./parameters" import { normalizeFloat, normalizeInt, normalizeVarKey } from "./cleaners" +import { genaiscriptDebug } from "./debug" +const dbg = genaiscriptDebug("vars") +const dbgSchema = dbg.extend("schema") +const dbgSystem = dbg.extend("system") +dbgSchema.enabled = false /** * Resolves and generates a JSON schema object representing the parameters schema @@ -28,7 +33,7 @@ export function resolveScriptParametersSchema( properties: {}, } const schema = promptParametersSchemaToJSONSchema(script.parameters) - res.properties["script"] = schema + if (schema) res.properties["script"] = schema for (const system of resolveSystems(prj, script) .map((s) => resolveScript(prj, s)) .filter((t) => t?.parameters)) { @@ -38,6 +43,7 @@ export function resolveScriptParametersSchema( ) }) } + dbgSchema(`%s: %O`, script.id, res.properties) return res } @@ -131,6 +137,8 @@ export function parsePromptParameters( delete res[key] } } + + dbg(`%s: %O`, script.id, res) return Object.freeze(res) } @@ -145,7 +153,7 @@ export function parsePromptParameters( * - Keys are normalized using `normalizeVarKey`. * - The proxy supports fetching keys, enumerating own keys, and retrieving property descriptors. * - The `Object.prototype.toString` method is overridden to return a YAML stringified version - * of the proxified parameters. + * of the proxify-ed parameters. * - The proxy allows access to parameter values using normalized keys. */ export function proxifyEnvVars(res: PromptParameters) { @@ -203,7 +211,10 @@ export function mergeEnvVarsWithSystem( system: SystemPromptInstance ): ExpansionVariables { const { parameters, vars } = system - if (!parameters && !vars) return ev + if (!parameters && !vars) { + dbgSystem(`%s: no vars`, system.id) + return ev + } const { vars: envVars, ...rest } = ev const parameterVars = Object.fromEntries( @@ -214,7 +225,9 @@ export function mergeEnvVarsWithSystem( ) const newVars = { ...envVars, ...parameterVars, ...(vars || {}) } - return { vars: newVars, ...rest } + const res = { vars: newVars, ...rest } + dbgSystem(`%s: %O`, system.id, res.vars) + return res } /** diff --git a/packages/sample/genaisrc/blog/tsconfig.json b/packages/sample/genaisrc/blog/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/packages/sample/genaisrc/blog/tsconfig.json +++ b/packages/sample/genaisrc/blog/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/packages/sample/genaisrc/fetch-tools.genai.mts b/packages/sample/genaisrc/fetch-tools.genai.mts new file mode 100644 index 0000000000..14253b29ce --- /dev/null +++ b/packages/sample/genaisrc/fetch-tools.genai.mts @@ -0,0 +1,12 @@ +script({ + system: [ + "system", + { + id: "system.fetch", + parameters: { domains: ["azure.microsoft.com"] }, + }, + ], +}) + +$`Generate a pricing table of the LLM prices at https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service . +Use today's prices.` diff --git a/packages/sample/genaisrc/tsconfig.json b/packages/sample/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/packages/sample/genaisrc/tsconfig.json +++ b/packages/sample/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file diff --git a/packages/vscode/genaisrc/tsconfig.json b/packages/vscode/genaisrc/tsconfig.json index c092f18586..0abdb26cc2 100644 --- a/packages/vscode/genaisrc/tsconfig.json +++ b/packages/vscode/genaisrc/tsconfig.json @@ -17,9 +17,8 @@ "erasableSyntaxOnly": true }, "include": [ - "*.mjs", - "*.mts", - "src/*.mts", + "**/*.mjs", + "**/*.mts", "./genaiscript.d.ts" ] } \ No newline at end of file