diff --git a/packages/opencode/package.json b/packages/opencode/package.json index 9019635b105..6b8a1045540 100644 --- a/packages/opencode/package.json +++ b/packages/opencode/package.json @@ -107,6 +107,7 @@ "open": "10.1.2", "opentui-spinner": "0.0.6", "partial-json": "0.1.7", + "playwright": "1.52.0", "remeda": "catalog:", "solid-js": "catalog:", "strip-ansi": "7.1.2", diff --git a/packages/opencode/src/browser/assertText.ts b/packages/opencode/src/browser/assertText.ts new file mode 100644 index 00000000000..59c1f6eb8ca --- /dev/null +++ b/packages/opencode/src/browser/assertText.ts @@ -0,0 +1,41 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_assertText": requests the "browser" permission via ctx.ask, calls BrowserService.assertText(selector, expected, contains) and throws an Error when result.passed is falsy; on success the returned metadata echoes selector, expected and result.actual. `contains` defaults to false (exact match). */ export const BrowserAssertTextTool = Tool.define("browser_assertText", { + description: "Assert that an element's text matches an expected value", + parameters: z.object({ + selector: z.string().describe("Element selector"), + expected: z.string().describe("Expected text (or pattern)"), + contains: z.boolean().optional().default(false).describe("Should contain vs equal"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "assert_text", + selector: params.selector, + expected: params.expected, + }, + }) + + const result = await BrowserService.assertText(params.selector, params.expected, params.contains) + + if (!result.passed) { /* only the expected and actual values are quoted; the comparison mode is plain text */ + throw new Error(`Assertion failed: Expected text ${params.contains ? 
'containing' : 'equal to'} "${params.expected}", got "${result.actual}"`) + } + + return { + title: `Assertion passed`, + output: `✓ Text matches: ${params.expected}`, + metadata: { + selector: params.selector, + expected: params.expected, + actual: result.actual, + passed: true, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/assertURL.ts b/packages/opencode/src/browser/assertURL.ts new file mode 100644 index 00000000000..4afe09c4b57 --- /dev/null +++ b/packages/opencode/src/browser/assertURL.ts @@ -0,0 +1,37 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_assertURL": requests the "browser" permission, calls BrowserService.assertURL(pattern) and throws an Error when result.passed is falsy. NOTE(review): the failure message reads result.currentUrl while the success metadata reads result.url - confirm that BrowserService.assertURL returns both fields. */ export const BrowserAssertURLTool = Tool.define("browser_assertURL", { + description: "Assert that the current URL matches a pattern", + parameters: z.object({ + pattern: z.string().describe("URL pattern to match (substring or regex)"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "assert_url", + pattern: params.pattern, + }, + }) + + const result = await BrowserService.assertURL(params.pattern) + + if (!result.passed) { + throw new Error(`Assertion failed: URL "${result.currentUrl}" does not match "${params.pattern}"`) + } + + return { + title: `Assertion passed`, + output: `✓ URL matches: ${params.pattern}`, + metadata: { + pattern: params.pattern, + url: result.url, + passed: true, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/assertVisible.ts b/packages/opencode/src/browser/assertVisible.ts new file mode 100644 index 00000000000..d2cfc38aada --- /dev/null +++ b/packages/opencode/src/browser/assertVisible.ts @@ -0,0 +1,39 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_assertVisible": requests the "browser" permission, calls BrowserService.assertVisible(selector, visible) and throws an Error when result.passed is falsy. */ export const BrowserAssertVisibleTool = Tool.define("browser_assertVisible", { + description: "Assert that an element is visible or hidden", + parameters: z.object({ + selector: z.string().describe("Element 
selector"), + visible: z.boolean().optional().default(true).describe("Should be visible or hidden"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "assert_visible", + selector: params.selector, + visible: params.visible, + }, + }) + + const result = await BrowserService.assertVisible(params.selector, params.visible) + + if (!result.passed) { /* a failed assertion is reported by throwing */ + throw new Error(`Assertion failed: Element "${params.selector}" should be ${params.visible ? 'visible' : 'hidden'}`) + } + + return { + title: `Assertion passed`, + output: `✓ Element is ${params.visible ? 'visible' : 'hidden'}: ${params.selector}`, + metadata: { + selector: params.selector, + visible: params.visible, + passed: true, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/back.ts b/packages/opencode/src/browser/back.ts new file mode 100644 index 00000000000..82e46a16ea8 --- /dev/null +++ b/packages/opencode/src/browser/back.ts @@ -0,0 +1,29 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_back": requests the "browser" permission via ctx.ask, calls BrowserService.back() and reports the url and title it returns. Takes no parameters. */ export const BrowserBackTool = Tool.define("browser_back", { + description: "Navigate back in browser history", + parameters: z.object({}), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "back", + }, + }) + + const result = await BrowserService.back() + + return { + title: `Went back`, + output: `Navigated back to: ${result.url}`, + metadata: { + url: result.url, + title: result.title, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/browser.ts b/packages/opencode/src/browser/browser.ts new file mode 100644 index 00000000000..300167d947b --- /dev/null +++ b/packages/opencode/src/browser/browser.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_set": requests the "browser" permission, writes params.browser onto the object returned by BrowserService.getConfig() and then calls BrowserService.close(). */ export const BrowserSetTool = Tool.define("browser_set", { + 
description: "Configure browser settings (chromium, firefox, webkit)", + parameters: z.object({ + browser: z.enum(["chromium", "firefox", "webkit"]).describe("Browser to use"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "set_browser", + browser: params.browser, + }, + }) + + const config = await BrowserService.getConfig() /* NOTE(review): the returned object is mutated in place on the next line; getConfig() in ./index returns the module-level DEFAULT_CONFIG, so the choice persists for later launches - confirm this is intended */ + config.browser = params.browser + + await BrowserService.close() /* closes the running browser; presumably the next tool call relaunches with the new setting - confirm */ + + return { + title: `Browser set to ${params.browser}`, + output: `Browser changed to ${params.browser}. Previous browser was closed.`, + metadata: {}, + } + }, +}) diff --git a/packages/opencode/src/browser/check.ts b/packages/opencode/src/browser/check.ts new file mode 100644 index 00000000000..d8b5ab65eff --- /dev/null +++ b/packages/opencode/src/browser/check.ts @@ -0,0 +1,34 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_check": requests the "browser" permission via ctx.ask and calls BrowserService.check(selector, checked); `checked` defaults to true. The service result is not used. */ export const BrowserCheckTool = Tool.define("browser_check", { + description: "Check or uncheck a checkbox", + parameters: z.object({ + selector: z.string().describe("Checkbox selector"), + checked: z.boolean().optional().default(true).describe("Check or uncheck"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "check", + selector: params.selector, + checked: params.checked, + }, + }) + + await BrowserService.check(params.selector, params.checked) + + return { + title: `${params.checked ? 'Checked' : 'Unchecked'} checkbox`, + output: `${params.checked ? 
'Checked' : 'Unchecked'}: ${params.selector}`, + metadata: { + selector: params.selector, + checked: params.checked, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/clear.ts b/packages/opencode/src/browser/clear.ts new file mode 100644 index 00000000000..b8bf6f8dfd8 --- /dev/null +++ b/packages/opencode/src/browser/clear.ts @@ -0,0 +1,31 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_clear": requests the "browser" permission via ctx.ask and calls BrowserService.clear(selector); the returned metadata echoes the selector. */ export const BrowserClearTool = Tool.define("browser_clear", { + description: "Clear an input field", + parameters: z.object({ + selector: z.string().describe("Input element selector"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "clear", + selector: params.selector, + }, + }) + + await BrowserService.clear(params.selector) + + return { + title: `Cleared input`, + output: `Cleared: ${params.selector}`, + metadata: { + selector: params.selector, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/clearStorage.ts b/packages/opencode/src/browser/clearStorage.ts new file mode 100644 index 00000000000..375d5ffc67b --- /dev/null +++ b/packages/opencode/src/browser/clearStorage.ts @@ -0,0 +1,31 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_clearStorage": requests the "browser" permission via ctx.ask and calls BrowserService.clearStorage(type); `type` is one of "localStorage", "sessionStorage" or "all" and defaults to "all". */ export const BrowserClearStorageTool = Tool.define("browser_clearStorage", { + description: "Clear browser storage (localStorage, sessionStorage, or all)", + parameters: z.object({ + type: z.enum(["localStorage", "sessionStorage", "all"]).optional().default("all").describe("Storage type to clear"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "clear_storage", + type: params.type, + }, + }) + + await BrowserService.clearStorage(params.type) + + return { + title: `Cleared ${params.type}`, + output: `Cleared ${params.type === 'all' ? 
'all storage' : params.type}`, + metadata: { + type: params.type, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/click.ts b/packages/opencode/src/browser/click.ts new file mode 100644 index 00000000000..20974b5a802 --- /dev/null +++ b/packages/opencode/src/browser/click.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_click": requests the "browser" permission via ctx.ask, calls BrowserService.click(selector, { elementTimeoutMs: timeout }) and reports the url and title from the result. `timeout` is optional and forwarded as given. */ export const BrowserClickTool = Tool.define("browser_click", { + description: "Click on an element by selector", + parameters: z.object({ + selector: z.string().describe("Selector to identify the element to click"), + timeout: z.number().optional().describe("Timeout in milliseconds (default: 10000)"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + selector: params.selector, + action: "click", + }, + }) + + const result = await BrowserService.click(params.selector, { + elementTimeoutMs: params.timeout, + }) + + return { + title: `Clicked element on ${result.url}`, + output: `Successfully clicked element\nPage URL: ${result.url}\nPage title: ${result.title}`, + metadata: {}, + } + }, +}) diff --git a/packages/opencode/src/browser/close.ts b/packages/opencode/src/browser/close.ts new file mode 100644 index 00000000000..273acbc36b8 --- /dev/null +++ b/packages/opencode/src/browser/close.ts @@ -0,0 +1,26 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_close": requests the "browser" permission via ctx.ask, then awaits BrowserService.close(). Takes no parameters and returns a fixed success message with empty metadata. */ export const BrowserCloseTool = Tool.define("browser_close", { + description: "Close the browser and all tabs", + parameters: z.object({}), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "close", + }, + }) + + await BrowserService.close() /* the return value is not needed for the fixed response below */ + + return { + title: "Browser closed", + output: "Browser closed successfully", + metadata: {}, + } + }, +}) diff --git 
a/packages/opencode/src/browser/closeTab.ts b/packages/opencode/src/browser/closeTab.ts new file mode 100644 index 00000000000..13f1eadc1b5 --- /dev/null +++ b/packages/opencode/src/browser/closeTab.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_closeTab": requests the "browser" permission via ctx.ask, calls BrowserService.closeTab(index) and reports result.remaining. `index` is optional; when it is null or undefined the messages print "current". */ export const BrowserCloseTabTool = Tool.define("browser_closeTab", { + description: "Close a tab by index, or the current tab if no index specified", + parameters: z.object({ + index: z.number().optional().describe("Tab index to close (default: current tab)"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "close_tab", + index: params.index, + }, + }) + + const result = await BrowserService.closeTab(params.index) + + return { + title: `Closed tab ${params.index ?? 'current'}`, + output: `Closed tab at index ${params.index ?? 'current'}\nRemaining tabs: ${result.remaining}`, + metadata: { + remaining: result.remaining, + closedIndex: params.index, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/deleteCookie.ts b/packages/opencode/src/browser/deleteCookie.ts new file mode 100644 index 00000000000..21ebce6bf24 --- /dev/null +++ b/packages/opencode/src/browser/deleteCookie.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_deleteCookie": requests the "browser" permission via ctx.ask and calls BrowserService.deleteCookie(name, domain); `domain` is optional and is not included in the permission metadata. */ export const BrowserDeleteCookieTool = Tool.define("browser_deleteCookie", { + description: "Delete a cookie by name", + parameters: z.object({ + name: z.string().describe("Cookie name to delete"), + domain: z.string().optional().describe("Cookie domain"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "delete_cookie", + name: params.name, + }, + }) + + await BrowserService.deleteCookie(params.name, params.domain) + + return { + title: `Deleted cookie`, + output: 
`Deleted cookie "${params.name}"`, + metadata: { + name: params.name, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/doubleClick.ts b/packages/opencode/src/browser/doubleClick.ts new file mode 100644 index 00000000000..c2bd1e0be24 --- /dev/null +++ b/packages/opencode/src/browser/doubleClick.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_doubleClick": requests the "browser" permission via ctx.ask and calls BrowserService.doubleClick(selector, timeout); `timeout` is optional and passed as a bare number. */ export const BrowserDoubleClickTool = Tool.define("browser_doubleClick", { + description: "Double-click on an element by selector", + parameters: z.object({ + selector: z.string().describe("Selector for element to double-click"), + timeout: z.number().optional().describe("Timeout in milliseconds"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "double_click", + selector: params.selector, + }, + }) + + await BrowserService.doubleClick(params.selector, params.timeout) + + return { + title: `Double-clicked element`, + output: `Double-clicked: ${params.selector}`, + metadata: { + selector: params.selector, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/dragDrop.ts b/packages/opencode/src/browser/dragDrop.ts new file mode 100644 index 00000000000..3bd9fa44100 --- /dev/null +++ b/packages/opencode/src/browser/dragDrop.ts @@ -0,0 +1,35 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_dragDrop": requests the "browser" permission via ctx.ask and calls BrowserService.dragDrop(source, target, delay); `delay` is optional. */ export const BrowserDragDropTool = Tool.define("browser_dragDrop", { + description: "Drag an element and drop it onto another element", + parameters: z.object({ + source: z.string().describe("Selector for element to drag"), + target: z.string().describe("Selector for drop target"), + delay: z.number().optional().describe("Delay between drag and drop in ms"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "drag_drop", 
+ source: params.source, + target: params.target, + }, + }) + + await BrowserService.dragDrop(params.source, params.target, params.delay) + + return { + title: `Dragged and dropped`, + output: `Dragged ${params.source} to ${params.target}`, + metadata: { + source: params.source, + target: params.target, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/duplicateTab.ts b/packages/opencode/src/browser/duplicateTab.ts new file mode 100644 index 00000000000..585c10ad026 --- /dev/null +++ b/packages/opencode/src/browser/duplicateTab.ts @@ -0,0 +1,30 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_duplicateTab": requests the "browser" permission via ctx.ask, calls BrowserService.duplicateTab() and reports the url, title and newIndex from the result. Takes no parameters. */ export const BrowserDuplicateTabTool = Tool.define("browser_duplicateTab", { + description: "Duplicate the current tab", + parameters: z.object({}), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "duplicate_tab", + }, + }) + + const result = await BrowserService.duplicateTab() + + return { + title: `Duplicated tab`, + output: `Duplicated current tab\nNew tab: ${result.url}\nTitle: ${result.title}`, + metadata: { + url: result.url, + title: result.title, + newIndex: result.newIndex, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/evaluate.ts b/packages/opencode/src/browser/evaluate.ts new file mode 100644 index 00000000000..c40cdf5a26b --- /dev/null +++ b/packages/opencode/src/browser/evaluate.ts @@ -0,0 +1,40 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_evaluate": requests the "browser" permission via ctx.ask, then passes the full script to BrowserService.evaluate and lists any captured console messages in the output. NOTE(review): the permission metadata carries only a prefix of the script (see the substring call below) while the whole script is executed - confirm that is acceptable for the approval prompt. */ export const BrowserEvaluateTool = Tool.define("browser_evaluate", { + description: "Execute JavaScript code in the browser context", + parameters: z.object({ + script: z.string().describe("JavaScript code to execute in the page context"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + script: params.script.substring(0, 
100), + action: "evaluate", + }, + }) + + const result = await BrowserService.evaluate(params.script) + + let output = "Script executed successfully" + if (result.console.length > 0) { /* console messages are appended one per line as [type] text */ + output += "\n\nConsole output:" + for (const msg of result.console) { + output += `\n[${msg.type}] ${msg.text}` + } + } + + return { + title: "Script executed", + output, + metadata: { + result: result.result, + console: result.console, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/fill.ts b/packages/opencode/src/browser/fill.ts new file mode 100644 index 00000000000..2d632c498c4 --- /dev/null +++ b/packages/opencode/src/browser/fill.ts @@ -0,0 +1,34 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_fill": requests the "browser" permission via ctx.ask, then awaits BrowserService.fill(selector, value, { elementTimeoutMs: timeout }). `timeout` is optional and forwarded as given. Returns a success message that echoes the selector and the value, with empty metadata. NOTE(review): the value is echoed into both the permission metadata and the output - confirm this is acceptable for sensitive fields. */ export const BrowserFillTool = Tool.define("browser_fill", { + description: "Fill an input field with text", + parameters: z.object({ + selector: z.string().describe("Selector to identify the input element"), + value: z.string().describe("Value to fill into the input field"), + timeout: z.number().optional().describe("Timeout in milliseconds (default: 10000)"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + selector: params.selector, + value: params.value, + action: "fill", + }, + }) + + await BrowserService.fill(params.selector, params.value, { /* the return value is not needed for the response below */ + elementTimeoutMs: params.timeout, + }) + + return { + title: `Filled form field`, + output: `Successfully filled "${params.selector}" with value: ${params.value}`, + metadata: {}, + } + }, +}) diff --git a/packages/opencode/src/browser/forward.ts b/packages/opencode/src/browser/forward.ts new file mode 100644 index 00000000000..49c1adaa748 --- /dev/null +++ b/packages/opencode/src/browser/forward.ts @@ -0,0 +1,29 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_forward": requests the "browser" permission, calls BrowserService.forward() and reports the url and title it returns. */ export const BrowserForwardTool = 
Tool.define("browser_forward", { + description: "Navigate forward in browser history", + parameters: z.object({}), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "forward", + }, + }) + + const result = await BrowserService.forward() + + return { + title: `Went forward`, + output: `Navigated forward to: ${result.url}`, + metadata: { + url: result.url, + title: result.title, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/getAttribute.ts b/packages/opencode/src/browser/getAttribute.ts new file mode 100644 index 00000000000..c69350759b2 --- /dev/null +++ b/packages/opencode/src/browser/getAttribute.ts @@ -0,0 +1,35 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_getAttribute": requests the "browser" permission via ctx.ask, calls BrowserService.getAttribute(selector, attribute) and reports result.value in both the output and the metadata. */ export const BrowserGetAttributeTool = Tool.define("browser_getAttribute", { + description: "Get an attribute value from an element", + parameters: z.object({ + selector: z.string().describe("Element selector"), + attribute: z.string().describe("Attribute name to get"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "get_attribute", + selector: params.selector, + attribute: params.attribute, + }, + }) + + const result = await BrowserService.getAttribute(params.selector, params.attribute) + + return { + title: `Got attribute`, + output: `${params.attribute} of ${params.selector}: "${result.value}"`, + metadata: { + attribute: params.attribute, + value: result.value, + selector: params.selector, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/getCSS.ts b/packages/opencode/src/browser/getCSS.ts new file mode 100644 index 00000000000..b8d3bf484ce --- /dev/null +++ b/packages/opencode/src/browser/getCSS.ts @@ -0,0 +1,35 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_getCSS": requests the "browser" permission, calls BrowserService.getCSS(selector, property) and reports result.value. */ export const BrowserGetCSSTool = 
Tool.define("browser_getCSS", { + description: "Get a CSS property value from an element", + parameters: z.object({ + selector: z.string().describe("Element selector"), + property: z.string().describe("CSS property name"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "get_css", + selector: params.selector, + property: params.property, + }, + }) + + const result = await BrowserService.getCSS(params.selector, params.property) + + return { + title: `Got CSS value`, + output: `${params.property} of ${params.selector}: "${result.value}"`, + metadata: { + property: params.property, + value: result.value, + selector: params.selector, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/getCookies.ts b/packages/opencode/src/browser/getCookies.ts new file mode 100644 index 00000000000..c123ed069ad --- /dev/null +++ b/packages/opencode/src/browser/getCookies.ts @@ -0,0 +1,34 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_getCookies": requests the "browser" permission via ctx.ask, calls BrowserService.getCookies(domain) and prints each cookie as name=value on its own line, or 'No cookies' when the list is empty. `domain` is optional. */ export const BrowserGetCookiesTool = Tool.define("browser_getCookies", { + description: "Get all cookies, optionally filtered by domain", + parameters: z.object({ + domain: z.string().optional().describe("Filter by domain"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "get_cookies", + domain: params.domain, + }, + }) + + const result = await BrowserService.getCookies(params.domain) + + return { + title: `Got ${result.cookies.length} cookies`, + output: result.cookies.length > 0 + ? 
result.cookies.map(c => `${c.name}=${c.value}`).join('\n') + : 'No cookies', + metadata: { + cookies: result.cookies, + count: result.cookies.length, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/getLocalStorage.ts b/packages/opencode/src/browser/getLocalStorage.ts new file mode 100644 index 00000000000..6ff578c4d16 --- /dev/null +++ b/packages/opencode/src/browser/getLocalStorage.ts @@ -0,0 +1,33 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_getLocalStorage": requests the "browser" permission via ctx.ask and calls BrowserService.getLocalStorage(key). When `key` is truthy the response uses result.value; otherwise it uses result.all and reports the number of entries. Note that an empty-string key takes the "all" branch because the checks are truthiness tests. */ export const BrowserGetLocalStorageTool = Tool.define("browser_getLocalStorage", { + description: "Get a value from localStorage, or all values", + parameters: z.object({ + key: z.string().optional().describe("Key to get (default: all)"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "get_local_storage", + key: params.key, + }, + }) + + const result = await BrowserService.getLocalStorage(params.key) + + return { + title: params.key ? `Got localStorage: ${params.key}` : `Got all localStorage`, + output: params.key + ? `${params.key} = ${result.value}` + : `${Object.keys(result.all).length} items in localStorage`, + metadata: params.key + ? 
{ key: params.key, value: result.value } + : { all: result.all, count: Object.keys(result.all).length }, + } + }, +}) diff --git a/packages/opencode/src/browser/getPageSource.ts b/packages/opencode/src/browser/getPageSource.ts new file mode 100644 index 00000000000..a577a9f0e3c --- /dev/null +++ b/packages/opencode/src/browser/getPageSource.ts @@ -0,0 +1,31 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_getPageSource": requests the "browser" permission via ctx.ask and calls BrowserService.getPageSource(trimmed); `trimmed` defaults to true. The output text carries only the character count; the source itself is returned in metadata.source. */ export const BrowserGetPageSourceTool = Tool.define("browser_getPageSource", { + description: "Get the HTML source of the current page", + parameters: z.object({ + trimmed: z.boolean().optional().default(true).describe("Trim whitespace"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "get_page_source", + }, + }) + + const result = await BrowserService.getPageSource(params.trimmed) + + return { + title: `Got page source`, + output: `Page source: ${result.source.length} characters`, + metadata: { + source: result.source, + length: result.source.length, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/getText.ts b/packages/opencode/src/browser/getText.ts new file mode 100644 index 00000000000..24044ae9ca0 --- /dev/null +++ b/packages/opencode/src/browser/getText.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_getText": requests the "browser" permission via ctx.ask, calls BrowserService.getText(selector) and reports result.text in both the output and the metadata. */ export const BrowserGetTextTool = Tool.define("browser_getText", { + description: "Get the text content of an element", + parameters: z.object({ + selector: z.string().describe("Element selector"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "get_text", + selector: params.selector, + }, + }) + + const result = await BrowserService.getText(params.selector) + + return { + title: `Got text`, + output: `Text from ${params.selector}: 
"${result.text}"`, + metadata: { + text: result.text, + selector: params.selector, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/getValue.ts b/packages/opencode/src/browser/getValue.ts new file mode 100644 index 00000000000..8b648d6dde8 --- /dev/null +++ b/packages/opencode/src/browser/getValue.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_getValue": requests the "browser" permission via ctx.ask, calls BrowserService.getValue(selector) and reports result.value in both the output and the metadata. */ export const BrowserGetValueTool = Tool.define("browser_getValue", { + description: "Get the value of an input element", + parameters: z.object({ + selector: z.string().describe("Element selector"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "get_value", + selector: params.selector, + }, + }) + + const result = await BrowserService.getValue(params.selector) + + return { + title: `Got value`, + output: `Value of ${params.selector}: "${result.value}"`, + metadata: { + value: result.value, + selector: params.selector, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/headed.ts b/packages/opencode/src/browser/headed.ts new file mode 100644 index 00000000000..d7357f852da --- /dev/null +++ b/packages/opencode/src/browser/headed.ts @@ -0,0 +1,36 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_headed": requests the "browser" permission via ctx.ask, writes params.enabled to the `headed` field of the object returned by BrowserService.getConfig() and then calls BrowserService.close(). `enabled` defaults to true. */ export const BrowserHeadedTool = Tool.define("browser_headed", { + description: "Toggle headed mode (visible browser window)", + parameters: z.object({ + enabled: z.boolean().optional().default(true).describe("Enable or disable headed mode"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "set_headed", + enabled: params.enabled, + }, + }) + + const config = await BrowserService.getConfig() /* NOTE(review): mutated in place on the next line; getConfig() in ./index returns the module-level DEFAULT_CONFIG - confirm the shared mutation is intended */ + config.headed = params.enabled + + await BrowserService.close() + + return { + title: `Headed mode 
${params.enabled ? 'enabled' : 'disabled'}`, + output: params.enabled + ? `Headed mode enabled. Browser will now be visible. Run browser_navigate to open the browser.` + : `Headed mode disabled. Browser will run in headless mode.`, + metadata: { + headed: params.enabled, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/hover.ts b/packages/opencode/src/browser/hover.ts new file mode 100644 index 00000000000..7cbb7bb9801 --- /dev/null +++ b/packages/opencode/src/browser/hover.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +/** Tool "browser_hover": requests the "browser" permission via ctx.ask, then awaits BrowserService.hover(selector, timeout). `timeout` is optional and passed as a bare number. Returns a success message and metadata that echo the selector. */ export const BrowserHoverTool = Tool.define("browser_hover", { + description: "Hover over an element by selector", + parameters: z.object({ + selector: z.string().describe("Selector for element to hover over"), + timeout: z.number().optional().describe("Timeout in milliseconds"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "hover", + selector: params.selector, + }, + }) + + await BrowserService.hover(params.selector, params.timeout) /* the return value is not needed for the response below */ + + return { + title: `Hovered over element`, + output: `Successfully hovered over: ${params.selector}`, + metadata: { + selector: params.selector, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/index.ts b/packages/opencode/src/browser/index.ts new file mode 100644 index 00000000000..4c774512bbc --- /dev/null +++ b/packages/opencode/src/browser/index.ts @@ -0,0 +1,1077 @@ +import { Log } from "@/util/log" +import type { Browser, BrowserContext, Page, ElementHandle } from "playwright" + +const log = Log.create({ service: "browser" }) + +/** Module-level defaults for the browser service. The object is not frozen and is handed out by reference, so callers that write to it change the defaults for the whole process. */ const DEFAULT_CONFIG: BrowserConfig = { + maxPages: 10, + maxMemoryMb: 512, + idleTimeoutMinutes: 30, + screenshotMaxBytes: 4 * 1024 * 1024, + scriptMaxBytes: 10 * 1024 * 1024, + scriptTimeoutMs: 5000, + navigationTimeoutMs: 30000, + elementTimeoutMs: 10000, + retryAttempts: 3, + 
retryDelayMs: 1000, + browser: 'chromium', + headed: true, // Headed mode by default - user sees the browser +} + +/** Mutable state shared by the helpers in this module: the launched browser, its single context, the time of the last activity and a count of pages opened through getPage. */ interface BrowserStateData { + browser: Browser | null + context: BrowserContext | null + lastActivity: number + pageCount: number +} + +let browserStateData: BrowserStateData = { + browser: null, + context: null, + lastActivity: Date.now(), + pageCount: 0, +} + +/** Best-effort shutdown: closes every page of state.context, then state.browser, ignoring all errors, and resets both fields to null. Never throws. */ async function closeBrowser(state: BrowserStateData) { + if (state.context) { + try { + const pages = await state.context.pages() + for (const page of pages) { + try { + await page.close().catch(() => {}) + } catch {} + } + } catch {} + state.context = null + } + + if (state.browser) { + try { + await state.browser.close().catch(() => {}) + } catch {} + state.browser = null + } +} + +/** Limits, timeouts, retry policy and launch options for the browser service. */ export interface BrowserConfig { + maxPages: number + maxMemoryMb: number + idleTimeoutMinutes: number + screenshotMaxBytes: number + scriptMaxBytes: number + scriptTimeoutMs: number + navigationTimeoutMs: number + elementTimeoutMs: number + retryAttempts: number + retryDelayMs: number + browser?: 'chromium' | 'firefox' | 'webkit' + headed?: boolean +} + +export interface BrowserPageInfo { + id: string + url: string + title: string +} + +export interface BrowserScreenshotResult { + base64: string + width: number + height: number + size: number +} + +/** Result of a script evaluation: the script's value plus console messages. The type parameter was missing from the declaration, leaving `T` undefined; it defaults to unknown so uses with or without an explicit argument compile. */ export interface BrowserEvaluateResult<T = unknown> { + result: T + console: Array<{ type: 'log' | 'error' | 'warn' | 'info'; text: string }> +} + +export type BrowserErrorCode = + | 'NAVIGATION_FAILED' + | 'ELEMENT_NOT_FOUND' + | 'ELEMENT_STALE' + | 'CONTEXT_CLOSED' + | 'RESOURCE_EXCEEDED' + | 'JAVASCRIPT_ERROR' + | 'PERMISSION_DENIED' + | 'INVALID_URL' + | 'TIMEOUT' + | 'BROWSER_CLOSED' + | 'SCRIPT_ERROR' + | 'NOT_IMPLEMENTED' + +/** Error type of the browser service: carries a machine-readable code and optional structured details. */ export class BrowserError extends Error { + constructor( + readonly code: BrowserErrorCode, + message: string, + readonly details?: Record<string, unknown> + ) { + super(message) + this.name = 'BrowserError' + } +} + +/** Returns the connected browser from the shared state, or tears down stale state and launches a new one using DEFAULT_CONFIG.browser and DEFAULT_CONFIG.headed, creating a fresh context as well. NOTE(review): the firefox and webkit branches pass Chromium-style sandbox switches, and '--max-old-space-size' is a V8 option rather than a Chromium switch - confirm these are accepted by each browser. */ async function getBrowser(): Promise<Browser> { + const state 
= browserStateData + + if (state.browser?.isConnected()) { + return state.browser + } + + await closeBrowser(state) + + const playwright = await import('playwright') + const browserType = DEFAULT_CONFIG.browser ?? 'chromium' + const isHeaded = DEFAULT_CONFIG.headed ?? false + + let browser: Browser + switch (browserType) { + case 'firefox': + browser = await playwright.firefox.launch({ + headless: !isHeaded, + args: isHeaded ? [] : [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + ], + }) + break + case 'webkit': + browser = await playwright.webkit.launch({ + headless: !isHeaded, + args: isHeaded ? [] : [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + ], + }) + break + default: + browser = await playwright.chromium.launch({ + headless: !isHeaded, + channel: isHeaded ? undefined : 'chromium', + args: isHeaded ? [] : [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-gpu', + '--disable-software-rasterizer', + '--memory-pressure-off', + '--max-old-space-size=256', + ], + }) + } + + state.browser = browser + state.context = await browser.newContext({ + ignoreHTTPSErrors: true, /* NOTE(review): HTTPS errors are ignored for every page in this context - confirm this is intended outside of testing */ + locale: 'en-US', + }) + + return browser +} + +/** Returns the first existing page of state.context, or opens a new one. Throws BrowserError CONTEXT_CLOSED when there is no context and RESOURCE_EXCEEDED when state.pageCount has reached DEFAULT_CONFIG.maxPages. */ async function getPage(state: BrowserStateData): Promise<Page> { + if (!state.context) { + throw new BrowserError('CONTEXT_CLOSED', 'Browser context has been closed') + } + + const pages = await state.context.pages() + + if (pages.length > 0) { + return pages[0] + } + + if (state.pageCount >= DEFAULT_CONFIG.maxPages) { + throw new BrowserError('RESOURCE_EXCEEDED', `Maximum page limit (${DEFAULT_CONFIG.maxPages}) reached`, { + maxPages: DEFAULT_CONFIG.maxPages, + }) + } + + const page = await state.context.newPage() + state.pageCount++ + state.lastActivity = Date.now() + + page.on('close', () => { + state.pageCount = Math.max(0, state.pageCount - 1) + }) + + return page +} + +/** Runs fn and retries it, after waiting retryDelayMs, only when it throws a BrowserError whose code is ELEMENT_NOT_FOUND or ELEMENT_STALE; any other error, or the last failed attempt, is rethrown. `config` overrides DEFAULT_CONFIG. fn is always called at least once: previously a retryAttempts below 1 skipped the loop and threw null. */ async function withRetry<T>( + fn: () => Promise<T>, + config: Partial<BrowserConfig> = {} +): 
Promise<T> { + const cfg = { ...DEFAULT_CONFIG, ...config } + let lastError: Error | null = null + + for (let attempt = 0; attempt < Math.max(1, cfg.retryAttempts); attempt++) { + try { + return await fn() + } catch (error) { + lastError = error as Error + + if (error instanceof BrowserError) { + if (['ELEMENT_NOT_FOUND', 'ELEMENT_STALE'].includes(error.code)) { + if (attempt < cfg.retryAttempts - 1) { + await new Promise(resolve => setTimeout(resolve, cfg.retryDelayMs)) + continue + } + } + } + + throw error + } + } + + throw lastError +} + +export const BrowserService = { + name: 'browser', + + /** Returns the module-level DEFAULT_CONFIG object itself, not a copy. */ async getConfig(): Promise<BrowserConfig> { + return DEFAULT_CONFIG + }, + + async navigate(url: string, config: Partial = {}): Promise { + return withRetry(async () => { + const state = browserStateData + await getBrowser() + const page = await getPage(state) + const pageGuid = (page as any).guid + + state.lastActivity = Date.now() + + try { + await page.goto(url, { + waitUntil: 'domcontentloaded', + timeout: config.navigationTimeoutMs ?? 
DEFAULT_CONFIG.navigationTimeoutMs, + }) + } catch (error) { + const err = error as Error & { message?: string } + if (err.message?.includes('net::ERR_NAME_NOT_RESOLVED')) { + throw new BrowserError('INVALID_URL', `Could not resolve URL: ${url}`, { url }) + } + if (err.message?.includes('net::ERR_CONNECTION_REFUSED')) { + throw new BrowserError('NAVIGATION_FAILED', `Connection refused: ${url}`, { url }) + } + if (err.message?.includes('net::ERR_TIMED_OUT')) { + throw new BrowserError('TIMEOUT', `Navigation timed out: ${url}`, { url }) + } + throw new BrowserError('NAVIGATION_FAILED', `Failed to navigate to ${url}: ${err.message}`, { url }) + } + + return { + id: pageGuid, + url: page.url(), + title: await page.title(), + } + }, config) + }, + + async fill(selector: string, value: string, config: Partial = {}): Promise<{ success: boolean }> { + return withRetry(async () => { + const state = browserStateData + const page = await getPage(state) + + state.lastActivity = Date.now() + + const element = await findElement(page, selector, config) + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + try { + await element.fill(value, { + timeout: config.elementTimeoutMs ?? DEFAULT_CONFIG.elementTimeoutMs, + }) + } catch (error) { + throw new BrowserError('ELEMENT_STALE', `Element became stale: ${selector}`, { selector }) + } + + return { success: true } + }, config) + }, + + async click(selector: string, config: Partial = {}): Promise { + return withRetry(async () => { + const state = browserStateData + const page = await getPage(state) + const pageGuid = (page as any).guid + + state.lastActivity = Date.now() + + const element = await findElement(page, selector, config) + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + try { + await element.click({ + timeout: config.elementTimeoutMs ?? 
DEFAULT_CONFIG.elementTimeoutMs, + }) + } catch (error) { + throw new BrowserError('ELEMENT_STALE', `Element became stale: ${selector}`, { selector }) + } + + return { + id: pageGuid, + url: page.url(), + title: await page.title(), + } + }, config) + }, + + async screenshot(config: Partial = {}): Promise { + const state = browserStateData + const page = await getPage(state) + + state.lastActivity = Date.now() + + const screenshot = await page.screenshot({ + type: 'png', + fullPage: false, + }) + + if (screenshot.length > DEFAULT_CONFIG.screenshotMaxBytes) { + throw new BrowserError('RESOURCE_EXCEEDED', 'Screenshot too large', { + size: screenshot.length, + maxSize: DEFAULT_CONFIG.screenshotMaxBytes, + }) + } + + const dimensions = await page.evaluate(() => ({ + width: window.innerWidth, + height: window.innerHeight, + })) + + return { + base64: Buffer.from(screenshot).toString('base64'), + width: dimensions.width, + height: dimensions.height, + size: screenshot.length, + } + }, + + async evaluate(script: string, config: Partial = {}): Promise> { + const state = browserStateData + const page = await getPage(state) + + state.lastActivity = Date.now() + + if (script.length > DEFAULT_CONFIG.scriptMaxBytes) { + throw new BrowserError('RESOURCE_EXCEEDED', 'Script too large', { + size: script.length, + maxSize: DEFAULT_CONFIG.scriptMaxBytes, + }) + } + + const consoleMessages: Array<{ type: string; text: string }> = [] + + page.on('console', (msg: { type: () => string; text: () => string }) => { + consoleMessages.push({ + type: msg.type(), + text: msg.text(), + }) + }) + + let result: T + try { + result = await page.evaluate( + (code: string) => { + try { + return eval(code) + } catch (e) { + throw e instanceof Error ? 
e.message : String(e) + } + }, + script + ) + } catch (error) { + const err = error as Error & { message?: string } + throw new BrowserError('JAVASCRIPT_ERROR', `Script error: ${err.message}`, { script }) + } + + return { + result, + console: consoleMessages.map(c => ({ + type: c.type as 'log' | 'error' | 'warn' | 'info', + text: c.text, + })), + } + }, + + async close(): Promise<{ success: boolean }> { + await closeBrowser(browserStateData) + return { success: true } + }, + + async urls(): Promise { + const state = browserStateData + const urls: BrowserPageInfo[] = [] + + if (state.context) { + for (const page of await state.context.pages()) { + const pageGuid = (page as any).guid + urls.push({ + id: pageGuid, + url: page.url(), + title: await page.title().catch(() => ''), + }) + } + } + + return urls + }, + + async isHealthy(): Promise { + const state = browserStateData + return state.browser?.isConnected() ?? false + }, + + async open(url?: string): Promise { + const state = browserStateData + await getBrowser() + const page = await getPage(state) + const pageGuid = (page as any).guid + + state.lastActivity = Date.now() + + if (url) { + try { + await page.goto(url, { + waitUntil: 'domcontentloaded', + timeout: DEFAULT_CONFIG.navigationTimeoutMs, + }) + } catch (error) { + const err = error as Error & { message?: string } + throw new BrowserError('NAVIGATION_FAILED', `Failed to open ${url}: ${err.message}`, { url }) + } + } + + return { + id: pageGuid, + url: page.url(), + title: await page.title(), + } + }, + + async switchTab(index: number): Promise { + const state = browserStateData + await getBrowser() + const pages = await state.context?.pages() || [] + + if (index < 0 || index >= pages.length) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Tab index ${index} not found`, { index, total: pages.length }) + } + + const page = pages[index] + const pageGuid = (page as any).guid + + await page.bringToFront() + state.lastActivity = Date.now() + + return { + id: 
pageGuid, + url: page.url(), + title: await page.title(), + } + }, + + async closeTab(index?: number): Promise<{ remaining: number }> { + const state = browserStateData + await getBrowser() + const pages = await state.context?.pages() || [] + + const targetIndex = index ?? pages.length - 1 + + if (targetIndex < 0 || targetIndex >= pages.length) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Tab index ${targetIndex} not found`, { index: targetIndex, total: pages.length }) + } + + const pageToClose = pages[targetIndex] + await pageToClose.close() + state.pageCount = Math.max(0, state.pageCount - 1) + + const remainingPages = await state.context?.pages() || [] + return { remaining: remainingPages.length } + }, + + async duplicateTab(): Promise { + const state = browserStateData + await getBrowser() + const pages = await state.context?.pages() || [] + + if (pages.length === 0) { + throw new BrowserError('CONTEXT_CLOSED', 'No tabs to duplicate') + } + + const currentPage = pages[0] + const currentUrl = currentPage.url() + const currentTitle = await currentPage.title() + + const newPage = await state.context!.newPage() + if (currentUrl && currentUrl !== 'about:blank') { + await newPage.goto(currentUrl, { waitUntil: 'domcontentloaded' }) + } + + state.pageCount++ + const pageGuid = (newPage as any).guid + const newIndex = (await state.context?.pages() || []).length - 1 + + return { + id: pageGuid, + url: newPage.url(), + title: await newPage.title(), + newIndex, + } + }, + + async reopenTab(): Promise<{ success: boolean; url: string; title: string }> { + throw new BrowserError('NOT_IMPLEMENTED', 'Tab reopening requires browser history access') + }, + + async hover(selector: string, timeout?: number): Promise { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector, { elementTimeoutMs: timeout }) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector 
}) + } + + await element.hover() + }, + + async rightClick(selector?: string, x?: number, y?: number): Promise { + const state = browserStateData + const page = await getPage(state) + + if (selector) { + const element = await findElement(page, selector) + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + await element.click({ button: 'right' }) + } else if (x !== undefined && y !== undefined) { + await page.mouse.click(x, y, { button: 'right' }) + } else { + throw new BrowserError('INVALID_URL', 'Either selector or coordinates required') + } + }, + + async doubleClick(selector: string, timeout?: number): Promise { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector, { elementTimeoutMs: timeout }) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + await element.dblclick() + }, + + async dragDrop(source: string, target: string, delay?: number): Promise { + const state = browserStateData + const page = await getPage(state) + + const sourceElement = await findElement(page, source) + const targetElement = await findElement(page, target) + + if (!sourceElement) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Source element not found: ${source}`, { selector: source }) + } + + if (!targetElement) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Target element not found: ${target}`, { selector: target }) + } + + const sourceBox = await sourceElement.boundingBox() + const targetBox = await targetElement.boundingBox() + + if (sourceBox && targetBox) { + await page.mouse.move(sourceBox.x + sourceBox.width / 2, sourceBox.y + sourceBox.height / 2) + await page.mouse.down() + await page.mouse.move(targetBox.x + targetBox.width / 2, targetBox.y + targetBox.height / 2, { steps: 10 }) + await page.mouse.up() + } + }, + + async scrollToTop(selector?: string): Promise { + const 
state = browserStateData + const page = await getPage(state) + + if (selector) { + const element = await findElement(page, selector) + if (element) { + await element.evaluate((el: Element) => el.scrollIntoView({ block: 'start' })) + } + } else { + await page.evaluate(() => window.scrollTo(0, 0)) + } + }, + + async scrollToBottom(selector?: string): Promise { + const state = browserStateData + const page = await getPage(state) + + if (selector) { + const element = await findElement(page, selector) + if (element) { + await element.evaluate((el: Element) => el.scrollIntoView({ block: 'end' })) + } + } else { + await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)) + } + }, + + async scroll(selector: string | undefined, direction: 'up' | 'down' | 'left' | 'right', pixels: number): Promise { + const state = browserStateData + const page = await getPage(state) + + const scrollAmount = direction === 'up' || direction === 'left' ? -pixels : pixels + + if (selector) { + const element = await findElement(page, selector) + if (element) { + await element.evaluate((el: HTMLElement) => { + el.scrollTop += direction === 'up' || direction === 'down' ? scrollAmount : 0 + el.scrollLeft += direction === 'left' || direction === 'right' ? scrollAmount : 0 + }) + } + } else { + await page.evaluate((params: { deltaX: number; deltaY: number }) => { + window.scrollBy(params.deltaX, params.deltaY) + }, { deltaX: direction === 'left' || direction === 'right' ? scrollAmount : 0, deltaY: direction === 'up' || direction === 'down' ? 
scrollAmount : 0 }) + } + }, + + async scrollTo(selector: string, block: 'start' | 'center' | 'end' | 'nearest' = 'center'): Promise { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + await element.scrollIntoViewIfNeeded() + }, + + async check(selector: string, checked: boolean = true): Promise { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + const isChecked = await element.isChecked() + if (isChecked !== checked) { + await element.click() + } + }, + + async select(selector: string, value?: string, label?: string): Promise<{ value: string; selectedLabel: string }> { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + if (value) { + await element.selectOption({ value }) + } else if (label) { + await element.selectOption({ label }) + } + + const selected = await element.evaluate((el: HTMLSelectElement) => { + return { value: el.value, label: el.options[el.selectedIndex]?.textContent || '' } + }) + + return { value: selected.value, selectedLabel: selected.label } + }, + + async clear(selector: string): Promise { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + await element.fill('') + }, + + async getValue(selector: string): Promise<{ value: string }> { + const state = browserStateData + const page = await 
getPage(state) + const element = await findElement(page, selector) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + const value = await element.evaluate((el: HTMLInputElement) => el.value) + return { value } + }, + + async back(): Promise { + const state = browserStateData + const page = await getPage(state) + + await page.goBack() + state.lastActivity = Date.now() + + return { + id: (page as any).guid, + url: page.url(), + title: await page.title(), + } + }, + + async forward(): Promise { + const state = browserStateData + const page = await getPage(state) + + await page.goForward() + state.lastActivity = Date.now() + + return { + id: (page as any).guid, + url: page.url(), + title: await page.title(), + } + }, + + async refresh(bypassCache: boolean = false): Promise { + const state = browserStateData + const page = await getPage(state) + + await page.reload({ bypassCache } as any) + state.lastActivity = Date.now() + + return { + id: (page as any).guid, + url: page.url(), + title: await page.title(), + } + }, + + async waitForElement(selector: string, state: 'attached' | 'detached' | 'visible' | 'hidden' = 'attached', timeout: number = 30000): Promise<{ found: boolean }> { + const cfg = { ...DEFAULT_CONFIG, elementTimeoutMs: timeout } + const stateObj = browserStateData + const page = await getPage(stateObj) + + try { + await page.waitForSelector(selector, { state, timeout: cfg.elementTimeoutMs }) + return { found: true } + } catch { + return { found: false } + } + }, + + async waitForURL(pattern: string, timeout: number = 30000): Promise<{ found: boolean; url: string; currentUrl: string }> { + const state = browserStateData + const page = await getPage(state) + + const startTime = Date.now() + const currentUrl = page.url() + + if (currentUrl.includes(pattern)) { + return { found: true, url: currentUrl, currentUrl } + } + + while (Date.now() - startTime < timeout) { + await new Promise(r => 
setTimeout(r, 100)) + const url = page.url() + if (url.includes(pattern)) { + return { found: true, url, currentUrl } + } + } + + return { found: false, url: '', currentUrl: page.url() } + }, + + async getText(selector: string): Promise<{ text: string }> { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + const text = await element.textContent() + return { text: text || '' } + }, + + async getAttribute(selector: string, attribute: string): Promise<{ value: string | null }> { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + const value = await element.getAttribute(attribute) + return { value } + }, + + async getCSS(selector: string, property: string): Promise<{ value: string }> { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector) + + if (!element) { + throw new BrowserError('ELEMENT_NOT_FOUND', `Element not found: ${selector}`, { selector }) + } + + const value = await element.evaluate((el: Element, prop: string) => { + return window.getComputedStyle(el).getPropertyValue(prop) + }, property) + + return { value } + }, + + async getPageSource(trimmed: boolean = true): Promise<{ source: string }> { + const state = browserStateData + const page = await getPage(state) + let source = await page.content() + + if (trimmed) { + source = source.trim() + } + + return { source } + }, + + async getCookies(domain?: string): Promise<{ cookies: Array<{ name: string; value: string; domain: string; path: string; secure: boolean; httpOnly: boolean; sameSite: string }> }> { + const state = browserStateData + await getBrowser() + + const 
allCookies = await state.context?.cookies() || [] + const cookies = domain + ? allCookies.filter(c => c.domain.includes(domain)) + : allCookies + + return { cookies } + }, + + async setCookie(params: { name: string; value: string; domain: string; path?: string; secure?: boolean; httpOnly?: boolean; sameSite?: string }): Promise { + const state = browserStateData + await getBrowser() + + await state.context?.addCookies([{ + name: params.name, + value: params.value, + domain: params.domain, + path: params.path || '/', + secure: params.secure || false, + httpOnly: params.httpOnly || false, + sameSite: params.sameSite as any || 'Lax', + }]) + }, + + async deleteCookie(name: string, domain?: string): Promise { + const state = browserStateData + await getBrowser() + + const cookies = await state.context?.cookies() || [] + const toDelete = domain + ? cookies.filter(c => c.name === name && c.domain.includes(domain)) + : cookies.filter(c => c.name === name) + + for (const cookie of toDelete) { + await state.context?.clearCookies(cookie) + } + }, + + async getLocalStorage(key?: string): Promise<{ value: string | null; all: Record }> { + const state = browserStateData + const page = await getPage(state) + + if (key) { + const value = await page.evaluate((k: string) => localStorage.getItem(k), key) + return { value, all: {} } + } else { + const all = await page.evaluate(() => { + const obj: Record = {} + for (let i = 0; i < localStorage.length; i++) { + const k = localStorage.key(i)! + obj[k] = localStorage.getItem(k)! 
+ } + return obj + }) + return { value: null, all } + } + }, + + async setLocalStorage(key: string, value: string): Promise { + const state = browserStateData + const page = await getPage(state) + + await page.evaluate((params: { k: string; v: string }) => { + localStorage.setItem(params.k, params.v) + }, { k: key, v: value }) + }, + + async clearStorage(type: 'localStorage' | 'sessionStorage' | 'all' = 'all'): Promise { + const state = browserStateData + const page = await getPage(state) + + if (type === 'all' || type === 'localStorage') { + await page.evaluate(() => localStorage.clear()) + } + if (type === 'all' || type === 'sessionStorage') { + await page.evaluate(() => sessionStorage.clear()) + } + }, + + async setViewport(width: number, height: number): Promise { + const state = browserStateData + await getBrowser() + + await state.context?.close() + state.context = await state.browser!.newContext({ + viewport: { width, height }, + ignoreHTTPSErrors: true, + locale: 'en-US', + }) + }, + + async setUserAgent(userAgent: string): Promise { + const state = browserStateData + await getBrowser() + + await state.context?.close() + state.context = await state.browser!.newContext({ + userAgent, + ignoreHTTPSErrors: true, + locale: 'en-US', + }) + }, + + async setGeolocation(latitude: number, longitude: number, accuracy: number = 100): Promise { + const state = browserStateData + await getBrowser() + + await state.context?.close() + state.context = await state.browser!.newContext({ + geolocation: { latitude, longitude, accuracy }, + ignoreHTTPSErrors: true, + locale: 'en-US', + }) + }, + + async setTimezone(timezone: string): Promise { + const state = browserStateData + await getBrowser() + + await state.context?.close() + state.context = await state.browser!.newContext({ + timezoneId: timezone, + ignoreHTTPSErrors: true, + locale: 'en-US', + }) + }, + + async assertText(selector: string, expected: string, contains: boolean = false): Promise<{ passed: boolean; actual: 
string }> { + const { text } = await this.getText(selector) + const passed = contains ? text.includes(expected) : text === expected + return { passed, actual: text } + }, + + async assertVisible(selector: string, visible: boolean = true): Promise<{ passed: boolean }> { + const state = browserStateData + const page = await getPage(state) + const element = await findElement(page, selector) + + if (!element) { + return { passed: !visible } + } + + const isVisible = await element.isVisible() + return { passed: isVisible === visible } + }, + + async assertURL(pattern: string): Promise<{ passed: boolean; url: string; currentUrl: string }> { + const state = browserStateData + const page = await getPage(state) + const url = page.url() + const passed = url.includes(pattern) + return { passed, url, currentUrl: url } + }, + + async reset(): Promise { + await closeBrowser(browserStateData) + browserStateData = { + browser: null, + context: null, + lastActivity: Date.now(), + pageCount: 0, + } + }, +} + +async function findElement( + page: Page, + selector: string, + config: Partial = {} +): Promise { + const cfg = { ...DEFAULT_CONFIG, ...config } + + for (const strategy of getSelectorStrategies(selector)) { + try { + const element = await page.waitForSelector(strategy, { + state: 'attached', + timeout: cfg.elementTimeoutMs, + }) + if (element) { + return element + } + } catch { + continue + } + } + + return null +} + +export function getSelectorStrategies(input: string): string[] { + if (input.startsWith('text=')) { + const text = input.slice(5).replace(/"/g, '').replace(/'/g, "\\'") + return [ + `xpath=//*[text()="${text}"]`, + `xpath=//*[contains(text(), "${text}")]`, + `text="${text}"`, + ] + } + + if (input.startsWith('[data-testid=') || input.startsWith('[aria-label=')) { + return [input] + } + + if (input.startsWith('.') || input.startsWith('#') || input.startsWith('[')) { + return [input] + } + + if (input.includes('/') || input.startsWith('//')) { + return [input] + } 
+ + return [ + `text="${input}"`, + input, + `xpath=//*[contains(@*, "${input}")]`, + `xpath=//*[text()="${input}"]`, + ] +} diff --git a/packages/opencode/src/browser/navigate.ts b/packages/opencode/src/browser/navigate.ts new file mode 100644 index 00000000000..ff26ac4a484 --- /dev/null +++ b/packages/opencode/src/browser/navigate.ts @@ -0,0 +1,38 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService, BrowserError } from "./index" + +export const BrowserNavigateTool = Tool.define("browser_navigate", { + description: "Navigate to a URL in the current tab", + parameters: z.object({ + url: z.string().describe("The URL to navigate to (must start with http:// or https://)"), + waitUntil: z + .enum(["load", "domcontentloaded", "networkidle"]) + .optional() + .default("domcontentloaded") + .describe("When to consider navigation complete"), + }), + async execute(params, ctx) { + if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) { + throw new Error("URL must start with http:// or https://") + } + + await ctx.ask({ + permission: "browser", + patterns: [params.url], + always: ["*"], + metadata: { + url: params.url, + action: "navigate", + }, + }) + + const result = await BrowserService.navigate(params.url) + + return { + title: `Navigated to ${result.url}`, + output: `Successfully navigated to ${result.url}\nTitle: ${result.title}`, + metadata: {}, + } + }, +}) diff --git a/packages/opencode/src/browser/open.ts b/packages/opencode/src/browser/open.ts new file mode 100644 index 00000000000..4915a9267c4 --- /dev/null +++ b/packages/opencode/src/browser/open.ts @@ -0,0 +1,35 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +export const BrowserOpenTool = Tool.define("browser_open", { + description: "Open a new tab, optionally at a specific URL", + parameters: z.object({ + url: z.string().describe("URL to open in new tab (optional, opens blank if not provided)"), + 
}), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: params.url ? [params.url] : ["*"], + always: ["*"], + metadata: { + url: params.url, + action: "open", + }, + }) + + const result = await BrowserService.open(params.url) + + return { + title: `Opened new tab`, + output: params.url + ? `Opened new tab with URL: ${params.url}` + : `Opened new blank tab`, + metadata: { + url: result.url, + title: result.title, + id: result.id, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/refresh.ts b/packages/opencode/src/browser/refresh.ts new file mode 100644 index 00000000000..2dc79ba85dc --- /dev/null +++ b/packages/opencode/src/browser/refresh.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +export const BrowserRefreshTool = Tool.define("browser_refresh", { + description: "Refresh the current page", + parameters: z.object({ + bypassCache: z.boolean().optional().default(false).describe("Bypass cache when reloading"), + }), + async execute(params, ctx) { + await ctx.ask({ + permission: "browser", + patterns: ["*"], + always: ["*"], + metadata: { + action: "refresh", + bypassCache: params.bypassCache, + }, + }) + + const result = await BrowserService.refresh(params.bypassCache) + + return { + title: `Refreshed page`, + output: `Refreshed: ${result.url}`, + metadata: { + url: result.url, + title: result.title, + }, + } + }, +}) diff --git a/packages/opencode/src/browser/reopenTab.ts b/packages/opencode/src/browser/reopenTab.ts new file mode 100644 index 00000000000..84647f28f2f --- /dev/null +++ b/packages/opencode/src/browser/reopenTab.ts @@ -0,0 +1,32 @@ +import z from "zod" +import { Tool } from "../tool/tool" +import { BrowserService } from "./index" + +export const BrowserReopenTabTool = Tool.define("browser_reopenTab", { + description: "Reopen a recently closed tab", + parameters: z.object({}), + async execute(params, ctx) { + await ctx.ask({ + 
permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "reopen_tab",
+      },
+    })
+
+    const result = await BrowserService.reopenTab()
+
+    return {
+      title: `Reopened closed tab`,
+      output: result.success
+        ? `Reopened closed tab\nURL: ${result.url}\nTitle: ${result.title}`
+        : 'No recently closed tabs to reopen',
+      metadata: {
+        success: result.success,
+        url: result.url,
+        title: result.title,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/rightClick.ts b/packages/opencode/src/browser/rightClick.ts
new file mode 100644
index 00000000000..29aca98ddf9
--- /dev/null
+++ b/packages/opencode/src/browser/rightClick.ts
@@ -0,0 +1,37 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserRightClickTool = Tool.define("browser_rightClick", { // Tool wrapper: ctx.ask("browser"), then BrowserService.rightClick(selector, x, y)
+  description: "Right-click on an element or at coordinates",
+  parameters: z.object({
+    selector: z.string().optional().describe("Selector for element (default: current position)"),
+    x: z.number().optional().describe("X coordinate (if no selector)"),
+    y: z.number().optional().describe("Y coordinate (if no selector)"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "right_click",
+        selector: params.selector,
+        x: params.x, // include the coordinates so a selector-less click still shows its target in the request metadata
+        y: params.y,
+      },
+    })
+
+    await BrowserService.rightClick(params.selector, params.x, params.y) // return value is not used by this wrapper
+
+    return {
+      title: `Right-clicked`,
+      output: params.selector
+        ? `Right-clicked element: ${params.selector}`
+        : params.x === undefined || params.y === undefined // no selector and no complete (x, y) pair: avoid printing "(undefined, undefined)"
+          ? "Right-clicked at current position"
+          : `Right-clicked at coordinates: (${params.x}, ${params.y})`,
+      metadata: {},
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/screenshot.ts b/packages/opencode/src/browser/screenshot.ts
new file mode 100644
index 00000000000..9955c49f93f
--- /dev/null
+++ b/packages/opencode/src/browser/screenshot.ts
@@ -0,0 +1,31 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserScreenshotTool = Tool.define("browser_screenshot", { // Tool wrapper: ctx.ask("browser"), then BrowserService.screenshot(); takes no parameters
+  description: "Take a screenshot of the current page",
+  parameters: z.object({}),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "screenshot",
+      },
+    })
+
+    const result = await BrowserService.screenshot()
+
+    return {
+      title: `Screenshot (${result.width}x${result.height})`,
+      output: `Screenshot captured\nDimensions: ${result.width}x${result.height}\nSize: ${result.size} bytes`, // text output carries only dimensions and size
+      metadata: {
+        screenshot: result.base64, // NOTE(review): presumably the whole encoded image - confirm metadata is an acceptable place for a payload of that size
+        width: result.width,
+        height: result.height,
+        size: result.size,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/scroll.ts b/packages/opencode/src/browser/scroll.ts
new file mode 100644
index 00000000000..4fc00afa5b8
--- /dev/null
+++ b/packages/opencode/src/browser/scroll.ts
@@ -0,0 +1,35 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserScrollTool = Tool.define("browser_scroll", { // Tool wrapper: ctx.ask("browser"), then BrowserService.scroll(selector, direction, pixels)
+  description: "Scroll the page or an element in a direction",
+  parameters: z.object({
+    selector: z.string().optional().describe("Element to scroll (default: page)"),
+    direction: z.enum(["up", "down", "left", "right"]).default("down").describe("Scroll direction"),
+    pixels: z.number().optional().default(300).describe("Pixels to scroll"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "scroll",
+        selector: params.selector,
+        direction: params.direction,
+      },
+    })
+
+    await BrowserService.scroll(params.selector, params.direction, params.pixels)
+
+    return {
+      title: `Scrolled ${params.direction}`,
+      output: `Scrolled ${params.direction} by ${params.pixels}px${params.selector ? ` in ${params.selector}` : ''}`, // the " in <selector>" suffix is appended only when a selector was supplied
+      metadata: {
+        direction: params.direction,
+        pixels: params.pixels,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/scrollBottom.ts b/packages/opencode/src/browser/scrollBottom.ts
new file mode 100644
index 00000000000..301c8b26073
--- /dev/null
+++ b/packages/opencode/src/browser/scrollBottom.ts
@@ -0,0 +1,31 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserScrollBottomTool = Tool.define("browser_scrollBottom", { // Tool wrapper: ctx.ask("browser"), then BrowserService.scrollToBottom(selector)
+  description: "Scroll to the bottom of the page or an element",
+  parameters: z.object({
+    selector: z.string().optional().describe("Element (default: page)"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "scroll_bottom",
+        selector: params.selector,
+      },
+    })
+
+    await BrowserService.scrollToBottom(params.selector)
+
+    return {
+      title: `Scrolled to bottom`,
+      output: params.selector // message names the element when one was given, otherwise the page
+        ? `Scrolled ${params.selector} to bottom`
+        : 'Scrolled page to bottom',
+      metadata: {},
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/scrollTo.ts b/packages/opencode/src/browser/scrollTo.ts
new file mode 100644
index 00000000000..c346f75eb9f
--- /dev/null
+++ b/packages/opencode/src/browser/scrollTo.ts
@@ -0,0 +1,33 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserScrollToTool = Tool.define("browser_scrollTo", { // Tool wrapper: ctx.ask("browser"), then BrowserService.scrollTo(selector, block)
+  description: "Scroll an element into view",
+  parameters: z.object({
+    selector: z.string().describe("Selector for element to scroll into view"),
+    block: z.enum(["start", "center", "end", "nearest"]).optional().default("center").describe("Scroll alignment"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "scroll_to",
+        selector: params.selector,
+      },
+    })
+
+    await BrowserService.scrollTo(params.selector, params.block)
+
+    return {
+      title: `Scrolled to element`,
+      output: `Scrolled to: ${params.selector} (block: ${params.block})`,
+      metadata: {
+        selector: params.selector,
+        block: params.block,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/scrollTop.ts b/packages/opencode/src/browser/scrollTop.ts
new file mode 100644
index 00000000000..458e266678a
--- /dev/null
+++ b/packages/opencode/src/browser/scrollTop.ts
@@ -0,0 +1,31 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserScrollTopTool = Tool.define("browser_scrollTop", { // Tool wrapper: ctx.ask("browser"), then BrowserService.scrollToTop(selector)
+  description: "Scroll to the top of the page or an element",
+  parameters: z.object({
+    selector: z.string().optional().describe("Element (default: page)"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "scroll_top",
+        selector: params.selector,
+      },
+    })
+
+    await BrowserService.scrollToTop(params.selector)
+
+    return {
+      title: `Scrolled to top`,
+      output: params.selector // message names the element when one was given, otherwise the page
+        ? `Scrolled ${params.selector} to top`
+        : 'Scrolled page to top',
+      metadata: {},
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/select.ts b/packages/opencode/src/browser/select.ts
new file mode 100644
index 00000000000..14bbe98761a
--- /dev/null
+++ b/packages/opencode/src/browser/select.ts
@@ -0,0 +1,35 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserSelectTool = Tool.define("browser_select", { // Tool wrapper: ctx.ask("browser"), then BrowserService.select(selector, value, label)
+  description: "Select an option in a dropdown select element",
+  parameters: z.object({
+    selector: z.string().describe("Select element selector"),
+    value: z.string().describe("Option value to select"), // NOTE(review): required here, although `label` below is described as an alternative to it - confirm
+    label: z.string().optional().describe("Option label to select (alternative to value)"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "select",
+        selector: params.selector,
+        value: params.value,
+      },
+    })
+
+    const result = await BrowserService.select(params.selector, params.value, params.label)
+
+    return {
+      title: `Selected option`,
+      output: `Selected option "${result.selectedLabel || result.value}" in ${params.selector}`, // shows the label when it is truthy, otherwise the value
+      metadata: {
+        value: result.value,
+        label: result.selectedLabel,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/setCookie.ts b/packages/opencode/src/browser/setCookie.ts
new file mode 100644
index 00000000000..733f3f2190c
--- /dev/null
+++ b/packages/opencode/src/browser/setCookie.ts
@@ -0,0 +1,47 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserSetCookieTool = Tool.define("browser_setCookie", { // Tool wrapper: ctx.ask("browser"), then BrowserService.setCookie({...})
+  description: "Set a cookie for a domain",
+  parameters: z.object({
+    name: z.string().describe("Cookie name"),
+    value: z.string().describe("Cookie value"),
+    domain: z.string().describe("Cookie domain"),
+    path: z.string().optional().default("/").describe("Cookie path"),
+    secure: z.boolean().optional().default(false).describe("Secure flag"),
+    httpOnly: z.boolean().optional().default(false).describe("HttpOnly flag"),
+    sameSite: z.enum(["Strict", "Lax", "None"]).optional().default("Lax").describe("SameSite value"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: [params.domain], // the only wrapper in this patch section that scopes the request pattern instead of using "*"
+      always: ["*"], // NOTE(review): the request is scoped to one domain but `always` is "*" - confirm that breadth is intended
+      metadata: {
+        action: "set_cookie",
+        name: params.name, // the cookie value is not included in the request metadata
+        domain: params.domain,
+      },
+    })
+
+    await BrowserService.setCookie({
+      name: params.name,
+      value: params.value,
+      domain: params.domain,
+      path: params.path,
+      secure: params.secure,
+      httpOnly: params.httpOnly,
+      sameSite: params.sameSite,
+    })
+
+    return {
+      title: `Set cookie`,
+      output: `Set cookie "${params.name}" for ${params.domain}`,
+      metadata: {
+        name: params.name,
+        domain: params.domain,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/setGeolocation.ts b/packages/opencode/src/browser/setGeolocation.ts
new file mode 100644
index 00000000000..58f668b9305
--- /dev/null
+++ b/packages/opencode/src/browser/setGeolocation.ts
@@ -0,0 +1,36 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserSetGeolocationTool = Tool.define("browser_setGeolocation", { // Tool wrapper: ctx.ask("browser"), then BrowserService.setGeolocation(latitude, longitude, accuracy)
+  description: "Set the browser's geolocation",
+  parameters: z.object({
+    latitude: z.number().describe("Latitude (-90 to 90)"), // the range is stated in the description only; this schema does not enforce it
+    longitude: z.number().describe("Longitude (-180 to 180)"), // same: documented range, no schema bound
+    accuracy: z.number().optional().default(100).describe("Accuracy in meters"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "set_geolocation",
+        latitude: params.latitude,
+        longitude: params.longitude,
+      },
+    })
+
+    await BrowserService.setGeolocation(params.latitude, params.longitude, params.accuracy)
+
+    return {
+      title: `Geolocation set`,
+      output: `Location: ${params.latitude}, ${params.longitude} (±${params.accuracy}m)`,
+      metadata: {
+        latitude: params.latitude,
+        longitude: params.longitude,
+        accuracy: params.accuracy,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/setLocalStorage.ts b/packages/opencode/src/browser/setLocalStorage.ts
new file mode 100644
index 00000000000..bf1f8c5b822
--- /dev/null
+++ b/packages/opencode/src/browser/setLocalStorage.ts
@@ -0,0 +1,33 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserSetLocalStorageTool = Tool.define("browser_setLocalStorage", { // Tool wrapper: ctx.ask("browser"), then BrowserService.setLocalStorage(key, value)
+  description: "Set a value in localStorage",
+  parameters: z.object({
+    key: z.string().describe("Storage key"),
+    value: z.string().describe("Storage value"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "set_local_storage",
+        key: params.key, // only the key is included in the request metadata
+      },
+    })
+
+    await BrowserService.setLocalStorage(params.key, params.value)
+
+    return {
+      title: `Set localStorage`,
+      output: `Set ${params.key} = ${params.value}`, // the stored value is echoed back in the output and in the result metadata
+      metadata: {
+        key: params.key,
+        value: params.value,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/setTimezone.ts b/packages/opencode/src/browser/setTimezone.ts
new file mode 100644
index 00000000000..66cd65dcd6e
--- /dev/null
+++ b/packages/opencode/src/browser/setTimezone.ts
@@ -0,0 +1,31 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserSetTimezoneTool = Tool.define("browser_setTimezone", { // Tool wrapper: ctx.ask("browser"), then BrowserService.setTimezone(timezone)
+  description: "Set the browser's timezone",
+  parameters: z.object({
+    timezone: z.string().describe("Timezone ID (e.g., 'America/New_York')"), // any string passes this schema; the ID is not validated here
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "set_timezone",
+        timezone: params.timezone,
+      },
+    })
+
+    await BrowserService.setTimezone(params.timezone)
+
+    return {
+      title: `Timezone set`,
+      output: `Timezone: ${params.timezone}`,
+      metadata: {
+        timezone: params.timezone,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/setUserAgent.ts b/packages/opencode/src/browser/setUserAgent.ts
new file mode 100644
index 00000000000..05e188fa45c
--- /dev/null
+++ b/packages/opencode/src/browser/setUserAgent.ts
@@ -0,0 +1,31 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserSetUserAgentTool = Tool.define("browser_setUserAgent", { // Tool wrapper: ctx.ask("browser"), then BrowserService.setUserAgent(userAgent)
+  description: "Set the browser's user agent string",
+  parameters: z.object({
+    userAgent: z.string().describe("User agent string to use"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "set_user_agent",
+        userAgent: params.userAgent,
+      },
+    })
+
+    await BrowserService.setUserAgent(params.userAgent)
+
+    return {
+      title: `User agent set`,
+      output: `User agent: ${params.userAgent}`,
+      metadata: {
+        userAgent: params.userAgent,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/setViewport.ts b/packages/opencode/src/browser/setViewport.ts
new file mode 100644
index 00000000000..087913632eb
--- /dev/null
+++ b/packages/opencode/src/browser/setViewport.ts
@@ -0,0 +1,34 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserSetViewportTool = Tool.define("browser_setViewport", { // Tool wrapper: ctx.ask("browser"), then BrowserService.setViewport(width, height)
+  description: "Set the browser viewport size",
+  parameters: z.object({
+    width: z.number().describe("Viewport width in pixels"), // any number passes this schema (no integer or positivity constraint)
+    height: z.number().describe("Viewport height in pixels"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "set_viewport",
+        width: params.width,
+        height: params.height,
+      },
+    })
+
+    await BrowserService.setViewport(params.width, params.height)
+
+    return {
+      title: `Viewport set`,
+      output: `Viewport set to ${params.width}x${params.height}`,
+      metadata: {
+        width: params.width,
+        height: params.height,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/switchTab.ts b/packages/opencode/src/browser/switchTab.ts
new file mode 100644
index 00000000000..342b6790123
--- /dev/null
+++ b/packages/opencode/src/browser/switchTab.ts
@@ -0,0 +1,48 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+const DESCRIPTION = `Switch to a specific tab by index.
+
+**Example:**
+\`\`\`
+browser_switchTab({"index": 0}) // Switch to first tab
+browser_switchTab({"index": 2}) // Switch to third tab
+\`\`\`
+
+**Returns:**
+- \`url\`: Current tab URL
+- \`title\`: Current tab title
+- \`index\`: The index switched to
+
+**Note:** Tabs are 0-indexed (first tab is 0).`
+
+export const BrowserSwitchTabTool = Tool.define("browser_switchTab", { // Tool wrapper: ctx.ask("browser"), then BrowserService.switchTab(index)
+  description: DESCRIPTION, // the detailed help text above, which would otherwise be dead code
+  parameters: z.object({
+    index: z.number().describe("Tab index to switch to (0-based)"), // any number passes this schema (no integer or lower-bound constraint)
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "switch_tab",
+        index: params.index,
+      },
+    })
+
+    const result = await BrowserService.switchTab(params.index)
+
+    return {
+      title: `Switched to tab ${params.index}`,
+      output: `Switched to tab at index ${params.index}\nURL: ${result.url}\nTitle: ${result.title}`,
+      metadata: {
+        url: result.url,
+        title: result.title,
+        index: params.index,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/urls.ts b/packages/opencode/src/browser/urls.ts
new file mode 100644
index 00000000000..171f026ba46
--- /dev/null
+++ b/packages/opencode/src/browser/urls.ts
@@ -0,0 +1,32 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserUrlsTool = Tool.define("browser_urls", { // Tool wrapper: ctx.ask("browser"), then BrowserService.urls(); takes no parameters
+  description: "Get list of all open tabs with their URLs and titles",
+  parameters: z.object({}),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "list_urls",
+      },
+    })
+
+    const urls = await BrowserService.urls()
+
+    const output = urls.length > 0 // one "- title: url" line per entry, or a fixed message when the list is empty
+      ? urls.map(u => `- ${u.title}: ${u.url}`).join("\n")
+      : "No open pages"
+
+    return {
+      title: `Open pages (${urls.length})`,
+      output,
+      metadata: {
+        urls,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/waitForElement.ts b/packages/opencode/src/browser/waitForElement.ts
new file mode 100644
index 00000000000..c34f3b55fc0
--- /dev/null
+++ b/packages/opencode/src/browser/waitForElement.ts
@@ -0,0 +1,37 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserWaitForElementTool = Tool.define("browser_waitForElement", { // Tool wrapper: ctx.ask("browser"), then BrowserService.waitForElement(selector, state, timeout)
+  description: "Wait for an element to appear or disappear",
+  parameters: z.object({
+    selector: z.string().describe("Selector to wait for"),
+    state: z.enum(["attached", "detached", "visible", "hidden"]).optional().default("attached").describe("Element state"),
+    timeout: z.number().optional().default(30000).describe("Timeout in milliseconds"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "wait_for_element",
+        selector: params.selector,
+      },
+    })
+
+    const result = await BrowserService.waitForElement(params.selector, params.state, params.timeout)
+
+    return { // a miss is reported through title/output/metadata.found; this wrapper itself does not throw on it
+      title: `Element ${result.found ? 'found' : 'not found'}`,
+      output: result.found
+        ? `Element ${params.selector} is ${params.state}`
+        : `Element ${params.selector} not found within ${params.timeout}ms`, // NOTE(review): reads oddly for state "detached"/"hidden", where a miss means the element was still there - confirm wording
+      metadata: {
+        selector: params.selector,
+        state: params.state,
+        found: result.found,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/browser/waitForURL.ts b/packages/opencode/src/browser/waitForURL.ts
new file mode 100644
index 00000000000..19b3092aec8
--- /dev/null
+++ b/packages/opencode/src/browser/waitForURL.ts
@@ -0,0 +1,36 @@
+import z from "zod"
+import { Tool } from "../tool/tool"
+import { BrowserService } from "./index"
+
+export const BrowserWaitForURLTool = Tool.define("browser_waitForURL", { // Tool wrapper: ctx.ask("browser"), then BrowserService.waitForURL(pattern, timeout)
+  description: "Wait for the URL to match a pattern",
+  parameters: z.object({
+    pattern: z.string().describe("URL pattern to match (regex or substring)"),
+    timeout: z.number().optional().default(30000).describe("Timeout in milliseconds"),
+  }),
+  async execute(params, ctx) {
+    await ctx.ask({
+      permission: "browser",
+      patterns: ["*"],
+      always: ["*"],
+      metadata: {
+        action: "wait_for_url",
+        pattern: params.pattern,
+      },
+    })
+
+    const result = await BrowserService.waitForURL(params.pattern, params.timeout)
+
+    return { // a miss is reported through title/output/metadata.found; this wrapper itself does not throw on it
+      title: `URL ${result.found ? 'matched' : 'not matched'}`,
+      output: result.found
+        ? `URL matched pattern "${params.pattern}": ${result.url}`
+        : `URL "${result.currentUrl}" did not match "${params.pattern}" within ${params.timeout}ms`,
+      metadata: {
+        pattern: params.pattern,
+        url: result.url, // NOTE(review): the not-matched message reads result.currentUrl instead - confirm which field the service fills on a miss
+        found: result.found,
+      },
+    }
+  },
+})
diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts
index bf4a6035bd8..54368d18066 100644
--- a/packages/opencode/src/config/config.ts
+++ b/packages/opencode/src/config/config.ts
@@ -504,6 +504,7 @@ export namespace Config {
           websearch: PermissionAction.optional(),
           codesearch: PermissionAction.optional(),
           lsp: PermissionRule.optional(),
+          browser: PermissionAction.optional(),
           doom_loop: PermissionAction.optional(),
         })
         .catchall(PermissionRule)
diff --git a/packages/opencode/src/tool/registry.ts b/packages/opencode/src/tool/registry.ts
index 35e378f080b..dd2c10059a6 100644
--- a/packages/opencode/src/tool/registry.ts
+++ b/packages/opencode/src/tool/registry.ts
@@ -26,6 +26,54 @@ import { Log } from "@/util/log"
 import { LspTool } from "./lsp"
 import { Truncate } from "./truncation"
 import { PlanExitTool, PlanEnterTool } from "./plan"
+import { BrowserNavigateTool } from "../browser/navigate"
+import { BrowserClickTool } from "../browser/click"
+import { BrowserFillTool } from "../browser/fill"
+import { BrowserScreenshotTool } from "../browser/screenshot"
+import { BrowserEvaluateTool } from "../browser/evaluate"
+import { BrowserCloseTool } from "../browser/close"
+import { BrowserUrlsTool } from "../browser/urls"
+import { BrowserSetTool } from "../browser/browser"
+import { BrowserOpenTool } from "../browser/open"
+import { BrowserHeadedTool } from "../browser/headed"
+import { BrowserSwitchTabTool } from "../browser/switchTab"
+import { BrowserCloseTabTool } from "../browser/closeTab"
+import { BrowserDuplicateTabTool
} from "../browser/duplicateTab" +import { BrowserReopenTabTool } from "../browser/reopenTab" +import { BrowserHoverTool } from "../browser/hover" +import { BrowserRightClickTool } from "../browser/rightClick" +import { BrowserDoubleClickTool } from "../browser/doubleClick" +import { BrowserDragDropTool } from "../browser/dragDrop" +import { BrowserScrollTool } from "../browser/scroll" +import { BrowserScrollToTool } from "../browser/scrollTo" +import { BrowserScrollTopTool } from "../browser/scrollTop" +import { BrowserScrollBottomTool } from "../browser/scrollBottom" +import { BrowserCheckTool } from "../browser/check" +import { BrowserSelectTool } from "../browser/select" +import { BrowserClearTool } from "../browser/clear" +import { BrowserGetValueTool } from "../browser/getValue" +import { BrowserBackTool } from "../browser/back" +import { BrowserForwardTool } from "../browser/forward" +import { BrowserRefreshTool } from "../browser/refresh" +import { BrowserWaitForElementTool } from "../browser/waitForElement" +import { BrowserWaitForURLTool } from "../browser/waitForURL" +import { BrowserGetTextTool } from "../browser/getText" +import { BrowserGetAttributeTool } from "../browser/getAttribute" +import { BrowserGetCSSTool } from "../browser/getCSS" +import { BrowserGetPageSourceTool } from "../browser/getPageSource" +import { BrowserGetCookiesTool } from "../browser/getCookies" +import { BrowserSetCookieTool } from "../browser/setCookie" +import { BrowserDeleteCookieTool } from "../browser/deleteCookie" +import { BrowserGetLocalStorageTool } from "../browser/getLocalStorage" +import { BrowserSetLocalStorageTool } from "../browser/setLocalStorage" +import { BrowserClearStorageTool } from "../browser/clearStorage" +import { BrowserSetViewportTool } from "../browser/setViewport" +import { BrowserSetUserAgentTool } from "../browser/setUserAgent" +import { BrowserSetGeolocationTool } from "../browser/setGeolocation" +import { BrowserSetTimezoneTool } from 
"../browser/setTimezone" +import { BrowserAssertTextTool } from "../browser/assertText" +import { BrowserAssertVisibleTool } from "../browser/assertVisible" +import { BrowserAssertURLTool } from "../browser/assertURL" export namespace ToolRegistry { const log = Log.create({ service: "tool.registry" }) @@ -108,6 +156,54 @@ export namespace ToolRegistry { WebSearchTool, CodeSearchTool, SkillTool, + BrowserNavigateTool, + BrowserClickTool, + BrowserFillTool, + BrowserScreenshotTool, + BrowserEvaluateTool, + BrowserUrlsTool, + BrowserCloseTool, + BrowserSetTool, + BrowserOpenTool, + BrowserHeadedTool, + BrowserSwitchTabTool, + BrowserCloseTabTool, + BrowserDuplicateTabTool, + BrowserReopenTabTool, + BrowserHoverTool, + BrowserRightClickTool, + BrowserDoubleClickTool, + BrowserDragDropTool, + BrowserScrollTool, + BrowserScrollToTool, + BrowserScrollTopTool, + BrowserScrollBottomTool, + BrowserCheckTool, + BrowserSelectTool, + BrowserClearTool, + BrowserGetValueTool, + BrowserBackTool, + BrowserForwardTool, + BrowserRefreshTool, + BrowserWaitForElementTool, + BrowserWaitForURLTool, + BrowserGetTextTool, + BrowserGetAttributeTool, + BrowserGetCSSTool, + BrowserGetPageSourceTool, + BrowserGetCookiesTool, + BrowserSetCookieTool, + BrowserDeleteCookieTool, + BrowserGetLocalStorageTool, + BrowserSetLocalStorageTool, + BrowserClearStorageTool, + BrowserSetViewportTool, + BrowserSetUserAgentTool, + BrowserSetGeolocationTool, + BrowserSetTimezoneTool, + BrowserAssertTextTool, + BrowserAssertVisibleTool, + BrowserAssertURLTool, ...(Flag.OPENCODE_EXPERIMENTAL_LSP_TOOL ? [LspTool] : []), ...(config.experimental?.batch_tool === true ? [BatchTool] : []), ...(Flag.OPENCODE_EXPERIMENTAL_PLAN_MODE && Flag.OPENCODE_CLIENT === "cli" ? [PlanExitTool, PlanEnterTool] : []),