diff --git a/knip.json b/knip.json index a847111981b..e15c62bda1b 100644 --- a/knip.json +++ b/knip.json @@ -16,7 +16,7 @@ "project": ["**/*.ts"] }, "webview-ui": { - "entry": ["src/index.tsx"], + "entry": ["src/index.tsx", "src/browser-panel.tsx"], "project": ["src/**/*.{ts,tsx}", "../src/shared/*.ts"] }, "packages/{build,cloud,evals,ipc,telemetry,types}": { diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts index 09737f9ea6d..548da850b62 100644 --- a/packages/types/src/message.ts +++ b/packages/types/src/message.ts @@ -166,6 +166,7 @@ export const clineSays = [ "shell_integration_warning", "browser_action", "browser_action_result", + "browser_session_status", "mcp_server_request_started", "mcp_server_response", "subtask_result", diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index 0955d5d111f..171209f6fba 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -437,8 +437,32 @@ export async function presentAssistantMessage(cline: Task) { return text.replace(tagRegex, "") } - if (block.name !== "browser_action") { - await cline.browserSession.closeBrowser() + // Keep browser open during an active session so other tools can run. + // Session is active if we've seen any browser_action_result and the last browser_action is not "close". 
+ try { + const messages = cline.clineMessages || [] + const hasStarted = messages.some((m: any) => m.say === "browser_action_result") + let isClosed = false + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i] + if (m.say === "browser_action") { + try { + const act = JSON.parse(m.text || "{}") + isClosed = act.action === "close" + } catch {} + break + } + } + const sessionActive = hasStarted && !isClosed + // Only auto-close when no active browser session is present, and this isn't a browser_action + if (!sessionActive && block.name !== "browser_action") { + await cline.browserSession.closeBrowser() + } + } catch { + // On any unexpected error, fall back to conservative behavior + if (block.name !== "browser_action") { + await cline.browserSession.closeBrowser() + } } if (!block.partial) { @@ -645,13 +669,14 @@ export async function presentAssistantMessage(cline: Task) { }) break case "browser_action": - await browserActionTool.handle(cline, block as ToolUse<"browser_action">, { + await browserActionTool( + cline, + block as ToolUse<"browser_action">, askApproval, handleError, pushToolResult, removeClosingTag, - toolProtocol, - }) + ) break case "execute_command": await executeCommandTool.handle(cline, block as ToolUse<"execute_command">, { diff --git a/src/core/environment/__tests__/getEnvironmentDetails.spec.ts b/src/core/environment/__tests__/getEnvironmentDetails.spec.ts index 9b346aeea9f..ef6d0513d47 100644 --- a/src/core/environment/__tests__/getEnvironmentDetails.spec.ts +++ b/src/core/environment/__tests__/getEnvironmentDetails.spec.ts @@ -118,6 +118,10 @@ describe("getEnvironmentDetails", () => { deref: vi.fn().mockReturnValue(mockProvider), [Symbol.toStringTag]: "WeakRef", } as unknown as WeakRef, + browserSession: { + isSessionActive: vi.fn().mockReturnValue(false), + getViewportSize: vi.fn().mockReturnValue({ width: 900, height: 600 }), + } as any, } // Mock other dependencies. 
@@ -393,7 +397,6 @@ describe("getEnvironmentDetails", () => { const result = await getEnvironmentDetails(cline as Task) expect(result).toContain("REMINDERS") }) - it("should include git status when maxGitStatusFiles > 0", async () => { ;(getGitStatus as Mock).mockResolvedValue("## main\nM file1.ts") mockProvider.getState.mockResolvedValue({ @@ -456,4 +459,18 @@ describe("getEnvironmentDetails", () => { expect(getGitStatus).toHaveBeenCalledWith(mockCwd, 5) }) + + it("should NOT include Browser Session Status when inactive", async () => { + const result = await getEnvironmentDetails(mockCline as Task) + expect(result).not.toContain("# Browser Session Status") + }) + + it("should include Browser Session Status with current viewport when active", async () => { + ;(mockCline.browserSession as any).isSessionActive = vi.fn().mockReturnValue(true) + ;(mockCline.browserSession as any).getViewportSize = vi.fn().mockReturnValue({ width: 1280, height: 720 }) + + const result = await getEnvironmentDetails(mockCline as Task) + expect(result).toContain("Active - A browser session is currently open and ready for browser_action commands") + expect(result).toContain("Current viewport size: 1280x720 pixels.") + }) }) diff --git a/src/core/environment/getEnvironmentDetails.ts b/src/core/environment/getEnvironmentDetails.ts index bf0e3c8392b..e42db79d402 100644 --- a/src/core/environment/getEnvironmentDetails.ts +++ b/src/core/environment/getEnvironmentDetails.ts @@ -248,6 +248,35 @@ export async function getEnvironmentDetails(cline: Task, includeFileDetails: boo } } + // Add browser session status - Only show when active to prevent cluttering context + const isBrowserActive = cline.browserSession.isSessionActive() + + if (isBrowserActive) { + // Build viewport info for status (prefer actual viewport if available, else fallback to configured setting) + const configuredViewport = (state?.browserViewportSize as string | undefined) ?? 
"900x600" + let configuredWidth: number | undefined + let configuredHeight: number | undefined + if (configuredViewport.includes("x")) { + const parts = configuredViewport.split("x").map((v) => Number(v)) + configuredWidth = parts[0] + configuredHeight = parts[1] + } + + let actualWidth: number | undefined + let actualHeight: number | undefined + const vp = cline.browserSession.getViewportSize?.() + if (vp) { + actualWidth = vp.width + actualHeight = vp.height + } + + const width = actualWidth ?? configuredWidth + const height = actualHeight ?? configuredHeight + const viewportInfo = width && height ? `\nCurrent viewport size: ${width}x${height} pixels.` : "" + + details += `\n# Browser Session Status\nActive - A browser session is currently open and ready for browser_action commands${viewportInfo}\n` + } + if (includeFileDetails) { details += `\n\n# Current Workspace Directory (${cline.cwd.toPosix()}) Files\n` const isDesktop = arePathsEqual(cline.cwd, path.join(os.homedir(), "Desktop")) diff --git a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap index 03e66365c7c..323aa0bdbe4 100644 --- a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap +++ b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap @@ -228,10 +228,12 @@ Example for appending to the end of file: ## browser_action Description: Request to interact with a Puppeteer-controlled browser. Every action, except `close`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. -- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. 
If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. -- While the browser is active, only the `browser_action` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. -- The browser window has a resolution of **1280x800** pixels. When performing any click actions, ensure the coordinates are within this resolution range. -- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. + +**Browser Session Lifecycle:** +- Browser sessions **start** with `launch` and **end** with `close` +- The session remains active across multiple messages and tool uses +- You can use other tools while the browser session is active - it will stay open in the background + Parameters: - action: (required) The action to perform. The available actions are: * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. @@ -245,6 +247,12 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the `text` parameter to provide the string to type. + * press: Press a single keyboard key or key combination (e.g., Enter, Tab, Escape, Cmd+K, Shift+Enter). + - Use with the `text` parameter to provide the key name or combination. + - For single keys: Enter, Tab, Escape, etc. 
+ - For key combinations: Cmd+K, Ctrl+C, Shift+Enter, Alt+F4, etc. + - Supported modifiers: Cmd/Command/Meta, Ctrl/Control, Shift, Alt/Option + - Example: Cmd+K or Shift+Enter * resize: Resize the viewport to a specific w,h size. - Use with the `size` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. @@ -253,17 +261,24 @@ Parameters: - Example: `close` - url: (optional) Use this for providing the URL for the `launch` action. * Example: https://example.com -- coordinate: (optional) The X and Y coordinates for the `click` and `hover` actions. Coordinates should be within the **1280x800** resolution. - * Example: 450,300 +- coordinate: (optional) The X and Y coordinates for the `click` and `hover` actions. + * **CRITICAL**: Screenshot dimensions are NOT the same as the browser viewport dimensions + * Format: x,y@widthxheight + * Measure x,y on the screenshot image you see in chat + * The widthxheight MUST be the EXACT pixel size of that screenshot image (never the browser viewport) + * Never use the browser viewport size for widthxheight - the viewport is only a reference and is often larger than the screenshot + * Images are often downscaled before you see them, so the screenshot's dimensions will likely be smaller than the viewport + * Example A: If the screenshot you see is 1094x1092 and you want to click (450,300) on that image, use: 450,300@1094x1092 + * Example B: If the browser viewport is 1280x800 but the screenshot is 1000x625 and you want to click (500,300) on the screenshot, use: 500,300@1000x625 - size: (optional) The width and height for the `resize` action. * Example: 1280,720 - text: (optional) Use this for providing the text for the `type` action. * Example: Hello, world! 
Usage: -Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close) URL to launch the browser at (optional) -x,y coordinates (optional) +x,y@widthxheight coordinates (optional) Text to type (optional) @@ -273,10 +288,10 @@ Example: Requesting to launch a browser at https://example.com https://example.com -Example: Requesting to click on the element at coordinates 450,300 +Example: Requesting to click on the element at coordinates 450,300 on a 1024x768 image click -450,300 +450,300@1024x768 ## ask_followup_question diff --git a/src/core/prompts/tools/browser-action.ts b/src/core/prompts/tools/browser-action.ts index e1b33b9d7d1..3f9a5c1ae29 100644 --- a/src/core/prompts/tools/browser-action.ts +++ b/src/core/prompts/tools/browser-action.ts @@ -6,10 +6,12 @@ export function getBrowserActionDescription(args: ToolArgs): string | undefined } return `## browser_action Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. -- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. -- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. 
For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. -- The browser window has a resolution of **${args.browserViewportSize}** pixels. When performing any click actions, ensure the coordinates are within this resolution range. -- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. + +**Browser Session Lifecycle:** +- Browser sessions **start** with \`launch\` and **end** with \`close\` +- The session remains active across multiple messages and tool uses +- You can use other tools while the browser session is active - it will stay open in the background + Parameters: - action: (required) The action to perform. The available actions are: * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. @@ -23,6 +25,12 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the \`text\` parameter to provide the string to type. + * press: Press a single keyboard key or key combination (e.g., Enter, Tab, Escape, Cmd+K, Shift+Enter). + - Use with the \`text\` parameter to provide the key name or combination. + - For single keys: Enter, Tab, Escape, etc. + - For key combinations: Cmd+K, Ctrl+C, Shift+Enter, Alt+F4, etc. + - Supported modifiers: Cmd/Command/Meta, Ctrl/Control, Shift, Alt/Option + - Example: Cmd+K or Shift+Enter * resize: Resize the viewport to a specific w,h size. - Use with the \`size\` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. 
@@ -31,17 +39,24 @@ Parameters: - Example: \`close\` - url: (optional) Use this for providing the URL for the \`launch\` action. * Example: https://example.com -- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **${args.browserViewportSize}** resolution. - * Example: 450,300 +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. + * **CRITICAL**: Screenshot dimensions are NOT the same as the browser viewport dimensions + * Format: x,y@widthxheight + * Measure x,y on the screenshot image you see in chat + * The widthxheight MUST be the EXACT pixel size of that screenshot image (never the browser viewport) + * Never use the browser viewport size for widthxheight - the viewport is only a reference and is often larger than the screenshot + * Images are often downscaled before you see them, so the screenshot's dimensions will likely be smaller than the viewport + * Example A: If the screenshot you see is 1094x1092 and you want to click (450,300) on that image, use: 450,300@1094x1092 + * Example B: If the browser viewport is 1280x800 but the screenshot is 1000x625 and you want to click (500,300) on the screenshot, use: 500,300@1000x625 - size: (optional) The width and height for the \`resize\` action. * Example: 1280,720 - text: (optional) Use this for providing the text for the \`type\` action. * Example: Hello, world! 
Usage: -Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close) URL to launch the browser at (optional) -x,y coordinates (optional) +x,y@widthxheight coordinates (optional) Text to type (optional) @@ -51,9 +66,9 @@ Example: Requesting to launch a browser at https://example.com https://example.com -Example: Requesting to click on the element at coordinates 450,300 +Example: Requesting to click on the element at coordinates 450,300 on a 1024x768 image click -450,300 +450,300@1024x768 ` } diff --git a/src/core/prompts/tools/native-tools/browser_action.ts b/src/core/prompts/tools/native-tools/browser_action.ts index 6f5df50a0c3..64977780b7a 100644 --- a/src/core/prompts/tools/native-tools/browser_action.ts +++ b/src/core/prompts/tools/native-tools/browser_action.ts @@ -5,7 +5,7 @@ export default { function: { name: "browser_action", description: - "Interact with a Puppeteer-controlled browser session. Always start by launching at a URL and always finish by closing the browser. While the browser is active, do not call any other tools. Use coordinates within the viewport to hover or click, provide text for typing, and ensure actions are grounded in the latest screenshot and console logs.", + "Interact with a browser session. Always start by launching at a URL and always finish by closing the browser. While the browser is active, do not call any other tools. 
Use coordinates within the viewport to hover or click, provide text for typing, and ensure actions are grounded in the latest screenshot and console logs.", strict: true, parameters: { type: "object", @@ -13,51 +13,29 @@ export default { action: { type: "string", description: "Browser action to perform", - enum: ["launch", "hover", "click", "type", "resize", "scroll_down", "scroll_up", "close"], + enum: ["launch", "click", "hover", "type", "press", "scroll_down", "scroll_up", "resize", "close"], }, url: { type: ["string", "null"], description: "URL to open when performing the launch action; must include protocol", }, coordinate: { - type: ["object", "null"], + type: ["string", "null"], description: - "Screen coordinate for hover or click actions; target the center of the desired element", - properties: { - x: { - type: "number", - description: "Horizontal pixel position within the current viewport", - }, - y: { - type: "number", - description: "Vertical pixel position within the current viewport", - }, - }, - required: ["x", "y"], - additionalProperties: false, + "Screen coordinate for hover or click actions in format 'x,y@WIDTHxHEIGHT' where x,y is the target position on the screenshot image and WIDTHxHEIGHT is the exact pixel dimensions of the screenshot image (not the browser viewport). Example: '450,203@900x600' means click at (450,203) on a 900x600 screenshot. The coordinates will be automatically scaled to match the actual viewport dimensions.", }, size: { - type: ["object", "null"], - description: "Viewport dimensions to apply when performing the resize action", - properties: { - width: { - type: "number", - description: "Viewport width in pixels", - }, - height: { - type: "number", - description: "Viewport height in pixels", - }, - }, - required: ["width", "height"], - additionalProperties: false, + type: ["string", "null"], + description: + "Viewport dimensions for the resize action in format 'WIDTHxHEIGHT' or 'WIDTH,HEIGHT'. 
Example: '1280x800' or '1280,800'", }, text: { type: ["string", "null"], - description: "Text to type when performing the type action", + description: + "Text to type when performing the type action, or key name to press when performing the press action (e.g., 'Enter', 'Tab', 'Escape')", }, }, - required: ["action", "url", "coordinate", "size", "text"], + required: ["action"], additionalProperties: false, }, }, diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 7c0355e4982..925f4bf7e86 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -385,7 +385,28 @@ export class Task extends EventEmitter implements TaskLike { this.autoApprovalHandler = new AutoApprovalHandler() this.urlContentFetcher = new UrlContentFetcher(provider.context) - this.browserSession = new BrowserSession(provider.context) + this.browserSession = new BrowserSession(provider.context, (isActive: boolean) => { + // Add a message to indicate browser session status change + this.say("browser_session_status", isActive ? "Browser session opened" : "Browser session closed") + // Broadcast to browser panel + this.broadcastBrowserSessionUpdate() + + // When a browser session becomes active, automatically open/reveal the Browser Session tab + if (isActive) { + try { + // Lazy-load to avoid circular imports at module load time + const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager") + const providerRef = this.providerRef.deref() + if (providerRef) { + BrowserSessionPanelManager.getInstance(providerRef) + .show() + .catch(() => {}) + } + } catch (err) { + console.error("[Task] Failed to auto-open Browser Session panel:", err) + } + } + }) this.diffEnabled = enableDiff this.fuzzyMatchThreshold = fuzzyMatchThreshold this.consecutiveMistakeLimit = consecutiveMistakeLimit ?? 
DEFAULT_CONSECUTIVE_MISTAKE_LIMIT @@ -1366,6 +1387,11 @@ export class Task extends EventEmitter implements TaskLike { contextCondense, }) } + + // Broadcast browser session updates to panel when browser-related messages are added + if (type === "browser_action" || type === "browser_action_result" || type === "browser_session_status") { + this.broadcastBrowserSessionUpdate() + } } async sayAndCreateMissingParamError(toolName: ToolName, paramName: string, relPath?: string) { @@ -1786,6 +1812,16 @@ export class Task extends EventEmitter implements TaskLike { } catch (error) { console.error("Error closing browser session:", error) } + // Also close the Browser Session panel when the task is disposed + try { + const provider = this.providerRef.deref() + if (provider) { + const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager") + BrowserSessionPanelManager.getInstance(provider).dispose() + } + } catch (error) { + console.error("Error closing browser session panel:", error) + } try { if (this.rooIgnoreController) { @@ -3488,6 +3524,41 @@ export class Task extends EventEmitter implements TaskLike { return this.workspacePath } + /** + * Broadcast browser session updates to the browser panel (if open) + */ + private broadcastBrowserSessionUpdate(): void { + const provider = this.providerRef.deref() + if (!provider) { + return + } + + try { + const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager") + const panelManager = BrowserSessionPanelManager.getInstance(provider) + + // Get browser session messages + const browserSessionStartIndex = this.clineMessages.findIndex( + (m) => + m.ask === "browser_action_launch" || + (m.say === "browser_session_status" && m.text?.includes("opened")), + ) + + const browserSessionMessages = + browserSessionStartIndex !== -1 ? this.clineMessages.slice(browserSessionStartIndex) : [] + + const isBrowserSessionActive = this.browserSession?.isSessionActive() ?? 
false + + // Update the panel asynchronously + panelManager.updateBrowserSession(browserSessionMessages, isBrowserSessionActive).catch((error: Error) => { + console.error("Failed to broadcast browser session update:", error) + }) + } catch (error) { + // Silently fail if panel manager is not available + console.debug("Browser panel not available for update:", error) + } + } + /** * Process any queued messages by dequeuing and submitting them. * This ensures that queued user messages are sent when appropriate, diff --git a/src/core/tools/BrowserActionTool.ts b/src/core/tools/BrowserActionTool.ts index 3e8f6f176e4..b9afae2fdb1 100644 --- a/src/core/tools/BrowserActionTool.ts +++ b/src/core/tools/BrowserActionTool.ts @@ -1,7 +1,5 @@ -import type { BrowserActionParams, Coordinate, Size } from "@roo-code/types" import { Task } from "../task/Task" -import { BaseTool, ToolCallbacks } from "./BaseTool" -import type { ToolUse } from "../../shared/tools" +import { ToolUse, AskApproval, HandleError, PushToolResult, RemoveClosingTag } from "../../shared/tools" import { BrowserAction, BrowserActionResult, @@ -9,171 +7,186 @@ import { ClineSayBrowserAction, } from "../../shared/ExtensionMessage" import { formatResponse } from "../prompts/responses" +import { Anthropic } from "@anthropic-ai/sdk" +import { scaleCoordinate } from "../../shared/browserUtils" -export class BrowserActionTool extends BaseTool<"browser_action"> { - readonly name = "browser_action" as const - - parseLegacy(params: Partial>): BrowserActionParams { - const action = params.action as BrowserAction | undefined - - // Parse coordinate if present - XML protocol sends "x,y" format - let coordinate: Coordinate | undefined - if (params.coordinate) { - // Try parsing as "x,y" string first (XML protocol) - const parts = params.coordinate.split(",") - if (parts.length === 2) { - const x = parseInt(parts[0], 10) - const y = parseInt(parts[1], 10) - if (!isNaN(x) && !isNaN(y)) { - coordinate = { x, y } - } - } else { - 
// Try parsing as JSON object (fallback) - try { - const parsed = JSON.parse(params.coordinate) - if (parsed && typeof parsed.x === "number" && typeof parsed.y === "number") { - coordinate = { x: parsed.x, y: parsed.y } - } - } catch (error) { - // Invalid coordinate format, leave undefined - } - } - } +export async function browserActionTool( + cline: Task, + block: ToolUse, + askApproval: AskApproval, + handleError: HandleError, + pushToolResult: PushToolResult, + removeClosingTag: RemoveClosingTag, +) { + const action: BrowserAction | undefined = block.params.action as BrowserAction + const url: string | undefined = block.params.url + const coordinate: string | undefined = block.params.coordinate + const text: string | undefined = block.params.text + const size: string | undefined = block.params.size - // Parse size if present - XML protocol sends "width,height" format - let size: Size | undefined - if (params.size) { - // Try parsing as "width,height" string first (XML protocol) - const parts = params.size.split(",") - if (parts.length === 2) { - const width = parseInt(parts[0], 10) - const height = parseInt(parts[1], 10) - if (!isNaN(width) && !isNaN(height)) { - size = { width, height } - } - } else { - // Try parsing as JSON object (fallback) - try { - const parsed = JSON.parse(params.size) - if (parsed && typeof parsed.width === "number" && typeof parsed.height === "number") { - size = { width: parsed.width, height: parsed.height } - } - } catch (error) { - // Invalid size format, leave undefined - } - } + if (!action || !browserActions.includes(action)) { + // checking for action to ensure it is complete and valid + if (!block.partial) { + // if the block is complete and we don't have a valid action cline is a mistake + cline.consecutiveMistakeCount++ + cline.recordToolError("browser_action") + pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "action")) + // Do not close the browser on parameter validation errors } - return { - 
action: action!, - url: params.url, - coordinate, - size, - text: params.text, - } + return } - async execute(params: BrowserActionParams, task: Task, callbacks: ToolCallbacks): Promise { - const { action, url, coordinate, text, size } = params - const { handleError, pushToolResult } = callbacks - - // Validate action - if (!action || !browserActions.includes(action)) { - task.consecutiveMistakeCount++ - task.recordToolError("browser_action") - pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "action")) - await task.browserSession.closeBrowser() + try { + if (block.partial) { + if (action === "launch") { + await cline.ask("browser_action_launch", removeClosingTag("url", url), block.partial).catch(() => {}) + } else { + await cline.say( + "browser_action", + JSON.stringify({ + action: action as BrowserAction, + coordinate: removeClosingTag("coordinate", coordinate), + text: removeClosingTag("text", text), + size: removeClosingTag("size", size), + } satisfies ClineSayBrowserAction), + undefined, + block.partial, + ) + } return - } - - try { + } else { + // Initialize with empty object to avoid "used before assigned" errors let browserActionResult: BrowserActionResult = {} if (action === "launch") { if (!url) { - task.consecutiveMistakeCount++ - task.recordToolError("browser_action") - pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "url")) - await task.browserSession.closeBrowser() + cline.consecutiveMistakeCount++ + cline.recordToolError("browser_action") + pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "url")) + // Do not close the browser on parameter validation errors return } - task.consecutiveMistakeCount = 0 - const didApprove = await callbacks.askApproval("browser_action_launch", url) + cline.consecutiveMistakeCount = 0 + const didApprove = await askApproval("browser_action_launch", url) if (!didApprove) { return } - await task.say("browser_action_result", "") - await 
task.browserSession.launchBrowser() - browserActionResult = await task.browserSession.navigateToUrl(url) + // NOTE: It's okay that we call cline message since the partial inspect_site is finished streaming. + // The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. + // For example the api_req_finished message would interfere with the partial message, so we needed to remove that. + + // Launch browser first (this triggers "Browser session opened" status message) + await cline.browserSession.launchBrowser() + + // Create browser_action say message AFTER launching so status appears first + await cline.say( + "browser_action", + JSON.stringify({ + action: "launch" as BrowserAction, + text: url, + } satisfies ClineSayBrowserAction), + undefined, + false, + ) + + browserActionResult = await cline.browserSession.navigateToUrl(url) } else { - // Validate parameters for specific actions + // Variables to hold validated and processed parameters + let processedCoordinate = coordinate + if (action === "click" || action === "hover") { if (!coordinate) { - task.consecutiveMistakeCount++ - task.recordToolError("browser_action") - pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "coordinate")) - await task.browserSession.closeBrowser() + cline.consecutiveMistakeCount++ + cline.recordToolError("browser_action") + pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "coordinate")) + // Do not close the browser on parameter validation errors + return // can't be within an inner switch + } + + // Get viewport dimensions from the browser session + const viewportSize = cline.browserSession.getViewportSize() + const viewportWidth = viewportSize.width || 900 // default to 900 if not available + const viewportHeight = viewportSize.height || 600 // default to 600 if not available + + // Scale coordinate from image dimensions to viewport dimensions + try { + processedCoordinate = 
scaleCoordinate(coordinate, viewportWidth, viewportHeight) + } catch (error) { + cline.consecutiveMistakeCount++ + cline.recordToolError("browser_action") + pushToolResult( + await cline.sayAndCreateMissingParamError( + "browser_action", + "coordinate", + error instanceof Error ? error.message : String(error), + ), + ) return } } - if (action === "type") { + if (action === "type" || action === "press") { if (!text) { - task.consecutiveMistakeCount++ - task.recordToolError("browser_action") - pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "text")) - await task.browserSession.closeBrowser() + cline.consecutiveMistakeCount++ + cline.recordToolError("browser_action") + pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "text")) + // Do not close the browser on parameter validation errors return } } if (action === "resize") { if (!size) { - task.consecutiveMistakeCount++ - task.recordToolError("browser_action") - pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "size")) - await task.browserSession.closeBrowser() + cline.consecutiveMistakeCount++ + cline.recordToolError("browser_action") + pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size")) + // Do not close the browser on parameter validation errors return } } - task.consecutiveMistakeCount = 0 + cline.consecutiveMistakeCount = 0 - await task.say( - "browser_action", - JSON.stringify({ - action: action as BrowserAction, - coordinate: coordinate ? 
`${coordinate.x},${coordinate.y}` : undefined, - text, - } satisfies ClineSayBrowserAction), - undefined, - false, - ) + // Prepare say payload; include executedCoordinate for pointer actions + const sayPayload: ClineSayBrowserAction & { executedCoordinate?: string } = { + action: action as BrowserAction, + coordinate, + text, + size, + } + if ((action === "click" || action === "hover") && processedCoordinate) { + sayPayload.executedCoordinate = processedCoordinate + } + await cline.say("browser_action", JSON.stringify(sayPayload), undefined, false) switch (action) { case "click": - browserActionResult = await task.browserSession.click(`${coordinate!.x},${coordinate!.y}`) + browserActionResult = await cline.browserSession.click(processedCoordinate!) break case "hover": - browserActionResult = await task.browserSession.hover(`${coordinate!.x},${coordinate!.y}`) + browserActionResult = await cline.browserSession.hover(processedCoordinate!) break case "type": - browserActionResult = await task.browserSession.type(text!) + browserActionResult = await cline.browserSession.type(text!) + break + case "press": + browserActionResult = await cline.browserSession.press(text!) break case "scroll_down": - browserActionResult = await task.browserSession.scrollDown() + browserActionResult = await cline.browserSession.scrollDown() break case "scroll_up": - browserActionResult = await task.browserSession.scrollUp() + browserActionResult = await cline.browserSession.scrollUp() break case "resize": - browserActionResult = await task.browserSession.resize(`${size!.width},${size!.height}`) + browserActionResult = await cline.browserSession.resize(size!) 
break case "close": - browserActionResult = await task.browserSession.closeBrowser() + browserActionResult = await cline.browserSession.closeBrowser() break } } @@ -183,62 +196,63 @@ export class BrowserActionTool extends BaseTool<"browser_action"> { case "click": case "hover": case "type": + case "press": case "scroll_down": case "scroll_up": - case "resize": - await task.say("browser_action_result", JSON.stringify(browserActionResult)) + case "resize": { + await cline.say("browser_action_result", JSON.stringify(browserActionResult)) - pushToolResult( - formatResponse.toolResult( - `The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${ - browserActionResult?.logs || "(No new logs)" - }\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close cline browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`, - browserActionResult?.screenshot ? [browserActionResult.screenshot] : [], - ), - ) - break + const images = browserActionResult?.screenshot ? [browserActionResult.screenshot] : [] + + let messageText = `The browser action has been executed.` + + messageText += `\n\n**CRITICAL**: When providing click/hover coordinates:` + messageText += `\n1. Screenshot dimensions != Browser viewport dimensions` + messageText += `\n2. Measure x,y on the screenshot image you see below` + messageText += `\n3. Use format: x,y@WIDTHxHEIGHT where WIDTHxHEIGHT is the EXACT pixel size of the screenshot image` + messageText += `\n4. Never use the browser viewport size for WIDTHxHEIGHT - it is only for reference and is often larger than the screenshot` + messageText += `\n5. 
Screenshots are often downscaled - always use the dimensions you see in the image` + messageText += `\nExample: Viewport 1280x800, screenshot 1000x625, click (500,300) -> 500,300@1000x625` + // Include browser viewport dimensions (for reference only) + if (browserActionResult?.viewportWidth && browserActionResult?.viewportHeight) { + messageText += `\n\nBrowser viewport: ${browserActionResult.viewportWidth}x${browserActionResult.viewportHeight}` + } + + // Include cursor position if available + if (browserActionResult?.currentMousePosition) { + messageText += `\nCursor position: ${browserActionResult.currentMousePosition}` + } + + messageText += `\n\nConsole logs:\n${browserActionResult?.logs || "(No new logs)"}\n` + + if (images.length > 0) { + const blocks = [ + ...formatResponse.imageBlocks(images), + { type: "text", text: messageText } as Anthropic.TextBlockParam, + ] + pushToolResult(blocks) + } else { + pushToolResult(messageText) + } + + break + } case "close": pushToolResult( formatResponse.toolResult( `The browser has been closed. 
You may now proceed to using other tools.`, ), ) + break } - } catch (error) { - await task.browserSession.closeBrowser() - await handleError("executing browser action", error as Error) - } - } - override async handlePartial(task: Task, block: ToolUse<"browser_action">): Promise { - const action: BrowserAction | undefined = block.params.action as BrowserAction - const url: string | undefined = block.params.url - const coordinate: string | undefined = block.params.coordinate - const text: string | undefined = block.params.text - - if (!action || !browserActions.includes(action)) { return } - - if (action === "launch") { - await task - .ask("browser_action_launch", this.removeClosingTag("url", url, block.partial), block.partial) - .catch(() => {}) - } else { - await task.say( - "browser_action", - JSON.stringify({ - action: action as BrowserAction, - coordinate: this.removeClosingTag("coordinate", coordinate, block.partial), - text: this.removeClosingTag("text", text, block.partial), - } satisfies ClineSayBrowserAction), - undefined, - block.partial, - ) - } + } catch (error) { + // Keep the browser session alive on errors; report the error without terminating the session + await handleError("executing browser action", error) + return } } - -export const browserActionTool = new BrowserActionTool() diff --git a/src/core/tools/__tests__/BrowserActionTool.coordinateScaling.spec.ts b/src/core/tools/__tests__/BrowserActionTool.coordinateScaling.spec.ts new file mode 100644 index 00000000000..4294fff4d3a --- /dev/null +++ b/src/core/tools/__tests__/BrowserActionTool.coordinateScaling.spec.ts @@ -0,0 +1,84 @@ +// Test coordinate scaling functionality in browser actions +import { describe, it, expect } from "vitest" +import { scaleCoordinate } from "../../../shared/browserUtils" + +describe("Browser Action Coordinate Scaling", () => { + describe("Coordinate format validation", () => { + it("should match valid coordinate format with image dimensions", () => { + const 
validFormats = [ + "450,300@1024x768", + "0,0@1920x1080", + "1920,1080@1920x1080", + "100,200@800x600", + " 273 , 273 @ 1280x800 ", + "267,273@1280,800", // comma separator for dimensions + "450,300@1024,768", // comma separator for dimensions + ] + + validFormats.forEach((coord) => { + // Should not throw + expect(() => scaleCoordinate(coord, 900, 600)).not.toThrow() + }) + }) + + it("should not match invalid coordinate formats", () => { + const invalidFormats = [ + "450,300", // missing image dimensions + "450,300@", // incomplete dimensions + "450,300@1024", // missing height + "450,300@1024x", // missing height value + "@1024x768", // missing coordinates + "450@1024x768", // missing y coordinate + ",300@1024x768", // missing x coordinate + "450,300@1024x768x2", // extra dimension + "a,b@1024x768", // non-numeric coordinates + "450,300@axb", // non-numeric dimensions + ] + + invalidFormats.forEach((coord) => { + expect(() => scaleCoordinate(coord, 900, 600)).toThrow() + }) + }) + }) + + describe("Coordinate scaling logic", () => { + it("should correctly scale coordinates from image to viewport", () => { + // Test case 1: Same dimensions (no scaling) + expect(scaleCoordinate("450,300@900x600", 900, 600)).toBe("450,300") + + // Test case 2: Half dimensions (2x upscale) + expect(scaleCoordinate("225,150@450x300", 900, 600)).toBe("450,300") + + // Test case 3: Double dimensions (0.5x downscale) + expect(scaleCoordinate("900,600@1800x1200", 900, 600)).toBe("450,300") + + // Test case 4: Different aspect ratio + expect(scaleCoordinate("512,384@1024x768", 1920, 1080)).toBe("960,540") + + // Test case 5: Edge cases (0,0) + expect(scaleCoordinate("0,0@1024x768", 1920, 1080)).toBe("0,0") + + // Test case 6: Edge cases (max coordinates) + expect(scaleCoordinate("1024,768@1024x768", 1920, 1080)).toBe("1920,1080") + }) + + it("should throw error for invalid coordinate format", () => { + // Test invalid formats + expect(() => scaleCoordinate("450,300", 900, 
600)).toThrow("Invalid coordinate format") + expect(() => scaleCoordinate("450,300@1024", 900, 600)).toThrow("Invalid coordinate format") + expect(() => scaleCoordinate("invalid", 900, 600)).toThrow("Invalid coordinate format") + }) + + it("should handle rounding correctly", () => { + // Test rounding behavior + // 333 / 1000 * 900 = 299.7 -> rounds to 300 + expect(scaleCoordinate("333,333@1000x1000", 900, 900)).toBe("300,300") + + // 666 / 1000 * 900 = 599.4 -> rounds to 599 + expect(scaleCoordinate("666,666@1000x1000", 900, 900)).toBe("599,599") + + // 500 / 1000 * 900 = 450.0 -> rounds to 450 + expect(scaleCoordinate("500,500@1000x1000", 900, 900)).toBe("450,450") + }) + }) +}) diff --git a/src/core/webview/BrowserSessionPanelManager.ts b/src/core/webview/BrowserSessionPanelManager.ts new file mode 100644 index 00000000000..514c1315f7f --- /dev/null +++ b/src/core/webview/BrowserSessionPanelManager.ts @@ -0,0 +1,310 @@ +import * as vscode from "vscode" +import type { ClineMessage } from "@roo-code/types" +import { getUri } from "./getUri" +import { getNonce } from "./getNonce" +import type { ClineProvider } from "./ClineProvider" +import { webviewMessageHandler } from "./webviewMessageHandler" + +export class BrowserSessionPanelManager { + private static instances: WeakMap = new WeakMap() + private panel: vscode.WebviewPanel | undefined + private disposables: vscode.Disposable[] = [] + private isReady: boolean = false + private pendingUpdate?: { messages: ClineMessage[]; isActive: boolean } + private pendingNavigateIndex?: number + private userManuallyClosedPanel: boolean = false + + private constructor(private readonly provider: ClineProvider) {} + + /** + * Get or create a BrowserSessionPanelManager instance for the given provider + */ + public static getInstance(provider: ClineProvider): BrowserSessionPanelManager { + let instance = BrowserSessionPanelManager.instances.get(provider) + if (!instance) { + instance = new BrowserSessionPanelManager(provider) + 
BrowserSessionPanelManager.instances.set(provider, instance) + } + return instance + } + + /** + * Show the browser session panel, creating it if necessary + */ + public async show(): Promise { + await this.createOrShowPanel() + + // Send initial browser session data + const task = this.provider.getCurrentTask() + if (task) { + const messages = task.clineMessages || [] + const browserSessionStartIndex = messages.findIndex( + (m) => + m.ask === "browser_action_launch" || + (m.say === "browser_session_status" && m.text?.includes("opened")), + ) + const browserSessionMessages = + browserSessionStartIndex !== -1 ? messages.slice(browserSessionStartIndex) : [] + const isBrowserSessionActive = task.browserSession?.isSessionActive() ?? false + + await this.updateBrowserSession(browserSessionMessages, isBrowserSessionActive) + } + } + + private async createOrShowPanel(): Promise { + // If panel already exists, show it + if (this.panel) { + this.panel.reveal(vscode.ViewColumn.One) + return + } + + const extensionUri = this.provider.context.extensionUri + const extensionMode = this.provider.context.extensionMode + + // Create new panel + this.panel = vscode.window.createWebviewPanel("roo.browserSession", "Browser Session", vscode.ViewColumn.One, { + enableScripts: true, + retainContextWhenHidden: true, + localResourceRoots: [extensionUri], + }) + + // Set up the webview's HTML content + this.panel.webview.html = + extensionMode === vscode.ExtensionMode.Development + ? 
await this.getHMRHtmlContent(this.panel.webview, extensionUri) + : this.getHtmlContent(this.panel.webview, extensionUri) + + // Wire message channel for this panel (state handshake + actions) + this.panel.webview.onDidReceiveMessage( + async (message: any) => { + try { + // Let the shared handler process commands that work for any webview + if (message?.type) { + await webviewMessageHandler(this.provider as any, message) + } + // Panel-specific readiness and initial state + if (message?.type === "webviewDidLaunch") { + this.isReady = true + // Send full extension state to this panel (the sidebar postState targets the main webview) + const state = await (this.provider as any).getStateToPostToWebview?.() + if (state) { + await this.panel?.webview.postMessage({ type: "state", state }) + } + // Flush any pending browser session update queued before readiness + if (this.pendingUpdate) { + await this.updateBrowserSession(this.pendingUpdate.messages, this.pendingUpdate.isActive) + this.pendingUpdate = undefined + } + // Flush any pending navigation request queued before readiness + if (this.pendingNavigateIndex !== undefined) { + await this.navigateToStep(this.pendingNavigateIndex) + this.pendingNavigateIndex = undefined + } + } + } catch (err) { + console.error("[BrowserSessionPanel] onDidReceiveMessage error:", err) + } + }, + undefined, + this.disposables, + ) + + // Handle panel disposal - track that user closed it manually + this.panel.onDidDispose( + () => { + // Mark that user manually closed the panel (unless we're programmatically disposing) + if (this.panel) { + this.userManuallyClosedPanel = true + } + this.panel = undefined + this.dispose() + }, + null, + this.disposables, + ) + } + + public async updateBrowserSession(messages: ClineMessage[], isBrowserSessionActive: boolean): Promise { + if (!this.panel) { + return + } + // If the panel isn't ready yet, queue the latest snapshot to post after handshake + if (!this.isReady) { + this.pendingUpdate = { messages, 
isActive: isBrowserSessionActive } + return + } + + await this.panel.webview.postMessage({ + type: "browserSessionUpdate", + browserSessionMessages: messages, + isBrowserSessionActive, + }) + } + + /** + * Navigate the Browser Session panel to a specific step index. + * If the panel isn't ready yet, queue the navigation to run after handshake. + */ + public async navigateToStep(stepIndex: number): Promise { + if (!this.panel) { + return + } + if (!this.isReady) { + this.pendingNavigateIndex = stepIndex + return + } + + await this.panel.webview.postMessage({ + type: "browserSessionNavigate", + stepIndex, + }) + } + + /** + * Reset the manual close flag (call this when a new browser session launches) + */ + public resetManualCloseFlag(): void { + this.userManuallyClosedPanel = false + } + + /** + * Check if auto-opening should be allowed (not manually closed by user) + */ + public shouldAllowAutoOpen(): boolean { + return !this.userManuallyClosedPanel + } + + /** + * Whether the Browser Session panel is currently open. + */ + public isOpen(): boolean { + return !!this.panel + } + + /** + * Toggle the Browser Session panel visibility. 
+ * - If open: closes it + * - If closed: opens it and sends initial session snapshot + */ + public async toggle(): Promise { + if (this.panel) { + this.dispose() + } else { + await this.show() + } + } + + public dispose(): void { + // Clear the panel reference before disposing to prevent marking as manual close + const panelToDispose = this.panel + this.panel = undefined + + while (this.disposables.length) { + const disposable = this.disposables.pop() + if (disposable) { + disposable.dispose() + } + } + try { + panelToDispose?.dispose() + } catch {} + this.isReady = false + this.pendingUpdate = undefined + } + + private async getHMRHtmlContent(webview: vscode.Webview, extensionUri: vscode.Uri): Promise { + const fs = require("fs") + const path = require("path") + let localPort = "5173" + + try { + const portFilePath = path.resolve(__dirname, "../../.vite-port") + if (fs.existsSync(portFilePath)) { + localPort = fs.readFileSync(portFilePath, "utf8").trim() + } + } catch (err) { + console.error("[BrowserSessionPanel:Vite] Failed to read port file:", err) + } + + const localServerUrl = `localhost:${localPort}` + const nonce = getNonce() + + const stylesUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "index.css"]) + const codiconsUri = getUri(webview, extensionUri, ["assets", "codicons", "codicon.css"]) + + const scriptUri = `http://${localServerUrl}/src/browser-panel.tsx` + + const reactRefresh = ` + + ` + + const csp = [ + "default-src 'none'", + `font-src ${webview.cspSource} data:`, + `style-src ${webview.cspSource} 'unsafe-inline' https://* http://${localServerUrl}`, + `img-src ${webview.cspSource} data:`, + `script-src 'unsafe-eval' ${webview.cspSource} http://${localServerUrl} 'nonce-${nonce}'`, + `connect-src ${webview.cspSource} ws://${localServerUrl} http://${localServerUrl}`, + ] + + return ` + + + + + + + + + Browser Session + + +
+ ${reactRefresh} + + + + ` + } + + private getHtmlContent(webview: vscode.Webview, extensionUri: vscode.Uri): string { + const stylesUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "index.css"]) + const scriptUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "browser-panel.js"]) + const codiconsUri = getUri(webview, extensionUri, ["assets", "codicons", "codicon.css"]) + + const nonce = getNonce() + + const csp = [ + "default-src 'none'", + `font-src ${webview.cspSource} data:`, + `style-src ${webview.cspSource} 'unsafe-inline'`, + `img-src ${webview.cspSource} data:`, + `script-src ${webview.cspSource} 'wasm-unsafe-eval' 'nonce-${nonce}'`, + `connect-src ${webview.cspSource}`, + ] + + return ` + + + + + + + + + Browser Session + + +
+ + + + ` + } +} diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index ff97d5f030a..deecc6c1f87 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1925,6 +1925,7 @@ export class ClineProvider openRouterImageGenerationSelectedModel, openRouterUseMiddleOutTransform, featureRoomoteControlEnabled, + isBrowserSessionActive, } = await this.getState() let cloudOrganizations: CloudOrganizationMembership[] = [] @@ -1974,6 +1975,7 @@ export class ClineProvider alwaysAllowModeSwitch: alwaysAllowModeSwitch ?? false, alwaysAllowSubtasks: alwaysAllowSubtasks ?? false, alwaysAllowUpdateTodoList: alwaysAllowUpdateTodoList ?? false, + isBrowserSessionActive, allowedMaxRequests, allowedMaxCost, autoCondenseContext: autoCondenseContext ?? true, @@ -2187,6 +2189,9 @@ export class ClineProvider ) } + // Get actual browser session state + const isBrowserSessionActive = this.getCurrentTask()?.browserSession?.isSessionActive() ?? false + // Return the same structure as before. return { apiConfiguration: providerSettings, @@ -2205,6 +2210,7 @@ export class ClineProvider alwaysAllowSubtasks: stateValues.alwaysAllowSubtasks ?? false, alwaysAllowFollowupQuestions: stateValues.alwaysAllowFollowupQuestions ?? false, alwaysAllowUpdateTodoList: stateValues.alwaysAllowUpdateTodoList ?? false, + isBrowserSessionActive, followupAutoApproveTimeoutMs: stateValues.followupAutoApproveTimeoutMs ?? 60000, diagnosticsEnabled: stateValues.diagnosticsEnabled ?? 
true, allowedMaxRequests: stateValues.allowedMaxRequests, diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index d494715691c..70876373feb 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -503,6 +503,7 @@ describe("ClineProvider", () => { const mockState: ExtensionState = { version: "1.0.0", + isBrowserSessionActive: false, clineMessages: [], taskHistory: [], shouldShowAnnouncement: false, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 8f89a9ec516..2a8d225f0e6 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -23,6 +23,7 @@ import { type ApiMessage } from "../task-persistence/apiMessages" import { saveTaskMessages } from "../task-persistence" import { ClineProvider } from "./ClineProvider" +import { BrowserSessionPanelManager } from "./BrowserSessionPanelManager" import { handleCheckpointRestoreOperation } from "./checkpointRestoreHandler" import { changeLanguage, t } from "../../i18n" import { Package } from "../../shared/package" @@ -1116,6 +1117,101 @@ export const webviewMessageHandler = async ( case "cancelTask": await provider.cancelTask() break + case "killBrowserSession": + { + const task = provider.getCurrentTask() + if (task?.browserSession) { + await task.browserSession.closeBrowser() + await provider.postStateToWebview() + } + } + break + case "openBrowserSessionPanel": + { + // Toggle the Browser Session panel (open if closed, close if open) + const panelManager = BrowserSessionPanelManager.getInstance(provider) + await panelManager.toggle() + } + break + case "showBrowserSessionPanelAtStep": + { + const panelManager = BrowserSessionPanelManager.getInstance(provider) + + // If this is a launch action, reset the manual close flag + if (message.isLaunchAction) { + panelManager.resetManualCloseFlag() + } + + // 
Show panel if: + // 1. Manual click (forceShow) - always show + // 2. Launch action - always show and reset flag + // 3. Auto-open for non-launch action - only if user hasn't manually closed + if (message.forceShow || message.isLaunchAction || panelManager.shouldAllowAutoOpen()) { + // Ensure panel is shown and populated + await panelManager.show() + + // Navigate to a specific step if provided + // For launch actions: navigate to step 0 + // For manual clicks: navigate to the clicked step + // For auto-opens of regular actions: don't navigate, let BrowserSessionRow's + // internal auto-advance logic handle it (only advances if user is on most recent step) + if (typeof message.stepIndex === "number" && message.stepIndex >= 0) { + await panelManager.navigateToStep(message.stepIndex) + } + } + } + break + case "refreshBrowserSessionPanel": + { + // Re-send the latest browser session snapshot to the panel + const panelManager = BrowserSessionPanelManager.getInstance(provider) + const task = provider.getCurrentTask() + if (task) { + const messages = task.clineMessages || [] + const browserSessionStartIndex = messages.findIndex( + (m) => + m.ask === "browser_action_launch" || + (m.say === "browser_session_status" && m.text?.includes("opened")), + ) + const browserSessionMessages = + browserSessionStartIndex !== -1 ? messages.slice(browserSessionStartIndex) : [] + const isBrowserSessionActive = task.browserSession?.isSessionActive() ?? false + await panelManager.updateBrowserSession(browserSessionMessages, isBrowserSessionActive) + } + } + break + case "allowedCommands": { + // Validate and sanitize the commands array + const commands = message.commands ?? [] + const validCommands = Array.isArray(commands) + ? commands.filter((cmd) => typeof cmd === "string" && cmd.trim().length > 0) + : [] + + await updateGlobalState("allowedCommands", validCommands) + + // Also update workspace settings. 
+ await vscode.workspace + .getConfiguration(Package.name) + .update("allowedCommands", validCommands, vscode.ConfigurationTarget.Global) + + break + } + case "deniedCommands": { + // Validate and sanitize the commands array + const commands = message.commands ?? [] + const validCommands = Array.isArray(commands) + ? commands.filter((cmd) => typeof cmd === "string" && cmd.trim().length > 0) + : [] + + await updateGlobalState("deniedCommands", validCommands) + + // Also update workspace settings. + await vscode.workspace + .getConfiguration(Package.name) + .update("deniedCommands", validCommands, vscode.ConfigurationTarget.Global) + + break + } case "openCustomModesSettings": { const customModesFilePath = await provider.customModesManager.getCustomModesFilePath() diff --git a/src/services/browser/BrowserSession.ts b/src/services/browser/BrowserSession.ts index 75b432f01d2..98f6f85e037 100644 --- a/src/services/browser/BrowserSession.ts +++ b/src/services/browser/BrowserSession.ts @@ -1,7 +1,7 @@ import * as vscode from "vscode" import * as fs from "fs/promises" import * as path from "path" -import { Browser, Page, ScreenshotOptions, TimeoutError, launch, connect } from "puppeteer-core" +import { Browser, Page, ScreenshotOptions, TimeoutError, launch, connect, KeyInput } from "puppeteer-core" // @ts-ignore import PCR from "puppeteer-chromium-resolver" import pWaitFor from "p-wait-for" @@ -25,9 +25,15 @@ export class BrowserSession { private currentMousePosition?: string private lastConnectionAttempt?: number private isUsingRemoteBrowser: boolean = false + private onStateChange?: (isActive: boolean) => void - constructor(context: vscode.ExtensionContext) { + // Track last known viewport to surface in environment details + private lastViewportWidth?: number + private lastViewportHeight?: number + + constructor(context: vscode.ExtensionContext, onStateChange?: (isActive: boolean) => void) { this.context = context + this.onStateChange = onStateChange } private async 
ensureChromiumExists(): Promise { @@ -189,21 +195,31 @@ export class BrowserSession { await this.launchLocalBrowser() } } + + // Notify that browser session is now active + if (this.browser && this.onStateChange) { + this.onStateChange(true) + } } /** * Closes the browser and resets browser state */ async closeBrowser(): Promise { - if (this.browser || this.page) { - console.log("closing browser...") + const wasActive = !!(this.browser || this.page) + if (wasActive) { if (this.isUsingRemoteBrowser && this.browser) { await this.browser.disconnect().catch(() => {}) } else { await this.browser?.close().catch(() => {}) } this.resetBrowserState() + + // Notify that browser session is now inactive + if (this.onStateChange) { + this.onStateChange(false) + } } return {} } @@ -216,12 +232,14 @@ export class BrowserSession { this.page = undefined this.currentMousePosition = undefined this.isUsingRemoteBrowser = false + this.lastViewportWidth = undefined + this.lastViewportHeight = undefined } async doAction(action: (page: Page) => Promise): Promise { if (!this.page) { throw new Error( - "Browser is not launched. This may occur if the browser was automatically closed by a non-`browser_action` tool.", + "Cannot perform browser action: no active browser session. The browser must be launched first using the 'launch' action before other browser actions can be performed.", ) } @@ -260,6 +278,11 @@ export class BrowserSession { interval: 100, }).catch(() => {}) + // Draw cursor indicator if we have a cursor position + if (this.currentMousePosition) { + await this.drawCursorIndicator(this.page, this.currentMousePosition) + } + let options: ScreenshotOptions = { encoding: "base64", @@ -291,15 +314,29 @@ export class BrowserSession { throw new Error("Failed to take screenshot.") } + // Remove cursor indicator after taking screenshot + if (this.currentMousePosition) { + await this.removeCursorIndicator(this.page) + } + // this.page.removeAllListeners() <- causes the page to crash! 
this.page.off("console", consoleListener) this.page.off("pageerror", errorListener) + // Get actual viewport dimensions + const viewport = this.page.viewport() + + // Persist last known viewport dimensions + this.lastViewportWidth = viewport?.width + this.lastViewportHeight = viewport?.height + return { screenshot, logs: logs.join("\n"), currentUrl: this.page.url(), currentMousePosition: this.currentMousePosition, + viewportWidth: viewport?.width, + viewportHeight: viewport?.height, } } @@ -453,6 +490,64 @@ export class BrowserSession { } } + /** + * Force links and window.open to navigate in the same tab. + * This makes clicks on anchors with target="_blank" stay in the current page + * and also intercepts window.open so SPA/open-in-new-tab patterns don't spawn popups. + */ + private async forceLinksToSameTab(page: Page): Promise { + try { + await page.evaluate(() => { + try { + // Ensure we only install once per document + if ((window as any).__ROO_FORCE_SAME_TAB__) return + ;(window as any).__ROO_FORCE_SAME_TAB__ = true + + // Override window.open to navigate current tab instead of creating a new one + const originalOpen = window.open + window.open = function (url: string | URL, target?: string, features?: string) { + try { + const href = typeof url === "string" ? 
url : String(url) + location.href = href + } catch { + // fall back to original if something unexpected occurs + try { + return originalOpen.apply(window, [url as any, "_self", features]) as any + } catch {} + } + return null as any + } as any + + // Rewrite anchors that explicitly open new tabs + document.querySelectorAll('a[target="_blank"]').forEach((a) => { + a.setAttribute("target", "_self") + }) + + // Defensive capture: if an element still tries to open in a new tab, force same-tab + document.addEventListener( + "click", + (ev) => { + const el = (ev.target as HTMLElement | null)?.closest?.( + 'a[target="_blank"]', + ) as HTMLAnchorElement | null + if (el && el.href) { + ev.preventDefault() + try { + location.href = el.href + } catch {} + } + }, + { capture: true, passive: false }, + ) + } catch { + // no-op; forcing same-tab is best-effort + } + }) + } catch { + // If evaluate fails (e.g., cross-origin/state), continue without breaking the action + } + } + /** * Handles mouse interaction with network activity monitoring */ @@ -463,6 +558,9 @@ export class BrowserSession { ): Promise { const [x, y] = coordinate.split(",").map(Number) + // Force any new-tab behavior (target="_blank", window.open) to stay in the same tab + await this.forceLinksToSameTab(page) + // Set up network request monitoring let hasNetworkActivity = false const requestListener = () => { @@ -506,6 +604,106 @@ export class BrowserSession { }) } + async press(key: string): Promise { + return this.doAction(async (page) => { + // Parse key combinations (e.g., "Cmd+K", "Shift+Enter") + const parts = key.split("+").map((k) => k.trim()) + const modifiers: string[] = [] + let mainKey = parts[parts.length - 1] + + // Identify modifiers + for (let i = 0; i < parts.length - 1; i++) { + const part = parts[i].toLowerCase() + if (part === "cmd" || part === "command" || part === "meta") { + modifiers.push("Meta") + } else if (part === "ctrl" || part === "control") { + modifiers.push("Control") + } else 
if (part === "shift") { + modifiers.push("Shift") + } else if (part === "alt" || part === "option") { + modifiers.push("Alt") + } + } + + // Map common key aliases to Puppeteer KeyInput values + const mapping: Record = { + esc: "Escape", + return: "Enter", + escape: "Escape", + enter: "Enter", + tab: "Tab", + space: "Space", + arrowup: "ArrowUp", + arrowdown: "ArrowDown", + arrowleft: "ArrowLeft", + arrowright: "ArrowRight", + } + mainKey = (mapping[mainKey.toLowerCase()] ?? mainKey) as string + + // Avoid new-tab behavior from Enter on links/buttons + await this.forceLinksToSameTab(page) + + // Track inflight requests so we can detect brief network bursts + let inflight = 0 + const onRequest = () => { + inflight++ + } + const onRequestDone = () => { + inflight = Math.max(0, inflight - 1) + } + page.on("request", onRequest) + page.on("requestfinished", onRequestDone) + page.on("requestfailed", onRequestDone) + + // Start a short navigation wait in parallel; if no nav, it times out harmlessly + const HARD_CAP_MS = 3000 + const navPromise = page + .waitForNavigation({ + // domcontentloaded is enough to confirm a submit navigated + waitUntil: ["domcontentloaded"], + timeout: HARD_CAP_MS, + }) + .catch(() => undefined) + + // Press key combination + if (modifiers.length > 0) { + // Hold down modifiers + for (const modifier of modifiers) { + await page.keyboard.down(modifier as KeyInput) + } + + // Press main key + await page.keyboard.press(mainKey as KeyInput) + + // Release modifiers + for (const modifier of modifiers) { + await page.keyboard.up(modifier as KeyInput) + } + } else { + // Single key press + await page.keyboard.press(mainKey as KeyInput) + } + + // Give time for any requests to kick off + await delay(120) + + // Hard-cap the wait to avoid UI hangs + await Promise.race([ + navPromise, + pWaitFor(() => inflight === 0, { timeout: HARD_CAP_MS, interval: 100 }).catch(() => {}), + delay(HARD_CAP_MS), + ]) + + // Stabilize DOM briefly before capturing 
screenshot (shorter cap) + await this.waitTillHTMLStable(page, 2_000) + + // Cleanup + page.off("request", onRequest) + page.off("requestfinished", onRequestDone) + page.off("requestfailed", onRequestDone) + }) + } + /** * Scrolls the page by the specified amount */ @@ -557,4 +755,107 @@ export class BrowserSession { }) }) } + + /** + * Draws a cursor indicator on the page at the specified position + */ + private async drawCursorIndicator(page: Page, coordinate: string): Promise { + const [x, y] = coordinate.split(",").map(Number) + + try { + await page.evaluate( + (cursorX: number, cursorY: number) => { + // Create a cursor indicator element + const cursor = document.createElement("div") + cursor.id = "__roo_cursor_indicator__" + cursor.style.cssText = ` + position: fixed; + left: ${cursorX}px; + top: ${cursorY}px; + width: 35px; + height: 35px; + pointer-events: none; + z-index: 2147483647; + ` + + // Create SVG cursor pointer + const svg = ` + + + + + ` + cursor.innerHTML = svg + + document.body.appendChild(cursor) + }, + x, + y, + ) + } catch (error) { + console.error("Failed to draw cursor indicator:", error) + } + } + + /** + * Removes the cursor indicator from the page + */ + private async removeCursorIndicator(page: Page): Promise { + try { + await page.evaluate(() => { + const cursor = document.getElementById("__roo_cursor_indicator__") + if (cursor) { + cursor.remove() + } + }) + } catch (error) { + console.error("Failed to remove cursor indicator:", error) + } + } + + /** + * Returns whether a browser session is currently active + */ + isSessionActive(): boolean { + return !!(this.browser && this.page) + } + + /** + * Returns the last known viewport size (if any) + * + * Prefer the live page viewport when available so we stay accurate after: + * - browser_action resize + * - manual window resizes (especially with remote browsers) + * + * Falls back to the configured default viewport when no prior information exists. 
+ */ + getViewportSize(): { width?: number; height?: number } { + // If we have an active page, ask Puppeteer for the current viewport. + // This keeps us in sync with any resizes that happen outside of our own + // browser_action lifecycle (e.g. user dragging the window). + if (this.page) { + const vp = this.page.viewport() + if (vp?.width) this.lastViewportWidth = vp.width + if (vp?.height) this.lastViewportHeight = vp.height + } + + // If we've ever observed a viewport, use that. + if (this.lastViewportWidth && this.lastViewportHeight) { + return { + width: this.lastViewportWidth, + height: this.lastViewportHeight, + } + } + + // Otherwise fall back to the configured default so the tool can still + // operate before the first screenshot-based action has run. + const { width, height } = this.getViewport() + return { width, height } + } } diff --git a/src/services/browser/UrlContentFetcher.ts b/src/services/browser/UrlContentFetcher.ts index b271bc2ef41..2d8e4a3de84 100644 --- a/src/services/browser/UrlContentFetcher.ts +++ b/src/services/browser/UrlContentFetcher.ts @@ -90,9 +90,9 @@ export class UrlContentFetcher { throw new Error("Browser not initialized") } /* - - networkidle2 is equivalent to playwright's networkidle where it waits until there are no more than 2 network connections for at least 500 ms. - - domcontentloaded is when the basic DOM is loaded - this should be sufficient for most doc sites + - In Puppeteer, "networkidle2" waits until there are no more than 2 network connections for at least 500 ms (roughly equivalent to Playwright's "networkidle"). + - "domcontentloaded" is when the basic DOM is loaded. + This should be sufficient for most doc sites. 
*/ try { await this.page.goto(url, { diff --git a/src/services/browser/__tests__/BrowserSession.spec.ts b/src/services/browser/__tests__/BrowserSession.spec.ts index b69fb2d1406..a7d9707ab39 100644 --- a/src/services/browser/__tests__/BrowserSession.spec.ts +++ b/src/services/browser/__tests__/BrowserSession.spec.ts @@ -229,4 +229,226 @@ describe("BrowserSession", () => { expect(mockBrowser.close).not.toHaveBeenCalled() }) }) + + it("forces same-tab behavior before click", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + waitForNavigation: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + click: vi.fn().mockResolvedValue(undefined), + move: vi.fn().mockResolvedValue(undefined), + }, + } + + ;(browserSession as any).page = page + + // Spy on the forceLinksToSameTab helper to ensure it's invoked + const forceSpy = vi.fn().mockResolvedValue(undefined) + ;(browserSession as any).forceLinksToSameTab = forceSpy + + await browserSession.click("10,20") + + expect(forceSpy).toHaveBeenCalledTimes(1) + expect(forceSpy).toHaveBeenCalledWith(page) + expect(page.mouse.click).toHaveBeenCalledWith(10, 20) + }) +}) + +describe("keyboard press", () => { + it("presses a keyboard key", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + waitForNavigation: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(undefined), + keyboard: { + press: vi.fn().mockResolvedValue(undefined), + type: 
vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + await session.press("Enter") + + expect(page.keyboard.press).toHaveBeenCalledTimes(1) + expect(page.keyboard.press).toHaveBeenCalledWith("Enter") + }) +}) + +describe("cursor visualization", () => { + it("should draw cursor indicator when cursor position exists", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + click: vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform a click action which sets cursor position + const result = await session.click("100,200") + + // Verify cursor indicator was drawn and removed + // evaluate is called 3 times: 1 for forceLinksToSameTab, 1 for draw cursor, 1 for remove cursor + expect(page.evaluate).toHaveBeenCalled() + + // Verify the result includes cursor position + expect(result.currentMousePosition).toBe("100,200") + }) + + it("should include cursor position in action result", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: 
vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + move: vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform a hover action which sets cursor position + const result = await session.hover("150,250") + + // Verify the result includes cursor position + expect(result.currentMousePosition).toBe("150,250") + expect(result.viewportWidth).toBe(900) + expect(result.viewportHeight).toBe(600) + }) + + it("should not draw cursor indicator when no cursor position exists", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform scroll action which doesn't set cursor position + const result = await session.scrollDown() + + // Verify evaluate was called only for scroll operation (not for cursor drawing/removal) + // scrollDown calls evaluate once for scrolling + expect(page.evaluate).toHaveBeenCalledTimes(1) + + // Verify no cursor 
position in result + expect(result.currentMousePosition).toBeUndefined() + }) + + describe("getViewportSize", () => { + it("falls back to configured viewport when no page or last viewport is available", () => { + const localCtx: any = { + globalState: { + get: vi.fn((key: string) => { + if (key === "browserViewportSize") return "1024x768" + return undefined + }), + update: vi.fn(), + }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + + const session = new BrowserSession(localCtx) + const vp = (session as any).getViewportSize() + expect(vp).toEqual({ width: 1024, height: 768 }) + }) + + it("returns live page viewport when available and updates lastViewport cache", () => { + const localCtx: any = { + globalState: { + get: vi.fn(), + update: vi.fn(), + }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(localCtx) + ;(session as any).page = { + viewport: vi.fn().mockReturnValue({ width: 1111, height: 555 }), + } + + const vp = (session as any).getViewportSize() + expect(vp).toEqual({ width: 1111, height: 555 }) + expect((session as any).lastViewportWidth).toBe(1111) + expect((session as any).lastViewportHeight).toBe(555) + }) + + it("returns cached last viewport when page no longer exists", () => { + const localCtx: any = { + globalState: { + get: vi.fn(), + update: vi.fn(), + }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(localCtx) + ;(session as any).lastViewportWidth = 800 + ;(session as any).lastViewportHeight = 600 + + const vp = (session as any).getViewportSize() + expect(vp).toEqual({ width: 800, height: 600 }) + }) + }) }) diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 59745b9cf99..5b6214b1219 100644 --- a/src/shared/ExtensionMessage.ts +++ 
b/src/shared/ExtensionMessage.ts @@ -129,6 +129,8 @@ export interface ExtensionMessage { | "dismissedUpsells" | "organizationSwitchResult" | "interactionRequired" + | "browserSessionUpdate" + | "browserSessionNavigate" text?: string payload?: any // Add a generic payload for now, can refine later // Checkpoint warning message @@ -213,6 +215,9 @@ export interface ExtensionMessage { queuedMessages?: QueuedMessage[] list?: string[] // For dismissedUpsells organizationId?: string | null // For organizationSwitchResult + browserSessionMessages?: ClineMessage[] // For browser session panel updates + isBrowserSessionActive?: boolean // For browser session panel updates + stepIndex?: number // For browserSessionNavigate: the target step index to display } export type ExtensionState = Pick< @@ -333,6 +338,8 @@ export type ExtensionState = Pick< organizationAllowList: OrganizationAllowList organizationSettingsVersion?: number + isBrowserSessionActive: boolean // Actual browser session state + autoCondenseContext: boolean autoCondenseContextPercent: number marketplaceItems?: MarketplaceItem[] @@ -420,6 +427,7 @@ export const browserActions = [ "click", "hover", "type", + "press", "scroll_down", "scroll_up", "resize", @@ -433,6 +441,7 @@ export interface ClineSayBrowserAction { coordinate?: string size?: string text?: string + executedCoordinate?: string } export type BrowserActionResult = { @@ -440,6 +449,8 @@ export type BrowserActionResult = { logs?: string currentUrl?: string currentMousePosition?: string + viewportWidth?: number + viewportHeight?: number } export interface ClineAskUseMcpServer { diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 1d403f16caa..5806da8e973 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -166,6 +166,13 @@ export interface WebviewMessage { | "dismissUpsell" | "getDismissedUpsells" | "updateSettings" + | "allowedCommands" + | "deniedCommands" + | "killBrowserSession" + | 
"openBrowserSessionPanel" + | "showBrowserSessionPanelAtStep" + | "refreshBrowserSessionPanel" + | "browserPanelDidLaunch" text?: string editedMessageContent?: string tab?: "settings" | "history" | "mcp" | "modes" | "chat" | "marketplace" | "cloud" @@ -177,6 +184,9 @@ export interface WebviewMessage { images?: string[] bool?: boolean value?: number + stepIndex?: number + isLaunchAction?: boolean + forceShow?: boolean commands?: string[] audioType?: AudioType serverName?: string diff --git a/src/shared/browserUtils.ts b/src/shared/browserUtils.ts new file mode 100644 index 00000000000..4e071121c1b --- /dev/null +++ b/src/shared/browserUtils.ts @@ -0,0 +1,95 @@ +/** + * Parses coordinate string and scales from image dimensions to viewport dimensions + * The LLM examines the screenshot it receives (which may be downscaled by the API) + * and reports coordinates in format: "x,y@widthxheight" where widthxheight is what the LLM observed + * + * Format: "x,y@widthxheight" (required) + * Returns: scaled coordinate string "x,y" in viewport coordinates + * Throws: Error if format is invalid or missing image dimensions + */ +export function scaleCoordinate(coordinate: string, viewportWidth: number, viewportHeight: number): string { + // Parse coordinate with required image dimensions (accepts both 'x' and ',' as dimension separators) + const match = coordinate.match(/^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/) + + if (!match) { + throw new Error( + `Invalid coordinate format: "${coordinate}". 
` + + `Expected format: "x,y@widthxheight" (e.g., "450,300@1024x768")`, + ) + } + + const [, xStr, yStr, imgWidthStr, imgHeightStr] = match + const x = parseInt(xStr, 10) + const y = parseInt(yStr, 10) + const imgWidth = parseInt(imgWidthStr, 10) + const imgHeight = parseInt(imgHeightStr, 10) + + // Scale coordinates from image dimensions to viewport dimensions + const scaledX = Math.round((x / imgWidth) * viewportWidth) + const scaledY = Math.round((y / imgHeight) * viewportHeight) + + return `${scaledX},${scaledY}` +} + +/** + * Formats a key string into a more readable format (e.g., "Control+c" -> "Ctrl + C") + */ +export function prettyKey(k?: string): string { + if (!k) return "" + return k + .split("+") + .map((part) => { + const p = part.trim() + const lower = p.toLowerCase() + const map: Record = { + enter: "Enter", + tab: "Tab", + escape: "Esc", + esc: "Esc", + backspace: "Backspace", + space: "Space", + shift: "Shift", + control: "Ctrl", + ctrl: "Ctrl", + alt: "Alt", + meta: "Meta", + command: "Cmd", + cmd: "Cmd", + arrowup: "Arrow Up", + arrowdown: "Arrow Down", + arrowleft: "Arrow Left", + arrowright: "Arrow Right", + pageup: "Page Up", + pagedown: "Page Down", + home: "Home", + end: "End", + } + if (map[lower]) return map[lower] + const keyMatch = /^Key([A-Z])$/.exec(p) + if (keyMatch) return keyMatch[1].toUpperCase() + const digitMatch = /^Digit([0-9])$/.exec(p) + if (digitMatch) return digitMatch[1] + const spaced = p.replace(/([a-z])([A-Z])/g, "$1 $2") + return spaced.charAt(0).toUpperCase() + spaced.slice(1) + }) + .join(" + ") +} + +/** + * Wrapper around scaleCoordinate that handles failures gracefully by checking for simple coordinates + */ +export function getViewportCoordinate( + coord: string | undefined, + viewportWidth: number, + viewportHeight: number, +): string { + if (!coord) return "" + + try { + return scaleCoordinate(coord, viewportWidth, viewportHeight) + } catch (e) { + // Fallback to simple x,y parsing or return as is + const 
simpleMatch = /^\s*(\d+)\s*,\s*(\d+)/.exec(coord) + return simpleMatch ? `${simpleMatch[1]},${simpleMatch[2]}` : coord + } +} diff --git a/webview-ui/browser-panel.html b/webview-ui/browser-panel.html new file mode 100644 index 00000000000..92943abfe34 --- /dev/null +++ b/webview-ui/browser-panel.html @@ -0,0 +1,12 @@ + + + + + + Browser Session + + +
+ + + \ No newline at end of file diff --git a/webview-ui/src/browser-panel.tsx b/webview-ui/src/browser-panel.tsx new file mode 100644 index 00000000000..a7f5af891e6 --- /dev/null +++ b/webview-ui/src/browser-panel.tsx @@ -0,0 +1,12 @@ +import { StrictMode } from "react" +import { createRoot } from "react-dom/client" + +import "./index.css" +import BrowserSessionPanel from "./components/browser-session/BrowserSessionPanel" +import "../node_modules/@vscode/codicons/dist/codicon.css" + +createRoot(document.getElementById("root")!).render( + + + , +) diff --git a/webview-ui/src/components/browser-session/BrowserPanelStateProvider.tsx b/webview-ui/src/components/browser-session/BrowserPanelStateProvider.tsx new file mode 100644 index 00000000000..50b078c7402 --- /dev/null +++ b/webview-ui/src/components/browser-session/BrowserPanelStateProvider.tsx @@ -0,0 +1,60 @@ +import React, { createContext, useContext, useState, useEffect, useCallback } from "react" +import { ExtensionMessage } from "@roo/ExtensionMessage" + +interface BrowserPanelState { + browserViewportSize: string + isBrowserSessionActive: boolean + language: string +} + +const BrowserPanelStateContext = createContext(undefined) + +export const BrowserPanelStateProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => { + const [state, setState] = useState({ + browserViewportSize: "900x600", + isBrowserSessionActive: false, + language: "en", + }) + + const handleMessage = useCallback((event: MessageEvent) => { + const message: ExtensionMessage = event.data + + switch (message.type) { + case "state": + if (message.state) { + setState((prev) => ({ + ...prev, + browserViewportSize: message.state?.browserViewportSize || "900x600", + isBrowserSessionActive: message.state?.isBrowserSessionActive || false, + language: message.state?.language || "en", + })) + } + break + case "browserSessionUpdate": + if (message.isBrowserSessionActive !== undefined) { + setState((prev) => ({ + ...prev, + 
isBrowserSessionActive: message.isBrowserSessionActive || false, + })) + } + break + } + }, []) + + useEffect(() => { + window.addEventListener("message", handleMessage) + return () => { + window.removeEventListener("message", handleMessage) + } + }, [handleMessage]) + + return {children} +} + +export const useBrowserPanelState = () => { + const context = useContext(BrowserPanelStateContext) + if (context === undefined) { + throw new Error("useBrowserPanelState must be used within a BrowserPanelStateProvider") + } + return context +} diff --git a/webview-ui/src/components/browser-session/BrowserSessionPanel.tsx b/webview-ui/src/components/browser-session/BrowserSessionPanel.tsx new file mode 100644 index 00000000000..fe88106ad27 --- /dev/null +++ b/webview-ui/src/components/browser-session/BrowserSessionPanel.tsx @@ -0,0 +1,102 @@ +import React, { useEffect, useState } from "react" +import { type ClineMessage } from "@roo-code/types" +import BrowserSessionRow from "../chat/BrowserSessionRow" +import { TooltipProvider } from "@src/components/ui/tooltip" +import ErrorBoundary from "../ErrorBoundary" +import TranslationProvider from "@src/i18n/TranslationContext" +import { ExtensionMessage } from "@roo/ExtensionMessage" +import { BrowserPanelStateProvider, useBrowserPanelState } from "./BrowserPanelStateProvider" +import { vscode } from "@src/utils/vscode" +import { ExtensionStateContextProvider } from "@/context/ExtensionStateContext" + +interface BrowserSessionPanelState { + messages: ClineMessage[] +} + +const BrowserSessionPanelContent: React.FC = () => { + const { browserViewportSize, isBrowserSessionActive } = useBrowserPanelState() + const [state, setState] = useState({ + messages: [], + }) + // Target page index to navigate BrowserSessionRow to + const [navigateToStepIndex, setNavigateToStepIndex] = useState(undefined) + + const [expandedRows, setExpandedRows] = useState>({}) + + useEffect(() => { + const handleMessage = (event: MessageEvent) => { + const 
message: ExtensionMessage = event.data + + switch (message.type) { + case "browserSessionUpdate": + if (message.browserSessionMessages) { + setState((prev) => ({ + ...prev, + messages: message.browserSessionMessages || [], + })) + } + break + case "browserSessionNavigate": + if (typeof message.stepIndex === "number" && message.stepIndex >= 0) { + setNavigateToStepIndex(message.stepIndex) + } + break + } + } + + window.addEventListener("message", handleMessage) + + return () => { + window.removeEventListener("message", handleMessage) + } + }, []) + + return ( +
+ expandedRows[messageTs] ?? false} + onToggleExpand={(messageTs: number) => { + setExpandedRows((prev: Record) => ({ + ...prev, + [messageTs]: !prev[messageTs], + })) + }} + fullScreen={true} + browserViewportSizeProp={browserViewportSize} + isBrowserSessionActiveProp={isBrowserSessionActive} + navigateToPageIndex={navigateToStepIndex} + /> +
+ ) +} + +const BrowserSessionPanel: React.FC = () => { + // Ensure the panel receives initial state and becomes "ready" without needing a second click + useEffect(() => { + try { + vscode.postMessage({ type: "webviewDidLaunch" }) + } catch { + // Ignore errors during initial launch + } + }, []) + + return ( + + + + + + + + + + + + ) +} + +export default BrowserSessionPanel diff --git a/webview-ui/src/components/chat/BrowserActionRow.tsx b/webview-ui/src/components/chat/BrowserActionRow.tsx new file mode 100644 index 00000000000..4eecc284ae0 --- /dev/null +++ b/webview-ui/src/components/chat/BrowserActionRow.tsx @@ -0,0 +1,184 @@ +import { memo, useMemo, useEffect, useRef } from "react" +import { ClineMessage } from "@roo-code/types" +import { ClineSayBrowserAction } from "@roo/ExtensionMessage" +import { vscode } from "@src/utils/vscode" +import { getViewportCoordinate as getViewportCoordinateShared, prettyKey } from "@roo/browserUtils" +import { + MousePointer as MousePointerIcon, + Keyboard, + ArrowDown, + ArrowUp, + Pointer, + Play, + Check, + Maximize2, +} from "lucide-react" +import { useExtensionState } from "@src/context/ExtensionStateContext" +import { useTranslation } from "react-i18next" + +interface BrowserActionRowProps { + message: ClineMessage + nextMessage?: ClineMessage + actionIndex?: number + totalActions?: number +} + +// Get icon for each action type +const getActionIcon = (action: string) => { + switch (action) { + case "click": + return + case "type": + case "press": + return + case "scroll_down": + return + case "scroll_up": + return + case "launch": + return + case "close": + return + case "resize": + return + case "hover": + default: + return + } +} + +const BrowserActionRow = memo(({ message, nextMessage, actionIndex, totalActions }: BrowserActionRowProps) => { + const { t } = useTranslation() + const { isBrowserSessionActive } = useExtensionState() + const hasHandledAutoOpenRef = useRef(false) + + // Parse this specific browser action + 
const browserAction = useMemo(() => { + try { + return JSON.parse(message.text || "{}") as ClineSayBrowserAction + } catch { + return null + } + }, [message.text]) + + // Get viewport dimensions from the result message if available + const viewportDimensions = useMemo(() => { + if (!nextMessage || nextMessage.say !== "browser_action_result") return null + try { + const result = JSON.parse(nextMessage.text || "{}") + return { + width: result.viewportWidth, + height: result.viewportHeight, + } + } catch { + return null + } + }, [nextMessage]) + + // Format action display text + const actionText = useMemo(() => { + if (!browserAction) return "Browser action" + + // Helper to scale coordinates from screenshot dimensions to viewport dimensions + // Matches the backend's scaleCoordinate function logic + const getViewportCoordinate = (coord?: string): string => + getViewportCoordinateShared(coord, viewportDimensions?.width ?? 0, viewportDimensions?.height ?? 0) + + switch (browserAction.action) { + case "launch": + return `Launched browser` + case "click": + return `Clicked at: ${browserAction.executedCoordinate || getViewportCoordinate(browserAction.coordinate)}` + case "type": + return `Typed: ${browserAction.text}` + case "press": + return `Pressed key: ${prettyKey(browserAction.text)}` + case "hover": + return `Hovered at: ${browserAction.executedCoordinate || getViewportCoordinate(browserAction.coordinate)}` + case "scroll_down": + return "Scrolled down" + case "scroll_up": + return "Scrolled up" + case "resize": + return `Resized to: ${browserAction.size?.split(/[x,]/).join(" x ")}` + case "close": + return "Closed browser" + default: + return browserAction.action + } + }, [browserAction, viewportDimensions]) + + // Auto-open Browser Session panel when: + // 1. This is a "launch" action (new browser session) - always opens and navigates to launch + // 2. 
Regular actions - only open panel if user hasn't manually closed it, let internal auto-advance logic handle step + // Only run this once per action to avoid re-sending messages when scrolling + useEffect(() => { + if (!isBrowserSessionActive || hasHandledAutoOpenRef.current) { + return + } + + const isLaunchAction = browserAction?.action === "launch" + + if (isLaunchAction) { + // Launch action: navigate to step 0 (the launch) + vscode.postMessage({ + type: "showBrowserSessionPanelAtStep", + stepIndex: 0, + isLaunchAction: true, + }) + hasHandledAutoOpenRef.current = true + } else { + // Regular actions: just show panel, don't navigate + // BrowserSessionRow's internal auto-advance logic will handle jumping to new steps + // only if user is currently on the most recent step + vscode.postMessage({ + type: "showBrowserSessionPanelAtStep", + isLaunchAction: false, + }) + hasHandledAutoOpenRef.current = true + } + }, [isBrowserSessionActive, browserAction]) + + const headerStyle: React.CSSProperties = { + display: "flex", + alignItems: "center", + gap: "10px", + marginBottom: "10px", + wordBreak: "break-word", + } + + return ( +
+ {/* Header with action description - clicking opens Browser Session panel at this step */} +
{ + const idx = typeof actionIndex === "number" ? Math.max(0, actionIndex - 1) : 0 + vscode.postMessage({ type: "showBrowserSessionPanelAtStep", stepIndex: idx, forceShow: true }) + }}> + + {t("chat:browser.actions.title")} + {actionIndex !== undefined && totalActions !== undefined && ( + + {" "} + - {actionIndex}/{totalActions} -{" "} + + )} + {browserAction && ( + <> + {getActionIcon(browserAction.action)} + {actionText} + + )} +
+
+ ) +}) + +BrowserActionRow.displayName = "BrowserActionRow" + +export default BrowserActionRow diff --git a/webview-ui/src/components/chat/BrowserSessionRow.tsx b/webview-ui/src/components/chat/BrowserSessionRow.tsx index 57cb0cf2432..8fc23c6d0b2 100644 --- a/webview-ui/src/components/chat/BrowserSessionRow.tsx +++ b/webview-ui/src/components/chat/BrowserSessionRow.tsx @@ -1,20 +1,97 @@ import React, { memo, useEffect, useMemo, useRef, useState } from "react" -import { useSize } from "react-use" import deepEqual from "fast-deep-equal" import { useTranslation } from "react-i18next" - import type { ClineMessage } from "@roo-code/types" import { BrowserAction, BrowserActionResult, ClineSayBrowserAction } from "@roo/ExtensionMessage" import { vscode } from "@src/utils/vscode" import { useExtensionState } from "@src/context/ExtensionStateContext" -import { Button } from "@src/components/ui" -import CodeBlock, { CODE_BLOCK_BG_COLOR } from "../common/CodeBlock" -import { ChatRowContent } from "./ChatRow" +import CodeBlock from "../common/CodeBlock" import { ProgressIndicator } from "./ProgressIndicator" -import { Globe, Pointer, SquareTerminal } from "lucide-react" +import { Button, StandardTooltip } from "@src/components/ui" +import { getViewportCoordinate as getViewportCoordinateShared, prettyKey } from "@roo/browserUtils" +import { + Globe, + Pointer, + SquareTerminal, + MousePointer as MousePointerIcon, + Keyboard, + ArrowDown, + ArrowUp, + Play, + Check, + Maximize2, + OctagonX, + ArrowLeft, + ArrowRight, + ChevronsLeft, + ChevronsRight, + ExternalLink, + Copy, +} from "lucide-react" + +const getBrowserActionText = ( + action: BrowserAction, + executedCoordinate?: string, + coordinate?: string, + text?: string, + size?: string, + viewportWidth?: number, + viewportHeight?: number, +) => { + // Helper to scale coordinates from screenshot dimensions to viewport dimensions + // Matches the backend's scaleCoordinate function logic + const getViewportCoordinate = (coord?: 
string): string => + getViewportCoordinateShared(coord, viewportWidth ?? 0, viewportHeight ?? 0) + + switch (action) { + case "launch": + return `Launched browser` + case "click": + return `Clicked at: ${executedCoordinate || getViewportCoordinate(coordinate)}` + case "type": + return `Typed: ${text}` + case "press": + return `Pressed key: ${prettyKey(text)}` + case "scroll_down": + return "Scrolled down" + case "scroll_up": + return "Scrolled up" + case "hover": + return `Hovered at: ${executedCoordinate || getViewportCoordinate(coordinate)}` + case "resize": + return `Resized to: ${size?.split(/[x,]/).join(" x ")}` + case "close": + return "Closed browser" + default: + return action + } +} + +const getActionIcon = (action: BrowserAction) => { + switch (action) { + case "click": + return + case "type": + case "press": + return + case "scroll_down": + return + case "scroll_up": + return + case "launch": + return + case "close": + return + case "resize": + return + case "hover": + default: + return + } +} interface BrowserSessionRowProps { messages: ClineMessage[] @@ -22,20 +99,67 @@ interface BrowserSessionRowProps { onToggleExpand: (messageTs: number) => void lastModifiedMessage?: ClineMessage isLast: boolean - onHeightChange: (isTaller: boolean) => void + onHeightChange?: (isTaller: boolean) => void isStreaming: boolean + onExpandChange?: (expanded: boolean) => void + fullScreen?: boolean + // Optional props for standalone panel (when not using ExtensionStateContext) + browserViewportSizeProp?: string + isBrowserSessionActiveProp?: boolean + // Optional: navigate to a specific page index (used by Browser Session panel) + navigateToPageIndex?: number } const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { - const { messages, isLast, onHeightChange, lastModifiedMessage } = props + const { messages, isLast, onHeightChange, lastModifiedMessage, onExpandChange, fullScreen } = props const { t } = useTranslation() const prevHeightRef = useRef(0) - const 
[maxActionHeight, setMaxActionHeight] = useState(0) const [consoleLogsExpanded, setConsoleLogsExpanded] = useState(false) + const [nextActionsExpanded, setNextActionsExpanded] = useState(false) + const [logFilter, setLogFilter] = useState<"all" | "debug" | "info" | "warn" | "error" | "log">("all") + // Track screenshot container size for precise cursor positioning with object-fit: contain + const screenshotRef = useRef(null) + const [sW, setSW] = useState(0) + const [sH, setSH] = useState(0) + + // Auto-expand drawer when in fullScreen takeover mode so content is visible immediately + useEffect(() => { + if (fullScreen) { + setNextActionsExpanded(true) + } + }, [fullScreen]) + + // Observe screenshot container size to align cursor correctly with letterboxing + useEffect(() => { + const el = screenshotRef.current + if (!el) return + const update = () => { + const r = el.getBoundingClientRect() + setSW(r.width) + setSH(r.height) + } + update() + const ro = + typeof window !== "undefined" && "ResizeObserver" in window ? 
new ResizeObserver(() => update()) : null + if (ro) ro.observe(el) + return () => { + if (ro) ro.disconnect() + } + }, []) + + // Try to use ExtensionStateContext if available, otherwise use props + let browserViewportSize = props.browserViewportSizeProp || "900x600" + let isBrowserSessionActive = props.isBrowserSessionActiveProp || false + + try { + const extensionState = useExtensionState() + browserViewportSize = extensionState.browserViewportSize || "900x600" + isBrowserSessionActive = extensionState.isBrowserSessionActive || false + } catch (_e) { + // Not in ExtensionStateContext, use props + } - const { browserViewportSize = "900x600" } = useExtensionState() const [viewportWidth, viewportHeight] = browserViewportSize.split("x").map(Number) - const aspectRatio = ((viewportHeight / viewportWidth) * 100).toFixed(2) const defaultMousePosition = `${Math.round(viewportWidth / 2)},${Math.round(viewportHeight / 2)}` const isLastApiReqInterrupted = useMemo(() => { @@ -58,93 +182,106 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return isLast && messages.some((m) => m.say === "browser_action_result") && !isLastApiReqInterrupted // after user approves, browser_action_result with "" is sent to indicate that the session has started }, [isLast, messages, isLastApiReqInterrupted]) - // Organize messages into pages with current state and next action + // Organize messages into pages based on ALL browser actions (including those without screenshots) const pages = useMemo(() => { const result: { - currentState: { - url?: string - screenshot?: string - mousePosition?: string - consoleLogs?: string - messages: ClineMessage[] // messages up to and including the result - } - nextAction?: { - messages: ClineMessage[] // messages leading to next result - } + url?: string + screenshot?: string + mousePosition?: string + consoleLogs?: string + action?: ClineSayBrowserAction + size?: string + viewportWidth?: number + viewportHeight?: number }[] = [] - let 
currentStateMessages: ClineMessage[] = [] - let nextActionMessages: ClineMessage[] = [] - + // Build pages from browser_action messages and pair with results messages.forEach((message) => { - if (message.ask === "browser_action_launch") { - // Start first page - currentStateMessages = [message] - } else if (message.say === "browser_action_result") { - if (message.text === "") { - // first browser_action_result is an empty string that signals that session has started - return + if (message.say === "browser_action") { + try { + const action = JSON.parse(message.text || "{}") as ClineSayBrowserAction + // Find the corresponding result message + const resultMessage = messages.find( + (m) => m.say === "browser_action_result" && m.ts > message.ts && m.text !== "", + ) + + if (resultMessage) { + const resultData = JSON.parse(resultMessage.text || "{}") as BrowserActionResult + result.push({ + url: resultData.currentUrl, + screenshot: resultData.screenshot, + mousePosition: resultData.currentMousePosition, + consoleLogs: resultData.logs, + action, + size: action.size, + viewportWidth: resultData.viewportWidth, + viewportHeight: resultData.viewportHeight, + }) + } else { + // For actions without results (like close), add a page without screenshot + result.push({ action, size: action.size }) + } + } catch { + // ignore parse errors } - // Complete current state - currentStateMessages.push(message) - const resultData = JSON.parse(message.text || "{}") as BrowserActionResult - - // Add page with current state and previous next actions - result.push({ - currentState: { - url: resultData.currentUrl, - screenshot: resultData.screenshot, - mousePosition: resultData.currentMousePosition, - consoleLogs: resultData.logs, - messages: [...currentStateMessages], - }, - nextAction: - nextActionMessages.length > 0 - ? 
{ - messages: [...nextActionMessages], - } - : undefined, - }) - - // Reset for next page - currentStateMessages = [] - nextActionMessages = [] - } else if ( - message.say === "api_req_started" || - message.say === "text" || - message.say === "browser_action" - ) { - // These messages lead to the next result, so they should always go in nextActionMessages - nextActionMessages.push(message) - } else { - // Any other message types - currentStateMessages.push(message) } }) - // Add incomplete page if exists - if (currentStateMessages.length > 0 || nextActionMessages.length > 0) { - result.push({ - currentState: { - messages: [...currentStateMessages], - }, - nextAction: - nextActionMessages.length > 0 - ? { - messages: [...nextActionMessages], - } - : undefined, - }) + // Add placeholder page if no actions yet + if (result.length === 0) { + result.push({}) } return result }, [messages]) - // Auto-advance to latest page + // Page index + user navigation guard (don't auto-jump while exploring history) const [currentPageIndex, setCurrentPageIndex] = useState(0) + const hasUserNavigatedRef = useRef(false) + const didInitIndexRef = useRef(false) + const prevPagesLengthRef = useRef(0) + + useEffect(() => { + // Initialize to last page on mount + if (!didInitIndexRef.current && pages.length > 0) { + didInitIndexRef.current = true + setCurrentPageIndex(pages.length - 1) + prevPagesLengthRef.current = pages.length + return + } + + // Auto-advance if user is on the most recent step and a new step arrives + if (pages.length > prevPagesLengthRef.current) { + const wasOnLastPage = currentPageIndex === prevPagesLengthRef.current - 1 + if (wasOnLastPage && !hasUserNavigatedRef.current) { + // User was on the most recent step, auto-advance to the new step + setCurrentPageIndex(pages.length - 1) + } + prevPagesLengthRef.current = pages.length + } + }, [pages.length, currentPageIndex]) + + // External navigation request (from panel host) + // Only navigate when navigateToPageIndex 
actually changes, not when pages.length changes + const prevNavigateToPageIndexRef = useRef() useEffect(() => { - setCurrentPageIndex(pages.length - 1) - }, [pages.length]) + if ( + typeof props.navigateToPageIndex === "number" && + props.navigateToPageIndex !== prevNavigateToPageIndexRef.current && + pages.length > 0 + ) { + const idx = Math.max(0, Math.min(pages.length - 1, props.navigateToPageIndex)) + setCurrentPageIndex(idx) + // Only reset manual navigation guard if navigating to the last page + // This allows auto-advance to work when clicking to the most recent step + // but prevents unwanted auto-advance when viewing historical steps + if (idx === pages.length - 1) { + hasUserNavigatedRef.current = false + } + prevNavigateToPageIndexRef.current = props.navigateToPageIndex + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [props.navigateToPageIndex]) // Get initial URL from launch message const initialUrl = useMemo(() => { @@ -152,141 +289,601 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return launchMessage?.text || "" }, [messages]) - // Find the latest available URL and screenshot - const latestState = useMemo(() => { + const currentPage = pages[currentPageIndex] + + // Use actual viewport dimensions from result if available, otherwise fall back to settings + + // Find the last available screenshot and its associated data to use as placeholders + const lastPageWithScreenshot = useMemo(() => { for (let i = pages.length - 1; i >= 0; i--) { - const page = pages[i] - if (page.currentState.url || page.currentState.screenshot) { - return { - url: page.currentState.url, - mousePosition: page.currentState.mousePosition, - consoleLogs: page.currentState.consoleLogs, - screenshot: page.currentState.screenshot, - } + if (pages[i].screenshot) { + return pages[i] } } - return { url: undefined, mousePosition: undefined, consoleLogs: undefined, screenshot: undefined } + return undefined }, [pages]) - const currentPage = 
pages[currentPageIndex] - const isLastPage = currentPageIndex === pages.length - 1 - - // Use latest state if we're on the last page and don't have a state yet - const displayState = isLastPage - ? { - url: currentPage?.currentState.url || latestState.url || initialUrl, - mousePosition: - currentPage?.currentState.mousePosition || latestState.mousePosition || defaultMousePosition, - consoleLogs: currentPage?.currentState.consoleLogs, - screenshot: currentPage?.currentState.screenshot || latestState.screenshot, - } - : { - url: currentPage?.currentState.url || initialUrl, - mousePosition: currentPage?.currentState.mousePosition || defaultMousePosition, - consoleLogs: currentPage?.currentState.consoleLogs, - screenshot: currentPage?.currentState.screenshot, + // Find last mouse position up to current page (not from future pages) + const lastPageWithMousePositionUpToCurrent = useMemo(() => { + for (let i = currentPageIndex; i >= 0; i--) { + if (pages[i].mousePosition) { + return pages[i] } + } + return undefined + }, [pages, currentPageIndex]) - const [actionContent, { height: actionHeight }] = useSize( -
- {currentPage?.nextAction?.messages.map((message) => ( - - ))} - {!isBrowsing && messages.some((m) => m.say === "browser_action_result") && currentPageIndex === 0 && ( - - )} -
, - ) + // Display state from current page, with smart fallbacks + const displayState = { + url: currentPage?.url || initialUrl, + mousePosition: + currentPage?.mousePosition || lastPageWithMousePositionUpToCurrent?.mousePosition || defaultMousePosition, + consoleLogs: currentPage?.consoleLogs, + screenshot: currentPage?.screenshot || lastPageWithScreenshot?.screenshot, + } - useEffect(() => { - if (actionHeight === 0 || actionHeight === Infinity) { - return + // Parse logs for counts and filtering + const parsedLogs = useMemo(() => { + const counts = { debug: 0, info: 0, warn: 0, error: 0, log: 0 } + const byType: Record<"debug" | "info" | "warn" | "error" | "log", string[]> = { + debug: [], + info: [], + warn: [], + error: [], + log: [], + } + const raw = displayState.consoleLogs || "" + raw.split(/\r?\n/).forEach((line) => { + const trimmed = line.trim() + if (!trimmed) return + const m = /^\[([^\]]+)\]\s*/i.exec(trimmed) + let type = (m?.[1] || "").toLowerCase() + if (type === "warning") type = "warn" + if (!["debug", "info", "warn", "error", "log"].includes(type)) type = "log" + counts[type as keyof typeof counts]++ + byType[type as keyof typeof byType].push(line) + }) + return { counts, byType } + }, [displayState.consoleLogs]) + + const logsToShow = useMemo(() => { + if (!displayState.consoleLogs) return t("chat:browser.noNewLogs") as string + if (logFilter === "all") return displayState.consoleLogs + const arr = parsedLogs.byType[logFilter] + return arr.length ? 
arr.join("\n") : (t("chat:browser.noNewLogs") as string) + }, [displayState.consoleLogs, logFilter, parsedLogs, t]) + + // Meta for log badges (include "All" first) + const logTypeMeta = [ + { key: "all", label: "All" }, + { key: "debug", label: "Debug" }, + { key: "info", label: "Info" }, + { key: "warn", label: "Warn" }, + { key: "error", label: "Error" }, + { key: "log", label: "Log" }, + ] as const + + // Use a fixed standard aspect ratio and dimensions for the drawer to prevent flickering + // Even if viewport changes, the drawer maintains consistent size + const fixedDrawerWidth = 900 + const fixedDrawerHeight = 600 + const drawerAspectRatio = (fixedDrawerHeight / fixedDrawerWidth) * 100 + + // For cursor positioning, use the viewport dimensions from the same page as the data we're displaying + // This ensures cursor position matches the screenshot/mouse position being shown + let cursorViewportWidth: number + let cursorViewportHeight: number + + if (currentPage?.screenshot) { + // Current page has screenshot - use its dimensions + cursorViewportWidth = currentPage.viewportWidth ?? viewportWidth + cursorViewportHeight = currentPage.viewportHeight ?? viewportHeight + } else if (lastPageWithScreenshot) { + // Using placeholder screenshot - use dimensions from that page + cursorViewportWidth = lastPageWithScreenshot.viewportWidth ?? viewportWidth + cursorViewportHeight = lastPageWithScreenshot.viewportHeight ?? 
viewportHeight + } else { + // No screenshot available - use default settings + cursorViewportWidth = viewportWidth + cursorViewportHeight = viewportHeight + } + + // Get browser action for current page (now stored in pages array) + const currentPageAction = useMemo(() => { + return pages[currentPageIndex]?.action + }, [pages, currentPageIndex]) + + // Latest non-close browser_action for header summary (fallback) + + const lastBrowserActionOverall = useMemo(() => { + const all = messages.filter((m) => m.say === "browser_action") + return all.at(-1) + }, [messages]) + + // Use actual Playwright session state from extension (not message parsing) + const isBrowserSessionOpen = isBrowserSessionActive + + // Check if a browser action is currently in flight (for spinner) + const isActionRunning = useMemo(() => { + if (!lastBrowserActionOverall || isLastApiReqInterrupted) { + return false } - if (actionHeight > maxActionHeight) { - setMaxActionHeight(actionHeight) + + // Find the last browser_action_result (including empty text) to detect completion + const lastBrowserActionResult = [...messages].reverse().find((m) => m.say === "browser_action_result") + + if (!lastBrowserActionResult) { + // We have at least one action, but haven't seen any result yet + return true } - }, [actionHeight, maxActionHeight]) - // Track latest click coordinate - const latestClickPosition = useMemo(() => { - if (!isBrowsing) return undefined + // If the last action happened after the last result, it's still running + return lastBrowserActionOverall.ts > lastBrowserActionResult.ts + }, [messages, lastBrowserActionOverall, isLastApiReqInterrupted]) - // Look through current page's next actions for the latest browser_action - const actions = currentPage?.nextAction?.messages || [] - for (let i = actions.length - 1; i >= 0; i--) { - const message = actions[i] - if (message.say === "browser_action") { - const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction - if 
(browserAction.action === "click" && browserAction.coordinate) { - return browserAction.coordinate + // Browser session drawer never auto-expands - user must manually toggle it + + // Calculate total API cost for the browser session + const totalApiCost = useMemo(() => { + let total = 0 + messages.forEach((message) => { + if (message.say === "api_req_started" && message.text) { + try { + const data = JSON.parse(message.text) + if (data.cost && typeof data.cost === "number") { + total += data.cost + } + } catch { + // Ignore parsing errors } } + }) + return total + }, [messages]) + + // Local size tracking without react-use to avoid timers after unmount in tests + const containerRef = useRef(null) + const [rowHeight, setRowHeight] = useState(0) + useEffect(() => { + const el = containerRef.current + if (!el) return + let mounted = true + const setH = (h: number) => { + if (mounted) setRowHeight(h) } - return undefined - }, [isBrowsing, currentPage?.nextAction?.messages]) - - // Use latest click position while browsing, otherwise use display state - const mousePosition = isBrowsing - ? latestClickPosition || displayState.mousePosition - : displayState.mousePosition || defaultMousePosition - - const [browserSessionRow, { height: rowHeight }] = useSize( -
-
- {isBrowsing ? : } - - <>{t("chat:browser.rooWantsToUse")} + const ro = + typeof window !== "undefined" && "ResizeObserver" in window + ? new ResizeObserver((entries) => { + const entry = entries[0] + setH(entry?.contentRect?.height ?? el.getBoundingClientRect().height) + }) + : null + // initial + setH(el.getBoundingClientRect().height) + if (ro) ro.observe(el) + return () => { + mounted = false + if (ro) ro.disconnect() + } + }, []) + + const BrowserSessionHeader: React.FC = () => ( +
+ {/* Globe icon - green when browser session is open */} + + setNextActionsExpanded((v) => { + const nv = !v + onExpandChange?.(nv) + return nv + }), + })} + /> + + {/* Simple text: "Browser Session" with step counter */} + + setNextActionsExpanded((v) => { + const nv = !v + onExpandChange?.(nv) + return nv + }), + })} + style={{ + flex: 1, + fontSize: 13, + fontWeight: 500, + lineHeight: "22px", + color: "var(--vscode-editor-foreground)", + cursor: fullScreen ? "default" : "pointer", + display: "flex", + alignItems: "center", + gap: 8, + }}> + {t("chat:browser.session")} + {isActionRunning && ( + + )} + {pages.length > 0 && ( + + {currentPageIndex + 1}/{pages.length} + + )} + {/* Inline action summary to the right, similar to ChatView */} + + {(() => { + const action = currentPageAction + const pageSize = pages[currentPageIndex]?.size + const pageViewportWidth = pages[currentPageIndex]?.viewportWidth + const pageViewportHeight = pages[currentPageIndex]?.viewportHeight + if (action) { + return ( + <> + {getActionIcon(action.action)} + + {getBrowserActionText( + action.action, + action.executedCoordinate, + action.coordinate, + action.text, + pageSize, + pageViewportWidth, + pageViewportHeight, + )} + + + ) + } else if (initialUrl) { + return ( + <> + {getActionIcon("launch" as any)} + {getBrowserActionText("launch", undefined, initialUrl, undefined)} + + ) + } + return null + })()} -
+
+ + {/* Right side: cost badge and chevron */} + {totalApiCost > 0 && ( +
+ ${totalApiCost.toFixed(4)} +
+ )} + + {/* Chevron toggle hidden in fullScreen */} + {!fullScreen && ( + + setNextActionsExpanded((v) => { + const nv = !v + onExpandChange?.(nv) + return nv + }) + } + className={`codicon ${nextActionsExpanded ? "codicon-chevron-up" : "codicon-chevron-down"}`} + style={{ + fontSize: 13, + fontWeight: 500, + lineHeight: "22px", + color: "var(--vscode-editor-foreground)", + cursor: "pointer", + display: "inline-block", + transition: "transform 150ms ease", + }} + /> + )} + + {/* Kill browser button hidden from header in fullScreen; kept in toolbar */} + {isBrowserSessionOpen && !fullScreen && ( + + + + )} +
+ ) + + const BrowserSessionDrawer: React.FC = () => { + if (!nextActionsExpanded) return null + + return (
- {/* URL Bar */} + {/* Browser-like Toolbar */}
+ {/* Go to beginning */} + + + + + {/* Back */} + + + + + {/* Forward */} + + + + + {/* Go to end */} + + + + + {/* Address Bar */}
- - {displayState.url || "http"} + + + {displayState.url || "about:blank"} + + {/* Step counter removed */}
-
+ {/* Kill (Disconnect) replaces Reload */} + + + + + {/* Open External */} + + + + + {/* Copy URL */} + + + +
{/* Screenshot Area */}
{displayState.screenshot ? ( { width: "100%", height: "100%", objectFit: "contain", + objectPosition: "top center", cursor: "pointer", }} onClick={() => @@ -322,70 +920,173 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { />
)} - {displayState.mousePosition && ( - - )} -
+ {displayState.mousePosition && + (() => { + // Use measured size if available; otherwise fall back to current client size so cursor remains visible + const containerW = sW || (screenshotRef.current?.clientWidth ?? 0) + const containerH = sH || (screenshotRef.current?.clientHeight ?? 0) + if (containerW <= 0 || containerH <= 0) { + // Minimal fallback to keep cursor visible before first measurement + return ( + + ) + } - {/* Console Logs Accordion */} -
{ - setConsoleLogsExpanded(!consoleLogsExpanded) - }} - className="flex items-center justify-between gap-2 text-vscode-editor-foreground/50 hover:text-vscode-editor-foreground transition-colors" - style={{ - width: "100%", - cursor: "pointer", - padding: `9px 10px ${consoleLogsExpanded ? 0 : 8}px 10px`, - }}> - - {t("chat:browser.consoleLogs")} - + // Compute displayed image box within the container for object-fit: contain; objectPosition: top center + const imgAspect = cursorViewportWidth / cursorViewportHeight + const containerAspect = containerW / containerH + let displayW = containerW + let displayH = containerH + let offsetX = 0 + let offsetY = 0 + if (containerAspect > imgAspect) { + // Full height, letterboxed left/right; top aligned + displayH = containerH + displayW = containerH * imgAspect + offsetX = (containerW - displayW) / 2 + offsetY = 0 + } else { + // Full width, potential space below; top aligned + displayW = containerW + displayH = containerW / imgAspect + offsetX = 0 + offsetY = 0 + } + + // Parse "x,y" or "x,y@widthxheight" for original basis + const m = /^\s*(\d+)\s*,\s*(\d+)(?:\s*@\s*(\d+)\s*[x,]\s*(\d+))?\s*$/.exec( + displayState.mousePosition || "", + ) + const mx = parseInt(m?.[1] || "0", 10) + const my = parseInt(m?.[2] || "0", 10) + const baseW = m?.[3] ? parseInt(m[3], 10) : cursorViewportWidth + const baseH = m?.[4] ? parseInt(m[4], 10) : cursorViewportHeight + + const leftPx = offsetX + (baseW > 0 ? (mx / baseW) * displayW : 0) + const topPx = offsetY + (baseH > 0 ? (my / baseH) * displayH : 0) + + return ( + + ) + })()}
- {consoleLogsExpanded && ( - - )} - - {/* Action content with min height */} -
{actionContent}
+ {/* Browser Action summary moved inline to header; row removed */} - {/* Pagination moved to bottom */} - {pages.length > 1 && ( + {/* Console Logs Section (collapsible, default collapsed) */}
-
- {t("chat:browser.navigation.step", { current: currentPageIndex + 1, total: pages.length })} -
-
- - +
{ + e.stopPropagation() + setConsoleLogsExpanded((v) => !v) + }} + className="text-vscode-editor-foreground/70 hover:text-vscode-editor-foreground transition-colors" + style={{ + display: "flex", + alignItems: "center", + gap: "8px", + marginBottom: consoleLogsExpanded ? "6px" : 0, + cursor: "pointer", + }}> + + + {t("chat:browser.consoleLogs")} + + + {/* Log type indicators */} +
e.stopPropagation()} + style={{ display: "flex", alignItems: "center", gap: 6, marginLeft: "auto" }}> + {logTypeMeta.map(({ key, label }) => { + const isAll = key === "all" + const count = isAll + ? (Object.values(parsedLogs.counts) as number[]).reduce((a, b) => a + b, 0) + : parsedLogs.counts[key as "debug" | "info" | "warn" | "error" | "log"] + const isActive = logFilter === (key as any) + const disabled = count === 0 + return ( + + ) + })} + setConsoleLogsExpanded((v) => !v)} + className={`codicon codicon-chevron-${consoleLogsExpanded ? "down" : "right"}`} + style={{ marginLeft: 6 }} + /> +
+ {consoleLogsExpanded && ( +
+ +
+ )}
- )} -
, + + ) + } + + const browserSessionRow = ( +
+ + + {/* Expanded drawer content - inline/fullscreen */} + +
) // Height change effect @@ -393,7 +1094,7 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { const isInitialRender = prevHeightRef.current === 0 if (isLast && rowHeight !== 0 && rowHeight !== Infinity && rowHeight !== prevHeightRef.current) { if (!isInitialRender) { - onHeightChange(rowHeight > prevHeightRef.current) + onHeightChange?.(rowHeight > prevHeightRef.current) } prevHeightRef.current = rowHeight } @@ -402,150 +1103,6 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return browserSessionRow }, deepEqual) -interface BrowserSessionRowContentProps extends Omit { - message: ClineMessage - setMaxActionHeight: (height: number) => void - isStreaming: boolean -} - -const BrowserSessionRowContent = ({ - message, - isExpanded, - onToggleExpand, - lastModifiedMessage, - isLast, - setMaxActionHeight, - isStreaming, -}: BrowserSessionRowContentProps) => { - const { t } = useTranslation() - const headerStyle: React.CSSProperties = { - display: "flex", - alignItems: "center", - gap: "10px", - marginBottom: "10px", - wordBreak: "break-word", - } - - switch (message.type) { - case "say": - switch (message.say) { - case "api_req_started": - case "text": - return ( -
- { - if (message.say === "api_req_started") { - setMaxActionHeight(0) - } - onToggleExpand(message.ts) - }} - lastModifiedMessage={lastModifiedMessage} - isLast={isLast} - isStreaming={isStreaming} - /> -
- ) - - case "browser_action": - const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction - return ( - - ) - - default: - return null - } - - case "ask": - switch (message.ask) { - case "browser_action_launch": - return ( - <> -
- {t("chat:browser.sessionStarted")} -
-
- -
- - ) - - default: - return null - } - } -} - -const BrowserActionBox = ({ - action, - coordinate, - text, -}: { - action: BrowserAction - coordinate?: string - text?: string -}) => { - const { t } = useTranslation() - const getBrowserActionText = (action: BrowserAction, coordinate?: string, text?: string) => { - switch (action) { - case "launch": - return t("chat:browser.actions.launch", { url: text }) - case "click": - return t("chat:browser.actions.click", { coordinate: coordinate?.replace(",", ", ") }) - case "type": - return t("chat:browser.actions.type", { text }) - case "scroll_down": - return t("chat:browser.actions.scrollDown") - case "scroll_up": - return t("chat:browser.actions.scrollUp") - case "close": - return t("chat:browser.actions.close") - default: - return action - } - } - return ( -
-
-
- - {t("chat:browser.actions.title")} - {getBrowserActionText(action, coordinate, text)} - -
-
-
- ) -} - const BrowserCursor: React.FC<{ style?: React.CSSProperties }> = ({ style }) => { const { t } = useTranslation() // (can't use svgs in vsc extensions) diff --git a/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx b/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx new file mode 100644 index 00000000000..862dc80a62f --- /dev/null +++ b/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx @@ -0,0 +1,34 @@ +import { memo } from "react" +import { Globe } from "lucide-react" +import { ClineMessage } from "@roo-code/types" + +interface BrowserSessionStatusRowProps { + message: ClineMessage +} + +const BrowserSessionStatusRow = memo(({ message }: BrowserSessionStatusRowProps) => { + const isOpened = message.text?.includes("opened") + + return ( +
+ + + {message.text} + +
+ ) +}) + +BrowserSessionStatusRow.displayName = "BrowserSessionStatusRow" + +export default BrowserSessionStatusRow diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index aaaaaae09d7..f2b1dc08728 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -160,6 +160,7 @@ export const ChatRowContent = ({ onSuggestionClick, onFollowUpUnmount, onBatchFileResponse, + editable, isFollowUpAnswered, }: ChatRowContentProps) => { const { t } = useTranslation() @@ -536,11 +537,24 @@ export const ChatRowContent = ({ } case "updateTodoList" as any: { const todos = (tool as any).todos || [] - // Get previous todos from the latest todos in the task context const previousTodos = getPreviousTodos(clineMessages, message.ts) - return + return ( + <> + + { + if (typeof vscode !== "undefined" && vscode?.postMessage) { + vscode.postMessage({ type: "updateTodoList", payload: { todos: updatedTodos } }) + } + }} + editable={!!(editable && isLast)} + /> + + ) } case "newFileCreated": return ( @@ -1381,6 +1395,10 @@ export const ChatRowContent = ({ ) + case "browser_action": + case "browser_action_result": + // Handled by BrowserSessionRow; prevent raw JSON (action/result) from rendering here + return null default: return ( <> diff --git a/webview-ui/src/components/chat/ChatTextArea.tsx b/webview-ui/src/components/chat/ChatTextArea.tsx index 0b8c89388c9..58f42a367bc 100644 --- a/webview-ui/src/components/chat/ChatTextArea.tsx +++ b/webview-ui/src/components/chat/ChatTextArea.tsx @@ -51,6 +51,9 @@ interface ChatTextAreaProps { // Edit mode props isEditMode?: boolean onCancel?: () => void + // Browser session status + isBrowserSessionActive?: boolean + showBrowserDockToggle?: boolean } export const ChatTextArea = forwardRef( @@ -71,6 +74,8 @@ export const ChatTextArea = forwardRef( modeShortcutText, isEditMode = false, onCancel, + isBrowserSessionActive = false, + showBrowserDockToggle = 
false, }, ref, ) => { @@ -1236,7 +1241,7 @@ export const ChatTextArea = forwardRef(
{isTtsPlaying && ( @@ -1261,6 +1266,12 @@ export const ChatTextArea = forwardRef( )} {!isEditMode ? : null} {!isEditMode && cloudUserInfo && } + {/* keep props referenced after moving browser button */} +
diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index 9adf603ee4b..e09cdc557a3 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -1,5 +1,5 @@ import React, { forwardRef, useCallback, useEffect, useImperativeHandle, useMemo, useRef, useState } from "react" -import { useDeepCompareEffect, useEvent, useMount } from "react-use" +import { useDeepCompareEffect, useEvent } from "react-use" import debounce from "debounce" import { Virtuoso, type VirtuosoHandle } from "react-virtuoso" import removeMd from "remove-markdown" @@ -13,7 +13,7 @@ import { appendImages } from "@src/utils/imageUtils" import type { ClineAsk, ClineMessage } from "@roo-code/types" -import { ClineSayBrowserAction, ClineSayTool, ExtensionMessage } from "@roo/ExtensionMessage" +import { ClineSayTool, ExtensionMessage } from "@roo/ExtensionMessage" import { findLast } from "@roo/array" import { SuggestionItem } from "@roo-code/types" import { combineApiRequests } from "@roo/combineApiRequests" @@ -37,7 +37,8 @@ import TelemetryBanner from "../common/TelemetryBanner" import VersionIndicator from "../common/VersionIndicator" import HistoryPreview from "../history/HistoryPreview" import Announcement from "./Announcement" -import BrowserSessionRow from "./BrowserSessionRow" +import BrowserActionRow from "./BrowserActionRow" +import BrowserSessionStatusRow from "./BrowserSessionStatusRow" import ChatRow from "./ChatRow" import { ChatTextArea } from "./ChatTextArea" import TaskHeader from "./TaskHeader" @@ -95,6 +96,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction textAreaRef.current?.focus()) - const visibleMessages = useMemo(() => { // Pre-compute checkpoint hashes that have associated user messages for O(1) lookup const userMessageCheckpointHashes = new Set() @@ -965,97 +964,54 @@ const ChatViewComponent: React.ForwardRefRenderFunction { - // Which of visible messages are browser 
session messages, see above. - if (message.type === "ask") { - return ["browser_action_launch"].includes(message.ask!) - } - - if (message.type === "say") { - return ["api_req_started", "text", "browser_action", "browser_action_result"].includes(message.say!) - } - - return false - } - - const groupedMessages = useMemo(() => { - const result: (ClineMessage | ClineMessage[])[] = [] - let currentGroup: ClineMessage[] = [] - let isInBrowserSession = false - - const endBrowserSession = () => { - if (currentGroup.length > 0) { - result.push([...currentGroup]) - currentGroup = [] - isInBrowserSession = false + // Compute current browser session messages for the top banner (not grouped into chat stream) + // Find the FIRST browser session from the beginning to show ALL sessions + const browserSessionStartIndex = useMemo(() => { + for (let i = 0; i < messages.length; i++) { + if (messages[i].ask === "browser_action_launch") { + return i } } + return -1 + }, [messages]) - visibleMessages.forEach((message: ClineMessage) => { - if (message.ask === "browser_action_launch") { - // Complete existing browser session if any. - endBrowserSession() - // Start new. - isInBrowserSession = true - currentGroup.push(message) - } else if (isInBrowserSession) { - // End session if `api_req_started` is cancelled. - - if (message.say === "api_req_started") { - // Get last `api_req_started` in currentGroup to check if - // it's cancelled. If it is then this api req is not part - // of the current browser session. 
- const lastApiReqStarted = [...currentGroup].reverse().find((m) => m.say === "api_req_started") - - if (lastApiReqStarted?.text !== null && lastApiReqStarted?.text !== undefined) { - const info = JSON.parse(lastApiReqStarted.text) - const isCancelled = info.cancelReason !== null && info.cancelReason !== undefined - - if (isCancelled) { - endBrowserSession() - result.push(message) - return - } - } - } - - if (isBrowserSessionMessage(message)) { - currentGroup.push(message) + const _browserSessionMessages = useMemo(() => { + if (browserSessionStartIndex === -1) return [] + return messages.slice(browserSessionStartIndex) + }, [browserSessionStartIndex, messages]) - // Check if this is a close action - if (message.say === "browser_action") { - const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction - if (browserAction.action === "close") { - endBrowserSession() - } - } - } else { - // complete existing browser session if any - endBrowserSession() - result.push(message) - } - } else { - result.push(message) - } - }) + // Show globe toggle only when in a task that has a browser session (active or inactive) + const showBrowserDockToggle = useMemo( + () => Boolean(task && (browserSessionStartIndex !== -1 || isBrowserSessionActive)), + [task, browserSessionStartIndex, isBrowserSessionActive], + ) - // Handle case where browser session is the last group - if (currentGroup.length > 0) { - result.push([...currentGroup]) + const isBrowserSessionMessage = useCallback((message: ClineMessage): boolean => { + // Only the launch ask should be hidden from chat (it's shown in the drawer header) + if (message.type === "ask" && message.ask === "browser_action_launch") { + return true } + // browser_action_result messages are paired with browser_action and should not appear independently + if (message.type === "say" && message.say === "browser_action_result") { + return true + } + return false + }, []) + + const groupedMessages = useMemo(() => { + // Only filter 
out the launch ask and result messages - browser actions appear in chat + const result: ClineMessage[] = visibleMessages.filter((msg) => !isBrowserSessionMessage(msg)) if (isCondensing) { - // Show indicator after clicking condense button result.push({ type: "say", say: "condense_context", ts: Date.now(), partial: true, - }) + } as any) } - return result - }, [isCondensing, visibleMessages]) + }, [isCondensing, visibleMessages, isBrowserSessionMessage]) // scrolling @@ -1204,34 +1160,37 @@ const ChatViewComponent: React.ForwardRefRenderFunction { - // Mark that user has responded - userRespondedRef.current = true - }, []) - const itemContent = useCallback( - (index: number, messageOrGroup: ClineMessage | ClineMessage[]) => { - // browser session group - if (Array.isArray(messageOrGroup)) { + (index: number, messageOrGroup: ClineMessage) => { + const hasCheckpoint = modifiedMessages.some((message) => message.say === "checkpoint_saved") + + // Check if this is a browser action message + if (messageOrGroup.type === "say" && messageOrGroup.say === "browser_action") { + // Find the corresponding result message by looking for the next browser_action_result after this action's timestamp + const nextMessage = modifiedMessages.find( + (m) => m.ts > messageOrGroup.ts && m.say === "browser_action_result", + ) + + // Calculate action index and total count + const browserActions = modifiedMessages.filter((m) => m.say === "browser_action") + const actionIndex = browserActions.findIndex((m) => m.ts === messageOrGroup.ts) + 1 + const totalActions = browserActions.length + return ( - expandedRows[messageTs] ?? 
false} - onToggleExpand={(messageTs: number) => { - setExpandedRows((prev: Record) => ({ - ...prev, - [messageTs]: !prev[messageTs], - })) - }} + ) } - const hasCheckpoint = modifiedMessages.some((message) => message.say === "checkpoint_saved") + + // Check if this is a browser session status message + if (messageOrGroup.type === "say" && messageOrGroup.say === "browser_session_status") { + return + } // regular message return ( @@ -1246,7 +1205,6 @@ const ChatViewComponent: React.ForwardRefRenderFunction -
- { - setIsAtBottom(isAtBottom) - if (isAtBottom) { - disableAutoScrollRef.current = false - } - setShowScrollToBottom(disableAutoScrollRef.current && !isAtBottom) - }} - atBottomThreshold={10} - initialTopMostItemIndex={groupedMessages.length - 1} - /> +
+
+ { + setIsAtBottom(isAtBottom) + if (isAtBottom) { + disableAutoScrollRef.current = false + } + setShowScrollToBottom(disableAutoScrollRef.current && !isAtBottom) + }} + atBottomThreshold={10} + initialTopMostItemIndex={groupedMessages.length - 1} + /> +
{areButtonsVisible && (
{isProfileDisabled && ( diff --git a/webview-ui/src/components/chat/TaskHeader.tsx b/webview-ui/src/components/chat/TaskHeader.tsx index b06d6e64f5f..de499b9aade 100644 --- a/webview-ui/src/components/chat/TaskHeader.tsx +++ b/webview-ui/src/components/chat/TaskHeader.tsx @@ -1,4 +1,4 @@ -import { memo, useEffect, useRef, useState } from "react" +import { memo, useEffect, useRef, useState, useMemo } from "react" import { useTranslation } from "react-i18next" import { useCloudUpsell } from "@src/hooks/useCloudUpsell" import { CloudUpsellDialog } from "@src/components/cloud/CloudUpsellDialog" @@ -10,7 +10,8 @@ import { Coins, HardDriveDownload, HardDriveUpload, - FoldVerticalIcon, + FoldVertical, + Globe, } from "lucide-react" import prettyBytes from "pretty-bytes" @@ -21,9 +22,10 @@ import { findLastIndex } from "@roo/array" import { formatLargeNumber } from "@src/utils/format" import { cn } from "@src/lib/utils" -import { StandardTooltip } from "@src/components/ui" +import { StandardTooltip, Button } from "@src/components/ui" import { useExtensionState } from "@src/context/ExtensionStateContext" import { useSelectedModel } from "@/components/ui/hooks/useSelectedModel" +import { vscode } from "@src/utils/vscode" import Thumbnails from "../common/Thumbnails" @@ -59,7 +61,7 @@ const TaskHeader = ({ todos, }: TaskHeaderProps) => { const { t } = useTranslation() - const { apiConfiguration, currentTaskItem, clineMessages } = useExtensionState() + const { apiConfiguration, currentTaskItem, clineMessages, isBrowserSessionActive } = useExtensionState() const { id: modelId, info: model } = useSelectedModel(apiConfiguration) const [isTaskExpanded, setIsTaskExpanded] = useState(false) const [showLongRunningTaskMessage, setShowLongRunningTaskMessage] = useState(false) @@ -95,10 +97,22 @@ const TaskHeader = ({ const textRef = useRef(null) const contextWindow = model?.contextWindow || 1 + // Detect if this task had any browser session activity so we can show a grey globe when 
inactive + const browserSessionStartIndex = useMemo(() => { + const msgs = clineMessages || [] + for (let i = 0; i < msgs.length; i++) { + const m = msgs[i] as any + if (m?.ask === "browser_action_launch") return i + } + return -1 + }, [clineMessages]) + + const showBrowserGlobe = browserSessionStartIndex !== -1 || !!isBrowserSessionActive + const condenseButton = ( currentTaskItem && handleCondenseContext(currentTaskItem.id)} /> @@ -182,53 +196,93 @@ const TaskHeader = ({
{!isTaskExpanded && contextWindow > 0 && (
e.stopPropagation()}> - - -
- {t("chat:tokenProgress.tokensUsed", { - used: formatLargeNumber(contextTokens || 0), - total: formatLargeNumber(contextWindow), - })} -
- {(() => { - const maxTokens = model - ? getModelMaxOutputTokens({ modelId, model, settings: apiConfiguration }) - : 0 - const reservedForOutput = maxTokens || 0 - const availableSpace = contextWindow - (contextTokens || 0) - reservedForOutput +
+ + +
+ {t("chat:tokenProgress.tokensUsed", { + used: formatLargeNumber(contextTokens || 0), + total: formatLargeNumber(contextWindow), + })} +
+ {(() => { + const maxTokens = model + ? getModelMaxOutputTokens({ + modelId, + model, + settings: apiConfiguration, + }) + : 0 + const reservedForOutput = maxTokens || 0 + const availableSpace = + contextWindow - (contextTokens || 0) - reservedForOutput - return ( - <> - {reservedForOutput > 0 && ( -
- {t("chat:tokenProgress.reservedForResponse", { - amount: formatLargeNumber(reservedForOutput), - })} -
- )} - {availableSpace > 0 && ( -
- {t("chat:tokenProgress.availableSpace", { - amount: formatLargeNumber(availableSpace), - })} -
- )} - - ) - })()} -
- } - side="top" - sideOffset={8}> - - {formatLargeNumber(contextTokens || 0)} / {formatLargeNumber(contextWindow)} - -
- {!!totalCost && ${totalCost.toFixed(2)}} + return ( + <> + {reservedForOutput > 0 && ( +
+ {t("chat:tokenProgress.reservedForResponse", { + amount: formatLargeNumber(reservedForOutput), + })} +
+ )} + {availableSpace > 0 && ( +
+ {t("chat:tokenProgress.availableSpace", { + amount: formatLargeNumber(availableSpace), + })} +
+ )} + + ) + })()} +
+ } + side="top" + sideOffset={8}> + + {formatLargeNumber(contextTokens || 0)} / {formatLargeNumber(contextWindow)} + + + {!!totalCost && ${totalCost.toFixed(2)}} +
+ {showBrowserGlobe && ( +
e.stopPropagation()}> + + + + {isBrowserSessionActive && ( + + Active + + )} +
+ )} )} {/* Expanded state: Show task text and images */} diff --git a/webview-ui/src/components/chat/__tests__/BrowserSessionRow.aspect-ratio.spec.tsx b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.aspect-ratio.spec.tsx new file mode 100644 index 00000000000..87465862032 --- /dev/null +++ b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.aspect-ratio.spec.tsx @@ -0,0 +1,55 @@ +import { render, screen, fireEvent } from "@testing-library/react" +import React from "react" +import BrowserSessionRow from "../BrowserSessionRow" +import { ExtensionStateContext } from "@src/context/ExtensionStateContext" +import { TooltipProvider } from "@src/components/ui/tooltip" + +describe("BrowserSessionRow - screenshot area", () => { + const renderRow = (messages: any[]) => { + const mockExtState: any = { + // Ensure known viewport so expected aspect ratio is deterministic (600/900 = 66.67%) + browserViewportSize: "900x600", + isBrowserSessionActive: false, + } + + return render( + + + true} + onToggleExpand={() => {}} + lastModifiedMessage={undefined as any} + isLast={true} + onHeightChange={() => {}} + isStreaming={false} + /> + + , + ) + } + + it("reserves height while screenshot is loading (no layout collapse)", () => { + // Only a launch action, no corresponding browser_action_result yet (no screenshot) + const messages = [ + { + ts: 1, + say: "browser_action", + text: JSON.stringify({ action: "launch", url: "http://localhost:3000" }), + }, + ] + + renderRow(messages) + + // Open the browser session drawer + const globe = screen.getByLabelText("Browser interaction") + fireEvent.click(globe) + + const container = screen.getByTestId("screenshot-container") as HTMLDivElement + // padding-bottom should reflect aspect ratio (600/900 * 100) even without an image + const pb = parseFloat(container.style.paddingBottom || "0") + expect(pb).toBeGreaterThan(0) + // Be tolerant of rounding + expect(Math.round(pb)).toBe(67) + }) +}) diff --git 
a/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx new file mode 100644 index 00000000000..0c2b4762c4e --- /dev/null +++ b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx @@ -0,0 +1,42 @@ +import React from "react" +import { render, screen } from "@testing-library/react" +import BrowserSessionRow from "../BrowserSessionRow" +import { ExtensionStateContext } from "@src/context/ExtensionStateContext" +import { TooltipProvider } from "@radix-ui/react-tooltip" + +describe("BrowserSessionRow - Disconnect session button", () => { + const renderRow = (isActive: boolean) => { + const mockExtState: any = { + browserViewportSize: "900x600", + isBrowserSessionActive: isActive, + } + + return render( + + + false} + onToggleExpand={() => {}} + lastModifiedMessage={undefined as any} + isLast={true} + onHeightChange={() => {}} + isStreaming={false} + /> + + , + ) + } + + it("shows the Disconnect session button when a session is active", () => { + renderRow(true) + const btn = screen.getByLabelText("Disconnect session") + expect(btn).toBeInTheDocument() + }) + + it("does not render the button when no session is active", () => { + renderRow(false) + const btn = screen.queryByLabelText("Disconnect session") + expect(btn).toBeNull() + }) +}) diff --git a/webview-ui/src/components/chat/__tests__/BrowserSessionRow.spec.tsx b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.spec.tsx new file mode 100644 index 00000000000..684145f2556 --- /dev/null +++ b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.spec.tsx @@ -0,0 +1,126 @@ +import React from "react" +import { describe, it, expect, vi } from "vitest" +import { render, screen } from "@testing-library/react" + +import BrowserSessionRow from "../BrowserSessionRow" + +// Mock ExtensionStateContext so BrowserSessionRow falls back to props 
+vi.mock("@src/context/ExtensionStateContext", () => ({ + useExtensionState: () => { + throw new Error("No ExtensionStateContext in test environment") + }, +})) + +// Simplify i18n usage and provide initReactI18next for i18n setup +vi.mock("react-i18next", () => ({ + useTranslation: () => ({ + t: (key: string) => key, + }), + initReactI18next: { + type: "3rdParty", + init: () => {}, + }, +})) + +// Replace ProgressIndicator with a simple test marker +vi.mock("../ProgressIndicator", () => ({ + ProgressIndicator: () =>
, +})) + +const baseProps = { + isExpanded: () => false, + onToggleExpand: () => {}, + lastModifiedMessage: undefined, + isLast: true, + onHeightChange: () => {}, + isStreaming: false, +} + +describe("BrowserSessionRow - action spinner", () => { + it("does not show spinner when there are no browser actions", () => { + const messages = [ + { + type: "say", + say: "task", + ts: 1, + text: "Task started", + } as any, + ] + + render() + + expect(screen.queryByTestId("browser-session-spinner")).toBeNull() + }) + + it("shows spinner while the latest browser action is still running", () => { + const messages = [ + { + type: "say", + say: "task", + ts: 1, + text: "Task started", + } as any, + { + type: "say", + say: "browser_action", + ts: 2, + text: JSON.stringify({ action: "click" }), + } as any, + { + type: "say", + say: "browser_action_result", + ts: 3, + text: JSON.stringify({ currentUrl: "https://example.com" }), + } as any, + { + type: "say", + say: "browser_action", + ts: 4, + text: JSON.stringify({ action: "scroll_down" }), + } as any, + ] + + render() + + expect(screen.getByTestId("browser-session-spinner")).toBeInTheDocument() + }) + + it("hides spinner once the latest browser action has a result", () => { + const messages = [ + { + type: "say", + say: "task", + ts: 1, + text: "Task started", + } as any, + { + type: "say", + say: "browser_action", + ts: 2, + text: JSON.stringify({ action: "click" }), + } as any, + { + type: "say", + say: "browser_action_result", + ts: 3, + text: JSON.stringify({ currentUrl: "https://example.com" }), + } as any, + { + type: "say", + say: "browser_action", + ts: 4, + text: JSON.stringify({ action: "scroll_down" }), + } as any, + { + type: "say", + say: "browser_action_result", + ts: 5, + text: JSON.stringify({ currentUrl: "https://example.com/page2" }), + } as any, + ] + + render() + + expect(screen.queryByTestId("browser-session-spinner")).toBeNull() + }) +}) diff --git a/webview-ui/src/context/ExtensionStateContext.tsx 
b/webview-ui/src/context/ExtensionStateContext.tsx index 6443ccad93d..4bc03e259c7 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -200,6 +200,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode deniedCommands: [], soundEnabled: false, soundVolume: 0.5, + isBrowserSessionActive: false, ttsEnabled: false, ttsSpeed: 1.0, diffEnabled: false, diff --git a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx index a0bdc51e75e..28899e342a0 100644 --- a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx +++ b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx @@ -214,6 +214,7 @@ describe("mergeExtensionState", () => { remoteControlEnabled: false, taskSyncEnabled: false, featureRoomoteControlEnabled: false, + isBrowserSessionActive: false, checkpointTimeout: DEFAULT_CHECKPOINT_TIMEOUT_SECONDS, // Add the checkpoint timeout property } diff --git a/webview-ui/src/i18n/locales/ca/chat.json b/webview-ui/src/i18n/locales/ca/chat.json index 9a7ea4d5404..b81362db405 100644 --- a/webview-ui/src/i18n/locales/ca/chat.json +++ b/webview-ui/src/i18n/locales/ca/chat.json @@ -306,6 +306,7 @@ "socialLinks": "Uneix-te a nosaltres a X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sessió del navegador", "rooWantsToUse": "Roo vol utilitzar el navegador", "consoleLogs": "Registres de consola", "noNewLogs": "(Cap registre nou)", @@ -318,12 +319,14 @@ }, "sessionStarted": "Sessió de navegador iniciada", "actions": { - "title": "Acció de navegació: ", + "title": "Acció del navegador: ", "launch": "Iniciar navegador a {{url}}", "click": "Clic ({{coordinate}})", "type": "Escriure \"{{text}}\"", + "press": "Prem {{key}}", "scrollDown": "Desplaçar avall", "scrollUp": "Desplaçar amunt", + "hover": "Plana sobre ({{coordinate}})", "close": "Tancar navegador" } }, diff --git 
a/webview-ui/src/i18n/locales/de/chat.json b/webview-ui/src/i18n/locales/de/chat.json index 95412e0fdb5..6d87fa600be 100644 --- a/webview-ui/src/i18n/locales/de/chat.json +++ b/webview-ui/src/i18n/locales/de/chat.json @@ -306,6 +306,7 @@ "socialLinks": "Folge uns auf X, Discord oder r/RooCode 🚀" }, "browser": { + "session": "Browser-Sitzung", "rooWantsToUse": "Roo möchte den Browser verwenden", "consoleLogs": "Konsolenprotokolle", "noNewLogs": "(Keine neuen Protokolle)", @@ -322,8 +323,10 @@ "launch": "Browser starten auf {{url}}", "click": "Klicken ({{coordinate}})", "type": "Eingeben \"{{text}}\"", + "press": "{{key}} drücken", "scrollDown": "Nach unten scrollen", "scrollUp": "Nach oben scrollen", + "hover": "Hover ({{coordinate}})", "close": "Browser schließen" } }, diff --git a/webview-ui/src/i18n/locales/en/chat.json b/webview-ui/src/i18n/locales/en/chat.json index 68ec8ebb2fe..83966d6c4dc 100644 --- a/webview-ui/src/i18n/locales/en/chat.json +++ b/webview-ui/src/i18n/locales/en/chat.json @@ -321,6 +321,7 @@ "countdownDisplay": "{{count}}s" }, "browser": { + "session": "Browser Session", "rooWantsToUse": "Roo wants to use the browser", "consoleLogs": "Console Logs", "noNewLogs": "(No new logs)", @@ -333,12 +334,14 @@ }, "sessionStarted": "Browser Session Started", "actions": { - "title": "Browse Action: ", + "title": "Browser Action: ", "launch": "Launch browser at {{url}}", "click": "Click ({{coordinate}})", "type": "Type \"{{text}}\"", + "press": "Press {{key}}", "scrollDown": "Scroll down", "scrollUp": "Scroll up", + "hover": "Hover ({{coordinate}})", "close": "Close browser" } }, diff --git a/webview-ui/src/i18n/locales/es/chat.json b/webview-ui/src/i18n/locales/es/chat.json index 7accb141f6e..1d1db93cda7 100644 --- a/webview-ui/src/i18n/locales/es/chat.json +++ b/webview-ui/src/i18n/locales/es/chat.json @@ -306,6 +306,7 @@ "socialLinks": "Únete a nosotros en X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sesión del navegador", "rooWantsToUse": 
"Roo quiere usar el navegador", "consoleLogs": "Registros de la consola", "noNewLogs": "(No hay nuevos registros)", @@ -318,12 +319,14 @@ }, "sessionStarted": "Sesión de navegador iniciada", "actions": { - "title": "Acción de navegación: ", + "title": "Acción del navegador: ", "launch": "Iniciar navegador en {{url}}", "click": "Clic ({{coordinate}})", "type": "Escribir \"{{text}}\"", + "press": "Pulsar {{key}}", "scrollDown": "Desplazar hacia abajo", "scrollUp": "Desplazar hacia arriba", + "hover": "Flotar ({{coordinate}})", "close": "Cerrar navegador" } }, diff --git a/webview-ui/src/i18n/locales/fr/chat.json b/webview-ui/src/i18n/locales/fr/chat.json index aab98f2ce69..e710ce8fb6a 100644 --- a/webview-ui/src/i18n/locales/fr/chat.json +++ b/webview-ui/src/i18n/locales/fr/chat.json @@ -306,6 +306,7 @@ "socialLinks": "Rejoins-nous sur X, Discord, ou r/RooCode 🚀" }, "browser": { + "session": "Session du navigateur", "rooWantsToUse": "Roo veut utiliser le navigateur", "consoleLogs": "Journaux de console", "noNewLogs": "(Pas de nouveaux journaux)", @@ -318,12 +319,14 @@ }, "sessionStarted": "Session de navigateur démarrée", "actions": { - "title": "Action de navigation : ", + "title": "Action du navigateur : ", "launch": "Lancer le navigateur sur {{url}}", "click": "Cliquer ({{coordinate}})", "type": "Saisir \"{{text}}\"", + "press": "Appuyer sur {{key}}", "scrollDown": "Défiler vers le bas", "scrollUp": "Défiler vers le haut", + "hover": "Survoler ({{coordinate}})", "close": "Fermer le navigateur" } }, diff --git a/webview-ui/src/i18n/locales/hi/chat.json b/webview-ui/src/i18n/locales/hi/chat.json index 3ca7516d725..09cc68cf975 100644 --- a/webview-ui/src/i18n/locales/hi/chat.json +++ b/webview-ui/src/i18n/locales/hi/chat.json @@ -306,6 +306,7 @@ "socialLinks": "X, Discord, या r/RooCode पर हमसे जुड़ें 🚀" }, "browser": { + "session": "ब्राउज़र सत्र", "rooWantsToUse": "Roo ब्राउज़र का उपयोग करना चाहता है", "consoleLogs": "कंसोल लॉग", "noNewLogs": "(कोई नया लॉग नहीं)", 
@@ -322,8 +323,10 @@ "launch": "{{url}} पर ब्राउज़र लॉन्च करें", "click": "क्लिक करें ({{coordinate}})", "type": "टाइप करें \"{{text}}\"", + "press": "{{key}} दबाएँ", "scrollDown": "नीचे स्क्रॉल करें", "scrollUp": "ऊपर स्क्रॉल करें", + "hover": "होवर करें ({{coordinate}})", "close": "ब्राउज़र बंद करें" } }, diff --git a/webview-ui/src/i18n/locales/id/chat.json b/webview-ui/src/i18n/locales/id/chat.json index e1836e61be1..c5b6dbc66c3 100644 --- a/webview-ui/src/i18n/locales/id/chat.json +++ b/webview-ui/src/i18n/locales/id/chat.json @@ -327,6 +327,7 @@ "countdownDisplay": "{{count}}dtk" }, "browser": { + "session": "Sesi Browser", "rooWantsToUse": "Roo ingin menggunakan browser", "consoleLogs": "Log Konsol", "noNewLogs": "(Tidak ada log baru)", @@ -339,12 +340,14 @@ }, "sessionStarted": "Sesi Browser Dimulai", "actions": { - "title": "Aksi Browse: ", + "title": "Aksi Browser: ", "launch": "Luncurkan browser di {{url}}", "click": "Klik ({{coordinate}})", "type": "Ketik \"{{text}}\"", + "press": "Tekan {{key}}", "scrollDown": "Gulir ke bawah", "scrollUp": "Gulir ke atas", + "hover": "Arahkan ({{coordinate}})", "close": "Tutup browser" } }, diff --git a/webview-ui/src/i18n/locales/it/chat.json b/webview-ui/src/i18n/locales/it/chat.json index 8c5544c9717..b45fa19d756 100644 --- a/webview-ui/src/i18n/locales/it/chat.json +++ b/webview-ui/src/i18n/locales/it/chat.json @@ -306,6 +306,7 @@ "socialLinks": "Unisciti a noi su X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sessione del browser", "rooWantsToUse": "Roo vuole utilizzare il browser", "consoleLogs": "Log della console", "noNewLogs": "(Nessun nuovo log)", @@ -322,8 +323,10 @@ "launch": "Avvia browser su {{url}}", "click": "Clic ({{coordinate}})", "type": "Digita \"{{text}}\"", + "press": "Premi {{key}}", "scrollDown": "Scorri verso il basso", "scrollUp": "Scorri verso l'alto", + "hover": "Passa il mouse ({{coordinate}})", "close": "Chiudi browser" } }, diff --git a/webview-ui/src/i18n/locales/ja/chat.json 
b/webview-ui/src/i18n/locales/ja/chat.json index 4f6e40cadf7..45677c719f7 100644 --- a/webview-ui/src/i18n/locales/ja/chat.json +++ b/webview-ui/src/i18n/locales/ja/chat.json @@ -306,6 +306,7 @@ "socialLinks": "XDiscord、またはr/RooCodeでフォローしてください 🚀" }, "browser": { + "session": "ブラウザセッション", "rooWantsToUse": "Rooはブラウザを使用したい", "consoleLogs": "コンソールログ", "noNewLogs": "(新しいログはありません)", @@ -318,12 +319,14 @@ }, "sessionStarted": "ブラウザセッション開始", "actions": { - "title": "ブラウザアクション: ", + "title": "ブラウザ操作: ", "launch": "{{url}} でブラウザを起動", "click": "クリック ({{coordinate}})", "type": "入力 \"{{text}}\"", + "press": "{{key}}を押す", "scrollDown": "下にスクロール", "scrollUp": "上にスクロール", + "hover": "ホバー ({{coordinate}})", "close": "ブラウザを閉じる" } }, diff --git a/webview-ui/src/i18n/locales/ko/chat.json b/webview-ui/src/i18n/locales/ko/chat.json index 7d71191b017..430b9ccf743 100644 --- a/webview-ui/src/i18n/locales/ko/chat.json +++ b/webview-ui/src/i18n/locales/ko/chat.json @@ -306,6 +306,7 @@ "socialLinks": "X, Discord, 또는 r/RooCode에서 만나요 🚀" }, "browser": { + "session": "브라우저 세션", "rooWantsToUse": "Roo가 브라우저를 사용하고 싶어합니다", "consoleLogs": "콘솔 로그", "noNewLogs": "(새 로그 없음)", @@ -322,8 +323,10 @@ "launch": "{{url}}에서 브라우저 실행", "click": "클릭 ({{coordinate}})", "type": "입력 \"{{text}}\"", + "press": "{{key}} 누르기", "scrollDown": "아래로 스크롤", "scrollUp": "위로 스크롤", + "hover": "가리키기 ({{coordinate}})", "close": "브라우저 닫기" } }, diff --git a/webview-ui/src/i18n/locales/nl/chat.json b/webview-ui/src/i18n/locales/nl/chat.json index 82cdd0c46f9..86622dc7fa7 100644 --- a/webview-ui/src/i18n/locales/nl/chat.json +++ b/webview-ui/src/i18n/locales/nl/chat.json @@ -306,6 +306,7 @@ "countdownDisplay": "{{count}}s" }, "browser": { + "session": "Browsersessie", "rooWantsToUse": "Roo wil de browser gebruiken", "consoleLogs": "Console-logboeken", "noNewLogs": "(Geen nieuwe logboeken)", @@ -318,12 +319,14 @@ }, "sessionStarted": "Browsersessie gestart", "actions": { - "title": "Browse-actie: ", + "title": "Browseractie: ", 
"launch": "Browser starten op {{url}}", "click": "Klik ({{coordinate}})", "type": "Typ \"{{text}}\"", + "press": "Druk op {{key}}", "scrollDown": "Scroll naar beneden", "scrollUp": "Scroll naar boven", + "hover": "Zweven ({{coordinate}})", "close": "Browser sluiten" } }, diff --git a/webview-ui/src/i18n/locales/pl/chat.json b/webview-ui/src/i18n/locales/pl/chat.json index 7fe61db2a01..f5820122210 100644 --- a/webview-ui/src/i18n/locales/pl/chat.json +++ b/webview-ui/src/i18n/locales/pl/chat.json @@ -306,6 +306,7 @@ "socialLinks": "Dołącz do nas na X, Discord, lub r/RooCode 🚀" }, "browser": { + "session": "Sesja przeglądarki", "rooWantsToUse": "Roo chce użyć przeglądarki", "consoleLogs": "Logi konsoli", "noNewLogs": "(Brak nowych logów)", @@ -322,8 +323,10 @@ "launch": "Uruchom przeglądarkę na {{url}}", "click": "Kliknij ({{coordinate}})", "type": "Wpisz \"{{text}}\"", + "press": "Naciśnij {{key}}", "scrollDown": "Przewiń w dół", "scrollUp": "Przewiń w górę", + "hover": "Najedź ({{coordinate}})", "close": "Zamknij przeglądarkę" } }, diff --git a/webview-ui/src/i18n/locales/pt-BR/chat.json b/webview-ui/src/i18n/locales/pt-BR/chat.json index 4f287e6b538..661c7507c7f 100644 --- a/webview-ui/src/i18n/locales/pt-BR/chat.json +++ b/webview-ui/src/i18n/locales/pt-BR/chat.json @@ -306,6 +306,7 @@ "socialLinks": "Junte-se a nós no X, Discord, ou r/RooCode 🚀" }, "browser": { + "session": "Sessão do Navegador", "rooWantsToUse": "Roo quer usar o navegador", "consoleLogs": "Logs do console", "noNewLogs": "(Sem novos logs)", @@ -322,8 +323,10 @@ "launch": "Iniciar navegador em {{url}}", "click": "Clique ({{coordinate}})", "type": "Digitar \"{{text}}\"", + "press": "Pressione {{key}}", "scrollDown": "Rolar para baixo", "scrollUp": "Rolar para cima", + "hover": "Pairar ({{coordinate}})", "close": "Fechar navegador" } }, diff --git a/webview-ui/src/i18n/locales/ru/chat.json b/webview-ui/src/i18n/locales/ru/chat.json index 6cd11c1223b..d903c3f2b9f 100644 --- 
a/webview-ui/src/i18n/locales/ru/chat.json +++ b/webview-ui/src/i18n/locales/ru/chat.json @@ -307,6 +307,7 @@ "countdownDisplay": "{{count}}с" }, "browser": { + "session": "Сессия браузера", "rooWantsToUse": "Roo хочет использовать браузер", "consoleLogs": "Логи консоли", "noNewLogs": "(Новых логов нет)", @@ -319,12 +320,14 @@ }, "sessionStarted": "Сессия браузера запущена", "actions": { - "title": "Действие в браузере: ", + "title": "Действие браузера: ", "launch": "Открыть браузер по адресу {{url}}", "click": "Клик ({{coordinate}})", "type": "Ввести \"{{text}}\"", + "press": "Нажать {{key}}", "scrollDown": "Прокрутить вниз", "scrollUp": "Прокрутить вверх", + "hover": "Навести ({{coordinate}})", "close": "Закрыть браузер" } }, diff --git a/webview-ui/src/i18n/locales/tr/chat.json b/webview-ui/src/i18n/locales/tr/chat.json index 44b9c5ae35b..7d1e1234259 100644 --- a/webview-ui/src/i18n/locales/tr/chat.json +++ b/webview-ui/src/i18n/locales/tr/chat.json @@ -307,6 +307,7 @@ "socialLinks": "Bize X, Discord, veya r/RooCode'da katılın 🚀" }, "browser": { + "session": "Tarayıcı Oturumu", "rooWantsToUse": "Roo tarayıcıyı kullanmak istiyor", "consoleLogs": "Konsol Kayıtları", "noNewLogs": "(Yeni kayıt yok)", @@ -319,12 +320,14 @@ }, "sessionStarted": "Tarayıcı Oturumu Başlatıldı", "actions": { - "title": "Tarayıcı İşlemi: ", + "title": "Tarayıcı Eylemi: ", "launch": "{{url}} adresinde tarayıcı başlat", "click": "Tıkla ({{coordinate}})", "type": "Yaz \"{{text}}\"", + "press": "{{key}} tuşuna bas", "scrollDown": "Aşağı kaydır", "scrollUp": "Yukarı kaydır", + "hover": "Üzerine gel ({{coordinate}})", "close": "Tarayıcıyı kapat" } }, diff --git a/webview-ui/src/i18n/locales/vi/chat.json b/webview-ui/src/i18n/locales/vi/chat.json index 1359d48b593..2488ba9cb39 100644 --- a/webview-ui/src/i18n/locales/vi/chat.json +++ b/webview-ui/src/i18n/locales/vi/chat.json @@ -307,6 +307,7 @@ "socialLinks": "Tham gia với chúng tôi trên X, Discord, hoặc r/RooCode 🚀" }, "browser": { + "session": 
"Phiên trình duyệt", "rooWantsToUse": "Roo muốn sử dụng trình duyệt", "consoleLogs": "Nhật ký bảng điều khiển", "noNewLogs": "(Không có nhật ký mới)", @@ -323,8 +324,10 @@ "launch": "Khởi chạy trình duyệt tại {{url}}", "click": "Nhấp ({{coordinate}})", "type": "Gõ \"{{text}}\"", + "press": "Nhấn {{key}}", "scrollDown": "Cuộn xuống", "scrollUp": "Cuộn lên", + "hover": "Di chuột ({{coordinate}})", "close": "Đóng trình duyệt" } }, diff --git a/webview-ui/src/i18n/locales/zh-CN/chat.json b/webview-ui/src/i18n/locales/zh-CN/chat.json index fa35b68590f..a03048e5833 100644 --- a/webview-ui/src/i18n/locales/zh-CN/chat.json +++ b/webview-ui/src/i18n/locales/zh-CN/chat.json @@ -307,6 +307,7 @@ "socialLinks": "在 XDiscordr/RooCode 上关注我们 🚀" }, "browser": { + "session": "浏览器会话", "rooWantsToUse": "Roo想使用浏览器", "consoleLogs": "控制台日志", "noNewLogs": "(没有新日志)", @@ -323,8 +324,10 @@ "launch": "访问 {{url}}", "click": "点击 ({{coordinate}})", "type": "输入 \"{{text}}\"", + "press": "按 {{key}}", "scrollDown": "向下滚动", "scrollUp": "向上滚动", + "hover": "悬停 ({{coordinate}})", "close": "关闭浏览器" } }, diff --git a/webview-ui/src/i18n/locales/zh-TW/chat.json b/webview-ui/src/i18n/locales/zh-TW/chat.json index a0dee75d306..ff2860e5211 100644 --- a/webview-ui/src/i18n/locales/zh-TW/chat.json +++ b/webview-ui/src/i18n/locales/zh-TW/chat.json @@ -325,6 +325,7 @@ "countdownDisplay": "{{count}} 秒" }, "browser": { + "session": "瀏覽器工作階段", "rooWantsToUse": "Roo 想要使用瀏覽器", "consoleLogs": "主控台記錄", "noNewLogs": "(沒有新記錄)", @@ -337,12 +338,14 @@ }, "sessionStarted": "瀏覽器工作階段已啟動", "actions": { - "title": "瀏覽器動作", + "title": "瀏覽器動作:", "launch": "在 {{url}} 啟動瀏覽器", "click": "點選 ({{coordinate}})", "type": "輸入「{{text}}」", + "press": "按下 {{key}}", "scrollDown": "向下捲動", "scrollUp": "向上捲動", + "hover": "懸停 ({{coordinate}})", "close": "關閉瀏覽器" } }, diff --git a/webview-ui/vite.config.ts b/webview-ui/vite.config.ts index b38452a9902..6bf6412bfb0 100644 --- a/webview-ui/vite.config.ts +++ b/webview-ui/vite.config.ts @@ -101,6 +101,10 
@@ export default defineConfig(({ mode }) => { // Ensure source maps are properly included in the build minify: mode === "production" ? "esbuild" : false, rollupOptions: { + input: { + index: resolve(__dirname, "index.html"), + "browser-panel": resolve(__dirname, "browser-panel.html"), + }, output: { entryFileNames: `assets/[name].js`, chunkFileNames: (chunkInfo) => {