RooCodeInc · mrubens · Dec 10, 2025 · Dec 9, 2025 · Dec 9, 2025 · Dec 10, 2025
@@ -23,11 +23,12 @@ export interface Size {
 }
 
 export interface BrowserActionParams {
-	action: "launch" | "click" | "hover" | "type" | "scroll_down" | "scroll_up" | "resize" | "close"
+	action: "launch" | "click" | "hover" | "type" | "scroll_down" | "scroll_up" | "resize" | "close" | "screenshot"
 	url?: string
 	coordinate?: Coordinate
 	size?: Size
 	text?: string
+	path?: string
 }
 
 export interface GenerateImageParams {

@@ -406,6 +406,7 @@ export class NativeToolCallParser {
 						coordinate: partialArgs.coordinate,
 						size: partialArgs.size,
 						text: partialArgs.text,
+						path: partialArgs.path,
 					}
 				}
 				break
@@ -645,6 +646,7 @@ export class NativeToolCallParser {
 							coordinate: args.coordinate,
 							size: args.size,
 							text: args.text,
+							path: args.path,
 						} as NativeArgsFor<TName>
 					}
 					break

@@ -39,6 +39,10 @@ Parameters:
         - Use with the \`size\` parameter to specify the new size.
     * scroll_down: Scroll down the page by one page height.
     * scroll_up: Scroll up the page by one page height.
+    * screenshot: Take a screenshot and save it to a file.
+        - Use with the \`path\` parameter to specify the destination file path.
+        - Supported formats: .png, .jpeg, .webp
+        - Example: \`<action>screenshot</action>\` with \`<path>screenshots/result.png</path>\`
     * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
         - Example: \`<action>close</action>\`
 - url: (optional) Use this for providing the URL for the \`launch\` action.
@@ -56,6 +60,9 @@ Parameters:
     * Example: <size>1280,720</size>
 - text: (optional) Use this for providing the text for the \`type\` action.
     * Example: <text>Hello, world!</text>
+- path: (optional) File path for the \`screenshot\` action. Path is relative to the workspace.
+    * Supported formats: .png, .jpeg, .webp
+    * Example: <path>screenshots/my-screenshot.png</path>
 Usage:
 <browser_action>
 <action>Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close)</action>
@@ -74,5 +81,11 @@ Example: Requesting to click on the element at coordinates 450,300 on a 1024x768
 <browser_action>
 <action>click</action>
 <coordinate>450,300@1024x768</coordinate>
+</browser_action>
+
+Example: Taking a screenshot and saving it to a file
+<browser_action>
+<action>screenshot</action>
+<path>screenshots/result.png</path>
 </browser_action>`
 }
@@ -21,6 +21,8 @@ const SIZE_PARAMETER_DESCRIPTION = `Viewport dimensions for the resize action in
 
 const TEXT_PARAMETER_DESCRIPTION = `Text to type when performing the type action, or key name to press when performing the press action (e.g., 'Enter', 'Tab', 'Escape')`
 
+// Schema description for the `path` parameter. Lists every extension the screenshot
+// code maps to a format (`.jpg` is treated the same as `.jpeg`) and states the
+// workspace-containment rule that the save routine enforces.
+const PATH_PARAMETER_DESCRIPTION = `File path where the screenshot should be saved (relative to workspace; the path must stay inside the workspace). Required for screenshot action. Supports .png, .jpg/.jpeg, and .webp extensions. Example: 'screenshots/result.png'`
+
 export default {
 	type: "function",
 	function: {
@@ -33,7 +35,18 @@ export default {
 				action: {
 					type: "string",
 					description: ACTION_PARAMETER_DESCRIPTION,
-					enum: ["launch", "click", "hover", "type", "press", "scroll_down", "scroll_up", "resize", "close"],
+					enum: [
+						"launch",
+						"click",
+						"hover",
+						"type",
+						"press",
+						"scroll_down",
+						"scroll_up",
+						"resize",
+						"close",
+						"screenshot",
+					],
 				},
 				url: {
 					type: ["string", "null"],
@@ -51,6 +64,10 @@ export default {
 					type: ["string", "null"],
 					description: TEXT_PARAMETER_DESCRIPTION,
 				},
+				path: {
+					type: ["string", "null"],
+					description: PATH_PARAMETER_DESCRIPTION,
+				},
 			},
 			required: ["action"],
 			additionalProperties: false,

@@ -23,6 +23,7 @@ export async function browserActionTool(
 	const coordinate: string | undefined = block.params.coordinate
 	const text: string | undefined = block.params.text
 	const size: string | undefined = block.params.size
+	const filePath: string | undefined = block.params.path
 
 	if (!action || !browserActions.includes(action)) {
 		// checking for action to ensure it is complete and valid
@@ -155,6 +156,17 @@ export async function browserActionTool(
 					}
 				}
 
+				if (action === "screenshot") {
+					if (!filePath) {
+						cline.consecutiveMistakeCount++
+						cline.recordToolError("browser_action")
+						cline.didToolFailInCurrentTurn = true
+						pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "path"))
+						// Do not close the browser on parameter validation errors
+						return
+					}
+				}
+
 				cline.consecutiveMistakeCount = 0
 
 				// Prepare say payload; include executedCoordinate for pointer actions
@@ -191,6 +203,9 @@ export async function browserActionTool(
 					case "resize":
 						browserActionResult = await cline.browserSession.resize(size!)
 						break
+					case "screenshot":
+						browserActionResult = await cline.browserSession.saveScreenshot(filePath!, cline.cwd)
+						break
 					case "close":
 						browserActionResult = await cline.browserSession.closeBrowser()
 						break
@@ -205,12 +220,16 @@ export async function browserActionTool(
 				case "press":
 				case "scroll_down":
 				case "scroll_up":
-				case "resize": {
+				case "resize":
+				case "screenshot": {
 					await cline.say("browser_action_result", JSON.stringify(browserActionResult))
 
 					const images = browserActionResult?.screenshot ? [browserActionResult.screenshot] : []
 
-					let messageText = `The browser action has been executed.`
+					let messageText =
+						action === "screenshot"
+							? `Screenshot saved to: ${filePath}`
+							: `The browser action has been executed.`
 
 					messageText += `\n\n**CRITICAL**: When providing click/hover coordinates:`
 					messageText += `\n1. Screenshot dimensions != Browser viewport dimensions`

@@ -0,0 +1,27 @@
+// Test screenshot action functionality in browser actions
+import { describe, it, expect } from "vitest"
+import { browserActions } from "../../../shared/ExtensionMessage"
+
+describe("Browser Action Screenshot", () => {
+	describe("browserActions array", () => {
+		// Membership only: passes regardless of where "screenshot" sits in the list.
+		it("should include screenshot action", () => {
+			expect(browserActions).toContain("screenshot")
+		})
+
+		// Exact match: pins both the contents and the order of the exported list.
+		it("should have screenshot as a valid browser action type", () => {
+			expect(browserActions).toEqual([
+				"launch",
+				"click",
+				"hover",
+				"type",
+				"press",
+				"scroll_down",
+				"scroll_up",
+				"resize",
+				"close",
+				"screenshot",
+			])
+		})
+	})
+})
@@ -756,6 +756,55 @@ export class BrowserSession {
 		})
 	}
 
+	/**
+	 * Maps the extension of `filePath` to an image format name.
+	 *
+	 * The extension is compared case-insensitively. `.jpg` and `.jpeg` both yield
+	 * "jpeg", `.webp` yields "webp", and any other (or missing) extension falls
+	 * back to "png".
+	 */
+	private getImageTypeFromPath(filePath: string): "png" | "jpeg" | "webp" {
+		switch (path.extname(filePath).toLowerCase()) {
+			case ".jpg":
+			case ".jpeg":
+				return "jpeg"
+			case ".webp":
+				return "webp"
+			default:
+				return "png"
+		}
+	}
+
+	/**
+	 * Takes a screenshot of the page and writes it to a file inside the workspace.
+	 *
+	 * The image format is derived from the file extension via `getImageTypeFromPath`.
+	 * For non-PNG formats the quality is read from the "screenshotQuality" global-state
+	 * entry, defaulting to 75. Missing parent directories are created before writing.
+	 *
+	 * @param filePath - Destination file path; resolved against `cwd`
+	 * @param cwd - Workspace root used to resolve `filePath` and to bound where files may be written
+	 * @returns The BrowserActionResult produced by `doAction`
+	 * @throws Error if the resolved path lies outside the workspace, or is the workspace directory itself
+	 */
+	async saveScreenshot(filePath: string, cwd: string): Promise<BrowserActionResult> {
+		// Always resolve the path against the workspace root
+		const workspaceRoot = path.resolve(cwd)
+		const fullPath = path.resolve(workspaceRoot, filePath)
+
+		// Validate containment before calling doAction. The decision is made from the relative
+		// path rather than a string-prefix test: prefixing with `root + path.sep` wrongly rejects
+		// every path when the workspace is a filesystem root ("/" or "C:\"), because such a root
+		// already ends with a separator. An absolute relative path means a different drive on Windows.
+		// NOTE: this is a lexical check only; symlinks inside the workspace are not resolved.
+		const relativePath = path.relative(workspaceRoot, fullPath)
+		const escapesWorkspace =
+			relativePath === ".." || relativePath.startsWith(`..${path.sep}`) || path.isAbsolute(relativePath)
+
+		if (escapesWorkspace) {
+			throw new Error(
+				`Screenshot path "${filePath}" resolves to "${fullPath}" which is outside the workspace "${workspaceRoot}". ` +
+					`Paths must be relative to the workspace and cannot escape it.`,
+			)
+		}
+
+		// An empty relative path means the target is the workspace directory, which cannot be a file.
+		if (relativePath === "") {
+			throw new Error(
+				`Screenshot path "${filePath}" resolves to the workspace directory itself. ` +
+					`Provide a file name inside the workspace.`,
+			)
+		}
+
+		return this.doAction(async (page) => {
+			// Ensure directory exists
+			await fs.mkdir(path.dirname(fullPath), { recursive: true })
+
+			// Determine image type from extension
+			const imageType = this.getImageTypeFromPath(fullPath)
+
+			// The screenshot is written straight to the file; quality is left unset for PNG
+			// and only supplied for the other formats.
+			await page.screenshot({
+				path: fullPath,
+				type: imageType,
+				quality:
+					imageType === "png"
+						? undefined
+						: ((this.context.globalState.get("screenshotQuality") as number | undefined) ?? 75),
+			})
+		})
+	}
+
 	/**
 	 * Draws a cursor indicator on the page at the specified position
 	 */