diff --git a/evals/evals.config.json b/evals/evals.config.json
index 5c27ebeb7..6b4c02ce6 100644
--- a/evals/evals.config.json
+++ b/evals/evals.config.json
@@ -839,6 +839,12 @@
       "categories": [
         "external_agent_benchmarks"
       ]
+    },
+    {
+      "name": "screenshot_cdp_toggle",
+      "categories": [
+        "regression"
+      ]
     }
   ]
 }
\ No newline at end of file
diff --git a/evals/tasks/screenshot_cdp_toggle.ts b/evals/tasks/screenshot_cdp_toggle.ts
new file mode 100644
index 000000000..a4c923a44
--- /dev/null
+++ b/evals/tasks/screenshot_cdp_toggle.ts
@@ -0,0 +1,244 @@
+import { EvalFunction } from "@/types/evals";
+
+/**
+ * Test the useCDP flag for screenshot functionality in Browserbase environments.
+ * This test verifies that:
+ * 1. Screenshots work with CDP (useCDP: true)
+ * 2. Screenshots work with Playwright fallback (useCDP: false)
+ * 3. Options are properly passed through in both modes
+ */
+export const screenshot_cdp_toggle: EvalFunction = async ({
+  debugUrl,
+  sessionUrl,
+  stagehand,
+  logger,
+}) => {
+  try {
+    // Navigate to a test page
+    await stagehand.page.goto("https://example.com");
+
+    logger.log({
+      message: "Testing screenshot with CDP enabled",
+      level: 1,
+    });
+
+    // Test 1: Screenshot with CDP
+    const cdpScreenshot = await stagehand.page.screenshot({
+      fullPage: true,
+      useCDP: true,
+    });
+
+    if (!cdpScreenshot || cdpScreenshot.length === 0) {
+      logger.error({
+        message: "CDP screenshot failed",
+        level: 0,
+        auxiliary: {
+          size: {
+            value: cdpScreenshot ? cdpScreenshot.length.toString() : "null",
+            type: "string",
+          },
+        },
+      });
+      return {
+        _success: false,
+        error: "CDP screenshot produced empty result",
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    logger.log({
+      message: `CDP screenshot successful: ${cdpScreenshot.length} bytes`,
+      level: 1,
+    });
+
+    logger.log({
+      message: "Testing screenshot with Playwright (CDP disabled)",
+      level: 1,
+    });
+
+    // Test 2: Screenshot with Playwright
+    const playwrightScreenshot = await stagehand.page.screenshot({
+      fullPage: true,
+      useCDP: false,
+    });
+
+    if (!playwrightScreenshot || playwrightScreenshot.length === 0) {
+      logger.error({
+        message: "Playwright screenshot failed",
+        level: 0,
+        auxiliary: {
+          size: {
+            value: playwrightScreenshot
+              ? playwrightScreenshot.length.toString()
+              : "null",
+            type: "string",
+          },
+        },
+      });
+      return {
+        _success: false,
+        error: "Playwright screenshot produced empty result",
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    logger.log({
+      message: `Playwright screenshot successful: ${playwrightScreenshot.length} bytes`,
+      level: 1,
+    });
+
+    // Test 3: Test with additional options (JPEG format)
+    logger.log({
+      message: "Testing screenshot with JPEG format and quality settings",
+      level: 1,
+    });
+
+    const jpegScreenshot = await stagehand.page.screenshot({
+      type: "jpeg",
+      quality: 80,
+      useCDP: false,
+    });
+
+    if (!jpegScreenshot || jpegScreenshot.length === 0) {
+      logger.error({
+        message: "JPEG screenshot failed",
+        level: 0,
+      });
+      return {
+        _success: false,
+        error: "JPEG screenshot produced empty result",
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    logger.log({
+      message: `JPEG screenshot successful: ${jpegScreenshot.length} bytes`,
+      level: 1,
+    });
+
+    // Test 4: Test with clip option
+    logger.log({
+      message: "Testing screenshot with clip region",
+      level: 1,
+    });
+
+    const clippedScreenshot = await stagehand.page.screenshot({
+      clip: { x: 0, y: 0, width: 500, height: 300 },
+      useCDP: true,
+    });
+
+    if (!clippedScreenshot || clippedScreenshot.length === 0) {
+      logger.error({
+        message: "Clipped screenshot failed",
+        level: 0,
+      });
+      return {
+        _success: false,
+        error: "Clipped screenshot produced empty result",
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    // Verify clipped screenshot is smaller than full page
+    if (clippedScreenshot.length >= cdpScreenshot.length) {
+      logger.error({
+        message: "Clipped screenshot is not smaller than full screenshot",
+        level: 0,
+        auxiliary: {
+          clipped_size: {
+            value: clippedScreenshot.length.toString(),
+            type: "integer",
+          },
+          full_size: {
+            value: cdpScreenshot.length.toString(),
+            type: "integer",
+          },
+        },
+      });
+      return {
+        _success: false,
+        error: "Clipped screenshot size validation failed",
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    logger.log({
+      message: `Clipped screenshot successful: ${clippedScreenshot.length} bytes`,
+      level: 1,
+    });
+
+    logger.log({
+      message: "All screenshot tests passed successfully",
+      level: 0,
+      auxiliary: {
+        cdp_size: {
+          value: cdpScreenshot.length.toString(),
+          type: "integer",
+        },
+        playwright_size: {
+          value: playwrightScreenshot.length.toString(),
+          type: "integer",
+        },
+        jpeg_size: {
+          value: jpegScreenshot.length.toString(),
+          type: "integer",
+        },
+        clipped_size: {
+          value: clippedScreenshot.length.toString(),
+          type: "integer",
+        },
+      },
+    });
+
+    return {
+      _success: true,
+      cdpSize: cdpScreenshot.length,
+      playwrightSize: playwrightScreenshot.length,
+      jpegSize: jpegScreenshot.length,
+      clippedSize: clippedScreenshot.length,
+      debugUrl,
+      sessionUrl,
+      logs: logger.getLogs(),
+    };
+  } catch (error) {
+    // A thrown value is not guaranteed to be an Error; narrow before reading
+    // `message`/`stack` so a thrown string or null cannot crash the handler.
+    const caught = error instanceof Error ? error : undefined;
+    const message = caught?.message || String(error);
+
+    logger.error({
+      message: "Screenshot CDP toggle test failed",
+      level: 0,
+      auxiliary: {
+        error: {
+          value: message,
+          type: "string",
+        },
+        stack: {
+          value: caught?.stack || "",
+          type: "string",
+        },
+      },
+    });
+
+    return {
+      _success: false,
+      error: message,
+      debugUrl,
+      sessionUrl,
+      logs: logger.getLogs(),
+    };
+  } finally {
+    await stagehand.close();
+  }
+};
diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts
index 2f9a1acc9..b67921e26 100644
--- a/lib/StagehandPage.ts
+++ b/lib/StagehandPage.ts
@@ -1,7 +1,11 @@
 import type { CDPSession, Page as PlaywrightPage, Frame } from "playwright";
 import { selectors } from "playwright";
 import { z } from "zod/v3";
-import { Page, defaultExtractSchema } from "../types/page";
+import {
+  Page,
+  defaultExtractSchema,
+  StagehandScreenshotOptions,
+} from "../types/page";
 import {
   ExtractOptions,
   ExtractResult,
@@ -415,37 +419,42 @@ ${scriptContent} \
         }
 
         // Handle screenshots with CDP
-        if (prop === "screenshot" && this.stagehand.env === "BROWSERBASE") {
-          return async (
-            options: {
-              type?: "png" | "jpeg";
-              quality?: number;
-              fullPage?: boolean;
-              clip?: { x: number; y: number; width: number; height: number };
-              omitBackground?: boolean;
-            } = {},
-          ) => {
-            const cdpOptions: Record<string, unknown> = {
-              format: options.type === "jpeg" ? "jpeg" : "png",
-              quality: options.quality,
-              clip: options.clip,
-              omitBackground: options.omitBackground,
-              fromSurface: true,
-            };
-
-            if (options.fullPage) {
-              cdpOptions.captureBeyondViewport = true;
-            }
+        if (prop === "screenshot") {
+          return async (options: StagehandScreenshotOptions = {}) => {
+            const rawScreenshot: typeof target.screenshot =
+              Object.getPrototypeOf(target).screenshot.bind(target);
+
+            const {
+              useCDP = this.stagehand.env === "BROWSERBASE",
+              ...playwrightOptions
+            } = options;
+
+            if (useCDP && this.stagehand.env === "BROWSERBASE") {
+              const cdpOptions: Record<string, unknown> = {
+                format: options.type === "jpeg" ? "jpeg" : "png",
+                quality: options.quality,
+                // CDP Viewport requires `scale`; Playwright's clip has none.
+                clip: options.clip ? { ...options.clip, scale: 1 } : undefined,
+                omitBackground: options.omitBackground,
+                fromSurface: true,
+              };
+
+              if (options.fullPage) {
+                cdpOptions.captureBeyondViewport = true;
+              }
 
-            const data = await this.sendCDP<{ data: string }>(
-              "Page.captureScreenshot",
-              cdpOptions,
-            );
+              const data = await this.sendCDP<{ data: string }>(
+                "Page.captureScreenshot",
+                cdpOptions,
+              );
 
-            // Convert base64 to buffer
-            const buffer = Buffer.from(data.data, "base64");
+              // Convert base64 to buffer
+              const buffer = Buffer.from(data.data, "base64");
 
-            return buffer;
+              return buffer;
+            } else {
+              return await rawScreenshot(playwrightOptions);
+            }
           };
         }
 
diff --git a/types/page.ts b/types/page.ts
index 4f93b1fa5..de859efe6 100644
--- a/types/page.ts
+++ b/types/page.ts
@@ -2,6 +2,7 @@ import type {
   Browser as PlaywrightBrowser,
   BrowserContext as PlaywrightContext,
   Page as PlaywrightPage,
+  PageScreenshotOptions,
 } from "playwright";
 import { z } from "zod/v3";
 import type {
@@ -21,7 +22,18 @@ export const pageTextSchema = z.object({
   page_text: z.string(),
 });
 
-export interface Page extends Omit<PlaywrightPage, "on"> {
+export interface StagehandScreenshotOptions extends PageScreenshotOptions {
+  /**
+   * Capture through CDP (`Page.captureScreenshot`) instead of Playwright.
+   * Only takes effect when the env is BROWSERBASE, where it defaults to true;
+   * in every other env Playwright is used regardless of this flag.
+   * The CDP path forwards only `type`, `quality`, `clip`, `omitBackground`
+   * and `fullPage`; other Playwright options are ignored there.
+   */
+  useCDP?: boolean;
+}
+
+export interface Page extends Omit<PlaywrightPage, "on" | "screenshot"> {
   act(action: string): Promise<ActResult>;
   act(options: ActOptions): Promise<ActResult>;
   act(observation: ObserveResult): Promise<ActResult>;
@@ -38,6 +50,8 @@ export interface Page extends Omit<PlaywrightPage, "on"> {
   observe(instruction: string): Promise<ObserveResult[]>;
   observe(options?: ObserveOptions): Promise<ObserveResult[]>;
 
+  screenshot(options?: StagehandScreenshotOptions): Promise<Buffer>;
+
   on: {
     (event: "popup", listener: (page: Page) => unknown): Page;
   } & PlaywrightPage["on"];