diff --git a/.changeset/nasty-kids-visit.md b/.changeset/nasty-kids-visit.md new file mode 100644 index 000000000..002dc6a7d --- /dev/null +++ b/.changeset/nasty-kids-visit.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +fix: file uploads failing on Browserbase diff --git a/packages/core/lib/v3/dom/locatorScripts/scripts.ts b/packages/core/lib/v3/dom/locatorScripts/scripts.ts index db668d500..ce32035e5 100644 --- a/packages/core/lib/v3/dom/locatorScripts/scripts.ts +++ b/packages/core/lib/v3/dom/locatorScripts/scripts.ts @@ -24,6 +24,61 @@ export function ensureFileInputElement(this: Element): boolean { } } +export interface SerializedFilePayload { + name: string; + mimeType: string; + base64: string; + lastModified?: number; +} + +/** Attach File objects created from serialized payloads to an . */ +export function assignFilePayloadsToInputElement( + this: Element, + payloads: SerializedFilePayload[], +): boolean { + try { + const input = this as HTMLInputElement; + if (!input || input.tagName?.toLowerCase() !== "input") return false; + if ((input.type ?? "").toLowerCase() !== "file") return false; + + const transfer: DataTransfer | null = (() => { + try { + return new DataTransfer(); + } catch { + return null; + } + })(); + if (!transfer) return false; + + const entries = Array.isArray(payloads) ? payloads : []; + for (const payload of entries) { + if (!payload) continue; + const name = payload.name || "upload.bin"; + const mimeType = payload.mimeType || "application/octet-stream"; + const lastModified = + typeof payload.lastModified === "number" + ? payload.lastModified + : Date.now(); + + const binary = window.atob(payload.base64 ?? ""); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < binary.length; i += 1) { + bytes[i] = binary.charCodeAt(i); + } + const blob = new Blob([bytes], { type: mimeType }); + const file = new File([blob], name, { type: mimeType, lastModified }); + transfer.items.add(file); + } + + input.files = transfer.files; + input.dispatchEvent(new Event("input", { bubbles: true })); + input.dispatchEvent(new Event("change", { bubbles: true })); + return true; + } catch { + return false; + } +} + export function dispatchDomClick( this: Element, options?: ClickEventOptions, diff --git a/packages/core/lib/v3/tests/setinputfiles.spec.ts b/packages/core/lib/v3/tests/setinputfiles.spec.ts index c006485cb..7d9244e36 100644 --- a/packages/core/lib/v3/tests/setinputfiles.spec.ts +++ b/packages/core/lib/v3/tests/setinputfiles.spec.ts @@ -1,9 +1,30 @@ import { expect, test } from "@playwright/test"; +import { Buffer } from "buffer"; +import { promises as fs } from "fs"; +import path from "path"; +import crypto from "crypto"; +import type { Page as V3Page } from "../understudy/page"; import { V3 } from "../v3"; import { v3TestConfig } from "./v3.config"; +const FILE_UPLOAD_IFRAME_URL = + "https://browserbase.github.io/stagehand-eval-sites/sites/file-uploads-iframe/"; +const FILE_UPLOAD_V2_URL = + "https://browserbase.github.io/stagehand-eval-sites/sites/file-uploads-2/"; + +const RESUME_INPUT = "#resumeUpload"; +const RESUME_SUCCESS = "#resumeSuccess"; +const IMAGES_INPUT = "#imagesUpload"; +const IMAGES_SUCCESS = "#imagesSuccess"; +const AUDIO_INPUT = "#audioUpload"; +const AUDIO_SUCCESS = "#audioSuccess"; +const IFRAME_UPLOAD_INPUT = "/html/body/div/iframe/html/body/div/div[1]/input"; +const IFRAME_SUCCESS = + "body > div > iframe >> html > body > div > div:nth-of-type(2)"; + test.describe("tests setInputFiles()", () => { let v3: V3; + const fixtures: string[] = []; test.beforeEach(async () => { v3 = new V3(v3TestConfig); @@ -12,37 +33,133 @@ test.describe("tests setInputFiles()", () => { test.afterEach(async () => { await v3?.close?.().catch(() => {}); + await Promise.all( + fixtures.splice(0).map((file) => fs.unlink(file).catch(() => {})), + ); }); - test("deepLocator().setInputFiles() (inside an iframe)", async () => { + const createFixture = async ( + namePrefix: string, + contents: string, + ext = ".txt", + ): Promise => { + const normalizedExt = ext.startsWith(".") ? ext : `.${ext}`; + const filename = `${namePrefix}-${crypto.randomBytes(4).toString("hex")}${normalizedExt}`; + const filePath = path.resolve(process.cwd(), filename); + await fs.writeFile(filePath, contents, "utf-8"); + fixtures.push(filePath); + return filePath; + }; + + const expectUploadSuccess = async ( + page: V3Page, + successSelector: string, + expectedText: string, + ) => { + await expect + .poll( + () => + page.evaluate((selector) => { + const el = document.querySelector(selector); + if (!el) return ""; + const display = window.getComputedStyle(el).display; + if (display === "none") return ""; + return el.textContent ?? ""; + }, successSelector), + { message: `wait for success message at ${successSelector}` }, + ) + .toContain(expectedText); + }; + + const getInputFileCount = async (page: V3Page, inputSelector: string) => { + return await page.evaluate((selector) => { + const el = document.querySelector(selector); + if (!(el instanceof HTMLInputElement)) return 0; + return el.files?.length ?? 0; + }, inputSelector); + }; + + const expectFileCount = async ( + page: V3Page, + inputSelector: string, + expected: number, + ) => { + await expect + .poll(() => getInputFileCount(page, inputSelector), { + message: `wait for file count on ${inputSelector}`, + }) + .toBe(expected); + }; + + test("deepLocator uploads and validates within iframe", async () => { const page = v3.context.pages()[0]; - await page.goto( - "https://browserbase.github.io/stagehand-eval-sites/sites/file-uploads-iframe/", + await page.goto(FILE_UPLOAD_IFRAME_URL); + const fixture = await createFixture( + "iframe-upload", + "

iframe upload

", + ".txt", ); await page - .deepLocator("/html/body/div/iframe/html/body/div/div[1]/input") - .setInputFiles("fake.html"); - await new Promise((resolve) => setTimeout(resolve, 3000)); - const successMessage = await page - .deepLocator( - "body > div > iframe >> html > body > div > div:nth-of-type(2)", - ) - .textContent(); - expect(successMessage).toContain("file uploaded successfully"); + .deepLocator(IFRAME_UPLOAD_INPUT) + .setInputFiles(path.relative(process.cwd(), fixture)); + + const successLocator = page.deepLocator(IFRAME_SUCCESS); + await expect + .poll(async () => (await successLocator.textContent()) ?? "", { + message: "wait for iframe upload success", + }) + .toContain("file uploaded successfully"); }); - test("locator().setInputFiles() (no iframe)", async () => { + test("locator uploads resume via relative path string", async () => { const page = v3.context.pages()[0]; - await page.goto( - "https://browserbase.github.io/stagehand-eval-sites/sites/file-uploads/", - ); + await page.goto(FILE_UPLOAD_V2_URL); + const fixture = await createFixture("resume", "

resume

", ".pdf"); await page - .locator("/html/body/div/div[1]/input") - .setInputFiles("fake.html"); - await new Promise((resolve) => setTimeout(resolve, 3000)); - const successMessage = await page - .locator("body > div > div:nth-of-type(2)") - .textContent(); - expect(successMessage).toContain("file uploaded successfully"); + .locator(RESUME_INPUT) + .setInputFiles(path.relative(process.cwd(), fixture)); + await expectUploadSuccess(page, RESUME_SUCCESS, "Resume uploaded!"); + await expectFileCount(page, RESUME_INPUT, 1); + }); + + test("locator uploads multiple images via absolute paths", async () => { + const page = v3.context.pages()[0]; + await page.goto(FILE_UPLOAD_V2_URL); + const first = await createFixture("image-a", "

A

", ".png"); + const second = await createFixture("image-b", "

B

", ".jpeg"); + await page.locator(IMAGES_INPUT).setInputFiles([first, second]); + await expectUploadSuccess(page, IMAGES_SUCCESS, "Images uploaded!"); + await expectFileCount(page, IMAGES_INPUT, 2); + }); + + test("locator uploads audio via payload object", async () => { + const page = v3.context.pages()[0]; + await page.goto(FILE_UPLOAD_V2_URL); + await page.locator(AUDIO_INPUT).setInputFiles({ + name: "voice-sample.mp3", + mimeType: "audio/mpeg", + buffer: Buffer.from("fake audio bytes", "utf-8"), + }); + await expectUploadSuccess(page, AUDIO_SUCCESS, "Audio file uploaded!"); + await expectFileCount(page, AUDIO_INPUT, 1); + }); + + test("locator uploads multiple payload objects to images input", async () => { + const page = v3.context.pages()[0]; + await page.goto(FILE_UPLOAD_V2_URL); + await page.locator(IMAGES_INPUT).setInputFiles([ + { + name: "payload-a.png", + mimeType: "image/png", + buffer: Buffer.from("payload-a", "utf-8"), + }, + { + name: "payload-b.png", + mimeType: "image/png", + buffer: Buffer.from("payload-b", "utf-8"), + }, + ]); + await expectUploadSuccess(page, IMAGES_SUCCESS, "Images uploaded!"); + await expectFileCount(page, IMAGES_INPUT, 2); }); }); diff --git a/packages/core/lib/v3/types/private/locator.ts b/packages/core/lib/v3/types/private/locator.ts new file mode 100644 index 000000000..105e61964 --- /dev/null +++ b/packages/core/lib/v3/types/private/locator.ts @@ -0,0 +1,10 @@ +import { Buffer } from "buffer"; + +export interface NormalizedFilePayload { + name: string; + mimeType: string; + buffer: Buffer; + lastModified: number; + /** Absolute path to the source file when provided by the caller. */ + absolutePath?: string; +} diff --git a/packages/core/lib/v3/types/public/locator.ts b/packages/core/lib/v3/types/public/locator.ts new file mode 100644 index 000000000..51fa24fe2 --- /dev/null +++ b/packages/core/lib/v3/types/public/locator.ts @@ -0,0 +1,14 @@ +import { Buffer } from "buffer"; + +export interface SetInputFilePayload { + name: string; + mimeType?: string; + buffer: ArrayBuffer | Uint8Array | Buffer | string; + lastModified?: number; +} + +export type SetInputFilesArgument = + | string + | string[] + | SetInputFilePayload + | SetInputFilePayload[]; diff --git a/packages/core/lib/v3/understudy/context.ts b/packages/core/lib/v3/understudy/context.ts index 782cdac69..3b131a2a4 100644 --- a/packages/core/lib/v3/understudy/context.ts +++ b/packages/core/lib/v3/understudy/context.ts @@ -405,6 +405,7 @@ export class V3Context { info.targetId, this.apiClient, this.localBrowserLaunchOptions, + this.env === "BROWSERBASE", ); this.wireSessionToOwnerPage(sessionId, page); this.pagesByTarget.set(info.targetId, page); diff --git a/packages/core/lib/v3/understudy/fileUploadUtils.ts b/packages/core/lib/v3/understudy/fileUploadUtils.ts new file mode 100644 index 000000000..c5d285edd --- /dev/null +++ b/packages/core/lib/v3/understudy/fileUploadUtils.ts @@ -0,0 +1,102 @@ +import { promises as fs, type Stats } from "fs"; +import path from "path"; +import { Buffer } from "buffer"; +import { StagehandInvalidArgumentError } from "../types/public/sdkErrors"; +import { + SetInputFilesArgument, + SetInputFilePayload, +} from "../types/public/locator"; +import { NormalizedFilePayload } from "../types/private/locator"; + +const DEFAULT_MIME_TYPE = "application/octet-stream"; + +/** + * Normalize user-provided setInputFiles arguments into in-memory payloads. + * - Resolves string paths relative to the provided base directory. + * - Validates that each path exists and is a regular file. + * - Converts all buffers into Node Buffers for downstream processing. + */ +export async function normalizeInputFiles( + files: SetInputFilesArgument, + opts: { baseDir?: string } = {}, +): Promise { + if (files === null || files === undefined) return []; + + const flattened = Array.isArray(files) + ? (files as Array) + : [files]; + if (!flattened.length) return []; + + const baseDir = opts.baseDir ?? process.cwd(); + const normalized: NormalizedFilePayload[] = []; + + for (const entry of flattened) { + if (typeof entry === "string") { + const absolutePath = path.isAbsolute(entry) + ? entry + : path.resolve(baseDir, entry); + const stat = await statFile(absolutePath); + if (!stat.isFile()) { + throw new StagehandInvalidArgumentError( + `setInputFiles(): expected a file but received directory or special entry at ${absolutePath}`, + ); + } + const buffer = await fs.readFile(absolutePath); + normalized.push({ + name: path.basename(absolutePath) || "upload.bin", + mimeType: DEFAULT_MIME_TYPE, + buffer, + lastModified: stat.mtimeMs || Date.now(), + absolutePath, + }); + continue; + } + + if (entry && typeof entry === "object" && "buffer" in entry) { + const payload = entry as SetInputFilePayload; + const buffer = toBuffer(payload.buffer); + normalized.push({ + name: payload.name || "upload.bin", + mimeType: payload.mimeType || DEFAULT_MIME_TYPE, + buffer, + lastModified: + typeof payload.lastModified === "number" + ? payload.lastModified + : Date.now(), + }); + continue; + } + + throw new StagehandInvalidArgumentError( + "setInputFiles(): expected file path(s) or payload object(s)", + ); + } + + return normalized; +} + +async function statFile(absolutePath: string): Promise { + try { + return await fs.stat(absolutePath); + } catch (error) { + const code = (error as NodeJS.ErrnoException)?.code; + if (code === "ENOENT") { + throw new StagehandInvalidArgumentError( + `setInputFiles(): file not found at ${absolutePath}`, + ); + } + throw error; + } +} + +export function toBuffer( + data: ArrayBuffer | Uint8Array | Buffer | string, +): Buffer { + if (Buffer.isBuffer(data)) return data; + if (data instanceof Uint8Array) return Buffer.from(data); + if (typeof data === "string") return Buffer.from(data); + if (data instanceof ArrayBuffer) return Buffer.from(new Uint8Array(data)); + throw new StagehandInvalidArgumentError( + "Unsupported file payload buffer type", + ); +} diff --git a/packages/core/lib/v3/understudy/frame.ts b/packages/core/lib/v3/understudy/frame.ts index c9bdcee9a..c9e26bf96 100644 --- a/packages/core/lib/v3/understudy/frame.ts +++ b/packages/core/lib/v3/understudy/frame.ts @@ -26,10 +26,16 @@ export class Frame implements FrameManager { public session: CDPSessionLike, public frameId: string, public pageId: string, + private readonly remoteBrowser: boolean, ) { this.sessionId = this.session.id ?? null; } + /** True when the controlled browser runs on a different machine. */ + public isBrowserRemote(): boolean { + return this.remoteBrowser; + } + /** DOM.getNodeForLocation → DOM.describeNode */ async getNodeAtLocation(x: number, y: number): Promise { await this.session.send("DOM.enable"); @@ -218,7 +224,14 @@ export class Frame implements FrameManager { const collect = (tree: Protocol.Page.FrameTree) => { if (tree.frame.parentId === this.frameId) { - frames.push(new Frame(this.session, tree.frame.id, this.pageId)); + frames.push( + new Frame( + this.session, + tree.frame.id, + this.pageId, + this.remoteBrowser, + ), + ); } tree.childFrames?.forEach(collect); }; diff --git a/packages/core/lib/v3/understudy/locator.ts b/packages/core/lib/v3/understudy/locator.ts index 38acd8ca9..89e261cdd 100644 --- a/packages/core/lib/v3/understudy/locator.ts +++ b/packages/core/lib/v3/understudy/locator.ts @@ -3,7 +3,6 @@ import { Protocol } from "devtools-protocol"; import * as fs from "fs"; import * as os from "os"; import * as path from "path"; -import { Buffer } from "buffer"; import { locatorScriptSources } from "../dom/build/locatorScripts.generated"; import type { Frame } from "./frame"; import { FrameSelectorResolver, type SelectorQuery } from "./selectorResolver"; @@ -12,6 +11,11 @@ import { StagehandInvalidArgumentError, ElementNotVisibleError, } from "../types/public/sdkErrors"; +import { normalizeInputFiles } from "./fileUploadUtils"; +import { SetInputFilesArgument } from "../types/public/locator"; +import { NormalizedFilePayload } from "../types/private/locator"; + +const MAX_REMOTE_UPLOAD_BYTES = 50 * 1024 * 1024; // 50MB guard copied from Playwright type MouseButton = "left" | "right" | "middle"; @@ -69,42 +73,11 @@ export class Locator { * - Best‑effort dispatches change/input via CDP (Chrome does by default). * - Passing an empty array clears the selection. */ - public async setInputFiles( - files: - | string - | string[] - | { - name: string; - mimeType: string; - buffer: ArrayBuffer | Uint8Array | Buffer | string; - } - | Array<{ - name: string; - mimeType: string; - buffer: ArrayBuffer | Uint8Array | Buffer | string; - }>, - ): Promise { + public async setInputFiles(files: SetInputFilesArgument): Promise { const session = this.frame.session; const { objectId } = await this.resolveNode(); - // Normalize to array - const items = Array.isArray(files) - ? (files as unknown[]) - : [files as unknown]; - const tempFiles: string[] = []; - const filePaths: string[] = []; - - // Helper: normalize various buffer-like inputs to Node Buffer - const toBuffer = (data: unknown): Buffer => { - if (Buffer.isBuffer(data)) return data; - if (data instanceof Uint8Array) return Buffer.from(data); - if (typeof data === "string") return Buffer.from(data); - if (data instanceof ArrayBuffer) return Buffer.from(new Uint8Array(data)); - throw new StagehandInvalidArgumentError( - "Unsupported file payload buffer type", - ); - }; try { // Validate element is an @@ -130,42 +103,37 @@ export class Locator { ); } - // Build list of absolute file paths, creating temps for payloads - for (const it of items) { - if (typeof it === "string") { - filePaths.push(path.resolve(it)); - continue; - } - if ( - it && - typeof it === "object" && - "name" in it && - "mimeType" in it && - "buffer" in it - ) { - const payload = it as { - name: string; - mimeType: string; - buffer: ArrayBuffer | Uint8Array | Buffer | string; - }; - const base = payload.name || "upload.bin"; - const ext = path.extname(base); - const tmp = path.join( - os.tmpdir(), - `stagehand-upload-${Date.now()}-${Math.random().toString(36).slice(2)}${ext}`, - ); - const buf = toBuffer(payload.buffer); - await fs.promises.writeFile(tmp, buf); - tempFiles.push(tmp); - filePaths.push(tmp); + const normalized = await normalizeInputFiles(files); + + if (!normalized.length) { + await session.send("DOM.setFileInputFiles", { + objectId, + files: [], + }); + return; + } + + if (this.frame.isBrowserRemote()) { + await this.assignFilesViaPayloadInjection(objectId, normalized); + return; + } + + const filePaths: string[] = []; + for (const payload of normalized) { + if (payload.absolutePath) { + filePaths.push(payload.absolutePath); continue; } - throw new StagehandInvalidArgumentError( - "Unsupported setInputFiles item – expected path or payload", + const ext = path.extname(payload.name); + const tmp = path.join( + os.tmpdir(), + `stagehand-upload-${Date.now()}-${Math.random().toString(36).slice(2)}${ext}`, ); + await fs.promises.writeFile(tmp, payload.buffer); + tempFiles.push(tmp); + filePaths.push(tmp); } - // Apply files via CDP await session.send("DOM.setFileInputFiles", { objectId, files: filePaths, @@ -185,6 +153,58 @@ export class Locator { } } + /** + * Remote browser fallback: build File objects inside the page and attach them via JS. + * + * When Stagehand is driving a browser that cannot see the local filesystem (Browserbase, + * remote CDP, etc.), CDP's DOM.setFileInputFiles would fail because Chrome can't reach + * our temp files. Instead we base64-encode the payloads, send them into the page, and + * let a DOM helper create File objects + dispatch change/input events. + */ + private async assignFilesViaPayloadInjection( + objectId: Protocol.Runtime.RemoteObjectId, + files: NormalizedFilePayload[], + ): Promise { + const session = this.frame.session; + + for (const payload of files) { + if (payload.buffer.length > MAX_REMOTE_UPLOAD_BYTES) { + throw new StagehandInvalidArgumentError( + `setInputFiles(): file "${payload.name}" is larger than the 50MB limit for remote uploads`, + ); + } + } + + const serialized = files.map((payload) => ({ + name: payload.name, + mimeType: payload.mimeType, + lastModified: payload.lastModified, + base64: payload.buffer.toString("base64"), + })); + + const res = await session.send( + "Runtime.callFunctionOn", + { + objectId, + functionDeclaration: + locatorScriptSources.assignFilePayloadsToInputElement, + arguments: [ + { + value: serialized, + }, + ], + returnByValue: true, + }, + ); + + const ok = Boolean(res.result?.value); + if (!ok) { + throw new StagehandInvalidArgumentError( + "Unable to assign file payloads to remote input element", + ); + } + } + /** * Return the DOM backendNodeId for this locator's target element. * Useful for identity comparisons without needing element handles. diff --git a/packages/core/lib/v3/understudy/page.ts b/packages/core/lib/v3/understudy/page.ts index d9d972170..579552237 100644 --- a/packages/core/lib/v3/understudy/page.ts +++ b/packages/core/lib/v3/understudy/page.ts @@ -76,6 +76,7 @@ export class Page { /** cache Frames per frameId so everyone uses the same one */ private readonly frameCache = new Map(); + private readonly browserIsRemote: boolean; /** Stable id for Frames created by this Page (use top-level TargetId). */ private readonly pageId: string; @@ -100,9 +101,11 @@ export class Page { private readonly _targetId: string, mainFrameId: string, apiClient?: StagehandAPIClient | null, + browserIsRemote = false, ) { this.pageId = _targetId; this.apiClient = apiClient ?? null; + this.browserIsRemote = browserIsRemote; // own the main session if (mainSession.id) this.sessions.set(mainSession.id, mainSession); @@ -115,6 +118,7 @@ export class Page { this.mainSession, mainFrameId, this.pageId, + this.browserIsRemote, ); this.networkManager = new NetworkManager(); @@ -227,6 +231,7 @@ export class Page { targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null, + browserIsRemote = false, ): Promise { await session.send("Page.enable").catch(() => {}); await session @@ -237,7 +242,14 @@ export class Page { }>("Page.getFrameTree"); const mainFrameId = frameTree.frame.id; - const page = new Page(conn, session, targetId, mainFrameId, apiClient); + const page = new Page( + conn, + session, + targetId, + mainFrameId, + apiClient, + browserIsRemote, + ); // Seed current URL from initial frame tree try { page._currentUrl = String(frameTree?.frame?.url ?? page._currentUrl); @@ -304,7 +316,12 @@ export class Page { if (newRoot !== prevRoot) { const oldOrd = this.frameOrdinals.get(prevRoot) ?? 0; this.frameOrdinals.set(newRoot, oldOrd); - this.mainFrameWrapper = new Frame(this.mainSession, newRoot, this.pageId); + this.mainFrameWrapper = new Frame( + this.mainSession, + newRoot, + this.pageId, + this.browserIsRemote, + ); } // Update cached URL if this navigation pertains to the current main frame @@ -440,7 +457,7 @@ export class Page { if (hit) return hit; const sess = this.getSessionForFrame(frameId); - const f = new Frame(sess, frameId, this.pageId); + const f = new Frame(sess, frameId, this.pageId, this.browserIsRemote); this.frameCache.set(frameId, f); return f; }