diff --git a/docs/src/api/class-browser.md b/docs/src/api/class-browser.md index 878bcda0409d9..e61bf6930c4cf 100644 --- a/docs/src/api/class-browser.md +++ b/docs/src/api/class-browser.md @@ -267,7 +267,7 @@ await browser.CloseAsync(); ### option: Browser.newContext.storageStatePath = %%-csharp-java-context-option-storage-state-path-%% * since: v1.9 -### option: Browser.newContext.agent = %%-js-context-option-agent-%% +### option: Browser.newContext.agent = %%-context-option-agent-%% * since: v1.58 ## async method: Browser.newPage diff --git a/docs/src/api/class-page.md b/docs/src/api/class-page.md index 7d372415133a4..76e9d6b1f2476 100644 --- a/docs/src/api/class-page.md +++ b/docs/src/api/class-page.md @@ -2026,8 +2026,12 @@ Callback function which will be called in Playwright's context. ## async method: Page.extract * since: v1.58 -* langs: js -- returns: <[any]> +- returns: <[Object]> + - `result` <[any]> + - `usage` <[Object]> + - `turns` <[int]> + - `inputTokens` <[int]> + - `outputTokens` <[int]> Extract information from the page using the agentic loop, return it in a given Zod format. @@ -2050,11 +2054,18 @@ Task to perform using agentic loop. * since: v1.58 - `schema` <[z.ZodSchema]> +### option: Page.extract.maxTokens +* since: v1.58 +- `maxTokens` <[int]> + +Maximum number of tokens to consume. The agentic loop will stop after input + output tokens exceed this value. +Defaults to context-wide value specified in `agent` property. + ### option: Page.extract.maxTurns * since: v1.58 - `maxTurns` <[int]> -Maximum number of agentic steps to take while extracting the information. +Maximum number of agentic turns during this call, defaults to context-wide value specified in `agent` property. ## async method: Page.fill * since: v1.8 @@ -3031,7 +3042,11 @@ Whether or not to embed the document outline into the PDF. Defaults to `false`. 
## async method: Page.perform * since: v1.58 -* langs: js +- returns: <[Object]> + - `usage` <[Object]> + - `turns` <[int]> + - `inputTokens` <[int]> + - `outputTokens` <[int]> Perform action using agentic loop. @@ -3054,11 +3069,18 @@ Task to perform using agentic loop. All the agentic actions are converted to the Playwright calls and are cached. By default, they are cached globally with the `task` as a key. This option allows controlling the cache key explicitly. +### option: Page.perform.maxTokens +* since: v1.58 +- `maxTokens` <[int]> + +Maximum number of tokens to consume. The agentic loop will stop after input + output tokens exceed this value. +Defaults to context-wide value specified in `agent` property. + ### option: Page.perform.maxTurns * since: v1.58 - `maxTurns` <[int]> -Maximum number of agentic steps to take while performing this action. +Maximum number of agentic turns during this call, defaults to context-wide value specified in `agent` property. ## async method: Page.press diff --git a/docs/src/api/params.md b/docs/src/api/params.md index 9220e1271390b..babbdaf3b0a90 100644 --- a/docs/src/api/params.md +++ b/docs/src/api/params.md @@ -370,14 +370,15 @@ It makes the execution of the tests non-deterministic. Emulates consistent window screen size available inside web page via `window.screen`. Is only used when the [`option: viewport`] is set. -## js-context-option-agent -* langs: js +## context-option-agent - `agent` <[Object]> - `provider` <[string]> LLM provider to use. - `model` <[string]> Model identifier within provider. - `cacheFile` ?<[string]> Cache file to use/generate code for performed actions into. Cache is not used if not specified (default). - `cacheMode` ?<['force'|'ignore'|'auto']> Cache control, defaults to 'auto'. - `secrets` ?<[Object]<[string], [string]>> Secrets to hide from the LLM. + - `maxTurns` ?<[int]> Maximum number of agentic turns to take per call. Defaults to 10. 
+ - `maxTokens` ?<[int]> Maximum number of tokens to consume per call. The agentic loop will stop after input + output tokens exceed this value. Defaults to unlimited. Agent settings for [`method: Page.perform`] and [`method: Page.extract`]. diff --git a/docs/src/test-api/class-testoptions.md b/docs/src/test-api/class-testoptions.md index f00a272332dc4..98cd67238e35a 100644 --- a/docs/src/test-api/class-testoptions.md +++ b/docs/src/test-api/class-testoptions.md @@ -46,7 +46,7 @@ export default defineConfig({ }); ``` -## property: TestOptions.agent = %%-js-context-option-agent-%% +## property: TestOptions.agent = %%-context-option-agent-%% * since: v1.58 diff --git a/packages/playwright-client/types/types.d.ts b/packages/playwright-client/types/types.d.ts index 77196a4080974..d9917c1a1d0ff 100644 --- a/packages/playwright-client/types/types.d.ts +++ b/packages/playwright-client/types/types.d.ts @@ -3839,10 +3839,24 @@ export interface Page { key?: string; /** - * Maximum number of agentic steps to take while performing this action. + * Maximum number of tokens to consume. The agentic loop will stop after input + output tokens exceed this value. + * Defaults to context-wide value specified in `agent` property. + */ + maxTokens?: number; + + /** + * Maximum number of agentic turns during this call, defaults to context-wide value specified in `agent` property. */ maxTurns?: number; - }): Promise; + }): Promise<{ + usage: { + turns: number; + + inputTokens: number; + + outputTokens: number; + }; + }>; /** * **NOTE** Use locator-based [locator.press(key[, options])](https://playwright.dev/docs/api/class-locator#locator-press) @@ -22110,6 +22124,17 @@ export interface BrowserContextOptions { * Secrets to hide from the LLM. */ secrets?: { [key: string]: string; }; + + /** + * Maximum number of agentic turns to take per call. Defaults to 10. + */ + maxTurns?: number; + + /** + * Maximum number of tokens to consume per call. 
The agentic loop will stop after input + output tokens exceed this + * value. Defaults to unlimited. + */ + maxTokens?: number; }; /** diff --git a/packages/playwright-core/src/client/page.ts b/packages/playwright-core/src/client/page.ts index c7c3dd1a8665f..4f36c15869531 100644 --- a/packages/playwright-core/src/client/page.ts +++ b/packages/playwright-core/src/client/page.ts @@ -846,13 +846,14 @@ export class Page extends ChannelOwner implements api.Page return result.pdf; } - async perform(task: string, options: { key?: string, maxTurns?: number } = {}): Promise { - await this._channel.perform({ task, ...options }); + async perform(task: string, options: { key?: string, maxTokens?: number, maxTurns?: number } = {}) { + const result = await this._channel.perform({ task, ...options }); + return { usage: { ...result } }; } - async extract(query: string, schema: Schema, options: { maxTurns?: number } = {}): Promise> { - const { result } = await this._channel.extract({ query, schema: this._platform.zodToJsonSchema(schema), ...options }); - return result; + async extract(query: string, schema: Schema, options: { maxTokens?: number, maxTurns?: number } = {}): Promise> { + const { result, ...usage } = await this._channel.extract({ query, schema: this._platform.zodToJsonSchema(schema), ...options }); + return { result, usage }; } async _snapshotForAI(options: TimeoutOptions & { track?: string } = {}): Promise<{ full: string, incremental?: string }> { diff --git a/packages/playwright-core/src/protocol/validator.ts b/packages/playwright-core/src/protocol/validator.ts index d10e8f420eb6d..2cf6e69c39768 100644 --- a/packages/playwright-core/src/protocol/validator.ts +++ b/packages/playwright-core/src/protocol/validator.ts @@ -608,6 +608,8 @@ scheme.BrowserTypeLaunchPersistentContextParams = tObject({ cacheFile: tOptional(tString), cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])), secrets: tOptional(tArray(tType('NameValue'))), + maxTurns: tOptional(tInt), + 
maxTokens: tOptional(tInt), })), userDataDir: tString, slowMo: tOptional(tFloat), @@ -707,6 +709,8 @@ scheme.BrowserNewContextParams = tObject({ cacheFile: tOptional(tString), cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])), secrets: tOptional(tArray(tType('NameValue'))), + maxTurns: tOptional(tInt), + maxTokens: tOptional(tInt), })), proxy: tOptional(tObject({ server: tString, @@ -785,6 +789,8 @@ scheme.BrowserNewContextForReuseParams = tObject({ cacheFile: tOptional(tString), cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])), secrets: tOptional(tArray(tType('NameValue'))), + maxTurns: tOptional(tInt), + maxTokens: tOptional(tInt), })), proxy: tOptional(tObject({ server: tString, @@ -908,6 +914,8 @@ scheme.BrowserContextInitializer = tObject({ cacheFile: tOptional(tString), cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])), secrets: tOptional(tArray(tType('NameValue'))), + maxTurns: tOptional(tInt), + maxTokens: tOptional(tInt), })), }), }); @@ -1514,8 +1522,13 @@ scheme.PagePerformParams = tObject({ task: tString, key: tOptional(tString), maxTurns: tOptional(tInt), + maxTokens: tOptional(tInt), +}); +scheme.PagePerformResult = tObject({ + turns: tInt, + inputTokens: tInt, + outputTokens: tInt, }); -scheme.PagePerformResult = tOptional(tObject({})); scheme.PageExtractParams = tObject({ query: tString, schema: tAny, @@ -1523,6 +1536,9 @@ scheme.PageExtractParams = tObject({ }); scheme.PageExtractResult = tObject({ result: tAny, + turns: tInt, + inputTokens: tInt, + outputTokens: tInt, }); scheme.FrameInitializer = tObject({ url: tString, @@ -2818,6 +2834,8 @@ scheme.AndroidDeviceLaunchBrowserParams = tObject({ cacheFile: tOptional(tString), cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])), secrets: tOptional(tArray(tType('NameValue'))), + maxTurns: tOptional(tInt), + maxTokens: tOptional(tInt), })), pkg: tOptional(tString), args: tOptional(tArray(tString)), diff --git a/packages/playwright-core/src/server/agent/agent.ts 
b/packages/playwright-core/src/server/agent/agent.ts index 05ca068fd225d..507d630e4c2a2 100644 --- a/packages/playwright-core/src/server/agent/agent.ts +++ b/packages/playwright-core/src/server/agent/agent.ts @@ -28,17 +28,27 @@ import type { Page } from '../page'; import type * as loopTypes from '@lowire/loop'; import type * as actions from './actions'; -export async function pagePerform(progress: Progress, page: Page, options: channels.PagePerformParams): Promise { +type Usage = { + turns: number, + inputTokens: number, + outputTokens: number, +}; + +export async function pagePerform(progress: Progress, page: Page, options: channels.PagePerformParams): Promise { const context = new Context(progress, page); if (await cachedPerform(context, options)) - return; + return { turns: 0, inputTokens: 0, outputTokens: 0 }; - await perform(context, options.task, undefined, options); + const { usage } = await perform(context, options.task, undefined, options); await updateCache(context, options); + return usage; } -export async function pageExtract(progress: Progress, page: Page, options: channels.PageExtractParams) { +export async function pageExtract(progress: Progress, page: Page, options: channels.PageExtractParams): Promise<{ + result: any, + usage: Usage +}> { const context = new Context(progress, page); const task = ` ### Instructions @@ -46,10 +56,14 @@ Extract the following information from the page. 
Do not perform any actions, jus ### Query ${options.query}`; - return await perform(context, task, options.schema, options); + const { result, usage } = await perform(context, task, options.schema, options); + return { result, usage }; } -async function perform(context: Context, userTask: string, resultSchema: loopTypes.Schema | undefined, options: { maxTurns?: number } = {}): Promise { +async function perform(context: Context, userTask: string, resultSchema: loopTypes.Schema | undefined, options: { maxTurns?: number, maxTokens?: number } = {}): Promise<{ + result: any, + usage: Usage +}> { const { progress, page } = context; const browserContext = page.browserContext; if (!browserContext._options.agent) @@ -58,13 +72,17 @@ async function perform(context: Context, userTask: string, resultSchema: loopTyp const { full } = await page.snapshotForAI(progress); const { tools, callTool } = toolsForLoop(context); + const limits = context.limits(options); + let turns = 0; const loop = new Loop(browserContext._options.agent.provider as any, { model: browserContext._options.agent.model, summarize: true, debug, callTool, tools, + ...limits, beforeTurn: params => { + ++turns; const lastReply = params.conversation.messages.findLast(m => m.role === 'assistant'); const toolCall = lastReply?.content.find(c => c.type === 'tool_call'); if (!resultSchema && toolCall && toolCall.arguments.thatShouldBeIt) @@ -80,8 +98,15 @@ async function perform(context: Context, userTask: string, resultSchema: loopTyp ${full} `; - const { result } = await loop.run(task, { resultSchema }); - return result; + const { result, usage } = await loop.run(task, { resultSchema }); + return { + result, + usage: { + turns, + inputTokens: usage.input, + outputTokens: usage.output, + } + }; } type CachedActions = Record { - await pagePerform(progress, this._page, params); + return await pagePerform(progress, this._page, params); } async extract(params: channels.PageExtractParams, progress: Progress): Promise { - 
return { result: await pageExtract(progress, this._page, params) }; + const { result, usage } = await pageExtract(progress, this._page, params); + return { result, ...usage }; } async requests(params: channels.PageRequestsParams, progress: Progress): Promise { diff --git a/packages/playwright-core/types/types.d.ts b/packages/playwright-core/types/types.d.ts index 77196a4080974..d9917c1a1d0ff 100644 --- a/packages/playwright-core/types/types.d.ts +++ b/packages/playwright-core/types/types.d.ts @@ -3839,10 +3839,24 @@ export interface Page { key?: string; /** - * Maximum number of agentic steps to take while performing this action. + * Maximum number of tokens to consume. The agentic loop will stop after input + output tokens exceed this value. + * Defaults to context-wide value specified in `agent` property. + */ + maxTokens?: number; + + /** + * Maximum number of agentic turns during this call, defaults to context-wide value specified in `agent` property. */ maxTurns?: number; - }): Promise; + }): Promise<{ + usage: { + turns: number; + + inputTokens: number; + + outputTokens: number; + }; + }>; /** * **NOTE** Use locator-based [locator.press(key[, options])](https://playwright.dev/docs/api/class-locator#locator-press) @@ -22110,6 +22124,17 @@ export interface BrowserContextOptions { * Secrets to hide from the LLM. */ secrets?: { [key: string]: string; }; + + /** + * Maximum number of agentic turns to take per call. Defaults to 10. + */ + maxTurns?: number; + + /** + * Maximum number of tokens to consume per call. The agentic loop will stop after input + output tokens exceed this + * value. Defaults to unlimited. 
+ */ + maxTokens?: number; }; /** diff --git a/packages/protocol/src/channels.d.ts b/packages/protocol/src/channels.d.ts index 1c74085748d7f..3b2e83918ef68 100644 --- a/packages/protocol/src/channels.d.ts +++ b/packages/protocol/src/channels.d.ts @@ -1014,6 +1014,8 @@ export type BrowserTypeLaunchPersistentContextParams = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, userDataDir: string, slowMo?: number, @@ -1103,6 +1105,8 @@ export type BrowserTypeLaunchPersistentContextOptions = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, slowMo?: number, }; @@ -1231,6 +1235,8 @@ export type BrowserNewContextParams = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, proxy?: { server: string, @@ -1306,6 +1312,8 @@ export type BrowserNewContextOptions = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, proxy?: { server: string, @@ -1384,6 +1392,8 @@ export type BrowserNewContextForReuseParams = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, proxy?: { server: string, @@ -1459,6 +1469,8 @@ export type BrowserNewContextForReuseOptions = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, proxy?: { server: string, @@ -1601,6 +1613,8 @@ export type BrowserContextInitializer = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, }, }; @@ -2627,12 +2641,18 @@ export type PagePerformParams = { task: string, key?: string, maxTurns?: number, + maxTokens?: number, }; export type PagePerformOptions = { key?: string, 
maxTurns?: number, + maxTokens?: number, +}; +export type PagePerformResult = { + turns: number, + inputTokens: number, + outputTokens: number, }; -export type PagePerformResult = void; export type PageExtractParams = { query: string, schema: any, @@ -2643,6 +2663,9 @@ export type PageExtractOptions = { }; export type PageExtractResult = { result: any, + turns: number, + inputTokens: number, + outputTokens: number, }; export interface PageEvents { @@ -4914,6 +4937,8 @@ export type AndroidDeviceLaunchBrowserParams = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, pkg?: string, args?: string[], @@ -4987,6 +5012,8 @@ export type AndroidDeviceLaunchBrowserOptions = { cacheFile?: string, cacheMode?: 'ignore' | 'force' | 'auto', secrets?: NameValue[], + maxTurns?: number, + maxTokens?: number, }, pkg?: string, args?: string[], diff --git a/packages/protocol/src/protocol.yml b/packages/protocol/src/protocol.yml index 07c3dfad140e9..20dcdf07b0b59 100644 --- a/packages/protocol/src/protocol.yml +++ b/packages/protocol/src/protocol.yml @@ -604,6 +604,8 @@ ContextOptions: secrets: type: array? items: NameValue + maxTurns: int? + maxTokens: int? LocalUtils: type: interface @@ -2030,6 +2032,11 @@ Page: task: string key: string? maxTurns: int? + maxTokens: int? + returns: + turns: int + inputTokens: int + outputTokens: int extract: internal: true @@ -2039,6 +2046,9 @@ Page: maxTurns: int? returns: result: json + turns: int + inputTokens: int + outputTokens: int events: