Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/src/api/params.md
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ Emulates consistent window screen size available inside web page via `window.scr
- `agent` <[Object]>
- `provider` <[string]> LLM provider to use
- `model` <[string]> Model identifier within provider
- `cacheDir` ?<[string]> Cache folder to use/generate code for performed actions into. Cache is not used if not specified (default).
- `cacheFile` ?<[string]> Cache file to use/generate code for performed actions into. Cache is not used if not specified (default).
- `cacheMode` ?<['force'|'ignore'|'auto']> Cache control, defaults to 'auto'

Agent settings for [`method: Page.perform`] and [`method: Page.extract`].
Expand Down
4 changes: 2 additions & 2 deletions packages/playwright-client/types/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22097,9 +22097,9 @@ export interface BrowserContextOptions {
model: string;

/**
* Cache folder to use/generate code for performed actions into. Cache is not used if not specified (default).
* Cache file to use/generate code for performed actions into. Cache is not used if not specified (default).
*/
cacheDir?: string;
cacheFile?: string;

/**
* Cache control, defaults to 'auto'
Expand Down
7 changes: 4 additions & 3 deletions packages/playwright-core/src/client/page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -847,11 +847,12 @@ export class Page extends ChannelOwner<channels.PageChannel> implements api.Page
}

async perform(task: string, options: { key?: string, maxTurns?: number } = {}): Promise<void> {
throw new Error('Not implemented in playwright-core');
await this._channel.perform({ task, ...options });
}

extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: { maxTurns?: number } = {}): Promise<z.infer<Schema>> {
throw new Error('Not implemented in playwright-core');
async extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: { maxTurns?: number } = {}): Promise<z.infer<Schema>> {
const { result } = await this._channel.extract({ query, schema: this._platform.zodToJsonSchema(schema), ...options });
return result;
}

async _snapshotForAI(options: TimeoutOptions & { track?: string } = {}): Promise<{ full: string, incremental?: string }> {
Expand Down
5 changes: 5 additions & 0 deletions packages/playwright-core/src/client/platform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ export type Platform = {
streamFile: (path: string, writable: Writable) => Promise<void>,
streamReadable: (channel: channels.StreamChannel) => Readable,
streamWritable: (channel: channels.WritableStreamChannel) => Writable,
zodToJsonSchema: (schema: any) => any,
zones: { empty: Zone, current: () => Zone; };
};

Expand Down Expand Up @@ -119,5 +120,9 @@ export const emptyPlatform: Platform = {
throw new Error('Streams are not available');
},

zodToJsonSchema: (schema: any) => {
throw new Error('Zod is not available');
},

zones: { empty: noopZone, current: () => noopZone },
};
24 changes: 19 additions & 5 deletions packages/playwright-core/src/protocol/validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ scheme.BrowserTypeLaunchPersistentContextParams = tObject({
agent: tOptional(tObject({
provider: tString,
model: tString,
cacheDir: tOptional(tString),
cacheFile: tOptional(tString),
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
})),
userDataDir: tString,
Expand Down Expand Up @@ -703,7 +703,7 @@ scheme.BrowserNewContextParams = tObject({
agent: tOptional(tObject({
provider: tString,
model: tString,
cacheDir: tOptional(tString),
cacheFile: tOptional(tString),
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
})),
proxy: tOptional(tObject({
Expand Down Expand Up @@ -780,7 +780,7 @@ scheme.BrowserNewContextForReuseParams = tObject({
agent: tOptional(tObject({
provider: tString,
model: tString,
cacheDir: tOptional(tString),
cacheFile: tOptional(tString),
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
})),
proxy: tOptional(tObject({
Expand Down Expand Up @@ -902,7 +902,7 @@ scheme.BrowserContextInitializer = tObject({
agent: tOptional(tObject({
provider: tString,
model: tString,
cacheDir: tOptional(tString),
cacheFile: tOptional(tString),
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
})),
}),
Expand Down Expand Up @@ -1506,6 +1506,20 @@ scheme.PageUpdateSubscriptionParams = tObject({
enabled: tBoolean,
});
scheme.PageUpdateSubscriptionResult = tOptional(tObject({}));
scheme.PagePerformParams = tObject({
task: tString,
key: tOptional(tString),
maxTurns: tOptional(tInt),
});
scheme.PagePerformResult = tOptional(tObject({}));
scheme.PageExtractParams = tObject({
query: tString,
schema: tAny,
maxTurns: tOptional(tInt),
});
scheme.PageExtractResult = tObject({
result: tAny,
});
scheme.FrameInitializer = tObject({
url: tString,
name: tString,
Expand Down Expand Up @@ -2797,7 +2811,7 @@ scheme.AndroidDeviceLaunchBrowserParams = tObject({
agent: tOptional(tObject({
provider: tString,
model: tString,
cacheDir: tOptional(tString),
cacheFile: tOptional(tString),
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
})),
pkg: tOptional(tString),
Expand Down
6 changes: 6 additions & 0 deletions packages/playwright-core/src/server/agent/DEPS.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[*]
../browserContext.ts
../page.ts
../progress.ts
../../mcpBundle.ts
../../utilsBundle.ts
52 changes: 52 additions & 0 deletions packages/playwright-core/src/server/agent/actionRunner.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import type * as actions from './actions';
import type { Page } from '../page';
import type { Progress } from '../progress';

// Option fragment spread last into every selector-based call below, so that
// options carried by a recorded action can never override `strict: true`.
const strictTrue = { strict: true };

/**
 * Replays a single recorded agent action against the page.
 *
 * Selector-based actions are dispatched to the page's main frame; key presses
 * go through the page keyboard.
 *
 * @param progress Progress object forwarded to every frame/keyboard call.
 * @param page Page the action is executed on.
 * @param action Recorded action to replay.
 * @throws Error when `action.method` is not one of the supported methods. This
 *   can happen for actions that did not originate from typed code (for example
 *   entries parsed from a cache file); skipping them silently would report a
 *   successful replay that did not actually perform the step.
 */
export async function runAction(progress: Progress, page: Page, action: actions.Action) {
  const frame = page.mainFrame();
  switch (action.method) {
    case 'click':
      await frame.click(progress, action.selector, { ...action.options, ...strictTrue });
      break;
    case 'drag':
      await frame.dragAndDrop(progress, action.sourceSelector, action.targetSelector, { ...strictTrue });
      break;
    case 'hover':
      await frame.hover(progress, action.selector, { ...action.options, ...strictTrue });
      break;
    case 'selectOption':
      // Each recorded string is matched against the option's `value`.
      await frame.selectOption(progress, action.selector, [], action.values.map(a => ({ value: a })), { ...strictTrue });
      break;
    case 'pressKey':
      await page.keyboard.press(progress, action.key);
      break;
    case 'pressSequentially':
      await frame.type(progress, action.selector, action.text, { ...strictTrue });
      if (action.submit)
        await page.keyboard.press(progress, 'Enter');
      break;
    case 'fill':
      await frame.fill(progress, action.selector, action.text, { ...strictTrue });
      if (action.submit)
        await page.keyboard.press(progress, 'Enter');
      break;
    default: {
      // Compile-time exhaustiveness check: adding a new Action variant without
      // handling it above becomes a type error here.
      const unsupported: never = action;
      throw new Error(`Unsupported action method: ${String((unsupported as { method?: unknown }).method)}`);
    }
  }
}
62 changes: 62 additions & 0 deletions packages/playwright-core/src/server/agent/actions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import type * as channels from '@protocol/channels';

/** Click on the element matched by `selector`, with the recorded click options. */
export type ClickAction = {
method: 'click';
selector: string;
options: Pick<channels.FrameClickParams, 'button' | 'clickCount' | 'modifiers'>;
};

/** Drag the element matched by `sourceSelector` and drop it onto `targetSelector`. */
export type DragAction = {
method: 'drag';
sourceSelector: string;
targetSelector: string;
};

/** Hover over the element matched by `selector`, with the recorded modifiers. */
export type HoverAction = {
method: 'hover';
selector: string;
options: Pick<channels.FrameHoverParams, 'modifiers'>;
};

/** Select options in the element matched by `selector`; the runner matches each entry of `values` by option value. */
export type SelectOptionAction = {
method: 'selectOption';
selector: string;
values: string[];
};

/** Press `key` on the page keyboard; no selector is involved. */
export type PressAction = {
method: 'pressKey';
key: string;
};

/** Type `text` into the element matched by `selector`; when `submit` is set the runner presses Enter afterwards. */
export type PressSequentiallyAction = {
method: 'pressSequentially';
selector: string;
text: string;
submit?: boolean;
};

/** Fill the element matched by `selector` with `text`; when `submit` is set the runner presses Enter afterwards. */
export type FillAction = {
method: 'fill';
selector: string;
text: string;
submit?: boolean;
};

/** Union of all recordable actions, discriminated by the `method` field. */
export type Action = ClickAction | DragAction | HoverAction | SelectOptionAction | PressAction | PressSequentiallyAction | FillAction;
124 changes: 124 additions & 0 deletions packages/playwright-core/src/server/agent/agent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import fs from 'fs';

import { toolsForLoop } from './backend';
import { debug } from '../../utilsBundle';
import { Loop, z, zodToJsonSchema } from '../../mcpBundle';
import { runAction } from './actionRunner';
import { Context } from './context';

import type { Progress } from '../progress';
import type * as channels from '@protocol/channels';
import type { Page } from '../page';
import type * as loopTypes from '@lowire/loop';
import type * as actions from './actions';

/**
 * Performs a natural-language task on the page.
 *
 * Cached actions are replayed when available; otherwise the agent loop is run
 * and the actions recorded on the context are written to the cache.
 *
 * @param progress Progress object for the operation.
 * @param page Page to perform the task on.
 * @param options Task text plus optional cache key and turn limit.
 * @throws Error when the agent reports, through the `error` field of the
 *   result schema, that the task could not be completed. In that case nothing
 *   is written to the cache, so a failed attempt is never replayed later.
 */
export async function pagePerform(progress: Progress, page: Page, options: channels.PagePerformParams): Promise<void> {
  const context = new Context(progress, page);

  if (await cachedPerform(context, options))
    return;

  const resultSchema = zodToJsonSchema(z.object({
    error: z.string().optional().describe('An error message if the task could not be completed successfully'),
  })) as loopTypes.Schema;

  // NOTE(review): assumes perform() resolves to the object shaped by
  // `resultSchema`, the same way pageExtract() hands its result back - confirm.
  const result = await perform(context, options.task, resultSchema, options);
  if (result?.error)
    throw new Error(`Agent could not complete the task: ${result.error}`);

  await updateCache(context, options);
}

/**
 * Asks the agent to extract data from the page without acting on it.
 *
 * The query is wrapped into an instruction prompt and handed to `perform`
 * together with the caller-supplied result schema.
 *
 * @param progress Progress object for the operation.
 * @param page Page to extract the information from.
 * @param options Query text, result schema and optional turn limit.
 * @returns Whatever `perform` resolves to for the given schema.
 */
export async function pageExtract(progress: Progress, page: Page, options: channels.PageExtractParams) {
  const agentContext = new Context(progress, page);
  const instructions = 'Extract the following information from the page. Do not perform any actions, just extract the information.';
  const prompt = `\n### Instructions\n${instructions}\n\n### Query\n${options.query}`;
  return await perform(agentContext, prompt, options.schema, options);
}

/**
 * Runs the agent loop for a task against the context's page.
 *
 * Takes an AI snapshot of the page, appends it to the task text and runs a
 * `Loop` configured from the browser context's agent settings.
 *
 * @param context Agent context holding the page and progress.
 * @param userTask Task text; the page snapshot is appended to it.
 * @param resultSchema Schema passed to the loop as `resultSchema`.
 * @param options Extra loop options; spread last into the loop configuration.
 * @returns The value the loop run resolves to.
 * @throws Error when the browser context has no agent settings.
 */
async function perform(context: Context, userTask: string, resultSchema: loopTypes.Schema, options: { maxTurns?: number } = {}): Promise<any> {
  const agentSettings = context.page.browserContext._options.agent;
  if (!agentSettings)
    throw new Error(`page.perform() and page.extract() require the agent to be set on the browser context`);

  const snapshot = await context.page.snapshotForAI(context.progress);
  const loopTools = toolsForLoop(context);

  // NOTE(review): `options` is spread as-is, so any extra fields on the object
  // passed by callers also reach the Loop options - confirm this is intended.
  const loop = new Loop(agentSettings.provider as any, {
    model: agentSettings.model,
    summarize: true,
    debug,
    callTool: loopTools.callTool,
    tools: loopTools.tools,
    ...options
  });

  const prompt = `${userTask}\n\n### Page snapshot\n${snapshot.full}\n`;
  return await loop.run(prompt, { resultSchema });
}

// Contents of one cache file: recorded actions stored under a cache key
// (the functions in this file use `options.key ?? options.task` as the key).
type CachedActions = Record<string, actions.Action[]>;

// Parsed cache contents held in memory, keyed by the cache file path.
const allCaches = new Map<string, CachedActions>();

/**
 * Replays cached actions for the task, if the cache allows it.
 *
 * @param context Agent context holding the page and progress.
 * @param options Perform parameters; `key` (or `task` when no key is given)
 *   selects the cache entry.
 * @returns `true` when cached actions were found and replayed, `false` when
 *   the cache is disabled (no cache file, or cache mode 'ignore') or has no
 *   entry for the key.
 * @throws Error when there is no entry for the key and cache mode is 'force'.
 */
async function cachedPerform(context: Context, options: channels.PagePerformParams): Promise<boolean> {
  const agentSettings = context.page.browserContext._options.agent;
  if (!agentSettings?.cacheFile || agentSettings.cacheMode === 'ignore')
    return false;

  const cache = await cachedActions(agentSettings.cacheFile);
  const cacheKey = options.key ?? options.task;
  // Own-property lookup only: a key such as "constructor" or "toString" must
  // not resolve to a member inherited from Object.prototype, which would be
  // truthy but not a list of actions.
  const cached = Object.prototype.hasOwnProperty.call(cache, cacheKey) ? cache[cacheKey] : undefined;
  if (!cached) {
    if (agentSettings.cacheMode === 'force')
      throw new Error(`No cached actions for key "${cacheKey}", but cache mode is set to "force"`);
    return false;
  }

  // Actions are replayed strictly in recorded order.
  for (const action of cached)
    await runAction(context.progress, context.page, action);
  return true;
}

/**
 * Stores the actions recorded on the context under the task's cache key and
 * rewrites the cache file as pretty-printed JSON.
 *
 * Does nothing when the agent settings carry no cache file.
 *
 * @param context Agent context whose `actions` are stored.
 * @param options Perform parameters; `key` (or `task` when no key is given)
 *   is the cache key.
 */
async function updateCache(context: Context, options: channels.PagePerformParams) {
  const agentSettings = context.page.browserContext._options.agent;
  const target = agentSettings?.cacheFile;
  if (target) {
    const entries = await cachedActions(target);
    entries[options.key ?? options.task] = context.actions;
    const serialized = JSON.stringify(entries, undefined, 2);
    await fs.promises.writeFile(target, serialized);
  }
}

/**
 * Returns the in-memory cache for a cache file, loading the file on first use.
 *
 * @param cacheFile Path of the cache file; also the key into `allCaches`.
 * @returns The cache object for the file. The same object is returned on
 *   later calls, so callers may mutate it to update the cache.
 * @throws Error when the file has content that is not valid JSON, or whose
 *   root is not a JSON object. The message names the offending file.
 */
async function cachedActions(cacheFile: string): Promise<CachedActions> {
  const known = allCaches.get(cacheFile);
  if (known)
    return known;

  // Best-effort read: a file that cannot be read is treated as a cold cache.
  const text = await fs.promises.readFile(cacheFile, 'utf-8').catch(() => '');

  let parsed: unknown = {};
  // An empty or whitespace-only file is also a cold cache, not a parse error.
  if (text.trim()) {
    try {
      parsed = JSON.parse(text);
    } catch (e) {
      const reason = e instanceof Error ? e.message : String(e);
      throw new Error(`Failed to parse agent cache file "${cacheFile}": ${reason}`);
    }
  }

  // Entries are looked up and assigned by key, so the root must be an object;
  // `null` would crash on lookup and an array would drop keyed entries on save.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed))
    throw new Error(`Agent cache file "${cacheFile}" must contain a JSON object`);

  const cache = parsed as CachedActions;
  allCaches.set(cacheFile, cache);
  return cache;
}
Loading