Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/playwright-client/types/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5281,7 +5281,7 @@ export interface PageAgent {
* @param schema
* @param options
*/
extract<Schema extends ZodTypeAny>(query: string, schema: Schema): Promise<ZodInfer<Schema>>;
extract<Schema extends ZodTypeAny>(query: string, schema: Schema): Promise<{ result: ZodInfer<Schema>, usage: { turns: number, inputTokens: number, outputTokens: number } }>;
/**
* Emitted when the agent makes a turn.
*/
Expand Down
2 changes: 1 addition & 1 deletion packages/playwright-core/src/client/pageAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ export class PageAgent extends ChannelOwner<channels.PageAgentChannel> implement
return { usage };
}

async extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: PageAgentOptions = {}): Promise<z.infer<Schema>> {
async extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: PageAgentOptions = {}): Promise<{ result: z.infer<Schema>, usage: channels.AgentUsage }> {
const { result, usage } = await this._channel.extract({ query, schema: this._page._platform.zodToJsonSchema(schema), ...options });
return { result, usage };
}
Expand Down
60 changes: 33 additions & 27 deletions packages/playwright-core/src/server/agent/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,47 +27,53 @@ import type { Language } from '../../utils/isomorphic/locatorGenerators.ts';
import type { ToolDefinition } from './tool';
import type * as channels from '@protocol/channels';


/**
 * One entry of the agent's call history stored on `Context.history`.
 * Entries are appended after a perform/expect/extract loop finishes and are
 * rendered into the "### Context history" section of later prompts.
 */
type HistoryItem = {
// Which agent entry point recorded the entry.
type: 'expect' | 'perform' | 'extract';
// The user-supplied text of that call (task, expectation or query).
description: string;
};
export class Context {
readonly page: Page;
readonly actions: actions.ActionWithCode[] = [];
readonly sdkLanguage: Language;
readonly progress: Progress;
readonly options: channels.PageAgentParams;
private _callIntent: string | undefined;
readonly agentParams: channels.PageAgentParams;
readonly events: loopTypes.LoopEvents;
private _currentCallIntent: string | undefined;
readonly history: HistoryItem[] = [];

constructor(apiCallProgress: Progress, page: Page, options: channels.PageAgentParams) {
this.progress = apiCallProgress;
constructor(page: Page, agentParms: channels.PageAgentParams, events: loopTypes.LoopEvents) {
this.page = page;
this.options = options;
this.agentParams = agentParms;
this.sdkLanguage = page.browserContext._browser.sdkLanguage();
this.events = events;
}

async callTool(tool: ToolDefinition, params: any, options: { intent?: string }) {
this._callIntent = options.intent;
async callTool(progress: Progress, tool: ToolDefinition, params: any, options: { intent?: string }) {
this._currentCallIntent = options.intent;
try {
return await tool.handle(this, params);
return await tool.handle(progress, this, params);
} finally {
this._callIntent = undefined;
this._currentCallIntent = undefined;
}
}

async runActionAndWait(action: actions.Action) {
return await this.runActionsAndWait([action]);
async runActionAndWait(progress: Progress, action: actions.Action) {
return await this.runActionsAndWait(progress, [action]);
}

async runActionsAndWait(action: actions.Action[]) {
const error = await this.waitForCompletion(async () => {
async runActionsAndWait(progress: Progress, action: actions.Action[]) {
const error = await this.waitForCompletion(progress, async () => {
for (const a of action) {
await runAction(this.progress, 'generate', this.page, a, this.options?.secrets ?? []);
await runAction(progress, 'generate', this.page, a, this.agentParams?.secrets ?? []);
const code = await generateCode(this.sdkLanguage, a);
this.actions.push({ ...a, code, intent: this._callIntent });
this.actions.push({ ...a, code, intent: this._currentCallIntent });
}
return undefined;
}).catch((error: Error) => error);
return await this.snapshotResult(error);
return await this.snapshotResult(progress, error);
}

async waitForCompletion<R>(callback: () => Promise<R>): Promise<R> {
async waitForCompletion<R>(progress: Progress, callback: () => Promise<R>): Promise<R> {
const requests: Request[] = [];
const requestListener = (request: Request) => requests.push(request);
const disposeListeners = () => {
Expand All @@ -78,14 +84,14 @@ export class Context {
let result: R;
try {
result = await callback();
await this.progress.wait(500);
await progress.wait(500);
} finally {
disposeListeners();
}

const requestedNavigation = requests.some(request => request.isNavigationRequest());
if (requestedNavigation) {
await this.page.mainFrame().waitForLoadState(this.progress, 'load');
await this.page.mainFrame().waitForLoadState(progress, 'load');
return result;
}

Expand All @@ -96,15 +102,15 @@ export class Context {
else
promises.push(request.response());
}
await this.progress.race(promises, { timeout: 5000 });
await progress.race(promises, { timeout: 5000 });
if (requests.length)
await this.progress.wait(500);
await progress.wait(500);

return result;
}

async snapshotResult(error?: Error): Promise<loopTypes.ToolResult> {
let { full } = await this.page.snapshotForAI(this.progress);
async snapshotResult(progress: Progress, error?: Error): Promise<loopTypes.ToolResult> {
let { full } = await this.page.snapshotForAI(progress);
full = this._redactText(full);

const text: string[] = [];
Expand All @@ -130,10 +136,10 @@ export class Context {
};
}

async refSelectors(params: { element: string, ref: string }[]): Promise<string[]> {
async refSelectors(progress: Progress, params: { element: string, ref: string }[]): Promise<string[]> {
return Promise.all(params.map(async param => {
try {
const { resolvedSelector } = await this.page.mainFrame().resolveSelector(this.progress, `aria-ref=${param.ref}`);
const { resolvedSelector } = await this.page.mainFrame().resolveSelector(progress, `aria-ref=${param.ref}`);
return resolvedSelector;
} catch (e) {
throw new Error(`Ref ${param.ref} not found in the current page snapshot. Try capturing new snapshot.`);
Expand All @@ -142,7 +148,7 @@ export class Context {
}

private _redactText(text: string): string {
const secrets = this.options?.secrets;
const secrets = this.agentParams?.secrets;
if (!secrets)
return text;

Expand Down
20 changes: 10 additions & 10 deletions packages/playwright-core/src/server/agent/expectTools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ const expectVisible = defineTool({
}),
},

handle: async (context, params) => {
return await context.runActionAndWait({
handle: async (progress, context, params) => {
return await context.runActionAndWait(progress, {
method: 'expectVisible',
selector: getByRoleSelector(params.role, { name: params.accessibleName }),
isNot: params.isNot,
Expand All @@ -53,8 +53,8 @@ const expectVisibleText = defineTool({
}),
},

handle: async (context, params) => {
return await context.runActionAndWait({
handle: async (progress, context, params) => {
return await context.runActionAndWait(progress, {
method: 'expectVisible',
selector: getByTextSelector(params.text),
isNot: params.isNot,
Expand All @@ -76,9 +76,9 @@ const expectValue = defineTool({
}),
},

handle: async (context, params) => {
const [selector] = await context.refSelectors([{ ref: params.ref, element: params.element }]);
return await context.runActionAndWait({
handle: async (progress, context, params) => {
const [selector] = await context.refSelectors(progress, [{ ref: params.ref, element: params.element }]);
return await context.runActionAndWait(progress, {
method: 'expectValue',
selector,
type: params.type,
Expand All @@ -102,10 +102,10 @@ const expectList = defineTool({
}),
},

handle: async (context, params) => {
handle: async (progress, context, params) => {
const template = `- ${params.listRole}:
${params.items.map(item => ` - ${params.itemRole}: ${yamlEscapeValueIfNeeded(item)}`).join('\n')}`;
return await context.runActionAndWait({
${params.items.map(item => ` - ${params.itemRole}: ${yamlEscapeValueIfNeeded(item)}`).join('\n')}`;
return await context.runActionAndWait(progress, {
method: 'expectAria',
template,
});
Expand Down
93 changes: 55 additions & 38 deletions packages/playwright-core/src/server/agent/pageAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,20 @@ import { Context } from './context';
import performTools from './performTools';
import expectTools from './expectTools';

import type * as channels from '@protocol/channels';
import type * as actions from './actions';
import type { ToolDefinition } from './tool';
import type * as loopTypes from '@lowire/loop';
import type { Progress } from '../progress';

export async function pageAgentPerform(context: Context, options: loopTypes.LoopEvents & channels.PageAgentPerformParams) {
const cacheKey = (options.cacheKey ?? options.task).trim();
if (await cachedPerform(context, cacheKey))
/**
 * Per-call settings accepted by the page agent entry points
 * (`pageAgentPerform`, `pageAgentExpect`, `pageAgentExtract`).
 */
export type CallParams = {
// Explicit cache key; perform/expect fall back to the task or expectation text when it is omitted.
cacheKey?: string;
// Forwarded to the loop as `maxTokens`; takes precedence over `agentParams.maxTokens`.
maxTokens?: number;
// Forwarded to the loop as `maxTurns`; takes precedence over `agentParams.maxTurns`.
maxTurns?: number;
};

export async function pageAgentPerform(progress: Progress, context: Context, userTask: string, callParams: CallParams) {
const cacheKey = (callParams.cacheKey ?? userTask).trim();
if (await cachedPerform(progress, context, cacheKey))
return;

const task = `
Expand All @@ -40,16 +46,17 @@ export async function pageAgentPerform(context: Context, options: loopTypes.Loop
- Your reply should be a tool call that performs action the page".

### Task
${options.task}
${userTask}
`;

await runLoop(context, performTools, task, undefined, options);
await runLoop(progress, context, performTools, task, undefined, callParams);
context.history.push({ type: 'perform', description: userTask });
await updateCache(context, cacheKey);
}

export async function pageAgentExpect(context: Context, options: loopTypes.LoopEvents & channels.PageAgentExpectParams) {
const cacheKey = (options.cacheKey ?? options.expectation).trim();
if (await cachedPerform(context, cacheKey))
export async function pageAgentExpect(progress: Progress, context: Context, expectation: string, callParams: CallParams) {
const cacheKey = (callParams.cacheKey ?? expectation).trim();
if (await cachedPerform(progress, context, cacheKey))
return;

const task = `
Expand All @@ -58,76 +65,86 @@ export async function pageAgentExpect(context: Context, options: loopTypes.LoopE
- You can call exactly one tool and it can't be report_results, must be one of the assertion tools.

### Expectation
${options.expectation}
${expectation}
`;

await runLoop(context, expectTools, task, undefined, options);
await runLoop(progress, context, expectTools, task, undefined, callParams);
context.history.push({ type: 'expect', description: expectation });
await updateCache(context, cacheKey);
}

export async function runLoop(context: Context, toolDefinitions: ToolDefinition[], userTask: string, resultSchema: loopTypes.Schema | undefined, options: loopTypes.LoopEvents & {
api?: string,
apiEndpoint?: string,
apiKey?: string,
model?: string,
maxTurns?: number;
maxTokens?: number;
}): Promise<{
/**
 * Asks the agent to read information off the current page without acting on it.
 *
 * The loop is started with an empty tool list and the given result schema, so
 * the only outcome is the value reported back through `runLoop`. Once the loop
 * has finished, the query is recorded in `context.history` as an `extract` item.
 *
 * @param progress Progress object forwarded to the loop.
 * @param context Agent context whose history is appended to.
 * @param query Free-text description of the data to extract.
 * @param schema Schema passed to the loop as the result schema.
 * @param callParams Per-call overrides forwarded to the loop.
 * @returns The `result` value produced by `runLoop`.
 */
export async function pageAgentExtract(progress: Progress, context: Context, query: string, schema: loopTypes.Schema, callParams: CallParams): Promise<any> {
  // Template literal content is part of the prompt; keep it flush-left.
  const prompt = `
### Instructions
Extract the following information from the page. Do not perform any actions, just extract the information.

### Query
${query}`;

  // No tools are offered: extraction must not interact with the page.
  const outcome = await runLoop(progress, context, [], prompt, schema, callParams);

  // Record the call only after the loop has completed successfully.
  context.history.push({ type: 'extract', description: query });
  return outcome.result;
}

async function runLoop(progress: Progress, context: Context, toolDefinitions: ToolDefinition[], userTask: string, resultSchema: loopTypes.Schema | undefined, params: CallParams): Promise<{
result: any
}> {
const { page } = context;
if (!context.agentParams.api || !context.agentParams.apiKey || !context.agentParams.model)
throw new Error(`This action requires the API and API key to be set on the page agent. Are you running with --run-agents=none mode?`);

if (!context.options?.api || !context.options?.apiKey || !context.options?.model)
throw new Error(`This action requires the API and API key to be set on the browser context`);

const { full } = await page.snapshotForAI(context.progress);
const { tools, callTool } = toolsForLoop(context, toolDefinitions, { resultSchema });
const { full } = await page.snapshotForAI(progress);
const { tools, callTool, reportedResult } = toolsForLoop(progress, context, toolDefinitions, { resultSchema });

const loop = new Loop({
api: context.options.api as any,
apiEndpoint: context.options.apiEndpoint,
apiKey: context.options.apiKey,
model: context.options.model,
maxTurns: context.options.maxTurns,
maxTokens: context.options.maxTokens,
api: context.agentParams.api as any,
apiEndpoint: context.agentParams.apiEndpoint,
apiKey: context.agentParams.apiKey,
model: context.agentParams.model,
maxTurns: params.maxTurns ?? context.agentParams.maxTurns,
maxTokens: params.maxTokens ?? context.agentParams.maxTokens,
summarize: true,
debug,
callTool,
tools,
...context.events,
});

const task = `${userTask}

### Context history
${context.history.map(h => `- ${h.type}: ${h.description}`).join('\n')}

### Page snapshot
${full}
`;

const { result } = await loop.run(task);
return { result };
await loop.run(task);
return { result: resultSchema ? reportedResult() : undefined };
}

type CachedActions = Record<string, {
timestamp: number,
actions: actions.ActionWithCode[],
}>;

async function cachedPerform(context: Context, cacheKey: string): Promise<actions.ActionWithCode[] | undefined> {
if (!context.options?.cacheFile)
async function cachedPerform(progress: Progress, context: Context, cacheKey: string): Promise<actions.ActionWithCode[] | undefined> {
if (!context.agentParams?.cacheFile)
return;

const cache = await cachedActions(context.options?.cacheFile);
const cache = await cachedActions(context.agentParams?.cacheFile);
const entry = cache.actions[cacheKey];
if (!entry)
return;

for (const action of entry.actions)
await runAction(context.progress, 'run', context.page, action, context.options.secrets ?? []);
await runAction(progress, 'run', context.page, action, context.agentParams.secrets ?? []);
return entry.actions;
}

async function updateCache(context: Context, cacheKey: string) {
const cacheFile = context.options?.cacheFile;
const cacheOutFile = context.options?.cacheOutFile;
const cacheFile = context.agentParams?.cacheFile;
const cacheOutFile = context.agentParams?.cacheOutFile;
const cacheFileKey = cacheFile ?? cacheOutFile;

const cache = cacheFileKey ? await cachedActions(cacheFileKey) : { actions: {}, newActions: {} };
Expand Down
Loading
Loading