diff --git a/packages/playwright-client/types/types.d.ts b/packages/playwright-client/types/types.d.ts index 81f9f4bad1f64..1d0a59d40ea96 100644 --- a/packages/playwright-client/types/types.d.ts +++ b/packages/playwright-client/types/types.d.ts @@ -5281,7 +5281,7 @@ export interface PageAgent { * @param schema * @param options */ - extract(query: string, schema: Schema): Promise>; + extract(query: string, schema: Schema): Promise<{ result: ZodInfer, usage: { turns: number, inputTokens: number, outputTokens: number } }>; /** * Emitted when the agent makes a turn. */ diff --git a/packages/playwright-core/src/client/pageAgent.ts b/packages/playwright-core/src/client/pageAgent.ts index 80a2338e9bf4e..7b3998a9e86ba 100644 --- a/packages/playwright-core/src/client/pageAgent.ts +++ b/packages/playwright-core/src/client/pageAgent.ts @@ -51,7 +51,7 @@ export class PageAgent extends ChannelOwner implement return { usage }; } - async extract(query: string, schema: Schema, options: PageAgentOptions = {}): Promise> { + async extract(query: string, schema: Schema, options: PageAgentOptions = {}): Promise<{ result: z.infer, usage: channels.AgentUsage }> { const { result, usage } = await this._channel.extract({ query, schema: this._page._platform.zodToJsonSchema(schema), ...options }); return { result, usage }; } diff --git a/packages/playwright-core/src/server/agent/context.ts b/packages/playwright-core/src/server/agent/context.ts index 8bb10692d92d6..925399826fdfa 100644 --- a/packages/playwright-core/src/server/agent/context.ts +++ b/packages/playwright-core/src/server/agent/context.ts @@ -27,47 +27,53 @@ import type { Language } from '../../utils/isomorphic/locatorGenerators.ts'; import type { ToolDefinition } from './tool'; import type * as channels from '@protocol/channels'; + +type HistoryItem = { + type: 'expect' | 'perform' | 'extract'; + description: string; +}; export class Context { readonly page: Page; readonly actions: actions.ActionWithCode[] = []; readonly sdkLanguage: Language; - readonly progress: Progress; - readonly options: channels.PageAgentParams; - private _callIntent: string | undefined; + readonly agentParams: channels.PageAgentParams; + readonly events: loopTypes.LoopEvents; + private _currentCallIntent: string | undefined; + readonly history: HistoryItem[] = []; - constructor(apiCallProgress: Progress, page: Page, options: channels.PageAgentParams) { - this.progress = apiCallProgress; + constructor(page: Page, agentParms: channels.PageAgentParams, events: loopTypes.LoopEvents) { this.page = page; - this.options = options; + this.agentParams = agentParms; this.sdkLanguage = page.browserContext._browser.sdkLanguage(); + this.events = events; } - async callTool(tool: ToolDefinition, params: any, options: { intent?: string }) { - this._callIntent = options.intent; + async callTool(progress: Progress, tool: ToolDefinition, params: any, options: { intent?: string }) { + this._currentCallIntent = options.intent; try { - return await tool.handle(this, params); + return await tool.handle(progress, this, params); } finally { - this._callIntent = undefined; + this._currentCallIntent = undefined; } } - async runActionAndWait(action: actions.Action) { - return await this.runActionsAndWait([action]); + async runActionAndWait(progress: Progress, action: actions.Action) { + return await this.runActionsAndWait(progress, [action]); } - async runActionsAndWait(action: actions.Action[]) { - const error = await this.waitForCompletion(async () => { + async runActionsAndWait(progress: Progress, action: actions.Action[]) { + const error = await this.waitForCompletion(progress, async () => { for (const a of action) { - await runAction(this.progress, 'generate', this.page, a, this.options?.secrets ?? []); + await runAction(progress, 'generate', this.page, a, this.agentParams?.secrets ?? []); const code = await generateCode(this.sdkLanguage, a); - this.actions.push({ ...a, code, intent: this._callIntent }); + this.actions.push({ ...a, code, intent: this._currentCallIntent }); } return undefined; }).catch((error: Error) => error); - return await this.snapshotResult(error); + return await this.snapshotResult(progress, error); } - async waitForCompletion(callback: () => Promise): Promise { + async waitForCompletion(progress: Progress, callback: () => Promise): Promise { const requests: Request[] = []; const requestListener = (request: Request) => requests.push(request); const disposeListeners = () => { @@ -78,14 +84,14 @@ export class Context { let result: R; try { result = await callback(); - await this.progress.wait(500); + await progress.wait(500); } finally { disposeListeners(); } const requestedNavigation = requests.some(request => request.isNavigationRequest()); if (requestedNavigation) { - await this.page.mainFrame().waitForLoadState(this.progress, 'load'); + await this.page.mainFrame().waitForLoadState(progress, 'load'); return result; } @@ -96,15 +102,15 @@ export class Context { else promises.push(request.response()); } - await this.progress.race(promises, { timeout: 5000 }); + await progress.race(promises, { timeout: 5000 }); if (requests.length) - await this.progress.wait(500); + await progress.wait(500); return result; } - async snapshotResult(error?: Error): Promise { - let { full } = await this.page.snapshotForAI(this.progress); + async snapshotResult(progress: Progress, error?: Error): Promise { + let { full } = await this.page.snapshotForAI(progress); full = this._redactText(full); const text: string[] = []; @@ -130,10 +136,10 @@ export class Context { }; } - async refSelectors(params: { element: string, ref: string }[]): Promise { + async refSelectors(progress: Progress, params: { element: string, ref: string }[]): Promise { return Promise.all(params.map(async param => { try { - const { resolvedSelector } = await this.page.mainFrame().resolveSelector(this.progress, `aria-ref=${param.ref}`); + const { resolvedSelector } = await this.page.mainFrame().resolveSelector(progress, `aria-ref=${param.ref}`); return resolvedSelector; } catch (e) { throw new Error(`Ref ${param.ref} not found in the current page snapshot. Try capturing new snapshot.`); @@ -142,7 +148,7 @@ export class Context { } private _redactText(text: string): string { - const secrets = this.options?.secrets; + const secrets = this.agentParams?.secrets; if (!secrets) return text; diff --git a/packages/playwright-core/src/server/agent/expectTools.ts b/packages/playwright-core/src/server/agent/expectTools.ts index 3df7cd0a3de7e..439ca499e2921 100644 --- a/packages/playwright-core/src/server/agent/expectTools.ts +++ b/packages/playwright-core/src/server/agent/expectTools.ts @@ -33,8 +33,8 @@ const expectVisible = defineTool({ }), }, - handle: async (context, params) => { - return await context.runActionAndWait({ + handle: async (progress, context, params) => { + return await context.runActionAndWait(progress, { method: 'expectVisible', selector: getByRoleSelector(params.role, { name: params.accessibleName }), isNot: params.isNot, @@ -53,8 +53,8 @@ const expectVisibleText = defineTool({ }), }, - handle: async (context, params) => { - return await context.runActionAndWait({ + handle: async (progress, context, params) => { + return await context.runActionAndWait(progress, { method: 'expectVisible', selector: getByTextSelector(params.text), isNot: params.isNot, @@ -76,9 +76,9 @@ const expectValue = defineTool({ }), }, - handle: async (context, params) => { - const [selector] = await context.refSelectors([{ ref: params.ref, element: params.element }]); - return await context.runActionAndWait({ + handle: async (progress, context, params) => { + const [selector] = await context.refSelectors(progress, [{ ref: params.ref, element: params.element }]); + return await context.runActionAndWait(progress, { method: 'expectValue', selector, type: params.type, @@ -102,10 +102,10 @@ const expectList = defineTool({ }), }, - handle: async (context, params) => { + handle: async (progress, context, params) => { const template = `- ${params.listRole}: -${params.items.map(item => ` - ${params.itemRole}: ${yamlEscapeValueIfNeeded(item)}`).join('\n')}`; - return await context.runActionAndWait({ +progress, ${params.items.map(item => ` - ${params.itemRole}: ${yamlEscapeValueIfNeeded(item)}`).join('\n')}`; + return await context.runActionAndWait(progress, { method: 'expectAria', template, }); diff --git a/packages/playwright-core/src/server/agent/pageAgent.ts b/packages/playwright-core/src/server/agent/pageAgent.ts index 878dca1fea8fa..d8fc98a5a94e1 100644 --- a/packages/playwright-core/src/server/agent/pageAgent.ts +++ b/packages/playwright-core/src/server/agent/pageAgent.ts @@ -24,14 +24,20 @@ import { Context } from './context'; import performTools from './performTools'; import expectTools from './expectTools'; -import type * as channels from '@protocol/channels'; import type * as actions from './actions'; import type { ToolDefinition } from './tool'; import type * as loopTypes from '@lowire/loop'; +import type { Progress } from '../progress'; -export async function pageAgentPerform(context: Context, options: loopTypes.LoopEvents & channels.PageAgentPerformParams) { - const cacheKey = (options.cacheKey ?? options.task).trim(); - if (await cachedPerform(context, cacheKey)) +export type CallParams = { + cacheKey?: string; + maxTokens?: number; + maxTurns?: number; +}; + +export async function pageAgentPerform(progress: Progress, context: Context, userTask: string, callParams: CallParams) { + const cacheKey = (callParams.cacheKey ?? userTask).trim(); + if (await cachedPerform(progress, context, cacheKey)) return; const task = ` @@ -40,16 +46,17 @@ export async function pageAgentPerform(context: Context, options: loopTypes.Loop - Your reply should be a tool call that performs action the page". ### Task -${options.task} +${userTask} `; - await runLoop(context, performTools, task, undefined, options); + await runLoop(progress, context, performTools, task, undefined, callParams); + context.history.push({ type: 'perform', description: userTask }); await updateCache(context, cacheKey); } -export async function pageAgentExpect(context: Context, options: loopTypes.LoopEvents & channels.PageAgentExpectParams) { - const cacheKey = (options.cacheKey ?? options.expectation).trim(); - if (await cachedPerform(context, cacheKey)) +export async function pageAgentExpect(progress: Progress, context: Context, expectation: string, callParams: CallParams) { + const cacheKey = (callParams.cacheKey ?? expectation).trim(); + if (await cachedPerform(progress, context, cacheKey)) return; const task = ` @@ -58,52 +65,62 @@ export async function pageAgentExpect(context: Context, options: loopTypes.LoopE - You can call exactly one tool and it can't be report_results, must be one of the assertion tools. ### Expectation -${options.expectation} +${expectation} `; - await runLoop(context, expectTools, task, undefined, options); + await runLoop(progress, context, expectTools, task, undefined, callParams); + context.history.push({ type: 'expect', description: expectation }); await updateCache(context, cacheKey); } -export async function runLoop(context: Context, toolDefinitions: ToolDefinition[], userTask: string, resultSchema: loopTypes.Schema | undefined, options: loopTypes.LoopEvents & { - api?: string, - apiEndpoint?: string, - apiKey?: string, - model?: string, - maxTurns?: number; - maxTokens?: number; -}): Promise<{ +export async function pageAgentExtract(progress: Progress, context: Context, query: string, schema: loopTypes.Schema, callParams: CallParams): Promise { + + const task = ` +### Instructions +Extract the following information from the page. Do not perform any actions, just extract the information. + +### Query +${query}`; + const { result } = await runLoop(progress, context, [], task, schema, callParams); + context.history.push({ type: 'extract', description: query }); + return result; +} + +async function runLoop(progress: Progress, context: Context, toolDefinitions: ToolDefinition[], userTask: string, resultSchema: loopTypes.Schema | undefined, params: CallParams): Promise<{ result: any }> { const { page } = context; + if (!context.agentParams.api || !context.agentParams.apiKey || !context.agentParams.model) + throw new Error(`This action requires the API and API key to be set on the page agent. Are you running with --run-agents=none mode?`); - if (!context.options?.api || !context.options?.apiKey || !context.options?.model) - throw new Error(`This action requires the API and API key to be set on the browser context`); - - const { full } = await page.snapshotForAI(context.progress); - const { tools, callTool } = toolsForLoop(context, toolDefinitions, { resultSchema }); + const { full } = await page.snapshotForAI(progress); + const { tools, callTool, reportedResult } = toolsForLoop(progress, context, toolDefinitions, { resultSchema }); const loop = new Loop({ - api: context.options.api as any, - apiEndpoint: context.options.apiEndpoint, - apiKey: context.options.apiKey, - model: context.options.model, - maxTurns: context.options.maxTurns, - maxTokens: context.options.maxTokens, + api: context.agentParams.api as any, + apiEndpoint: context.agentParams.apiEndpoint, + apiKey: context.agentParams.apiKey, + model: context.agentParams.model, + maxTurns: params.maxTurns ?? context.agentParams.maxTurns, + maxTokens: params.maxTokens ?? context.agentParams.maxTokens, summarize: true, debug, callTool, tools, + ...context.events, }); const task = `${userTask} +### Context history +${context.history.map(h => `- ${h.type}: ${h.description}`).join('\n')} + ### Page snapshot ${full} `; - const { result } = await loop.run(task); - return { result }; + await loop.run(task); + return { result: resultSchema ? reportedResult() : undefined }; } type CachedActions = Record; -async function cachedPerform(context: Context, cacheKey: string): Promise { - if (!context.options?.cacheFile) +async function cachedPerform(progress: Progress, context: Context, cacheKey: string): Promise { + if (!context.agentParams?.cacheFile) return; - const cache = await cachedActions(context.options?.cacheFile); + const cache = await cachedActions(context.agentParams?.cacheFile); const entry = cache.actions[cacheKey]; if (!entry) return; for (const action of entry.actions) - await runAction(context.progress, 'run', context.page, action, context.options.secrets ?? []); + await runAction(progress, 'run', context.page, action, context.agentParams.secrets ?? []); return entry.actions; } async function updateCache(context: Context, cacheKey: string) { - const cacheFile = context.options?.cacheFile; - const cacheOutFile = context.options?.cacheOutFile; + const cacheFile = context.agentParams?.cacheFile; + const cacheOutFile = context.agentParams?.cacheOutFile; const cacheFileKey = cacheFile ?? cacheOutFile; const cache = cacheFileKey ? await cachedActions(cacheFileKey) : { actions: {}, newActions: {} }; diff --git a/packages/playwright-core/src/server/agent/performTools.ts b/packages/playwright-core/src/server/agent/performTools.ts index 8563099edb0cc..c93fa5d3cae72 100644 --- a/packages/playwright-core/src/server/agent/performTools.ts +++ b/packages/playwright-core/src/server/agent/performTools.ts @@ -28,8 +28,8 @@ const snapshot = defineTool({ inputSchema: z.object({}), }, - handle: async (context, params) => { - return await context.snapshotResult(); + handle: async (progress, context, params) => { + return await context.snapshotResult(progress); }, }); @@ -52,9 +52,9 @@ const click = defineTool({ inputSchema: clickSchema, }, - handle: async (context, params) => { - const [selector] = await context.refSelectors([params]); - return await context.runActionAndWait({ + handle: async (progress, context, params) => { + const [selector] = await context.refSelectors(progress, [params]); + return await context.runActionAndWait(progress, { method: 'click', selector, options: { @@ -79,13 +79,13 @@ const drag = defineTool({ }), }, - handle: async (context, params) => { - const [sourceSelector, targetSelector] = await context.refSelectors([ + handle: async (progress, context, params) => { + const [sourceSelector, targetSelector] = await context.refSelectors(progress, [ { ref: params.startRef, element: params.startElement }, { ref: params.endRef, element: params.endElement }, ]); - return await context.runActionAndWait({ + return await context.runActionAndWait(progress, { method: 'drag', sourceSelector, targetSelector @@ -105,9 +105,9 @@ const hover = defineTool({ inputSchema: hoverSchema, }, - handle: async (context, params) => { - const [selector] = await context.refSelectors([params]); - return await context.runActionAndWait({ + handle: async (progress, context, params) => { + const [selector] = await context.refSelectors(progress, [params]); + return await context.runActionAndWait(progress, { method: 'hover', selector, options: { @@ -129,9 +129,9 @@ const selectOption = defineTool({ inputSchema: selectOptionSchema, }, - handle: async (context, params) => { - const [selector] = await context.refSelectors([params]); - return await context.runActionAndWait({ + handle: async (progress, context, params) => { + const [selector] = await context.refSelectors(progress, [params]); + return await context.runActionAndWait(progress, { method: 'selectOption', selector, labels: params.values @@ -149,8 +149,8 @@ const pressKey = defineTool({ }), }, - handle: async (context, params) => { - return await context.runActionAndWait({ + handle: async (progress, context, params) => { + return await context.runActionAndWait(progress, { method: 'pressKey', key: params.key }); @@ -171,17 +171,17 @@ const type = defineTool({ inputSchema: typeSchema, }, - handle: async (context, params) => { - const [selector] = await context.refSelectors([params]); + handle: async (progress, context, params) => { + const [selector] = await context.refSelectors(progress, [params]); if (params.slowly) { - return await context.runActionAndWait({ + return await context.runActionAndWait(progress, { method: 'pressSequentially', selector, text: params.text, submit: params.submit, }); } else { - return await context.runActionAndWait({ + return await context.runActionAndWait(progress, { method: 'fill', selector, text: params.text, @@ -206,10 +206,10 @@ const fillForm = defineTool({ }), }, - handle: async (context, params) => { + handle: async (progress, context, params) => { const actions: actions.Action[] = []; for (const field of params.fields) { - const [selector] = await context.refSelectors([{ ref: field.ref, element: field.name }]); + const [selector] = await context.refSelectors(progress, [{ ref: field.ref, element: field.name }]); if (field.type === 'textbox' || field.type === 'slider') { actions.push({ method: 'fill', @@ -230,7 +230,7 @@ const fillForm = defineTool({ }); } } - return await context.runActionsAndWait(actions); + return await context.runActionsAndWait(progress, actions); }, }); diff --git a/packages/playwright-core/src/server/agent/tool.ts b/packages/playwright-core/src/server/agent/tool.ts index 365cb865badca..95333685bc74b 100644 --- a/packages/playwright-core/src/server/agent/tool.ts +++ b/packages/playwright-core/src/server/agent/tool.ts @@ -19,6 +19,7 @@ import { zodToJsonSchema } from '../../mcpBundle'; import type zod from 'zod'; import type * as loopTypes from '@lowire/loop'; import type { Context } from './context'; +import type { Progress } from '../progress'; export type ToolSchema = Omit & { title: string; @@ -27,14 +28,14 @@ export type ToolSchema = Omit = { schema: ToolSchema; - handle: (context: Context, params: zod.output) => Promise; + handle: (progress: Progress, context: Context, params: zod.output) => Promise; }; export function defineTool(tool: ToolDefinition): ToolDefinition { return tool; } -export function toolsForLoop(context: Context, toolDefinitions: ToolDefinition[], options: { resultSchema?: loopTypes.Schema } = {}): { tools: loopTypes.Tool[], callTool: loopTypes.ToolCallback } { +export function toolsForLoop(progress: Progress, context: Context, toolDefinitions: ToolDefinition[], options: { resultSchema?: loopTypes.Schema } = {}): { tools: loopTypes.Tool[], callTool: loopTypes.ToolCallback, reportedResult: () => any } { const tools = toolDefinitions.map(tool => { const result: loopTypes.Tool = { name: tool.schema.name, @@ -51,11 +52,14 @@ export function toolsForLoop(context: Context, toolDefinitions: ToolDefinition[] }); } + let reportedResult: any; + const callTool: loopTypes.ToolCallback = async params => { const intent = params.arguments._meta?.['dev.lowire/intent']; if (params.name === 'report_result') { + reportedResult = params.arguments; return { - content: [{ type: 'text', text: JSON.stringify(params.arguments) }], + content: [{ type: 'text', text: 'Done' }], isError: false, }; } @@ -71,7 +75,7 @@ export function toolsForLoop(context: Context, toolDefinitions: ToolDefinition[] } try { - return await context.callTool(tool, params.arguments, { intent }); + return await context.callTool(progress, tool, params.arguments, { intent }); } catch (error) { return { content: [{ type: 'text', text: error.message }], @@ -83,5 +87,6 @@ export function toolsForLoop(context: Context, toolDefinitions: ToolDefinition[] return { tools, callTool, + reportedResult: () => reportedResult, }; } diff --git a/packages/playwright-core/src/server/dispatchers/pageAgentDispatcher.ts b/packages/playwright-core/src/server/dispatchers/pageAgentDispatcher.ts index fbf3cfc53d584..caca0f394e952 100644 --- a/packages/playwright-core/src/server/dispatchers/pageAgentDispatcher.ts +++ b/packages/playwright-core/src/server/dispatchers/pageAgentDispatcher.ts @@ -15,7 +15,7 @@ */ import { Dispatcher } from './dispatcher'; -import { pageAgentExpect, pageAgentPerform, runLoop } from '../agent/pageAgent'; +import { pageAgentExpect, pageAgentExtract, pageAgentPerform } from '../agent/pageAgent'; import { SdkObject } from '../instrumentation'; import { Context } from '../agent/context'; @@ -30,53 +30,27 @@ export class PageAgentDispatcher extends Dispatcher { - const resolvedParams = resolveCallOptions(this._agentParams, params); - const context = new Context(progress, this._page, resolvedParams); - - await pageAgentPerform(context, { - ...this._eventSupport(), - ...resolvedParams, - task: params.task, - }); + await pageAgentPerform(progress, this._context, params.task, params); return { usage: this._usage }; } async expect(params: channels.PageAgentExpectParams, progress: Progress): Promise { - const resolvedParams = resolveCallOptions(this._agentParams, params); - const context = new Context(progress, this._page, resolvedParams); - - await pageAgentExpect(context, { - ...this._eventSupport(), - ...resolvedParams, - expectation: params.expectation, - }); + await pageAgentExpect(progress, this._context, params.expectation, params); return { usage: this._usage }; } async extract(params: channels.PageAgentExtractParams, progress: Progress): Promise { - const resolvedParams = resolveCallOptions(this._agentParams, params); - const context = new Context(progress, this._page, resolvedParams); - - const task = ` - ### Instructions - Extract the following information from the page. Do not perform any actions, just extract the information. - - ### Query - ${params.query}`; - const { result } = await runLoop(context, [], task, params.schema, { - ...this._eventSupport(), - ...resolvedParams, - }); + const result = await pageAgentExtract(progress, this._context, params.query, params.schema, params); return { result, usage: this._usage }; } @@ -121,13 +95,6 @@ export class PageAgentDispatcher extends Dispatcher(query: string, schema: Schema): Promise>; + extract(query: string, schema: Schema): Promise<{ result: ZodInfer, usage: { turns: number, inputTokens: number, outputTokens: number } }>; /** * Emitted when the agent makes a turn. */ diff --git a/tests/library/perform-task.spec.ts b/tests/library/perform-task.spec.ts index 2f8b85da512bb..0d1d853a015da 100644 --- a/tests/library/perform-task.spec.ts +++ b/tests/library/perform-task.spec.ts @@ -88,3 +88,17 @@ test('page.perform expect value', async ({ page, agent }) => { - Check that the error message is displayed `); }); + +test('page.perform history', async ({ page, agent }) => { + test.skip(true, 'Skipping because it needs LLM'); + await page.setContent(` + + + + `); + await agent.perform('click the Fox button'); + const { result } = await agent.extract('return the name of the button you pressed', z.object({ + name: z.string(), + })); + expect(result.name).toBe('Fox'); +}); diff --git a/tests/library/perform-task.spec.ts-cache.json b/tests/library/perform-task.spec.ts-cache.json index dd17cb1a3617f..032a36ef31bc7 100644 --- a/tests/library/perform-task.spec.ts-cache.json +++ b/tests/library/perform-task.spec.ts-cache.json @@ -69,5 +69,17 @@ "intent": "I'll help you complete this task. Let me first enter \"bogus\" into the email field." } ] + }, + "click the Fox button": { + "timestamp": 1768072608059, + "actions": [ + { + "method": "click", + "selector": "internal:role=button[name=\"Fox\"i]", + "options": {}, + "code": "await page.getByRole('button', { name: 'Fox' }).click();", + "intent": "I'll click the Fox button for you." + } + ] } } \ No newline at end of file diff --git a/utils/generate_types/overrides.d.ts b/utils/generate_types/overrides.d.ts index f3ef39288c1ea..1f63d817eb620 100644 --- a/utils/generate_types/overrides.d.ts +++ b/utils/generate_types/overrides.d.ts @@ -80,7 +80,7 @@ export interface Page { } export interface PageAgent { - extract(query: string, schema: Schema): Promise>; + extract(query: string, schema: Schema): Promise<{ result: ZodInfer, usage: { turns: number, inputTokens: number, outputTokens: number } }>; } export interface Frame {