Skip to content

Commit

Permalink
Schema discovery: Add page summaries to help ground the candidate use…
Browse files Browse the repository at this point in the history
…r action responses (#659)
  • Loading branch information
hillary-mutisya authored Feb 3, 2025
1 parent 3588ffd commit f4c49f0
Show file tree
Hide file tree
Showing 6 changed files with 214 additions and 39 deletions.
39 changes: 38 additions & 1 deletion ts/packages/agents/browser/src/agent/discovery/actionHandler.mts
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,36 @@ export async function handleSchemaDiscoveryAction(
case "findUserActions":
await handleFindUserActions(action);
break;
case "summarizePage":
await handleGetPageSummary(action);
break;
}

async function handleFindUserActions(action: any) {
const htmlFragments = await browser.getHtmlFragments();
// const screenshot = await browser.getCurrentPageScreenshot();
const screenshot = "";
let pageSummary = "";

const summaryResponse = await agent.getPageSummary(
undefined,
htmlFragments,
screenshot,
);

if (summaryResponse.success) {
pageSummary =
"Page summary: \n" + JSON.stringify(summaryResponse.data, null, 2);
}

const timerName = `Analyzing page actions`;
console.time(timerName);

const response = await agent.getCandidateUserActions(
undefined,
htmlFragments,
undefined,
screenshot,
pageSummary,
);

if (!response.success) {
Expand All @@ -48,5 +68,22 @@ export async function handleSchemaDiscoveryAction(
return response.data;
}

/**
 * Summarizes the current page and stores the formatted summary in `message`.
 *
 * Fetches the page's HTML fragments from the browser and asks the agent for a
 * page summary. On failure the error is logged and nothing is returned.
 *
 * @param action The action that triggered this handler (not read here).
 * @returns The summary data on success, otherwise `undefined`.
 */
async function handleGetPageSummary(action: any) {
    const htmlFragments = await browser.getHtmlFragments();
    const timerName = `Summarizing page`;
    console.time(timerName);
    try {
        const response = await agent.getPageSummary(undefined, htmlFragments);

        if (!response.success) {
            console.error("Attempt to get page summary failed");
            console.error(response.message);
            return;
        }

        message = "Page summary: \n" + JSON.stringify(response.data, null, 2);
        return response.data;
    } finally {
        // Always close the timer, including on failure or a thrown error, so the
        // label is not left open for the next invocation of this handler.
        console.timeEnd(timerName);
    }
}

return message;
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ export type FindUserActions = {

// Action requesting a summary of the current page.
// NOTE(review): `allowDuplicates` is optional and its meaning is not visible here — confirm.
export type SummarizePage = {
actionName: "summarizePage";
parameters: {
allowDuplicates?: boolean;
};
};

export type SaveUserActions = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// A description of the page, including layout information and summary of content.
// This is the shape of the "PageDescription" response requested when summarizing a page.
export type PageDescription = {
description: string;
features: string[];
entities: string[];
// Actions a user can take on the page.
possibleUserAction: string[];
};
90 changes: 53 additions & 37 deletions ts/packages/agents/browser/src/agent/discovery/schema/pageTypes.mts
Original file line number Diff line number Diff line change
@@ -1,55 +1,71 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// Page feature: the search input box, located by a CSS selector.
export type SearchBox = {
featureName: "searchInputBox";
description: "Input box for searching on the page";
parameters: {
cssSelector: string;
};
};

// Page feature: the list of products shown in the search results, located by a CSS selector.
export type SearchResultsList = {
featureName: "searchResultsList";
description: "List of products available from the search results";
parameters: {
cssSelector: string;
};
};

// Page feature: the product details section (name, price, images, rating and
// an add-to-cart option), located by a CSS selector.
export type ProductDetailsCard = {
featureName: "productDetailsCard";
description: "A section that shows the product name, price, images and rating. This also gives an option to add the product to the shopping cart.";
parameters: {
cssSelector: string;
};
};

// Action for finding content on the page; carries a value and a CSS selector.
// NOTE(review): the type is named SearchForContent but actionName is "searchForProduct" — confirm this is intended.
export type SearchForContent = {
actionName: "searchForProduct";
description: "Find content on the page";
parameters: {
value: string;
cssSelector: string;
};
};

// Page type: the site's default landing page; its feature is a search box.
export type LandingPage = {
description: "The default landing page for the site";
features: SearchBox;
};

// Page type: the search results page; its feature is the search results list.
export type SearchResultsPage = {
description: "The search results page";
features: SearchResultsList;
};

// Page type: a page focused on a single product; its feature is the product details card.
export type ProductDetailsPage = {
description: "A product details page, with focus on one product.";
features: ProductDetailsCard;
};

// Page type: the site's shopping cart page; its feature is typed as a search box.
export type ShoppingCartPage = {
description: "The shopping cart page for the site";
features: SearchBox;
};

// Page type: the page listing a user's past orders. Declares no features.
export type PastOrderPage = {
description: "The page showing a user's past orders";
};

// Page type: fallback for pages that match none of the more specific page types.
export type UnknownPage = {
description: "A page that does not meet the previous more-specific categories";
};

// Union of the commerce page types, with UnknownPage as the fallback.
export type CommercePageTypes =
| LandingPage
| SearchResultsPage
| ProductDetailsPage
| ShoppingCartPage
| PastOrderPage
| UnknownPage;

// Page type: a page showing a crossword puzzle.
export type CrosswordPage = {
description: "The page showing a crossword puzzle";
};

// Page type: a page showing the day's news headlines.
export type NewsLandingPage = {
description: "The page showing news headlines for the day";
};

// Page type: a page showing the day's sports headlines.
export type SportsLandingPage = {
description: "The page showing sports headlines for the day";
};

// Page type: a page showing the day's editorial opinions.
export type OpinionPage = {
description: "The page showing editorial opinions for the day";
};

// Page type: a page showing a single news article.
export type ArticlePage = {
description: "The page showing an individual news article";
};

// Page type: a page showing weather headlines.
export type WeatherPage = {
description: "The page showing weather headlines";
};

// Page type: a page listing puzzles (sudoku, crossword, word matching games and more).
export type PuzzlesPage = {
description: "The page showing a list of puzzles, such as sudoku, crossword, word matching games and more.";
};

// Union of the news page types, with UnknownPage as the fallback.
// WeatherPage is included so that every news page type declared alongside
// this union is a member of it.
export type NewsPageTypes =
    | CrosswordPage
    | NewsLandingPage
    | SportsLandingPage
    | OpinionPage
    | ArticlePage
    | WeatherPage
    | PuzzlesPage
    | UnknownPage;
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export type SearchForProductAction = {
};
};

// This allows users to select individual results on the search results page.
export type SelectSearchResult = {
actionName: "selectSearchResult";
parameters: {
Expand All @@ -38,39 +39,63 @@ export type SelectSearchResult = {

// Follow a link to the home page. The link is identified by linkCssSelector.
export type NavigateToHomePage = {
actionName: "navigateToHomePage";
parameters: {
linkCssSelector: string;
};
};

// Follow a link to view a store landing page.
// The link is identified by linkCssSelector.
export type NavigateToStorePage = {
actionName: "navigateToStorePage";
parameters: {
linkCssSelector: string;
};
};

// Follow a link to view a product details page.
// The link is identified by linkCssSelector.
export type NavigateToProductPage = {
actionName: "navigateToProductPage";
parameters: {
linkCssSelector: string;
};
};

// Follow a link to view a recipe details page. This link is typically named "Recipe" or "Recipes".
export type NavigateToRecipePage = {
actionName: "navigateToRecipePage";
parameters: {
linkCssSelector: string;
};
};

// Follow a link to a list page. The link is identified by linkCssSelector.
export type NavigateToListPage = {
actionName: "navigateToListPage";
parameters: {
linkCssSelector: string;
};
};

// Navigate to the "Buy it again" page. This page may also be called Past Orders.
// The link is identified by linkCssSelector.
export type NavigateToBuyItAgainPage = {
actionName: "navigateToBuyItAgainPage";
parameters: {
linkCssSelector: string;
};
};

// This link opens the shopping cart. It's usually indicated by a cart or bag icon.
export type NavigateToShoppingCartPage = {
actionName: "navigateToShoppingCartPage";
parameters: {
linkCssSelector: string;
};
};

// Follow a link to a page that has no dedicated navigation action.
// `pageType` presumably names the kind of page the link leads to — confirm against callers.
export type NavigateToOtherPage = {
actionName: "navigateToOtherPage";
parameters: {
pageType: string;
linkCssSelector: string;
};
};

Expand Down
84 changes: 84 additions & 0 deletions ts/packages/agents/browser/src/agent/discovery/translator.mts
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ export class SchemaDiscoveryAgent<T extends object> {
userRequest?: string,
fragments?: HtmlFragments[],
screenshot?: string,
pageSummary?: string,
) {
// prompt - present html, optional screenshot and list of candidate actions
const bootstrapTranslator = this.getBootstrapTranslator(
Expand All @@ -273,6 +274,19 @@ export class SchemaDiscoveryAgent<T extends object> {
`,
});
}
if (pageSummary) {
requestSection.push({
type: "text",
text: `
Here is a previously-generated summary of the page
'''
${pageSummary}
'''
`,
});
}

const promptSections = [
...prefixSection,
...screenshotSection,
Expand Down Expand Up @@ -303,4 +317,74 @@ export class SchemaDiscoveryAgent<T extends object> {
]);
return response;
}

/**
 * Asks the model for a structured "PageDescription" summary of a page.
 *
 * Reads the schema text from `src/agent/discovery/schema/pageSummary.mts`
 * (resolved relative to this module), then builds a prompt from the bootstrap
 * prefix, the screenshot section, the HTML section, the instruction text with
 * the schema, and the optional user request, and sends it to the bootstrap
 * translator.
 *
 * @param userRequest Optional user request; added to the prompt only when provided.
 * @param fragments HTML fragments used to build the HTML and screenshot prompt sections.
 * @param screenshot Optional screenshot passed to the screenshot prompt section.
 * @returns The response returned by the bootstrap translator's `translate` call.
 */
async getPageSummary(
    userRequest?: string,
    fragments?: HtmlFragments[],
    screenshot?: string,
) {
    const packageRoot = path.join("..", "..", "..");
    const resultsSchema = await fs.promises.readFile(
        fileURLToPath(
            new URL(
                path.join(
                    packageRoot,
                    "./src/agent/discovery/schema/pageSummary.mts",
                ),
                import.meta.url,
            ),
        ),
        "utf8",
    );

    const bootstrapTranslator = this.getBootstrapTranslator(
        "PageDescription",
        resultsSchema,
    );

    const screenshotSection = getScreenshotPromptSection(screenshot, fragments);
    const htmlSection = getHtmlPromptSection(fragments);
    const prefixSection = getBootstrapPrefixPromptSection();

    // Only present when the caller supplied a user request.
    const requestSection: { type: string; text: string }[] = [];
    if (userRequest) {
        requestSection.push({
            type: "text",
            text: `
Here is user request
'''
${userRequest}
'''
`,
        });
    }

    const promptSections = [
        ...prefixSection,
        ...screenshotSection,
        ...htmlSection,
        {
            type: "text",
            text: `
Examine the layout information provided and determine the content of the page and the actions users can take on it.
Once you have this information, generate a SINGLE "PageDescription" response using the typescript schema below.
'''
${bootstrapTranslator.validator.getSchemaText()}
'''
`,
        },
        ...requestSection,
        {
            type: "text",
            text: `
The following is the COMPLETE JSON response object with 2 spaces of indentation and no properties with the value undefined:
`,
        },
    ];

    // The whole prompt is serialized into a single user message.
    const response = await bootstrapTranslator.translate("", [
        { role: "user", content: JSON.stringify(promptSections) },
    ]);
    return response;
}
}

0 comments on commit f4c49f0

Please sign in to comment.