Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@
"dependencies": {
"@anthropic-ai/sdk": "0.33.1",
"@azure/identity": "4.5.0",
"@langchain/core": "0.3.26",
"@midscene/recorder": "workspace:*",
"@midscene/shared": "workspace:*",
"@ui-tars/action-parser": "1.2.3",
Expand Down
6 changes: 3 additions & 3 deletions packages/core/src/ai-model/inspect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ export async function AiLocateElement<
targetElementDescription,
'cannot find the target element description',
);
const userInstructionPrompt = await findElementPrompt.format({
const userInstructionPrompt = findElementPrompt({
pageDescription: description,
targetElementDescription: extraTextFromUserPrompt(targetElementDescription),
});
Expand Down Expand Up @@ -307,7 +307,7 @@ export async function AiLocateSection(options: {
const { screenshotBase64 } = context;

const systemPrompt = systemPromptToLocateSection(vlMode);
const sectionLocatorInstructionText = await sectionLocatorInstruction.format({
const sectionLocatorInstructionText = sectionLocatorInstruction({
sectionDescription: extraTextFromUserPrompt(sectionDescription),
});
const msgs: AIArgs = [
Expand Down Expand Up @@ -433,7 +433,7 @@ export async function AiExtractElementInfo<
vlMode,
});

const extractDataPromptText = await extractDataQueryPrompt(
const extractDataPromptText = extractDataQueryPrompt(
description,
dataQuery,
);
Expand Down
20 changes: 6 additions & 14 deletions packages/core/src/ai-model/prompt/extraction.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { PromptTemplate } from '@langchain/core/prompts';
import type { ResponseFormatJSONSchema } from 'openai/resources/index';

export function systemPromptToExtract() {
Expand Down Expand Up @@ -87,7 +86,7 @@ By viewing the screenshot and page contents, you can extract the following data:
`;
}

export const extractDataQueryPrompt = async (
export const extractDataQueryPrompt = (
pageDescription: string,
dataQuery: string | Record<string, string>,
) => {
Expand All @@ -97,23 +96,16 @@ export const extractDataQueryPrompt = async (
} else {
dataQueryText = JSON.stringify(dataQuery, null, 2);
}
const extractDataPrompt = new PromptTemplate({
template: `

return `
<PageDescription>
{pageDescription}
${pageDescription}
</PageDescription>

<DATA_DEMAND>
{dataQuery}
${dataQueryText}
</DATA_DEMAND>
`,
inputVariables: ['pageDescription', 'dataQuery'],
});

return await extractDataPrompt.format({
pageDescription,
dataQuery: dataQueryText,
});
`;
};

export const extractDataSchema: ResponseFormatJSONSchema = {
Expand Down
18 changes: 10 additions & 8 deletions packages/core/src/ai-model/prompt/llm-locator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { PromptTemplate } from '@langchain/core/prompts';
import type { TVlModeTypes } from '@midscene/shared/env';
import type { ResponseFormatJSONSchema } from 'openai/resources/index';
import { bboxDescription } from './common';
Expand Down Expand Up @@ -254,14 +253,17 @@ export const locatorSchema: ResponseFormatJSONSchema = {
},
};

export const findElementPrompt = new PromptTemplate({
template: `
export const findElementPrompt = ({
pageDescription,
targetElementDescription,
}: {
pageDescription: string;
targetElementDescription: string;
}) => `
Here is the item user want to find:
=====================================
{targetElementDescription}
${targetElementDescription}
=====================================

{pageDescription}
`,
inputVariables: ['pageDescription', 'targetElementDescription'],
});
${pageDescription}
`;
14 changes: 7 additions & 7 deletions packages/core/src/ai-model/prompt/llm-section-locator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { PromptTemplate } from '@langchain/core/prompts';
import type { TVlModeTypes } from '@midscene/shared/env';
import { bboxDescription } from './common';

Expand Down Expand Up @@ -35,11 +34,12 @@ the return value should be like this:
`;
}

export const sectionLocatorInstruction = new PromptTemplate({
template: `Here is the target element user interested in:
export const sectionLocatorInstruction = ({
sectionDescription,
}: {
sectionDescription: string;
}) => `Here is the target element user interested in:
<targetDescription>
{sectionDescription}
${sectionDescription}
</targetDescription>
`,
inputVariables: ['sectionDescription'],
});
`;
8 changes: 4 additions & 4 deletions packages/core/tests/unit-test/prompt/prompt.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,16 +189,16 @@ describe('extract element', () => {
expect(prompt).toMatchSnapshot();
});

it('extract element by extractDataPrompt', async () => {
const prompt = await extractDataQueryPrompt(
it('extract element by extractDataPrompt', () => {
const prompt = extractDataQueryPrompt(
'todo title, string',
'todo title, string',
);
expect(prompt).toMatchSnapshot();
});

it('extract element by extractDataPrompt - object', async () => {
const prompt = await extractDataQueryPrompt('todo title, string', {
it('extract element by extractDataPrompt - object', () => {
const prompt = extractDataQueryPrompt('todo title, string', {
foo: 'an array indicates the foo',
});
expect(prompt).toMatchSnapshot();
Expand Down
76 changes: 0 additions & 76 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.