navapbc
diff --git a/‎mastra-test-app/package.json‎
Lines changed: 1 addition & 0 deletions b/‎mastra-test-app/package.json‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎mastra-test-app/pnpm-lock.yaml‎
Lines changed: 78 additions & 0 deletions b/‎mastra-test-app/pnpm-lock.yaml‎
Lines changed: 78 additions & 0 deletions
diff --git a/‎mastra-test-app/src/mastra/agents/web-automation-agent.ts‎
Lines changed: 23 additions & 2 deletions b/‎mastra-test-app/src/mastra/agents/web-automation-agent.ts‎
Lines changed: 23 additions & 2 deletions
diff --git a/‎mastra-test-app/src/mastra/scorers/languagePreference/index.ts‎
Lines changed: 91 additions & 0 deletions b/‎mastra-test-app/src/mastra/scorers/languagePreference/index.ts‎
Lines changed: 91 additions & 0 deletions
@@ -31,6 +31,7 @@
     "@ai-sdk/google-vertex": "^2.2.27",
     "@inquirer/prompts": "^7.7.1",
     "@mastra/core": "^0.13.2",
+    "@mastra/evals": "^0.12.0",
     "@mastra/libsql": "^0.13.2",
     "@mastra/loggers": "^0.10.6",
     "@mastra/mcp": "^0.10.11",
 
@@ -1,9 +1,14 @@
-import { postgresStore, pgVector } from '../storage';
+import {
+  createAnswerRelevancyScorer,
+  createToxicityScorer
+} from "@mastra/evals/scorers/llm";
 import { exaMCP, playwrightMCP } from '../mcp';
+import { pgVector, postgresStore } from '../storage';
 
 import { Agent } from '@mastra/core/agent';
 import { Memory } from '@mastra/memory';
 import { anthropic } from '@ai-sdk/anthropic';
+import { createLanguagePreferenceScorer } from "../scorers/languagePreference";
 import { databaseTools } from '../tools/database-tools';
 import { google } from '@ai-sdk/google';
 import { openai } from '@ai-sdk/openai';
@@ -76,7 +81,7 @@ export const webAutomationAgent = new Agent({
     **Web Navigation:**
     - Navigate to websites and analyze page structure
     - If participant has a preferred language, immediately look for and change the website language
-    - Common language selectors: language dropdowns, flag icons, "EN" buttons, or language preference settings
+    - Common language selectors: "Select Language" dropdowns, flag icons, buttons that say "EN" or "SP", or language preference settings
     - Identify and interact with elements (buttons, forms, links, dropdowns)
 
     When performing actions:
@@ -150,6 +155,22 @@ export const webAutomationAgent = new Agent({
     )
   },
   memory: memory,
+  scorers: {
+    relevancy: {
+      scorer: createAnswerRelevancyScorer({ model: google("gemini-2.5-pro") }),
+      sampling: { type: "ratio", rate: 0.5 }
+    },
+    safety: {
+      scorer: createToxicityScorer({ model: google("gemini-2.5-pro") }),
+      sampling: { type: "ratio", rate: 1 }
+    },
+    languagePreference: {
+      scorer: createLanguagePreferenceScorer({
+        model: google("gemini-2.5-pro"),
+      }),
+      sampling: { rate: 1, type: "ratio" },
+    },
+  },
   defaultStreamOptions: {
     maxSteps: 50,
     maxRetries: 3,
 
@@ -0,0 +1,91 @@
+import { LANGUAGE_PREFERENCE_PROMPT, createPreprocessPrompt, createAnalysisPrompt, createReasonPrompt } from './prompt';
+
+import { LanguageModel } from '@mastra/core';
+import { createScorer } from '@mastra/core/scores';
+import { z } from 'zod';
+
+/**
+ * Builds an LLM-judged scorer that rates whether the web automation agent
+ * switched a website to the participant's preferred language.
+ *
+ * Steps: preprocess (extract preference and actions) -> analyze (rate
+ * compliance) -> generateScore (weight x confidence) -> generateReason.
+ * @param model - Language model used as the judge for the prompted steps.
+ * @returns The scorer produced by the `createScorer` builder chain.
+ */
+export function createLanguagePreferenceScorer({
+    model,
+}: {
+    model: LanguageModel;
+}) {
+  return createScorer({
+    name: 'Language Preference Compliance',
+    description: 'Evaluates if the web automation agent changes website language to match participant language preferences',
+    judge: {
+      model,
+      instructions: LANGUAGE_PREFERENCE_PROMPT
+    }
+  })
+  .preprocess({
+    description: 'Extract language preferences and actions from the conversation',
+    outputSchema: z.object({
+      participantLanguage: z.string().nullable(),
+      languageChangeActions: z.array(z.string()),
+      websiteLanguageSet: z.boolean(),
+      targetLanguage: z.string().nullable()
+    }),
+    // Flatten both sides of the run to plain text so the prompt never embeds "[object Object]".
+    createPrompt: ({ run }) =>
+      createPreprocessPrompt({
+        userInput: toPromptText(run.input),
+        agentOutput: toPromptText(run.output),
+      }),
+  })
+  .analyze({
+    description: 'Evaluate language preference compliance',
+    outputSchema: z.object({
+      compliance: z.enum(['excellent', 'good', 'partial', 'poor', 'no_preference']),
+      languageMatch: z.boolean(),
+      actionsTaken: z.boolean(),
+      confidence: z.number().min(0).max(1),
+    }),
+    createPrompt: ({ results }) => {
+      const { participantLanguage, languageChangeActions, websiteLanguageSet, targetLanguage } = results.preprocessStepResult;
+      return createAnalysisPrompt({ participantLanguage, languageChangeActions, websiteLanguageSet, targetLanguage });
+    },
+  })
+  .generateScore(({ results }) => {
+    const { compliance, confidence } = results.analyzeStepResult;
+    // Weight per compliance level; 'no_preference' gets full weight because no language was requested.
+    const complianceScores: Record<string, number> = {
+      excellent: 1.0,
+      good: 0.8,
+      partial: 0.5,
+      poor: 0.2,
+      no_preference: 1.0,
+    };
+    // Unknown levels score 0; the weight is scaled by the analyze step's confidence.
+    return (complianceScores[compliance] ?? 0) * confidence;
+  })
+  .generateReason({
+    description: 'Generate a reason for the language preference compliance score',
+    createPrompt: ({ results, score }) => {
+      const { compliance, languageMatch, actionsTaken } = results.analyzeStepResult;
+      const { participantLanguage, targetLanguage } = results.preprocessStepResult;
+      return createReasonPrompt({ score, compliance, languageMatch, actionsTaken, participantLanguage, targetLanguage });
+    },
+  });
+}
+
+/**
+ * Flattens a run payload (string, message array, or wrapper object) into prompt text.
+ * NOTE(review): assumes agent runs may wrap messages as `{ inputMessages }` — confirm against @mastra/core.
+ */
+function toPromptText(value: unknown): string {
+  if (typeof value === 'string') return value;
+  if (value == null) return '';
+  if (typeof value !== 'object') return String(value);
+  if (Array.isArray(value)) return value.map((item) => toPromptText(item?.content ?? item)).join('\n');
+  const { text, inputMessages } = value as { text?: unknown; inputMessages?: unknown };
+  return toPromptText(text ?? inputMessages) || JSON.stringify(value);
+}