From cb6d6307a7cb7b11a3855211234b85f3c0f3937c Mon Sep 17 00:00:00 2001
From: Emily Klassen <emily.klassen@tenthousandcoffees.com>
Date: Tue, 16 Sep 2025 20:50:58 -0700
Subject: [PATCH 1/2] refactor(js/vertexai): improve evaluation typings,
 deduplicate types

---
 .../vertexai/src/evaluation/evaluation.ts     | 62 ++++++-------------
 .../src/evaluation/evaluator_factory.ts       | 16 +++--
 js/plugins/vertexai/src/evaluation/types.ts   | 42 +++++++++++--
 3 files changed, 67 insertions(+), 53 deletions(-)

diff --git a/js/plugins/vertexai/src/evaluation/evaluation.ts b/js/plugins/vertexai/src/evaluation/evaluation.ts
index 37d144e64c..65b0b5d7c0 100644
--- a/js/plugins/vertexai/src/evaluation/evaluation.ts
+++ b/js/plugins/vertexai/src/evaluation/evaluation.ts
@@ -14,40 +14,15 @@
  * limitations under the License.
  */
 
+import type { protos } from '@google-cloud/aiplatform';
 import { z, type Action, type Genkit } from 'genkit';
 import type { GoogleAuth } from 'google-auth-library';
 import { EvaluatorFactory } from './evaluator_factory.js';
-
-/**
- * Vertex AI Evaluation metrics. See API documentation for more information.
- * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation#parameter-list
- */
-export enum VertexAIEvaluationMetricType {
-  // Update genkit/docs/plugins/vertex-ai.md when modifying the list of enums
-  BLEU = 'BLEU',
-  ROUGE = 'ROUGE',
-  FLUENCY = 'FLEUNCY',
-  SAFETY = 'SAFETY',
-  GROUNDEDNESS = 'GROUNDEDNESS',
-  SUMMARIZATION_QUALITY = 'SUMMARIZATION_QUALITY',
-  SUMMARIZATION_HELPFULNESS = 'SUMMARIZATION_HELPFULNESS',
-  SUMMARIZATION_VERBOSITY = 'SUMMARIZATION_VERBOSITY',
-}
-
-/**
- * Evaluation metric config. Use `metricSpec` to define the behavior of the metric.
- * The value of `metricSpec` will be included in the request to the API. See the API documentation
- * for details on the possible values of `metricSpec` for each metric.
- * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation#parameter-list
- */
-export type VertexAIEvaluationMetricConfig = {
-  type: VertexAIEvaluationMetricType;
-  metricSpec: any;
-};
-
-export type VertexAIEvaluationMetric =
-  | VertexAIEvaluationMetricType
-  | VertexAIEvaluationMetricConfig;
+import {
+  VertexAIEvaluationMetricType,
+  type VertexAIEvaluationMetric,
+  type VertexAIEvaluationMetricConfig,
+} from './types.js';
 
 function stringify(input: unknown) {
   return typeof input === 'string' ? input : JSON.stringify(input);
@@ -62,8 +37,9 @@ export function vertexEvaluators(
 ): Action[] {
   const factory = new EvaluatorFactory(auth, location, projectId);
   return metrics.map((metric) => {
-    const metricType = isConfig(metric) ? metric.type : metric;
-    const metricSpec = isConfig(metric) ? metric.metricSpec : {};
+    const { type: metricType, metricSpec } = isConfig(metric)
+      ? metric
+      : { type: metric, metricSpec: {} };
 
     switch (metricType) {
       case VertexAIEvaluationMetricType.BLEU: {
@@ -110,7 +86,7 @@ const BleuResponseSchema = z.object({
 function createBleuEvaluator(
   ai: Genkit,
   factory: EvaluatorFactory,
-  metricSpec: any
+  metricSpec: protos.google.cloud.aiplatform.v1.IBleuSpec
 ): Action {
   return factory.create(
     ai,
@@ -128,7 +104,7 @@ function createBleuEvaluator(
           instances: [
             {
               prediction: stringify(datapoint.output),
-              reference: datapoint.reference,
+              reference: datapoint.reference as string,
             },
           ],
         },
@@ -152,7 +128,7 @@ const RougeResponseSchema = z.object({
 function createRougeEvaluator(
   ai: Genkit,
   factory: EvaluatorFactory,
-  metricSpec: any
+  metricSpec: protos.google.cloud.aiplatform.v1.IRougeSpec
 ): Action {
   return factory.create(
     ai,
@@ -169,7 +145,7 @@ function createRougeEvaluator(
           metricSpec,
           instances: {
             prediction: stringify(datapoint.output),
-            reference: datapoint.reference,
+            reference: datapoint.reference as string,
           },
         },
       };
@@ -193,7 +169,7 @@ const FluencyResponseSchema = z.object({
 function createFluencyEvaluator(
   ai: Genkit,
   factory: EvaluatorFactory,
-  metricSpec: any
+  metricSpec: protos.google.cloud.aiplatform.v1.IFluencySpec
 ): Action {
   return factory.create(
     ai,
@@ -235,7 +211,7 @@ const SafetyResponseSchema = z.object({
 function createSafetyEvaluator(
   ai: Genkit,
   factory: EvaluatorFactory,
-  metricSpec: any
+  metricSpec: protos.google.cloud.aiplatform.v1.ISafetySpec
 ): Action {
   return factory.create(
     ai,
@@ -277,7 +253,7 @@ const GroundednessResponseSchema = z.object({
 function createGroundednessEvaluator(
   ai: Genkit,
   factory: EvaluatorFactory,
-  metricSpec: any
+  metricSpec: protos.google.cloud.aiplatform.v1.IGroundednessSpec
 ): Action {
   return factory.create(
     ai,
@@ -321,7 +297,7 @@ const SummarizationQualityResponseSchema = z.object({
 function createSummarizationQualityEvaluator(
   ai: Genkit,
   factory: EvaluatorFactory,
-  metricSpec: any
+  metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationQualitySpec
 ): Action {
   return factory.create(
     ai,
@@ -365,7 +341,7 @@ const SummarizationHelpfulnessResponseSchema = z.object({
 function createSummarizationHelpfulnessEvaluator(
   ai: Genkit,
   factory: EvaluatorFactory,
-  metricSpec: any
+  metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationHelpfulnessSpec
 ): Action {
   return factory.create(
     ai,
@@ -410,7 +386,7 @@ const SummarizationVerbositySchema = z.object({
 function createSummarizationVerbosityEvaluator(
   ai: Genkit,
   factory: EvaluatorFactory,
-  metricSpec: any
+  metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationVerbositySpec
 ): Action {
   return factory.create(
     ai,
diff --git a/js/plugins/vertexai/src/evaluation/evaluator_factory.ts b/js/plugins/vertexai/src/evaluation/evaluator_factory.ts
index de0c0c939e..76f47061d4 100644
--- a/js/plugins/vertexai/src/evaluation/evaluator_factory.ts
+++ b/js/plugins/vertexai/src/evaluation/evaluator_factory.ts
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+import type { protos } from '@google-cloud/aiplatform';
 import { type Action, type Genkit, type z } from 'genkit';
 import type { BaseEvalDataPoint, Score } from 'genkit/evaluator';
 import { runInNewSpan } from 'genkit/tracing';
@@ -36,7 +37,9 @@ export class EvaluatorFactory {
       definition: string;
       responseSchema: ResponseType;
     },
-    toRequest: (datapoint: BaseEvalDataPoint) => any,
+    toRequest: (
+      datapoint: BaseEvalDataPoint
+    ) => protos.google.cloud.aiplatform.v1.IEvaluateInstancesRequest,
     responseHandler: (response: z.infer<ResponseType>) => Score
   ): Action {
     return ai.defineEvaluator(
@@ -63,7 +66,7 @@ export class EvaluatorFactory {
 
   async evaluateInstances<ResponseType extends z.ZodTypeAny>(
     ai: Genkit,
-    partialRequest: any,
+    partialRequest: protos.google.cloud.aiplatform.v1.IEvaluateInstancesRequest,
     responseSchema: ResponseType
   ): Promise<z.infer<ResponseType>> {
     const locationName = `projects/${this.projectId}/locations/${this.location}`;
@@ -75,10 +78,11 @@ export class EvaluatorFactory {
         },
       },
       async (metadata, _otSpan) => {
-        const request = {
-          location: locationName,
-          ...partialRequest,
-        };
+        const request: protos.google.cloud.aiplatform.v1.IEvaluateInstancesRequest =
+          {
+            location: locationName,
+            ...partialRequest,
+          };
 
         metadata.input = request;
         const client = await this.auth.getClient();
diff --git a/js/plugins/vertexai/src/evaluation/types.ts b/js/plugins/vertexai/src/evaluation/types.ts
index 1510c95ece..b55d9f99a3 100644
--- a/js/plugins/vertexai/src/evaluation/types.ts
+++ b/js/plugins/vertexai/src/evaluation/types.ts
@@ -14,8 +14,13 @@
  * limitations under the License.
  */
 
+import type { protos } from '@google-cloud/aiplatform';
 import type { CommonPluginOptions } from '../common/types.js';
 
+/**
+ * Vertex AI Evaluation metrics. See API documentation for more information.
+ * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation#parameter-list
+ */
 export enum VertexAIEvaluationMetricType {
   // Update genkit/docs/plugins/vertex-ai.md when modifying the list of enums
   BLEU = 'BLEU',
@@ -34,10 +39,39 @@ export enum VertexAIEvaluationMetricType {
  * for details on the possible values of `metricSpec` for each metric.
  * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation#parameter-list
  */
-export type VertexAIEvaluationMetricConfig = {
-  type: VertexAIEvaluationMetricType;
-  metricSpec: any;
-};
+export type VertexAIEvaluationMetricConfig =
+  | {
+      type: VertexAIEvaluationMetricType.BLEU;
+      metricSpec: protos.google.cloud.aiplatform.v1.IBleuSpec;
+    }
+  | {
+      type: VertexAIEvaluationMetricType.ROUGE;
+      metricSpec: protos.google.cloud.aiplatform.v1.IRougeSpec;
+    }
+  | {
+      type: VertexAIEvaluationMetricType.FLUENCY;
+      metricSpec: protos.google.cloud.aiplatform.v1.IFluencySpec;
+    }
+  | {
+      type: VertexAIEvaluationMetricType.SAFETY;
+      metricSpec: protos.google.cloud.aiplatform.v1.ISafetySpec;
+    }
+  | {
+      type: VertexAIEvaluationMetricType.GROUNDEDNESS;
+      metricSpec: protos.google.cloud.aiplatform.v1.IGroundednessSpec;
+    }
+  | {
+      type: VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY;
+      metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationQualitySpec;
+    }
+  | {
+      type: VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS;
+      metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationHelpfulnessSpec;
+    }
+  | {
+      type: VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY;
+      metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationVerbositySpec;
+    };
 
 export type VertexAIEvaluationMetric =
   | VertexAIEvaluationMetricType

From db2ee75c894f2bb7e86d97e9415cf8bef4703a30 Mon Sep 17 00:00:00 2001
From: Emily Klassen <emily.klassen@tenthousandcoffees.com>
Date: Tue, 16 Sep 2025 20:52:43 -0700
Subject: [PATCH 2/2] fix(js/vertexai): use correct type for rouge instances

---
 js/plugins/vertexai/src/evaluation/evaluation.ts | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/js/plugins/vertexai/src/evaluation/evaluation.ts b/js/plugins/vertexai/src/evaluation/evaluation.ts
index 65b0b5d7c0..e61c3a713b 100644
--- a/js/plugins/vertexai/src/evaluation/evaluation.ts
+++ b/js/plugins/vertexai/src/evaluation/evaluation.ts
@@ -143,10 +143,12 @@ function createRougeEvaluator(
       return {
         rougeInput: {
           metricSpec,
-          instances: {
-            prediction: stringify(datapoint.output),
-            reference: datapoint.reference as string,
-          },
+          instances: [
+            {
+              prediction: stringify(datapoint.output),
+              reference: datapoint.reference as string,
+            },
+          ],
         },
       };
     },