From cb6d6307a7cb7b11a3855211234b85f3c0f3937c Mon Sep 17 00:00:00 2001 From: Emily Klassen Date: Tue, 16 Sep 2025 20:50:58 -0700 Subject: [PATCH 1/2] refactor(js/vertexai): improve evaluation typings, deduplicate types --- .../vertexai/src/evaluation/evaluation.ts | 62 ++++++------------- .../src/evaluation/evaluator_factory.ts | 16 +++-- js/plugins/vertexai/src/evaluation/types.ts | 42 +++++++++++-- 3 files changed, 67 insertions(+), 53 deletions(-) diff --git a/js/plugins/vertexai/src/evaluation/evaluation.ts b/js/plugins/vertexai/src/evaluation/evaluation.ts index 37d144e64c..65b0b5d7c0 100644 --- a/js/plugins/vertexai/src/evaluation/evaluation.ts +++ b/js/plugins/vertexai/src/evaluation/evaluation.ts @@ -14,40 +14,15 @@ * limitations under the License. */ +import type { protos } from '@google-cloud/aiplatform'; import { z, type Action, type Genkit } from 'genkit'; import type { GoogleAuth } from 'google-auth-library'; import { EvaluatorFactory } from './evaluator_factory.js'; - -/** - * Vertex AI Evaluation metrics. See API documentation for more information. - * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation#parameter-list - */ -export enum VertexAIEvaluationMetricType { - // Update genkit/docs/plugins/vertex-ai.md when modifying the list of enums - BLEU = 'BLEU', - ROUGE = 'ROUGE', - FLUENCY = 'FLEUNCY', - SAFETY = 'SAFETY', - GROUNDEDNESS = 'GROUNDEDNESS', - SUMMARIZATION_QUALITY = 'SUMMARIZATION_QUALITY', - SUMMARIZATION_HELPFULNESS = 'SUMMARIZATION_HELPFULNESS', - SUMMARIZATION_VERBOSITY = 'SUMMARIZATION_VERBOSITY', -} - -/** - * Evaluation metric config. Use `metricSpec` to define the behavior of the metric. - * The value of `metricSpec` will be included in the request to the API. See the API documentation - * for details on the possible values of `metricSpec` for each metric. - * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation#parameter-list - */ -export type VertexAIEvaluationMetricConfig = { - type: VertexAIEvaluationMetricType; - metricSpec: any; -}; - -export type VertexAIEvaluationMetric = - | VertexAIEvaluationMetricType - | VertexAIEvaluationMetricConfig; +import type { VertexAIEvaluationMetricConfig } from './types'; +import { + VertexAIEvaluationMetric, + VertexAIEvaluationMetricType, +} from './types'; function stringify(input: unknown) { return typeof input === 'string' ? input : JSON.stringify(input); @@ -62,8 +37,9 @@ export function vertexEvaluators( ): Action[] { const factory = new EvaluatorFactory(auth, location, projectId); return metrics.map((metric) => { - const metricType = isConfig(metric) ? metric.type : metric; - const metricSpec = isConfig(metric) ? metric.metricSpec : {}; + const { type: metricType, metricSpec } = isConfig(metric) + ? metric + : { type: metric, metricSpec: {} }; switch (metricType) { case VertexAIEvaluationMetricType.BLEU: { @@ -110,7 +86,7 @@ const BleuResponseSchema = z.object({ function createBleuEvaluator( ai: Genkit, factory: EvaluatorFactory, - metricSpec: any + metricSpec: protos.google.cloud.aiplatform.v1.IBleuSpec ): Action { return factory.create( ai, @@ -128,7 +104,7 @@ function createBleuEvaluator( instances: [ { prediction: stringify(datapoint.output), - reference: datapoint.reference, + reference: datapoint.reference as string, }, ], }, @@ -152,7 +128,7 @@ const RougeResponseSchema = z.object({ function createRougeEvaluator( ai: Genkit, factory: EvaluatorFactory, - metricSpec: any + metricSpec: protos.google.cloud.aiplatform.v1.IRougeSpec ): Action { return factory.create( ai, @@ -169,7 +145,7 @@ function createRougeEvaluator( metricSpec, instances: { prediction: stringify(datapoint.output), - reference: datapoint.reference, + reference: datapoint.reference as string, }, }, }; @@ -193,7 +169,7 @@ const FluencyResponseSchema = z.object({ function createFluencyEvaluator( ai: Genkit, factory: EvaluatorFactory, - metricSpec: any + metricSpec: protos.google.cloud.aiplatform.v1.IFluencySpec ): Action { return factory.create( ai, @@ -235,7 +211,7 @@ const SafetyResponseSchema = z.object({ function createSafetyEvaluator( ai: Genkit, factory: EvaluatorFactory, - metricSpec: any + metricSpec: protos.google.cloud.aiplatform.v1.ISafetySpec ): Action { return factory.create( ai, @@ -277,7 +253,7 @@ const GroundednessResponseSchema = z.object({ function createGroundednessEvaluator( ai: Genkit, factory: EvaluatorFactory, - metricSpec: any + metricSpec: protos.google.cloud.aiplatform.v1.IGroundednessSpec ): Action { return factory.create( ai, @@ -321,7 +297,7 @@ const SummarizationQualityResponseSchema = z.object({ function createSummarizationQualityEvaluator( ai: Genkit, factory: EvaluatorFactory, - metricSpec: any + metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationQualitySpec ): Action { return factory.create( ai, @@ -365,7 +341,7 @@ const SummarizationHelpfulnessResponseSchema = z.object({ function createSummarizationHelpfulnessEvaluator( ai: Genkit, factory: EvaluatorFactory, - metricSpec: any + metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationHelpfulnessSpec ): Action { return factory.create( ai, @@ -410,7 +386,7 @@ const SummarizationVerbositySchema = z.object({ function createSummarizationVerbosityEvaluator( ai: Genkit, factory: EvaluatorFactory, - metricSpec: any + metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationVerbositySpec ): Action { return factory.create( ai, diff --git a/js/plugins/vertexai/src/evaluation/evaluator_factory.ts b/js/plugins/vertexai/src/evaluation/evaluator_factory.ts index de0c0c939e..76f47061d4 100644 --- a/js/plugins/vertexai/src/evaluation/evaluator_factory.ts +++ b/js/plugins/vertexai/src/evaluation/evaluator_factory.ts @@ -14,6 +14,7 @@ * limitations under the License. */ +import type { protos } from '@google-cloud/aiplatform'; import { type Action, type Genkit, type z } from 'genkit'; import type { BaseEvalDataPoint, Score } from 'genkit/evaluator'; import { runInNewSpan } from 'genkit/tracing'; @@ -36,7 +37,9 @@ export class EvaluatorFactory { definition: string; responseSchema: ResponseType; }, - toRequest: (datapoint: BaseEvalDataPoint) => any, + toRequest: ( + datapoint: BaseEvalDataPoint + ) => protos.google.cloud.aiplatform.v1.IEvaluateInstancesRequest, responseHandler: (response: z.infer) => Score ): Action { return ai.defineEvaluator( @@ -63,7 +66,7 @@ export class EvaluatorFactory { async evaluateInstances( ai: Genkit, - partialRequest: any, + partialRequest: protos.google.cloud.aiplatform.v1.IEvaluateInstancesRequest, responseSchema: ResponseType ): Promise> { const locationName = `projects/${this.projectId}/locations/${this.location}`; @@ -75,10 +78,11 @@ export class EvaluatorFactory { }, }, async (metadata, _otSpan) => { - const request = { - location: locationName, - ...partialRequest, - }; + const request: protos.google.cloud.aiplatform.v1.IEvaluateInstancesRequest = + { + location: locationName, + ...partialRequest, + }; metadata.input = request; const client = await this.auth.getClient(); diff --git a/js/plugins/vertexai/src/evaluation/types.ts b/js/plugins/vertexai/src/evaluation/types.ts index 1510c95ece..b55d9f99a3 100644 --- a/js/plugins/vertexai/src/evaluation/types.ts +++ b/js/plugins/vertexai/src/evaluation/types.ts @@ -14,8 +14,13 @@ * limitations under the License. */ +import type { protos } from '@google-cloud/aiplatform'; import type { CommonPluginOptions } from '../common/types.js'; +/** + * Vertex AI Evaluation metrics. See API documentation for more information. + * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation#parameter-list + */ export enum VertexAIEvaluationMetricType { // Update genkit/docs/plugins/vertex-ai.md when modifying the list of enums BLEU = 'BLEU', @@ -34,10 +39,39 @@ export enum VertexAIEvaluationMetricType { * for details on the possible values of `metricSpec` for each metric. * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation#parameter-list */ -export type VertexAIEvaluationMetricConfig = { - type: VertexAIEvaluationMetricType; - metricSpec: any; -}; +export type VertexAIEvaluationMetricConfig = + | { + type: VertexAIEvaluationMetricType.BLEU; + metricSpec: protos.google.cloud.aiplatform.v1.IBleuSpec; + } + | { + type: VertexAIEvaluationMetricType.ROUGE; + metricSpec: protos.google.cloud.aiplatform.v1.IRougeSpec; + } + | { + type: VertexAIEvaluationMetricType.FLUENCY; + metricSpec: protos.google.cloud.aiplatform.v1.IFluencySpec; + } + | { + type: VertexAIEvaluationMetricType.SAFETY; + metricSpec: protos.google.cloud.aiplatform.v1.ISafetySpec; + } + | { + type: VertexAIEvaluationMetricType.GROUNDEDNESS; + metricSpec: protos.google.cloud.aiplatform.v1.IGroundednessSpec; + } + | { + type: VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY; + metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationQualitySpec; + } + | { + type: VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS; + metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationHelpfulnessSpec; + } + | { + type: VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY; + metricSpec: protos.google.cloud.aiplatform.v1.ISummarizationVerbositySpec; + }; export type VertexAIEvaluationMetric = | VertexAIEvaluationMetricType From db2ee75c894f2bb7e86d97e9415cf8bef4703a30 Mon Sep 17 00:00:00 2001 From: Emily Klassen Date: Tue, 16 Sep 2025 20:52:43 -0700 Subject: [PATCH 2/2] fix(js/vertexai): use correct type for rouge instances --- js/plugins/vertexai/src/evaluation/evaluation.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/js/plugins/vertexai/src/evaluation/evaluation.ts b/js/plugins/vertexai/src/evaluation/evaluation.ts index 65b0b5d7c0..e61c3a713b 100644 --- a/js/plugins/vertexai/src/evaluation/evaluation.ts +++ b/js/plugins/vertexai/src/evaluation/evaluation.ts @@ -143,10 +143,12 @@ function createRougeEvaluator( return { rougeInput: { metricSpec, - instances: { - prediction: stringify(datapoint.output), - reference: datapoint.reference as string, - }, + instances: [ + { + prediction: stringify(datapoint.output), + reference: datapoint.reference as string, + }, + ], }, }; },