diff --git a/docs/plugins/vertex-ai.md b/docs/plugins/vertex-ai.md index 75972257ec..ac8d5fdc2a 100644 --- a/docs/plugins/vertex-ai.md +++ b/docs/plugins/vertex-ai.md @@ -14,6 +14,9 @@ It also provides access to subset of evaluation metrics through the Vertex AI [R - [Fluency](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#fluencyinput) - [Safety](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#safetyinput) - [Groundeness](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#groundednessinput) +- [Summarization Quality](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationqualityinput) +- [Summarization Helpfulness](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationhelpfulnessinput) +- [Summarization Verbosity](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationverbosityinput) ## Installation diff --git a/js/plugins/vertexai/src/evaluation.ts b/js/plugins/vertexai/src/evaluation.ts index ea494bcb61..57d886b010 100644 --- a/js/plugins/vertexai/src/evaluation.ts +++ b/js/plugins/vertexai/src/evaluation.ts @@ -31,6 +31,9 @@ export enum VertexAIEvaluationMetricType { FLUENCY = 'FLEUNCY', SAFETY = 'SAFETY', GROUNDEDNESS = 'GROUNDEDNESS', + SUMMARIZATION_QUALITY = 'SUMMARIZATION_QUALITY', + SUMMARIZATION_HELPFULNESS = 'SUMMARIZATION_HELPFULNESS', + SUMMARIZATION_VERBOSITY = 'SUMMARIZATION_VERBOSITY', } /** @@ -75,6 +78,15 @@ export function vertexEvaluators( case VertexAIEvaluationMetricType.GROUNDEDNESS: { return createGroundednessEvaluator(factory, metricSpec); } + case VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY: { + return createSummarizationQualityEvaluator(factory, metricSpec); + } + case 
VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS: { + return createSummarizationHelpfulnessEvaluator(factory, metricSpec); + } + case VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY: { + return createSummarizationVerbosityEvaluator(factory, metricSpec); + } } }); } @@ -105,10 +117,6 @@ function createBleuEvaluator( responseSchema: BleuResponseSchema, }, (datapoint) => { - if (!datapoint.reference) { - throw new Error('Reference is required'); - } - return { bleuInput: { metricSpec, @@ -149,10 +157,6 @@ function createRougeEvaluator( responseSchema: RougeResponseSchema, }, (datapoint) => { - if (!datapoint.reference) { - throw new Error('Reference is required'); - } - return { rougeInput: { metricSpec, @@ -292,3 +296,130 @@ function createGroundednessEvaluator( } ); } + +const SummarizationQualityResponseSchema = z.object({ + summarizationQualityResult: z.object({ + score: z.number(), + explanation: z.string(), + confidence: z.number(), + }), +}); + +function createSummarizationQualityEvaluator( + factory: EvaluatorFactory, + metricSpec: any +): Action { + return factory.create( + { + metric: VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY, + displayName: 'Summarization quality', + definition: 'Assesses the overall ability to summarize text', + responseSchema: SummarizationQualityResponseSchema, + }, + (datapoint) => { + return { + summarizationQualityInput: { + metricSpec, + instance: { + prediction: datapoint.output as string, + instruction: datapoint.input as string, + context: datapoint.context?.join('. 
'), + }, + }, + }; + }, + (response) => { + return { + score: response.summarizationQualityResult.score, + details: { + reasoning: response.summarizationQualityResult.explanation, + }, + }; + } + ); +} + +const SummarizationHelpfulnessResponseSchema = z.object({ + summarizationHelpfulnessResult: z.object({ + score: z.number(), + explanation: z.string(), + confidence: z.number(), + }), +}); + +function createSummarizationHelpfulnessEvaluator( + factory: EvaluatorFactory, + metricSpec: any +): Action { + return factory.create( + { + metric: VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS, + displayName: 'Summarization helpfulness', + definition: + 'Assesses the ability to provide a summarization, which contains the details necessary to substitute the original text', + responseSchema: SummarizationHelpfulnessResponseSchema, + }, + (datapoint) => { + return { + summarizationHelpfulnessInput: { + metricSpec, + instance: { + prediction: datapoint.output as string, + instruction: datapoint.input as string, + context: datapoint.context?.join('. 
'), + }, + }, + }; + }, + (response) => { + return { + score: response.summarizationHelpfulnessResult.score, + details: { + reasoning: response.summarizationHelpfulnessResult.explanation, + }, + }; + } + ); +} + +const SummarizationVerbositySchema = z.object({ + summarizationVerbosityResult: z.object({ + score: z.number(), + explanation: z.string(), + confidence: z.number(), + }), +}); + +function createSummarizationVerbosityEvaluator( + factory: EvaluatorFactory, + metricSpec: any +): Action { + return factory.create( + { + metric: VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY, + displayName: 'Summarization verbosity', + definition: 'Assesses the ability to provide a succinct summarization', + responseSchema: SummarizationVerbositySchema, + }, + (datapoint) => { + return { + summarizationVerbosityInput: { + metricSpec, + instance: { + prediction: datapoint.output as string, + instruction: datapoint.input as string, + context: datapoint.context?.join('. '), + }, + }, + }; + }, + (response) => { + return { + score: response.summarizationVerbosityResult.score, + details: { + reasoning: response.summarizationVerbosityResult.explanation, + }, + }; + } + ); +}