Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/plugins/vertex-ai.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ It also provides access to subset of evaluation metrics through the Vertex AI [R
- [Fluency](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#fluencyinput)
- [Safety](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#safetyinput)
- [Groundedness](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#groundednessinput)
- [Summarization Quality](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationqualityinput)
- [Summarization Helpfulness](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationhelpfulnessinput)
- [Summarization Verbosity](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationverbosityinput)

## Installation

Expand Down
147 changes: 139 additions & 8 deletions js/plugins/vertexai/src/evaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ export enum VertexAIEvaluationMetricType {
FLUENCY = 'FLEUNCY',
SAFETY = 'SAFETY',
GROUNDEDNESS = 'GROUNDEDNESS',
SUMMARIZATION_QUALITY = 'SUMMARIZATION_QUALITY',
SUMMARIZATION_HELPFULNESS = 'SUMMARIZATION_HELPFULNESS',
SUMMARIZATION_VERBOSITY = 'SUMMARIZATION_VERBOSITY',
}

/**
Expand Down Expand Up @@ -75,6 +78,15 @@ export function vertexEvaluators(
case VertexAIEvaluationMetricType.GROUNDEDNESS: {
return createGroundednessEvaluator(factory, metricSpec);
}
case VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY: {
return createSummarizationQualityEvaluator(factory, metricSpec);
}
case VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS: {
return createSummarizationHelpfulnessEvaluator(factory, metricSpec);
}
case VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY: {
return createSummarizationVerbosityEvaluator(factory, metricSpec);
}
}
});
}
Expand Down Expand Up @@ -105,10 +117,6 @@ function createBleuEvaluator(
responseSchema: BleuResponseSchema,
},
(datapoint) => {
if (!datapoint.reference) {
throw new Error('Reference is required');
}

return {
bleuInput: {
metricSpec,
Expand Down Expand Up @@ -149,10 +157,6 @@ function createRougeEvaluator(
responseSchema: RougeResponseSchema,
},
(datapoint) => {
if (!datapoint.reference) {
throw new Error('Reference is required');
}

return {
rougeInput: {
metricSpec,
Expand Down Expand Up @@ -292,3 +296,130 @@ function createGroundednessEvaluator(
}
);
}

/**
 * Zod schema describing the response payload expected for the
 * summarization-quality metric.
 *
 * The payload is an object with a single `summarizationQualityResult` key
 * holding a numeric `score`, a string `explanation`, and a numeric
 * `confidence`. All three fields are required by this schema.
 */
const SummarizationQualityResponseSchema = z.object({
summarizationQualityResult: z.object({
score: z.number(),
explanation: z.string(),
confidence: z.number(),
}),
});

/**
 * Builds the evaluator action for the summarization-quality metric.
 *
 * Delegates to `factory.create` with three pieces: the metric descriptor
 * (including the response schema), a function that turns a datapoint into a
 * `summarizationQualityInput` request body, and a function that maps the
 * validated response to a `{ score, details }` result.
 *
 * @param factory - Factory used to create the evaluator action.
 * @param metricSpec - Metric specification forwarded verbatim in the request.
 * @returns The action produced by `factory.create`.
 */
function createSummarizationQualityEvaluator(
  factory: EvaluatorFactory,
  metricSpec: any
): Action {
  return factory.create(
    {
      metric: VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY,
      displayName: 'Summarization quality',
      definition: 'Assesses the overall ability to summarize text',
      responseSchema: SummarizationQualityResponseSchema,
    },
    // Request builder: `output` and `input` are asserted (not checked) to be
    // strings; context entries are joined with '. ' and the field stays
    // undefined when the datapoint carries no context.
    (datapoint) => ({
      summarizationQualityInput: {
        metricSpec,
        instance: {
          prediction: datapoint.output as string,
          instruction: datapoint.input as string,
          context: datapoint.context?.join('. '),
        },
      },
    }),
    // Response mapper: surfaces the score and the explanation; the
    // `confidence` field is validated by the schema but not returned.
    ({ summarizationQualityResult }) => ({
      score: summarizationQualityResult.score,
      details: {
        reasoning: summarizationQualityResult.explanation,
      },
    })
  );
}

/**
 * Zod schema describing the response payload expected for the
 * summarization-helpfulness metric.
 *
 * The payload is an object with a single `summarizationHelpfulnessResult`
 * key holding a numeric `score`, a string `explanation`, and a numeric
 * `confidence`. All three fields are required by this schema.
 */
const SummarizationHelpfulnessResponseSchema = z.object({
summarizationHelpfulnessResult: z.object({
score: z.number(),
explanation: z.string(),
confidence: z.number(),
}),
});

/**
 * Builds the evaluator action for the summarization-helpfulness metric.
 *
 * Hands `factory.create` the metric descriptor, a request builder that wraps
 * the datapoint in a `summarizationHelpfulnessInput` body, and a response
 * mapper that extracts the score and explanation.
 *
 * @param factory - Factory used to create the evaluator action.
 * @param metricSpec - Metric specification forwarded verbatim in the request.
 * @returns The action produced by `factory.create`.
 */
function createSummarizationHelpfulnessEvaluator(
  factory: EvaluatorFactory,
  metricSpec: any
): Action {
  return factory.create(
    {
      metric: VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS,
      displayName: 'Summarization helpfulness',
      definition:
        'Assesses the ability to provide a summarization, which contains the details necessary to substitute the original text',
      responseSchema: SummarizationHelpfulnessResponseSchema,
    },
    (datapoint) => {
      // `output` / `input` are asserted to be strings at compile time only.
      // Context entries are joined with '. '; a missing context yields
      // `undefined` rather than an empty string.
      const instance = {
        prediction: datapoint.output as string,
        instruction: datapoint.input as string,
        context: datapoint.context?.join('. '),
      };
      return { summarizationHelpfulnessInput: { metricSpec, instance } };
    },
    (response) => {
      // Only score and explanation are surfaced; `confidence` is dropped.
      const result = response.summarizationHelpfulnessResult;
      return {
        score: result.score,
        details: { reasoning: result.explanation },
      };
    }
  );
}

/**
 * Zod schema describing the response payload expected for the
 * summarization-verbosity metric.
 *
 * The payload is an object with a single `summarizationVerbosityResult` key
 * holding a numeric `score`, a string `explanation`, and a numeric
 * `confidence`. Named with the `ResponseSchema` suffix to match the other
 * response schemas in this file.
 */
const SummarizationVerbosityResponseSchema = z.object({
  summarizationVerbosityResult: z.object({
    score: z.number(),
    explanation: z.string(),
    confidence: z.number(),
  }),
});

/**
 * Builds the evaluator action for the summarization-verbosity metric.
 *
 * Delegates to `factory.create` with the metric descriptor, a request
 * builder that wraps the datapoint in a `summarizationVerbosityInput` body,
 * and a response mapper that extracts the score and explanation.
 *
 * @param factory - Factory used to create the evaluator action.
 * @param metricSpec - Metric specification forwarded verbatim in the request.
 * @returns The action produced by `factory.create`.
 */
function createSummarizationVerbosityEvaluator(
  factory: EvaluatorFactory,
  metricSpec: any
): Action {
  return factory.create(
    {
      metric: VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY,
      displayName: 'Summarization verbosity',
      // Fixed typo ("Aassess") and aligned the verb form with the sibling
      // evaluators' definitions ("Assesses ...").
      definition: 'Assesses the ability to provide a succinct summarization',
      responseSchema: SummarizationVerbosityResponseSchema,
    },
    (datapoint) => {
      return {
        summarizationVerbosityInput: {
          metricSpec,
          instance: {
            // `output` / `input` are asserted to be strings at compile time
            // only; no runtime check is performed here.
            prediction: datapoint.output as string,
            instruction: datapoint.input as string,
            // Context entries are joined with '. '; stays undefined when the
            // datapoint has no context.
            context: datapoint.context?.join('. '),
          },
        },
      };
    },
    (response) => {
      // Only score and explanation are surfaced; `confidence` is validated
      // by the schema but not returned.
      return {
        score: response.summarizationVerbosityResult.score,
        details: {
          reasoning: response.summarizationVerbosityResult.explanation,
        },
      };
    }
  );
}