From 2932dfff9b373c8ebabde7df0321c95e09ffa9a2 Mon Sep 17 00:00:00 2001 From: Bryan Atkinson Date: Wed, 26 Jun 2024 19:00:02 +0000 Subject: [PATCH 1/2] Add a status label to metrics so that they can be filtered by success/failure state. --- js/ai/src/telemetry.ts | 2 ++ js/core/src/telemetry.ts | 2 ++ js/flow/src/telemetry.ts | 9 ++++++--- js/plugins/google-cloud/tests/metrics_test.ts | 10 +++++++++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/js/ai/src/telemetry.ts b/js/ai/src/telemetry.ts index 3362e401a3..c832e06064 100644 --- a/js/ai/src/telemetry.ts +++ b/js/ai/src/telemetry.ts @@ -138,6 +138,7 @@ type SharedDimensions = { temperature?: number; topK?: number; topP?: number; + status?: string; source?: string; sourceVersion?: string; }; @@ -340,6 +341,7 @@ function doRecordGenerateActionMetrics( topP: dimensions.topP, source: dimensions.source, sourceVersion: dimensions.sourceVersion, + status: dimensions.err ? 'failure' : 'success', }; generateActionCounter.add(1, { diff --git a/js/core/src/telemetry.ts b/js/core/src/telemetry.ts index 467c799cf0..b22efabc3e 100644 --- a/js/core/src/telemetry.ts +++ b/js/core/src/telemetry.ts @@ -47,6 +47,7 @@ export function writeActionSuccess(actionName: string, latencyMs: number) { name: actionName, flowName: traceMetadataAls?.getStore()?.flowName, path: spanMetadataAls?.getStore()?.path, + status: 'success', source: 'ts', sourceVersion: GENKIT_VERSION, }; @@ -65,6 +66,7 @@ export function writeActionFailure( path: spanMetadataAls?.getStore()?.path, source: 'ts', sourceVersion: GENKIT_VERSION, + status: 'failure', error: err?.name, }; actionCounter.add(1, dimensions); diff --git a/js/flow/src/telemetry.ts b/js/flow/src/telemetry.ts index f82bbe4ce5..2c6ea9b635 100644 --- a/js/flow/src/telemetry.ts +++ b/js/flow/src/telemetry.ts @@ -71,6 +71,7 @@ export function recordError(err: any) { export function writeFlowSuccess(flowName: string, latencyMs: number) { const dimensions = { name: flowName, + status: 'success', source: 'ts', sourceVersion: GENKIT_VERSION, }; @@ -81,6 +82,7 @@ export function writeFlowSuccess(flowName: string, latencyMs: number) { if (paths) { const pathDimensions = { flowName: flowName, + status: 'success', source: 'ts', sourceVersion: GENKIT_VERSION, }; @@ -96,7 +98,6 @@ export function writeFlowSuccess(flowName: string, latencyMs: number) { relevantPaths.forEach((p) => { pathCounter.add(1, { ...pathDimensions, - success: 'success', path: p.path, }); @@ -115,6 +116,7 @@ export function writeFlowFailure( ) { const dimensions = { name: flowName, + status: 'failure', source: 'ts', sourceVersion: GENKIT_VERSION, error: err.name, @@ -144,19 +146,20 @@ export function writeFlowFailure( relevantPaths.forEach((p) => { pathCounter.add(1, { ...pathDimensions, - success: 'success', + status: 'success', path: p.path, }); pathLatencies.record(p.latency, { ...pathDimensions, + status: 'success', path: p.path, }); }); pathCounter.add(1, { ...pathDimensions, - success: 'failure', + status: 'failure', error: err.name, path: failPath, }); diff --git a/js/plugins/google-cloud/tests/metrics_test.ts b/js/plugins/google-cloud/tests/metrics_test.ts index e74e1fe72c..360a54aef5 100644 --- a/js/plugins/google-cloud/tests/metrics_test.ts +++ b/js/plugins/google-cloud/tests/metrics_test.ts @@ -80,10 +80,12 @@ describe('GoogleCloudMetrics', () => { assert.equal(requestCounter.value, 2); assert.equal(requestCounter.attributes.name, 'testFlow'); assert.equal(requestCounter.attributes.source, 'ts'); + assert.equal(requestCounter.attributes.status, 'success'); assert.ok(requestCounter.attributes.sourceVersion); assert.equal(latencyHistogram.value.count, 2); assert.equal(latencyHistogram.attributes.name, 'testFlow'); assert.equal(latencyHistogram.attributes.source, 'ts'); + assert.equal(latencyHistogram.attributes.status, 'success'); assert.ok(latencyHistogram.attributes.sourceVersion); }); @@ -102,6 +104,7 @@ describe('GoogleCloudMetrics', () => { assert.equal(requestCounter.attributes.name, 'testFlow'); assert.equal(requestCounter.attributes.source, 'ts'); assert.equal(requestCounter.attributes.error, 'TypeError'); + assert.equal(requestCounter.attributes.status, 'failure'); }); it('writes action metrics', async () => { @@ -122,10 +125,12 @@ describe('GoogleCloudMetrics', () => { assert.equal(requestCounter.value, 6); assert.equal(requestCounter.attributes.name, 'testAction'); assert.equal(requestCounter.attributes.source, 'ts'); + assert.equal(requestCounter.attributes.status, 'success'); assert.ok(requestCounter.attributes.sourceVersion); assert.equal(latencyHistogram.value.count, 6); assert.equal(latencyHistogram.attributes.name, 'testAction'); assert.equal(latencyHistogram.attributes.source, 'ts'); + assert.equal(latencyHistogram.attributes.status, 'success'); assert.ok(latencyHistogram.attributes.sourceVersion); }); @@ -163,6 +168,7 @@ describe('GoogleCloudMetrics', () => { assert.equal(requestCounter.value, 1); assert.equal(requestCounter.attributes.name, 'testActionWithFailure'); assert.equal(requestCounter.attributes.source, 'ts'); + assert.equal(requestCounter.attributes.status, 'failure'); assert.equal(requestCounter.attributes.error, 'TypeError'); }); @@ -253,6 +259,7 @@ describe('GoogleCloudMetrics', () => { assert.equal(metric.attributes.topK, 3); assert.equal(metric.attributes.topP, 5); assert.equal(metric.attributes.source, 'ts'); + assert.equal(metric.attributes.status, 'success'); assert.ok(metric.attributes.sourceVersion); } }); @@ -285,6 +292,7 @@ describe('GoogleCloudMetrics', () => { assert.equal(requestCounter.attributes.topK, 3); assert.equal(requestCounter.attributes.topP, 5); assert.equal(requestCounter.attributes.source, 'ts'); + assert.equal(requestCounter.attributes.status, 'failure'); assert.equal(requestCounter.attributes.error, 'TypeError'); assert.ok(requestCounter.attributes.sourceVersion); }); @@ -382,7 +390,7 @@ describe('GoogleCloudMetrics', () => { assert.equal(point.value, 1); assert.equal(point.attributes.flowName, 'pathTestFlow'); assert.equal(point.attributes.source, 'ts'); - assert.equal(point.attributes.success, 'success'); + assert.equal(point.attributes.status, 'success'); assert.ok(point.attributes.sourceVersion); }); }); From dbb4b283aac20ff03f99f786c7d1622998b351ac Mon Sep 17 00:00:00 2001 From: Bryan Atkinson Date: Wed, 26 Jun 2024 20:03:35 +0000 Subject: [PATCH 2/2] Add latency for failed paths. --- js/flow/src/telemetry.ts | 7 +++ js/plugins/google-cloud/tests/metrics_test.ts | 62 +++++++++++++++++-- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/js/flow/src/telemetry.ts b/js/flow/src/telemetry.ts index 2c6ea9b635..5d65314a6a 100644 --- a/js/flow/src/telemetry.ts +++ b/js/flow/src/telemetry.ts @@ -163,6 +163,13 @@ export function writeFlowFailure( error: err.name, path: failPath, }); + + pathLatencies.record(latencyMs, { + ...pathDimensions, + status: 'failure', + error: err.name, + path: failPath, + }); } } diff --git a/js/plugins/google-cloud/tests/metrics_test.ts b/js/plugins/google-cloud/tests/metrics_test.ts index 360a54aef5..6170f60a22 100644 --- a/js/plugins/google-cloud/tests/metrics_test.ts +++ b/js/plugins/google-cloud/tests/metrics_test.ts @@ -382,6 +382,9 @@ describe('GoogleCloudMetrics', () => { const pathCounterPoints = await getCounterDataPoints( 'genkit/flow/path/requests' ); + const pathLatencyPoints = await getHistogramDataPoints( + 'genkit/flow/path/latency' + ); const paths = new Set( pathCounterPoints.map((point) => point.attributes.path) ); @@ -393,6 +396,47 @@ describe('GoogleCloudMetrics', () => { assert.equal(point.attributes.status, 'success'); assert.ok(point.attributes.sourceVersion); }); + pathLatencyPoints.forEach((point) => { + assert.equal(point.value.count, 1); + assert.equal(point.attributes.flowName, 'pathTestFlow'); + assert.equal(point.attributes.source, 'ts'); + assert.equal(point.attributes.status, 'success'); + assert.ok(point.attributes.sourceVersion); + }); + }); + + it('writes flow path failure metrics', async () => { + const flow = createFlow('testFlow', async () => { + const subPath = await run('sub-action', async () => { + return 'done'; + }); + return Promise.reject(new Error('failed')); + }); + + assert.rejects(async () => { + await runFlow(flow); + }); + + const reqPoints = await getCounterDataPoints('genkit/flow/path/requests'); + const reqStatuses = reqPoints.map((p) => [ + p.attributes.path, + p.attributes.status, + ]); + assert.deepEqual(reqStatuses, [ + ['/{testFlow,t:flow}/{sub-action,t:flowStep}', 'success'], + ['/{testFlow,t:flow}', 'failure'], + ]); + const latencyPoints = await getHistogramDataPoints( + 'genkit/flow/path/latency' + ); + const latencyStatuses = latencyPoints.map((p) => [ + p.attributes.path, + p.attributes.status, + ]); + assert.deepEqual(latencyStatuses, [ + ['/{testFlow,t:flow}/{sub-action,t:flowStep}', 'success'], + ['/{testFlow,t:flow}', 'failure'], + ]); }); describe('Configuration', () => { @@ -461,23 +505,33 @@ describe('GoogleCloudMetrics', () => { return getCounterDataPoints(metricName).then((points) => points.at(-1)); } - /** Finds a histogram metric with the given name in the in memory exporter */ - async function getHistogramMetric( + /** + * Finds all datapoints for a histogram metric with the given name in the in + * memory exporter. + */ + async function getHistogramDataPoints( metricName: string - ): Promise> { + ): Promise>> { const genkitMetrics = await getGenkitMetrics(); const histogramMetric: HistogramMetricData = genkitMetrics.metrics.find( (e) => e.descriptor.name === metricName && e.descriptor.type === 'HISTOGRAM' ); if (histogramMetric) { - return histogramMetric.dataPoints.at(-1); + return histogramMetric.dataPoints; } assert.fail( `No histogram metric named ${metricName} was found. Only found: ${genkitMetrics.metrics.map((e) => e.descriptor.name)}` ); } + /** Finds a histogram metric with the given name in the in memory exporter */ + async function getHistogramMetric( + metricName: string + ): Promise> { + return getHistogramDataPoints(metricName).then((points) => points.at(-1)); + } + /** Helper to create a flow with no inputs or outputs */ function createFlow(name: string, fn: () => Promise = async () => {}) { return defineFlow(