Skip to content

Commit

Permalink
[Security solution] Knowledge base entry telemetry (elastic#199225)
Browse files Browse the repository at this point in the history
  • Loading branch information
stephmilovic authored Nov 12, 2024
1 parent f54b951 commit 1127bf4
Show file tree
Hide file tree
Showing 16 changed files with 578 additions and 29 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export { TelemetryTracer } from './telemetry_tracer';
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { AnalyticsServiceSetup, Logger } from '@kbn/core/server';
import { TelemetryTracer, TelemetryParams } from './telemetry_tracer';
import { Run } from 'langsmith/schemas';
import { loggerMock } from '@kbn/logging-mocks';

// Fixture: a run without a `parent_run_id` whose `outputs.steps` holds six tool steps:
// 2 x KnowledgeBaseRetrievalTool, 1 x NaturalLanguageESQLTool and 3 x CustomIndexTool.
// CustomIndexTool is not listed in `elasticTools` below, so the tracer is expected to
// count those three steps under the anonymized `CustomTool` key.
const mockRun = {
inputs: {
responseLanguage: 'English',
conversationId: 'db8f74c5-7dca-43a3-b592-d56f219dffab',
llmType: 'openai',
isStream: false,
isOssModel: false,
},
outputs: {
input:
'Generate an ESQL query to find documents with `host.name` that contains my favorite color',
lastNode: 'agent',
steps: [
{
action: {
tool: 'KnowledgeBaseRetrievalTool',
toolInput: {
query: "user's favorite color",
},
},
observation:
'"[{\\"pageContent\\":\\"favorite color is blue\\",\\"metadata\\":{\\"source\\":\\"conversation\\",\\"required\\":false,\\"kbResource\\":\\"user\\"}},{\\"pageContent\\":\\"favorite food is pizza\\",\\"metadata\\":{\\"source\\":\\"conversation\\",\\"required\\":false,\\"kbResource\\":\\"user\\"}}]"',
},
{
action: {
tool: 'NaturalLanguageESQLTool',
toolInput: {
question: 'Generate an ESQL query to find documents with host.name that contains blue',
},
},
observation:
'"To find documents with `host.name` that contains \\"blue\\", you can use the `LIKE` operator with wildcards. Here is the ES|QL query:\\n\\n```esql\\nFROM your_index\\n| WHERE host.name LIKE \\"*blue*\\"\\n```\\n\\nReplace `your_index` with the actual name of your index. This query will filter documents where the `host.name` field contains the substring \\"blue\\"."',
},
{
action: {
tool: 'KnowledgeBaseRetrievalTool',
toolInput: {
query: "user's favorite food",
},
},
observation:
'"[{\\"pageContent\\":\\"favorite color is blue\\",\\"metadata\\":{\\"source\\":\\"conversation\\",\\"required\\":false,\\"kbResource\\":\\"user\\"}},{\\"pageContent\\":\\"favorite food is pizza\\",\\"metadata\\":{\\"source\\":\\"conversation\\",\\"required\\":false,\\"kbResource\\":\\"user\\"}}]"',
},
{
action: {
tool: 'CustomIndexTool',
toolInput: {
query: 'query about index',
},
},
observation: '"Wow this is totally cool."',
},
{
action: {
tool: 'CustomIndexTool',
toolInput: {
query: 'query about index',
},
},
observation: '"Wow this is totally cool."',
},
{
action: {
tool: 'CustomIndexTool',
toolInput: {
query: 'query about index',
},
},
observation: '"Wow this is totally cool."',
},
],
hasRespondStep: false,
agentOutcome: {
returnValues: {
output:
'To find documents with `host.name` that contains your favorite color "blue", you can use the `LIKE` operator with wildcards. Here is the ES|QL query:\n\n```esql\nFROM your_index\n| WHERE host.name LIKE "*blue*"\n```\n\nReplace `your_index` with the actual name of your index. This query will filter documents where the `host.name` field contains the substring "blue".',
},
log: 'To find documents with `host.name` that contains your favorite color "blue", you can use the `LIKE` operator with wildcards. Here is the ES|QL query:\n\n```esql\nFROM your_index\n| WHERE host.name LIKE "*blue*"\n```\n\nReplace `your_index` with the actual name of your index. This query will filter documents where the `host.name` field contains the substring "blue".',
},
messages: [],
chatTitle: 'Welcome',
llmType: 'openai',
isStream: false,
isOssModel: false,
conversation: {
timestamp: '2024-11-07T17:37:07.400Z',
createdAt: '2024-11-07T17:37:07.400Z',
users: [
{
id: 'u_mGBROF_q5bmFCATbLXAcCwKa0k8JvONAwSruelyKA5E_0',
name: 'elastic',
},
],
title: 'Welcome',
category: 'assistant',
apiConfig: {
connectorId: 'my-gpt4o-ai',
actionTypeId: '.gen-ai',
},
isDefault: true,
messages: [
{
timestamp: '2024-11-07T22:47:45.994Z',
content:
'Generate an ESQL query to find documents with `host.name` that contains my favorite color',
role: 'user',
},
],
updatedAt: '2024-11-08T17:01:21.958Z',
replacements: {},
namespace: 'default',
id: 'db8f74c5-7dca-43a3-b592-d56f219dffab',
},
conversationId: 'db8f74c5-7dca-43a3-b592-d56f219dffab',
responseLanguage: 'English',
},
// end_time - start_time = 8077 ms, which is the `durationMs` asserted in the test below.
end_time: 1731085297190,
start_time: 1731085289113,
// NOTE(review): the double cast suggests the fixture only fills in a subset of `Run` — confirm.
} as unknown as Run;
// Tool names handed to the tracer as `elasticTools`; steps using any of these names are
// expected to be reported under their own name (see the `toolsInvoked` assertion below).
const elasticTools = [
'AlertCountsTool',
'NaturalLanguageESQLTool',
'KnowledgeBaseRetrievalTool',
'KnowledgeBaseWriteTool',
'OpenAndAcknowledgedAlertsTool',
'SecurityLabsKnowledgeBaseTool',
];
// Single logger mock shared by every test in this file.
const mockLogger = loggerMock.create();

describe('TelemetryTracer', () => {
  let telemetry: AnalyticsServiceSetup;
  let logger: Logger;
  let telemetryParams: TelemetryParams;
  let telemetryTracer: TelemetryTracer;
  const reportEvent = jest.fn();

  beforeEach(() => {
    // `reportEvent` and the logger mock are shared across tests. Clear their call history
    // so the `not.toHaveBeenCalled` assertions do not depend on test execution order.
    // (clearAllMocks only resets recorded calls; mock implementations are kept.)
    jest.clearAllMocks();

    telemetry = {
      reportEvent,
    } as unknown as AnalyticsServiceSetup;
    logger = mockLogger;
    telemetryParams = {
      eventType: 'INVOKE_AI_SUCCESS',
      assistantStreamingEnabled: true,
      actionTypeId: '.gen-ai',
      isEnabledKnowledgeBase: true,
      model: 'test_model',
    };
    telemetryTracer = new TelemetryTracer(
      {
        elasticTools,
        telemetry,
        telemetryParams,
        totalTools: 9,
      },
      logger
    );
  });

  it('should initialize correctly', () => {
    expect(telemetryTracer.name).toBe('telemetry_tracer');
    expect(telemetryTracer.elasticTools).toEqual(elasticTools);
    expect(telemetryTracer.telemetry).toBe(telemetry);
    expect(telemetryTracer.telemetryParams).toBe(telemetryParams);
    expect(telemetryTracer.totalTools).toBe(9);
  });

  it('should not log and report event on chain end if parent_run_id exists', async () => {
    // A run with a parent is a nested (child) run and must be ignored by the tracer.
    await telemetryTracer.onChainEnd({ ...mockRun, parent_run_id: '123' });

    expect(logger.get().debug).not.toHaveBeenCalled();
    expect(telemetry.reportEvent).not.toHaveBeenCalled();
  });

  it('should log and report event on chain end', async () => {
    await telemetryTracer.onChainEnd(mockRun);

    expect(logger.get().debug).toHaveBeenCalledWith(expect.any(Function));
    expect(telemetry.reportEvent).toHaveBeenCalledWith('INVOKE_AI_SUCCESS', {
      assistantStreamingEnabled: true,
      actionTypeId: '.gen-ai',
      isEnabledKnowledgeBase: true,
      model: 'test_model',
      isOssModel: false,
      // end_time - start_time of mockRun
      durationMs: 8077,
      toolsInvoked: {
        KnowledgeBaseRetrievalTool: 2,
        NaturalLanguageESQLTool: 1,
        // The three CustomIndexTool steps are anonymized under a single key.
        CustomTool: 3,
      },
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { BaseCallbackHandlerInput } from '@langchain/core/callbacks/base';
import type { Run } from 'langsmith/schemas';
import { BaseTracer } from '@langchain/core/tracers/base';
import { AnalyticsServiceSetup, Logger } from '@kbn/core/server';

/**
 * Parameters supplied to TelemetryTracer for the event it reports.
 * `eventType` is split off and used as the event type; every other field is
 * copied as-is into the reported event payload.
 */
export interface TelemetryParams {
assistantStreamingEnabled: boolean;
/** TelemetryTracer adds `isOssModel` to the payload only when this equals '.gen-ai'. */
actionTypeId: string;
isEnabledKnowledgeBase: boolean;
/** Passed as the first argument to `telemetry.reportEvent`; not included in the payload. */
eventType: string;
model?: string;
}
/**
 * Constructor fields for TelemetryTracer, on top of the base callback handler input.
 */
export interface LangChainTracerFields extends BaseCallbackHandlerInput {
/** Tool names that are reported under their own name; any other tool is counted as `CustomTool`. */
elasticTools: string[];
/** Analytics service whose `reportEvent` is called when a top-level run ends. */
telemetry: AnalyticsServiceSetup;
telemetryParams: TelemetryParams;
/** Stored on the tracer instance; not read by TelemetryTracer itself. */
totalTools: number;
}
/**
 * Minimal shape of one entry of `run.outputs.steps` as read by TelemetryTracer:
 * only the name of the invoked tool is used.
 */
interface ToolRunStep {
action: {
tool: string;
};
}
/**
 * TelemetryTracer is a tracer that uses event based telemetry to track LangChain events.
 *
 * When a run without a `parent_run_id` ends, a single event of type
 * `telemetryParams.eventType` is reported. Its payload contains the remaining
 * telemetry params, the run duration and a per-tool invocation count.
 */
export class TelemetryTracer extends BaseTracer implements LangChainTracerFields {
  name = 'telemetry_tracer';
  logger: Logger;
  elasticTools: string[];
  telemetry: AnalyticsServiceSetup;
  telemetryParams: TelemetryParams;
  totalTools: number;

  /**
   * @param fields tracer configuration; also forwarded to the BaseTracer constructor
   * @param logger parent logger; a child logger with context `telemetryTracer` is derived from it
   */
  constructor(fields: LangChainTracerFields, logger: Logger) {
    super(fields);
    this.logger = logger.get('telemetryTracer');
    this.elasticTools = fields.elasticTools;
    this.telemetry = fields.telemetry;
    this.telemetryParams = fields.telemetryParams;
    this.totalTools = fields.totalTools;
  }

  /**
   * Reports the telemetry event for a finished run.
   * Runs that have a `parent_run_id` are ignored, so only parentless runs are reported.
   *
   * @param run the finished run; `outputs.steps` (when present) is read for tool usage
   */
  async onChainEnd(run: Run): Promise<void> {
    if (run.parent_run_id) {
      return;
    }

    const { eventType, ...telemetryParams } = this.telemetryParams;

    // `outputs` may be missing or may not carry a `steps` array;
    // treat anything that is not an array as "no tools invoked" instead of throwing.
    const rawSteps: unknown = run.outputs?.steps;
    const steps: Array<Partial<ToolRunStep> | null | undefined> = Array.isArray(rawSteps)
      ? rawSteps
      : [];

    const toolsInvoked: { [k: string]: number } = {};
    for (const step of steps) {
      const tool = step?.action?.tool;
      if (!tool) {
        continue;
      }
      // Custom tool names are user data, so we strip them out
      const key = this.elasticTools.includes(tool) ? tool : 'CustomTool';
      toolsInvoked[key] = (toolsInvoked[key] ?? 0) + 1;
    }

    const telemetryValue = {
      ...telemetryParams,
      // Missing timestamps fall back to 0
      durationMs: (run.end_time ?? 0) - (run.start_time ?? 0),
      toolsInvoked,
      // isOssModel is only reported for the '.gen-ai' action type
      ...(telemetryParams.actionTypeId === '.gen-ai'
        ? { isOssModel: run.inputs.isOssModel }
        : {}),
    };
    // The message is passed as a function; this block never calls it itself.
    this.logger.debug(
      () => `Invoke ${eventType} telemetry:\n${JSON.stringify(telemetryValue, null, 2)}`
    );
    this.telemetry.reportEvent(eventType, telemetryValue);
  }

  // everything below is required for type only
  protected async persistRun(_run: Run): Promise<void> {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
*/

import { v4 as uuidv4 } from 'uuid';
import { AuthenticatedUser, ElasticsearchClient, Logger } from '@kbn/core/server';
import {
AnalyticsServiceSetup,
AuthenticatedUser,
ElasticsearchClient,
Logger,
} from '@kbn/core/server';

import {
DocumentEntryCreateFields,
Expand All @@ -15,6 +20,10 @@ import {
KnowledgeBaseEntryUpdateProps,
Metadata,
} from '@kbn/elastic-assistant-common';
import {
CREATE_KNOWLEDGE_BASE_ENTRY_ERROR_EVENT,
CREATE_KNOWLEDGE_BASE_ENTRY_SUCCESS_EVENT,
} from '../../lib/telemetry/event_based_telemetry';
import { getKnowledgeBaseEntry } from './get_knowledge_base_entry';
import { CreateKnowledgeBaseEntrySchema, UpdateKnowledgeBaseEntrySchema } from './types';

Expand All @@ -27,6 +36,7 @@ export interface CreateKnowledgeBaseEntryParams {
knowledgeBaseEntry: KnowledgeBaseEntryCreateProps | LegacyKnowledgeBaseEntryCreateProps;
global?: boolean;
isV2?: boolean;
telemetry: AnalyticsServiceSetup;
}

export const createKnowledgeBaseEntry = async ({
Expand All @@ -38,6 +48,7 @@ export const createKnowledgeBaseEntry = async ({
logger,
global = false,
isV2 = false,
telemetry,
}: CreateKnowledgeBaseEntryParams): Promise<KnowledgeBaseEntryResponse | null> => {
const createdAt = new Date().toISOString();
const body = isV2
Expand All @@ -55,6 +66,12 @@ export const createKnowledgeBaseEntry = async ({
entry: knowledgeBaseEntry as unknown as TransformToLegacyCreateSchemaProps['entry'],
global,
});
const telemetryPayload = {
entryType: body.type,
required: body.required ?? false,
sharing: body.users.length ? 'private' : 'global',
...(body.type === 'document' ? { source: body.source } : {}),
};
try {
const response = await esClient.create({
body,
Expand All @@ -63,17 +80,24 @@ export const createKnowledgeBaseEntry = async ({
refresh: 'wait_for',
});

return await getKnowledgeBaseEntry({
const newKnowledgeBaseEntry = await getKnowledgeBaseEntry({
esClient,
knowledgeBaseIndex,
id: response._id,
logger,
user,
});

telemetry.reportEvent(CREATE_KNOWLEDGE_BASE_ENTRY_SUCCESS_EVENT.eventType, telemetryPayload);
return newKnowledgeBaseEntry;
} catch (err) {
logger.error(
`Error creating Knowledge Base Entry: ${err} with kbResource: ${knowledgeBaseEntry.name}`
);
telemetry.reportEvent(CREATE_KNOWLEDGE_BASE_ENTRY_ERROR_EVENT.eventType, {
...telemetryPayload,
errorMessage: err.message ?? 'Unknown error',
});
throw err;
}
};
Expand Down
Loading

0 comments on commit 1127bf4

Please sign in to comment.