Skip to content

Commit

Permalink
(EAI-581): Chatbot verified answers eval/improvements + export `mongod…
Browse files Browse the repository at this point in the history
…b-rag-core/dataSources` (#550)

* Start on VA evals

* prep evals for run

* standard eval format

* VA eval

* separate data sources exports

* latest changes

* Remove evals from build

* Fix broken tests

* Update packages/chatbot-server-mongodb-public/src/verifiedAnswers.eval.ts

Co-authored-by: Nick Larew <nick.larew@mongodb.com>

* Update packages/chatbot-server-mongodb-public/src/verifiedAnswers.eval.ts

Co-authored-by: Nick Larew <nick.larew@mongodb.com>

* Update packages/mongodb-rag-core/src/index.ts

Co-authored-by: Nick Larew <nick.larew@mongodb.com>

---------

Co-authored-by: Nick Larew <nick.larew@mongodb.com>
  • Loading branch information
mongodben and nlarew authored Nov 8, 2024
1 parent ab9596d commit 37be250
Show file tree
Hide file tree
Showing 28 changed files with 363 additions and 164 deletions.
144 changes: 18 additions & 126 deletions package-lock.json

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions packages/chatbot-server-mongodb-public/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
"lint:fix": "npm run lint -- --fix && prettier ./src --check --write",
"start": "pm2-runtime ./build/index.js",
"test": "jest --forceExit",
"release": "release-it"
"release": "release-it",
"evaluate": "braintrust eval"
},
"dependencies": {
"common-tags": "^1.8.2",
Expand All @@ -31,6 +32,7 @@
"express": "^4.18.2",
"mongodb-chatbot-evaluation": "*",
"mongodb-chatbot-server": "*",
"mongodb-chatbot-verified-answers": "*",
"mongodb-rag-core": "*",
"pm2": "^5.3.0",
"zod": "^3.23.8",
Expand All @@ -49,7 +51,7 @@
"@typescript-eslint/parser": "^5.58.0",
"autoevals": "^0.0.92",
"babel-jest": "^29.5.0",
"braintrust": "^0.0.160",
"braintrust": "^0.0.167",
"eslint": "^8.38.0",
"eslint-config-prettier": "^8.8.0",
"eslint-plugin-jest": "^27.2.1",
Expand Down
8 changes: 8 additions & 0 deletions packages/chatbot-server-mongodb-public/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ export const embeddedContentStore = makeMongoDbEmbeddedContentStore({
databaseName: MONGODB_DATABASE_NAME,
});

/**
Settings for verified answer lookup.
`findNearestNeighborsOptions` is handed to `makeDefaultFindVerifiedAnswer`
when `findVerifiedAnswer` is constructed in this file.
NOTE(review): `embeddingModel` is not passed to `makeDefaultFindVerifiedAnswer`
in the code visible here; confirm it matches the model behind `embedder`.
*/
export const verifiedAnswerConfig = {
embeddingModel: OPENAI_EMBEDDING_DEPLOYMENT,
findNearestNeighborsOptions: {
// Score threshold for the nearest-neighbor search.
// NOTE(review): presumably matches scoring below this are discarded; confirm against the store implementation.
minScore: 0.96,
},
};
export const retrievalConfig = {
model: OPENAI_EMBEDDING_DEPLOYMENT,
findNearestNeighborsOptions: {
Expand Down Expand Up @@ -137,6 +143,8 @@ export const findVerifiedAnswer = wrapTraced(
makeDefaultFindVerifiedAnswer({
embedder,
store: verifiedAnswerStore,
findNearestNeighborsOptions:
verifiedAnswerConfig.findNearestNeighborsOptions,
}),
{ name: "findVerifiedAnswer" }
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
UserMessage,
} from "mongodb-chatbot-server";
import { EVAL_ENV_VARS } from "../EvalEnvVars";
import { AzureOpenAI } from "openai";
import { AzureOpenAI } from "mongodb-rag-core/openai";
import { z } from "zod";
import { strict as assert } from "assert";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,10 @@ export const mongoDbTopics = [
{
id: "indexes",
},
{
id: "billing",
},
{
id: "iam",
},
] as const satisfies MongoDbTopic[];
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import {
makeStepBackUserQuery,
StepBackUserQueryMongoDbFunction,
} from "./makeStepBackUserQuery";
import { Message, ObjectId, updateFrontMatter } from "mongodb-chatbot-server";
import { Message, updateFrontMatter } from "mongodb-chatbot-server";
import { ObjectId } from "mongodb-rag-core/mongodb";
import { MongoDbTag } from "../mongoDbMetadata";
import {
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
  Compute the cosine similarity between two numeric vectors.

  @param a - First vector.
  @param b - Second vector. Must have the same number of elements as `a`.
  @returns `dot(a, b) / (|a| * |b|)`, or `0` when either vector has zero
  magnitude (this includes empty vectors).
  @throws RangeError if the vectors have different lengths. Previously a
  length mismatch produced `NaN` or a silently incorrect value.
 */
export function cosineSimilarity(a: number[], b: number[]) {
  // https://towardsdatascience.com/how-to-build-a-textual-similarity-analysis-web-app-aa3139d4fb71

  if (a.length !== b.length) {
    throw new RangeError(
      `Cannot compute cosine similarity of vectors with different lengths: ${a.length} and ${b.length}`
    );
  }

  const magnitudeA = Math.sqrt(dotProduct(a, a));
  const magnitudeB = Math.sqrt(dotProduct(b, b));
  // A zero magnitude would cause a division by zero, so report "no similarity".
  if (!magnitudeA || !magnitudeB) {
    return 0;
  }
  // https://towardsdatascience.com/how-to-measure-distances-in-machine-learning-13a396aa34ce
  return dotProduct(a, b) / (magnitudeA * magnitudeB);
}

/**
  Sum of the element-wise products of two vectors.
  Callers must pass vectors of equal length.
 */
function dotProduct(a: number[], b: number[]) {
  return a.reduce((acc, cur, i) => acc + cur * b[i], 0);
}
16 changes: 0 additions & 16 deletions packages/chatbot-server-mongodb-public/src/test/testHelpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,22 +62,6 @@ export async function makeTestApp(defaultConfigOverrides?: Partial<AppConfig>) {
};
}

/**
  Compute the cosine similarity between two numeric vectors.

  @param a - First vector.
  @param b - Second vector. Must have the same number of elements as `a`.
  @returns `dot(a, b) / (|a| * |b|)`, or `0` when either vector has zero
  magnitude (this includes empty vectors).
  @throws RangeError if the vectors have different lengths. Previously a
  length mismatch produced `NaN` or a silently incorrect value.
 */
export function cosineSimilarity(a: number[], b: number[]) {
  // https://towardsdatascience.com/how-to-build-a-textual-similarity-analysis-web-app-aa3139d4fb71

  if (a.length !== b.length) {
    throw new RangeError(
      `Cannot compute cosine similarity of vectors with different lengths: ${a.length} and ${b.length}`
    );
  }

  const magnitudeA = Math.sqrt(dotProduct(a, a));
  const magnitudeB = Math.sqrt(dotProduct(b, b));
  // A zero magnitude would cause a division by zero, so report "no similarity".
  if (!magnitudeA || !magnitudeB) {
    return 0;
  }
  // https://towardsdatascience.com/how-to-measure-distances-in-machine-learning-13a396aa34ce
  return dotProduct(a, b) / (magnitudeA * magnitudeB);
}

/**
  Sum of the element-wise products of two vectors.
  Callers must pass vectors of equal length.
 */
function dotProduct(a: number[], b: number[]) {
  return a.reduce((acc, cur, i) => acc + cur * b[i], 0);
}
export { systemPrompt };
export {
generateUserPrompt,
Expand Down
279 changes: 279 additions & 0 deletions packages/chatbot-server-mongodb-public/src/verifiedAnswers.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
import { Eval, EvalCase, EvalScorer } from "braintrust";
import { MongoDbTag } from "./mongoDbMetadata";
import {
findVerifiedAnswer,
verifiedAnswerConfig,
verifiedAnswerStore,
} from "./config";
import { FindVerifiedAnswerResult } from "mongodb-chatbot-server";
import {
parseVerifiedAnswerYaml,
VerifiedAnswerSpec,
} from "mongodb-chatbot-verified-answers";
import path from "path";
import fs from "fs";
import "dotenv/config";
import { cosineSimilarity } from "./test/cosineSimilarity";
import { strict as assert } from "assert";

/**
Input handed to the eval task, which forwards it to `findVerifiedAnswer`.
*/
interface VerifiedAnswersEvalCaseInput {
query: string;
}
/**
Expected outcome of an eval case.
*/
interface VerifiedAnswersEvalCaseExpected {
/**
The expected verified answer to the query.
If `undefined`, expects no verified answer.
*/
answer?: string;
}

/**
Extra information attached to an eval case.
*/
interface VerifiedAnswersEvalCaseMetadata extends Record<string, unknown> {
/**
Question text of the verified answer that the input query is expected to resolve to.
The `ReferenceAnswerCosineSimilarity` scorer uses it to look up the stored
verified answer by its `question.text`.
*/
similarVerifiedAnswerQuery?: string;
/**
Optional free-form description of the case.
*/
description?: string;
}

// Tags specific to this eval. In the cases below, "should_match" marks cases
// created with a `similarVerifiedAnswerQuery` and "should_not_match" marks
// cases created without one.
type VerifiedAnswerTag = "perturbation" | "should_match" | "should_not_match";

// A case may carry general MongoDB tags as well as the eval-specific tags.
type MongoDbVerifiedAnswerTag = MongoDbTag | VerifiedAnswerTag;

/**
A Braintrust `EvalCase` specialized for this eval, with `tags` narrowed to
the tag union accepted here.
*/
interface VerifiedAnswersEvalCase
extends EvalCase<
VerifiedAnswersEvalCaseInput,
VerifiedAnswersEvalCaseExpected,
VerifiedAnswersEvalCaseMetadata
> {
tags?: MongoDbVerifiedAnswerTag[];
}

// Output type of the eval task, i.e. the value the scorers receive as `output`.
type VerifiedAnswersTaskOutput = FindVerifiedAnswerResult;

// Scorer signature shared by every metric defined in this file.
type VerifiedAnswersEvalCaseScorer = EvalScorer<
VerifiedAnswersEvalCaseInput,
VerifiedAnswersTaskOutput,
VerifiedAnswersEvalCaseExpected,
VerifiedAnswersEvalCaseMetadata
>;

// The verified answers YAML sits three directories above this file's directory.
const verifiedAnswersPath = path.resolve(
  __dirname,
  "..",
  "..",
  "..",
  "verified-answers.yaml"
);
// Read and parse the YAML once, synchronously, when the module is loaded.
const verifiedAnswersYaml = fs.readFileSync(verifiedAnswersPath, {
  encoding: "utf-8",
});
const verifiedAnswerSpecs = parseVerifiedAnswerYaml(verifiedAnswersYaml);
// Question text -> answer lookup used when building the eval cases.
const verifiedAnswerIndex = makeVerifiedAnswerIndex(verifiedAnswerSpecs);

/**
  Eval cases for verified answer retrieval.

  Cases created with a `similarVerifiedAnswerQuery` expect the answer
  registered for that exact question and are tagged "should_match".
  Cases created without one expect no verified answer and are tagged
  "should_not_match".
 */
const verifiedAnswerEvalCases: VerifiedAnswersEvalCase[] = [
  makeVerifiedAnswerEvalCase({
    inputQuery: "what is the aggregation framework",
    similarVerifiedAnswerQuery: "What is aggregation in MongoDB?",
    // "should_match" added so this positive case is tagged like the others.
    tags: ["aggregation", "perturbation", "should_match"],
    verifiedAnswerIndex,
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "agg framework",
    similarVerifiedAnswerQuery: "What is aggregation in MongoDB?",
    tags: ["aggregation", "perturbation", "should_match"],
    verifiedAnswerIndex,
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "what's the process to insert data into MongoDB",
    similarVerifiedAnswerQuery: "How do I insert data into MongoDB?",
    verifiedAnswerIndex,
    tags: ["perturbation", "should_match"],
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "How can I insert data into MongoDB?",
    similarVerifiedAnswerQuery: "How do I insert data into MongoDB?",
    verifiedAnswerIndex,
    tags: ["perturbation", "should_match"],
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "insert data into mongodb",
    similarVerifiedAnswerQuery: "How do I insert data into MongoDB?",
    verifiedAnswerIndex,
    tags: ["perturbation", "should_match"],
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "password reset",
    similarVerifiedAnswerQuery: "Can i reset my password",
    verifiedAnswerIndex,
    tags: ["perturbation", "iam", "should_match"],
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "reset my password",
    similarVerifiedAnswerQuery: "Can i reset my password",
    verifiedAnswerIndex,
    tags: ["perturbation", "iam", "should_match"],
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "reset database password",
    similarVerifiedAnswerQuery: "Can i reset my password",
    verifiedAnswerIndex,
    tags: ["perturbation", "iam", "should_match"],
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "connect to stream process",
    verifiedAnswerIndex,
    tags: ["atlas_stream_processing", "should_not_match"],
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "connect to database kotlin",
    verifiedAnswerIndex,
    tags: ["driver", "kotlin", "should_not_match"],
  }),
  makeVerifiedAnswerEvalCase({
    inputQuery: "connect to database with Kotlin coroutine driver",
    verifiedAnswerIndex,
    tags: ["driver", "kotlin", "kotlin_coroutine_driver", "should_not_match"],
  }),
  // 👇 From EAI-580 👇
  makeVerifiedAnswerEvalCase({
    inputQuery: "how do I set up billing alerts in Atlas",
    // No similar verified answer
    tags: ["billing", "should_not_match"],
    verifiedAnswerIndex,
  }),
];

/**
  Helper function to create a verified answer eval case.

  @param args.inputQuery - Query sent to the verified answer search.
  @param args.similarVerifiedAnswerQuery - Exact question text of the verified
  answer the query is expected to resolve to. Omit (or pass an empty string)
  when no verified answer is expected.
  @param args.description - Optional description stored in the case metadata.
  @param args.tags - Optional tags attached to the case.
  @param args.verifiedAnswerIndex - Question-to-answer lookup used to resolve
  the expected answer.
  @returns The eval case. `expected.answer` is the indexed answer for
  `similarVerifiedAnswerQuery`, or `undefined` when none was requested.
  @throws Error if `similarVerifiedAnswerQuery` is given but has no entry in
  the index. Without this check a typo or a changed YAML file would silently
  turn a positive case into one that expects no verified answer.
 */
function makeVerifiedAnswerEvalCase(args: {
  inputQuery: string;
  similarVerifiedAnswerQuery?: string;
  description?: string;
  tags?: MongoDbVerifiedAnswerTag[];
  verifiedAnswerIndex: VerifiedAnswerIndex;
}): VerifiedAnswersEvalCase {
  const { inputQuery, similarVerifiedAnswerQuery, description, tags } = args;

  let expectedAnswer: string | undefined = undefined;
  if (similarVerifiedAnswerQuery) {
    expectedAnswer = findExactVerifiedAnswer(
      similarVerifiedAnswerQuery,
      args.verifiedAnswerIndex
    );
    if (expectedAnswer === undefined) {
      throw new Error(
        `No verified answer found for query: ${similarVerifiedAnswerQuery}`
      );
    }
  }

  return {
    input: {
      query: inputQuery,
    },
    expected: {
      answer: expectedAnswer,
    },
    tags,
    metadata: {
      similarVerifiedAnswerQuery,
      description,
    },
  };
}

// -- Evaluation metrics --
/**
  Scores 1 when the task output contains any verified answer, 0 otherwise.
  The expected answer is not consulted.
 */
const MatchesSomeVerifiedAnswer: VerifiedAnswersEvalCaseScorer = ({
  output,
}) => {
  const foundAnswer = Boolean(output.answer);
  return {
    name: "MatchesSomeVerifiedAnswer",
    score: foundAnswer ? 1 : 0,
  };
};

/**
  Scores 1 when the returned answer text equals the expected answer text.
  "No answer returned" matches "no answer expected".

  `metadata.type` is `<matched>_<expectation>`:
  - `<matched>` is "true" or "false", i.e. whether the score is 1.
  - `<expectation>` is "positive" when an answer was expected and "negative"
    when none was expected. It describes the expectation, not the output.
 */
const MatchesExpectedOutput: VerifiedAnswersEvalCaseScorer = ({
  output,
  expected,
}) => {
  const expectedAnswer = expected.answer;
  const isMatch = output.answer?.answer === expectedAnswer;
  const expectation = expectedAnswer === undefined ? "negative" : "positive";

  return {
    name: "MatchesExpectedOutput",
    score: isMatch ? 1 : 0,
    metadata: {
      type: `${isMatch}_${expectation}`,
    },
  };
};

/**
  Reports the `score` of the returned verified answer.
  The score is `null` when no answer, or no score, was returned.
 */
const SearchScore: VerifiedAnswersEvalCaseScorer = ({ output }) => {
  const matchedAnswer = output.answer;
  return {
    name: "SearchScore",
    score: matchedAnswer?.score ?? null,
  };
};

/**
  Cosine similarity between the embedding of the input query and the stored
  question embedding of the reference verified answer.

  The reference is the stored verified answer whose `question.text` equals
  the case's `metadata.similarVerifiedAnswerQuery`. When the case has no
  reference query, the score is `null`.

  Throws an assertion error if the reference question is not in the store.

  BUG: Getting Mongo connection closed errors on this scorer with the clean up.
 */
const ReferenceAnswerCosineSimilarity: VerifiedAnswersEvalCaseScorer = async ({
  metadata,
  output,
}) => {
  const name = "ReferenceAnswerCosineSimilarity";
  const referenceQuery = metadata.similarVerifiedAnswerQuery;

  // Nothing to compare against for cases that expect no verified answer.
  if (!referenceQuery) {
    return { name, score: null };
  }

  const [referenceAnswer] = await verifiedAnswerStore.find({
    "question.text": referenceQuery,
  });
  assert(
    referenceAnswer,
    `No verified answer found for query: ${referenceQuery}`
  );

  const score = cosineSimilarity(
    output.queryEmbedding,
    referenceAnswer.question.embedding
  );
  return { name, score };
};

type VerifiedAnswerIndex = Record<string, string>;
/**
  Build a question-to-answer lookup from the verified answer specs.
  Every question of a spec maps to that spec's answer. If the same question
  appears more than once, the last occurrence wins.
 */
function makeVerifiedAnswerIndex(
  verifiedAnswerSpecs: VerifiedAnswerSpec[]
): VerifiedAnswerIndex {
  const index: VerifiedAnswerIndex = {};
  for (const spec of verifiedAnswerSpecs) {
    for (const question of spec.questions) {
      index[question] = spec.answer;
    }
  }
  return index;
}

/**
  Look up the answer whose question text is exactly `query`.

  Only the index's own keys are considered, so a query such as "toString" or
  "constructor" yields `undefined` instead of a member inherited from
  `Object.prototype`.

  @param query - Exact question text to look up.
  @param verifiedAnswerIndex - Question-to-answer lookup.
  @returns The matching answer, or `undefined` if the question is not indexed.
 */
function findExactVerifiedAnswer(
  query: string,
  verifiedAnswerIndex: VerifiedAnswerIndex
): string | undefined {
  return Object.prototype.hasOwnProperty.call(verifiedAnswerIndex, query)
    ? verifiedAnswerIndex[query]
    : undefined;
}

// Registers the verified answers eval with Braintrust.
// Each case's input is passed to `findVerifiedAnswer`, and the result is
// scored by the scorers listed under `scores`.
Eval<
  VerifiedAnswersEvalCaseInput,
  VerifiedAnswersTaskOutput,
  VerifiedAnswersEvalCaseExpected,
  VerifiedAnswersEvalCaseMetadata
>("mongodb-chatbot-verified-answers", {
  // The experiment name records the embedding model and minScore under test.
  experimentName: `mongodb-chatbot-latest-${verifiedAnswerConfig.embeddingModel}-minScore-${verifiedAnswerConfig.findNearestNeighborsOptions.minScore}`,
  metadata: {
    description:
      "Evaluates if gets the correct verified answers for a given query",
    verifiedAnswerConfig,
  },
  data: async () => verifiedAnswerEvalCases,
  maxConcurrency: 5,
  task: async (input) => findVerifiedAnswer(input),
  scores: [
    MatchesSomeVerifiedAnswer,
    MatchesExpectedOutput,
    ReferenceAnswerCosineSimilarity,
    SearchScore,
  ],
});
Loading

0 comments on commit 37be250

Please sign in to comment.