Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.

Commit 003cec9

Browse files
authored
(EAI-1149, EAI-1259) Updates to user message front matter (#892)
* Normalize URL in user msg formatting
* Add labels to origin rules & use those for formatting user msg frontmatter
1 parent f6b41d1 commit 003cec9

File tree

3 files changed

+100
-18
lines changed

3 files changed

+100
-18
lines changed

packages/chatbot-server-mongodb-public/src/processors/formatUserMessageForGeneration.test.ts

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { formatUserMessageForGeneration } from "./formatUserMessageForGeneration";
22
import { ConversationCustomData, logger } from "mongodb-rag-core";
3+
import { ORIGIN_RULES } from "mongodb-chatbot-server";
34

45
beforeAll(() => {
56
logger.error = jest.fn();
@@ -8,16 +9,19 @@ beforeAll(() => {
89
describe("formatUserMessageForGeneration", () => {
910
const userMessageText = "Hello, world!";
1011
const reqId = "test-request-id";
12+
const testMongoDbPageUrl = "https://mongodb.com";
13+
const resultMongoDbPageUrl = "mongodb.com";
1114

1215
it("formats front matter correctly for mongodb.com origin", () => {
13-
const origin = "https://mongodb.com";
1416
const result = formatUserMessageForGeneration({
1517
userMessageText,
1618
reqId,
17-
customData: { origin } satisfies ConversationCustomData,
19+
customData: {
20+
origin: testMongoDbPageUrl,
21+
} satisfies ConversationCustomData,
1822
});
1923
expect(result).toEqual(`---
20-
pageUrl: ${origin}
24+
pageUrl: ${resultMongoDbPageUrl}
2125
---
2226
2327
${userMessageText}`);
@@ -31,7 +35,40 @@ ${userMessageText}`);
3135
customData: { origin } satisfies ConversationCustomData,
3236
});
3337
expect(result).toEqual(`---
34-
pageUrl: ${origin}
38+
pageUrl: learn.mongodb.com
39+
---
40+
41+
${userMessageText}`);
42+
});
43+
44+
it("normalizes a URL with trailing backslash", () => {
45+
const origin = testMongoDbPageUrl + "/docs/pageName/";
46+
const result = formatUserMessageForGeneration({
47+
userMessageText,
48+
reqId,
49+
customData: {
50+
origin,
51+
} satisfies ConversationCustomData,
52+
});
53+
expect(result).toEqual(`---
54+
pageUrl: ${resultMongoDbPageUrl + "/docs/pageName"}
55+
---
56+
57+
${userMessageText}`);
58+
});
59+
60+
it("normalizes a URL with query", () => {
61+
const origin =
62+
"https://learn.mongodb.com/courses/mongodb-for-sql-experts?param1=value1&param2=value2";
63+
const result = formatUserMessageForGeneration({
64+
userMessageText,
65+
reqId,
66+
customData: {
67+
origin,
68+
} satisfies ConversationCustomData,
69+
});
70+
expect(result).toEqual(`---
71+
pageUrl: learn.mongodb.com/courses/mongodb-for-sql-experts
3572
---
3673
3774
${userMessageText}`);
@@ -67,7 +104,7 @@ ${userMessageText}`);
67104
} satisfies ConversationCustomData,
68105
});
69106
expect(result).toEqual(`---
70-
client: MongoDB VS Code plugin
107+
client: MongoDB VS Code extension
71108
---
72109
73110
${userMessageText}`);
@@ -92,6 +129,25 @@ client: ${expectedClientLabel}
92129
${userMessageText}`);
93130
});
94131

132+
it("does not add client front matter for unlabelled mongodb originCodes", () => {
133+
const unlabelledOriginCodes: string[] = [];
134+
ORIGIN_RULES.reduce((acc, rule) => {
135+
if (!rule.label) unlabelledOriginCodes.push(rule.code);
136+
return acc;
137+
}, unlabelledOriginCodes);
138+
139+
unlabelledOriginCodes.forEach((originCode) => {
140+
const result = formatUserMessageForGeneration({
141+
userMessageText,
142+
reqId,
143+
customData: {
144+
originCode,
145+
} satisfies ConversationCustomData,
146+
});
147+
expect(result).toEqual(userMessageText);
148+
});
149+
});
150+
95151
it("logs a warning and does not add pageUrl if origin is malformed", () => {
96152
const malformedOrigin = "http://not a url";
97153
const result = formatUserMessageForGeneration({
@@ -119,19 +175,18 @@ ${userMessageText}`);
119175
});
120176

121177
it("adds both pageUrl and client front matter if both are present", () => {
122-
const origin = "https://mongodb.com";
123178
const originCode = "VSCODE";
124-
const expectedClientLabel = "MongoDB VS Code plugin";
179+
const expectedClientLabel = "MongoDB VS Code extension";
125180
const result = formatUserMessageForGeneration({
126181
userMessageText,
127182
reqId,
128183
customData: {
129-
origin,
184+
origin: testMongoDbPageUrl,
130185
originCode,
131186
} satisfies ConversationCustomData,
132187
});
133188
expect(result).toEqual(`---
134-
pageUrl: ${origin}
189+
pageUrl: ${resultMongoDbPageUrl}
135190
client: ${expectedClientLabel}
136191
---
137192

packages/chatbot-server-mongodb-public/src/processors/formatUserMessageForGeneration.ts

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { updateFrontMatter, ConversationCustomData } from "mongodb-rag-core";
2-
import { originCodes } from "mongodb-chatbot-server";
2+
import { originCodes, ORIGIN_RULES } from "mongodb-chatbot-server";
3+
import { normalizeUrl } from "mongodb-rag-core/dataSources";
34
import { z } from "zod";
45
import { logRequest } from "../utils";
56

@@ -19,6 +20,19 @@ type FormatUserMessageForGenerationParams = {
1920
customData: ConversationCustomData;
2021
};
2122

23+
interface OriginCodeLabels {
24+
/** Code to label mapping. */
25+
[code: string]: string;
26+
}
27+
28+
// Some origin codes have a label to add to the front matter
29+
const originCodeLabels = ORIGIN_RULES.reduce((acc, rule) => {
30+
if (rule.label !== undefined) {
31+
acc[rule.code] = rule.label;
32+
}
33+
return acc;
34+
}, {} as OriginCodeLabels);
35+
2236
export function formatUserMessageForGeneration({
2337
userMessageText,
2438
reqId,
@@ -52,7 +66,7 @@ export function formatUserMessageForGeneration({
5266
url.hostname === "mongodb.com" ||
5367
url.hostname.endsWith(".mongodb.com")
5468
) {
55-
frontMatter.pageUrl = parsedCustomData.origin;
69+
frontMatter.pageUrl = normalizeUrl({ url: parsedCustomData.origin });
5670
}
5771
} catch (e) {
5872
logRequest({
@@ -62,10 +76,10 @@ export function formatUserMessageForGeneration({
6276
});
6377
}
6478
}
65-
if (parsedCustomData.originCode === "VSCODE") {
66-
frontMatter.client = "MongoDB VS Code plugin";
67-
} else if (parsedCustomData.originCode === "GEMINI_CODE_ASSIST") {
68-
frontMatter.client = "Gemini Code Assist";
79+
80+
const originLabel = originCodeLabels[parsedCustomData.originCode ?? ""];
81+
if (originLabel) {
82+
frontMatter.client = originLabel;
6983
}
7084

7185
if (Object.keys(frontMatter).length === 0) {

packages/mongodb-chatbot-server/src/processors/addCustomData.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,28 @@ export type OriginCode = (typeof originCodes)[number];
4141
interface OriginRule {
4242
regex: RegExp;
4343
code: OriginCode;
44+
/**
45+
Name used to label the origin when formatting the user message front matter.
46+
Leave unset (undefined) if no label should be used.
47+
*/
48+
label?: string;
4449
}
4550

46-
const ORIGIN_RULES: OriginRule[] = [
51+
export const ORIGIN_RULES: OriginRule[] = [
4752
{ regex: /learn\.mongodb\.com/, code: "LEARN" },
4853
{ regex: /mongodb\.com\/developer/, code: "DEVELOPER" },
4954
{ regex: /mongodb\.com\/docs/, code: "DOCS" },
5055
{ regex: /mongodb\.com\//, code: "DOTCOM" },
51-
{ regex: /google-gemini-code-assist/, code: "GEMINI_CODE_ASSIST" },
52-
{ regex: /vscode-mongodb-copilot/, code: "VSCODE" },
56+
{
57+
regex: /google-gemini-code-assist/,
58+
code: "GEMINI_CODE_ASSIST",
59+
label: "Gemini Code Assist",
60+
},
61+
{
62+
regex: /vscode-mongodb-copilot/,
63+
code: "VSCODE",
64+
label: "MongoDB VS Code extension",
65+
},
5366
];
5467

5568
function getOriginCode(origin: string): OriginCode {

0 commit comments

Comments
 (0)