Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(DOCSP-32253): Add more semantically relevant product names to metadata #83

Merged
merged 12 commits into from
Aug 21, 2023
16 changes: 12 additions & 4 deletions chat-core/src/DatabaseConnection.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ describe("DatabaseConnection", () => {
body: "foo",
format: "md",
sourceName: "source1",
tags: [],
metadata: {
tags: [],
},
updated: new Date(),
url: "/x/y/z",
};
Expand Down Expand Up @@ -113,7 +115,9 @@ describe("DatabaseConnection", () => {
body: "foo",
format: "md",
sourceName: "source1",
tags: [],
metadata: {
tags: [],
},
updated: new Date(),
url: "/x/y/z",
};
Expand Down Expand Up @@ -153,7 +157,9 @@ describe("DatabaseConnection", () => {
body: "The Matrix (1999) comes out",
format: "md",
sourceName: "",
tags: [],
metadata: {
tags: [],
},
updated: new Date("1999-03-31"),
url: "matrix1",
},
Expand All @@ -162,7 +168,9 @@ describe("DatabaseConnection", () => {
body: "The Matrix: Reloaded (2003) comes out",
format: "md",
sourceName: "",
tags: [],
metadata: {
tags: [],
},
updated: new Date("2003-05-15"),
url: "matrix2",
},
Expand Down
2 changes: 1 addition & 1 deletion chat-core/src/EmbeddedContent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export interface EmbeddedContent {
sourceName: string;

/**
The original text.
The text represented by the vector embedding.
*/
text: string;

Expand Down
10 changes: 8 additions & 2 deletions chat-core/src/Page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,15 @@ export type Page = {
sourceName: string;

/**
Arbitrary tags.
Arbitrary metadata for the page.
*/
tags: string[];
metadata?: {
/**
Arbitrary tags.
*/
tags?: string[];
[k: string]: unknown;
};
};

export type PageAction = "created" | "updated" | "deleted";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ export interface MongoDbUserQueryPreprocessorResponse {
programmingLanguages: string[];
/** One or more MongoDB products present in the content.
Which MongoDB products is the user interested in? Ordered by relevancy.
Include driver if the user is asking about a programming language with a MongoDB driver.
@example ["atlas", "charts", "server", "compass", "bi-connector", "realm", "driver", ...other MongoDB products]
Include "Driver" if the user is asking about a programming language with a MongoDB driver.
@example ["MongoDB Atlas", "Atlas Charts", "Atlas Search", "Aggregation Framework", "MongoDB Server", "Compass", "MongoDB Connector for BI", "Realm SDK", "Driver", "Atlas App Services", ...other MongoDB products]
*/
mongoDbProducts: string[];
/** Using your knowledge of MongoDB and the conversational context,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ describe("makePreprocessMongoDbUserQuery()", () => {
} = response;
expect(outputQuery).toContain("MongoDB");
expect(outputQuery).toContain("code example");
expect(outputQuery).toContain("aggregation");
expect(outputQuery.toLowerCase()).toContain("aggregation");
expect(outputQuery).toContain("?");
expect(programmingLanguages).toStrictEqual(["shell"]);
expect(mongoDbProducts[0]).toBeDefined();
Expand All @@ -83,7 +83,7 @@ describe("makePreprocessMongoDbUserQuery()", () => {
messages,
});
const { mongoDbProducts } = response;
expect(mongoDbProducts[0]).toBe("charts");
expect(mongoDbProducts[0]).toBe("Atlas Charts");
});
test("should be aware of MongoDB", async () => {
const query = "ruby lookup example";
Expand Down
4 changes: 3 additions & 1 deletion ingest/src/DevCenterDataSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ export const makeDevCenterDataSource = async ({
}),
format: "md",
sourceName: name,
tags: [], // TODO
metadata: {
tags: [], // TODO
},
url: /^https?:\/\//.test(document.calculated_slug)
? document.calculated_slug
: new URL(
Expand Down
6 changes: 6 additions & 0 deletions ingest/src/ProjectBase.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,10 @@ export interface ProjectBase {
@example ["kotlin", "docs", "driver"]
*/
tags?: string[];

/**
Name of the product.
@example "MongoDB Atlas"
*/
productName?: string;
}
20 changes: 15 additions & 5 deletions ingest/src/SnootyDataSource.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ describe("SnootyDataSource", () => {
expect(pages[1]).toMatchObject({
format: "md",
sourceName: "snooty-test",
tags: ["docs", "manual"],
metadata: {
tags: ["docs", "manual"],
},
url: "https://mongodb.com/docs/v6.0/administration/",
body: firstPageText,
});
Expand All @@ -73,7 +75,9 @@ describe("SnootyDataSource", () => {
expect(pages[0]).toMatchObject({
format: "md",
sourceName: "snooty-docs",
tags: ["docs", "manual"],
metadata: {
tags: ["docs", "manual"],
},
url: "https://mongodb.com/docs/v6.0/",
});

Expand All @@ -83,23 +87,29 @@ describe("SnootyDataSource", () => {
expect(pages[2]).toMatchObject({
format: "md",
sourceName: "snooty-docs",
tags: ["docs", "manual"],
metadata: {
tags: ["docs", "manual"],
},
url: "https://mongodb.com/docs/v6.0/administration/analyzing-mongodb-performance/index/",
});

// This has index in the middle of the page_id that should not be stripped
expect(pages[3]).toMatchObject({
format: "md",
sourceName: "snooty-docs",
tags: ["docs", "manual"],
metadata: {
tags: ["docs", "manual"],
},
url: "https://mongodb.com/docs/v6.0/administration/index/backup-sharded-clusters/",
});

// This has index but part of a wider phrase so should not be stripped
expect(pages[4]).toMatchObject({
format: "md",
sourceName: "snooty-docs",
tags: ["docs", "manual"],
metadata: {
tags: ["docs", "manual"],
},
url: "https://mongodb.com/docs/v6.0/administration/change-streams-production-recommendations/how-to-index/",
});
});
Expand Down
17 changes: 14 additions & 3 deletions ingest/src/SnootyDataSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,13 @@ export const makeSnootyDataSource = async ({
_snootyProjectName: string;
}
> => {
const { baseUrl, currentBranch, name: snootyProjectName, tags } = project;
const {
baseUrl,
currentBranch,
name: snootyProjectName,
tags,
productName,
} = project;
return {
// Additional members for testing purposes
_baseUrl: baseUrl,
Expand Down Expand Up @@ -128,7 +134,7 @@ export const makeSnootyDataSource = async ({
(async () => {
const page = await handlePage(
(entry as SnootyPageEntry).data,
{ sourceName, baseUrl, tags: tags ?? [] }
{ sourceName, baseUrl, tags: tags ?? [], productName }
);
pages.push(page);
})()
Expand Down Expand Up @@ -214,10 +220,12 @@ const handlePage = async (
sourceName,
baseUrl,
tags = [],
productName,
}: {
sourceName: string;
baseUrl: string;
tags: string[];
productName?: string;
}
): Promise<Page> => {
// Strip first three path segments - according to Snooty team, they'll always
Expand All @@ -242,6 +250,9 @@ const handlePage = async (
title: getTitleFromSnootyAst(page.ast),
body: snootyAstToMd(page.ast, { baseUrl }),
format: "md",
tags,
metadata: {
tags,
productName,
},
};
};
Loading