From 0bc619b8b1259710302c9d5112ba1f6ba9ec623a Mon Sep 17 00:00:00 2001 From: Felipe Martins Diel Date: Wed, 20 Nov 2024 17:45:30 -0300 Subject: [PATCH 1/3] Include BM25 score in the metadata --- libs/langchain-community/src/retrievers/bm25.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libs/langchain-community/src/retrievers/bm25.ts b/libs/langchain-community/src/retrievers/bm25.ts index dfc04709cba1..194a7b668292 100644 --- a/libs/langchain-community/src/retrievers/bm25.ts +++ b/libs/langchain-community/src/retrievers/bm25.ts @@ -53,6 +53,13 @@ export class BM25Retriever extends BaseRetriever { scoredDocs.sort((a, b) => b.score - a.score); - return scoredDocs.slice(0, this.k).map((item) => item.document); + return scoredDocs.slice(0, this.k).map((item) => new Document({ + ...item.document.id && { id: item.document.id }, + pageContent: item.document.pageContent, + metadata: { + score: item.score, + ...item.document.metadata, + } + })); } } From 10438ca85841f4423ab15ed0adb6f5864d9f4ee5 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Thu, 21 Nov 2024 21:05:10 -0800 Subject: [PATCH 2/3] Update bm25.ts --- libs/langchain-community/src/retrievers/bm25.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain-community/src/retrievers/bm25.ts b/libs/langchain-community/src/retrievers/bm25.ts index 194a7b668292..d8024babe495 100644 --- a/libs/langchain-community/src/retrievers/bm25.ts +++ b/libs/langchain-community/src/retrievers/bm25.ts @@ -57,7 +57,7 @@ export class BM25Retriever extends BaseRetriever { ...item.document.id && { id: item.document.id }, pageContent: item.document.pageContent, metadata: { - score: item.score, + bm25Score: item.score, ...item.document.metadata, } })); From 57267ab8118262191bbcbf6199d3b9d567cb76c3 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 21 Nov 2024 21:13:47 -0800 Subject: [PATCH 3/3] Make score inclusion toggleable --- .../src/retrievers/bm25.ts | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/libs/langchain-community/src/retrievers/bm25.ts b/libs/langchain-community/src/retrievers/bm25.ts index d8024babe495..716027cf684b 100644 --- a/libs/langchain-community/src/retrievers/bm25.ts +++ b/libs/langchain-community/src/retrievers/bm25.ts @@ -6,6 +6,7 @@ import { BM25 } from "../utils/@furkantoprak/bm25/BM25.js"; export type BM25RetrieverOptions = { docs: Document[]; k: number; + includeScore?: boolean; } & BaseRetrieverInput; /** @@ -14,6 +15,8 @@ export type BM25RetrieverOptions = { * The k parameter determines the number of documents to return for each query. */ export class BM25Retriever extends BaseRetriever { + includeScore = false; + static lc_name() { return "BM25Retriever"; } @@ -35,6 +38,7 @@ export class BM25Retriever extends BaseRetriever { super(options); this.docs = options.docs; this.k = options.k; + this.includeScore = options.includeScore ?? this.includeScore; } private preprocessFunc(text: string): string[] { @@ -53,13 +57,19 @@ export class BM25Retriever extends BaseRetriever { scoredDocs.sort((a, b) => b.score - a.score); - return scoredDocs.slice(0, this.k).map((item) => new Document({ - ...item.document.id && { id: item.document.id }, - pageContent: item.document.pageContent, - metadata: { - bm25Score: item.score, - ...item.document.metadata, + return scoredDocs.slice(0, this.k).map((item) => { + if (this.includeScore) { + return new Document({ + ...(item.document.id && { id: item.document.id }), + pageContent: item.document.pageContent, + metadata: { + bm25Score: item.score, + ...item.document.metadata, + }, + }); + } else { + return item.document; } - })); + }); } }