Skip to content

Commit 9fde195

Browse files
authored
feat(full-text-search): add an optional score explanation (#65)
1 parent 9b926e2 commit 9fde195

File tree

8 files changed

+159
-102
lines changed

8 files changed

+159
-102
lines changed

packages/full-text-search/spec/generic/full_text_search.spec.ts

+7
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,13 @@ describe("full-text search", () => {
136136
expect(dv.applySortByScoring(true).data()).toEqual(sorted_asc);
137137
});
138138

139+
it("explain", () => {
140+
let query = new QueryBuilder().explain(true).fuzzy("name", "quak").fuzziness(2).build();
141+
let res = coll.chain().find({"$fts": query});
142+
expect(res.data().length).toBe(4);
143+
expect(res.getScoring()[0].explanation).toBeArrayOfObjects();
144+
});
145+
139146
it("from/to json", () => {
140147
const fts = coll["_fullTextSearch"];
141148
const fts2 = FullTextSearch.fromJSONObject(JSON.parse(JSON.stringify(fts)));

packages/full-text-search/spec/node/elasticsearch.spec.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ describe("Compare scoring against elasticsearch", () => {
199199
}
200200

201201
let esScore = Math.round(esHits[j]._score * COMPARE_PRECISION) / COMPARE_PRECISION;
202-
let ftsScore = Math.round(ftsHits[esID] * COMPARE_PRECISION) / COMPARE_PRECISION;
202+
let ftsScore = Math.round(ftsHits[esID].score * COMPARE_PRECISION) / COMPARE_PRECISION;
203203

204204
expect(esScore).toEqual(ftsScore);
205205
}

packages/full-text-search/src/full_text_search.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import {Tokenizer} from "./tokenizer";
44
import {Dict} from "../../common/types";
55
import {PLUGINS} from "../../common/plugin";
66
import {Query} from "./query_builder";
7-
import {ScoreResult} from "./scorer";
7+
import {Scorer} from "./scorer";
88

99
export class FullTextSearch {
1010
private _id: string;
@@ -72,7 +72,7 @@ export class FullTextSearch {
7272
}
7373
}
7474

75-
public search(query: Query): ScoreResult {
75+
public search(query: Query): Scorer.ScoreResult {
7676
return this._idxSearcher.search(query);
7777
}
7878

packages/full-text-search/src/index_searcher.ts

+47-48
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
import {Scorer, ScoreResult} from "./scorer";
1+
import {Scorer} from "./scorer";
22
import {InvertedIndex, toCodePoints} from "./inverted_index";
33
import {FuzzyQuery, Query, QueryBuilder, WildcardQuery} from "./query_builder";
44
import {Dict} from "../../common/types";
55
import {RunAutomaton} from "./fuzzy/run_automaton";
66
import {LevenshteinAutomata} from "./fuzzy/levenshtein_automata";
7-
import DocResults = Scorer.DocResults;
7+
import QueryResults = Scorer.QueryResults;
88
import Index = InvertedIndex.Index;
99

1010

11-
1211
/**
1312
* @hidden
1413
*/
@@ -26,17 +25,17 @@ export class IndexSearcher {
2625
this._scorer = new Scorer(this._invIdxs);
2726
}
2827

29-
public search(query: Query): ScoreResult {
30-
let docResults = this._recursive(query.query, true);
28+
public search(query: Query): Scorer.ScoreResult {
29+
let queryResults = this._recursive(query.query, true);
3130

3231
// Do final scoring.
3332
if (query.final_scoring !== undefined ? query.final_scoring : true) {
34-
return this._scorer.finalScore(query, docResults);
33+
return this._scorer.finalScore(query, queryResults);
3534
}
3635

37-
const result: ScoreResult = {};
38-
for (const key of docResults.keys()) {
39-
result[key] = 1;
36+
const result: Scorer.ScoreResult = {};
37+
for (const key of queryResults.keys()) {
38+
result[key] = {score: 1};
4039
}
4140
return result;
4241
}
@@ -46,7 +45,7 @@ export class IndexSearcher {
4645
}
4746

4847
private _recursive(query: any, doScoring: boolean) {
49-
let docResults: DocResults = new Map();
48+
let queryResults: QueryResults = new Map();
5049
const boost = query.boost !== undefined ? query.boost : 1;
5150
const fieldName = query.field !== undefined ? query.field : null;
5251

@@ -59,19 +58,19 @@ export class IndexSearcher {
5958

6059
switch (query.type) {
6160
case "bool": {
62-
docResults = null;
61+
queryResults = null;
6362
if (query.must !== undefined) {
64-
docResults = this._getUnique(query.must.values, doScoring, docResults);
63+
queryResults = this._getUnique(query.must.values, doScoring, queryResults);
6564
}
6665
if (query.filter !== undefined) {
67-
docResults = this._getUnique(query.filter.values, false, docResults);
66+
queryResults = this._getUnique(query.filter.values, false, queryResults);
6867
}
6968
if (query.should !== undefined) {
7069
let shouldDocs = this._getAll(query.should.values, doScoring);
7170

7271
let empty = false;
73-
if (docResults === null) {
74-
docResults = new Map();
72+
if (queryResults === null) {
73+
queryResults = new Map();
7574
empty = true;
7675
}
7776

@@ -91,12 +90,12 @@ export class IndexSearcher {
9190
// Remove all docs with fewer matches.
9291
for (const [docId, res] of shouldDocs) {
9392
if (res.length >= msm) {
94-
if (docResults.has(docId)) {
95-
docResults.get(docId).push(...res);
93+
if (queryResults.has(docId)) {
94+
queryResults.get(docId).push(...res);
9695
} else if (empty) {
97-
docResults.set(docId, res);
96+
queryResults.set(docId, res);
9897
} else {
99-
docResults.delete(docId);
98+
queryResults.delete(docId);
10099
}
101100
}
102101
}
@@ -105,8 +104,8 @@ export class IndexSearcher {
105104
let notDocs = this._getAll(query.not.values, false);
106105
// Remove all docs.
107106
for (const docId of notDocs.keys()) {
108-
if (docResults.has(docId)) {
109-
docResults.delete(docId);
107+
if (queryResults.has(docId)) {
108+
queryResults.delete(docId);
110109
}
111110
}
112111
}
@@ -115,43 +114,43 @@ export class IndexSearcher {
115114
case "term": {
116115
const cps = toCodePoints(query.value);
117116
let termIdx = InvertedIndex.getTermIndex(cps, root);
118-
this._scorer.score(fieldName, boost, termIdx, doScoring, docResults, cps);
117+
this._scorer.score(fieldName, boost, termIdx, doScoring, queryResults, cps);
119118
break;
120119
}
121120
case "terms": {
122121
for (let i = 0; i < query.value.length; i++) {
123122
const cps = toCodePoints(query.value[i]);
124123
let termIdx = InvertedIndex.getTermIndex(cps, root);
125-
this._scorer.score(fieldName, boost, termIdx, doScoring, docResults, cps);
124+
this._scorer.score(fieldName, boost, termIdx, doScoring, queryResults, cps);
126125
}
127126
break;
128127
}
129128
case "fuzzy": {
130129
const f = fuzzySearch(query, root);
131130
for (let i = 0; i < f.length; i++) {
132-
this._scorer.score(fieldName, boost * f[i].boost, f[i].index, doScoring, docResults, f[i].term);
131+
this._scorer.score(fieldName, boost * f[i].boost, f[i].index, doScoring, queryResults, f[i].term);
133132
}
134133
break;
135134
}
136135
case "wildcard": {
137136
const enableScoring = query.enable_scoring !== undefined ? query.enable_scoring : false;
138137
const w = wildcardSearch(query, root);
139138
for (let i = 0; i < w.length; i++) {
140-
this._scorer.score(fieldName, boost, w[i].index, doScoring && enableScoring, docResults, w[i].term);
139+
this._scorer.score(fieldName, boost, w[i].index, doScoring && enableScoring, queryResults, w[i].term);
141140
}
142141
break;
143142
}
144143
case "match_all": {
145144
for (let docId of this._docs) {
146-
this._scorer.scoreConstant(boost, docId, docResults);
145+
this._scorer.scoreConstant(boost, docId, queryResults);
147146
}
148147
break;
149148
}
150149
case "constant_score": {
151-
let tmpDocResults = this._getAll(query.filter.values, false);
150+
let tmpQueryResults = this._getAll(query.filter.values, false);
152151
// Add to each document a constant score.
153-
for (const docId of tmpDocResults.keys()) {
154-
this._scorer.scoreConstant(boost, docId, docResults);
152+
for (const docId of tmpQueryResults.keys()) {
153+
this._scorer.scoreConstant(boost, docId, queryResults);
155154
}
156155
break;
157156
}
@@ -162,15 +161,15 @@ export class IndexSearcher {
162161
if (termIdx !== null) {
163162
const termIdxs = InvertedIndex.extendTermIndex(termIdx);
164163
for (let i = 0; i < termIdxs.length; i++) {
165-
this._scorer.score(fieldName, boost, termIdxs[i].index, doScoring && enableScoring, docResults, [...cps, ...termIdxs[i].term]);
164+
this._scorer.score(fieldName, boost, termIdxs[i].index, doScoring && enableScoring, queryResults, [...cps, ...termIdxs[i].term]);
166165
}
167166
}
168167
break;
169168
}
170169
case "exists": {
171170
if (root !== null) {
172171
for (const docId of this._invIdxs[fieldName].documentStore.keys()) {
173-
this._scorer.scoreConstant(boost, docId, docResults);
172+
this._scorer.scoreConstant(boost, docId, queryResults);
174173
}
175174
}
176175
break;
@@ -210,56 +209,56 @@ export class IndexSearcher {
210209
} else {
211210
tmpQuery = tmpQuery.endMust();
212211
}
213-
docResults = this._recursive(tmpQuery.build().query, doScoring);
212+
queryResults = this._recursive(tmpQuery.build().query, doScoring);
214213

215214
break;
216215
}
217216
default:
218217
break;
219218
}
220-
return docResults;
219+
return queryResults;
221220
}
222221

223-
private _getUnique(queries: any[], doScoring: boolean, docResults: DocResults) {
222+
private _getUnique(queries: any[], doScoring: boolean, queryResults: QueryResults) {
224223
if (queries.length === 0) {
225-
return docResults;
224+
return queryResults;
226225
}
227226

228227
for (let i = 0; i < queries.length; i++) {
229228
let currDocs = this._recursive(queries[i], doScoring);
230-
if (docResults === null) {
231-
docResults = this._recursive(queries[0], doScoring);
229+
if (queryResults === null) {
230+
queryResults = this._recursive(queries[0], doScoring);
232231
continue;
233232
}
234233

235-
for (const docId of docResults.keys()) {
234+
for (const docId of queryResults.keys()) {
236235
if (!currDocs.has(docId)) {
237-
docResults.delete(docId);
236+
queryResults.delete(docId);
238237
} else {
239-
docResults.get(docId).push(...currDocs.get(docId));
238+
queryResults.get(docId).push(...currDocs.get(docId));
240239
}
241240
}
242241
}
243-
return docResults;
242+
return queryResults;
244243
}
245244

246245
private _getAll(queries: any[], doScoring: boolean) {
247-
let docResults: DocResults = new Map();
246+
let queryResults: QueryResults = new Map();
248247
for (let i = 0; i < queries.length; i++) {
249248
let currDocs = this._recursive(queries[i], doScoring);
250249
for (const docId of currDocs.keys()) {
251-
if (!docResults.has(docId)) {
252-
docResults.set(docId, currDocs.get(docId));
250+
if (!queryResults.has(docId)) {
251+
queryResults.set(docId, currDocs.get(docId));
253252
} else {
254-
docResults.get(docId).push(...currDocs.get(docId));
253+
queryResults.get(docId).push(...currDocs.get(docId));
255254
}
256255
}
257256
}
258-
return docResults;
257+
return queryResults;
259258
}
260259
}
261260

262-
type FuzzyResult = {index: Index, term: number[], boost: number};
261+
type FuzzyResult = { index: Index, term: number[], boost: number };
263262

264263
/**
265264
* Performs a fuzzy search.
@@ -378,7 +377,7 @@ function fuzzySearch(query: FuzzyQuery, root: Index): FuzzyResult[] {
378377
return result;
379378
}
380379

381-
type WildcardResult = {index: Index, term: number[]};
380+
type WildcardResult = { index: Index, term: number[] };
382381

383382
/**
384383
* Performs a wildcard search.

packages/full-text-search/src/query_builder.ts

+15-4
Original file line numberDiff line numberDiff line change
@@ -752,24 +752,34 @@ export class QueryBuilder {
752752
/**
753753
* The query performs a final scoring over all scored sub queries.
754754
* @param {boolean} enable - flag to enable or disable final scoring
755-
* @return {QueryBuilder}
755+
* @return {this}
756756
*/
757757
enableFinalScoring(enable: boolean) {
758758
this._data.final_scoring = enable;
759759
return this;
760760
}
761761

762+
/**
763+
* Adds an explanation of the scoring of each document for all matched terms.
764+
* @param {boolean} enable - flag to enable or disable explanation
765+
* @returns {this}
766+
*/
767+
explain(enable: boolean) {
768+
this._data.explain = enable;
769+
return this;
770+
}
771+
762772
/**
763773
* Configures the [Okapi BM25]{@link https://en.wikipedia.org/wiki/Okapi_BM25} as scoring model.
764774
*
765775
* See also [Lucene#BM25Similarity]{@link https://lucene.apache.org/core/6_4_0/core/org/apache/lucene/search/similarities/BM25Similarity.html}
766776
* and [Elasticsearch#BM25]{@link https://www.elastic.co/guide/en/elasticsearch/guide/current/pluggable-similarites.html#bm25}.
767777
*
768778
* @param {number} [k1=1.2] - controls how quickly an increase in term frequency results in term-frequency saturation.
769-
* Lower values result in quicker saturation, and higher values in slower saturation.
779+
* Lower values result in quicker saturation, and higher values in slower saturation
770780
* @param {number} [b=0.75] - controls how much effect field-length normalization should have.
771-
* A value of 0.0 disables normalization completely, and a value of 1.0 normalizes fully.
772-
* @return {QueryBuilder}
781+
* A value of 0.0 disables normalization completely, and a value of 1.0 normalizes fully
782+
* @return {this}
773783
*/
774784
BM25Similarity(k1: number = 1.2, b: number = 0.75) {
775785
if (k1 < 0) {
@@ -841,6 +851,7 @@ export type QueryTypes = BoolQuery | ConstantScoreQuery | TermQuery | TermsQuery
841851
export interface Query {
842852
query: QueryTypes;
843853
final_scoring?: boolean;
854+
explain?: boolean;
844855
bm25?: {
845856
k1: number;
846857
b: number;

0 commit comments

Comments
 (0)