Skip to content

Commit

Permalink
feat(full-text-search): add an optional score explanation (#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
Viatorus authored Jan 2, 2018
1 parent 9b926e2 commit 9fde195
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 102 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ describe("full-text search", () => {
expect(dv.applySortByScoring(true).data()).toEqual(sorted_asc);
});

// With `explain` enabled on the query, the scoring entry at [0] must carry an explanation.
it("explain", () => {
  const explainQuery = new QueryBuilder()
    .explain(true)
    .fuzzy("name", "quak")
    .fuzziness(2)
    .build();
  const resultSet = coll.chain().find({"$fts": explainQuery});

  // The fuzzy query is expected to match four documents of the fixture collection.
  const matchedDocs = resultSet.data();
  expect(matchedDocs.length).toBe(4);

  const firstScoring = resultSet.getScoring()[0];
  expect(firstScoring.explanation).toBeArrayOfObjects();
});

it("from/to json", () => {
const fts = coll["_fullTextSearch"];
const fts2 = FullTextSearch.fromJSONObject(JSON.parse(JSON.stringify(fts)));
Expand Down
2 changes: 1 addition & 1 deletion packages/full-text-search/spec/node/elasticsearch.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ describe("Compare scoring against elasticsearch", () => {
}

let esScore = Math.round(esHits[j]._score * COMPARE_PRECISION) / COMPARE_PRECISION;
let ftsScore = Math.round(ftsHits[esID] * COMPARE_PRECISION) / COMPARE_PRECISION;
let ftsScore = Math.round(ftsHits[esID].score * COMPARE_PRECISION) / COMPARE_PRECISION;

expect(esScore).toEqual(ftsScore);
}
Expand Down
4 changes: 2 additions & 2 deletions packages/full-text-search/src/full_text_search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import {Tokenizer} from "./tokenizer";
import {Dict} from "../../common/types";
import {PLUGINS} from "../../common/plugin";
import {Query} from "./query_builder";
import {ScoreResult} from "./scorer";
import {Scorer} from "./scorer";

export class FullTextSearch {
private _id: string;
Expand Down Expand Up @@ -72,7 +72,7 @@ export class FullTextSearch {
}
}

public search(query: Query): ScoreResult {
public search(query: Query): Scorer.ScoreResult {
return this._idxSearcher.search(query);
}

Expand Down
95 changes: 47 additions & 48 deletions packages/full-text-search/src/index_searcher.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import {Scorer, ScoreResult} from "./scorer";
import {Scorer} from "./scorer";
import {InvertedIndex, toCodePoints} from "./inverted_index";
import {FuzzyQuery, Query, QueryBuilder, WildcardQuery} from "./query_builder";
import {Dict} from "../../common/types";
import {RunAutomaton} from "./fuzzy/run_automaton";
import {LevenshteinAutomata} from "./fuzzy/levenshtein_automata";
import DocResults = Scorer.DocResults;
import QueryResults = Scorer.QueryResults;
import Index = InvertedIndex.Index;



/**
* @hidden
*/
Expand All @@ -26,17 +25,17 @@ export class IndexSearcher {
this._scorer = new Scorer(this._invIdxs);
}

public search(query: Query): ScoreResult {
let docResults = this._recursive(query.query, true);
public search(query: Query): Scorer.ScoreResult {
let queryResults = this._recursive(query.query, true);

// Do final scoring.
if (query.final_scoring !== undefined ? query.final_scoring : true) {
return this._scorer.finalScore(query, docResults);
return this._scorer.finalScore(query, queryResults);
}

const result: ScoreResult = {};
for (const key of docResults.keys()) {
result[key] = 1;
const result: Scorer.ScoreResult = {};
for (const key of queryResults.keys()) {
result[key] = {score: 1};
}
return result;
}
Expand All @@ -46,7 +45,7 @@ export class IndexSearcher {
}

private _recursive(query: any, doScoring: boolean) {
let docResults: DocResults = new Map();
let queryResults: QueryResults = new Map();
const boost = query.boost !== undefined ? query.boost : 1;
const fieldName = query.field !== undefined ? query.field : null;

Expand All @@ -59,19 +58,19 @@ export class IndexSearcher {

switch (query.type) {
case "bool": {
docResults = null;
queryResults = null;
if (query.must !== undefined) {
docResults = this._getUnique(query.must.values, doScoring, docResults);
queryResults = this._getUnique(query.must.values, doScoring, queryResults);
}
if (query.filter !== undefined) {
docResults = this._getUnique(query.filter.values, false, docResults);
queryResults = this._getUnique(query.filter.values, false, queryResults);
}
if (query.should !== undefined) {
let shouldDocs = this._getAll(query.should.values, doScoring);

let empty = false;
if (docResults === null) {
docResults = new Map();
if (queryResults === null) {
queryResults = new Map();
empty = true;
}

Expand All @@ -91,12 +90,12 @@ export class IndexSearcher {
// Remove all docs with fewer matches.
for (const [docId, res] of shouldDocs) {
if (res.length >= msm) {
if (docResults.has(docId)) {
docResults.get(docId).push(...res);
if (queryResults.has(docId)) {
queryResults.get(docId).push(...res);
} else if (empty) {
docResults.set(docId, res);
queryResults.set(docId, res);
} else {
docResults.delete(docId);
queryResults.delete(docId);
}
}
}
Expand All @@ -105,8 +104,8 @@ export class IndexSearcher {
let notDocs = this._getAll(query.not.values, false);
// Remove all docs.
for (const docId of notDocs.keys()) {
if (docResults.has(docId)) {
docResults.delete(docId);
if (queryResults.has(docId)) {
queryResults.delete(docId);
}
}
}
Expand All @@ -115,43 +114,43 @@ export class IndexSearcher {
case "term": {
const cps = toCodePoints(query.value);
let termIdx = InvertedIndex.getTermIndex(cps, root);
this._scorer.score(fieldName, boost, termIdx, doScoring, docResults, cps);
this._scorer.score(fieldName, boost, termIdx, doScoring, queryResults, cps);
break;
}
case "terms": {
for (let i = 0; i < query.value.length; i++) {
const cps = toCodePoints(query.value[i]);
let termIdx = InvertedIndex.getTermIndex(cps, root);
this._scorer.score(fieldName, boost, termIdx, doScoring, docResults, cps);
this._scorer.score(fieldName, boost, termIdx, doScoring, queryResults, cps);
}
break;
}
case "fuzzy": {
const f = fuzzySearch(query, root);
for (let i = 0; i < f.length; i++) {
this._scorer.score(fieldName, boost * f[i].boost, f[i].index, doScoring, docResults, f[i].term);
this._scorer.score(fieldName, boost * f[i].boost, f[i].index, doScoring, queryResults, f[i].term);
}
break;
}
case "wildcard": {
const enableScoring = query.enable_scoring !== undefined ? query.enable_scoring : false;
const w = wildcardSearch(query, root);
for (let i = 0; i < w.length; i++) {
this._scorer.score(fieldName, boost, w[i].index, doScoring && enableScoring, docResults, w[i].term);
this._scorer.score(fieldName, boost, w[i].index, doScoring && enableScoring, queryResults, w[i].term);
}
break;
}
case "match_all": {
for (let docId of this._docs) {
this._scorer.scoreConstant(boost, docId, docResults);
this._scorer.scoreConstant(boost, docId, queryResults);
}
break;
}
case "constant_score": {
let tmpDocResults = this._getAll(query.filter.values, false);
let tmpQueryResults = this._getAll(query.filter.values, false);
// Add to each document a constant score.
for (const docId of tmpDocResults.keys()) {
this._scorer.scoreConstant(boost, docId, docResults);
for (const docId of tmpQueryResults.keys()) {
this._scorer.scoreConstant(boost, docId, queryResults);
}
break;
}
Expand All @@ -162,15 +161,15 @@ export class IndexSearcher {
if (termIdx !== null) {
const termIdxs = InvertedIndex.extendTermIndex(termIdx);
for (let i = 0; i < termIdxs.length; i++) {
this._scorer.score(fieldName, boost, termIdxs[i].index, doScoring && enableScoring, docResults, [...cps, ...termIdxs[i].term]);
this._scorer.score(fieldName, boost, termIdxs[i].index, doScoring && enableScoring, queryResults, [...cps, ...termIdxs[i].term]);
}
}
break;
}
case "exists": {
if (root !== null) {
for (const docId of this._invIdxs[fieldName].documentStore.keys()) {
this._scorer.scoreConstant(boost, docId, docResults);
this._scorer.scoreConstant(boost, docId, queryResults);
}
}
break;
Expand Down Expand Up @@ -210,56 +209,56 @@ export class IndexSearcher {
} else {
tmpQuery = tmpQuery.endMust();
}
docResults = this._recursive(tmpQuery.build().query, doScoring);
queryResults = this._recursive(tmpQuery.build().query, doScoring);

break;
}
default:
break;
}
return docResults;
return queryResults;
}

private _getUnique(queries: any[], doScoring: boolean, docResults: DocResults) {
private _getUnique(queries: any[], doScoring: boolean, queryResults: QueryResults) {
if (queries.length === 0) {
return docResults;
return queryResults;
}

for (let i = 0; i < queries.length; i++) {
let currDocs = this._recursive(queries[i], doScoring);
if (docResults === null) {
docResults = this._recursive(queries[0], doScoring);
if (queryResults === null) {
queryResults = this._recursive(queries[0], doScoring);
continue;
}

for (const docId of docResults.keys()) {
for (const docId of queryResults.keys()) {
if (!currDocs.has(docId)) {
docResults.delete(docId);
queryResults.delete(docId);
} else {
docResults.get(docId).push(...currDocs.get(docId));
queryResults.get(docId).push(...currDocs.get(docId));
}
}
}
return docResults;
return queryResults;
}

private _getAll(queries: any[], doScoring: boolean) {
let docResults: DocResults = new Map();
let queryResults: QueryResults = new Map();
for (let i = 0; i < queries.length; i++) {
let currDocs = this._recursive(queries[i], doScoring);
for (const docId of currDocs.keys()) {
if (!docResults.has(docId)) {
docResults.set(docId, currDocs.get(docId));
if (!queryResults.has(docId)) {
queryResults.set(docId, currDocs.get(docId));
} else {
docResults.get(docId).push(...currDocs.get(docId));
queryResults.get(docId).push(...currDocs.get(docId));
}
}
}
return docResults;
return queryResults;
}
}

type FuzzyResult = {index: Index, term: number[], boost: number};
type FuzzyResult = { index: Index, term: number[], boost: number };

/**
* Performs a fuzzy search.
Expand Down Expand Up @@ -378,7 +377,7 @@ function fuzzySearch(query: FuzzyQuery, root: Index): FuzzyResult[] {
return result;
}

type WildcardResult = {index: Index, term: number[]};
type WildcardResult = { index: Index, term: number[] };

/**
* Performs a wildcard search.
Expand Down
19 changes: 15 additions & 4 deletions packages/full-text-search/src/query_builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -752,24 +752,34 @@ export class QueryBuilder {
/**
* The query performs a final scoring over all scored sub queries.
* @param {boolean} enable - flag to enable or disable final scoring
* @return {QueryBuilder}
* @return {this}
*/
enableFinalScoring(enable: boolean) {
// Recorded as `final_scoring` on the query data; the builder itself is returned so calls can be chained.
this._data.final_scoring = enable;
return this;
}

/**
* Adds an explanation of the scoring of each document for all matched terms.
* The flag is stored as `explain` on the query data being built.
* @param {boolean} enable - flag to enable or disable explanation
* @return {this}
*/
explain(enable: boolean) {
this._data.explain = enable;
return this;
}

/**
* Configures the [Okapi BM25]{@link https://en.wikipedia.org/wiki/Okapi_BM25} as scoring model.
*
* See also [Lucene#BM25Similarity]{@link https://lucene.apache.org/core/6_4_0/core/org/apache/lucene/search/similarities/BM25Similarity.html}
* and [Elasticsearch#BM25]{@link https://www.elastic.co/guide/en/elasticsearch/guide/current/pluggable-similarites.html#bm25}.
*
* @param {number} [k1=1.2] - controls how quickly an increase in term frequency results in term-frequency saturation.
* Lower values result in quicker saturation, and higher values in slower saturation.
* Lower values result in quicker saturation, and higher values in slower saturation
* @param {number} [b=0.75] - controls how much effect field-length normalization should have.
* A value of 0.0 disables normalization completely, and a value of 1.0 normalizes fully.
* @return {QueryBuilder}
* A value of 0.0 disables normalization completely, and a value of 1.0 normalizes fully
* @return {this}
*/
BM25Similarity(k1: number = 1.2, b: number = 0.75) {
if (k1 < 0) {
Expand Down Expand Up @@ -841,6 +851,7 @@ export type QueryTypes = BoolQuery | ConstantScoreQuery | TermQuery | TermsQuery
export interface Query {
query: QueryTypes;
final_scoring?: boolean;
explain?: boolean;
bm25?: {
k1: number;
b: number;
Expand Down
Loading

0 comments on commit 9fde195

Please sign in to comment.