Skip to content

Commit

Permalink
fix(full-text-search): fix fuzzy's prefix length, edit distance and idf (#68)
Browse files Browse the repository at this point in the history
  • Loading branch information
Viatorus authored Jan 14, 2018
1 parent ff74219 commit da06836
Show file tree
Hide file tree
Showing 6 changed files with 234 additions and 48 deletions.
8 changes: 4 additions & 4 deletions packages/full-text-search/spec/generic/search/fuzzy.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ describe("fuzzy query", () => {
query = new QB().fuzzy("body", "ddddx").prefixLength(5).fuzziness(2).build();
assertMatches(fts, query);

// Without prefix length (default should be 2).
// Without prefix length (default should be 0).
query = new QB().fuzzy("body", "aaaab").fuzziness(2).build();
assertMatches(fts, query, [0, 1, 2, 3]);
query = new QB().fuzzy("body", "aaabb").fuzziness(2).build();
assertMatches(fts, query, [0, 1, 2, 3]);
query = new QB().fuzzy("body", "aabbb").fuzziness(2).build();
assertMatches(fts, query, [1, 2, 3]);
assertMatches(fts, query, [0, 1, 2, 3, 4]);
query = new QB().fuzzy("body", "abbbb").fuzziness(2).build();
assertMatches(fts, query, [2, 3, 4, 5]);

// Empty.
query = new QB().fuzzy("body", "").build();
Expand Down
34 changes: 17 additions & 17 deletions packages/full-text-search/spec/node/MOCK_DATA.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
export const DATA = [
{
"id": 1,
"msg": "varius nulla facilisi cras non velit nec nisi vulputate nonummy maecenas tincidunt lacus at velit vivamus vel nulla eget eros elementum pellentesque quisque porta volutpat erat quisque erat eros viverra eget congue eget semper rutrum nulla nunc purus"
"msg": "varius nulla facilisi cras non velit nec nisi vulputate nonummy maecenas tincidunt lacus at velit vivamus vel nulla eget eros elementum pellentesque quisque porta volutpat erat quisque erat eros viverra eget congue eget semper rutrum nulla nunc purus "
},
{
"id": 2,
Expand Down Expand Up @@ -658,67 +658,67 @@ export const DATA = [
},
{
"id": 184,
"msg": "ultrices phasellus id sapien in sapien iaculis congue vivamus metus arcu adipiscing molestie hendrerit at vulputate vitae nisl aenean lectus pellentesque eget nunc donec"
"msg": "ultrices justns phasellus id sapien in sapien iaculis congue vivamus metus arcu adipiscing molestie hendrerit at vulputate vitae nisl aenean lectus pellentesque eget nunc donec"
},
{
"id": 185,
"msg": "pede malesuada in imperdiet et commodo vulputate justo in blandit ultrices enim lorem ipsum dolor sit amet consectetuer adipiscing elit proin interdum mauris non ligula pellentesque ultrices phasellus id"
"msg": "pede jusnt malesuada in imperdiet et commodo vulputate justo in blandit ultrices enim lorem ipsum dolor sit amet consectetuer adipiscing elit proin interdum mauris non ligula pellentesque ultrices phasellus id"
},
{
"id": 186,
"msg": "lorem ipsum dolor sit amet consectetuer adipiscing elit proin interdum mauris non ligula pellentesque ultrices phasellus id sapien in sapien iaculis congue vivamus metus arcu adipiscing molestie hendrerit at vulputate vitae"
"msg": "lorem justn ipsum dolor sit amet consectetuer adipiscing elit proin interdum mauris non ligula pellentesque ultrices phasellus id sapien in sapien iaculis congue vivamus metus arcu adipiscing molestie hendrerit at vulputate vitae"
},
{
"id": 187,
"msg": "congue risus semper porta volutpat quam pede lobortis ligula sit amet eleifend pede libero quis orci nullam molestie nibh in lectus pellentesque at nulla suspendisse potenti cras in purus eu magna vulputate luctus cum sociis natoque penatibus et magnis dis"
"msg": "congue ous risus semper porta volutpat quam pede lobortis ligula sit amet eleifend pede libero quis orci nullam molestie nibh in lectus pellentesque at nulla suspendisse potenti cras in purus eu magna vulputate luctus cum sociis natoque penatibus et magnis dis"
},
{
"id": 188,
"msg": "in felis eu sapien cursus vestibulum proin eu mi nulla ac enim in tempor turpis nec euismod scelerisque quam turpis adipiscing lorem vitae mattis nibh ligula nec sem duis aliquam convallis nunc proin at turpis a pede posuere nonummy integer non velit donec diam neque vestibulum eget vulputate"
"msg": "in felis jsu eu sapien cursus vestibulum proin eu mi nulla ac enim in tempor turpis nec euismod scelerisque quam turpis adipiscing lorem vitae mattis nibh ligula nec sem duis aliquam convallis nunc proin at turpis a pede posuere nonummy integer non velit donec diam neque vestibulum eget vulputate"
},
{
"id": 189,
"msg": "est quam pharetra magna ac consequat metus sapien ut nunc vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae mauris viverra diam vitae"
"msg": "est justo quam pharetra magna ac consequat metus sapien ut nunc vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae mauris viverra diam vitae"
},
{
"id": 190,
"msg": "a suscipit nulla elit ac nulla sed vel enim sit amet nunc viverra dapibus nulla suscipit ligula in lacus curabitur at ipsum ac"
"msg": "a suscipit jus nulla elit ac nulla sed vel enim sit amet nunc viverra dapibus nulla suscipit ligula in lacus curabitur at ipsum ac"
},
{
"id": 191,
"msg": "et magnis dis parturient montes nascetur ridiculus mus etiam vel augue vestibulum rutrum rutrum neque aenean auctor gravida sem praesent id massa id nisl venenatis lacinia aenean sit amet justo morbi ut odio cras mi pede malesuada in imperdiet et commodo"
"msg": "et magnis xus dis parturient montes nascetur ridiculus mus etiam vel augue vestibulum rutrum rutrum neque aenean auctor gravida sem praesent id massa id nisl venenatis lacinia aenean sit amet justo morbi ut odio cras mi pede malesuada in imperdiet et commodo"
},
{
"id": 192,
"msg": "lacus morbi sem mauris laoreet ut rhoncus aliquet pulvinar sed nisl nunc rhoncus dui vel sem sed sagittis nam congue risus semper porta volutpat quam pede lobortis ligula sit amet eleifend pede libero quis orci nullam molestie nibh in lectus pellentesque at nulla suspendisse"
"msg": "lacus eu morbi sem mauris laoreet ut rhoncus aliquet pulvinar sed nisl nunc rhoncus dui vel sem sed sagittis nam congue risus semper porta volutpat quam pede lobortis ligula sit amet eleifend pede libero quis orci nullam molestie nibh in lectus pellentesque at nulla suspendisse"
},
{
"id": 193,
"msg": "massa donec dapibus duis at velit eu est congue elementum in hac habitasse platea dictumst morbi vestibulum velit id pretium iaculis diam erat fermentum justo nec condimentum neque sapien placerat ante nulla justo aliquam quis turpis eget elit sodales scelerisque"
"msg": "massa usx donec dapibus duis at velit eu est congue elementum in hac habitasse platea dictumst morbi vestibulum velit id pretium iaculis diam erat fermentum justo nec condimentum neque sapien placerat ante nulla justo aliquam quis turpis eget elit sodales scelerisque"
},
{
"id": 194,
"msg": "convallis nunc proin at turpis a pede posuere nonummy integer non velit donec diam neque vestibulum eget vulputate ut ultrices vel augue vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere"
"msg": "convallis js js jus nunc proin at turpis a pede posuere nonummy integer non velit donec diam neque vestibulum eget vulputate ut ultrices vel augue vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere"
},
{
"id": 195,
"msg": "augue luctus tincidunt nulla mollis molestie lorem quisque ut erat curabitur gravida nisi at nibh in hac habitasse platea"
"msg": "augue luctus us tincidunt nulla mollis molestie lorem quisque ut erat curabitur gravida nisi at nibh in hac habitasse platea"
},
{"id": 196, "msg": "proin leo odio porttitor id consequat in consequat ut nulla sed accumsan"},
{
"id": 197,
"msg": "ut rhoncus aliquet pulvinar sed nisl nunc rhoncus dui vel sem sed sagittis nam congue risus semper porta volutpat quam pede lobortis ligula sit amet eleifend pede libero quis orci nullam molestie nibh in lectus pellentesque at nulla suspendisse potenti cras"
"msg": "ut rhoncus just aliquet pulvinar sed nisl nunc rhoncus dui vel sem sed sagittis nam congue risus semper porta volutpat quam pede lobortis ligula sit amet eleifend pede libero quis orci nullam molestie nibh in lectus pellentesque at nulla suspendisse potenti cras"
},
{
"id": 198,
"msg": "donec semper sapien a libero nam dui proin leo odio porttitor id consequat in consequat ut nulla sed accumsan felis ut at dolor quis odio consequat"
"msg": "donec semper jur sapien a libero nam dui proin leo odio porttitor id consequat in consequat ut nulla sed accumsan felis ut at dolor quis odio consequat"
},
{
"id": 199,
"msg": "pede justo eu massa donec dapibus duis at velit eu Est congue elementum in hac habitasse platea dictumst morbi vestibulum velit id pretium iaculis diam erat"
"msg": "pede jt massa donec dapibus duis at velit eu Est congue elementum in hac habitasse platea dictumst morbi vestibulum velit id pretium iaculis diam erat"
},
{
"id": 200,
"msg": "sagittis dui vel nisl duis ac nibh fusce lacus purus aliquet at feugiat non pretium quis lectus suspendisse potenti in eleifend quam"
"msg": "sagittis jut dui vel nisl duis ac nibh fusce lacus purus aliquet at feugiat non pretium quis lectus suspendisse potenti in eleifend quam"
}
];
140 changes: 135 additions & 5 deletions packages/full-text-search/spec/node/QUERIES.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ export const QUERIES = [
.build(),
es: {
fuzzy: {
[FIELD_NAME_1]: "a"
[FIELD_NAME_1]: {
value: "a",
transpositions: true
}
}
}
},
Expand All @@ -46,7 +49,10 @@ export const QUERIES = [
.build(),
es: {
fuzzy: {
[FIELD_NAME_1]: "este"
[FIELD_NAME_1]: {
value: "este",
transpositions: true
}
}
}
},
Expand All @@ -58,7 +64,8 @@ export const QUERIES = [
fuzzy: {
[FIELD_NAME_1]: {
value: "est",
prefix_length: 3
prefix_length: 3,
transpositions: true
}
}
}
Expand All @@ -72,7 +79,8 @@ export const QUERIES = [
[FIELD_NAME_1]: {
value: "ege",
prefix_length: 3,
fuzziness: 2
fuzziness: 2,
transpositions: true
}
}
},
Expand All @@ -86,7 +94,69 @@ export const QUERIES = [
fuzzy: {
[FIELD_NAME_1]: {
value: "est",
fuzziness: 0
fuzziness: 0,
transpositions: true
}
}
}
},
{
fts: new QB()
.fuzzy(FIELD_NAME_1, "just").fuzziness(2)
.build(),
es: {
fuzzy: {
[FIELD_NAME_1]: {
value: "just",
fuzziness: 2,
transpositions: true
}
}
}
},
{
fts: new QB()
.explain(true)
.fuzzy(FIELD_NAME_1, "jus").fuzziness(1)
.build(),
es: {
fuzzy: {
[FIELD_NAME_1]: {
value: "jus",
fuzziness: 1,
transpositions: true
}
}
}
},
{
fts: new QB()
.explain(true)
.fuzzy(FIELD_NAME_1, "jus").fuzziness(2).prefixLength(1)
.build(),
es: {
fuzzy: {
[FIELD_NAME_1]: {
value: "jus",
fuzziness: 2,
prefix_length: 1,
transpositions: true
}
}
}
},
{
fts: new QB()
.explain(true)
.fuzzy(FIELD_NAME_1, "js").fuzziness(2)
.build(),
es: {
fuzzy: {
[FIELD_NAME_1]: {
value: "js",
fuzziness: 2,
prefix_length: 0,
transpositions: true
}
}
}
Expand Down Expand Up @@ -206,6 +276,7 @@ export const QUERIES = [
},
{
fts: new QB()
.explain(true)
.bool()
.beginMust().term(FIELD_NAME_1, "a").term(FIELD_NAME_1, "ac").endMust()
.build(),
Expand All @@ -226,6 +297,65 @@ export const QUERIES = [
}
}
},
{
fts: new QB()
.explain(true)
.bool()
.beginMust().term(FIELD_NAME_1, "a").fuzzy(FIELD_NAME_1, "just").term(FIELD_NAME_1, "ac").endMust()
.build(),
es: {
bool: {
must: [
{
term: {
[FIELD_NAME_1]: "a"
}
},
{
fuzzy: {
[FIELD_NAME_1]: "just"
}
},
{
term: {
[FIELD_NAME_1]: "ac"
}
}
]
}
}
},
{
fts: new QB()
.explain(true)
.bool()
.beginMust().term(FIELD_NAME_1, "a").wildcard(FIELD_NAME_1, "j*").enableScoring(true).term(FIELD_NAME_1, "ac").endMust()
.build(),
es: {
bool: {
must: [
{
term: {
[FIELD_NAME_1]: "a"
}
},
{
wildcard: {
[FIELD_NAME_1]: {
value: "j*",
rewrite: "scoring_boolean"
}
}
},
{
term: {
[FIELD_NAME_1]: "ac"
}
}
]
}
}
},
{
fts: new QB()
.bool()
Expand Down
13 changes: 7 additions & 6 deletions packages/full-text-search/spec/node/elasticsearch.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ import {FullTextSearch} from "../../src/full_text_search";
import {Tokenizer} from "../../src/tokenizer";
import {Client} from "elasticsearch";
import {Scorer} from "../../src/scorer";
import * as util from "util";

const INDEX_NAME = "test_index";
const INDEX_TYPE = "MockUp";
const FIELD_NAME_1 = "msg";
const COMPARE_PRECISION = 1e4;
const COMPARE_PRECISION = 1e3;

function fieldLengthES5(fieldLength: number) {
// Lucene 5 uses a SmallFloat (size of 1 byte) to store the field length in scoring.
Expand Down Expand Up @@ -178,7 +177,7 @@ describe("Compare scoring against elasticsearch", () => {
}

// Check if esHits should be empty.
if (query.hasOwnProperty("empty") && query.empty === true) {
if (query.empty === true) {
expect(esHits.length).toEqual(0);
done();
return;
Expand All @@ -198,14 +197,14 @@ describe("Compare scoring against elasticsearch", () => {
continue;
}

let esScore = Math.round(esHits[j]._score * COMPARE_PRECISION) / COMPARE_PRECISION;
let ftsScore = Math.round(ftsHits[esID].score * COMPARE_PRECISION) / COMPARE_PRECISION;
let esScore = Math.round(esHits[j]._score * COMPARE_PRECISION) / COMPARE_PRECISION;

expect(esScore).toEqual(ftsScore);
expect(ftsScore).toEqual(esScore);
}
done();
})
.catch(() => {
.catch((e) => {
expect(false).toBe(true);
done();
});
Expand Down Expand Up @@ -237,6 +236,8 @@ describe("Compare scoring against elasticsearch", () => {
}
},
settings: {
number_of_shards: 1,
number_of_replicas: 1,
analysis: {
analyzer: {
my_analyzer: {
Expand Down
Loading

0 comments on commit da06836

Please sign in to comment.