Skip to content

Commit

Permalink
feat(full-text-search): add full-text-search as separate package (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
Viatorus authored Oct 8, 2017
1 parent 67d61ac commit 14b9947
Show file tree
Hide file tree
Showing 28 changed files with 5,176 additions and 7 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@ addons:
- google-chrome
packages:
- google-chrome-stable
- oracle-java8-set-default
- python3
- python3-pip

before_install:
- curl -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.4.0.deb && sudo dpkg -i --force-confnew elasticsearch-5.4.0.deb && sudo service elasticsearch start
- pip3 install mkdocs
- export CHROME_BIN=chromium-browser
- export DISPLAY=:99.0
- sh -e /etc/init.d/xvfb start

script:
- npm run lint || travis_terminate 1
- npm test || travis_terminate 1
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ The following is the list of supported scopes:
* **local-storage**: The local storage adapter.
* **indexed-storage**: The indexed db storage adapter.
* **fs-storage**: The file system storage adapter.
* **fts**: A full text search for the database.
* **full-text-search**: The full text search engine.

There are currently a few exceptions to the "use package name" rule:

Expand Down
18 changes: 13 additions & 5 deletions config/build.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ const stream = require("stream");
const conventionalChangelog = require("conventional-changelog");

const PACKAGES = [
"loki",
"partitioning-adapter",
"local-storage",
"indexed-storage",
"fs-storage"
// "loki",
// "partitioning-adapter",
// "local-storage",
// "indexed-storage",
// "fs-storage",
"full-text-search"
];

const ROOT_DIR = process.cwd();
Expand Down Expand Up @@ -194,6 +195,13 @@ function build() {
}
}
}
if (json.optionalDependencies) {
for (let pack of Object.keys(json.optionalDependencies)) {
if (pack.startsWith("@lokijs/")) {
json.optionalDependencies[pack] = VERSION;
}
}
}
fs.writeFileSync(`/${NPM_DIR}/package.json`, JSON.stringify(json, null, 2));
}
}
Expand Down
1 change: 1 addition & 0 deletions config/eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ module.exports = {
"always"
],
"arrow-parens": [2, "always"],
"no-constant-condition": 0
}
};
46 changes: 46 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"babel-register": "^6.26.0",
"conventional-changelog": "^1.1.5",
"coveralls": "^2.13.1",
"elasticsearch": "^13.3.1",
"eslint": "^4.6.1",
"eslint-loader": "^1.9.0",
"istanbul-instrumenter-loader": "^3.0.0",
Expand Down
14 changes: 14 additions & 0 deletions packages/full-text-search/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"name": "@lokijs/full-text-search",
"description": "A full text search engine.",
"author": "Various authors",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/LokiJS-Forge/LokiJS2.git"
},
"main": "lokijs.full-text-search.js",
"optionalDependencies": {
"@lokijs/loki": "0"
}
}
166 changes: 166 additions & 0 deletions packages/full-text-search/spec/generic/inverted_index.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/* global describe, it, expect */
import {InvertedIndex} from "../../src/inverted_index";

describe("inverted index", () => {

// Single index instance shared by the "get" .. "extendTermIndex" tests below.
// Those tests mutate it in sequence, so they depend on running in declaration order.
let ii = new InvertedIndex();

// Sample field contents used as documents throughout this spec.
let field1 = "Hello world, how are you today?!";
let field2 = "Well done world...";
let field3 = "I am good, and you?";
let field4 = "Now again inside today! You...";
let field5 = "Good bye NO! for all worlds...";

// Checks only the types of the properties exposed by a freshly created index.
it ("get", () => {
expect(ii.documentCount).toBeNumber();
expect(ii.documentStore).toBeObject();
expect(ii.totalFieldLength).toBeNumber();
expect(ii.tokenizer).toBeObject();
expect(ii.root).toBeObject();
});

// Populates the shared index `ii`; later tests rely on the documents inserted here.
it("insert", () => {
ii.insert(field1, 1);
// Re-using document id 1 is expected to be rejected with an Error.
expect(() =>ii.insert(field2, 1)).toThrowErrorOfType("Error");
ii.insert(field3, 2);

// Temporarily register a tokenizer function that always yields a single empty string,
// insert one document while it is active, then unregister it again.
// NOTE(review): presumably this exercises how insert() handles empty tokens - confirm.
ii.tokenizer.add("bad_tokenizer", () => [""]);
ii.insert(field4, 3);
ii.tokenizer.remove("bad_tokenizer");
ii.insert(field4, 4);
ii.insert(field5, 5);
});

// Removes documents from the shared index `ii`. There are no expectations here,
// so the test only fails if remove() throws.
it("remove", () => {
ii.remove(1);
ii.remove(4);
// Id 15 is never inserted by the "insert" test above.
// NOTE(review): presumably verifies that removing an unknown id is a no-op - confirm.
ii.remove(15);
});

// Looks up terms in the shared index via the static InvertedIndex.getTermIndex().
it("getTermIndex", () => {
expect(InvertedIndex.getTermIndex("you", ii.root)).not.toBe(null);
// NOTE(review): the third argument looks like a start offset into the term
// ("ayou" from 1 would be "you", and 10 is past the end of "you") - confirm against the implementation.
expect(InvertedIndex.getTermIndex("ayou", ii.root, 1)).not.toBe(null);
expect(InvertedIndex.getTermIndex("you", ii.root, 10)).toBe(null);
// A term that was never indexed yields null.
expect(InvertedIndex.getTermIndex("xyz1234", ii.root)).toBe(null);
});

// Exercises the static InvertedIndex.getNextTermIndex() on the shared index.
it("getNextTermIndex", () => {
// Called on the root without an expectation: only checks that it does not throw.
InvertedIndex.getNextTermIndex(ii.root);
let idx = InvertedIndex.getTermIndex("you", ii.root);
expect(InvertedIndex.getNextTermIndex(idx)).not.toBe(null);
});

// Checks only that InvertedIndex.extendTermIndex() returns an array for the root of the shared index.
it("extendTermIndex", () => {
expect(InvertedIndex.extendTermIndex(ii.root)).toBeArray();
});

// Verifies that an index survives a JSON round trip and that the restored index
// behaves like the original under further inserts and removes.
// Indices are compared through their JSON.stringify() output.
it("serialize", () => {
// ii1: reference index holding documents 1-3.
let ii1 = new InvertedIndex();
ii1.insert(field1, 1);
ii1.insert(field2, 2);
ii1.insert(field3, 3);

// ii2: same as ii1 plus document 4.
let ii2 = new InvertedIndex();
ii2.insert(field1, 1);
ii2.insert(field2, 2);
ii2.insert(field3, 3);
ii2.insert(field4, 4);

// ii3: ii2 serialized to JSON and restored.
let ii3 = InvertedIndex.fromJSONObject(JSON.parse(JSON.stringify(ii2)));

expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii2));
// Dropping document 4 from ii2 and ii3 must make both match ii1.
ii2.remove(4);
ii3.remove(4);
expect(JSON.stringify(ii2)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii2));

// Removing a different document from each index makes all three differ.
ii2.remove(1);
ii3.remove(2);
expect(JSON.stringify(ii2)).not.toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).not.toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).not.toEqual(JSON.stringify(ii2));

// After these removals documents 1 and 2 are gone from every index, so they match again.
ii1.remove(1);
ii1.remove(2);
ii2.remove(2);
ii3.remove(1);
expect(JSON.stringify(ii2)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii2));

// Round trip again, this time starting from an index that has already seen removals.
ii2 = InvertedIndex.fromJSONObject(JSON.parse(JSON.stringify(ii1)));
expect(JSON.stringify(ii2)).toEqual(JSON.stringify(ii1));

// Inserting and then removing document 5 must bring ii1 back to the same serialized form.
ii1.insert(field5, 5);
expect(JSON.stringify(ii2)).not.toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).not.toEqual(JSON.stringify(ii1));

ii1.remove(5);
expect(JSON.stringify(ii2)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii2));

// Check if still can be used
ii3.insert(field5, 6);
ii3.remove(6);
});

// Same scenario as the "serialize" test, but ii1 and ii2 are constructed with
// {optimizeChanges: false}; ii3 is restored from the serialized ii2.
// Indices are compared through their JSON.stringify() output.
it("serialize without optimization", () => {
// ii1: reference index holding documents 1-3.
let ii1 = new InvertedIndex({optimizeChanges: false});
ii1.insert(field1, 1);
ii1.insert(field2, 2);
ii1.insert(field3, 3);

// ii2: same as ii1 plus document 4.
let ii2 = new InvertedIndex({optimizeChanges: false});
ii2.insert(field1, 1);
ii2.insert(field2, 2);
ii2.insert(field3, 3);
ii2.insert(field4, 4);

// ii3: ii2 serialized to JSON and restored.
let ii3 = InvertedIndex.fromJSONObject(JSON.parse(JSON.stringify(ii2)));

expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii2));
// Dropping document 4 from ii2 and ii3 must make both match ii1.
ii2.remove(4);
ii3.remove(4);
expect(JSON.stringify(ii2)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii2));

// Removing a different document from each index makes all three differ.
ii2.remove(1);
ii3.remove(2);
expect(JSON.stringify(ii2)).not.toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).not.toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).not.toEqual(JSON.stringify(ii2));

// Compare with optimized inverted index.
// iio3 is built with default options and holds documents 1 and 3, the same set ii3 has left.
// Only the `root` trees are compared here, not the whole serialized index.
// NOTE(review): assumes the default constructor enables optimizeChanges - confirm.
let iio3 = new InvertedIndex();
iio3.insert(field1, 1);
iio3.insert(field3, 3);
expect(JSON.stringify(ii3.root)).toEqual(JSON.stringify(iio3.root));

// After these removals documents 1 and 2 are gone from every index, so they match again.
ii1.remove(1);
ii1.remove(2);
ii2.remove(2);
ii3.remove(1);
expect(JSON.stringify(ii2)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii2));

// Round trip again, this time starting from an index that has already seen removals.
ii2 = InvertedIndex.fromJSONObject(JSON.parse(JSON.stringify(ii1)));
expect(JSON.stringify(ii2)).toEqual(JSON.stringify(ii1));

// Inserting and then removing document 5 must bring ii1 back to the same serialized form.
ii1.insert(field5, 5);
expect(JSON.stringify(ii2)).not.toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).not.toEqual(JSON.stringify(ii1));

ii1.remove(5);
expect(JSON.stringify(ii2)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii1));
expect(JSON.stringify(ii3)).toEqual(JSON.stringify(ii2));

// Check if still can be used
ii3.insert(field5, 6);
ii3.remove(6);
});
});
38 changes: 38 additions & 0 deletions packages/full-text-search/spec/generic/languages/de.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import {DE} from "../../../src/language/de";

/**
 * German language fixture: the DE tokenizer, two sample documents and a list
 * of search cases to run against them.
 *
 * Each entry in `tests` has:
 *  - what:   human-readable description of the case,
 *  - search: the term to search for,
 *  - found:  the expected matches.
 * NOTE(review): `found` looks like a list of indices into `docs`; confirm against the spec that consumes this fixture.
 */
export const de = {
tokenizer: DE,
docs: [
"An Deutschland grenzen neun Nachbarländer und naturräumlich im Norden die Gewässer der Nord- und Ostsee, im Süden das Bergland der Alpen. Es liegt in der gemäßigten Klimazone, zählt mit rund 80 Millionen Einwohnern zu den dicht besiedelten Flächenstaaten und gilt international als das Land mit der dritthöchsten Zahl von Einwanderern. aufeinanderfolgenden. auffassen.",
"Deutschland als Urlaubsziel verfügt über günstige Voraussetzungen: Gebirgslandschaften (Alpen und Mittelgebirge), See- und Flusslandschaften, die Küsten und Inseln der Nord- und Ostsee, zahlreiche Kulturdenkmäler und eine Vielzahl geschichtsträchtiger Städte sowie gut ausgebaute Infrastruktur. Vorteilhaft ist die zentrale Lage in Europa."
],
tests: [{
what: "find the word",
search: "deutschland",
found: [0, 1]
}, {
what: "find the word",
search: "urlaubsziel",
found: [1]
}, {
// NOTE(review): "gewass" is presumably the normalized/stemmed form of "Gewässer" in docs[0] - confirm.
what: "find the word",
search: "gewass",
found: [0]
}, {
// NOTE(review): "verfugt" is presumably "verfügt" from docs[1] with the umlaut folded - confirm.
what: "find the word",
search: "verfugt",
found: [1]
}, {
what: "never find a word that does not exist, like",
search: "inexistent",
found: []
}, {
what: "never find a stop word like",
search: "und",
found: []
}, {
// The docs contain "auffassen", not "auffassung"; the match is expected via stemming.
what: "find a correctly stemmed word",
search: "auffassung",
found: [0]
}]
};
Loading

0 comments on commit 14b9947

Please sign in to comment.