Skip to content

Commit d611722

Browse files
authored
feat(loki): add diagnostic function to test binary index validity (#85)
Validating an index can take a while when the collection contains many documents, so random sampling can be used instead: random positions within a binary index are picked and each one is verified to be sequenced properly with its next neighbor. See techfort/LokiJS@1c2aa85
1 parent 4d1b25b commit d611722

File tree

2 files changed

+198
-2
lines changed

2 files changed

+198
-2
lines changed

packages/loki/spec/generic/binaryidx.spec.ts

+60-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ describe("binary indices", () => {
7676
}
7777

7878
// Add a collection to the database
79-
const dirtydata = db.addCollection<AB, {"some.b": any}>("dirtydata", {
79+
const dirtydata = db.addCollection<AB, { "some.b": any }>("dirtydata", {
8080
nestedProperties: ["some.b"],
8181
indices: ["some.b"]
8282
});
@@ -556,4 +556,63 @@ describe("binary indices", () => {
556556

557557
expect(newDatabase.getCollection("users").adaptiveBinaryIndices).toBe(false);
558558
});
559+
560+
it("checkIndex works", () => {
  const database = new Loki("bitest.db");
  const items = database.addCollection<{ a: number }>("bitest", {indices: ["a"]});
  items.insert([{a: 9}, {a: 3}, {a: 7}, {a: 0}, {a: 1}]);

  // A repairing check reports the (invalid) state found before the repair;
  // the check that follows must then see a valid index.
  const expectRepairRestoresIndex = () => {
    expect(items.checkIndex("a", {repair: true})).toBe(false);
    expect(items.checkIndex("a")).toBe(true);
  };

  // the freshly built index must be valid
  expect(items.checkIndex("a")).toBe(true);

  // corrupt the index by reversing its order and verify our index is now invalid
  items.binaryIndices["a"].values.reverse();
  expect(items.checkIndex("a")).toBe(false);

  // the check over all indices must report exactly that one index
  const invalidIndexes = items.checkAllIndexes();
  expect(invalidIndexes.length).toBe(1);
  expect(invalidIndexes[0]).toBe("a");

  // random sampling only has to run without throwing
  items.checkIndex("a", {randomSampling: true, randomSamplingFactor: .5});

  // repair the reversed index
  expectRepairRestoresIndex();

  // keep the ordering valid but drop the last value from the index
  items.binaryIndices["a"].values.pop();
  expect(items.checkIndex("a")).toBe(false);

  // repair the truncated index
  expectRepairRestoresIndex();

  // with everything repaired, no index may be reported
  expect(items.checkAllIndexes().length).toBe(0);

  // Keep only one document.
  items.clear();
  items.insert({a: 1});
  expect(items.checkIndex("a")).toBe(true);

  // Make index invalid: the only document lives at position 0, not 1.
  items.binaryIndices["a"].values = [1];
  expect(items.checkIndex("a")).toBe(false);

  // Repair.
  expectRepairRestoresIndex();
});
559618
});

packages/loki/src/collection.ts

+138-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import {LokiEventEmitter} from "./event_emitter";
22
import {UniqueIndex} from "./unique_index";
3-
import {ResultSet} from "./result_set";
3+
import {ResultSet, LokiOps} from "./result_set";
44
import {DynamicView} from "./dynamic_view";
55
import {ltHelper, gtHelper, aeqHelper} from "./helper";
66
import {Loki} from "./loki";
@@ -595,6 +595,137 @@ export class Collection<TData extends object = object, TNested extends object =
595595
this.dirty = true; // for autosave scenarios
596596
}
597597

598+
599+
/**
 * Perform checks to determine validity/consistency of a binary index.
 *
 * The index is reported as valid when it holds exactly one position per document,
 * every inspected position refers to an existing document, and the documents at
 * neighbouring index positions are ordered by `field` according to `LokiOps.$lte`.
 * The passed `options` object is only read, never modified.
 *
 * @param {string} field - the name of the binary-indexed field to check
 * @param {object=} options - optional configuration object
 * @param {boolean} [options.randomSampling=false] - whether (faster) random sampling should be used;
 *        implied when a truthy 'randomSamplingFactor' is given and this is not explicitly false
 * @param {number} [options.randomSamplingFactor=0.10] - fraction (between 0 and 1) that sizes the sample:
 *        floor((index length - 1) * factor) random positions are probed; falsy or out-of-range
 *        values fall back to 0.1
 * @param {boolean} [options.repair=false] - whether to rebuild the index if problems are encountered
 * @returns {boolean} whether the index was found to be valid (before optional correcting).
 * @throws {TypeError} if no binary index exists for `field` when the check starts
 * @example
 * // full test
 * var valid = coll.checkIndex('name');
 * // full test with repair (if issues found)
 * valid = coll.checkIndex('name', { repair: true });
 * // random sampling (default is 10% of total document count)
 * valid = coll.checkIndex('name', { randomSampling: true });
 * // random sampling (sample 20% of total document count)
 * valid = coll.checkIndex('name', { randomSampling: true, randomSamplingFactor: 0.20 });
 * // random sampling (implied boolean)
 * valid = coll.checkIndex('name', { randomSamplingFactor: 0.20 });
 * // random sampling with repair (if issues found)
 * valid = coll.checkIndex('name', { repair: true, randomSampling: true });
 */
public checkIndex(field: keyof (TData & TNested), options: Collection.CheckIndexOptions = {repair: false}): boolean {
  // When adaptive indices are disabled, let ensureIndex bring the index up to date first
  // (presumably it only rebuilds an index flagged as dirty - see ensureIndex).
  if (!this.adaptiveBinaryIndices) {
    this.ensureIndex(field);
  }

  // Resolve the options into locals so the caller's object is left untouched.
  // A truthy 'randomSamplingFactor' implies sampling unless 'randomSampling' is explicitly false.
  const useSampling = options.randomSampling !== false
    && Boolean(options.randomSampling || options.randomSamplingFactor);
  let samplingFactor = options.randomSamplingFactor || 0.1;
  if (samplingFactor < 0 || samplingFactor > 1) {
    samplingFactor = 0.1;
  }

  const index = this.binaryIndices[field];
  if (!index) {
    throw new TypeError("checkIndex called on a field without a binary index: " + String(field));
  }

  const data = this._data;
  const biv = index.values;
  const len = biv.length;

  // if the index has an incorrect number of values
  if (len !== data.length) {
    if (options.repair) {
      this.ensureIndex(field, true);
    }
    return false;
  }

  if (len === 0) {
    return true;
  }

  // True when the documents referenced by index positions 'pos' and 'pos + 1' both exist
  // and are in ascending order. A position pointing outside the data array means the
  // index is corrupt, so it is reported as invalid instead of crashing the check.
  const isSequenced = (pos: number): boolean => {
    const current = data[biv[pos]];
    const next = data[biv[pos + 1]];
    if (current === undefined || next === undefined) {
      return false;
    }
    return LokiOps.$lte(current[field], next[field]);
  };

  let valid = true;
  if (len === 1) {
    // a single document can only live at data position 0
    valid = (biv[0] === 0);
  } else if (useSampling) {
    // validate first and last positions against their nearest neighbor
    valid = isSequenced(0) && isSequenced(len - 2);

    // if both ends are fine, continue onto the random sampling phase
    if (valid) {
      // # random samplings = number of neighbor pairs * sampling factor
      const iter = Math.floor((len - 1) * samplingFactor);

      for (let idx = 0; idx < iter; idx++) {
        // pick a random position that still has a next neighbor
        const pos = Math.floor(Math.random() * (len - 1));
        if (!isSequenced(pos)) {
          valid = false;
          break;
        }
      }
    }
  } else {
    // full scan: every position must be sequenced properly with its next neighbor
    for (let idx = 0; idx < len - 1; idx++) {
      if (!isSequenced(idx)) {
        valid = false;
        break;
      }
    }
  }

  // if incorrectly sequenced and we are to fix problems, rebuild index
  if (!valid && options.repair) {
    this.ensureIndex(field, true);
  }

  return valid;
}
700+
701+
/**
 * Perform checks to determine validity/consistency of all binary indices.
 *
 * Runs checkIndex() with the given options for every key of `binaryIndices` and
 * collects the names of the indices for which it returned false.
 *
 * @param {object=} options - optional configuration object, handed to every checkIndex() call
 * @param {boolean} [options.randomSampling=false] - whether (faster) random sampling should be used
 * @param {number} [options.randomSamplingFactor=0.10] - fraction (between 0 and 1) of each index to randomly sample
 * @param {boolean} [options.repair=false] - whether to fix problems if they are encountered
 * @returns {string[]} array of index names where problems were found
 * @example
 * // check all indices on a collection, returns array of invalid index names
 * var result = coll.checkAllIndexes({ repair: true, randomSampling: true, randomSamplingFactor: 0.15 });
 * if (result.length > 0) {
 *   result.forEach(function(name) {
 *     console.log('problem encountered with index : ' + name);
 *   });
 * }
 */
public checkAllIndexes(options?: Collection.CheckIndexOptions): (keyof (TData & TNested))[] {
  // 'keyof' binds tighter than '&', so the operand has to be parenthesized to get
  // the keys of the combined document type (the same type checkIndex() accepts).
  const fields = Object.keys(this.binaryIndices) as (keyof (TData & TNested))[];
  return fields.filter((name) => !this.checkIndex(name, options));
}
728+
598729
public ensureUniqueIndex(field: keyof (TData & TNested)) {
599730
let index = new UniqueIndex<TData & TNested>(field);
600731

@@ -2161,6 +2292,12 @@ export namespace Collection {
21612292
_fullTextSearch: FullTextSearch;
21622293
}
21632294

2295+
/**
 * Options accepted by Collection#checkIndex and Collection#checkAllIndexes.
 */
export interface CheckIndexOptions {
2296+
// Check only randomly chosen index positions (plus the first and last) instead of scanning the whole index.
randomSampling?: boolean;
2297+
// Fraction (0 to 1) that sizes the random sample; checkIndex falls back to 0.1 for falsy or out-of-range values.
randomSamplingFactor?: number;
2298+
// Rebuild the index when the check finds it to be invalid.
repair?: boolean;
2299+
}
2300+
21642301
export type Transform<TData extends object = object, TNested extends object = object> = {
21652302
type: "find";
21662303
value: ResultSet.Query<Doc<TData & TNested>> | string;

0 commit comments

Comments
 (0)