Skip to content

Commit

Permalink
added random sampling to collection.checkIndex
Browse files Browse the repository at this point in the history
since validating indices might take a while if the collection contains many documents, random sampling allows sampling random points within a binary index to verify it is sequenced properly with its next neighbor.
- added 'randomSampling' and 'randomSamplingFactor' (0.0 - 1.0) options
- added collection.checkAllIndexes which returns string array of invalid binary indices
  • Loading branch information
obeliskos committed Feb 4, 2018
1 parent 82b5eb0 commit 1c2aa85
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 42 deletions.
44 changes: 8 additions & 36 deletions benchmark/bindex-stress.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ console.log("");
// perform random permutations
console.log("performing " + ITERATION_COUNT + " random permutations (inserts/updates/removes)...");

var results, rnd, len;
var result, rnd, len;

for(idx=0;idx<ITERATION_COUNT;idx++) {
// randomly determine if this permutation will be insert/update/remove
Expand Down Expand Up @@ -82,43 +82,15 @@ console.log("b index length : " + coll.binaryIndices['b'].values.length);
console.log("c index length : " + coll.binaryIndices['c'].values.length);
console.log("");

// verify index ordering
var aIsValid=true, bIsValid=true, cIsValid=true;

// perform full index verification
console.log("verifying 'a' index ordering...");
results = coll.chain().simplesort("a").data();
len = results.length;
for(idx=0; idx<len-1; idx++) {
if (!loki.LokiOps.$lte(results[idx]['a'], results[idx+1]['a'])) {
aIsValid=false;
console.err("@idx:"+idx+ ": " + results[idx]['a'] + " is not $lte " + results[idx+1]['a']);
}
}
// validate the 'a' binary index and report the outcome.
// The ternary is parenthesized: '+' binds tighter than '?:', so without the
// parentheses the concatenated (always truthy) string becomes the condition
// and only "SUCCESSFUL" is ever printed.
result = coll.checkIndex('a');
console.log("'a' index validation " + (result ? "SUCCESSFUL" : "-FAILED-"));

console.log("verifying 'b' index ordering...");
results = coll.chain().simplesort("b").data();
len = results.length;
for(idx=0; idx<len-1; idx++) {
if (!loki.LokiOps.$lte(results[idx]['b'], results[idx+1]['b'])) {
bIsValid=false;
console.err("@idx:"+idx+ ": " + results[idx]['b'] + " is not $lte " + results[idx+1]['b']);
}
}
// validate the 'b' binary index and report the outcome.
// The message is labelled 'b' to match the index being checked, and the
// ternary is parenthesized: '+' binds tighter than '?:', so without the
// parentheses the concatenated (always truthy) string becomes the condition
// and only "SUCCESSFUL" is ever printed.
result = coll.checkIndex('b');
console.log("'b' index validation " + (result ? "SUCCESSFUL" : "-FAILED-"));

console.log("verifying 'c' index ordering...");
results = coll.chain().simplesort("c").data();
len = results.length;
for(idx=0; idx<len-1; idx++) {
if (!loki.LokiOps.$lte(results[idx]['c'], results[idx+1]['c'])) {
cIsValid=false;
console.err("@idx:"+idx+ ": " + results[idx]['c'] + " is not $lte " + results[idx+1]['c']);
}
}

console.log("");
if (!aIsValid || !bIsValid || !cIsValid) {
console.log("binary index order validation -FAILED-");
}
else {
console.log("binary index order validation SUCCESSFUL");
}
// validate the 'c' binary index and report the outcome.
// The ternary is parenthesized: '+' binds tighter than '?:', so without the
// parentheses the concatenated (always truthy) string becomes the condition
// and only "SUCCESSFUL" is ever printed.
result = coll.checkIndex('c');
console.log("'c' index validation " + (result ? "SUCCESSFUL" : "-FAILED-"));
2 changes: 1 addition & 1 deletion build/lokijs.min.js

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions spec/generic/binaryidx.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,14 @@ describe('binary indices', function () {
// verify our index is now invalid
expect(coll.checkIndex('a')).toBe(false);

// also verify our test of all indices reports false
var result = coll.checkAllIndexes();
expect(result.length).toBe(1);
expect(result[0]).toBe('a');

// let's just make sure that random sampling doesn't throw error
coll.checkIndex('a', { randomSampling: true, randomSamplingFactor: .5 });

// now have checkindex repair the index
// also expect it to report that it was invalid before fixing
expect(coll.checkIndex('a', { repair: true })).toBe(false);
Expand All @@ -571,6 +579,9 @@ describe('binary indices', function () {

// now expect it to report that the index is valid
expect(coll.checkIndex('a')).toBe(true);

// verify the check all indexes function returns empty array
expect(coll.checkAllIndexes().length).toBe(0);
});
});
});
83 changes: 78 additions & 5 deletions src/lokijs.js
Original file line number Diff line number Diff line change
Expand Up @@ -5034,18 +5034,53 @@
this.dirty = true; // for autosave scenarios
};

/**
 * Run checkIndex against every binary index of this collection and collect
 * the names of the ones that did not pass.
 * The same options object is forwarded unchanged to each checkIndex call.
 * @param {object=} options - optional configuration object
 * @param {boolean} [options.randomSampling=false] - whether (faster) random sampling should be used
 * @param {number} [options.randomSamplingFactor=0.10] - percentage of total rows to randomly sample
 * @param {boolean} [options.repair=false] - whether to fix problems if they are encountered
 * @returns {string[]} names of the binary indices for which checkIndex returned a falsy result
 * @memberof Collection
 */
Collection.prototype.checkAllIndexes = function (options) {
  var indices = this.binaryIndices;
  var invalid = [];
  var name;

  for (name in indices) {
    // skip anything inherited through the prototype chain
    if (!hasOwnProperty.call(indices, name)) {
      continue;
    }

    if (!this.checkIndex(name, options)) {
      invalid.push(name);
    }
  }

  return invalid;
};

/**
* Perform checks to determine validity/consistency of a binary index
* @param {string} property - name of the binary-indexed property to check
* @param {object=} options - optional configuration object
* @param {boolean} [options.randomSampling=false] - whether (faster) random sampling should be used
* @param {number} [options.randomSamplingFactor=0.10] - percentage of total rows to randomly sample
* @param {boolean} [options.repair=false] - whether to fix problems if they are encountered
* @returns {boolean} whether the index was found to be incorrect (before optional correcting).
* @returns {boolean} whether the index was found to be valid (before optional correcting).
* @memberof Collection
*/
Collection.prototype.checkIndex = function (property, options) {
options = options || {};
// if 'randomSamplingFactor' specified but not 'randomSampling', assume true
if (options.randomSamplingFactor && options.randomSampling !== false) {
options.randomSampling = true;
}
options.randomSamplingFactor = options.randomSamplingFactor || 0.1;
if (options.randomSamplingFactor < 0 || options.randomSamplingFactor > 1) {
options.randomSamplingFactor = 0.1;
}

var valid=true, idx, len, biv;
var valid=true, idx, iter, pos, len, biv;

// make sure we are passed a valid binary index name
if (!this.binaryIndices.hasOwnProperty(property)) {
Expand All @@ -5068,11 +5103,49 @@
return false;
}

// validate that the binary index is sequenced properly
for(idx=0; idx<len-1; idx++) {
if (!LokiOps.$lte(this.data[biv[idx]][property], this.data[biv[idx+1]][property])) {
if (len === 0) {
return true;
}

if (len === 1) {
valid = (biv[0] === 0);
}

if (options.randomSampling) {
// validate first and last
if (!LokiOps.$lte(this.data[biv[0]][property], this.data[biv[1]][property])) {
valid=false;
}
if (!LokiOps.$lte(this.data[biv[len-2]][property], this.data[biv[len-1]][property])) {
valid=false;
}

// if first and last positions are sorted correctly with their nearest neighbor,
// continue onto random sampling phase...
if (valid) {
// # random samplings = total count * sampling factor
iter = Math.floor((len-1) * options.randomSamplingFactor);

// for each random sampling, validate that the binary index is sequenced properly
// with next higher value.
for(idx=0; idx<len-1; idx++) {
// calculate random position
pos = Math.floor(Math.random() * (len-1));
if (!LokiOps.$lte(this.data[biv[pos]][property], this.data[biv[pos+1]][property])) {
valid=false;
break;
}
}
}
}
else {
// validate that the binary index is sequenced properly
for(idx=0; idx<len-1; idx++) {
if (!LokiOps.$lte(this.data[biv[idx]][property], this.data[biv[idx+1]][property])) {
valid=false;
break;
}
}
}

// if incorrectly sequenced and we are to fix problems, rebuild index
Expand Down

0 comments on commit 1c2aa85

Please sign in to comment.