Skip to content

Commit

Permalink
Allow custom popped attributes
Browse files Browse the repository at this point in the history
This new config option, `attributesToPop`, lets developers choose which
attributes get pruned when a record is too big.
  • Loading branch information
nitriques committed Jan 28, 2019
1 parent 9d23871 commit 85d4c57
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 6 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,13 @@ Only records that are old and match the filter will be deleted.

#### maxRecordSize: Integer

The maximum size in bytes of a record to be sent to Algolia. The default is 10,000 but could vary based on different plans.
The maximum size in bytes of a record to be sent to Algolia.
The default is 10,000 but could vary based on different plans.

#### attributesToPop: Array<String>

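When a record is too big (based on `maxRecordSize`), the crawler removes values from the end of the listed keys until the record fits, always pruning the key that currently holds the most values.
Use this attribute to configure which keys should be pruned when the record is too big. It defaults to `['text']`.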

#### index: Object

Expand Down
23 changes: 19 additions & 4 deletions lib/process.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const _trim = require('trim');
// Delegates to the `trim` package; any falsy input (undefined, null, '') yields null.
const trim = (s) => (s ? _trim(s) : null);
// Single shared XmlEntities instance from the html-entities package.
const entities = ((lib) => new lib.XmlEntities())(require('html-entities'));
// Default attribute names (presumably the node attributes inspected when
// extracting values; confirm against recursiveFindValue).
const defaultAttributes = ['content', 'value'];
// Record keys pruned by trimmer when config.attributesToPop is not set.
const defaultPoppedAttributes = ['text'];

const recursiveFindValue = (node, array, attribs) => {
if (!node || node.type === 'comment') {
Expand Down Expand Up @@ -158,12 +159,26 @@ const parse = (record, data, config) => {
};

/**
 * Shrinks `record` in place until its JSON serialization fits within
 * `config.maxRecordSize` bytes (counted as UTF-8).
 *
 * While the record is too big, the last value is popped from whichever
 * configured attribute currently holds the most values; on a tie, the
 * attribute listed first wins.
 *
 * @param {Object} record - Record to shrink; its array attributes are mutated.
 * @param {Object} config - Crawler configuration.
 * @param {number} config.maxRecordSize - Maximum record size, in bytes.
 * @param {string[]} [config.attributesToPop] - Keys that may be pruned;
 *   falls back to `defaultPoppedAttributes` when not set.
 * @throws {Error} When the record is still too big and none of the
 *   configured attributes has a value left to pop.
 */
const trimmer = (record, config) => {
  // Measure the serialized record in UTF-8 bytes, not UTF-16 code units.
  const jsonSize = (value) => {
    const json = JSON.stringify(value);
    // JSON.stringify yields undefined for non-serializable input.
    return json === undefined ? 0 : Buffer.byteLength(json, 'utf8');
  };
  const limit = config.maxRecordSize;
  const attributes = config.attributesToPop || defaultPoppedAttributes;

  while (jsonSize(record) > limit) {
    let longest = null;
    let max = 0;
    for (const attr of attributes) {
      const values = record[attr];
      // Only arrays can be popped: strings also expose `length`, but
      // selecting one would crash on `.pop()`.
      if (Array.isArray(values) && values.length > max) {
        max = values.length;
        longest = attr;
      }
    }
    if (longest === null) {
      throw new Error('Failed to find an attribute to pop');
    }
    record[longest].pop();
  }
};

Expand Down
32 changes: 31 additions & 1 deletion test/process.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,34 @@ test('maxRecordSize', (t) => {
process.trimmer(rec, c);
t.equal(rec.text.length, 6);
t.end();
});
});

// Only `text2` is listed in attributesToPop, so `text` must keep all of its values.
test('custom popped attribute', (t) => {
  const filler = 'aaaaaaaaaa';
  const record = {
    text: Array.from({ length: 5 }, () => filler),
    text2: Array.from({ length: 100 }, () => filler)
  };
  t.equal(record.text.length, 5);
  t.equal(record.text2.length, 100);

  const options = _.clone(config);
  options.attributesToPop = ['text2'];
  process.trimmer(record, options);

  t.equal(record.text.length, 5);
  t.equal(record.text2.length, 1);
  t.end();
});

// Both keys are listed in attributesToPop, so both are expected to be pruned.
test('custom popped attributes', (t) => {
  const filler = 'aaaaaaaaaa';
  const record = {
    text: Array.from({ length: 5 }, () => filler),
    text2: Array.from({ length: 100 }, () => filler)
  };
  t.equal(record.text.length, 5);
  t.equal(record.text2.length, 100);

  const options = _.clone(config);
  options.attributesToPop = ['text2', 'text'];
  process.trimmer(record, options);

  t.equal(record.text.length, 3);
  t.equal(record.text2.length, 3);
  t.end();
});

0 comments on commit 85d4c57

Please sign in to comment.