From 0ea94b2df7157a81974ed73e1006578f326d8a99 Mon Sep 17 00:00:00 2001 From: Wilk Maia Date: Wed, 6 Apr 2022 12:44:59 -0300 Subject: [PATCH 1/6] feat: add support for better handling with large arrays --- bench.js | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++----- index.js | 55 +++++++++++++++++++++++++++++++++-- 2 files changed, 133 insertions(+), 9 deletions(-) diff --git a/bench.js b/bench.js index 2b353297..7adcf648 100644 --- a/bench.js +++ b/bench.js @@ -3,6 +3,10 @@ const benchmark = require('benchmark') const suite = new benchmark.Suite() +const STR_LEN = 1e4 +const LARGE_ARRAY_SIZE = 2e4 +const MULTI_ARRAY_LENGHT = 1e3 + const schema = { title: 'Example Schema', type: 'object', @@ -89,7 +93,8 @@ const obj = { const date = new Date() -const multiArray = [] +const multiArray = new Array(MULTI_ARRAY_LENGHT) +const largeArray = new Array(LARGE_ARRAY_SIZE) const CJS = require('compile-json-stringify') const CJSStringify = CJS(schemaCJS) @@ -99,7 +104,13 @@ const CJSStringifyString = CJS({ type: 'string' }) const FJS = require('.') const stringify = FJS(schema) -const stringifyArray = FJS(arraySchema) +const stringifyArrayDefault = FJS(arraySchema) +const stringifyArrayJSONStringify = FJS(arraySchema, { + largeArrayMechanism: 'json-stringify' +}) +const stringifyArrayArrayJoin = FJS(arraySchema, { + largeArrayMechanism: 'array-join' +}) const stringifyDate = FJS(dateFormatSchema) const stringifyString = FJS({ type: 'string' }) let str = '' @@ -110,18 +121,48 @@ const ajvSerialize = ajv.compileSerializer(schemaAJVJTD) const ajvSerializeArray = ajv.compileSerializer(arraySchemaAJVJTD) const ajvSerializeString = ajv.compileSerializer({ type: 'string' }) +const getRandomString = (length) => { + if (!Number.isInteger(length)) { + throw new Error('Expected integer length') + } + + const validCharacters = 'abcdefghijklmnopqrstuvwxyz' + const nValidCharacters = 26 + + let result = '' + for (let i = 0; i < length; ++i) { + result += 
validCharacters[Math.floor(Math.random() * nValidCharacters)] + } + + return result[0].toUpperCase() + result.slice(1) +} + // eslint-disable-next-line -for (var i = 0; i < 10000; i++) { +for (let i = 0; i < STR_LEN; i++) { + largeArray[i] = { + firstName: getRandomString(8), + lastName: getRandomString(6), + age: Math.ceil(Math.random() * 99) + } + str += i if (i % 100 === 0) { str += '"' } } +for (let i = STR_LEN; i < LARGE_ARRAY_SIZE; ++i) { + largeArray[i] = { + firstName: getRandomString(10), + lastName: getRandomString(4), + age: Math.ceil(Math.random() * 99) + } +} + Number(str) -for (i = 0; i < 1000; i++) { - multiArray.push(obj) +for (let i = 0; i < MULTI_ARRAY_LENGHT; i++) { + multiArray[i] = obj } suite.add('FJS creation', function () { @@ -138,8 +179,16 @@ suite.add('JSON.stringify array', function () { JSON.stringify(multiArray) }) -suite.add('fast-json-stringify array', function () { - stringifyArray(multiArray) +suite.add('fast-json-stringify array default', function () { + stringifyArrayDefault(multiArray) +}) + +suite.add('fast-json-stringify array json-stringify', function () { + stringifyArrayJSONStringify(multiArray) +}) + +suite.add('fast-json-stringify array array-join', function () { + stringifyArrayArrayJoin(multiArray) }) suite.add('compile-json-stringify array', function () { @@ -150,6 +199,30 @@ suite.add('AJV Serialize array', function () { ajvSerializeArray(multiArray) }) +suite.add('JSON.stringify large array', function () { + JSON.stringify(largeArray) +}) + +suite.add('fast-json-stringify large array default', function () { + stringifyArrayDefault(largeArray) +}) + +suite.add('fast-json-stringify large array json-stringify', function () { + stringifyArrayJSONStringify(largeArray) +}) + +suite.add('fast-json-stringify large array array-join', function () { + stringifyArrayArrayJoin(largeArray) +}) + +suite.add('compile-json-stringify large array', function () { + CJSStringifyArray(largeArray) +}) + +suite.add('AJV Serialize large 
array', function () { + ajvSerializeArray(largeArray) +}) + suite.add('JSON.stringify long string', function () { JSON.stringify(str) }) diff --git a/index.js b/index.js index 502f3cf5..0d326671 100644 --- a/index.js +++ b/index.js @@ -11,7 +11,14 @@ const fjsCloned = Symbol('fast-json-stringify.cloned') const { randomUUID } = require('crypto') const validate = require('./schema-validator') + let stringSimilarity = null +let largeArrayMechanism = 'default' +const validLargeArrayMechanisms = [ + 'default', + 'json-stringify', + 'array-join' +] const addComma = ` if (addComma) { @@ -73,6 +80,14 @@ function build (schema, options) { } } + if (options.largeArrayMechanism) { + if (validLargeArrayMechanisms.includes(options.largeArrayMechanism)) { + largeArrayMechanism = options.largeArrayMechanism + } else { + throw new Error(`Unsupported large array mechanism ${options.rounding}`) + } + } + /* eslint no-new-func: "off" */ let code = ` 'use strict' @@ -1028,7 +1043,9 @@ function buildArray (location, code, name, key = null) { } code += ` - var l = obj.length + var l = obj.length` + + const concatSnippet = ` var jsonOutput= '' for (var i = 0; i < l; i++) { var json = '' @@ -1040,7 +1057,32 @@ function buildArray (location, code, name, key = null) { } } return \`[\${jsonOutput}]\` + }` + + switch (largeArrayMechanism) { + case 'default': + break + + case 'json-stringify': + code += ` + if (l && l >= 20000) { + return JSON.stringify(obj) + }` + break + + case 'array-join': + code += ` + if (l && l >= 20000) { + return \`[\${obj.map(${result.mapFnName}).join(',')}]\` + }` + break + + default: + throw new Error(`Unsupported large array mechanism ${largeArrayMechanism}`) } + + code += ` + ${concatSnippet} ${result.laterCode} ` @@ -1148,22 +1190,27 @@ function nested (laterCode, name, key, location, subKey, isArray) { switch (type) { case 'null': + funcName = '$asNull' code += ` json += $asNull() ` break case 'string': { + funcName = '$asString' const stringSerializer = 
getStringSerializer(schema.format) code += nullable ? `json += obj${accessor} === null ? null : ${stringSerializer}(obj${accessor})` : `json += ${stringSerializer}(obj${accessor})` break } case 'integer': + funcName = '$asInteger' code += nullable ? `json += obj${accessor} === null ? null : $asInteger(obj${accessor})` : `json += $asInteger(obj${accessor})` break case 'number': + funcName = '$asNumber' code += nullable ? `json += obj${accessor} === null ? null : $asNumber(obj${accessor})` : `json += $asNumber(obj${accessor})` break case 'boolean': + funcName = '$asBoolean' code += nullable ? `json += obj${accessor} === null ? null : $asBoolean(obj${accessor})` : `json += $asBoolean(obj${accessor})` break case 'object': @@ -1181,6 +1228,7 @@ function nested (laterCode, name, key, location, subKey, isArray) { ` break case undefined: + funcName = '$asNull' if ('anyOf' in schema) { // beware: dereferenceOfRefs has side effects and changes schema.anyOf const anyOfLocations = dereferenceOfRefs(location, 'anyOf') @@ -1319,7 +1367,8 @@ function nested (laterCode, name, key, location, subKey, isArray) { return { code, - laterCode + laterCode, + mapFnName: funcName } } @@ -1335,6 +1384,8 @@ function isEmpty (schema) { module.exports = build +module.exports.validLargeArrayMechanisms = validLargeArrayMechanisms + module.exports.restore = function ({ code, ajv }) { // eslint-disable-next-line return (Function.apply(null, ['ajv', code]) From f44faf528f9d42344c91e5ecf102a8e361751eef Mon Sep 17 00:00:00 2001 From: Wilk Maia Date: Wed, 6 Apr 2022 15:26:01 -0300 Subject: [PATCH 2/6] chore: add large arrays section to readme --- README.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/README.md b/README.md index 247f6d39..91dd4c46 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ compile-json-stringify date format x 1,086,187 ops/sec ±0.16% (99 runs sampled) - `Long integers` - `Integers` - `Nullable` + - `Large Arrays` - 
`Security Notice` - `Acknowledgements` - `License` @@ -117,6 +118,7 @@ const stringify = fastJson(mySchema, { - `schema`: external schemas references by $ref property. [More details](#ref) - `ajv`: [ajv v8 instance's settings](https://ajv.js.org/options.html) for those properties that require `ajv`. [More details](#anyof) - `rounding`: setup how the `integer` types will be rounded when not integers. [More details](#integer) +- `largeArrayMechanism`: settle the mechanism that should be used to handle large (over `20000` items) arrays. [More details](#largearrays) @@ -582,6 +584,47 @@ Otherwise, instead of raising an error, null values will be coerced as follows: - `string` -> `""` - `boolean` -> `false` + +#### Large Arrays + +Large arrays are, for the scope of this document, defined as arrays containing +`20000` items or more. + +At some point the overhead caused by the default mechanism used by +`fast-json-stringify` to handle arrays starts increasing exponentially, leading +to slow overall executions. + +In order to improve that the user can set the `largeArrayMechanism` option with +one of the following values: + +- `default` - Default behavior +- `json-stringify` - This option will remove support for schema validation +within **large arrays** completely. By doing so the overhead previously +mentioned is nulled, greatly improving execution time. Mind there's no change +in behavior for arrays with less than `20000` items +- `array-join` - This option is a compromise between the last two. +`fastify-json-stringify` works by concatenating lots of string pieces into the +final JSON string. With this option set, **large arrays** would be stringified +by joining their elements' stringified versions using `Array.join`, instead +of string concatenation + +##### Benchmarks + +For reference, here goes some benchmarks for comparison over the three +mechanisms. Benchmarks conducted on an old machine. 
+ +- Machine: `ST1000LM024 HN-M 1TB HDD, Intel Core i7-3610QM @ 2.3Ghz, 12GB RAM, 4C/8T`. +- Node.js `v16.13.1` + +``` +JSON.stringify large array x 157 ops/sec ±0.73% (86 runs sampled) +fast-json-stringify large array default x 48.72 ops/sec ±4.92% (48 runs sampled) +fast-json-stringify large array json-stringify x 157 ops/sec ±0.76% (86 runs sampled) +fast-json-stringify large array array-join x 69.04 ops/sec ±4.47% (53 runs sampled) +compile-json-stringify large array x 175 ops/sec ±4.47% (79 runs sampled) +AJV Serialize large array x 58.76 ops/sec ±4.59% (60 runs sampled) +``` + ## Security notice From fafb9f40ff0b01603a722be74c1d13fe825f3f0f Mon Sep 17 00:00:00 2001 From: Wilk Maia Date: Wed, 6 Apr 2022 15:35:12 -0300 Subject: [PATCH 3/6] fix: fix typo on unit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 91dd4c46..e532bace 100644 --- a/README.md +++ b/README.md @@ -613,7 +613,7 @@ of string concatenation For reference, here goes some benchmarks for comparison over the three mechanisms. Benchmarks conducted on an old machine. -- Machine: `ST1000LM024 HN-M 1TB HDD, Intel Core i7-3610QM @ 2.3Ghz, 12GB RAM, 4C/8T`. +- Machine: `ST1000LM024 HN-M 1TB HDD, Intel Core i7-3610QM @ 2.3GHz, 12GB RAM, 4C/8T`. - Node.js `v16.13.1` ``` From bad848ad8516df3b46510649d5c80ce5e7289a2d Mon Sep 17 00:00:00 2001 From: Wilk Maia Date: Tue, 12 Apr 2022 10:42:53 -0300 Subject: [PATCH 4/6] chore: set array-join as default large array mechanism --- README.md | 14 ++++++-------- bench.js | 11 ----------- index.js | 21 ++++++++------------- 3 files changed, 14 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index e532bace..46445b4c 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ const stringify = fastJson(mySchema, { - `schema`: external schemas references by $ref property. 
[More details](#ref) - `ajv`: [ajv v8 instance's settings](https://ajv.js.org/options.html) for those properties that require `ajv`. [More details](#anyof) - `rounding`: setup how the `integer` types will be rounded when not integers. [More details](#integer) -- `largeArrayMechanism`: settle the mechanism that should be used to handle large (over `20000` items) arrays. [More details](#largearrays) +- `largeArrayMechanism`: set the mechanism that should be used to handle large (`20000` or more items) arrays. [More details](#largearrays) @@ -597,16 +597,15 @@ to slow overall executions. In order to improve that the user can set the `largeArrayMechanism` option with one of the following values: -- `default` - Default behavior +- `default` - This option is a compromise between performance and feature set by +still providing the expected functionality out of this lib but giving up some +possible performance gain. With this option set, **large arrays** would be +stringified by joining their stringified elements using `Array.join` instead of +string concatenation for better performance - `json-stringify` - This option will remove support for schema validation within **large arrays** completely. By doing so the overhead previously mentioned is nulled, greatly improving execution time. Mind there's no change in behavior for arrays with less than `20000` items -- `array-join` - This option is a compromise between the last two. -`fastify-json-stringify` works by concatenating lots of string pieces into the -final JSON string. With this option set, **large arrays** would be stringified -by joining their elements' stringified versions using `Array.join`, instead -of string concatenation ##### Benchmarks @@ -620,7 +619,6 @@ mechanisms. Benchmarks conducted on an old machine. 
JSON.stringify large array x 157 ops/sec ±0.73% (86 runs sampled) fast-json-stringify large array default x 48.72 ops/sec ±4.92% (48 runs sampled) fast-json-stringify large array json-stringify x 157 ops/sec ±0.76% (86 runs sampled) -fast-json-stringify large array array-join x 69.04 ops/sec ±4.47% (53 runs sampled) compile-json-stringify large array x 175 ops/sec ±4.47% (79 runs sampled) AJV Serialize large array x 58.76 ops/sec ±4.59% (60 runs sampled) ``` diff --git a/bench.js b/bench.js index 7adcf648..c51032c0 100644 --- a/bench.js +++ b/bench.js @@ -108,9 +108,6 @@ const stringifyArrayDefault = FJS(arraySchema) const stringifyArrayJSONStringify = FJS(arraySchema, { largeArrayMechanism: 'json-stringify' }) -const stringifyArrayArrayJoin = FJS(arraySchema, { - largeArrayMechanism: 'array-join' -}) const stringifyDate = FJS(dateFormatSchema) const stringifyString = FJS({ type: 'string' }) let str = '' @@ -187,10 +184,6 @@ suite.add('fast-json-stringify array json-stringify', function () { stringifyArrayJSONStringify(multiArray) }) -suite.add('fast-json-stringify array array-join', function () { - stringifyArrayArrayJoin(multiArray) -}) - suite.add('compile-json-stringify array', function () { CJSStringifyArray(multiArray) }) @@ -211,10 +204,6 @@ suite.add('fast-json-stringify large array json-stringify', function () { stringifyArrayJSONStringify(largeArray) }) -suite.add('fast-json-stringify large array array-join', function () { - stringifyArrayArrayJoin(largeArray) -}) - suite.add('compile-json-stringify large array', function () { CJSStringifyArray(largeArray) }) diff --git a/index.js b/index.js index 0d326671..d6d2a55a 100644 --- a/index.js +++ b/index.js @@ -16,8 +16,7 @@ let stringSimilarity = null let largeArrayMechanism = 'default' const validLargeArrayMechanisms = [ 'default', - 'json-stringify', - 'array-join' + 'json-stringify' ] const addComma = ` @@ -1043,9 +1042,12 @@ function buildArray (location, code, name, key = null) { } code += ` - var l = 
obj.length` + var l = obj.length + if (l && l >= 20000) {` const concatSnippet = ` + } + var jsonOutput= '' for (var i = 0; i < l; i++) { var json = '' @@ -1061,20 +1063,13 @@ function buildArray (location, code, name, key = null) { switch (largeArrayMechanism) { case 'default': - break - - case 'json-stringify': code += ` - if (l && l >= 20000) { - return JSON.stringify(obj) - }` + return \`[\${obj.map(${result.mapFnName}).join(',')}]\`` break - case 'array-join': + case 'json-stringify': code += ` - if (l && l >= 20000) { - return \`[\${obj.map(${result.mapFnName}).join(',')}]\` - }` + return JSON.stringify(obj)` break default: From 007916c1e51b86186d22e550762d4721d0e1fa63 Mon Sep 17 00:00:00 2001 From: Wilk Maia Date: Tue, 12 Apr 2022 11:13:12 -0300 Subject: [PATCH 5/6] chore: add tests for large array mechanisms --- test/array.test.js | 47 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/test/array.test.js b/test/array.test.js index 92431f88..ab255296 100644 --- a/test/array.test.js +++ b/test/array.test.js @@ -5,12 +5,12 @@ const test = require('tap').test const validator = require('is-my-json-valid') const build = require('..') -function buildTest (schema, toStringify) { +function buildTest (schema, toStringify, options) { test(`render a ${schema.title} as JSON`, (t) => { t.plan(3) const validate = validator(schema) - const stringify = build(schema) + const stringify = build(schema, options) const output = stringify(toStringify) t.same(JSON.parse(output), toStringify) @@ -319,3 +319,46 @@ test('object array with anyOf and symbol', (t) => { ]) t.equal(value, '[{"name":"name-0","option":"Foo"},{"name":"name-1","option":"Bar"}]') }) + +const largeArray = new Array(2e4).fill({ a: 'test', b: 1 }) +buildTest({ + title: 'large array with default mechanism', + type: 'object', + properties: { + ids: { + type: 'array', + items: { + type: 'object', + properties: { + a: { type: 'string' }, + b: { type: 'number' 
} + } + } + } + } +}, { + ids: largeArray +}, { + largeArrayMechanism: 'default' +}) + +buildTest({ + title: 'large array with json-stringify mechanism', + type: 'object', + properties: { + ids: { + type: 'array', + items: { + type: 'object', + properties: { + a: { type: 'string' }, + b: { type: 'number' } + } + } + } + } +}, { + ids: largeArray +}, { + largeArrayMechanism: 'json-stringify' +}) From ea8710677b7724834afc543f18880993b6cf1be2 Mon Sep 17 00:00:00 2001 From: Wilk Maia Date: Wed, 13 Apr 2022 11:34:06 -0300 Subject: [PATCH 6/6] feat: add option for the user to specify the large array size --- README.md | 27 +++++++++++++++++++++------ index.js | 11 ++++++++++- test/array.test.js | 1 + 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 46445b4c..bc6be5df 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,8 @@ const stringify = fastJson(mySchema, { - `schema`: external schemas references by $ref property. [More details](#ref) - `ajv`: [ajv v8 instance's settings](https://ajv.js.org/options.html) for those properties that require `ajv`. [More details](#anyof) - `rounding`: setup how the `integer` types will be rounded when not integers. [More details](#integer) -- `largeArrayMechanism`: set the mechanism that should be used to handle large (`20000` or more items) arrays. [More details](#largearrays) +- `largeArrayMechanism`: set the mechanism that should be used to handle large +(by default `20000` or more items) arrays. [More details](#largearrays) @@ -587,15 +588,20 @@ Otherwise, instead of raising an error, null values will be coerced as follows: #### Large Arrays -Large arrays are, for the scope of this document, defined as arrays containing -`20000` items or more. +Large arrays are, for the scope of this document, defined as arrays containing, +by default, `20000` elements or more. That value can be adjusted via the option +parameter `largeArraySize`. 
At some point the overhead caused by the default mechanism used by `fast-json-stringify` to handle arrays starts increasing exponentially, leading to slow overall executions. -In order to improve that the user can set the `largeArrayMechanism` option with -one of the following values: +##### Settings + +To mitigate this, the user can set the `largeArrayMechanism` and +`largeArraySize` options. + +`largeArrayMechanism`'s default value is `default`. Valid values for it are: - `default` - This option is a compromise between performance and feature set by still providing the expected functionality out of this lib but giving up some @@ -605,7 +611,16 @@ string concatenation for better performance - `json-stringify` - This option will remove support for schema validation within **large arrays** completely. By doing so the overhead previously mentioned is nulled, greatly improving execution time. Mind there's no change -in behavior for arrays with less than `20000` items +in behavior for arrays not considered _large_ + +`largeArraySize`'s default value is `20000`. Valid values for it are +integer-like values, such as: + +- `20000` +- `2e4` +- `'20000'` +- `'2e4'` - _note this will be converted to `2`, not `20000`_ +- `1.5` - _note this will be converted to `1`_ ##### Benchmarks diff --git a/index.js b/index.js index d6d2a55a..436fd7e0 100644 --- a/index.js +++ b/index.js @@ -12,6 +12,7 @@ const { randomUUID } = require('crypto') const validate = require('./schema-validator') +let largeArraySize = 2e4 let stringSimilarity = null let largeArrayMechanism = 'default' const validLargeArrayMechanisms = [ @@ -87,6 +88,14 @@ function build (schema, options) { } } + if (options.largeArraySize) { + if (!Number.isNaN(Number.parseInt(options.largeArraySize, 10))) { + largeArraySize = options.largeArraySize + } else { + throw new Error(`Unsupported large array size. 
Expected integer-like, got ${options.largeArraySize}`) + } + } + /* eslint no-new-func: "off" */ let code = ` 'use strict' @@ -1043,7 +1052,7 @@ function buildArray (location, code, name, key = null) { code += ` var l = obj.length - if (l && l >= 20000) {` + if (l && l >= ${largeArraySize}) {` const concatSnippet = ` } diff --git a/test/array.test.js b/test/array.test.js index ab255296..72bf72a8 100644 --- a/test/array.test.js +++ b/test/array.test.js @@ -339,6 +339,7 @@ buildTest({ }, { ids: largeArray }, { + largeArraySize: 2e4, largeArrayMechanism: 'default' })