Skip to content

Commit

Permalink
feat: add support for better handling with large arrays (#402)
Browse files Browse the repository at this point in the history
  • Loading branch information
wilkmaia authored Apr 17, 2022
1 parent d6d12e5 commit 66b79f0
Show file tree
Hide file tree
Showing 4 changed files with 227 additions and 10 deletions.
56 changes: 56 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ compile-json-stringify date format x 1,086,187 ops/sec ±0.16% (99 runs sampled)
- <a href="#long">`Long integers`</a>
- <a href="#integer">`Integers`</a>
- <a href="#nullable">`Nullable`</a>
- <a href="#largearrays">`Large Arrays`</a>
- <a href="#security">`Security Notice`</a>
- <a href="#acknowledgements">`Acknowledgements`</a>
- <a href="#license">`License`</a>
Expand Down Expand Up @@ -117,6 +118,8 @@ const stringify = fastJson(mySchema, {
- `schema`: external schemas references by $ref property. [More details](#ref)
- `ajv`: [ajv v8 instance's settings](https://ajv.js.org/options.html) for those properties that require `ajv`. [More details](#anyof)
- `rounding`: setup how the `integer` types will be rounded when not integers. [More details](#integer)
- `largeArrayMechanism`: set the mechanism that should be used to handle large
(by default `20000` or more items) arrays. [More details](#largearrays)


<a name="api"></a>
Expand Down Expand Up @@ -582,6 +585,59 @@ Otherwise, instead of raising an error, null values will be coerced as follows:
- `string` -> `""`
- `boolean` -> `false`

<a name="largearrays"></a>
#### Large Arrays

Large arrays are, for the scope of this document, defined as arrays containing,
by default, `20000` elements or more. That value can be adjusted via the option
parameter `largeArraySize`.

Beyond a certain array size, the overhead of the default mechanism that
`fast-json-stringify` uses to handle arrays starts increasing exponentially,
leading to slow overall execution.

##### Settings

To mitigate this overhead, the user can set the `largeArrayMechanism` and
`largeArraySize` options.

`largeArrayMechanism`'s default value is `default`. Valid values for it are:

- `default` - This option is a compromise between performance and feature set by
still providing the expected functionality out of this lib but giving up some
possible performance gain. With this option set, **large arrays** would be
stringified by joining their stringified elements using `Array.join` instead of
string concatenation for better performance
- `json-stringify` - This option will remove support for schema validation
within **large arrays** completely. By doing so the overhead previously
mentioned is nulled, greatly improving execution time. Mind there's no change
in behavior for arrays not considered _large_

`largeArraySize`'s default value is `20000`. Valid values for it are
integer-like values, such as:

- `20000`
- `2e4`
- `'20000'`
- `'2e4'` - _note this will be converted to `2`, not `20000`_
- `1.5` - _note this will be converted to `1`_

##### Benchmarks

For reference, here are some benchmarks comparing the available mechanisms
with other serializers. The benchmarks were run on an old machine.

- Machine: `ST1000LM024 HN-M 1TB HDD, Intel Core i7-3610QM @ 2.3GHz, 12GB RAM, 4C/8T`.
- Node.js `v16.13.1`

```
JSON.stringify large array x 157 ops/sec ±0.73% (86 runs sampled)
fast-json-stringify large array default x 48.72 ops/sec ±4.92% (48 runs sampled)
fast-json-stringify large array json-stringify x 157 ops/sec ±0.76% (86 runs sampled)
compile-json-stringify large array x 175 ops/sec ±4.47% (79 runs sampled)
AJV Serialize large array x 58.76 ops/sec ±4.59% (60 runs sampled)
```

<a name="security"></a>
## Security notice

Expand Down
76 changes: 69 additions & 7 deletions bench.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
const benchmark = require('benchmark')
const suite = new benchmark.Suite()

const STR_LEN = 1e4
const LARGE_ARRAY_SIZE = 2e4
const MULTI_ARRAY_LENGHT = 1e3

const schema = {
title: 'Example Schema',
type: 'object',
Expand Down Expand Up @@ -89,7 +93,8 @@ const obj = {

const date = new Date()

const multiArray = []
const multiArray = new Array(MULTI_ARRAY_LENGHT)
const largeArray = new Array(LARGE_ARRAY_SIZE)

const CJS = require('compile-json-stringify')
const CJSStringify = CJS(schemaCJS)
Expand All @@ -99,7 +104,10 @@ const CJSStringifyString = CJS({ type: 'string' })

const FJS = require('.')
const stringify = FJS(schema)
const stringifyArray = FJS(arraySchema)
const stringifyArrayDefault = FJS(arraySchema)
const stringifyArrayJSONStringify = FJS(arraySchema, {
largeArrayMechanism: 'json-stringify'
})
const stringifyDate = FJS(dateFormatSchema)
const stringifyString = FJS({ type: 'string' })
let str = ''
Expand All @@ -110,18 +118,48 @@ const ajvSerialize = ajv.compileSerializer(schemaAJVJTD)
const ajvSerializeArray = ajv.compileSerializer(arraySchemaAJVJTD)
const ajvSerializeString = ajv.compileSerializer({ type: 'string' })

/**
 * Builds a random string of lowercase ASCII letters with the first letter
 * capitalized (e.g. a name-like value for benchmark fixtures).
 *
 * @param {number} length - Number of characters to generate; must be an integer.
 * @returns {string} A string of `length` characters, or `''` when `length` is
 *   zero or negative.
 * @throws {Error} When `length` is not an integer.
 */
const getRandomString = (length) => {
  if (!Number.isInteger(length)) {
    throw new Error('Expected integer length')
  }

  const validCharacters = 'abcdefghijklmnopqrstuvwxyz'
  const nValidCharacters = validCharacters.length

  let result = ''
  for (let i = 0; i < length; ++i) {
    result += validCharacters[Math.floor(Math.random() * nValidCharacters)]
  }

  // Nothing was generated for length <= 0; indexing result[0] would yield
  // undefined and the toUpperCase() call below would throw.
  if (result === '') {
    return result
  }

  return result[0].toUpperCase() + result.slice(1)
}

// eslint-disable-next-line
for (var i = 0; i < 10000; i++) {
for (let i = 0; i < STR_LEN; i++) {
largeArray[i] = {
firstName: getRandomString(8),
lastName: getRandomString(6),
age: Math.ceil(Math.random() * 99)
}

str += i
if (i % 100 === 0) {
str += '"'
}
}

for (let i = STR_LEN; i < LARGE_ARRAY_SIZE; ++i) {
largeArray[i] = {
firstName: getRandomString(10),
lastName: getRandomString(4),
age: Math.ceil(Math.random() * 99)
}
}

Number(str)

for (i = 0; i < 1000; i++) {
multiArray.push(obj)
for (let i = 0; i < MULTI_ARRAY_LENGHT; i++) {
multiArray[i] = obj
}

suite.add('FJS creation', function () {
Expand All @@ -138,8 +176,12 @@ suite.add('JSON.stringify array', function () {
JSON.stringify(multiArray)
})

suite.add('fast-json-stringify array', function () {
stringifyArray(multiArray)
suite.add('fast-json-stringify array default', function () {
stringifyArrayDefault(multiArray)
})

suite.add('fast-json-stringify array json-stringify', function () {
stringifyArrayJSONStringify(multiArray)
})

suite.add('compile-json-stringify array', function () {
Expand All @@ -150,6 +192,26 @@ suite.add('AJV Serialize array', function () {
ajvSerializeArray(multiArray)
})

suite.add('JSON.stringify large array', function () {
JSON.stringify(largeArray)
})

suite.add('fast-json-stringify large array default', function () {
stringifyArrayDefault(largeArray)
})

suite.add('fast-json-stringify large array json-stringify', function () {
stringifyArrayJSONStringify(largeArray)
})

suite.add('compile-json-stringify large array', function () {
CJSStringifyArray(largeArray)
})

suite.add('AJV Serialize large array', function () {
ajvSerializeArray(largeArray)
})

suite.add('JSON.stringify long string', function () {
JSON.stringify(str)
})
Expand Down
57 changes: 56 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@ const fjsCloned = Symbol('fast-json-stringify.cloned')
const { randomUUID } = require('crypto')

const validate = require('./schema-validator')

let largeArraySize = 2e4
let stringSimilarity = null
let largeArrayMechanism = 'default'
const validLargeArrayMechanisms = [
'default',
'json-stringify'
]

const addComma = `
if (addComma) {
Expand Down Expand Up @@ -73,6 +80,22 @@ function build (schema, options) {
}
}

// Large-array handling options. Both values live in module-level variables
// and are read by buildArray when it generates the serializer code.
// NOTE(review): because they are module-level, a value set by one build() call
// appears to stay in effect for later calls that omit the option — confirm
// whether that is intended.
if (options.largeArrayMechanism) {
  if (validLargeArrayMechanisms.includes(options.largeArrayMechanism)) {
    largeArrayMechanism = options.largeArrayMechanism
  } else {
    // Report the rejected option itself so the message points at the real cause.
    throw new Error(`Unsupported large array mechanism ${options.largeArrayMechanism}`)
  }
}

if (options.largeArraySize) {
  // Keep the parsed integer rather than the raw option: the value is
  // interpolated into generated source code, so a string with trailing
  // characters (e.g. '2e4' or '1) ...') must never be stored verbatim.
  const parsedLargeArraySize = Number.parseInt(options.largeArraySize, 10)
  if (!Number.isNaN(parsedLargeArraySize)) {
    largeArraySize = parsedLargeArraySize
  } else {
    throw new Error(`Unsupported large array size. Expected integer-like, got ${options.largeArraySize}`)
  }
}

/* eslint no-new-func: "off" */
let code = `
'use strict'
Expand Down Expand Up @@ -1029,6 +1052,11 @@ function buildArray (location, code, name, key = null) {

code += `
var l = obj.length
if (l && l >= ${largeArraySize}) {`

const concatSnippet = `
}
var jsonOutput= ''
for (var i = 0; i < l; i++) {
var json = ''
Expand All @@ -1040,7 +1068,25 @@ function buildArray (location, code, name, key = null) {
}
}
return \`[\${jsonOutput}]\`
}`

switch (largeArrayMechanism) {
case 'default':
code += `
return \`[\${obj.map(${result.mapFnName}).join(',')}]\``
break

case 'json-stringify':
code += `
return JSON.stringify(obj)`
break

default:
throw new Error(`Unsupported large array mechanism ${largeArrayMechanism}`)
}

code += `
${concatSnippet}
${result.laterCode}
`

Expand Down Expand Up @@ -1148,22 +1194,27 @@ function nested (laterCode, name, key, location, subKey, isArray) {

switch (type) {
case 'null':
funcName = '$asNull'
code += `
json += $asNull()
`
break
case 'string': {
funcName = '$asString'
const stringSerializer = getStringSerializer(schema.format)
code += nullable ? `json += obj${accessor} === null ? null : ${stringSerializer}(obj${accessor})` : `json += ${stringSerializer}(obj${accessor})`
break
}
case 'integer':
funcName = '$asInteger'
code += nullable ? `json += obj${accessor} === null ? null : $asInteger(obj${accessor})` : `json += $asInteger(obj${accessor})`
break
case 'number':
funcName = '$asNumber'
code += nullable ? `json += obj${accessor} === null ? null : $asNumber(obj${accessor})` : `json += $asNumber(obj${accessor})`
break
case 'boolean':
funcName = '$asBoolean'
code += nullable ? `json += obj${accessor} === null ? null : $asBoolean(obj${accessor})` : `json += $asBoolean(obj${accessor})`
break
case 'object':
Expand All @@ -1181,6 +1232,7 @@ function nested (laterCode, name, key, location, subKey, isArray) {
`
break
case undefined:
funcName = '$asNull'
if ('anyOf' in schema) {
// beware: dereferenceOfRefs has side effects and changes schema.anyOf
const anyOfLocations = dereferenceOfRefs(location, 'anyOf')
Expand Down Expand Up @@ -1319,7 +1371,8 @@ function nested (laterCode, name, key, location, subKey, isArray) {

return {
code,
laterCode
laterCode,
mapFnName: funcName
}
}

Expand All @@ -1335,6 +1388,8 @@ function isEmpty (schema) {

module.exports = build

module.exports.validLargeArrayMechanisms = validLargeArrayMechanisms

module.exports.restore = function ({ code, ajv }) {
// eslint-disable-next-line
return (Function.apply(null, ['ajv', code])
Expand Down
48 changes: 46 additions & 2 deletions test/array.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ const test = require('tap').test
const validator = require('is-my-json-valid')
const build = require('..')

function buildTest (schema, toStringify) {
function buildTest (schema, toStringify, options) {
test(`render a ${schema.title} as JSON`, (t) => {
t.plan(3)

const validate = validator(schema)
const stringify = build(schema)
const stringify = build(schema, options)
const output = stringify(toStringify)

t.same(JSON.parse(output), toStringify)
Expand Down Expand Up @@ -319,3 +319,47 @@ test('object array with anyOf and symbol', (t) => {
])
t.equal(value, '[{"name":"name-0","option":"Foo"},{"name":"name-1","option":"Bar"}]')
})

// Shared fixture for the large-array tests: 2e4 slots, every one filled with
// the same { a, b } object.
const largeArray = new Array(2e4).fill({ a: 'test', b: 1 })
// `default` mechanism, with largeArraySize set explicitly to the fixture's
// length (2e4).
buildTest({
title: 'large array with default mechanism',
type: 'object',
properties: {
ids: {
type: 'array',
items: {
type: 'object',
properties: {
a: { type: 'string' },
b: { type: 'number' }
}
}
}
}
}, {
ids: largeArray
}, {
largeArraySize: 2e4,
largeArrayMechanism: 'default'
})

// `json-stringify` mechanism; no largeArraySize is passed in this call.
buildTest({
title: 'large array with json-stringify mechanism',
type: 'object',
properties: {
ids: {
type: 'array',
items: {
type: 'object',
properties: {
a: { type: 'string' },
b: { type: 'number' }
}
}
}
}
}, {
ids: largeArray
}, {
largeArrayMechanism: 'json-stringify'
})

0 comments on commit 66b79f0

Please sign in to comment.