Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: optimize check invalid field-vchar #2889

Merged
merged 4 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions benchmarks/core/is-valid-header-char.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { bench, group, run } from 'mitata'
import { isValidHeaderChar } from '../../lib/core/util.js'

const html = 'text/html'
const json = 'application/json; charset=UTF-8'

const headerCharRegex = /[^\t\x20-\x7e\x80-\xff]/

/**
* @param {string} characters
*/
function charCodeAtApproach (characters) {
// Validate if characters is a valid field-vchar.
// field-value = *( field-content / obs-fold )
// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
// field-vchar = VCHAR / obs-text
for (let i = 0; i < characters.length; ++i) {
const code = characters.charCodeAt(i)
// not \x20-\x7e, \t and \x80-\xff
if ((code < 0x20 && code !== 0x09) || code === 0x7f || code > 0xff) {
return false
}
}
return true
}

group(`isValidHeaderChar# ${html}`, () => {
bench('regexp.test', () => {
return !headerCharRegex.test(html)
})
bench('regexp.exec', () => {
return headerCharRegex.exec(html) === null
})
bench('charCodeAt', () => {
return charCodeAtApproach(html)
})
bench('isValidHeaderChar', () => {
return isValidHeaderChar(html)
})
})

group(`isValidHeaderChar# ${json}`, () => {
bench('regexp.test', () => {
return !headerCharRegex.test(json)
})
bench('regexp.exec', () => {
return headerCharRegex.exec(json) === null
})
bench('charCodeAt', () => {
return charCodeAtApproach(json)
})
bench('isValidHeaderChar', () => {
return isValidHeaderChar(json)
})
})

await run()
56 changes: 28 additions & 28 deletions lib/core/request.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,22 @@ const {
NotSupportedError
} = require('./errors')
const assert = require('node:assert')
const util = require('./util')
const {
isValidHTTPToken,
isValidHeaderChar,
isStream,
destroy,
isBuffer,
isFormDataLike,
isIterable,
isBlobLike,
buildURL,
validateHandler,
getServerName
} = require('./util')
const { channels } = require('./diagnostics.js')
const { headerNameLowerCasedRecord } = require('./constants')

// headerCharRegex have been lifted from
// https://github.com/nodejs/node/blob/main/lib/_http_common.js

/**
* Matches if val contains an invalid field-vchar
* field-value = *( field-content / obs-fold )
* field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
* field-vchar = VCHAR / obs-text
*/
const headerCharRegex = /[^\t\x20-\x7e\x80-\xff]/

// Verifies that a given path is valid does not contain control chars \x00 to \x20
const invalidPathRegex = /[^\u0021-\u00ff]/

Expand Down Expand Up @@ -55,7 +56,7 @@ class Request {

if (typeof method !== 'string') {
throw new InvalidArgumentError('method must be a string')
} else if (!util.isValidHTTPToken(method)) {
} else if (!isValidHTTPToken(method)) {
throw new InvalidArgumentError('invalid request method')
}

Expand Down Expand Up @@ -91,13 +92,13 @@ class Request {

if (body == null) {
this.body = null
} else if (util.isStream(body)) {
} else if (isStream(body)) {
this.body = body

const rState = this.body._readableState
if (!rState || !rState.autoDestroy) {
this.endHandler = function autoDestroy () {
util.destroy(this)
destroy(this)
}
this.body.on('end', this.endHandler)
}
Expand All @@ -110,15 +111,15 @@ class Request {
}
}
this.body.on('error', this.errorHandler)
} else if (util.isBuffer(body)) {
} else if (isBuffer(body)) {
this.body = body.byteLength ? body : null
} else if (ArrayBuffer.isView(body)) {
this.body = body.buffer.byteLength ? Buffer.from(body.buffer, body.byteOffset, body.byteLength) : null
} else if (body instanceof ArrayBuffer) {
this.body = body.byteLength ? Buffer.from(body) : null
} else if (typeof body === 'string') {
this.body = body.length ? Buffer.from(body) : null
} else if (util.isFormDataLike(body) || util.isIterable(body) || util.isBlobLike(body)) {
} else if (isFormDataLike(body) || isIterable(body) || isBlobLike(body)) {
this.body = body
} else {
throw new InvalidArgumentError('body must be a string, a Buffer, a Readable stream, an iterable, or an async iterable')
Expand All @@ -130,7 +131,7 @@ class Request {

this.upgrade = upgrade || null

this.path = query ? util.buildURL(path, query) : path
this.path = query ? buildURL(path, query) : path

this.origin = origin

Expand Down Expand Up @@ -166,22 +167,21 @@ class Request {
if (!Array.isArray(header) || header.length !== 2) {
throw new InvalidArgumentError('headers must be in key-value pair format')
}
const [key, value] = header
processHeader(this, key, value)
processHeader(this, header[0], header[1])
}
} else {
const keys = Object.keys(headers)
for (const key of keys) {
processHeader(this, key, headers[key])
for (let i = 0; i < keys.length; ++i) {
processHeader(this, keys[i], headers[keys[i]])
}
}
} else if (headers != null) {
throw new InvalidArgumentError('headers must be an object or an array')
}

util.validateHandler(handler, method, upgrade)
validateHandler(handler, method, upgrade)

this.servername = util.getServerName(this.host)
this.servername = getServerName(this.host)

this[kHandler] = handler

Expand Down Expand Up @@ -315,7 +315,7 @@ class Request {
}
}

function processHeader (request, key, val, skipAppend = false) {
function processHeader (request, key, val) {
if (val && (typeof val === 'object' && !Array.isArray(val))) {
throw new InvalidArgumentError(`invalid ${key} header`)
} else if (val === undefined) {
Expand All @@ -326,7 +326,7 @@ function processHeader (request, key, val, skipAppend = false) {

if (headerName === undefined) {
headerName = key.toLowerCase()
if (headerNameLowerCasedRecord[headerName] === undefined && !util.isValidHTTPToken(headerName)) {
if (headerNameLowerCasedRecord[headerName] === undefined && !isValidHTTPToken(headerName)) {
throw new InvalidArgumentError('invalid header key')
}
}
Expand All @@ -335,7 +335,7 @@ function processHeader (request, key, val, skipAppend = false) {
const arr = []
for (let i = 0; i < val.length; i++) {
if (typeof val[i] === 'string') {
if (headerCharRegex.exec(val[i]) !== null) {
if (!isValidHeaderChar(val[i])) {
throw new InvalidArgumentError(`invalid ${key} header`)
}
arr.push(val[i])
Expand All @@ -349,7 +349,7 @@ function processHeader (request, key, val, skipAppend = false) {
}
val = arr
} else if (typeof val === 'string') {
if (headerCharRegex.exec(val) !== null) {
if (!isValidHeaderChar(val)) {
throw new InvalidArgumentError(`invalid ${key} header`)
}
} else if (val === null) {
Expand Down
20 changes: 19 additions & 1 deletion lib/core/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,24 @@ function isValidHTTPToken (characters) {
return true
}

// headerCharRegex have been lifted from
// https://github.com/nodejs/node/blob/main/lib/_http_common.js

/**
* Matches if val contains an invalid field-vchar
* field-value = *( field-content / obs-fold )
* field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
* field-vchar = VCHAR / obs-text
*/
const headerCharRegex = /[^\t\x20-\x7e\x80-\xff]/

/**
* @param {string} characters
*/
function isValidHeaderChar (characters) {
tsctx marked this conversation as resolved.
Show resolved Hide resolved
return !headerCharRegex.test(characters)
}
Comment on lines +512 to +514
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting.

i wonder how the performance is if we flip it and do it like this

const isInvalidHeaderChar = headerCharRegex.test.bind(headerCharRegex)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no change.


benchmark                     time (avg)             (min … max)       p75       p99      p999
---------------------------------------------------------------- -----------------------------
• isValidHeaderChar# text/html
---------------------------------------------------------------- -----------------------------
regexp.test                33.69 ns/iter     (28.27 ns … 297 ns)  31.45 ns  79.35 ns    143 ns
regexp.exec                35.91 ns/iter     (31.64 ns … 273 ns)  34.62 ns  80.37 ns    153 ns
charCodeAt                 27.58 ns/iter        (25 ns … 532 ns)  25.88 ns  62.16 ns    133 ns
isValidHeaderChar          32.33 ns/iter      (29.1 ns … 452 ns)  30.62 ns  78.03 ns    163 ns
isInvalidHeaderCharRegex   30.87 ns/iter     (28.22 ns … 195 ns)  30.03 ns  66.36 ns    133 ns

summary for isValidHeaderChar# text/html
  charCodeAt
   1.12x faster than isInvalidHeaderCharRegex
   1.17x faster than isValidHeaderChar
   1.22x faster than regexp.test
   1.3x faster than regexp.exec

• isValidHeaderChar# application/json; charset=UTF-8
---------------------------------------------------------------- -----------------------------
regexp.test                59.13 ns/iter      (54.2 ns … 443 ns)   56.4 ns    120 ns    229 ns
regexp.exec                64.15 ns/iter     (58.35 ns … 268 ns)  60.64 ns    134 ns    212 ns
charCodeAt                 97.63 ns/iter     (89.45 ns … 355 ns)  98.97 ns    180 ns    303 ns
isValidHeaderChar           74.7 ns/iter     (57.71 ns … 357 ns)     75 ns    170 ns    277 ns
isInvalidHeaderCharRegex   59.08 ns/iter     (54.25 ns … 280 ns)  56.25 ns    147 ns    245 ns

summary for isValidHeaderChar# application/json; charset=UTF-8
  isInvalidHeaderCharRegex
   1x faster than regexp.test
   1.09x faster than regexp.exec
   1.26x faster than isValidHeaderChar
   1.65x faster than charCodeAt

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you for the investigation


// Parsed accordingly to RFC 9110
// https://www.rfc-editor.org/rfc/rfc9110#field.content-range
function parseRangeHeader (range) {
Expand All @@ -516,7 +534,6 @@ kEnumerableProperty.enumerable = true
module.exports = {
kEnumerableProperty,
nop,

isDisturbed,
isErrored,
isReadable,
Expand Down Expand Up @@ -546,6 +563,7 @@ module.exports = {
buildURL,
addAbortListener,
isValidHTTPToken,
isValidHeaderChar,
isTokenCharCode,
parseRangeHeader,
nodeMajor,
Expand Down
Loading