Skip to content

Commit

Permalink
perf(fetch): Improve fetch of data URL (nodejs#2479)
Browse files Browse the repository at this point in the history
* perf(fetch): Improve data url base64

* format

* fix: comment position

* add comment

* add comment

* suggestion change

* perf: avoid replace

* fixup

* refactor

* fixup

* Revert "fixup"

This reverts commit 058dc02.

* fixup

* remove
  • Loading branch information
tsctx authored and crysmags committed Feb 27, 2024
1 parent 3c3d5a7 commit 851084a
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 52 deletions.
93 changes: 49 additions & 44 deletions lib/fetch/dataURL.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
const assert = require('assert')
const { atob } = require('buffer')
const { isomorphicDecode } = require('./util')

const encoder = new TextEncoder()
Expand All @@ -8,7 +7,8 @@ const encoder = new TextEncoder()
* @see https://mimesniff.spec.whatwg.org/#http-token-code-point
*/
// NOTE: `-` is escaped so that `+-.` is not parsed as a range (which would
// also admit `,`), and U+0060 (`) is listed because it is an HTTP token code point.
const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+\-.^_`|~A-Za-z0-9]+$/
// HTTP whitespace: LF, CR, TAB, SPACE. Inside a character class `|` is a
// literal, so the code points are listed without alternation bars.
const HTTP_WHITESPACE_REGEX = /[\u000A\u000D\u0009\u0020]/ // eslint-disable-line
// ASCII whitespace: TAB, LF, FF, CR, SPACE. Global so that `replace` strips
// every occurrence; hoisted so the regex is not re-created on each call.
const ASCII_WHITESPACE_REPLACE_REGEX = /[\u0009\u000A\u000C\u000D\u0020]/g // eslint-disable-line
/**
* @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
*/
Expand Down Expand Up @@ -188,20 +188,26 @@ function stringPercentDecode (input) {
return percentDecode(bytes)
}

/**
 * Tells whether a byte value is the ASCII code of a hexadecimal digit.
 * @param {number} byte
 * @returns {boolean} true for 0-9, A-F and a-f; false for anything else
 */
function isHexCharByte (byte) {
  // 0-9
  if (byte >= 0x30 && byte <= 0x39) {
    return true
  }
  // A-F
  if (byte >= 0x41 && byte <= 0x46) {
    return true
  }
  // a-f
  return byte >= 0x61 && byte <= 0x66
}

// https://url.spec.whatwg.org/#percent-decode
/**
 * Percent-decodes a byte sequence: every `%XX` triplet whose `XX` is two
 * hexadecimal digits is replaced by the byte it encodes; all other bytes
 * (including a `%` that is not followed by two hex digits) are copied as-is.
 *
 * @param {Uint8Array} input
 * @returns {Uint8Array} the decoded bytes (a view that is never longer than input)
 */
function percentDecode (input) {
  const length = input.length
  // 1. Let output be an empty byte sequence.
  // Decoding never produces more bytes than it consumes, so a buffer of the
  // input's length is always large enough; `j` tracks how much of it is used.
  /** @type {Uint8Array} */
  const output = new Uint8Array(length)
  let j = 0
  // 2. For each byte byte in input:
  for (let i = 0; i < length; ++i) {
    const byte = input[i]

    // 1. If byte is not 0x25 (%), then append byte to output.
    if (byte !== 0x25) {
      output[j++] = byte

    // 2. Otherwise, if byte is 0x25 (%) and the next two bytes
    // after byte in input are not both hexadecimal digits,
    // append byte to output. Reads past the end yield undefined,
    // which is not a hex digit, so a trailing `%` is kept literally.
    } else if (
      !(isHexCharByte(input[i + 1]) && isHexCharByte(input[i + 2]))
    ) {
      output[j++] = 0x25

    // 3. Otherwise:
    } else {
      // 1. Let bytePoint be the two bytes after byte in input,
      // decoded, and then interpreted as hexadecimal number.
      const nextTwoBytes = String.fromCharCode(input[i + 1], input[i + 2])
      const bytePoint = Number.parseInt(nextTwoBytes, 16)

      // 2. Append a byte whose value is bytePoint to output.
      output[j++] = bytePoint

      // 3. Skip the next two bytes in input.
      i += 2
    }
  }

  // 3. Return output.
  // Only take a sub-view when something was actually decoded.
  return length === j ? output : output.subarray(0, j)
}

// https://mimesniff.spec.whatwg.org/#parse-a-mime-type
Expand Down Expand Up @@ -410,19 +416,25 @@ function parseMIMEType (input) {
/**
 * Forgiving-base64 decodes a string.
 *
 * @see https://infra.spec.whatwg.org/#forgiving-base64-decode
 * @param {string} data
 * @returns {Uint8Array|'failure'} the decoded bytes, or the string 'failure'
 *   when data is not valid forgiving-base64
 */
function forgivingBase64 (data) {
  // 1. Remove all ASCII whitespace from data.
  data = data.replace(ASCII_WHITESPACE_REPLACE_REGEX, '') // eslint-disable-line

  // Padding is "removed" by shrinking this logical length instead of
  // copying the string.
  let dataLength = data.length
  // 2. If data’s code point length divides by 4 leaving
  // no remainder, then:
  if (dataLength % 4 === 0) {
    // 1. If data ends with one or two U+003D (=) code points,
    // then remove them from data.
    if (data.charCodeAt(dataLength - 1) === 0x003D) {
      --dataLength
      if (data.charCodeAt(dataLength - 1) === 0x003D) {
        --dataLength
      }
    }
  }

  // 3. If data’s code point length divides by 4 leaving
  // a remainder of 1, then return failure.
  if (dataLength % 4 === 1) {
    return 'failure'
  }

  // 4. If data contains a code point that is not one of
  //  U+002B (+)
  //  U+002F (/)
  //  ASCII alphanumeric
  // then return failure.
  // Only the part before the stripped padding is validated.
  if (/[^+/0-9A-Za-z]/.test(data.length === dataLength ? data : data.substring(0, dataLength))) {
    return 'failure'
  }

  // The alphabet was validated above, so the decoder only sees valid input
  // plus optional trailing padding. Wrap the Buffer's memory instead of
  // copying it byte by byte.
  const buffer = Buffer.from(data, 'base64')
  return new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength)
}

// https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
Expand Down Expand Up @@ -570,55 +576,54 @@ function serializeAMimeType (mimeType) {

/**
 * Checks whether a UTF-16 code unit is HTTP whitespace (CR, LF, TAB or SPACE).
 *
 * Takes a numeric code unit (e.g. from `String.prototype.charCodeAt`) rather
 * than a one-character string, so callers can scan a string without
 * allocating a substring per character.
 *
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 * @param {number} char
 * @returns {boolean}
 */
function isHTTPWhiteSpace (char) {
  // "\r\n\t "
  return char === 0x00d || char === 0x00a || char === 0x009 || char === 0x020
}

/**
 * Strips HTTP whitespace (CR, LF, TAB, SPACE) from the start and/or the end
 * of a string.
 *
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip from the start of the string
 * @param {boolean} [trailing=true] strip from the end of the string
 * @returns {string} the original string when nothing had to be removed
 */
function removeHTTPWhitespace (str, leading = true, trailing = true) {
  // [i, j) is the range of str that is kept.
  let i = 0
  let j = str.length

  if (leading) {
    // Advance past leading whitespace; this index must move forward.
    while (j > i && isHTTPWhiteSpace(str.charCodeAt(i))) ++i
  }

  if (trailing) {
    // `j > i` also stops the scan when the string is all whitespace.
    while (j > i && isHTTPWhiteSpace(str.charCodeAt(j - 1))) --j
  }

  // Avoid allocating a new string when there is nothing to trim.
  return i === 0 && j === str.length ? str : str.substring(i, j)
}

/**
 * Checks whether a UTF-16 code unit is ASCII whitespace
 * (CR, LF, TAB, FF or SPACE).
 *
 * Takes a numeric code unit (e.g. from `String.prototype.charCodeAt`) rather
 * than a one-character string, so callers can scan a string without
 * allocating a substring per character.
 *
 * @see https://infra.spec.whatwg.org/#ascii-whitespace
 * @param {number} char
 * @returns {boolean}
 */
function isASCIIWhitespace (char) {
  // "\r\n\t\f "
  return char === 0x00d || char === 0x00a || char === 0x009 || char === 0x00c || char === 0x020
}

/**
 * Strips ASCII whitespace (CR, LF, TAB, FF, SPACE) from the start and/or the
 * end of a string.
 *
 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip from the start of the string
 * @param {boolean} [trailing=true] strip from the end of the string
 * @returns {string} the original string when nothing had to be removed
 */
function removeASCIIWhitespace (str, leading = true, trailing = true) {
  // [i, j) is the range of str that is kept.
  let i = 0
  let j = str.length

  if (leading) {
    // Advance past leading whitespace; this index must move forward.
    while (j > i && isASCIIWhitespace(str.charCodeAt(i))) ++i
  }

  if (trailing) {
    // `j > i` also stops the scan when the string is all whitespace.
    while (j > i && isASCIIWhitespace(str.charCodeAt(j - 1))) --j
  }

  // Avoid allocating a new string when there is nothing to trim.
  return i === 0 && j === str.length ? str : str.substring(i, j)
}

module.exports = {
Expand Down
21 changes: 13 additions & 8 deletions lib/fetch/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -897,22 +897,27 @@ function isReadableStreamLike (stream) {
)
}

// Upper bound on how many code units are handed to a single
// `String.fromCharCode` call; longer inputs are decoded in chunks of this
// size so the engine's argument-count limit is not hit.
const MAXIMUM_ARGUMENT_LENGTH = 65535

/**
 * Isomorphic-decodes a byte sequence into a string whose code points have
 * the same values as the input bytes, in the same order.
 *
 * @see https://infra.spec.whatwg.org/#isomorphic-decode
 * @param {number[]|Uint8Array} input
 * @returns {string}
 */
function isomorphicDecode (input) {
  // 1. To isomorphic decode a byte sequence input, return a string whose code point
  //    length is equal to input’s length and whose code points have the same values
  //    as the values of input’s bytes, in the same order.
  const length = input.length

  // Fast path: the whole input fits into one call.
  if (length < MAXIMUM_ARGUMENT_LENGTH) {
    return String.fromCharCode.apply(null, input)
  }

  // Typed arrays (and Buffers) can hand out zero-copy views; plain arrays
  // fall back to `slice`.
  const isView = typeof input.subarray === 'function'

  let result = ''
  for (let start = 0; start < length; start += MAXIMUM_ARGUMENT_LENGTH) {
    const end = Math.min(start + MAXIMUM_ARGUMENT_LENGTH, length)
    const chunk = isView ? input.subarray(start, end) : input.slice(start, end)
    result += String.fromCharCode.apply(null, chunk)
  }
  return result
}

/**
Expand Down

0 comments on commit 851084a

Please sign in to comment.