Skip to content

fix(NODE-6124): utf8 validation is insufficiently strict #680

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import MagicString from 'magic-string';

const REQUIRE_POLYFILLS =
`const { TextEncoder, TextDecoder } = require('../vendor/text-encoding');
const REQUIRE_WEB_UTILS_POLYFILLS =
`const { TextEncoder } = require('../vendor/text-encoding');
const { encode: btoa, decode: atob } = require('../vendor/base64');\n`

const REQUIRE_PARSE_UTF8_POLYFILLS =
`const { TextDecoder } = require('../vendor/text-encoding');\n`;

export class RequireVendor {
/**
* Take the compiled source code input; types are expected to already have been removed.
Expand All @@ -14,17 +17,24 @@ export class RequireVendor {
* @returns {{ code: string; map: import('magic-string').SourceMap }}
*/
transform(code, id) {
if (!id.includes('web_byte_utils')) {
return;
}
if (id.includes('parse_utf8')) {
// MagicString lets us edit the source code and still generate an accurate source map
const magicString = new MagicString(code);
magicString.prepend(REQUIRE_PARSE_UTF8_POLYFILLS);

// MagicString lets us edit the source code and still generate an accurate source map
const magicString = new MagicString(code);
magicString.prepend(REQUIRE_POLYFILLS);
return {
code: magicString.toString(),
map: magicString.generateMap({ hires: true })
};
} else if (id.includes('web_byte_utils')) {
// MagicString lets us edit the source code and still generate an accurate source map
const magicString = new MagicString(code);
magicString.prepend(REQUIRE_WEB_UTILS_POLYFILLS);

return {
code: magicString.toString(),
map: magicString.generateMap({ hires: true })
};
return {
code: magicString.toString(),
map: magicString.generateMap({ hires: true })
};
}
}
}
4 changes: 2 additions & 2 deletions src/binary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,8 @@ export class Binary extends BSONValue {
if (encoding === 'hex') return ByteUtils.toHex(this.buffer.subarray(0, this.position));
if (encoding === 'base64') return ByteUtils.toBase64(this.buffer.subarray(0, this.position));
if (encoding === 'utf8' || encoding === 'utf-8')
return ByteUtils.toUTF8(this.buffer, 0, this.position);
return ByteUtils.toUTF8(this.buffer, 0, this.position);
return ByteUtils.toUTF8(this.buffer, 0, this.position, false);
return ByteUtils.toUTF8(this.buffer, 0, this.position, false);
}

/** @internal */
Expand Down
35 changes: 35 additions & 0 deletions src/parse_utf8.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { BSONError } from './error';

type TextDecoder = {
readonly encoding: string;
readonly fatal: boolean;
readonly ignoreBOM: boolean;
decode(input?: Uint8Array): string;
};
type TextDecoderConstructor = {
new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
};

// parse utf8 globals
declare const TextDecoder: TextDecoderConstructor;
let TextDecoderFatal: TextDecoder;
let TextDecoderNonFatal: TextDecoder;

/**
* Determines if the passed in bytes are valid utf8
* @param bytes - An array of 8-bit bytes. Must be indexable and have length property
* @param start - The index to start validating
* @param end - The index to end validating
*/
export function parseUtf8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
if (fatal) {
TextDecoderFatal ??= new TextDecoder('utf8', { fatal: true });
try {
return TextDecoderFatal.decode(buffer.subarray(start, end));
} catch (cause) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
}
TextDecoderNonFatal ??= new TextDecoder('utf8', { fatal: false });
return TextDecoderNonFatal.decode(buffer.subarray(start, end));
}
47 changes: 10 additions & 37 deletions src/parser/deserializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import { BSONRegExp } from '../regexp';
import { BSONSymbol } from '../symbol';
import { Timestamp } from '../timestamp';
import { BSONDataView, ByteUtils } from '../utils/byte_utils';
import { validateUtf8 } from '../validate_utf8';

/** @public */
export interface DeserializeOptions {
Expand Down Expand Up @@ -236,7 +235,7 @@ function deserializeObject(
if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');

// Represents the key
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i);
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i, false);

// shouldValidateKey is true if the key should be validated, false otherwise
let shouldValidateKey = true;
Expand Down Expand Up @@ -266,7 +265,7 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}
value = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
value = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
index = index + stringSize;
} else if (elementType === constants.BSON_DATA_OID) {
const oid = ByteUtils.allocate(12);
Expand Down Expand Up @@ -476,7 +475,7 @@ function deserializeObject(
// If are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const source = ByteUtils.toUTF8(buffer, index, i);
const source = ByteUtils.toUTF8(buffer, index, i, false);
// Create the regexp
index = i + 1;

Expand All @@ -489,7 +488,7 @@ function deserializeObject(
// If are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
index = i + 1;

// For each option add the corresponding one for javascript
Expand Down Expand Up @@ -521,7 +520,7 @@ function deserializeObject(
// If are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const source = ByteUtils.toUTF8(buffer, index, i);
const source = ByteUtils.toUTF8(buffer, index, i, false);
index = i + 1;

// Get the start search index
Expand All @@ -533,7 +532,7 @@ function deserializeObject(
// If are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
index = i + 1;

// Set the object
Expand All @@ -551,7 +550,7 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}
const symbol = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
const symbol = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
value = promoteValues ? symbol : new BSONSymbol(symbol);
index = index + stringSize;
} else if (elementType === constants.BSON_DATA_TIMESTAMP) {
Expand Down Expand Up @@ -587,7 +586,7 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}
const functionString = getValidatedString(
const functionString = ByteUtils.toUTF8(
buffer,
index,
index + stringSize - 1,
Expand Down Expand Up @@ -626,7 +625,7 @@ function deserializeObject(
}

// Javascript function
const functionString = getValidatedString(
const functionString = ByteUtils.toUTF8(
buffer,
index,
index + stringSize - 1,
Expand Down Expand Up @@ -673,12 +672,7 @@ function deserializeObject(
)
throw new BSONError('bad string length in bson');
// Namespace
if (validation != null && validation.utf8) {
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
}
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1);
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
// Update parse index position
index = index + stringSize;

Expand Down Expand Up @@ -728,24 +722,3 @@ function deserializeObject(

return object;
}

function getValidatedString(
buffer: Uint8Array,
start: number,
end: number,
shouldValidateUtf8: boolean
) {
const value = ByteUtils.toUTF8(buffer, start, end);
// if utf8 validation is on, do the check
if (shouldValidateUtf8) {
for (let i = 0; i < value.length; i++) {
if (value.charCodeAt(i) === 0xfffd) {
if (!validateUtf8(buffer, start, end)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
break;
}
}
}
return value;
}
2 changes: 1 addition & 1 deletion src/utils/byte_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export type ByteUtils = {
/** Create a Uint8Array containing utf8 code units from a string */
fromUTF8: (text: string) => Uint8Array;
/** Create a string from utf8 code units */
toUTF8: (buffer: Uint8Array, start: number, end: number) => string;
toUTF8: (buffer: Uint8Array, start: number, end: number, fatal: boolean) => string;
/** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
utf8ByteLength: (input: string) => number;
/** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */
Expand Down
13 changes: 11 additions & 2 deletions src/utils/node_byte_utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { BSONError } from '../error';
import { parseUtf8 } from '../parse_utf8';

type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
type NodeJsBuffer = ArrayBufferView &
Expand Down Expand Up @@ -125,8 +126,16 @@ export const nodeJsByteUtils = {
return Buffer.from(text, 'utf8');
},

toUTF8(buffer: Uint8Array, start: number, end: number): string {
return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
toUTF8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
const value = nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
if (fatal) {
for (let i = 0; i < value.length; i++) {
if (value.charCodeAt(i) === 0xfffd) {
parseUtf8(buffer, start, end, fatal);
}
}
}
return value;
},

utf8ByteLength(input: string): number {
Expand Down
16 changes: 3 additions & 13 deletions src/utils/web_byte_utils.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
import { BSONError } from '../error';

type TextDecoder = {
readonly encoding: string;
readonly fatal: boolean;
readonly ignoreBOM: boolean;
decode(input?: Uint8Array): string;
};
type TextDecoderConstructor = {
new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
};
import { parseUtf8 } from '../parse_utf8';

type TextEncoder = {
readonly encoding: string;
Expand All @@ -19,7 +10,6 @@ type TextEncoderConstructor = {
};

// Web global
declare const TextDecoder: TextDecoderConstructor;
declare const TextEncoder: TextEncoderConstructor;
declare const atob: (base64: string) => string;
declare const btoa: (binary: string) => string;
Expand Down Expand Up @@ -172,8 +162,8 @@ export const webByteUtils = {
return new TextEncoder().encode(text);
},

toUTF8(uint8array: Uint8Array, start: number, end: number): string {
return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
toUTF8(uint8array: Uint8Array, start: number, end: number, fatal: boolean): string {
return parseUtf8(uint8array, start, end, fatal);
},

utf8ByteLength(input: string): number {
Expand Down
47 changes: 0 additions & 47 deletions src/validate_utf8.ts

This file was deleted.

19 changes: 16 additions & 3 deletions test/node/byte_utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
import * as sinon from 'sinon';
import { loadCJSModuleBSON, loadReactNativeCJSModuleBSON, loadESModuleBSON } from '../load_bson';
import * as crypto from 'node:crypto';
import { utf8WebPlatformSpecTests } from './data/utf8_wpt_error_cases';

type ByteUtilTest<K extends keyof ByteUtils> = {
name: string;
Expand Down Expand Up @@ -400,20 +401,32 @@ const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
{
name: 'should create utf8 string from buffer input',
inputs: [Buffer.from('abc\u{1f913}', 'utf8')],
inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7, false],
expectation({ output, error }) {
expect(error).to.be.null;
expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8').toString('utf8'));
}
},
{
name: 'should return empty string for empty buffer input',
inputs: [Buffer.alloc(0)],
inputs: [Buffer.alloc(0), 0, 0, false],
expectation({ output, error }) {
expect(error).to.be.null;
expect(output).to.be.a('string').with.lengthOf(0);
}
}
},
...utf8WebPlatformSpecTests.map(t => ({
name: t.name,
inputs: [Uint8Array.from(t.input), 0, t.input.length, true] as [
buffer: Uint8Array,
start: number,
end: number,
fatal: boolean
],
expectation({ error }) {
expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
}
}))
];
const utf8ByteLengthTests: ByteUtilTest<'utf8ByteLength'>[] = [
{
Expand Down
Loading