Skip to content

Commit

Permalink
fix(NODE-6124): utf8 validation is insufficiently strict (#680)
Browse files Browse the repository at this point in the history
  • Loading branch information
aditi-khare-mongoDB authored May 2, 2024
1 parent d6b15f8 commit 6460989
Show file tree
Hide file tree
Showing 12 changed files with 213 additions and 119 deletions.
34 changes: 22 additions & 12 deletions etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import MagicString from 'magic-string';

const REQUIRE_POLYFILLS =
`const { TextEncoder, TextDecoder } = require('../vendor/text-encoding');
const REQUIRE_WEB_UTILS_POLYFILLS =
`const { TextEncoder } = require('../vendor/text-encoding');
const { encode: btoa, decode: atob } = require('../vendor/base64');\n`

const REQUIRE_PARSE_UTF8_POLYFILLS =
`const { TextDecoder } = require('../vendor/text-encoding');\n`;

export class RequireVendor {
/**
* Take the compiled source code input; types are expected to already have been removed.
Expand All @@ -14,17 +17,24 @@ export class RequireVendor {
* @returns {{ code: string; map: import('magic-string').SourceMap }}
*/
transform(code, id) {
if (!id.includes('web_byte_utils')) {
return;
}
if (id.includes('parse_utf8')) {
// MagicString lets us edit the source code and still generate an accurate source map
const magicString = new MagicString(code);
magicString.prepend(REQUIRE_PARSE_UTF8_POLYFILLS);

// MagicString lets us edit the source code and still generate an accurate source map
const magicString = new MagicString(code);
magicString.prepend(REQUIRE_POLYFILLS);
return {
code: magicString.toString(),
map: magicString.generateMap({ hires: true })
};
} else if (id.includes('web_byte_utils')) {
// MagicString lets us edit the source code and still generate an accurate source map
const magicString = new MagicString(code);
magicString.prepend(REQUIRE_WEB_UTILS_POLYFILLS);

return {
code: magicString.toString(),
map: magicString.generateMap({ hires: true })
};
return {
code: magicString.toString(),
map: magicString.generateMap({ hires: true })
};
}
}
}
4 changes: 2 additions & 2 deletions src/binary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,8 @@ export class Binary extends BSONValue {
if (encoding === 'hex') return ByteUtils.toHex(this.buffer.subarray(0, this.position));
if (encoding === 'base64') return ByteUtils.toBase64(this.buffer.subarray(0, this.position));
if (encoding === 'utf8' || encoding === 'utf-8')
return ByteUtils.toUTF8(this.buffer, 0, this.position);
return ByteUtils.toUTF8(this.buffer, 0, this.position);
return ByteUtils.toUTF8(this.buffer, 0, this.position, false);
return ByteUtils.toUTF8(this.buffer, 0, this.position, false);
}

/** @internal */
Expand Down
35 changes: 35 additions & 0 deletions src/parse_utf8.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { BSONError } from './error';

type TextDecoder = {
readonly encoding: string;
readonly fatal: boolean;
readonly ignoreBOM: boolean;
decode(input?: Uint8Array): string;
};
type TextDecoderConstructor = {
new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
};

// parse utf8 globals
declare const TextDecoder: TextDecoderConstructor;
let TextDecoderFatal: TextDecoder;
let TextDecoderNonFatal: TextDecoder;

/**
* Determines if the passed in bytes are valid utf8
* @param bytes - An array of 8-bit bytes. Must be indexable and have length property
* @param start - The index to start validating
* @param end - The index to end validating
*/
export function parseUtf8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
if (fatal) {
TextDecoderFatal ??= new TextDecoder('utf8', { fatal: true });
try {
return TextDecoderFatal.decode(buffer.subarray(start, end));
} catch (cause) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
}
TextDecoderNonFatal ??= new TextDecoder('utf8', { fatal: false });
return TextDecoderNonFatal.decode(buffer.subarray(start, end));
}
47 changes: 10 additions & 37 deletions src/parser/deserializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import { BSONRegExp } from '../regexp';
import { BSONSymbol } from '../symbol';
import { Timestamp } from '../timestamp';
import { BSONDataView, ByteUtils } from '../utils/byte_utils';
import { validateUtf8 } from '../validate_utf8';

/** @public */
export interface DeserializeOptions {
Expand Down Expand Up @@ -236,7 +235,7 @@ function deserializeObject(
if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');

// Represents the key
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i);
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i, false);

// shouldValidateKey is true if the key should be validated, false otherwise
let shouldValidateKey = true;
Expand Down Expand Up @@ -266,7 +265,7 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}
value = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
value = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
index = index + stringSize;
} else if (elementType === constants.BSON_DATA_OID) {
const oid = ByteUtils.allocate(12);
Expand Down Expand Up @@ -476,7 +475,7 @@ function deserializeObject(
// If are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const source = ByteUtils.toUTF8(buffer, index, i);
const source = ByteUtils.toUTF8(buffer, index, i, false);
// Create the regexp
index = i + 1;

Expand All @@ -489,7 +488,7 @@ function deserializeObject(
// If are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
index = i + 1;

// For each option add the corresponding one for javascript
Expand Down Expand Up @@ -521,7 +520,7 @@ function deserializeObject(
// If are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const source = ByteUtils.toUTF8(buffer, index, i);
const source = ByteUtils.toUTF8(buffer, index, i, false);
index = i + 1;

// Get the start search index
Expand All @@ -533,7 +532,7 @@ function deserializeObject(
// If are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
index = i + 1;

// Set the object
Expand All @@ -551,7 +550,7 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}
const symbol = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
const symbol = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
value = promoteValues ? symbol : new BSONSymbol(symbol);
index = index + stringSize;
} else if (elementType === constants.BSON_DATA_TIMESTAMP) {
Expand Down Expand Up @@ -587,7 +586,7 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}
const functionString = getValidatedString(
const functionString = ByteUtils.toUTF8(
buffer,
index,
index + stringSize - 1,
Expand Down Expand Up @@ -626,7 +625,7 @@ function deserializeObject(
}

// Javascript function
const functionString = getValidatedString(
const functionString = ByteUtils.toUTF8(
buffer,
index,
index + stringSize - 1,
Expand Down Expand Up @@ -673,12 +672,7 @@ function deserializeObject(
)
throw new BSONError('bad string length in bson');
// Namespace
if (validation != null && validation.utf8) {
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
}
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1);
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
// Update parse index position
index = index + stringSize;

Expand Down Expand Up @@ -728,24 +722,3 @@ function deserializeObject(

return object;
}

function getValidatedString(
buffer: Uint8Array,
start: number,
end: number,
shouldValidateUtf8: boolean
) {
const value = ByteUtils.toUTF8(buffer, start, end);
// if utf8 validation is on, do the check
if (shouldValidateUtf8) {
for (let i = 0; i < value.length; i++) {
if (value.charCodeAt(i) === 0xfffd) {
if (!validateUtf8(buffer, start, end)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
break;
}
}
}
return value;
}
2 changes: 1 addition & 1 deletion src/utils/byte_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export type ByteUtils = {
/** Create a Uint8Array containing utf8 code units from a string */
fromUTF8: (text: string) => Uint8Array;
/** Create a string from utf8 code units */
toUTF8: (buffer: Uint8Array, start: number, end: number) => string;
toUTF8: (buffer: Uint8Array, start: number, end: number, fatal: boolean) => string;
/** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
utf8ByteLength: (input: string) => number;
/** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */
Expand Down
13 changes: 11 additions & 2 deletions src/utils/node_byte_utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { BSONError } from '../error';
import { parseUtf8 } from '../parse_utf8';

type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
type NodeJsBuffer = ArrayBufferView &
Expand Down Expand Up @@ -125,8 +126,16 @@ export const nodeJsByteUtils = {
return Buffer.from(text, 'utf8');
},

toUTF8(buffer: Uint8Array, start: number, end: number): string {
return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
toUTF8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
const value = nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
if (fatal) {
for (let i = 0; i < value.length; i++) {
if (value.charCodeAt(i) === 0xfffd) {
parseUtf8(buffer, start, end, fatal);
}
}
}
return value;
},

utf8ByteLength(input: string): number {
Expand Down
16 changes: 3 additions & 13 deletions src/utils/web_byte_utils.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
import { BSONError } from '../error';

type TextDecoder = {
readonly encoding: string;
readonly fatal: boolean;
readonly ignoreBOM: boolean;
decode(input?: Uint8Array): string;
};
type TextDecoderConstructor = {
new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
};
import { parseUtf8 } from '../parse_utf8';

type TextEncoder = {
readonly encoding: string;
Expand All @@ -19,7 +10,6 @@ type TextEncoderConstructor = {
};

// Web global
declare const TextDecoder: TextDecoderConstructor;
declare const TextEncoder: TextEncoderConstructor;
declare const atob: (base64: string) => string;
declare const btoa: (binary: string) => string;
Expand Down Expand Up @@ -172,8 +162,8 @@ export const webByteUtils = {
return new TextEncoder().encode(text);
},

toUTF8(uint8array: Uint8Array, start: number, end: number): string {
return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
toUTF8(uint8array: Uint8Array, start: number, end: number, fatal: boolean): string {
return parseUtf8(uint8array, start, end, fatal);
},

utf8ByteLength(input: string): number {
Expand Down
47 changes: 0 additions & 47 deletions src/validate_utf8.ts

This file was deleted.

19 changes: 16 additions & 3 deletions test/node/byte_utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
import * as sinon from 'sinon';
import { loadCJSModuleBSON, loadReactNativeCJSModuleBSON, loadESModuleBSON } from '../load_bson';
import * as crypto from 'node:crypto';
import { utf8WebPlatformSpecTests } from './data/utf8_wpt_error_cases';

type ByteUtilTest<K extends keyof ByteUtils> = {
name: string;
Expand Down Expand Up @@ -400,20 +401,32 @@ const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
{
name: 'should create utf8 string from buffer input',
inputs: [Buffer.from('abc\u{1f913}', 'utf8')],
inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7, false],
expectation({ output, error }) {
expect(error).to.be.null;
expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8').toString('utf8'));
}
},
{
name: 'should return empty string for empty buffer input',
inputs: [Buffer.alloc(0)],
inputs: [Buffer.alloc(0), 0, 0, false],
expectation({ output, error }) {
expect(error).to.be.null;
expect(output).to.be.a('string').with.lengthOf(0);
}
}
},
...utf8WebPlatformSpecTests.map(t => ({
name: t.name,
inputs: [Uint8Array.from(t.input), 0, t.input.length, true] as [
buffer: Uint8Array,
start: number,
end: number,
fatal: boolean
],
expectation({ error }) {
expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
}
}))
];
const utf8ByteLengthTests: ByteUtilTest<'utf8ByteLength'>[] = [
{
Expand Down
Loading

0 comments on commit 6460989

Please sign in to comment.