Skip to content

Commit

Permalink
Switched UTF string encoding approach to handle multibyte characters
Browse files Browse the repository at this point in the history
  • Loading branch information
Brian Vaughn committed Sep 27, 2021
1 parent 8464d69 commit 5b8ea25
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 53 deletions.
3 changes: 0 additions & 3 deletions packages/react-devtools-shared/src/__tests__/setupEnv.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,3 @@ global.process.env.DARK_MODE_DIMMED_LOG_COLOR = DARK_MODE_DIMMED_LOG_COLOR;
global.process.env.LIGHT_MODE_DIMMED_WARNING_COLOR = LIGHT_MODE_DIMMED_WARNING_COLOR;
global.process.env.LIGHT_MODE_DIMMED_ERROR_COLOR = LIGHT_MODE_DIMMED_ERROR_COLOR;
global.process.env.LIGHT_MODE_DIMMED_LOG_COLOR = LIGHT_MODE_DIMMED_LOG_COLOR;

global.TextEncoder = require('util').TextEncoder;
global.TextDecoder = require('util').TextDecoder;
13 changes: 13 additions & 0 deletions packages/react-devtools-shared/src/__tests__/store-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,19 @@ describe('Store', () => {
`);
});

// Verifies that a component whose display name consists of multibyte
// characters is rendered into the store with its name intact.
it('should handle multibyte character strings', () => {
const Component = () => null;
// Each of these emoji lies outside the Basic Multilingual Plane, so it takes
// two UTF-16 code units; the string's `length` is 6, not 3.
Component.displayName = '🟩💜🔵';

const container = document.createElement('div');

act(() => legacyRender(<Component />, container));
// The snapshot must show the display name exactly as it was assigned above.
expect(store).toMatchInlineSnapshot(`
[root]
<🟩💜🔵>
`);
});

describe('collapseNodesByDefault:false', () => {
beforeEach(() => {
store.collapseNodesByDefault = false;
Expand Down
61 changes: 41 additions & 20 deletions packages/react-devtools-shared/src/backend/renderer.js
Original file line number Diff line number Diff line change
Expand Up @@ -1513,11 +1513,16 @@ export function attach(

type OperationsArray = Array<number>;

// Value stored in the pending string table for each distinct string.
type StringTableEntry = {|
// Result of utfEncodeString() for the string; its length (not the string's
// own length) is what gets written into the operations array.
encodedString: Array<number>,
// ID handed out by getStringID(); assigned as (table size + 1), so IDs start
// at 1 (getStringID returns 0 for a null string).
id: number,
|};

const pendingOperations: OperationsArray = [];
const pendingRealUnmountedIDs: Array<number> = [];
const pendingSimulatedUnmountedIDs: Array<number> = [];
let pendingOperationsQueue: Array<OperationsArray> | null = [];
const pendingStringTable: Map<string, number> = new Map();
const pendingStringTable: Map<string, StringTableEntry> = new Map();
let pendingStringTableLength: number = 0;
let pendingUnmountedRootID: number | null = null;

Expand Down Expand Up @@ -1735,13 +1740,19 @@ export function attach(
// Now fill in the string table.
// [stringTableLength, str1Length, ...str1, str2Length, ...str2, ...]
operations[i++] = pendingStringTableLength;
pendingStringTable.forEach((value, key) => {
operations[i++] = key.length;
const encodedKey = utfEncodeString(key);
for (let j = 0; j < encodedKey.length; j++) {
operations[i + j] = encodedKey[j];
pendingStringTable.forEach((entry, stringKey) => {
const encodedString = entry.encodedString;

// Don't use the string length.
// It won't work for multibyte characters (like emoji).
const length = encodedString.length;

operations[i++] = length;
for (let j = 0; j < length; j++) {
operations[i + j] = encodedString[j];
}
i += key.length;

i += length;
});

if (numUnmountIDs > 0) {
Expand Down Expand Up @@ -1788,21 +1799,31 @@ export function attach(
pendingStringTableLength = 0;
}

function getStringID(str: string | null): number {
if (str === null) {
function getStringID(string: string | null): number {
if (string === null) {
return 0;
}
const existingID = pendingStringTable.get(str);
if (existingID !== undefined) {
return existingID;
}
const stringID = pendingStringTable.size + 1;
pendingStringTable.set(str, stringID);
// The string table total length needs to account
// both for the string length, and for the array item
// that contains the length itself. Hence + 1.
pendingStringTableLength += str.length + 1;
return stringID;
const existingEntry = pendingStringTable.get(string);
if (existingEntry !== undefined) {
return existingEntry.id;
}

const id = pendingStringTable.size + 1;
const encodedString = utfEncodeString(string);

pendingStringTable.set(string, {
encodedString,
id,
});

// The string table total length needs to account both for the string length,
// and for the array item that contains the length itself.
//
// Don't use string length for this table.
// It won't work for multibyte characters (like emoji).
pendingStringTableLength += encodedString.length + 1;

return id;
}

function recordMount(fiber: Fiber, parentFiber: Fiber | null) {
Expand Down
61 changes: 31 additions & 30 deletions packages/react-devtools-shared/src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,7 @@ const cachedDisplayNames: WeakMap<Function, string> = new WeakMap();

// On large trees, encoding takes significant time.
// Try to reuse the already encoded strings.
const encodedStringCache: LRUCache<
string,
Array<number> | Uint8Array,
> = new LRU({
const encodedStringCache: LRUCache<string, Array<number>> = new LRU({
max: 1000,
});

Expand Down Expand Up @@ -128,42 +125,46 @@ export function getUID(): number {
return ++uidCounter;
}

const isTextEncoderSupported =
typeof TextDecoder === 'function' && typeof TextEncoder === 'function';

export function utfDecodeString(array: Array<number>): string {
if (isTextEncoderSupported) {
// Handles multi-byte characters; use if available.
return new TextDecoder().decode(new Uint8Array(array));
} else {
// Avoid spreading the array (e.g. String.fromCodePoint(...array)).
// Function arguments are placed on the stack before the function is called,
// which throws a RangeError for large arrays.
// See github.com/facebook/react/issues/22293
let string = '';
for (let i = 0; i < array.length; i++) {
const char = array[i];
string += String.fromCodePoint(char);
}
return string;
// Avoid spreading the array (e.g. String.fromCodePoint(...array)).
// Function arguments are placed on the stack before the function is called,
// which throws a RangeError for large arrays.
// See github.com/facebook/react/issues/22293
let string = '';
for (let i = 0; i < array.length; i++) {
const char = array[i];
string += String.fromCodePoint(char);
}
return string;
}

export function utfEncodeString(string: string): Array<number> | Uint8Array {
function surrogatePairToCodePoint(
charCode1: number,
charCode2: number,
): number {
return ((charCode1 & 0x3ff) << 10) + (charCode2 & 0x3ff) + 0x10000;
}

// Credit for this encoding approach goes to Tim Down:
// https://stackoverflow.com/questions/4877326/how-can-i-tell-if-a-string-contains-multibyte-characters-in-javascript
export function utfEncodeString(string: string): Array<number> {
const cached = encodedStringCache.get(string);
if (cached !== undefined) {
return cached;
}

let encoded;
if (isTextEncoderSupported) {
// Handles multi-byte characters; use if available.
encoded = new TextEncoder().encode(string);
} else {
encoded = new Array(string.length);
for (let i = 0; i < string.length; i++) {
encoded[i] = string.codePointAt(i);
const encoded = [];
let i = 0;
let charCode;
while (i < string.length) {
charCode = string.charCodeAt(i);
// Handle multibyte unicode characters (like emoji).
if ((charCode & 0xf800) === 0xd800) {
encoded.push(surrogatePairToCodePoint(charCode, string.charCodeAt(++i)));
} else {
encoded.push(charCode);
}
++i;
}

encodedStringCache.set(string, encoded);
Expand Down

0 comments on commit 5b8ea25

Please sign in to comment.