Skip to content

Commit

Permalink
[lexical] Bug Fix: Workaround for delete character with emoji graphem…
Browse files Browse the repository at this point in the history
…e customers that do not include non-BMP code points (#7175)
  • Loading branch information
etrepum authored Feb 18, 2025
1 parent 94db7c7 commit 25e5314
Show file tree
Hide file tree
Showing 6 changed files with 376 additions and 102 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/

import {
moveLeft,
moveRight,
pressBackspace,
selectAll,
} from '../keyboardShortcuts/index.mjs';
import {
assertHTML,
assertSelection,
focusEditor,
html,
initialize,
test,
} from '../utils/index.mjs';

// Regression tests for #7163
test.describe.configure({mode: 'parallel'});
test.describe('Grapheme deleteCharacter', () => {
test.beforeEach(({isPlainText, isCollab, page}) =>
initialize({isCollab, isPlainText, page}),
);
[
// Original scenarios
{
backspaceCount: 3,
caretDistance: 1,
description: 'grapheme cluster',
// Hangul grapheme cluster.
// https://www.compart.com/en/unicode/U+AC01
grapheme: '\u1100\u1161\u11A8',
},
{
backspaceCount: 2,
caretDistance: 1,
description: 'extended grapheme cluster',
// Tamil 'ni' grapheme cluster.
// http://unicode.org/reports/tr29/#Table_Sample_Grapheme_Clusters
grapheme: '\u0BA8\u0BBF',
},
{
backspaceCount: 4,
caretDistance: 1,
description: 'tailored grapheme cluster',

// Unclear why Firefox behaves differently here
firefoxCaretDistance: 2,

// Devangari 'kshi' tailored grapheme cluster.
// http://unicode.org/reports/tr29/#Table_Sample_Grapheme_Clusters
grapheme: '\u0915\u094D\u0937\u093F',
},
{
backspaceCount: 1,
caretDistance: 1,
description: 'Emoji sequence combined using zero-width joiners',
// https://emojipedia.org/family-woman-woman-girl-boy/
grapheme:
'\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66',
},
{
backspaceCount: 1,
caretDistance: 1,
description: 'Emoji sequence with skin-tone modifier',
// https://emojipedia.org/clapping-hands-medium-skin-tone/
grapheme: '\uD83D\uDC4F\uD83C\uDFFD',
},
// New ones
{
backspaceCount: 2,
caretDistance: 1,
description: 'Arabic text with accent',
dir: 'rtl',
grapheme: '\u0647\u064e',
},
{
// Editors that do normalization would do this in 1 backspace
backspaceCount: 2,
caretDistance: 1,
description: 'Latin with decomposed combining character',
grapheme: 'n\u0303',
},
{
backspaceCount: 1,
caretDistance: 1,
description:
'Multiple codepoint Emoji with variation selector entirely in the BMP',
grapheme: '\u2764\ufe0f',
},
{
backspaceCount: 1,
caretDistance: 1,
description: 'Multiple codepoint keycap Emoji',
grapheme: '#\ufe0f\u20e3',
},
{
backspaceCount: 8,
caretDistance: 4,
description: 'Hindi',
// Unclear why this differs
firefoxCaretDistance: 5,

grapheme: '\u0905\u0928\u0941\u091a\u094d\u091b\u0947\u0926',
},
{
backspaceCount: 4,
caretDistance: 2,
description: 'Korean',
grapheme: '\u1103\u1167\u1109\u1170',
},
{
backspaceCount: 5,
caretDistance: 5,
description: 'Emojis outside the BMP',
grapheme: '\ud83c\udf37\ud83c\udf81\ud83d\udca9\ud83d\ude1c\ud83d\udc4d',
},
{
backspaceCount: 1,
caretDistance: 1,
description:
'ZWJ emoji cluster with variation selection that looks like 4 characters but treated as one',
grapheme:
'\ud83d\udc69\ud83c\udffd\u200d\ud83d\udc68\ud83c\udffd\u200d\ud83d\udc76\ud83c\udffd\u200d\ud83d\udc66\ud83c\udffd',
},
{
backspaceCount: 1,
caretDistance: 1,
description: 'Flag emoji with ZWJ and variation selector',
grapheme: '\ud83c\udff3\ufe0f\u200d\ud83c\udf08',
},
{
backspaceCount: 1,
caretDistance: 1,
description: 'Chinese for seaborgium (surrogate pair)',
grapheme: '\ud862\udf4e',
},
].forEach(
({
backspaceCount,
caretDistance,
description,
grapheme,
dir = 'ltr',
skip = false,
firefoxCaretDistance = undefined,
}) => {
test(description, async ({page, browserName, isCollab, isPlainText}) => {
// We are only concerned about input here, not collab.
test.skip(isCollab || skip);

// You can render a grapheme with escape sequences like this:
// const fmt = (s) => "'" + Array.from({ length: s.length }, (_, i) => `\\u${s.charCodeAt(i).toString(16).padStart(4, '0')}`).join('') + "'";
// e.g. from dev tools copy(fmt('emoji here')) and then paste it in your text editor
await focusEditor(page);
const codeUnits = grapheme.length;
await page.keyboard.type(description);
await page.keyboard.press('Enter');
const expectedInitialHTML = isPlainText
? html`
<p dir="ltr">
<span data-lexical-text="true">${description}</span>
<br />
<br />
</p>
`
: html`
<p dir="ltr">
<span data-lexical-text="true">${description}</span>
</p>
<p><br /></p>
`;
const expectedGraphemeHTML = isPlainText
? html`
<p dir="ltr">
<span data-lexical-text="true">${description}</span>
<br />
<span data-lexical-text="true">${grapheme}</span>
</p>
`
: html`
<p dir="ltr">
<span data-lexical-text="true">${description}</span>
</p>
<p dir="${dir}">
<span data-lexical-text="true">${grapheme}</span>
</p>
`;
await assertHTML(page, expectedInitialHTML, undefined, {
ignoreClasses: true,
ignoreInlineStyles: true,
});
await page.keyboard.insertText(grapheme);
await assertHTML(page, expectedGraphemeHTML, undefined, {
ignoreClasses: true,
ignoreInlineStyles: true,
});
const selectionFromOffset = (offset, path) => ({
anchorOffset: offset,
anchorPath: path,
focusOffset: offset,
focusPath: path,
});
const graphemePath = isPlainText ? [0, 2, 0] : [1, 0, 0];
const initialPath = isPlainText ? [0] : [1];
const initialOffset = isPlainText ? 2 : 0;
await assertSelection(
page,
selectionFromOffset(codeUnits, graphemePath),
);
if (dir !== 'rtl') {
// It's unclear why Firefox navigates differently in these specific,
// cases, but that's not pertinent to what we're testing
const testedCaretDistance =
browserName === 'firefox' && firefoxCaretDistance !== undefined
? firefoxCaretDistance
: caretDistance;
await moveLeft(page, testedCaretDistance);
await assertSelection(page, selectionFromOffset(0, graphemePath));
await moveRight(page, testedCaretDistance);
}
await assertSelection(
page,
selectionFromOffset(codeUnits, graphemePath),
);
await pressBackspace(page, backspaceCount);
await assertSelection(
page,
selectionFromOffset(initialOffset, initialPath),
);
await assertHTML(page, expectedInitialHTML, undefined, {
ignoreClasses: true,
ignoreInlineStyles: true,
});
await selectAll(page);
await pressBackspace(page);
});
},
);
});
9 changes: 5 additions & 4 deletions packages/lexical-react/src/LexicalHashtagPlugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,10 @@ function getHashtagRegexStringChars(): Readonly<{
'\u1040-\u1049\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9' +
'\uFF10-\uFF19';

// An alpha char is a unicode chars including unicode marks or
// letter or char in otherChars range
const alpha = unicodeLetters + unicodeAccents + otherChars;
// An alpha char is a unicode chars excluding unicode combining marks
// but including other chars, a hashtag must start with one of these,
// it does not make sense to have a combining mark before a base character.
const alpha = unicodeLetters + otherChars;

// A numeric character is any with the number digit property, or
// underscore. These characters can be included in hashtags, but a hashtag
Expand All @@ -212,7 +213,7 @@ function getHashtagRegexStringChars(): Readonly<{

// Alphanumeric char is any alpha char or a unicode char with decimal
// number property \p{Nd}
const alphanumeric = alpha + numeric;
const alphanumeric = alpha + unicodeAccents + numeric;
const hashChars = '#\\uFF03'; // normal '#' or full-width '#'

return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,39 +239,6 @@ describe('LexicalSelection tests', () => {
expect(actualSelection.focusOffset).toBe(expectedSelection.focusOffset);
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
const GRAPHEME_SCENARIOS = [
{
description: 'grapheme cluster',
// Hangul grapheme cluster.
// https://www.compart.com/en/unicode/U+AC01
grapheme: '\u1100\u1161\u11A8',
},
{
description: 'extended grapheme cluster',
// Tamil 'ni' grapheme cluster.
// http://unicode.org/reports/tr29/#Table_Sample_Grapheme_Clusters
grapheme: '\u0BA8\u0BBF',
},
{
description: 'tailored grapheme cluster',
// Devangari 'kshi' tailored grapheme cluster.
// http://unicode.org/reports/tr29/#Table_Sample_Grapheme_Clusters
grapheme: '\u0915\u094D\u0937\u093F',
},
{
description: 'Emoji sequence combined using zero-width joiners',
// https://emojipedia.org/family-woman-woman-girl-boy/
grapheme:
'\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66',
},
{
description: 'Emoji sequence with skin-tone modifier',
// https://emojipedia.org/clapping-hands-medium-skin-tone/
grapheme: '\uD83D\uDC4F\uD83C\uDFFD',
},
];

const suite = [
{
expectedHTML:
Expand Down Expand Up @@ -735,64 +702,6 @@ describe('LexicalSelection tests', () => {
},

// Tests need fixing:
// ...GRAPHEME_SCENARIOS.flatMap(({description, grapheme}) => [
// {
// name: `Delete backward eliminates entire ${description} (${grapheme})`,
// inputs: [insertText(grapheme + grapheme), deleteBackward(1)],
// expectedHTML: `<div contenteditable="true" style="user-select: text; white-space: pre-wrap; word-break: break-word;" data-lexical-editor="true"><p dir=\"ltr\"><span>${grapheme}</span></p></div>`,
// expectedSelection: {
// anchorPath: [0, 0, 0],
// anchorOffset: grapheme.length,
// focusPath: [0, 0, 0],
// focusOffset: grapheme.length,
// },
// setup: emptySetup,
// },
// {
// name: `Delete forward eliminates entire ${description} (${grapheme})`,
// inputs: [
// insertText(grapheme + grapheme),
// moveNativeSelection([0, 0, 0], 0, [0, 0, 0], 0),
// deleteForward(),
// ],
// expectedHTML: `<div contenteditable="true" style="user-select: text; white-space: pre-wrap; word-break: break-word;" data-lexical-editor="true"><p dir=\"ltr\"><span>${grapheme}</span></p></div>`,
// expectedSelection: {
// anchorPath: [0, 0, 0],
// anchorOffset: 0,
// focusPath: [0, 0, 0],
// focusOffset: 0,
// },
// setup: emptySetup,
// },
// {
// name: `Move backward skips over grapheme cluster (${grapheme})`,
// inputs: [insertText(grapheme + grapheme), moveBackward(1)],
// expectedHTML: `<div contenteditable="true" style="user-select: text; white-space: pre-wrap; word-break: break-word;" data-lexical-editor="true"><p dir=\"ltr\"><span>${grapheme}${grapheme}</span></p></div>`,
// expectedSelection: {
// anchorPath: [0, 0, 0],
// anchorOffset: grapheme.length,
// focusPath: [0, 0, 0],
// focusOffset: grapheme.length,
// },
// setup: emptySetup,
// },
// {
// name: `Move forward skips over grapheme cluster (${grapheme})`,
// inputs: [
// insertText(grapheme + grapheme),
// moveNativeSelection([0, 0, 0], 0, [0, 0, 0], 0),
// moveForward(),
// ],
// expectedHTML: `<div contenteditable="true" style="user-select: text; white-space: pre-wrap; word-break: break-word;" data-lexical-editor="true"><p dir=\"ltr\"><span>${grapheme}${grapheme}</span></p></div>`,
// expectedSelection: {
// anchorPath: [0, 0, 0],
// anchorOffset: grapheme.length,
// focusPath: [0, 0, 0],
// focusOffset: grapheme.length,
// },
// setup: emptySetup,
// },
// ]),
// {
// name: 'Jump to beginning and insert',
// inputs: [
Expand Down
6 changes: 4 additions & 2 deletions packages/lexical/src/LexicalEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ import {
$updateSelectedTextFromDOM,
$updateTextNodeFromDOMContent,
dispatchCommand,
doesContainGrapheme,
doesContainSurrogatePair,
getAnchorTextFromDOM,
getDOMSelection,
getDOMSelectionFromTarget,
Expand Down Expand Up @@ -224,7 +224,9 @@ function $shouldPreventDefaultAndInsertText(
// a dangling `input` event caused by execCommand('insertText').
lastBeforeInputInsertTextTimeStamp < timeStamp + 50)) ||
(anchorNode.isDirty() && textLength < 2) ||
doesContainGrapheme(text)) &&
// TODO consider if there are other scenarios when multiple code units
// should be addressed here
doesContainSurrogatePair(text)) &&
anchor.offset !== focus.offset &&
!anchorNode.isComposing()) ||
// Any non standard text node.
Expand Down
Loading

0 comments on commit 25e5314

Please sign in to comment.