From 44831542bbde3b7a66dadebebb18735eff0d5d1f Mon Sep 17 00:00:00 2001 From: Trevor Buckner Date: Mon, 14 Aug 2023 14:30:19 -0400 Subject: [PATCH] Fix unicode Regex miscounting emoji length Many emojis are 2+ UTF-16 code units long. The regex `u` flag, which allows searching for punctuation, also counts emojis as single chars. Spreading the strings into an array restores the correct character count. --- src/Tokenizer.ts | 7 ++++--- test/specs/new/emoji_inline.html | 11 +++++++++++ test/specs/new/emoji_inline.md | 21 +++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 test/specs/new/emoji_inline.html create mode 100644 test/specs/new/emoji_inline.md diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 769bdb9fd3..f5cd49775e 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -625,7 +625,8 @@ export class _Tokenizer { const nextChar = match[1] || match[2] || ''; if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) { - const lLength = match[0].length - 1; + // unicode Regex counts emoji as 1 char; spread into array for proper count (used multiple times below) + const lLength = [...match[0]].length - 1; let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0; const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd; @@ -639,7 +640,7 @@ export class _Tokenizer { if (!rDelim) continue; // skip single * in __abc*abc__ - rLength = rDelim.length; + rLength = [...rDelim].length; if (match[3] || match[4]) { // found another Left Delim delimTotal += rLength; @@ -658,7 +659,7 @@ export class _Tokenizer { // Remove extra characters. *a*** -> *a* rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); - const raw = src.slice(0, lLength + match.index + rLength + 1); + const raw = [...src].slice(0, lLength + match.index + rLength + 1).join(''); // Create `em` if smallest delimiter has odd char count. 
*a*** if (Math.min(lLength, rLength) % 2) { diff --git a/test/specs/new/emoji_inline.html b/test/specs/new/emoji_inline.html new file mode 100644 index 0000000000..d0b19a20a7 --- /dev/null +++ b/test/specs/new/emoji_inline.html @@ -0,0 +1,11 @@ +

Situations where it fails:

+

test 💁

+

💁 test

+

🤓 test

+

🏖️ test

+

🏖️🤓💁 test

+

Situations where it works:

+

**💁 **

+

⚠️ test

+

Here, the emoji rendering works, but the text doesn't get rendered in italic.

+

💁 test

\ No newline at end of file diff --git a/test/specs/new/emoji_inline.md b/test/specs/new/emoji_inline.md new file mode 100644 index 0000000000..36f6385677 --- /dev/null +++ b/test/specs/new/emoji_inline.md @@ -0,0 +1,21 @@ +Situations where it fails: + +**test 💁** + +**💁 test** + +**🤓 test** + +**🏖️ test** + +**🏖️🤓💁 test** + +Situations where it works: + +**💁 ** + +**⚠️ test** + +Here, the emoji rendering works, but the text doesn't get rendered in italic. + +*💁 test* \ No newline at end of file