From 825a9f82af05448d85618bbac6ade8fbf9df286b Mon Sep 17 00:00:00 2001 From: Trevor Buckner Date: Tue, 1 Jun 2021 15:26:35 -0400 Subject: [PATCH] fix: em strong (#2075) --- src/Tokenizer.js | 29 ++++++++++--------- src/rules.js | 6 ++-- test/specs/new/em_strong_complex_nesting.html | 1 + test/specs/new/em_strong_complex_nesting.md | 1 + test/specs/new/em_strong_multiline.html | 3 ++ test/specs/new/em_strong_multiline.md | 3 ++ 6 files changed, 26 insertions(+), 17 deletions(-) create mode 100644 test/specs/new/em_strong_complex_nesting.html create mode 100644 test/specs/new/em_strong_complex_nesting.md create mode 100644 test/specs/new/em_strong_multiline.html create mode 100644 test/specs/new/em_strong_multiline.md diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 33aef2b2b1..0177551af7 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -550,7 +550,8 @@ module.exports = class Tokenizer { let match = this.rules.inline.emStrong.lDelim.exec(src); if (!match) return; - if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return; // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well + // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well + if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return; const nextChar = match[1] || match[2] || ''; @@ -561,12 +562,13 @@ module.exports = class Tokenizer { const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd; endReg.lastIndex = 0; - maskedSrc = maskedSrc.slice(-1 * src.length + lLength); // Bump maskedSrc to same section of string as src (move to lexer?) + // Clip maskedSrc to same section of string as src (move to lexer?) 
+ maskedSrc = maskedSrc.slice(-1 * src.length + lLength); while ((match = endReg.exec(maskedSrc)) != null) { rDelim = match[1] || match[2] || match[3] || match[4] || match[5] || match[6]; - if (!rDelim) continue; // matched the first alternative in rules.js (skip the * in __abc*abc__) + if (!rDelim) continue; // skip single * in __abc*abc__ rLength = rDelim.length; @@ -584,11 +586,10 @@ module.exports = class Tokenizer { if (delimTotal > 0) continue; // Haven't found enough closing delimiters - // If this is the last rDelimiter, remove extra characters. *a*** -> *a* - if (delimTotal + midDelimTotal - rLength <= 0 && !maskedSrc.slice(endReg.lastIndex).match(endReg)) { - rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); - } + // Remove extra characters. *a*** -> *a* + rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); + // Create `em` if smallest delimiter has odd char count. *a*** if (Math.min(lLength, rLength) % 2) { return { type: 'em', @@ -596,13 +597,13 @@ module.exports = class Tokenizer { text: src.slice(1, lLength + match.index + rLength) }; } - if (Math.min(lLength, rLength) % 2 === 0) { - return { - type: 'strong', - raw: src.slice(0, lLength + match.index + rLength + 1), - text: src.slice(2, lLength + match.index + rLength - 1) - }; - } + + // Create 'strong' if smallest delimiter has even char count. **a*** + return { + type: 'strong', + raw: src.slice(0, lLength + match.index + rLength + 1), + text: src.slice(2, lLength + match.index + rLength - 1) + }; } } } diff --git a/src/rules.js b/src/rules.js index 9732208851..3cd33fe0d7 100644 --- a/src/rules.js +++ b/src/rules.js @@ -176,9 +176,9 @@ const inline = { emStrong: { lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/, // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. 
- // () Skip other delimiter (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a - rDelimAst: /\_\_[^_]*?\*[^_]*?\_\_|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/, - rDelimUnd: /\*\*[^*]*?\_[^*]*?\*\*|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ + // () Skip other delimiter (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a + rDelimAst: /\_\_[^_*]*?\*[^_*]*?\_\_|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/, + rDelimUnd: /\*\*[^_*]*?\_[^_*]*?\*\*|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ }, code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, br: /^( {2,}|\\)\n(?!\s*$)/, diff --git a/test/specs/new/em_strong_complex_nesting.html b/test/specs/new/em_strong_complex_nesting.html new file mode 100644 index 0000000000..690fcd8f41 --- /dev/null +++ b/test/specs/new/em_strong_complex_nesting.html @@ -0,0 +1 @@ +

Emphasis

diff --git a/test/specs/new/em_strong_complex_nesting.md b/test/specs/new/em_strong_complex_nesting.md new file mode 100644 index 0000000000..28f334908a --- /dev/null +++ b/test/specs/new/em_strong_complex_nesting.md @@ -0,0 +1 @@ +**E*mp****ha****si*s** diff --git a/test/specs/new/em_strong_multiline.html b/test/specs/new/em_strong_multiline.html new file mode 100644 index 0000000000..859a2cd7ed --- /dev/null +++ b/test/specs/new/em_strong_multiline.html @@ -0,0 +1,3 @@ +

<p><em>italic <strong>bold</strong> italic</em> +<em>italic <strong>bold</strong> italic</em> +<em>italic <strong>bold</strong> italic</em></p>

diff --git a/test/specs/new/em_strong_multiline.md b/test/specs/new/em_strong_multiline.md new file mode 100644 index 0000000000..85bd96032e --- /dev/null +++ b/test/specs/new/em_strong_multiline.md @@ -0,0 +1,3 @@ +_italic **bold** italic_ +_italic **bold** italic_ +_italic **bold** italic_