From f848e7703956a6c37871432a7e128f51b1700aa0 Mon Sep 17 00:00:00 2001 From: Trevor Buckner Date: Sun, 7 Feb 2021 17:22:47 -0500 Subject: [PATCH] fix: Join adjacent inlineText tokens (#1926) --- docs/USING_PRO.md | 4 +-- lib/marked.esm.js | 68 +++++++++++++++++++------------------- lib/marked.js | 72 ++++++++++++++++++++++------------------- src/Lexer.js | 47 +++++++++++++++++++-------- src/Tokenizer.js | 21 ++---------- test/unit/Lexer-spec.js | 7 ++-- 6 files changed, 112 insertions(+), 107 deletions(-) diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md index b8fc872c6b..770a2ca18f 100644 --- a/docs/USING_PRO.md +++ b/docs/USING_PRO.md @@ -157,7 +157,7 @@ console.log(marked('$ latex code $\n\n` other code `')); ### Block level tokenizer methods - space(*string* src) -- code(*string* src, *array* tokens) +- code(*string* src) - fences(*string* src) - heading(*string* src) - nptable(*string* src) @@ -169,7 +169,7 @@ console.log(marked('$ latex code $\n\n` other code `')); - table(*string* src) - lheading(*string* src) - paragraph(*string* src) -- text(*string* src, *array* tokens) +- text(*string* src) ### Inline level tokenizer methods diff --git a/lib/marked.esm.js b/lib/marked.esm.js index 3b294451db..31ed6e80be 100644 --- a/lib/marked.esm.js +++ b/lib/marked.esm.js @@ -391,18 +391,9 @@ var Tokenizer_1 = class Tokenizer { } } - code(src, tokens) { + code(src) { const cap = this.rules.block.code.exec(src); if (cap) { - const lastToken = tokens[tokens.length - 1]; - // An indented code block cannot interrupt a paragraph. - if (lastToken && lastToken.type === 'paragraph') { - return { - raw: cap[0], - text: cap[0].trimRight() - }; - } - const text = cap[0].replace(/^ {1,4}/gm, ''); return { type: 'code', @@ -722,17 +713,9 @@ var Tokenizer_1 = class Tokenizer { } } - text(src, tokens) { + text(src) { const cap = this.rules.block.text.exec(src); if (cap) { - const lastToken = tokens[tokens.length - 1]; - if (lastToken && lastToken.type === 'text') { - return { - raw: cap[0], - text: cap[0] - }; - } - return { type: 'text', raw: cap[0], @@ -1505,14 +1488,15 @@ var Lexer_1 = class Lexer { } // code - if (token = this.tokenizer.code(src, tokens)) { + if (token = this.tokenizer.code(src)) { src = src.substring(token.raw.length); - if (token.type) { - tokens.push(token); - } else { - lastToken = tokens[tokens.length - 1]; + lastToken = tokens[tokens.length - 1]; + // An indented code block cannot interrupt a paragraph. + if (lastToken && lastToken.type === 'paragraph') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + } else { + tokens.push(token); } continue; } @@ -1605,14 +1589,14 @@ var Lexer_1 = class Lexer { } // text - if (token = this.tokenizer.text(src, tokens)) { + if (token = this.tokenizer.text(src)) { src = src.substring(token.raw.length); - if (token.type) { - tokens.push(token); - } else { - lastToken = tokens[tokens.length - 1]; + lastToken = tokens[tokens.length - 1]; + if (lastToken && lastToken.type === 'text') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + } else { + tokens.push(token); } continue; } @@ -1697,7 +1681,7 @@ var Lexer_1 = class Lexer { * Lexing/Compiling */ inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) { - let token; + let token, lastToken; // String with links masked to avoid interference with em and strong let maskedSrc = src; @@ -1737,7 +1721,13 @@ var Lexer_1 = class Lexer { src = src.substring(token.raw.length); inLink = token.inLink; inRawBlock = token.inRawBlock; - tokens.push(token); + lastToken = tokens[tokens.length - 1]; + if (lastToken && token.type === 'text' && lastToken.type === 'text') { + lastToken.raw += token.raw; + lastToken.text += token.text; + } else { + tokens.push(token); + } continue; } @@ -1754,10 +1744,16 @@ var Lexer_1 = class Lexer { // reflink, nolink if (token = this.tokenizer.reflink(src, this.tokens.links)) { src = src.substring(token.raw.length); + lastToken = tokens[tokens.length - 1]; if (token.type === 'link') { token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); + tokens.push(token); + } else if (lastToken && token.type === 'text' && lastToken.type === 'text') { + lastToken.raw += token.raw; + lastToken.text += token.text; + } else { + tokens.push(token); } - tokens.push(token); continue; } @@ -1818,7 +1814,13 @@ var Lexer_1 = class Lexer { src = src.substring(token.raw.length); prevChar = token.raw.slice(-1); keepPrevChar = true; - tokens.push(token); + lastToken = tokens[tokens.length - 1]; + if (lastToken && lastToken.type === 'text') { + lastToken.raw += token.raw; + lastToken.text += token.text; + } else { + tokens.push(token); + } continue; } diff --git a/lib/marked.js b/lib/marked.js index 5b4e2d78d4..904a9f1077 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -488,19 +488,10 @@ } }; - _proto.code = function code(src, tokens) { + _proto.code = function code(src) { var cap = this.rules.block.code.exec(src); if (cap) { - var lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph. - - if (lastToken && lastToken.type === 'paragraph') { - return { - raw: cap[0], - text: cap[0].trimRight() - }; - } - var text = cap[0].replace(/^ {1,4}/gm, ''); return { type: 'code', @@ -812,19 +803,10 @@ } }; - _proto.text = function text(src, tokens) { + _proto.text = function text(src) { var cap = this.rules.block.text.exec(src); if (cap) { - var lastToken = tokens[tokens.length - 1]; - - if (lastToken && lastToken.type === 'text') { - return { - raw: cap[0], - text: cap[0] - }; - } - return { type: 'text', raw: cap[0], @@ -1497,15 +1479,15 @@ } // code - if (token = this.tokenizer.code(src, tokens)) { + if (token = this.tokenizer.code(src)) { src = src.substring(token.raw.length); + lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph. - if (token.type) { - tokens.push(token); - } else { - lastToken = tokens[tokens.length - 1]; + if (lastToken && lastToken.type === 'paragraph') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + } else { + tokens.push(token); } continue; @@ -1603,15 +1585,15 @@ } // text - if (token = this.tokenizer.text(src, tokens)) { + if (token = this.tokenizer.text(src)) { src = src.substring(token.raw.length); + lastToken = tokens[tokens.length - 1]; - if (token.type) { - tokens.push(token); - } else { - lastToken = tokens[tokens.length - 1]; + if (lastToken && lastToken.type === 'text') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + } else { + tokens.push(token); } continue; @@ -1718,7 +1700,7 @@ inRawBlock = false; } - var token; // String with links masked to avoid interference with em and strong + var token, lastToken; // String with links masked to avoid interference with em and strong var maskedSrc = src; var match; @@ -1759,7 +1741,15 @@ src = src.substring(token.raw.length); inLink = token.inLink; inRawBlock = token.inRawBlock; - tokens.push(token); + var _lastToken = tokens[tokens.length - 1]; + + if (_lastToken && token.type === 'text' && _lastToken.type === 'text') { + _lastToken.raw += token.raw; + _lastToken.text += token.text; + } else { + tokens.push(token); + } + continue; } // link @@ -1778,12 +1768,18 @@ if (token = this.tokenizer.reflink(src, this.tokens.links)) { src = src.substring(token.raw.length); + var _lastToken2 = tokens[tokens.length - 1]; if (token.type === 'link') { token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); + tokens.push(token); + } else if (_lastToken2 && token.type === 'text' && _lastToken2.type === 'text') { + _lastToken2.raw += token.raw; + _lastToken2.text += token.text; + } else { + tokens.push(token); } - tokens.push(token); continue; } // strong @@ -1844,7 +1840,15 @@ src = src.substring(token.raw.length); prevChar = token.raw.slice(-1); keepPrevChar = true; - tokens.push(token); + lastToken = tokens[tokens.length - 1]; + + if (lastToken && lastToken.type === 'text') { + lastToken.raw += token.raw; + lastToken.text += token.text; + } else { + tokens.push(token); + } + continue; } diff --git a/src/Lexer.js b/src/Lexer.js index bb04b43101..111cc96304 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -136,14 +136,15 @@ module.exports = class Lexer { } // code - if (token = this.tokenizer.code(src, tokens)) { + if (token = this.tokenizer.code(src)) { src = src.substring(token.raw.length); - if (token.type) { - tokens.push(token); - } else { - lastToken = tokens[tokens.length - 1]; + lastToken = tokens[tokens.length - 1]; + // An indented code block cannot interrupt a paragraph. + if (lastToken && lastToken.type === 'paragraph') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + } else { + tokens.push(token); } continue; } @@ -236,14 +237,14 @@ module.exports = class Lexer { } // text - if (token = this.tokenizer.text(src, tokens)) { + if (token = this.tokenizer.text(src)) { src = src.substring(token.raw.length); - if (token.type) { - tokens.push(token); - } else { - lastToken = tokens[tokens.length - 1]; + lastToken = tokens[tokens.length - 1]; + if (lastToken && lastToken.type === 'text') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + } else { + tokens.push(token); } continue; } @@ -331,7 +332,7 @@ module.exports = class Lexer { * Lexing/Compiling */ inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) { - let token; + let token, lastToken; // String with links masked to avoid interference with em and strong let maskedSrc = src; @@ -371,7 +372,13 @@ module.exports = class Lexer { src = src.substring(token.raw.length); inLink = token.inLink; inRawBlock = token.inRawBlock; - tokens.push(token); + const lastToken = tokens[tokens.length - 1]; + if (lastToken && token.type === 'text' && lastToken.type === 'text') { + lastToken.raw += token.raw; + lastToken.text += token.text; + } else { + tokens.push(token); + } continue; } @@ -388,10 +395,16 @@ module.exports = class Lexer { // reflink, nolink if (token = this.tokenizer.reflink(src, this.tokens.links)) { src = src.substring(token.raw.length); + const lastToken = tokens[tokens.length - 1]; if (token.type === 'link') { token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); + tokens.push(token); + } else if (lastToken && token.type === 'text' && lastToken.type === 'text') { + lastToken.raw += token.raw; + lastToken.text += token.text; + } else { + tokens.push(token); } - tokens.push(token); continue; } @@ -452,7 +465,13 @@ module.exports = class Lexer { src = src.substring(token.raw.length); prevChar = token.raw.slice(-1); keepPrevChar = true; - tokens.push(token); + lastToken = tokens[tokens.length - 1]; + if (lastToken && lastToken.type === 'text') { + lastToken.raw += token.raw; + lastToken.text += token.text; + } else { + tokens.push(token); + } continue; } diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 056cc3ff40..4b1eebf96d 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -79,18 +79,9 @@ module.exports = class Tokenizer { } } - code(src, tokens) { + code(src) { const cap = this.rules.block.code.exec(src); if (cap) { - const lastToken = tokens[tokens.length - 1]; - // An indented code block cannot interrupt a paragraph. - if (lastToken && lastToken.type === 'paragraph') { - return { - raw: cap[0], - text: cap[0].trimRight() - }; - } - const text = cap[0].replace(/^ {1,4}/gm, ''); return { type: 'code', @@ -410,17 +401,9 @@ module.exports = class Tokenizer { } } - text(src, tokens) { + text(src) { const cap = this.rules.block.text.exec(src); if (cap) { - const lastToken = tokens[tokens.length - 1]; - if (lastToken && lastToken.type === 'text') { - return { - raw: cap[0], - text: cap[0] - }; - } - return { type: 'text', raw: cap[0], diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index 192c8cc5fd..3a507f010a 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -616,9 +616,7 @@ a | b md: '
html
', options: { sanitize: true }, tokens: [ - { type: 'text', raw: '
', inLink: false, inRawBlock: false, text: '<div>' }, - { type: 'text', raw: 'html', text: 'html' }, - { type: 'text', raw: '
', inLink: false, inRawBlock: false, text: '</div>' } + { type: 'text', raw: '
html
', inLink: false, inRawBlock: false, text: '<div>html</div>' } ] }); }); @@ -740,8 +738,7 @@ a | b expectInlineTokens({ md: '[link]', tokens: [ - { type: 'text', raw: '[', text: '[' }, - { type: 'text', raw: 'link]', text: 'link]' } + { type: 'text', raw: '[link]', text: '[link]' } ] }); });