Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Join adjacent inlineText tokens #1926

Merged
merged 9 commits into from
Feb 7, 2021
4 changes: 2 additions & 2 deletions docs/USING_PRO.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ console.log(marked('$ latex code $\n\n` other code `'));
### Block level tokenizer methods

- space(*string* src)
- code(*string* src, *array* tokens)
- code(*string* src)
- fences(*string* src)
- heading(*string* src)
- nptable(*string* src)
Expand All @@ -169,7 +169,7 @@ console.log(marked('$ latex code $\n\n` other code `'));
- table(*string* src)
- lheading(*string* src)
- paragraph(*string* src)
- text(*string* src, *array* tokens)
- text(*string* src)

### Inline level tokenizer methods

Expand Down
68 changes: 35 additions & 33 deletions lib/marked.esm.js
Original file line number Diff line number Diff line change
Expand Up @@ -391,18 +391,9 @@ var Tokenizer_1 = class Tokenizer {
}
}

code(src, tokens) {
code(src) {
const cap = this.rules.block.code.exec(src);
if (cap) {
const lastToken = tokens[tokens.length - 1];
// An indented code block cannot interrupt a paragraph.
if (lastToken && lastToken.type === 'paragraph') {
return {
raw: cap[0],
text: cap[0].trimRight()
};
}

const text = cap[0].replace(/^ {1,4}/gm, '');
return {
type: 'code',
Expand Down Expand Up @@ -719,17 +710,9 @@ var Tokenizer_1 = class Tokenizer {
}
}

text(src, tokens) {
text(src) {
const cap = this.rules.block.text.exec(src);
if (cap) {
const lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === 'text') {
return {
raw: cap[0],
text: cap[0]
};
}

return {
type: 'text',
raw: cap[0],
Expand Down Expand Up @@ -1502,14 +1485,15 @@ var Lexer_1 = class Lexer {
}

// code
if (token = this.tokenizer.code(src, tokens)) {
if (token = this.tokenizer.code(src)) {
src = src.substring(token.raw.length);
if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
lastToken = tokens[tokens.length - 1];
// An indented code block cannot interrupt a paragraph.
if (lastToken && lastToken.type === 'paragraph') {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
} else {
tokens.push(token);
}
continue;
}
Expand Down Expand Up @@ -1602,14 +1586,14 @@ var Lexer_1 = class Lexer {
}

// text
if (token = this.tokenizer.text(src, tokens)) {
if (token = this.tokenizer.text(src)) {
src = src.substring(token.raw.length);
if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === 'text') {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
} else {
tokens.push(token);
}
continue;
}
Expand Down Expand Up @@ -1694,7 +1678,7 @@ var Lexer_1 = class Lexer {
* Lexing/Compiling
*/
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
let token;
let token, lastToken;

// String with links masked to avoid interference with em and strong
let maskedSrc = src;
Expand Down Expand Up @@ -1734,7 +1718,13 @@ var Lexer_1 = class Lexer {
src = src.substring(token.raw.length);
inLink = token.inLink;
inRawBlock = token.inRawBlock;
tokens.push(token);
lastToken = tokens[tokens.length - 1];
if (lastToken && token.type === 'text' && lastToken.type === 'text') {
lastToken.raw += token.raw;
lastToken.text += token.text;
} else {
tokens.push(token);
}
continue;
}

Expand All @@ -1751,10 +1741,16 @@ var Lexer_1 = class Lexer {
// reflink, nolink
if (token = this.tokenizer.reflink(src, this.tokens.links)) {
src = src.substring(token.raw.length);
lastToken = tokens[tokens.length - 1];
if (token.type === 'link') {
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
tokens.push(token);
} else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
lastToken.raw += token.raw;
lastToken.text += token.text;
} else {
tokens.push(token);
}
tokens.push(token);
continue;
}

Expand Down Expand Up @@ -1815,7 +1811,13 @@ var Lexer_1 = class Lexer {
src = src.substring(token.raw.length);
prevChar = token.raw.slice(-1);
keepPrevChar = true;
tokens.push(token);
lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === 'text') {
lastToken.raw += token.raw;
lastToken.text += token.text;
} else {
tokens.push(token);
}
continue;
}

Expand Down
72 changes: 38 additions & 34 deletions lib/marked.js
Original file line number Diff line number Diff line change
Expand Up @@ -488,19 +488,10 @@
}
};

_proto.code = function code(src, tokens) {
_proto.code = function code(src) {
var cap = this.rules.block.code.exec(src);

if (cap) {
var lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph.

if (lastToken && lastToken.type === 'paragraph') {
return {
raw: cap[0],
text: cap[0].trimRight()
};
}

var text = cap[0].replace(/^ {1,4}/gm, '');
return {
type: 'code',
Expand Down Expand Up @@ -812,19 +803,10 @@
}
};

_proto.text = function text(src, tokens) {
_proto.text = function text(src) {
var cap = this.rules.block.text.exec(src);

if (cap) {
var lastToken = tokens[tokens.length - 1];

if (lastToken && lastToken.type === 'text') {
return {
raw: cap[0],
text: cap[0]
};
}

return {
type: 'text',
raw: cap[0],
Expand Down Expand Up @@ -1497,15 +1479,15 @@
} // code


if (token = this.tokenizer.code(src, tokens)) {
if (token = this.tokenizer.code(src)) {
src = src.substring(token.raw.length);
lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph.

if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === 'paragraph') {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
} else {
tokens.push(token);
}

continue;
Expand Down Expand Up @@ -1603,15 +1585,15 @@
} // text


if (token = this.tokenizer.text(src, tokens)) {
if (token = this.tokenizer.text(src)) {
src = src.substring(token.raw.length);
lastToken = tokens[tokens.length - 1];

if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === 'text') {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
} else {
tokens.push(token);
}

continue;
Expand Down Expand Up @@ -1718,7 +1700,7 @@
inRawBlock = false;
}

var token; // String with links masked to avoid interference with em and strong
var token, lastToken; // String with links masked to avoid interference with em and strong

var maskedSrc = src;
var match;
Expand Down Expand Up @@ -1759,7 +1741,15 @@
src = src.substring(token.raw.length);
inLink = token.inLink;
inRawBlock = token.inRawBlock;
tokens.push(token);
var _lastToken = tokens[tokens.length - 1];

if (_lastToken && token.type === 'text' && _lastToken.type === 'text') {
_lastToken.raw += token.raw;
_lastToken.text += token.text;
} else {
tokens.push(token);
}

continue;
} // link

Expand All @@ -1778,12 +1768,18 @@

if (token = this.tokenizer.reflink(src, this.tokens.links)) {
src = src.substring(token.raw.length);
var _lastToken2 = tokens[tokens.length - 1];

if (token.type === 'link') {
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
tokens.push(token);
} else if (_lastToken2 && token.type === 'text' && _lastToken2.type === 'text') {
_lastToken2.raw += token.raw;
_lastToken2.text += token.text;
} else {
tokens.push(token);
}

tokens.push(token);
continue;
} // strong

Expand Down Expand Up @@ -1844,7 +1840,15 @@
src = src.substring(token.raw.length);
prevChar = token.raw.slice(-1);
keepPrevChar = true;
tokens.push(token);
lastToken = tokens[tokens.length - 1];

if (lastToken && lastToken.type === 'text') {
lastToken.raw += token.raw;
lastToken.text += token.text;
} else {
tokens.push(token);
}

continue;
}

Expand Down
47 changes: 33 additions & 14 deletions src/Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,15 @@ module.exports = class Lexer {
}

// code
if (token = this.tokenizer.code(src, tokens)) {
if (token = this.tokenizer.code(src)) {
src = src.substring(token.raw.length);
if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
lastToken = tokens[tokens.length - 1];
// An indented code block cannot interrupt a paragraph.
if (lastToken && lastToken.type === 'paragraph') {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should check for a paragraph before we call the code tokenizer? That might save some work that doesn't need to be done.

Copy link
Contributor Author

@calculuschild calculuschild Feb 6, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably worth trying. It's a tradeoff between always checking `lastToken` and sometimes calling the code tokenizer, versus always calling the code tokenizer and sometimes checking `lastToken`. I'm not sure how often this situation comes up, so I can't say whether one would be better than the other.

Edit: If `lastToken` is a paragraph, though, what do we do? Just continue? Or call the "text" tokenizer out of sequence?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If `lastToken` is a paragraph, though, what do we do? Just continue? Or call the "text" tokenizer out of sequence?

Good point. I suppose we would still need to call the code tokenizer to see if we should skip the other tokens. Maybe it is still better to check the code tokenizer first.

lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
} else {
tokens.push(token);
}
continue;
}
Expand Down Expand Up @@ -236,14 +237,14 @@ module.exports = class Lexer {
}

// text
if (token = this.tokenizer.text(src, tokens)) {
if (token = this.tokenizer.text(src)) {
src = src.substring(token.raw.length);
if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === 'text') {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
} else {
tokens.push(token);
}
continue;
}
Expand Down Expand Up @@ -331,7 +332,7 @@ module.exports = class Lexer {
* Lexing/Compiling
*/
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
let token;
let token, lastToken;

// String with links masked to avoid interference with em and strong
let maskedSrc = src;
Expand Down Expand Up @@ -371,7 +372,13 @@ module.exports = class Lexer {
src = src.substring(token.raw.length);
inLink = token.inLink;
inRawBlock = token.inRawBlock;
tokens.push(token);
const lastToken = tokens[tokens.length - 1];
if (lastToken && token.type === 'text' && lastToken.type === 'text') {
lastToken.raw += token.raw;
lastToken.text += token.text;
} else {
tokens.push(token);
}
continue;
}

Expand All @@ -388,10 +395,16 @@ module.exports = class Lexer {
// reflink, nolink
if (token = this.tokenizer.reflink(src, this.tokens.links)) {
src = src.substring(token.raw.length);
const lastToken = tokens[tokens.length - 1];
if (token.type === 'link') {
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
tokens.push(token);
} else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
lastToken.raw += token.raw;
lastToken.text += token.text;
} else {
tokens.push(token);
}
tokens.push(token);
continue;
}

Expand Down Expand Up @@ -452,7 +465,13 @@ module.exports = class Lexer {
src = src.substring(token.raw.length);
prevChar = token.raw.slice(-1);
keepPrevChar = true;
tokens.push(token);
lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === 'text') {
lastToken.raw += token.raw;
lastToken.text += token.text;
} else {
tokens.push(token);
}
continue;
}

Expand Down
Loading