Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify the blockquote parsing logic for messages and rawInput #820

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 16 additions & 22 deletions __tests__/ExpensiMark-HTML-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1125,13 +1125,13 @@ test('Test quotes markdown replacement and removing <br/> from <br/><pre> and </

test('Test quotes markdown replacement skipping blank quotes', () => {
const testString = '> \n>';
const resultString = '&gt; <br />&gt;';
const resultString = '<blockquote> </blockquote>&gt;';
expect(parser.replace(testString)).toBe(resultString);
});

test('Test quotes markdown replacement with text starts with blank quote', () => {
const testString = '> \ntest';
const resultString = '&gt; <br />test';
const resultString = '<blockquote> </blockquote>test';
expect(parser.replace(testString)).toBe(resultString);
});

Expand All @@ -1143,7 +1143,7 @@ test('Test quotes markdown replacement with quotes starts with blank quote row',

test('Test quotes markdown replacement with quotes ends with blank quote rows', () => {
const testString = '> test\n> \n>';
const resultString = '<blockquote>test<br /> <br /> </blockquote>';
const resultString = '<blockquote>test<br /> </blockquote>&gt;';
expect(parser.replace(testString)).toBe(resultString);
});

Expand All @@ -1162,14 +1162,14 @@ test('Test quotes markdown replacement with quotes includes multiple middle blan
test('Test quotes markdown replacement with text includes blank quotes', () => {
const testString = '> \n> quote1 line a\n> quote1 line b\ntest\n> \ntest\n> quote2 line a\n> \n> \n> quote2 line b with an empty line above';
const resultString =
'<blockquote> <br />quote1 line a<br />quote1 line b</blockquote>test<br />&gt; <br />test<br /><blockquote>quote2 line a<br /> <br /> <br />quote2 line b with an empty line above</blockquote>';
'<blockquote> <br />quote1 line a<br />quote1 line b</blockquote>test<br /><blockquote> </blockquote>test<br /><blockquote>quote2 line a<br /> <br /> <br />quote2 line b with an empty line above</blockquote>';
expect(parser.replace(testString)).toBe(resultString);
});

test('Test quotes markdown replacement with text includes multiple spaces', () => {
const quoteTestStartString = '> Indented\n>No indent\n> Indented \n> > Nested indented \n> Indented ';
const quoteTestStartString = '> Indented\n>No indent\n> Indented \n>> Nested indented \n> > Nested not indented \n> Indented ';
const quoteTestReplacedString =
'<blockquote> Indented</blockquote>&gt;No indent<br /><blockquote> Indented <br /><blockquote> Nested indented </blockquote> Indented </blockquote>';
'<blockquote> Indented</blockquote>&gt;No indent<br /><blockquote> Indented <br /><blockquote> Nested indented </blockquote>&gt; Nested not indented <br /> Indented </blockquote>';
expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
});

Expand All @@ -1192,13 +1192,7 @@ test('Test markdown quotes without spaces after > should not be parsed', () => {

test('Test markdown quotes without spaces after > should not be parsed', () => {
const testString = '> > > test';
const resultString = '<blockquote><blockquote><blockquote>test</blockquote></blockquote></blockquote>';
expect(parser.replace(testString)).toBe(resultString);
});

test('Test markdown quotes without spaces after > should not be parsed', () => {
const testString = '>>> test';
const resultString = '&gt;&gt;&gt; test';
const resultString = '<blockquote>&gt; &gt; test</blockquote>';
expect(parser.replace(testString)).toBe(resultString);
});

Expand Down Expand Up @@ -2027,54 +2021,54 @@ test('Test italic/bold/strikethrough markdown to keep consistency', () => {

describe('multi-level blockquote', () => {
test('test max level of blockquote (3)', () => {
const quoteTestStartString = '> > > > > Hello world';
const quoteTestReplacedString = '<blockquote><blockquote><blockquote>&gt; &gt; Hello world</blockquote></blockquote></blockquote>';
const quoteTestStartString = '>>>>> Hello world';
const quoteTestReplacedString = '<blockquote><blockquote><blockquote>&gt;&gt; Hello world</blockquote></blockquote></blockquote>';

expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
});
test('multi-level blockquote with single space', () => {
const quoteTestStartString = '> > > Hello world';
const quoteTestStartString = '>>> Hello world';
const quoteTestReplacedString = '<blockquote><blockquote><blockquote>Hello world</blockquote></blockquote></blockquote>';

expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
});
test('multi-level blockquote with multiple spaces', () => {
const quoteTestStartString = '> > > Hello world';
const quoteTestReplacedString = '<blockquote><blockquote><blockquote> Hello world</blockquote></blockquote></blockquote>';
const quoteTestReplacedString = '<blockquote> &gt; &gt; Hello world</blockquote>';

expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
});

test('multi-level blockquote with mixed spaces', () => {
const quoteTestStartString = '> > > Hello world';
const quoteTestReplacedString = '<blockquote><blockquote><blockquote> Hello world</blockquote></blockquote></blockquote>';
const quoteTestReplacedString = '<blockquote> &gt; &gt; Hello world</blockquote>';

expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
});

test('multi-level blockquote with diffrent syntax', () => {
const quoteTestStartString = '> > _Hello_ *world*';
const quoteTestStartString = '>> _Hello_ *world*';
const quoteTestReplacedString = '<blockquote><blockquote><em>Hello</em> <strong>world</strong></blockquote></blockquote>';

expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
});

test('multi-level blockquote with nested heading', () => {
const quoteTestStartString = '> > # Hello world';
const quoteTestStartString = '>> # Hello world';
const quoteTestReplacedString = '<blockquote><blockquote><h1>Hello world</h1></blockquote></blockquote>';

expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
});

test('multiline multi-level blockquote', () => {
const quoteTestStartString = '> > Hello my\n> > beautiful\n> > world\n';
const quoteTestStartString = '>> Hello my\n>> beautiful\n>> world\n';
const quoteTestReplacedString = '<blockquote><blockquote>Hello my<br />beautiful<br />world</blockquote></blockquote>';

expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
});

test('multiline blockquote with diffrent levels', () => {
const quoteTestStartString = '> > > Hello my\n> > beautiful\n> world\n';
const quoteTestStartString = '>>> Hello my\n>> beautiful\n> world\n';
const quoteTestReplacedString = '<blockquote><blockquote><blockquote>Hello my</blockquote>beautiful</blockquote>world</blockquote>';

expect(parser.replace(quoteTestStartString)).toBe(quoteTestReplacedString);
Expand Down
227 changes: 98 additions & 129 deletions lib/ExpensiMark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -421,58 +421,26 @@ export default class ExpensiMark {
// block quotes naturally appear on their own line. Blockquotes should not appear in code fences or
// inline code blocks. A single prepending space should be stripped if it exists
process: (textToProcess, replacement, shouldKeepRawInput = false) => {
const regex = /^(?:&gt;)+ +(?! )(?![^<]*(?:<\/pre>|<\/code>|<\/video>))([^\v\n\r]+)/gm;
const regex = /^(?:&gt;)+ +(?! )(?![^<]*(?:<\/pre>|<\/code>|<\/video>))([^\v\n\r]*)/gm;

let replacedText = this.replaceTextWithExtras(textToProcess, regex, EXTRAS_DEFAULT, replacement);
if (shouldKeepRawInput) {
const rawInputRegex = /^(?:&gt;)+ +(?! )(?![^<]*(?:<\/pre>|<\/code>|<\/video>))([^\v\n\r]*)/gm;
return this.replaceTextWithExtras(textToProcess, rawInputRegex, EXTRAS_DEFAULT, replacement);
return replacedText;
}

for (let i = this.maxQuoteDepth; i > 0; i--) {
replacedText = replacedText.replaceAll(`${'</blockquote>'.repeat(i)}\n${'<blockquote>'.repeat(i)}`, '\n');
}
return this.modifyTextForQuote(regex, textToProcess, replacement as ReplacementFn);
replacedText = replacedText.replaceAll('</blockquote>\n', '</blockquote>');
return replacedText;
},
replacement: (_extras, g1) => {
// We want to enable 2 options of nested heading inside the blockquote: "># heading" and "> # heading".
// To do this we need to parse body of the quote without first space
const handleMatch = (match: string) => match;
const textToReplace = g1.replace(/^&gt;( )?/gm, handleMatch);
const filterRules = ['heading1'];

// if we don't reach the max quote depth we allow the recursive call to process possible quote
if (this.currentQuoteDepth < this.maxQuoteDepth - 1) {
filterRules.push('quote');
this.currentQuoteDepth++;
}

const replacedText = this.replace(textToReplace, {
filterRules,
shouldEscapeText: false,
shouldKeepRawInput: false,
});
this.currentQuoteDepth = 0;
return `<blockquote>${replacedText}</blockquote>`;
const {replacedText} = this.replaceQuoteText(g1, false);
return `<blockquote>${replacedText || ' '}</blockquote>`;
},
rawInputReplacement: (_extras, g1) => {
// We want to enable 2 options of nested heading inside the blockquote: "># heading" and "> # heading".
// To do this we need to parse body of the quote without first space
let isStartingWithSpace = false;
const handleMatch = (_match: string, g2: string) => {
isStartingWithSpace = !!g2;
return '';
};
const textToReplace = g1.replace(/^&gt;( )?/gm, handleMatch);
const filterRules = ['heading1'];

// if we don't reach the max quote depth we allow the recursive call to process possible quote
if (this.currentQuoteDepth < this.maxQuoteDepth - 1 || isStartingWithSpace) {
filterRules.push('quote');
this.currentQuoteDepth++;
}

const replacedText = this.replace(textToReplace, {
filterRules,
shouldEscapeText: false,
shouldKeepRawInput: true,
});
this.currentQuoteDepth = 0;
return `<blockquote>${isStartingWithSpace ? ' ' : ''}${replacedText}</blockquote>`;
const {replacedText, shouldAddSpace} = this.replaceQuoteText(g1, true);
return `<blockquote>${shouldAddSpace ? ' ' : ''}${replacedText}</blockquote>`;
},
},
/**
Expand Down Expand Up @@ -1129,8 +1097,9 @@ export default class ExpensiMark {
return;
}

const nextItem = splitText?.[index + 1];
// Insert '\n' unless it ends with '\n' or '>' or it's the last element, or if it's a header ('# ') with a space.
if (text.match(/[\n|>][>]?[\s]?$/) || index === splitText.length - 1 || text === '# ') {
if ((nextItem && text.match(/>[\s]?$/) && !nextItem.startsWith('> ')) || text.match(/\n[\s]?$/) || index === splitText.length - 1 || text === '# ') {
joinedText += text;
} else {
joinedText += `${text}\n`;
Expand All @@ -1142,6 +1111,65 @@ export default class ExpensiMark {
return joinedText;
}

/**
 * Flattens nested / multi-line quote HTML (as packed by the 'quote' rule in this.rules for
 * shouldKeepRawInput = false) so that every quoted line is wrapped in its own blockquote.
 *
 * Example input:
 * <blockquote>
 *      quote 1
 *      <blockquote>
 *          quote 2
 *      </blockquote>
 *      quote 3
 * </blockquote>
 *
 * Example output:
 * <blockquote> quote 1</blockquote>
 * <blockquote><blockquote> quote 2</blockquote>
 * <blockquote> quote 3</blockquote>
 *
 * Every output line carries one opening tag per nesting level but only a single closing tag:
 * one closing tag is enough to detect that a (nested) quote line has ended.
 *
 * @param text HTML that may contain blockquote and <br /> tags
 * @returns the HTML with quotes unpacked line by line
 */
unpackNestedQuotes(text: string): string {
    const SPLIT_MARKER = '</split>';
    const OPEN_TAG = '<blockquote>';
    const CLOSE_TAG = '</blockquote>';
    const LINE_BREAK = '<br />';

    // Put a marker after every run of closing tags (with an optional trailing <br />) and after
    // every standalone <br />, then cut the text into segments at those markers.
    const segments = text.replace(/((<\/blockquote>)+(<br \/>)?)|(<br \/>)/g, `$&${SPLIT_MARKER}`).split(SPLIT_MARKER);

    // A marker at the very end of the text leaves an empty trailing segment; drop it.
    if (segments[segments.length - 1] === '') {
        segments.pop();
    }

    // Number of blockquotes that are currently open while walking through the segments.
    // NOTE(review): opening tags are only counted when a segment *starts* with one, whereas closing
    // tags are counted whenever a segment *ends* with one, so this can drop below zero for input
    // such as `text<blockquote>q</blockquote>` — confirm that is intended.
    let depth = 0;
    let unpacked = '';

    for (const segment of segments) {
        if (segment === '' && depth === 0) {
            continue;
        }

        const endsWithBreak = segment.endsWith(LINE_BREAK);
        const content = endsWithBreak ? segment.slice(0, -LINE_BREAK.length) : segment;

        if (content.startsWith(OPEN_TAG)) {
            depth += content.split(OPEN_TAG).length - 1;
        }

        const closesQuote = content.endsWith(CLOSE_TAG);
        if (closesQuote) {
            depth -= content.split(CLOSE_TAG).length - 1;
        }

        let suffix: string;
        if (depth > 0) {
            // Still inside a quote: end the current line (adding the closing tag if the segment did
            // not bring its own) and reopen every level that remains active for the next line.
            suffix = (closesQuote ? '' : CLOSE_TAG) + OPEN_TAG.repeat(depth);
        } else {
            // Outside of any quote the original line break is preserved.
            suffix = endsWithBreak ? LINE_BREAK : '';
        }

        unpacked += content + suffix;
    }

    return unpacked;
}

/**
* Replaces HTML with markdown
*/
Expand All @@ -1154,6 +1182,7 @@ export default class ExpensiMark {
if (parseBodyTag) {
generatedMarkdown = parseBodyTag[2];
}
generatedMarkdown = this.unpackNestedQuotes(generatedMarkdown);

const processRule = (rule: RuleWithRegex) => {
// Pre-processes input HTML before applying regex
Expand Down Expand Up @@ -1186,91 +1215,31 @@ export default class ExpensiMark {
}

/**
* Modify text for Quotes replacing chevrons with html elements
* Main text to html 'quote' parsing logic.
* Removes &gt;( ) from text and recursively calls replace function to process nested quotes and build blockquote HTML result.
* @param shouldKeepRawInput determines if the raw input should be kept for nested quotes.
*/
modifyTextForQuote(regex: RegExp, textToCheck: string, replacement: ReplacementFn): string {
let replacedText = '';
let textToFormat = '';
const match = textToCheck.match(regex);

// If there's matches we need to modify the quotes
if (match !== null) {
let insideCodefence = false;

// Split the textToCheck in lines
const textSplitted = textToCheck.split('\n');

for (let i = 0; i < textSplitted.length; i++) {
if (!insideCodefence) {
// We need to know when there is a start of codefence so we dont quote
insideCodefence = Str.contains(textSplitted[i], '<pre>');
}

// Since the last space will be trimmed and would incorrectly disable a condition we check it manually
const isLastBlockquote = textSplitted[i] === '&gt;' && i === textSplitted.length - 1;

// We only want to modify lines starting with "&gt; " that is not codefence
if ((Str.startsWith(textSplitted[i], '&gt; ') || isLastBlockquote) && !insideCodefence) {
if (textSplitted[i] === '&gt;') {
textToFormat += `${textSplitted[i]} \n`;
insideCodefence = true;
} else {
textToFormat += `${textSplitted[i]}\n`;
}
} else {
// Make sure we will only modify if we have Text needed to be formatted for quote
if (textToFormat !== '') {
replacedText += this.formatTextForQuote(regex, textToFormat, replacement);
textToFormat = '';
}

// We dont want a \n after the textSplitted if it is the last row
if (i === textSplitted.length - 1) {
replacedText += `${textSplitted[i]}`;
} else {
replacedText += `${textSplitted[i]}\n`;
}

// We need to know when we are not inside codefence anymore
if (insideCodefence) {
insideCodefence = !Str.contains(textSplitted[i], '</pre>');
}
}
}

// When loop ends we need the last quote to be formatted if we have quotes at last rows
if (textToFormat !== '') {
replacedText += this.formatTextForQuote(regex, textToFormat, replacement);
}
} else {
// If we doesn't have matches make sure the function will return the same textToCheck
replacedText = textToCheck;
replaceQuoteText(text: string, shouldKeepRawInput: boolean): {replacedText: string; shouldAddSpace: boolean} {
pecanoro marked this conversation as resolved.
Show resolved Hide resolved
let isStartingWithSpace = false;
const handleMatch = (_match: string, g2: string) => {
isStartingWithSpace = !!g2;
return '';
};
const textToReplace = text.replace(/^&gt;( )?/gm, handleMatch);
const filterRules = ['heading1'];
// If we don't reach the max quote depth, we allow the recursive call to process other possible quotes
if (this.currentQuoteDepth < this.maxQuoteDepth - 1 && !isStartingWithSpace) {
filterRules.push('quote');
this.currentQuoteDepth++;
}
return replacedText;
}

/**
* Format the content of blockquote if the text matches the regex or else just return the original text
*/
formatTextForQuote(regex: RegExp, textToCheck: string, replacement: ReplacementFn): string {
if (textToCheck.match(regex)) {
// Remove '&gt;' and trim the spaces between nested quotes
const formatRow = (row: string) => {
let quoteContent = row[4] === ' ' ? row.substr(5) : row.substr(4);
if (row === '&gt; ') quoteContent = row.substr(4);

if (quoteContent.trimStart().startsWith('&gt;')) {
return quoteContent.trimStart();
}
return quoteContent;
};
let textToFormat = textToCheck.split('\n').map(formatRow).join('\n');
const replacedText = this.replace(textToReplace, {
filterRules,
shouldEscapeText: false,
shouldKeepRawInput,
});
this.currentQuoteDepth = 0;

// Remove leading and trailing line breaks
textToFormat = textToFormat.replace(/^\n+|\n+$/g, '');
return replacement(EXTRAS_DEFAULT, textToFormat);
}
return textToCheck;
return {replacedText, shouldAddSpace: isStartingWithSpace};
}

/**
Expand Down
Loading