Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🔧 ExpensiMark: Markdown image shorthand syntax #676

Merged
merged 8 commits into from
Apr 15, 2024
36 changes: 30 additions & 6 deletions __tests__/ExpensiMark-HTML-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1910,21 +1910,33 @@ describe('multi-level blockquote', () => {
});

describe('Image markdown conversion to html tag', () => {
test('Single image', () => {
test('Single image with alt text', () => {
const testString = '![test](https://example.com/image.png)';
const resultString = '<img src="https://example.com/image.png" alt="test" />';
expect(parser.replace(testString)).toBe(resultString);
});

test('Single image with empty alt text', () => {
const testString = '![](https://example.com/image.png)';
const resultString = '<img src="https://example.com/image.png" />';
expect(parser.replace(testString)).toBe(resultString);
});

test('Single image short syntax without alt text', () => {
const testString = '!(https://example.com/image.png)';
const resultString = '<img src="https://example.com/image.png" />';
expect(parser.replace(testString)).toBe(resultString);
});

test('Text containing images', () => {
const testString = 'An image of a banana: ![banana](https://example.com/banana.png) an image of a developer: ![dev](https://example.com/developer.png)';
const resultString = 'An image of a banana: <img src="https://example.com/banana.png" alt="banana" /> an image of a developer: <img src="https://example.com/developer.png" alt="dev" />';
const testString = 'An image of a banana: ![banana](https://example.com/banana.png) an image without alt: !(https://example.com/developer.png)';
const resultString = 'An image of a banana: <img src="https://example.com/banana.png" alt="banana" /> an image without alt: <img src="https://example.com/developer.png" />';
expect(parser.replace(testString)).toBe(resultString);
});

test('Image with alt text containing markdown', () => {
const testString = '![# fake-heading *bold* _italic_ ~strike~ [:-)]](https://example.com/image.png)';
const resultString = '<img src="https://example.com/image.png" alt="# fake-heading &ast;bold&ast; &lowbar;italic&lowbar; &#126;strike&#126; &lbrack;:-)&rbrack;" />';
const resultString = '<img src="https://example.com/image.png" alt="# fake-heading *bold* _italic_ ~strike~ [:-)]" />';
chiragsalian marked this conversation as resolved.
Show resolved Hide resolved
expect(parser.replace(testString)).toBe(resultString);
});

Expand All @@ -1940,6 +1952,12 @@ describe('Image markdown conversion to html tag', () => {
expect(parser.replace(testString, {shouldKeepRawInput: true})).toBe(resultString);
});

// The shorthand form has no brackets at all, so the raw-input output carries no alt attribute
// and is tagged with the "auto" link variant instead of "labeled".
test('Single short syntax image with raw data attributes', () => {
    const testString = '!(https://example.com/image.png)';
    const resultString = '<img src="https://example.com/image.png" data-raw-href="https://example.com/image.png" data-link-variant="auto" />';
    expect(parser.replace(testString, {shouldKeepRawInput: true})).toBe(resultString);
});

test('Image with invalid url should remain unchanged', () => {
const testString = '![test](invalid)';
expect(parser.replace(testString)).toBe(testString);
Expand All @@ -1963,7 +1981,13 @@ describe('Image markdown conversion to html tag', () => {

test('No html inside the src attribute', () => {
const testString = '![`code`](https://example.com/image.png)';
const resultString = '<img src="https://example.com/image.png" alt="<code>code</code>" />';
const resultString = '<img src="https://example.com/image.png" alt="&#x60;code&#x60;" />';
expect(parser.replace(testString)).toBe(resultString);
})
});

test('No html inside the alt attribute - pre tag', () => {
const testString = '![```code```](https://example.com/image.png)';
const resultString = '<img src="https://example.com/image.png" alt="&#x60;&#x60;&#x60;code&#x60;&#x60;&#x60;" data-raw-href="https://example.com/image.png" data-link-variant="labeled" />';
expect(parser.replace(testString, {shouldKeepRawInput: true})).toBe(resultString);
});
});
28 changes: 20 additions & 8 deletions __tests__/ExpensiMark-Markdown-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -758,21 +758,33 @@ test('Mention html to markdown', () => {
});

describe('Image tag conversion to markdown', () => {
test('Image with alt attribute', () => {
const testString = '<img src="https://example.com/image.png" alt="image" />';
const resultString = '![image](https://example.com/image.png)';
expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});
test('Image with alt attribute', () => {
const testString = '<img src="https://example.com/image.png" alt="image" />';
const resultString = '![image](https://example.com/image.png)';
expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});

test('Image without alt attribute', () => {
test('Image without alt attribute', () => {
const testString = '<img src="https://example.com/image.png" />';
const resultString = '![https://example.com/image.png](https://example.com/image.png)';
const resultString = '!(https://example.com/image.png)';
expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});
});

test('Image with alt text containing escaped markdown', () => {
const testString = '<img src="https://example.com/image.png" alt="&ast;bold&ast; &lowbar;italic&lowbar; &#126;strike&#126;" />';
const resultString = '![*bold* _italic_ ~strike~](https://example.com/image.png)';
expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});

test('Image with alt text containing unescaped markdown', () => {
const testString = '<img src="https://example.com/image.png" alt="*bold* _italic_ ~strike~" />';
const resultString = '![*bold* _italic_ ~strike~](https://example.com/image.png)';
expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});

test('Alt attribute with complex/escaped content', () => {
const testString = '<img src="https://example.com/image.png" alt="&#x60;&#x60;&#x60;code&#x60;&#x60;&#x60;" data-raw-href="https://example.com/image.png" data-link-variant="labeled" />';
const resultString = '![```code```](https://example.com/image.png)';
expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});
});
58 changes: 29 additions & 29 deletions lib/ExpensiMark.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ import Str from './str';
import * as Constants from './CONST';
import * as UrlPatterns from './Url';

const MARKDOWN_LINK = `\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)]\\(${UrlPatterns.MARKDOWN_URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`;
const MARKDOWN_LINK_REGEX = new RegExp(MARKDOWN_LINK, 'gi');
const MARKDOWN_IMAGE_REGEX = new RegExp(`\\!${MARKDOWN_LINK}`, 'gi');
const MARKDOWN_LINK_REGEX = new RegExp(`\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)]\\(${UrlPatterns.MARKDOWN_URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`, 'gi');
const MARKDOWN_IMAGE_REGEX = new RegExp(`\\!(?:\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)])?\\(${UrlPatterns.MARKDOWN_URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`, 'gi');

const SLACK_SPAN_NEW_LINE_TAG = '<span class="c-mrkdwn__br" data-stringify-type="paragraph-break" style="box-sizing: inherit; display: block; height: unset;"></span>';

Expand Down Expand Up @@ -118,13 +117,14 @@ export default class ExpensiMark {
* Converts markdown style images to img tags e.g. ![Expensify](https://www.expensify.com/attachment.png)
* We need to convert before linking rules since they will not try to create a link from an existing img
* tag.
* Additional sanitization is done to the alt attribute to prevent parsing it further to html by later rules.
* Additional sanitization is done to the alt attribute to prevent parsing it further to html by later
* rules.
*/
{
name: 'image',
regex: MARKDOWN_IMAGE_REGEX,
replacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${this.escapeMarkdownEntities(g1)}" />`,
rawInputReplacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${this.escapeMarkdownEntities(g1)}" data-raw-href="${g2}" data-link-variant="labeled" />`
// g1 is the alt text: undefined for the shorthand `!(url)` form (the optional bracket group did not
// participate in the match) and '' for `![](url)`. The alt attribute is emitted only when g1 is non-empty.
replacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}"${g1 ? ` alt="${this.escapeAttributeContent(g1)}"` : ''} />`,
// Same output plus the unsanitized href as typed. The variant is "labeled" whenever brackets were
// present (g1 is a string, even an empty one) and "auto" for the bracket-less shorthand.
rawInputReplacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}"${g1 ? ` alt="${this.escapeAttributeContent(g1)}"` : ''} data-raw-href="${g2}" data-link-variant="${typeof(g1) === 'string' ? 'labeled': 'auto'}" />`
},

/**
Expand Down Expand Up @@ -267,7 +267,7 @@ export default class ExpensiMark {
* Use [\s\S]* instead of .* to match newline
*/
name: 'italic',
regex: /(\b_+|\b)(?!_blank")_((?![\s_])[\s\S]*?[^\s_])_(?![^\W_])(?![^<]*(<\/pre>|<\/code>|<\/a>|<\/mention-user>|_blank))/g,
regex: /(?<!<[^>]*)(\b_+|\b)(?!_blank")_((?![\s_])[\s\S]*?[^\s_](?<!\s))_(?![^\W_])(?![^<]*>)(?![^<]*(<\/pre>|<\/code>|<\/a>|<\/mention-user>|_blank))/g,

// We want to add extraLeadingUnderscores back before the <em> tag unless textWithinUnderscores starts with valid email
replacement: (match, extraLeadingUnderscores, textWithinUnderscores) => {
Expand Down Expand Up @@ -298,12 +298,12 @@ export default class ExpensiMark {
// \B will match everything that \b doesn't, so it works
// for * and ~: https://www.rexegg.com/regex-boundaries.html#notb
name: 'bold',
regex: /\B\*((?![\s*])[\s\S]*?[^\s*])\*\B(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
regex: /(?<!<[^>]*)\B\*((?![\s*])[\s\S]*?[^\s*](?<!\s))\*\B(?![^<]*>)(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
replacement: (match, g1) => (g1.includes('</pre>') || this.containsNonPairTag(g1) ? match : `<strong>${g1}</strong>`),
},
{
name: 'strikethrough',
regex: /\B~((?![\s~])[\s\S]*?[^\s~])~\B(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
regex: /(?<!<[^>]*)\B~((?![\s~])[\s\S]*?[^\s~](?<!\s))~\B(?![^<]*>)(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
replacement: (match, g1) => (g1.includes('</pre>') || this.containsNonPairTag(g1) ? match : `<del>${g1}</del>`),
},
{
Expand Down Expand Up @@ -433,7 +433,13 @@ export default class ExpensiMark {
{
name: 'image',
regex: /<img[^><]*src\s*=\s*(['"])(.*?)\1(?:[^><]*alt\s*=\s*(['"])(.*?)\3)?[^><]*>*(?![^<][\s\S]*?(<\/pre>|<\/code>))/gi,
replacement: (match, g1, g2, g3, g4) => `![${g4 || g2}](${g2})`
replacement: (match, g1, g2, g3, g4) => {
// g2 is the captured src value and g4 the captured alt value (g1/g3 are the surrounding quote characters)
if (g4) {
return `![${g4}](${g2})`;
}

// Missing or empty alt text: emit the bracket-less shorthand image syntax
return `!(${g2})`;
}
}
];

Expand Down Expand Up @@ -956,25 +962,19 @@ export default class ExpensiMark {
}

/**
* Replace MD characters with their HTML entity equivalent
* @param {String} text
* @return {String}
* Escapes the content of an HTML attribute value
* @param {String} content - string content that possibly contains HTML
* @returns {String} - original MD content escaped for use in HTML attribute value
*/
escapeMarkdownEntities(text) {
// A regex pattern matching special MD characters we'd like to escape
const pattern = /([*_{}[\]~])/g;

// A map of MD characters to their HTML entity equivalent
const entities = {
'*': '&ast;',
_: '&lowbar;',
'{': '&lbrace;',
'}': '&rbrace;',
'[': '&lbrack;',
']': '&rbrack;',
'~': '&#126;',
};

return text.replace(pattern, char => entities[char] || char);
escapeAttributeContent(content) {
let originalContent = this.htmlToMarkdown(content);
if (content === originalContent) {
return content;
}

// When the attribute contains HTML and is converted back to MD we need to re-escape it to avoid
// illegal attribute value characters like `," or ' which might break the HTML
originalContent = Str.replaceAll(originalContent, '\n', '');
return _.escape(originalContent);
}
}
Loading