Expensify · chiragsalian · Apr 15, 2024 · Apr 2, 2024 · Apr 2, 2024 · Apr 2, 2024
@@ -1910,21 +1910,33 @@ describe('multi-level blockquote', () => {
 });
 
 describe('Image markdown conversion to html tag', () => {
-    test('Single image', () => {
+    test('Single image with alt text', () => {
         const testString = '![test](https://example.com/image.png)';
         const resultString = '<img src="https://example.com/image.png" alt="test" />';
         expect(parser.replace(testString)).toBe(resultString);
     });
 
+    test('Single image with empty alt text', () => {
+        const testString = '![](https://example.com/image.png)';
+        const resultString = '<img src="https://example.com/image.png" />';
+        expect(parser.replace(testString)).toBe(resultString);
+    });
+
+    test('Single image short syntax without alt text', () => {
+        const testString = '!(https://example.com/image.png)';
+        const resultString = '<img src="https://example.com/image.png" />';
+        expect(parser.replace(testString)).toBe(resultString);
+    });
+
     test('Text containing images', () => {
-        const testString = 'An image of a banana: ![banana](https://example.com/banana.png) an image of a developer: ![dev](https://example.com/developer.png)';
-        const resultString = 'An image of a banana: <img src="https://example.com/banana.png" alt="banana" /> an image of a developer: <img src="https://example.com/developer.png" alt="dev" />';
+        const testString = 'An image of a banana: ![banana](https://example.com/banana.png) an image without alt: !(https://example.com/developer.png)';
+        const resultString = 'An image of a banana: <img src="https://example.com/banana.png" alt="banana" /> an image without alt: <img src="https://example.com/developer.png" />';
         expect(parser.replace(testString)).toBe(resultString);
     });
 
     test('Image with alt text containing markdown', () => {
         const testString = '![# fake-heading *bold* _italic_ ~strike~ [:-)]](https://example.com/image.png)';
-        const resultString = '<img src="https://example.com/image.png" alt="# fake-heading &ast;bold&ast; &lowbar;italic&lowbar; &#126;strike&#126; &lbrack;:-)&rbrack;" />';
+        const resultString = '<img src="https://example.com/image.png" alt="# fake-heading *bold* _italic_ ~strike~ [:-)]" />';
         expect(parser.replace(testString)).toBe(resultString);
     });
 
@@ -1940,6 +1952,12 @@ describe('Image markdown conversion to html tag', () => {
         expect(parser.replace(testString, {shouldKeepRawInput: true})).toBe(resultString);
     });
 
+    test('Single short syntax image with raw data attributes', () => {
+        const testString = '!(https://example.com/image.png)';
+        const resultString = '<img src="https://example.com/image.png" data-raw-href="https://example.com/image.png" data-link-variant="auto" />';
+        expect(parser.replace(testString, {shouldKeepRawInput: true})).toBe(resultString);
+    });
+
     test('Image with invalid url should remain unchanged', () => {
         const testString = '![test](invalid)';
         expect(parser.replace(testString)).toBe(testString);
@@ -1963,7 +1981,13 @@ describe('Image markdown conversion to html tag', () => {
 
     test('No html inside the src attribute', () => {
         const testString = '![`code`](https://example.com/image.png)';
-        const resultString = '<img src="https://example.com/image.png" alt="<code>code</code>" />';
+        const resultString = '<img src="https://example.com/image.png" alt="&#x60;code&#x60;" />';
         expect(parser.replace(testString)).toBe(resultString);
-    })
+    });
+
+    test('No html inside the alt attribute - pre tag', () => {
+        const testString = '![```code```](https://example.com/image.png)';
+        const resultString = '<img src="https://example.com/image.png" alt="&#x60;&#x60;&#x60;code&#x60;&#x60;&#x60;" data-raw-href="https://example.com/image.png" data-link-variant="labeled" />';
+        expect(parser.replace(testString, {shouldKeepRawInput: true})).toBe(resultString);
+    });
 });
@@ -758,21 +758,33 @@ test('Mention html to markdown', () => {
 });
 
 describe('Image tag conversion to markdown', () => {
-   test('Image with alt attribute', () => {
-         const testString = '<img src="https://example.com/image.png" alt="image" />';
-         const resultString = '![image](https://example.com/image.png)';
-         expect(parser.htmlToMarkdown(testString)).toBe(resultString);
-   });
+    test('Image with alt attribute', () => {
+        const testString = '<img src="https://example.com/image.png" alt="image" />';
+        const resultString = '![image](https://example.com/image.png)';
+        expect(parser.htmlToMarkdown(testString)).toBe(resultString);
+    });
 
-   test('Image without alt attribute', () => {
+    test('Image without alt attribute', () => {
         const testString = '<img src="https://example.com/image.png" />';
-        const resultString = '![https://example.com/image.png](https://example.com/image.png)';
+        const resultString = '!(https://example.com/image.png)';
         expect(parser.htmlToMarkdown(testString)).toBe(resultString);
-   });
+    });
 
     test('Image with alt text containing escaped markdown', () => {
         const testString = '<img src="https://example.com/image.png" alt="&ast;bold&ast; &lowbar;italic&lowbar; &#126;strike&#126;" />';
         const resultString = '![*bold* _italic_ ~strike~](https://example.com/image.png)';
         expect(parser.htmlToMarkdown(testString)).toBe(resultString);
     });
+
+    test('Image with alt text containing unescaped markdown', () => {
+        const testString = '<img src="https://example.com/image.png" alt="*bold* _italic_ ~strike~" />';
+        const resultString = '![*bold* _italic_ ~strike~](https://example.com/image.png)';
+        expect(parser.htmlToMarkdown(testString)).toBe(resultString);
+    });
+
+    test('Alt attribute with complex/escaped content', () => {
+        const testString = '<img src="https://example.com/image.png" alt="&#x60;&#x60;&#x60;code&#x60;&#x60;&#x60;" data-raw-href="https://example.com/image.png" data-link-variant="labeled" />';
+        const resultString = '![```code```](https://example.com/image.png)';
+        expect(parser.htmlToMarkdown(testString)).toBe(resultString);
+    });
 });
@@ -3,9 +3,8 @@ import Str from './str';
 import * as Constants from './CONST';
 import * as UrlPatterns from './Url';
 
-const MARKDOWN_LINK = `\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)]\\(${UrlPatterns.MARKDOWN_URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`;
-const MARKDOWN_LINK_REGEX = new RegExp(MARKDOWN_LINK, 'gi');
-const MARKDOWN_IMAGE_REGEX = new RegExp(`\\!${MARKDOWN_LINK}`, 'gi');
+const MARKDOWN_LINK_REGEX = new RegExp(`\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)]\\(${UrlPatterns.MARKDOWN_URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`, 'gi');
+const MARKDOWN_IMAGE_REGEX = new RegExp(`\\!(?:\\[([^\\][]*(?:\\[[^\\][]*][^\\][]*)*)])?\\(${UrlPatterns.MARKDOWN_URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`, 'gi');
 
 const SLACK_SPAN_NEW_LINE_TAG = '<span class="c-mrkdwn__br" data-stringify-type="paragraph-break" style="box-sizing: inherit; display: block; height: unset;"></span>';
 
@@ -118,13 +117,14 @@ export default class ExpensiMark {
              * Converts markdown style images to img tags e.g. ![Expensify](https://www.expensify.com/attachment.png)
              * We need to convert before linking rules since they will not try to create a link from an existing img
              * tag.
-             * Additional sanitization is done to the alt attribute to prevent parsing it further to html by later rules.
+             * Additional sanitization is done to the alt attribute to prevent parsing it further to html by later
+             * rules.
              */
             {
                 name: 'image',
                 regex: MARKDOWN_IMAGE_REGEX,
-                replacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${this.escapeMarkdownEntities(g1)}" />`,
-                rawInputReplacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${this.escapeMarkdownEntities(g1)}" data-raw-href="${g2}" data-link-variant="labeled" />`
+                replacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}"${g1 ? ` alt="${this.escapeAttributeContent(g1)}"` : ''} />`,
+                rawInputReplacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}"${g1 ? ` alt="${this.escapeAttributeContent(g1)}"` : ''} data-raw-href="${g2}" data-link-variant="${typeof g1 === 'string' ? 'labeled' : 'auto'}" />`
             },
 
             /**
@@ -267,7 +267,7 @@ export default class ExpensiMark {
                  * Use [\s\S]* instead of .* to match newline
                  */
                 name: 'italic',
-                regex: /(\b_+|\b)(?!_blank")_((?![\s_])[\s\S]*?[^\s_])_(?![^\W_])(?![^<]*(<\/pre>|<\/code>|<\/a>|<\/mention-user>|_blank))/g,
+                regex: /(?<!<[^>]*)(\b_+|\b)(?!_blank")_((?![\s_])[\s\S]*?[^\s_](?<!\s))_(?![^\W_])(?![^<]*>)(?![^<]*(<\/pre>|<\/code>|<\/a>|<\/mention-user>|_blank))/g,
 
                 // We want to add extraLeadingUnderscores back before the <em> tag unless textWithinUnderscores starts with valid email
                 replacement: (match, extraLeadingUnderscores, textWithinUnderscores) => {
@@ -298,12 +298,12 @@ export default class ExpensiMark {
                 // \B will match everything that \b doesn't, so it works
                 // for * and ~: https://www.rexegg.com/regex-boundaries.html#notb
                 name: 'bold',
-                regex: /\B\*((?![\s*])[\s\S]*?[^\s*])\*\B(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
+                regex: /(?<!<[^>]*)\B\*((?![\s*])[\s\S]*?[^\s*](?<!\s))\*\B(?![^<]*>)(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
                 replacement: (match, g1) => (g1.includes('</pre>') || this.containsNonPairTag(g1) ? match : `<strong>${g1}</strong>`),
             },
             {
                 name: 'strikethrough',
-                regex: /\B~((?![\s~])[\s\S]*?[^\s~])~\B(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
+                regex: /(?<!<[^>]*)\B~((?![\s~])[\s\S]*?[^\s~](?<!\s))~\B(?![^<]*>)(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
                 replacement: (match, g1) => (g1.includes('</pre>') || this.containsNonPairTag(g1) ? match : `<del>${g1}</del>`),
             },
             {
@@ -433,7 +433,13 @@ export default class ExpensiMark {
             {
                 name: 'image',
                 regex: /<img[^><]*src\s*=\s*(['"])(.*?)\1(?:[^><]*alt\s*=\s*(['"])(.*?)\3)?[^><]*>*(?![^<][\s\S]*?(<\/pre>|<\/code>))/gi,
-                replacement: (match, g1, g2, g3, g4) => `![${g4 || g2}](${g2})`
+                replacement: (match, g1, g2, g3, g4) => {
+                    if (g4) {
+                        return `![${g4}](${g2})`;
+                    }
+
+                    return `!(${g2})`;
+                }
             }
         ];
 
@@ -956,25 +962,19 @@ export default class ExpensiMark {
     }
 
     /**
-     * Replace MD characters with their HTML entity equivalent
-     * @param {String} text
-     * @return {String}
+     * Escapes the content of an HTML attribute value
+     * @param {String} content - string content that possibly contains HTML
+     * @returns {String} - original MD content escaped for use in HTML attribute value
      */
-    escapeMarkdownEntities(text) {
-        // A regex pattern matching special MD characters we'd like to escape
-        const pattern = /([*_{}[\]~])/g;
-
-        // A map of MD characters to their HTML entity equivalent
-        const entities = {
-            '*': '&ast;',
-            _: '&lowbar;',
-            '{': '&lbrace;',
-            '}': '&rbrace;',
-            '[': '&lbrack;',
-            ']': '&rbrack;',
-            '~': '&#126;',
-        };
-
-        return text.replace(pattern, char => entities[char] || char);
+    escapeAttributeContent(content) {
+        let originalContent = this.htmlToMarkdown(content);
+        if (content === originalContent) {
+            return content;
+        }
+
+        // The conversion back to MD changed the content, so strip newlines and re-escape the result to keep
+        // characters such as `, " or ' from breaking the HTML attribute value
+        originalContent = Str.replaceAll(originalContent, '\n', '');
+        return _.escape(originalContent);
+    }
 }