|
6 | 6 | const escape_html_attr_dict = {
|
7 | 7 | '&': '&amp;',
|
8 | 8 | '"': '&quot;'
|
| 9 | + // Svelte also escapes < because the escape function could be called inside a `noscript` there |
| 10 | + // https://github.com/sveltejs/svelte/security/advisories/GHSA-8266-84wp-wv5c |
| 11 | + // However, that doesn't apply in SvelteKit |
9 | 12 | };
|
10 | 13 |
|
| 14 | +/** |
| 15 | + * @type {Record<string, string>} |
| 16 | + */ |
| 17 | +const escape_html_dict = { |
| 18 | + '&': '&amp;', |
| 19 | + '<': '&lt;' |
| 20 | +}; |
| 21 | + |
| 22 | +const surrogates = // high surrogate without paired low surrogate |
| 23 | + '[\\ud800-\\udbff](?![\\udc00-\\udfff])|' + |
| 24 | + // a valid surrogate pair, the only match with 2 code units |
| 25 | + // we match it so that we can match unpaired low surrogates in the same pass |
| 26 | + // TODO: use lookbehind assertions once they are widely supported: (?<![\ud800-\udbff])[\udc00-\udfff] |
| 27 | + '[\\ud800-\\udbff][\\udc00-\\udfff]|' + |
| 28 | + // unpaired low surrogate (see previous match) |
| 29 | + '[\\udc00-\\udfff]'; |
| 30 | + |
11 | 31 | const escape_html_attr_regex = new RegExp(
|
12 |
| - // special characters |
13 |
| - `[${Object.keys(escape_html_attr_dict).join('')}]|` + |
14 |
| - // high surrogate without paired low surrogate |
15 |
| - '[\\ud800-\\udbff](?![\\udc00-\\udfff])|' + |
16 |
| - // a valid surrogate pair, the only match with 2 code units |
17 |
| - // we match it so that we can match unpaired low surrogates in the same pass |
18 |
| - // TODO: use lookbehind assertions once they are widely supported: (?<![\ud800-udbff])[\udc00-\udfff] |
19 |
| - '[\\ud800-\\udbff][\\udc00-\\udfff]|' + |
20 |
| - // unpaired low surrogate (see previous match) |
21 |
| - '[\\udc00-\\udfff]', |
| 32 | + `[${Object.keys(escape_html_attr_dict).join('')}]|` + surrogates, |
| 33 | + 'g' |
| 34 | +); |
| 35 | + |
| 36 | +const escape_html_regex = new RegExp( |
| 37 | + `[${Object.keys(escape_html_dict).join('')}]|` + surrogates, |
22 | 38 | 'g'
|
23 | 39 | );
|
24 | 40 |
|
25 | 41 | /**
|
26 |
| - * Formats a string to be used as an attribute's value in raw HTML. |
27 |
| - * |
28 |
| - * It escapes unpaired surrogates (which are allowed in js strings but invalid in HTML), escapes |
29 |
| - * characters that are special in attributes, and surrounds the whole string in double-quotes. |
| 42 | + * Escapes unpaired surrogates (which are allowed in js strings but invalid in HTML) and |
| 43 | + * escapes characters that are special. |
30 | 44 | *
|
31 | 45 | * @param {string} str
|
32 |
| - * @returns {string} Escaped string surrounded by double-quotes. |
33 |
| - * @example const html = `<tag data-value=${escape_html_attr('value')}>...</tag>`; |
| 46 | + * @param {boolean} [is_attr] |
| 47 | + * @returns {string} escaped string |
| 48 | + * @example const html = `<tag data-value="${escape_html('value', true)}">...</tag>`; |
34 | 49 | */
|
35 |
| -export function escape_html_attr(str) { |
36 |
| - const escaped_str = str.replace(escape_html_attr_regex, (match) => { |
| 50 | +export function escape_html(str, is_attr) { |
| 51 | + const dict = is_attr ? escape_html_attr_dict : escape_html_dict; |
| 52 | + const escaped_str = str.replace(is_attr ? escape_html_attr_regex : escape_html_regex, (match) => { |
37 | 53 | if (match.length === 2) {
|
38 | 54 | // valid surrogate pair
|
39 | 55 | return match;
|
40 | 56 | }
|
41 | 57 |
|
42 |
| - return escape_html_attr_dict[match] ?? `&#${match.charCodeAt(0)};`; |
| 58 | + return dict[match] ?? `&#${match.charCodeAt(0)};`; |
43 | 59 | });
|
44 | 60 |
|
45 |
| - return `"${escaped_str}"`; |
| 61 | + return escaped_str; |
46 | 62 | }
|
0 commit comments