From 9fa45bc6430c53bc5ac97a25c618d45ebadfc20e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Roub=C3=AD=C4=8Dek?= Date: Mon, 30 Sep 2024 15:36:21 +0200 Subject: [PATCH 1/6] fix: Make srcset parsing HTML spec compliant (#16323) --- .../vite/src/node/__tests__/utils.spec.ts | 10 +++ packages/vite/src/node/utils.ts | 69 ++++++++----------- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/packages/vite/src/node/__tests__/utils.spec.ts b/packages/vite/src/node/__tests__/utils.spec.ts index ac10229fc1b2e5..e022d04fcf328c 100644 --- a/packages/vite/src/node/__tests__/utils.spec.ts +++ b/packages/vite/src/node/__tests__/utils.spec.ts @@ -359,6 +359,16 @@ describe('processSrcSetSync', () => { processSrcSetSync('https://anydomain/image.jpg', ({ url }) => url), ).toBe(source) }) + + test('should not break URLs with commas in srcSet', async () => { + const source = ` + \thttps://example.com/dpr_1,f_auto,fl_progressive,q_auto,w_100/v1/img 1x, + \thttps://example.com/dpr_2,f_auto,fl_progressive,q_auto,w_100/v1/img\t\t2x + ` + const result = + 'https://example.com/dpr_1,f_auto,fl_progressive,q_auto,w_100/v1/img 1x, https://example.com/dpr_2,f_auto,fl_progressive,q_auto,w_100/v1/img 2x' + expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) + }) }) describe('flattenId', () => { diff --git a/packages/vite/src/node/utils.ts b/packages/vite/src/node/utils.ts index 254b52b468e4fa..d94b1e043d5b37 100644 --- a/packages/vite/src/node/utils.ts +++ b/packages/vite/src/node/utils.ts @@ -720,26 +720,40 @@ interface ImageCandidate { url: string descriptor: string } -const escapedSpaceCharacters = /(?: |\\t|\\n|\\f|\\r)+/g -const imageSetUrlRE = /^(?:[\w-]+\(.*?\)|'.*?'|".*?"|\S*)/ + function joinSrcset(ret: ImageCandidate[]) { return ret .map(({ url, descriptor }) => url + (descriptor ? ` ${descriptor}` : '')) .join(', ') } -// NOTE: The returned `url` should perhaps be decoded so all handled URLs within Vite are consistently decoded. -// However, this may also require a refactor for `cssReplacer` to accept decoded URLs instead. -function splitSrcSetDescriptor(srcs: string): ImageCandidate[] { - return splitSrcSet(srcs) - .map((s) => { - const src = s.replace(escapedSpaceCharacters, ' ').trim() - const url = imageSetUrlRE.exec(src)?.[0] ?? '' +/** + This regex represents a loose rule of an “image candidate string”. + + @see https://html.spec.whatwg.org/multipage/images.html#srcset-attribute - return { - url, - descriptor: src.slice(url.length).trim(), - } + An “image candidate string” roughly consists of the following: + 1. Zero or more whitespace characters. + 2. A non-empty URL that does not start or end with `,`. + 3. Zero or more whitespace characters. + 4. An optional “descriptor” that starts with a whitespace character. + 5. Zero or more whitespace characters. + 6. Each image candidate string is separated by a `,`. + */ +// eslint-disable-next-line regexp/no-super-linear-backtracking +const imageCandidateRegex = /\s*([^,]\S*[^,](?:\s[^,]+)?)\s*(?:,|$)/ +const escapedSpaceCharacters = /(?: |\\t|\\n|\\f|\\r)+/g + +export function parseSrcset(string: string): ImageCandidate[] { + return string + .replace(escapedSpaceCharacters, ' ') + .replace(/\r?\n/, '') + .replace(/,\s+/, ', ') + .split(imageCandidateRegex) + .filter((_part, index) => index % 2 === 1) + .map((part) => { + const [url, descriptor] = part.trim().split(/\s+/) + return { url, descriptor } }) .filter(({ url }) => !!url) } @@ -749,7 +763,7 @@ export function processSrcSet( replacer: (arg: ImageCandidate) => Promise, ): Promise { return Promise.all( - splitSrcSetDescriptor(srcs).map(async ({ url, descriptor }) => ({ + parseSrcset(srcs).map(async ({ url, descriptor }) => ({ url: await replacer({ url, descriptor }), descriptor, })), @@ -761,38 +775,13 @@ export function processSrcSetSync( replacer: (arg: ImageCandidate) => string, ): string { return joinSrcset( - splitSrcSetDescriptor(srcs).map(({ url, descriptor }) => ({ + parseSrcset(srcs).map(({ url, descriptor }) => ({ url: replacer({ url, descriptor }), descriptor, })), ) } -const cleanSrcSetRE = - /(?:url|image|gradient|cross-fade)\([^)]*\)|"([^"]|(?<=\\)")*"|'([^']|(?<=\\)')*'|data:\w+\/[\w.+-]+;base64,[\w+/=]+|\?\S+,/g -function splitSrcSet(srcs: string) { - const parts: string[] = [] - /** - * There could be a ',' inside of: - * - url(data:...) - * - linear-gradient(...) - * - "data:..." - * - data:... - * - query parameter ?... - */ - const cleanedSrcs = srcs.replace(cleanSrcSetRE, blankReplacer) - let startIndex = 0 - let splitIndex: number - do { - splitIndex = cleanedSrcs.indexOf(',', startIndex) - parts.push( - srcs.slice(startIndex, splitIndex !== -1 ? splitIndex : undefined), - ) - startIndex = splitIndex + 1 - } while (splitIndex !== -1) - return parts -} - const windowsDriveRE = /^[A-Z]:/ const replaceWindowsDriveRE = /^([A-Z]):\// const linuxAbsolutePathRE = /^\/[^/]/ From 04505c703158a88ccca34b66e9cff0990ef44d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Roub=C3=AD=C4=8Dek?= Date: Tue, 1 Oct 2024 06:16:02 +0200 Subject: [PATCH 2/6] fix: make srcset parsing also conformant with CSS image-set spec --- .../vite/src/node/__tests__/utils.spec.ts | 38 +++++++++++++++++++ packages/vite/src/node/utils.ts | 13 +++++-- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/packages/vite/src/node/__tests__/utils.spec.ts b/packages/vite/src/node/__tests__/utils.spec.ts index e022d04fcf328c..c61e1e6695d9ef 100644 --- a/packages/vite/src/node/__tests__/utils.spec.ts +++ b/packages/vite/src/node/__tests__/utils.spec.ts @@ -369,6 +369,44 @@ describe('processSrcSetSync', () => { 'https://example.com/dpr_1,f_auto,fl_progressive,q_auto,w_100/v1/img 1x, https://example.com/dpr_2,f_auto,fl_progressive,q_auto,w_100/v1/img 2x' expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) }) + + test('should not break URLs with commas in image-set-options', async () => { + const source = `url(https://example.com/dpr_1,f_auto,fl_progressive,q_auto,w_100/v1/img) 1x, + url("https://example.com/dpr_2,f_auto,fl_progressive,q_auto,w_100/v1/img")\t\t2x + ` + const result = + 'url(https://example.com/dpr_1,f_auto,fl_progressive,q_auto,w_100/v1/img) 1x, url("https://example.com/dpr_2,f_auto,fl_progressive,q_auto,w_100/v1/img") 2x' + expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) + }) + + test('should parse image-set-options with resolution', async () => { + const source = ` "foo.png" 1x, + "foo-2x.png" 2x, + "foo-print.png" 600dpi` + const result = '"foo.png" 1x, "foo-2x.png" 2x, "foo-print.png" 600dpi' + expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) + }) + + test('should parse image-set-options with type', async () => { + const source = ` "foo.avif" type("image/avif"), + "foo.jpg" type("image/jpeg") ` + const result = '"foo.avif" type("image/avif"), "foo.jpg" type("image/jpeg")' + expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) + }) + + test('should parse image-set-options with linear-gradient', async () => { + const source = `linear-gradient(cornflowerblue, white) 1x, + url("detailed-gradient.png") 3x` + const result = + 'linear-gradient(cornflowerblue, white) 1x, url("detailed-gradient.png") 3x' + expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) + }) + + test('should parse image-set-options with resolution and type specified', async () => { + const source = `url("picture.png")\t1x\t type("image/jpeg")` + const result = 'url("picture.png") 1x type("image/jpeg")' + expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) + }) }) describe('flattenId', () => { diff --git a/packages/vite/src/node/utils.ts b/packages/vite/src/node/utils.ts index d94b1e043d5b37..037fe5641330c2 100644 --- a/packages/vite/src/node/utils.ts +++ b/packages/vite/src/node/utils.ts @@ -727,10 +727,15 @@ function joinSrcset(ret: ImageCandidate[]) { .join(', ') } +/*! + * Based on https://github.com/sindresorhus/srcset + * MIT License, Copyright (c) Sindre Sorhus (https://sindresorhus.com) + */ /** - This regex represents a loose rule of an “image candidate string”. + This regex represents a loose rule of an “image candidate string” and "image set options". @see https://html.spec.whatwg.org/multipage/images.html#srcset-attribute + @see https://drafts.csswg.org/css-images-4/#image-set-notation An “image candidate string” roughly consists of the following: 1. Zero or more whitespace characters. @@ -752,8 +757,10 @@ export function parseSrcset(string: string): ImageCandidate[] { .split(imageCandidateRegex) .filter((_part, index) => index % 2 === 1) .map((part) => { - const [url, descriptor] = part.trim().split(/\s+/) - return { url, descriptor } + const [url, ...descriptors] = part.trim().split(/\s+/) + // in `image-set()` context descriptor can have `resolution` and `type` parts separated by whitespace, + // we need to join them back together + return { url, descriptor: descriptors.join(' ') } }) .filter(({ url }) => !!url) } From d4faba99f281786f3dfb65168705e2579d122925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Roub=C3=AD=C4=8Dek?= Date: Tue, 1 Oct 2024 14:41:53 +0200 Subject: [PATCH 3/6] fix: srcset - rework regex to extract url and descriptor via group matches --- .../vite/src/node/__tests__/utils.spec.ts | 15 ++++++ packages/vite/src/node/utils.ts | 47 +++++++------------ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/packages/vite/src/node/__tests__/utils.spec.ts b/packages/vite/src/node/__tests__/utils.spec.ts index c61e1e6695d9ef..784df1f1b38d2e 100644 --- a/packages/vite/src/node/__tests__/utils.spec.ts +++ b/packages/vite/src/node/__tests__/utils.spec.ts @@ -407,6 +407,21 @@ describe('processSrcSetSync', () => { const result = 'url("picture.png") 1x type("image/jpeg")' expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) }) + + test('should capture whole image set options', async () => { + const source = `linear-gradient(cornflowerblue, white) 1x, + url("detailed-gradient.png") 3x` + const expected = [ + 'linear-gradient(cornflowerblue, white)', + 'url("detailed-gradient.png")', + ] + const result: string[] = [] + processSrcSetSync(source, ({ url }) => { + result.push(url) + return url + }) + expect(result).toEqual(expected) + }) }) describe('flattenId', () => { diff --git a/packages/vite/src/node/utils.ts b/packages/vite/src/node/utils.ts index 037fe5641330c2..635deea549e992 100644 --- a/packages/vite/src/node/utils.ts +++ b/packages/vite/src/node/utils.ts @@ -727,42 +727,29 @@ function joinSrcset(ret: ImageCandidate[]) { .join(', ') } -/*! - * Based on https://github.com/sindresorhus/srcset - * MIT License, Copyright (c) Sindre Sorhus (https://sindresorhus.com) - */ -/** - This regex represents a loose rule of an “image candidate string” and "image set options". - - @see https://html.spec.whatwg.org/multipage/images.html#srcset-attribute - @see https://drafts.csswg.org/css-images-4/#image-set-notation - - An “image candidate string” roughly consists of the following: - 1. Zero or more whitespace characters. - 2. A non-empty URL that does not start or end with `,`. - 3. Zero or more whitespace characters. - 4. An optional “descriptor” that starts with a whitespace character. - 5. Zero or more whitespace characters. - 6. Each image candidate string is separated by a `,`. - */ -// eslint-disable-next-line regexp/no-super-linear-backtracking -const imageCandidateRegex = /\s*([^,]\S*[^,](?:\s[^,]+)?)\s*(?:,|$)/ const escapedSpaceCharacters = /(?: |\\t|\\n|\\f|\\r)+/g export function parseSrcset(string: string): ImageCandidate[] { - return string + const matches = string + .trim() .replace(escapedSpaceCharacters, ' ') .replace(/\r?\n/, '') .replace(/,\s+/, ', ') - .split(imageCandidateRegex) - .filter((_part, index) => index % 2 === 1) - .map((part) => { - const [url, ...descriptors] = part.trim().split(/\s+/) - // in `image-set()` context descriptor can have `resolution` and `type` parts separated by whitespace, - // we need to join them back together - return { url, descriptor: descriptors.join(' ') } - }) - .filter(({ url }) => !!url) + .replaceAll(/\s+/g, ' ') + .matchAll( + /** + This regex represents a loose rule of an “image candidate string” and "image set options". + + @see https://html.spec.whatwg.org/multipage/images.html#srcset-attribute + @see https://drafts.csswg.org/css-images-4/#image-set-notation + */ + // eslint-disable-next-line regexp/no-super-linear-backtracking + /\s*(?[\w-]+\([^)]*\)|"[^"]*"|'[^']*'|[^,]\S*[^,])\s*(?[^,]*(?:\stype\([^)]*\))?)\s*(?:,|$)/g, + ) + return Array.from(matches, ({ groups }) => ({ + url: groups.url.trim(), + descriptor: groups.descriptor.trim(), + })).filter(({ url }) => !!url) } export function processSrcSet( From ac100295e2b3203db92b709ddd69f547d200ba18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Roub=C3=AD=C4=8Dek?= Date: Tue, 1 Oct 2024 14:46:23 +0200 Subject: [PATCH 4/6] fix possibly undefined groups --- packages/vite/src/node/utils.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/vite/src/node/utils.ts b/packages/vite/src/node/utils.ts index 635deea549e992..e7398a3f4ee743 100644 --- a/packages/vite/src/node/utils.ts +++ b/packages/vite/src/node/utils.ts @@ -747,8 +747,8 @@ export function parseSrcset(string: string): ImageCandidate[] { /\s*(?[\w-]+\([^)]*\)|"[^"]*"|'[^']*'|[^,]\S*[^,])\s*(?[^,]*(?:\stype\([^)]*\))?)\s*(?:,|$)/g, ) return Array.from(matches, ({ groups }) => ({ - url: groups.url.trim(), - descriptor: groups.descriptor.trim(), + url: groups?.url?.trim() ?? '', + descriptor: groups?.descriptor?.trim() ?? '', })).filter(({ url }) => !!url) } From 2349c9b0cf0167f48672a8d7ff8239742819ab77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Roub=C3=AD=C4=8Dek?= Date: Wed, 2 Oct 2024 04:55:49 +0200 Subject: [PATCH 5/6] improved regex so we can remove eslint ignore rule, improved doc comment for regex --- packages/vite/src/node/utils.ts | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/packages/vite/src/node/utils.ts b/packages/vite/src/node/utils.ts index e7398a3f4ee743..3d8c6ad4aacc0a 100644 --- a/packages/vite/src/node/utils.ts +++ b/packages/vite/src/node/utils.ts @@ -727,6 +727,22 @@ function joinSrcset(ret: ImageCandidate[]) { .join(', ') } +/** + This regex represents a loose rule of an “image candidate string” and "image set options". + + @see https://html.spec.whatwg.org/multipage/images.html#srcset-attribute + @see https://drafts.csswg.org/css-images-4/#image-set-notation + + The Regex has named capturing groups `url` and `descriptor`. + The `url` group can be: + * any CSS function + * CSS string (single or double-quoted) + * or URL string. + The `descriptor` is anything after the space and before the comma, + and can have optional `type(...)` for the `image-set` case. + */ +const imageCandidateRegex = + /(?:^|\s)(?[\w-]+\([^)]*\)|"[^"]*"|'[^']*'|[^,]\S*[^,])\s*(?:\s(?[^,\s]+(?:\stype\([^)]*\))?)\s*)?(?:,|$)/g const escapedSpaceCharacters = /(?: |\\t|\\n|\\f|\\r)+/g export function parseSrcset(string: string): ImageCandidate[] { @@ -736,16 +752,7 @@ export function parseSrcset(string: string): ImageCandidate[] { .replace(/\r?\n/, '') .replace(/,\s+/, ', ') .replaceAll(/\s+/g, ' ') - .matchAll( - /** - This regex represents a loose rule of an “image candidate string” and "image set options". - - @see https://html.spec.whatwg.org/multipage/images.html#srcset-attribute - @see https://drafts.csswg.org/css-images-4/#image-set-notation - */ - // eslint-disable-next-line regexp/no-super-linear-backtracking - /\s*(?[\w-]+\([^)]*\)|"[^"]*"|'[^']*'|[^,]\S*[^,])\s*(?[^,]*(?:\stype\([^)]*\))?)\s*(?:,|$)/g, - ) + .matchAll(imageCandidateRegex) return Array.from(matches, ({ groups }) => ({ url: groups?.url?.trim() ?? '', descriptor: groups?.descriptor?.trim() ?? '', From 770d6330cbd2af2feab6379e44c3eec604a253c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Roub=C3=AD=C4=8Dek?= Date: Wed, 2 Oct 2024 08:39:14 +0200 Subject: [PATCH 6/6] Makes descriptor regex more generic, added test case for any order of descriptor parts --- packages/vite/src/node/__tests__/utils.spec.ts | 5 +++-- packages/vite/src/node/utils.ts | 7 +++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/vite/src/node/__tests__/utils.spec.ts b/packages/vite/src/node/__tests__/utils.spec.ts index 784df1f1b38d2e..76e11050d8d401 100644 --- a/packages/vite/src/node/__tests__/utils.spec.ts +++ b/packages/vite/src/node/__tests__/utils.spec.ts @@ -403,8 +403,9 @@ describe('processSrcSetSync', () => { }) test('should parse image-set-options with resolution and type specified', async () => { - const source = `url("picture.png")\t1x\t type("image/jpeg")` - const result = 'url("picture.png") 1x type("image/jpeg")' + const source = `url("picture.png")\t1x\t type("image/jpeg"), url("picture.png")\t type("image/jpeg")\t2x` + const result = + 'url("picture.png") 1x type("image/jpeg"), url("picture.png") type("image/jpeg") 2x' expect(processSrcSetSync(source, ({ url }) => url)).toBe(result) }) diff --git a/packages/vite/src/node/utils.ts b/packages/vite/src/node/utils.ts index 3d8c6ad4aacc0a..6c54841118fb19 100644 --- a/packages/vite/src/node/utils.ts +++ b/packages/vite/src/node/utils.ts @@ -737,12 +737,11 @@ function joinSrcset(ret: ImageCandidate[]) { The `url` group can be: * any CSS function * CSS string (single or double-quoted) - * or URL string. - The `descriptor` is anything after the space and before the comma, - and can have optional `type(...)` for the `image-set` case. + * URL string (unquoted) + The `descriptor` is anything after the space and before the comma. */ const imageCandidateRegex = - /(?:^|\s)(?[\w-]+\([^)]*\)|"[^"]*"|'[^']*'|[^,]\S*[^,])\s*(?:\s(?[^,\s]+(?:\stype\([^)]*\))?)\s*)?(?:,|$)/g + /(?:^|\s)(?[\w-]+\([^)]*\)|"[^"]*"|'[^']*'|[^,]\S*[^,])\s*(?:\s(?\w[^,]+))?(?:,|$)/g const escapedSpaceCharacters = /(?: |\\t|\\n|\\f|\\r)+/g export function parseSrcset(string: string): ImageCandidate[] {