From 6ef8dcdf7d90a0b1fefd3546266d3029f5825a2b Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Mon, 2 Sep 2024 16:11:38 +1000 Subject: [PATCH] fix: improve entry `loc` normalizing (#354) --- .../nitro/sitemap/builder/sitemap-index.ts | 2 +- src/runtime/nitro/sitemap/builder/sitemap.ts | 10 ++-- src/runtime/nitro/sitemap/urlset/normalise.ts | 31 +++++++++--- src/runtime/types.ts | 4 ++ test/integration/single/news.test.ts | 22 +++++++++ test/integration/single/queryRoutes.test.ts | 2 +- test/unit/i18n.test.ts | 11 ++++- test/unit/normalise.test.ts | 47 +++++++++++++++++++ 8 files changed, 115 insertions(+), 14 deletions(-) diff --git a/src/runtime/nitro/sitemap/builder/sitemap-index.ts b/src/runtime/nitro/sitemap/builder/sitemap-index.ts index a7922788..041663c1 100644 --- a/src/runtime/nitro/sitemap/builder/sitemap-index.ts +++ b/src/runtime/nitro/sitemap/builder/sitemap-index.ts @@ -39,7 +39,7 @@ export async function buildSitemapIndex(resolvers: NitroUrlResolvers, runtimeCon const sitemap = sitemaps.chunks // we need to figure out how many entries we're dealing with const sources = await resolveSitemapSources(await globalSitemapSources()) - const normalisedUrls = resolveSitemapEntries(sitemap, sources, { autoI18n, isI18nMapped }) + const normalisedUrls = resolveSitemapEntries(sitemap, sources, { autoI18n, isI18nMapped }, resolvers) // 2. enhance const enhancedUrls: ResolvedSitemapUrl[] = normalisedUrls .map(e => defu(e, sitemap.defaults) as ResolvedSitemapUrl) diff --git a/src/runtime/nitro/sitemap/builder/sitemap.ts b/src/runtime/nitro/sitemap/builder/sitemap.ts index 1d481c9f..3c6351e6 100644 --- a/src/runtime/nitro/sitemap/builder/sitemap.ts +++ b/src/runtime/nitro/sitemap/builder/sitemap.ts @@ -21,7 +21,7 @@ export interface NormalizedI18n extends ResolvedSitemapUrl { _index?: number } -export function resolveSitemapEntries(sitemap: SitemapDefinition, sources: SitemapSourceResolved[], runtimeConfig: Pick): ResolvedSitemapUrl[] { +export function resolveSitemapEntries(sitemap: SitemapDefinition, sources: SitemapSourceResolved[], runtimeConfig: Pick, resolvers?: NitroUrlResolvers): ResolvedSitemapUrl[] { const { autoI18n, isI18nMapped, @@ -32,7 +32,7 @@ export function resolveSitemapEntries(sitemap: SitemapDefinition, sources: Sitem }) // 1. normalise const _urls = sources.flatMap(e => e.urls).map((_e) => { - const e = preNormalizeEntry(_e) + const e = preNormalizeEntry(_e, resolvers) if (!e.loc || !filterPath(e.loc)) return false return e @@ -46,7 +46,7 @@ export function resolveSitemapEntries(sitemap: SitemapDefinition, sources: Sitem validI18nUrlsForTransform = _urls.map((_e, i) => { if (_e._abs) return false - const split = splitForLocales(_e.loc, localeCodes) + const split = splitForLocales(_e._relativeLoc, localeCodes) let localeCode = split[0] const pathWithoutPrefix = split[1] if (!localeCode) @@ -149,7 +149,7 @@ export function resolveSitemapEntries(sitemap: SitemapDefinition, sources: Sitem href, } }).filter(Boolean), - }) + }, resolvers) if (e._locale.code === newEntry._locale.code) { // replace _urls[e._index] = newEntry @@ -226,7 +226,7 @@ export async function buildSitemapUrls(sitemap: SitemapDefinition, resolvers: Ni sources.push(...await childSitemapSources(sitemap)) const resolvedSources = await resolveSitemapSources(sources, resolvers.event) - const enhancedUrls = resolveSitemapEntries(sitemap, resolvedSources, { autoI18n, isI18nMapped }) + const enhancedUrls = resolveSitemapEntries(sitemap, resolvedSources, { autoI18n, isI18nMapped }, resolvers) // 3. filtered urls // TODO make sure include and exclude start with baseURL? const filteredUrls = enhancedUrls.filter((e) => { diff --git a/src/runtime/nitro/sitemap/urlset/normalise.ts b/src/runtime/nitro/sitemap/urlset/normalise.ts index 87ebbe6a..c0d92bd0 100644 --- a/src/runtime/nitro/sitemap/urlset/normalise.ts +++ b/src/runtime/nitro/sitemap/urlset/normalise.ts @@ -1,4 +1,13 @@ -import { hasProtocol, parsePath, parseURL } from 'ufo' +import { + encodePath, + hasProtocol, + parsePath, + parseQuery, + parseURL, + stringifyParsedURL, + stringifyQuery, + withoutTrailingSlash, +} from 'ufo' import { defu } from 'defu' import type { AlternativeEntry, @@ -27,7 +36,7 @@ function removeTrailingSlash(s: string) { return s.replace(/\/(\?|#|$)/, '$1') } -export function preNormalizeEntry(_e: SitemapUrl | string): ResolvedSitemapUrl { +export function preNormalizeEntry(_e: SitemapUrl | string, resolvers?: NitroUrlResolvers): ResolvedSitemapUrl { const e = (typeof _e === 'string' ? { loc: _e } : { ..._e }) as ResolvedSitemapUrl if (e.url && !e.loc) { e.loc = e.url @@ -45,14 +54,24 @@ export function preNormalizeEntry(_e: SitemapUrl | string): ResolvedSitemapUrl { catch (e) { e._path = null } - if (e._path?.pathname === '') - e.loc = `${e.loc}/` if (e._path) { - e._key = `${e._sitemap || ''}${e._path?.pathname || '/'}${e._path.search}` + const query = parseQuery(e._path.search) + const qs = stringifyQuery(query) + e._relativeLoc = `${encodePath(e._path?.pathname)}${qs.length ? `?${qs}` : ''}` + if (e._path.host) { + e.loc = stringifyParsedURL(e._path) + } + else { + e.loc = e._relativeLoc + } } else { - e._key = e.loc + e.loc = encodeURI(e.loc) } + if (e.loc === '') + e.loc = `/` + e.loc = resolve(e.loc, resolvers) + e._key = `${e._sitemap || ''}${withoutTrailingSlash(e.loc)}` return e as ResolvedSitemapUrl } diff --git a/src/runtime/types.ts b/src/runtime/types.ts index 3d5994b5..ddb4abd9 100644 --- a/src/runtime/types.ts +++ b/src/runtime/types.ts @@ -265,6 +265,10 @@ export type ResolvedSitemapUrl = Omit & Required { expect(sitemap).toMatchInlineSnapshot(` " + + https://harlanzw.com/ + + + Harlan Wilton + en + + Sitemap test + 2008-12-23 + + https://nuxtseo.com/ diff --git a/test/integration/single/queryRoutes.test.ts b/test/integration/single/queryRoutes.test.ts index 13fbf775..5ff46a4b 100644 --- a/test/integration/single/queryRoutes.test.ts +++ b/test/integration/single/queryRoutes.test.ts @@ -23,6 +23,6 @@ describe('query routes', () => { expect(sitemap).toContain('https://nuxtseo.com/query-no-slash?foo=bar') expect(sitemap).toContain('https://nuxtseo.com/query-slash?foo=bar') - expect(sitemap).toContain('https://nuxtseo.com/query-slash-hash?foo=bar#hash') + expect(sitemap).not.toContain('https://nuxtseo.com/query-slash-hash?foo=bar#hash') }, 60000) }) diff --git a/test/unit/i18n.test.ts b/test/unit/i18n.test.ts index 5483736f..829ae5d9 100644 --- a/test/unit/i18n.test.ts +++ b/test/unit/i18n.test.ts @@ -85,6 +85,7 @@ describe('i18n', () => { "pathname": "/__sitemap/url", "search": "", }, + "_relativeLoc": "/__sitemap/url", "changefreq": "weekly", "loc": "/__sitemap/url", }, @@ -133,6 +134,7 @@ describe('i18n', () => { "search": "", }, "_pathWithoutPrefix": "/__sitemap/url", + "_relativeLoc": "/__sitemap/url", "_sitemap": "en-US", "alternatives": [ { @@ -168,6 +170,7 @@ describe('i18n', () => { "search": "", }, "_pathWithoutPrefix": "/__sitemap/url", + "_relativeLoc": "/fr/__sitemap/url", "_sitemap": "fr-FR", "alternatives": [ { @@ -243,6 +246,7 @@ describe('i18n', () => { "search": "", }, "_pathWithoutPrefix": "/dynamic/foo", + "_relativeLoc": "/en/dynamic/foo", "_sitemap": "en-US", "alternatives": [ { @@ -277,6 +281,7 @@ describe('i18n', () => { "search": "", }, "_pathWithoutPrefix": "/dynamic/foo", + "_relativeLoc": "/fr/dynamic/foo", "_sitemap": "fr-FR", "alternatives": [ { @@ -311,6 +316,7 @@ describe('i18n', () => { "search": "", }, "_pathWithoutPrefix": "endless-dungeon", + "_relativeLoc": "endless-dungeon", "_sitemap": "en-US", "alternatives": [ { @@ -345,6 +351,7 @@ describe('i18n', () => { "search": "", }, "_pathWithoutPrefix": "english-url", + "_relativeLoc": "english-url", "_sitemap": "en-US", "alternatives": [ { @@ -360,7 +367,7 @@ describe('i18n', () => { }, { "_abs": true, - "_key": "/abc/def", + "_key": "https://www.somedomain.com/abc/def", "_path": { "auth": "", "hash": "", @@ -370,6 +377,7 @@ describe('i18n', () => { "search": "", Symbol(ufo:protocolRelative): false, }, + "_relativeLoc": "/abc/def", "loc": "https://www.somedomain.com/abc/def", }, { @@ -389,6 +397,7 @@ describe('i18n', () => { "search": "", }, "_pathWithoutPrefix": "endless-dungeon", + "_relativeLoc": "/fr/endless-dungeon", "_sitemap": "fr-FR", "alternatives": [ { diff --git a/test/unit/normalise.test.ts b/test/unit/normalise.test.ts index 13ca45ac..004b5b41 100644 --- a/test/unit/normalise.test.ts +++ b/test/unit/normalise.test.ts @@ -13,6 +13,7 @@ describe('normalise', () => { "pathname": "/query", "search": "?foo=bar", }, + "_relativeLoc": "/query?foo=bar", "loc": "/query?foo=bar", } `) @@ -26,8 +27,54 @@ describe('normalise', () => { "pathname": "/query", "search": "?foo=bar", }, + "_relativeLoc": "/query?foo=bar", "loc": "/query?foo=bar", } `) }) + + it('encoding', () => { + const normalisedWithoutSlash = preNormalizeEntry({ loc: '/this/is a test' }) + expect(normalisedWithoutSlash).toMatchInlineSnapshot(` + { + "_abs": false, + "_key": "/this/is%20a%20test", + "_path": { + "hash": "", + "pathname": "/this/is a test", + "search": "", + }, + "_relativeLoc": "/this/is%20a%20test", + "loc": "/this/is%20a%20test", + } + `) + const withQuery = preNormalizeEntry({ loc: '/this/is a test?withAQuery=foo' }) + expect(withQuery).toMatchInlineSnapshot(` + { + "_abs": false, + "_key": "/this/is%20a%20test?withAQuery=foo", + "_path": { + "hash": "", + "pathname": "/this/is a test", + "search": "?withAQuery=foo", + }, + "_relativeLoc": "/this/is%20a%20test?withAQuery=foo", + "loc": "/this/is%20a%20test?withAQuery=foo", + } + `) + const withQueryWeird = preNormalizeEntry({ loc: '/this/is a test?with A some weirdformat=foo' }) + expect(withQueryWeird).toMatchInlineSnapshot(` + { + "_abs": false, + "_key": "/this/is%20a%20test?with+A+some+weirdformat=foo", + "_path": { + "hash": "", + "pathname": "/this/is a test", + "search": "?with A some weirdformat=foo", + }, + "_relativeLoc": "/this/is%20a%20test?with+A+some+weirdformat=foo", + "loc": "/this/is%20a%20test?with+A+some+weirdformat=foo", + } + `) + }) })