diff --git a/.changeset/famous-seals-camp.md b/.changeset/famous-seals-camp.md
new file mode 100644
index 000000000000..b1703ee45ee0
--- /dev/null
+++ b/.changeset/famous-seals-camp.md
@@ -0,0 +1,5 @@
+---
+"@astrojs/sitemap": patch
+---
+
+Improves performance when generating the sitemap data
diff --git a/packages/integrations/sitemap/src/generate-sitemap.ts b/packages/integrations/sitemap/src/generate-sitemap.ts
index b10771ce481a..abd5d2c6ff49 100644
--- a/packages/integrations/sitemap/src/generate-sitemap.ts
+++ b/packages/integrations/sitemap/src/generate-sitemap.ts
@@ -1,51 +1,77 @@
import type { EnumChangefreq } from 'sitemap';
import type { SitemapItem, SitemapOptions } from './index.js';
-import { parseUrl } from './utils/parse-url.js';
+import { parseI18nUrl } from './utils/parse-i18n-url.js';
/** Construct sitemap.xml given a set of URLs */
-export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) {
- const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts!;
+export function generateSitemap(pages: string[], finalSiteUrl: string, opts?: SitemapOptions) {
+ const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts ?? {};
// TODO: find way to respect URLs here
const urls = [...pages];
urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time
const lastmod = lastmodSrc?.toISOString();
- const { locales, defaultLocale } = i18n || {};
- const localeCodes = Object.keys(locales || {});
+ // Parse URLs for i18n matching later
+ const { defaultLocale, locales } = i18n ?? {};
+ let getI18nLinks: GetI18nLinks | undefined;
+ if (defaultLocale && locales) {
+ getI18nLinks = createGetI18nLinks(urls, defaultLocale, locales, finalSiteUrl);
+ }
- const getPath = (url: string) => {
- const result = parseUrl(url, i18n?.defaultLocale || '', localeCodes, finalSiteUrl);
- return result?.path;
- };
- const getLocale = (url: string) => {
- const result = parseUrl(url, i18n?.defaultLocale || '', localeCodes, finalSiteUrl);
- return result?.locale;
- };
+ const urlData: SitemapItem[] = urls.map((url, i) => ({
+ url,
+ links: getI18nLinks?.(i),
+ lastmod,
+ priority,
+ changefreq: changefreq as EnumChangefreq,
+ }));
+
+ return urlData;
+}
+
+type GetI18nLinks = (urlIndex: number) => SitemapItem['links'] | undefined;
+
+function createGetI18nLinks(
+ urls: string[],
+ defaultLocale: string,
+ locales: Record,
+ finalSiteUrl: string
+): GetI18nLinks {
+ // `parsedI18nUrls` will have the same length as `urls`, matching correspondingly
+ const parsedI18nUrls = urls.map((url) => parseI18nUrl(url, defaultLocale, locales, finalSiteUrl));
+ // Cache as multiple i18n URLs with the same path will have the same links
+ const i18nPathToLinksCache = new Map();
- const urlData: SitemapItem[] = urls.map((url) => {
- let links;
- if (defaultLocale && locales) {
- const currentPath = getPath(url);
- if (currentPath) {
- const filtered = urls.filter((subUrl) => getPath(subUrl) === currentPath);
- if (filtered.length > 1) {
- links = filtered.map((subUrl) => ({
- url: subUrl,
- lang: locales[getLocale(subUrl)!],
- }));
- }
+ return (urlIndex) => {
+ const i18nUrl = parsedI18nUrls[urlIndex];
+ if (!i18nUrl) {
+ return undefined;
+ }
+
+ const cached = i18nPathToLinksCache.get(i18nUrl.path);
+ if (cached) {
+ return cached;
+ }
+
+ // Find all URLs with the same path (without the locale part), e.g. /en/foo and /es/foo
+ const links: NonNullable = [];
+ for (let i = 0; i < parsedI18nUrls.length; i++) {
+ const parsed = parsedI18nUrls[i];
+ if (parsed?.path === i18nUrl.path) {
+ links.push({
+ url: urls[i],
+ lang: locales[parsed.locale],
+ });
}
}
- return {
- url,
- links,
- lastmod,
- priority,
- changefreq: changefreq as EnumChangefreq,
- };
- });
+ // If 0 or 1 (which is itself), return undefined to not create any links.
+ // We also don't need to cache this as we know there's no other URLs that would've match this.
+ if (links.length <= 1) {
+ return undefined;
+ }
- return urlData;
+ i18nPathToLinksCache.set(i18nUrl.path, links);
+ return links;
+ };
}
diff --git a/packages/integrations/sitemap/src/utils/parse-i18n-url.ts b/packages/integrations/sitemap/src/utils/parse-i18n-url.ts
new file mode 100644
index 000000000000..5b7ebf78550d
--- /dev/null
+++ b/packages/integrations/sitemap/src/utils/parse-i18n-url.ts
@@ -0,0 +1,42 @@
+interface ParsedI18nUrl {
+ locale: string;
+ path: string;
+}
+
+// NOTE: The parameters have been schema-validated with Zod
+export function parseI18nUrl(
+ url: string,
+ defaultLocale: string,
+ locales: Record,
+ base: string
+): ParsedI18nUrl | undefined {
+ if (!url.startsWith(base)) {
+ return undefined;
+ }
+
+ let s = url.slice(base.length);
+
+ // Handle root URL
+ if (!s || s === '/') {
+ return { locale: defaultLocale, path: '/' };
+ }
+
+ if (s[0] !== '/') {
+ s = '/' + s;
+ }
+
+ // Get locale from path, e.g.
+ // "/en-US/" -> "en-US"
+ // "/en-US/foo" -> "en-US"
+ const locale = s.split('/')[1];
+ if (locale in locales) {
+ // "/en-US/foo" -> "/foo"
+ let path = s.slice(1 + locale.length);
+ if (!path) {
+ path = '/';
+ }
+ return { locale, path };
+ }
+
+ return { locale: defaultLocale, path: s };
+}
diff --git a/packages/integrations/sitemap/src/utils/parse-url.ts b/packages/integrations/sitemap/src/utils/parse-url.ts
deleted file mode 100644
index f9189cf7d60d..000000000000
--- a/packages/integrations/sitemap/src/utils/parse-url.ts
+++ /dev/null
@@ -1,39 +0,0 @@
-export const parseUrl = (
- url: string,
- defaultLocale: string,
- localeCodes: string[],
- base: string
-) => {
- if (
- !url ||
- !defaultLocale ||
- localeCodes.length === 0 ||
- localeCodes.some((key) => !key) ||
- !base
- ) {
- throw new Error('parseUrl: some parameters are empty');
- }
- if (url.indexOf(base) !== 0) {
- return undefined;
- }
- let s = url.replace(base, '');
- if (!s || s === '/') {
- return { locale: defaultLocale, path: '/' };
- }
- if (!s.startsWith('/')) {
- s = '/' + s;
- }
- const a = s.split('/');
- const locale = a[1];
- if (localeCodes.some((key) => key === locale)) {
- let path = a.slice(2).join('/');
- if (path === '//') {
- path = '/';
- }
- if (path !== '/' && !path.startsWith('/')) {
- path = '/' + path;
- }
- return { locale, path };
- }
- return { locale: defaultLocale, path: s };
-};
diff --git a/packages/integrations/sitemap/test/units/generate-sitemap.test.js b/packages/integrations/sitemap/test/units/generate-sitemap.test.js
new file mode 100644
index 000000000000..2af0e42ab8a4
--- /dev/null
+++ b/packages/integrations/sitemap/test/units/generate-sitemap.test.js
@@ -0,0 +1,147 @@
+import assert from 'node:assert/strict';
+import { describe, it } from 'node:test';
+import { generateSitemap } from '../../dist/generate-sitemap.js';
+
+const site = 'http://example.com';
+
+describe('generateSitemap', () => {
+ describe('basic', () => {
+ it('works', () => {
+ const items = generateSitemap(
+ [
+ // All pages
+ `${site}/a`,
+ `${site}/b`,
+ `${site}/c`,
+ ],
+ site
+ );
+ assert.equal(items.length, 3);
+ assert.equal(items[0].url, `${site}/a`);
+ assert.equal(items[1].url, `${site}/b`);
+ assert.equal(items[2].url, `${site}/c`);
+ });
+
+ it('sorts the items', () => {
+ const items = generateSitemap(
+ [
+ // All pages
+ `${site}/c`,
+ `${site}/a`,
+ `${site}/b`,
+ ],
+ site
+ );
+ assert.equal(items.length, 3);
+ assert.equal(items[0].url, `${site}/a`);
+ assert.equal(items[1].url, `${site}/b`);
+ assert.equal(items[2].url, `${site}/c`);
+ });
+
+ it('sitemap props are passed to items', () => {
+ const now = new Date();
+ const items = generateSitemap(
+ [
+ // All pages
+ `${site}/a`,
+ `${site}/b`,
+ `${site}/c`,
+ ],
+ site,
+ {
+ changefreq: 'monthly',
+ lastmod: now,
+ priority: 0.5,
+ }
+ );
+
+ assert.equal(items.length, 3);
+
+ assert.equal(items[0].url, `${site}/a`);
+ assert.equal(items[0].changefreq, 'monthly');
+ assert.equal(items[0].lastmod, now.toISOString());
+ assert.equal(items[0].priority, 0.5);
+
+ assert.equal(items[1].url, `${site}/b`);
+ assert.equal(items[1].changefreq, 'monthly');
+ assert.equal(items[1].lastmod, now.toISOString());
+ assert.equal(items[1].priority, 0.5);
+
+ assert.equal(items[2].url, `${site}/c`);
+ assert.equal(items[2].changefreq, 'monthly');
+ assert.equal(items[2].lastmod, now.toISOString());
+ assert.equal(items[2].priority, 0.5);
+ });
+ });
+
+ describe('i18n', () => {
+ it('works', () => {
+ const items = generateSitemap(
+ [
+ // All pages
+ `${site}/a`,
+ `${site}/b`,
+ `${site}/c`,
+ `${site}/es/a`,
+ `${site}/es/b`,
+ `${site}/es/c`,
+ `${site}/fr/a`,
+ `${site}/fr/b`,
+ // `${site}/fr-CA/c`, (intentionally missing for testing)
+ ],
+ site,
+ {
+ i18n: {
+ defaultLocale: 'en',
+ locales: {
+ en: 'en-US',
+ es: 'es-ES',
+ fr: 'fr-CA',
+ },
+ },
+ }
+ );
+
+ assert.equal(items.length, 8);
+
+ const aLinks = [
+ { url: `${site}/a`, lang: 'en-US' },
+ { url: `${site}/es/a`, lang: 'es-ES' },
+ { url: `${site}/fr/a`, lang: 'fr-CA' },
+ ];
+ const bLinks = [
+ { url: `${site}/b`, lang: 'en-US' },
+ { url: `${site}/es/b`, lang: 'es-ES' },
+ { url: `${site}/fr/b`, lang: 'fr-CA' },
+ ];
+ const cLinks = [
+ { url: `${site}/c`, lang: 'en-US' },
+ { url: `${site}/es/c`, lang: 'es-ES' },
+ ];
+
+ assert.equal(items[0].url, `${site}/a`);
+ assert.deepEqual(items[0].links, aLinks);
+
+ assert.equal(items[1].url, `${site}/b`);
+ assert.deepEqual(items[1].links, bLinks);
+
+ assert.equal(items[2].url, `${site}/c`);
+ assert.deepEqual(items[2].links, cLinks);
+
+ assert.equal(items[3].url, `${site}/es/a`);
+ assert.deepEqual(items[3].links, aLinks);
+
+ assert.equal(items[4].url, `${site}/es/b`);
+ assert.deepEqual(items[4].links, bLinks);
+
+ assert.equal(items[5].url, `${site}/es/c`);
+ assert.deepEqual(items[5].links, cLinks);
+
+ assert.equal(items[6].url, `${site}/fr/a`);
+ assert.deepEqual(items[6].links, aLinks);
+
+ assert.equal(items[7].url, `${site}/fr/b`);
+ assert.deepEqual(items[7].links, bLinks);
+ });
+ });
+});