From 17491ceb0411147eceda494ffd9f75281897a434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Re=C3=A9?= Date: Thu, 2 Jan 2025 14:12:42 +0100 Subject: [PATCH 1/2] Stop generating sitemap.xml.gz (#6561) We generate sitemap-index.xml which is also put correctly into robots.txt. However we still provide the old sitemap.xml.gz. But, while this serves no purpose in addition to the index, it can cause problems if Google somehow parses it (for example it's submitted to the Google Search console). For this reason, we stop providing sitemap.xml.gz. - also serve the batched sitemap under the old name sitemap.xml.gz - update comment in robots.txt --- packages/volto/news/6561.bugfix | 1 + packages/volto/src/express-middleware/sitemap.js | 15 ++++++++++++++- packages/volto/src/helpers/Sitemap/Sitemap.js | 11 +++++++++-- 3 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 packages/volto/news/6561.bugfix diff --git a/packages/volto/news/6561.bugfix b/packages/volto/news/6561.bugfix new file mode 100644 index 0000000000..def9b0aaf9 --- /dev/null +++ b/packages/volto/news/6561.bugfix @@ -0,0 +1 @@ +Stop generating sitemap.xml.gz @reebalazs \ No newline at end of file diff --git a/packages/volto/src/express-middleware/sitemap.js b/packages/volto/src/express-middleware/sitemap.js index 59bf641c8f..fedd01d78c 100644 --- a/packages/volto/src/express-middleware/sitemap.js +++ b/packages/volto/src/express-middleware/sitemap.js @@ -47,11 +47,24 @@ export const sitemapIndex = function (req, res, next) { }); }; +export const sitemapIndexCompatibility = function (req, res, next) { + generateSitemapIndex(req, true).then((sitemapIndex) => { + res.set('Content-Type', 'application/x-gzip'); + res.set('Content-Disposition', 'attachment; filename="sitemap.xml.gz"'); + res.send(sitemapIndex); + }); +}; + export default function sitemapMiddleware() { const middleware = express.Router(); - middleware.all('**/sitemap.xml.gz', sitemap); + // For backwards compatibility, and 
to allow a graceful transition for + // sites that are already set up on the Google Search Console, we continue delivering + // the new batched sitemap under the old sitemap.xml.gz name. + middleware.all('**/sitemap.xml.gz', sitemapIndexCompatibility); middleware.all('**/sitemap:batch.xml.gz', sitemap); + // For new setups, `sitemap-index.xml` should be added to the + // Google Search Console. middleware.all('**/sitemap-index.xml', sitemapIndex); middleware.id = 'sitemap.xml.gz'; return middleware; diff --git a/packages/volto/src/helpers/Sitemap/Sitemap.js b/packages/volto/src/helpers/Sitemap/Sitemap.js index 0562cfa5f2..a317925400 100644 --- a/packages/volto/src/helpers/Sitemap/Sitemap.js +++ b/packages/volto/src/helpers/Sitemap/Sitemap.js @@ -61,7 +61,7 @@ export const generateSitemap = (_req, start = 0, size = undefined) => * @param {Object} _req Request object * @return {string} Generated sitemap index */ -export const generateSitemapIndex = (_req) => +export const generateSitemapIndex = (_req, gzip = false) => new Promise((resolve) => { const { settings } = config; const APISUFIX = settings.legacyTraverse ? 
'' : '/++api++'; @@ -88,7 +88,14 @@ export const generateSitemapIndex = (_req) => const result = ` ${items.join('\n')}\n`; - resolve(result); + + if (gzip) { + zlib.gzip(Buffer.from(result, 'utf8'), (_err, buffer) => { + resolve(buffer); + }); + } else { + resolve(result); + } } }); }); From 13ba1837a7b009e861c4ca9348096b69c6198cb3 Mon Sep 17 00:00:00 2001 From: David Glick Date: Wed, 12 Feb 2025 21:30:02 -0800 Subject: [PATCH 2/2] Update packages/volto/news/6561.bugfix --- packages/volto/news/6561.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/volto/news/6561.bugfix b/packages/volto/news/6561.bugfix index def9b0aaf9..4505d28941 100644 --- a/packages/volto/news/6561.bugfix +++ b/packages/volto/news/6561.bugfix @@ -1 +1 @@ -Stop generating sitemap.xml.gz @reebalazs \ No newline at end of file +Serve `sitemap.xml.gz` as an alias for `sitemap-index.xml`. @reebalazs \ No newline at end of file