From 63261fd861461a7a2746ba9bd4158ee1278336c7 Mon Sep 17 00:00:00 2001 From: Travis Brace Date: Wed, 20 Jul 2022 14:29:57 -0400 Subject: [PATCH 1/3] update generatesitemap.py for shtml Hoping adding "shtml" would help treat index.shtml pages the same as index.html pages removing the from sitemap so pages end in / tests shtml as html extension Co-Authored-By: Travis Brace Co-Authored-By: Vincent A. Cicirello <762030+cicirello@users.noreply.github.com> --- generatesitemap.py | 2 +- tests/tests.py | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/generatesitemap.py b/generatesitemap.py index 492fdbd4..1a9d4366 100755 --- a/generatesitemap.py +++ b/generatesitemap.py @@ -113,7 +113,7 @@ def getFileExtension(f) : i = f.rfind(".") return f[i+1:].lower() if i >= 0 and f.rfind("/") < i else None -HTML_EXTENSIONS = { "html", "htm" } +HTML_EXTENSIONS = { "html", "htm", "shtml" } def isHTMLFile(f) : """Checks if the file is an HTML file, diff --git a/tests/tests.py b/tests/tests.py index 48d8c56d..ad51e1b7 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -46,22 +46,22 @@ def validateDate(s) : class TestGenerateSitemap(unittest.TestCase) : def test_createExtensionSet_htmlOnly(self): - self.assertEqual({"html", "htm"}, gs.createExtensionSet(True, False, set())) + self.assertEqual({"html", "htm", "shtml"}, gs.createExtensionSet(True, False, set())) def test_createExtensionSet_pdfOnly(self): self.assertEqual({"pdf"}, gs.createExtensionSet(False, True, set())) def test_createExtensionSet_htmlAndPdf(self): - self.assertEqual({"html", "htm", "pdf"}, gs.createExtensionSet(True, True, set())) + self.assertEqual({"html", "htm", "shtml", "pdf"}, gs.createExtensionSet(True, True, set())) def test_createExtensionSet_html_and_more(self): - self.assertEqual({"html", "htm", "abc"}, gs.createExtensionSet(True, False, {"abc"})) + self.assertEqual({"html", "htm", "shtml", "abc"}, gs.createExtensionSet(True, False, {"abc"})) def 
test_createExtensionSet_pdf_and_more(self): self.assertEqual({"pdf", "abc", "def"}, gs.createExtensionSet(False, True, {"abc", "def"})) def test_createExtensionSet_htmlAndPdf_and_more(self): - self.assertEqual({"html", "htm", "pdf", "abc"}, gs.createExtensionSet(True, True, {"abc"})) + self.assertEqual({"html", "htm", "shtml", "pdf", "abc"}, gs.createExtensionSet(True, True, {"abc"})) def test_createExtensionSet_only_additional(self): self.assertEqual({"abc", "def"}, gs.createExtensionSet(False, False, {"abc", "def"})) @@ -133,7 +133,16 @@ def test_isHTMLFile(self) : "b/a.html", "b/a.htm", "b/index.html", - "b/index.htm" + "b/index.htm", + ".shtml", + "a.shtml", + "index.shtml", + "/.shtml", + "/a.shtml", + "/index.shtml", + "b/.shtml", + "b/a.shtml", + "b/index.shtml" ] nonHtmlFilenames = [ ".0html", ".0htm", From 34e1e209b7d5704c98b5253b0bbbaf9900423b36 Mon Sep 17 00:00:00 2001 From: "Vincent A. Cicirello" Date: Fri, 22 Jul 2022 13:23:24 -0400 Subject: [PATCH 2/3] eliminate redundant code --- generatesitemap.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/generatesitemap.py b/generatesitemap.py index 1a9d4366..fde2e4fe 100755 --- a/generatesitemap.py +++ b/generatesitemap.py @@ -233,12 +233,7 @@ def urlstring(f, baseUrl, dropExtension=False) : u = f[1:] else : u = f - if len(u) >= 11 and u[-11:] == "/index.html" : - u = u[:-10] - elif u == "index.html" : - u = "" - elif dropExtension and len(u) >= 5 and u[-5:] == ".html" : - u = u[:-5] + u = sortname(u, dropExtension) if len(u) >= 1 and u[0]=="/" and len(baseUrl) >= 1 and baseUrl[-1]=="/" : u = u[1:] elif (len(u)==0 or u[0]!="/") and (len(baseUrl)==0 or baseUrl[-1]!="/") : From 0e18a9ed7d4cf65ed569b52bcc86275b5fb39b98 Mon Sep 17 00:00:00 2001 From: "Vincent A. 
Cicirello" Date: Mon, 25 Jul 2022 11:32:02 -0400 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e9a89e3..f02240ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] - 2022-06-25 +## [Unreleased] - 2022-07-25 ### Added @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed ### Fixed +* Checks .shtml files for noindex directives, excluding those that contain one from the sitemap. ### CI/CD