From b716391527102b436bcf13698d2c96bc62b22869 Mon Sep 17 00:00:00 2001 From: Jeroen Seegers Date: Sun, 24 Mar 2024 10:21:06 +0100 Subject: [PATCH 1/2] Add equal-sign to parse safe characters --- src/protego.py | 2 +- tests/test_protego.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/protego.py b/src/protego.py index 3b2c0fe..6f89123 100644 --- a/src/protego.py +++ b/src/protego.py @@ -184,7 +184,7 @@ def _quote_pattern(self, pattern): parts = urlparse(pattern) pattern = self._unquote(parts.path, ignore="/*$%") - pattern = quote(pattern, safe="/*%") + pattern = quote(pattern, safe="/*%=") parts = ParseResult( "", "", pattern + last_char, parts.params, parts.query, parts.fragment diff --git a/tests/test_protego.py b/tests/test_protego.py index 3571d11..aa15c85 100644 --- a/tests/test_protego.py +++ b/tests/test_protego.py @@ -1141,6 +1141,16 @@ def test_parse_time_period(self): self.assertEqual(start_time, time(5, 0)) self.assertEqual(end_time, time(6, 0)) + def test_disallow_query_wildcard(self): + content = ( + "User-agent: * \n" + "Disallow: /*s=" + ) + rp = Protego.parse(content=content) + self.assertTrue(rp.can_fetch("https://www.site.local/", "*")) + self.assertTrue(rp.can_fetch("https://www.site.local/s/", "*")) + self.assertFalse(rp.can_fetch("https://www.site.local/?s=asd", "*")) + @pytest.mark.parametrize( "allow,disallow,url,allowed", From 37546bf276e2b3d412de97193b8b1e4188670708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 4 Apr 2024 11:40:19 +0200 Subject: [PATCH 2/2] Apply black --- tests/test_protego.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_protego.py b/tests/test_protego.py index aa15c85..9f6520e 100644 --- a/tests/test_protego.py +++ b/tests/test_protego.py @@ -1142,10 +1142,7 @@ def test_parse_time_period(self): self.assertEqual(end_time, time(6, 0)) def test_disallow_query_wildcard(self): - content = ( - "User-agent: * \n" - "Disallow: /*s=" - ) + content = "User-agent: * \nDisallow: /*s=" rp = Protego.parse(content=content) self.assertTrue(rp.can_fetch("https://www.site.local/", "*")) self.assertTrue(rp.can_fetch("https://www.site.local/s/", "*"))