lms/djangoapps/discussion/rest_api/serializers.py

-Original file line number
+Diff line change
@@ -155,13 +155,6 @@ def filter_spam_urls_from_html(html_string):
             escaped = domain.replace(".", r"\.")
             domain_pattern = rf"(\w+\.)*{escaped}(?:/\S*)*"
             patterns.append(re.compile(rf"(https?://)?{domain_pattern}", re.IGNORECASE))
-            spaced_parts = list(domain)
-            spaced_pattern = "".join(
-                rf"{re.escape(char)}(?:\s|&nbsp;|\u00A0)*" if char != "." else r"\.(?:\s|&nbsp;|\u00A0)*"
-                for char in spaced_parts
-            )
-            spaced_pattern += r"(?:\/(?:\s|&nbsp;|\u00A0|\w)*)*"
-            patterns.append(re.compile(spaced_pattern, re.IGNORECASE))
         for a_tag in soup.find_all("a", href=True):
             href = a_tag.get('href')
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -1130,7 +1130,3 @@ def test_filter(self):
                 filter_spam_urls_from_html('<div>example.com/abc/def</div>')[0],
                 '<div></div>'
             )
-            self.assertEqual(
-                filter_spam_urls_from_html('<div>e x a m p l e . c o m / a b c / d e f</div>')[0],
-                '<div></div>'
-            )

feat: remove the whitespace-separated-character check for spam URLs (#37238)

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

muhammadadeeltajamul merged 1 commit into master from inf-2122

Aug 20, 2025

-Original file line number
+Diff line change
@@ -155,13 +155,6 @@ def filter_spam_urls_from_html(html_string):
             escaped = domain.replace(".", r"\.")
             domain_pattern = rf"(\w+\.)*{escaped}(?:/\S*)*"
             patterns.append(re.compile(rf"(https?://)?{domain_pattern}", re.IGNORECASE))
-            spaced_parts = list(domain)
-            spaced_pattern = "".join(
-                rf"{re.escape(char)}(?:\s|&nbsp;|\u00A0)*" if char != "." else r"\.(?:\s|&nbsp;|\u00A0)*"
-                for char in spaced_parts
-            )
-            spaced_pattern += r"(?:\/(?:\s|&nbsp;|\u00A0|\w)*)*"
-            patterns.append(re.compile(spaced_pattern, re.IGNORECASE))
         for a_tag in soup.find_all("a", href=True):
             href = a_tag.get('href')
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -1130,7 +1130,3 @@ def test_filter(self):
                 filter_spam_urls_from_html('<div>example.com/abc/def</div>')[0],
                 '<div></div>'
             )
-            self.assertEqual(
-                filter_spam_urls_from_html('<div>e x a m p l e . c o m / a b c / d e f</div>')[0],
-                '<div></div>'
-            )

Provide feedback