Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better tests for spam patterns and signatures #2605

Merged
merged 1 commit into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 30 additions & 26 deletions peterbecom/plog/spamprevention.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
import re

import bleach
from django.conf import settings
from django.db.models import F

from peterbecom.plog.models import SpamCommentPattern
from peterbecom.plog.models import SpamCommentPattern, SpamCommentSignature


def increment_pattern(id: int):
    """Add one to the ``kills`` counter of the SpamCommentPattern with this id.

    The new value is given as ``F("kills") + 1`` in a queryset ``update()``
    rather than being read into Python, incremented and saved back.
    """
    matching_patterns = SpamCommentPattern.objects.filter(id=id)
    matching_patterns.update(kills=F("kills") + 1)


def increment_signature(id: int):
    """Add one to the ``kills`` counter of the SpamCommentSignature with this id.

    The new value is given as ``F("kills") + 1`` in a queryset ``update()``
    rather than being read into Python, incremented and saved back.
    """
    matching_signatures = SpamCommentSignature.objects.filter(id=id)
    matching_signatures.update(kills=F("kills") + 1)


def contains_spam_url_patterns(text):
html = bleach.clean(text)

problems = []

qs = SpamCommentPattern.objects.filter(is_url_pattern=True).values_list(
"pattern", flat=True
)
regex = re.compile(r"|".join([re.escape(x) for x in qs]))
qs = SpamCommentPattern.objects.filter(is_url_pattern=True).values("pattern", "id")
patterns_map = {x["pattern"]: x["id"] for x in qs}
regex = re.compile(r"|".join([re.escape(x) for x in patterns_map.keys()]))

def scrutinize_link(attrs, new, **kwargs):
href_key = (None, "href")
Expand All @@ -32,38 +39,35 @@ def scrutinize_link(attrs, new, **kwargs):
# Bail if it's not a HTTP URL, such as ssh:// or ftp://
return

found = regex.findall(href)
if found:
for found in regex.findall(href):
problems.append(found)
increment_pattern(patterns_map[found])

bleach.linkify(html, callbacks=[scrutinize_link])
return bool(problems)


def contains_spam_patterns(text):
qs = SpamCommentPattern.objects.filter(
is_url_pattern=False, is_regex=False
).values_list("pattern", flat=True)
qs = SpamCommentPattern.objects.filter(is_url_pattern=False, is_regex=False).values(
"pattern", "id"
)
for pattern in qs:
if pattern in text:
if pattern["pattern"] in text:
increment_pattern(pattern["id"])
return True
return False


def is_trash_commenter(**params):
def match(pattern, value):
if hasattr(pattern, "search"):
# It's a regex!
return bool(pattern.search(value))
return value == pattern
def is_trash_commenter(name, email):
for signature in SpamCommentSignature.objects.all().values("id", "name", "email"):
if signature["name"] is not None and name is not None:
if signature["name"] == name:
increment_signature(signature["id"])
return True

for combo in settings.TRASH_COMMENT_COMBINATIONS:
assert combo
assert None not in combo.values()

# We can only check on things that are in params.
common_keys = set(combo) & set([k for k, v in params.items() if v is not None])
if common_keys and all(match(combo[k], params[k]) for k in common_keys):
return True
if signature["email"] is not None and email is not None:
if signature["email"] == email:
increment_signature(signature["id"])
return True

return False
4 changes: 4 additions & 0 deletions peterbecom/plog/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ def prepare_json(request):
@ensure_csrf_cookie
@require_POST
def preview_json(request):
raise Exception("deprecated")

comment = request.POST.get("comment", "").strip()
name = request.POST.get("name", "").strip()
email = request.POST.get("email", "").strip()
Expand All @@ -161,6 +163,8 @@ def preview_json(request):
@require_POST
@transaction.atomic
def submit_json(request, oid):
raise Exception("deprecated")

post = get_object_or_404(BlogItem, oid=oid, archived__isnull=True)
if post.disallow_comments:
return http.HttpResponseBadRequest("No comments please")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
from django.urls import reverse
from django.utils import timezone

from peterbecom.plog.models import BlogComment, BlogItem
from peterbecom.plog.models import (
BlogComment,
BlogItem,
SpamCommentPattern,
SpamCommentSignature,
)


@pytest.mark.django_db
Expand Down Expand Up @@ -66,6 +71,119 @@ def test_submit_comment_x_forward_for(client):
assert blog_comment.ip_address == "2601:201:8a7e:38e0:79f6:4326:ff50:23b3"


@pytest.mark.django_db
def test_spamy_comment(client):
    """Comments matching a stored spam pattern are rejected and counted.

    Covers one URL pattern and one plain-text pattern. Each submission must
    come back as a 400 with the body "Looks too spammy" and leave the
    matching pattern with ``kills == 1``.
    """
    submit_url = reverse("publicapi:submit_comment")
    rendered_text = BlogItem.render("*Text*", "markdown", "")
    post = BlogItem.objects.create(
        oid="oid",
        title="Title",
        text="*Text*",
        text_rendered=rendered_text,
        display_format="markdown",
        summary="Summary",
        pub_date=timezone.now(),
    )

    def assert_rejected_as_spam(comment_text, spam_pattern):
        # Submit the comment, then confirm both the rejection and that the
        # pattern's kill counter was bumped exactly once.
        response = client.post(
            submit_url,
            {"oid": post.oid, "comment": comment_text},
        )
        assert response.status_code == 400
        assert response.content.decode("utf-8") == "Looks too spammy"
        spam_pattern.refresh_from_db()
        assert spam_pattern.kills == 1

    # A URL pattern that matches a link in the comment.
    url_pattern = SpamCommentPattern.objects.create(
        is_url_pattern=True,
        pattern="example.com",
    )
    assert_rejected_as_spam("Comment text http://example.com", url_pattern)

    # A pattern created with only ``pattern`` set, matched in the comment text.
    text_pattern = SpamCommentPattern.objects.create(
        pattern="skype",
    )
    assert_rejected_as_spam("Don't mention skype", text_pattern)


@pytest.mark.django_db
def test_trash_commenter(client):
    """A stored spam signature rejects comments by name and/or email.

    Walks one signature through three shapes (name + email, email only,
    name only), expecting a 400 ``{"trash": ...}`` response and one more
    kill each time. Once the signature is deleted, the same submission is
    accepted with a 200.
    """
    submit_url = reverse("publicapi:submit_comment")
    rendered_text = BlogItem.render("*Text*", "markdown", "")
    post = BlogItem.objects.create(
        oid="oid",
        title="Title",
        text="*Text*",
        text_rendered=rendered_text,
        display_format="markdown",
        summary="Summary",
        pub_date=timezone.now(),
    )
    spam_signature = SpamCommentSignature.objects.create(
        name="John Doe", email="john@example.com"
    )

    def submit_as(name, email):
        # Post the same comment body each time; only the commenter varies.
        return client.post(
            submit_url,
            {
                "oid": post.oid,
                "comment": "Comment text",
                "name": name,
                "email": email,
            },
        )

    def assert_trashed(name, email, expected_kills):
        response = submit_as(name, email)
        assert response.status_code == 400
        assert response.json()["trash"]
        spam_signature.refresh_from_db()
        assert spam_signature.kills == expected_kills

    # Signature carries both name and email.
    assert_trashed("John Doe", "john@example.com", 1)

    # Signature carries only an email; the submitted name differs.
    spam_signature.name = None
    spam_signature.save()
    assert_trashed("Whatever", "john@example.com", 2)

    # Signature carries only a name; the submitted email is empty.
    spam_signature.email = None
    spam_signature.name = "John Doe"
    spam_signature.save()
    assert_trashed("John Doe", "", 3)

    # With the signature gone, the same commenter is accepted.
    spam_signature.delete()
    accepted = submit_as("John Doe", "john@example.com")
    assert accepted.status_code == 200


@pytest.mark.django_db
def test_submit_with_name_and_email(client):
url = reverse("publicapi:submit_comment")
Expand Down
4 changes: 1 addition & 3 deletions peterbecom/publicapi/views/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ def make_cache_key(hash):

user_agent = request.headers.get("User-Agent")

if is_trash_commenter(
name=name, email=email, ip_address=ip_address, user_agent=user_agent
):
if is_trash_commenter(name=name, email=email):
return http.JsonResponse({"trash": True}, status=400)

search = {"comment": comment}
Expand Down