Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Filter the results of user directory searching via the spam checker #6888

Merged
merged 9 commits into from
Feb 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/6888.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The result of a user directory search can now be filtered via the spam checker.
clokep marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 3 additions & 0 deletions docs/spam_checker.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class ExampleSpamChecker:

def user_may_publish_room(self, userid, room_id):
    # True permits the publish; this example permits it for every room.
    return True # allow publishing of all rooms

def check_username_for_spam(self, user_profile):
    # Note the polarity: True marks the profile as spammy (it is then left
    # out of user directory results), so False lets the user through.
    return False # allow all usernames
```

## Configuration
Expand Down
27 changes: 27 additions & 0 deletions synapse/events/spamcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# limitations under the License.

import inspect
from typing import Dict

from synapse.spam_checker_api import SpamCheckerApi

Expand Down Expand Up @@ -125,3 +126,29 @@ def user_may_publish_room(self, userid, room_id):
return True

return self.spam_checker.user_may_publish_room(userid, room_id)

def check_username_for_spam(self, user_profile: Dict[str, str]) -> bool:
    """Checks if a user ID or display name is considered "spammy" by this server.

    A profile that the server considers spammy is not included in user
    directory results.

    Args:
        user_profile: The user information to check. It contains the keys:
            * user_id
            * display_name
            * avatar_url

    Returns:
        True if the user is spammy.
    """
    module = self.spam_checker
    if module is None:
        # No spam checker is configured, so nothing is ever spammy.
        return False

    # For backwards compatibility, a spam checker that does not implement
    # this method is treated as not interfering.
    hook = getattr(module, "check_username_for_spam", None)
    if hook:
        # Hand over a copy of the profile so that the spam checker cannot
        # modify the caller's object.
        return hook(user_profile.copy())

    return False
14 changes: 12 additions & 2 deletions synapse/handlers/user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(self, hs):
self.is_mine_id = hs.is_mine_id
self.update_user_directory = hs.config.update_user_directory
self.search_all_users = hs.config.user_directory_search_all_users
self.spam_checker = hs.get_spam_checker()
# The current position in the current_state_delta stream
self.pos = None

Expand All @@ -65,7 +66,7 @@ def __init__(self, hs):
# we start populating the user directory
self.clock.call_later(0, self.notify_new_event)

def search_users(self, user_id, search_term, limit):
async def search_users(self, user_id, search_term, limit):
clokep marked this conversation as resolved.
Show resolved Hide resolved
"""Searches for users in directory

Returns:
Expand All @@ -82,7 +83,16 @@ def search_users(self, user_id, search_term, limit):
]
}
"""
return self.store.search_user_dir(user_id, search_term, limit)
results = await self.store.search_user_dir(user_id, search_term, limit)

# Remove any spammy users from the results.
results["results"] = [
user
for user in results["results"]
if not self.spam_checker.check_username_for_spam(user)
]

return results

def notify_new_event(self):
"""Called when there may be more deltas to process
Expand Down
92 changes: 92 additions & 0 deletions tests/handlers/test_user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,98 @@ def test_private_room(self):
s = self.get_success(self.handler.search_users(u1, "user3", 10))
self.assertEqual(len(s["results"]), 0)

def test_spam_checker(self):
    """
    A user who fails the spam checks will not appear in search results.
    """
    searcher = self.register_user("user1", "pass")
    searcher_token = self.login(searcher, "pass")
    target = self.register_user("user2", "pass")
    target_token = self.login(target, "pass")

    def search_for_user2():
        # Search the directory for "user2" on behalf of user1 and hand back
        # only the list of matches.
        response = self.get_success(
            self.handler.search_users(searcher, "user2", 10)
        )
        return response["results"]

    # Users are not added to the directory until they join a room, so the
    # first search finds nothing.
    self.assertEqual(len(search_for_user2()), 0)

    room_id = self.helper.create_room_as(
        searcher, is_public=False, tok=searcher_token
    )
    self.helper.invite(room_id, src=searcher, targ=target, tok=searcher_token)
    self.helper.join(room_id, user=target, tok=target_token)

    # Check that the database has been populated correctly.
    private_shares = self.get_users_who_share_private_rooms()
    users_in_public_rooms = self.get_users_in_public_rooms()

    expected_shares = {
        (searcher, target, room_id),
        (target, searcher, room_id),
    }
    self.assertEqual(self._compress_shared(private_shares), expected_shares)
    self.assertEqual(users_in_public_rooms, [])

    # With the shared room in place, user1 finds user2.
    self.assertEqual(len(search_for_user2()), 1)

    # Configure a spam checker that does not filter any users.
    checker_wrapper = self.hs.get_spam_checker()

    class AllowAll:
        def check_username_for_spam(self, user_profile):
            # Allow all users.
            return False

    checker_wrapper.spam_checker = AllowAll()

    # The results do not change: user1 still finds user2.
    self.assertEqual(len(search_for_user2()), 1)

    # Configure a spam checker that filters all users.
    class BlockAll:
        def check_username_for_spam(self, user_profile):
            # All users are spammy.
            return True

    checker_wrapper.spam_checker = BlockAll()

    # User1 now gets no search results for any of the other users.
    self.assertEqual(len(search_for_user2()), 0)

def test_legacy_spam_checker(self):
    """
    A spam checker without the expected method should be ignored.
    """
    searcher = self.register_user("user1", "pass")
    searcher_token = self.login(searcher, "pass")
    target = self.register_user("user2", "pass")
    target_token = self.login(target, "pass")

    def search_for_user2():
        # Search the directory for "user2" on behalf of user1 and hand back
        # only the list of matches.
        response = self.get_success(
            self.handler.search_users(searcher, "user2", 10)
        )
        return response["results"]

    # Users are not added to the directory until they join a room, so the
    # first search finds nothing.
    self.assertEqual(len(search_for_user2()), 0)

    room_id = self.helper.create_room_as(
        searcher, is_public=False, tok=searcher_token
    )
    self.helper.invite(room_id, src=searcher, targ=target, tok=searcher_token)
    self.helper.join(room_id, user=target, tok=target_token)

    # Check that the database has been populated correctly.
    private_shares = self.get_users_who_share_private_rooms()
    users_in_public_rooms = self.get_users_in_public_rooms()

    expected_shares = {
        (searcher, target, room_id),
        (target, searcher, room_id),
    }
    self.assertEqual(self._compress_shared(private_shares), expected_shares)
    self.assertEqual(users_in_public_rooms, [])

    # Configure a spam checker. It doesn't need any methods, so a bare
    # object stands in for a module that lacks check_username_for_spam.
    checker_wrapper = self.hs.get_spam_checker()
    checker_wrapper.spam_checker = object()

    # We get one search result when searching for user2 by user1.
    self.assertEqual(len(search_for_user2()), 1)

def _compress_shared(self, shared):
"""
Compress a list of users who share rooms dicts to a list of tuples.
Expand Down