From 6cf7b2e767893d2bfb0484c4b3598ca7ce873fa7 Mon Sep 17 00:00:00 2001 From: David Teller Date: Fri, 20 May 2022 09:04:35 +0200 Subject: [PATCH] Uniformize spam-checker API, part 2: check_event_for_spam Signed-off-by: David Teller --- changelog.d/12808.feature | 1 + docs/modules/spam_checker_callbacks.md | 27 +++++++++------ synapse/events/spamcheck.py | 46 ++++++++++++++++++++------ synapse/federation/federation_base.py | 5 +-- synapse/handlers/message.py | 11 +++--- synapse/spam_checker_api/__init__.py | 31 ++++++++++++++++- 6 files changed, 93 insertions(+), 28 deletions(-) create mode 100644 changelog.d/12808.feature diff --git a/changelog.d/12808.feature b/changelog.d/12808.feature new file mode 100644 index 000000000000..561c8b9d34a4 --- /dev/null +++ b/changelog.d/12808.feature @@ -0,0 +1 @@ +Update to `check_event_for_spam`. Deprecate the current callback signature, replace it with a new signature that is both less ambiguous (replacing booleans with explicit allow/block) and more powerful (ability to return explicit error codes). \ No newline at end of file diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 472d95718087..e6ec07e17412 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -11,22 +11,29 @@ The available spam checker callbacks are: ### `check_event_for_spam` _First introduced in Synapse v1.37.0_ +_Signature extended to support Allow and Code in Synapse v1.60.0_ +_Boolean return value deprecated in Synapse v1.60.0_ ```python -async def check_event_for_spam(event: "synapse.events.EventBase") -> Union[bool, str] +async def check_event_for_spam(event: "synapse.events.EventBase") -> Union[Allow, Code, DEPRECATED_STR, DEPRECATED_BOOL] ``` -Called when receiving an event from a client or via federation. 
The callback must return -either: -- an error message string, to indicate the event must be rejected because of spam and - give a rejection reason to forward to clients; -- the boolean `True`, to indicate that the event is spammy, but not provide further details; or -- the booelan `False`, to indicate that the event is not considered spammy. +Called when receiving an event from a client or via federation. The callback must return either: + - `synapse.spam_checker_api.ALLOW`, to allow the operation. Other callbacks + may still decide to reject it. + - `synapse.api.errors.Code` to reject the operation with an error code. In case + of doubt, `Code.FORBIDDEN` is a good error code. + - (deprecated) a `str` to reject the operation and specify an error message. Note that clients + typically will not localize the error message to the user's preferred locale. + - (deprecated) on `False`, behave as `ALLOW`. Deprecated as confusing, as some + callbacks expect `True` to allow and others `True` to reject. + - (deprecated) on `True`, behave as `Code.FORBIDDEN`. Deprecated as confusing, as + some callbacks expect `True` to allow and others `True` to reject. If multiple modules implement this callback, they will be considered in order. If a -callback returns `False`, Synapse falls through to the next one. The value of the first -callback that does not return `False` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `ALLOW`, Synapse falls through to the next one. The value of the +first callback that does not return `ALLOW` will be used. If this happens, Synapse +will not call any of the subsequent implementations of this callback. 
### `user_may_join_room` diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index f30207376ae2..8845e3f94640 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -27,9 +27,10 @@ Union, ) +from synapse.api.errors import Code from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper -from synapse.spam_checker_api import RegistrationBehaviour +from synapse.spam_checker_api import ALLOW, Allow, Decision, RegistrationBehaviour from synapse.types import RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable from synapse.util.metrics import Measure @@ -40,9 +41,16 @@ logger = logging.getLogger(__name__) + +# A boolean returned value, kept for backwards compatibility but deprecated. +DEPRECATED_BOOL = bool + +# A string returned value, kept for backwards compatibility but deprecated. +DEPRECATED_STR = str + CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ ["synapse.events.EventBase"], - Awaitable[Union[bool, str]], + Awaitable[Union[Allow, Code, DEPRECATED_BOOL, DEPRECATED_STR]], ] USER_MAY_JOIN_ROOM_CALLBACK = Callable[[str, str, bool], Awaitable[bool]] USER_MAY_INVITE_CALLBACK = Callable[[str, str, str], Awaitable[bool]] @@ -244,7 +252,7 @@ def register_callbacks( async def check_event_for_spam( self, event: "synapse.events.EventBase" - ) -> Union[bool, str]: + ) -> Union[Decision, str]: """Checks if a given event is considered "spammy" by this server. If the server considers an event spammy, then it will be rejected if @@ -255,18 +263,36 @@ async def check_event_for_spam( event: the event to be checked Returns: - True or a string if the event is spammy. If a string is returned it - will be used as the error message returned to the user. + - on `ALLOW`, the event is considered good (non-spammy) and should + be let through. Other spamcheck filters may still reject it. 
+ - on `Code`, the event is considered spammy and is rejected with a specific + error message/code. + - on `str`, the event is considered spammy and the string is used as error + message. This usage is generally discouraged as it doesn't support + internationalization. """ for callback in self._check_event_for_spam_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - res: Union[bool, str] = await delay_cancellation(callback(event)) - if res: - return res - - return False + res: Union[ + Decision, DEPRECATED_STR, DEPRECATED_BOOL + ] = await delay_cancellation(callback(event)) + if res is False or res is ALLOW: + # This spam-checker accepts the event. + # Other spam-checkers may reject it, though. + continue + elif res is True: + # This spam-checker rejects the event with deprecated + # return value `True` + return Code.FORBIDDEN + else: + # This spam-checker rejects the event either with a `str` + # or with a `Code`. In either case, we stop here. + return res + + # No spam-checker has rejected the event, let it pass. 
+ return ALLOW async def user_may_join_room( self, user_id: str, room_id: str, is_invited: bool diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 41ac49fdc8bf..00a1aa9e6281 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -15,6 +15,7 @@ import logging from typing import TYPE_CHECKING +import synapse from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import EventFormatVersions, RoomVersion @@ -98,9 +99,9 @@ async def _check_sigs_and_hash( ) return redacted_event - result = await self.spam_checker.check_event_for_spam(pdu) + spam_check = await self.spam_checker.check_event_for_spam(pdu) - if result: + if spam_check is not synapse.spam_checker_api.ALLOW: logger.warning("Event contains spam, soft-failing %s", pdu.event_id) # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). 
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 0951b9c71f75..aeefcc66b1e7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -23,6 +23,7 @@ from twisted.internet.interfaces import IDelayedCall +import synapse from synapse import event_auth from synapse.api.constants import ( EventContentFields, @@ -881,11 +882,11 @@ async def create_and_send_nonmember_event( event.sender, ) - spam_error = await self.spam_checker.check_event_for_spam(event) - if spam_error: - if not isinstance(spam_error, str): - spam_error = "Spam is not permitted here" - raise SynapseError(403, spam_error, Codes.FORBIDDEN) + spam_check = await self.spam_checker.check_event_for_spam(event) + if spam_check is not synapse.spam_checker_api.ALLOW: + raise SynapseError( + 403, "This message has been rejected as probable spam", spam_check + ) ev = await self.handle_new_client_event( requester=requester, diff --git a/synapse/spam_checker_api/__init__.py b/synapse/spam_checker_api/__init__.py index 73018f2d002e..1e847a9ed0a8 100644 --- a/synapse/spam_checker_api/__init__.py +++ b/synapse/spam_checker_api/__init__.py @@ -12,13 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. from enum import Enum +from typing import NewType, Union + +from synapse.api.errors import Code class RegistrationBehaviour(Enum): """ - Enum to define whether a registration request should allowed, denied, or shadow-banned. + Enum to define whether a registration request should be allowed, denied, or shadow-banned. """ ALLOW = "allow" SHADOW_BAN = "shadow_ban" DENY = "deny" + + +# Define a strongly-typed singleton value `ALLOW`. + +# Private NewType, to make sure that nobody outside this module +# defines an instance of `Allow`. +_Allow = NewType("_Allow", str) + +# Public NewType, to let the rest of the code mention type `Allow`. 
+Allow = NewType("Allow", _Allow) + +ALLOW = Allow(_Allow("Allow")) +""" +Return this constant to allow a message to pass. + +This is the ONLY legal value of type `Allow`. +""" + +Decision = Union[Allow, Code] +""" +Union to define whether a request should be allowed or rejected. + +To accept a request, return `ALLOW`. + +To reject a request without any specific information, use `Codes.FORBIDDEN`. +"""