From 10280fc9437038f7ef715873e491d54b0a6d2208 Mon Sep 17 00:00:00 2001 From: David Teller Date: Fri, 20 May 2022 14:53:25 +0200 Subject: [PATCH 01/14] Uniformize spam-checker API, part 1: the `Code` enum. (#12703) --- changelog.d/12703.misc | 1 + synapse/api/errors.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12703.misc diff --git a/changelog.d/12703.misc b/changelog.d/12703.misc new file mode 100644 index 000000000000..9aaa1bbaa3d0 --- /dev/null +++ b/changelog.d/12703.misc @@ -0,0 +1 @@ +Convert namespace class `Codes` into a string enum. \ No newline at end of file diff --git a/synapse/api/errors.py b/synapse/api/errors.py index cb3b7323d568..9614be6b4e46 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -17,6 +17,7 @@ import logging import typing +from enum import Enum from http import HTTPStatus from typing import Any, Dict, List, Optional, Union @@ -30,7 +31,11 @@ logger = logging.getLogger(__name__) -class Codes: +class Codes(str, Enum): + """ + All known error codes, as an enum of strings. + """ + UNRECOGNIZED = "M_UNRECOGNIZED" UNAUTHORIZED = "M_UNAUTHORIZED" FORBIDDEN = "M_FORBIDDEN" @@ -265,7 +270,9 @@ class UnrecognizedRequestError(SynapseError): """An error indicating we don't understand the request you're trying to make""" def __init__( - self, msg: str = "Unrecognized request", errcode: str = Codes.UNRECOGNIZED + self, + msg: str = "Unrecognized request", + errcode: str = Codes.UNRECOGNIZED, ): super().__init__(400, msg, errcode) From 39dee30f0120290d6ef3504815655df1a6cf47a5 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 20 May 2022 15:28:23 +0100 Subject: [PATCH 02/14] Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. (#12809) --- changelog.d/12672.feature | 1 + changelog.d/12672.misc | 1 - changelog.d/12809.feature | 1 + synapse/replication/tcp/commands.py | 12 ++++++++++++ synapse/replication/tcp/redis.py | 6 +++--- 5 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12672.feature delete mode 100644 changelog.d/12672.misc create mode 100644 changelog.d/12809.feature diff --git a/changelog.d/12672.feature b/changelog.d/12672.feature new file mode 100644 index 000000000000..b989e0d208c4 --- /dev/null +++ b/changelog.d/12672.feature @@ -0,0 +1 @@ +Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. \ No newline at end of file diff --git a/changelog.d/12672.misc b/changelog.d/12672.misc deleted file mode 100644 index 265e0a801f78..000000000000 --- a/changelog.d/12672.misc +++ /dev/null @@ -1 +0,0 @@ -Lay some foundation work to allow workers to only subscribe to some kinds of messages, reducing replication traffic. \ No newline at end of file diff --git a/changelog.d/12809.feature b/changelog.d/12809.feature new file mode 100644 index 000000000000..b989e0d208c4 --- /dev/null +++ b/changelog.d/12809.feature @@ -0,0 +1 @@ +Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. \ No newline at end of file diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index fe34948168ab..32f52e54d8c7 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -58,6 +58,15 @@ def get_logcontext_id(self) -> str: # by default, we just use the command name. return self.NAME + def redis_channel_name(self, prefix: str) -> str: + """ + Returns the Redis channel name upon which to publish this command. + + Args: + prefix: The prefix for the channel. + """ + return prefix + SC = TypeVar("SC", bound="_SimpleCommand") @@ -395,6 +404,9 @@ def __repr__(self) -> str: f"{self.user_agent!r}, {self.device_id!r}, {self.last_seen})" ) + def redis_channel_name(self, prefix: str) -> str: + return f"{prefix}/USER_IP" + class RemoteServerUpCommand(_SimpleCommand): """Sent when a worker has detected that a remote server is no longer diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py index 73294654eff1..fd1c0ec6afa2 100644 --- a/synapse/replication/tcp/redis.py +++ b/synapse/replication/tcp/redis.py @@ -221,10 +221,10 @@ async def _async_send_command(self, cmd: Command) -> None: # remote instances. tcp_outbound_commands_counter.labels(cmd.NAME, "redis").inc() + channel_name = cmd.redis_channel_name(self.synapse_stream_prefix) + await make_deferred_yieldable( - self.synapse_outbound_redis_connection.publish( - self.synapse_stream_prefix, encoded_string - ) + self.synapse_outbound_redis_connection.publish(channel_name, encoded_string) ) From fbf904bd54071ca22c8918e0e106dd2fb008d0fb Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 10:28:56 +0100 Subject: [PATCH 03/14] Fix media thumbnails being unusable before the index had been added in the background. (#12823) --- changelog.d/12823.bugfix | 1 + synapse/storage/database.py | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/12823.bugfix diff --git a/changelog.d/12823.bugfix b/changelog.d/12823.bugfix new file mode 100644 index 000000000000..1a1f5957e712 --- /dev/null +++ b/changelog.d/12823.bugfix @@ -0,0 +1 @@ +Fix a bug, introduced in Synapse 1.21.0, that led to media thumbnails being unusable before the index has been added in the background. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 5ddb58a8a2ca..a78d68a9d7fe 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -90,6 +90,8 @@ "device_lists_remote_extremeties": "device_lists_remote_extremeties_unique_idx", "device_lists_remote_cache": "device_lists_remote_cache_unique_idx", "event_search": "event_search_event_id_idx", + "local_media_repository_thumbnails": "local_media_repository_thumbnails_method_idx", + "remote_media_cache_thumbnails": "remote_media_repository_thumbnails_method_idx", } From 4fef76ca348209b7c9dd3c17d5f3d8ef12623c1b Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 10:29:24 +0100 Subject: [PATCH 04/14] Remove Caddy from the Synapse workers image used in Complement. (#12818) --- changelog.d/12818.misc | 1 + docker/complement/SynapseWorkers.Dockerfile | 12 +--- .../conf-workers/caddy.complement.json | 72 ------------------- .../conf-workers/caddy.supervisord.conf | 7 -- .../start-complement-synapse-workers.sh | 18 ++++- docker/conf-workers/nginx.conf.j2 | 16 +++++ docker/configure_workers_and_start.py | 5 ++ 7 files changed, 38 insertions(+), 93 deletions(-) create mode 100644 changelog.d/12818.misc delete mode 100644 docker/complement/conf-workers/caddy.complement.json delete mode 100644 docker/complement/conf-workers/caddy.supervisord.conf diff --git a/changelog.d/12818.misc b/changelog.d/12818.misc new file mode 100644 index 000000000000..2f9dacc21dd9 --- /dev/null +++ b/changelog.d/12818.misc @@ -0,0 +1 @@ +Remove Caddy from the Synapse workers image used in Complement. \ No newline at end of file diff --git a/docker/complement/SynapseWorkers.Dockerfile b/docker/complement/SynapseWorkers.Dockerfile index 9a4438e7303b..99a09cbc2bab 100644 --- a/docker/complement/SynapseWorkers.Dockerfile +++ b/docker/complement/SynapseWorkers.Dockerfile @@ -6,12 +6,6 @@ # https://github.com/matrix-org/synapse/blob/develop/docker/README-testing.md#testing-with-postgresql-and-single-or-multi-process-synapse FROM matrixdotorg/synapse-workers -# Download a caddy server to stand in front of nginx and terminate TLS using Complement's -# custom CA. -# We include this near the top of the file in order to cache the result. -RUN curl -OL "https://github.com/caddyserver/caddy/releases/download/v2.3.0/caddy_2.3.0_linux_amd64.tar.gz" && \ - tar xzf caddy_2.3.0_linux_amd64.tar.gz && rm caddy_2.3.0_linux_amd64.tar.gz && mv caddy /root - # Install postgresql RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y postgresql-13 @@ -31,16 +25,12 @@ COPY conf-workers/workers-shared.yaml /conf/workers/shared.yaml WORKDIR /data -# Copy the caddy config -COPY conf-workers/caddy.complement.json /root/caddy.json - COPY conf-workers/postgres.supervisord.conf /etc/supervisor/conf.d/postgres.conf -COPY conf-workers/caddy.supervisord.conf /etc/supervisor/conf.d/caddy.conf # Copy the entrypoint COPY conf-workers/start-complement-synapse-workers.sh / -# Expose caddy's listener ports +# Expose nginx's listener ports EXPOSE 8008 8448 ENTRYPOINT ["/start-complement-synapse-workers.sh"] diff --git a/docker/complement/conf-workers/caddy.complement.json b/docker/complement/conf-workers/caddy.complement.json deleted file mode 100644 index 09e2136af2e2..000000000000 --- a/docker/complement/conf-workers/caddy.complement.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "apps": { - "http": { - "servers": { - "srv0": { - "listen": [ - ":8448" - ], - "routes": [ - { - "match": [ - { - "host": [ - "{{ server_name }}" - ] - } - ], - "handle": [ - { - "handler": "subroute", - "routes": [ - { - "handle": [ - { - "handler": "reverse_proxy", - "upstreams": [ - { - "dial": "localhost:8008" - } - ] - } - ] - } - ] - } - ], - "terminal": true - } - ] - } - } - }, - "tls": { - "automation": { - "policies": [ - { - "subjects": [ - "{{ server_name }}" - ], - "issuers": [ - { - "module": "internal" - } - ], - "on_demand": true - } - ] - } - }, - "pki": { - "certificate_authorities": { - "local": { - "name": "Complement CA", - "root": { - "certificate": "/complement/ca/ca.crt", - "private_key": "/complement/ca/ca.key" - } - } - } - } - } - } diff --git a/docker/complement/conf-workers/caddy.supervisord.conf b/docker/complement/conf-workers/caddy.supervisord.conf deleted file mode 100644 index d9ddb51dac46..000000000000 --- a/docker/complement/conf-workers/caddy.supervisord.conf +++ /dev/null @@ -1,7 +0,0 @@ -[program:caddy] -command=/usr/local/bin/prefix-log /root/caddy run --config /root/caddy.json -autorestart=unexpected -stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -stderr_logfile=/dev/stderr -stderr_logfile_maxbytes=0 diff --git a/docker/complement/conf-workers/start-complement-synapse-workers.sh b/docker/complement/conf-workers/start-complement-synapse-workers.sh index b9a6b55bbe8e..a10b57a53f5e 100755 --- a/docker/complement/conf-workers/start-complement-synapse-workers.sh +++ b/docker/complement/conf-workers/start-complement-synapse-workers.sh @@ -9,9 +9,6 @@ function log { echo "$d $@" } -# Replace the server name in the caddy config -sed -i "s/{{ server_name }}/${SERVER_NAME}/g" /root/caddy.json - # Set the server name of the homeserver export SYNAPSE_SERVER_NAME=${SERVER_NAME} @@ -39,6 +36,21 @@ export SYNAPSE_WORKER_TYPES="\ appservice, \ pusher" + +# Generate a TLS key, then generate a certificate by having Complement's CA sign it +# Note that both the key and certificate are in PEM format (not DER). +openssl genrsa -out /conf/server.tls.key 2048 + +openssl req -new -key /conf/server.tls.key -out /conf/server.tls.csr \ + -subj "/CN=${SERVER_NAME}" + +openssl x509 -req -in /conf/server.tls.csr \ + -CA /complement/ca/ca.crt -CAkey /complement/ca/ca.key -set_serial 1 \ + -out /conf/server.tls.crt + +export SYNAPSE_TLS_CERT=/conf/server.tls.crt +export SYNAPSE_TLS_KEY=/conf/server.tls.key + # Run the script that writes the necessary config files and starts supervisord, which in turn # starts everything else exec /configure_workers_and_start.py diff --git a/docker/conf-workers/nginx.conf.j2 b/docker/conf-workers/nginx.conf.j2 index 1081979e06a0..967fc65e798c 100644 --- a/docker/conf-workers/nginx.conf.j2 +++ b/docker/conf-workers/nginx.conf.j2 @@ -9,6 +9,22 @@ server { listen 8008; listen [::]:8008; + {% if tls_cert_path is not none and tls_key_path is not none %} + listen 8448 ssl; + listen [::]:8448 ssl; + + ssl_certificate {{ tls_cert_path }}; + ssl_certificate_key {{ tls_key_path }}; + + # Some directives from cipherlist.eu (fka cipherli.st): + ssl_protocols TLSv1 TLSv1.1 TLSv1.2 TLSv1.3; + ssl_prefer_server_ciphers on; + ssl_ciphers "EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH"; + ssl_ecdh_curve secp384r1; # Requires nginx >= 1.1.0 + ssl_session_cache shared:SSL:10m; + ssl_session_tickets off; # Requires nginx >= 1.5.9 + {% endif %} + server_name localhost; # Nginx by default only allows file uploads up to 1M in size diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index b2b7938ae801..f46b9b675e90 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -21,6 +21,9 @@ # * SYNAPSE_REPORT_STATS: Whether to report stats. # * SYNAPSE_WORKER_TYPES: A comma separated list of worker names as specified in WORKER_CONFIG # below. Leave empty for no workers, or set to '*' for all possible workers. +# * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format. +# * SYNAPSE_TLS_KEY: Path to a TLS key. If this and SYNAPSE_TLS_CERT are specified, +# Nginx will be configured to serve TLS on port 8448. # # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -501,6 +504,8 @@ def generate_worker_files( "/etc/nginx/conf.d/matrix-synapse.conf", worker_locations=nginx_location_config, upstream_directives=nginx_upstream_config, + tls_cert_path=os.environ.get("SYNAPSE_TLS_CERT"), + tls_key_path=os.environ.get("SYNAPSE_TLS_KEY"), ) # Supervisord config From a6ab3f56196d0067a5be25917c24988a734f0d51 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 May 2022 11:28:14 +0100 Subject: [PATCH 05/14] Add a windows->unix file endings commit to git blame ignore file (#12824) --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 83ddd568c207..50d28c68eeb8 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -6,3 +6,6 @@ aff1eb7c671b0a3813407321d2702ec46c71fa56 # Update black to 20.8b1 (#9381). 0a00b7ff14890987f09112a2ae696c61001e6cf1 + +# Convert tests/rest/admin/test_room.py to unix file endings (#7953). +c4268e3da64f1abb5b31deaeb5769adb6510c0a7 \ No newline at end of file From 438925c422fec9bffe6e90633abe8875c0c5fb5c Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 12:20:30 +0100 Subject: [PATCH 06/14] Fix Complement `TestCanRegisterAdmin` with workers, by adding Complement's shared registration secret. (#12819) --- changelog.d/12819.misc | 1 + docker/complement/conf-workers/workers-shared.yaml | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 changelog.d/12819.misc diff --git a/changelog.d/12819.misc b/changelog.d/12819.misc new file mode 100644 index 000000000000..7a03102a632d --- /dev/null +++ b/changelog.d/12819.misc @@ -0,0 +1 @@ +Add Complement's shared registration secret to the Complement worker image. This fixes tests that depend on it. \ No newline at end of file diff --git a/docker/complement/conf-workers/workers-shared.yaml b/docker/complement/conf-workers/workers-shared.yaml index 86ee11ecd0e5..cd7b50c65cc3 100644 --- a/docker/complement/conf-workers/workers-shared.yaml +++ b/docker/complement/conf-workers/workers-shared.yaml @@ -5,6 +5,12 @@ enable_registration: true enable_registration_without_verification: true bcrypt_rounds: 4 +## Registration ## + +# Needed by Complement to register admin users +# DO NOT USE in a production configuration! This should be a random secret. +registration_shared_secret: complement + ## Federation ## # trust certs signed by Complement's CA From 444588c5fc5e4fd0f3796d389fe5f062acc55286 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 23 May 2022 13:23:26 +0200 Subject: [PATCH 07/14] Add some type hints to tests files (#12833) Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/12833.misc | 1 + mypy.ini | 8 -------- tests/http/test_servlet.py | 14 ++++++++------ tests/http/test_site.py | 2 +- tests/scripts/test_new_matrix_user.py | 13 +++++++------ tests/storage/test_base.py | 2 +- tests/storage/test_roommember.py | 2 +- 7 files changed, 19 insertions(+), 23 deletions(-) create mode 100644 changelog.d/12833.misc diff --git a/changelog.d/12833.misc b/changelog.d/12833.misc new file mode 100644 index 000000000000..fad5df1afa34 --- /dev/null +++ b/changelog.d/12833.misc @@ -0,0 +1 @@ +Add some type hints to test files. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index df2622df983a..fe3e3f9b8efd 100644 --- a/mypy.ini +++ b/mypy.ini @@ -41,16 +41,11 @@ exclude = (?x) |tests/events/test_utils.py |tests/federation/test_federation_catch_up.py |tests/federation/test_federation_sender.py - |tests/federation/test_federation_server.py |tests/federation/transport/test_knocking.py - |tests/federation/transport/test_server.py |tests/handlers/test_typing.py |tests/http/federation/test_matrix_federation_agent.py |tests/http/federation/test_srv_resolver.py - |tests/http/test_fedclient.py |tests/http/test_proxyagent.py - |tests/http/test_servlet.py - |tests/http/test_site.py |tests/logging/__init__.py |tests/logging/test_terse_json.py |tests/module_api/test_api.py @@ -59,12 +54,9 @@ exclude = (?x) |tests/push/test_push_rule_evaluator.py |tests/rest/client/test_transactions.py |tests/rest/media/v1/test_media_storage.py - |tests/scripts/test_new_matrix_user.py |tests/server.py |tests/server_notices/test_resource_limits_server_notices.py |tests/state/test_v2.py - |tests/storage/test_base.py - |tests/storage/test_roommember.py |tests/test_metrics.py |tests/test_server.py |tests/test_state.py diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py index ad521525cfaa..b3655d7b44c2 100644 --- a/tests/http/test_servlet.py +++ b/tests/http/test_servlet.py @@ -49,19 +49,21 @@ def test_parse_json_value(self): """Basic tests for parse_json_value_from_request.""" # Test round-tripping. obj = {"foo": 1} - result = parse_json_value_from_request(make_request(obj)) - self.assertEqual(result, obj) + result1 = parse_json_value_from_request(make_request(obj)) + self.assertEqual(result1, obj) # Results don't have to be objects. - result = parse_json_value_from_request(make_request(b'["foo"]')) - self.assertEqual(result, ["foo"]) + result2 = parse_json_value_from_request(make_request(b'["foo"]')) + self.assertEqual(result2, ["foo"]) # Test empty. with self.assertRaises(SynapseError): parse_json_value_from_request(make_request(b"")) - result = parse_json_value_from_request(make_request(b""), allow_empty_body=True) - self.assertIsNone(result) + result3 = parse_json_value_from_request( + make_request(b""), allow_empty_body=True + ) + self.assertIsNone(result3) # Invalid UTF-8. with self.assertRaises(SynapseError): diff --git a/tests/http/test_site.py b/tests/http/test_site.py index 8c13b4f6931e..b2dbf76d33b1 100644 --- a/tests/http/test_site.py +++ b/tests/http/test_site.py @@ -36,7 +36,7 @@ def test_large_request(self): # as a control case, first send a regular request. # complete the connection and wire it up to a fake transport - client_address = IPv6Address("TCP", "::1", "2345") + client_address = IPv6Address("TCP", "::1", 2345) protocol = factory.buildProtocol(client_address) transport = StringTransport() protocol.makeConnection(transport) diff --git a/tests/scripts/test_new_matrix_user.py b/tests/scripts/test_new_matrix_user.py index 19a145eeb65e..22f99c6ab1ce 100644 --- a/tests/scripts/test_new_matrix_user.py +++ b/tests/scripts/test_new_matrix_user.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List from unittest.mock import Mock, patch from synapse._scripts.register_new_matrix_user import request_registration @@ -49,8 +50,8 @@ def post(url, json=None, verify=None): requests.post = post # The fake stdout will be written here - out = [] - err_code = [] + out: List[str] = [] + err_code: List[int] = [] with patch("synapse._scripts.register_new_matrix_user.requests", requests): request_registration( @@ -85,8 +86,8 @@ def get(url, verify=None): requests.get = get # The fake stdout will be written here - out = [] - err_code = [] + out: List[str] = [] + err_code: List[int] = [] with patch("synapse._scripts.register_new_matrix_user.requests", requests): request_registration( @@ -137,8 +138,8 @@ def post(url, json=None, verify=None): requests.post = post # The fake stdout will be written here - out = [] - err_code = [] + out: List[str] = [] + err_code: List[int] = [] with patch("synapse._scripts.register_new_matrix_user.requests", requests): request_registration( diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index a8ffb52c0503..cce8e75c7475 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -60,7 +60,7 @@ def runWithConnection(func, *args, **kwargs): db = DatabasePool(Mock(), Mock(config=sqlite_config), fake_engine) db._db_pool = self.db_pool - self.datastore = SQLBaseStore(db, None, hs) + self.datastore = SQLBaseStore(db, None, hs) # type: ignore[arg-type] @defer.inlineCallbacks def test_insert_1col(self): diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index a2a9c05f24c8..1218786d79d8 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -34,7 +34,7 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): room.register_servlets, ] - def prepare(self, reactor: MemoryReactor, clock: Clock, hs: TestHomeServer) -> None: + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: TestHomeServer) -> None: # type: ignore[override] # We can't test the RoomMemberStore on its own without the other event # storage logic From 67aae05ece9b6e07fedc73f737c0d6db6351d6c7 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 14:11:06 +0100 Subject: [PATCH 08/14] Support registering Application Services when running with workers under Complement. (#12826) Co-authored-by: Patrick Cloke --- changelog.d/12826.misc | 1 + .../start-complement-synapse-workers.sh | 5 +++++ docker/conf-workers/shared.yaml.j2 | 11 ++++++++++- docker/configure_workers_and_start.py | 15 +++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12826.misc diff --git a/changelog.d/12826.misc b/changelog.d/12826.misc new file mode 100644 index 000000000000..f5e91f1ed592 --- /dev/null +++ b/changelog.d/12826.misc @@ -0,0 +1 @@ +Support registering Application Services when running with workers under Complement. \ No newline at end of file diff --git a/docker/complement/conf-workers/start-complement-synapse-workers.sh b/docker/complement/conf-workers/start-complement-synapse-workers.sh index a10b57a53f5e..b7e24440006f 100755 --- a/docker/complement/conf-workers/start-complement-synapse-workers.sh +++ b/docker/complement/conf-workers/start-complement-synapse-workers.sh @@ -36,6 +36,11 @@ export SYNAPSE_WORKER_TYPES="\ appservice, \ pusher" +# Add Complement's appservice registration directory, if there is one +# (It can be absent when there are no application services in this test!) +if [ -d /complement/appservice ]; then + export SYNAPSE_AS_REGISTRATION_DIR=/complement/appservice +fi # Generate a TLS key, then generate a certificate by having Complement's CA sign it # Note that both the key and certificate are in PEM format (not DER). diff --git a/docker/conf-workers/shared.yaml.j2 b/docker/conf-workers/shared.yaml.j2 index f94b8c6aca0f..644ed788f3d5 100644 --- a/docker/conf-workers/shared.yaml.j2 +++ b/docker/conf-workers/shared.yaml.j2 @@ -6,4 +6,13 @@ redis: enabled: true -{{ shared_worker_config }} \ No newline at end of file +{% if appservice_registrations is not none %} +## Application Services ## +# A list of application service config files to use. +app_service_config_files: +{%- for path in appservice_registrations %} + - "{{ path }}" +{%- endfor %} +{%- endif %} + +{{ shared_worker_config }} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index f46b9b675e90..b6ad14117325 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -21,6 +21,8 @@ # * SYNAPSE_REPORT_STATS: Whether to report stats. # * SYNAPSE_WORKER_TYPES: A comma separated list of worker names as specified in WORKER_CONFIG # below. Leave empty for no workers, or set to '*' for all possible workers. +# * SYNAPSE_AS_REGISTRATION_DIR: If specified, a directory in which .yaml and .yml files +# will be treated as Application Service registration files. # * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format. # * SYNAPSE_TLS_KEY: Path to a TLS key. If this and SYNAPSE_TLS_CERT are specified, # Nginx will be configured to serve TLS on port 8448. @@ -32,6 +34,7 @@ import os import subprocess import sys +from pathlib import Path from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Set import jinja2 @@ -491,11 +494,23 @@ def generate_worker_files( master_log_config = generate_worker_log_config(environ, "master", data_dir) shared_config["log_config"] = master_log_config + # Find application service registrations + appservice_registrations = None + appservice_registration_dir = os.environ.get("SYNAPSE_AS_REGISTRATION_DIR") + if appservice_registration_dir: + # Scan for all YAML files that should be application service registrations. + appservice_registrations = [ + str(reg_path.resolve()) + for reg_path in Path(appservice_registration_dir).iterdir() + if reg_path.suffix.lower() in (".yaml", ".yml") + ] + # Shared homeserver config convert( "/conf/shared.yaml.j2", "/conf/workers/shared.yaml", shared_worker_config=yaml.dump(shared_config), + appservice_registrations=appservice_registrations, ) # Nginx config From 7a68203cde312c57137735a19c274a6d8470a2bf Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 17:27:05 +0100 Subject: [PATCH 09/14] Disable 'faster room join' Complement tests when testing against Synapse with workers. (#12842) --- changelog.d/12842.misc | 1 + scripts-dev/complement.sh | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12842.misc diff --git a/changelog.d/12842.misc b/changelog.d/12842.misc new file mode 100644 index 000000000000..cec3f97d86fd --- /dev/null +++ b/changelog.d/12842.misc @@ -0,0 +1 @@ +Disable 'faster room join' Complement tests when testing against Synapse with workers. \ No newline at end of file diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 190df6909a6a..ca476d9a5e61 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -45,6 +45,8 @@ docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . extra_test_args=() +test_tags="synapse_blacklist,msc2716,msc3030" + # If we're using workers, modify the docker files slightly. if [[ -n "$WORKERS" ]]; then # Build the workers docker image (from the base Synapse image). @@ -65,6 +67,10 @@ if [[ -n "$WORKERS" ]]; then else export COMPLEMENT_BASE_IMAGE=complement-synapse COMPLEMENT_DOCKERFILE=Dockerfile + + # We only test faster room joins on monoliths, because they are purposefully + # being developed without worker support to start with. + test_tags="$test_tags,faster_joins" fi # Build the Complement image from the Synapse image we just built. @@ -73,4 +79,5 @@ docker build -t $COMPLEMENT_BASE_IMAGE -f "docker/complement/$COMPLEMENT_DOCKERF # Run the tests! echo "Images built; running complement" cd "$COMPLEMENT_DIR" -go test -v -tags synapse_blacklist,msc2716,msc3030,faster_joins -count=1 "${extra_test_args[@]}" "$@" ./tests/... + +go test -v -tags $test_tags -count=1 "${extra_test_args[@]}" "$@" ./tests/... From a608ac847b36dd72634f21502be42e785add8b65 Mon Sep 17 00:00:00 2001 From: Jess Porter Date: Mon, 23 May 2022 17:36:21 +0100 Subject: [PATCH 10/14] add SpamChecker callback for silently dropping inbound federated events (#12744) Signed-off-by: jesopo --- changelog.d/12744.feature | 1 + docs/modules/spam_checker_callbacks.md | 18 ++++++++++ synapse/events/spamcheck.py | 40 +++++++++++++++++++++ synapse/federation/federation_server.py | 48 ++++++++++++++++++++++--- synapse/module_api/__init__.py | 5 +++ 5 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12744.feature diff --git a/changelog.d/12744.feature b/changelog.d/12744.feature new file mode 100644 index 000000000000..9836d94f8ca6 --- /dev/null +++ b/changelog.d/12744.feature @@ -0,0 +1 @@ +Add a `drop_federated_event` callback to `SpamChecker` to disregard inbound federated events before they take up much processing power, in an emergency. diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 472d95718087..27c5a0ed5cfe 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -249,6 +249,24 @@ callback returns `False`, Synapse falls through to the next one. The value of th callback that does not return `False` will be used. If this happens, Synapse will not call any of the subsequent implementations of this callback. +### `should_drop_federated_event` + +_First introduced in Synapse v1.60.0_ + +```python +async def should_drop_federated_event(event: "synapse.events.EventBase") -> bool +``` + +Called when checking whether a remote server can federate an event with us. **Returning +`True` from this function will silently drop a federated event and split-brain our view +of a room's DAG, and thus you shouldn't use this callback unless you know what you are +doing.** + +If multiple modules implement this callback, they will be considered in order. If a +callback returns `False`, Synapse falls through to the next one. The value of the first +callback that does not return `False` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ## Example The example below is a module that implements the spam checker callback diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index f30207376ae2..61bcbe2abe60 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -44,6 +44,10 @@ ["synapse.events.EventBase"], Awaitable[Union[bool, str]], ] +SHOULD_DROP_FEDERATED_EVENT_CALLBACK = Callable[ + ["synapse.events.EventBase"], + Awaitable[Union[bool, str]], +] USER_MAY_JOIN_ROOM_CALLBACK = Callable[[str, str, bool], Awaitable[bool]] USER_MAY_INVITE_CALLBACK = Callable[[str, str, str], Awaitable[bool]] USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[[str, str, str, str], Awaitable[bool]] @@ -168,6 +172,9 @@ def __init__(self, hs: "synapse.server.HomeServer") -> None: self.clock = hs.get_clock() self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] + self._should_drop_federated_event_callbacks: List[ + SHOULD_DROP_FEDERATED_EVENT_CALLBACK + ] = [] self._user_may_join_room_callbacks: List[USER_MAY_JOIN_ROOM_CALLBACK] = [] self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = [] self._user_may_send_3pid_invite_callbacks: List[ @@ -191,6 +198,9 @@ def __init__(self, hs: "synapse.server.HomeServer") -> None: def register_callbacks( self, check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None, + should_drop_federated_event: Optional[ + SHOULD_DROP_FEDERATED_EVENT_CALLBACK + ] = None, user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None, user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None, user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None, @@ -209,6 +219,11 @@ def register_callbacks( if check_event_for_spam is not None: self._check_event_for_spam_callbacks.append(check_event_for_spam) + if should_drop_federated_event is not None: + self._should_drop_federated_event_callbacks.append( + should_drop_federated_event + ) + if user_may_join_room is not None: self._user_may_join_room_callbacks.append(user_may_join_room) @@ -268,6 +283,31 @@ async def check_event_for_spam( return False + async def should_drop_federated_event( + self, event: "synapse.events.EventBase" + ) -> Union[bool, str]: + """Checks if a given federated event is considered "spammy" by this + server. + + If the server considers an event spammy, it will be silently dropped, + and in doing so will split-brain our view of the room's DAG. + + Args: + event: the event to be checked + + Returns: + True if the event should be silently dropped + """ + for callback in self._should_drop_federated_event_callbacks: + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + res: Union[bool, str] = await delay_cancellation(callback(event)) + if res: + return res + + return False + async def user_may_join_room( self, user_id: str, room_id: str, is_invited: bool ) -> bool: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 884b5d60b4f9..b8232e5257d2 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -110,6 +110,7 @@ def __init__(self, hs: "HomeServer"): self.handler = hs.get_federation_handler() self.storage = hs.get_storage() + self._spam_checker = hs.get_spam_checker() self._federation_event_handler = hs.get_federation_event_handler() self.state = hs.get_state_handler() self._event_auth_handler = hs.get_event_auth_handler() @@ -1019,6 +1020,12 @@ async def _handle_received_pdu(self, origin: str, pdu: EventBase) -> None: except SynapseError as e: raise FederationError("ERROR", e.code, e.msg, affected=pdu.event_id) + if await self._spam_checker.should_drop_federated_event(pdu): + logger.warning( + "Unstaged federated event contains spam, dropping %s", pdu.event_id + ) + return + # Add the event to our staging area await self.store.insert_received_event_to_staging(origin, pdu) @@ -1032,6 +1039,41 @@ async def _handle_received_pdu(self, origin: str, pdu: EventBase) -> None: pdu.room_id, room_version, lock, origin, pdu ) + async def _get_next_nonspam_staged_event_for_room( + self, room_id: str, room_version: RoomVersion + ) -> Optional[Tuple[str, EventBase]]: + """Fetch the first non-spam event from staging queue. + + Args: + room_id: the room to fetch the first non-spam event in. + room_version: the version of the room. + + Returns: + The first non-spam event in that room. + """ + + while True: + # We need to do this check outside the lock to avoid a race between + # a new event being inserted by another instance and it attempting + # to acquire the lock. + next = await self.store.get_next_staged_event_for_room( + room_id, room_version + ) + + if next is None: + return None + + origin, event = next + + if await self._spam_checker.should_drop_federated_event(event): + logger.warning( + "Staged federated event contains spam, dropping %s", + event.event_id, + ) + continue + + return next + @wrap_as_background_process("_process_incoming_pdus_in_room_inner") async def _process_incoming_pdus_in_room_inner( self, @@ -1109,12 +1151,10 @@ async def _process_incoming_pdus_in_room_inner( (self._clock.time_msec() - received_ts) / 1000 ) - # We need to do this check outside the lock to avoid a race between - # a new event being inserted by another instance and it attempting - # to acquire the lock. - next = await self.store.get_next_staged_event_for_room( + next = await self._get_next_nonspam_staged_event_for_room( room_id, room_version ) + if not next: break diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 73f92d2df8d6..c4f661bb9382 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -47,6 +47,7 @@ CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK, CHECK_REGISTRATION_FOR_SPAM_CALLBACK, CHECK_USERNAME_FOR_SPAM_CALLBACK, + SHOULD_DROP_FEDERATED_EVENT_CALLBACK, USER_MAY_CREATE_ROOM_ALIAS_CALLBACK, USER_MAY_CREATE_ROOM_CALLBACK, USER_MAY_INVITE_CALLBACK, @@ -234,6 +235,9 @@ def register_spam_checker_callbacks( self, *, check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None, + should_drop_federated_event: Optional[ + SHOULD_DROP_FEDERATED_EVENT_CALLBACK + ] = None, user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None, user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None, user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None, @@ -254,6 +258,7 @@ def register_spam_checker_callbacks( """ return self._spam_checker.register_callbacks( check_event_for_spam=check_event_for_spam, + should_drop_federated_event=should_drop_federated_event, user_may_join_room=user_may_join_room, user_may_invite=user_may_invite, user_may_send_3pid_invite=user_may_send_3pid_invite, From 4cc4229cd7a55d2556c798fecbb1c9660dc821c8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 23 May 2022 19:18:23 +0200 Subject: [PATCH 11/14] Prevent expired events from being filtered out when retention is disabled (#12611) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Co-authored-by: Patrick Cloke --- changelog.d/12611.bugfix | 1 + synapse/handlers/pagination.py | 2 +- synapse/storage/databases/main/room.py | 45 ++++++++++++++------------ synapse/types.py | 6 ++++ synapse/visibility.py | 6 ++-- tests/rest/client/test_relations.py | 8 ++--- tests/rest/client/test_retention.py | 35 +++++++++++++++++--- 7 files changed, 71 insertions(+), 32 deletions(-) create mode 100644 changelog.d/12611.bugfix diff --git a/changelog.d/12611.bugfix b/changelog.d/12611.bugfix new file mode 100644 index 000000000000..093c45a20b7f --- /dev/null +++ b/changelog.d/12611.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.7.0 that would prevent events from being sent to clients if there's a retention policy in the room when the support for retention policies is disabled. diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 6ae88add9526..19a440705027 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -239,7 +239,7 @@ async def purge_history_for_rooms_in_range( # defined in the server's configuration, we can safely assume that's the # case and use it for this room. max_lifetime = ( - retention_policy["max_lifetime"] or self._retention_default_max_lifetime + retention_policy.max_lifetime or self._retention_default_max_lifetime ) # Cap the effective max_lifetime to be within the range allowed in the diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 87e9482c6054..ded15b92ef84 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -45,7 +45,7 @@ from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.types import Cursor from synapse.storage.util.id_generators import IdGenerator -from synapse.types import JsonDict, ThirdPartyInstanceID +from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID from synapse.util import json_encoder from synapse.util.caches.descriptors import cached from synapse.util.stringutils import MXC_REGEX @@ -699,7 +699,7 @@ def delete_ratelimit_txn(txn: LoggingTransaction) -> None: await self.db_pool.runInteraction("delete_ratelimit", delete_ratelimit_txn) @cached() - async def get_retention_policy_for_room(self, room_id: str) -> Dict[str, int]: + async def get_retention_policy_for_room(self, room_id: str) -> RetentionPolicy: """Get the retention policy for a given room. If no retention policy has been found for this room, returns a policy defined @@ -707,12 +707,20 @@ async def get_retention_policy_for_room(self, room_id: str) -> Dict[str, int]: the 'max_lifetime' if no default policy has been defined in the server's configuration). + If support for retention policies is disabled, a policy with a 'min_lifetime' and + 'max_lifetime' of None is returned. + Args: room_id: The ID of the room to get the retention policy of. Returns: A dict containing "min_lifetime" and "max_lifetime" for this room. """ + # If the room retention feature is disabled, return a policy with no minimum nor + # maximum. This prevents incorrectly filtering out events when sending to + # the client. + if not self.config.retention.retention_enabled: + return RetentionPolicy() def get_retention_policy_for_room_txn( txn: LoggingTransaction, @@ -736,10 +744,10 @@ def get_retention_policy_for_room_txn( # If we don't know this room ID, ret will be None, in this case return the default # policy. if not ret: - return { - "min_lifetime": self.config.retention.retention_default_min_lifetime, - "max_lifetime": self.config.retention.retention_default_max_lifetime, - } + return RetentionPolicy( + min_lifetime=self.config.retention.retention_default_min_lifetime, + max_lifetime=self.config.retention.retention_default_max_lifetime, + ) min_lifetime = ret[0]["min_lifetime"] max_lifetime = ret[0]["max_lifetime"] @@ -754,10 +762,10 @@ def get_retention_policy_for_room_txn( if max_lifetime is None: max_lifetime = self.config.retention.retention_default_max_lifetime - return { - "min_lifetime": min_lifetime, - "max_lifetime": max_lifetime, - } + return RetentionPolicy( + min_lifetime=min_lifetime, + max_lifetime=max_lifetime, + ) async def get_media_mxcs_in_room(self, room_id: str) -> Tuple[List[str], List[str]]: """Retrieves all the local and remote media MXC URIs in a given room @@ -994,7 +1002,7 @@ def _quarantine_media_txn( async def get_rooms_for_retention_period_in_range( self, min_ms: Optional[int], max_ms: Optional[int], include_null: bool = False - ) -> Dict[str, Dict[str, Optional[int]]]: + ) -> Dict[str, RetentionPolicy]: """Retrieves all of the rooms within the given retention range. Optionally includes the rooms which don't have a retention policy. @@ -1016,7 +1024,7 @@ async def get_rooms_for_retention_period_in_range( def get_rooms_for_retention_period_in_range_txn( txn: LoggingTransaction, - ) -> Dict[str, Dict[str, Optional[int]]]: + ) -> Dict[str, RetentionPolicy]: range_conditions = [] args = [] @@ -1047,10 +1055,10 @@ def get_rooms_for_retention_period_in_range_txn( rooms_dict = {} for row in rows: - rooms_dict[row["room_id"]] = { - "min_lifetime": row["min_lifetime"], - "max_lifetime": row["max_lifetime"], - } + rooms_dict[row["room_id"]] = RetentionPolicy( + min_lifetime=row["min_lifetime"], + max_lifetime=row["max_lifetime"], + ) if include_null: # If required, do a second query that retrieves all of the rooms we know @@ -1065,10 +1073,7 @@ def get_rooms_for_retention_period_in_range_txn( # policy in its state), add it with a null policy. for row in rows: if row["room_id"] not in rooms_dict: - rooms_dict[row["room_id"]] = { - "min_lifetime": None, - "max_lifetime": None, - } + rooms_dict[row["room_id"]] = RetentionPolicy() return rooms_dict diff --git a/synapse/types.py b/synapse/types.py index bd8071d51d78..6f7128ddd604 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -932,3 +932,9 @@ class UserProfile(TypedDict): user_id: str display_name: Optional[str] avatar_url: Optional[str] + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class RetentionPolicy: + min_lifetime: Optional[int] = None + max_lifetime: Optional[int] = None diff --git a/synapse/visibility.py b/synapse/visibility.py index de6d2ffc526a..da4af02796c3 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -22,7 +22,7 @@ from synapse.events.utils import prune_event from synapse.storage import Storage from synapse.storage.state import StateFilter -from synapse.types import StateMap, get_domain_from_id +from synapse.types import RetentionPolicy, StateMap, get_domain_from_id logger = logging.getLogger(__name__) @@ -94,7 +94,7 @@ async def filter_events_for_client( if filter_send_to_client: room_ids = {e.room_id for e in events} - retention_policies = {} + retention_policies: Dict[str, RetentionPolicy] = {} for room_id in room_ids: retention_policies[ @@ -137,7 +137,7 @@ def allowed(event: EventBase) -> Optional[EventBase]: # events. if not event.is_state(): retention_policy = retention_policies[event.room_id] - max_lifetime = retention_policy.get("max_lifetime") + max_lifetime = retention_policy.max_lifetime if max_lifetime is not None: oldest_allowed_ts = storage.main.clock.time_msec() - max_lifetime diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 27dee8f6975d..bc9cc51b92d5 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -995,7 +995,7 @@ def assert_annotations(bundled_aggregations: JsonDict) -> None: bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 7) + self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 6) def test_annotation_to_annotation(self) -> None: """Any relation to an annotation should be ignored.""" @@ -1031,7 +1031,7 @@ def assert_annotations(bundled_aggregations: JsonDict) -> None: bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 7) + self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 6) def test_thread(self) -> None: """ @@ -1060,7 +1060,7 @@ def assert_thread(bundled_aggregations: JsonDict) -> None: bundled_aggregations.get("latest_event"), ) - self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 10) + self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 9) def test_thread_with_bundled_aggregations_for_latest(self) -> None: """ @@ -1106,7 +1106,7 @@ def assert_thread(bundled_aggregations: JsonDict) -> None: bundled_aggregations["latest_event"].get("unsigned"), ) - self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 10) + self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 9) def test_nested_thread(self) -> None: """ diff --git a/tests/rest/client/test_retention.py b/tests/rest/client/test_retention.py index 7b8fe6d02522..2cd7a9e6c5f8 100644 --- a/tests/rest/client/test_retention.py +++ b/tests/rest/client/test_retention.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Dict from unittest.mock import Mock from twisted.test.proto_helpers import MemoryReactor @@ -252,16 +253,24 @@ class RetentionNoDefaultPolicyTestCase(unittest.HomeserverTestCase): room.register_servlets, ] - def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() - config["retention"] = { + def default_config(self) -> Dict[str, Any]: + config = super().default_config() + + retention_config = { "enabled": True, } + # Update this config with what's in the default config so that + # override_config works as expected. + retention_config.update(config.get("retention", {})) + config["retention"] = retention_config + + return config + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: mock_federation_client = Mock(spec=["backfill"]) self.hs = self.setup_test_homeserver( - config=config, federation_client=mock_federation_client, ) return self.hs @@ -295,6 +304,24 @@ def test_state_policy(self) -> None: self._test_retention(room_id, expected_code_for_first_event=404) + @unittest.override_config({"retention": {"enabled": False}}) + def test_visibility_when_disabled(self) -> None: + """Retention policies should be ignored when the retention feature is disabled.""" + room_id = self.helper.create_room_as(self.user_id, tok=self.token) + + self.helper.send_state( + room_id=room_id, + event_type=EventTypes.Retention, + body={"max_lifetime": one_day_ms}, + tok=self.token, + ) + + resp = self.helper.send(room_id=room_id, body="test", tok=self.token) + + self.reactor.advance(one_day_ms * 2 / 1000) + + self.get_event(room_id, resp["event_id"]) + def _test_retention( self, room_id: str, expected_code_for_first_event: int = 200 ) -> None: From 28199e93579b5a73841a95ed4d355322227432b5 Mon Sep 17 00:00:00 2001 From: David Teller Date: Mon, 23 May 2022 19:27:39 +0200 Subject: [PATCH 12/14] Uniformize spam-checker API, part 2: check_event_for_spam (#12808) Signed-off-by: David Teller --- changelog.d/12808.feature | 1 + docs/modules/spam_checker_callbacks.md | 27 ++++++++------ docs/upgrade.md | 29 +++++++++++++++ synapse/api/errors.py | 4 +-- synapse/events/spamcheck.py | 49 ++++++++++++++++++++------ synapse/federation/federation_base.py | 5 +-- synapse/handlers/message.py | 11 +++--- synapse/module_api/__init__.py | 5 +++ synapse/module_api/errors.py | 2 ++ synapse/spam_checker_api/__init__.py | 27 +++++++++++++- 10 files changed, 129 insertions(+), 31 deletions(-) create mode 100644 changelog.d/12808.feature diff --git a/changelog.d/12808.feature b/changelog.d/12808.feature new file mode 100644 index 000000000000..561c8b9d34a4 --- /dev/null +++ b/changelog.d/12808.feature @@ -0,0 +1 @@ +Update to `check_event_for_spam`. Deprecate the current callback signature, replace it with a new signature that is both less ambiguous (replacing booleans with explicit allow/block) and more powerful (ability to return explicit error codes). \ No newline at end of file diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 27c5a0ed5cfe..71f6f9f0ab45 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -11,22 +11,29 @@ The available spam checker callbacks are: ### `check_event_for_spam` _First introduced in Synapse v1.37.0_ +_Signature extended to support Allow and Code in Synapse v1.60.0_ +_Boolean and string return value types deprecated in Synapse v1.60.0_ ```python -async def check_event_for_spam(event: "synapse.events.EventBase") -> Union[bool, str] +async def check_event_for_spam(event: "synapse.module_api.EventBase") -> Union["synapse.module_api.ALLOW", "synapse.module_api.error.Codes", str, bool] ``` -Called when receiving an event from a client or via federation. The callback must return -either: -- an error message string, to indicate the event must be rejected because of spam and - give a rejection reason to forward to clients; -- the boolean `True`, to indicate that the event is spammy, but not provide further details; or -- the booelan `False`, to indicate that the event is not considered spammy. +Called when receiving an event from a client or via federation. The callback must return either: + - `synapse.module_api.ALLOW`, to allow the operation. Other callbacks + may still decide to reject it. + - `synapse.api.Codes` to reject the operation with an error code. In case + of doubt, `synapse.api.error.Codes.FORBIDDEN` is a good error code. + - (deprecated) a `str` to reject the operation and specify an error message. Note that clients + typically will not localize the error message to the user's preferred locale. + - (deprecated) on `False`, behave as `ALLOW`. Deprecated as confusing, as some + callbacks in expect `True` to allow and others `True` to reject. + - (deprecated) on `True`, behave as `synapse.api.error.Codes.FORBIDDEN`. Deprecated as confusing, as + some callbacks in expect `True` to allow and others `True` to reject. If multiple modules implement this callback, they will be considered in order. If a -callback returns `False`, Synapse falls through to the next one. The value of the first -callback that does not return `False` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.ALLOW`, Synapse falls through to the next one. The value of the +first callback that does not return `synapse.module_api.ALLOW` will be used. If this happens, Synapse +will not call any of the subsequent implementations of this callback. ### `user_may_join_room` diff --git a/docs/upgrade.md b/docs/upgrade.md index 92ca31b2f8de..e7eadadb64bf 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -177,7 +177,36 @@ has queries that can be used to check a database for this problem in advance. +## SpamChecker API's `check_event_for_spam` has a new signature. +The previous signature has been deprecated. + +Whereas `check_event_for_spam` callbacks used to return `Union[str, bool]`, they should now return `Union["synapse.module_api.Allow", "synapse.module_api.errors.Codes"]`. + +This is part of an ongoing refactoring of the SpamChecker API to make it less ambiguous and more powerful. + +If your module implements `check_event_for_spam` as follows: + +```python +async def check_event_for_spam(event): + if ...: + # Event is spam + return True + # Event is not spam + return False +``` + +you should rewrite it as follows: + +```python +async def check_event_for_spam(event): + if ...: + # Event is spam, mark it as forbidden (you may use some more precise error + # code if it is useful). + return synapse.module_api.errors.Codes.FORBIDDEN + # Event is not spam, mark it as `ALLOW`. + return synapse.module_api.ALLOW +``` # Upgrading to v1.59.0 diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 9614be6b4e46..6650e826d5af 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -270,9 +270,7 @@ class UnrecognizedRequestError(SynapseError): """An error indicating we don't understand the request you're trying to make""" def __init__( - self, - msg: str = "Unrecognized request", - errcode: str = Codes.UNRECOGNIZED, + self, msg: str = "Unrecognized request", errcode: str = Codes.UNRECOGNIZED ): super().__init__(400, msg, errcode) diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 61bcbe2abe60..7984874e21df 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -27,9 +27,10 @@ Union, ) +from synapse.api.errors import Codes from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper -from synapse.spam_checker_api import RegistrationBehaviour +from synapse.spam_checker_api import Allow, Decision, RegistrationBehaviour from synapse.types import RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable from synapse.util.metrics import Measure @@ -40,9 +41,19 @@ logger = logging.getLogger(__name__) + CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ ["synapse.events.EventBase"], - Awaitable[Union[bool, str]], + Awaitable[ + Union[ + Allow, + Codes, + # Deprecated + bool, + # Deprecated + str, + ] + ], ] SHOULD_DROP_FEDERATED_EVENT_CALLBACK = Callable[ ["synapse.events.EventBase"], @@ -259,7 +270,7 @@ def register_callbacks( async def check_event_for_spam( self, event: "synapse.events.EventBase" - ) -> Union[bool, str]: + ) -> Union[Decision, str]: """Checks if a given event is considered "spammy" by this server. If the server considers an event spammy, then it will be rejected if @@ -270,18 +281,36 @@ async def check_event_for_spam( event: the event to be checked Returns: - True or a string if the event is spammy. If a string is returned it - will be used as the error message returned to the user. + - on `ALLOW`, the event is considered good (non-spammy) and should + be let through. Other spamcheck filters may still reject it. + - on `Code`, the event is considered spammy and is rejected with a specific + error message/code. + - on `str`, the event is considered spammy and the string is used as error + message. This usage is generally discouraged as it doesn't support + internationalization. """ for callback in self._check_event_for_spam_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - res: Union[bool, str] = await delay_cancellation(callback(event)) - if res: - return res - - return False + res: Union[Decision, str, bool] = await delay_cancellation( + callback(event) + ) + if res is False or res is Allow.ALLOW: + # This spam-checker accepts the event. + # Other spam-checkers may reject it, though. + continue + elif res is True: + # This spam-checker rejects the event with deprecated + # return value `True` + return Codes.FORBIDDEN + else: + # This spam-checker rejects the event either with a `str` + # or with a `Codes`. In either case, we stop here. + return res + + # No spam-checker has rejected the event, let it pass. + return Allow.ALLOW async def should_drop_federated_event( self, event: "synapse.events.EventBase" diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 41ac49fdc8bf..1e866b19d87b 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -15,6 +15,7 @@ import logging from typing import TYPE_CHECKING +import synapse from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import EventFormatVersions, RoomVersion @@ -98,9 +99,9 @@ async def _check_sigs_and_hash( ) return redacted_event - result = await self.spam_checker.check_event_for_spam(pdu) + spam_check = await self.spam_checker.check_event_for_spam(pdu) - if result: + if spam_check is not synapse.spam_checker_api.Allow.ALLOW: logger.warning("Event contains spam, soft-failing %s", pdu.event_id) # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e566ff1f8ed8..cb1bc4c06f1c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -23,6 +23,7 @@ from twisted.internet.interfaces import IDelayedCall +import synapse from synapse import event_auth from synapse.api.constants import ( EventContentFields, @@ -885,11 +886,11 @@ async def create_and_send_nonmember_event( event.sender, ) - spam_error = await self.spam_checker.check_event_for_spam(event) - if spam_error: - if not isinstance(spam_error, str): - spam_error = "Spam is not permitted here" - raise SynapseError(403, spam_error, Codes.FORBIDDEN) + spam_check = await self.spam_checker.check_event_for_spam(event) + if spam_check is not synapse.spam_checker_api.Allow.ALLOW: + raise SynapseError( + 403, "This message had been rejected as probable spam", spam_check + ) ev = await self.handle_new_client_event( requester=requester, diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index c4f661bb9382..95f3b2792793 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -35,6 +35,7 @@ from twisted.internet import defer from twisted.web.resource import Resource +from synapse import spam_checker_api from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.events.presence_router import ( @@ -140,6 +141,9 @@ PRESENCE_ALL_USERS = PresenceRouter.ALL_USERS +ALLOW = spam_checker_api.Allow.ALLOW +# Singleton value used to mark a message as permitted. + __all__ = [ "errors", "make_deferred_yieldable", @@ -147,6 +151,7 @@ "respond_with_html", "run_in_background", "cached", + "Allow", "UserID", "DatabasePool", "LoggingTransaction", diff --git a/synapse/module_api/errors.py b/synapse/module_api/errors.py index e58e0e60feab..bedd045d6fe1 100644 --- a/synapse/module_api/errors.py +++ b/synapse/module_api/errors.py @@ -15,6 +15,7 @@ """Exception types which are exposed as part of the stable module API""" from synapse.api.errors import ( + Codes, InvalidClientCredentialsError, RedirectException, SynapseError, @@ -24,6 +25,7 @@ from synapse.storage.push_rule import RuleNotFoundException __all__ = [ + "Codes", "InvalidClientCredentialsError", "RedirectException", "SynapseError", diff --git a/synapse/spam_checker_api/__init__.py b/synapse/spam_checker_api/__init__.py index 73018f2d002e..95132c80b70e 100644 --- a/synapse/spam_checker_api/__init__.py +++ b/synapse/spam_checker_api/__init__.py @@ -12,13 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. from enum import Enum +from typing import Union + +from synapse.api.errors import Codes class RegistrationBehaviour(Enum): """ - Enum to define whether a registration request should allowed, denied, or shadow-banned. + Enum to define whether a registration request should be allowed, denied, or shadow-banned. """ ALLOW = "allow" SHADOW_BAN = "shadow_ban" DENY = "deny" + + +# We define the following singleton enum rather than a string to be able to +# write `Union[Allow, ..., str]` in some of the callbacks for the spam-checker +# API, where the `str` is required to maintain backwards compatibility with +# previous versions of the API. +class Allow(Enum): + """ + Singleton to allow events to pass through in SpamChecker APIs. + """ + + ALLOW = "allow" + + +Decision = Union[Allow, Codes] +""" +Union to define whether a request should be allowed or rejected. + +To accept a request, return `ALLOW`. + +To reject a request without any specific information, use `Codes.FORBIDDEN`. +""" From 7c2a78bb3bd2091439722e9f1e4601837bcb8fc4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 23 May 2022 20:43:37 -0500 Subject: [PATCH 13/14] Marker events as state - MSC2716 (#12718) Sending marker events as state now so they are always able to be seen by homeservers (not lost in some timeline gap). Part of [MSC2716](https://github.com/matrix-org/matrix-spec-proposals/pull/2716) Complement tests: https://github.com/matrix-org/complement/pull/371 As initially discussed at https://github.com/matrix-org/matrix-spec-proposals/pull/2716#discussion_r782629097 and https://github.com/matrix-org/matrix-spec-proposals/pull/2716#discussion_r876684431 When someone joins a room, process all of the marker events we see in the current state. Marker events should be sent with a unique `state_key` so that they can all resolve in the current state to easily be discovered. Marker events as state - If we re-use the same `state_key` (like `""`), then we would have to fetch previous snapshots of state up through time to find all of the marker events. This way we can avoid all of that. This PR was originally doing this but then thought of the smarter way to tackle in an [out of band discussion with @erikjohnston](https://docs.google.com/document/d/1JJDuPfcPNX75fprdTWlxlaKjWOdbdJylbpZ03hzo638/edit#bookmark=id.sm92fqyq7vpp). - Also avoids state resolution conflicts where only one of the marker events win As a homeserver, when we see new marker state, we know there is new history imported somewhere back in time and should process it to fetch the insertion event where the historical messages are and set it as an insertion extremity. This way we know where to backfill more messages when someone asks for scrollback. --- changelog.d/12718.feature | 1 + synapse/handlers/federation_event.py | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12718.feature diff --git a/changelog.d/12718.feature b/changelog.d/12718.feature new file mode 100644 index 000000000000..1056f519a4c1 --- /dev/null +++ b/changelog.d/12718.feature @@ -0,0 +1 @@ +Update [MSC2716](https://github.com/matrix-org/matrix-spec-proposals/pull/2716) implementation to process marker events from the current state to avoid markers being lost in timeline gaps for federated servers which would cause the imported history to be undiscovered. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 05c122f22491..ca82df8a6d9e 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -477,7 +477,23 @@ async def process_remote_join( # and discover that we do not have it. event.internal_metadata.proactively_send = False - return await self.persist_events_and_notify(room_id, [(event, context)]) + stream_id_after_persist = await self.persist_events_and_notify( + room_id, [(event, context)] + ) + + # If we're joining the room again, check if there is new marker + # state indicating that there is new history imported somewhere in + # the DAG. Multiple markers can exist in the current state with + # unique state_keys. + # + # Do this after the state from the remote join was persisted (via + # `persist_events_and_notify`). Otherwise we can run into a + # situation where the create event doesn't exist yet in the + # `current_state_events` + for e in state: + await self._handle_marker_event(origin, e) + + return stream_id_after_persist async def update_state_for_partial_state_event( self, destination: str, event: EventBase @@ -1230,6 +1246,14 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> No # Nothing to retrieve then (invalid marker) return + already_seen_insertion_event = await self._store.have_seen_event( + marker_event.room_id, insertion_event_id + ) + if already_seen_insertion_event: + # No need to process a marker again if we have already seen the + # insertion event that it was pointing to + return + logger.debug( "_handle_marker_event: backfilling insertion event %s", insertion_event_id ) From f5b1c09909e81182bacc167e70188a7c43aad813 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 May 2022 11:35:08 +0100 Subject: [PATCH 14/14] Pin poetry.core in Docker images (#12853) --- changelog.d/12853.docker | 1 + docker/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12853.docker diff --git a/changelog.d/12853.docker b/changelog.d/12853.docker new file mode 100644 index 000000000000..cad10a79cc82 --- /dev/null +++ b/changelog.d/12853.docker @@ -0,0 +1 @@ +Fix the docker file after a dependency update. diff --git a/docker/Dockerfile b/docker/Dockerfile index ccc6a9f77849..7af0e51f97d2 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -55,7 +55,7 @@ RUN \ # NB: In poetry 1.2 `poetry export` will be moved into a plugin; we'll need to also # pip install poetry-plugin-export (https://github.com/python-poetry/poetry-plugin-export). RUN --mount=type=cache,target=/root/.cache/pip \ - pip install --user git+https://github.com/python-poetry/poetry.git@fb13b3a676f476177f7937ffa480ee5cff9a90a5 + pip install --user "poetry-core==1.1.0a7" "git+https://github.com/python-poetry/poetry.git@fb13b3a676f476177f7937ffa480ee5cff9a90a5" WORKDIR /synapse