diff --git a/changelog.d/10820.misc b/changelog.d/10820.misc
new file mode 100644
index 000000000000..4373bf6f6b6c
--- /dev/null
+++ b/changelog.d/10820.misc
@@ -0,0 +1 @@
+Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 6480d5a9f5eb..2a1e99e17a90 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -15,12 +15,12 @@
 import logging
 import re
 from collections import namedtuple
-from typing import Collection, List, Optional, Set
+from typing import Collection, Iterable, List, Optional, Set
 
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
-from synapse.storage.database import DatabasePool
+from synapse.storage.database import DatabasePool, LoggingTransaction
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 
@@ -32,14 +32,24 @@
 )
 
 
+def _clean_value_for_search(value: str) -> str:
+    """
+    Replaces any null code points in the string with spaces as
+    Postgres and SQLite do not like the insertion of strings with
+    null code points into the full-text search tables.
+    """
+    return value.replace("\u0000", " ")
+
+
 class SearchWorkerStore(SQLBaseStore):
-    def store_search_entries_txn(self, txn, entries):
+    def store_search_entries_txn(
+        self, txn: LoggingTransaction, entries: Iterable[SearchEntry]
+    ) -> None:
         """Add entries to the search table
 
         Args:
-            txn (cursor):
-            entries (iterable[SearchEntry]):
-                entries to be added to the table
+            txn: the database transaction used to run the inserts
+            entries: entries to be added to the table
         """
         if not self.hs.config.enable_search:
             return
@@ -55,7 +65,7 @@ def store_search_entries_txn(self, txn, entries):
                     entry.event_id,
                     entry.room_id,
                     entry.key,
-                    entry.value,
+                    _clean_value_for_search(entry.value),
                     entry.stream_ordering,
                     entry.origin_server_ts,
                 )
@@ -70,11 +80,16 @@ def store_search_entries_txn(self, txn, entries):
                 " VALUES (?,?,?,?)"
             )
             args = (
-                (entry.event_id, entry.room_id, entry.key, entry.value)
+                (
+                    entry.event_id,
+                    entry.room_id,
+                    entry.key,
+                    _clean_value_for_search(entry.value),
+                )
                 for entry in entries
             )
-
             txn.execute_batch(sql, args)
+
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
@@ -646,6 +661,7 @@ def f(txn):
                 for key in ("body", "name", "topic"):
                     v = event.content.get(key, None)
                     if v:
+                        v = _clean_value_for_search(v)
                         values.append(v)
 
                 if not values:
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
new file mode 100644
index 000000000000..8971ecccbd6d
--- /dev/null
+++ b/tests/storage/test_room_search.py
@@ -0,0 +1,74 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import synapse.rest.admin
+from synapse.rest.client import login, room
+from synapse.storage.engines import PostgresEngine
+
+from tests.unittest import HomeserverTestCase
+
+
+class NullByteInsertionTest(HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        login.register_servlets,
+        room.register_servlets,
+    ]
+
+    def test_null_byte(self):
+        """
+        Postgres/SQLite don't like null bytes going into the search tables. Internally
+        we replace those with a space.
+
+        Ensure this doesn't break anything.
+        """
+
+        # Register a user and create a room, create some messages
+        self.register_user("alice", "password")
+        access_token = self.login("alice", "password")
+        room_id = self.helper.create_room_as("alice", tok=access_token)
+
+        # Send messages and ensure they don't cause an internal server
+        # error
+        for body in ["hi\u0000bob", "another message", "hi alice"]:
+            response = self.helper.send(room_id, body, tok=access_token)
+            self.assertIn("event_id", response)
+
+        # Check that search works for the message where the null byte was replaced
+        store = self.hs.get_datastore()
+        result = self.get_success(
+            store.search_msgs([room_id], "hi bob", ["content.body"])
+        )
+        self.assertEqual(result.get("count"), 1)
+        if isinstance(store.database_engine, PostgresEngine):
+            self.assertIn("hi", result.get("highlights"))
+            self.assertIn("bob", result.get("highlights"))
+
+        # Check that search works for an unrelated message
+        result = self.get_success(
+            store.search_msgs([room_id], "another", ["content.body"])
+        )
+        self.assertEqual(result.get("count"), 1)
+        if isinstance(store.database_engine, PostgresEngine):
+            self.assertIn("another", result.get("highlights"))
+
+        # Check that search works for a search term that overlaps with the message
+        # containing a null byte and an unrelated message.
+        result = self.get_success(store.search_msgs([room_id], "hi", ["content.body"]))
+        self.assertEqual(result.get("count"), 2)
+        result = self.get_success(
+            store.search_msgs([room_id], "hi alice", ["content.body"])
+        )
+        if isinstance(store.database_engine, PostgresEngine):
+            self.assertIn("alice", result.get("highlights"))