From eb8a3d3bcda45dbabb72dc7aeb8b71512918365a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Aug 2022 11:51:23 +0100 Subject: [PATCH 1/7] Add some comments about how event push actions are stored --- .../databases/main/event_push_actions.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index dd2627037c38..89b707446890 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -12,6 +12,67 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Responsible for storing and fetching push actions / notifications. + +There are two main uses for push actions: + 1. Sending out push to a user's device; and + 2. Tracking per-room per-user notification counts (used in sync requests). + +For the former we simply use the `event_push_actions` table, which contains all +the calculated actions for a given user (which were calculated by the +`BulkPushRuleEvaluator`). + +For the latter we could simply count the number of rows in `event_push_actions` +table for a given room/user, but in practice this could be *very* heavyweight +when there were a large number of notifications (due to e.g. the user never +reading a room). Plus, keeping all push actions indefinitely uses a lot of disk +space. + +To fix these issues, we add a new table `event_push_summary` that tracks +per-user per-room count of all notifications that happened before a stream +ordering S. Thus, to get the notification count for a user / room we can simply +query a single row in `event_push_summary` and count the number of rows in +`event_push_actions` with a stream ordering larger than S (and as long as S is +"recent", the number of rows needing to be scanned will be small). + +The `event_push_summary` table is updated via a background job that periodically +choses a new stream ordering S' (usually the latest stream ordering), counts all +notifications in `event_push_actions` between the existing S and S', and adds +them to the existing counts in `event_push_summary`. + +This allows us to delete old rows from `event_push_actions` once those rows have +been counted and added to `event_push_summary` (we call this process +"rotation"). + + +We need to handle when a user sends a read receipt to the room. Again this is +done as a background process. For each receipt we clear the row in +`event_push_summary` and count the number of notifications in +`event_push_actions` that happened after the receipt but before S, and insert +that count into `event_push_summary`. + +Note that its possible that if the read receipt is for an old event the relevant +`event_push_actions` rows will have been rotated and we get the wrong count +(it'll be too low). We accept this as a rare edge case that is unlikely to +impact the user much (since the vast majority of read receipts will be for the +latest event). + +The last complication is to handle the race where we request the notifications +counts after a user sends a read receipt into the room, but *before* the +background update handles the receipt (without any special handling the counts +would be outdated). We fix this by including in `event_push_summary` the read +receipt we used when updating `event_push_summary`, and every time we query the +table we check if that matches the most recent read receipt in the room. If yes, +continue as above, if not we simply query the `event_push_actions` table +directly. + +Since read receipts are almost always for recent events, scanning the +`event_push_actions` table in this case is unlikely to be a problem. Even if it +is a problem, it is temporary until the background job handles the new read +receipt. +""" + import logging from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union, cast From 1a35b70111a5871f0cf8b36bfd1398e92c690e57 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Aug 2022 11:52:35 +0100 Subject: [PATCH 2/7] Newsfile --- changelog.d/13445.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13445.misc diff --git a/changelog.d/13445.misc b/changelog.d/13445.misc new file mode 100644 index 000000000000..17462c56f39b --- /dev/null +++ b/changelog.d/13445.misc @@ -0,0 +1 @@ +Add some comments about how event push actions are stored. From cb3f54bd3dc72bd03f6b2fb4e3e200b2d4316052 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Aug 2022 15:28:37 +0100 Subject: [PATCH 3/7] Apply suggestions from code review Co-authored-by: Patrick Cloke --- synapse/storage/databases/main/event_push_actions.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 89b707446890..8ba2b65ee18a 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -24,13 +24,12 @@ `BulkPushRuleEvaluator`). For the latter we could simply count the number of rows in `event_push_actions` -table for a given room/user, but in practice this could be *very* heavyweight -when there were a large number of notifications (due to e.g. the user never -reading a room). Plus, keeping all push actions indefinitely uses a lot of disk -space. +table for a given room/user, but in practice this is *very* heavyweight when there +were a large number of notifications (due to e.g. the user never reading a room). +Plus, keeping all push actions indefinitely uses a lot of disk space. To fix these issues, we add a new table `event_push_summary` that tracks -per-user per-room count of all notifications that happened before a stream +per-user per-room counts of all notifications that happened before a stream ordering S. Thus, to get the notification count for a user / room we can simply query a single row in `event_push_summary` and count the number of rows in `event_push_actions` with a stream ordering larger than S (and as long as S is From 1dbbd7699cf43753f13e5352b20ef4d81497fc79 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Aug 2022 15:28:44 +0100 Subject: [PATCH 4/7] Update synapse/storage/databases/main/event_push_actions.py Co-authored-by: Patrick Cloke --- synapse/storage/databases/main/event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 8ba2b65ee18a..64039b694cda 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -36,7 +36,7 @@ "recent", the number of rows needing to be scanned will be small). The `event_push_summary` table is updated via a background job that periodically -choses a new stream ordering S' (usually the latest stream ordering), counts all +chooses a new stream ordering S' (usually the latest stream ordering), counts all notifications in `event_push_actions` between the existing S and S', and adds them to the existing counts in `event_push_summary`. From f82338d1fcfd4942b095b62c0a1b4ff8b16efff5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Aug 2022 15:31:58 +0100 Subject: [PATCH 5/7] Rewrap --- synapse/storage/databases/main/event_push_actions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 64039b694cda..14580717027c 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -24,9 +24,9 @@ `BulkPushRuleEvaluator`). For the latter we could simply count the number of rows in `event_push_actions` -table for a given room/user, but in practice this is *very* heavyweight when there -were a large number of notifications (due to e.g. the user never reading a room). -Plus, keeping all push actions indefinitely uses a lot of disk space. +table for a given room/user, but in practice this is *very* heavyweight when +there were a large number of notifications (due to e.g. the user never reading a +room). Plus, keeping all push actions indefinitely uses a lot of disk space. To fix these issues, we add a new table `event_push_summary` that tracks per-user per-room counts of all notifications that happened before a stream @@ -36,9 +36,9 @@ "recent", the number of rows needing to be scanned will be small). The `event_push_summary` table is updated via a background job that periodically -chooses a new stream ordering S' (usually the latest stream ordering), counts all -notifications in `event_push_actions` between the existing S and S', and adds -them to the existing counts in `event_push_summary`. +chooses a new stream ordering S' (usually the latest stream ordering), counts +all notifications in `event_push_actions` between the existing S and S', and +adds them to the existing counts in `event_push_summary`. This allows us to delete old rows from `event_push_actions` once those rows have been counted and added to `event_push_summary` (we call this process From 373cd3ef036d5018a6fe674d4fb0de2c098098e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Aug 2022 15:32:03 +0100 Subject: [PATCH 6/7] Clarify --- synapse/storage/databases/main/event_push_actions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 14580717027c..6cb0f70084d5 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -49,7 +49,8 @@ done as a background process. For each receipt we clear the row in `event_push_summary` and count the number of notifications in `event_push_actions` that happened after the receipt but before S, and insert -that count into `event_push_summary`. +that count into `event_push_summary` (If the receipt happened *after* S then we +simply clear the `event_push_summary`). Note that its possible that if the read receipt is for an old event the relevant `event_push_actions` rows will have been rotated and we get the wrong count From bcaee0c424ff9728f188d885560718ccc598085b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 4 Aug 2022 19:48:47 +0100 Subject: [PATCH 7/7] Update synapse/storage/databases/main/event_push_actions.py Co-authored-by: Patrick Cloke --- synapse/storage/databases/main/event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 6cb0f70084d5..92f6ff00ee88 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -50,7 +50,7 @@ `event_push_summary` and count the number of notifications in `event_push_actions` that happened after the receipt but before S, and insert that count into `event_push_summary` (If the receipt happened *after* S then we -simply clear the `event_push_summary`). +simply clear the `event_push_summary`.) Note that its possible that if the read receipt is for an old event the relevant `event_push_actions` rows will have been rotated and we get the wrong count