Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add a background database update to purge account data for deactivated users. #11655

Merged
merged 22 commits into from
Feb 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
68b663d
Remove account data upon user deactivation
reivilibre Dec 21, 2021
13fec0d
Document that account data is removed upon user deactivation
reivilibre Dec 21, 2021
c1fed49
Newsfile
reivilibre Dec 21, 2021
4da869e
Remove account data upon user deactivation
reivilibre Dec 21, 2021
b132bba
Test the removal of account data upon deactivation
reivilibre Dec 29, 2021
1589d6a
Pull out the account data transaction so it can be reused in other tr…
reivilibre Dec 29, 2021
5344446
Add a background job to retroactively purge account data for deactiva…
reivilibre Dec 29, 2021
da884c0
Add a schema delta to trigger the clean-up job
reivilibre Dec 29, 2021
288b0e8
Newsfile
reivilibre Dec 29, 2021
2690bd6
Add tests for the background update job
reivilibre Jan 17, 2022
9413a46
Fix port_db script not being able to run the background job
reivilibre Jan 17, 2022
3b48c9d
Merge branch 'develop' into rei/add_job
reivilibre Jan 24, 2022
2337b28
Reformat after merge
reivilibre Jan 24, 2022
2ee3d8f
Simplify test after merge
reivilibre Jan 24, 2022
d362e31
Fix order of args after merge
reivilibre Jan 24, 2022
3ecc6bc
Move schema delta to correct version
reivilibre Jan 24, 2022
9a4ec65
Fix database port script's Method Resolution Order
reivilibre Jan 24, 2022
0543946
Fix abstract method error in synapse_port_db
reivilibre Jan 28, 2022
d18e2dd
job → update
reivilibre Jan 28, 2022
99353f3
Remove obsolete manual cache invalidations in tests
reivilibre Feb 2, 2022
2587e51
Merge branch 'develop' into rei/add_job
reivilibre Feb 2, 2022
2f79559
Fix up background update delta number
reivilibre Feb 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/11655.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove account data (including client config, push rules and ignored users) upon user deactivation.
4 changes: 4 additions & 0 deletions scripts/synapse_port_db
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ from synapse.logging.context import (
run_in_background,
)
from synapse.storage.database import DatabasePool, make_conn
from synapse.storage.databases.main import PushRuleStore
from synapse.storage.databases.main.account_data import AccountDataWorkerStore
from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateStore
from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore
from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore
Expand Down Expand Up @@ -180,6 +182,8 @@ class Store(
UserDirectoryBackgroundUpdateStore,
EndToEndKeyBackgroundStore,
StatsStore,
AccountDataWorkerStore,
PushRuleStore,
PusherWorkerStore,
PresenceBackgroundUpdateStore,
GroupServerWorkerStore,
Expand Down
164 changes: 109 additions & 55 deletions synapse/storage/databases/main/account_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ def __init__(
"AccountDataAndTagsChangeCache", account_max
)

self.db_pool.updates.register_background_update_handler(
"delete_account_data_for_deactivated_users",
self._delete_account_data_for_deactivated_users,
)

def get_max_account_data_stream_id(self) -> int:
"""Get the current max stream ID for account data stream
Expand Down Expand Up @@ -549,72 +554,121 @@ def _add_account_data_for_user(

async def purge_account_data_for_user(self, user_id: str) -> None:
"""
Removes the account data for a user.
Removes ALL the account data for a user.
Intended to be used upon user deactivation.
This is intended to be used upon user deactivation and also removes any
derived information from account data (e.g. push rules and ignored users).
Also purges the user from the ignored_users cache table
and the push_rules cache tables.
"""

Args:
user_id: The user ID to remove data for.
await self.db_pool.runInteraction(
"purge_account_data_for_user_txn",
self._purge_account_data_for_user_txn,
user_id,
)

def _purge_account_data_for_user_txn(
self, txn: LoggingTransaction, user_id: str
) -> None:
"""
See `purge_account_data_for_user`.
"""
# Purge from the primary account_data tables.
self.db_pool.simple_delete_txn(
txn, table="account_data", keyvalues={"user_id": user_id}
)

def purge_account_data_for_user_txn(txn: LoggingTransaction) -> None:
# Purge from the primary account_data tables.
self.db_pool.simple_delete_txn(
txn, table="account_data", keyvalues={"user_id": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="room_account_data", keyvalues={"user_id": user_id}
)

self.db_pool.simple_delete_txn(
txn, table="room_account_data", keyvalues={"user_id": user_id}
)
# Purge from ignored_users where this user is the ignorer.
# N.B. We don't purge where this user is the ignoree, because that
# interferes with other users' account data.
# It's also not this user's data to delete!
self.db_pool.simple_delete_txn(
txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
)

# Purge from ignored_users where this user is the ignorer.
# N.B. We don't purge where this user is the ignoree, because that
# interferes with other users' account data.
# It's also not this user's data to delete!
self.db_pool.simple_delete_txn(
txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
)
# Remove the push rules
self.db_pool.simple_delete_txn(
txn, table="push_rules", keyvalues={"user_name": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="push_rules_enable", keyvalues={"user_name": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="push_rules_stream", keyvalues={"user_id": user_id}
)

# Remove the push rules
self.db_pool.simple_delete_txn(
txn, table="push_rules", keyvalues={"user_name": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="push_rules_enable", keyvalues={"user_name": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="push_rules_stream", keyvalues={"user_id": user_id}
)
# Invalidate caches as appropriate
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_room_and_type, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_global_account_data_by_type_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_room, (user_id,)
)
self._invalidate_cache_and_stream(txn, self.get_push_rules_for_user, (user_id,))
self._invalidate_cache_and_stream(
txn, self.get_push_rules_enabled_for_user, (user_id,)
)
# This user might be contained in the ignored_by cache for other users,
# so we have to invalidate it all.
self._invalidate_all_cache_and_stream(txn, self.ignored_by)

# Invalidate caches as appropriate
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_room_and_type, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_global_account_data_by_type_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_room, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_push_rules_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_push_rules_enabled_for_user, (user_id,)
)
# This user might be contained in the ignored_by cache for other users,
# so we have to invalidate it all.
self._invalidate_all_cache_and_stream(txn, self.ignored_by)
async def _delete_account_data_for_deactivated_users(
self, progress: dict, batch_size: int
) -> int:
"""
Retroactively purges account data for users that have already been deactivated.
Gets run as a background update caused by a schema delta.
"""

await self.db_pool.runInteraction(
"purge_account_data_for_user_txn",
purge_account_data_for_user_txn,
last_user: str = progress.get("last_user", "")

def _delete_account_data_for_deactivated_users_txn(
txn: LoggingTransaction,
) -> int:
sql = """
SELECT name FROM users
WHERE deactivated = ? and name > ?
ORDER BY name ASC
LIMIT ?
"""

txn.execute(sql, (1, last_user, batch_size))
users = [row[0] for row in txn]

for user in users:
self._purge_account_data_for_user_txn(txn, user_id=user)

if users:
self.db_pool.updates._background_update_progress_txn(
txn,
"delete_account_data_for_deactivated_users",
{"last_user": users[-1]},
)

return len(users)

number_deleted = await self.db_pool.runInteraction(
"_delete_account_data_for_deactivated_users",
_delete_account_data_for_deactivated_users_txn,
)

if number_deleted < batch_size:
await self.db_pool.updates._end_background_update(
"delete_account_data_for_deactivated_users"
)

return number_deleted


class AccountDataStore(AccountDataWorkerStore):
pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/* Copyright 2021 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


-- We want to retroactively delete account data for users that were already
-- deactivated.
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(6803, 'delete_account_data_for_deactivated_users', '{}');
106 changes: 106 additions & 0 deletions tests/handlers/test_deactivate_account.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,109 @@ def test_ignored_users_deleted_upon_deactivation(self) -> None:
self.assertEqual(
self.get_success(self._store.ignored_by("@sheltie:test")), set()
)

def _rerun_retroactive_account_data_deletion_update(self) -> None:
# Reset the 'all done' flag
self._store.db_pool.updates._all_done = False

self.get_success(
self._store.db_pool.simple_insert(
"background_updates",
{
"update_name": "delete_account_data_for_deactivated_users",
"progress_json": "{}",
},
)
)

self.wait_for_background_updates()

def test_account_data_deleted_retroactively_by_background_update_if_deactivated(
self,
) -> None:
"""
Tests that a user, who deactivated their account before account data was
deleted automatically upon deactivation, has their account data retroactively
scrubbed by the background update.
"""

# Request the deactivation of our account
self._deactivate_my_account()

# Add some account data
# (we do this after the deactivation so that the act of deactivating doesn't
# clear it out. This emulates a user that was deactivated before this was cleared
# upon deactivation.)
self.get_success(
self._store.add_account_data_for_user(
self.user,
AccountDataTypes.DIRECT,
{"@someone:remote": ["!somewhere:remote"]},
)
)

# Check that the account data is there.
self.assertIsNotNone(
self.get_success(
self._store.get_global_account_data_by_type_for_user(
self.user,
AccountDataTypes.DIRECT,
)
),
)

# Re-run the retroactive deletion update
self._rerun_retroactive_account_data_deletion_update()

# Check that the account data was cleared.
self.assertIsNone(
self.get_success(
self._store.get_global_account_data_by_type_for_user(
self.user,
AccountDataTypes.DIRECT,
)
),
)

def test_account_data_preserved_by_background_update_if_not_deactivated(
self,
) -> None:
"""
Tests that the background update does not scrub account data for users that have
not been deactivated.
"""

# Add some account data
# (we do this after the deactivation so that the act of deactivating doesn't
# clear it out. This emulates a user that was deactivated before this was cleared
# upon deactivation.)
self.get_success(
self._store.add_account_data_for_user(
self.user,
AccountDataTypes.DIRECT,
{"@someone:remote": ["!somewhere:remote"]},
)
)

# Check that the account data is there.
self.assertIsNotNone(
self.get_success(
self._store.get_global_account_data_by_type_for_user(
self.user,
AccountDataTypes.DIRECT,
)
),
)

# Re-run the retroactive deletion update
self._rerun_retroactive_account_data_deletion_update()

# Check that the account data was NOT cleared.
self.assertIsNotNone(
self.get_success(
self._store.get_global_account_data_by_type_for_user(
self.user,
AccountDataTypes.DIRECT,
)
),
)