diff --git a/src/olympia/blocklist/tasks.py b/src/olympia/blocklist/tasks.py
index 5b8852301a78..7d67e5c73214 100644
--- a/src/olympia/blocklist/tasks.py
+++ b/src/olympia/blocklist/tasks.py
@@ -101,37 +101,54 @@ def monitor_remote_settings():
 def upload_filter(generation_time, filter_list=None, create_stash=False):
     # We cannot send enum values to tasks so we serialize them as strings
     # and deserialize them here back to the enum values.
-    filter_list: List[BlockType] = (
-        [] if filter_list is None else [BlockType[filter] for filter in filter_list]
-    )
+    filters_to_upload: List[BlockType] = []
+    base_filter_ids = dict()
     bucket = settings.REMOTE_SETTINGS_WRITER_BUCKET
     server = RemoteSettings(
         bucket, REMOTE_SETTINGS_COLLECTION_MLBF, sign_off_needed=False
     )
     mlbf = MLBF.load_from_storage(generation_time, error_on_missing=True)
-    is_base = len(filter_list) > 0
     # Download old records before uploading new ones
     # this ensures we do not delete any records we just uplaoded
     old_records = server.records()
     attachment_types_to_delete = []

-    if is_base:
-        for block_type in filter_list:
-            attachment_type = BLOCKLIST_RECORD_MLBF_BASE(block_type)
-            data = {
-                'key_format': MLBF.KEY_FORMAT,
-                'generation_time': generation_time,
-                'attachment_type': attachment_type,
-            }
-            with mlbf.storage.open(mlbf.filter_path(block_type), 'rb') as filter_file:
-                attachment = ('filter.bin', filter_file, 'application/octet-stream')
-                server.publish_attachment(data, attachment)
-                statsd.incr('blocklist.tasks.upload_filter.upload_mlbf')
-                # After we have succesfully uploaded the new filter
-                # we can safely delete others of that type
-                attachment_types_to_delete.append(attachment_type)
+    for block_type in BlockType:
+        if block_type == BlockType.SOFT_BLOCKED and not waffle.switch_is_active(
+            'enable-soft-blocking'
+        ):
+            continue
+
+        if filter_list and block_type.name in filter_list:
+            filters_to_upload.append(block_type)
+
+        base_filter_id = get_config(
+            MLBF_BASE_ID_CONFIG_KEY(block_type, compat=True),
+            json_value=True,
+        )

-        statsd.incr('blocklist.tasks.upload_filter.upload_mlbf.base')
+        if base_filter_id is not None:
+            base_filter_ids[block_type] = base_filter_id
+
+    for block_type in filters_to_upload:
+        attachment_type = BLOCKLIST_RECORD_MLBF_BASE(block_type)
+        data = {
+            'key_format': MLBF.KEY_FORMAT,
+            'generation_time': generation_time,
+            'attachment_type': attachment_type,
+        }
+        with mlbf.storage.open(mlbf.filter_path(block_type), 'rb') as filter_file:
+            attachment = ('filter.bin', filter_file, 'application/octet-stream')
+            server.publish_attachment(data, attachment)
+            statsd.incr('blocklist.tasks.upload_filter.upload_mlbf')
+            # After we have successfully uploaded the new filter
+            # we can safely delete others of that type
+            attachment_types_to_delete.append(attachment_type)
+
+            statsd.incr('blocklist.tasks.upload_filter.upload_mlbf.base')
+            # Update the base filter id for this block type to the generation time
+            # so we can delete stashes older than this new filter
+            base_filter_ids[block_type] = generation_time

     # It is possible to upload a stash and a filter in the same task
     if create_stash:
@@ -146,34 +163,11 @@ def upload_filter(generation_time, filter_list=None, create_stash=False):
         server.publish_record(stash_upload_data)
         statsd.incr('blocklist.tasks.upload_filter.upload_stash')

-    oldest_base_filter_id: int | None = None
-
-    # Get the oldest base_filter_id from the set of defined IDs
-    # We should delete stashes that are older than this time
-    for block_type in BlockType:
-        # Ignore soft blocked config timestamps if the switch is not active.
-        if block_type == BlockType.SOFT_BLOCKED and not waffle.switch_is_active(
-            'enable-soft-blocking'
-        ):
-            continue
-
-        if block_type in filter_list:
-            base_filter_id = generation_time
-        else:
-            base_filter_id = get_config(
-                # Currently we read from the old singular config key for
-                # hard blocks to preserve backward compatibility.
-                # In https://github.com/mozilla/addons/issues/15193
-                # we can remove this and start reading from the new plural key.
-                MLBF_BASE_ID_CONFIG_KEY(block_type, compat=True),
-                json_value=True,
-            )
-
-        if base_filter_id is not None:
-            if oldest_base_filter_id is None:
-                oldest_base_filter_id = base_filter_id
-            else:
-                oldest_base_filter_id = min(oldest_base_filter_id, base_filter_id)
+    # Get the oldest base filter id so we can delete only stashes
+    # that are definitely not needed anymore
+    oldest_base_filter_id = (
+        min(base_filter_ids.values()) if base_filter_ids else None
+    )

     for record in old_records:
         # Delete attachment records that match the
@@ -203,7 +197,7 @@ def upload_filter(generation_time, filter_list=None, create_stash=False):
     set_config(MLBF_TIME_CONFIG_KEY, generation_time, json_value=True)

     # Update the base_filter_id for uploaded filters
-    for block_type in filter_list:
+    for block_type in filters_to_upload:
         # We currently write to the old singular config key for hard blocks
         # to preserve backward compatibility.
         # In https://github.com/mozilla/addons/issues/15193
diff --git a/src/olympia/blocklist/tests/test_tasks.py b/src/olympia/blocklist/tests/test_tasks.py
index 3ecd3238f288..a24313c25f88 100644
--- a/src/olympia/blocklist/tests/test_tasks.py
+++ b/src/olympia/blocklist/tests/test_tasks.py
@@ -20,7 +20,7 @@
 )
 from olympia.blocklist.mlbf import MLBF
 from olympia.constants.blocklist import MLBF_BASE_ID_CONFIG_KEY, MLBF_TIME_CONFIG_KEY
-from olympia.zadmin.models import set_config
+from olympia.zadmin.models import get_config, set_config

 from ..models import BlocklistSubmission, BlockType, BlockVersion
 from ..tasks import (
@@ -183,11 +183,13 @@ def test_invalid_block_type_raises(self):
         with self.assertRaises(ValueError):
             BLOCKLIST_RECORD_MLBF_BASE('foo')

-        with self.assertRaises(KeyError):
-            upload_filter.delay(
-                self.generation_time,
-                filter_list=['foo'],
-            )
+        MLBF.generate_from_db(self.generation_time)
+        upload_filter(
+            self.generation_time,
+            filter_list=['foo'],
+        )
+        assert not self.mocks['delete_record'].called
+        assert not self.mocks['publish_record'].called

     def _test_upload_base_filter(self, *block_types: BlockType):
         self._block_version(is_signed=True)
@@ -195,7 +197,7 @@ def _test_upload_base_filter(self, *block_types: BlockType):
         for block_type in block_types:
             mlbf.generate_and_write_filter(block_type)

-        upload_filter.delay(
+        upload_filter(
             self.generation_time,
             filter_list=[block_type.name for block_type in block_types],
         )
@@ -248,112 +250,127 @@ def _test_upload_base_filter(self, *block_types: BlockType):
     def test_upload_blocked_filter(self):
         self._test_upload_base_filter(BlockType.BLOCKED)

+    @override_switch('enable-soft-blocking', active=True)
     def test_upload_soft_blocked_filter(self):
         self._test_upload_base_filter(BlockType.SOFT_BLOCKED)

+    @override_switch('enable-soft-blocking', active=True)
     def test_upload_soft_and_blocked_filter(self):
         self._test_upload_base_filter(BlockType.BLOCKED, BlockType.SOFT_BLOCKED)

-    def _test_cleanup_old_records(
-        self,
-        filter_list: Dict[BlockType, int],
-        create_stash: bool,
-        records: List[Dict[str, int]],
-        expected_calls: List[any],
-    ):
-        self._block_version(is_signed=True)
-        mlbf = MLBF.generate_from_db(self.generation_time)
+    def test_skip_cleanup_when_no_filters_or_config_keys(self):
+        MLBF.generate_from_db(self.generation_time)
+        self.mocks['records'].return_value = [
+            self._attachment(0, 'bloomfilter-base', self.generation_time),
+            self._stash(1, self.generation_time - 1),
+        ]

-        create_filter = len(filter_list) > 0
-
-        if create_stash:
-            mlbf.generate_and_write_stash()
-
-        for block_type, base_id in filter_list.items():
-            if create_filter:
-                mlbf.generate_and_write_filter(block_type)
-            # We currently write to the old singular config key for hard blocks
-            # to preserve backward compatibility.
-            # In https://github.com/mozilla/addons/issues/15193
-            # we can remove this and start writing to the new plural key.
-            set_config(
-                MLBF_BASE_ID_CONFIG_KEY(block_type, compat=True),
-                base_id,
-                json_value=True,
-            )
+        upload_filter(self.generation_time, filter_list=[])
+
+        assert get_config(MLBF_BASE_ID_CONFIG_KEY(BlockType.BLOCKED, compat=True)) is None
+        assert not self.mocks['delete_record'].called

-        self.mocks['records'].return_value = records
-        upload_filter.delay(
+        set_config(
+            MLBF_BASE_ID_CONFIG_KEY(BlockType.BLOCKED, compat=True),
             self.generation_time,
-            filter_list=[block_type.name for block_type in filter_list],
-            create_stash=create_stash,
+            json_value=True,
         )
+        upload_filter(self.generation_time, filter_list=[])

-        assert self.mocks['delete_record'].call_args_list == expected_calls
+        # We should delete the record older than the config key
+        assert self.mocks['delete_record'].call_args_list == [mock.call(1)]

-        if len(filter_list.values()) > 0:
-            self.mocks['cleanup_old_files.delay'].assert_called_with(
-                base_filter_id=min(filter_list.values())
-            )
-            self.mocks['statsd.incr'].assert_called_with(
-                'blocklist.tasks.upload_filter.reset_collection'
-            )
+    def test_cleanup_records_with_matching_attachment(self):
+        mlbf = MLBF.generate_from_db(self.generation_time)
+        mlbf.generate_and_write_filter(BlockType.BLOCKED)

-    def test_skip_cleanup_when_no_filters(self):
-        self._test_cleanup_old_records(
-            filter_list={},
-            records=[{'id': '0', 'generation_time': self.generation_time}],
-            expected_calls=[],
-            create_stash=False,
+        self.mocks['records'].return_value = [
+            self._attachment(0, 'bloomfilter-base', self.generation_time - 1),
+            self._attachment(1, 'softblocks-bloomfilter-base', self.generation_time),
+        ]
+
+        upload_filter(self.generation_time, filter_list=[BlockType.BLOCKED.name])
+
+        # Delete the matching block filter record
+        assert self.mocks['delete_record'].call_args_list == [mock.call(0)]
+
+        # Don't delete the soft blocked filter record because the switch is disabled
+        upload_filter(
+            self.generation_time,
+            filter_list=[BlockType.SOFT_BLOCKED.name],
         )
+        assert mock.call(1) not in self.mocks['delete_record'].call_args_list
+
+        with override_switch('enable-soft-blocking', active=True):
+            upload_filter(
+                self.generation_time,
+                filter_list=[BlockType.SOFT_BLOCKED.name],
+            )
+            assert mock.call(1) in self.mocks['delete_record'].call_args_list

-    def test_cleanup_old_records(self):
+    def test_cleanup_records_older_than_oldest_base_filter_id(self):
         """
-        Clean up 0 because it's the only record matching the uploaded filters
-        attachment_type or is older than generation_time.
+        Delete records older than the oldest active base filter id
         """
-        self._test_cleanup_old_records(
-            filter_list={
-                BlockType.BLOCKED: self.generation_time,
-            },
-            records=[
-                self._attachment(0, 'bloomfilter-base', self.generation_time - 1),
-                self._attachment(
-                    1, 'softblocks-bloomfilter-base', self.generation_time
-                ),
-                self._stash(2, self.generation_time + 1),
-            ],
-            expected_calls=[mock.call(0)],
-            create_stash=False,
+        t0 = self.generation_time - 3
+        t1 = self.generation_time - 2
+        t2 = self.generation_time - 1
+        t3 = self.generation_time
+
+        mlbf = MLBF.generate_from_db(t3)
+        mlbf.generate_and_write_filter(BlockType.BLOCKED)
+
+        # Return existing records for all times before t3
+        self.mocks['records'].return_value = [
+            self._stash(0, t0),
+            self._stash(1, t1),
+            self._stash(2, t2),
+        ]
+
+        # Delete records older than the oldest config key
+        # which is t2
+        set_config(
+            MLBF_BASE_ID_CONFIG_KEY(BlockType.BLOCKED, compat=True),
+            t2,
+            json_value=True,
         )
+        upload_filter(self.generation_time, filter_list=[])
+        assert self.mocks['delete_record'].call_args_list == [mock.call(0), mock.call(1)]
+        self.mocks['delete_record'].reset_mock()

-    def test_cleanup_oldest_stash_records(self):
-        """
-        The oldest base id time is (self.generation_time - 3)
-        Delete 0 as matching attachment
-        Delete 1 as older than base id
-        """
-        self._test_cleanup_old_records(
-            filter_list={
-                BlockType.BLOCKED: self.generation_time - 3,
-            },
-            records=[
-                # Deleted, matching attachment
-                self._attachment(0, 'bloomfilter-base', self.generation_time - 5),
-                self._stash(
-                    1, self.generation_time - 4
-                ),  # deleted older than oldest filter
-                # Not deleted, not matching attachment
-                self._attachment(
-                    1, 'softblocks-bloomfilter-base', self.generation_time - 3
-                ),
-                # Both Not delted, not older than oldest filter
-                self._stash(1, self.generation_time - 2),
-                self._stash(2, self.generation_time - 1),
-            ],
-            expected_calls=[mock.call(0), mock.call(1)],
-            create_stash=False,
+
+        # Delete all records older than the new active filter
+        # which is t3
+        upload_filter(t3, filter_list=[BlockType.BLOCKED.name])
+        assert self.mocks['delete_record'].call_args_list == [
+            mock.call(0),
+            mock.call(1),
+            mock.call(2),
+        ]
+        self.mocks['delete_record'].reset_mock()
+
+        # Create an older soft block base filter ID,
+        # but ignore it because the switch is disabled
+        set_config(
+            MLBF_BASE_ID_CONFIG_KEY(BlockType.SOFT_BLOCKED),
+            t1,
+            json_value=True,
         )
+        upload_filter(self.generation_time, filter_list=[])
+        assert self.mocks['delete_record'].call_args_list == [
+            mock.call(0),
+            mock.call(1),
+        ]
+        self.mocks['delete_record'].reset_mock()
+
+        # If the switch is enabled, consider the soft blocked filter
+        # and only delete records older than the oldest active base id
+        # which is now t1
+        with override_switch('enable-soft-blocking', active=True):
+            upload_filter(self.generation_time, filter_list=[])
+            assert self.mocks['delete_record'].call_args_list == [
+                mock.call(0),
+            ]

     def test_cleanup_old_stash_records_when_uploading_new_filter(self):
         """
@@ -380,7 +397,7 @@ def test_cleanup_old_stash_records_when_uploading_new_filter(self):
             json_value=True,
         )

-        upload_filter.delay(self.generation_time, filter_list=[BlockType.BLOCKED.name])
+        upload_filter(self.generation_time, filter_list=[BlockType.BLOCKED.name])

         assert self.mocks['delete_record'].call_args_list == [
             mock.call(0),  # old attachment is deleted
@@ -402,14 +419,14 @@ def test_ignore_soft_blocked_if_switch_is_disabled(self):
             self._stash(1, t0),
         ]

-        upload_filter.delay(
+        upload_filter(
             self.generation_time, filter_list=[BlockType.SOFT_BLOCKED.name]
         )

         assert not self.mocks['delete_record'].called

         with override_switch('enable-soft-blocking', active=True):
-            upload_filter.delay(
+            upload_filter(
                 self.generation_time, filter_list=[BlockType.SOFT_BLOCKED.name]
             )
             assert self.mocks['delete_record'].called
@@ -420,7 +437,7 @@ def test_create_stashed_filter(self):
         mlbf = MLBF.generate_from_db(self.generation_time)
         mlbf.generate_and_write_stash(old_mlbf)

-        upload_filter.delay(self.generation_time, create_stash=True)
+        upload_filter(self.generation_time, create_stash=True)
         assert not self.mocks['delete_record'].called

         with mlbf.storage.open(mlbf.stash_path, 'rb') as stash_file:
@@ -452,19 +469,19 @@ def test_create_stashed_filter(self):

     def test_raises_when_no_filter_exists(self):
         with self.assertRaises(FileNotFoundError):
-            upload_filter.delay(
+            upload_filter(
                 self.generation_time, filter_list=[BlockType.BLOCKED.name]
             )

     def test_raises_when_no_stash_exists(self):
         with self.assertRaises(FileNotFoundError):
-            upload_filter.delay(self.generation_time, create_stash=True)
+            upload_filter(self.generation_time, create_stash=True)

     def test_default_is_no_op(self):
         MLBF.generate_from_db(self.generation_time).generate_and_write_filter(
             BlockType.BLOCKED
         )
-        upload_filter.delay(self.generation_time)
+        upload_filter(self.generation_time)

         assert not self.mocks['delete_record'].called
         assert not self.mocks['publish_record'].called
@@ -499,11 +516,9 @@ def test_complete_session_is_called_after_upload_stash(self):
         manager.records.return_value = [attachment_record, stash_record]

         with mock.patch('olympia.blocklist.tasks.RemoteSettings', return_value=manager):
-            self._test_cleanup_old_records(
-                filter_list={BlockType.BLOCKED: self.generation_time - 1},
-                records=[attachment_record, stash_record],
-                # We expect to delete the attachment and the stash record
-                expected_calls=[mock.call(0), mock.call(1)],
+            upload_filter(
+                self.generation_time,
+                filter_list=[BlockType.BLOCKED.name],
                 create_stash=True,
             )

@@ -535,7 +550,7 @@ def test_config_is_updated_only_after_complete_session(self):
         self.mocks['complete_session'].side_effect = Exception('Something went wrong')

         with self.assertRaises(Exception):  # noqa: B017
-            upload_filter.delay(
+            upload_filter(
                 self.generation_time,
                 filter_list=[BlockType.BLOCKED.name],
                 create_stash=True,