From 75689b9c68926617fe73a1f705da2e5af14b305b Mon Sep 17 00:00:00 2001 From: Mathieu Pillard Date: Thu, 21 Nov 2024 11:27:36 +0100 Subject: [PATCH 1/3] Change growth threshold algorithm do use percentage points increase Also contains a refactor to make the code easier to test, as well as admin & reviewer tools tweaks to make the consequences of changing that threshold more obvious. If the average hotness in a tier is 0.1, and the growth threshold is set to 50, we used to flag add-ons in that tier with hotness over 0.15 (percentage increase) now we'll flag over 0.51 (percentage points increase). --- src/olympia/addons/tasks.py | 27 ++------ src/olympia/addons/tests/test_tasks.py | 29 ++++---- src/olympia/reviewers/admin.py | 33 +++++++++ src/olympia/reviewers/models.py | 55 ++++++++++++++- .../reviewers/addon_details_box.html | 2 +- src/olympia/reviewers/tests/test_models.py | 68 ++++++++++++++++++- 6 files changed, 173 insertions(+), 41 deletions(-) diff --git a/src/olympia/addons/tasks.py b/src/olympia/addons/tasks.py index 613f2c3f5d00..46d69917ad40 100644 --- a/src/olympia/addons/tasks.py +++ b/src/olympia/addons/tasks.py @@ -5,7 +5,7 @@ from django.core.files.storage import default_storage as storage from django.db import transaction -from django.db.models import Avg, Q, Value +from django.db.models import Q, Value from django.db.models.functions import Collate from elasticsearch_dsl import Search @@ -536,28 +536,13 @@ def flag_high_hotness_according_to_review_tier(): # Need a hotness threshold to be set for the tier. growth_threshold_before_flagging__isnull=False, ) - base_qs = Addon.unfiltered.exclude(status=amo.STATUS_DISABLED).filter( - type=amo.ADDON_EXTENSION - ) + # Build a single queryset with all add-ons we want to flag for each tier. + base_qs = UsageTier.get_base_addons() hotness_per_tier_filters = Q() for usage_tier in usage_tiers: - tier_filters = { - 'average_daily_users__gte': usage_tier.lower_adu_threshold, - 'average_daily_users__lt': usage_tier.upper_adu_threshold, - } - mean_hotness_in_tier = ( - base_qs.filter(**tier_filters) - .aggregate(Avg('hotness', default=0)) - .get('hotness__avg') - ) - if not mean_hotness_in_tier: - continue - hotness_ceiling_before_flagging = mean_hotness_in_tier * ( - 1 + usage_tier.growth_threshold_before_flagging / 100 - ) - hotness_per_tier_filters |= Q( - hotness__gt=hotness_ceiling_before_flagging, **tier_filters - ) + # Note: get_growth_threshold_q_object can return an empty Q() object + # if the average hotness for that tier is exactly 0. + hotness_per_tier_filters |= usage_tier.get_growth_threshold_q_object() if not hotness_per_tier_filters: return qs = base_qs.filter(hotness_per_tier_filters) diff --git a/src/olympia/addons/tests/test_tasks.py b/src/olympia/addons/tests/test_tasks.py index 43029a47da14..e6991d89f8af 100644 --- a/src/olympia/addons/tests/test_tasks.py +++ b/src/olympia/addons/tests/test_tasks.py @@ -256,7 +256,7 @@ def test_flag_high_hotness_according_to_review_tier(): name='D tier (below minimum usage for hotness)', lower_adu_threshold=0, upper_adu_threshold=100, - growth_threshold_before_flagging=0.1, + growth_threshold_before_flagging=1, ) UsageTier.objects.create( name='C tier (no growth threshold)', @@ -267,13 +267,13 @@ def test_flag_high_hotness_according_to_review_tier(): name='B tier', lower_adu_threshold=200, upper_adu_threshold=250, - growth_threshold_before_flagging=35, + growth_threshold_before_flagging=11, ) UsageTier.objects.create( name='A tier', lower_adu_threshold=250, upper_adu_threshold=1000, - growth_threshold_before_flagging=20, + growth_threshold_before_flagging=7, ) UsageTier.objects.create( name='S tier (no upper threshold)', @@ -332,14 +332,14 @@ def test_flag_high_hotness_according_to_review_tier(): ) flagged = [ - # B tier average hotness should be 0.325, with a threshold of 35, so - # Add-ons with a hotness over 0.43875000000000003 should be flagged. + # B tier average hotness should be 0.32, with a threshold of 11, so + # Add-ons with a hotness over 0.43 should be flagged. addon_factory(name='B tier', average_daily_users=200, hotness=0.44), - # A tier average hotness should be 0.375, with a threshold of 20, so - # Add-ons with a hotness over 0.44999999999999996 should be flagged. - addon_factory(name='A tier', average_daily_users=250, hotness=0.45), + # A tier average hotness should be 0.3755, with a threshold of 7, so + # Add-ons with a hotness over 0.4455 should be flagged. + addon_factory(name='A tier', average_daily_users=250, hotness=0.451), addon_factory( - name='A tier with inactive flags', average_daily_users=250, hotness=0.45 + name='A tier with inactive flags', average_daily_users=250, hotness=0.451 ), ] @@ -401,7 +401,7 @@ def test_flag_high_hotness_according_to_review_tier_threshold_check(): ) # Average hotness should be 0.5666666666666667, # growth_threshold_before_flagging for that tier is 10 so we should flag - # add-ons with hotness above 0.6233333333333334, meaning no add-ons should + # add-ons with hotness above 0.6666666666666667, meaning no add-ons should # be flagged. addon_factory(average_daily_users=251, hotness=0.55) addon_factory(average_daily_users=251, hotness=0.55) @@ -412,11 +412,10 @@ def test_flag_high_hotness_according_to_review_tier_threshold_check(): assert NeedsHumanReview.objects.count() == 0 - # Average hotness should now be 0.6, - # growth_threshold_before_flagging for that tier is 10 so we should flag - # add-ons with hotness above 0.66, meaning that add-on should - # be flagged. - addon.update(hotness=0.7) + # Average hotness should now be 0.65, growth_threshold_before_flagging for + # that tier is 10 so we should flag add-ons with hotness above 0.75, + # meaning that this add-on should be flagged. + addon.update(hotness=0.85) flag_high_hotness_according_to_review_tier() assert NeedsHumanReview.objects.count() == 1 diff --git a/src/olympia/reviewers/admin.py b/src/olympia/reviewers/admin.py index d539ac7c4d2d..e1c0a1a94eab 100644 --- a/src/olympia/reviewers/admin.py +++ b/src/olympia/reviewers/admin.py @@ -63,8 +63,41 @@ class UsageTierAdmin(AMOModelAdmin): 'lower_adu_threshold', 'upper_adu_threshold', 'growth_threshold_before_flagging', + 'computed_growth_threshold_before_flagging', + 'computed_number_of_addons_that_would_be_flagged_for_growth', 'abuse_reports_ratio_threshold_before_flagging', ) + readonly_fields = ( + 'computed_growth_threshold_before_flagging', + 'computed_number_of_addons_that_would_be_flagged_for_growth', + ) + + def computed_growth_threshold_before_flagging(self, obj): + return obj.get_growth_threshold() + + def computed_number_of_addons_that_would_be_flagged_for_growth(self, obj): + return ( + UsageTier.get_base_addons() + .filter(obj.get_growth_threshold_q_object()) + .count() + ) + + def get_form(self, request, obj=None, **kwargs): + if obj: + help_texts = { + 'computed_growth_threshold_before_flagging': ( + 'Actual growth threshold above which we would flag add-ons in that ' + 'tier, as computed using the percentage defined above and the ' + 'currrent average growth of add-ons (currently {}) in that tier.' + ).format(obj.average_growth), + 'computed_number_of_addons_that_would_be_flagged_for_growth': ( + 'Number of add-ons that would be flagged for growth using the ' + 'percentage defined above, if the task ran now with the current ' + 'add-on growth values.' + ), + } + kwargs.update({'help_texts': help_texts}) + return super().get_form(request, obj, **kwargs) admin.site.register(UsageTier, UsageTierAdmin) diff --git a/src/olympia/reviewers/models.py b/src/olympia/reviewers/models.py index ad3e3c2d11b1..968e9c66c270 100644 --- a/src/olympia/reviewers/models.py +++ b/src/olympia/reviewers/models.py @@ -4,10 +4,11 @@ from django.conf import settings from django.core.cache import cache from django.db import models -from django.db.models import Q +from django.db.models import Avg, Q from django.dispatch import receiver from django.template import loader from django.urls import reverse +from django.utils.functional import cached_property from extended_choices import Choices @@ -780,8 +781,10 @@ class UsageTier(ModelBase): blank=True, default=None, null=True, - help_text='Usage growth percentage threshold before we start automatically ' - 'flagging the add-on for human review.', + help_text='Percentage point increase in growth over the average in that tier ' + 'before we start automatically flagging the add-on for human review. For ' + 'example, if set to 50 and the average hotness in that tier is 0.01, ' + 'add-ons over 0.51 hotness get flagged.', ) abuse_reports_ratio_threshold_before_flagging = models.IntegerField( blank=True, @@ -798,6 +801,52 @@ class Meta: def __str__(self): return self.name + @classmethod + def get_base_addons(cls): + """Return base queryset of add-ons we consider when we look at growth + thresholds. + + This is a classmethod, it is not specific to a particular tier.""" + return Addon.unfiltered.exclude(status=amo.STATUS_DISABLED).filter( + type=amo.ADDON_EXTENSION + ) + + def get_tier_boundaries(self): + """Return boundaries used to filter add-ons for that tier instance.""" + return { + 'average_daily_users__gte': self.lower_adu_threshold, + 'average_daily_users__lt': self.upper_adu_threshold, + } + + @cached_property + def average_growth(self): + return ( + self.get_base_addons() + .filter(**self.get_tier_boundaries()) + .aggregate(Avg('hotness', default=0)) + .get('hotness__avg') + ) + + def get_growth_threshold(self): + """Return the growth threshold for that tier, or None. + + The value is computed from the average growth (hotness) of add-ons in + that tier plus the growth_threshold_before_flagging converted to + decimal.""" + return self.average_growth + self.growth_threshold_before_flagging / 100 + + def get_growth_threshold_q_object(self): + """Return Q object containing filters to apply to find add-ons over the + computed growth threshold for that tier.""" + hotness_ceiling_before_flagging = self.get_growth_threshold() + if hotness_ceiling_before_flagging is not None: + return Q( + hotness__gt=hotness_ceiling_before_flagging, + **self.get_tier_boundaries(), + ) + else: + return Q() + class NeedsHumanReview(ModelBase): """Model holding information about why a version was flagged for human diff --git a/src/olympia/reviewers/templates/reviewers/addon_details_box.html b/src/olympia/reviewers/templates/reviewers/addon_details_box.html index c7e5c0242aed..87cee174034e 100644 --- a/src/olympia/reviewers/templates/reviewers/addon_details_box.html +++ b/src/olympia/reviewers/templates/reviewers/addon_details_box.html @@ -131,7 +131,7 @@

{{ addon_name }}

Average Daily Users {{ - addon.average_daily_users|numberfmt }} + addon.average_daily_users|numberfmt }} ({% if addon.hotness >= 0 %}+{% endif %}{{ addon.hotness * 100}}% from last week) {% endif %} diff --git a/src/olympia/reviewers/tests/test_models.py b/src/olympia/reviewers/tests/test_models.py index 802563abb492..ab5f51f1e6c5 100644 --- a/src/olympia/reviewers/tests/test_models.py +++ b/src/olympia/reviewers/tests/test_models.py @@ -12,7 +12,12 @@ from olympia.abuse.models import AbuseReport from olympia.access.models import Group, GroupUser from olympia.activity.models import ActivityLog -from olympia.addons.models import AddonApprovalsCounter, AddonReviewerFlags, AddonUser +from olympia.addons.models import ( + Addon, + AddonApprovalsCounter, + AddonReviewerFlags, + AddonUser, +) from olympia.amo.tests import ( TestCase, addon_factory, @@ -38,6 +43,7 @@ NeedsHumanReview, ReviewActionReason, ReviewerSubscription, + UsageTier, get_flags, send_notifications, set_reviewing_cache, @@ -1860,3 +1866,63 @@ def test_save_existing_does_not_record_an_activity(self): flagged.reason = NeedsHumanReview.REASONS.DEVELOPER_REPLY flagged.save() assert ActivityLog.objects.count() == 0 + + +class UsageTierTests(TestCase): + def setUp(self): + self.tier = UsageTier.objects.create( + lower_adu_threshold=100, + upper_adu_threshold=1000, + growth_threshold_before_flagging=50, + ) + + def test_get_base_addons(self): + addon_factory(status=amo.STATUS_DISABLED) + addon_factory(type=amo.ADDON_STATICTHEME) + expected = {addon_factory()} + assert set(self.tier.get_base_addons()) == expected + + def test_get_tier_boundaries(self): + assert self.tier.get_tier_boundaries() == { + 'average_daily_users__gte': 100, + 'average_daily_users__lt': 1000, + } + + def test_average_growth(self): + addon_factory(hotness=0.5, average_daily_users=1000) # Different tier + addon_factory( + hotness=0.5, average_daily_users=999, status=amo.STATUS_DISABLED + ) # Right tier but disabled + addon_factory( + hotness=0.5, average_daily_users=999, type=amo.ADDON_STATICTHEME + ) # Right tier but not an extension + addon_factory(hotness=0.1, average_daily_users=100) + addon_factory(hotness=0.2, average_daily_users=999) + assert round(self.tier.average_growth, ndigits=2) == 0.15 + + # Value is cached on the instance + addon_factory(hotness=0.3, average_daily_users=500) + assert round(self.tier.average_growth, ndigits=2) == 0.15 + del self.tier.average_growth + assert round(self.tier.average_growth, ndigits=2) == 0.2 + + def test_get_growth_threshold(self): + assert round(self.tier.get_growth_threshold(), ndigits=2) == 0.5 + addon_factory(hotness=0.01, average_daily_users=100) + addon_factory(hotness=0.01, average_daily_users=999) + del self.tier.average_growth + assert round(self.tier.get_growth_threshold(), ndigits=2) == 0.51 + + addon_factory(hotness=0.78, average_daily_users=999) + del self.tier.average_growth + assert round(self.tier.get_growth_threshold(), ndigits=2) == 0.77 + + def test_get_growth_threshold_q_object(self): + addon_factory(hotness=0.01, average_daily_users=100) + addon_factory(hotness=0.01, average_daily_users=999) + expected = [addon_factory(hotness=0.78, average_daily_users=999)] + + assert ( + list(Addon.objects.filter(self.tier.get_growth_threshold_q_object())) + == expected + ) From b0a647158306cc65637d80ad5e73548bd91e428e Mon Sep 17 00:00:00 2001 From: Mathieu Pillard Date: Fri, 22 Nov 2024 12:15:26 +0100 Subject: [PATCH 2/3] Add test with negative average hotness --- src/olympia/addons/tests/test_tasks.py | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/olympia/addons/tests/test_tasks.py b/src/olympia/addons/tests/test_tasks.py index e6991d89f8af..ed56f204edde 100644 --- a/src/olympia/addons/tests/test_tasks.py +++ b/src/olympia/addons/tests/test_tasks.py @@ -427,6 +427,36 @@ def test_flag_high_hotness_according_to_review_tier_threshold_check(): ) +@freeze_time('2023-05-15 11:00') +@pytest.mark.django_db +def test_flag_high_hotness_according_to_review_tier_threshold_check_negative(): + user_factory(pk=settings.TASK_USER_ID) + UsageTier.objects.create( + name='A tier', + lower_adu_threshold=250, + upper_adu_threshold=1000, + growth_threshold_before_flagging=10, + ) + # Average hotness should be negative, -0.1. + # growth_threshold_before_flagging for that tier is 10 so we should flag + # add-ons with hotness above 0 + + addon_factory(average_daily_users=251, hotness=-0.1) + addon_factory(average_daily_users=251, hotness=-0.2) + addon = addon_factory(average_daily_users=251, hotness=0.0) + File.objects.update(is_signed=True) + + flag_high_hotness_according_to_review_tier() + + assert NeedsHumanReview.objects.count() == 1 + assert ( + addon.current_version.needshumanreview_set.filter( + reason=NeedsHumanReview.REASONS.HOTNESS_THRESHOLD, is_active=True + ).count() + == 1 + ) + + @pytest.mark.django_db def test_flag_high_hotness_according_to_review_tier_no_tiers_defined(): user_factory(pk=settings.TASK_USER_ID) From 3e0a058768e81ea1ad6f33b691ccbb6b0d8817e4 Mon Sep 17 00:00:00 2001 From: Mathieu Pillard Date: Fri, 22 Nov 2024 13:02:11 +0100 Subject: [PATCH 3/3] Add assert num queries --- src/olympia/addons/tests/test_tasks.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/olympia/addons/tests/test_tasks.py b/src/olympia/addons/tests/test_tasks.py index ed56f204edde..3b3d278991d8 100644 --- a/src/olympia/addons/tests/test_tasks.py +++ b/src/olympia/addons/tests/test_tasks.py @@ -388,11 +388,19 @@ def test_flag_high_hotness_according_to_review_tier(): @freeze_time('2023-05-15 11:00') @pytest.mark.django_db -def test_flag_high_hotness_according_to_review_tier_threshold_check(): +def test_flag_high_hotness_according_to_review_tier_threshold_check( + django_assert_num_queries, +): # Simplified version of the test above with fewer extra non-flagged add-ons # ensuring that we flag add-ons above the threshold, and don't flag add-ons # under. user_factory(pk=settings.TASK_USER_ID) + UsageTier.objects.create( + name='B tier', + lower_adu_threshold=1000, + upper_adu_threshold=100000, + growth_threshold_before_flagging=0, + ) UsageTier.objects.create( name='A tier', lower_adu_threshold=250, @@ -408,7 +416,13 @@ def test_flag_high_hotness_according_to_review_tier_threshold_check(): addon = addon_factory(average_daily_users=251, hotness=0.6) File.objects.update(is_signed=True) - flag_high_hotness_according_to_review_tier() + with django_assert_num_queries(4): + # 4 queries: + # - get all tiers + # - get average hotness for tier A + # - get average hotness for tier B + # - find all add-ons to flag + flag_high_hotness_according_to_review_tier() assert NeedsHumanReview.objects.count() == 0