From 24781807e3d164010b1ce226f8864a293bb1b637 Mon Sep 17 00:00:00 2001
From: Lyn Nagara
Date: Tue, 10 Sep 2019 15:17:35 -0700
Subject: [PATCH] feat: Remove sampling

---
 src/sentry/conf/server.py                   |  17 ---
 src/sentry/event_manager.py                 | 109 +++++-------------
 .../event_manager/test_event_manager.py     |  22 ----
 tests/snuba/eventstream/test_eventstream.py |   2 -
 4 files changed, 28 insertions(+), 122 deletions(-)

diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py
index 1021ff875267af..65d17e96735ac5 100644
--- a/src/sentry/conf/server.py
+++ b/src/sentry/conf/server.py
@@ -935,23 +935,6 @@ def create_partitioned_queues(name):
     # 'scheduled-name': 'monitor_guid',
 }
 
-# Only store a portion of all messages per unique group.
-SENTRY_SAMPLE_DATA = False
-
-# The following values control the sampling rates
-SENTRY_SAMPLE_RATES = (
-    # up until N events, store 1 in M
-    (50, 1),
-    (1000, 2),
-    (10000, 10),
-    (100000, 50),
-    (1000000, 300),
-    (10000000, 2000),
-)
-SENTRY_MAX_SAMPLE_RATE = 10000
-SENTRY_SAMPLE_TIMES = ((3600, 1), (360, 10), (60, 60))
-SENTRY_MAX_SAMPLE_TIME = 10000
-
 # Web Service
 SENTRY_WEB_HOST = "localhost"
 SENTRY_WEB_PORT = 9000
diff --git a/src/sentry/event_manager.py b/src/sentry/event_manager.py
index 0c57b670f0c98d..a8dd506be5e60d 100644
--- a/src/sentry/event_manager.py
+++ b/src/sentry/event_manager.py
@@ -15,7 +15,7 @@
 from django.utils import timezone
 from django.utils.encoding import force_text
 
-from sentry import buffer, eventtypes, eventstream, features, tagstore, tsdb
+from sentry import buffer, eventtypes, eventstream, tagstore, tsdb
 from sentry.constants import (
     DEFAULT_STORE_NORMALIZER_ARGS,
     LOG_LEVELS,
@@ -102,22 +102,6 @@ def get_tag(data, key):
             return v
 
 
-def count_limit(count):
-    # TODO: could we do something like num_to_store = max(math.sqrt(100*count)+59, 200) ?
-    # ~ 150 * ((log(n) - 1.5) ^ 2 - 0.25)
-    for amount, sample_rate in settings.SENTRY_SAMPLE_RATES:
-        if count <= amount:
-            return sample_rate
-    return settings.SENTRY_MAX_SAMPLE_RATE
-
-
-def time_limit(silence):  # ~ 3600 per hour
-    for amount, sample_rate in settings.SENTRY_SAMPLE_TIMES:
-        if silence >= amount:
-            return sample_rate
-    return settings.SENTRY_MAX_SAMPLE_TIME
-
-
 def validate_and_set_timestamp(data, timestamp):
     """
     Helper function for event processors/enhancers to avoid setting broken timestamps.
@@ -156,26 +140,6 @@ def parse_client_as_sdk(value):
     return {"name": name, "version": version}
 
 
-if not settings.SENTRY_SAMPLE_DATA:
-
-    def should_sample(current_datetime, last_seen, times_seen):
-        return False
-
-
-else:
-
-    def should_sample(current_datetime, last_seen, times_seen):
-        silence = current_datetime - last_seen
-
-        if times_seen % count_limit(times_seen) == 0:
-            return False
-
-        if times_seen % time_limit(silence) == 0:
-            return False
-
-        return True
-
-
 def plugin_is_regression(group, event):
     project = event.project
     for plugin in plugins.for_project(project):
@@ -689,7 +653,7 @@ def save(self, project_id, raw=False, assume_normalized=False):
             kwargs["first_release"] = release
 
         try:
-            group, is_new, is_regression, is_sample = self._save_aggregate(
+            group, is_new, is_regression = self._save_aggregate(
                 event=event, hashes=hashes, release=release, **kwargs
             )
         except HashDiscarded:
@@ -708,7 +672,6 @@ def save(self, project_id, raw=False, assume_normalized=False):
             group = None
             is_new = False
             is_regression = False
-            is_sample = False
 
         event_saved.send_robust(project=project, event_size=event.size, sender=EventManager)
         # store a reference to the group id to guarantee validation of isolation
@@ -779,33 +742,32 @@ def save(self, project_id, raw=False, assume_normalized=False):
                 group=group, environment=environment
             )
 
-        # save the event unless its been sampled
-        if not is_sample:
-            try:
-                with transaction.atomic(using=router.db_for_write(Event)):
-                    event.save()
-            except IntegrityError:
-                logger.info(
-                    "duplicate.found",
-                    exc_info=True,
-                    extra={
-                        "event_uuid": event_id,
-                        "project_id": project.id,
-                        "group_id": group.id if group else None,
-                        "model": Event.__name__,
-                    },
-                )
-                return event
-
-            tagstore.delay_index_event_tags(
-                organization_id=project.organization_id,
-                project_id=project.id,
-                group_id=group.id if group else None,
-                environment_id=environment.id,
-                event_id=event.id,
-                tags=event.tags,
-                date_added=event.datetime,
+        # save the event
+        try:
+            with transaction.atomic(using=router.db_for_write(Event)):
+                event.save()
+        except IntegrityError:
+            logger.info(
+                "duplicate.found",
+                exc_info=True,
+                extra={
+                    "event_uuid": event_id,
+                    "project_id": project.id,
+                    "group_id": group.id if group else None,
+                    "model": Event.__name__,
+                },
             )
+            return event
+
+        tagstore.delay_index_event_tags(
+            organization_id=project.organization_id,
+            project_id=project.id,
+            group_id=group.id if group else None,
+            environment_id=environment.id,
+            event_id=event.id,
+            tags=event.tags,
+            date_added=event.datetime,
+        )
 
         if event_user:
             counters = [
@@ -855,7 +817,6 @@ def save(self, project_id, raw=False, assume_normalized=False):
             group=group,
             event=event,
             is_new=is_new,
-            is_sample=is_sample,
             is_regression=is_regression,
             is_new_group_environment=is_new_group_environment,
             primary_hash=hashes[0],
@@ -994,14 +955,6 @@ def _save_aggregate(self, event, hashes, release, **kwargs):
         if group_is_new and len(new_hashes) == len(all_hashes):
             is_new = True
 
-        # XXX(dcramer): it's important this gets called **before** the aggregate
-        # is processed as otherwise values like last_seen will get mutated
-        can_sample = features.has("projects:sample-events", project=project) and should_sample(
-            event.data.get("received") or float(event.datetime.strftime("%s")),
-            group.data.get("last_received") or float(group.last_seen.strftime("%s")),
-            group.times_seen,
-        )
-
         if not is_new:
             is_regression = self._process_existing_aggregate(
                 group=group, event=event, data=kwargs, release=release
@@ -1009,13 +962,7 @@ def _save_aggregate(self, event, hashes, release, **kwargs):
         else:
             is_regression = False
 
-        # Determine if we've sampled enough data to store this event
-        if is_new or is_regression:
-            is_sample = False
-        else:
-            is_sample = can_sample
-
-        return group, is_new, is_regression, is_sample
+        return group, is_new, is_regression
 
     def _handle_regression(self, group, event, release):
         if not group.is_resolved():
diff --git a/tests/sentry/event_manager/test_event_manager.py b/tests/sentry/event_manager/test_event_manager.py
index 25fb49e3b5a28d..82c3600fe6a63d 100644
--- a/tests/sentry/event_manager/test_event_manager.py
+++ b/tests/sentry/event_manager/test_event_manager.py
@@ -74,26 +74,6 @@ def test_similar_message_prefix_doesnt_group(self):
 
         assert event1.group_id != event2.group_id
 
-    @mock.patch("sentry.event_manager.should_sample")
-    def test_does_not_save_event_when_sampled(self, should_sample):
-        with self.feature("projects:sample-events"):
-            should_sample.return_value = True
-            event_id = "a" * 32
-
-            manager = EventManager(make_event(event_id=event_id))
-            manager.save(1)
-
-            # This is a brand new event, so it is actually saved.
-            assert Event.objects.filter(event_id=event_id).exists()
-
-            event_id = "b" * 32
-
-            manager = EventManager(make_event(event_id=event_id))
-            manager.save(1)
-
-            # This second is a dupe, so should be sampled
-            assert not Event.objects.filter(event_id=event_id).exists()
-
     def test_ephemral_interfaces_removed_on_save(self):
         manager = EventManager(make_event(platform="python"))
         manager.normalize()
@@ -794,7 +774,6 @@ def save_event():
             group=event.group,
             event=event,
             is_new=True,
-            is_sample=False,
             is_regression=False,
             is_new_group_environment=True,
             primary_hash="acbd18db4cc2f85cedef654fccc4a4d8",
@@ -809,7 +788,6 @@ def save_event():
             group=event.group,
             event=event,
             is_new=False,
-            is_sample=False,
             is_regression=None,  # XXX: wut
             is_new_group_environment=False,
             primary_hash="acbd18db4cc2f85cedef654fccc4a4d8",
diff --git a/tests/snuba/eventstream/test_eventstream.py b/tests/snuba/eventstream/test_eventstream.py
index 799873c61635da..77c26608a4a1e5 100644
--- a/tests/snuba/eventstream/test_eventstream.py
+++ b/tests/snuba/eventstream/test_eventstream.py
@@ -77,7 +77,6 @@ def _get_event_count():
             "is_new_group_environment": True,
             "is_new": True,
             "is_regression": False,
-            "is_sample": False,
             "primary_hash": "acbd18db4cc2f85cedef654fccc4a4d8",
             "skip_consume": False,
         }
@@ -101,7 +100,6 @@ def test_issueless(self, mock_delay_index_event_tags, mock_eventstream_insert):
             "is_new_group_environment": True,
             "is_new": True,
             "is_regression": False,
-            "is_sample": False,
             "primary_hash": "acbd18db4cc2f85cedef654fccc4a4d8",
             "skip_consume": False,
         }
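
Note for reviewers: the behavior deleted here was gated on the "projects:sample-events" feature flag, and events that were new or regressions were never sampled; only repeat events of an existing group could be dropped. For reference, below is a standalone sketch of the decision logic being removed, reconstructed from the deleted count_limit/time_limit/should_sample code above. The SAMPLE_* constant names are shortened stand-ins for the SENTRY_SAMPLE_* settings deleted from src/sentry/conf/server.py, and timestamps are epoch seconds, as at the deleted call site.

# Standalone sketch of the removed sampling decision, for reference only.
# Rate tables mirror the deleted SENTRY_SAMPLE_RATES / SENTRY_SAMPLE_TIMES.
SAMPLE_RATES = ((50, 1), (1000, 2), (10000, 10), (100000, 50), (1000000, 300), (10000000, 2000))
MAX_SAMPLE_RATE = 10000
SAMPLE_TIMES = ((3600, 1), (360, 10), (60, 60))
MAX_SAMPLE_TIME = 10000


def count_limit(count):
    # The more events a group has seen, the larger the modulus, so the
    # smaller the fraction of its events that get stored.
    for amount, sample_rate in SAMPLE_RATES:
        if count <= amount:
            return sample_rate
    return MAX_SAMPLE_RATE


def time_limit(silence):
    # The longer a group has been silent (in seconds), the smaller the
    # modulus; a group silent for an hour gets a modulus of 1, i.e. every
    # event is stored.
    for amount, sample_rate in SAMPLE_TIMES:
        if silence >= amount:
            return sample_rate
    return MAX_SAMPLE_TIME


def should_sample(current_datetime, last_seen, times_seen):
    # True means "drop this event"; every Nth occurrence is still stored.
    silence = current_datetime - last_seen
    if times_seen % count_limit(times_seen) == 0:
        return False
    if times_seen % time_limit(silence) == 0:
        return False
    return True

For example, for a group with times_seen=999 that last fired 10 seconds ago, count_limit(999) is 2 and time_limit(10) is 10000, so should_sample(now, now - 10, 999) returns True and the event would have been dropped, while the next event (1000 % 2 == 0) would have been stored.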