From c7d3f5c43426275d6685c36f43bc44267acd7535 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 5 Oct 2022 15:59:43 +0200 Subject: [PATCH 01/34] ref: Pass metrics buckets through processor --- relay-server/src/actors/processor.rs | 59 +++++++++++++++++++++++- relay-server/src/actors/project_cache.rs | 14 ++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 51442be6108..4ba46ca354f 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -32,13 +32,15 @@ use relay_general::store::{ClockDriftProcessor, LightNormalizationConfig}; use relay_general::types::{Annotated, Array, FromValue, Object, ProcessingAction, Value}; use relay_log::LogError; use relay_metrics::{Bucket, InsertMetrics, MergeBuckets, Metric}; -use relay_quotas::{DataCategory, RateLimits, ReasonCode}; +use relay_quotas::{DataCategory, RateLimits, ReasonCode, Scoping}; use relay_redis::RedisPool; use relay_sampling::{DynamicSamplingContext, RuleId}; use relay_statsd::metric; use relay_system::{Addr, FromMessage, NoResponse, Service}; -use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; +use crate::actors::envelopes::{ + EnvelopeManager, SendEnvelope, SendEnvelopeError, SendMetrics, SubmitEnvelope, +}; use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::actors::project::{Feature, ProjectState}; use crate::actors::project_cache::ProjectCache; @@ -527,12 +529,34 @@ impl EncodeEnvelope { } } +/// Enforce rate limits on metrics buckets, and forward them to the EnvelopeManager if there +/// is still enough quota. +/// +/// NOTE: This struct has the same contents as [`super::envelopes::SendMetrics`], but +/// is defined as a separate entity to clarify its purpose. +/// +#[cfg(feature = "processing")] +#[derive(Debug)] +pub struct RateLimitMetricsBuckets { + /// The pre-aggregated metric buckets. + pub buckets: Vec, + /// Scoping information for the metrics. + pub scoping: Scoping, + /// The key of the logical partition to send the metrics to. + pub partition_key: Option, +} + +/// Enforce rate limits on a metrics bucket and forward it to EnvelopeManager +/// to be published. + /// CPU-intensive processing tasks for envelopes. #[derive(Debug)] pub enum EnvelopeProcessor { ProcessEnvelope(Box), ProcessMetrics(Box), EncodeEnvelope(Box), + #[cfg(feature = "processing")] + RateLimitMetricsBuckets(RateLimitMetricsBuckets), // TODO: why are the others Box<_>? } impl EnvelopeProcessor { @@ -567,6 +591,15 @@ impl FromMessage for EnvelopeProcessor { } } +#[cfg(feature = "processing")] +impl FromMessage for EnvelopeProcessor { + type Response = NoResponse; + + fn from_message(message: RateLimitMetricsBuckets, _: ()) -> Self { + Self::RateLimitMetricsBuckets(message) + } +} + /// Service implementing the [`EnvelopeProcessor`] interface. /// /// This service handles messages in a worker pool with configurable concurrency. @@ -2178,6 +2211,24 @@ impl EnvelopeProcessorService { } } + #[cfg(feature = "processing")] + fn handle_rate_limit_metrics_buckets(&self, message: RateLimitMetricsBuckets) { + let RateLimitMetricsBuckets { + buckets, + scoping, + partition_key, + } = message; + + // TODO: Enforce rate limits here. + + // All good, forward to envelope manager. 
+ EnvelopeManager::from_registry().send(SendMetrics { + buckets, + scoping, + partition_key, + }); + } + fn encode_envelope_body( body: Vec, http_encoding: HttpEncoding, @@ -2225,6 +2276,10 @@ impl EnvelopeProcessorService { EnvelopeProcessor::ProcessEnvelope(message) => self.handle_process_envelope(*message), EnvelopeProcessor::ProcessMetrics(message) => self.handle_process_metrics(*message), EnvelopeProcessor::EncodeEnvelope(message) => self.handle_encode_envelope(*message), + #[cfg(feature = "processing")] + EnvelopeProcessor::RateLimitMetricsBuckets(message) => { + self.handle_rate_limit_metrics_buckets(message) + } } } } diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 8e3854c81d4..fe978bebc3f 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -25,6 +25,7 @@ use crate::envelope::Envelope; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{self, EnvelopeContext, GarbageDisposal, Response}; +use super::processor::{EnvelopeProcessor, RateLimitMetricsBuckets}; use super::project::ExpiryState; #[cfg(feature = "processing")] @@ -626,6 +627,19 @@ impl Handler for ProjectCache { return; } + // In processing mode, let the Processor rate limit the outgoing metrics bucket. + #[cfg(feature = "processing")] + if self.config.processing_enabled() { + EnvelopeProcessor::from_registry().send(RateLimitMetricsBuckets { + buckets, + scoping, + partition_key, + }); + + return; + } + + // In non-processing mode, send the buckets to the envelope manager directly. EnvelopeManager::from_registry().send(SendMetrics { buckets, scoping, From 90fb99e1e2e8a95857325ececaeb6a641299c731 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 5 Oct 2022 17:04:36 +0200 Subject: [PATCH 02/34] wip --- relay-server/src/actors/processor.rs | 76 +++++++++++++++++-- relay-server/src/actors/project_cache.rs | 14 +++- tests/integration/test_store.py | 93 ++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 8 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 4ba46ca354f..81294a8d7bb 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -32,7 +32,7 @@ use relay_general::store::{ClockDriftProcessor, LightNormalizationConfig}; use relay_general::types::{Annotated, Array, FromValue, Object, ProcessingAction, Value}; use relay_log::LogError; use relay_metrics::{Bucket, InsertMetrics, MergeBuckets, Metric}; -use relay_quotas::{DataCategory, RateLimits, ReasonCode, Scoping}; +use relay_quotas::{DataCategory, Quota, RateLimits, ReasonCode, Scoping}; use relay_redis::RedisPool; use relay_sampling::{DynamicSamplingContext, RuleId}; use relay_statsd::metric; @@ -532,12 +532,11 @@ impl EncodeEnvelope { /// Enforce rate limits on metrics buckets, and forward them to the EnvelopeManager if there /// is still enough quota. /// -/// NOTE: This struct has the same contents as [`super::envelopes::SendMetrics`], but -/// is defined as a separate entity to clarify its purpose. -/// #[cfg(feature = "processing")] #[derive(Debug)] pub struct RateLimitMetricsBuckets { + /// TODO: docs + pub quotas: Vec, /// The pre-aggregated metric buckets. pub buckets: Vec, /// Scoping information for the metrics. 
@@ -2213,13 +2212,78 @@ impl EnvelopeProcessorService { #[cfg(feature = "processing")] fn handle_rate_limit_metrics_buckets(&self, message: RateLimitMetricsBuckets) { + use relay_metrics::{MetricNamespace, MetricResourceIdentifier}; + use relay_quotas::ItemScoping; + let RateLimitMetricsBuckets { - buckets, + quotas: quota, + mut buckets, scoping, partition_key, } = message; - // TODO: Enforce rate limits here. + let rate_limiter = match self.rate_limiter.as_ref() { + Some(rate_limiter) => rate_limiter, + None => { + relay_log::error!("handle_rate_limit_metrics_buckets called without rate limiter"); + return; + } + }; + + let item_scoping = ItemScoping { + category: DataCategory::TransactionProcessed, + scoping: &scoping, + }; + + // For every metrics bucket, one of three cases applies + // 1. The metric is not in the transaction namespace. + // TODO: Should we rate limit sessions as well? + // 2. The metric is in the transaction namespace. + // a. The metric is "duration". This is extracted exactly once for every processed + // transaction, so we use it to count against the quota. + // b. For any other metric in the transaction namespace, we check the limit but we + // do not count against the quota. + buckets.retain(|bucket| { + // TODO: Split this off into `should_keep_bucket`. + // FIXME: Add namespace enum to `Bucket` so we don't have to parse the MRI for every bucket. + let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { + Ok(mri) => mri, + Err(_) => { + relay_log::error!("Invalid MRI: {}", bucket.name); + return true; + } + }; + + if !matches!(mri.namespace, MetricNamespace::Transactions) { + return true; + } + + dbg!(mri.name); + let quantity = if mri.name == "duration" { + bucket.value.len() + } else { + 0 + }; + + // If necessary, we could minimize the number of calls to the rate limiter + // by grouping buckets here somehow. But be aware that that would mean we might + // under-accept, because the sum of all buckets might exceed the quota, but not individual ones. + match rate_limiter.is_rate_limited("a, item_scoping, quantity) { + Ok(limits) => { + let is_limited = limits.is_limited(); + if is_limited { + ProjectCache::from_registry() + .do_send(UpdateRateLimits::new(scoping.project_key, limits)); + } + is_limited + } + Err(_) => todo!(), + } + }); + + if buckets.is_empty() { + return; + } // All good, forward to envelope manager. EnvelopeManager::from_registry().send(SendMetrics { diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index fe978bebc3f..7ae697f0965 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -6,12 +6,12 @@ use actix_web::ResponseError; use failure::Fail; use futures01::{future, Future}; -use relay_common::ProjectKey; +use relay_common::{DataCategory, ProjectKey}; use relay_config::{Config, RelayMode}; use relay_metrics::{ self, AggregateMetricsError, Aggregator, FlushBuckets, InsertMetrics, MergeBuckets, }; -use relay_quotas::RateLimits; +use relay_quotas::{Quota, RateLimits}; use relay_redis::RedisPool; use relay_statsd::metric; @@ -630,7 +630,17 @@ impl Handler for ProjectCache { // In processing mode, let the Processor rate limit the outgoing metrics bucket. #[cfg(feature = "processing")] if self.config.processing_enabled() { + let quotas = project_state + .config + .quotas + .iter() + .filter(|q| q.categories.contains(&DataCategory::TransactionProcessed)) + .map(Quota::clone) + .collect::>(); + + // TODO: bypass if quota is empty? 
EnvelopeProcessor::from_registry().send(RateLimitMetricsBuckets { + quotas, buckets, scoping, partition_key, diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 906f39dd790..fb67003bb79 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -470,6 +470,99 @@ def test_processing_quotas( assert event["logentry"]["formatted"] == f"otherkey{i}" +def test_rate_limit_metrics_buckets( + mini_sentry, + relay_with_processing, + outcomes_consumer, + events_consumer, + transactions_consumer, + event_type, + window, + max_rate_limit, +): + + relay = relay_with_processing({"processing": {"max_rate_limit": max_rate_limit}}) + + project_id = 42 + projectconfig = mini_sentry.add_full_project_config(project_id) + # add another dsn key (we want 2 keys so we can set limits per key) + mini_sentry.add_dsn_key_to_project(project_id) + + # we should have 2 keys (one created with the config and one added above) + public_keys = mini_sentry.get_dsn_public_key_configs(project_id) + + key_id = public_keys[0]["numericId"] + + # Default events are also mapped to "error" by Relay. + category = "error" if event_type == "default" else event_type + + projectconfig["config"]["quotas"] = [ + { + "id": "test_rate_limiting_{}".format(uuid.uuid4().hex), + "scope": "key", + "scopeId": six.text_type(key_id), + "categories": [category], + "limit": 5, + "window": window, + "reasonCode": "get_lost", + } + ] + + if event_type == "transaction": + transform = make_transaction + elif event_type == "error": + transform = make_error + else: + transform = lambda e: e + + for i in range(5): + # send using the first dsn + relay.send_event( + project_id, transform({"message": f"regular{i}"}), dsn_key_idx=0 + ) + + event, _ = events_consumer.get_event() + assert event["logentry"]["formatted"] == f"regular{i}" + + # this one will not get a 429 but still get rate limited (silently) because + # of our caching + relay.send_event(project_id, transform({"message": "some_message"}), dsn_key_idx=0) + + if outcomes_consumer is not None: + outcomes_consumer.assert_rate_limited( + "get_lost", key_id=key_id, categories=[category] + ) + else: + # since we don't wait for the outcome, wait a little for the event to go through + sleep(0.1) + + for _ in range(5): + with pytest.raises(HTTPError) as excinfo: + # Failed: DID NOT RAISE + relay.send_event(project_id, transform({"message": "rate_limited"})) + headers = excinfo.value.response.headers + + retry_after = headers["retry-after"] + assert int(retry_after) <= window + assert int(retry_after) <= max_rate_limit + retry_after2, rest = headers["x-sentry-rate-limits"].split(":", 1) + assert int(retry_after2) == int(retry_after) + assert rest == "%s:key:get_lost" % category + if outcomes_consumer is not None: + outcomes_consumer.assert_rate_limited( + "get_lost", key_id=key_id, categories=[category] + ) + + for i in range(10): + # now send using the second key + relay.send_event( + project_id, transform({"message": f"otherkey{i}"}), dsn_key_idx=1 + ) + event, _ = events_consumer.get_event() + + assert event["logentry"]["formatted"] == f"otherkey{i}" + + def test_events_buffered_before_auth(relay, mini_sentry): evt = threading.Event() From e1a0b22a56a093665eb9b91ef6f5c0b64329e476 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 6 Oct 2022 11:06:54 +0200 Subject: [PATCH 03/34] test: Basic integration test --- relay-server/src/actors/processor.rs | 1 + tests/integration/fixtures/__init__.py | 11 +++ tests/integration/test_store.py | 130 
++++++++++++------------- 3 files changed, 74 insertions(+), 68 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 81294a8d7bb..bc430b09efc 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -2255,6 +2255,7 @@ impl EnvelopeProcessorService { }; if !matches!(mri.namespace, MetricNamespace::Transactions) { + // TODO: Should we rate limit sessions as well? return true; } diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index 5cac16d513e..2dd5ce94c37 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -224,6 +224,17 @@ def send_metrics(self, project_id, payload, timestamp=None): ) self.send_envelope(project_id, envelope) + def send_metrics_buckets(self, project_id, payload, timestamp=None): + envelope = Envelope() + envelope.add_item( + Item( + payload=PayloadRef(json=payload), + type="metric_buckets", + headers=None if timestamp is None else {"timestamp": timestamp}, + ) + ) + self.send_envelope(project_id, envelope) + def send_security_report( self, project_id, diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index fb67003bb79..6fd9afd289a 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -1,12 +1,13 @@ import json import os import queue -import datetime +from time import sleep import uuid import six import socket import threading import pytest +from datetime import datetime from requests.exceptions import HTTPError from flask import abort, Response @@ -471,96 +472,89 @@ def test_processing_quotas( def test_rate_limit_metrics_buckets( - mini_sentry, - relay_with_processing, - outcomes_consumer, - events_consumer, - transactions_consumer, - event_type, - window, - max_rate_limit, + mini_sentry, relay_with_processing, metrics_consumer, ): - - relay = relay_with_processing({"processing": {"max_rate_limit": max_rate_limit}}) + bucket_interval = 1 # second + relay = relay_with_processing( + { + "processing": {"max_rate_limit": 2 * 86400}, + "aggregator": { + "bucket_interval": bucket_interval, + "initial_delay": 0, + "debounce_delay": 0, + }, + } + ) + metrics_consumer = metrics_consumer() project_id = 42 projectconfig = mini_sentry.add_full_project_config(project_id) # add another dsn key (we want 2 keys so we can set limits per key) mini_sentry.add_dsn_key_to_project(project_id) - # we should have 2 keys (one created with the config and one added above) public_keys = mini_sentry.get_dsn_public_key_configs(project_id) - key_id = public_keys[0]["numericId"] - # Default events are also mapped to "error" by Relay. 
- category = "error" if event_type == "default" else event_type - projectconfig["config"]["quotas"] = [ { "id": "test_rate_limiting_{}".format(uuid.uuid4().hex), "scope": "key", "scopeId": six.text_type(key_id), - "categories": [category], + "categories": ["transaction_processed"], "limit": 5, - "window": window, + "window": 86400, "reasonCode": "get_lost", } ] - if event_type == "transaction": - transform = make_transaction - elif event_type == "error": - transform = make_error - else: - transform = lambda e: e - - for i in range(5): - # send using the first dsn - relay.send_event( - project_id, transform({"message": f"regular{i}"}), dsn_key_idx=0 - ) - - event, _ = events_consumer.get_event() - assert event["logentry"]["formatted"] == f"regular{i}" - - # this one will not get a 429 but still get rate limited (silently) because - # of our caching - relay.send_event(project_id, transform({"message": "some_message"}), dsn_key_idx=0) - - if outcomes_consumer is not None: - outcomes_consumer.assert_rate_limited( - "get_lost", key_id=key_id, categories=[category] - ) - else: - # since we don't wait for the outcome, wait a little for the event to go through - sleep(0.1) - - for _ in range(5): - with pytest.raises(HTTPError) as excinfo: - # Failed: DID NOT RAISE - relay.send_event(project_id, transform({"message": "rate_limited"})) - headers = excinfo.value.response.headers + def generate_ticks(): + # Generate a new timestamp for every bucket, so they do not get merged by the aggregator + tick = int(datetime.utcnow().timestamp() // bucket_interval * bucket_interval) + while True: + yield tick + tick += bucket_interval + + tick = generate_ticks() + + def make_bucket(name, type_, values): + return { + "org_id": 1, + "project_id": project_id, + "timestamp": next(tick), + "name": name, + "type": type_, + "value": values, + "width": bucket_interval, + } - retry_after = headers["retry-after"] - assert int(retry_after) <= window - assert int(retry_after) <= max_rate_limit - retry_after2, rest = headers["x-sentry-rate-limits"].split(":", 1) - assert int(retry_after2) == int(retry_after) - assert rest == "%s:key:get_lost" % category - if outcomes_consumer is not None: - outcomes_consumer.assert_rate_limited( - "get_lost", key_id=key_id, categories=[category] - ) + relay.send_metrics_buckets( + project_id, + [ + # Send a few non-duration buckets, they will not deplete the quota + make_bucket("d:transactions/measurements.lcp@millisecond", "d", 10 * [1.0]), + # Session metrics are accepted + make_bucket("d:sessions/session@none", "c", 1), + make_bucket("d:sessions/duration@second", "d", 9 * [1]), + # Duration metric, subtract 3 from quota + make_bucket("d:transactions/duration@millisecond", "d", [1, 2, 3]), + # Can still send unlimited other metrics + make_bucket("d:transactions/measurements.lcp@millisecond", "d", 10 * [1.0]), + # Duration metric, subtract 2 from quota. Now, quota is exceeded. + make_bucket("d:transactions/duration@millisecond", "d", [4, 5]), + # FCP buckets won't make it into kakfa. + make_bucket("d:transactions/measurements.fcp@millisecond", "d", 10 * [1.0]), + # Another three for duration, won't make it into kafka. + make_bucket("d:transactions/duration@millisecond", "d", [6, 7, 8]), + # Session metrics are still accepted. 
+ make_bucket("d:sessions/session@user", "s", [1254]), + ], + ) - for i in range(10): - # now send using the second key - relay.send_event( - project_id, transform({"message": f"otherkey{i}"}), dsn_key_idx=1 - ) - event, _ = events_consumer.get_event() + # Expect two of 9 buckets to be dropped: + produced_buckets = [metrics_consumer.get_metric() for _ in range(7)] + metrics_consumer.assert_empty() - assert event["logentry"]["formatted"] == f"otherkey{i}" + assert produced_buckets == [] def test_events_buffered_before_auth(relay, mini_sentry): From b6070aab7961b799e974ee8b7e8f3674e23a3bac Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 6 Oct 2022 12:09:41 +0200 Subject: [PATCH 04/34] test: integration test still fails --- relay-server/src/actors/processor.rs | 7 +- tests/integration/test_store.py | 102 ++++++++++++++++++++++++--- 2 files changed, 96 insertions(+), 13 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index bc430b09efc..1696414e10e 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -2244,7 +2244,8 @@ impl EnvelopeProcessorService { // b. For any other metric in the transaction namespace, we check the limit but we // do not count against the quota. buckets.retain(|bucket| { - // TODO: Split this off into `should_keep_bucket`. + // NOTE: The order of buckets is not deterministic here, because `Aggregator::pop_flush_buckets` + // produces a hash map. // FIXME: Add namespace enum to `Bucket` so we don't have to parse the MRI for every bucket. let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { Ok(mri) => mri, @@ -2259,7 +2260,6 @@ impl EnvelopeProcessorService { return true; } - dbg!(mri.name); let quantity = if mri.name == "duration" { bucket.value.len() } else { @@ -2276,7 +2276,8 @@ impl EnvelopeProcessorService { ProjectCache::from_registry() .do_send(UpdateRateLimits::new(scoping.project_key, limits)); } - is_limited + // only retain if not limited: + !is_limited } Err(_) => todo!(), } diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 6fd9afd289a..12fd90f93ff 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -500,7 +500,9 @@ def test_rate_limit_metrics_buckets( "id": "test_rate_limiting_{}".format(uuid.uuid4().hex), "scope": "key", "scopeId": six.text_type(key_id), - "categories": ["transaction_processed"], + "categories": [ + "transactionprocessed" + ], # TODO: change to transaction_processed "limit": 5, "window": 86400, "reasonCode": "get_lost", @@ -527,34 +529,114 @@ def make_bucket(name, type_, values): "width": bucket_interval, } - relay.send_metrics_buckets( - project_id, + def send_buckets(buckets): + relay.send_metrics_buckets(project_id, buckets) + sleep(0.1) # give relay time to flush the buckets + + # NOTE: Sending these buckets in multiple envelopes because the order of flushing + # and also the order of rate limiting is not deterministic. 
+ send_buckets( [ # Send a few non-duration buckets, they will not deplete the quota make_bucket("d:transactions/measurements.lcp@millisecond", "d", 10 * [1.0]), # Session metrics are accepted make_bucket("d:sessions/session@none", "c", 1), make_bucket("d:sessions/duration@second", "d", 9 * [1]), + ] + ) + send_buckets( + [ # Duration metric, subtract 3 from quota make_bucket("d:transactions/duration@millisecond", "d", [1, 2, 3]), - # Can still send unlimited other metrics - make_bucket("d:transactions/measurements.lcp@millisecond", "d", 10 * [1.0]), + ], + ) + send_buckets( + [ + # Can still send unlimited non-duration metrics + make_bucket("d:transactions/measurements.lcp@millisecond", "d", 10 * [2.0]), + ], + ) + send_buckets( + [ # Duration metric, subtract 2 from quota. Now, quota is exceeded. - make_bucket("d:transactions/duration@millisecond", "d", [4, 5]), + make_bucket("d:transactions/duration@millisecond", "d", [4, 5, 6]), + ], + ) + send_buckets( + [ # FCP buckets won't make it into kakfa. - make_bucket("d:transactions/measurements.fcp@millisecond", "d", 10 * [1.0]), + make_bucket("d:transactions/measurements.fcp@millisecond", "d", 10 * [7.0]), # Another three for duration, won't make it into kafka. - make_bucket("d:transactions/duration@millisecond", "d", [6, 7, 8]), + ], + ) + send_buckets( + [ + make_bucket("d:transactions/duration@millisecond", "d", [7, 8, 9]), # Session metrics are still accepted. make_bucket("d:sessions/session@user", "s", [1254]), ], ) # Expect two of 9 buckets to be dropped: - produced_buckets = [metrics_consumer.get_metric() for _ in range(7)] + produced_buckets = [metrics_consumer.get_metric(timeout=1) for _ in range(7)] metrics_consumer.assert_empty() - assert produced_buckets == [] + # Sort buckets to prevent ordering flakiness: + produced_buckets.sort(key=lambda b: b["name"]) + for bucket in produced_buckets: + del bucket["timestamp"] + + assert produced_buckets == [ + { + "name": "d:sessions/duration@second", + "org_id": 1, + "project_id": 42, + "type": "d", + "value": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + { + "name": "d:sessions/session@none", + "org_id": 1, + "project_id": 42, + "type": "c", + "value": 1.0, + }, + { + "name": "d:sessions/session@user", + "org_id": 1, + "project_id": 42, + "type": "s", + "value": [1254], + }, + { + "name": "d:transactions/duration@millisecond", + "org_id": 1, + "project_id": 42, + "type": "d", + "value": [1.0, 2.0, 3.0], + }, + # { + # "name": "d:transactions/measurements.fcp@millisecond", + # "org_id": 1, + # "project_id": 42, + # "type": "d", + # "value": [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0], + # }, + { + "name": "d:transactions/measurements.lcp@millisecond", + "org_id": 1, + "project_id": 42, + "type": "d", + "value": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + { + "name": "d:transactions/measurements.lcp@millisecond", + "org_id": 1, + "project_id": 42, + "type": "d", + "value": [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], + }, + ] def test_events_buffered_before_auth(relay, mini_sentry): From 99ecb743288315e6fb190a2fe5b3216fac4859c1 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Mon, 10 Oct 2022 09:20:39 +0200 Subject: [PATCH 05/34] fix: Import error on default features --- relay-server/src/actors/processor.rs | 16 ++++++++-------- relay-server/src/actors/project_cache.rs | 9 ++++----- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 
085144c753c..7da6bdc48da 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -523,7 +523,7 @@ impl EncodeEnvelope { /// #[cfg(feature = "processing")] #[derive(Debug)] -pub struct RateLimitMetricsBuckets { +pub struct RateLimitMetrics { /// TODO: docs pub quotas: Vec, /// The pre-aggregated metric buckets. @@ -544,7 +544,7 @@ pub enum EnvelopeProcessor { ProcessMetrics(Box), EncodeEnvelope(Box), #[cfg(feature = "processing")] - RateLimitMetricsBuckets(RateLimitMetricsBuckets), // TODO: why are the others Box<_>? + RateLimitMetrics(RateLimitMetrics), // TODO: why are the others Box<_>? } impl EnvelopeProcessor { @@ -580,11 +580,11 @@ impl FromMessage for EnvelopeProcessor { } #[cfg(feature = "processing")] -impl FromMessage for EnvelopeProcessor { +impl FromMessage for EnvelopeProcessor { type Response = NoResponse; - fn from_message(message: RateLimitMetricsBuckets, _: ()) -> Self { - Self::RateLimitMetricsBuckets(message) + fn from_message(message: RateLimitMetrics, _: ()) -> Self { + Self::RateLimitMetrics(message) } } @@ -2197,11 +2197,11 @@ impl EnvelopeProcessorService { } #[cfg(feature = "processing")] - fn handle_rate_limit_metrics_buckets(&self, message: RateLimitMetricsBuckets) { + fn handle_rate_limit_metrics_buckets(&self, message: RateLimitMetrics) { use relay_metrics::{MetricNamespace, MetricResourceIdentifier}; use relay_quotas::ItemScoping; - let RateLimitMetricsBuckets { + let RateLimitMetrics { quotas: quota, mut buckets, scoping, @@ -2329,7 +2329,7 @@ impl EnvelopeProcessorService { EnvelopeProcessor::ProcessMetrics(message) => self.handle_process_metrics(*message), EnvelopeProcessor::EncodeEnvelope(message) => self.handle_encode_envelope(*message), #[cfg(feature = "processing")] - EnvelopeProcessor::RateLimitMetricsBuckets(message) => { + EnvelopeProcessor::RateLimitMetrics(message) => { self.handle_rate_limit_metrics_buckets(message) } } diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 7ae697f0965..641be77ed78 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -18,16 +18,15 @@ use relay_statsd::metric; use crate::actors::envelopes::{EnvelopeManager, SendMetrics}; use crate::actors::outcome::DiscardReason; use crate::actors::processor::ProcessEnvelope; -use crate::actors::project::{Project, ProjectState}; +#[cfg(feature = "processing")] +use crate::actors::processor::{EnvelopeProcessor, RateLimitMetrics}; +use crate::actors::project::{ExpiryState, Project, ProjectState}; use crate::actors::project_local::LocalProjectSource; use crate::actors::project_upstream::UpstreamProjectSource; use crate::envelope::Envelope; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{self, EnvelopeContext, GarbageDisposal, Response}; -use super::processor::{EnvelopeProcessor, RateLimitMetricsBuckets}; -use super::project::ExpiryState; - #[cfg(feature = "processing")] use {crate::actors::project_redis::RedisProjectSource, relay_common::clone}; @@ -639,7 +638,7 @@ impl Handler for ProjectCache { .collect::>(); // TODO: bypass if quota is empty? 
- EnvelopeProcessor::from_registry().send(RateLimitMetricsBuckets { + EnvelopeProcessor::from_registry().send(RateLimitMetrics { quotas, buckets, scoping, From 71c20a39390f95d00eba98e30789a50957fa0008 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 10 Oct 2022 13:28:18 +0200 Subject: [PATCH 06/34] test: Fix integration test to match rate limiter behavior --- relay-server/src/actors/processor.rs | 21 ++++++++++----------- tests/integration/test_store.py | 27 +++++++++++++-------------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 7da6bdc48da..b4048e29df9 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -2221,18 +2221,11 @@ impl EnvelopeProcessorService { scoping: &scoping, }; - // For every metrics bucket, one of three cases applies - // 1. The metric is not in the transaction namespace. - // TODO: Should we rate limit sessions as well? - // 2. The metric is in the transaction namespace. - // a. The metric is "duration". This is extracted exactly once for every processed - // transaction, so we use it to count against the quota. - // b. For any other metric in the transaction namespace, we check the limit but we - // do not count against the quota. + // Check which buckets are rate limited: buckets.retain(|bucket| { // NOTE: The order of buckets is not deterministic here, because `Aggregator::pop_flush_buckets` // produces a hash map. - // FIXME: Add namespace enum to `Bucket` so we don't have to parse the MRI for every bucket. + // TODO: Add namespace enum to `Bucket` so we don't have to parse the MRI for every bucket? let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { Ok(mri) => mri, Err(_) => { @@ -2241,23 +2234,29 @@ impl EnvelopeProcessorService { } }; + // Keep all metrics that are not transaction related: if !matches!(mri.namespace, MetricNamespace::Transactions) { // TODO: Should we rate limit sessions as well? return true; } - let quantity = if mri.name == "duration" { + let transaction_count = if mri.name == "duration" { + // The "duration" metric is extracted exactly once for every processed transaction, + // so we can use it to count the number of transactions. bucket.value.len() } else { + // For any other metric in the transaction namespace, we check the limit with quantity=0 + // so transactions are not double counted against the quota. 0 }; // If necessary, we could minimize the number of calls to the rate limiter // by grouping buckets here somehow. But be aware that that would mean we might // under-accept, because the sum of all buckets might exceed the quota, but not individual ones. 
- match rate_limiter.is_rate_limited("a, item_scoping, quantity) { + match rate_limiter.is_rate_limited("a, item_scoping, transaction_count) { Ok(limits) => { let is_limited = limits.is_limited(); + if is_limited { ProjectCache::from_registry() .do_send(UpdateRateLimits::new(scoping.project_key, limits)); diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 12fd90f93ff..9b254469969 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -500,9 +500,7 @@ def test_rate_limit_metrics_buckets( "id": "test_rate_limiting_{}".format(uuid.uuid4().hex), "scope": "key", "scopeId": six.text_type(key_id), - "categories": [ - "transactionprocessed" - ], # TODO: change to transaction_processed + "categories": ["transaction_processed"], "limit": 5, "window": 86400, "reasonCode": "get_lost", @@ -559,25 +557,26 @@ def send_buckets(buckets): send_buckets( [ # Duration metric, subtract 2 from quota. Now, quota is exceeded. - make_bucket("d:transactions/duration@millisecond", "d", [4, 5, 6]), + make_bucket("d:transactions/duration@millisecond", "d", [4, 5]), ], ) send_buckets( [ - # FCP buckets won't make it into kakfa. + # FCP buckets won't make it into kakfa, because we are exactly at the limit, + # and for quantity=0, the lua script does a >= check. make_bucket("d:transactions/measurements.fcp@millisecond", "d", 10 * [7.0]), - # Another three for duration, won't make it into kafka. ], ) send_buckets( [ + # Another three for duration, won't make it into kafka. make_bucket("d:transactions/duration@millisecond", "d", [7, 8, 9]), # Session metrics are still accepted. make_bucket("d:sessions/session@user", "s", [1254]), ], ) - # Expect two of 9 buckets to be dropped: + # Expect 2 of 9 buckets to be dropped: produced_buckets = [metrics_consumer.get_metric(timeout=1) for _ in range(7)] metrics_consumer.assert_empty() @@ -615,13 +614,13 @@ def send_buckets(buckets): "type": "d", "value": [1.0, 2.0, 3.0], }, - # { - # "name": "d:transactions/measurements.fcp@millisecond", - # "org_id": 1, - # "project_id": 42, - # "type": "d", - # "value": [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0], - # }, + { + "name": "d:transactions/duration@millisecond", + "org_id": 1, + "project_id": 42, + "type": "d", + "value": [4.0, 5.0], + }, { "name": "d:transactions/measurements.lcp@millisecond", "org_id": 1, From 86ff9f622349c04808aee74bedf09cedc4bd3e13 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 10 Oct 2022 15:58:55 +0200 Subject: [PATCH 07/34] doc: Add TODOs --- relay-server/src/actors/processor.rs | 11 +++++------ relay-server/src/actors/project.rs | 3 +++ relay-server/src/actors/project_cache.rs | 3 ++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index b4048e29df9..b1b569d6078 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -2225,7 +2225,6 @@ impl EnvelopeProcessorService { buckets.retain(|bucket| { // NOTE: The order of buckets is not deterministic here, because `Aggregator::pop_flush_buckets` // produces a hash map. - // TODO: Add namespace enum to `Bucket` so we don't have to parse the MRI for every bucket? 
let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { Ok(mri) => mri, Err(_) => { @@ -2235,8 +2234,8 @@ impl EnvelopeProcessorService { }; // Keep all metrics that are not transaction related: - if !matches!(mri.namespace, MetricNamespace::Transactions) { - // TODO: Should we rate limit sessions as well? + if mri.namespace != MetricNamespace::Transactions { + // Not limiting sessions for now. return true; } @@ -2250,9 +2249,7 @@ impl EnvelopeProcessorService { 0 }; - // If necessary, we could minimize the number of calls to the rate limiter - // by grouping buckets here somehow. But be aware that that would mean we might - // under-accept, because the sum of all buckets might exceed the quota, but not individual ones. + // TODO: Call rate limiter only once for each call of this function match rate_limiter.is_rate_limited("a, item_scoping, transaction_count) { Ok(limits) => { let is_limited = limits.is_limited(); @@ -2268,6 +2265,8 @@ impl EnvelopeProcessorService { } }); + // TODO: log outcomes for dropped buckets. () + if buckets.is_empty() { return; } diff --git a/relay-server/src/actors/project.rs b/relay-server/src/actors/project.rs index dc712fb2b37..d4c830889ad 100644 --- a/relay-server/src/actors/project.rs +++ b/relay-server/src/actors/project.rs @@ -600,6 +600,7 @@ impl Project { /// /// The buckets will be keyed underneath this project key. pub fn merge_buckets(&mut self, buckets: Vec) { + // TODO: rate limits if self.metrics_allowed() { Aggregator::from_registry().do_send(MergeBuckets::new(self.project_key, buckets)); } @@ -609,6 +610,8 @@ impl Project { /// /// The metrics will be keyed underneath this project key. pub fn insert_metrics(&mut self, metrics: Vec) { + // TODO: rate limits + // TODO: code sharing for incoming buckets, incoming metrics, outgoing buckets if self.metrics_allowed() { Aggregator::from_registry().do_send(InsertMetrics::new(self.project_key, metrics)); } diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 641be77ed78..df1dd0cb6e7 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -637,7 +637,8 @@ impl Handler for ProjectCache { .map(Quota::clone) .collect::>(); - // TODO: bypass if quota is empty? + // TODO: bypass if quota is 0 or rate limit cached on the project? + // (run project.rate_limits.check_with_quotas) EnvelopeProcessor::from_registry().send(RateLimitMetrics { quotas, buckets, From 10d9dcf78c01c06d9cfafba65c8583c151caf446 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 10 Oct 2022 16:21:04 +0200 Subject: [PATCH 08/34] test: Parametrize test to cover exact exhaustion vs. exceeding exhaustion --- tests/integration/test_store.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 9b254469969..49af5fb84a1 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -471,9 +471,15 @@ def test_processing_quotas( assert event["logentry"]["formatted"] == f"otherkey{i}" +@pytest.mark.parametrize("violating_bucket", [[4.0, 5.0], [4.0, 5.0, 6.0]]) def test_rate_limit_metrics_buckets( - mini_sentry, relay_with_processing, metrics_consumer, + mini_sentry, relay_with_processing, metrics_consumer, violating_bucket ): + """ + param violating_bucket is parametrized so we cover both cases: + 1. the quota is matched exactly + 2. 
quota is exceeded by one + """ bucket_interval = 1 # second relay = relay_with_processing( { @@ -556,14 +562,14 @@ def send_buckets(buckets): ) send_buckets( [ - # Duration metric, subtract 2 from quota. Now, quota is exceeded. - make_bucket("d:transactions/duration@millisecond", "d", [4, 5]), + # Duration metric, subtract from quota. This bucket is still accepted, but the rest + # will be exceeded. + make_bucket("d:transactions/duration@millisecond", "d", violating_bucket), ], ) send_buckets( [ - # FCP buckets won't make it into kakfa, because we are exactly at the limit, - # and for quantity=0, the lua script does a >= check. + # FCP buckets won't make it into kakfa make_bucket("d:transactions/measurements.fcp@millisecond", "d", 10 * [7.0]), ], ) @@ -619,7 +625,7 @@ def send_buckets(buckets): "org_id": 1, "project_id": 42, "type": "d", - "value": [4.0, 5.0], + "value": violating_bucket, }, { "name": "d:transactions/measurements.lcp@millisecond", From ca12d23bc43bacdeb6a44ab588b0c7eb939e24d4 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 10 Oct 2022 17:05:38 +0200 Subject: [PATCH 09/34] ref: Call rate limiter only once for a batch of buckets --- relay-server/src/actors/processor.rs | 94 ++++++++++++++++------------ 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index b1b569d6078..68aafcd24d3 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -2203,7 +2203,7 @@ impl EnvelopeProcessorService { let RateLimitMetrics { quotas: quota, - mut buckets, + buckets, scoping, partition_key, } = message; @@ -2221,51 +2221,65 @@ impl EnvelopeProcessorService { scoping: &scoping, }; - // Check which buckets are rate limited: - buckets.retain(|bucket| { - // NOTE: The order of buckets is not deterministic here, because `Aggregator::pop_flush_buckets` - // produces a hash map. - let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { - Ok(mri) => mri, - Err(_) => { - relay_log::error!("Invalid MRI: {}", bucket.name); - return true; + // Collect bucket information (bucket, transaction_count) + let mut buckets: Vec<_> = buckets + .into_iter() + .map(|bucket| { + let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { + Ok(mri) => mri, + Err(_) => { + relay_log::error!("Invalid MRI: {}", bucket.name); + return (bucket, None); + } + }; + + // Keep all metrics that are not transaction related: + if mri.namespace != MetricNamespace::Transactions { + // Not limiting sessions for now. + return (bucket, None); } - }; - // Keep all metrics that are not transaction related: - if mri.namespace != MetricNamespace::Transactions { - // Not limiting sessions for now. - return true; - } + if mri.name == "duration" { + // The "duration" metric is extracted exactly once for every processed transaction, + // so we can use it to count the number of transactions. + let count = bucket.value.len(); + (bucket, Some(count)) + } else { + // For any other metric in the transaction namespace, we check the limit with quantity=0 + // so transactions are not double counted against the quota. 
+ (bucket, Some(0)) + } + }) + .collect(); + + // Accumulate the total transaction count: + let transaction_count = + buckets.iter().fold( + None, + |acc, (_, transaction_count)| match transaction_count { + Some(count) => Some(acc.unwrap_or(0) + count), + None => acc, + }, + ); - let transaction_count = if mri.name == "duration" { - // The "duration" metric is extracted exactly once for every processed transaction, - // so we can use it to count the number of transactions. - bucket.value.len() - } else { - // For any other metric in the transaction namespace, we check the limit with quantity=0 - // so transactions are not double counted against the quota. - 0 + // Call rate limiter if necessary: + if let Some(transaction_count) = transaction_count { + let limits = match rate_limiter.is_rate_limited("a, item_scoping, transaction_count) + { + Ok(limits) => limits, + Err(_) => todo!(), }; - // TODO: Call rate limiter only once for each call of this function - match rate_limiter.is_rate_limited("a, item_scoping, transaction_count) { - Ok(limits) => { - let is_limited = limits.is_limited(); + if limits.is_limited() { + ProjectCache::from_registry() + .do_send(UpdateRateLimits::new(scoping.project_key, limits)); - if is_limited { - ProjectCache::from_registry() - .do_send(UpdateRateLimits::new(scoping.project_key, limits)); - } - // only retain if not limited: - !is_limited - } - Err(_) => todo!(), - } - }); + // Only keep non-transaction buckets: + buckets.retain(|(_, count)| count.is_none()); - // TODO: log outcomes for dropped buckets. () + // TODO: log outcomes for dropped buckets. () + } + } if buckets.is_empty() { return; @@ -2273,7 +2287,7 @@ impl EnvelopeProcessorService { // All good, forward to envelope manager. EnvelopeManager::from_registry().send(SendMetrics { - buckets, + buckets: buckets.into_iter().map(|(b, _)| b).collect(), scoping, partition_key, }); From 699d0cba16ae9356db5704ce165decd43131dcd2 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 10 Oct 2022 17:20:59 +0200 Subject: [PATCH 10/34] feat: Emit outcome for rate limited metrics --- relay-server/src/actors/processor.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 68aafcd24d3..0df99927f4c 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -2271,13 +2271,29 @@ impl EnvelopeProcessorService { }; if limits.is_limited() { - ProjectCache::from_registry() - .do_send(UpdateRateLimits::new(scoping.project_key, limits)); - // Only keep non-transaction buckets: buckets.retain(|(_, count)| count.is_none()); - // TODO: log outcomes for dropped buckets. 
() + // Track outcome for the processed transactions we dropped here: + for limit in &limits { + if limit + .categories + .contains(&DataCategory::TransactionProcessed) + { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *item_scoping, + outcome: Outcome::RateLimited(limit.reason_code.clone()), + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } + } + + ProjectCache::from_registry() + .do_send(UpdateRateLimits::new(scoping.project_key, limits)); } } From 3f5bbb0a67bb36b4d66c6b5c81aba04e0733a265 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 10 Oct 2022 17:32:15 +0200 Subject: [PATCH 11/34] test: Check outcomes were produced --- relay-server/src/actors/processor.rs | 34 +++++++++++++----------- tests/integration/fixtures/processing.py | 8 +++++- tests/integration/test_store.py | 15 +++++++++-- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 0df99927f4c..d9f9939c3b4 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -2253,7 +2253,7 @@ impl EnvelopeProcessorService { .collect(); // Accumulate the total transaction count: - let transaction_count = + let total_transaction_count = buckets.iter().fold( None, |acc, (_, transaction_count)| match transaction_count { @@ -2263,7 +2263,7 @@ impl EnvelopeProcessorService { ); // Call rate limiter if necessary: - if let Some(transaction_count) = transaction_count { + if let Some(transaction_count) = total_transaction_count { let limits = match rate_limiter.is_rate_limited("a, item_scoping, transaction_count) { Ok(limits) => limits, @@ -2275,20 +2275,22 @@ impl EnvelopeProcessorService { buckets.retain(|(_, count)| count.is_none()); // Track outcome for the processed transactions we dropped here: - for limit in &limits { - if limit - .categories - .contains(&DataCategory::TransactionProcessed) - { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *item_scoping, - outcome: Outcome::RateLimited(limit.reason_code.clone()), - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); + if transaction_count > 0 { + for limit in &limits { + if limit + .categories + .contains(&DataCategory::TransactionProcessed) + { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *item_scoping, + outcome: Outcome::RateLimited(limit.reason_code.clone()), + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } } } diff --git a/tests/integration/fixtures/processing.py b/tests/integration/fixtures/processing.py index 2c42528bbeb..9777736b9fd 100644 --- a/tests/integration/fixtures/processing.py +++ b/tests/integration/fixtures/processing.py @@ -200,6 +200,8 @@ def category_value(category): return 4 if category == "session": return 5 + if category == "transaction_processed": + return 8 assert False, "invalid category" @@ -224,7 +226,7 @@ def get_outcome(self): assert len(outcomes) == 1, "More than one outcome was consumed" return outcomes[0] - def assert_rate_limited(self, reason, key_id=None, categories=None): + def 
assert_rate_limited(self, reason, key_id=None, categories=None, quantity=None): if categories is None: outcome = self.get_outcome() assert isinstance(outcome["category"], int) @@ -241,6 +243,10 @@ def assert_rate_limited(self, reason, key_id=None, categories=None): if key_id is not None: assert outcome["key_id"] == key_id + if quantity is not None: + count = sum(outcome["quantity"] for outcome in outcomes) + assert count == quantity + @pytest.fixture def events_consumer(kafka_consumer): diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 49af5fb84a1..6569dab3f0b 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -473,7 +473,11 @@ def test_processing_quotas( @pytest.mark.parametrize("violating_bucket", [[4.0, 5.0], [4.0, 5.0, 6.0]]) def test_rate_limit_metrics_buckets( - mini_sentry, relay_with_processing, metrics_consumer, violating_bucket + mini_sentry, + relay_with_processing, + metrics_consumer, + outcomes_consumer, + violating_bucket, ): """ param violating_bucket is parametrized so we cover both cases: @@ -492,6 +496,7 @@ def test_rate_limit_metrics_buckets( } ) metrics_consumer = metrics_consumer() + outcomes_consumer = outcomes_consumer() project_id = 42 projectconfig = mini_sentry.add_full_project_config(project_id) @@ -501,6 +506,8 @@ def test_rate_limit_metrics_buckets( public_keys = mini_sentry.get_dsn_public_key_configs(project_id) key_id = public_keys[0]["numericId"] + reason_code = uuid.uuid4().hex + projectconfig["config"]["quotas"] = [ { "id": "test_rate_limiting_{}".format(uuid.uuid4().hex), @@ -509,7 +516,7 @@ def test_rate_limit_metrics_buckets( "categories": ["transaction_processed"], "limit": 5, "window": 86400, - "reasonCode": "get_lost", + "reasonCode": reason_code, } ] @@ -643,6 +650,10 @@ def send_buckets(buckets): }, ] + outcomes_consumer.assert_rate_limited( + reason_code, key_id=key_id, categories=["transaction_processed"], quantity=3, + ) + def test_events_buffered_before_auth(relay, mini_sentry): evt = threading.Event() From 3c43037ce478f1cc35788144524630102cb3b33d Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 11 Oct 2022 08:56:20 +0200 Subject: [PATCH 12/34] doc: changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a88065b40b8..d14d10bc23c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,9 @@ - Limit the number of custom measurements per event. ([#1483](https://github.com/getsentry/relay/pull/1483))) - Add INP web vital as a measurement. ([#1487](https://github.com/getsentry/relay/pull/1487)) +- Enforce rate limits on metrics buckets using the transactions_processed quota. ([#1515](https://github.com/getsentry/relay/pull/1515)) -** Bug Fixes**: +**Bug Fixes**: - Make sure that non-processing Relays drop all invalid transactions. 
([#1513](https://github.com/getsentry/relay/pull/1513)) From 6f3f88915a97247d1df49d8978a5354eb72ce8db Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 11 Oct 2022 09:09:33 +0200 Subject: [PATCH 13/34] ref: clippy --- relay-server/src/actors/processor.rs | 10 ++++++---- relay-server/src/actors/project_cache.rs | 13 ++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index d9f9939c3b4..57412f71ccc 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -32,15 +32,17 @@ use relay_general::store::{ClockDriftProcessor, LightNormalizationConfig}; use relay_general::types::{Annotated, Array, FromValue, Object, ProcessingAction, Value}; use relay_log::LogError; use relay_metrics::{Bucket, InsertMetrics, MergeBuckets, Metric}; -use relay_quotas::{DataCategory, Quota, ReasonCode, Scoping}; +use relay_quotas::{DataCategory, ReasonCode}; +#[cfg(feature = "processing")] +use relay_quotas::{Quota, Scoping}; use relay_redis::RedisPool; use relay_sampling::{DynamicSamplingContext, RuleId}; use relay_statsd::metric; use relay_system::{Addr, FromMessage, NoResponse, Service}; -use crate::actors::envelopes::{ - EnvelopeManager, SendEnvelope, SendEnvelopeError, SendMetrics, SubmitEnvelope, -}; +#[cfg(feature = "processing")] +use crate::actors::envelopes::SendMetrics; +use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::actors::project::{Feature, ProjectState}; use crate::actors::project_cache::ProjectCache; diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 0f56847d15b..136cd3b0286 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -6,11 +6,15 @@ use actix_web::ResponseError; use failure::Fail; use futures01::{future, Future}; -use relay_common::{DataCategory, ProjectKey}; +use relay_common::ProjectKey; +#[cfg(feature = "processing")] +use relay_common::{clone, DataCategory}; use relay_config::{Config, RelayMode}; use relay_metrics::{self, AggregateMetricsError, FlushBuckets, InsertMetrics, MergeBuckets}; -use relay_quotas::{Quota, RateLimits}; +#[cfg(feature = "processing")] +use relay_quotas::Quota; +use relay_quotas::RateLimits; use relay_redis::RedisPool; use relay_statsd::metric; @@ -22,15 +26,14 @@ use crate::actors::processor::ProcessEnvelope; use crate::actors::processor::{EnvelopeProcessor, RateLimitMetrics}; use crate::actors::project::{ExpiryState, Project, ProjectState}; use crate::actors::project_local::LocalProjectSource; +#[cfg(feature = "processing")] +use crate::actors::project_redis::RedisProjectSource; use crate::actors::project_upstream::UpstreamProjectSource; use crate::envelope::Envelope; use crate::service::Registry; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{self, EnvelopeContext, GarbageDisposal, Response}; -#[cfg(feature = "processing")] -use {crate::actors::project_redis::RedisProjectSource, relay_common::clone}; - #[derive(Fail, Debug)] pub enum ProjectError { #[fail(display = "failed to fetch project state from upstream")] From b9bcd98334d2e57b4ae54dcb79b283c7498b023b Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 11 Oct 2022 12:01:41 +0200 Subject: [PATCH 14/34] wip: Move logic from processor to project_cache --- relay-server/src/actors/processor.rs 
| 140 ++++--------------- relay-server/src/actors/project.rs | 5 + relay-server/src/actors/project_cache.rs | 164 +++++++++++++++++++---- 3 files changed, 169 insertions(+), 140 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 57412f71ccc..e741397151e 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -34,14 +34,12 @@ use relay_log::LogError; use relay_metrics::{Bucket, InsertMetrics, MergeBuckets, Metric}; use relay_quotas::{DataCategory, ReasonCode}; #[cfg(feature = "processing")] -use relay_quotas::{Quota, Scoping}; +use relay_quotas::{Quota, RateLimits, Scoping}; use relay_redis::RedisPool; use relay_sampling::{DynamicSamplingContext, RuleId}; use relay_statsd::metric; -use relay_system::{Addr, FromMessage, NoResponse, Service}; +use relay_system::{Addr, AsyncResponse, FromMessage, NoResponse, Sender, Service}; -#[cfg(feature = "processing")] -use crate::actors::envelopes::SendMetrics; use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::actors::project::{Feature, ProjectState}; @@ -525,15 +523,15 @@ impl EncodeEnvelope { /// #[cfg(feature = "processing")] #[derive(Debug)] -pub struct RateLimitMetrics { - /// TODO: docs +pub struct CheckRateLimits { + /// Quotas to be checked. pub quotas: Vec, - /// The pre-aggregated metric buckets. - pub buckets: Vec, - /// Scoping information for the metrics. + /// Category to check against. + pub category: DataCategory, + /// Scoping information for the project. pub scoping: Scoping, - /// The key of the logical partition to send the metrics to. - pub partition_key: Option, + /// By how much the quota usage is incremented. + pub quantity: usize, } /// Enforce rate limits on a metrics bucket and forward it to EnvelopeManager @@ -546,7 +544,7 @@ pub enum EnvelopeProcessor { ProcessMetrics(Box), EncodeEnvelope(Box), #[cfg(feature = "processing")] - RateLimitMetrics(RateLimitMetrics), // TODO: why are the others Box<_>? + CheckRateLimits(CheckRateLimits, Sender), // TODO: why are the others Box<_>? } impl EnvelopeProcessor { @@ -582,11 +580,11 @@ impl FromMessage for EnvelopeProcessor { } #[cfg(feature = "processing")] -impl FromMessage for EnvelopeProcessor { - type Response = NoResponse; +impl FromMessage for EnvelopeProcessor { + type Response = AsyncResponse; - fn from_message(message: RateLimitMetrics, _: ()) -> Self { - Self::RateLimitMetrics(message) + fn from_message(message: CheckRateLimits, sender: Sender) -> Self { + Self::CheckRateLimits(message, sender) } } @@ -2198,23 +2196,23 @@ impl EnvelopeProcessorService { } } + /// Returns `true` if a rate limit is active. 
#[cfg(feature = "processing")] - fn handle_rate_limit_metrics_buckets(&self, message: RateLimitMetrics) { - use relay_metrics::{MetricNamespace, MetricResourceIdentifier}; + fn handle_rate_limit_metrics_buckets(&self, message: CheckRateLimits) -> RateLimits { use relay_quotas::ItemScoping; - let RateLimitMetrics { - quotas: quota, - buckets, + let CheckRateLimits { + quotas, + category, scoping, - partition_key, + quantity, } = message; let rate_limiter = match self.rate_limiter.as_ref() { Some(rate_limiter) => rate_limiter, None => { relay_log::error!("handle_rate_limit_metrics_buckets called without rate limiter"); - return; + return RateLimits::new(); // empty } }; @@ -2223,94 +2221,12 @@ impl EnvelopeProcessorService { scoping: &scoping, }; - // Collect bucket information (bucket, transaction_count) - let mut buckets: Vec<_> = buckets - .into_iter() - .map(|bucket| { - let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { - Ok(mri) => mri, - Err(_) => { - relay_log::error!("Invalid MRI: {}", bucket.name); - return (bucket, None); - } - }; - - // Keep all metrics that are not transaction related: - if mri.namespace != MetricNamespace::Transactions { - // Not limiting sessions for now. - return (bucket, None); - } - - if mri.name == "duration" { - // The "duration" metric is extracted exactly once for every processed transaction, - // so we can use it to count the number of transactions. - let count = bucket.value.len(); - (bucket, Some(count)) - } else { - // For any other metric in the transaction namespace, we check the limit with quantity=0 - // so transactions are not double counted against the quota. - (bucket, Some(0)) - } - }) - .collect(); - - // Accumulate the total transaction count: - let total_transaction_count = - buckets.iter().fold( - None, - |acc, (_, transaction_count)| match transaction_count { - Some(count) => Some(acc.unwrap_or(0) + count), - None => acc, - }, - ); - - // Call rate limiter if necessary: - if let Some(transaction_count) = total_transaction_count { - let limits = match rate_limiter.is_rate_limited("a, item_scoping, transaction_count) - { - Ok(limits) => limits, - Err(_) => todo!(), - }; - - if limits.is_limited() { - // Only keep non-transaction buckets: - buckets.retain(|(_, count)| count.is_none()); - - // Track outcome for the processed transactions we dropped here: - if transaction_count > 0 { - for limit in &limits { - if limit - .categories - .contains(&DataCategory::TransactionProcessed) - { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *item_scoping, - outcome: Outcome::RateLimited(limit.reason_code.clone()), - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); - } - } - } - - ProjectCache::from_registry() - .do_send(UpdateRateLimits::new(scoping.project_key, limits)); - } - } - - if buckets.is_empty() { - return; - } + let limits = match rate_limiter.is_rate_limited("a, item_scoping, quantity) { + Ok(limits) => limits, + Err(_) => todo!(), + }; - // All good, forward to envelope manager. 
- EnvelopeManager::from_registry().send(SendMetrics { - buckets: buckets.into_iter().map(|(b, _)| b).collect(), - scoping, - partition_key, - }); + limits } fn encode_envelope_body( @@ -2361,8 +2277,8 @@ impl EnvelopeProcessorService { EnvelopeProcessor::ProcessMetrics(message) => self.handle_process_metrics(*message), EnvelopeProcessor::EncodeEnvelope(message) => self.handle_encode_envelope(*message), #[cfg(feature = "processing")] - EnvelopeProcessor::RateLimitMetrics(message) => { - self.handle_rate_limit_metrics_buckets(message) + EnvelopeProcessor::CheckRateLimits(message, sender) => { + sender.send(self.handle_rate_limit_metrics_buckets(message)); } } } diff --git a/relay-server/src/actors/project.rs b/relay-server/src/actors/project.rs index 8ec5abbe7a1..f269c9ac276 100644 --- a/relay-server/src/actors/project.rs +++ b/relay-server/src/actors/project.rs @@ -585,6 +585,11 @@ impl Project { } } + /// The rate limits that are active for this project. + pub fn rate_limits(&self) -> &RateLimits { + &self.rate_limits + } + /// The last time the project state was updated pub fn last_updated_at(&self) -> Instant { self.last_updated_at diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 136cd3b0286..08e5689c5fa 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -6,15 +6,18 @@ use actix_web::ResponseError; use failure::Fail; use futures01::{future, Future}; -use relay_common::ProjectKey; #[cfg(feature = "processing")] use relay_common::{clone, DataCategory}; +use relay_common::{ProjectKey, UnixTimestamp}; use relay_config::{Config, RelayMode}; -use relay_metrics::{self, AggregateMetricsError, FlushBuckets, InsertMetrics, MergeBuckets}; +use relay_metrics::{ + self, AggregateMetricsError, Bucket, FlushBuckets, InsertMetrics, MergeBuckets, + MetricNamespace, MetricResourceIdentifier, +}; #[cfg(feature = "processing")] use relay_quotas::Quota; -use relay_quotas::RateLimits; +use relay_quotas::{ItemScoping, RateLimits}; use relay_redis::RedisPool; use relay_statsd::metric; @@ -23,7 +26,7 @@ use crate::actors::envelopes::{EnvelopeManager, SendMetrics}; use crate::actors::outcome::DiscardReason; use crate::actors::processor::ProcessEnvelope; #[cfg(feature = "processing")] -use crate::actors::processor::{EnvelopeProcessor, RateLimitMetrics}; +use crate::actors::processor::{CheckRateLimits, EnvelopeProcessor}; use crate::actors::project::{ExpiryState, Project, ProjectState}; use crate::actors::project_local::LocalProjectSource; #[cfg(feature = "processing")] @@ -34,6 +37,8 @@ use crate::service::Registry; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{self, EnvelopeContext, GarbageDisposal, Response}; +use super::outcome::{Outcome, TrackOutcome}; + #[derive(Fail, Debug)] pub enum ProjectError { #[fail(display = "failed to fetch project state from upstream")] @@ -126,6 +131,127 @@ impl ProjectCache { Project::new(project_key, config) }) } + + /// Remove rate limited metrics buckets and track outcomes for them. 
+ async fn rate_limit_buckets( + &self, + buckets: Vec, + project: &Project, + project_state: &ProjectState, + ) -> Vec { + let quotas = project_state + .config + .quotas + .iter() + .filter(|q| q.categories.contains(&DataCategory::TransactionProcessed)) + .map(Quota::clone) + .collect::>(); + + // Collect bucket information (bucket, transaction_count) + let mut annotated_buckets: Vec<_> = buckets + .into_iter() + .map(|bucket| { + let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { + Ok(mri) => mri, + Err(_) => { + relay_log::error!("Invalid MRI: {}", bucket.name); + return (bucket, None); + } + }; + + // Keep all metrics that are not transaction related: + if mri.namespace != MetricNamespace::Transactions { + // Not limiting sessions for now. + return (bucket, None); + } + + if mri.name == "duration" { + // The "duration" metric is extracted exactly once for every processed transaction, + // so we can use it to count the number of transactions. + let count = bucket.value.len(); + (bucket, Some(count)) + } else { + // For any other metric in the transaction namespace, we check the limit with quantity=0 + // so transactions are not double counted against the quota. + (bucket, Some(0)) + } + }) + .collect(); + + // Accumulate the total transaction count: + let total_transaction_count = + annotated_buckets + .iter() + .fold( + None, + |acc, (_, transaction_count)| match transaction_count { + Some(count) => Some(acc.unwrap_or(0) + count), + None => acc, + }, + ); + + // Check rate limits if necessary: + if let Some(transaction_count) = total_transaction_count { + let item_scoping = ItemScoping { + category: DataCategory::TransactionProcessed, + scoping: project.scoping(), + }; + let rate_limits = project + .rate_limits() + .check_with_quotas(quotas.as_slice(), item_scoping); + + #[cfg(feature = "processing")] + if processing() && !rate_limits.is_limited() { + // Await rate limits from redis before continuing. + let res = EnvelopeProcessor::from_registry().send(CheckRateLimits { + quotas, + category: DataCategory::TransactionProcessed, + scoping, + quantity: transaction_count, + }); + + let limits = res.await; + + let redis_rate_limits = match limits { + Ok(limits) => limits, + Err(_) => todo!(), + }; + + rate_limits.merge(redis_rate_limits); + + // Also update our own cache: + project.merge_rate_limits(rate_limits); + } + + if rate_limits.is_limited() { + // Only keep non-transaction buckets: + annotated_buckets.retain(|(_, count)| count.is_none()); + + // Track outcome for the processed transactions we dropped here: + if transaction_count > 0 { + for limit in &rate_limits { + if limit + .categories + .contains(&DataCategory::TransactionProcessed) + { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *item_scoping, + outcome: Outcome::RateLimited(limit.reason_code.clone()), + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } + } + } + } + } + + // Restore vector of buckets + annotated_buckets.into_iter().map(|(b, _)| b).collect() + } } impl Actor for ProjectCache { @@ -628,34 +754,16 @@ impl Handler for ProjectCache { return; } - // In processing mode, let the Processor rate limit the outgoing metrics bucket. 
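The annotation step above is the heart of the bucket rate limiting: only `transactions/duration` carries a real transaction count, other transaction metrics get `Some(0)`, and unrelated namespaces get `None` and are never dropped. A self-contained sketch of that counting logic, using naive string splitting as a stand-in for `relay_metrics::MetricResourceIdentifier` and a hypothetical simplified `Bucket` type:

    /// Simplified stand-in for a metric bucket: an MRI-style name plus its values.
    struct Bucket {
        name: String,
        values: Vec<f64>,
    }

    /// Number of transactions represented by a bucket, if it is transaction related.
    fn transaction_quantity(mri: &str, value_count: usize) -> Option<usize> {
        // "d:transactions/duration@millisecond" -> type "d", namespace "transactions", name "duration"
        let without_type = mri.split_once(':').map_or(mri, |(_, rest)| rest);
        let (namespace, rest) = without_type.split_once('/')?;
        if namespace != "transactions" {
            // Not transaction related (e.g. sessions); never limited here.
            return None;
        }
        let name = rest.split('@').next().unwrap_or(rest);
        if name == "duration" {
            // Extracted exactly once per transaction, so its value count is the transaction count.
            Some(value_count)
        } else {
            // Other transaction metrics are checked with quantity 0 to avoid double counting.
            Some(0)
        }
    }

    fn main() {
        let buckets = vec![
            Bucket { name: "d:transactions/duration@millisecond".into(), values: vec![1.0, 2.0, 3.0] },
            Bucket { name: "d:transactions/measurements.lcp@millisecond".into(), values: vec![2.5] },
            Bucket { name: "s:sessions/user@none".into(), values: vec![1.0] },
        ];

        let annotated: Vec<(Bucket, Option<usize>)> = buckets
            .into_iter()
            .map(|bucket| {
                let count = transaction_quantity(&bucket.name, bucket.values.len());
                (bucket, count)
            })
            .collect();

        // Accumulate the total transaction count exactly like the fold above.
        let total: Option<usize> = annotated.iter().fold(None, |acc, (_, count)| match count {
            Some(count) => Some(acc.unwrap_or(0) + *count),
            None => acc,
        });
        assert_eq!(total, Some(3));
    }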
- #[cfg(feature = "processing")] - if self.config.processing_enabled() { - let quotas = project_state - .config - .quotas - .iter() - .filter(|q| q.categories.contains(&DataCategory::TransactionProcessed)) - .map(Quota::clone) - .collect::>(); - - // TODO: bypass if quota is 0 or rate limit cached on the project? - // (run project.rate_limits.check_with_quotas) - EnvelopeProcessor::from_registry().send(RateLimitMetrics { - quotas, + let buckets = self + .rate_limit_buckets(buckets, project, project_state.as_ref()) + .into_actor(); + + if !buckets.is_empty() { + EnvelopeManager::from_registry().send(SendMetrics { buckets, scoping, partition_key, }); - - return; } - - // In non-processing mode, send the buckets to the envelope manager directly. - EnvelopeManager::from_registry().send(SendMetrics { - buckets, - scoping, - partition_key, - }); } } From 308a0660718001edee75afdcab2322a1a15844f4 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 11 Oct 2022 16:21:58 +0200 Subject: [PATCH 15/34] wip --- relay-server/src/actors/processor.rs | 2 +- relay-server/src/actors/project_cache.rs | 46 ++++++++++++++---------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index e741397151e..151aef803ce 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -2221,7 +2221,7 @@ impl EnvelopeProcessorService { scoping: &scoping, }; - let limits = match rate_limiter.is_rate_limited("a, item_scoping, quantity) { + let limits = match rate_limiter.is_rate_limited("as, item_scoping, quantity) { Ok(limits) => limits, Err(_) => todo!(), }; diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 08e5689c5fa..caac7219d92 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -17,7 +17,7 @@ use relay_metrics::{ #[cfg(feature = "processing")] use relay_quotas::Quota; -use relay_quotas::{ItemScoping, RateLimits}; +use relay_quotas::{ItemScoping, RateLimits, Scoping}; use relay_redis::RedisPool; use relay_statsd::metric; @@ -136,8 +136,10 @@ impl ProjectCache { async fn rate_limit_buckets( &self, buckets: Vec, - project: &Project, + project: &mut Project, project_state: &ProjectState, + scoping: &Scoping, + processing_enabled: bool, ) -> Vec { let quotas = project_state .config @@ -194,19 +196,19 @@ impl ProjectCache { if let Some(transaction_count) = total_transaction_count { let item_scoping = ItemScoping { category: DataCategory::TransactionProcessed, - scoping: project.scoping(), + scoping, }; - let rate_limits = project + let mut rate_limits = project .rate_limits() .check_with_quotas(quotas.as_slice(), item_scoping); #[cfg(feature = "processing")] - if processing() && !rate_limits.is_limited() { + if processing_enabled && !rate_limits.is_limited() { // Await rate limits from redis before continuing. 
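The control flow being added here is deliberately two-tiered: cached project rate limits are consulted first, and the Redis limiter is only asked when nothing applies yet and processing is enabled, so a flush that is already known to be limited never pays the Redis round trip. A rough sketch of that shape with hypothetical stand-in types (tokio is assumed for the async runtime; the real types come from `relay_quotas`):

    use std::future::Future;

    #[derive(Clone, Copy, Default)]
    struct RateLimits {
        limited: bool,
    }

    impl RateLimits {
        fn is_limited(&self) -> bool {
            self.limited
        }
        fn merge(&mut self, other: RateLimits) {
            self.limited |= other.limited;
        }
    }

    /// Consult the cached limits first and only fall through to the (slow) Redis
    /// check when nothing applies yet and processing is enabled.
    async fn effective_limits(
        cached: RateLimits,
        processing_enabled: bool,
        redis_check: impl Future<Output = RateLimits>,
    ) -> RateLimits {
        let mut limits = cached;
        if processing_enabled && !limits.is_limited() {
            // In Relay the merged result is also written back to the project cache.
            limits.merge(redis_check.await);
        }
        limits
    }

    #[tokio::main]
    async fn main() {
        let cached = RateLimits { limited: false };
        let limits = effective_limits(cached, true, async { RateLimits { limited: true } }).await;
        assert!(limits.is_limited());
    }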
let res = EnvelopeProcessor::from_registry().send(CheckRateLimits { quotas, category: DataCategory::TransactionProcessed, - scoping, + scoping: *scoping, quantity: transaction_count, }); @@ -220,7 +222,7 @@ impl ProjectCache { rate_limits.merge(redis_rate_limits); // Also update our own cache: - project.merge_rate_limits(rate_limits); + project.merge_rate_limits(rate_limits.clone()); } if rate_limits.is_limited() { @@ -711,7 +713,7 @@ impl Handler for ProjectCache { impl Handler for ProjectCache { type Result = (); - fn handle(&mut self, message: FlushBuckets, _context: &mut Self::Context) -> Self::Result { + fn handle(&mut self, message: FlushBuckets, context: &mut Self::Context) -> Self::Result { let FlushBuckets { project_key, partition_key, @@ -754,16 +756,24 @@ impl Handler for ProjectCache { return; } - let buckets = self - .rate_limit_buckets(buckets, project, project_state.as_ref()) - .into_actor(); + let future = futures::compat::Compat::new(self.rate_limit_buckets( + buckets, + project, + project_state.as_ref(), + &scoping, + self.config.processing_enabled(), + )); + + let future = future.and_then(|buckets| { + if !buckets.is_empty() { + EnvelopeManager::from_registry().send(SendMetrics { + buckets, + scoping, + partition_key, + }); + } + }); - if !buckets.is_empty() { - EnvelopeManager::from_registry().send(SendMetrics { - buckets, - scoping, - partition_key, - }); - } + context.spawn(future); } } From afc84a411451f0632e711ea2140eaea9fe5f4f36 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 11 Oct 2022 18:29:16 +0200 Subject: [PATCH 16/34] fix: Get async fn working --- relay-server/src/actors/project_cache.rs | 288 ++++++++++++----------- 1 file changed, 147 insertions(+), 141 deletions(-) diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index caac7219d92..2ef4762f1d1 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -2,8 +2,10 @@ use std::sync::Arc; use std::time::Instant; use actix::prelude::*; +use actix_web::http::header::CacheDirective; use actix_web::ResponseError; use failure::Fail; +use futures::{FutureExt, TryFuture, TryFutureExt}; use futures01::{future, Future}; #[cfg(feature = "processing")] @@ -50,6 +52,120 @@ pub enum ProjectError { impl ResponseError for ProjectError {} +/// Remove rate limited metrics buckets and track outcomes for them. +async fn rate_limit_buckets( + buckets: Vec, + quotas: Vec, + cached_rate_limits: &RateLimits, + scoping: Scoping, + processing_enabled: bool, +) -> Vec { + dbg!("DOING THE RATE LIMITING"); + + // Collect bucket information (bucket, transaction_count) + let mut annotated_buckets: Vec<_> = buckets + .into_iter() + .map(|bucket| { + let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { + Ok(mri) => mri, + Err(_) => { + relay_log::error!("Invalid MRI: {}", bucket.name); + return (bucket, None); + } + }; + + // Keep all metrics that are not transaction related: + if mri.namespace != MetricNamespace::Transactions { + // Not limiting sessions for now. + return (bucket, None); + } + + if mri.name == "duration" { + // The "duration" metric is extracted exactly once for every processed transaction, + // so we can use it to count the number of transactions. + let count = bucket.value.len(); + (bucket, Some(count)) + } else { + // For any other metric in the transaction namespace, we check the limit with quantity=0 + // so transactions are not double counted against the quota. 
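The `futures::compat::Compat` wrapper in the handler above exists because `rate_limit_buckets` is an `async fn` (futures 0.3) while actix 0.7 and `Context::spawn` still speak futures 0.1. A minimal sketch of that bridge, assuming `futures = { version = "0.3", features = ["compat"] }` and the 0.1 crate imported under the alias `futures01`; the `.wait()` call merely stands in for handing the future to actix:

    use futures::{FutureExt, TryFutureExt};
    use futures01::Future as _; // brings `.wait()` into scope

    async fn rate_limit_buckets_stub() -> Result<Vec<u32>, ()> {
        // Placeholder for the real async rate limiting; just return some "buckets".
        Ok(vec![1, 2, 3])
    }

    fn main() {
        // Box the std future so it is Unpin, then convert it into a futures 0.1
        // future that actix 0.7 APIs such as `into_actor` / `Context::spawn` accept.
        let compat_future = rate_limit_buckets_stub().boxed().compat();
        let buckets = compat_future.wait().expect("future failed");
        assert_eq!(buckets, vec![1, 2, 3]);
    }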
+ (bucket, Some(0)) + } + }) + .collect(); + + // Accumulate the total transaction count: + let total_transaction_count = annotated_buckets.iter().fold( + None, + |acc, (_, transaction_count)| match transaction_count { + Some(count) => Some(acc.unwrap_or(0) + count), + None => acc, + }, + ); + + // Check rate limits if necessary: + if let Some(transaction_count) = total_transaction_count { + let item_scoping = ItemScoping { + category: DataCategory::TransactionProcessed, + scoping: &scoping, + }; + let mut rate_limits = cached_rate_limits.check_with_quotas(quotas.as_slice(), item_scoping); + + #[cfg(feature = "processing")] + if processing_enabled && !rate_limits.is_limited() { + // Await rate limits from redis before continuing. + let res = EnvelopeProcessor::from_registry().send(CheckRateLimits { + quotas, + category: DataCategory::TransactionProcessed, + scoping, + quantity: transaction_count, + }); + + let limits = res.await; + + let redis_rate_limits = match limits { + Ok(limits) => limits, + Err(_) => todo!(), + }; + + rate_limits.merge(redis_rate_limits); + + // Also update the in-memory cache: + ProjectCache::from_registry().do_send(UpdateRateLimits::new( + scoping.project_key, + rate_limits.clone(), + )); + } + + if rate_limits.is_limited() { + // Only keep non-transaction buckets: + annotated_buckets.retain(|(_, count)| count.is_none()); + + // Track outcome for the processed transactions we dropped here: + if transaction_count > 0 { + for limit in &rate_limits { + if limit + .categories + .contains(&DataCategory::TransactionProcessed) + { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *item_scoping, + outcome: Outcome::RateLimited(limit.reason_code.clone()), + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } + } + } + } + } + + // Restore vector of buckets + annotated_buckets.into_iter().map(|(b, _)| b).collect() +} + pub struct ProjectCache { config: Arc, projects: hashbrown::HashMap, // need hashbrown because drain_filter is not stable in std yet @@ -131,129 +247,6 @@ impl ProjectCache { Project::new(project_key, config) }) } - - /// Remove rate limited metrics buckets and track outcomes for them. - async fn rate_limit_buckets( - &self, - buckets: Vec, - project: &mut Project, - project_state: &ProjectState, - scoping: &Scoping, - processing_enabled: bool, - ) -> Vec { - let quotas = project_state - .config - .quotas - .iter() - .filter(|q| q.categories.contains(&DataCategory::TransactionProcessed)) - .map(Quota::clone) - .collect::>(); - - // Collect bucket information (bucket, transaction_count) - let mut annotated_buckets: Vec<_> = buckets - .into_iter() - .map(|bucket| { - let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { - Ok(mri) => mri, - Err(_) => { - relay_log::error!("Invalid MRI: {}", bucket.name); - return (bucket, None); - } - }; - - // Keep all metrics that are not transaction related: - if mri.namespace != MetricNamespace::Transactions { - // Not limiting sessions for now. - return (bucket, None); - } - - if mri.name == "duration" { - // The "duration" metric is extracted exactly once for every processed transaction, - // so we can use it to count the number of transactions. 
- let count = bucket.value.len(); - (bucket, Some(count)) - } else { - // For any other metric in the transaction namespace, we check the limit with quantity=0 - // so transactions are not double counted against the quota. - (bucket, Some(0)) - } - }) - .collect(); - - // Accumulate the total transaction count: - let total_transaction_count = - annotated_buckets - .iter() - .fold( - None, - |acc, (_, transaction_count)| match transaction_count { - Some(count) => Some(acc.unwrap_or(0) + count), - None => acc, - }, - ); - - // Check rate limits if necessary: - if let Some(transaction_count) = total_transaction_count { - let item_scoping = ItemScoping { - category: DataCategory::TransactionProcessed, - scoping, - }; - let mut rate_limits = project - .rate_limits() - .check_with_quotas(quotas.as_slice(), item_scoping); - - #[cfg(feature = "processing")] - if processing_enabled && !rate_limits.is_limited() { - // Await rate limits from redis before continuing. - let res = EnvelopeProcessor::from_registry().send(CheckRateLimits { - quotas, - category: DataCategory::TransactionProcessed, - scoping: *scoping, - quantity: transaction_count, - }); - - let limits = res.await; - - let redis_rate_limits = match limits { - Ok(limits) => limits, - Err(_) => todo!(), - }; - - rate_limits.merge(redis_rate_limits); - - // Also update our own cache: - project.merge_rate_limits(rate_limits.clone()); - } - - if rate_limits.is_limited() { - // Only keep non-transaction buckets: - annotated_buckets.retain(|(_, count)| count.is_none()); - - // Track outcome for the processed transactions we dropped here: - if transaction_count > 0 { - for limit in &rate_limits { - if limit - .categories - .contains(&DataCategory::TransactionProcessed) - { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *item_scoping, - outcome: Outcome::RateLimited(limit.reason_code.clone()), - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); - } - } - } - } - } - - // Restore vector of buckets - annotated_buckets.into_iter().map(|(b, _)| b).collect() - } } impl Actor for ProjectCache { @@ -756,24 +749,37 @@ impl Handler for ProjectCache { return; } - let future = futures::compat::Compat::new(self.rate_limit_buckets( - buckets, - project, - project_state.as_ref(), - &scoping, - self.config.processing_enabled(), - )); - - let future = future.and_then(|buckets| { - if !buckets.is_empty() { - EnvelopeManager::from_registry().send(SendMetrics { - buckets, - scoping, - partition_key, - }); - } - }); + // Check rate limits: + let quotas = project_state + .config + .quotas + .iter() + .filter(|q| q.categories.contains(&DataCategory::TransactionProcessed)) + .map(Quota::clone) + .collect::>(); + + let processing_enabled = config.processing_enabled(); + let rate_limits = project.rate_limits().clone(); + let future = async move { + rate_limit_buckets(buckets, quotas, &rate_limits, scoping, processing_enabled).await; + Ok(()) + }; + + let future = future.boxed(); + + let future = future.compat(); + + // let future = future.and_then(|buckets| { + // if !buckets.is_empty() { + // EnvelopeManager::from_registry().send(SendMetrics { + // buckets, + // scoping, + // partition_key, + // }); + // } + // }); - context.spawn(future); + let actor_future = future.into_actor(self); + actor_future.spawn(context); } } From af5b358dfdd66dc9e1448f2a432994b737f53381 Mon Sep 17 00:00:00 2001 From: Joris 
Bayer Date: Tue, 11 Oct 2022 18:33:20 +0200 Subject: [PATCH 17/34] fix: Restore sending of buckets --- relay-server/src/actors/project_cache.rs | 27 ++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 2ef4762f1d1..3563774e194 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -60,8 +60,6 @@ async fn rate_limit_buckets( scoping: Scoping, processing_enabled: bool, ) -> Vec { - dbg!("DOING THE RATE LIMITING"); - // Collect bucket information (bucket, transaction_count) let mut annotated_buckets: Vec<_> = buckets .into_iter() @@ -761,23 +759,26 @@ impl Handler for ProjectCache { let processing_enabled = config.processing_enabled(); let rate_limits = project.rate_limits().clone(); let future = async move { - rate_limit_buckets(buckets, quotas, &rate_limits, scoping, processing_enabled).await; - Ok(()) + let buckets = + rate_limit_buckets(buckets, quotas, &rate_limits, scoping, processing_enabled) + .await; + Ok(buckets) }; let future = future.boxed(); let future = future.compat(); - // let future = future.and_then(|buckets| { - // if !buckets.is_empty() { - // EnvelopeManager::from_registry().send(SendMetrics { - // buckets, - // scoping, - // partition_key, - // }); - // } - // }); + let future = future.and_then(move |buckets| { + if !buckets.is_empty() { + EnvelopeManager::from_registry().send(SendMetrics { + buckets, + scoping, + partition_key, + }); + } + Ok(()) + }); let actor_future = future.into_actor(self); actor_future.spawn(context); From 4061ad518e385b1c45a9e509cf605dbe131a73bc Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 11 Oct 2022 18:39:10 +0200 Subject: [PATCH 18/34] ref: function name, docs, respect category arg --- relay-server/src/actors/processor.rs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 151aef803ce..e45e4f5bc7b 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -518,9 +518,7 @@ impl EncodeEnvelope { } } -/// Enforce rate limits on metrics buckets, and forward them to the EnvelopeManager if there -/// is still enough quota. -/// +/// TODO: docs #[cfg(feature = "processing")] #[derive(Debug)] pub struct CheckRateLimits { @@ -534,9 +532,6 @@ pub struct CheckRateLimits { pub quantity: usize, } -/// Enforce rate limits on a metrics bucket and forward it to EnvelopeManager -/// to be published. - /// CPU-intensive processing tasks for envelopes. #[derive(Debug)] pub enum EnvelopeProcessor { @@ -2196,9 +2191,9 @@ impl EnvelopeProcessorService { } } - /// Returns `true` if a rate limit is active. + /// Returns redis rate limits. 
#[cfg(feature = "processing")] - fn handle_rate_limit_metrics_buckets(&self, message: CheckRateLimits) -> RateLimits { + fn handle_check_rate_limits(&self, message: CheckRateLimits) -> RateLimits { use relay_quotas::ItemScoping; let CheckRateLimits { @@ -2217,16 +2212,14 @@ impl EnvelopeProcessorService { }; let item_scoping = ItemScoping { - category: DataCategory::TransactionProcessed, + category, scoping: &scoping, }; - let limits = match rate_limiter.is_rate_limited("as, item_scoping, quantity) { + match rate_limiter.is_rate_limited("as, item_scoping, quantity) { Ok(limits) => limits, Err(_) => todo!(), - }; - - limits + } } fn encode_envelope_body( @@ -2278,7 +2271,7 @@ impl EnvelopeProcessorService { EnvelopeProcessor::EncodeEnvelope(message) => self.handle_encode_envelope(*message), #[cfg(feature = "processing")] EnvelopeProcessor::CheckRateLimits(message, sender) => { - sender.send(self.handle_rate_limit_metrics_buckets(message)); + sender.send(self.handle_check_rate_limits(message)); } } } From e8fa434d3ffd2f5d1daa98b5f828212edaf52dd1 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 12 Oct 2022 10:07:17 +0200 Subject: [PATCH 19/34] ref: Replace async block by plain function call --- relay-server/src/actors/processor.rs | 27 ++++++------ relay-server/src/actors/project_cache.rs | 56 +++++++++++------------- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index e45e4f5bc7b..90abbf38758 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -16,6 +16,19 @@ use once_cell::sync::OnceCell; use serde_json::Value as SerdeValue; use tokio::sync::Semaphore; +use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; +use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; +use crate::actors::project::{Feature, ProjectState}; +use crate::actors::project_cache::ProjectCache; +use crate::actors::upstream::{SendRequest, UpstreamRelay}; +use crate::envelope::{AttachmentType, ContentType, Envelope, Item, ItemType}; +use crate::metrics_extraction::sessions::{extract_session_metrics, SessionMetricsConfig}; +use crate::metrics_extraction::transactions::extract_transaction_metrics; +use crate::service::{ServerError, REGISTRY}; +use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; +use crate::utils::{ + self, ChunkedFormDataAggregator, EnvelopeContext, ErrorBoundary, FormDataIter, SamplingResult, +}; use relay_auth::RelayVersion; use relay_common::{ProjectId, ProjectKey, UnixTimestamp}; use relay_config::{Config, HttpEncoding}; @@ -40,20 +53,6 @@ use relay_sampling::{DynamicSamplingContext, RuleId}; use relay_statsd::metric; use relay_system::{Addr, AsyncResponse, FromMessage, NoResponse, Sender, Service}; -use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; -use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; -use crate::actors::project::{Feature, ProjectState}; -use crate::actors::project_cache::ProjectCache; -use crate::actors::upstream::{SendRequest, UpstreamRelay}; -use crate::envelope::{AttachmentType, ContentType, Envelope, Item, ItemType}; -use crate::metrics_extraction::sessions::{extract_session_metrics, SessionMetricsConfig}; -use crate::metrics_extraction::transactions::extract_transaction_metrics; -use crate::service::{ServerError, REGISTRY}; -use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; -use crate::utils::{ 
- self, ChunkedFormDataAggregator, EnvelopeContext, ErrorBoundary, FormDataIter, SamplingResult, -}; - #[cfg(feature = "processing")] use { crate::actors::project_cache::UpdateRateLimits, diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 3563774e194..921e637f8b5 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -2,15 +2,14 @@ use std::sync::Arc; use std::time::Instant; use actix::prelude::*; -use actix_web::http::header::CacheDirective; use actix_web::ResponseError; use failure::Fail; -use futures::{FutureExt, TryFuture, TryFutureExt}; +// use futures::{FutureExt, TryFuture, TryFutureExt}; use futures01::{future, Future}; #[cfg(feature = "processing")] -use relay_common::{clone, DataCategory}; -use relay_common::{ProjectKey, UnixTimestamp}; +use relay_common::clone; +use relay_common::{DataCategory, ProjectKey, UnixTimestamp}; use relay_config::{Config, RelayMode}; use relay_metrics::{ self, AggregateMetricsError, Bucket, FlushBuckets, InsertMetrics, MergeBuckets, @@ -53,13 +52,15 @@ pub enum ProjectError { impl ResponseError for ProjectError {} /// Remove rate limited metrics buckets and track outcomes for them. +/// +/// Returns any buckets that were *not* rate limited. async fn rate_limit_buckets( buckets: Vec, quotas: Vec, - cached_rate_limits: &RateLimits, + cached_rate_limits: RateLimits, scoping: Scoping, - processing_enabled: bool, -) -> Vec { + check_redis: bool, +) -> Result, ()> { // Collect bucket information (bucket, transaction_count) let mut annotated_buckets: Vec<_> = buckets .into_iter() @@ -109,7 +110,7 @@ async fn rate_limit_buckets( let mut rate_limits = cached_rate_limits.check_with_quotas(quotas.as_slice(), item_scoping); #[cfg(feature = "processing")] - if processing_enabled && !rate_limits.is_limited() { + if check_redis && !rate_limits.is_limited() { // Await rate limits from redis before continuing. 
let res = EnvelopeProcessor::from_registry().send(CheckRateLimits { quotas, @@ -161,7 +162,7 @@ async fn rate_limit_buckets( } // Restore vector of buckets - annotated_buckets.into_iter().map(|(b, _)| b).collect() + Ok(annotated_buckets.into_iter().map(|(b, _)| b).collect()) } pub struct ProjectCache { @@ -755,30 +756,23 @@ impl Handler for ProjectCache { .filter(|q| q.categories.contains(&DataCategory::TransactionProcessed)) .map(Quota::clone) .collect::>(); - let processing_enabled = config.processing_enabled(); let rate_limits = project.rate_limits().clone(); - let future = async move { - let buckets = - rate_limit_buckets(buckets, quotas, &rate_limits, scoping, processing_enabled) - .await; - Ok(buckets) - }; - - let future = future.boxed(); - - let future = future.compat(); - - let future = future.and_then(move |buckets| { - if !buckets.is_empty() { - EnvelopeManager::from_registry().send(SendMetrics { - buckets, - scoping, - partition_key, - }); - } - Ok(()) - }); + use futures::{FutureExt, TryFutureExt}; + let future = rate_limit_buckets(buckets, quotas, rate_limits, scoping, processing_enabled) + .boxed() + .compat() + .and_then(move |buckets| { + // After checking rate limits, send buckets to envelope manager: + if !buckets.is_empty() { + EnvelopeManager::from_registry().send(SendMetrics { + buckets, + scoping, + partition_key, + }); + } + Ok(()) + }); let actor_future = future.into_actor(self); actor_future.spawn(context); From 9302fac7c7ae4cfad2882f4884bcd07b7c620af1 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 12 Oct 2022 10:57:48 +0200 Subject: [PATCH 20/34] ref: Track internal outcomes for errors --- relay-server/src/actors/processor.rs | 24 ++-- relay-server/src/actors/project_cache.rs | 133 +++++++++++++++-------- 2 files changed, 100 insertions(+), 57 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 90abbf38758..313fb156810 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -538,7 +538,10 @@ pub enum EnvelopeProcessor { ProcessMetrics(Box), EncodeEnvelope(Box), #[cfg(feature = "processing")] - CheckRateLimits(CheckRateLimits, Sender), // TODO: why are the others Box<_>? + CheckRateLimits( + CheckRateLimits, + Sender>, + ), // TODO: why are the others Box<_>? } impl EnvelopeProcessor { @@ -575,9 +578,12 @@ impl FromMessage for EnvelopeProcessor { #[cfg(feature = "processing")] impl FromMessage for EnvelopeProcessor { - type Response = AsyncResponse; + type Response = AsyncResponse>; - fn from_message(message: CheckRateLimits, sender: Sender) -> Self { + fn from_message( + message: CheckRateLimits, + sender: Sender>, + ) -> Self { Self::CheckRateLimits(message, sender) } } @@ -2192,7 +2198,10 @@ impl EnvelopeProcessorService { /// Returns redis rate limits. 
#[cfg(feature = "processing")] - fn handle_check_rate_limits(&self, message: CheckRateLimits) -> RateLimits { + fn handle_check_rate_limits( + &self, + message: CheckRateLimits, + ) -> Result { use relay_quotas::ItemScoping; let CheckRateLimits { @@ -2206,7 +2215,7 @@ impl EnvelopeProcessorService { Some(rate_limiter) => rate_limiter, None => { relay_log::error!("handle_rate_limit_metrics_buckets called without rate limiter"); - return RateLimits::new(); // empty + return Ok(RateLimits::new()); // empty } }; @@ -2215,10 +2224,7 @@ impl EnvelopeProcessorService { scoping: &scoping, }; - match rate_limiter.is_rate_limited("as, item_scoping, quantity) { - Ok(limits) => limits, - Err(_) => todo!(), - } + rate_limiter.is_rate_limited("as, item_scoping, quantity) } fn encode_envelope_body( diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 921e637f8b5..3860ff14aae 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -51,6 +51,48 @@ pub enum ProjectError { impl ResponseError for ProjectError {} +#[cfg(feature = "processing")] +async fn check_rate_limits( + transaction_count: usize, + cached_rate_limits: &RateLimits, + quotas: Vec, + item_scoping: ItemScoping<'_>, + check_redis: bool, +) -> Result { + let scoping = *item_scoping.scoping; + let mut rate_limits = cached_rate_limits.check_with_quotas(quotas.as_slice(), item_scoping); + + if check_redis && !rate_limits.is_limited() { + // Await rate limits from redis before continuing. + let request = EnvelopeProcessor::from_registry().send(CheckRateLimits { + quotas, + category: DataCategory::TransactionProcessed, + scoping, + quantity: transaction_count, + }); + + let redis_rate_limits = match request.await { + Ok(Ok(limits)) => limits, + Err(_) | Ok(Err(_)) => { + // There was either a SendError or a problem with redis. Track outcomes and + // drop all buckets. + return Err(Outcome::Invalid(DiscardReason::Internal)); + // TODO: track outcomes + } + }; + + rate_limits.merge(redis_rate_limits); + + // Also update the in-memory cache: + ProjectCache::from_registry().do_send(UpdateRateLimits::new( + scoping.project_key, + rate_limits.clone(), + )); + } + + Ok(rate_limits) +} + /// Remove rate limited metrics buckets and track outcomes for them. /// /// Returns any buckets that were *not* rate limited. @@ -107,58 +149,53 @@ async fn rate_limit_buckets( category: DataCategory::TransactionProcessed, scoping: &scoping, }; - let mut rate_limits = cached_rate_limits.check_with_quotas(quotas.as_slice(), item_scoping); - #[cfg(feature = "processing")] - if check_redis && !rate_limits.is_limited() { - // Await rate limits from redis before continuing. 
- let res = EnvelopeProcessor::from_registry().send(CheckRateLimits { - quotas, - category: DataCategory::TransactionProcessed, - scoping, - quantity: transaction_count, - }); - - let limits = res.await; - - let redis_rate_limits = match limits { - Ok(limits) => limits, - Err(_) => todo!(), - }; - - rate_limits.merge(redis_rate_limits); - - // Also update the in-memory cache: - ProjectCache::from_registry().do_send(UpdateRateLimits::new( - scoping.project_key, - rate_limits.clone(), - )); - } - - if rate_limits.is_limited() { - // Only keep non-transaction buckets: - annotated_buckets.retain(|(_, count)| count.is_none()); - - // Track outcome for the processed transactions we dropped here: - if transaction_count > 0 { - for limit in &rate_limits { - if limit - .categories - .contains(&DataCategory::TransactionProcessed) - { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *item_scoping, - outcome: Outcome::RateLimited(limit.reason_code.clone()), - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); + let rate_limits = check_rate_limits( + transaction_count, + &cached_rate_limits, + quotas, + item_scoping, + check_redis, + ) + .await; + + match rate_limits { + Ok(rate_limits) => { + // Only keep non-transaction buckets: + annotated_buckets.retain(|(_, count)| count.is_none()); + + // Track outcome for the processed transactions we dropped here: + if transaction_count > 0 { + for limit in &rate_limits { + if limit + .categories + .contains(&DataCategory::TransactionProcessed) + { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *item_scoping, + outcome: Outcome::RateLimited(limit.reason_code.clone()), + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } } } } - } + Err(outcome) => { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *item_scoping, + outcome, + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } + }; } // Restore vector of buckets From e66b46d2a433e2a604ebffea3d43bf2fbe76fe4f Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 12 Oct 2022 12:52:06 +0200 Subject: [PATCH 21/34] fix: Actually check rate limits --- relay-server/src/actors/project_cache.rs | 44 +++++++++++++----------- tests/integration/test_store.py | 6 +++- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 3860ff14aae..953af70e97f 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -51,7 +51,6 @@ pub enum ProjectError { impl ResponseError for ProjectError {} -#[cfg(feature = "processing")] async fn check_rate_limits( transaction_count: usize, cached_rate_limits: &RateLimits, @@ -62,6 +61,7 @@ async fn check_rate_limits( let scoping = *item_scoping.scoping; let mut rate_limits = cached_rate_limits.check_with_quotas(quotas.as_slice(), item_scoping); + #[cfg(feature = "processing")] if check_redis && !rate_limits.is_limited() { // Await rate limits from redis before continuing. 
let request = EnvelopeProcessor::from_registry().send(CheckRateLimits { @@ -77,7 +77,6 @@ async fn check_rate_limits( // There was either a SendError or a problem with redis. Track outcomes and // drop all buckets. return Err(Outcome::Invalid(DiscardReason::Internal)); - // TODO: track outcomes } }; @@ -161,25 +160,28 @@ async fn rate_limit_buckets( match rate_limits { Ok(rate_limits) => { - // Only keep non-transaction buckets: - annotated_buckets.retain(|(_, count)| count.is_none()); - - // Track outcome for the processed transactions we dropped here: - if transaction_count > 0 { - for limit in &rate_limits { - if limit - .categories - .contains(&DataCategory::TransactionProcessed) - { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *item_scoping, - outcome: Outcome::RateLimited(limit.reason_code.clone()), - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); + // If a rate limit is active, discard transaction buckets. + if rate_limits.is_limited() { + annotated_buckets.retain(|(_, count)| count.is_none()); + + // Track outcome for the processed transactions we dropped here: + if transaction_count > 0 { + // TODO: Should we really loop here? That creates multiple outcomes for the same bucket. + for limit in &rate_limits { + if limit + .categories + .contains(&DataCategory::TransactionProcessed) + { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *item_scoping, + outcome: Outcome::RateLimited(limit.reason_code.clone()), + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } } } } diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 6569dab3f0b..90ae2ae4b5d 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -590,7 +590,11 @@ def send_buckets(buckets): ) # Expect 2 of 9 buckets to be dropped: - produced_buckets = [metrics_consumer.get_metric(timeout=1) for _ in range(7)] + num_expected_buckets = 7 + + produced_buckets = [ + metrics_consumer.get_metric(timeout=1) for _ in range(num_expected_buckets) + ] metrics_consumer.assert_empty() # Sort buckets to prevent ordering flakiness: From 36393352852748f8c40fe4f8919d890d9c9462a6 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 12 Oct 2022 14:42:04 +0200 Subject: [PATCH 22/34] fix: Don't loop over rate limits --- relay-server/src/actors/processor.rs | 3 +- relay-server/src/actors/project_cache.rs | 35 ++++++++++-------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 313fb156810..686180e9b8a 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -51,7 +51,7 @@ use relay_quotas::{Quota, RateLimits, Scoping}; use relay_redis::RedisPool; use relay_sampling::{DynamicSamplingContext, RuleId}; use relay_statsd::metric; -use relay_system::{Addr, AsyncResponse, FromMessage, NoResponse, Sender, Service}; +use relay_system::{Addr, FromMessage, NoResponse, Service}; #[cfg(feature = "processing")] use { @@ -61,6 +61,7 @@ use { failure::ResultExt, relay_general::store::{GeoIpLookup, StoreConfig, StoreProcessor}, relay_quotas::{RateLimitingError, RedisRateLimiter}, + relay_system::{AsyncResponse, Sender}, 
symbolic_unreal::{Unreal4Error, Unreal4ErrorKind}, }; diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 953af70e97f..e2e9c016923 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -16,9 +16,7 @@ use relay_metrics::{ MetricNamespace, MetricResourceIdentifier, }; -#[cfg(feature = "processing")] -use relay_quotas::Quota; -use relay_quotas::{ItemScoping, RateLimits, Scoping}; +use relay_quotas::{ItemScoping, Quota, RateLimits, Scoping}; use relay_redis::RedisPool; use relay_statsd::metric; @@ -161,28 +159,23 @@ async fn rate_limit_buckets( match rate_limits { Ok(rate_limits) => { // If a rate limit is active, discard transaction buckets. - if rate_limits.is_limited() { + if let Some(limit) = rate_limits.into_iter().find(|r| { + !r.retry_after.expired() + && r.categories.contains(&DataCategory::TransactionProcessed) + }) { annotated_buckets.retain(|(_, count)| count.is_none()); // Track outcome for the processed transactions we dropped here: if transaction_count > 0 { - // TODO: Should we really loop here? That creates multiple outcomes for the same bucket. - for limit in &rate_limits { - if limit - .categories - .contains(&DataCategory::TransactionProcessed) - { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *item_scoping, - outcome: Outcome::RateLimited(limit.reason_code.clone()), - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); - } - } + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *item_scoping, + outcome: Outcome::RateLimited(limit.reason_code.clone()), + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); } } } From 650f0f4a328116dd9185b62fa60dba3273adb496 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 12 Oct 2022 14:48:34 +0200 Subject: [PATCH 23/34] ref: Add helper to RateLimits --- relay-quotas/src/rate_limit.rs | 8 +++++++- relay-server/src/actors/project_cache.rs | 5 +---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index bd6225714d4..677586b1396 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -2,7 +2,7 @@ use std::fmt; use std::str::FromStr; use std::time::{Duration, Instant}; -use relay_common::{ProjectId, ProjectKey}; +use relay_common::{DataCategory, ProjectId, ProjectKey}; use crate::quota::{DataCategories, ItemScoping, Quota, QuotaScope, ReasonCode, Scoping}; use crate::REJECT_ALL_SECS; @@ -235,6 +235,12 @@ impl RateLimits { self.iter().any(|limit| !limit.retry_after.expired()) } + /// Returns `true` if this instance contains active rate limits for the given data category. + pub fn limit_for(&self, category: DataCategory) -> Option<&RateLimit> { + self.iter() + .find(|limit| !limit.retry_after.expired() && limit.categories.contains(&category)) + } + /// Removes expired rate limits from this instance. 
pub fn clean_expired(&mut self) { self.limits.retain(|limit| !limit.retry_after.expired()); diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index e2e9c016923..d1edeb3554f 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -159,10 +159,7 @@ async fn rate_limit_buckets( match rate_limits { Ok(rate_limits) => { // If a rate limit is active, discard transaction buckets. - if let Some(limit) = rate_limits.into_iter().find(|r| { - !r.retry_after.expired() - && r.categories.contains(&DataCategory::TransactionProcessed) - }) { + if let Some(limit) = rate_limits.limit_for(DataCategory::TransactionProcessed) { annotated_buckets.retain(|(_, count)| count.is_none()); // Track outcome for the processed transactions we dropped here: From 1cabbb1b9419422b19d90c5c5953bf5fb9bab572 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 12 Oct 2022 15:14:12 +0200 Subject: [PATCH 24/34] ref: Update some comments --- relay-server/src/actors/processor.rs | 5 ++--- relay-server/src/actors/project.rs | 1 - relay-server/src/endpoints/common.rs | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 686180e9b8a..34e7f7ffd3e 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -517,8 +517,7 @@ impl EncodeEnvelope { Self { request } } } - -/// TODO: docs +/// Check rate limits and increment the quota. #[cfg(feature = "processing")] #[derive(Debug)] pub struct CheckRateLimits { @@ -542,7 +541,7 @@ pub enum EnvelopeProcessor { CheckRateLimits( CheckRateLimits, Sender>, - ), // TODO: why are the others Box<_>? + ), } impl EnvelopeProcessor { diff --git a/relay-server/src/actors/project.rs b/relay-server/src/actors/project.rs index f269c9ac276..e19d3513c35 100644 --- a/relay-server/src/actors/project.rs +++ b/relay-server/src/actors/project.rs @@ -617,7 +617,6 @@ impl Project { /// The metrics will be keyed underneath this project key. pub fn insert_metrics(&mut self, metrics: Vec) { // TODO: rate limits - // TODO: code sharing for incoming buckets, incoming metrics, outgoing buckets if self.metrics_allowed() { Registry::aggregator().send(InsertMetrics::new(self.project_key, metrics)); } diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 64e8feaa310..8cf3589ed6e 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -282,6 +282,7 @@ fn queue_envelope( if !metric_items.is_empty() { relay_log::trace!("sending metrics into processing queue"); + EnvelopeProcessor::from_registry().send(ProcessMetrics { items: metric_items, project_key: envelope.meta().public_key(), From 7494af07353bcf8b985371790f169db825fe02b3 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 12 Oct 2022 16:49:44 +0200 Subject: [PATCH 25/34] fix: lint --- relay-server/src/actors/project_cache.rs | 2 ++ relay-server/src/endpoints/common.rs | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index d1edeb3554f..590d22fb96d 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -49,6 +49,8 @@ pub enum ProjectError { impl ResponseError for ProjectError {} +// Helper function for `rate_limit_buckets`. 
+#[cfg_attr(not(feature = "processing"), allow(unused_mut, unused_variables))] async fn check_rate_limits( transaction_count: usize, cached_rate_limits: &RateLimits, diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 8cf3589ed6e..64e8feaa310 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -282,7 +282,6 @@ fn queue_envelope( if !metric_items.is_empty() { relay_log::trace!("sending metrics into processing queue"); - EnvelopeProcessor::from_registry().send(ProcessMetrics { items: metric_items, project_key: envelope.meta().public_key(), From 00331f3f8ab2a10fd0372bfd1deb7741b68cd5a9 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 12 Oct 2022 16:50:11 +0200 Subject: [PATCH 26/34] fix: Drop buckets on internal error --- relay-server/src/actors/project_cache.rs | 58 +++++++++++++++--------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 590d22fb96d..32f1ac691f1 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -92,6 +92,30 @@ async fn check_rate_limits( Ok(rate_limits) } +// Helper function for `rate_limit_buckets`. +fn drop_with_outcome( + annotated_buckets: &mut Vec<(Bucket, Option)>, + transaction_count: usize, + outcome: Outcome, + scoping: Scoping, +) { + // Drop transaction buckets: + annotated_buckets.retain(|(_, count)| count.is_none()); + + // Track outcome for the processed transactions we dropped here: + if transaction_count > 0 { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping, + outcome, + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } +} + /// Remove rate limited metrics buckets and track outcomes for them. /// /// Returns any buckets that were *not* rate limited. @@ -162,32 +186,22 @@ async fn rate_limit_buckets( Ok(rate_limits) => { // If a rate limit is active, discard transaction buckets. if let Some(limit) = rate_limits.limit_for(DataCategory::TransactionProcessed) { - annotated_buckets.retain(|(_, count)| count.is_none()); - - // Track outcome for the processed transactions we dropped here: - if transaction_count > 0 { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *item_scoping, - outcome: Outcome::RateLimited(limit.reason_code.clone()), - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); - } + drop_with_outcome( + &mut annotated_buckets, + transaction_count, + Outcome::RateLimited(limit.reason_code.clone()), + *item_scoping, + ); } } Err(outcome) => { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *item_scoping, + // Error from rate limiter, drop transaction buckets. 
+ drop_with_outcome( + &mut annotated_buckets, + transaction_count, outcome, - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); + *item_scoping, + ); } }; } From 157ce04cfe67119439c6084cf6faaf51a539be19 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 13 Oct 2022 10:19:21 +0200 Subject: [PATCH 27/34] fix: Call rate limiter with over_accept_once --- relay-server/src/actors/processor.rs | 6 +++++- relay-server/src/actors/project_cache.rs | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 4fd215d6146..da953588b2b 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -529,6 +529,9 @@ pub struct CheckRateLimits { pub scoping: Scoping, /// By how much the quota usage is incremented. pub quantity: usize, + /// Do we accept the first call that goes over the rate limit? + /// Useful to ensure that subsequent calls with quantity=0 will be rejected. + pub over_accept_once: bool, } /// CPU-intensive processing tasks for envelopes. @@ -2209,6 +2212,7 @@ impl EnvelopeProcessorService { category, scoping, quantity, + over_accept_once, } = message; let rate_limiter = match self.rate_limiter.as_ref() { @@ -2224,7 +2228,7 @@ impl EnvelopeProcessorService { scoping: &scoping, }; - rate_limiter.is_rate_limited("as, item_scoping, quantity) + rate_limiter.is_rate_limited("as, item_scoping, quantity, over_accept_once) } fn encode_envelope_body( diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 32f1ac691f1..76cd5e11cc9 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -69,6 +69,10 @@ async fn check_rate_limits( category: DataCategory::TransactionProcessed, scoping, quantity: transaction_count, + // Because we call this function with transaction_count=0 (for metrics other than + // "duration"), we need to over-accept once to ensure that the limit is reached + // and those cases are actually rejected. + over_accept_once: true, }); let redis_rate_limits = match request.await { From e784b34dfba366846f44ed1350efb61a40f99f14 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 13 Oct 2022 17:56:51 +0200 Subject: [PATCH 28/34] ref: Move rate limiting to utils --- relay-quotas/src/rate_limit.rs | 17 +- relay-server/src/actors/project.rs | 4 +- relay-server/src/actors/project_cache.rs | 215 ++---------------- relay-server/src/utils/metrics_rate_limits.rs | 129 +++++++++++ relay-server/src/utils/mod.rs | 2 + 5 files changed, 167 insertions(+), 200 deletions(-) create mode 100644 relay-server/src/utils/metrics_rate_limits.rs diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index 677586b1396..1e28bf3e782 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -235,10 +235,11 @@ impl RateLimits { self.iter().any(|limit| !limit.retry_after.expired()) } - /// Returns `true` if this instance contains active rate limits for the given data category. - pub fn limit_for(&self, category: DataCategory) -> Option<&RateLimit> { + /// Returns the longest limit for the given data category, if any exists. 
+ pub fn longest_for(&self, category: DataCategory) -> Option<&RateLimit> { self.iter() - .find(|limit| !limit.retry_after.expired() && limit.categories.contains(&category)) + .filter(|limit| !limit.retry_after.expired() && limit.categories.contains(&category)) + .max_by_key(|limit| limit.retry_after) } /// Removes expired rate limits from this instance. @@ -251,7 +252,7 @@ impl RateLimits { /// If no limits match, then the returned `RateLimits` instance evalutes `is_ok`. Otherwise, it /// contains rate limits that match the given scoping. pub fn check(&self, scoping: ItemScoping<'_>) -> Self { - self.check_with_quotas(&[], scoping) + self.check_with_quotas([].iter(), scoping) } /// Checks whether any rate limits apply to the given scoping. @@ -261,7 +262,11 @@ impl RateLimits { /// /// If no limits or quotas match, then the returned `RateLimits` instance evalutes `is_ok`. /// Otherwise, it contains rate limits that match the given scoping. - pub fn check_with_quotas(&self, quotas: &[Quota], scoping: ItemScoping<'_>) -> Self { + pub fn check_with_quotas<'a>( + &self, + quotas: impl Iterator, + scoping: ItemScoping<'_>, + ) -> Self { let mut applied_limits = Self::new(); for quota in quotas { @@ -811,7 +816,7 @@ mod tests { reason_code: Some(ReasonCode::new("zero")), }]; - let applied_limits = rate_limits.check_with_quotas(quotas, item_scoping); + let applied_limits = rate_limits.check_with_quotas(quotas.iter(), item_scoping); insta::assert_ron_snapshot!(applied_limits, @r###" RateLimits( diff --git a/relay-server/src/actors/project.rs b/relay-server/src/actors/project.rs index e19d3513c35..1349caf625f 100644 --- a/relay-server/src/actors/project.rs +++ b/relay-server/src/actors/project.rs @@ -851,7 +851,9 @@ impl Project { let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); let envelope_limiter = EnvelopeLimiter::new(|item_scoping, _| { - Ok(self.rate_limits.check_with_quotas(quotas, item_scoping)) + Ok(self + .rate_limits + .check_with_quotas(quotas.iter(), item_scoping)) }); let (enforcement, rate_limits) = envelope_limiter.enforce(&mut envelope, &scoping)?; diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 76cd5e11cc9..027b2bef5d3 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -9,7 +9,7 @@ use futures01::{future, Future}; #[cfg(feature = "processing")] use relay_common::clone; -use relay_common::{DataCategory, ProjectKey, UnixTimestamp}; +use relay_common::{DataCategory, ProjectKey}; use relay_config::{Config, RelayMode}; use relay_metrics::{ self, AggregateMetricsError, Bucket, FlushBuckets, InsertMetrics, MergeBuckets, @@ -34,7 +34,7 @@ use crate::actors::project_upstream::UpstreamProjectSource; use crate::envelope::Envelope; use crate::service::Registry; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; -use crate::utils::{self, EnvelopeContext, GarbageDisposal, Response}; +use crate::utils::{self, AnnotatedBuckets, EnvelopeContext, GarbageDisposal, Response}; use super::outcome::{Outcome, TrackOutcome}; @@ -49,171 +49,6 @@ pub enum ProjectError { impl ResponseError for ProjectError {} -// Helper function for `rate_limit_buckets`. 
-#[cfg_attr(not(feature = "processing"), allow(unused_mut, unused_variables))] -async fn check_rate_limits( - transaction_count: usize, - cached_rate_limits: &RateLimits, - quotas: Vec, - item_scoping: ItemScoping<'_>, - check_redis: bool, -) -> Result { - let scoping = *item_scoping.scoping; - let mut rate_limits = cached_rate_limits.check_with_quotas(quotas.as_slice(), item_scoping); - - #[cfg(feature = "processing")] - if check_redis && !rate_limits.is_limited() { - // Await rate limits from redis before continuing. - let request = EnvelopeProcessor::from_registry().send(CheckRateLimits { - quotas, - category: DataCategory::TransactionProcessed, - scoping, - quantity: transaction_count, - // Because we call this function with transaction_count=0 (for metrics other than - // "duration"), we need to over-accept once to ensure that the limit is reached - // and those cases are actually rejected. - over_accept_once: true, - }); - - let redis_rate_limits = match request.await { - Ok(Ok(limits)) => limits, - Err(_) | Ok(Err(_)) => { - // There was either a SendError or a problem with redis. Track outcomes and - // drop all buckets. - return Err(Outcome::Invalid(DiscardReason::Internal)); - } - }; - - rate_limits.merge(redis_rate_limits); - - // Also update the in-memory cache: - ProjectCache::from_registry().do_send(UpdateRateLimits::new( - scoping.project_key, - rate_limits.clone(), - )); - } - - Ok(rate_limits) -} - -// Helper function for `rate_limit_buckets`. -fn drop_with_outcome( - annotated_buckets: &mut Vec<(Bucket, Option)>, - transaction_count: usize, - outcome: Outcome, - scoping: Scoping, -) { - // Drop transaction buckets: - annotated_buckets.retain(|(_, count)| count.is_none()); - - // Track outcome for the processed transactions we dropped here: - if transaction_count > 0 { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping, - outcome, - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); - } -} - -/// Remove rate limited metrics buckets and track outcomes for them. -/// -/// Returns any buckets that were *not* rate limited. -async fn rate_limit_buckets( - buckets: Vec, - quotas: Vec, - cached_rate_limits: RateLimits, - scoping: Scoping, - check_redis: bool, -) -> Result, ()> { - // Collect bucket information (bucket, transaction_count) - let mut annotated_buckets: Vec<_> = buckets - .into_iter() - .map(|bucket| { - let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { - Ok(mri) => mri, - Err(_) => { - relay_log::error!("Invalid MRI: {}", bucket.name); - return (bucket, None); - } - }; - - // Keep all metrics that are not transaction related: - if mri.namespace != MetricNamespace::Transactions { - // Not limiting sessions for now. - return (bucket, None); - } - - if mri.name == "duration" { - // The "duration" metric is extracted exactly once for every processed transaction, - // so we can use it to count the number of transactions. - let count = bucket.value.len(); - (bucket, Some(count)) - } else { - // For any other metric in the transaction namespace, we check the limit with quantity=0 - // so transactions are not double counted against the quota. 
- (bucket, Some(0)) - } - }) - .collect(); - - // Accumulate the total transaction count: - let total_transaction_count = annotated_buckets.iter().fold( - None, - |acc, (_, transaction_count)| match transaction_count { - Some(count) => Some(acc.unwrap_or(0) + count), - None => acc, - }, - ); - - // Check rate limits if necessary: - if let Some(transaction_count) = total_transaction_count { - let item_scoping = ItemScoping { - category: DataCategory::TransactionProcessed, - scoping: &scoping, - }; - - let rate_limits = check_rate_limits( - transaction_count, - &cached_rate_limits, - quotas, - item_scoping, - check_redis, - ) - .await; - - match rate_limits { - Ok(rate_limits) => { - // If a rate limit is active, discard transaction buckets. - if let Some(limit) = rate_limits.limit_for(DataCategory::TransactionProcessed) { - drop_with_outcome( - &mut annotated_buckets, - transaction_count, - Outcome::RateLimited(limit.reason_code.clone()), - *item_scoping, - ); - } - } - Err(outcome) => { - // Error from rate limiter, drop transaction buckets. - drop_with_outcome( - &mut annotated_buckets, - transaction_count, - outcome, - *item_scoping, - ); - } - }; - } - - // Restore vector of buckets - Ok(annotated_buckets.into_iter().map(|(b, _)| b).collect()) -} - pub struct ProjectCache { config: Arc, projects: hashbrown::HashMap, // need hashbrown because drain_filter is not stable in std yet @@ -798,32 +633,26 @@ impl Handler for ProjectCache { } // Check rate limits: - let quotas = project_state - .config - .quotas - .iter() - .filter(|q| q.categories.contains(&DataCategory::TransactionProcessed)) - .map(Quota::clone) - .collect::>(); - let processing_enabled = config.processing_enabled(); - let rate_limits = project.rate_limits().clone(); - use futures::{FutureExt, TryFutureExt}; - let future = rate_limit_buckets(buckets, quotas, rate_limits, scoping, processing_enabled) - .boxed() - .compat() - .and_then(move |buckets| { - // After checking rate limits, send buckets to envelope manager: - if !buckets.is_empty() { - EnvelopeManager::from_registry().send(SendMetrics { - buckets, - scoping, - partition_key, - }); - } - Ok(()) - }); + let quotas = project_state.config.quotas.iter(); + let item_scoping = ItemScoping { + category: DataCategory::TransactionProcessed, + scoping: &scoping, + }; + let cached_rate_limits = project + .rate_limits() + .check_with_quotas(quotas, item_scoping); + + let (buckets, was_rate_limited) = + AnnotatedBuckets::new(buckets, item_scoping).enforce_limits(Ok(cached_rate_limits)); + + // TODO: If no rate limits were applied, send to processor. - let actor_future = future.into_actor(self); - actor_future.spawn(context); + if !buckets.is_empty() { + EnvelopeManager::from_registry().send(SendMetrics { + buckets, + scoping, + partition_key, + }); + } } } diff --git a/relay-server/src/utils/metrics_rate_limits.rs b/relay-server/src/utils/metrics_rate_limits.rs new file mode 100644 index 00000000000..4e16732d073 --- /dev/null +++ b/relay-server/src/utils/metrics_rate_limits.rs @@ -0,0 +1,129 @@ +//! Quota and rate limiting helpers for metrics buckets. + +use relay_common::{DataCategory, UnixTimestamp}; +use relay_metrics::{Bucket, MetricNamespace, MetricResourceIdentifier}; +use relay_quotas::{ItemScoping, RateLimitingError, RateLimits, Scoping}; + +use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; + +/// Holds metrics buckets with some information about their contents. +pub struct AnnotatedBuckets<'a> { + /// A list of metrics buckets. 
+ buckets: Vec, + + item_scoping: ItemScoping<'a>, + + /// Binary index of buckets in the transaction namespace (used to retain). + transaction_buckets: Vec, + + /// The number of transactions contributing to these buckets. + /// If no transaction buckets are contained, the value is None. + transaction_count: Option, +} + +impl<'a> AnnotatedBuckets<'a> { + /// TODO: docs + pub fn new(buckets: Vec, item_scoping: ItemScoping<'a>) -> Self { + let transaction_counts: Vec<_> = buckets + .iter() + .map(|bucket| { + let mri = match MetricResourceIdentifier::parse(bucket.name.as_str()) { + Ok(mri) => mri, + Err(_) => { + relay_log::error!("Invalid MRI: {}", bucket.name); + return None; + } + }; + + // Keep all metrics that are not transaction related: + if mri.namespace != MetricNamespace::Transactions { + return None; + } + + if mri.name == "duration" { + // The "duration" metric is extracted exactly once for every processed transaction, + // so we can use it to count the number of transactions. + let count = bucket.value.len(); + Some(count) + } else { + // For any other metric in the transaction namespace, we check the limit with quantity=0 + // so transactions are not double counted against the quota. + Some(0) + } + }) + .collect(); + + let transaction_buckets = transaction_counts.iter().map(Option::is_some).collect(); + + // Accumulate the total transaction count: + let transaction_count = transaction_counts + .iter() + .fold(None, |acc, transaction_count| match transaction_count { + Some(count) => Some(acc.unwrap_or(0) + count), + None => acc, + }); + + Self { + buckets, + item_scoping, + transaction_buckets, + transaction_count, + } + } + + fn drop_with_outcome(&mut self, outcome: Outcome) { + if let Some(transaction_count) = self.transaction_count { + // Drop transaction buckets: + let buckets = std::mem::take(&mut self.buckets); + + self.buckets = buckets + .into_iter() + .zip(self.transaction_buckets.iter()) + .filter_map(|(bucket, is_transaction_bucket)| { + (!is_transaction_bucket).then_some(bucket) + }) + .collect(); + + // Track outcome for the processed transactions we dropped here: + if transaction_count > 0 { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: *self.item_scoping, + outcome, + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: transaction_count as u32, + }); + } + } + } + + // TODO: docs + // Returns true if any buckets were dropped. + pub fn enforce_limits( + mut self, + rate_limits: Result, + ) -> (Vec, bool) { + let mut dropped_stuff = false; + if self.transaction_count.is_some() { + match rate_limits { + Ok(rate_limits) => { + // If a rate limit is active, discard transaction buckets. + if let Some(limit) = rate_limits.longest_for(DataCategory::TransactionProcessed) + { + self.drop_with_outcome(Outcome::RateLimited(limit.reason_code.clone())); + dropped_stuff = true; + } + } + Err(_) => { + // Error from rate limiter, drop transaction buckets. 
+ self.drop_with_outcome(Outcome::Invalid(DiscardReason::Internal)); + dropped_stuff = true; + } + }; + } + + (self.buckets, dropped_stuff) + } +} diff --git a/relay-server/src/utils/mod.rs b/relay-server/src/utils/mod.rs index f95cb174df3..6ba2cc72806 100644 --- a/relay-server/src/utils/mod.rs +++ b/relay-server/src/utils/mod.rs @@ -5,6 +5,7 @@ mod dynamic_sampling; mod envelope_context; mod error_boundary; mod garbage; +mod metrics_rate_limits; mod multipart; mod param_parser; mod rate_limits; @@ -29,6 +30,7 @@ pub use self::dynamic_sampling::*; pub use self::envelope_context::*; pub use self::error_boundary::*; pub use self::garbage::*; +pub use self::metrics_rate_limits::*; pub use self::multipart::*; pub use self::param_parser::*; pub use self::rate_limits::*; From 3cf84996128401e31800f6bbc0df34675df9753e Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 14 Oct 2022 09:07:25 +0200 Subject: [PATCH 29/34] wip --- relay-server/src/actors/project_cache.rs | 40 +++--- relay-server/src/utils/metrics_rate_limits.rs | 118 +++++++++--------- 2 files changed, 85 insertions(+), 73 deletions(-) diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 027b2bef5d3..00d50b657d2 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -34,7 +34,7 @@ use crate::actors::project_upstream::UpstreamProjectSource; use crate::envelope::Envelope; use crate::service::Registry; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; -use crate::utils::{self, AnnotatedBuckets, EnvelopeContext, GarbageDisposal, Response}; +use crate::utils::{self, BucketEnforcement, EnvelopeContext, GarbageDisposal, Response}; use super::outcome::{Outcome, TrackOutcome}; @@ -589,7 +589,7 @@ impl Handler for ProjectCache { impl Handler for ProjectCache { type Result = (); - fn handle(&mut self, message: FlushBuckets, context: &mut Self::Context) -> Self::Result { + fn handle(&mut self, message: FlushBuckets, _context: &mut Self::Context) -> Self::Result { let FlushBuckets { project_key, partition_key, @@ -632,20 +632,32 @@ impl Handler for ProjectCache { return; } - // Check rate limits: - let quotas = project_state.config.quotas.iter(); - let item_scoping = ItemScoping { - category: DataCategory::TransactionProcessed, - scoping: &scoping, - }; - let cached_rate_limits = project - .rate_limits() - .check_with_quotas(quotas, item_scoping); + // Check rate limits if necessary: + let buckets = match BucketEnforcement::create(buckets, scoping) { + Ok(mut enforcement) => { + let quotas = project_state.config.quotas.iter(); + let item_scoping = ItemScoping { + category: DataCategory::TransactionProcessed, + scoping: &scoping, + }; + let cached_rate_limits = project + .rate_limits() + .check_with_quotas(quotas, item_scoping); - let (buckets, was_rate_limited) = - AnnotatedBuckets::new(buckets, item_scoping).enforce_limits(Ok(cached_rate_limits)); + let was_rate_limited = enforcement.enforce_limits(Ok(cached_rate_limits)); - // TODO: If no rate limits were applied, send to processor. 
+ if !was_rate_limited { + // If there were no cached rate limits active, let the processor check redis: + // TODO + // EnvelopeProcessor::from_registry().send(enforcement); + + vec![] + } else { + enforcement.into_buckets() + } + } + Err(buckets) => buckets, + }; if !buckets.is_empty() { EnvelopeManager::from_registry().send(SendMetrics { diff --git a/relay-server/src/utils/metrics_rate_limits.rs b/relay-server/src/utils/metrics_rate_limits.rs index 4e16732d073..9ac904be162 100644 --- a/relay-server/src/utils/metrics_rate_limits.rs +++ b/relay-server/src/utils/metrics_rate_limits.rs @@ -2,28 +2,28 @@ use relay_common::{DataCategory, UnixTimestamp}; use relay_metrics::{Bucket, MetricNamespace, MetricResourceIdentifier}; -use relay_quotas::{ItemScoping, RateLimitingError, RateLimits, Scoping}; +use relay_quotas::{RateLimitingError, RateLimits, Scoping}; use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; /// Holds metrics buckets with some information about their contents. -pub struct AnnotatedBuckets<'a> { +pub struct BucketEnforcement { /// A list of metrics buckets. buckets: Vec, - item_scoping: ItemScoping<'a>, + /// Project information. + scoping: Scoping, /// Binary index of buckets in the transaction namespace (used to retain). transaction_buckets: Vec, /// The number of transactions contributing to these buckets. - /// If no transaction buckets are contained, the value is None. - transaction_count: Option, + transaction_count: usize, } -impl<'a> AnnotatedBuckets<'a> { - /// TODO: docs - pub fn new(buckets: Vec, item_scoping: ItemScoping<'a>) -> Self { +impl BucketEnforcement { + /// Returns Ok if `buckets` contain transaction metrics, `buckets` otherwise. + pub fn create(buckets: Vec, scoping: Scoping) -> Result> { let transaction_counts: Vec<_> = buckets .iter() .map(|bucket| { @@ -53,8 +53,6 @@ impl<'a> AnnotatedBuckets<'a> { }) .collect(); - let transaction_buckets = transaction_counts.iter().map(Option::is_some).collect(); - // Accumulate the total transaction count: let transaction_count = transaction_counts .iter() @@ -63,67 +61,69 @@ impl<'a> AnnotatedBuckets<'a> { None => acc, }); - Self { - buckets, - item_scoping, - transaction_buckets, - transaction_count, + if let Some(transaction_count) = transaction_count { + let transaction_buckets = transaction_counts.iter().map(Option::is_some).collect(); + Ok(Self { + buckets, + scoping, + transaction_buckets, + transaction_count, + }) + } else { + Err(buckets) } } fn drop_with_outcome(&mut self, outcome: Outcome) { - if let Some(transaction_count) = self.transaction_count { - // Drop transaction buckets: - let buckets = std::mem::take(&mut self.buckets); - - self.buckets = buckets - .into_iter() - .zip(self.transaction_buckets.iter()) - .filter_map(|(bucket, is_transaction_bucket)| { - (!is_transaction_bucket).then_some(bucket) - }) - .collect(); - - // Track outcome for the processed transactions we dropped here: - if transaction_count > 0 { - TrackOutcome::from_registry().send(TrackOutcome { - timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp - scoping: *self.item_scoping, - outcome, - event_id: None, - remote_addr: None, - category: DataCategory::TransactionProcessed, - quantity: transaction_count as u32, - }); - } + // Drop transaction buckets: + let buckets = std::mem::take(&mut self.buckets); + + self.buckets = buckets + .into_iter() + .zip(self.transaction_buckets.iter()) + .filter_map(|(bucket, is_transaction_bucket)| { + (!is_transaction_bucket).then_some(bucket) + }) + .collect(); 
+ + // Track outcome for the processed transactions we dropped here: + if self.transaction_count > 0 { + TrackOutcome::from_registry().send(TrackOutcome { + timestamp: UnixTimestamp::now().as_datetime(), // as good as any timestamp + scoping: self.scoping, + outcome, + event_id: None, + remote_addr: None, + category: DataCategory::TransactionProcessed, + quantity: self.transaction_count as u32, + }); } } // TODO: docs // Returns true if any buckets were dropped. - pub fn enforce_limits( - mut self, - rate_limits: Result, - ) -> (Vec, bool) { + pub fn enforce_limits(&mut self, rate_limits: Result) -> bool { let mut dropped_stuff = false; - if self.transaction_count.is_some() { - match rate_limits { - Ok(rate_limits) => { - // If a rate limit is active, discard transaction buckets. - if let Some(limit) = rate_limits.longest_for(DataCategory::TransactionProcessed) - { - self.drop_with_outcome(Outcome::RateLimited(limit.reason_code.clone())); - dropped_stuff = true; - } - } - Err(_) => { - // Error from rate limiter, drop transaction buckets. - self.drop_with_outcome(Outcome::Invalid(DiscardReason::Internal)); + match rate_limits { + Ok(rate_limits) => { + // If a rate limit is active, discard transaction buckets. + if let Some(limit) = rate_limits.longest_for(DataCategory::TransactionProcessed) { + self.drop_with_outcome(Outcome::RateLimited(limit.reason_code.clone())); dropped_stuff = true; } - }; - } + } + Err(_) => { + // Error from rate limiter, drop transaction buckets. + self.drop_with_outcome(Outcome::Invalid(DiscardReason::Internal)); + dropped_stuff = true; + } + }; + + dropped_stuff + } - (self.buckets, dropped_stuff) + /// Consume this struct and return its buckets. + pub fn into_buckets(self) -> Vec { + self.buckets } } From de54f9d93c24d8ccae60c04b0e99b39821dddba4 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 14 Oct 2022 11:58:28 +0200 Subject: [PATCH 30/34] fix: Send to processor --- relay-quotas/src/rate_limit.rs | 17 +-- relay-server/src/actors/processor.rs | 109 +++++++++--------- relay-server/src/actors/project.rs | 4 +- relay-server/src/actors/project_cache.rs | 43 +++---- relay-server/src/utils/metrics_rate_limits.rs | 35 +++++- 5 files changed, 108 insertions(+), 100 deletions(-) diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index 1e28bf3e782..9819f274d6d 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -2,7 +2,7 @@ use std::fmt; use std::str::FromStr; use std::time::{Duration, Instant}; -use relay_common::{DataCategory, ProjectId, ProjectKey}; +use relay_common::{ProjectId, ProjectKey}; use crate::quota::{DataCategories, ItemScoping, Quota, QuotaScope, ReasonCode, Scoping}; use crate::REJECT_ALL_SECS; @@ -235,13 +235,6 @@ impl RateLimits { self.iter().any(|limit| !limit.retry_after.expired()) } - /// Returns the longest limit for the given data category, if any exists. - pub fn longest_for(&self, category: DataCategory) -> Option<&RateLimit> { - self.iter() - .filter(|limit| !limit.retry_after.expired() && limit.categories.contains(&category)) - .max_by_key(|limit| limit.retry_after) - } - /// Removes expired rate limits from this instance. pub fn clean_expired(&mut self) { self.limits.retain(|limit| !limit.retry_after.expired()); @@ -252,7 +245,7 @@ impl RateLimits { /// If no limits match, then the returned `RateLimits` instance evalutes `is_ok`. Otherwise, it /// contains rate limits that match the given scoping. 
pub fn check(&self, scoping: ItemScoping<'_>) -> Self { - self.check_with_quotas([].iter(), scoping) + self.check_with_quotas(&[], scoping) } /// Checks whether any rate limits apply to the given scoping. @@ -262,11 +255,7 @@ impl RateLimits { /// /// If no limits or quotas match, then the returned `RateLimits` instance evalutes `is_ok`. /// Otherwise, it contains rate limits that match the given scoping. - pub fn check_with_quotas<'a>( - &self, - quotas: impl Iterator, - scoping: ItemScoping<'_>, - ) -> Self { + pub fn check_with_quotas(&self, quotas: &[Quota], scoping: ItemScoping<'_>) -> Self { let mut applied_limits = Self::new(); for quota in quotas { diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index da953588b2b..f0392aec0a6 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -27,7 +27,8 @@ use crate::metrics_extraction::transactions::extract_transaction_metrics; use crate::service::{ServerError, REGISTRY}; use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; use crate::utils::{ - self, ChunkedFormDataAggregator, EnvelopeContext, ErrorBoundary, FormDataIter, SamplingResult, + self, BucketEnforcement, ChunkedFormDataAggregator, EnvelopeContext, ErrorBoundary, + FormDataIter, SamplingResult, }; use relay_auth::RelayVersion; use relay_common::{ProjectId, ProjectKey, UnixTimestamp}; @@ -46,8 +47,6 @@ use relay_general::types::{Annotated, Array, FromValue, Object, ProcessingAction use relay_log::LogError; use relay_metrics::{Bucket, InsertMetrics, MergeBuckets, Metric}; use relay_quotas::{DataCategory, ReasonCode}; -#[cfg(feature = "processing")] -use relay_quotas::{Quota, RateLimits, Scoping}; use relay_redis::RedisPool; use relay_sampling::{DynamicSamplingContext, RuleId}; use relay_statsd::metric; @@ -61,7 +60,6 @@ use { failure::ResultExt, relay_general::store::{GeoIpLookup, StoreConfig, StoreProcessor}, relay_quotas::{RateLimitingError, RedisRateLimiter}, - relay_system::{AsyncResponse, Sender}, symbolic_unreal::{Unreal4Error, Unreal4ErrorKind}, }; @@ -517,21 +515,13 @@ impl EncodeEnvelope { Self { request } } } -/// Check rate limits and increment the quota. + +/// Applies rate limits to metrics buckets and forwards them to the envelope manager. #[cfg(feature = "processing")] #[derive(Debug)] -pub struct CheckRateLimits { - /// Quotas to be checked. - pub quotas: Vec, - /// Category to check against. - pub category: DataCategory, - /// Scoping information for the project. - pub scoping: Scoping, - /// By how much the quota usage is incremented. - pub quantity: usize, - /// Do we accept the first call that goes over the rate limit? - /// Useful to ensure that subsequent calls with quantity=0 will be rejected. - pub over_accept_once: bool, +pub struct RateLimitFlushBuckets { + pub enforcement: BucketEnforcement, + pub partition_key: Option, } /// CPU-intensive processing tasks for envelopes. 
@@ -541,10 +531,7 @@ pub enum EnvelopeProcessor { ProcessMetrics(Box), EncodeEnvelope(Box), #[cfg(feature = "processing")] - CheckRateLimits( - CheckRateLimits, - Sender>, - ), + RateLimitFlushBuckets(RateLimitFlushBuckets), } impl EnvelopeProcessor { @@ -580,14 +567,11 @@ impl FromMessage for EnvelopeProcessor { } #[cfg(feature = "processing")] -impl FromMessage for EnvelopeProcessor { - type Response = AsyncResponse>; +impl FromMessage for EnvelopeProcessor { + type Response = NoResponse; - fn from_message( - message: CheckRateLimits, - sender: Sender>, - ) -> Self { - Self::CheckRateLimits(message, sender) + fn from_message(message: RateLimitFlushBuckets, _: ()) -> Self { + Self::RateLimitFlushBuckets(message) } } @@ -2199,36 +2183,55 @@ impl EnvelopeProcessorService { } } - /// Returns redis rate limits. + /// Check and apply rate limits to metrics buckets. #[cfg(feature = "processing")] - fn handle_check_rate_limits( - &self, - message: CheckRateLimits, - ) -> Result { + fn handle_rate_limit_flush_buckets(&self, message: RateLimitFlushBuckets) { use relay_quotas::ItemScoping; - let CheckRateLimits { - quotas, - category, - scoping, - quantity, - over_accept_once, + use crate::actors::envelopes::SendMetrics; + + let RateLimitFlushBuckets { + mut enforcement, + partition_key, } = message; - let rate_limiter = match self.rate_limiter.as_ref() { - Some(rate_limiter) => rate_limiter, - None => { - relay_log::error!("handle_rate_limit_metrics_buckets called without rate limiter"); - return Ok(RateLimits::new()); // empty - } - }; + let scoping = *enforcement.scoping(); - let item_scoping = ItemScoping { - category, - scoping: &scoping, - }; + if let Some(rate_limiter) = self.rate_limiter.as_ref() { + let item_scoping = ItemScoping { + category: DataCategory::TransactionProcessed, + scoping: &scoping, + }; + // We set over_accept_once such that the limit is actually reached, which allows subsequent + // calls with quantity=0 to be rate limited. + let over_accept_once = true; + let rate_limits = rate_limiter.is_rate_limited( + &enforcement.quotas(), + item_scoping, + enforcement.transaction_count(), + over_accept_once, + ); + + let was_enforced = enforcement.enforce_limits(rate_limits.as_ref()); + + if was_enforced { + if let Ok(limits) = rate_limits { + // Update the rate limits in the project cache. 
+ ProjectCache::from_registry() + .do_send(UpdateRateLimits::new(scoping.project_key, limits)); + } + } + } - rate_limiter.is_rate_limited("as, item_scoping, quantity, over_accept_once) + let buckets = enforcement.into_buckets(); + if !buckets.is_empty() { + // Forward buckets to envelope manager to send them to upstream or kafka: + EnvelopeManager::from_registry().send(SendMetrics { + buckets, + scoping, + partition_key, + }); + } } fn encode_envelope_body( @@ -2279,8 +2282,8 @@ impl EnvelopeProcessorService { EnvelopeProcessor::ProcessMetrics(message) => self.handle_process_metrics(*message), EnvelopeProcessor::EncodeEnvelope(message) => self.handle_encode_envelope(*message), #[cfg(feature = "processing")] - EnvelopeProcessor::CheckRateLimits(message, sender) => { - sender.send(self.handle_check_rate_limits(message)); + EnvelopeProcessor::RateLimitFlushBuckets(message) => { + self.handle_rate_limit_flush_buckets(message); } } } diff --git a/relay-server/src/actors/project.rs b/relay-server/src/actors/project.rs index 1349caf625f..e19d3513c35 100644 --- a/relay-server/src/actors/project.rs +++ b/relay-server/src/actors/project.rs @@ -851,9 +851,7 @@ impl Project { let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); let envelope_limiter = EnvelopeLimiter::new(|item_scoping, _| { - Ok(self - .rate_limits - .check_with_quotas(quotas.iter(), item_scoping)) + Ok(self.rate_limits.check_with_quotas(quotas, item_scoping)) }); let (enforcement, rate_limits) = envelope_limiter.enforce(&mut envelope, &scoping)?; diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 00d50b657d2..d03c20aef80 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -8,15 +8,17 @@ use failure::Fail; use futures01::{future, Future}; #[cfg(feature = "processing")] -use relay_common::clone; -use relay_common::{DataCategory, ProjectKey}; -use relay_config::{Config, RelayMode}; -use relay_metrics::{ - self, AggregateMetricsError, Bucket, FlushBuckets, InsertMetrics, MergeBuckets, - MetricNamespace, MetricResourceIdentifier, +use { + crate::actors::processor::{EnvelopeProcessor, RateLimitFlushBuckets}, + crate::actors::project_redis::RedisProjectSource, + relay_common::clone, }; -use relay_quotas::{ItemScoping, Quota, RateLimits, Scoping}; +use relay_common::ProjectKey; +use relay_config::{Config, RelayMode}; +use relay_metrics::{self, AggregateMetricsError, FlushBuckets, InsertMetrics, MergeBuckets}; + +use relay_quotas::{RateLimits, Scoping}; use relay_redis::RedisPool; use relay_statsd::metric; @@ -24,20 +26,14 @@ use relay_statsd::metric; use crate::actors::envelopes::{EnvelopeManager, SendMetrics}; use crate::actors::outcome::DiscardReason; use crate::actors::processor::ProcessEnvelope; -#[cfg(feature = "processing")] -use crate::actors::processor::{CheckRateLimits, EnvelopeProcessor}; use crate::actors::project::{ExpiryState, Project, ProjectState}; use crate::actors::project_local::LocalProjectSource; -#[cfg(feature = "processing")] -use crate::actors::project_redis::RedisProjectSource; use crate::actors::project_upstream::UpstreamProjectSource; use crate::envelope::Envelope; use crate::service::Registry; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{self, BucketEnforcement, EnvelopeContext, GarbageDisposal, Response}; -use super::outcome::{Outcome, TrackOutcome}; - #[derive(Fail, Debug)] pub enum ProjectError { #[fail(display = "failed to fetch project 
state from upstream")] @@ -633,24 +629,19 @@ impl Handler for ProjectCache { } // Check rate limits if necessary: - let buckets = match BucketEnforcement::create(buckets, scoping) { + let quotas = project_state.config.quotas.clone(); + let buckets = match BucketEnforcement::create(buckets, quotas, scoping) { Ok(mut enforcement) => { let quotas = project_state.config.quotas.iter(); - let item_scoping = ItemScoping { - category: DataCategory::TransactionProcessed, - scoping: &scoping, - }; - let cached_rate_limits = project - .rate_limits() - .check_with_quotas(quotas, item_scoping); - - let was_rate_limited = enforcement.enforce_limits(Ok(cached_rate_limits)); + let cached_rate_limits = project.rate_limits().clone(); + let was_rate_limited = enforcement.enforce_limits(Ok(&cached_rate_limits)); if !was_rate_limited { // If there were no cached rate limits active, let the processor check redis: - // TODO - // EnvelopeProcessor::from_registry().send(enforcement); - + EnvelopeProcessor::from_registry().send(RateLimitFlushBuckets { + enforcement, + partition_key, + }); vec![] } else { enforcement.into_buckets() diff --git a/relay-server/src/utils/metrics_rate_limits.rs b/relay-server/src/utils/metrics_rate_limits.rs index 9ac904be162..476ea1c3351 100644 --- a/relay-server/src/utils/metrics_rate_limits.rs +++ b/relay-server/src/utils/metrics_rate_limits.rs @@ -2,15 +2,19 @@ use relay_common::{DataCategory, UnixTimestamp}; use relay_metrics::{Bucket, MetricNamespace, MetricResourceIdentifier}; -use relay_quotas::{RateLimitingError, RateLimits, Scoping}; +use relay_quotas::{ItemScoping, Quota, RateLimitingError, RateLimits, Scoping}; use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; /// Holds metrics buckets with some information about their contents. +#[derive(Debug)] pub struct BucketEnforcement { /// A list of metrics buckets. buckets: Vec, + /// The quotas set on the current project. + quotas: Vec, + /// Project information. scoping: Scoping, @@ -23,7 +27,11 @@ pub struct BucketEnforcement { impl BucketEnforcement { /// Returns Ok if `buckets` contain transaction metrics, `buckets` otherwise. - pub fn create(buckets: Vec, scoping: Scoping) -> Result> { + pub fn create( + buckets: Vec, + quotas: Vec, + scoping: Scoping, + ) -> Result> { let transaction_counts: Vec<_> = buckets .iter() .map(|bucket| { @@ -65,6 +73,7 @@ impl BucketEnforcement { let transaction_buckets = transaction_counts.iter().map(Option::is_some).collect(); Ok(Self { buckets, + quotas, scoping, transaction_buckets, transaction_count, @@ -74,6 +83,18 @@ impl BucketEnforcement { } } + pub fn scoping(&self) -> &Scoping { + &self.scoping + } + + pub fn quotas(&self) -> &[Quota] { + self.quotas.as_ref() + } + + pub fn transaction_count(&self) -> usize { + self.transaction_count + } + fn drop_with_outcome(&mut self, outcome: Outcome) { // Drop transaction buckets: let buckets = std::mem::take(&mut self.buckets); @@ -102,12 +123,18 @@ impl BucketEnforcement { // TODO: docs // Returns true if any buckets were dropped. - pub fn enforce_limits(&mut self, rate_limits: Result) -> bool { + pub fn enforce_limits(&mut self, rate_limits: Result<&RateLimits, &RateLimitingError>) -> bool { let mut dropped_stuff = false; match rate_limits { Ok(rate_limits) => { + let item_scoping = ItemScoping { + category: DataCategory::TransactionProcessed, + scoping: &self.scoping, + }; + let applied_rate_limits = rate_limits.check_with_quotas(&self.quotas, item_scoping); + // If a rate limit is active, discard transaction buckets. 
- if let Some(limit) = rate_limits.longest_for(DataCategory::TransactionProcessed) { + if let Some(limit) = applied_rate_limits.longest() { self.drop_with_outcome(Outcome::RateLimited(limit.reason_code.clone())); dropped_stuff = true; } From f1bca59cb2bef21607df10942fde141051dcbebd Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 14 Oct 2022 12:53:04 +0200 Subject: [PATCH 31/34] ref: clippy --- relay-quotas/src/rate_limit.rs | 2 +- relay-server/src/actors/processor.rs | 13 +++++----- relay-server/src/actors/project_cache.rs | 24 ++++++++++--------- relay-server/src/utils/metrics_rate_limits.rs | 18 ++++++++++---- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index 9819f274d6d..bd6225714d4 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -805,7 +805,7 @@ mod tests { reason_code: Some(ReasonCode::new("zero")), }]; - let applied_limits = rate_limits.check_with_quotas(quotas.iter(), item_scoping); + let applied_limits = rate_limits.check_with_quotas(quotas, item_scoping); insta::assert_ron_snapshot!(applied_limits, @r###" RateLimits( diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index f0392aec0a6..b9befb88623 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -27,8 +27,7 @@ use crate::metrics_extraction::transactions::extract_transaction_metrics; use crate::service::{ServerError, REGISTRY}; use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; use crate::utils::{ - self, BucketEnforcement, ChunkedFormDataAggregator, EnvelopeContext, ErrorBoundary, - FormDataIter, SamplingResult, + self, ChunkedFormDataAggregator, EnvelopeContext, ErrorBoundary, FormDataIter, SamplingResult, }; use relay_auth::RelayVersion; use relay_common::{ProjectId, ProjectKey, UnixTimestamp}; @@ -56,7 +55,7 @@ use relay_system::{Addr, FromMessage, NoResponse, Service}; use { crate::actors::project_cache::UpdateRateLimits, crate::service::ServerErrorKind, - crate::utils::EnvelopeLimiter, + crate::utils::{BucketLimiter, EnvelopeLimiter}, failure::ResultExt, relay_general::store::{GeoIpLookup, StoreConfig, StoreProcessor}, relay_quotas::{RateLimitingError, RedisRateLimiter}, @@ -520,7 +519,7 @@ impl EncodeEnvelope { #[cfg(feature = "processing")] #[derive(Debug)] pub struct RateLimitFlushBuckets { - pub enforcement: BucketEnforcement, + pub bucket_limiter: BucketLimiter, pub partition_key: Option, } @@ -2191,7 +2190,7 @@ impl EnvelopeProcessorService { use crate::actors::envelopes::SendMetrics; let RateLimitFlushBuckets { - mut enforcement, + bucket_limiter: mut enforcement, partition_key, } = message; @@ -2206,13 +2205,13 @@ impl EnvelopeProcessorService { // calls with quantity=0 to be rate limited. 
let over_accept_once = true; let rate_limits = rate_limiter.is_rate_limited( - &enforcement.quotas(), + enforcement.quotas(), item_scoping, enforcement.transaction_count(), over_accept_once, ); - let was_enforced = enforcement.enforce_limits(rate_limits.as_ref()); + let was_enforced = enforcement.enforce_limits(rate_limits.as_ref().map_err(|_| ())); if was_enforced { if let Ok(limits) = rate_limits { diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index d03c20aef80..6225fbcd850 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -18,7 +18,7 @@ use relay_common::ProjectKey; use relay_config::{Config, RelayMode}; use relay_metrics::{self, AggregateMetricsError, FlushBuckets, InsertMetrics, MergeBuckets}; -use relay_quotas::{RateLimits, Scoping}; +use relay_quotas::RateLimits; use relay_redis::RedisPool; use relay_statsd::metric; @@ -32,7 +32,7 @@ use crate::actors::project_upstream::UpstreamProjectSource; use crate::envelope::Envelope; use crate::service::Registry; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; -use crate::utils::{self, BucketEnforcement, EnvelopeContext, GarbageDisposal, Response}; +use crate::utils::{self, BucketLimiter, EnvelopeContext, GarbageDisposal, Response}; #[derive(Fail, Debug)] pub enum ProjectError { @@ -630,22 +630,24 @@ impl Handler for ProjectCache { // Check rate limits if necessary: let quotas = project_state.config.quotas.clone(); - let buckets = match BucketEnforcement::create(buckets, quotas, scoping) { - Ok(mut enforcement) => { - let quotas = project_state.config.quotas.iter(); + let buckets = match BucketLimiter::create(buckets, quotas, scoping) { + Ok(mut bucket_limiter) => { let cached_rate_limits = project.rate_limits().clone(); - let was_rate_limited = enforcement.enforce_limits(Ok(&cached_rate_limits)); + #[allow(unused_variables)] + let was_rate_limited = bucket_limiter.enforce_limits(Ok(&cached_rate_limits)); - if !was_rate_limited { + #[cfg(feature = "processing")] + if !was_rate_limited && config.processing_enabled() { // If there were no cached rate limits active, let the processor check redis: EnvelopeProcessor::from_registry().send(RateLimitFlushBuckets { - enforcement, + bucket_limiter, partition_key, }); - vec![] - } else { - enforcement.into_buckets() + + return; } + + bucket_limiter.into_buckets() } Err(buckets) => buckets, }; diff --git a/relay-server/src/utils/metrics_rate_limits.rs b/relay-server/src/utils/metrics_rate_limits.rs index 476ea1c3351..94b94c2ecd8 100644 --- a/relay-server/src/utils/metrics_rate_limits.rs +++ b/relay-server/src/utils/metrics_rate_limits.rs @@ -2,13 +2,13 @@ use relay_common::{DataCategory, UnixTimestamp}; use relay_metrics::{Bucket, MetricNamespace, MetricResourceIdentifier}; -use relay_quotas::{ItemScoping, Quota, RateLimitingError, RateLimits, Scoping}; +use relay_quotas::{ItemScoping, Quota, RateLimits, Scoping}; use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; /// Holds metrics buckets with some information about their contents. #[derive(Debug)] -pub struct BucketEnforcement { +pub struct BucketLimiter { /// A list of metrics buckets. buckets: Vec, @@ -25,7 +25,7 @@ pub struct BucketEnforcement { transaction_count: usize, } -impl BucketEnforcement { +impl BucketLimiter { /// Returns Ok if `buckets` contain transaction metrics, `buckets` otherwise. 
pub fn create( buckets: Vec, @@ -83,14 +83,17 @@ impl BucketEnforcement { } } + #[allow(dead_code)] pub fn scoping(&self) -> &Scoping { &self.scoping } + #[allow(dead_code)] pub fn quotas(&self) -> &[Quota] { self.quotas.as_ref() } + #[allow(dead_code)] pub fn transaction_count(&self) -> usize { self.transaction_count } @@ -121,9 +124,14 @@ impl BucketEnforcement { } } - // TODO: docs + // Drop transaction-related buckets and create outcomes for any active rate limits. + // + // If rate limits could not be checked for some reason, pass an `Err` to this function. + // In this case, transaction-related metrics buckets will also be dropped, and an "internal" + // outcome is generated. + // // Returns true if any buckets were dropped. - pub fn enforce_limits(&mut self, rate_limits: Result<&RateLimits, &RateLimitingError>) -> bool { + pub fn enforce_limits(&mut self, rate_limits: Result<&RateLimits, ()>) -> bool { let mut dropped_stuff = false; match rate_limits { Ok(rate_limits) => { From d97d12700e8009916bdfd849d7aae181f652cc9f Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 14 Oct 2022 13:32:41 +0200 Subject: [PATCH 32/34] ref: self-review --- relay-server/src/actors/processor.rs | 45 ++++++++++++------------ relay-server/src/actors/project_cache.rs | 14 ++++---- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index b9befb88623..89b431d6da5 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -16,19 +16,6 @@ use once_cell::sync::OnceCell; use serde_json::Value as SerdeValue; use tokio::sync::Semaphore; -use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; -use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; -use crate::actors::project::{Feature, ProjectState}; -use crate::actors::project_cache::ProjectCache; -use crate::actors::upstream::{SendRequest, UpstreamRelay}; -use crate::envelope::{AttachmentType, ContentType, Envelope, Item, ItemType}; -use crate::metrics_extraction::sessions::{extract_session_metrics, SessionMetricsConfig}; -use crate::metrics_extraction::transactions::extract_transaction_metrics; -use crate::service::{ServerError, REGISTRY}; -use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; -use crate::utils::{ - self, ChunkedFormDataAggregator, EnvelopeContext, ErrorBoundary, FormDataIter, SamplingResult, -}; use relay_auth::RelayVersion; use relay_common::{ProjectId, ProjectKey, UnixTimestamp}; use relay_config::{Config, HttpEncoding}; @@ -51,13 +38,29 @@ use relay_sampling::{DynamicSamplingContext, RuleId}; use relay_statsd::metric; use relay_system::{Addr, FromMessage, NoResponse, Service}; +use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; +use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; +use crate::actors::project::{Feature, ProjectState}; +use crate::actors::project_cache::ProjectCache; +use crate::actors::upstream::{SendRequest, UpstreamRelay}; +use crate::envelope::{AttachmentType, ContentType, Envelope, Item, ItemType}; +use crate::metrics_extraction::sessions::{extract_session_metrics, SessionMetricsConfig}; +use crate::metrics_extraction::transactions::extract_transaction_metrics; +use crate::service::{ServerError, REGISTRY}; +use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; +use crate::utils::{ + self, ChunkedFormDataAggregator, EnvelopeContext, ErrorBoundary, 
FormDataIter, SamplingResult, +}; + #[cfg(feature = "processing")] use { + crate::actors::envelopes::SendMetrics, crate::actors::project_cache::UpdateRateLimits, crate::service::ServerErrorKind, crate::utils::{BucketLimiter, EnvelopeLimiter}, failure::ResultExt, relay_general::store::{GeoIpLookup, StoreConfig, StoreProcessor}, + relay_quotas::ItemScoping, relay_quotas::{RateLimitingError, RedisRateLimiter}, symbolic_unreal::{Unreal4Error, Unreal4ErrorKind}, }; @@ -2185,16 +2188,12 @@ impl EnvelopeProcessorService { /// Check and apply rate limits to metrics buckets. #[cfg(feature = "processing")] fn handle_rate_limit_flush_buckets(&self, message: RateLimitFlushBuckets) { - use relay_quotas::ItemScoping; - - use crate::actors::envelopes::SendMetrics; - let RateLimitFlushBuckets { - bucket_limiter: mut enforcement, + mut bucket_limiter, partition_key, } = message; - let scoping = *enforcement.scoping(); + let scoping = *bucket_limiter.scoping(); if let Some(rate_limiter) = self.rate_limiter.as_ref() { let item_scoping = ItemScoping { @@ -2205,13 +2204,13 @@ impl EnvelopeProcessorService { // calls with quantity=0 to be rate limited. let over_accept_once = true; let rate_limits = rate_limiter.is_rate_limited( - enforcement.quotas(), + bucket_limiter.quotas(), item_scoping, - enforcement.transaction_count(), + bucket_limiter.transaction_count(), over_accept_once, ); - let was_enforced = enforcement.enforce_limits(rate_limits.as_ref().map_err(|_| ())); + let was_enforced = bucket_limiter.enforce_limits(rate_limits.as_ref().map_err(|_| ())); if was_enforced { if let Ok(limits) = rate_limits { @@ -2222,7 +2221,7 @@ impl EnvelopeProcessorService { } } - let buckets = enforcement.into_buckets(); + let buckets = bucket_limiter.into_buckets(); if !buckets.is_empty() { // Forward buckets to envelope manager to send them to upstream or kafka: EnvelopeManager::from_registry().send(SendMetrics { diff --git a/relay-server/src/actors/project_cache.rs b/relay-server/src/actors/project_cache.rs index 1e7aaae0da4..34d7fcdc7a1 100644 --- a/relay-server/src/actors/project_cache.rs +++ b/relay-server/src/actors/project_cache.rs @@ -6,13 +6,6 @@ use actix_web::ResponseError; use failure::Fail; use futures01::{future, Future}; -#[cfg(feature = "processing")] -use { - crate::actors::processor::{EnvelopeProcessor, RateLimitFlushBuckets}, - crate::actors::project_redis::RedisProjectSource, - relay_common::clone, -}; - use relay_common::ProjectKey; use relay_config::{Config, RelayMode}; use relay_metrics::{self, AggregateMetricsError, FlushBuckets, InsertMetrics, MergeBuckets}; @@ -33,6 +26,13 @@ use crate::service::Registry; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{self, BucketLimiter, EnvelopeContext, GarbageDisposal, Response}; +#[cfg(feature = "processing")] +use { + crate::actors::processor::{EnvelopeProcessor, RateLimitFlushBuckets}, + crate::actors::project_redis::RedisProjectSource, + relay_common::clone, +}; + #[derive(Fail, Debug)] pub enum ProjectError { #[fail(display = "failed to fetch project state from upstream")] From 8d5f406261af7cd2c2fdc759b23e24f0c6a77e89 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 14 Oct 2022 14:14:30 +0200 Subject: [PATCH 33/34] test: Try to unflake --- tests/integration/fixtures/processing.py | 9 +++++++++ tests/integration/test_store.py | 18 ++++++------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/tests/integration/fixtures/processing.py b/tests/integration/fixtures/processing.py index 
9777736b9fd..99dc46b9623 100644 --- a/tests/integration/fixtures/processing.py +++ b/tests/integration/fixtures/processing.py @@ -298,6 +298,15 @@ def get_metric(self, timeout=None): return json.loads(message.value()) + def get_metrics(self, timeout=None, max_attempts=100): + for _ in range(max_attempts): + message = self.poll(timeout=timeout) + if message is None: + return + else: + assert message.error() is None + yield json.loads(message.value()) + class SessionsConsumer(ConsumerBase): def get_session(self): diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 90ae2ae4b5d..b564cb7f066 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -7,7 +7,7 @@ import socket import threading import pytest -from datetime import datetime +from datetime import datetime, timedelta from requests.exceptions import HTTPError from flask import abort, Response @@ -270,12 +270,12 @@ def test_store_not_normalized(mini_sentry, relay): def make_transaction(event): - now = datetime.datetime.utcnow() + now = datetime.utcnow() event.update( { "type": "transaction", "timestamp": now.isoformat(), - "start_timestamp": (now - datetime.timedelta(seconds=2)).isoformat(), + "start_timestamp": (now - timedelta(seconds=2)).isoformat(), "spans": [], "contexts": { "trace": { @@ -542,7 +542,7 @@ def make_bucket(name, type_, values): def send_buckets(buckets): relay.send_metrics_buckets(project_id, buckets) - sleep(0.1) # give relay time to flush the buckets + sleep(0.2) # NOTE: Sending these buckets in multiple envelopes because the order of flushing # and also the order of rate limiting is not deterministic. @@ -589,16 +589,10 @@ def send_buckets(buckets): ], ) - # Expect 2 of 9 buckets to be dropped: - num_expected_buckets = 7 - - produced_buckets = [ - metrics_consumer.get_metric(timeout=1) for _ in range(num_expected_buckets) - ] - metrics_consumer.assert_empty() + produced_buckets = list(metrics_consumer.get_metrics(timeout=2)) # Sort buckets to prevent ordering flakiness: - produced_buckets.sort(key=lambda b: b["name"]) + produced_buckets.sort(key=lambda b: (b["name"], b["value"])) for bucket in produced_buckets: del bucket["timestamp"] From aa9359139e50e305cf25dbec93deca142a3b879e Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 14 Oct 2022 14:36:46 +0200 Subject: [PATCH 34/34] test: unflake, attempt #2 --- tests/integration/test_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index b564cb7f066..9e162121f5b 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -589,7 +589,7 @@ def send_buckets(buckets): ], ) - produced_buckets = list(metrics_consumer.get_metrics(timeout=2)) + produced_buckets = list(metrics_consumer.get_metrics(timeout=4)) # Sort buckets to prevent ordering flakiness: produced_buckets.sort(key=lambda b: (b["name"], b["value"]))
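
A note on the bucket rate-limiting pattern introduced above: the `BucketLimiter` (née `BucketEnforcement`) extracted in these patches boils down to one small, reusable shape -- classify each metric bucket by whether it belongs to the transactions namespace, fold the per-bucket counts into a single transaction count, and, when a limit applies, drop exactly the flagged buckets while recording one outcome for the total. The standalone Rust sketch below reproduces that shape on plain tuples so it can be compiled and run in isolation; the string prefixes stand in for `MetricResourceIdentifier` parsing and the `is_limited` flag stands in for the cached/Redis rate-limit check, so none of the names here are Relay APIs.

/// Count how many transactions a bucket represents, or `None` if the
/// bucket is not transaction related (stands in for the MRI namespace check).
fn transaction_count(name: &str, value_len: usize) -> Option<usize> {
    if !name.starts_with("d:transactions/") && !name.starts_with("c:transactions/") {
        return None;
    }
    if name.starts_with("d:transactions/duration@") {
        // "duration" is extracted once per transaction, so its value count
        // doubles as the transaction count.
        Some(value_len)
    } else {
        // Any other transaction metric checks the quota with quantity 0.
        Some(0)
    }
}

fn main() {
    // (bucket name, number of values in the bucket) -- illustrative data only.
    let buckets: Vec<(&str, usize)> = vec![
        ("d:transactions/duration@millisecond", 3),
        ("c:sessions/session@none", 5),
        ("d:transactions/measurements.lcp@millisecond", 3),
    ];

    // Per-bucket counts; `None` marks buckets outside the transactions namespace.
    let counts: Vec<Option<usize>> = buckets
        .iter()
        .map(|(name, len)| transaction_count(name, *len))
        .collect();

    // Fold into a total; stays `None` if no transaction buckets are present.
    let total = counts.iter().fold(None, |acc, count| match count {
        Some(count) => Some(acc.unwrap_or(0) + count),
        None => acc,
    });
    assert_eq!(total, Some(3));

    // Pretend a rate limit is active: retain only the unflagged buckets,
    // mirroring the zip/filter_map retain in `drop_with_outcome`.
    let is_limited = true;
    let kept: Vec<_> = buckets
        .into_iter()
        .zip(counts.iter())
        .filter_map(|(bucket, count)| (!is_limited || count.is_none()).then_some(bucket))
        .collect();
    assert_eq!(kept, vec![("c:sessions/session@none", 5)]);
}

In the real flow, `total` is what the processor reports to the rate limiter as the incremented quantity (with `over_accept_once` set so that subsequent quantity=0 checks are still rejected), and the outcome emitted for the dropped buckets uses that same total as its quantity.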