fix(server): Rate limit outcomes emitted only for events on "fast-path" #809

Merged: 6 commits, Oct 15, 2020
CHANGELOG.md (2 changes: 1 addition & 1 deletion)
@@ -13,12 +13,12 @@
- Fix issue where `$span` would not be recognized in Advanced Data Scrubbing. ([#781](https://github.com/getsentry/relay/pull/781))
- Accept big-endian minidumps. ([#789](https://github.com/getsentry/relay/pull/789))
- Detect network outages and retry sending events instead of dropping them. ([#788](https://github.com/getsentry/relay/pull/788))
- Rate limit outcomes emitted only for events. ([#806](https://github.com/getsentry/relay/pull/806))

**Internal**:

- Project states are now cached separately per DSN public key instead of per project ID. This means that there will be multiple separate cache entries for projects with more than one DSN. ([#778](https://github.com/getsentry/relay/pull/778))
- Relay no longer uses the Sentry endpoint to resolve project IDs by public key. Ingestion for the legacy store endpoint has been refactored to rely on key-based caches only. As a result, the legacy endpoint is supported only on managed Relays. ([#800](https://github.com/getsentry/relay/pull/800))
- Fix rate limit outcomes, now emitted only for error events but not transactions. ([#806](https://github.com/getsentry/relay/pull/806), [#809](https://github.com/getsentry/relay/pull/809))

## 20.9.0

relay-quotas/src/rate_limit.rs (77 changes: 77 additions & 0 deletions)
@@ -286,6 +286,21 @@ impl RateLimits {
pub fn longest(&self) -> Option<&RateLimit> {
self.iter().max_by_key(|limit| limit.retry_after)
}

/// Returns the longest rate limit that is error related.
///
/// From the point of view of an error event generating an outcome, the most relevant
/// rate limit is the longest rate limit that applies to errors.
pub fn longest_error(&self) -> Option<&RateLimit> {
let is_event_related = |rate_limit: &&RateLimit| {
rate_limit.categories.is_empty()
|| rate_limit.categories.iter().any(|cat| cat.is_error())
};

self.iter()
.filter(is_event_related)
.max_by_key(|limit| limit.retry_after)
}
}

/// Immutable rate limits iterator.
@@ -651,6 +666,68 @@ mod tests {
"###);
}

#[test]
fn test_rate_limits_longest_error_none() {
let mut rate_limits = RateLimits::new();

rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Transaction],
scope: RateLimitScope::Organization(42),
reason_code: None,
retry_after: RetryAfter::from_secs(1),
});
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Attachment],
scope: RateLimitScope::Organization(42),
reason_code: None,
retry_after: RetryAfter::from_secs(1),
});
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Session],
scope: RateLimitScope::Organization(42),
reason_code: None,
retry_after: RetryAfter::from_secs(1),
});

// Only non-event rate limits were added, so nothing is relevant.
assert_eq!(rate_limits.longest_error(), None);
}

#[test]
fn test_rate_limits_longest_error() {
let mut rate_limits = RateLimits::new();
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Transaction],
scope: RateLimitScope::Organization(40),
reason_code: None,
retry_after: RetryAfter::from_secs(100),
});
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Error],
scope: RateLimitScope::Organization(41),
reason_code: None,
retry_after: RetryAfter::from_secs(5),
});
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Error],
scope: RateLimitScope::Organization(42),
reason_code: None,
retry_after: RetryAfter::from_secs(7),
});

// `longest_error` must skip the longer transaction limit and return the
// longest error-related limit instead.
let rate_limit = rate_limits.longest_error().unwrap();
insta::assert_ron_snapshot!(rate_limit, @r###"
RateLimit(
categories: [
error,
],
scope: Organization(42),
reason_code: None,
retry_after: RetryAfter(7),
)
"###);
}

#[test]
fn test_rate_limits_clean_expired() {
let mut rate_limits = RateLimits::new();
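For context, and not part of the diff, here is a minimal sketch of how `longest_error` behaves compared to `longest`, built only from the `relay_quotas` types visible in the hunks above (crate availability and trait derivations are assumed):

```rust
use relay_common::DataCategory;
use relay_quotas::{RateLimit, RateLimitScope, RateLimits, RetryAfter};
use smallvec::smallvec;

fn main() {
    let mut rate_limits = RateLimits::new();

    // A long transaction limit and a shorter error limit.
    rate_limits.add(RateLimit {
        categories: smallvec![DataCategory::Transaction],
        scope: RateLimitScope::Organization(1),
        reason_code: None,
        retry_after: RetryAfter::from_secs(100),
    });
    rate_limits.add(RateLimit {
        categories: smallvec![DataCategory::Error],
        scope: RateLimitScope::Organization(1),
        reason_code: None,
        retry_after: RetryAfter::from_secs(5),
    });

    // `longest` picks the transaction limit because 100s > 5s, whereas
    // `longest_error` filters to error-related limits first and therefore
    // returns the shorter error limit.
    let overall = rate_limits.longest().unwrap();
    assert!(!overall.categories.iter().any(|cat| cat.is_error()));

    let for_errors = rate_limits.longest_error().unwrap();
    assert!(for_errors.categories.iter().any(|cat| cat.is_error()));
}
```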
relay-server/src/actors/events.rs (85 changes: 4 additions & 81 deletions)
@@ -22,7 +22,7 @@ use relay_general::protocol::{
};
use relay_general::store::ClockDriftProcessor;
use relay_general::types::{Annotated, Array, Object, ProcessingAction, Value};
use relay_quotas::{RateLimit, RateLimits};
use relay_quotas::RateLimits;
use relay_redis::RedisPool;

use crate::actors::outcome::{DiscardReason, Outcome, OutcomeProducer, TrackOutcome};
@@ -142,9 +142,9 @@ impl ProcessingError {
Self::InvalidTransaction => Some(Outcome::Invalid(DiscardReason::InvalidTransaction)),
Self::DuplicateItem(_) => Some(Outcome::Invalid(DiscardReason::DuplicateItem)),
Self::NoEventPayload => Some(Outcome::Invalid(DiscardReason::NoEventPayload)),
Self::RateLimited(ref rate_limits) => {
most_relevant(rate_limits).map(|r| Outcome::RateLimited(r.reason_code.clone()))
}
Self::RateLimited(ref rate_limits) => rate_limits
.longest_error()
.map(|r| Outcome::RateLimited(r.reason_code.clone())),

// Processing-only outcomes (Sentry-internal Relays)
#[cfg(feature = "processing")]
@@ -170,21 +170,6 @@
}
}

/// Returns the most relevant rate limit.
///
/// The most relevant rate limit is the longest rate limit for events and if there is no
/// rate limit for events then the longest rate limit for anything else
fn most_relevant(rate_limits: &RateLimits) -> Option<&RateLimit> {
let is_event_related = |rate_limit: &&RateLimit| {
rate_limit.categories.is_empty() || rate_limit.categories.iter().any(|cat| cat.is_error())
};

rate_limits
.iter()
.filter(is_event_related)
.max_by_key(|limit| limit.retry_after)
}

type ExtractedEvent = (Annotated<Event>, usize);

/// A state container for envelope processing.
@@ -1626,13 +1611,9 @@ impl Handler<GetCapturedEvent> for EventManager {
#[cfg(test)]
mod tests {
use super::*;
use smallvec::smallvec;

use chrono::{DateTime, TimeZone, Utc};

use relay_common::DataCategory;
use relay_quotas::{RateLimitScope, RetryAfter};

fn create_breadcrumbs_item(breadcrumbs: &[(Option<DateTime<Utc>>, &str)]) -> Item {
let mut data = Vec::new();

@@ -1774,62 +1755,4 @@ mod tests {
// regression test to ensure we don't fail parsing an empty file
result.expect("event_from_attachments");
}

#[test]
/// Test that only rate limits related to events are returned
fn test_most_relevant_only_selects_event_rate_limits() {
let mut rate_limits = RateLimits::new();
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Transaction],
scope: RateLimitScope::Organization(42),
reason_code: None,
retry_after: RetryAfter::from_secs(1),
});
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Attachment],
scope: RateLimitScope::Organization(42),
reason_code: None,
retry_after: RetryAfter::from_secs(1),
});
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Session],
scope: RateLimitScope::Organization(42),
reason_code: None,
retry_after: RetryAfter::from_secs(1),
});

// only non event rate limits so nothing relevant
assert_eq!(most_relevant(&rate_limits), None)
}

#[test]
/// Test that the longest event related rate limit is returned
fn test_most_relevant_selects_longest_event_rate_limit() {
let mut rate_limits = RateLimits::new();
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Transaction],
scope: RateLimitScope::Organization(40),
reason_code: None,
retry_after: RetryAfter::from_secs(100),
});
rate_limits.add(RateLimit {
categories: smallvec![DataCategory::Error],
scope: RateLimitScope::Organization(41),
reason_code: None,
retry_after: RetryAfter::from_secs(5),
});
let longest = RateLimit {
categories: smallvec![DataCategory::Error],
scope: RateLimitScope::Organization(42),
reason_code: None,
retry_after: RetryAfter::from_secs(7),
};
rate_limits.add(longest);

let limit = most_relevant(&rate_limits);
//we do have an event rate limit
assert!(limit.is_some());
// the longest event rate limit is for org 42
assert_eq!(limit.unwrap().scope, RateLimitScope::Organization(42))
}
}
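Both call sites in this PR now funnel through the same pattern. The free-standing helper below is a hypothetical restatement of it (not code from the PR), shown to make the behavior explicit: when no error-related rate limit is active, `longest_error` yields `None` and no rate-limit outcome is tracked at all, which is how transaction-only limits stop producing outcomes for events.

```rust
use relay_quotas::RateLimits;

use crate::actors::outcome::Outcome;

/// Hypothetical helper mirroring the call sites in `events.rs` and
/// `common.rs`: a rate-limit outcome is produced only when an error-related
/// limit exists; otherwise no outcome is emitted.
fn rate_limit_outcome(rate_limits: &RateLimits) -> Option<Outcome> {
    rate_limits
        .longest_error()
        .map(|limit| Outcome::RateLimited(limit.reason_code.clone()))
}
```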
relay-server/src/endpoints/common.rs (30 changes: 15 additions & 15 deletions)
@@ -79,8 +79,8 @@ pub enum BadStoreRequest {
}

impl BadStoreRequest {
fn to_outcome(&self) -> Outcome {
match self {
fn to_outcome(&self) -> Option<Outcome> {
Some(match self {
BadStoreRequest::UnsupportedProtocolVersion(_) => {
Outcome::Invalid(DiscardReason::AuthVersion)
}
@@ -119,16 +119,14 @@
}

BadStoreRequest::RateLimited(rate_limits) => {
let reason_code = rate_limits
.longest()
.and_then(|limit| limit.reason_code.clone());

Outcome::RateLimited(reason_code)
return rate_limits
.longest_error()
.map(|r| Outcome::RateLimited(r.reason_code.clone()));
}

// should actually never create an outcome
BadStoreRequest::InvalidEventId => Outcome::Invalid(DiscardReason::Internal),
}
})
}
}

@@ -457,13 +455,15 @@
metric!(counter(RelayCounters::EnvelopeRejected) += 1);

if is_event {
outcome_producer.do_send(TrackOutcome {
timestamp: start_time,
scoping: *scoping.borrow(),
outcome: error.to_outcome(),
event_id: *event_id.borrow(),
remote_addr,
});
if let Some(outcome) = error.to_outcome() {
outcome_producer.do_send(TrackOutcome {
timestamp: start_time,
scoping: *scoping.borrow(),
outcome,
event_id: *event_id.borrow(),
remote_addr,
});
}
}

if !emit_rate_limit && matches!(error, BadStoreRequest::RateLimited(_)) {
tests/integration/test_store.py (3 changes: 2 additions & 1 deletion)
@@ -550,7 +550,8 @@ def test_processing_quotas(
retry_after2, rest = headers["x-sentry-rate-limits"].split(":", 1)
assert int(retry_after2) == int(retry_after)
assert rest == "%s:key" % category
outcomes_consumer.assert_rate_limited("get_lost", key_id=key_id)
if generates_outcomes:
outcomes_consumer.assert_rate_limited("get_lost", key_id=key_id)

relay.dsn_public_key = second_key["publicKey"]

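As a rough illustration of what the new `generates_outcomes` guard expresses (parameter and helper names below are illustrative, not taken from `test_store.py`), one way to parametrize it could look like this:

```python
import pytest

def emits_rate_limit_outcome(category: str) -> bool:
    # Stand-in for Relay's behavior after this change: only error events are
    # expected to produce rate-limit outcomes on the fast path.
    return category == "error"

# Illustrative parametrization: the `assert_rate_limited` check in the diff
# above is only meaningful for categories that still generate outcomes.
@pytest.mark.parametrize(
    "category, generates_outcomes",
    [
        ("error", True),
        ("transaction", False),
        ("session", False),
    ],
)
def test_rate_limit_outcomes(category, generates_outcomes):
    assert emits_rate_limit_outcome(category) == generates_outcomes
```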