From 4cf5da72a64bd27cafba4acea068356e47f5e49b Mon Sep 17 00:00:00 2001 From: Radu Woinaroski Date: Mon, 24 Aug 2020 15:00:56 +0200 Subject: [PATCH 1/8] re-authenticate at regular intervals --- relay-server/src/actors/upstream.rs | 39 ++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/relay-server/src/actors/upstream.rs b/relay-server/src/actors/upstream.rs index e5b28f8eb9..1c1f9d08a8 100644 --- a/relay-server/src/actors/upstream.rs +++ b/relay-server/src/actors/upstream.rs @@ -22,6 +22,7 @@ use std::collections::VecDeque; use std::fmt; use std::str; use std::sync::Arc; +use std::time::{Duration, Instant}; use ::actix::fut; use ::actix::prelude::*; @@ -32,6 +33,7 @@ use actix_web::{Error as ActixError, HttpMessage}; use failure::Fail; use futures::{future, prelude::*, sync::oneshot}; use itertools::Itertools; +use lazy_static::lazy_static; use serde::de::DeserializeOwned; use serde::ser::Serialize; @@ -45,6 +47,21 @@ use relay_quotas::{ use crate::metrics::RelayHistograms; use crate::utils::{self, ApiErrorResponse, IntoTracked, TrackedFutureFinished}; +/// How often should re authentication with upstream be done (in seconds) +const AUTHENTICATION_INTERVAL: u64 = 60; +/// What is the amount of time from a successful authentication after which we +/// loose the IsAuthenticated state (if no successful re authentication happens) +const AUTHENTICATION_VALIDITY_INTERVAL: u64 = AUTHENTICATION_INTERVAL + 20; + +lazy_static! { + /// Duration after which a re-authentication should be retried + static ref RE_AUTHENTICATION_DURATION: Duration = Duration::from_secs(AUTHENTICATION_INTERVAL); + /// Duration after the last successful authentication after which we loose the + /// authenticated state ( if we didn't manage to reauthenticate) + static ref AUTHENTICATION_VALIDITY_DURATION: Duration = + Duration::from_secs(AUTHENTICATION_VALIDITY_INTERVAL); +} + #[derive(Fail, Debug)] pub enum UpstreamRequestError { #[fail(display = "attempted to send request while not yet authenticated")] @@ -79,8 +96,6 @@ pub enum UpstreamRequestError { #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] enum AuthState { Unknown, - RegisterRequestChallenge, - RegisterChallengeResponse, Registered, Error, } @@ -195,6 +210,7 @@ struct UpstreamRequest { pub struct UpstreamRelay { backoff: RetryBackoff, + last_authentication: Option, config: Arc, auth_state: AuthState, max_inflight_requests: usize, @@ -262,6 +278,7 @@ impl UpstreamRelay { num_inflight_requests: 0, high_prio_requests: VecDeque::new(), low_prio_requests: VecDeque::new(), + last_authentication: None, } } @@ -466,7 +483,6 @@ impl Handler for UpstreamRelay { self.config.upstream_descriptor() ); - self.auth_state = AuthState::RegisterRequestChallenge; let request = RegisterRequest::new(&credentials.id, &credentials.public_key); let future = self @@ -474,7 +490,6 @@ impl Handler for UpstreamRelay { .into_actor(self) .and_then(|challenge, slf, ctx| { log::debug!("got register challenge (token = {})", challenge.token()); - slf.auth_state = AuthState::RegisterChallengeResponse; let challenge_response = challenge.create_response(); log::debug!("sending register challenge response"); @@ -482,13 +497,25 @@ impl Handler for UpstreamRelay { ctx.notify(PumpHttpMessageQueue); fut }) - .map(|_, slf, _ctx| { + .map(|_, slf, ctx| { log::info!("relay successfully registered with upstream"); slf.auth_state = AuthState::Registered; + slf.backoff.reset(); + slf.last_authentication = Some(Instant::now()); + ctx.notify_later(Authenticate, *RE_AUTHENTICATION_DURATION); }) .map_err(|err, slf, ctx| { log::error!("authentication encountered error: {}", LogError(&err)); - slf.auth_state = AuthState::Error; + + if let Some(last_authentication) = slf.last_authentication { + if last_authentication + *AUTHENTICATION_VALIDITY_DURATION < Instant::now() { + // we are past our grace period we can no longer claim we are authenticated + slf.auth_state = AuthState::Error; + } + } else { + // we never managed to authenticate and now we just got an error + slf.auth_state = AuthState::Error; + } // Do not retry client errors including authentication failures since client errors // are usually permanent. This allows the upstream to reject unsupported Relays From 2ae4c818e28fdbecf6eca5ecbd0f3ddccf1753ba Mon Sep 17 00:00:00 2001 From: Radu Woinaroski Date: Mon, 24 Aug 2020 17:06:14 +0200 Subject: [PATCH 2/8] made authentication intervals configurable. --- CHANGELOG.md | 1 + relay-config/src/config.rs | 20 +++++++++++++++++++ relay-server/src/actors/upstream.rs | 30 +++++++++++------------------ 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0773f0cde6..393dc7eef6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ - Report all Kafka producer errors to Sentry. Previously, only immediate errors were reported but not those during asynchronous flushing of messages. ([#677](https://github.com/getsentry/relay/pull/677)) - Add "HubSpot Crawler" to the list of web crawlers for inbound filters. ([#693](https://github.com/getsentry/relay/pull/693)) - Improved typing for span data of transaction events, no breaking changes. ([#713](https://github.com/getsentry/relay/pull/713)) +- Add periodic re-authentication with the Upstream server, previously there was only one initial authentication. ([#731](https://github.com/getsentry/relay/pull/731)) ## 20.7.2 diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index add7373170..60a0f6411b 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -354,6 +354,12 @@ pub struct Relay { pub tls_identity_path: Option, /// Password for the PKCS12 archive. pub tls_identity_password: Option, + /// The number of seconds after a successful authentication after which + /// a Relay tries to re-authenticate with the upstream server. + pub upstream_reauthentication_interval: u64, + /// The number of seconds allowed for a Relay to re-authenticate after which + /// it is considered that re-authentication has failed. + pub upstream_reauthentication_grace_period: u64, } impl Default for Relay { @@ -366,6 +372,8 @@ impl Default for Relay { tls_port: None, tls_identity_path: None, tls_identity_password: None, + upstream_reauthentication_interval: 60, + upstream_reauthentication_grace_period: 20, } } } @@ -1100,6 +1108,18 @@ impl Config { self.values.relay.tls_identity_password.as_deref() } + /// Returns the interval at which Realy should try to re-authenticate with the upstream + pub fn upstream_reathentication_interval(&self) -> Duration { + Duration::from_secs(self.values.relay.upstream_reauthentication_interval) + } + + /// The maximum amount of time that a Relay is allowed to take to re-authenticate with + /// the upstream after which it is declared as un-authenticated (if it is not able to + /// authenticate). + pub fn upstream_reauthentication_grace_period(&self) -> Duration { + Duration::from_secs(self.values.relay.upstream_reauthentication_grace_period) + } + /// Returns whether this Relay should emit outcomes. /// /// This is `true` either if `outcomes.emit_outcomes` is explicitly enabled, or if this Relay is diff --git a/relay-server/src/actors/upstream.rs b/relay-server/src/actors/upstream.rs index 1c1f9d08a8..ae56f61780 100644 --- a/relay-server/src/actors/upstream.rs +++ b/relay-server/src/actors/upstream.rs @@ -33,7 +33,6 @@ use actix_web::{Error as ActixError, HttpMessage}; use failure::Fail; use futures::{future, prelude::*, sync::oneshot}; use itertools::Itertools; -use lazy_static::lazy_static; use serde::de::DeserializeOwned; use serde::ser::Serialize; @@ -47,21 +46,6 @@ use relay_quotas::{ use crate::metrics::RelayHistograms; use crate::utils::{self, ApiErrorResponse, IntoTracked, TrackedFutureFinished}; -/// How often should re authentication with upstream be done (in seconds) -const AUTHENTICATION_INTERVAL: u64 = 60; -/// What is the amount of time from a successful authentication after which we -/// loose the IsAuthenticated state (if no successful re authentication happens) -const AUTHENTICATION_VALIDITY_INTERVAL: u64 = AUTHENTICATION_INTERVAL + 20; - -lazy_static! { - /// Duration after which a re-authentication should be retried - static ref RE_AUTHENTICATION_DURATION: Duration = Duration::from_secs(AUTHENTICATION_INTERVAL); - /// Duration after the last successful authentication after which we loose the - /// authenticated state ( if we didn't manage to reauthenticate) - static ref AUTHENTICATION_VALIDITY_DURATION: Duration = - Duration::from_secs(AUTHENTICATION_VALIDITY_INTERVAL); -} - #[derive(Fail, Debug)] pub enum UpstreamRequestError { #[fail(display = "attempted to send request while not yet authenticated")] @@ -219,6 +203,11 @@ pub struct UpstreamRelay { high_prio_requests: VecDeque, // low priority request queue low_prio_requests: VecDeque, + // interval after which an authenticated Relay tries to re-authenticate + reauthentication_interval: Duration, + // interval after which a previously authenticated Relay is considered to not be + // authenticated anymore. + max_reauthentication_interval: Duration, } /// Handles a response returned from the upstream. @@ -273,12 +262,15 @@ impl UpstreamRelay { UpstreamRelay { backoff: RetryBackoff::new(config.http_max_retry_interval()), max_inflight_requests: config.max_concurrent_requests(), - config, auth_state: AuthState::Unknown, num_inflight_requests: 0, high_prio_requests: VecDeque::new(), low_prio_requests: VecDeque::new(), last_authentication: None, + reauthentication_interval: config.upstream_reathentication_interval(), + max_reauthentication_interval: config.upstream_reathentication_interval() + + config.upstream_reauthentication_grace_period(), + config, } } @@ -502,13 +494,13 @@ impl Handler for UpstreamRelay { slf.auth_state = AuthState::Registered; slf.backoff.reset(); slf.last_authentication = Some(Instant::now()); - ctx.notify_later(Authenticate, *RE_AUTHENTICATION_DURATION); + ctx.notify_later(Authenticate, slf.reauthentication_interval); }) .map_err(|err, slf, ctx| { log::error!("authentication encountered error: {}", LogError(&err)); if let Some(last_authentication) = slf.last_authentication { - if last_authentication + *AUTHENTICATION_VALIDITY_DURATION < Instant::now() { + if last_authentication + slf.max_reauthentication_interval < Instant::now() { // we are past our grace period we can no longer claim we are authenticated slf.auth_state = AuthState::Error; } From 0ad53d1b672c0973a29efd1f396fced0e2b20e84 Mon Sep 17 00:00:00 2001 From: Radu Woinaroski <5281987+RaduW@users.noreply.github.com> Date: Wed, 26 Aug 2020 13:34:23 +0200 Subject: [PATCH 3/8] Update relay-config/src/config.rs Co-authored-by: Jan Michael Auer --- relay-config/src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 60a0f6411b..ed1d3e3b82 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -1109,7 +1109,7 @@ impl Config { } /// Returns the interval at which Realy should try to re-authenticate with the upstream - pub fn upstream_reathentication_interval(&self) -> Duration { + pub fn upstream_reauthentication_interval(&self) -> Duration { Duration::from_secs(self.values.relay.upstream_reauthentication_interval) } From c94a796798643531a2488c9bce66a6c5a7778506 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 26 Aug 2020 14:32:48 +0200 Subject: [PATCH 4/8] WIP: Only apply grace period to network errors --- relay-config/src/config.rs | 24 ++++++++--------- relay-server/src/actors/upstream.rs | 40 ++++++++++++++--------------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index ed1d3e3b82..af66058612 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -354,12 +354,6 @@ pub struct Relay { pub tls_identity_path: Option, /// Password for the PKCS12 archive. pub tls_identity_password: Option, - /// The number of seconds after a successful authentication after which - /// a Relay tries to re-authenticate with the upstream server. - pub upstream_reauthentication_interval: u64, - /// The number of seconds allowed for a Relay to re-authenticate after which - /// it is considered that re-authentication has failed. - pub upstream_reauthentication_grace_period: u64, } impl Default for Relay { @@ -372,8 +366,6 @@ impl Default for Relay { tls_port: None, tls_identity_path: None, tls_identity_password: None, - upstream_reauthentication_interval: 60, - upstream_reauthentication_grace_period: 20, } } } @@ -532,6 +524,12 @@ struct Http { max_retry_interval: u32, /// The custom HTTP Host header to send to the upstream. host_header: Option, + /// The number of seconds after a successful authentication after which + /// a Relay tries to re-authenticate with the upstream server. + auth_interval: u64, + /// The number of seconds allowed for a Relay to re-authenticate after which + /// it is considered that re-authentication has failed. + auth_grace_period: u64, } impl Default for Http { @@ -541,6 +539,8 @@ impl Default for Http { connection_timeout: 3, max_retry_interval: 60, host_header: None, + auth_interval: 60, + auth_grace_period: 10, } } } @@ -1109,15 +1109,15 @@ impl Config { } /// Returns the interval at which Realy should try to re-authenticate with the upstream - pub fn upstream_reauthentication_interval(&self) -> Duration { - Duration::from_secs(self.values.relay.upstream_reauthentication_interval) + pub fn http_auth_interval(&self) -> Duration { + Duration::from_secs(self.values.http.auth_interval) } /// The maximum amount of time that a Relay is allowed to take to re-authenticate with /// the upstream after which it is declared as un-authenticated (if it is not able to /// authenticate). - pub fn upstream_reauthentication_grace_period(&self) -> Duration { - Duration::from_secs(self.values.relay.upstream_reauthentication_grace_period) + pub fn http_auth_grace_period(&self) -> Duration { + Duration::from_secs(self.values.http.auth_grace_period) } /// Returns whether this Relay should emit outcomes. diff --git a/relay-server/src/actors/upstream.rs b/relay-server/src/actors/upstream.rs index ae56f61780..cbfc14fc48 100644 --- a/relay-server/src/actors/upstream.rs +++ b/relay-server/src/actors/upstream.rs @@ -22,7 +22,7 @@ use std::collections::VecDeque; use std::fmt; use std::str; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Instant; use ::actix::fut; use ::actix::prelude::*; @@ -76,6 +76,12 @@ pub enum UpstreamRequestError { ChannelClosed, } +impl UpstreamRequestError { + fn is_network_error(&self) -> bool { + matches!(self, Self::SendFailed(_) | Self::PayloadFailed(_)) + } +} + /// Represents the current auth state. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] enum AuthState { @@ -194,7 +200,7 @@ struct UpstreamRequest { pub struct UpstreamRelay { backoff: RetryBackoff, - last_authentication: Option, + first_error: Option, config: Arc, auth_state: AuthState, max_inflight_requests: usize, @@ -203,11 +209,6 @@ pub struct UpstreamRelay { high_prio_requests: VecDeque, // low priority request queue low_prio_requests: VecDeque, - // interval after which an authenticated Relay tries to re-authenticate - reauthentication_interval: Duration, - // interval after which a previously authenticated Relay is considered to not be - // authenticated anymore. - max_reauthentication_interval: Duration, } /// Handles a response returned from the upstream. @@ -266,10 +267,7 @@ impl UpstreamRelay { num_inflight_requests: 0, high_prio_requests: VecDeque::new(), low_prio_requests: VecDeque::new(), - last_authentication: None, - reauthentication_interval: config.upstream_reathentication_interval(), - max_reauthentication_interval: config.upstream_reathentication_interval() - + config.upstream_reauthentication_grace_period(), + first_error: None, config, } } @@ -460,7 +458,7 @@ impl Message for Authenticate { /// message was successfully handled). /// /// **Note:** Relay has retry functionality, outside this actor, that periodically sends Authenticate -/// messages until successful Authentication with the upstream server was achieved. +/// messages until successful Authentication with the upstream server was achieved. impl Handler for UpstreamRelay { type Result = ResponseActFuture; @@ -492,20 +490,20 @@ impl Handler for UpstreamRelay { .map(|_, slf, ctx| { log::info!("relay successfully registered with upstream"); slf.auth_state = AuthState::Registered; + slf.backoff.reset(); - slf.last_authentication = Some(Instant::now()); - ctx.notify_later(Authenticate, slf.reauthentication_interval); + slf.first_error = None; + + ctx.notify_later(Authenticate, slf.config.http_auth_interval()); }) .map_err(|err, slf, ctx| { log::error!("authentication encountered error: {}", LogError(&err)); - if let Some(last_authentication) = slf.last_authentication { - if last_authentication + slf.max_reauthentication_interval < Instant::now() { - // we are past our grace period we can no longer claim we are authenticated - slf.auth_state = AuthState::Error; - } - } else { - // we never managed to authenticate and now we just got an error + let now = Instant::now(); + let first_error = *slf.first_error.get_or_insert(now); + if !err.is_network_error() + || first_error + slf.config.http_auth_grace_period() < now + { slf.auth_state = AuthState::Error; } From b23aacfa17518ab6681a11e97c61db82b31e0482 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 26 Aug 2020 14:52:43 +0200 Subject: [PATCH 5/8] ref: Move network errors into helper functions --- relay-server/src/actors/upstream.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/relay-server/src/actors/upstream.rs b/relay-server/src/actors/upstream.rs index cbfc14fc48..e09b1bb3aa 100644 --- a/relay-server/src/actors/upstream.rs +++ b/relay-server/src/actors/upstream.rs @@ -280,6 +280,17 @@ impl UpstreamRelay { } } + fn handle_network_error(&mut self) { + let now = Instant::now(); + let first_error = *self.first_error.get_or_insert(now); + if first_error + self.config.http_auth_grace_period() > now { + return; + } + + self.auth_state = AuthState::Error; + // TODO: initiate authentication loop if not already running + } + fn send_request( &mut self, request: UpstreamRequest, @@ -499,11 +510,9 @@ impl Handler for UpstreamRelay { .map_err(|err, slf, ctx| { log::error!("authentication encountered error: {}", LogError(&err)); - let now = Instant::now(); - let first_error = *slf.first_error.get_or_insert(now); - if !err.is_network_error() - || first_error + slf.config.http_auth_grace_period() < now - { + if err.is_network_error() { + slf.handle_network_error(); + } else { slf.auth_state = AuthState::Error; } From 3145e7fdd03092594c6e8fd5b1bfa3882435ac76 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 26 Aug 2020 14:59:38 +0200 Subject: [PATCH 6/8] meta: Move changelog entry to unreleased section --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 393dc7eef6..3132bb1f00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ **Features**: - Support chunked formdata keys for event payloads on the Minidump endpoint. Since crashpad has a limit for the length of custom attributes, the sentry event payload can be split up into `sentry__1`, `sentry__2`, etc. ([#721](https://github.com/getsentry/relay/pull/721)) +- Add periodic re-authentication with the upstream server, previously there was only one initial authentication. ([#731](https://github.com/getsentry/relay/pull/731)) **Internal**: @@ -32,7 +33,6 @@ - Report all Kafka producer errors to Sentry. Previously, only immediate errors were reported but not those during asynchronous flushing of messages. ([#677](https://github.com/getsentry/relay/pull/677)) - Add "HubSpot Crawler" to the list of web crawlers for inbound filters. ([#693](https://github.com/getsentry/relay/pull/693)) - Improved typing for span data of transaction events, no breaking changes. ([#713](https://github.com/getsentry/relay/pull/713)) -- Add periodic re-authentication with the Upstream server, previously there was only one initial authentication. ([#731](https://github.com/getsentry/relay/pull/731)) ## 20.7.2 From 996fae90a7b0ea7461f91a45c606bb653f5dc486 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 26 Aug 2020 15:18:18 +0200 Subject: [PATCH 7/8] ref: Add 502 and 503 to network errors --- relay-server/src/actors/upstream.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/relay-server/src/actors/upstream.rs b/relay-server/src/actors/upstream.rs index e09b1bb3aa..3d87460ada 100644 --- a/relay-server/src/actors/upstream.rs +++ b/relay-server/src/actors/upstream.rs @@ -78,7 +78,11 @@ pub enum UpstreamRequestError { impl UpstreamRequestError { fn is_network_error(&self) -> bool { - matches!(self, Self::SendFailed(_) | Self::PayloadFailed(_)) + match self { + Self::SendFailed(_) | Self::PayloadFailed(_) => true, + Self::ResponseError(code, _) => matches!(code.as_u16(), 502 | 503), + _ => false, + } } } From a7dbf3e2e80c907b8c4899732d55a1b39b6be2ff Mon Sep 17 00:00:00 2001 From: Radu Woinaroski Date: Thu, 27 Aug 2020 09:56:43 +0200 Subject: [PATCH 8/8] minor, also retry on 504 --- relay-server/src/actors/upstream.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/actors/upstream.rs b/relay-server/src/actors/upstream.rs index 3d87460ada..dcc4d62874 100644 --- a/relay-server/src/actors/upstream.rs +++ b/relay-server/src/actors/upstream.rs @@ -80,7 +80,7 @@ impl UpstreamRequestError { fn is_network_error(&self) -> bool { match self { Self::SendFailed(_) | Self::PayloadFailed(_) => true, - Self::ResponseError(code, _) => matches!(code.as_u16(), 502 | 503), + Self::ResponseError(code, _) => matches!(code.as_u16(), 502 | 503 | 504), _ => false, } }