From 25a551bfffe04def158b1a9b06ac0acc71f6aea4 Mon Sep 17 00:00:00 2001 From: Philip Jenvey Date: Mon, 14 Mar 2022 10:09:46 -0700 Subject: [PATCH] feat: include os-family and form-factor as metric tags (#371) and kill other unused UA metric tags which have too high of cardinality anyway also tag mobile endpoint requests and update regex per new CVE Closes #369 --- Cargo.lock | 4 +- src/adm/tiles.rs | 5 ++- src/metrics.rs | 5 +-- src/server/img_storage.rs | 3 +- src/tags.rs | 72 ++++------------------------------ src/web/extractors.rs | 4 +- src/web/mod.rs | 2 +- src/web/test.rs | 81 ++++++++++++++++++++++++++++++++++----- 8 files changed, 91 insertions(+), 85 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 312eb3bd..501b5a7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2252,9 +2252,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" dependencies = [ "aho-corasick", "memchr", diff --git a/src/adm/tiles.rs b/src/adm/tiles.rs index 4fe3cf38..214beea1 100644 --- a/src/adm/tiles.rs +++ b/src/adm/tiles.rs @@ -175,9 +175,12 @@ pub async fn get_tiles( .into_string() .unwrap_or_else(|_| "Unknown".to_owned()), ); + if device_info.is_mobile() { + tags.add_tag("endpoint", "mobile"); + } tags.add_extra("adm_url", adm_url); - metrics.incr("tiles.adm.request"); + metrics.incr_with_tags("tiles.adm.request", Some(tags)); let response: AdmTileResponse = match state.settings.test_mode { crate::settings::TestModes::TestFakeResponse => { let default = HeaderValue::from_str(DEFAULT).unwrap(); diff --git a/src/metrics.rs b/src/metrics.rs index 1d7be6a4..1ad4e956 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -255,9 +255,8 @@ mod tests { let tags = Tags::from_head(&rh, &settings); - assert_eq!(tags.tags.get("ua.os.ver"), Some(&"NT 10.0".to_owned())); - assert_eq!(tags.tags.get("ua.os.family"), Some(&"Windows".to_owned())); - assert_eq!(tags.tags.get("ua.browser.ver"), Some(&"72.0".to_owned())); + assert_eq!(tags.tags.get("ua.os.family"), Some(&"windows".to_owned())); + assert_eq!(tags.tags.get("ua.form_factor"), Some(&"desktop".to_owned())); assert_eq!(tags.tags.get("uri.method"), Some(&"GET".to_owned())); } diff --git a/src/server/img_storage.rs b/src/server/img_storage.rs index 6583ad97..d411d6b7 100644 --- a/src/server/img_storage.rs +++ b/src/server/img_storage.rs @@ -646,8 +646,7 @@ mod tests { img_store.store(&target).await.expect("Store failed"); assert_eq!(rx.len(), 4); let spied_metrics: Vec = rx - .iter() - .take(4) + .try_iter() .map(|x| String::from_utf8(x).unwrap()) .collect(); assert_eq!( diff --git a/src/tags.rs b/src/tags.rs index 4a0447c4..4cc345f2 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -18,59 +18,8 @@ use serde::{ }; use serde_json::value::Value; use slog::{Key, Record, KV}; -use woothee::parser::{Parser, WootheeResult}; -use crate::settings::Settings; - -/* -/// List of valid user-agent attributes to keep, anything not in this -/// list is considered 'Other'. We log the user-agent on connect always -/// to retain the full string, but for DD more tags are expensive so we -/// limit to these. -/// Note: We currently limit to only Firefox UA. -// -// const VALID_UA_BROWSER: &[&str] = &["Chrome", "Firefox", "Safari", "Opera"]; -*/ - -/// See dataset.rs in https://github.com/woothee/woothee-rust for the -/// full list (WootheeResult's 'os' field may fall back to its 'name' -/// field). Windows has many values and we only care that its Windows -const VALID_UA_OS: &[&str] = &["Firefox OS", "Linux", "Mac OSX"]; - -/// Primative User Agent parser. -/// -/// We only care about a subset of the results for this (to avoid cardinality with -/// metrics and logging). -pub fn parse_user_agent(agent: &str) -> (WootheeResult<'_>, &str) { - let parser = Parser::new(); - let wresult = parser.parse(agent).unwrap_or_else(|| WootheeResult { - name: "", - category: "", - os: "", - os_version: "".into(), - browser_type: "", - version: "", - vendor: "", - }); - - // Determine a base os/browser for metrics' tags - let metrics_os = if wresult.os.starts_with("Windows") { - "Windows" - } else if VALID_UA_OS.contains(&wresult.os) { - wresult.os - } else { - "Other" - }; - // We currently limit to only Firefox UA. - /* - let metrics_browser = if VALID_UA_BROWSER.contains(&wresult.name) { - wresult.name - } else { - "Other" - }; - */ - (wresult, metrics_os) -} +use crate::{settings::Settings, web::get_device_info}; /// Tags are a set of meta information passed along with sentry errors and metrics. /// @@ -101,13 +50,6 @@ impl Serialize for Tags { } } -/// Insert a into a hashmap only if the `val` is not empty. -fn insert_if_not_empty(label: &str, val: &str, tags: &mut HashMap) { - if !val.is_empty() { - tags.insert(label.to_owned(), val.to_owned()); - } -} - impl Tags { pub fn from_head(req_head: &RequestHead, settings: &Settings) -> Self { // Return an Option<> type because the later consumers (HandlerErrors) presume that @@ -116,11 +58,13 @@ impl Tags { let mut extra = HashMap::new(); if let Some(ua) = req_head.headers().get(USER_AGENT) { if let Ok(uas) = ua.to_str() { - // if you wanted to parse out the user agent using some out-of-scope user agent parser like woothee - let (ua_result, metrics_os) = parse_user_agent(uas); - insert_if_not_empty("ua.os.family", metrics_os, &mut tags); - insert_if_not_empty("ua.os.ver", &ua_result.os_version.to_owned(), &mut tags); - insert_if_not_empty("ua.browser.ver", ua_result.version, &mut tags); + if let Ok(device_info) = get_device_info(uas) { + tags.insert("ua.os.family".to_owned(), device_info.os_family.to_string()); + tags.insert( + "ua.form_factor".to_owned(), + device_info.form_factor.to_string(), + ); + } extra.insert("ua".to_owned(), uas.to_string()); } } diff --git a/src/web/extractors.rs b/src/web/extractors.rs index d74b7bdd..e7b3542f 100644 --- a/src/web/extractors.rs +++ b/src/web/extractors.rs @@ -5,8 +5,7 @@ use actix_web::{ dev::Payload, - http::header, - http::{HeaderName, HeaderValue}, + http::{header, HeaderValue}, Error, FromRequest, HttpRequest, }; use futures::future::{self, FutureExt, LocalBoxFuture}; @@ -19,7 +18,6 @@ use crate::{ lazy_static! { static ref EMPTY_HEADER: HeaderValue = HeaderValue::from_static(""); - static ref X_FORWARDED_FOR: HeaderName = HeaderName::from_static("x-forwarded-for"); } impl FromRequest for DeviceInfo { diff --git a/src/web/mod.rs b/src/web/mod.rs index cefc2b11..ac9cc6bf 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -8,4 +8,4 @@ pub(crate) mod test; mod user_agent; pub use dockerflow::DOCKER_FLOW_ENDPOINTS; -pub use user_agent::{DeviceInfo, FormFactor, OsFamily}; +pub use user_agent::{get_device_info, DeviceInfo, FormFactor, OsFamily}; diff --git a/src/web/test.rs b/src/web/test.rs index e03a0d02..32ae15dc 100644 --- a/src/web/test.rs +++ b/src/web/test.rs @@ -8,6 +8,7 @@ use actix_web::{ http::header, http::StatusCode, middleware::errhandlers::ErrorHandlers, test, web, App, HttpRequest, HttpResponse, HttpServer, }; +use cadence::{SpyMetricSink, StatsdClient}; use futures::{channel::mpsc, StreamExt}; use serde_json::{json, Value}; use url::Url; @@ -16,7 +17,6 @@ use crate::{ adm::{AdmFilter, AdmFilterSettings, DEFAULT}, build_app, error::{HandlerError, HandlerResult}, - metrics::Metrics, server::{cache, location::location_config_from_settings, ServerState}, settings::{test_settings, Settings}, web::{dockerflow, handlers, middleware}, @@ -27,6 +27,8 @@ const UA_91: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/91.0"; const UA_90: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/90.0"; +const UA_IPHONE: &str = + "Mozilla/5.0 (iPhone; CPU iPhone OS 14_8_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/40.2 Mobile/15E148 Safari/605.1.15"; const MMDB_LOC: &str = "mmdb/GeoLite2-City-Test.mmdb"; const TEST_ADDR: &str = "216.160.83.56"; @@ -41,17 +43,19 @@ fn get_test_settings() -> Settings { } } -macro_rules! init_app { +/// Create a test application with a `SpyMetricSink` +macro_rules! init_app_with_spy { () => { async { - let settings = get_test_settings(); - init_app!(settings).await + let mut settings = get_test_settings(); + init_app_with_spy!(settings).await } }; ($settings:expr) => { async { crate::logging::init_logging(false).unwrap(); - let metrics = Metrics::sink(); + let (spy, sink) = SpyMetricSink::new(); + let metrics = &StatsdClient::builder("contile", sink).build(); let excluded_dmas = if let Some(exclude_dmas) = &$settings.exclude_dma { serde_json::from_str(exclude_dmas).expect("Invalid exclude_dma field") } else { @@ -74,11 +78,22 @@ macro_rules! init_app { }; let location_config = location_config_from_settings(&$settings, &metrics); - test::init_service(build_app!(state, location_config)).await + let service = test::init_service(build_app!(state, location_config)).await; + (service, spy) } }; } +/// Create a test application, ignoring the `SpyMetricSink` +macro_rules! init_app { + ($( $args:expr )*) => { + async { + let (app, _) = init_app_with_spy!($( $args )*).await; + app + } + } +} + struct MockAdm { pub endpoint_url: String, pub request_rx: mpsc::UnboundedReceiver, @@ -634,9 +649,7 @@ async fn include_regions() { #[actix_rt::test] async fn test_loc() { - let mut settings = get_test_settings(); - - let mut app = init_app!(settings).await; + let mut app = init_app!().await; let req = test::TestRequest::get() .uri("/__loc_test__") @@ -650,3 +663,53 @@ async fn test_loc() { assert_eq!(result["country"], "US"); assert_eq!(result["region"], "WA"); } + +#[actix_rt::test] +async fn test_metrics() { + let adm = init_mock_adm(MOCK_RESPONSE1.to_owned()); + let mut settings = Settings { + adm_endpoint_url: adm.endpoint_url, + adm_settings: json!(adm_settings()).to_string(), + ..get_test_settings() + }; + let (mut app, spy) = init_app_with_spy!(settings).await; + + let req = test::TestRequest::get() + .uri("/v1/tiles") + .header(header::USER_AGENT, UA_91) + .to_request(); + let resp = test::call_service(&mut app, req).await; + assert_eq!(resp.status(), StatusCode::OK); + + // Find all metric lines with matching prefixes + let find_metrics = |prefixes: &[&str]| -> Vec<_> { + spy.try_iter() + .filter_map(|m| { + let m = String::from_utf8(m).unwrap(); + prefixes.iter().any(|name| m.starts_with(name)).then(|| m) + }) + .collect() + }; + + let prefixes = &["contile.tiles.get:1", "contile.tiles.adm.request:1"]; + let metrics = find_metrics(prefixes); + assert_eq!(metrics.len(), 2); + let get_metric = &metrics[0]; + assert!(get_metric.contains("ua.form_factor:desktop")); + assert!(get_metric.contains("ua.os.family:windows")); + assert!(!&metrics[1].contains("endpoint:mobile")); + + let req = test::TestRequest::get() + .uri("/v1/tiles") + .header(header::USER_AGENT, UA_IPHONE) + .to_request(); + let resp = test::call_service(&mut app, req).await; + assert_eq!(resp.status(), StatusCode::OK); + + let metrics = find_metrics(prefixes); + assert_eq!(metrics.len(), 2); + let get_metric = &metrics[0]; + assert!(get_metric.contains("ua.form_factor:phone")); + assert!(get_metric.contains("ua.os.family:ios")); + assert!(&metrics[1].contains("endpoint:mobile")); +}