From 8a429095f4947c5a45f28552f804c80eb5bcfb33 Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 13:43:13 -0400 Subject: [PATCH 01/12] Update URL metric limit to 8k --- glean-core/src/metrics/url.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/glean-core/src/metrics/url.rs b/glean-core/src/metrics/url.rs index 8026e4ee75..979f30ec1a 100644 --- a/glean-core/src/metrics/url.rs +++ b/glean-core/src/metrics/url.rs @@ -12,7 +12,7 @@ use crate::CommonMetricData; use crate::Glean; // The maximum number of characters a URL Metric may have, before encoding. -const MAX_URL_LENGTH: usize = 2048; +const MAX_URL_LENGTH: usize = 8192; /// A URL metric. /// @@ -80,7 +80,7 @@ impl UrlMetric { return; } - let s = value.into(); + let mut s = value.into(); if s.len() > MAX_URL_LENGTH { let msg = format!( "Value length {} exceeds maximum of {}", @@ -88,7 +88,7 @@ impl UrlMetric { MAX_URL_LENGTH ); record_error(glean, &self.meta, ErrorType::InvalidOverflow, msg, None); - return; + s = s[..MAX_URL_LENGTH].to_string(); } if s.starts_with("data:") { From fefbe7aca74862e1a8e9a2afba1b176d094981ff Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 13:52:01 -0400 Subject: [PATCH 02/12] Update rust tests for URL metric --- glean-core/src/metrics/url.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/glean-core/src/metrics/url.rs b/glean-core/src/metrics/url.rs index 979f30ec1a..c3821197b7 100644 --- a/glean-core/src/metrics/url.rs +++ b/glean-core/src/metrics/url.rs @@ -204,12 +204,24 @@ mod test { dynamic_label: None, }); - let long_path = "testing".repeat(2000); - let test_url = format!("glean://{}", long_path); + // Whenever the URL is longer than our MAX_URL_LENGTH, we truncate the URL to the + // MAX_URL_LENGTH. + // + // This 8-character string was chosen so we could have an even number that is + // a divisor of our MAX_URL_LENGTH. + let long_path = "abcdefgh"; + + // Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. + let test_url = format!("glean://{}", long_path.repeat(2000)); metric.set_sync(&glean, test_url); - assert!(metric.get_value(&glean, "store1").is_none()); + // "glean://" is 8 characters + // "abcdefgh" (long_path) is 8 characters + // `long_path` is repeated 1023 times (8184) + // 8 + 8184 = 8192 (MAX_URL_LENGTH) + let expected = format!("glean://{}", long_path.repeat(1023)); + assert_eq!(metric.get_value(&glean, "store1").unwrap(), expected); assert_eq!( 1, test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow) From ecee222ea67d1763d7cc54d5a4d566649946d1be Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 13:59:49 -0400 Subject: [PATCH 03/12] Update swift tests for URL metric --- .../GleanTests/Metrics/UrlMetricTests.swift | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift b/glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift index aa0f35a9b9..f66b40e716 100644 --- a/glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift +++ b/glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift @@ -91,9 +91,24 @@ class UrlMetricTypeTests: XCTestCase { disabled: false )) - let host = String(repeating: "testing", count: 2000) - urlMetric.set("glean://" + host) - + // Whenever the URL is longer than our MAX_URL_LENGTH, we truncate the URL to the + // MAX_URL_LENGTH. + // + // This 8-character string was chosen so we could have an even number that is + // a divisor of our MAX_URL_LENGTH. + let longPath = "abcdefgh" + + // Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. + let testUrl = "glean://" + String(repeating: longPath, count: 2000) + urlMetric.set(testUrl) + + // "glean://" is 8 characters + // "abcdefgh" (long_path) is 8 characters + // `long_path` is repeated 1023 times (8184) + // 8 + 8184 = 8192 (MAX_URL_LENGTH) + let expected = "glean://" + String(repeating: longPath, count: 1023) + + XCTAssertEqual(expected, urlMetric.testGetValue()) XCTAssertEqual(1, urlMetric.testGetNumRecordedErrors(.invalidOverflow)) } From c21e81ecf3a24860b7366f742ac80e401ab96588 Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 14:21:08 -0400 Subject: [PATCH 04/12] Update kotlin tests for URL metric --- .../glean/private/UrlMetricTypeTest.kt | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/glean-core/android/src/test/java/mozilla/telemetry/glean/private/UrlMetricTypeTest.kt b/glean-core/android/src/test/java/mozilla/telemetry/glean/private/UrlMetricTypeTest.kt index 0411a0346b..df24662616 100644 --- a/glean-core/android/src/test/java/mozilla/telemetry/glean/private/UrlMetricTypeTest.kt +++ b/glean-core/android/src/test/java/mozilla/telemetry/glean/private/UrlMetricTypeTest.kt @@ -117,8 +117,25 @@ class UrlMetricTypeTest { ) ) - urlMetric.set("glean://" + "testing".repeat(2000)) + // Whenever the URL is longer than our MAX_URL_LENGTH, we truncate the URL to the + // MAX_URL_LENGTH. + // + // This 8-character string was chosen so we could have an even number that is + // a divisor of our MAX_URL_LENGTH. + val longPath = "abcdefgh" + val testUrl = "glean://" + longPath.repeat(2000) + + // Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. + urlMetric.set(testUrl) + + // "glean://" is 8 characters + // "abcdefgh" (long_path) is 8 characters + // `long_path` is repeated 1023 times (8184) + // 8 + 8184 = 8192 (MAX_URL_LENGTH) + val expected = "glean://" + longPath.repeat(1023) + + assertEquals(urlMetric.testGetValue("store1"), expected) assertEquals(1, urlMetric.testGetNumRecordedErrors(ErrorType.INVALID_OVERFLOW)) } } From 05fba861809407d313b19fedfb4ceaf7b23c9d45 Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 14:41:22 -0400 Subject: [PATCH 05/12] Update python tests for URL metric --- glean-core/python/tests/metrics/test_url.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/glean-core/python/tests/metrics/test_url.py b/glean-core/python/tests/metrics/test_url.py index 0685f2210f..61392e9935 100644 --- a/glean-core/python/tests/metrics/test_url.py +++ b/glean-core/python/tests/metrics/test_url.py @@ -79,8 +79,24 @@ def test_setting_a_long_url_records_an_error(): ) ) - url_metric.set("glean://" + "testing" * 2000) - + # Whenever the URL is longer than our MAX_URL_LENGTH, we truncate the URL to the + # MAX_URL_LENGTH. + # + # This 8-character string was chosen so we could have an even number that is + # a divisor of our MAX_URL_LENGTH. + long_path = "abcdefgh"; + + # Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. + test_url = "glean://" + (long_path * 2000) + url_metric.set(test_url) + + # "glean://" is 8 characters + # "abcdefgh" (long_path) is 8 characters + # `long_path` is repeated 1023 times (8184) + # 8 + 8184 = 8192 (MAX_URL_LENGTH) + expected = "glean://" + (long_path * 1023) + + assert expected == url_metric.test_get_value() assert 1 == url_metric.test_get_num_recorded_errors( testing.ErrorType.INVALID_OVERFLOW ) From 5d58c096595780e4aafc9f2673aafd211fa72642 Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 14:56:14 -0400 Subject: [PATCH 06/12] Update URL metric docs in Glean Book for new char limit --- docs/user/reference/metrics/url.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user/reference/metrics/url.md b/docs/user/reference/metrics/url.md index 5411a8350d..c19f866f00 100644 --- a/docs/user/reference/metrics/url.md +++ b/docs/user/reference/metrics/url.md @@ -132,12 +132,12 @@ search.template.setUrl(new URL("https://mysearchengine.com/")); * [`invalid_value`](../../user/metrics/error-reporting.md): * If the URL passed does not start with a [scheme](https://url.spec.whatwg.org/#url-representation) followed by a `:` character. * If the URL passed uses the `data:` protocol. -* [`invalid_overflow`](../../user/metrics/error-reporting.md): if the URL passed is longer than 2048 characters (before encoding). +* [`invalid_overflow`](../../user/metrics/error-reporting.md): if the URL passed is longer than 8192 characters (before encoding). * [`invalid_type`](../../user/metrics/error-reporting.md): if a non-string value is given. #### Limits -* Fixed maximum URL length: 2048. Longer URLs are dropped. +* Fixed maximum URL length: 8192. Longer URLs are truncated and recorded along with an `invalid_overflow` error. ## Testing API From e21208977447ac2b2f996e7b099c674473c54899 Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 15:03:48 -0400 Subject: [PATCH 07/12] Rename `long_path` to `long_path_base` --- .../telemetry/glean/private/UrlMetricTypeTest.kt | 10 +++++----- glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift | 10 +++++----- glean-core/python/tests/metrics/test_url.py | 10 +++++----- glean-core/src/metrics/url.rs | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/glean-core/android/src/test/java/mozilla/telemetry/glean/private/UrlMetricTypeTest.kt b/glean-core/android/src/test/java/mozilla/telemetry/glean/private/UrlMetricTypeTest.kt index df24662616..3a856ff8df 100644 --- a/glean-core/android/src/test/java/mozilla/telemetry/glean/private/UrlMetricTypeTest.kt +++ b/glean-core/android/src/test/java/mozilla/telemetry/glean/private/UrlMetricTypeTest.kt @@ -122,18 +122,18 @@ class UrlMetricTypeTest { // // This 8-character string was chosen so we could have an even number that is // a divisor of our MAX_URL_LENGTH. - val longPath = "abcdefgh" + val longPathBase = "abcdefgh" - val testUrl = "glean://" + longPath.repeat(2000) + val testUrl = "glean://" + longPathBase.repeat(2000) // Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. urlMetric.set(testUrl) // "glean://" is 8 characters - // "abcdefgh" (long_path) is 8 characters - // `long_path` is repeated 1023 times (8184) + // "abcdefgh" (longPathBase) is 8 characters + // `longPathBase` is repeated 1023 times (8184) // 8 + 8184 = 8192 (MAX_URL_LENGTH) - val expected = "glean://" + longPath.repeat(1023) + val expected = "glean://" + longPathBase.repeat(1023) assertEquals(urlMetric.testGetValue("store1"), expected) assertEquals(1, urlMetric.testGetNumRecordedErrors(ErrorType.INVALID_OVERFLOW)) diff --git a/glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift b/glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift index f66b40e716..32d53656d6 100644 --- a/glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift +++ b/glean-core/ios/GleanTests/Metrics/UrlMetricTests.swift @@ -96,17 +96,17 @@ class UrlMetricTypeTests: XCTestCase { // // This 8-character string was chosen so we could have an even number that is // a divisor of our MAX_URL_LENGTH. - let longPath = "abcdefgh" + let longPathBase = "abcdefgh" // Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. - let testUrl = "glean://" + String(repeating: longPath, count: 2000) + let testUrl = "glean://" + String(repeating: longPathBase, count: 2000) urlMetric.set(testUrl) // "glean://" is 8 characters - // "abcdefgh" (long_path) is 8 characters - // `long_path` is repeated 1023 times (8184) + // "abcdefgh" (longPathBase) is 8 characters + // `longPathBase` is repeated 1023 times (8184) // 8 + 8184 = 8192 (MAX_URL_LENGTH) - let expected = "glean://" + String(repeating: longPath, count: 1023) + let expected = "glean://" + String(repeating: longPathBase, count: 1023) XCTAssertEqual(expected, urlMetric.testGetValue()) XCTAssertEqual(1, urlMetric.testGetNumRecordedErrors(.invalidOverflow)) diff --git a/glean-core/python/tests/metrics/test_url.py b/glean-core/python/tests/metrics/test_url.py index 61392e9935..7b174797fa 100644 --- a/glean-core/python/tests/metrics/test_url.py +++ b/glean-core/python/tests/metrics/test_url.py @@ -84,17 +84,17 @@ def test_setting_a_long_url_records_an_error(): # # This 8-character string was chosen so we could have an even number that is # a divisor of our MAX_URL_LENGTH. - long_path = "abcdefgh"; + long_path_base = "abcdefgh"; # Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. - test_url = "glean://" + (long_path * 2000) + test_url = "glean://" + (long_path_base * 2000) url_metric.set(test_url) # "glean://" is 8 characters - # "abcdefgh" (long_path) is 8 characters - # `long_path` is repeated 1023 times (8184) + # "abcdefgh" (long_path_base) is 8 characters + # `long_path_base` is repeated 1023 times (8184) # 8 + 8184 = 8192 (MAX_URL_LENGTH) - expected = "glean://" + (long_path * 1023) + expected = "glean://" + (long_path_base * 1023) assert expected == url_metric.test_get_value() assert 1 == url_metric.test_get_num_recorded_errors( diff --git a/glean-core/src/metrics/url.rs b/glean-core/src/metrics/url.rs index c3821197b7..099e88678d 100644 --- a/glean-core/src/metrics/url.rs +++ b/glean-core/src/metrics/url.rs @@ -209,17 +209,17 @@ mod test { // // This 8-character string was chosen so we could have an even number that is // a divisor of our MAX_URL_LENGTH. - let long_path = "abcdefgh"; + let long_path_base = "abcdefgh"; // Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. - let test_url = format!("glean://{}", long_path.repeat(2000)); + let test_url = format!("glean://{}", long_path_base.repeat(2000)); metric.set_sync(&glean, test_url); // "glean://" is 8 characters - // "abcdefgh" (long_path) is 8 characters - // `long_path` is repeated 1023 times (8184) + // "abcdefgh" (long_path_base) is 8 characters + // `long_path_base` is repeated 1023 times (8184) // 8 + 8184 = 8192 (MAX_URL_LENGTH) - let expected = format!("glean://{}", long_path.repeat(1023)); + let expected = format!("glean://{}", long_path_base.repeat(1023)); assert_eq!(metric.get_value(&glean, "store1").unwrap(), expected); assert_eq!( From 434a06d6988ca95b14fbefec9250bece5f3dd9f2 Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 15:32:08 -0400 Subject: [PATCH 08/12] Update changelog for #2199 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dfb3b50aa..583b6f2913 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ [Full changelog](https://github.com/mozilla/glean/compare/v51.2.0...main) +* General + * Update URL metric character length to 8192 ([#2199](https://github.com/mozilla/glean/pull/2199)) * Kotlin * Gradle plugin: Fix quoting issue in Python wrapper code ([#2193](https://github.com/mozilla/glean/pull/2193)) * Bumped the required Android NDK to version 25.1.8937393 ([#2195](https://github.com/mozilla/glean/pull/2195)) From da462b98e195867300c07c1f27166ba7426b3119 Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Tue, 20 Sep 2022 15:35:50 -0400 Subject: [PATCH 09/12] Fix python lint errors --- glean-core/python/tests/metrics/test_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glean-core/python/tests/metrics/test_url.py b/glean-core/python/tests/metrics/test_url.py index 7b174797fa..f5f0c2d7de 100644 --- a/glean-core/python/tests/metrics/test_url.py +++ b/glean-core/python/tests/metrics/test_url.py @@ -84,7 +84,7 @@ def test_setting_a_long_url_records_an_error(): # # This 8-character string was chosen so we could have an even number that is # a divisor of our MAX_URL_LENGTH. - long_path_base = "abcdefgh"; + long_path_base = "abcdefgh" # Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. test_url = "glean://" + (long_path_base * 2000) From 591e57f6bb01c435d1e08e5291d78bcdfd44f815 Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Wed, 21 Sep 2022 09:08:28 -0400 Subject: [PATCH 10/12] Update changelog entry for #2199 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 583b6f2913..15697e2e6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ [Full changelog](https://github.com/mozilla/glean/compare/v51.2.0...main) * General - * Update URL metric character length to 8192 ([#2199](https://github.com/mozilla/glean/pull/2199)) + * Update URL metric character limit to 8k to support longer URLs. URLs that are too long now are truncated to `MAX_URL_LENGTH` and still recorded along with an Overflow error. ([#2199](https://github.com/mozilla/glean/pull/2199)) * Kotlin * Gradle plugin: Fix quoting issue in Python wrapper code ([#2193](https://github.com/mozilla/glean/pull/2193)) * Bumped the required Android NDK to version 25.1.8937393 ([#2195](https://github.com/mozilla/glean/pull/2195)) From e0eede33369df85e8a767ccb75a50ad3b513432d Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Wed, 21 Sep 2022 09:24:35 -0400 Subject: [PATCH 11/12] Use helper function for URL truncating and error reporting --- glean-core/src/metrics/url.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/glean-core/src/metrics/url.rs b/glean-core/src/metrics/url.rs index 099e88678d..e1e88460ef 100644 --- a/glean-core/src/metrics/url.rs +++ b/glean-core/src/metrics/url.rs @@ -10,6 +10,7 @@ use crate::metrics::MetricType; use crate::storage::StorageManager; use crate::CommonMetricData; use crate::Glean; +use crate::util::truncate_string_at_boundary_with_error; // The maximum number of characters a URL Metric may have, before encoding. const MAX_URL_LENGTH: usize = 8192; @@ -80,16 +81,7 @@ impl UrlMetric { return; } - let mut s = value.into(); - if s.len() > MAX_URL_LENGTH { - let msg = format!( - "Value length {} exceeds maximum of {}", - s.len(), - MAX_URL_LENGTH - ); - record_error(glean, &self.meta, ErrorType::InvalidOverflow, msg, None); - s = s[..MAX_URL_LENGTH].to_string(); - } + let s = truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_URL_LENGTH); if s.starts_with("data:") { record_error( From c137f711ba5506cf8b8a84ba9e79eedd403104de Mon Sep 17 00:00:00 2001 From: Bruno Rosa Date: Wed, 21 Sep 2022 09:27:38 -0400 Subject: [PATCH 12/12] Rust formatting fix --- glean-core/src/metrics/url.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glean-core/src/metrics/url.rs b/glean-core/src/metrics/url.rs index e1e88460ef..698ea34f94 100644 --- a/glean-core/src/metrics/url.rs +++ b/glean-core/src/metrics/url.rs @@ -8,9 +8,9 @@ use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorTy use crate::metrics::Metric; use crate::metrics::MetricType; use crate::storage::StorageManager; +use crate::util::truncate_string_at_boundary_with_error; use crate::CommonMetricData; use crate::Glean; -use crate::util::truncate_string_at_boundary_with_error; // The maximum number of characters a URL Metric may have, before encoding. const MAX_URL_LENGTH: usize = 8192;