From 4e4c2831addfe04340adbd6496517d74d2c7a026 Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Wed, 1 Feb 2023 15:34:26 -0500 Subject: [PATCH 1/7] Sync client.request metric up to Dialogue --- conjure-runtime/src/lib.rs | 14 +++---- conjure-runtime/src/service/metrics.rs | 56 +++++++++++++++----------- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/conjure-runtime/src/lib.rs b/conjure-runtime/src/lib.rs index 3c67626d..00e55602 100644 --- a/conjure-runtime/src/lib.rs +++ b/conjure-runtime/src/lib.rs @@ -162,15 +162,11 @@ //! //! ### Standard Metrics //! -//! * `client.request (service: )` - A `Timer` recording request durations, tagged by service. Note that -//! the requests timed by this metric are the user-percieved request, including any backoffs/retries/etc. It only -//! records the time until response headers are received, not until the entire response body is read. -//! * `client.request.error (service: , reason: IOException)` - A `Meter` tracking the rate of IO errors, -//! tagged by service. Like the `client.request` metric, this tracks overall user-perceived request. The `reason` -//! tag has a value of `IOException` to align with [`dialogue`]'s metric. -//! * `tls.handshake (context: , protocol: , cipher: )` - A `Meter` -//! tracking the rate of TLS handshakes, tagged by the service, TLS protocol version (e.g. `TLSv1.3`), and cipher -//! name (e.g. `TLS_CHACHA20_POLY1305_SHA256`). +//! * `client.response (channel-name: , service-name: , endpoint: , status: +//! )` - A `Timer` recording request durations per endpoint. Note that the requests timed by this metric +//! are the user-percieved request, including any backoffs/retries/etc. It only records the time until response +//! headers are received, not until the entire response body is read. The `status` tag will be `success` if the +//! response status was 2xx and will be `failure` otherwise (QoS failure, internal server error, IO error, etc). //! * `conjure-runtime.concurrencylimiter.max (service: , hostIndex: )` - A `Gauge` reporting //! the maximum number of concurrent requests which are currently permitted to be made to a specific host. //! * `conjure-runtime.concurrencylimiter.in-flight (service: , hostIndex: )` - A `Gauge` diff --git a/conjure-runtime/src/service/metrics.rs b/conjure-runtime/src/service/metrics.rs index 286ef0e1..b1e9c441 100644 --- a/conjure-runtime/src/service/metrics.rs +++ b/conjure-runtime/src/service/metrics.rs @@ -12,22 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. use crate::raw::Service; -use crate::service::map_error::RawClientError; use crate::service::Layer; use crate::Builder; use conjure_error::Error; +use conjure_http::client::Endpoint; use futures::ready; +use http::{Request, Response}; use pin_project::pin_project; use std::future::Future; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; use tokio::time::Instant; -use witchcraft_metrics::{Meter, MetricId, Timer}; +use witchcraft_metrics::{MetricId, MetricRegistry}; struct Metrics { - response_timer: Arc, - io_error_meter: Arc, + metrics: Arc, + service_name: String, } /// A layer which updates the `client.response` and `client.response.error` metrics. @@ -42,15 +43,8 @@ impl MetricsLayer { MetricsLayer { metrics: builder.get_metrics().map(|m| { Arc::new(Metrics { - response_timer: m.timer( - MetricId::new("client.response") - .with_tag("service-name", service.to_string()), - ), - io_error_meter: m.meter( - MetricId::new("client.response.error") - .with_tag("service-name", service.to_string()) - .with_tag("reason", "IOException"), - ), + metrics: m.clone(), + service_name: service.to_string(), }) }), } @@ -73,16 +67,21 @@ pub struct MetricsService { metrics: Option>, } -impl Service for MetricsService +impl Service> for MetricsService where - S: Service, + S: Service, Response = Response, Error = Error>, { type Response = S::Response; type Error = S::Error; type Future = MetricsFuture; - fn call(&self, req: R) -> Self::Future { + fn call(&self, req: Request) -> Self::Future { MetricsFuture { + endpoint: req + .extensions() + .get::() + .expect("Request extensions missing Endpoint") + .clone(), future: self.inner.call(req), start: Instant::now(), metrics: self.metrics.clone(), @@ -95,12 +94,13 @@ pub struct MetricsFuture { #[pin] future: F, start: Instant, + endpoint: Endpoint, metrics: Option>, } -impl Future for MetricsFuture +impl Future for MetricsFuture where - F: Future>, + F: Future, Error>>, { type Output = F::Output; @@ -110,11 +110,21 @@ where let result = ready!(this.future.poll(cx)); if let Some(metrics) = this.metrics { - match &result { - Ok(_) => metrics.response_timer.update(this.start.elapsed()), - Err(e) if e.cause().is::() => metrics.io_error_meter.mark(1), - _ => {} - } + let status = match &result { + Ok(r) if r.status().is_success() => "success", + _ => "failure", + }; + + metrics + .metrics + .timer( + MetricId::new("client.response") + .with_tag("channel-name", metrics.service_name.clone()) + .with_tag("service-name", this.endpoint.service()) + .with_tag("endpoint", this.endpoint.name()) + .with_tag("status", status), + ) + .update(this.start.elapsed()); } Poll::Ready(result) From 4117fb2be43548ac3b0959c07dd80aa69530d3e9 Mon Sep 17 00:00:00 2001 From: svc-changelog Date: Wed, 1 Feb 2023 20:37:40 +0000 Subject: [PATCH 2/7] Add generated changelog entries --- changelog/@unreleased/pr-150.v2.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelog/@unreleased/pr-150.v2.yml diff --git a/changelog/@unreleased/pr-150.v2.yml b/changelog/@unreleased/pr-150.v2.yml new file mode 100644 index 00000000..6e545a02 --- /dev/null +++ b/changelog/@unreleased/pr-150.v2.yml @@ -0,0 +1,5 @@ +type: improvement +improvement: + description: Updated the `client.response` metric to match Dialogue. + links: + - https://github.com/palantir/conjure-rust-runtime/pull/150 From 9cadba925ca2d76d9b5c8f3c4e100571b0a0d4a0 Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Wed, 1 Feb 2023 16:17:37 -0500 Subject: [PATCH 3/7] bump version --- Cargo.toml | 3 +++ conjure-runtime-config/Cargo.toml | 2 +- conjure-runtime/Cargo.toml | 4 ++-- conjure-verification-api/Cargo.toml | 2 +- conjure-verification/Cargo.toml | 2 +- simulation/Cargo.toml | 2 +- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 404ea78b..1f0b2404 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,6 @@ +[workspace.package] +version = "4.3.0" + [workspace] members = [ "conjure-runtime", diff --git a/conjure-runtime-config/Cargo.toml b/conjure-runtime-config/Cargo.toml index f456de00..cdd40b10 100644 --- a/conjure-runtime-config/Cargo.toml +++ b/conjure-runtime-config/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "conjure-runtime-config" -version = "4.2.0" +version.workspace = true authors = ["Steven Fackler "] edition = "2018" license = "Apache-2.0" diff --git a/conjure-runtime/Cargo.toml b/conjure-runtime/Cargo.toml index a3a70e52..08a5e7de 100644 --- a/conjure-runtime/Cargo.toml +++ b/conjure-runtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "conjure-runtime" -version = "4.2.0" +version.workspace = true authors = ["Steven Fackler "] edition = "2018" license = "Apache-2.0" @@ -45,7 +45,7 @@ tower-service = "0.3" url = "2.0" zipkin = "0.4" -conjure-runtime-config = { version = "4.2.0", path = "../conjure-runtime-config" } +conjure-runtime-config = { version = "4.3.0", path = "../conjure-runtime-config" } [dev-dependencies] futures-test = "0.3" diff --git a/conjure-verification-api/Cargo.toml b/conjure-verification-api/Cargo.toml index d0729230..c7942109 100644 --- a/conjure-verification-api/Cargo.toml +++ b/conjure-verification-api/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "conjure-verification-api" -version = "2.2.0" +version.workspace = true authors = ["Steven Fackler "] edition = "2018" publish = false diff --git a/conjure-verification/Cargo.toml b/conjure-verification/Cargo.toml index 6a44c9fe..25d32e19 100644 --- a/conjure-verification/Cargo.toml +++ b/conjure-verification/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "conjure-verification" -version = "2.2.0" +version.workspace = true authors = ["Steven Fackler "] edition = "2018" publish = false diff --git a/simulation/Cargo.toml b/simulation/Cargo.toml index b4f3ce5a..919fbe7a 100644 --- a/simulation/Cargo.toml +++ b/simulation/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simulation" -version = "2.2.0" +version.workspace = true authors = ["Steven Fackler "] edition = "2018" From f507967d1703179fbd24c34ba428c0d9c78166fc Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Wed, 1 Feb 2023 16:47:26 -0500 Subject: [PATCH 4/7] restore docs --- conjure-runtime/src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conjure-runtime/src/lib.rs b/conjure-runtime/src/lib.rs index 00e55602..cf26b78a 100644 --- a/conjure-runtime/src/lib.rs +++ b/conjure-runtime/src/lib.rs @@ -167,6 +167,9 @@ //! are the user-percieved request, including any backoffs/retries/etc. It only records the time until response //! headers are received, not until the entire response body is read. The `status` tag will be `success` if the //! response status was 2xx and will be `failure` otherwise (QoS failure, internal server error, IO error, etc). +//! * `tls.handshake (context: , protocol: , cipher: )` - A `Meter` +//! tracking the rate of TLS handshakes, tagged by the service, TLS protocol version (e.g. `TLSv1.3`), and cipher +//! name (e.g. `TLS_CHACHA20_POLY1305_SHA256`). //! * `conjure-runtime.concurrencylimiter.max (service: , hostIndex: )` - A `Gauge` reporting //! the maximum number of concurrent requests which are currently permitted to be made to a specific host. //! * `conjure-runtime.concurrencylimiter.in-flight (service: , hostIndex: )` - A `Gauge` From cb69892149c1e1db80085824502b73de5090d3e4 Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Thu, 2 Feb 2023 09:35:39 -0500 Subject: [PATCH 5/7] simplify status check --- conjure-runtime/src/service/metrics.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conjure-runtime/src/service/metrics.rs b/conjure-runtime/src/service/metrics.rs index b1e9c441..1723d049 100644 --- a/conjure-runtime/src/service/metrics.rs +++ b/conjure-runtime/src/service/metrics.rs @@ -111,8 +111,8 @@ where if let Some(metrics) = this.metrics { let status = match &result { - Ok(r) if r.status().is_success() => "success", - _ => "failure", + Ok(_) => "success", + Err(_) => "failure", }; metrics From b4cf26a5f202e820122dc3449bb8bc21751e1f71 Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Thu, 2 Feb 2023 09:44:52 -0500 Subject: [PATCH 6/7] update simluations --- simulation/src/harness.rs | 2 +- simulation/src/metrics.rs | 13 +++++++++++-- simulation/src/server.rs | 6 +++--- simulation/src/simulation.rs | 10 ++++++---- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/simulation/src/harness.rs b/simulation/src/harness.rs index e02d5076..2bec94e9 100644 --- a/simulation/src/harness.rs +++ b/simulation/src/harness.rs @@ -136,7 +136,7 @@ impl Harness { current - + diff --git a/simulation/src/metrics.rs b/simulation/src/metrics.rs index 358ed716..38d3be51 100644 --- a/simulation/src/metrics.rs +++ b/simulation/src/metrics.rs @@ -35,9 +35,18 @@ pub fn request_counter(registry: &MetricRegistry, server: &str, endpoint: &str) ) } -pub fn responses_timer(registry: &MetricRegistry, service_name: &'static str) -> Arc { +pub fn responses_timer( + registry: &MetricRegistry, + channel: &'static str, + service: &'static str, + endpoint: &'static str, +) -> Arc { registry.timer_with( - MetricId::new("client.response").with_tag("service-name", service_name), + MetricId::new("client.response") + .with_tag("channel-name", channel) + .with_tag("service-name", service) + .with_tag("endpoint", endpoint) + .with_tag("status", "success"), || { Timer::new_with( FullyBufferedReservoir { diff --git a/simulation/src/server.rs b/simulation/src/server.rs index 94a4d1dd..a5ea6b61 100644 --- a/simulation/src/server.rs +++ b/simulation/src/server.rs @@ -11,8 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -use crate::metrics; use crate::recorder::SimulationMetricsRecorder; +use crate::{metrics, simulation}; use bytes::Bytes; use conjure_error::Error; use conjure_http::client::{self, AsyncBody}; @@ -68,9 +68,9 @@ impl Endpoint { .method(self.method.clone()) .uri(self.path) .extension(client::Endpoint::new( - "testService", + simulation::SERVICE, None, - "testEndpoint", + simulation::ENDPOINT, self.path, )) .body(AsyncBody::Empty) diff --git a/simulation/src/simulation.rs b/simulation/src/simulation.rs index 0976d667..7b4f2727 100644 --- a/simulation/src/simulation.rs +++ b/simulation/src/simulation.rs @@ -34,7 +34,9 @@ use tokio::runtime::{self, Runtime}; use tokio::time::{self, Duration, Instant}; use witchcraft_metrics::{Clock, MetricRegistry}; -const SERVICE: &str = "simulation"; +const CHANNEL: &str = "simulation"; +pub const SERVICE: &str = "testService"; +pub const ENDPOINT: &str = "testEndpoint"; pub struct SimulationBuilder0; @@ -52,7 +54,7 @@ impl SimulationBuilder0 { // initialize the responses timer with our custom reservoir let _guard = runtime.enter(); - metrics::responses_timer(&metrics, SERVICE); + metrics::responses_timer(&metrics, CHANNEL, SERVICE, ENDPOINT); let mut recorder = SimulationMetricsRecorder::new(&metrics); recorder.filter_metrics(|id| { @@ -104,7 +106,7 @@ impl SimulationBuilder1 { let mut builder = Builder::new(); self.strategy.apply(&mut builder); builder - .service(SERVICE) + .service(CHANNEL) .user_agent(UserAgent::new(Agent::new("simulation", "0.0.0"))) .metrics(self.metrics.clone()); for server in &self.servers { @@ -268,7 +270,7 @@ impl Runner { let status_codes = request_runner.status_codes.into_inner(); SimulationReport { client_mean: Duration::from_nanos( - metrics::responses_timer(&self.metrics, SERVICE) + metrics::responses_timer(&self.metrics, CHANNEL, SERVICE, ENDPOINT) .snapshot() .mean() as u64, ), From f59ce81fbfe8e62bd475a24a622f5db5f6dd809a Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Thu, 2 Feb 2023 09:48:46 -0500 Subject: [PATCH 7/7] rerender simulations --- simulation/results/report.md | 78 ++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/simulation/results/report.md b/simulation/results/report.md index cb8808e2..7714fc36 100644 --- a/simulation/results/report.md +++ b/simulation/results/report.md @@ -52,7 +52,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -64,7 +64,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -76,7 +76,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -88,7 +88,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -100,7 +100,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -112,7 +112,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -124,7 +124,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -136,7 +136,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -148,7 +148,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -160,7 +160,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -172,7 +172,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -184,7 +184,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -196,7 +196,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -208,7 +208,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -220,7 +220,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -232,7 +232,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -244,7 +244,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -256,7 +256,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -268,7 +268,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -280,7 +280,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -292,7 +292,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -304,7 +304,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -316,7 +316,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -328,7 +328,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -340,7 +340,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -352,7 +352,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -364,7 +364,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -376,7 +376,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -388,7 +388,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -400,7 +400,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -412,7 +412,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -424,7 +424,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -436,7 +436,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -448,7 +448,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -460,7 +460,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -472,7 +472,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -484,7 +484,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -496,7 +496,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - + @@ -508,7 +508,7 @@ one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER_PIN_UNTIL_ERROR]: success=6 current - +