From ae3f9ec597d1439321428f15634daef6a8b4ee6e Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Fri, 23 Aug 2024 10:15:43 +0200 Subject: [PATCH 1/7] Add a histogram metric tracking evaluated query plans the `experimental_plans_limit` option can be used to limit the number of query plans evaluated for a query, but there was no way to know what is a reasonable value for that option. This adds a histogram metric tracking the number of evaluated query plans, which should give more context to configure this. --- Cargo.lock | 27 +++++++++++++++++-- apollo-router/Cargo.toml | 3 ++- .../src/query_planner/bridge_query_planner.rs | 10 +++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 55ca449bc6..4ce233f7e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -335,7 +335,7 @@ dependencies = [ "reqwest", "rhai", "rmp", - "router-bridge", + "router-bridge 0.5.30+v2.8.3 (git+https://github.com/apollographql/federation-rs?branch=geal/plan-count)", "rowan", "rstack", "rust-embed", @@ -5803,6 +5803,29 @@ dependencies = [ "which", ] +[[package]] +name = "router-bridge" +version = "0.5.30+v2.8.3" +source = "git+https://github.com/apollographql/federation-rs?branch=geal/plan-count#2883d18d519b13a0d6830898c0eb197b59ad4571" +dependencies = [ + "anyhow", + "async-channel 1.9.0", + "deno_console", + "deno_core", + "deno_url", + "deno_web", + "deno_webidl", + "rand 0.8.5", + "serde", + "serde_json", + "thiserror", + "tokio", + "tower", + "tower-service", + "tracing", + "which", +] + [[package]] name = "router-fuzz" version = "0.0.0" @@ -5818,7 +5841,7 @@ dependencies = [ "libfuzzer-sys", "log", "reqwest", - "router-bridge", + "router-bridge 0.5.30+v2.8.3 (registry+https://github.com/rust-lang/crates.io-index)", "schemars", "serde", "serde_json", diff --git a/apollo-router/Cargo.toml b/apollo-router/Cargo.toml index dbf481bf5a..d43513b7fe 100644 --- a/apollo-router/Cargo.toml +++ b/apollo-router/Cargo.toml @@ -198,7 +198,8 @@ regex = "1.10.5" reqwest.workspace = true # note: this dependency should _always_ be pinned, prefix the version with an `=` -router-bridge = "=0.5.30+v2.8.3" +#router-bridge = "=0.5.30+v2.8.3" +router-bridge = { git = "https://github.com/apollographql/federation-rs", branch = "geal/plan-count" } rust-embed = { version = "8.4.0", features = ["include-exclude"] } rustls = "0.21.12" diff --git a/apollo-router/src/query_planner/bridge_query_planner.rs b/apollo-router/src/query_planner/bridge_query_planner.rs index fc4ccce41d..3390862ed0 100644 --- a/apollo-router/src/query_planner/bridge_query_planner.rs +++ b/apollo-router/src/query_planner/bridge_query_planner.rs @@ -315,6 +315,8 @@ impl PlannerMode { query_plan: QueryPlan { node: root_node.map(Arc::new), }, + evaluated_plan_count: plan.statistics.evaluated_plan_count.into_inner() + as u64, }, }) } @@ -554,6 +556,7 @@ impl BridgeQueryPlanner { QueryPlanResult { query_plan: QueryPlan { node: Some(node) }, formatted_query_plan, + evaluated_plan_count, }, mut usage_reporting, } => { @@ -561,6 +564,12 @@ impl BridgeQueryPlanner { usage_reporting.stats_report_key = sig; } + u64_histogram!( + "apollo.router.query_planning.evaluated_plans", + "Number of query plans evaluated for a query before choosing the best one", + evaluated_plan_count + ); + if matches!( self.configuration .experimental_apollo_metrics_generation_mode, @@ -949,6 +958,7 @@ impl BridgeQueryPlanner { pub struct QueryPlanResult { pub(super) formatted_query_plan: Option>, pub(super) query_plan: QueryPlan, + pub(super) evaluated_plan_count: u64, } impl QueryPlanResult { From 071fe1c655e180415492796f1e19d28b454cc025 Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Fri, 23 Aug 2024 10:38:08 +0200 Subject: [PATCH 2/7] lint --- apollo-router/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apollo-router/Cargo.toml b/apollo-router/Cargo.toml index d43513b7fe..e6237b84c6 100644 --- a/apollo-router/Cargo.toml +++ b/apollo-router/Cargo.toml @@ -199,7 +199,7 @@ reqwest.workspace = true # note: this dependency should _always_ be pinned, prefix the version with an `=` #router-bridge = "=0.5.30+v2.8.3" -router-bridge = { git = "https://github.com/apollographql/federation-rs", branch = "geal/plan-count" } +router-bridge = { git = "https://github.com/apollographql/federation-rs", branch = "geal/plan-count", version = "=0.5.30+v2.8.3" } rust-embed = { version = "8.4.0", features = ["include-exclude"] } rustls = "0.21.12" From 1e55b760c4cf28939637e9e389d22a908576e440 Mon Sep 17 00:00:00 2001 From: Jesse Rosenberger Date: Tue, 3 Sep 2024 13:08:01 +0300 Subject: [PATCH 3/7] Use actually published version of `router-bridge@0.6.1+v2.9.0` --- Cargo.lock | 31 ++++--------------------------- apollo-router/Cargo.toml | 3 +-- fuzz/Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fc1626db86..135142ad6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -338,7 +338,7 @@ dependencies = [ "reqwest", "rhai", "rmp", - "router-bridge 0.5.30+v2.8.3 (git+https://github.com/apollographql/federation-rs?branch=geal/plan-count)", + "router-bridge", "rowan", "rstack", "rust-embed", @@ -5630,32 +5630,9 @@ dependencies = [ [[package]] name = "router-bridge" -version = "0.6.0+v2.9.0" +version = "0.6.1+v2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96ef4910ade6753863c8437a76e88e236ab91688dcfe739d73417ae7848f3b92" -dependencies = [ - "anyhow", - "async-channel 1.9.0", - "deno_console", - "deno_core", - "deno_url", - "deno_web", - "deno_webidl", - "rand 0.8.5", - "serde", - "serde_json", - "thiserror", - "tokio", - "tower", - "tower-service", - "tracing", - "which", -] - -[[package]] -name = "router-bridge" -version = "0.5.30+v2.8.3" -source = "git+https://github.com/apollographql/federation-rs?branch=geal/plan-count#2883d18d519b13a0d6830898c0eb197b59ad4571" +checksum = "b3be2d3ebbcbceb19dc813f5cee507295c673bb4e3f7a7cd532c8c27c95f92fc" dependencies = [ "anyhow", "async-channel 1.9.0", @@ -5690,7 +5667,7 @@ dependencies = [ "libfuzzer-sys", "log", "reqwest", - "router-bridge 0.5.30+v2.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "router-bridge", "schemars", "serde", "serde_json", diff --git a/apollo-router/Cargo.toml b/apollo-router/Cargo.toml index 0040f1eca9..11d470ec24 100644 --- a/apollo-router/Cargo.toml +++ b/apollo-router/Cargo.toml @@ -197,8 +197,7 @@ regex = "1.10.5" reqwest.workspace = true # note: this dependency should _always_ be pinned, prefix the version with an `=` -#router-bridge = "=0.5.30+v2.8.3" -router-bridge = { git = "https://github.com/apollographql/federation-rs", branch = "main", version = "=0.6.0+v2.9.0" } +router-bridge = "=0.6.1+v2.9.0" rust-embed = { version = "8.4.0", features = ["include-exclude"] } rustls = "0.21.12" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index f01ccac721..dfbb4c9725 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -20,7 +20,7 @@ reqwest = { workspace = true, features = ["json", "blocking"] } serde_json.workspace = true tokio.workspace = true # note: this dependency should _always_ be pinned, prefix the version with an `=` -router-bridge = "=0.6.0+v2.9.0" +router-bridge = "=0.6.1+v2.9.0" [dev-dependencies] anyhow = "1" From 1102f3dc4f73849d299aef198c0ca26f88b16d00 Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Tue, 3 Sep 2024 16:08:50 +0200 Subject: [PATCH 4/7] rename, add an attribute and test the metric --- .../src/query_planner/bridge_query_planner.rs | 52 +++++++++++++++++-- .../src/query_planner/dual_query_planner.rs | 9 ++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/apollo-router/src/query_planner/bridge_query_planner.rs b/apollo-router/src/query_planner/bridge_query_planner.rs index 299d3a3cc0..58a3a8e49b 100644 --- a/apollo-router/src/query_planner/bridge_query_planner.rs +++ b/apollo-router/src/query_planner/bridge_query_planner.rs @@ -273,6 +273,13 @@ impl PlannerMode { .into_result() .map_err(PlanErrors::from)?; + u64_histogram!( + "apollo.router.query_planning.plan.evaluated_plans", + "Number of query plans evaluated for a query before choosing the best one", + success.data.evaluated_plan_count, + planner = "js" + ); + if let Some(root_node) = &mut success.data.query_plan.node { // Arc freshly deserialized from Deno should be unique, so this doesn’t clone: let root_node = Arc::make_mut(root_node); @@ -304,6 +311,16 @@ impl PlannerMode { if let Some(node) = &mut root_node { init_query_plan_root_node(node)?; } + + let evaluated_plan_count = + plan.statistics.evaluated_plan_count.clone().into_inner() as u64; + u64_histogram!( + "apollo.router.query_planning.plan.evaluated_plans", + "Number of query plans evaluated for a query before choosing the best one", + evaluated_plan_count, + planner = "rust" + ); + Ok(PlanSuccess { usage_reporting, data: QueryPlanResult { @@ -311,8 +328,7 @@ impl PlannerMode { query_plan: QueryPlan { node: root_node.map(Arc::new), }, - evaluated_plan_count: plan.statistics.evaluated_plan_count.into_inner() - as u64, + evaluated_plan_count, }, }) } @@ -336,6 +352,13 @@ impl PlannerMode { let root_node = Arc::make_mut(root_node); init_query_plan_root_node(root_node)?; } + + u64_histogram!( + "apollo.router.query_planning.plan.evaluated_plans", + "Number of query plans evaluated for a query before choosing the best one", + success.data.evaluated_plan_count, + planner = "js" + ); } BothModeComparisonJob { @@ -553,7 +576,7 @@ impl BridgeQueryPlanner { }; u64_histogram!( - "apollo.router.query_planning.evaluated_plans", + "apollo.router.query_planning.plan.evaluated_plans", "Number of query plans evaluated for a query before choosing the best one", evaluated_plan_count ); @@ -1605,4 +1628,27 @@ mod tests { "init.is_success" = false ); } + + #[test(tokio::test)] + async fn test_evaluated_plans_histogram() { + async { + let _ = plan( + EXAMPLE_SCHEMA, + include_str!("testdata/query.graphql"), + include_str!("testdata/query.graphql"), + None, + PlanOptions::default(), + ) + .await + .unwrap(); + + assert_histogram_exists!( + "apollo.router.query_planning.plan.evaluated_plans", + u64, + planner = "js" + ); + } + .with_metrics() + .await; + } } diff --git a/apollo-router/src/query_planner/dual_query_planner.rs b/apollo-router/src/query_planner/dual_query_planner.rs index 6a880cf538..541d3818e8 100644 --- a/apollo-router/src/query_planner/dual_query_planner.rs +++ b/apollo-router/src/query_planner/dual_query_planner.rs @@ -90,6 +90,15 @@ impl BothModeComparisonJob { metric_query_planning_plan_duration(RUST_QP_MODE, start); + if let Ok(ref plan) = &result { + u64_histogram!( + "apollo.router.query_planning.plan.evaluated_plans", + "Number of query plans evaluated for a query before choosing the best one", + plan.statistics.evaluated_plan_count.clone().into_inner() as u64, + planner = "rust" + ); + } + // … to here, so the thread can only eiher reach here or panic. // We unset USING_CATCH_UNWIND in both cases. USING_CATCH_UNWIND.set(false); From a5578dca7ca6fcde611ca8c72feb12ebc56c9961 Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Tue, 3 Sep 2024 16:14:34 +0200 Subject: [PATCH 5/7] remove the attribute --- .../src/query_planner/bridge_query_planner.rs | 35 ++++--------------- .../src/query_planner/dual_query_planner.rs | 9 ----- 2 files changed, 6 insertions(+), 38 deletions(-) diff --git a/apollo-router/src/query_planner/bridge_query_planner.rs b/apollo-router/src/query_planner/bridge_query_planner.rs index 58a3a8e49b..43024cdea6 100644 --- a/apollo-router/src/query_planner/bridge_query_planner.rs +++ b/apollo-router/src/query_planner/bridge_query_planner.rs @@ -273,13 +273,6 @@ impl PlannerMode { .into_result() .map_err(PlanErrors::from)?; - u64_histogram!( - "apollo.router.query_planning.plan.evaluated_plans", - "Number of query plans evaluated for a query before choosing the best one", - success.data.evaluated_plan_count, - planner = "js" - ); - if let Some(root_node) = &mut success.data.query_plan.node { // Arc freshly deserialized from Deno should be unique, so this doesn’t clone: let root_node = Arc::make_mut(root_node); @@ -312,15 +305,6 @@ impl PlannerMode { init_query_plan_root_node(node)?; } - let evaluated_plan_count = - plan.statistics.evaluated_plan_count.clone().into_inner() as u64; - u64_histogram!( - "apollo.router.query_planning.plan.evaluated_plans", - "Number of query plans evaluated for a query before choosing the best one", - evaluated_plan_count, - planner = "rust" - ); - Ok(PlanSuccess { usage_reporting, data: QueryPlanResult { @@ -328,7 +312,11 @@ impl PlannerMode { query_plan: QueryPlan { node: root_node.map(Arc::new), }, - evaluated_plan_count, + evaluated_plan_count: plan + .statistics + .evaluated_plan_count + .clone() + .into_inner() as u64, }, }) } @@ -352,13 +340,6 @@ impl PlannerMode { let root_node = Arc::make_mut(root_node); init_query_plan_root_node(root_node)?; } - - u64_histogram!( - "apollo.router.query_planning.plan.evaluated_plans", - "Number of query plans evaluated for a query before choosing the best one", - success.data.evaluated_plan_count, - planner = "js" - ); } BothModeComparisonJob { @@ -1642,11 +1623,7 @@ mod tests { .await .unwrap(); - assert_histogram_exists!( - "apollo.router.query_planning.plan.evaluated_plans", - u64, - planner = "js" - ); + assert_histogram_exists!("apollo.router.query_planning.plan.evaluated_plans", u64); } .with_metrics() .await; diff --git a/apollo-router/src/query_planner/dual_query_planner.rs b/apollo-router/src/query_planner/dual_query_planner.rs index 541d3818e8..6a880cf538 100644 --- a/apollo-router/src/query_planner/dual_query_planner.rs +++ b/apollo-router/src/query_planner/dual_query_planner.rs @@ -90,15 +90,6 @@ impl BothModeComparisonJob { metric_query_planning_plan_duration(RUST_QP_MODE, start); - if let Ok(ref plan) = &result { - u64_histogram!( - "apollo.router.query_planning.plan.evaluated_plans", - "Number of query plans evaluated for a query before choosing the best one", - plan.statistics.evaluated_plan_count.clone().into_inner() as u64, - planner = "rust" - ); - } - // … to here, so the thread can only eiher reach here or panic. // We unset USING_CATCH_UNWIND in both cases. USING_CATCH_UNWIND.set(false); From 49ddc3ff1a30d0b400af2c5214c2d6e935a304ec Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Tue, 3 Sep 2024 16:41:03 +0200 Subject: [PATCH 6/7] changeset --- .changesets/feat_geal_evaluate_plan_count.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changesets/feat_geal_evaluate_plan_count.md diff --git a/.changesets/feat_geal_evaluate_plan_count.md b/.changesets/feat_geal_evaluate_plan_count.md new file mode 100644 index 0000000000..663ce74239 --- /dev/null +++ b/.changesets/feat_geal_evaluate_plan_count.md @@ -0,0 +1,6 @@ +### Add a histogram metric tracking evaluated query plans ([PR #5875](https://github.com/apollographql/router/pull/5875)) + +The `supergraph.query_planning.experimental_plans_limit` option can be used to limit the number of query plans evaluated for a query, to reduce the time spent planning. When reaching that limit, the planner would still return a valid query plan, but maybe the most optimized one. +This adds the `apollo.router.query_planning.plan.evaluated_plans` histogram metric to track the number of evaluated query plans, giving more context to configure this option. + +By [@Geal](https://github.com/Geal) in https://github.com/apollographql/router/pull/5875 \ No newline at end of file From 626bb7c32698b075bc600f7fb5f5523db9a45c32 Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Tue, 3 Sep 2024 16:45:03 +0200 Subject: [PATCH 7/7] doc --- .../telemetry/instrumentation/standard-instruments.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/configuration/telemetry/instrumentation/standard-instruments.mdx b/docs/source/configuration/telemetry/instrumentation/standard-instruments.mdx index d29cbf1fca..828a764f24 100644 --- a/docs/source/configuration/telemetry/instrumentation/standard-instruments.mdx +++ b/docs/source/configuration/telemetry/instrumentation/standard-instruments.mdx @@ -66,6 +66,7 @@ The coprocessor operations metric has the following attributes: - `apollo.router.query_planning.plan.duration` - Histogram of plan durations isolated to query planning time only. - `apollo.router.query_planning.total.duration` - Histogram of plan durations including queue time. - `apollo.router.query_planning.queued` - A gauge of the number of queued plans requests. +- `apollo.router.query_planning.plan.evaluated_plans` - Histogram of the number of evaluated query plans. - `apollo.router.v8.heap.used` - heap memory used by V8, in bytes. - `apollo.router.v8.heap.total` - total heap allocated by V8, in bytes.