Skip to content

Commit 3fa9d73

Browse files
keivenchang authored and dillon-cullinan committed
fix: reduce nats stats query frequency (#2847)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
1 parent 98800f6 commit 3fa9d73

File tree

2 files changed

+12
-12
lines changed

2 files changed

+12
-12
lines changed

lib/runtime/src/component.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,12 @@ impl Component {
273273
/// Add Prometheus metrics for this component's NATS service stats.
274274
///
275275
/// Starts a background task that periodically requests service statistics from NATS
276-
/// and updates the corresponding Prometheus metrics. The scraping interval is set to
277-
/// approximately 873ms (MAX_DELAY_MS), which is arbitrary but any value less than a second
278-
/// is fair game. This frequent scraping provides real-time service statistics updates.
276+
/// and updates the corresponding Prometheus metrics. The first scrape happens immediately,
277+
/// then subsequent scrapes occur at a fixed interval of 9.8 seconds (MAX_WAIT_MS),
278+
/// which should be near or smaller than typical Prometheus scraping intervals to ensure
279+
/// metrics are fresh when Prometheus collects them.
279280
pub fn start_scraping_nats_service_component_metrics(&self) -> Result<()> {
280-
const NATS_TIMEOUT_AND_INITIAL_DELAY_MS: std::time::Duration =
281-
std::time::Duration::from_millis(300);
282-
const MAX_DELAY_MS: std::time::Duration = std::time::Duration::from_millis(873);
281+
const MAX_WAIT_MS: std::time::Duration = std::time::Duration::from_millis(9800); // Should be <= Prometheus scrape interval
283282

284283
// If there is another component with the same service name, this will fail.
285284
let component_metrics = ComponentNatsServerPrometheusMetrics::new(self)?;
@@ -308,8 +307,8 @@ impl Component {
308307
// By using the DRT's own runtime handle, we ensure the task runs in the
309308
// correct runtime that will persist for the lifetime of the component.
310309
c.drt().runtime().secondary().spawn(async move {
311-
let timeout = NATS_TIMEOUT_AND_INITIAL_DELAY_MS;
312-
let mut interval = tokio::time::interval(MAX_DELAY_MS);
310+
let timeout = std::time::Duration::from_millis(500);
311+
let mut interval = tokio::time::interval(MAX_WAIT_MS);
313312
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
314313

315314
loop {
@@ -326,6 +325,7 @@ impl Component {
326325
m.reset_to_zeros();
327326
}
328327
}
328+
329329
interval.tick().await;
330330
}
331331
});

lib/runtime/src/metrics.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,6 +1527,10 @@ mod test_metricsregistry_nats {
15271527
}
15281528
println!("✓ Sent messages and received responses successfully");
15291529

1530+
println!("\n=== Waiting 500ms for metrics to update ===");
1531+
sleep(Duration::from_millis(500)).await;
1532+
println!("✓ Wait complete, getting final metrics...");
1533+
15301534
let final_drt_output = drt.prometheus_metrics_fmt().unwrap();
15311535
println!("\n=== Final Prometheus DRT output ===");
15321536
println!("{}", final_drt_output);
@@ -1542,10 +1546,6 @@ mod test_metricsregistry_nats {
15421546
.filter_map(|line| super::test_helpers::parse_prometheus_metric(line.as_str()))
15431547
.collect();
15441548

1545-
println!("\n=== Waiting 1 second for metrics to stabilize ===");
1546-
sleep(Duration::from_secs(1)).await;
1547-
println!("✓ Wait complete, checking final metrics...");
1548-
15491549
let post_expected_metric_values = [
15501550
// DRT NATS metrics
15511551
(

0 commit comments

Comments
 (0)