Skip to content

Commit d72f48c

Browse files
committed
refactor: simplify NATS metrics scraping interval logic
- Remove INITIAL_WAIT_MS constant and exponential backoff
- Use fixed 9.8s interval (MAX_WAIT_MS) for all scrapes
- Perform first scrape immediately for faster metrics availability
- Inline NATS timeout constant (now 500ms; the removed NATS_TIMEOUT_MS constant was 300ms)
- Update tests to use 500ms waits since metrics are available immediately

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
1 parent d852159 commit d72f48c

File tree

2 files changed

+11
-21
lines changed

2 files changed

+11
-21
lines changed

lib/runtime/src/component.rs

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,12 @@ impl Component {
273273
/// Add Prometheus metrics for this component's NATS service stats.
274274
///
275275
/// Starts a background task that periodically requests service statistics from NATS
276-
/// and updates the corresponding Prometheus metrics. The scraping interval starts at
277-
/// 500ms (INITIAL_WAIT_MS) and doubles after each scrape (regardless of success or failure)
278-
/// up to 9.8 seconds (MAX_WAIT_MS).
276+
/// and updates the corresponding Prometheus metrics. The first scrape happens immediately,
277+
/// then subsequent scrapes occur at a fixed interval of 9.8 seconds (MAX_WAIT_MS),
278+
/// which should be no longer than the typical Prometheus scrape interval to ensure
279+
/// metrics are fresh when Prometheus collects them.
279280
pub fn start_scraping_nats_service_component_metrics(&self) -> Result<()> {
280-
const NATS_TIMEOUT_MS: std::time::Duration = std::time::Duration::from_millis(300);
281-
const INITIAL_WAIT_MS: std::time::Duration = std::time::Duration::from_millis(500);
282-
const MAX_WAIT_MS: std::time::Duration = std::time::Duration::from_millis(9800); // Arbitrary value
281+
const MAX_WAIT_MS: std::time::Duration = std::time::Duration::from_millis(9800); // Should be <= Prometheus scrape interval
283282

284283
// If there is another component with the same service name, this will fail.
285284
let component_metrics = ComponentNatsServerPrometheusMetrics::new(self)?;
@@ -308,9 +307,8 @@ impl Component {
308307
// By using the DRT's own runtime handle, we ensure the task runs in the
309308
// correct runtime that will persist for the lifetime of the component.
310309
c.drt().runtime().secondary().spawn(async move {
311-
let timeout = NATS_TIMEOUT_MS;
312-
let mut current_wait = INITIAL_WAIT_MS;
313-
let mut interval = tokio::time::interval(current_wait);
310+
let timeout = std::time::Duration::from_millis(500);
311+
let mut interval = tokio::time::interval(MAX_WAIT_MS);
314312
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
315313

316314
loop {
@@ -328,14 +326,6 @@ impl Component {
328326
}
329327
}
330328

331-
// Always double the wait time up to MAX_WAIT_MS
332-
let new_wait = std::cmp::min(current_wait * 2, MAX_WAIT_MS);
333-
if new_wait != current_wait {
334-
current_wait = new_wait;
335-
interval = tokio::time::interval(current_wait);
336-
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
337-
}
338-
339329
interval.tick().await;
340330
}
341331
});

lib/runtime/src/metrics.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,6 +1527,10 @@ mod test_metricsregistry_nats {
15271527
}
15281528
println!("✓ Sent messages and received responses successfully");
15291529

1530+
println!("\n=== Waiting 500ms for metrics to update ===");
1531+
sleep(Duration::from_millis(500)).await;
1532+
println!("✓ Wait complete, getting final metrics...");
1533+
15301534
let final_drt_output = drt.prometheus_metrics_fmt().unwrap();
15311535
println!("\n=== Final Prometheus DRT output ===");
15321536
println!("{}", final_drt_output);
@@ -1542,10 +1546,6 @@ mod test_metricsregistry_nats {
15421546
.filter_map(|line| super::test_helpers::parse_prometheus_metric(line.as_str()))
15431547
.collect();
15441548

1545-
println!("\n=== Waiting 1 second for metrics to stabilize ===");
1546-
sleep(Duration::from_secs(1)).await;
1547-
println!("✓ Wait complete, checking final metrics...");
1548-
15491549
let post_expected_metric_values = [
15501550
// DRT NATS metrics
15511551
(

0 commit comments

Comments
 (0)