Skip to content

Commit 43cbb41

Browse files
committed
fix: implement exponential backoff for NATS metrics scraping
Split NATS_TIMEOUT_AND_INITIAL_DELAY_MS into separate constants:

- NATS_TIMEOUT_MS (300ms) for the NATS operation timeout
- INITIAL_WAIT_MS (500ms) for the initial scraping interval
- MAX_WAIT_MS (9800ms) for the maximum scraping interval

The scraping interval now starts at 500ms and doubles after each scrape, up to 9.8 seconds, reducing load while maintaining responsiveness.

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
1 parent 0d44031 commit 43cbb41

File tree

1 file changed

+18
-8
lines changed

1 file changed

+18
-8
lines changed

lib/runtime/src/component.rs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,13 @@ impl Component {
273273
/// Add Prometheus metrics for this component's NATS service stats.
274274
///
275275
/// Starts a background task that periodically requests service statistics from NATS
276-
/// and updates the corresponding Prometheus metrics. The scraping interval is set to
277-
/// approximately 873ms (MAX_DELAY_MS), which is arbitrary but any value less than a second
278-
/// is fair game. This frequent scraping provides real-time service statistics updates.
276+
/// and updates the corresponding Prometheus metrics. The scraping interval starts at
277+
/// 500ms (INITIAL_WAIT_MS) and doubles after each scrape (regardless of success or failure)
278+
/// up to 9.8 seconds (MAX_WAIT_MS).
279279
pub fn start_scraping_nats_service_component_metrics(&self) -> Result<()> {
280-
const NATS_TIMEOUT_AND_INITIAL_DELAY_MS: std::time::Duration =
281-
std::time::Duration::from_millis(300);
282-
const MAX_DELAY_MS: std::time::Duration = std::time::Duration::from_millis(873);
280+
const NATS_TIMEOUT_MS: std::time::Duration = std::time::Duration::from_millis(300);
281+
const INITIAL_WAIT_MS: std::time::Duration = std::time::Duration::from_millis(500);
282+
const MAX_WAIT_MS: std::time::Duration = std::time::Duration::from_millis(9800); // Arbitrary value
283283

284284
// If there is another component with the same service name, this will fail.
285285
let component_metrics = ComponentNatsServerPrometheusMetrics::new(self)?;
@@ -308,8 +308,9 @@ impl Component {
308308
// By using the DRT's own runtime handle, we ensure the task runs in the
309309
// correct runtime that will persist for the lifetime of the component.
310310
c.drt().runtime().secondary().spawn(async move {
311-
let timeout = NATS_TIMEOUT_AND_INITIAL_DELAY_MS;
312-
let mut interval = tokio::time::interval(MAX_DELAY_MS);
311+
let timeout = NATS_TIMEOUT_MS;
312+
let mut current_wait = INITIAL_WAIT_MS;
313+
let mut interval = tokio::time::interval(current_wait);
313314
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
314315

315316
loop {
@@ -326,6 +327,15 @@ impl Component {
326327
m.reset_to_zeros();
327328
}
328329
}
330+
331+
// Always double the wait time up to MAX_WAIT_MS
332+
let new_wait = std::cmp::min(current_wait * 2, MAX_WAIT_MS);
333+
if new_wait != current_wait {
334+
current_wait = new_wait;
335+
interval = tokio::time::interval(current_wait);
336+
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
337+
}
338+
329339
interval.tick().await;
330340
}
331341
});

0 commit comments

Comments
 (0)