Skip to content

Commit d9d9420

Browse files
committed
feat: Base metrics: add generic ingress handler metrics
1 parent ba3ac23 commit d9d9420

File tree

15 files changed

+987
-265
lines changed

15 files changed

+987
-265
lines changed

components/router/src/main.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,16 @@ async fn app(runtime: Runtime) -> Result<()> {
6666

6767
let selector = Box::new(CustomWorkerSelector::default());
6868

69-
let router = KvRouter::new(component.clone(), args.block_size, Some(selector), true).await?;
70-
let router = Ingress::for_engine(Arc::new(router))?;
71-
72-
component
69+
let endpoint = component
7370
.service_builder()
7471
.create()
7572
.await?
76-
.endpoint("generate")
73+
.endpoint("generate");
74+
75+
let router = KvRouter::new(component.clone(), args.block_size, Some(selector), true).await?;
76+
let router = Ingress::for_engine_with_metrics(Arc::new(router), &endpoint)?;
77+
78+
endpoint
7779
.endpoint_builder()
7880
.handler(router)
7981
.start()

deploy/metrics/prometheus.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ scrape_configs:
3838
- job_name: 'llm-demo'
3939
scrape_interval: 10s
4040
static_configs:
41-
- targets: ['host.docker.internal:8000'] # on the "monitoring" network
41+
- targets: ['host.docker.internal:8080'] # on the "monitoring" network
4242

4343
# This is another demo aggregator that needs to be launched manually. See components/metrics/README.md
4444
# Note that you may need to open this port in your host's firewall. On Ubuntu: sudo ufw allow 9091/tcp

lib/llm/src/kv_router/publisher.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -499,11 +499,11 @@ impl WorkerMetricsPublisher {
499499

500500
pub async fn create_endpoint(&self, component: Component) -> Result<()> {
501501
let mut metrics_rx = self.rx.clone();
502-
let handler = Arc::new(KvLoadEndpoingHander::new(metrics_rx.clone()));
503-
let handler = Ingress::for_engine(handler)?;
502+
let endpoint = component.endpoint(KV_METRICS_ENDPOINT);
503+
let handler = Arc::new(KvLoadEndpoingHander::new(metrics_rx.clone(), endpoint.clone()));
504+
let handler = Ingress::for_engine_with_metrics(handler, &endpoint)?;
504505

505-
component
506-
.endpoint(KV_METRICS_ENDPOINT)
506+
endpoint
507507
.endpoint_builder()
508508
.stats_handler(move |_| {
509509
let metrics = metrics_rx.borrow_and_update().clone();
@@ -517,11 +517,16 @@ impl WorkerMetricsPublisher {
517517

518518
struct KvLoadEndpoingHander {
519519
metrics_rx: tokio::sync::watch::Receiver<Arc<ForwardPassMetrics>>,
520+
#[allow(dead_code)]
521+
endpoint: dynamo_runtime::component::Endpoint,
520522
}
521523

522524
impl KvLoadEndpoingHander {
523-
pub fn new(metrics_rx: tokio::sync::watch::Receiver<Arc<ForwardPassMetrics>>) -> Self {
524-
Self { metrics_rx }
525+
pub fn new(
526+
metrics_rx: tokio::sync::watch::Receiver<Arc<ForwardPassMetrics>>,
527+
endpoint: dynamo_runtime::component::Endpoint,
528+
) -> Self {
529+
Self { metrics_rx, endpoint }
525530
}
526531
}
527532

0 commit comments

Comments
 (0)