Skip to content

Commit 6046b44

Browse files
ziqifan617nv-anants
authored and committed
feat: enable dynamo metrics on KVBM (#2626)
1 parent 9325d45 commit 6046b44

File tree

15 files changed

+396
-10
lines changed

15 files changed

+396
-10
lines changed
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
{
2+
"annotations": {
3+
"list": [
4+
{
5+
"builtIn": 1,
6+
"datasource": {
7+
"type": "grafana",
8+
"uid": "-- Grafana --"
9+
},
10+
"enable": true,
11+
"hide": true,
12+
"iconColor": "rgba(0, 211, 255, 1)",
13+
"name": "Annotations & Alerts",
14+
"type": "dashboard"
15+
}
16+
]
17+
},
18+
"description": "All KVBM related metrics",
19+
"editable": true,
20+
"fiscalYearStartMonth": 0,
21+
"graphTooltip": 0,
22+
"id": 4,
23+
"links": [],
24+
"panels": [
25+
{
26+
"datasource": {
27+
"type": "prometheus",
28+
"uid": "P1809F7CD0C75ACF3"
29+
},
30+
"fieldConfig": {
31+
"defaults": {
32+
"color": {
33+
"mode": "palette-classic"
34+
},
35+
"custom": {
36+
"axisBorderShow": false,
37+
"axisCenteredZero": false,
38+
"axisColorMode": "text",
39+
"axisLabel": "",
40+
"axisPlacement": "auto",
41+
"barAlignment": 0,
42+
"barWidthFactor": 0.6,
43+
"drawStyle": "line",
44+
"fillOpacity": 0,
45+
"gradientMode": "none",
46+
"hideFrom": {
47+
"legend": false,
48+
"tooltip": false,
49+
"viz": false
50+
},
51+
"insertNulls": false,
52+
"lineInterpolation": "linear",
53+
"lineWidth": 1,
54+
"pointSize": 5,
55+
"scaleDistribution": {
56+
"type": "linear"
57+
},
58+
"showPoints": "auto",
59+
"spanNulls": false,
60+
"stacking": {
61+
"group": "A",
62+
"mode": "none"
63+
},
64+
"thresholdsStyle": {
65+
"mode": "off"
66+
}
67+
},
68+
"mappings": [],
69+
"thresholds": {
70+
"mode": "absolute",
71+
"steps": [
72+
{
73+
"color": "green"
74+
},
75+
{
76+
"color": "red",
77+
"value": 80
78+
}
79+
]
80+
}
81+
},
82+
"overrides": []
83+
},
84+
"gridPos": {
85+
"h": 8,
86+
"w": 12,
87+
"x": 0,
88+
"y": 0
89+
},
90+
"id": 1,
91+
"options": {
92+
"legend": {
93+
"calcs": [],
94+
"displayMode": "list",
95+
"placement": "bottom",
96+
"showLegend": true
97+
},
98+
"tooltip": {
99+
"hideZeros": false,
100+
"mode": "single",
101+
"sort": "none"
102+
}
103+
},
104+
"pluginVersion": "12.0.1",
105+
"targets": [
106+
{
107+
"disableTextWrap": false,
108+
"editorMode": "builder",
109+
"expr": "dynamo_component_save_kv_layer_requests{dynamo_namespace=\"kvbm_connector_worker\"}",
110+
"fullMetaSearch": false,
111+
"includeNullMetadata": true,
112+
"legendFormat": "__auto",
113+
"range": true,
114+
"refId": "A",
115+
"useBackend": false
116+
}
117+
],
118+
"title": "KVBM Worker: save kv layer requests",
119+
"type": "timeseries"
120+
},
121+
{
122+
"datasource": {
123+
"type": "prometheus",
124+
"uid": "P1809F7CD0C75ACF3"
125+
},
126+
"fieldConfig": {
127+
"defaults": {
128+
"color": {
129+
"mode": "palette-classic"
130+
},
131+
"custom": {
132+
"axisBorderShow": false,
133+
"axisCenteredZero": false,
134+
"axisColorMode": "text",
135+
"axisLabel": "",
136+
"axisPlacement": "auto",
137+
"barAlignment": 0,
138+
"barWidthFactor": 0.6,
139+
"drawStyle": "line",
140+
"fillOpacity": 0,
141+
"gradientMode": "none",
142+
"hideFrom": {
143+
"legend": false,
144+
"tooltip": false,
145+
"viz": false
146+
},
147+
"insertNulls": false,
148+
"lineInterpolation": "linear",
149+
"lineWidth": 1,
150+
"pointSize": 5,
151+
"scaleDistribution": {
152+
"type": "linear"
153+
},
154+
"showPoints": "auto",
155+
"spanNulls": false,
156+
"stacking": {
157+
"group": "A",
158+
"mode": "none"
159+
},
160+
"thresholdsStyle": {
161+
"mode": "off"
162+
}
163+
},
164+
"mappings": [],
165+
"thresholds": {
166+
"mode": "absolute",
167+
"steps": [
168+
{
169+
"color": "green"
170+
},
171+
{
172+
"color": "red",
173+
"value": 80
174+
}
175+
]
176+
}
177+
},
178+
"overrides": []
179+
},
180+
"gridPos": {
181+
"h": 8,
182+
"w": 12,
183+
"x": 0,
184+
"y": 8
185+
},
186+
"id": 2,
187+
"options": {
188+
"legend": {
189+
"calcs": [],
190+
"displayMode": "list",
191+
"placement": "bottom",
192+
"showLegend": true
193+
},
194+
"tooltip": {
195+
"hideZeros": false,
196+
"mode": "single",
197+
"sort": "none"
198+
}
199+
},
200+
"pluginVersion": "12.0.1",
201+
"targets": [
202+
{
203+
"disableTextWrap": false,
204+
"editorMode": "builder",
205+
"expr": "dynamo_component_offload_requests{dynamo_namespace=\"kvbm_connector_leader\"}",
206+
"fullMetaSearch": false,
207+
"includeNullMetadata": true,
208+
"legendFormat": "__auto",
209+
"range": true,
210+
"refId": "A",
211+
"useBackend": false
212+
}
213+
],
214+
"title": "KVBM Leader: offload requests",
215+
"type": "timeseries"
216+
}
217+
],
218+
"preload": false,
219+
"refresh": "auto",
220+
"schemaVersion": 41,
221+
"tags": [],
222+
"templating": {
223+
"list": []
224+
},
225+
"time": {
226+
"from": "now-15m",
227+
"to": "now"
228+
},
229+
"timepicker": {},
230+
"timezone": "browser",
231+
"title": "KVBM Dashboard",
232+
"uid": "3f679257-70a5-402c-92b4-05382337b548",
233+
"version": 7
234+
}

deploy/metrics/prometheus.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,18 @@ scrape_configs:
5858
# - targets: ['localhost:9091'] # metrics aggregation service on host
5959
- targets: ['host.docker.internal:9091'] # metrics aggregation service on host
6060

61+
# KVBM leader related metrics
62+
- job_name: 'kvbm-leader-metrics'
63+
scrape_interval: 2s
64+
static_configs:
65+
- targets: ['host.docker.internal:6881']
66+
67+
# KVBM worker related metrics
68+
- job_name: 'kvbm-worker-metrics'
69+
scrape_interval: 2s
70+
static_configs:
71+
- targets: ['host.docker.internal:6880']
72+
6173
# Uncomment to see its own Prometheus metrics
6274
# - job_name: 'prometheus'
6375
# scrape_interval: 5s

docs/guides/run_kvbm_in_vllm.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,21 @@ curl localhost:8000/v1/chat/completions -H "Content-Type: application/json"
5959
"max_tokens": 30
6060
}'
6161
```
62+
63+
## Enable and View KVBM Metrics
64+
65+
Follow the steps below to enable metrics collection and view the metrics in the Grafana dashboard:
66+
```bash
67+
# Start the basic services (etcd & natsd), along with Prometheus and Grafana
68+
docker compose -f deploy/docker-compose.yml --profile metrics up -d
69+
70+
# Start vLLM with DYN_SYSTEM_ENABLED set to true and DYN_SYSTEM_PORT set to 6880.
71+
# NOTE: Make sure port 6880 (for KVBM worker metrics) and port 6881 (for KVBM leader metrics) are available.
72+
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=6880 vllm serve --kv-transfer-config '{"kv_connector":"DynamoConnector","kv_role":"kv_both", "kv_connector_module_path": "dynamo.llm.vllm_integration.connector"}' deepseek-ai/DeepSeek-R1-Distill-Llama-8B
73+
74+
# Optional: if a firewall blocks the KVBM metrics ports, open them so Prometheus can scrape the metrics
75+
sudo ufw allow 6880/tcp
76+
sudo ufw allow 6881/tcp
77+
```
78+
79+
View the metrics in Grafana at http://localhost:3001 (default login: dynamo/dynamo) and look for the KVBM Dashboard.

lib/bindings/python/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/bindings/python/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ pythonize = "0.23"
8080

8181
dlpark = { version = "0.5", features = ["pyo3", "half"], optional = true }
8282
cudarc = { version = "0.16.2", features = ["cuda-12020"], optional = true }
83+
prometheus = "0.14.0"
8384

8485

8586
[dev-dependencies]

lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pub mod recorder;
55
pub mod slot;
66

77
use super::*;
8+
use dynamo_llm::block_manager::metrics_kvbm::KvbmMetrics;
89
use dynamo_runtime::DistributedRuntime;
910
use slot::{ConnectorSlotManager, SlotError, SlotManager, SlotState};
1011

@@ -14,6 +15,7 @@ use crate::llm::block_manager::{
1415
vllm::KvbmRequest, VllmBlockManager,
1516
};
1617
use crate::DistributedRuntime as PyDistributedRuntime;
18+
use dynamo_runtime::metrics::prometheus_names::kvbm_connector;
1719

1820
use dynamo_llm::block_manager::{
1921
block::{
@@ -25,10 +27,7 @@ use dynamo_llm::block_manager::{
2527
};
2628
use dynamo_llm::tokens::{SaltHash, TokenBlockSequence, Tokens};
2729

28-
use std::{
29-
collections::HashSet,
30-
sync::{Arc, Mutex},
31-
};
30+
use std::{collections::HashSet, sync::Mutex};
3231
use tokio;
3332
use tokio::sync::mpsc;
3433

@@ -104,8 +103,19 @@ impl KvConnectorLeader {
104103
// if we need a drt, get it from here
105104
let drt = drt.inner().clone();
106105

106+
let ns = drt
107+
.namespace(kvbm_connector::KVBM_CONNECTOR_LEADER)
108+
.unwrap();
109+
110+
let kvbm_metrics = KvbmMetrics::new(&ns);
111+
107112
Self {
108-
slot_manager: ConnectorSlotManager::new(block_manager.clone(), leader, drt.clone()),
113+
slot_manager: ConnectorSlotManager::new(
114+
block_manager.clone(),
115+
leader,
116+
drt.clone(),
117+
kvbm_metrics,
118+
),
109119
block_size,
110120
inflight_requests: HashSet::new(),
111121
onboarding_slots: HashSet::new(),

lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/recorder.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,23 @@ impl KvConnectorLeaderRecorder {
109109
let output_path = "/tmp/records.jsonl";
110110
tracing::info!("recording events to {}", output_path);
111111

112+
let ns = drt.namespace("kvbm_connector_leader").unwrap();
113+
114+
let kvbm_metrics = KvbmMetrics::new(&ns);
115+
112116
let recorder = drt
113117
.runtime()
114118
.primary()
115119
.block_on(async { Recorder::new(token, &output_path, None, None, None).await })
116120
.unwrap();
117121

118122
let connector_leader = KvConnectorLeader {
119-
slot_manager: ConnectorSlotManager::new(block_manager.clone(), leader, drt.clone()),
123+
slot_manager: ConnectorSlotManager::new(
124+
block_manager.clone(),
125+
leader,
126+
drt.clone(),
127+
kvbm_metrics,
128+
),
120129
block_size,
121130
inflight_requests: HashSet::new(),
122131
onboarding_slots: HashSet::new(),

0 commit comments

Comments
 (0)