Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
234 changes: 234 additions & 0 deletions deploy/metrics/grafana_dashboards/grafana-kvbm-dashboard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "All KVBM related metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 4,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_save_kv_layer_requests{dynamo_namespace=\"kvbm_connector_worker\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "KVBM Worker: save kv layer requests",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_offload_requests{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "KVBM Leader: offload requests",
"type": "timeseries"
}
],
"preload": false,
"refresh": "auto",
"schemaVersion": 41,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "KVBM Dashboard",
"uid": "3f679257-70a5-402c-92b4-05382337b548",
"version": 7
}
12 changes: 12 additions & 0 deletions deploy/metrics/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ scrape_configs:
# - targets: ['localhost:9091'] # metrics aggregation service on host
- targets: ['host.docker.internal:9091'] # metrics aggregation service on host

# KVBM leader related metrics
- job_name: 'kvbm-leader-metrics'
scrape_interval: 2s
static_configs:
- targets: ['host.docker.internal:6881']

# KVBM worker related metrics
- job_name: 'kvbm-worker-metrics'
scrape_interval: 2s
static_configs:
- targets: ['host.docker.internal:6880']

# Uncomment to see its own Prometheus metrics
# - job_name: 'prometheus'
# scrape_interval: 5s
Expand Down
18 changes: 18 additions & 0 deletions docs/guides/run_kvbm_in_vllm.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,21 @@ curl localhost:8000/v1/chat/completions -H "Content-Type: application/json"
"max_tokens": 30
}'
```

## Enable and View KVBM Metrics

Follow the steps below to enable metrics collection and view the metrics in the Grafana dashboard:
```bash
# Start the basic services (etcd & natsd), along with Prometheus and Grafana
docker compose -f deploy/docker-compose.yml --profile metrics up -d

# Start vLLM with DYN_SYSTEM_ENABLED set to true and DYN_SYSTEM_PORT set to 6880.
# NOTE: Make sure port 6880 (for KVBM worker metrics) and port 6881 (for KVBM leader metrics) are available.
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=6880 vllm serve --kv-transfer-config '{"kv_connector":"DynamoConnector","kv_role":"kv_both", "kv_connector_module_path": "dynamo.llm.vllm_integration.connector"}' deepseek-ai/DeepSeek-R1-Distill-Llama-8B

# Optional: if a firewall blocks the KVBM metrics ports, open them so Prometheus can scrape the metrics
sudo ufw allow 6880/tcp
sudo ufw allow 6881/tcp
```

View the Grafana metrics at http://localhost:3001 (default login: dynamo/dynamo) and look for the KVBM Dashboard.
1 change: 1 addition & 0 deletions lib/bindings/python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions lib/bindings/python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ pythonize = "0.23"

dlpark = { version = "0.5", features = ["pyo3", "half"], optional = true }
cudarc = { version = "0.16.2", features = ["cuda-12020"], optional = true }
prometheus = "0.14.0"


[dev-dependencies]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod recorder;
pub mod slot;

use super::*;
use dynamo_llm::block_manager::metrics_kvbm::KvbmMetrics;
use dynamo_runtime::DistributedRuntime;
use slot::{ConnectorSlotManager, SlotError, SlotManager, SlotState};

Expand All @@ -14,6 +15,7 @@ use crate::llm::block_manager::{
vllm::KvbmRequest, VllmBlockManager,
};
use crate::DistributedRuntime as PyDistributedRuntime;
use dynamo_runtime::metrics::prometheus_names::kvbm_connector;

use dynamo_llm::block_manager::{
block::{
Expand All @@ -25,10 +27,7 @@ use dynamo_llm::block_manager::{
};
use dynamo_llm::tokens::{SaltHash, TokenBlockSequence, Tokens};

use std::{
collections::HashSet,
sync::{Arc, Mutex},
};
use std::{collections::HashSet, sync::Mutex};
use tokio;
use tokio::sync::mpsc;

Expand Down Expand Up @@ -104,8 +103,19 @@ impl KvConnectorLeader {
// if we need a drt, get it from here
let drt = drt.inner().clone();

let ns = drt
.namespace(kvbm_connector::KVBM_CONNECTOR_LEADER)
.unwrap();

let kvbm_metrics = KvbmMetrics::new(&ns);

Self {
slot_manager: ConnectorSlotManager::new(block_manager.clone(), leader, drt.clone()),
slot_manager: ConnectorSlotManager::new(
block_manager.clone(),
leader,
drt.clone(),
kvbm_metrics,
),
block_size,
inflight_requests: HashSet::new(),
onboarding_slots: HashSet::new(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,23 @@ impl KvConnectorLeaderRecorder {
let output_path = "/tmp/records.jsonl";
tracing::info!("recording events to {}", output_path);

let ns = drt.namespace("kvbm_connector_leader").unwrap();

let kvbm_metrics = KvbmMetrics::new(&ns);

let recorder = drt
.runtime()
.primary()
.block_on(async { Recorder::new(token, &output_path, None, None, None).await })
.unwrap();

let connector_leader = KvConnectorLeader {
slot_manager: ConnectorSlotManager::new(block_manager.clone(), leader, drt.clone()),
slot_manager: ConnectorSlotManager::new(
block_manager.clone(),
leader,
drt.clone(),
kvbm_metrics,
),
block_size,
inflight_requests: HashSet::new(),
onboarding_slots: HashSet::new(),
Expand Down
Loading
Loading