Skip to content

Commit 5debae8

Browse files
Add totalRunningRequestsMetric to EPP pod metrics for upcomming latency prediciton features (#1899)
1 parent 0419f7f commit 5debae8

File tree

8 files changed

+66
-23
lines changed

8 files changed

+66
-23
lines changed

cmd/epp/runner/runner.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ var (
126126
"then a self-signed certificate is used.")
127127
// metric flags
128128
totalQueuedRequestsMetric = flag.String("total-queued-requests-metric", runserver.DefaultTotalQueuedRequestsMetric, "Prometheus metric for the number of queued requests.")
129+
totalRunningRequestsMetric = flag.String("total-running-requests-metric", runserver.DefaultTotalRunningRequestsMetric, "Prometheus metric for the number of running requests.")
129130
kvCacheUsagePercentageMetric = flag.String("kv-cache-usage-percentage-metric", runserver.DefaultKvCacheUsagePercentageMetric, "Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).")
130131
// LoRA metrics
131132
loraInfoMetric = flag.String("lora-info-metric", runserver.DefaultLoraInfoMetric, "Prometheus metric for the LoRA info metrics (must be in vLLM label format).")
@@ -553,6 +554,7 @@ func (r *Runner) setupMetricsCollection(setupLog logr.Logger, useExperimentalDat
553554
func setupMetricsV1(setupLog logr.Logger) (datalayer.EndpointFactory, error) {
554555
mapping, err := backendmetrics.NewMetricMapping(
555556
*totalQueuedRequestsMetric,
557+
*totalRunningRequestsMetric,
556558
*kvCacheUsagePercentageMetric,
557559
*loraInfoMetric,
558560
*cacheInfoMetric,
@@ -601,6 +603,7 @@ func setupDatalayer(logger logr.Logger) (datalayer.EndpointFactory, error) {
601603
*modelServerMetricsHttpsInsecureSkipVerify,
602604
nil)
603605
extractor, err := dlmetrics.NewExtractor(*totalQueuedRequestsMetric,
606+
*totalRunningRequestsMetric,
604607
*kvCacheUsagePercentageMetric,
605608
*loraInfoMetric, *cacheInfoMetric)
606609

pkg/epp/backend/metrics/metrics.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,15 @@ func (p *PodMetricsClientImpl) promToPodMetrics(
9797
}
9898
}
9999

100+
if p.MetricMapping.TotalRunningRequests != nil {
101+
running, err := p.getMetric(metricFamilies, *p.MetricMapping.TotalRunningRequests)
102+
if err == nil {
103+
updated.RunningQueueSize = int(running.GetGauge().GetValue())
104+
} else {
105+
errs = multierr.Append(errs, err)
106+
}
107+
}
108+
100109
if p.MetricMapping.KVCacheUtilization != nil {
101110
usage, err := p.getMetric(metricFamilies, *p.MetricMapping.KVCacheUtilization)
102111
if err == nil {

pkg/epp/backend/metrics/metrics_spec.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@ type MetricSpec struct {
2929

3030
// MetricMapping holds named MetricSpecs.
3131
type MetricMapping struct {
32-
TotalQueuedRequests *MetricSpec
33-
KVCacheUtilization *MetricSpec
34-
LoraRequestInfo *MetricSpec
35-
CacheConfigInfo *MetricSpec
32+
TotalQueuedRequests *MetricSpec
33+
TotalRunningRequests *MetricSpec
34+
KVCacheUtilization *MetricSpec
35+
LoraRequestInfo *MetricSpec
36+
CacheConfigInfo *MetricSpec
3637
}
3738

3839
// stringToMetricSpec converts a string to a MetricSpec.
@@ -94,11 +95,15 @@ func stringToMetricSpec(specStr string) (*MetricSpec, error) {
9495
}
9596

9697
// NewMetricMapping creates a MetricMapping from string values.
97-
func NewMetricMapping(queuedStr, kvUsageStr, loraReqInfoStr, cacheInfoMetric string) (*MetricMapping, error) {
98+
func NewMetricMapping(queuedStr, runningStr, kvUsageStr, loraReqInfoStr, cacheInfoMetric string) (*MetricMapping, error) {
9899
queuedSpec, err := stringToMetricSpec(queuedStr)
99100
if err != nil {
100101
return nil, fmt.Errorf("error parsing WaitingRequests: %w", err)
101102
}
103+
runningSpec, err := stringToMetricSpec(runningStr)
104+
if err != nil {
105+
return nil, fmt.Errorf("error parsing RunningRequests: %w", err)
106+
}
102107
kvUsageSpec, err := stringToMetricSpec(kvUsageStr)
103108
if err != nil {
104109
return nil, fmt.Errorf("error parsing KVCacheUsage: %w", err)
@@ -114,10 +119,11 @@ func NewMetricMapping(queuedStr, kvUsageStr, loraReqInfoStr, cacheInfoMetric str
114119
}
115120

116121
mapping := &MetricMapping{
117-
TotalQueuedRequests: queuedSpec,
118-
KVCacheUtilization: kvUsageSpec,
119-
LoraRequestInfo: loraReqInfoSpec,
120-
CacheConfigInfo: cacheInfoSpec,
122+
TotalQueuedRequests: queuedSpec,
123+
TotalRunningRequests: runningSpec,
124+
KVCacheUtilization: kvUsageSpec,
125+
LoraRequestInfo: loraReqInfoSpec,
126+
CacheConfigInfo: cacheInfoSpec,
121127
}
122128

123129
return mapping, nil

pkg/epp/datalayer/metrics/datasource_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ import (
2929

3030
func TestDatasource(t *testing.T) {
3131
source := NewDataSource("https", "/metrics", true, nil)
32-
extractor, err := NewExtractor(defaultTotalQueuedRequestsMetric, "", "", "")
32+
extractor, err := NewExtractor(defaultTotalQueuedRequestsMetric, "", "", "", "")
3333
assert.Nil(t, err, "failed to create extractor")
3434

3535
name := source.Name()

pkg/epp/datalayer/metrics/extractor.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ func Produces() map[string]any {
6666
// configured with the given metrics' specifications.
6767
// These are mandatory metrics per the MSP specification, and are used
6868
// as the basis for the built-in scheduling plugins.
69-
func NewExtractor(queueSpec, kvusageSpec, loraSpec, cacheInfoSpec string) (*Extractor, error) {
70-
mapping, err := NewMapping(queueSpec, kvusageSpec, loraSpec, cacheInfoSpec)
69+
func NewExtractor(queueSpec, runningSpec, kvusageSpec, loraSpec, cacheInfoSpec string) (*Extractor, error) {
70+
mapping, err := NewMapping(queueSpec, runningSpec, kvusageSpec, loraSpec, cacheInfoSpec)
7171
if err != nil {
7272
return nil, fmt.Errorf("failed to create extractor metrics Mapping - %w", err)
7373
}
@@ -109,6 +109,15 @@ func (ext *Extractor) Extract(ctx context.Context, data any, ep datalayer.Endpoi
109109
}
110110
}
111111

112+
if spec := ext.mapping.TotalRunningRequests; spec != nil { // extract running requests
113+
if metric, err := spec.getLatestMetric(families); err != nil {
114+
errs = append(errs, err)
115+
} else {
116+
clone.RunningQueueSize = int(extractValue(metric))
117+
updated = true
118+
}
119+
}
120+
112121
if spec := ext.mapping.KVCacheUtilization; spec != nil { // extract KV cache usage
113122
if metric, err := spec.getLatestMetric(families); err != nil {
114123
errs = append(errs, err)

pkg/epp/datalayer/metrics/extractor_test.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
const (
3232
// use hardcoded values - importing causes cycle
3333
defaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting"
34+
defaultTotalRunningRequestsMetric = "vllm:num_requests_running"
3435
defaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc"
3536
defaultLoraInfoMetric = "vllm:lora_requests_info"
3637
defaultCacheInfoMetric = "vllm:cache_config_info"
@@ -39,11 +40,11 @@ const (
3940
func TestExtractorExtract(t *testing.T) {
4041
ctx := context.Background()
4142

42-
if _, err := NewExtractor("vllm: dummy", "", "", ""); err == nil {
43+
if _, err := NewExtractor("vllm: dummy", "", "", "", ""); err == nil {
4344
t.Error("expected to fail to create extractor with invalid specification")
4445
}
4546

46-
extractor, err := NewExtractor(defaultTotalQueuedRequestsMetric,
47+
extractor, err := NewExtractor(defaultTotalQueuedRequestsMetric, defaultTotalRunningRequestsMetric,
4748
defaultKvCacheUsagePercentageMetric, defaultLoraInfoMetric, defaultCacheInfoMetric)
4849
if err != nil {
4950
t.Fatalf("failed to create extractor: %v", err)
@@ -106,6 +107,14 @@ func TestExtractorExtract(t *testing.T) {
106107
},
107108
},
108109
},
110+
defaultTotalRunningRequestsMetric: &dto.MetricFamily{
111+
Type: dto.MetricType_GAUGE.Enum(),
112+
Metric: []*dto.Metric{
113+
{
114+
Gauge: &dto.Gauge{Value: ptr.To(1.0)},
115+
},
116+
},
117+
},
109118
defaultKvCacheUsagePercentageMetric: &dto.MetricFamily{
110119
Type: dto.MetricType_GAUGE.Enum(),
111120
Metric: []*dto.Metric{

pkg/epp/datalayer/metrics/mapping.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,25 @@ import (
2323
// Mapping holds specifications for the well-known metrics defined
2424
// in the Model Server Protocol.
2525
type Mapping struct {
26-
TotalQueuedRequests *Spec
27-
KVCacheUtilization *Spec
28-
LoraRequestInfo *LoRASpec
29-
CacheInfo *Spec
26+
TotalQueuedRequests *Spec
27+
TotalRunningRequests *Spec
28+
KVCacheUtilization *Spec
29+
LoraRequestInfo *LoRASpec
30+
CacheInfo *Spec
3031
}
3132

3233
// NewMapping creates a metrics.Mapping from the input specification strings.
33-
func NewMapping(queue, kvusage, lora, cacheInfo string) (*Mapping, error) {
34+
func NewMapping(queue, running, kvusage, lora, cacheInfo string) (*Mapping, error) {
3435
var errs []error
3536

3637
queueSpec, err := parseStringToSpec(queue)
3738
if err != nil {
3839
errs = append(errs, err)
3940
}
41+
runningSpec, err := parseStringToSpec(running)
42+
if err != nil {
43+
errs = append(errs, err)
44+
}
4045
kvusageSpec, err := parseStringToSpec(kvusage)
4146
if err != nil {
4247
errs = append(errs, err)
@@ -55,9 +60,10 @@ func NewMapping(queue, kvusage, lora, cacheInfo string) (*Mapping, error) {
5560
return nil, errors.Join(errs...)
5661
}
5762
return &Mapping{
58-
TotalQueuedRequests: queueSpec,
59-
KVCacheUtilization: kvusageSpec,
60-
LoraRequestInfo: loraSpec,
61-
CacheInfo: cacheInfoSpec,
63+
TotalQueuedRequests: queueSpec,
64+
TotalRunningRequests: runningSpec,
65+
KVCacheUtilization: kvusageSpec,
66+
LoraRequestInfo: loraSpec,
67+
CacheInfo: cacheInfoSpec,
6268
}, nil
6369
}

pkg/epp/server/runserver.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ const (
7878
DefaultHealthChecking = false // default for --health-checking
7979
DefaultEnablePprof = true // default for --enable-pprof
8080
DefaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting" // default for --total-queued-requests-metric
81+
DefaultTotalRunningRequestsMetric = "vllm:num_requests_running" // default for --total-running-requests-metric
8182
DefaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc" // default for --kv-cache-usage-percentage-metric
8283
DefaultLoraInfoMetric = "vllm:lora_requests_info" // default for --lora-info-metric
8384
DefaultCacheInfoMetric = "vllm:cache_config_info" // default for --cache-info-metric

0 commit comments

Comments
 (0)