From 1a17f16a8343b0fbe1ad7d350b075e791ddb0478 Mon Sep 17 00:00:00 2001 From: RishabhSaini Date: Fri, 7 Nov 2025 12:16:44 -0500 Subject: [PATCH] go.mod: update GAIE to slo aware routing branch pkg/plugins/scorer: Make it compile with the new interface --- go.mod | 4 +++- go.sum | 12 +++++------ pkg/plugins/scorer/active_request.go | 5 +++++ pkg/plugins/scorer/load_aware.go | 5 +++++ pkg/plugins/scorer/precise_prefix_cache.go | 5 +++++ pkg/plugins/scorer/session_affinity.go | 23 +++++++++++++--------- 6 files changed, 38 insertions(+), 16 deletions(-) diff --git a/go.mod b/go.mod index 6567ed6e..10004f6b 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,8 @@ require ( sigs.k8s.io/gateway-api-inference-extension v1.0.0 ) +replace sigs.k8s.io/gateway-api-inference-extension => github.com/RishabhSaini/gateway-api-inference-extension v0.0.0-20251107192434-0e0db125e842 + require ( cel.dev/expr v0.24.0 // indirect github.com/Masterminds/semver/v3 v3.4.0 // indirect @@ -117,7 +119,7 @@ require ( gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250707201910-8d1bb00bc6a7 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect - google.golang.org/protobuf v1.36.7 // indirect + google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index fcb3ede5..ff232ba1 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,8 @@ github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 h1:oygO0locgZJ github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/RishabhSaini/gateway-api-inference-extension v0.0.0-20251107192434-0e0db125e842 h1:DR95wyTit+CVNFmgvG/x8JirvTdXAL1xtQ2Ui5GNxHQ= +github.com/RishabhSaini/gateway-api-inference-extension v0.0.0-20251107192434-0e0db125e842/go.mod h1:vTI7FeIhaNOOUC1eX+htUOruRjC8zunVBT/KGA9VSQE= github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0= github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/alicebob/miniredis/v2 v2.35.0 h1:QwLphYqCEAo1eu1TqPRN2jgVMPBweeQcR21jeqDCONI= @@ -192,8 +194,8 @@ github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+ github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s= github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= -github.com/onsi/ginkgo/v2 v2.25.2 h1:hepmgwx1D+llZleKQDMEvy8vIlCxMGt7W5ZxDjIEhsw= -github.com/onsi/ginkgo/v2 v2.25.2/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= +github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw= +github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0= @@ -356,8 +358,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= -google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= -google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -394,8 +396,6 @@ sigs.k8s.io/controller-runtime v0.22.0 h1:mTOfibb8Hxwpx3xEkR56i7xSjB+nH4hZG37Srl sigs.k8s.io/controller-runtime v0.22.0/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M= sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk= -sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8= -sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git a/pkg/plugins/scorer/active_request.go b/pkg/plugins/scorer/active_request.go index f4018d96..b16532e0 100644 --- a/pkg/plugins/scorer/active_request.go +++ b/pkg/plugins/scorer/active_request.go @@ -128,6 +128,11 @@ func (s *ActiveRequest) WithName(name string) *ActiveRequest { return s } +// Dependencies returns the list of plugin dependencies. +func (s *ActiveRequest) Dependencies() []plugins.TypedName { + return []plugins.TypedName{} // No dependencies +} + // Score scores the given pods based on the number of active requests // being served by each pod. The score is normalized to a range of 0-1. func (s *ActiveRequest) Score(ctx context.Context, _ *types.CycleState, _ *types.LLMRequest, diff --git a/pkg/plugins/scorer/load_aware.go b/pkg/plugins/scorer/load_aware.go index c4f86d0b..78585211 100644 --- a/pkg/plugins/scorer/load_aware.go +++ b/pkg/plugins/scorer/load_aware.go @@ -69,6 +69,11 @@ func (s *LoadAware) WithName(name string) *LoadAware { return s } +// Dependencies returns the list of plugin dependencies. +func (s *LoadAware) Dependencies() []plugins.TypedName { + return []plugins.TypedName{} // No dependencies +} + // Score scores the given pod in range of 0-1 // Currently metrics contains number of requests waiting in the queue, there is no information about number of requests // that can be processed in the given pod immediately. diff --git a/pkg/plugins/scorer/precise_prefix_cache.go b/pkg/plugins/scorer/precise_prefix_cache.go index 636fb288..d3d54ee5 100644 --- a/pkg/plugins/scorer/precise_prefix_cache.go +++ b/pkg/plugins/scorer/precise_prefix_cache.go @@ -112,6 +112,11 @@ func (s *PrecisePrefixCacheScorer) WithName(name string) *PrecisePrefixCacheScor return s } +// Dependencies returns the list of plugin dependencies. +func (s *PrecisePrefixCacheScorer) Dependencies() []plugins.TypedName { + return []plugins.TypedName{} // No dependencies +} + // Score scores the provided pod based on the KVCache index state. // The returned scores are normalized to a range of 0-1. func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 { diff --git a/pkg/plugins/scorer/session_affinity.go b/pkg/plugins/scorer/session_affinity.go index a20de574..5e23e730 100644 --- a/pkg/plugins/scorer/session_affinity.go +++ b/pkg/plugins/scorer/session_affinity.go @@ -6,7 +6,7 @@ import ( "encoding/json" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" @@ -56,6 +56,11 @@ func (s *SessionAffinity) WithName(name string) *SessionAffinity { return s } +// Dependencies returns the list of plugin dependencies. +func (s *SessionAffinity) Dependencies() []plugins.TypedName { + return []plugins.TypedName{} // No dependencies +} + // Score assign a high score to the pod used in previous requests and zero to others func (s *SessionAffinity) Score(ctx context.Context, _ *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 { scoredPods := make(map[types.Pod]float64) @@ -84,19 +89,19 @@ func (s *SessionAffinity) Score(ctx context.Context, _ *types.CycleState, reques // TODO: this should be using a cookie and ensure not overriding any other // cookie values if present. // Tracked in https://github.com/llm-d/llm-d-inference-scheduler/issues/28 -func (s *SessionAffinity) PostResponse(ctx context.Context, _ *types.LLMRequest, response *requestcontrol.Response, targetPod *backend.Pod) { - if response == nil || targetPod == nil { +func (s *SessionAffinity) PostResponse(ctx context.Context, reqCtx *handlers.RequestContext) { + if reqCtx == nil || reqCtx.Response == nil || reqCtx.TargetPod == nil { reqID := "undefined" - if response != nil { - reqID = response.RequestId + if reqCtx != nil && reqCtx.SchedulingRequest != nil { + reqID = reqCtx.SchedulingRequest.RequestId } - log.FromContext(ctx).V(logutil.DEBUG).Info("Session affinity scorer - skip post response because one of response, targetPod is nil", "req id", reqID) + log.FromContext(ctx).V(logutil.DEBUG).Info("Session affinity scorer - skip post response because one of reqCtx, response, targetPod is nil", "req id", reqID) return } - if response.Headers == nil { // TODO should always be populated? - response.Headers = make(map[string]string) + if reqCtx.Response.Headers == nil { // TODO should always be populated? + reqCtx.Response.Headers = make(map[string]string) } - response.Headers[sessionTokenHeader] = base64.StdEncoding.EncodeToString([]byte(targetPod.NamespacedName.String())) + reqCtx.Response.Headers[sessionTokenHeader] = base64.StdEncoding.EncodeToString([]byte(reqCtx.TargetPod.NamespacedName.String())) }