13 changes: 9 additions & 4 deletions pkg/llm-d-inference-sim/config_test.go
@@ -24,7 +24,10 @@ import (
"k8s.io/klog/v2"
)

const qwenModelName = "Qwen/Qwen2-0.5B"
const (
qwenModelName = "Qwen/Qwen2-0.5B"
seedInConfigFile = 100100100
)

func createSimConfig(args []string) (*configuration, error) {
oldArgs := os.Args
@@ -76,7 +79,7 @@ var _ = Describe("Simulator configuration", func() {
c.TimeToFirstToken = 2
c.InterTokenLatency = 1
c.LoraModules = []loraModule{{Name: "lora1", Path: "/path/to/lora1"}, {Name: "lora2", Path: "/path/to/lora2"}}
c.Seed = 100100100
c.Seed = seedInConfigFile
test = testCase{
name: "config file",
args: []string{"cmd", "--config", "../../manifests/config.yaml"},
@@ -124,7 +127,7 @@ var _ = Describe("Simulator configuration", func() {
c.MaxNumSeqs = 5
c.TimeToFirstToken = 2
c.InterTokenLatency = 1
c.Seed = 100100100
c.Seed = seedInConfigFile
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
c.LoraModulesString = []string{
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -149,7 +152,7 @@ var _ = Describe("Simulator configuration", func() {
c.MaxNumSeqs = 5
c.TimeToFirstToken = 2
c.InterTokenLatency = 1
c.Seed = 100100100
c.Seed = seedInConfigFile
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
c.LoraModulesString = []string{
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -176,6 +179,7 @@ var _ = Describe("Simulator configuration", func() {
c.InterTokenLatency = 1
c.LoraModules = []loraModule{}
c.LoraModulesString = []string{}
c.Seed = seedInConfigFile
test = testCase{
name: "config file with command line args with empty string for loras",
args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules", ""},
@@ -195,6 +199,7 @@ var _ = Describe("Simulator configuration", func() {
c.InterTokenLatency = 1
c.LoraModules = []loraModule{}
c.LoraModulesString = []string{}
c.Seed = seedInConfigFile
test = testCase{
name: "config file with command line args with empty parameter for loras",
args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules"},
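For context: every test case above loads ../../manifests/config.yaml and now expects Seed to equal the new seedInConfigFile constant (100100100) instead of a repeated magic number. A hypothetical excerpt of that config file, reconstructed from the test expectations rather than copied from the repository (the actual key names may differ), could look like:

# Hypothetical excerpt of manifests/config.yaml; the key names are guesses
# inferred from the expected configuration values in the tests above.
max-num-seqs: 5
time-to-first-token: 2
inter-token-latency: 1
seed: 100100100
lora-modules:
  - name: lora1
    path: /path/to/lora1
  - name: lora2
    path: /path/to/lora2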
127 changes: 127 additions & 0 deletions pkg/llm-d-inference-sim/lora_test.go
@@ -0,0 +1,127 @@
/*
Copyright 2025 The llm-d-inference-sim Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package llmdinferencesim

import (
"context"
"encoding/json"
"errors"

vllmapi "github.com/llm-d/llm-d-inference-sim/pkg/vllm-api"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/openai/openai-go"
"github.com/openai/openai-go/option"
)

var _ = Describe("LoRAs", func() {
Context("LoRAs config and load", func() {
It("Should config, load and load LoRAs correctly", func() {
ctx := context.TODO()
client, err := startServerWithArgs(ctx, "",
[]string{"cmd", "--model", model, "--mode", modeEcho,
"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}"})
Expect(err).NotTo(HaveOccurred())

openaiclient := openai.NewClient(
option.WithBaseURL(baseURL),
option.WithHTTPClient(client))

// Request to lora3
params := openai.ChatCompletionNewParams{
Messages: []openai.ChatCompletionMessageParamUnion{
openai.UserMessage(userMessage),
},
Model: "lora3",
}
resp, err := openaiclient.Chat.Completions.New(ctx, params)
Expect(err).ToNot(HaveOccurred())

Expect(resp.Choices).ShouldNot(BeEmpty())
Expect(string(resp.Object)).To(Equal(chatCompletionObject))

msg := resp.Choices[0].Message.Content
Expect(msg).Should(Equal(userMessage))

// Unknown model, should return 404
params.Model = "lora1"
_, err = openaiclient.Chat.Completions.New(ctx, params)
Expect(err).To(HaveOccurred())
var openaiError *openai.Error
ok := errors.As(err, &openaiError)
Expect(ok).To(BeTrue())
Expect(openaiError.StatusCode).To(Equal(404))

// Add lora1
payload := map[string]string{
"lora_name": "lora1", // Name to register the adapter as
"lora_path": "/path/to/lora1", // Local or remote path
}

loraParams, err := json.Marshal(payload)
Expect(err).ToNot(HaveOccurred())

options := option.WithHeader("Content-Type", "application/json")
err = openaiclient.Post(ctx, "/load_lora_adapter", loraParams, nil, options)
Expect(err).ToNot(HaveOccurred())

// Should be four models: base model and three LoRAs
var modelsResp vllmapi.ModelsResponse
err = openaiclient.Get(ctx, "/models", nil, &modelsResp)
Expect(err).ToNot(HaveOccurred())
Expect(modelsResp).NotTo(BeNil())
Expect(modelsResp.Data).To(HaveLen(4))

// Request to lora1, should work now
resp, err = openaiclient.Chat.Completions.New(ctx, params)
Expect(err).ToNot(HaveOccurred())

Expect(resp.Choices).ShouldNot(BeEmpty())
Expect(string(resp.Object)).To(Equal(chatCompletionObject))

msg = resp.Choices[0].Message.Content
Expect(msg).Should(Equal(userMessage))

// Unload lora3
payload = map[string]string{
"lora_name": "lora3", // Name to register the adapter as
"lora_path": "/path/to/lora3", // Local or remote path
}

loraParams, err = json.Marshal(payload)
Expect(err).ToNot(HaveOccurred())
options = option.WithHeader("Content-Type", "application/json")
err = openaiclient.Post(ctx, "/unload_lora_adapter", loraParams, nil, options)
Expect(err).ToNot(HaveOccurred())

// We should get an error now
params.Model = "lora3"
_, err = openaiclient.Chat.Completions.New(ctx, params)
Expect(err).To(HaveOccurred())
ok = errors.As(err, &openaiError)
Expect(ok).To(BeTrue())
Expect(openaiError.StatusCode).To(Equal(404))

// Should be three models: base model and two LoRAs
err = openaiclient.Get(ctx, "/models", nil, &modelsResp)
Expect(err).ToNot(HaveOccurred())
Expect(modelsResp).NotTo(BeNil())
Expect(modelsResp.Data).To(HaveLen(3))
})
})
})
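Outside the Ginkgo suite, the two adapter-management endpoints exercised above can be driven by any HTTP client. The following is a minimal sketch, not part of the PR: the address localhost:8000 and the /v1 path prefix are assumptions about how the simulator is deployed.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	payload, _ := json.Marshal(map[string]string{
		"lora_name": "lora1",          // name to register the adapter as
		"lora_path": "/path/to/lora1", // local or remote path
	})

	// POST the payload to the load endpoint; unloading works the same
	// way against /v1/unload_lora_adapter.
	resp, err := http.Post("http://localhost:8000/v1/load_lora_adapter",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}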
6 changes: 5 additions & 1 deletion pkg/llm-d-inference-sim/metrics.go
@@ -114,8 +114,12 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {

// reportLoras sets information about loaded LoRA adapters
func (s *VllmSimulator) reportLoras() {
var loras []string
if s.loraInfo == nil {
// Happens in the tests
return
}

var loras []string
s.runningLoras.Range(func(key interface{}, _ interface{}) bool {
if lora, ok := key.(string); ok {
loras = append(loras, lora)
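The early return added above guards against a nil loraInfo metric, which happens when the simulator is started without Prometheus registration (as in the unit tests), so the collection loop below it can no longer dereference a nil gauge. The collection itself is the standard sync.Map iteration pattern; here is a self-contained sketch of that pattern, independent of the simulator's types:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var running sync.Map
	running.Store("lora1", struct{}{})
	running.Store("lora3", struct{}{})

	var names []string
	// Range invokes the callback for each key/value pair; returning
	// true continues the iteration, false stops it.
	running.Range(func(key, _ interface{}) bool {
		if name, ok := key.(string); ok {
			names = append(names, name)
		}
		return true
	})
	fmt.Println(names) // iteration order is not guaranteed
}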
3 changes: 1 addition & 2 deletions pkg/llm-d-inference-sim/simulator.go
@@ -324,7 +324,7 @@ func (s *VllmSimulator) HandleLoadLora(ctx *fasthttp.RequestCtx) {
}

func (s *VllmSimulator) HandleUnloadLora(ctx *fasthttp.RequestCtx) {
s.logger.Info("load lora request received")
s.logger.Info("unload lora request received")
s.unloadLora(ctx)
}

@@ -512,7 +512,6 @@ func (s *VllmSimulator) responseSentCallback(model string) {
}

s.reportLoras()

}

// sendCompletionError sends an error response for the current completion request