13 changes: 9 additions & 4 deletions pkg/llm-d-inference-sim/config_test.go
@@ -24,7 +24,10 @@ import (
"k8s.io/klog/v2"
)

const qwenModelName = "Qwen/Qwen2-0.5B"
const (
qwenModelName = "Qwen/Qwen2-0.5B"
seedInConfigFile = 100100100
)

func createSimConfig(args []string) (*configuration, error) {
oldArgs := os.Args
@@ -76,7 +79,7 @@ var _ = Describe("Simulator configuration", func() {
c.TimeToFirstToken = 2
c.InterTokenLatency = 1
c.LoraModules = []loraModule{{Name: "lora1", Path: "/path/to/lora1"}, {Name: "lora2", Path: "/path/to/lora2"}}
c.Seed = 100100100
c.Seed = seedInConfigFile
test = testCase{
name: "config file",
args: []string{"cmd", "--config", "../../manifests/config.yaml"},
@@ -124,7 +127,7 @@ var _ = Describe("Simulator configuration", func() {
c.MaxNumSeqs = 5
c.TimeToFirstToken = 2
c.InterTokenLatency = 1
c.Seed = 100100100
c.Seed = seedInConfigFile
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
c.LoraModulesString = []string{
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -149,7 +152,7 @@ var _ = Describe("Simulator configuration", func() {
c.MaxNumSeqs = 5
c.TimeToFirstToken = 2
c.InterTokenLatency = 1
c.Seed = 100100100
c.Seed = seedInConfigFile
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
c.LoraModulesString = []string{
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -176,6 +179,7 @@ var _ = Describe("Simulator configuration", func() {
c.InterTokenLatency = 1
c.LoraModules = []loraModule{}
c.LoraModulesString = []string{}
c.Seed = seedInConfigFile
test = testCase{
name: "config file with command line args with empty string for loras",
args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules", ""},
@@ -195,6 +199,7 @@ var _ = Describe("Simulator configuration", func() {
c.InterTokenLatency = 1
c.LoraModules = []loraModule{}
c.LoraModulesString = []string{}
c.Seed = seedInConfigFile
test = testCase{
name: "config file with command line args with empty parameter for loras",
args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules"},
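For context: every test case above loads ../../manifests/config.yaml and now expects Seed to equal the new seedInConfigFile constant (100100100) instead of a repeated magic number. A hypothetical excerpt of that config file, reconstructed from the test expectations rather than copied from the repository (the actual key names may differ), could look like:

# Hypothetical excerpt of manifests/config.yaml; the key names are guesses
# inferred from the expected configuration values in the tests above.
max-num-seqs: 5
time-to-first-token: 2
inter-token-latency: 1
seed: 100100100
lora-modules:
  - name: lora1
    path: /path/to/lora1
  - name: lora2
    path: /path/to/lora2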
127 changes: 127 additions & 0 deletions pkg/llm-d-inference-sim/lora_test.go
@@ -0,0 +1,127 @@
/*
Copyright 2025 The llm-d-inference-sim Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package llmdinferencesim

import (
"context"
"encoding/json"
"errors"

vllmapi "github.com/llm-d/llm-d-inference-sim/pkg/vllm-api"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/openai/openai-go"
"github.com/openai/openai-go/option"
)

var _ = Describe("LoRAs", func() {
Context("LoRAs config and load", func() {
It("Should config, load and load LoRAs correctly", func() {
ctx := context.TODO()
client, err := startServerWithArgs(ctx, "",
[]string{"cmd", "--model", model, "--mode", modeEcho,
"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}"})
Expect(err).NotTo(HaveOccurred())

openaiclient := openai.NewClient(
option.WithBaseURL(baseURL),
option.WithHTTPClient(client))

// Request to lora3
params := openai.ChatCompletionNewParams{
Messages: []openai.ChatCompletionMessageParamUnion{
openai.UserMessage(userMessage),
},
Model: "lora3",
}
resp, err := openaiclient.Chat.Completions.New(ctx, params)
Expect(err).ToNot(HaveOccurred())

Expect(resp.Choices).ShouldNot(BeEmpty())
Expect(string(resp.Object)).To(Equal(chatCompletionObject))

msg := resp.Choices[0].Message.Content
Expect(msg).Should(Equal(userMessage))

// Unknown model, should return 404
params.Model = "lora1"
_, err = openaiclient.Chat.Completions.New(ctx, params)
Expect(err).To(HaveOccurred())
var openaiError *openai.Error
ok := errors.As(err, &openaiError)
Expect(ok).To(BeTrue())
Expect(openaiError.StatusCode).To(Equal(404))

// Add lora1
payload := map[string]string{
"lora_name": "lora1", // Name to register the adapter as
"lora_path": "/path/to/lora1", // Local or remote path
}

loraParams, err := json.Marshal(payload)
Expect(err).ToNot(HaveOccurred())

options := option.WithHeader("Content-Type", "application/json")
err = openaiclient.Post(ctx, "/load_lora_adapter", loraParams, nil, options)
Expect(err).ToNot(HaveOccurred())

// Should be four models: base model and three LoRAs
var modelsResp vllmapi.ModelsResponse
err = openaiclient.Get(ctx, "/models", nil, &modelsResp)
Expect(err).ToNot(HaveOccurred())
Expect(modelsResp).NotTo(BeNil())
Expect(modelsResp.Data).To(HaveLen(4))

// Request to lora1, should work now
resp, err = openaiclient.Chat.Completions.New(ctx, params)
Expect(err).ToNot(HaveOccurred())

Expect(resp.Choices).ShouldNot(BeEmpty())
Expect(string(resp.Object)).To(Equal(chatCompletionObject))

msg = resp.Choices[0].Message.Content
Expect(msg).Should(Equal(userMessage))

// Unload lora3
payload = map[string]string{
"lora_name": "lora3", // Name to register the adapter as
"lora_path": "/path/to/lora3", // Local or remote path
}

loraParams, err = json.Marshal(payload)
Expect(err).ToNot(HaveOccurred())
options = option.WithHeader("Content-Type", "application/json")
err = openaiclient.Post(ctx, "/unload_lora_adapter", loraParams, nil, options)
Expect(err).ToNot(HaveOccurred())

// We should get an error now
params.Model = "lora3"
_, err = openaiclient.Chat.Completions.New(ctx, params)
Expect(err).To(HaveOccurred())
ok = errors.As(err, &openaiError)
Expect(ok).To(BeTrue())
Expect(openaiError.StatusCode).To(Equal(404))

// Should be three models: base model and two LoRAs
err = openaiclient.Get(ctx, "/models", nil, &modelsResp)
Expect(err).ToNot(HaveOccurred())
Expect(modelsResp).NotTo(BeNil())
Expect(modelsResp.Data).To(HaveLen(3))
})
})
})
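Outside the Ginkgo suite, the two adapter-management endpoints exercised above can be driven by any HTTP client. The following is a minimal sketch, not part of the PR: the address localhost:8000 and the /v1 path prefix are assumptions about how the simulator is deployed.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	payload, _ := json.Marshal(map[string]string{
		"lora_name": "lora1",          // name to register the adapter as
		"lora_path": "/path/to/lora1", // local or remote path
	})

	// POST the payload to the load endpoint; unloading works the same
	// way against /v1/unload_lora_adapter.
	resp, err := http.Post("http://localhost:8000/v1/load_lora_adapter",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}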
6 changes: 5 additions & 1 deletion pkg/llm-d-inference-sim/metrics.go
@@ -114,8 +114,12 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {

// reportLoras sets information about loaded LoRA adapters
func (s *VllmSimulator) reportLoras() {
var loras []string
if s.loraInfo == nil {
// Happens in the tests
return
}

var loras []string
s.runningLoras.Range(func(key interface{}, _ interface{}) bool {
if lora, ok := key.(string); ok {
loras = append(loras, lora)
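The early return added above guards against a nil loraInfo metric, which happens when the simulator is started without Prometheus registration (as in the unit tests), so the collection loop below it can no longer dereference a nil gauge. The collection itself is the standard sync.Map iteration pattern; here is a self-contained sketch of that pattern, independent of the simulator's types:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var running sync.Map
	running.Store("lora1", struct{}{})
	running.Store("lora3", struct{}{})

	var names []string
	// Range invokes the callback for each key/value pair; returning
	// true continues the iteration, false stops it.
	running.Range(func(key, _ interface{}) bool {
		if name, ok := key.(string); ok {
			names = append(names, name)
		}
		return true
	})
	fmt.Println(names) // iteration order is not guaranteed
}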
3 changes: 1 addition & 2 deletions pkg/llm-d-inference-sim/simulator.go
@@ -324,7 +324,7 @@ func (s *VllmSimulator) HandleLoadLora(ctx *fasthttp.RequestCtx) {
}

func (s *VllmSimulator) HandleUnloadLora(ctx *fasthttp.RequestCtx) {
s.logger.Info("load lora request received")
s.logger.Info("unload lora request received")
s.unloadLora(ctx)
}

@@ -512,7 +512,6 @@ func (s *VllmSimulator) responseSentCallback(model string) {
}

s.reportLoras()

}

// sendCompletionError sends an error response for the current completion request