
Commit 547d909

Added tests for LoRAs
Signed-off-by: Ira <IRAR@il.ibm.com>
1 parent eed4e1d commit 547d909

4 files changed: +138 -2 lines changed


pkg/llm-d-inference-sim/config_test.go

Lines changed: 2 additions & 0 deletions
@@ -176,6 +176,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.InterTokenLatency = 1
 			c.LoraModules = []loraModule{}
 			c.LoraModulesString = []string{}
+			c.Seed = 100100100
 			test = testCase{
 				name: "config file with command line args with empty string for loras",
 				args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules", ""},
@@ -195,6 +196,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.InterTokenLatency = 1
 			c.LoraModules = []loraModule{}
 			c.LoraModulesString = []string{}
+			c.Seed = 100100100
 			test = testCase{
 				name: "config file with command line args with empty parameter for loras",
 				args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules"},
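
For context: each --lora-modules value in these tests is a JSON object with "name" and "path" keys, decoded into the loraModule type the tests reset. A minimal sketch of a matching definition (the field tags and helper are assumptions inferred from the JSON in the tests, not the simulator's actual source):

package llmdinferencesim

import "encoding/json"

// loraModule mirrors the JSON shape of one --lora-modules entry.
type loraModule struct {
	Name string `json:"name"`
	Path string `json:"path"`
}

// parseLoraModule is a hypothetical helper decoding a single flag value,
// e.g. {"name":"lora3","path":"/path/to/lora3"}.
func parseLoraModule(raw string) (loraModule, error) {
	var m loraModule
	err := json.Unmarshal([]byte(raw), &m)
	return m, err
}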
Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
+/*
+Copyright 2025 The llm-d-inference-sim Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package llmdinferencesim
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"github.com/openai/openai-go"
+	"github.com/openai/openai-go/option"
+)
+
+var _ = Describe("LoRAs", func() {
+	Context("LoRAs config and load", func() {
+		It("Should configure, load and unload LoRAs correctly", func() {
+			ctx := context.TODO()
+			client, err := startServerWithArgs(ctx, "",
+				[]string{"cmd", "--model", model, "--mode", modeEcho,
+					"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+					"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}"})
+			Expect(err).NotTo(HaveOccurred())
+
+			openaiclient := openai.NewClient(
+				option.WithBaseURL(baseURL),
+				option.WithHTTPClient(client))
+
+			// Request to lora3
+			params := openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora3",
+			}
+			resp, err := openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).ToNot(HaveOccurred())
+
+			Expect(resp.Choices).ShouldNot(BeEmpty())
+			Expect(string(resp.Object)).To(Equal(chatCompletionObject))
+
+			msg := resp.Choices[0].Message.Content
+			Expect(msg).Should(Equal(userMessage))
+
+			// Unknown model, should return 404
+			params = openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora1",
+			}
+			_, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).To(HaveOccurred())
+			var openaiError *openai.Error
+			ok := errors.As(err, &openaiError)
+			Expect(ok).To(BeTrue())
+			Expect(openaiError.StatusCode).To(Equal(404))
+
+			// Add lora1
+			payload := map[string]string{
+				"lora_name": "lora1",          // Name to register the adapter as
+				"lora_path": "/path/to/lora1", // Local or remote path
+			}
+
+			loraParams, err := json.Marshal(payload)
+			Expect(err).ToNot(HaveOccurred())
+
+			options := option.WithHeader("Content-Type", "application/json")
+			err = openaiclient.Post(ctx, "/load_lora_adapter", loraParams, nil, options)
+			Expect(err).ToNot(HaveOccurred())
+
+			// Request to lora1, should work now
+			params = openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora1",
+			}
+			resp, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).ToNot(HaveOccurred())
+
+			Expect(resp.Choices).ShouldNot(BeEmpty())
+			Expect(string(resp.Object)).To(Equal(chatCompletionObject))
+
+			msg = resp.Choices[0].Message.Content
+			Expect(msg).Should(Equal(userMessage))
+
+			// Unload lora3
+			payload = map[string]string{
+				"lora_name": "lora3",          // Name of the adapter to unload
+				"lora_path": "/path/to/lora3", // Local or remote path
+			}
+
+			loraParams, err = json.Marshal(payload)
+			Expect(err).ToNot(HaveOccurred())
+			options = option.WithHeader("Content-Type", "application/json")
+			err = openaiclient.Post(ctx, "/unload_lora_adapter", loraParams, nil, options)
+			fmt.Println(err)
+			Expect(err).ToNot(HaveOccurred())
+
+			// Requests to lora3 should now return 404 again
+			params = openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora3",
+			}
+			_, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).To(HaveOccurred())
+			ok = errors.As(err, &openaiError)
+			Expect(ok).To(BeTrue())
+			Expect(openaiError.StatusCode).To(Equal(404))
+		})
+	})
+})
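
The load/unload endpoints the test drives through the OpenAI client can also be hit directly over HTTP. A minimal sketch, assuming the simulator is listening on localhost:8000 (the address is an assumption; the path and payload keys mirror the test above):

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// lora_name / lora_path match the payload the test marshals above.
	body := []byte(`{"lora_name":"lora1","lora_path":"/path/to/lora1"}`)

	resp, err := http.Post("http://localhost:8000/load_lora_adapter",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("load_lora_adapter:", resp.Status) // expect 200 OK on success
}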

pkg/llm-d-inference-sim/metrics.go

Lines changed: 4 additions & 0 deletions
@@ -114,6 +114,10 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
 
 // reportLoras sets information about loaded LoRA adapters
 func (s *VllmSimulator) reportLoras() {
+	if s.loraInfo == nil {
+		return
+	}
+
 	var loras []string
 
 	s.runningLoras.Range(func(key interface{}, _ interface{}) bool {
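
The new guard suggests s.loraInfo is a metrics handle that may never be initialized, for example when Prometheus metrics are disabled, so reportLoras must degrade to a no-op. A sketch of that pattern (the names, metric, and labels here are assumptions for illustration, not the simulator's actual metrics code):

package llmdinferencesim

import "github.com/prometheus/client_golang/prometheus"

// loraInfo stays nil when metrics are disabled; every reporter
// (like reportLoras above) must check it before use.
var loraInfo *prometheus.GaugeVec

func initLoraMetrics(enabled bool) {
	if !enabled {
		return // leave loraInfo nil; reporting becomes a no-op
	}
	loraInfo = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "lora_requests_info", // illustrative name
			Help: "Currently loaded LoRA adapters",
		},
		[]string{"running_lora_adapters"},
	)
	prometheus.MustRegister(loraInfo)
}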

pkg/llm-d-inference-sim/simulator.go

Lines changed: 1 addition & 2 deletions
@@ -324,7 +324,7 @@ func (s *VllmSimulator) HandleLoadLora(ctx *fasthttp.RequestCtx) {
 }
 
 func (s *VllmSimulator) HandleUnloadLora(ctx *fasthttp.RequestCtx) {
-	s.logger.Info("load lora request received")
+	s.logger.Info("unload lora request received")
 	s.unloadLora(ctx)
 }
 
@@ -512,7 +512,6 @@ func (s *VllmSimulator) responseSentCallback(model string) {
 	}
 
 	s.reportLoras()
-
 }
 
 // sendCompletionError sends an error response for the current completion request
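
For orientation, a sketch of how handlers like HandleLoadLora and HandleUnloadLora are typically wired up with fasthttp (the simulator's actual routing code is not part of this diff; the router package and placeholder handlers are assumptions, though the paths match the test above):

package main

import (
	"log"

	"github.com/fasthttp/router"
	"github.com/valyala/fasthttp"
)

func main() {
	r := router.New()
	// Placeholder handlers standing in for the simulator's methods.
	r.POST("/load_lora_adapter", func(ctx *fasthttp.RequestCtx) {
		ctx.SetStatusCode(fasthttp.StatusOK)
	})
	r.POST("/unload_lora_adapter", func(ctx *fasthttp.RequestCtx) {
		ctx.SetStatusCode(fasthttp.StatusOK)
	})
	log.Fatal(fasthttp.ListenAndServe(":8000", r.Handler))
}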
