
Commit 547d909

Added tests for LoRAs
Signed-off-by: Ira <IRAR@il.ibm.com>
1 parent eed4e1d commit 547d909

4 files changed: +138 -2 lines changed


pkg/llm-d-inference-sim/config_test.go

Lines changed: 2 additions & 0 deletions
@@ -176,6 +176,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.InterTokenLatency = 1
 			c.LoraModules = []loraModule{}
 			c.LoraModulesString = []string{}
+			c.Seed = 100100100
 			test = testCase{
 				name: "config file with command line args with empty string for loras",
 				args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules", ""},
@@ -195,6 +196,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.InterTokenLatency = 1
 			c.LoraModules = []loraModule{}
 			c.LoraModulesString = []string{}
+			c.Seed = 100100100
 			test = testCase{
 				name: "config file with command line args with empty parameter for loras",
 				args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules"},
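
For context: each --lora-modules value in these tests is a JSON object with "name" and "path" keys, decoded into the loraModule type the tests reset. A minimal sketch of a matching definition (the field tags and helper are assumptions inferred from the JSON in the tests, not the simulator's actual source):

package llmdinferencesim

import "encoding/json"

// loraModule mirrors the JSON shape of one --lora-modules entry.
type loraModule struct {
	Name string `json:"name"`
	Path string `json:"path"`
}

// parseLoraModule is a hypothetical helper decoding a single flag value,
// e.g. {"name":"lora3","path":"/path/to/lora3"}.
func parseLoraModule(raw string) (loraModule, error) {
	var m loraModule
	err := json.Unmarshal([]byte(raw), &m)
	return m, err
}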
Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
+/*
+Copyright 2025 The llm-d-inference-sim Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package llmdinferencesim
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"github.com/openai/openai-go"
+	"github.com/openai/openai-go/option"
+)
+
+var _ = Describe("LoRAs", func() {
+	Context("LoRAs config and load", func() {
+		It("Should configure, load and unload LoRAs correctly", func() {
+			ctx := context.TODO()
+			client, err := startServerWithArgs(ctx, "",
+				[]string{"cmd", "--model", model, "--mode", modeEcho,
+					"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+					"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}"})
+			Expect(err).NotTo(HaveOccurred())
+
+			openaiclient := openai.NewClient(
+				option.WithBaseURL(baseURL),
+				option.WithHTTPClient(client))
+
+			// Request to lora3
+			params := openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora3",
+			}
+			resp, err := openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).ToNot(HaveOccurred())
+
+			Expect(resp.Choices).ShouldNot(BeEmpty())
+			Expect(string(resp.Object)).To(Equal(chatCompletionObject))
+
+			msg := resp.Choices[0].Message.Content
+			Expect(msg).Should(Equal(userMessage))
+
+			// Unknown model, should return 404
+			params = openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora1",
+			}
+			_, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).To(HaveOccurred())
+			var openaiError *openai.Error
+			ok := errors.As(err, &openaiError)
+			Expect(ok).To(BeTrue())
+			Expect(openaiError.StatusCode).To(Equal(404))
+
+			// Add lora1
+			payload := map[string]string{
+				"lora_name": "lora1",          // Name to register the adapter as
+				"lora_path": "/path/to/lora1", // Local or remote path
+			}
+
+			loraParams, err := json.Marshal(payload)
+			Expect(err).ToNot(HaveOccurred())
+
+			options := option.WithHeader("Content-Type", "application/json")
+			err = openaiclient.Post(ctx, "/load_lora_adapter", loraParams, nil, options)
+			Expect(err).ToNot(HaveOccurred())
+
+			// Request to lora1, should work now
+			params = openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora1",
+			}
+			resp, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).ToNot(HaveOccurred())
+
+			Expect(resp.Choices).ShouldNot(BeEmpty())
+			Expect(string(resp.Object)).To(Equal(chatCompletionObject))
+
+			msg = resp.Choices[0].Message.Content
+			Expect(msg).Should(Equal(userMessage))
+
+			// Unload lora3
+			payload = map[string]string{
+				"lora_name": "lora3",          // Name of the adapter to unload
+				"lora_path": "/path/to/lora3", // Local or remote path
+			}
+
+			loraParams, err = json.Marshal(payload)
+			Expect(err).ToNot(HaveOccurred())
+			options = option.WithHeader("Content-Type", "application/json")
+			err = openaiclient.Post(ctx, "/unload_lora_adapter", loraParams, nil, options)
+			fmt.Println(err)
+			Expect(err).ToNot(HaveOccurred())
+
+			// Requests to lora3 should now return 404 again
+			params = openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora3",
+			}
+			_, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).To(HaveOccurred())
+			ok = errors.As(err, &openaiError)
+			Expect(ok).To(BeTrue())
+			Expect(openaiError.StatusCode).To(Equal(404))
+		})
+	})
+})
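
The load/unload endpoints the test drives through the OpenAI client can also be hit directly over HTTP. A minimal sketch, assuming the simulator is listening on localhost:8000 (the address is an assumption; the path and payload keys mirror the test above):

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// lora_name / lora_path match the payload the test marshals above.
	body := []byte(`{"lora_name":"lora1","lora_path":"/path/to/lora1"}`)

	resp, err := http.Post("http://localhost:8000/load_lora_adapter",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("load_lora_adapter:", resp.Status) // expect 200 OK on success
}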

pkg/llm-d-inference-sim/metrics.go

Lines changed: 4 additions & 0 deletions
@@ -114,6 +114,10 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
 
 // reportLoras sets information about loaded LoRA adapters
 func (s *VllmSimulator) reportLoras() {
+	if s.loraInfo == nil {
+		return
+	}
+
 	var loras []string
 
 	s.runningLoras.Range(func(key interface{}, _ interface{}) bool {
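
The new guard suggests s.loraInfo is a metrics handle that may never be initialized, for example when Prometheus metrics are disabled, so reportLoras must degrade to a no-op. A sketch of that pattern (the names, metric, and labels here are assumptions for illustration, not the simulator's actual metrics code):

package llmdinferencesim

import "github.com/prometheus/client_golang/prometheus"

// loraInfo stays nil when metrics are disabled; every reporter
// (like reportLoras above) must check it before use.
var loraInfo *prometheus.GaugeVec

func initLoraMetrics(enabled bool) {
	if !enabled {
		return // leave loraInfo nil; reporting becomes a no-op
	}
	loraInfo = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "lora_requests_info", // illustrative name
			Help: "Currently loaded LoRA adapters",
		},
		[]string{"running_lora_adapters"},
	)
	prometheus.MustRegister(loraInfo)
}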

pkg/llm-d-inference-sim/simulator.go

Lines changed: 1 addition & 2 deletions
@@ -324,7 +324,7 @@ func (s *VllmSimulator) HandleLoadLora(ctx *fasthttp.RequestCtx) {
 }
 
 func (s *VllmSimulator) HandleUnloadLora(ctx *fasthttp.RequestCtx) {
-	s.logger.Info("load lora request received")
+	s.logger.Info("unload lora request received")
 	s.unloadLora(ctx)
 }
 
@@ -512,7 +512,6 @@ func (s *VllmSimulator) responseSentCallback(model string) {
 	}
 
 	s.reportLoras()
-
 }
 
 // sendCompletionError sends an error response for the current completion request
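
For orientation, a sketch of how handlers like HandleLoadLora and HandleUnloadLora are typically wired up with fasthttp (the simulator's actual routing code is not part of this diff; the router package and placeholder handlers are assumptions, though the paths match the test above):

package main

import (
	"log"

	"github.com/fasthttp/router"
	"github.com/valyala/fasthttp"
)

func main() {
	r := router.New()
	// Placeholder handlers standing in for the simulator's methods.
	r.POST("/load_lora_adapter", func(ctx *fasthttp.RequestCtx) {
		ctx.SetStatusCode(fasthttp.StatusOK)
	})
	r.POST("/unload_lora_adapter", func(ctx *fasthttp.RequestCtx) {
		ctx.SetStatusCode(fasthttp.StatusOK)
	})
	log.Fatal(fasthttp.ListenAndServe(":8000", r.Handler))
}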
