Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/llm-d-inference-sim/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func (c *configuration) validate() error {
// Upstream vLLM behaviour: when --served-model-name is not provided,
// it falls back to using the value of --model as the single public name
// returned by the API and exposed in Prometheus metrics.
if len(c.ServedModelNames) == 0 || c.ServedModelNames[0] == "" {
if len(c.ServedModelNames) == 0 {
c.ServedModelNames = []string{c.Model}
}

Expand Down
52 changes: 47 additions & 5 deletions pkg/llm-d-inference-sim/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
"k8s.io/klog/v2"
)

// qwenModelName is the model name assigned to c.Model when building the
// expected configurations for the config-file based test cases.
const qwenModelName = "Qwen/Qwen2-0.5B"

func createSimConfig(args []string) (*configuration, error) {
oldArgs := os.Args
defer func() {
Expand Down Expand Up @@ -65,7 +67,7 @@ var _ = Describe("Simulator configuration", func() {
// Config from config.yaml file
c = newConfig()
c.Port = 8001
c.Model = "Qwen/Qwen2-0.5B"
c.Model = qwenModelName
c.ServedModelNames = []string{"model1", "model2"}
c.MaxLoras = 2
c.MaxCPULoras = 5
Expand Down Expand Up @@ -124,7 +126,7 @@ var _ = Describe("Simulator configuration", func() {
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
}
test = testCase{
name: "config file with command line args",
name: "config file with command line args with different format",
args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
"--served-model-name",
"--lora-modules={\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
Expand All @@ -148,7 +150,7 @@ var _ = Describe("Simulator configuration", func() {
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
}
test = testCase{
name: "config file with command line args",
name: "config file with command line args with empty string",
args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
"--served-model-name", "",
"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
Expand All @@ -157,6 +159,44 @@ var _ = Describe("Simulator configuration", func() {
}
tests = append(tests, test)

// Config from config.yaml file plus command line args with empty string for loras
c = newConfig()
c.Port = 8001
c.Model = qwenModelName
c.ServedModelNames = []string{"model1", "model2"}
c.MaxLoras = 2
c.MaxCPULoras = 5
c.MaxNumSeqs = 5
c.TimeToFirstToken = 2
c.InterTokenLatency = 1
c.LoraModules = []loraModule{}
c.LoraModulesString = []string{}
test = testCase{
name: "config file with command line args with empty string for loras",
args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules", ""},
expectedConfig: c,
}
tests = append(tests, test)

// Config from config.yaml file plus command line args with empty parameter for loras
c = newConfig()
c.Port = 8001
c.Model = qwenModelName
c.ServedModelNames = []string{"model1", "model2"}
c.MaxLoras = 2
c.MaxCPULoras = 5
c.MaxNumSeqs = 5
c.TimeToFirstToken = 2
c.InterTokenLatency = 1
c.LoraModules = []loraModule{}
c.LoraModulesString = []string{}
test = testCase{
name: "config file with command line args with empty parameter for loras",
args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules"},
expectedConfig: c,
}
tests = append(tests, test)

// Invalid configurations
test = testCase{
name: "invalid model",
Expand Down Expand Up @@ -200,17 +240,19 @@ var _ = Describe("Simulator configuration", func() {
Entry(tests[2].name, tests[2].args, tests[2].expectedConfig),
Entry(tests[3].name, tests[3].args, tests[3].expectedConfig),
Entry(tests[4].name, tests[4].args, tests[4].expectedConfig),
Entry(tests[5].name, tests[5].args, tests[5].expectedConfig),
Entry(tests[6].name, tests[6].args, tests[6].expectedConfig),
)

DescribeTable("invalid configurations",
func(args []string) {
_, err := createSimConfig(args)
Expect(err).To(HaveOccurred())
},
Entry(tests[5].name, tests[5].args),
Entry(tests[6].name, tests[6].args),
Entry(tests[7].name, tests[7].args),
Entry(tests[8].name, tests[8].args),
Entry(tests[9].name, tests[9].args),
Entry(tests[10].name, tests[10].args),
Entry(tests[11].name, tests[11].args),
)
})
20 changes: 12 additions & 8 deletions pkg/llm-d-inference-sim/simulator.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
servedModelNames := getParamValueFromArgs("served-model-name")
loraModuleNames := getParamValueFromArgs("lora-modules")

f := pflag.NewFlagSet("llm-d-inference-sim flags", pflag.ExitOnError)
f := pflag.NewFlagSet("llm-d-inference-sim flags", pflag.ContinueOnError)

f.IntVar(&config.Port, "port", config.Port, "Port")
f.StringVar(&config.Model, "model", config.Model, "Currently 'loaded' model")
Expand All @@ -156,12 +156,14 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")

// These values were already parsed manually above by getParamValueFromArgs; the flags are still registered here so that they appear in --help
var servedModelNameStrings multiString
f.Var(&servedModelNameStrings, "served-model-name", "Model names exposed by the API (a list of space-separated strings)")
var configFile string
f.StringVar(&configFile, "config", "", "The configuration file")
var loras multiString
f.Var(&loras, "lora-modules", "List of LoRA adapters (a list of space-separated JSON strings)")
var dummyString string
f.StringVar(&dummyString, "config", "", "The configuration file")
var dummyMultiString multiString
f.Var(&dummyMultiString, "served-model-name", "Model names exposed by the API (a list of space-separated strings)")
f.Var(&dummyMultiString, "lora-modules", "List of LoRA adapters (a list of space-separated JSON strings)")
// In order to allow empty arguments, we set a dummy NoOptDefVal for these flags
f.Lookup("served-model-name").NoOptDefVal = "dummy"
f.Lookup("lora-modules").NoOptDefVal = "dummy"

flagSet := flag.NewFlagSet("simFlagSet", flag.ExitOnError)
klog.InitFlags(flagSet)
Expand Down Expand Up @@ -205,7 +207,9 @@ func getParamValueFromArgs(param string) []string {
if strings.HasPrefix(arg, "--") {
break
}
values = append(values, arg)
if arg != "" {
values = append(values, arg)
}
} else {
if arg == "--"+param {
readValues = true
Expand Down