From 58e79e50b37868a3534768255e6c2d03bf7756d4 Mon Sep 17 00:00:00 2001 From: Ira Date: Tue, 8 Jul 2025 12:04:11 +0300 Subject: [PATCH 1/2] Don't add empty strings to arg values, support empty flags, tests Signed-off-by: Ira --- pkg/llm-d-inference-sim/config.go | 2 +- pkg/llm-d-inference-sim/config_test.go | 48 +++++++++++++++++++++++--- pkg/llm-d-inference-sim/simulator.go | 20 ++++++----- 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/pkg/llm-d-inference-sim/config.go b/pkg/llm-d-inference-sim/config.go index 1a1d19cc..4aad824b 100644 --- a/pkg/llm-d-inference-sim/config.go +++ b/pkg/llm-d-inference-sim/config.go @@ -121,7 +121,7 @@ func (c *configuration) validate() error { // Upstream vLLM behaviour: when --served-model-name is not provided, // it falls back to using the value of --model as the single public name // returned by the API and exposed in Prometheus metrics. - if len(c.ServedModelNames) == 0 || c.ServedModelNames[0] == "" { + if len(c.ServedModelNames) == 0 { c.ServedModelNames = []string{c.Model} } diff --git a/pkg/llm-d-inference-sim/config_test.go b/pkg/llm-d-inference-sim/config_test.go index 3699f6b9..76d8b10f 100644 --- a/pkg/llm-d-inference-sim/config_test.go +++ b/pkg/llm-d-inference-sim/config_test.go @@ -124,7 +124,7 @@ var _ = Describe("Simulator configuration", func() { "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}", } test = testCase{ - name: "config file with command line args", + name: "config file with command line args with different format", args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002", "--served-model-name", "--lora-modules={\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}", @@ -148,7 +148,7 @@ var _ = Describe("Simulator configuration", func() { "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}", } test = testCase{ - name: "config file with command line args", + name: "config file with command line args with empty string", args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002", "--served-model-name", "", "--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}", @@ -157,6 +157,44 @@ var _ = Describe("Simulator configuration", func() { } tests = append(tests, test) + // Config from config.yaml file plus command line args with empty string for loras + c = newConfig() + c.Port = 8001 + c.Model = "Qwen/Qwen2-0.5B" + c.ServedModelNames = []string{"model1", "model2"} + c.MaxLoras = 2 + c.MaxCPULoras = 5 + c.MaxNumSeqs = 5 + c.TimeToFirstToken = 2 + c.InterTokenLatency = 1 + c.LoraModules = []loraModule{} + c.LoraModulesString = []string{} + test = testCase{ + name: "config file with command line args with empty string for loras", + args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules", ""}, + expectedConfig: c, + } + tests = append(tests, test) + + // Config from config.yaml file plus command line args with empty parameter for loras + c = newConfig() + c.Port = 8001 + c.Model = "Qwen/Qwen2-0.5B" + c.ServedModelNames = []string{"model1", "model2"} + c.MaxLoras = 2 + c.MaxCPULoras = 5 + c.MaxNumSeqs = 5 + c.TimeToFirstToken = 2 + c.InterTokenLatency = 1 + c.LoraModules = []loraModule{} + c.LoraModulesString = []string{} + test = testCase{ + name: "config file with command line args with empty parameter for loras", + args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules"}, + expectedConfig: c, + } + tests = append(tests, test) + // Invalid configurations test = testCase{ name: "invalid model", @@ -200,6 +238,8 @@ var _ = Describe("Simulator configuration", func() { Entry(tests[2].name, tests[2].args, tests[2].expectedConfig), Entry(tests[3].name, tests[3].args, tests[3].expectedConfig), Entry(tests[4].name, tests[4].args, tests[4].expectedConfig), + Entry(tests[5].name, tests[5].args, tests[5].expectedConfig), + Entry(tests[6].name, tests[6].args, tests[6].expectedConfig), ) DescribeTable("invalid configurations", @@ -207,10 +247,10 @@ var _ = Describe("Simulator configuration", func() { _, err := createSimConfig(args) Expect(err).To(HaveOccurred()) }, - Entry(tests[5].name, tests[5].args), - Entry(tests[6].name, tests[6].args), Entry(tests[7].name, tests[7].args), Entry(tests[8].name, tests[8].args), Entry(tests[9].name, tests[9].args), + Entry(tests[10].name, tests[10].args), + Entry(tests[11].name, tests[11].args), ) }) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index cfc5a257..6a235147 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -143,7 +143,7 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error { servedModelNames := getParamValueFromArgs("served-model-name") loraModuleNames := getParamValueFromArgs("lora-modules") - f := pflag.NewFlagSet("llm-d-inference-sim flags", pflag.ExitOnError) + f := pflag.NewFlagSet("llm-d-inference-sim flags", pflag.ContinueOnError) f.IntVar(&config.Port, "port", config.Port, "Port") f.StringVar(&config.Model, "model", config.Model, "Currently 'loaded' model") @@ -156,12 +156,14 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error { f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)") // These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help - var servedModelNameStrings multiString - f.Var(&servedModelNameStrings, "served-model-name", "Model names exposed by the API (a list of space-separated strings)") - var configFile string - f.StringVar(&configFile, "config", "", "The configuration file") - var loras multiString - f.Var(&loras, "lora-modules", "List of LoRA adapters (a list of space-separated JSON strings)") + var dummyString string + f.StringVar(&dummyString, "config", "", "The configuration file") + var dummyMultiString multiString + f.Var(&dummyMultiString, "served-model-name", "Model names exposed by the API (a list of space-separated strings)") + f.Var(&dummyMultiString, "lora-modules", "List of LoRA adapters (a list of space-separated JSON strings)") + // In order to allow empty arguments, we set a dummy NoOptDefVal for these flags + f.Lookup("served-model-name").NoOptDefVal = "dummy" + f.Lookup("lora-modules").NoOptDefVal = "dummy" flagSet := flag.NewFlagSet("simFlagSet", flag.ExitOnError) klog.InitFlags(flagSet) @@ -205,7 +207,9 @@ func getParamValueFromArgs(param string) []string { if strings.HasPrefix(arg, "--") { break } - values = append(values, arg) + if arg != "" { + values = append(values, arg) + } } else { if arg == "--"+param { readValues = true From 58f24377dd586b80a2cbf547a152f5d320eca6d3 Mon Sep 17 00:00:00 2001 From: Ira Date: Tue, 8 Jul 2025 12:06:30 +0300 Subject: [PATCH 2/2] Fixed lint error Signed-off-by: Ira --- pkg/llm-d-inference-sim/config_test.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/llm-d-inference-sim/config_test.go b/pkg/llm-d-inference-sim/config_test.go index 76d8b10f..a908efcb 100644 --- a/pkg/llm-d-inference-sim/config_test.go +++ b/pkg/llm-d-inference-sim/config_test.go @@ -24,6 +24,8 @@ import ( "k8s.io/klog/v2" ) +const qwenModelName = "Qwen/Qwen2-0.5B" + func createSimConfig(args []string) (*configuration, error) { oldArgs := os.Args defer func() { @@ -65,7 +67,7 @@ var _ = Describe("Simulator configuration", func() { // Config from config.yaml file c = newConfig() c.Port = 8001 - c.Model = "Qwen/Qwen2-0.5B" + c.Model = qwenModelName c.ServedModelNames = []string{"model1", "model2"} c.MaxLoras = 2 c.MaxCPULoras = 5 @@ -160,7 +162,7 @@ var _ = Describe("Simulator configuration", func() { // Config from config.yaml file plus command line args with empty string for loras c = newConfig() c.Port = 8001 - c.Model = "Qwen/Qwen2-0.5B" + c.Model = qwenModelName c.ServedModelNames = []string{"model1", "model2"} c.MaxLoras = 2 c.MaxCPULoras = 5 @@ -179,7 +181,7 @@ var _ = Describe("Simulator configuration", func() { // Config from config.yaml file plus command line args with empty parameter for loras c = newConfig() c.Port = 8001 - c.Model = "Qwen/Qwen2-0.5B" + c.Model = qwenModelName c.ServedModelNames = []string{"model1", "model2"} c.MaxLoras = 2 c.MaxCPULoras = 5