From 855b3c168aed25c0db6e76d7b931bd64508bf909 Mon Sep 17 00:00:00 2001 From: Brian Fox Date: Mon, 24 Nov 2025 11:23:18 +0100 Subject: [PATCH 1/3] feat(controller): support configuration using env variables instead of args Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> --- go/pkg/app/app.go | 37 ++++++- go/pkg/app/app_test.go | 219 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 251 insertions(+), 5 deletions(-) diff --git a/go/pkg/app/app.go b/go/pkg/app/app.go index 1a7b7ce69..1ee1e65e1 100644 --- a/go/pkg/app/app.go +++ b/go/pkg/app/app.go @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "flag" + "fmt" "net/http" "net/http/pprof" "os" @@ -29,6 +30,7 @@ import ( "github.com/gorilla/mux" + "github.com/hashicorp/go-multierror" "github.com/kagent-dev/kagent/go/internal/version" "k8s.io/apimachinery/pkg/api/resource" @@ -147,6 +149,30 @@ func (cfg *Config) SetFlags(commandLine *flag.FlagSet) { commandLine.Var(&cfg.Streaming.MaxBufSize, "streaming-max-buf-size", "The maximum size of the streaming buffer.") commandLine.Var(&cfg.Streaming.InitialBufSize, "streaming-initial-buf-size", "The initial size of the streaming buffer.") commandLine.DurationVar(&cfg.Streaming.Timeout, "streaming-timeout", 60*time.Second, "The timeout for the streaming connection.") + + commandLine.StringVar(&agent_translator.DefaultImageConfig.Registry, "image-registry", agent_translator.DefaultImageConfig.Registry, "The registry to use for the image.") + commandLine.StringVar(&agent_translator.DefaultImageConfig.Tag, "image-tag", agent_translator.DefaultImageConfig.Tag, "The tag to use for the image.") + commandLine.StringVar(&agent_translator.DefaultImageConfig.PullPolicy, "image-pull-policy", agent_translator.DefaultImageConfig.PullPolicy, "The pull policy to use for the image.") + commandLine.StringVar(&agent_translator.DefaultImageConfig.PullSecret, "image-pull-secret", "", "The pull secret name for the agent image.") + commandLine.StringVar(&agent_translator.DefaultImageConfig.Repository, "image-repository", agent_translator.DefaultImageConfig.Repository, "The repository to use for the agent image.") +} + +// LoadFromEnv loads configuration values from environment variables. +// Flag names are converted to uppercase with underscores (e.g., metrics-bind-address -> METRICS_BIND_ADDRESS). +func LoadFromEnv(fs *flag.FlagSet) error { + var loadErr error + + fs.VisitAll(func(f *flag.Flag) { + envName := strings.ToUpper(strings.ReplaceAll(f.Name, "-", "_")) + + if envVal := os.Getenv(envName); envVal != "" { + if err := f.Value.Set(envVal); err != nil { + loadErr = multierror.Append(loadErr, fmt.Errorf("failed to set flag %s from env %s=%s: %w", f.Name, envName, envVal, err)) + } + } + }) + + return loadErr } type BootstrapConfig struct { @@ -174,16 +200,17 @@ func Start(getExtensionConfig GetExtensionConfig) { ctx := context.Background() cfg.SetFlags(flag.CommandLine) - flag.StringVar(&agent_translator.DefaultImageConfig.Registry, "image-registry", agent_translator.DefaultImageConfig.Registry, "The registry to use for the image.") - flag.StringVar(&agent_translator.DefaultImageConfig.Tag, "image-tag", agent_translator.DefaultImageConfig.Tag, "The tag to use for the image.") - flag.StringVar(&agent_translator.DefaultImageConfig.PullPolicy, "image-pull-policy", agent_translator.DefaultImageConfig.PullPolicy, "The pull policy to use for the image.") - flag.StringVar(&agent_translator.DefaultImageConfig.PullSecret, "image-pull-secret", "", "The pull secret name for the agent image.") - flag.StringVar(&agent_translator.DefaultImageConfig.Repository, "image-repository", agent_translator.DefaultImageConfig.Repository, "The repository to use for the agent image.") opts := zap.Options{} opts.BindFlags(flag.CommandLine) flag.Parse() + // Load configuration from environment variables (overrides flags) + if err := LoadFromEnv(flag.CommandLine); err != nil { + setupLog.Error(err, "failed to load configuration from environment variables") + os.Exit(1) + } + logger := zap.New(zap.UseFlagOptions(&opts)) ctrl.SetLogger(logger) diff --git a/go/pkg/app/app_test.go b/go/pkg/app/app_test.go index d18991178..a75b06a58 100644 --- a/go/pkg/app/app_test.go +++ b/go/pkg/app/app_test.go @@ -1,10 +1,13 @@ package app import ( + "flag" "strings" "testing" + "time" "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/api/resource" ) func TestFilterValidNamespaces(t *testing.T) { @@ -112,3 +115,219 @@ func TestConfigureNamespaceWatching(t *testing.T) { }) } } + +func TestLoadFromEnv(t *testing.T) { + tests := []struct { + name string + envVars map[string]string + flagName string + flagDefault string + wantValue string + }{ + { + name: "string flag with hyphen", + envVars: map[string]string{ + "METRICS_BIND_ADDRESS": ":9090", + }, + flagName: "metrics-bind-address", + flagDefault: ":8080", + wantValue: ":9090", + }, + { + name: "flag without env var uses default", + envVars: map[string]string{ + "OTHER_FLAG": "value", + }, + flagName: "test-flag", + flagDefault: "default", + wantValue: "default", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set environment variables + for k, v := range tt.envVars { + t.Setenv(k, v) + } + + // Create a new flag set for testing + fs := flag.NewFlagSet("test", flag.ContinueOnError) + var testVar string + fs.StringVar(&testVar, tt.flagName, tt.flagDefault, "test flag") + + // Load from environment + if err := LoadFromEnv(fs); err != nil { + t.Fatalf("LoadFromEnv() error = %v", err) + } + + // Check the value + if testVar != tt.wantValue { + t.Errorf("flag value = %v, want %v", testVar, tt.wantValue) + } + }) + } +} + +func TestLoadFromEnvBoolFlags(t *testing.T) { + tests := []struct { + name string + envValue string + wantValue bool + wantErr bool + }{ + { + name: "true value", + envValue: "true", + wantValue: true, + wantErr: false, + }, + { + name: "false value", + envValue: "false", + wantValue: false, + wantErr: false, + }, + { + name: "1 value", + envValue: "1", + wantValue: true, + wantErr: false, + }, + { + name: "0 value", + envValue: "0", + wantValue: false, + wantErr: false, + }, + { + name: "invalid value", + envValue: "invalid", + wantValue: false, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + envName := "TEST_BOOL" + t.Setenv(envName, tt.envValue) + + fs := flag.NewFlagSet("test", flag.ContinueOnError) + var testVar bool + fs.BoolVar(&testVar, "test-bool", false, "test bool flag") + + err := LoadFromEnv(fs) + if (err != nil) != tt.wantErr { + t.Errorf("LoadFromEnv() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !tt.wantErr && testVar != tt.wantValue { + t.Errorf("flag value = %v, want %v", testVar, tt.wantValue) + } + }) + } +} + +func TestLoadFromEnvDurationFlags(t *testing.T) { + envName := "TEST_DURATION" + t.Setenv(envName, "5m") + + fs := flag.NewFlagSet("test", flag.ContinueOnError) + var testVar time.Duration + fs.DurationVar(&testVar, "test-duration", 1*time.Second, "test duration flag") + + if err := LoadFromEnv(fs); err != nil { + t.Fatalf("LoadFromEnv() error = %v", err) + } + + wantValue := 5 * time.Minute + if testVar != wantValue { + t.Errorf("flag value = %v, want %v", testVar, wantValue) + } +} + +func TestLoadFromEnvIntegration(t *testing.T) { + envVars := map[string]string{ + "METRICS_BIND_ADDRESS": ":9090", + "HEALTH_PROBE_BIND_ADDRESS": ":8081", + "LEADER_ELECT": "true", + "METRICS_SECURE": "false", + "ENABLE_HTTP2": "true", + "DEFAULT_MODEL_CONFIG_NAME": "custom-model", + "DEFAULT_MODEL_CONFIG_NAMESPACE": "custom-ns", + "HTTP_SERVER_ADDRESS": ":9000", + "A2A_BASE_URL": "http://example.com:9000", + "DATABASE_TYPE": "postgres", + "POSTGRES_DATABASE_URL": "postgres://localhost:5432/testdb", + "WATCH_NAMESPACES": "ns1,ns2,ns3", + "STREAMING_TIMEOUT": "120s", + "STREAMING_MAX_BUF_SIZE": "2Mi", + "STREAMING_INITIAL_BUF_SIZE": "8Ki", + } + + for k, v := range envVars { + t.Setenv(k, v) + } + + fs := flag.NewFlagSet("test", flag.ContinueOnError) + cfg := Config{} + cfg.SetFlags(fs) // Sets flags and defaults + + if err := LoadFromEnv(fs); err != nil { + t.Fatalf("LoadFromEnv() error = %v", err) + } + + // Verify values - env vars should override default flags + if cfg.Metrics.Addr != ":9090" { + t.Errorf("Metrics.Addr = %v, want :9090", cfg.Metrics.Addr) + } + if cfg.ProbeAddr != ":8081" { + t.Errorf("ProbeAddr = %v, want :8081", cfg.ProbeAddr) + } + if !cfg.LeaderElection { + t.Errorf("LeaderElection = false, want true") + } + if cfg.SecureMetrics { + t.Errorf("SecureMetrics = true, want false") + } + if !cfg.EnableHTTP2 { + t.Errorf("EnableHTTP2 = false, want true") + } + if cfg.DefaultModelConfig.Name != "custom-model" { + t.Errorf("DefaultModelConfig.Name = %v, want custom-model", cfg.DefaultModelConfig.Name) + } + if cfg.DefaultModelConfig.Namespace != "custom-ns" { + t.Errorf("DefaultModelConfig.Namespace = %v, want custom-ns", cfg.DefaultModelConfig.Namespace) + } + if cfg.HttpServerAddr != ":9000" { + t.Errorf("HttpServerAddr = %v, want :9000", cfg.HttpServerAddr) + } + if cfg.A2ABaseUrl != "http://example.com:9000" { + t.Errorf("A2ABaseUrl = %v, want http://example.com:9000", cfg.A2ABaseUrl) + } + if cfg.Database.Type != "postgres" { + t.Errorf("Database.Type = %v, want postgres", cfg.Database.Type) + } + if cfg.Database.Url != "postgres://localhost:5432/testdb" { + t.Errorf("Database.Url = %v, want postgres://localhost:5432/testdb", cfg.Database.Url) + } + if cfg.WatchNamespaces != "ns1,ns2,ns3" { + t.Errorf("WatchNamespaces = %v, want ns1,ns2,ns3", cfg.WatchNamespaces) + } + if cfg.Streaming.Timeout != 120*time.Second { + t.Errorf("Streaming.Timeout = %v, want 120s", cfg.Streaming.Timeout) + } + + // Check quantity values + expectedMaxBuf := resource.MustParse("2Mi") + if cfg.Streaming.MaxBufSize.Cmp(expectedMaxBuf) != 0 { + t.Errorf("Streaming.MaxBufSize = %v, want 2Mi", cfg.Streaming.MaxBufSize) + } + + expectedInitBuf := resource.MustParse("8Ki") + if cfg.Streaming.InitialBufSize.Cmp(expectedInitBuf) != 0 { + t.Errorf("Streaming.InitialBufSize = %v, want 8Ki", cfg.Streaming.InitialBufSize) + } +} From fdedb892f7f17a65278ea2aafa21b31b3cd59172 Mon Sep 17 00:00:00 2001 From: Brian Fox Date: Mon, 24 Nov 2025 11:52:17 +0100 Subject: [PATCH 2/3] feat(helm): migrate all args to env var supplied via configmap Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> --- .../templates/controller-configmap.yaml | 36 ++++++++++ .../templates/controller-deployment.yaml | 66 +++---------------- .../tests/controller-deployment_test.yaml | 40 +++++------ 3 files changed, 62 insertions(+), 80 deletions(-) create mode 100644 helm/kagent/templates/controller-configmap.yaml diff --git a/helm/kagent/templates/controller-configmap.yaml b/helm/kagent/templates/controller-configmap.yaml new file mode 100644 index 000000000..792b58c01 --- /dev/null +++ b/helm/kagent/templates/controller-configmap.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "kagent.fullname" . }}-controller + namespace: {{ include "kagent.namespace" . }} + labels: + {{- include "kagent.controller.labels" . | nindent 4 }} +data: + DATABASE_TYPE: {{ .Values.database.type | quote }} + DEFAULT_MODEL_CONFIG_NAME: {{ include "kagent.defaultModelConfigName" . | quote }} + IMAGE_PULL_POLICY: {{ .Values.controller.agentImage.pullPolicy | default .Values.imagePullPolicy | quote }} + {{- if and .Values.controller.agentImage.pullSecret (not (eq .Values.controller.agentImage.pullSecret "")) }} + IMAGE_PULL_SECRET: {{ .Values.controller.agentImage.pullSecret | quote }} + {{- end }} + IMAGE_REGISTRY: {{ .Values.controller.agentImage.registry | default .Values.registry | quote }} + IMAGE_REPOSITORY: {{ .Values.controller.agentImage.repository | quote }} + IMAGE_TAG: {{ coalesce .Values.controller.agentImage.tag .Values.tag .Chart.Version | quote }} + OTEL_EXPORTER_OTLP_ENDPOINT: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote }} + OTEL_EXPORTER_OTLP_LOGS_ENDPOINT: {{ .Values.otel.logging.exporter.otlp.endpoint | quote }} + OTEL_EXPORTER_OTLP_LOGS_INSECURE: {{ .Values.otel.logging.exporter.otlp.insecure | quote }} + OTEL_EXPORTER_OTLP_LOGS_TIMEOUT: {{ .Values.otel.logging.exporter.otlp.timeout | quote }} + OTEL_EXPORTER_OTLP_TRACES_INSECURE: {{ .Values.otel.tracing.exporter.otlp.insecure | quote }} + OTEL_EXPORTER_OTLP_TRACES_TIMEOUT: {{ .Values.otel.tracing.exporter.otlp.timeout | quote }} + OTEL_LOGGING_ENABLED: {{ .Values.otel.logging.enabled | quote }} + OTEL_TRACING_ENABLED: {{ .Values.otel.tracing.enabled | quote }} + OTEL_TRACING_EXPORTER_OTLP_ENDPOINT: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote }} + {{- if eq .Values.database.type "sqlite" }} + SQLITE_DATABASE_PATH: /sqlite-volume/{{ .Values.database.sqlite.databaseName }} + {{- else if and (eq .Values.database.type "postgres") (not (eq .Values.database.postgres.url "")) }} + POSTGRES_DATABASE_URL: {{ .Values.database.postgres.url | quote }} + {{- end }} + STREAMING_INITIAL_BUF_SIZE: {{ .Values.controller.streaming.initialBufSize | quote }} + STREAMING_MAX_BUF_SIZE: {{ .Values.controller.streaming.maxBufSize | quote }} + STREAMING_TIMEOUT: {{ .Values.controller.streaming.timeout | quote }} + WATCH_NAMESPACES: {{ include "kagent.watchNamespaces" . | quote }} + ZAP_LOG_LEVEL: {{ .Values.controller.loglevel | quote }} diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index 2044c332f..65e2521ec 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -13,6 +13,7 @@ spec: template: metadata: annotations: + checksum/configmap: {{ include (print $.Template.BasePath "/controller-configmap.yaml") . | sha256sum }} {{- with .Values.controller.podAnnotations }} {{- toYaml . | nindent 8 }} {{- end }} @@ -44,78 +45,27 @@ spec: {{- end }} containers: - name: controller - args: - # Consider using env vars (stored in a dedicated ConfigMap(s)) rather than this - {{/* #we need to pass the default model config name to the app otherwise helm upgrade will not allow provider type change due to validations */}} - - -default-model-config-name - - {{ include "kagent.defaultModelConfigName" . | quote }} - - -zap-log-level - - {{ .Values.controller.loglevel }} - - -watch-namespaces - - "{{ include "kagent.watchNamespaces" . }}" - - -streaming-max-buf-size - - {{ .Values.controller.streaming.maxBufSize | quote }} - - -streaming-initial-buf-size - - {{ .Values.controller.streaming.initialBufSize | quote }} - - -streaming-timeout - - {{ .Values.controller.streaming.timeout | quote }} - - -database-type - - {{ .Values.database.type }} - {{- if eq .Values.database.type "sqlite" }} - - -sqlite-database-path - - /sqlite-volume/{{ .Values.database.sqlite.databaseName }} - {{- else if eq .Values.database.type "postgres" }} - - -postgres-database-url - - {{ .Values.database.postgres.url }} - {{- end }} - - -image-registry - - {{ .Values.controller.agentImage.registry | default .Values.registry }} - - -image-repository - - {{ .Values.controller.agentImage.repository }} - - -image-tag - - {{ coalesce .Values.controller.agentImage.tag .Values.tag .Chart.Version }} - - -image-pull-policy - - {{ .Values.controller.agentImage.pullPolicy | default .Values.imagePullPolicy }} - {{- if and .Values.controller.agentImage.pullSecret (not (eq .Values.controller.agentImage.pullSecret "")) }} - - -image-pull-secret - - {{ .Values.controller.agentImage.pullSecret | default "" }} - {{- end }} - securityContext: - {{- toYaml .Values.controller.securityContext | nindent 12 }} image: "{{ .Values.controller.image.registry | default .Values.registry }}/{{ .Values.controller.image.repository }}:{{ coalesce .Values.tag .Values.controller.image.tag .Chart.Version }}" imagePullPolicy: {{ .Values.controller.image.pullPolicy | default .Values.imagePullPolicy }} - resources: - {{- toYaml .Values.controller.resources | nindent 12 }} env: - name: KAGENT_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace - - name: OTEL_TRACING_ENABLED - value: {{ .Values.otel.tracing.enabled | quote }} - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote }} - - name: OTEL_TRACING_EXPORTER_OTLP_ENDPOINT - value: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_TIMEOUT - value: {{ .Values.otel.tracing.exporter.otlp.timeout | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_INSECURE - value: {{ .Values.otel.tracing.exporter.otlp.insecure | quote }} - - name: OTEL_LOGGING_ENABLED - value: {{ .Values.otel.logging.enabled | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT - value: {{ .Values.otel.logging.exporter.otlp.endpoint | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_TIMEOUT - value: {{ .Values.otel.logging.exporter.otlp.timeout | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_INSECURE - value: {{ .Values.otel.logging.exporter.otlp.insecure | quote }} {{- with .Values.controller.env }} {{- toYaml . | nindent 12 }} {{- end }} + envFrom: + - configMapRef: + name: {{ include "kagent.fullname" . }}-controller ports: - name: http containerPort: {{ .Values.controller.service.ports.targetPort }} protocol: TCP + resources: + {{- toYaml .Values.controller.resources | nindent 12 }} + securityContext: + {{- toYaml .Values.controller.securityContext | nindent 12 }} startupProbe: httpGet: path: /health diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index f62fdef9f..ec2d09aae 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -1,6 +1,7 @@ suite: test controller deployment templates: - controller-deployment.yaml + - controller-configmap.yaml tests: - it: should render controller deployment with default values template: controller-deployment.yaml @@ -76,54 +77,48 @@ tests: value: 8083 - it: should use custom loglevel when set - template: controller-deployment.yaml + template: controller-configmap.yaml set: controller: loglevel: "debug" asserts: - - contains: - path: spec.template.spec.containers[0].args - content: "debug" + - equal: + path: data.ZAP_LOG_LEVEL + value: "debug" - it: should use controller.agentImage.pullSecret when set - template: controller-deployment.yaml + template: controller-configmap.yaml set: controller: agentImage: pullSecret: "pull-secret" asserts: - - contains: - path: spec.template.spec.containers[0].args - content: "-image-pull-secret" - - contains: - path: spec.template.spec.containers[0].args - content: "pull-secret" + - equal: + path: data.IMAGE_PULL_SECRET + value: "pull-secret" - it: should not use controller.agentImage.pullSecret when not set - template: controller-deployment.yaml + template: controller-configmap.yaml set: controller: agentImage: pullSecret: "" asserts: - - notContains: - path: spec.template.spec.containers[0].args - content: "-image-pull-secret" - - notContains: - path: spec.template.spec.containers[0].args - content: "pull-secret" + - notExists: + path: data.IMAGE_PULL_SECRET - it: should configure watch namespaces - template: controller-deployment.yaml + template: controller-configmap.yaml set: controller: watchNamespaces: - namespace-1 - namespace-2 asserts: - - contains: - path: spec.template.spec.containers[0].args - content: "namespace-1,namespace-2" + - equal: + path: data.WATCH_NAMESPACES + value: "namespace-1,namespace-2" - it: should set nodeSelector + template: controller-deployment.yaml set: controller: nodeSelector: @@ -135,6 +130,7 @@ tests: role: AI - it: should set tolerations + template: controller-deployment.yaml set: controller: tolerations: From e30f1c5c4a8b3ec58b2db7adea3205b7af90bd61 Mon Sep 17 00:00:00 2001 From: Brian Fox Date: Mon, 24 Nov 2025 11:55:17 +0100 Subject: [PATCH 3/3] feat(helm): add support for user-supplied `envFrom` for controller Enables configuration of sensitive variable (e.g. postgres database URL) via secrets rather than configmap. Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> --- helm/kagent/templates/controller-deployment.yaml | 3 +++ helm/kagent/values.yaml | 1 + 2 files changed, 4 insertions(+) diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index 65e2521ec..84042203c 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -58,6 +58,9 @@ spec: envFrom: - configMapRef: name: {{ include "kagent.fullname" . }}-controller + {{- with .Values.controller.envFrom }} + {{- toYaml . | nindent 12 }} + {{- end }} ports: - name: http containerPort: {{ .Values.controller.service.ports.targetPort }} diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index bc28e3e98..b4e6ad3bf 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -104,6 +104,7 @@ controller: port: 8083 targetPort: 8083 env: [] + envFrom: [] # Additional volumes on the output Deployment definition. volumes: []