From be73faa42f41ae3a06969eca1bc17c5ec21fa242 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Wed, 13 Nov 2024 11:48:33 -0500 Subject: [PATCH 01/19] update apis --- docs/content/reference/values.txt | 47 +++++ ...ateway.gloo.solo.io_gatewayparameters.yaml | 171 ++++++++++++++++++ install/helm/gloo/generate/values.go | 11 ++ projects/gateway2/api/v1alpha1/kube_types.go | 82 +++++++++ .../api/v1alpha1/zz_generated.deepcopy.go | 45 +++++ projects/gateway2/deployer/values.go | 19 +- 6 files changed, 368 insertions(+), 7 deletions(-) diff --git a/docs/content/reference/values.txt b/docs/content/reference/values.txt index 49cc3166acf..af026605931 100644 --- a/docs/content/reference/values.txt +++ b/docs/content/reference/values.txt @@ -184,6 +184,53 @@ |kubeGateway.gatewayParameters.glooGateway.aiExtension.ports[].protocol|string||| |kubeGateway.gatewayParameters.glooGateway.aiExtension.ports[].hostIP|string||| |kubeGateway.gatewayParameters.glooGateway.floatingUserId|bool||If true, allows the cluster to dynamically assign a user ID for the processes running in the container. Default is false.| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown.enabled|bool||Enable grace period before shutdown to finish current requests while Envoy health checks fail to e.g. notify external load balancers. *NOTE:* This will not have any effect if you have not defined health checks via the health check filter| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown.sleepTimeSeconds|int||Time (in seconds) for the preStop hook to wait before allowing Envoy to terminate| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.terminationGracePeriodSeconds|int||Time in seconds to wait for the pod to terminate gracefully. 
See [kubernetes docs](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#istio-lifecycle) for more info.| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.probes|bool||Set to true to enable a readiness probe (default is false). Then, you can also enable a liveness probe.| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled|bool||Set to true to enable a liveness probe (default is false).| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.exec.command[]|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.path|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.port|int64||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.port|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.port|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.host|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.scheme|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.httpHeaders[].name|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.httpHeaders[].value|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.tcpSocket.port|int64||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.tcpSocket.port|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.tcpSocket.port|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.tcpSocket.host|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.grpc.port|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.grpc.service|string||| 
+|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.initialDelaySeconds|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.timeoutSeconds|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.periodSeconds|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.successThreshold|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.failureThreshold|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.terminationGracePeriodSeconds|int64||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.exec.command[]|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.httpGet.path|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.httpGet.port|int64||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.httpGet.port|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.httpGet.port|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.httpGet.host|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.httpGet.scheme|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.httpGet.httpHeaders[].name|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.httpGet.httpHeaders[].value|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.tcpSocket.port|int64||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.tcpSocket.port|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.tcpSocket.port|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.tcpSocket.host|string||| 
+|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.grpc.port|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.grpc.service|string||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.initialDelaySeconds|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.timeoutSeconds|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.periodSeconds|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.successThreshold|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.failureThreshold|int32||| +|kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.terminationGracePeriodSeconds|int64||| |kubeGateway.portal.enabled|bool|false|Enable the Gloo Gateway Portal controller and web server.| |settings.watchNamespaces[]|string||whitelist of namespaces for Gloo Edge to watch for services and CRDs. Empty list means all namespaces. If this and WatchNamespaceSelectors are specified, this takes precedence and WatchNamespaceSelectors is ignored| |settings.watchNamespaceSelectors|interface||A list of Kubernetes selectors that specify the set of namespaces to restrict the namespaces that Gloo controllers take into consideration when watching for resources. Elements in the list are disjunctive (OR semantics), i.e. a namespace will be included if it matches any selector. An empty list means all namespaces. 
If this and WatchNamespaces are specified, WatchNamespaces takes precedence and this is ignored| diff --git a/install/helm/gloo/crds/gateway.gloo.solo.io_gatewayparameters.yaml b/install/helm/gloo/crds/gateway.gloo.solo.io_gatewayparameters.yaml index 322516cabae..7e7fb5fbe5f 100644 --- a/install/helm/gloo/crds/gateway.gloo.solo.io_gatewayparameters.yaml +++ b/install/helm/gloo/crds/gateway.gloo.solo.io_gatewayparameters.yaml @@ -1667,6 +1667,13 @@ spec: additionalProperties: type: string type: object + gracefulShutdown: + properties: + enabled: + type: boolean + sleepTimeSeconds: + type: integer + type: object imagePullSecrets: items: properties: @@ -1676,10 +1683,172 @@ spec: type: object x-kubernetes-map-type: atomic type: array + livenessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object nodeSelector: additionalProperties: type: string type: object 
+ readinessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object securityContext: properties: appArmorProfile: @@ -1757,6 +1926,8 @@ spec: type: string type: object type: object + terminationGracePeriodSeconds: + type: integer tolerations: items: properties: diff --git a/install/helm/gloo/generate/values.go b/install/helm/gloo/generate/values.go index fdfd06998c8..87ce1536aa4 100644 --- a/install/helm/gloo/generate/values.go +++ b/install/helm/gloo/generate/values.go @@ -339,9 +339,20 @@ type GatewayParameters struct { Stats *GatewayParamsStatsConfig `json:"stats,omitempty" desc:"Config used to manage the stats endpoints exposed on the deployed proxies"` AIExtension *GatewayParamsAIExtension `json:"aiExtension,omitempty" desc:"Config used to manage the Gloo Gateway AI extension."` FloatingUserId *bool `json:"floatingUserId,omitempty" desc:"If true, allows 
the cluster to dynamically assign a user ID for the processes running in the container. Default is false."` + PodTemplate *GatewayParamsPodTemplate `json:"podTemplate,omitempty"` // TODO(npolshak): Add support for GlooMtls } +// GatewayParamsPodTemplate contains the Helm API available to configure the PodTemplate on the gateway Deployment +type GatewayParamsPodTemplate struct { + GracefulShutdown *GracefulShutdownSpec `json:"gracefulShutdown,omitempty"` + TerminationGracePeriodSeconds *int `json:"terminationGracePeriodSeconds,omitempty" desc:"Time in seconds to wait for the pod to terminate gracefully. See [kubernetes docs](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#istio-lifecycle) for more info."` + Probes *bool `json:"probes,omitempty" desc:"Set to true to enable a readiness probe (default is false). Then, you can also enable a liveness probe."` + LivenessProbeEnabled *bool `json:"livenessProbeEnabled,omitempty" desc:"Set to true to enable a liveness probe (default is false)."` + CustomReadinessProbe *corev1.Probe `json:"customReadinessProbe,omitempty"` + CustomLivenessProbe *corev1.Probe `json:"customLivenessProbe,omitempty"` +} + type GatewayParamsStatsConfig struct { Enabled *bool `json:"enabled,omitempty" desc:"Enable the prometheus endpoint"` RoutePrefixRewrite *string `json:"routePrefixRewrite,omitempty" desc:"Set the prefix rewrite used for the prometheus endpoint"` diff --git a/projects/gateway2/api/v1alpha1/kube_types.go b/projects/gateway2/api/v1alpha1/kube_types.go index 25f28c269ce..64028b3e39a 100644 --- a/projects/gateway2/api/v1alpha1/kube_types.go +++ b/projects/gateway2/api/v1alpha1/kube_types.go @@ -199,6 +199,34 @@ type Pod struct { // // +kubebuilder:validation:Optional Tolerations []*corev1.Toleration `json:"tolerations,omitempty"` + + // If specified, the pod's graceful shutdown spec. 
+ // + // +kubebuilder:validation:Optional + GracefulShutdown *GracefulShutdownSpec `json:"gracefulShutdown,omitempty"` + + // If specified, the pod's termination grace period in seconds. See + // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.26/#pod-v1-core + // for details + // + // +kubebuilder:validation:Optional + TerminationGracePeriodSeconds *int `json:"terminationGracePeriodSeconds,omitempty"` + + // If specified, the pod's readiness probe. Periodic probe of container service readiness. + // Container will be removed from service endpoints if the probe fails. See + // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.26/#probe-v1-core + // for details. + // + // +kubebuilder:validation:Optional + ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"` + + // If specified, the pod's liveness probe. Periodic probe of container liveness. + // Container will be restarted if the probe fails. See + // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.26/#probe-v1-core + // for details. + // + // +kubebuilder:validation:Optional + LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"` } func (in *Pod) GetExtraLabels() map[string]string { @@ -249,3 +277,57 @@ func (in *Pod) GetTolerations() []*corev1.Toleration { } return in.Tolerations } + +func (in *Pod) GetReadinessProbe() *corev1.Probe { + if in == nil { + return nil + } + return in.ReadinessProbe +} + +func (in *Pod) GetGracefulShutdown() *GracefulShutdownSpec { + if in == nil { + return nil + } + return in.GracefulShutdown +} + +func (in *Pod) GetTerminationGracePeriodSeconds() *int { + if in == nil { + return nil + } + return in.TerminationGracePeriodSeconds +} + +func (in *Pod) GetLivenessProbe() *corev1.Probe { + if in == nil { + return nil + } + return in.LivenessProbe +} + +type GracefulShutdownSpec struct { + // Enable grace period before shutdown to finish current requests while Envoy health checks fail to e.g. 
notify external load balancers. *NOTE:* This will not have any effect if you have not defined health checks via the health check filter + // + // +kubebuilder:validation:Optional + Enabled *bool `json:"enabled,omitempty"` + + // Time (in seconds) for the preStop hook to wait before allowing Envoy to terminate + // + // +kubebuilder:validation:Optional + SleepTimeSeconds *int `json:"sleepTimeSeconds,omitempty"` +} + +func (in *GracefulShutdownSpec) GetEnabled() *bool { + if in == nil { + return nil + } + return in.Enabled +} + +func (in *GracefulShutdownSpec) GetSleepTimeSeconds() *int { + if in == nil { + return nil + } + return in.SleepTimeSeconds +} diff --git a/projects/gateway2/api/v1alpha1/zz_generated.deepcopy.go b/projects/gateway2/api/v1alpha1/zz_generated.deepcopy.go index 5f6d242d0f7..212c08e2b49 100644 --- a/projects/gateway2/api/v1alpha1/zz_generated.deepcopy.go +++ b/projects/gateway2/api/v1alpha1/zz_generated.deepcopy.go @@ -372,6 +372,31 @@ func (in *GatewayParametersStatus) DeepCopy() *GatewayParametersStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GracefulShutdownSpec) DeepCopyInto(out *GracefulShutdownSpec) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } + if in.SleepTimeSeconds != nil { + in, out := &in.SleepTimeSeconds, &out.SleepTimeSeconds + *out = new(int) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GracefulShutdownSpec. +func (in *GracefulShutdownSpec) DeepCopy() *GracefulShutdownSpec { + if in == nil { + return nil + } + out := new(GracefulShutdownSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Image) DeepCopyInto(out *Image) { *out = *in @@ -608,6 +633,26 @@ func (in *Pod) DeepCopyInto(out *Pod) { } } } + if in.GracefulShutdown != nil { + in, out := &in.GracefulShutdown, &out.GracefulShutdown + *out = new(GracefulShutdownSpec) + (*in).DeepCopyInto(*out) + } + if in.TerminationGracePeriodSeconds != nil { + in, out := &in.TerminationGracePeriodSeconds, &out.TerminationGracePeriodSeconds + *out = new(int) + **out = **in + } + if in.ReadinessProbe != nil { + in, out := &in.ReadinessProbe, &out.ReadinessProbe + *out = new(v1.Probe) + (*in).DeepCopyInto(*out) + } + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(v1.Probe) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Pod. diff --git a/projects/gateway2/deployer/values.go b/projects/gateway2/deployer/values.go index 7cc5741cceb..4fc0f2a6621 100644 --- a/projects/gateway2/deployer/values.go +++ b/projects/gateway2/deployer/values.go @@ -1,6 +1,7 @@ package deployer import ( + "github.com/solo-io/gloo/projects/gateway2/api/v1alpha1" corev1 "k8s.io/api/core/v1" ) @@ -28,13 +29,17 @@ type helmGateway struct { ServiceAccount *helmServiceAccount `json:"serviceAccount,omitempty"` // pod template values - ExtraPodAnnotations map[string]string `json:"extraPodAnnotations,omitempty"` - ExtraPodLabels map[string]string `json:"extraPodLabels,omitempty"` - ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` - PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"` - NodeSelector map[string]string `json:"nodeSelector,omitempty"` - Affinity *corev1.Affinity `json:"affinity,omitempty"` - Tolerations []*corev1.Toleration `json:"tolerations,omitempty"` + ExtraPodAnnotations map[string]string `json:"extraPodAnnotations,omitempty"` + ExtraPodLabels map[string]string `json:"extraPodLabels,omitempty"` + ImagePullSecrets 
[]corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` + PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + Affinity *corev1.Affinity `json:"affinity,omitempty"` + Tolerations []*corev1.Toleration `json:"tolerations,omitempty"` + ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"` + LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"` + GracefulShutdown *v1alpha1.GracefulShutdownSpec `json:"gracefulShutdown,omitempty"` + TerminationGracePeriodSeconds *int `json:"terminationGracePeriodSeconds,omitempty"` // sds container values SdsContainer *helmSdsContainer `json:"sdsContainer,omitempty"` From 7ee19929bc7290a5568a2987691e57593a2e4c9f Mon Sep 17 00:00:00 2001 From: David Jumani Date: Wed, 13 Nov 2024 11:48:59 -0500 Subject: [PATCH 02/19] helm changes --- .../gloo/templates/43-gatewayparameters.yaml | 41 ++++++++++++++++++- projects/gateway2/deployer/deployer.go | 4 ++ .../templates/gateway/proxy-deployment.yaml | 40 ++++++++++++++---- 3 files changed, 75 insertions(+), 10 deletions(-) diff --git a/install/helm/gloo/templates/43-gatewayparameters.yaml b/install/helm/gloo/templates/43-gatewayparameters.yaml index e7bda342e21..c55457e9c3a 100644 --- a/install/helm/gloo/templates/43-gatewayparameters.yaml +++ b/install/helm/gloo/templates/43-gatewayparameters.yaml @@ -72,7 +72,46 @@ spec: {{- end }}{{/* if $gg.envoyContainer */}} podTemplate: extraLabels: - {{- include "gloo-gateway.constLabels" . | nindent 8 }} + {{- include "gloo-gateway.constLabels" . 
| nindent 8 }} +{{- if $gg.podTemplate }} +{{- if $gg.podTemplate.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ $gg.podTemplate.terminationGracePeriodSeconds }} +{{- end }}{{/* if $gg.podTemplate.terminationGracePeriodSeconds */}} +{{- if $gg.podTemplate.gracefulShutdown }} + gracefulShutdown: + {{- toYaml $gg.podTemplate.gracefulShutdown | nindent 8 }} +{{- end }}{{/* if $gg.podTemplate.gracefulShutdown */}} +{{- if $gg.podTemplate.probes }} + readinessProbe: +{{- if $gg.podTemplate.customReadinessProbe }} +{{ toYaml $gg.podTemplate.customReadinessProbe | indent 8}} +{{- else }} + httpGet: + scheme: HTTP + port: 8082 + path: /envoy-hc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 2 +{{- end}}{{/* if $gg.podTemplate.customReadinessProbe */}} +{{- if $gg.podTemplate.livenessProbeEnabled }} + livenessProbe: +{{- if $gg.podTemplate.customLivenessProbe }} +{{ toYaml $gg.podTemplate.customLivenessProbe | indent 8}} +{{- else }} + exec: + command: + - wget + - -O + - /dev/null + - 127.0.0.1:19000/server_info + initialDelaySeconds: 3 + periodSeconds: 10 + failureThreshold: 3 +{{- end }}{{/*if $gg.podTemplate.customLivenessProbe*/}} +{{- end }}{{/*if $gg.podTemplate.livenessProbeEnabled*/}} +{{- end }}{{/*if $gg.podTemplate.probes*/}} +{{- end }}{{/*if $gg.podTemplate */}} {{- if $gg.sdsContainer }} sdsContainer: image: diff --git a/projects/gateway2/deployer/deployer.go b/projects/gateway2/deployer/deployer.go index bb03aae028d..eb0ce8b2ac0 100644 --- a/projects/gateway2/deployer/deployer.go +++ b/projects/gateway2/deployer/deployer.go @@ -326,6 +326,10 @@ func (d *Deployer) getValues(gw *api.Gateway, gwParam *v1alpha1.GatewayParameter gateway.NodeSelector = podConfig.GetNodeSelector() gateway.Affinity = podConfig.GetAffinity() gateway.Tolerations = podConfig.GetTolerations() + gateway.ReadinessProbe = podConfig.GetReadinessProbe() + gateway.LivenessProbe = podConfig.GetLivenessProbe() + gateway.GracefulShutdown = 
podConfig.GetGracefulShutdown() + gateway.TerminationGracePeriodSeconds = podConfig.GetTerminationGracePeriodSeconds() // envoy container values logLevel := envoyContainerConfig.GetBootstrap().GetLogLevel() diff --git a/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml b/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml index 9796b4c14f8..200263fec18 100644 --- a/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml +++ b/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml @@ -89,15 +89,25 @@ spec: - name: http-monitoring containerPort: 9091 {{- end }} - exec: - command: - - wget - - -O - - /dev/null - - 127.0.0.1:19000/ready - initialDelaySeconds: 3 - periodSeconds: 10 - failureThreshold: 3 +{{- if $gateway.readinessProbe }} + readinessProbe: +{{ toYaml $gateway.readinessProbe | indent 10}} +{{- end }}{{/*if $gateway.readinessProbe*/}} +{{- if $gateway.livenessProbe }} + livenessProbe: +{{ toYaml $gateway.livenessProbe | indent 10}} +{{- end }}{{/*if $gateway.livenessProbe*/}} +{{- if $gateway.gracefulShutdown }} +{{- if $gateway.gracefulShutdown.enabled }} + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - wget --post-data "" -O /dev/null 127.0.0.1:19000/healthcheck/fail; sleep {{ $gateway.gracefulShutdown.sleepTimeSeconds | default "10" }} +{{- end}}{{/*if $gateway.gracefulShutdown.enabled*/}} +{{- end}}{{/*if $gateway.gracefulShutdown*/}} {{- if $gateway.resources }} resources: {{- toYaml $gateway.resources | nindent 10 }} @@ -301,6 +311,9 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} + {{- if $gateway.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ $gateway.terminationGracePeriodSeconds }} + {{- end }} volumes: - configMap: name: {{ include "gloo-gateway.gateway.fullname" . 
}} @@ -439,6 +452,15 @@ data: route: cluster: admin_port_cluster http_filters: +{{- if $gateway.readinessProbe }} + - name: envoy.filters.http.health_check + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.health_check.v3.HealthCheck + pass_through_mode: false + headers: + - name: ":path" + exact_match: "/envoy-hc" +{{- end }}{{/*if $gateway.readinessProbe*/}} - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router From 33ebc017b507d761666c8d2173a4519ad5a5712d Mon Sep 17 00:00:00 2001 From: David Jumani Date: Wed, 13 Nov 2024 11:49:39 -0500 Subject: [PATCH 03/19] add tests --- install/test/k8sgateway_test.go | 144 ++++++++++++++++++++ projects/gateway2/deployer/deployer_test.go | 79 +++++++++++ 2 files changed, 223 insertions(+) diff --git a/install/test/k8sgateway_test.go b/install/test/k8sgateway_test.go index 845fec4fca2..85ed653a25b 100644 --- a/install/test/k8sgateway_test.go +++ b/install/test/k8sgateway_test.go @@ -15,6 +15,8 @@ import ( . 
"github.com/solo-io/k8s-utils/manifesttestutils" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/pointer" ) var _ = Describe("Kubernetes Gateway API integration", func() { @@ -355,6 +357,148 @@ var _ = Describe("Kubernetes Gateway API integration", func() { Entry("locally undefined, globally undefined", false), ) }) + + Context("probes and graceful shutdown", func() { + When("nothing is specified", func() { + It("does not render probes and graceful shutdown", func() { + gwp := getDefaultGatewayParameters(testManifest) + + gwpPT := gwp.Spec.Kube.PodTemplate + Expect(gwpPT).ToNot(BeNil()) + + Expect(gwpPT.LivenessProbe).To(BeNil()) + Expect(gwpPT.ReadinessProbe).To(BeNil()) + Expect(gwpPT.GracefulShutdown).To(BeNil()) + Expect(gwpPT.TerminationGracePeriodSeconds).To(BeNil()) + }) + }) + + When("probes are enabled", func() { + BeforeEach(func() { + extraValuesArgs := []string{ + "kubeGateway.gatewayParameters.glooGateway.podTemplate.probes=true", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled=true", + } + + valuesArgs = append(valuesArgs, extraValuesArgs...) 
+ }) + + It("sets the default values of the probes", func() { + gwp := getDefaultGatewayParameters(testManifest) + gwpPT := gwp.Spec.Kube.PodTemplate + Expect(*gwpPT.LivenessProbe).To(BeEquivalentTo(corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{ + "wget", + "-O", + "/dev/null", + "127.0.0.1:19000/server_info", + }, + }, + }, + InitialDelaySeconds: 3, + PeriodSeconds: 10, + FailureThreshold: 3, + })) + Expect(*gwpPT.ReadinessProbe).To(BeEquivalentTo(corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Scheme: "HTTP", + Port: intstr.IntOrString{ + IntVal: 8082, + }, + Path: "/envoy-hc", + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 5, + FailureThreshold: 2, + })) + + }) + }) + + When("custom probes are defined", func() { + BeforeEach(func() { + extraValuesArgs := []string{ + "kubeGateway.gatewayParameters.glooGateway.podTemplate.probes=true", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.scheme=HTTP", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.port=9090", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.path=/custom-readiness", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.failureThreshold=1", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.initialDelaySeconds=2", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.periodSeconds=3", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled=true", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.exec.command[0]=wget", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.exec.command[1]=-O", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.exec.command[2]=/dev/null", + 
"kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.exec.command[3]=127.0.0.1:9090/custom-liveness", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.failureThreshold=4", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.initialDelaySeconds=5", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.periodSeconds=6", + } + + valuesArgs = append(valuesArgs, extraValuesArgs...) + }) + + It("sets the custom values of the probes", func() { + gwp := getDefaultGatewayParameters(testManifest) + gwpPT := gwp.Spec.Kube.PodTemplate + Expect(*gwpPT.ReadinessProbe).To(BeEquivalentTo(corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Scheme: "HTTP", + Port: intstr.IntOrString{ + IntVal: 9090, + }, + Path: "/custom-readiness", + }, + }, + FailureThreshold: 1, + InitialDelaySeconds: 2, + PeriodSeconds: 3, + })) + Expect(*gwpPT.LivenessProbe).To(BeEquivalentTo(corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{ + "wget", + "-O", + "/dev/null", + "127.0.0.1:9090/custom-liveness", + }, + }, + }, + FailureThreshold: 4, + InitialDelaySeconds: 5, + PeriodSeconds: 6, + })) + }) + }) + + When("gracefulShutdown and terminationGracePeriod is enabled", func() { + BeforeEach(func() { + extraValuesArgs := []string{ + "kubeGateway.gatewayParameters.glooGateway.podTemplate.terminationGracePeriodSeconds=7", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown.enabled=true", + "kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown.sleepTimeSeconds=5", + } + + valuesArgs = append(valuesArgs, extraValuesArgs...) 
+ }) + + It("sets the custom values", func() { + gwp := getDefaultGatewayParameters(testManifest) + gwpPT := gwp.Spec.Kube.PodTemplate + Expect(*gwpPT.TerminationGracePeriodSeconds).To(Equal(7)) + Expect(*gwpPT.GracefulShutdown).To(BeEquivalentTo(v1alpha1.GracefulShutdownSpec{ + Enabled: pointer.Bool(true), + SleepTimeSeconds: pointer.Int(5), + })) + }) + }) + }) }) When("kube gateway integration is disabled (default)", func() { diff --git a/projects/gateway2/deployer/deployer_test.go b/projects/gateway2/deployer/deployer_test.go index 5a928f28a6f..9890800f16d 100644 --- a/projects/gateway2/deployer/deployer_test.go +++ b/projects/gateway2/deployer/deployer_test.go @@ -25,7 +25,9 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/yaml" + "k8s.io/utils/pointer" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -752,6 +754,18 @@ var _ = Describe("Deployer", func() { return fullyDefinedGatewayParameters(wellknown.DefaultGatewayParametersName, defaultNamespace) } + fullyDefinedGatewayParamsWithProbes = func() *gw2_v1alpha1.GatewayParameters { + params := fullyDefinedGatewayParameters(wellknown.DefaultGatewayParametersName, defaultNamespace) + params.Spec.Kube.PodTemplate.LivenessProbe = generateLivenessProbe() + params.Spec.Kube.PodTemplate.ReadinessProbe = generateReadinessProbe() + params.Spec.Kube.PodTemplate.TerminationGracePeriodSeconds = pointer.Int(5) + params.Spec.Kube.PodTemplate.GracefulShutdown = &gw2_v1alpha1.GracefulShutdownSpec{ + Enabled: pointer.Bool(true), + SleepTimeSeconds: pointer.Int(7), + } + return params + } + fullyDefinedGatewayParamsWithFloatingUserId = func() *gw2_v1alpha1.GatewayParameters { params := fullyDefinedGatewayParameters(wellknown.DefaultGatewayParametersName, defaultNamespace) params.Spec.Kube.FloatingUserId = ptr.To(true) @@ 
-795,6 +809,7 @@ var _ = Describe("Deployer", func() { Expect(dep.Spec.Template.Spec.Containers[0].Image).To(ContainSubstring(":" + version.Version)) } Expect(dep.Spec.Template.Spec.Containers[0].ImagePullPolicy).To(Equal(*expectedGwp.EnvoyContainer.Image.PullPolicy)) + Expect(dep.Spec.Template.Annotations).To(matchers.ContainMapElements(expectedGwp.PodTemplate.ExtraAnnotations)) Expect(dep.Spec.Template.Annotations).To(HaveKeyWithValue("prometheus.io/scrape", "true")) Expect(dep.Spec.Template.Spec.SecurityContext.RunAsUser).To(Equal(expectedGwp.PodTemplate.SecurityContext.RunAsUser)) @@ -973,6 +988,27 @@ var _ = Describe("Deployer", func() { return nil } + fullyDefinedValidationWithProbes := func(objs clientObjects, inp *input) error { + err := fullyDefinedValidationWithoutRunAsUser(objs, inp) + if err != nil { + return err + } + + dep := objs.findDeployment(defaultNamespace, defaultDeploymentName) + Expect(*dep.Spec.Template.Spec.TerminationGracePeriodSeconds).To(Equal(int64(5))) + + envoyContainer := dep.Spec.Template.Spec.Containers[0] + Expect(envoyContainer.LivenessProbe).To(BeEquivalentTo(generateLivenessProbe())) + Expect(envoyContainer.ReadinessProbe).To(BeEquivalentTo(generateReadinessProbe())) + Expect(envoyContainer.Lifecycle.PreStop.Exec.Command).To(BeEquivalentTo([]string{ + "/bin/sh", + "-c", + "wget --post-data \"\" -O /dev/null 127.0.0.1:19000/healthcheck/fail; sleep 7", + })) + + return nil + } + fullyDefinedValidationFloatingUserId := func(objs clientObjects, inp *input) error { err := fullyDefinedValidationWithoutRunAsUser(objs, inp) if err != nil { @@ -1143,6 +1179,14 @@ var _ = Describe("Deployer", func() { }, &expectedOutput{ validationFunc: fullyDefinedValidation, }), + Entry("Fully defined GatewayParameters with probes", &input{ + dInputs: istioEnabledDeployerInputs(), + gw: defaultGateway(), + defaultGwp: fullyDefinedGatewayParamsWithProbes(), + }, &expectedOutput{ + validationFunc: fullyDefinedValidationWithProbes, + }), + Entry("Fully 
defined GatewayParameters with floating user id", &input{ dInputs: istioEnabledDeployerInputs(), gw: defaultGateway(), @@ -1550,3 +1594,38 @@ func fullyDefinedGatewayParameters(name, namespace string) *gw2_v1alpha1.Gateway }, } } + +func generateLivenessProbe() *corev1.Probe { + return &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{ + "wget", + "-O", + "/dev/null", + "127.0.0.1:19000/server_info", + }, + }, + }, + InitialDelaySeconds: 3, + PeriodSeconds: 10, + FailureThreshold: 3, + } +} + +func generateReadinessProbe() *corev1.Probe { + return &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Scheme: "HTTP", + Port: intstr.IntOrString{ + IntVal: 8082, + }, + Path: "/envoy-hc", + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 5, + FailureThreshold: 2, + } +} From debf04af5607053a82bb5f8d84f6d9dfd6b8c424 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Wed, 13 Nov 2024 11:53:00 -0500 Subject: [PATCH 04/19] add probes to tests --- .../tests/manifests/profiles/kubernetes-gateway.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml b/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml index 4941924918d..89067f667a6 100644 --- a/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml +++ b/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml @@ -13,6 +13,16 @@ global: # Configuration for the Kubernetes Gateway integration in Gloo Gateway kubeGateway: enabled: true + # Enable the probes to ensure zero downtime + gatewayParameters: + glooGateway: + podTemplate: + terminationGracePeriodSeconds: 7 + gracefulShutdown: + enabled: true + sleepTimeSeconds: 5 + probes: true + livenessProbeEnabled: true # Configuration for the statically deployed gateway-proxy that ships by default with Gloo Gateway gatewayProxies: From 
fd6152efc1d25950fe995109143a4a63b91ceb22 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Wed, 13 Nov 2024 13:05:57 -0500 Subject: [PATCH 05/19] add changelog --- .../add-readiness-liveness-probe.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml diff --git a/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml b/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml new file mode 100644 index 00000000000..fae9796e072 --- /dev/null +++ b/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml @@ -0,0 +1,16 @@ +changelog: +- type: NEW_FEATURE + issueLink: https://github.com/solo-io/solo-projects/issues/7084 + resolvesIssue: false + description: Adds the ability for users to enable as well as add custom readiness and liveness probes to the Kubernetes Gloo Gateway. +- type: HELM + issueLink: https://github.com/solo-io/solo-projects/issues/7084 + resolvesIssue: false + description: >- + Adds the following new fields that configure the Kubernetes Gloo Gateway pod : + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.terminationGracePeriodSeconds` to specify the terminationGracePeriodSeconds. + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown` to configure the graceful shutdown config for the envoy container. + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe` to specify a custom liveness probe for the envoy container. + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled` to enable the liveness probe. If the customLivenessProbe is not specified, a default liveness probe is set. + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe` to specify a custom readiness probe for the envoy container. + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.probes` to enable the readiness probe. 
If the customReadinessProbe is not specified, a default readiness probe is set. From 74de62bfd23a4751681464ba521c575783450883 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Wed, 13 Nov 2024 13:23:55 -0500 Subject: [PATCH 06/19] codegen --- .../api/v1alpha1/gateway_parameters.md | 642 ++++++++++++++++++ 1 file changed, 642 insertions(+) diff --git a/docs/content/reference/api/github.com/solo-io/gloo/projects/gateway2/api/v1alpha1/gateway_parameters.md b/docs/content/reference/api/github.com/solo-io/gloo/projects/gateway2/api/v1alpha1/gateway_parameters.md index 21b69b9b825..700cad8eb9f 100644 --- a/docs/content/reference/api/github.com/solo-io/gloo/projects/gateway2/api/v1alpha1/gateway_parameters.md +++ b/docs/content/reference/api/github.com/solo-io/gloo/projects/gateway2/api/v1alpha1/gateway_parameters.md @@ -4697,6 +4697,13 @@ Resource Types:
false + + gracefulShutdown + object + +
+ + false imagePullSecrets []object @@ -4704,6 +4711,13 @@ Resource Types:
false + + livenessProbe + object + +
+ + false nodeSelector map[string]string @@ -4711,6 +4725,13 @@ Resource Types:
false + + readinessProbe + object + +
+ + false securityContext object @@ -4718,6 +4739,13 @@ Resource Types:
false + + terminationGracePeriodSeconds + integer + +
+ + false tolerations []object @@ -6087,6 +6115,40 @@ Resource Types: +### GatewayParameters.spec.kube.podTemplate.gracefulShutdown +[↩ Parent](#gatewayparametersspeckubepodtemplate) + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
enabledboolean +
+
false
sleepTimeSecondsinteger +
+
false
+ + ### GatewayParameters.spec.kube.podTemplate.imagePullSecrets[index] [↩ Parent](#gatewayparametersspeckubepodtemplate) @@ -6116,6 +6178,586 @@ Resource Types: +### GatewayParameters.spec.kube.podTemplate.livenessProbe +[↩ Parent](#gatewayparametersspeckubepodtemplate) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
execobject +
+
false
failureThresholdinteger +
+
+ Format: int32
+
false
grpcobject +
+
false
httpGetobject +
+
false
initialDelaySecondsinteger +
+
+ Format: int32
+
false
periodSecondsinteger +
+
+ Format: int32
+
false
successThresholdinteger +
+
+ Format: int32
+
false
tcpSocketobject +
+
false
terminationGracePeriodSecondsinteger +
+
+ Format: int64
+
false
timeoutSecondsinteger +
+
+ Format: int32
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.livenessProbe.exec +[↩ Parent](#gatewayparametersspeckubepodtemplatelivenessprobe) + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
command[]string +
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.livenessProbe.grpc +[↩ Parent](#gatewayparametersspeckubepodtemplatelivenessprobe) + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
portinteger +
+
+ Format: int32
+
true
servicestring +
+
+ Default:
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.livenessProbe.httpGet +[↩ Parent](#gatewayparametersspeckubepodtemplatelivenessprobe) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
portint or string +
+
true
hoststring +
+
false
httpHeaders[]object +
+
false
pathstring +
+
false
schemestring +
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.livenessProbe.httpGet.httpHeaders[index] +[↩ Parent](#gatewayparametersspeckubepodtemplatelivenessprobehttpget) + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
namestring +
+
true
valuestring +
+
true
+ + +### GatewayParameters.spec.kube.podTemplate.livenessProbe.tcpSocket +[↩ Parent](#gatewayparametersspeckubepodtemplatelivenessprobe) + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
portint or string +
+
true
hoststring +
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.readinessProbe +[↩ Parent](#gatewayparametersspeckubepodtemplate) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
execobject +
+
false
failureThresholdinteger +
+
+ Format: int32
+
false
grpcobject +
+
false
httpGetobject +
+
false
initialDelaySecondsinteger +
+
+ Format: int32
+
false
periodSecondsinteger +
+
+ Format: int32
+
false
successThresholdinteger +
+
+ Format: int32
+
false
tcpSocketobject +
+
false
terminationGracePeriodSecondsinteger +
+
+ Format: int64
+
false
timeoutSecondsinteger +
+
+ Format: int32
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.readinessProbe.exec +[↩ Parent](#gatewayparametersspeckubepodtemplatereadinessprobe) + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
command[]string +
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.readinessProbe.grpc +[↩ Parent](#gatewayparametersspeckubepodtemplatereadinessprobe) + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
portinteger +
+
+ Format: int32
+
true
servicestring +
+
+ Default:
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.readinessProbe.httpGet +[↩ Parent](#gatewayparametersspeckubepodtemplatereadinessprobe) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
portint or string +
+
true
hoststring +
+
false
httpHeaders[]object +
+
false
pathstring +
+
false
schemestring +
+
false
+ + +### GatewayParameters.spec.kube.podTemplate.readinessProbe.httpGet.httpHeaders[index] +[↩ Parent](#gatewayparametersspeckubepodtemplatereadinessprobehttpget) + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
namestring +
+
true
valuestring +
+
true
+ + +### GatewayParameters.spec.kube.podTemplate.readinessProbe.tcpSocket +[↩ Parent](#gatewayparametersspeckubepodtemplatereadinessprobe) + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
portint or string +
+
true
hoststring +
+
false
+ + ### GatewayParameters.spec.kube.podTemplate.securityContext [↩ Parent](#gatewayparametersspeckubepodtemplate) From 43266cdbe2b3d650b0a22b55712e6050bd711063 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Wed, 13 Nov 2024 14:35:54 -0500 Subject: [PATCH 07/19] add merge --- projects/gateway2/deployer/merge.go | 132 ++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/projects/gateway2/deployer/merge.go b/projects/gateway2/deployer/merge.go index 7805e7404a0..608c0b36e7b 100644 --- a/projects/gateway2/deployer/merge.go +++ b/projects/gateway2/deployer/merge.go @@ -3,6 +3,7 @@ package deployer import ( "github.com/solo-io/gloo/projects/gateway2/api/v1alpha1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" ) // mergePointers will decide whether to use dst or src without dereferencing or recursing @@ -49,6 +50,15 @@ func deepMergeSlices[T any](dst, src []T) []T { return dst } +func overrideSlices[T any](dst, src []T) []T { + // nil src override means just use dst + if src == nil { + return dst + } + + return src +} + // Check against base value func mergeComparable[T comparable](dst, src T) T { var t T @@ -130,6 +140,10 @@ func deepMergePodTemplate(dst, src *v1alpha1.Pod) *v1alpha1.Pod { dst.NodeSelector = deepMergeMaps(dst.GetNodeSelector(), src.GetNodeSelector()) dst.Affinity = deepMergeAffinity(dst.GetAffinity(), src.GetAffinity()) dst.Tolerations = deepMergeSlices(dst.GetTolerations(), src.GetTolerations()) + dst.GracefulShutdown = deepMergeGracefulShutdown(dst.GetGracefulShutdown(), src.GetGracefulShutdown()) + dst.TerminationGracePeriodSeconds = mergePointers(dst.TerminationGracePeriodSeconds, src.TerminationGracePeriodSeconds) + dst.ReadinessProbe = deepMergeProbe(dst.GetReadinessProbe(), src.GetReadinessProbe()) + dst.LivenessProbe = deepMergeProbe(dst.GetLivenessProbe(), src.GetLivenessProbe()) return dst } @@ -290,6 +304,124 @@ func deepMergePodAntiAffinity(dst, src *corev1.PodAntiAffinity) *corev1.PodAntiA 
return dst } +func deepMergeProbe(dst, src *corev1.Probe) *corev1.Probe { + // nil src override means just use dst + if src == nil { + return dst + } + + if dst == nil { + return src + } + + dst.ProbeHandler = deepMergeProbeHandler(dst.ProbeHandler, src.ProbeHandler) + dst.InitialDelaySeconds = mergeComparable(dst.InitialDelaySeconds, src.InitialDelaySeconds) + dst.TimeoutSeconds = mergeComparable(dst.TimeoutSeconds, src.TimeoutSeconds) + dst.PeriodSeconds = mergeComparable(dst.PeriodSeconds, src.PeriodSeconds) + dst.SuccessThreshold = mergeComparable(dst.SuccessThreshold, src.SuccessThreshold) + dst.FailureThreshold = mergeComparable(dst.FailureThreshold, src.FailureThreshold) + dst.TerminationGracePeriodSeconds = mergePointers(dst.TerminationGracePeriodSeconds, src.TerminationGracePeriodSeconds) + + return dst +} + +func deepMergeProbeHandler(dst, src corev1.ProbeHandler) corev1.ProbeHandler { + dst.Exec = deepMergeExecAction(dst.Exec, src.Exec) + dst.HTTPGet = deepMergeHTTPGetAction(dst.HTTPGet, src.HTTPGet) + dst.TCPSocket = deepMergeTCPSocketAction(dst.TCPSocket, src.TCPSocket) + dst.GRPC = deepMergeGRPCAction(dst.GRPC, src.GRPC) + + return dst +} + +func deepMergeExecAction(dst, src *corev1.ExecAction) *corev1.ExecAction { + // nil src override means just use dst + if src == nil { + return dst + } + + if dst == nil { + return src + } + + // Don't merge the command string as that can break the entire probe + dst.Command = overrideSlices(dst.Command, src.Command) + + return dst +} + +func deepMergeHTTPGetAction(dst, src *corev1.HTTPGetAction) *corev1.HTTPGetAction { + // nil src override means just use dst + if src == nil { + return dst + } + + if dst == nil { + return src + } + + dst.Path = mergeComparable(dst.Path, src.Path) + dst.Port = mergeIntOrString(dst.Port, src.Port) + dst.Host = mergeComparable(dst.Host, src.Host) + dst.Scheme = mergeComparable(dst.Scheme, src.Scheme) + dst.HTTPHeaders = deepMergeSlices(dst.HTTPHeaders, src.HTTPHeaders) + + return dst 
+} + +func mergeIntOrString(dst, src intstr.IntOrString) intstr.IntOrString { + // Do not deep merge as this can cause a conflict between the name and number of the port to access on the container + return mergeComparable(dst, src) +} + +func deepMergeTCPSocketAction(dst, src *corev1.TCPSocketAction) *corev1.TCPSocketAction { + // nil src override means just use dst + if src == nil { + return dst + } + + if dst == nil { + return src + } + + dst.Port = mergeIntOrString(dst.Port, src.Port) + dst.Host = mergeComparable(dst.Host, src.Host) + + return dst +} + +func deepMergeGRPCAction(dst, src *corev1.GRPCAction) *corev1.GRPCAction { + // nil src override means just use dst + if src == nil { + return dst + } + + if dst == nil { + return src + } + + dst.Port = mergeComparable(dst.Port, src.Port) + dst.Service = mergePointers(dst.Service, src.Service) + + return dst +} + +func deepMergeGracefulShutdown(dst, src *v1alpha1.GracefulShutdownSpec) *v1alpha1.GracefulShutdownSpec { + // nil src override means just use dst + if src == nil { + return dst + } + + if dst == nil { + return src + } + + dst.Enabled = mergePointers(dst.Enabled, src.Enabled) + dst.SleepTimeSeconds = mergePointers(dst.SleepTimeSeconds, src.SleepTimeSeconds) + + return dst +} + func deepMergeService(dst, src *v1alpha1.Service) *v1alpha1.Service { // nil src override means just use dst if src == nil { From 4aa0e386da6da38b4d68dad1a6bd42c9e7805e2e Mon Sep 17 00:00:00 2001 From: David Jumani Date: Wed, 13 Nov 2024 20:35:27 -0500 Subject: [PATCH 08/19] add kubernetes tests --- .github/workflows/pr-kubernetes-tests.yaml | 2 +- pkg/utils/cmdutils/cmd.go | 11 ++ pkg/utils/cmdutils/local.go | 43 +++++- .../features/zero_downtime_rollout/suite.go | 126 ++++++++++++++++++ .../testdata/route-with-service.yaml | 56 ++++++++ .../testdata/service-for-route.yaml | 26 ++++ .../features/zero_downtime_rollout/types.go | 43 ++++++ test/kubernetes/e2e/tests/base/base_suite.go | 20 +++ .../profiles/kubernetes-gateway.yaml | 4 
+- .../e2e/tests/zero_downtime_test.go | 52 ++++++++ .../e2e/tests/zero_downtime_tests.go | 12 ++ 11 files changed, 391 insertions(+), 4 deletions(-) create mode 100644 test/kubernetes/e2e/features/zero_downtime_rollout/suite.go create mode 100644 test/kubernetes/e2e/features/zero_downtime_rollout/testdata/route-with-service.yaml create mode 100644 test/kubernetes/e2e/features/zero_downtime_rollout/testdata/service-for-route.yaml create mode 100644 test/kubernetes/e2e/features/zero_downtime_rollout/types.go create mode 100644 test/kubernetes/e2e/tests/zero_downtime_test.go create mode 100644 test/kubernetes/e2e/tests/zero_downtime_tests.go diff --git a/.github/workflows/pr-kubernetes-tests.yaml b/.github/workflows/pr-kubernetes-tests.yaml index d4af78516fd..7e7a9690bc7 100644 --- a/.github/workflows/pr-kubernetes-tests.yaml +++ b/.github/workflows/pr-kubernetes-tests.yaml @@ -84,7 +84,7 @@ jobs: # October 10, 2024: 12 minutes - cluster-name: 'cluster-six' go-test-args: '-v -timeout=25m' - go-test-run-regex: '^TestDiscoveryWatchlabels$$|^TestK8sGatewayNoValidation$$|^TestHelm$$|^TestHelmSettings$$|^TestK8sGatewayAws$$' + go-test-run-regex: '^TestDiscoveryWatchlabels$$|^TestK8sGatewayNoValidation$$|^TestHelm$$|^TestHelmSettings$$|^TestK8sGatewayAws$$|^TestZeroDowntimeRollout$$' # In our PR tests, we run the suite of tests using the upper ends of versions that we claim to support # The versions should mirror: https://docs.solo.io/gloo-edge/latest/reference/support/ diff --git a/pkg/utils/cmdutils/cmd.go b/pkg/utils/cmdutils/cmd.go index 1532d11df3a..b69980b384a 100644 --- a/pkg/utils/cmdutils/cmd.go +++ b/pkg/utils/cmdutils/cmd.go @@ -11,6 +11,17 @@ type Cmd interface { // It returns a *RunError if there is any error, nil otherwise Run() *RunError + // Start starts the command but doesn't block + // If the returned error is non-nil, it should be of type *RunError + Start() *RunError + + // Wait waits for the command to finish + // If the returned error is non-nil, it 
should be of type *RunError + Wait() *RunError + + // Output returns the output of the executed command + Output() []byte + // WithEnv sets the Env variables for the Cmd // Each entry should be of the form "key=value" WithEnv(...string) Cmd diff --git a/pkg/utils/cmdutils/local.go b/pkg/utils/cmdutils/local.go index ed0509e0b77..13d371c1990 100644 --- a/pkg/utils/cmdutils/local.go +++ b/pkg/utils/cmdutils/local.go @@ -29,8 +29,10 @@ type LocalCmder struct{} // Command returns a Cmd which includes the running process's `Environment` func (c *LocalCmder) Command(ctx context.Context, name string, arg ...string) Cmd { + var combinedOutput threadsafe.Buffer cmd := &LocalCmd{ - Cmd: exec.CommandContext(ctx, name, arg...), + Cmd: exec.CommandContext(ctx, name, arg...), + combinedOutput: &combinedOutput, } // By default, assign the env variables for the command @@ -41,6 +43,7 @@ func (c *LocalCmder) Command(ctx context.Context, name string, arg ...string) Cm // LocalCmd wraps os/exec.Cmd, implementing the cmdutils.Cmd interface type LocalCmd struct { *exec.Cmd + combinedOutput *threadsafe.Buffer } // WithEnv sets env @@ -93,3 +96,41 @@ func (cmd *LocalCmd) Run() *RunError { } return nil } + +// Start starts the command but doesn't block +// If the returned error is non-nil, it should be of type *RunError +func (cmd *LocalCmd) Start() *RunError { + + cmd.Stdout = io.MultiWriter(cmd.Stdout, cmd.combinedOutput) + cmd.Stderr = io.MultiWriter(cmd.Stderr, cmd.combinedOutput) + + if err := cmd.Cmd.Start(); err != nil { + return &RunError{ + command: cmd.Args, + output: cmd.combinedOutput.Bytes(), + inner: err, + stackTrace: errors.WithStack(err), + } + } + return nil +} + +// Wait waits for the command to finish +// If the returned error is non-nil, it should be of type *RunError +func (cmd *LocalCmd) Wait() *RunError { + if err := cmd.Cmd.Wait(); err != nil { + return &RunError{ + command: cmd.Args, + output: cmd.combinedOutput.Bytes(), + inner: err, + stackTrace: 
errors.WithStack(err), + } + } + return nil +} + +// Output returns the output of the command +// It is the combined stdout and stderr captured for a command launched with Start +func (cmd *LocalCmd) Output() []byte { + return cmd.combinedOutput.Bytes() +} diff --git a/test/kubernetes/e2e/features/zero_downtime_rollout/suite.go b/test/kubernetes/e2e/features/zero_downtime_rollout/suite.go new file mode 100644 index 00000000000..1890dda86ea --- /dev/null +++ b/test/kubernetes/e2e/features/zero_downtime_rollout/suite.go @@ -0,0 +1,126 @@ +package zero_downtime_rollout + +import ( + "context" + "net/http" + "time" + + . "github.com/onsi/gomega" + "github.com/stretchr/testify/suite" + + "github.com/solo-io/gloo/pkg/utils/kubeutils" + "github.com/solo-io/gloo/pkg/utils/requestutils/curl" + testmatchers "github.com/solo-io/gloo/test/gomega/matchers" + "github.com/solo-io/gloo/test/kubernetes/e2e" + "github.com/solo-io/gloo/test/kubernetes/e2e/defaults" + "github.com/solo-io/gloo/test/kubernetes/e2e/tests/base" +) + +type testingSuite struct { + *base.BaseTestingSuite +} + +func NewTestingSuite(ctx context.Context, testInst *e2e.TestInstallation) suite.TestingSuite { + return &testingSuite{ + base.NewBaseTestingSuite(ctx, testInst, e2e.MustTestHelper(ctx, testInst), base.SimpleTestCase{}, zeroDowntimeTestCases), + } +} + +func (s *testingSuite) TestZeroDowntimeRollout() { + // Ensure the gloo gateway pod is up and running + s.TestInstallation.Assertions.EventuallyRunningReplicas(s.Ctx, glooProxyObjectMeta, Equal(1)) + s.TestInstallation.Assertions.AssertEventualCurlResponse( + s.Ctx, + defaults.CurlPodExecOpt, + []curl.Option{ + curl.WithHost(kubeutils.ServiceFQDN(proxyService.ObjectMeta)), + curl.WithHostHeader("example.com"), + }, + &testmatchers.HttpResponse{ + StatusCode: http.StatusOK, + }) + + // Send traffic to the gloo gateway pod while we restart the deployment + // Run this for 30s which is long enough to restart the deployment since there's no easy way + // to stop this 
command once the test is over + // This executes 600 req @ 4 req/sec = 15s (2 * terminationGracePeriodSeconds (5) + buffer) + // kubectl exec -n hey hey -- hey -disable-keepalive -c 4 -q 10 --cpus 1 -n 1200 -m GET -t 1 -host example.com http://gloo-proxy-gw.default.svc.cluster.local:8080 + args := []string{"exec", "-n", "hey", "hey", "--", "hey", "-disable-keepalive", "-c", "4", "-q", "10", "--cpus", "1", "-n", "600", "-m", "GET", "-t", "1", "-host", "example.com", "http://gloo-proxy-gw.default.svc.cluster.local:8080"} + + var err error + cmd := s.TestHelper.Cli.Command(s.Ctx, args...) + err = cmd.Start() + Expect(err).ToNot(HaveOccurred()) + + // Restart the deployment. There should be no downtime since the gloo gateway pod should have the readiness probes configured + err = s.TestHelper.RestartDeploymentAndWait(s.Ctx, "gloo-proxy-gw") + Expect(err).ToNot(HaveOccurred()) + + time.Sleep(1 * time.Second) + + // We're just flexing at this point + err = s.TestHelper.RestartDeploymentAndWait(s.Ctx, "gloo-proxy-gw") + Expect(err).ToNot(HaveOccurred()) + + now := time.Now() + err = cmd.Wait() + Expect(err).ToNot(HaveOccurred()) + + // Since there's no easy way to stop the command after we've restarted the deployment, + // we ensure that at least 1 second has passed since we began sending traffic to the gloo gateway pod + after := int(time.Now().Sub(now).Abs().Seconds()) + s.GreaterOrEqual(after, 1) + + // Summary: + // Total: 30.0113 secs + // Slowest: 0.0985 secs + // Fastest: 0.0025 secs + // Average: 0.0069 secs + // Requests/sec: 39.9849 + // + // Total data: 738000 bytes + // Size/request: 615 bytes + // + // Response time histogram: + // 0.003 [1] | + // 0.012 [1165] |■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ + // 0.022 [24] |■ + // 0.031 [4] | + // 0.041 [0] | + // 0.050 [0] | + // 0.060 [0] | + // 0.070 [0] | + // 0.079 [0] | + // 0.089 [1] | + // 0.098 [5] | + // + // Latency distribution: + // 10% in 0.0036 secs + // 25% in 0.0044 secs + // 50% in 0.0060 secs + 
// 75% in 0.0082 secs + // 90% in 0.0099 secs + // 95% in 0.0109 secs + // 99% in 0.0187 secs + // + // Details (average, fastest, slowest): + // DNS+dialup: 0.0028 secs, 0.0025 secs, 0.0985 secs + // DNS-lookup: 0.0016 secs, 0.0001 secs, 0.0116 secs + // req write: 0.0003 secs, 0.0001 secs, 0.0041 secs + // resp wait: 0.0034 secs, 0.0012 secs, 0.0782 secs + // resp read: 0.0003 secs, 0.0001 secs, 0.0039 secs + // + // Status code distribution: + // [200] 600 responses + // + // ***** Should not contain something like this ***** + // Status code distribution: + // [200] 579 responses + // Error distribution: + // [17] Get http://gloo-proxy-gw.default.svc.cluster.local:8080: dial tcp 10.96.177.91:8080: connection refused + // [4] Get http://gloo-proxy-gw.default.svc.cluster.local:8080: net/http: request canceled while waiting for connection + + // Verify that there were no errors + Expect(cmd.Output()).To(ContainSubstring("[200] 600 responses")) + Expect(cmd.Output()).ToNot(ContainSubstring("Error distribution")) +} diff --git a/test/kubernetes/e2e/features/zero_downtime_rollout/testdata/route-with-service.yaml b/test/kubernetes/e2e/features/zero_downtime_rollout/testdata/route-with-service.yaml new file mode 100644 index 00000000000..490dd34b4c3 --- /dev/null +++ b/test/kubernetes/e2e/features/zero_downtime_rollout/testdata/route-with-service.yaml @@ -0,0 +1,56 @@ +kind: Gateway +apiVersion: gateway.networking.k8s.io/v1 +metadata: + name: gw +spec: + gatewayClassName: gloo-gateway + listeners: + - protocol: HTTP + port: 8080 + name: http + allowedRoutes: + namespaces: + from: Same +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: example-route +spec: + parentRefs: + - name: gw + hostnames: + - "example.com" + rules: + - backendRefs: + - name: example-svc + port: 8080 +--- +apiVersion: v1 +kind: Namespace +metadata: + name: hey +--- +apiVersion: v1 +kind: Pod +metadata: + name: hey + namespace: hey + labels: + app: hey + version: v1 
+ app.kubernetes.io/name: hey +spec: + containers: + - name: hey + image: ricoli/hey + imagePullPolicy: IfNotPresent + command: + - "tail" + - "-f" + - "/dev/null" + resources: + requests: + cpu: "100m" + limits: + cpu: "200m" diff --git a/test/kubernetes/e2e/features/zero_downtime_rollout/testdata/service-for-route.yaml b/test/kubernetes/e2e/features/zero_downtime_rollout/testdata/service-for-route.yaml new file mode 100644 index 00000000000..8944dc7be68 --- /dev/null +++ b/test/kubernetes/e2e/features/zero_downtime_rollout/testdata/service-for-route.yaml @@ -0,0 +1,26 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: example-svc +spec: + selector: + app.kubernetes.io/name: nginx + ports: + - protocol: TCP + port: 8080 + targetPort: http-web-svc +--- +apiVersion: v1 +kind: Pod +metadata: + name: nginx + labels: + app.kubernetes.io/name: nginx +spec: + containers: + - name: nginx + image: nginx:stable + ports: + - containerPort: 80 + name: http-web-svc diff --git a/test/kubernetes/e2e/features/zero_downtime_rollout/types.go b/test/kubernetes/e2e/features/zero_downtime_rollout/types.go new file mode 100644 index 00000000000..286e5865239 --- /dev/null +++ b/test/kubernetes/e2e/features/zero_downtime_rollout/types.go @@ -0,0 +1,43 @@ +package zero_downtime_rollout + +import ( + "path/filepath" + + "github.com/solo-io/gloo/test/kubernetes/e2e/defaults" + "github.com/solo-io/gloo/test/kubernetes/e2e/tests/base" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/solo-io/skv2/codegen/util" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var ( + routeWithServiceManifest = filepath.Join(util.MustGetThisDir(), "testdata", "route-with-service.yaml") + serviceManifest = filepath.Join(util.MustGetThisDir(), "testdata", "service-for-route.yaml") + + glooProxyObjectMeta = metav1.ObjectMeta{ + Name: "gloo-proxy-gw", + Namespace: "default", + } + proxyDeployment = &appsv1.Deployment{ObjectMeta: 
glooProxyObjectMeta} + proxyService = &corev1.Service{ObjectMeta: glooProxyObjectMeta} + + heyPod = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hey", + Namespace: "hey", + }, + } + + zeroDowntimeTestCases = map[string]*base.TestCase{ + "TestZeroDowntimeRollout": { + SimpleTestCase: base.SimpleTestCase{ + Manifests: []string{defaults.CurlPodManifest, serviceManifest, routeWithServiceManifest}, + Resources: []client.Object{proxyDeployment, proxyService, defaults.CurlPod, heyPod}, + }, + }, + } +) diff --git a/test/kubernetes/e2e/tests/base/base_suite.go b/test/kubernetes/e2e/tests/base/base_suite.go index 6acf8bf8560..7d5dd011bce 100644 --- a/test/kubernetes/e2e/tests/base/base_suite.go +++ b/test/kubernetes/e2e/tests/base/base_suite.go @@ -2,6 +2,7 @@ package base import ( "context" + "fmt" "slices" "time" @@ -9,6 +10,8 @@ import ( "github.com/solo-io/gloo/test/kubernetes/e2e" "github.com/solo-io/gloo/test/kubernetes/testutils/helper" "github.com/stretchr/testify/suite" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -86,6 +89,14 @@ func (s *BaseTestingSuite) SetupSuite() { // Ensure the resources exist if s.Setup.Resources != nil { s.TestInstallation.Assertions.EventuallyObjectsExist(s.Ctx, s.Setup.Resources...) + + for _, resource := range s.Setup.Resources { + if pod, ok := resource.(*corev1.Pod); ok { + s.TestInstallation.Assertions.EventuallyPodsRunning(s.Ctx, pod.Namespace, v1.ListOptions{ + LabelSelector: fmt.Sprintf("app.kubernetes.io/name=%s", pod.Name), + }) + } + } } if s.Setup.UpgradeValues != "" { @@ -166,6 +177,15 @@ func (s *BaseTestingSuite) BeforeTest(suiteName, testName string) { }, 10*time.Second, 1*time.Second).Should(gomega.Succeed(), "can apply "+manifest) } s.TestInstallation.Assertions.EventuallyObjectsExist(s.Ctx, testCase.Resources...) 
+ + for _, resource := range testCase.Resources { + if pod, ok := resource.(*corev1.Pod); ok { + s.TestInstallation.Assertions.EventuallyPodsRunning(s.Ctx, pod.Namespace, v1.ListOptions{ + LabelSelector: fmt.Sprintf("app.kubernetes.io/name=%s", pod.Name), + }) + } + } + } func (s *BaseTestingSuite) AfterTest(suiteName, testName string) { diff --git a/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml b/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml index 89067f667a6..eaeb8c1e1c9 100644 --- a/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml +++ b/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml @@ -17,10 +17,10 @@ kubeGateway: gatewayParameters: glooGateway: podTemplate: - terminationGracePeriodSeconds: 7 + terminationGracePeriodSeconds: 5 gracefulShutdown: enabled: true - sleepTimeSeconds: 5 + sleepTimeSeconds: 2 probes: true livenessProbeEnabled: true diff --git a/test/kubernetes/e2e/tests/zero_downtime_test.go b/test/kubernetes/e2e/tests/zero_downtime_test.go new file mode 100644 index 00000000000..b7eca8c41a7 --- /dev/null +++ b/test/kubernetes/e2e/tests/zero_downtime_test.go @@ -0,0 +1,52 @@ +package tests_test + +import ( + "context" + "os" + "testing" + "time" + + "github.com/solo-io/gloo/pkg/utils/envutils" + "github.com/solo-io/gloo/test/kubernetes/e2e" + . 
"github.com/solo-io/gloo/test/kubernetes/e2e/tests" + "github.com/solo-io/gloo/test/kubernetes/testutils/gloogateway" + "github.com/solo-io/gloo/test/testutils" +) + +func TestZeroDowntimeRollout(t *testing.T) { + ctx := context.Background() + installNs, nsEnvPredefined := envutils.LookupOrDefault(testutils.InstallNamespace, "zero-downtime") + testInstallation := e2e.CreateTestInstallation( + t, + &gloogateway.Context{ + InstallNamespace: installNs, + ProfileValuesManifestFile: e2e.KubernetesGatewayProfilePath, + ValuesManifestFile: e2e.EmptyValuesManifestPath, + }, + ) + + testHelper := e2e.MustTestHelper(ctx, testInstallation) + + // Set the env to the install namespace if it is not already set + if !nsEnvPredefined { + os.Setenv(testutils.InstallNamespace, installNs) + } + + // We register the cleanup function _before_ we actually perform the installation. + // This allows us to uninstall Gloo Gateway, in case the original installation only completed partially + t.Cleanup(func() { + if !nsEnvPredefined { + os.Unsetenv(testutils.InstallNamespace) + } + if t.Failed() { + testInstallation.PreFailHandler(ctx) + } + + testInstallation.UninstallGlooGatewayWithTestHelper(ctx, testHelper) + }) + + // Install Gloo Gateway with correct validation settings + testInstallation.InstallGlooGatewayWithTestHelper(ctx, testHelper, 5*time.Minute) + + ZeroDowntimeRolloutSuiteRunner().Run(ctx, t, testInstallation) +} diff --git a/test/kubernetes/e2e/tests/zero_downtime_tests.go b/test/kubernetes/e2e/tests/zero_downtime_tests.go new file mode 100644 index 00000000000..3a502380169 --- /dev/null +++ b/test/kubernetes/e2e/tests/zero_downtime_tests.go @@ -0,0 +1,12 @@ +package tests + +import ( + "github.com/solo-io/gloo/test/kubernetes/e2e" + "github.com/solo-io/gloo/test/kubernetes/e2e/features/zero_downtime_rollout" +) + +func ZeroDowntimeRolloutSuiteRunner() e2e.SuiteRunner { + zeroDowntimeSuiteRunner := e2e.NewSuiteRunner(false) + 
zeroDowntimeSuiteRunner.Register("ZeroDowntimeRollout", zero_downtime_rollout.NewTestingSuite) + return zeroDowntimeSuiteRunner +} From 88a361bdb2aeb20fbfd1389af2f40dcef0103812 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Thu, 14 Nov 2024 12:42:14 -0500 Subject: [PATCH 09/19] fixes --- .github/workflows/pr-kubernetes-tests.yaml | 2 +- test/kubernetes/e2e/tests/base/base_suite.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-kubernetes-tests.yaml b/.github/workflows/pr-kubernetes-tests.yaml index 7e7a9690bc7..15a3c3519ea 100644 --- a/.github/workflows/pr-kubernetes-tests.yaml +++ b/.github/workflows/pr-kubernetes-tests.yaml @@ -81,7 +81,7 @@ jobs: go-test-args: '-v -timeout=35m' go-test-run-regex: '^TestFullEnvoyValidation$$|^TestValidationStrict$$|^TestValidationAlwaysAccept$$|^TestTransformationValidationDisabled$$|^TestGloomtlsGatewayEdgeGateway$$|^TestWatchNamespaceSelector$$' - # October 10, 2024: 12 minutes + # October 10, 2024: 18 minutes - cluster-name: 'cluster-six' go-test-args: '-v -timeout=25m' go-test-run-regex: '^TestDiscoveryWatchlabels$$|^TestK8sGatewayNoValidation$$|^TestHelm$$|^TestHelmSettings$$|^TestK8sGatewayAws$$|^TestZeroDowntimeRollout$$' diff --git a/test/kubernetes/e2e/tests/base/base_suite.go b/test/kubernetes/e2e/tests/base/base_suite.go index 7d5dd011bce..edc41b01c95 100644 --- a/test/kubernetes/e2e/tests/base/base_suite.go +++ b/test/kubernetes/e2e/tests/base/base_suite.go @@ -11,6 +11,7 @@ import ( "github.com/solo-io/gloo/test/kubernetes/testutils/helper" "github.com/stretchr/testify/suite" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -92,7 +93,7 @@ func (s *BaseTestingSuite) SetupSuite() { for _, resource := range s.Setup.Resources { if pod, ok := resource.(*corev1.Pod); ok { - s.TestInstallation.Assertions.EventuallyPodsRunning(s.Ctx, pod.Namespace, 
v1.ListOptions{ + s.TestInstallation.Assertions.EventuallyPodsRunning(s.Ctx, pod.Namespace, metav1.ListOptions{ LabelSelector: fmt.Sprintf("app.kubernetes.io/name=%s", pod.Name), }) } From 15d209a8fd078df3c582825695fdb2611a4950c5 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Thu, 14 Nov 2024 12:54:26 -0500 Subject: [PATCH 10/19] fix codegen --- test/kubernetes/e2e/tests/base/base_suite.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/kubernetes/e2e/tests/base/base_suite.go b/test/kubernetes/e2e/tests/base/base_suite.go index edc41b01c95..e83da8c3f2a 100644 --- a/test/kubernetes/e2e/tests/base/base_suite.go +++ b/test/kubernetes/e2e/tests/base/base_suite.go @@ -12,7 +12,6 @@ import ( "github.com/stretchr/testify/suite" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -181,7 +180,7 @@ func (s *BaseTestingSuite) BeforeTest(suiteName, testName string) { for _, resource := range testCase.Resources { if pod, ok := resource.(*corev1.Pod); ok { - s.TestInstallation.Assertions.EventuallyPodsRunning(s.Ctx, pod.Namespace, v1.ListOptions{ + s.TestInstallation.Assertions.EventuallyPodsRunning(s.Ctx, pod.Namespace, metav1.ListOptions{ LabelSelector: fmt.Sprintf("app.kubernetes.io/name=%s", pod.Name), }) } From 6bbe51afb201b25c01c5c964c43c3f18c124d5fe Mon Sep 17 00:00:00 2001 From: David Jumani Date: Thu, 14 Nov 2024 15:23:52 -0500 Subject: [PATCH 11/19] increase duration --- .../e2e/features/zero_downtime_rollout/suite.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/kubernetes/e2e/features/zero_downtime_rollout/suite.go b/test/kubernetes/e2e/features/zero_downtime_rollout/suite.go index 1890dda86ea..948bb0d1835 100644 --- a/test/kubernetes/e2e/features/zero_downtime_rollout/suite.go +++ b/test/kubernetes/e2e/features/zero_downtime_rollout/suite.go @@ -43,9 +43,9 @@ func (s *testingSuite) 
TestZeroDowntimeRollout() { // Send traffic to the gloo gateway pod while we restart the deployment // Run this for 30s which is long enough to restart the deployment since there's no easy way // to stop this command once the test is over - // This executes 600 req @ 4 req/sec = 15s (2 * terminationGracePeriodSeconds (5) + buffer) + // This executes 800 req @ 40 req/sec (4 workers x 10 req/sec each) = 20s (3 * terminationGracePeriodSeconds (5) + buffer) // kubectl exec -n hey hey -- hey -disable-keepalive -c 4 -q 10 --cpus 1 -n 1200 -m GET -t 1 -host example.com http://gloo-proxy-gw.default.svc.cluster.local:8080 - args := []string{"exec", "-n", "hey", "hey", "--", "hey", "-disable-keepalive", "-c", "4", "-q", "10", "--cpus", "1", "-n", "600", "-m", "GET", "-t", "1", "-host", "example.com", "http://gloo-proxy-gw.default.svc.cluster.local:8080"} + args := []string{"exec", "-n", "hey", "hey", "--", "hey", "-disable-keepalive", "-c", "4", "-q", "10", "--cpus", "1", "-n", "800", "-m", "GET", "-t", "1", "-host", "example.com", "http://gloo-proxy-gw.default.svc.cluster.local:8080"} var err error cmd := s.TestHelper.Cli.Command(s.Ctx, args...) 
@@ -111,16 +111,16 @@ func (s *testingSuite) TestZeroDowntimeRollout() { // resp read: 0.0003 secs, 0.0001 secs, 0.0039 secs // // Status code distribution: - // [200] 600 responses + // [200] 800 responses // // ***** Should not contain something like this ***** // Status code distribution: - // [200] 579 responses + // [200] 779 responses // Error distribution: // [17] Get http://gloo-proxy-gw.default.svc.cluster.local:8080: dial tcp 10.96.177.91:8080: connection refused // [4] Get http://gloo-proxy-gw.default.svc.cluster.local:8080: net/http: request canceled while waiting for connection // Verify that there were no errors - Expect(cmd.Output()).To(ContainSubstring("[200] 600 responses")) + Expect(cmd.Output()).To(ContainSubstring("[200] 800 responses")) Expect(cmd.Output()).ToNot(ContainSubstring("Error distribution")) } From 6b5438bba0c0a876e9dc441b6e825fd38a4aa95c Mon Sep 17 00:00:00 2001 From: David Jumani Date: Fri, 15 Nov 2024 11:19:02 -0500 Subject: [PATCH 12/19] add zero downtime profile --- .../tests/manifests/profiles/kubernetes-gateway.yaml | 10 ---------- .../e2e/tests/manifests/zero-downtime-rollout.yaml | 11 +++++++++++ test/kubernetes/e2e/tests/zero_downtime_test.go | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) create mode 100644 test/kubernetes/e2e/tests/manifests/zero-downtime-rollout.yaml diff --git a/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml b/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml index eaeb8c1e1c9..4941924918d 100644 --- a/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml +++ b/test/kubernetes/e2e/tests/manifests/profiles/kubernetes-gateway.yaml @@ -13,16 +13,6 @@ global: # Configuration for the Kubernetes Gateway integration in Gloo Gateway kubeGateway: enabled: true - # Enable the probes to ensure zero downtime - gatewayParameters: - glooGateway: - podTemplate: - terminationGracePeriodSeconds: 5 - gracefulShutdown: - enabled: true - sleepTimeSeconds: 2 
- probes: true - livenessProbeEnabled: true # Configuration for the statically deployed gateway-proxy that ships by default with Gloo Gateway gatewayProxies: diff --git a/test/kubernetes/e2e/tests/manifests/zero-downtime-rollout.yaml b/test/kubernetes/e2e/tests/manifests/zero-downtime-rollout.yaml new file mode 100644 index 00000000000..7a726cd0954 --- /dev/null +++ b/test/kubernetes/e2e/tests/manifests/zero-downtime-rollout.yaml @@ -0,0 +1,11 @@ +kubeGateway: + # Enable the probes to ensure zero downtime + gatewayParameters: + glooGateway: + podTemplate: + terminationGracePeriodSeconds: 5 + gracefulShutdown: + enabled: true + sleepTimeSeconds: 2 + probes: true + livenessProbeEnabled: true diff --git a/test/kubernetes/e2e/tests/zero_downtime_test.go b/test/kubernetes/e2e/tests/zero_downtime_test.go index b7eca8c41a7..541c458686e 100644 --- a/test/kubernetes/e2e/tests/zero_downtime_test.go +++ b/test/kubernetes/e2e/tests/zero_downtime_test.go @@ -21,7 +21,7 @@ func TestZeroDowntimeRollout(t *testing.T) { &gloogateway.Context{ InstallNamespace: installNs, ProfileValuesManifestFile: e2e.KubernetesGatewayProfilePath, - ValuesManifestFile: e2e.EmptyValuesManifestPath, + ValuesManifestFile: e2e.ManifestPath("zero-downtime-rollout.yaml"), }, ) From 45c2f97ab8537ae0bcc09881ab5b54508c7d0ece Mon Sep 17 00:00:00 2001 From: David Jumani Date: Fri, 15 Nov 2024 12:26:20 -0500 Subject: [PATCH 13/19] remove default liveness probe --- .../add-readiness-liveness-probe.yaml | 2 +- .../gloo/templates/43-gatewayparameters.yaml | 12 +----------- install/test/k8sgateway_test.go | 18 ++---------------- 3 files changed, 4 insertions(+), 28 deletions(-) diff --git a/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml b/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml index fae9796e072..7e11efe24d7 100644 --- a/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml +++ b/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml @@ -11,6 +11,6 @@ changelog: - 
`kubeGateway.gatewayParameters.glooGateway.podTemplate.terminationGracePeriodSeconds` to specify the terminationGracePeriodSeconds. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown` to configure the graceful shutdown config for the envoy container. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe` to specify a custom liveness probe for the envoy container. - - `kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled` to enable the liveness probe. If the customLivenessProbe is not specified, a default liveness probe is set. + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled` to enable the liveness probe. If the customLivenessProbe is not specified, no liveness probe is set. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe` to specify a custom readiness probe for the envoy container. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.probes` to enable the readiness probe. If the customReadinessProbe is not specified, a default readiness probe is set. 
diff --git a/install/helm/gloo/templates/43-gatewayparameters.yaml b/install/helm/gloo/templates/43-gatewayparameters.yaml index c55457e9c3a..cfceb340263 100644 --- a/install/helm/gloo/templates/43-gatewayparameters.yaml +++ b/install/helm/gloo/templates/43-gatewayparameters.yaml @@ -95,19 +95,9 @@ spec: failureThreshold: 2 {{- end}}{{/* if $gg.podTemplate.customReadinessProbe */}} {{- if $gg.podTemplate.livenessProbeEnabled }} - livenessProbe: {{- if $gg.podTemplate.customLivenessProbe }} + livenessProbe: {{ toYaml $gg.podTemplate.customLivenessProbe | indent 8}} -{{- else }} - exec: - command: - - wget - - -O - - /dev/null - - 127.0.0.1:19000/server_info - initialDelaySeconds: 3 - periodSeconds: 10 - failureThreshold: 3 {{- end }}{{/*if $gg.podTemplate.customLivenessProbe*/}} {{- end }}{{/*if $gg.podTemplate.livenessProbeEnabled*/}} {{- end }}{{/*if $gg.podTemplate.probes*/}} diff --git a/install/test/k8sgateway_test.go b/install/test/k8sgateway_test.go index 85ed653a25b..9e91dca0897 100644 --- a/install/test/k8sgateway_test.go +++ b/install/test/k8sgateway_test.go @@ -386,21 +386,6 @@ var _ = Describe("Kubernetes Gateway API integration", func() { It("sets the default values of the probes", func() { gwp := getDefaultGatewayParameters(testManifest) gwpPT := gwp.Spec.Kube.PodTemplate - Expect(*gwpPT.LivenessProbe).To(BeEquivalentTo(corev1.Probe{ - ProbeHandler: corev1.ProbeHandler{ - Exec: &corev1.ExecAction{ - Command: []string{ - "wget", - "-O", - "/dev/null", - "127.0.0.1:19000/server_info", - }, - }, - }, - InitialDelaySeconds: 3, - PeriodSeconds: 10, - FailureThreshold: 3, - })) Expect(*gwpPT.ReadinessProbe).To(BeEquivalentTo(corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ HTTPGet: &corev1.HTTPGetAction{ @@ -415,7 +400,8 @@ var _ = Describe("Kubernetes Gateway API integration", func() { PeriodSeconds: 5, FailureThreshold: 2, })) - + // There is no default liveness probe + Expect(gwpPT.LivenessProbe).To(BeNil()) }) }) From 
fe410b03e1b4ca9ad95041627933faff24896613 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Fri, 15 Nov 2024 12:50:55 -0500 Subject: [PATCH 14/19] more cleanup --- install/helm/gloo/generate/values.go | 1 - install/helm/gloo/templates/43-gatewayparameters.yaml | 2 -- install/test/k8sgateway_test.go | 2 -- .../e2e/tests/manifests/zero-downtime-rollout.yaml | 11 ++++++++++- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/install/helm/gloo/generate/values.go b/install/helm/gloo/generate/values.go index bd5bf835615..ba38ffb9650 100644 --- a/install/helm/gloo/generate/values.go +++ b/install/helm/gloo/generate/values.go @@ -348,7 +348,6 @@ type GatewayParamsPodTemplate struct { GracefulShutdown *GracefulShutdownSpec `json:"gracefulShutdown,omitempty"` TerminationGracePeriodSeconds *int `json:"terminationGracePeriodSeconds,omitempty" desc:"Time in seconds to wait for the pod to terminate gracefully. See [kubernetes docs](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#istio-lifecycle) for more info."` Probes *bool `json:"probes,omitempty" desc:"Set to true to enable a readiness probe (default is false). 
Then, you can also enable a liveness probe."` - LivenessProbeEnabled *bool `json:"livenessProbeEnabled,omitempty" desc:"Set to true to enable a liveness probe (default is false)."` CustomReadinessProbe *corev1.Probe `json:"customReadinessProbe,omitempty"` CustomLivenessProbe *corev1.Probe `json:"customLivenessProbe,omitempty"` } diff --git a/install/helm/gloo/templates/43-gatewayparameters.yaml b/install/helm/gloo/templates/43-gatewayparameters.yaml index cfceb340263..b7cb803fa48 100644 --- a/install/helm/gloo/templates/43-gatewayparameters.yaml +++ b/install/helm/gloo/templates/43-gatewayparameters.yaml @@ -94,12 +94,10 @@ spec: periodSeconds: 5 failureThreshold: 2 {{- end}}{{/* if $gg.podTemplate.customReadinessProbe */}} -{{- if $gg.podTemplate.livenessProbeEnabled }} {{- if $gg.podTemplate.customLivenessProbe }} livenessProbe: {{ toYaml $gg.podTemplate.customLivenessProbe | indent 8}} {{- end }}{{/*if $gg.podTemplate.customLivenessProbe*/}} -{{- end }}{{/*if $gg.podTemplate.livenessProbeEnabled*/}} {{- end }}{{/*if $gg.podTemplate.probes*/}} {{- end }}{{/*if $gg.podTemplate */}} {{- if $gg.sdsContainer }} diff --git a/install/test/k8sgateway_test.go b/install/test/k8sgateway_test.go index 9e91dca0897..b5a322d4670 100644 --- a/install/test/k8sgateway_test.go +++ b/install/test/k8sgateway_test.go @@ -377,7 +377,6 @@ var _ = Describe("Kubernetes Gateway API integration", func() { BeforeEach(func() { extraValuesArgs := []string{ "kubeGateway.gatewayParameters.glooGateway.podTemplate.probes=true", - "kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled=true", } valuesArgs = append(valuesArgs, extraValuesArgs...) 
@@ -415,7 +414,6 @@ var _ = Describe("Kubernetes Gateway API integration", func() { "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.failureThreshold=1", "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.initialDelaySeconds=2", "kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.periodSeconds=3", - "kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled=true", "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.exec.command[0]=wget", "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.exec.command[1]=-O", "kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe.exec.command[2]=/dev/null", diff --git a/test/kubernetes/e2e/tests/manifests/zero-downtime-rollout.yaml b/test/kubernetes/e2e/tests/manifests/zero-downtime-rollout.yaml index 7a726cd0954..e1548309080 100644 --- a/test/kubernetes/e2e/tests/manifests/zero-downtime-rollout.yaml +++ b/test/kubernetes/e2e/tests/manifests/zero-downtime-rollout.yaml @@ -8,4 +8,13 @@ kubeGateway: enabled: true sleepTimeSeconds: 2 probes: true - livenessProbeEnabled: true + customLivenessProbe: + exec: + command: + - wget + - -O + - /dev/null + - 127.0.0.1:19000/server_info + initialDelaySeconds: 3 + periodSeconds: 10 + failureThreshold: 3 From 9ae80f16dd8d28ffd9eb008abc2626814f9afa49 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Fri, 15 Nov 2024 14:13:09 -0500 Subject: [PATCH 15/19] cleanup --- docs/content/reference/values.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/content/reference/values.txt b/docs/content/reference/values.txt index 7b243dafd20..f0c00e7e30b 100644 --- a/docs/content/reference/values.txt +++ b/docs/content/reference/values.txt @@ -188,7 +188,6 @@ |kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown.sleepTimeSeconds|int||Time (in seconds) for the preStop hook to wait before allowing Envoy to terminate| 
|kubeGateway.gatewayParameters.glooGateway.podTemplate.terminationGracePeriodSeconds|int||Time in seconds to wait for the pod to terminate gracefully. See [kubernetes docs](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#istio-lifecycle) for more info.| |kubeGateway.gatewayParameters.glooGateway.podTemplate.probes|bool||Set to true to enable a readiness probe (default is false). Then, you can also enable a liveness probe.| -|kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled|bool||Set to true to enable a liveness probe (default is false).| |kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.exec.command[]|string||| |kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.path|string||| |kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe.httpGet.port|int64||| From 4c8c16a8b93374b81a15f6a79436006adb6cad9d Mon Sep 17 00:00:00 2001 From: David Jumani Date: Fri, 15 Nov 2024 14:51:16 -0500 Subject: [PATCH 16/19] update changelog --- changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml b/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml index 7e11efe24d7..cf713da819b 100644 --- a/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml +++ b/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml @@ -11,6 +11,5 @@ changelog: - `kubeGateway.gatewayParameters.glooGateway.podTemplate.terminationGracePeriodSeconds` to specify the terminationGracePeriodSeconds. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown` to configure the graceful shutdown config for the envoy container. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe` to specify a custom liveness probe for the envoy container. 
- - `kubeGateway.gatewayParameters.glooGateway.podTemplate.livenessProbeEnabled` to enable the liveness probe. If the customLivenessProbe is not specified, no liveness probe is set. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe` to specify a custom readiness probe for the envoy container. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.probes` to enable the readiness probe. If the customReadinessProbe is not specified, a default readiness probe is set. From 93ba4f6ed80ed90c13a89b1a9150c7a54d6621a5 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Fri, 15 Nov 2024 14:53:05 -0500 Subject: [PATCH 17/19] update changelog again --- changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml b/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml index cf713da819b..d01891d054d 100644 --- a/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml +++ b/changelog/v1.18.0-beta35/add-readiness-liveness-probe.yaml @@ -10,6 +10,6 @@ changelog: Adds the following new fields that configure the Kubernetes Gloo Gateway pod : - `kubeGateway.gatewayParameters.glooGateway.podTemplate.terminationGracePeriodSeconds` to specify the terminationGracePeriodSeconds. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.gracefulShutdown` to configure the graceful shutdown config for the envoy container. - - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe` to specify a custom liveness probe for the envoy container. + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customLivenessProbe` to specify a custom liveness probe for the envoy container. No default liveness probe is set. - `kubeGateway.gatewayParameters.glooGateway.podTemplate.customReadinessProbe` to specify a custom readiness probe for the envoy container. 
- - `kubeGateway.gatewayParameters.glooGateway.podTemplate.probes` to enable the readiness probe. If the customReadinessProbe is not specified, a default readiness probe is set. + - `kubeGateway.gatewayParameters.glooGateway.podTemplate.probes` to enable the readiness probe. If the customReadinessProbe is not specified, a default readiness probe is set. No default liveness probe is set. From e818e1114e3494a0ede90b87825047bf1cb8018f Mon Sep 17 00:00:00 2001 From: David Jumani Date: Mon, 18 Nov 2024 06:42:43 -0500 Subject: [PATCH 18/19] Update projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml Co-authored-by: Jenny Shu <28537278+jenshu@users.noreply.github.com> --- .../helm/gloo-gateway/templates/gateway/proxy-deployment.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml b/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml index 200263fec18..80f1802c7ce 100644 --- a/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml +++ b/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml @@ -97,8 +97,7 @@ spec: livenessProbe: {{ toYaml $gateway.livenessProbe | indent 10}} {{- end }}{{/*if $gateway.livenessProbe*/}} -{{- if $gateway.gracefulShutdown }} -{{- if $gateway.gracefulShutdown.enabled }} +{{- if ($gateway.gracefulShutdown).enabled }} lifecycle: preStop: exec: From 8077c25ae2a58567642e19c2fe6fdf0524ae7d79 Mon Sep 17 00:00:00 2001 From: David Jumani Date: Mon, 18 Nov 2024 17:33:29 +0530 Subject: [PATCH 19/19] add docs --- install/helm/gloo/generate/values.go | 12 ++++++------ .../templates/gateway/proxy-deployment.yaml | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/install/helm/gloo/generate/values.go b/install/helm/gloo/generate/values.go index ba38ffb9650..2aa2f193b5f 100644 --- a/install/helm/gloo/generate/values.go +++ 
b/install/helm/gloo/generate/values.go @@ -339,17 +339,17 @@ type GatewayParameters struct { Stats *GatewayParamsStatsConfig `json:"stats,omitempty" desc:"Config used to manage the stats endpoints exposed on the deployed proxies"` AIExtension *GatewayParamsAIExtension `json:"aiExtension,omitempty" desc:"Config used to manage the Gloo Gateway AI extension."` FloatingUserId *bool `json:"floatingUserId,omitempty" desc:"If true, allows the cluster to dynamically assign a user ID for the processes running in the container. Default is false."` - PodTemplate *GatewayParamsPodTemplate `json:"podTemplate,omitempty"` + PodTemplate *GatewayParamsPodTemplate `json:"podTemplate,omitempty" desc:"The template used to generate the gatewayParams pod"` // TODO(npolshak): Add support for GlooMtls } // GatewayProxyPodTemplate contains the Helm API available to configure the PodTemplate on the gateway Deployment type GatewayParamsPodTemplate struct { - GracefulShutdown *GracefulShutdownSpec `json:"gracefulShutdown,omitempty"` + GracefulShutdown *GracefulShutdownSpec `json:"gracefulShutdown,omitempty" desc:"If enabled, it calls the '/healthcheck/fail' endpoint on the envoy container prior to shutdown. This is useful for draining a server prior to shutting it down or doing a full restart."` TerminationGracePeriodSeconds *int `json:"terminationGracePeriodSeconds,omitempty" desc:"Time in seconds to wait for the pod to terminate gracefully. See [kubernetes docs](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#istio-lifecycle) for more info."` Probes *bool `json:"probes,omitempty" desc:"Set to true to enable a readiness probe (default is false). Then, you can also enable a liveness probe."` - CustomReadinessProbe *corev1.Probe `json:"customReadinessProbe,omitempty"` - CustomLivenessProbe *corev1.Probe `json:"customLivenessProbe,omitempty"` + CustomReadinessProbe *corev1.Probe `json:"customReadinessProbe,omitempty" desc:"Defines a custom readiness probe. 
If not provided, a default one is set"` + CustomLivenessProbe *corev1.Probe `json:"customLivenessProbe,omitempty" desc:"Defines a custom liveness probe. If not specified, no default liveness probe is set"` } type GatewayParamsStatsConfig struct { @@ -573,7 +573,7 @@ type CertGenCron struct { type GatewayProxy struct { Kind *GatewayProxyKind `json:"kind,omitempty" desc:"value to determine how the gateway proxy is deployed"` Namespace *string `json:"namespace,omitempty" desc:"Namespace in which to deploy this gateway proxy. Defaults to the value of Settings.WriteNamespace"` - PodTemplate *GatewayProxyPodTemplate `json:"podTemplate,omitempty"` + PodTemplate *GatewayProxyPodTemplate `json:"podTemplate,omitempty" desc:"The template used to generate the gateway proxy pod"` ConfigMap *ConfigMap `json:"configMap,omitempty"` CustomStaticLayer interface{} `json:"customStaticLayer,omitempty" desc:"Configure the static layer for global overrides to Envoy behavior, as defined in the Envoy bootstrap YAML. You cannot use this setting to set overload or upstream layers. For more info, see the Envoy docs. https://www.envoyproxy.io/docs/envoy/latest/configuration/operations/runtime#config-runtime"` GlobalDownstreamMaxConnections *uint32 `json:"globalDownstreamMaxConnections,omitempty" desc:"the number of concurrent connections needed. limit used to protect against exhausting file descriptors on host machine"` @@ -692,7 +692,7 @@ type GatewayProxyPodTemplate struct { FloatingUserId *bool `json:"floatingUserId,omitempty" desc:"If true, allows the cluster to dynamically assign a user ID for the processes running in the container. If podSecurityContext is defined, this value is not applied."` RunAsUser *float64 `json:"runAsUser,omitempty" desc:"Explicitly set the user ID for the processes in the container to run as. Default is 10101. 
If a SecurityContext is defined for the pod or container, this value is not applied for the pod/container."` FsGroup *float64 `json:"fsGroup,omitempty" desc:"Explicitly set the group ID for volume ownership. Default is 10101. If podSecurityContext is defined, this value is not applied."` - GracefulShutdown *GracefulShutdownSpec `json:"gracefulShutdown,omitempty"` + GracefulShutdown *GracefulShutdownSpec `json:"gracefulShutdown,omitempty" desc:"If enabled, it calls the '/healthcheck/fail' endpoint on the envoy container prior to shutdown. This is useful for draining a server prior to shutting it down or doing a full restart."` TerminationGracePeriodSeconds *int `json:"terminationGracePeriodSeconds,omitempty" desc:"Time in seconds to wait for the pod to terminate gracefully. See [kubernetes docs](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#istio-lifecycle) for more info."` CustomReadinessProbe *corev1.Probe `json:"customReadinessProbe,omitempty"` CustomLivenessProbe *corev1.Probe `json:"customLivenessProbe,omitempty"` diff --git a/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml b/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml index 80f1802c7ce..54c55071971 100644 --- a/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml +++ b/projects/gateway2/helm/gloo-gateway/templates/gateway/proxy-deployment.yaml @@ -105,8 +105,7 @@ spec: - /bin/sh - -c - wget --post-data "" -O /dev/null 127.0.0.1:19000/healthcheck/fail; sleep {{ $gateway.gracefulShutdown.sleepTimeSeconds | default "10" }} -{{- end}}{{/*if $gateway.gracefulShutdown.enabled*/}} -{{- end}}{{/*if $gateway.gracefulShutdown*/}} +{{- end}}{{/*if ($gateway.gracefulShutdown).enabled */}} {{- if $gateway.resources }} resources: {{- toYaml $gateway.resources | nindent 10 }}