diff --git a/UPGRADE.md b/UPGRADE.md
index 3a550a2779f..11d78fe4a0b 100644
--- a/UPGRADE.md
+++ b/UPGRADE.md
@@ -3,6 +3,16 @@ Upgrades
 
 In this document we will try to document relevant upgrade notes for the MinIO Operator.
 
+v6.0.0
+---
+
+This release focuses on a variety of improvements and bug fixes, mainly reducing the number of times we need to do a
+rolling restart of a MinIO Tenant, for example when the MinIO Operator is upgraded or downgraded.
+
+This release introduces a readiness probe to prevent Kubernetes from routing traffic to a MinIO pod that is not ready.
+
+> ⚠️ Upgrading to v6.0.0 will cause all tenant pods to restart.
+
 v5.0.0
 ---
 
@@ -40,7 +50,8 @@ kubectl -n $NAMESPACE get svc $TENANT_NAME-prometheus-hl-svc -o yaml > $TENANT_N
 
 After exporting these objects, remove `.metadata.ownerReferences` for all these files.
 
-After upgrading, to have the MinIO Tenant keep using these services, just add the following environment variables to `.spec.env`
+After upgrading, to have the MinIO Tenant keep using these services, just add the following environment variables
+to `.spec.env`
 
 ```yaml
 - name: MINIO_LOG_QUERY_AUTH_TOKEN
@@ -56,7 +67,6 @@ After upgrading, to have the MinIO Tenant keep using th
     value: http://<TENANT_NAME>-prometheus-hl-svc:9090
 ```
 
-
 v4.4.5
 ---
 
@@ -101,7 +111,7 @@ securityContext:
 
 This scenario is automatically handled by the operator, however if the tenant is updated from a pre-stored source (i.e:
 a yaml file) which is missing the added `securityContext` this problem may arise again, so update your stored yamls
-respectively. 
+respectively.
 
 v4.2.2 to v4.2.3
 ---
 
@@ -110,7 +120,8 @@ Before upgrading the `MinIO Operator` you need to make the following changes to
 
 - Update your current `MinIO image` to the latest version in the tenant spec.
 - Make sure every `pool` in `tenant.spec.pools` explicitly set a `securityContext` if not configured already, if this is
-  the first time you are configuring a `securityContext` then your `MinIO` pods are running as root, and you need to use:
+  the first time you are configuring a `securityContext` then your `MinIO` pods are running as root, and you need to
+  use:
 
 ```yaml
 securityContext:
@@ -124,7 +135,7 @@ Before upgrading the `MinIO Operator` you need to make the following changes to
 
 ```yaml
   image: "minio/minio:$(LATEST-VERSION)"
-  ... 
+  ...
   pools:
     - servers: 4
       name: "pool-0"
@@ -176,7 +187,7 @@ Before upgrading the `MinIO Operator` you need to make the following changes to
 
 ```yaml
   image: "minio/minio:$(LATEST-VERSION)"
-  ... 
+  ...
   zones:
     - servers: 4
       name: "zone-0"
@@ -195,29 +206,30 @@ Before upgrading the `MinIO Operator` you need to make the following changes to
         runAsGroup: 0
         runAsNonRoot: false
         fsGroup: 0
-  - servers: 4
-    name: "zone-1"
-    volumesPerServer: 4
-    volumeClaimTemplate:
-      metadata:
-        name: data
-      spec:
-        accessModes:
-          - ReadWriteOnce
-        resources:
-          requests:
-            storage: 1Ti
-    securityContext:
-      runAsUser: 0
-      runAsGroup: 0
-      runAsNonRoot: false
-      fsGroup: 0
+    - servers: 4
+      name: "zone-1"
+      volumesPerServer: 4
+      volumeClaimTemplate:
+        metadata:
+          name: data
+        spec:
+          accessModes:
+            - ReadWriteOnce
+          resources:
+            requests:
+              storage: 1Ti
+      securityContext:
+        runAsUser: 0
+        runAsGroup: 0
+        runAsNonRoot: false
+        fsGroup: 0
 ```
 
 You can make all the changes directly via `kubectl edit tenants $(TENANT-NAME) -n $(NAMESPACE)` or edit your
 `tenant.yaml` and apply the changes: `kubectl apply -f tenant.yaml`.
 
-Failing to apply this changes will cause some issues during the upgrade such as the tenants not able to provision because
+Failing to apply these changes will cause some issues during the upgrade, such as tenants not being able to provision
+because
 of wrong `persistent volume claims` (this happens if you don't add the zone name) or MinIO not able to `read/write` on
 existing volumes (this happens if you don't add the right `securityContext`) or they will take too long to start.
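Before applying these changes it can help to confirm what `securityContext` your tenant pods are currently running with. A quick sketch, assuming the standard `v1.min.io/tenant` pod label and placeholder tenant/namespace names:

```shell
# "tenant-ns" and "minio-tenant" are placeholders; substitute your namespace and tenant name.
kubectl -n tenant-ns get pods -l v1.min.io/tenant=minio-tenant \
  -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.securityContext}{"\n"}{end}'
```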
@@ -236,7 +248,8 @@ existing tenant.
 
 # Upgrade MinIO Operator via Helm Charts
 
-Make sure your current version of the `tenants.minio.min.io` `CRD` includes the necessary `labels` and `annotations` for `Helm`
+Make sure your current version of the `tenants.minio.min.io` `CRD` includes the necessary `labels` and `annotations`
+for `Helm`
 to perform the upgrade:
 
 ```bash
diff --git a/pkg/controller/minio-services.go b/pkg/controller/minio-services.go
index f3dbd9a2109..61a43048b3c 100644
--- a/pkg/controller/minio-services.go
+++ b/pkg/controller/minio-services.go
@@ -160,6 +160,7 @@ func (c *Controller) checkMinIOHLSvc(ctx context.Context, tenant *miniov2.Tenant
         hlSvc.ObjectMeta.Annotations = expectedHlSvc.ObjectMeta.Annotations
         hlSvc.ObjectMeta.Labels = expectedHlSvc.ObjectMeta.Labels
         hlSvc.Spec.Ports = expectedHlSvc.Spec.Ports
+        hlSvc.Spec.PublishNotReadyAddresses = expectedHlSvc.Spec.PublishNotReadyAddresses
         // update the selector
         hlSvc.Spec.Selector = expectedHlSvc.Spec.Selector
 
diff --git a/pkg/controller/upgrades.go b/pkg/controller/upgrades.go
index 2f7b304636e..5913ad6c71f 100644
--- a/pkg/controller/upgrades.go
+++ b/pkg/controller/upgrades.go
@@ -22,6 +22,7 @@ import (
     "github.com/minio/operator/pkg/controller/legacy"
     corev1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/types"
 
     "github.com/blang/semver/v4"
     "github.com/hashicorp/go-version"
 
@@ -41,8 +42,9 @@ const (
     version430     = "v4.3.0"
     version45      = "v4.5"
     version500     = "v5.0.0"
+    version600     = "v6.0.0"
     // currentVersion will point to the latest released update version
-    currentVersion = version500
+    currentVersion = version600
 )
 
 // Legacy const
@@ -62,12 +64,13 @@ func (c *Controller) checkForUpgrades(ctx context.Context, tenant *miniov2.Tenan
         version430: c.upgrade430,
         version45:  c.upgrade45,
         version500: c.upgrade500,
+        version600: c.upgrade600,
     }
 
     // if tenant has no version we mark it with latest version upgrade released
     if tenant.Status.SyncVersion == "" {
         tenant.Status.SyncVersion = currentVersion
-        return c.updateTenantSyncVersion(ctx, tenant, version500)
+        return c.updateTenantSyncVersion(ctx, tenant, version600)
     }
 
     // if the version is empty, upgrades might not been applied, we apply them all
@@ -84,6 +87,7 @@ func (c *Controller) checkForUpgrades(ctx context.Context, tenant *miniov2.Tenan
         version430,
         version45,
         version500,
+        version600,
     }
     for _, v := range versionsThatNeedUpgrades {
         vp, _ := version.NewVersion(v)
@@ -414,3 +418,25 @@ func (c *Controller) upgrade500(ctx context.Context, tenant *miniov2.Tenant) (*m
     }
     return c.updateTenantSyncVersion(ctx, tenant, version500)
 }
+
+// upgrade600 upgrades the sync version to v6.0.0.
+// Since we are adding `publishNotReadyAddresses` to the headless service, we need to restart all tenant pods.
+func (c *Controller) upgrade600(ctx context.Context, tenant *miniov2.Tenant) (*miniov2.Tenant, error) {
+    nsName := types.NamespacedName{Namespace: tenant.Namespace, Name: tenant.Name}
+    // Check MinIO Headless Service used for internode communication
+    err := c.checkMinIOHLSvc(ctx, tenant, nsName)
+    if err != nil {
+        klog.V(2).Infof("error consolidating headless service: %s", err.Error())
+        return nil, err
+    }
+    // restart all pods for this tenant
+    listOpts := metav1.ListOptions{
+        LabelSelector: fmt.Sprintf("%s=%s", miniov2.TenantLabel, tenant.Name),
+    }
+    err = c.kubeClientSet.CoreV1().Pods(tenant.Namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, listOpts)
+    if err != nil {
+        klog.V(2).Infof("error deleting pods: %s", err.Error())
+        return nil, err
+    }
+    return c.updateTenantSyncVersion(ctx, tenant, version600)
+}
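Note that the `DeleteCollection` call in `upgrade600` above deletes every pod carrying the tenant label at once rather than performing a rolling restart. As a sketch, it is roughly equivalent to running the following by hand (placeholder names; the selector uses the standard `v1.min.io/tenant` label that `miniov2.TenantLabel` resolves to):

```shell
# Delete all pods of tenant "minio-tenant" in namespace "tenant-ns" in one shot.
kubectl -n tenant-ns delete pods -l v1.min.io/tenant=minio-tenant
```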
diff --git a/pkg/resources/services/service.go b/pkg/resources/services/service.go
index 3e5da06d394..2a7a6021e7c 100644
--- a/pkg/resources/services/service.go
+++ b/pkg/resources/services/service.go
@@ -58,9 +58,10 @@ func NewClusterIPForMinIO(t *miniov2.Tenant) *corev1.Service {
             Annotations: annotations,
         },
         Spec: corev1.ServiceSpec{
-            Ports:    []corev1.ServicePort{minioPort},
-            Selector: t.MinIOPodLabels(),
-            Type:     corev1.ServiceTypeClusterIP,
+            Ports:                    []corev1.ServicePort{minioPort},
+            Selector:                 t.MinIOPodLabels(),
+            Type:                     corev1.ServiceTypeClusterIP,
+            PublishNotReadyAddresses: false,
         },
     }
     // check if the service is meant to be exposed
@@ -171,10 +172,11 @@ func NewHeadlessForMinIO(t *miniov2.Tenant) *corev1.Service {
             OwnerReferences: t.OwnerRef(),
         },
         Spec: corev1.ServiceSpec{
-            Ports:     ports,
-            Selector:  t.MinIOPodLabels(),
-            Type:      corev1.ServiceTypeClusterIP,
-            ClusterIP: corev1.ClusterIPNone,
+            Ports:                    ports,
+            Selector:                 t.MinIOPodLabels(),
+            Type:                     corev1.ServiceTypeClusterIP,
+            ClusterIP:                corev1.ClusterIPNone,
+            PublishNotReadyAddresses: true,
         },
     }
 
diff --git a/pkg/resources/statefulsets/minio-statefulset.go b/pkg/resources/statefulsets/minio-statefulset.go
index 159367dba4c..17f492203ea 100644
--- a/pkg/resources/statefulsets/minio-statefulset.go
+++ b/pkg/resources/statefulsets/minio-statefulset.go
@@ -21,6 +21,8 @@ import (
     "strconv"
     "strings"
 
+    "k8s.io/apimachinery/pkg/util/intstr"
+
     "github.com/minio/operator/pkg/certs"
     "github.com/minio/operator/pkg/common"
 
@@ -54,10 +56,6 @@ func minioEnvironmentVars(t *miniov2.Tenant, skipEnvVars map[string][]byte, opVe
             Name:  "MINIO_UPDATE_MINISIGN_PUBKEY",
             Value: "RWTx5Zr1tiHQLwG9keckT0c45M3AGeHD6IvimQHpyRywVWGbP1aVSGav",
         },
-        "MINIO_OPERATOR_VERSION": {
-            Name:  "MINIO_OPERATOR_VERSION",
-            Value: opVersion,
-        },
         "MINIO_PROMETHEUS_JOB_ID": {
             Name:  "MINIO_PROMETHEUS_JOB_ID",
             Value: t.PrometheusConfigJobName(),
@@ -909,6 +907,24 @@ func getInitContainer(t *miniov2.Tenant, pool *miniov2.Pool) corev1.Container {
 }
 
 func getSideCarContainer(t *miniov2.Tenant, pool *miniov2.Pool) corev1.Container {
+    scheme := corev1.URISchemeHTTP
+
+    // the sidecar serves /ready over plain HTTP on port 4444; see configureProbesServer below
+    readinessProbe := &corev1.Probe{
+        ProbeHandler: corev1.ProbeHandler{
+            HTTPGet: &corev1.HTTPGetAction{
+                Path: "/ready",
+                Port: intstr.IntOrString{
+                    IntVal: 4444,
+                },
+                Scheme: scheme,
+            },
+        },
+        InitialDelaySeconds: 5,
+        PeriodSeconds:       1,
+        FailureThreshold:    1,
+    }
+
     sidecarContainer := corev1.Container{
         Name:  "sidecar",
         Image: getSidecarImage(),
@@ -929,6 +945,7 @@ func getSideCarContainer(t *miniov2.Tenant, pool *miniov2.Pool) corev1.Container
             CfgVolumeMount,
         },
         SecurityContext: poolContainerSecurityContext(pool),
+        ReadinessProbe:  readinessProbe,
     }
     if t.Spec.SideCars != nil && t.Spec.SideCars.Resources != nil {
         sidecarContainer.Resources = *t.Spec.SideCars.Resources
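For reference, the probe constructed above should render in the sidecar container of the tenant StatefulSet roughly as follows (a sketch derived from the field values in the Go code, not captured from a live cluster):

```yaml
readinessProbe:
  httpGet:
    path: /ready
    port: 4444
    scheme: HTTP
  initialDelaySeconds: 5
  periodSeconds: 1
  failureThreshold: 1
```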
diff --git a/sidecar/DEVELOPMENT.md b/sidecar/DEVELOPMENT.md
new file mode 100644
index 00000000000..9b613214a47
--- /dev/null
+++ b/sidecar/DEVELOPMENT.md
@@ -0,0 +1,18 @@
+# MinIO Operator Sidecar
+
+This document provides information on how to build and test the sidecar container.
+
+## Testing
+
+Build this project into a container image with the following command:
+
+```shell
+TAG=miniodev/operator-sidecar:sc GOOS=linux make docker
+```
+
+Patch the MinIO Operator deployment to include the sidecar container via the `OPERATOR_SIDECAR_IMAGE` environment
+variable:
+
+```shell
+kubectl patch deployment minio-operator -n minio-operator --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/1", "value": {"name": "sidecar", "image": "miniodev/operator-sidecar:sc"}}]'
+```
\ No newline at end of file
diff --git a/sidecar/pkg/common/generate_config.go b/sidecar/pkg/common/generate_config.go
new file mode 100644
index 00000000000..4330b007e9b
--- /dev/null
+++ b/sidecar/pkg/common/generate_config.go
@@ -0,0 +1,54 @@
+// This file is part of MinIO Operator
+// Copyright (c) 2023 MinIO, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package common
+
+import (
+    "fmt"
+    "log"
+    "os"
+    "strings"
+
+    miniov2 "github.com/minio/operator/pkg/apis/minio.min.io/v2"
+    "github.com/minio/operator/pkg/resources/statefulsets"
+)
+
+// AttachGeneratedConfig appends the generated MINIO_ARGS export to the file contents that will be stored in /tmp/minio/config.env
+func AttachGeneratedConfig(tenant *miniov2.Tenant, fileContents string) string {
+    args, err := GetTenantArgs(tenant)
+    if err != nil {
+        log.Println(err)
+        os.Exit(1)
+    }
+
+    fileContents = fileContents + fmt.Sprintf("export MINIO_ARGS=\"%s\"\n", args)
+
+    return fileContents
+}
+
+// GetTenantArgs validates the tenant and returns the MinIO server command-line arguments for it
+func GetTenantArgs(tenant *miniov2.Tenant) (string, error) {
+    if tenant == nil {
+        return "", fmt.Errorf("tenant is nil")
+    }
+    // Validate the MinIO Tenant
+    if err := tenant.Validate(); err != nil {
+        log.Println(err)
+        return "", err
+    }
+    args := strings.Join(statefulsets.GetContainerArgs(tenant, ""), " ")
+    return args, nil
+}
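To make the data flow concrete: the seed contents come from the tenant's configuration secret, and `AttachGeneratedConfig` appends the `MINIO_ARGS` line. The resulting `/tmp/minio/config.env` then looks roughly like this (illustrative values only; the hostnames follow the operator's pool/headless-service naming scheme, and real credentials come from the secret):

```shell
export MINIO_ROOT_USER="minio"
export MINIO_ROOT_PASSWORD="minio123"
export MINIO_ARGS="https://minio-tenant-pool-0-{0...3}.minio-tenant-hl.tenant-ns.svc.cluster.local/export{0...3}/data"
```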
diff --git a/sidecar/pkg/sidecar/sidecar_utils.go b/sidecar/pkg/sidecar/sidecar_utils.go
index 746cde2130f..e40e4859705 100644
--- a/sidecar/pkg/sidecar/sidecar_utils.go
+++ b/sidecar/pkg/sidecar/sidecar_utils.go
@@ -25,6 +25,9 @@ import (
     "strings"
     "time"
 
+    common2 "github.com/minio/operator/sidecar/pkg/common"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
     v2 "github.com/minio/operator/pkg/apis/minio.min.io/v2"
     clientset "github.com/minio/operator/pkg/client/clientset/versioned"
     minioInformers "github.com/minio/operator/pkg/client/informers/externalversions"
@@ -65,8 +68,18 @@ func StartSideCar(tenantName string, secretName string) {
         klog.Fatalf("Error building MinIO clientset: %s", err.Error())
     }
 
-    controller := NewSideCarController(kubeClient, controllerClient, tenantName, secretName)
+    namespace := v2.GetNSFromFile()
+    // get the tenant this sidecar serves
+    tenant, err := controllerClient.MinioV2().Tenants(namespace).Get(context.Background(), tenantName, metav1.GetOptions{})
+    if err != nil {
+        log.Fatal(err)
+    }
+
+    tenant.EnsureDefaults()
+
+    controller := NewSideCarController(kubeClient, controllerClient, namespace, tenantName, secretName)
     controller.ws = configureWebhookServer(controller)
+    controller.probeServer = configureProbesServer(controller, tenant.TLS())
 
     stopControllerCh := make(chan struct{})
 
@@ -84,6 +97,14 @@ func StartSideCar(tenantName string, secretName string) {
         }
     }()
 
+    go func() {
+        if err = controller.probeServer.ListenAndServe(); err != nil {
+            // if the probe server exits, stop the controller
+            klog.Error(err)
+            close(stopControllerCh)
+        }
+    }()
+
     <-stopControllerCh
 }
 
@@ -99,12 +120,11 @@ type Controller struct {
     namespace        string
     informerFactory  informers.SharedInformerFactory
     ws               *http.Server
+    probeServer      *http.Server
 }
 
 // NewSideCarController returns an instance of Controller with the provided clients
-func NewSideCarController(kubeClient *kubernetes.Clientset, controllerClient *clientset.Clientset, tenantName string, secretName string) *Controller {
-    namespace := v2.GetNSFromFile()
-
+func NewSideCarController(kubeClient *kubernetes.Clientset, controllerClient *clientset.Clientset, namespace string, tenantName string, secretName string) *Controller {
     factory := informers.NewSharedInformerFactoryWithOptions(kubeClient, time.Hour*1, informers.WithNamespace(namespace))
 
     secretInformer := factory.Core().V1().Secrets()
@@ -201,13 +221,14 @@ func (c *Controller) regenCfg(tenantName string, namespace string) {
 func (c *Controller) regenCfgWithCfg(tenantName string, namespace string, fileContents string) {
     ctx := context.Background()
 
-    args, err := validator.GetTenantArgs(ctx, c.controllerClient, tenantName, namespace)
+    tenant, err := c.controllerClient.MinioV2().Tenants(namespace).Get(ctx, tenantName, metav1.GetOptions{})
     if err != nil {
-        log.Println(err)
+        log.Println("could not get tenant", err)
         return
     }
+    tenant.EnsureDefaults()
 
-    fileContents = fileContents + fmt.Sprintf("export MINIO_ARGS=\"%s\"\n", args)
+    fileContents = common2.AttachGeneratedConfig(tenant, fileContents)
 
     err = os.WriteFile(v2.CfgFile, []byte(fileContents), 0o644)
     if err != nil {
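While developing, the new `/ready` endpoint can be exercised directly with a port-forward (a sketch; the pod name is a placeholder, and port 4444 is the probe server address configured in `configureProbesServer` below):

```shell
kubectl -n tenant-ns port-forward pod/minio-tenant-pool-0-0 4444:4444 &
curl -i http://localhost:4444/ready
```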
diff --git a/sidecar/pkg/sidecar/webhook_server.go b/sidecar/pkg/sidecar/webhook_server.go
index 0ba622a11b1..e30bc51aa0a 100644
--- a/sidecar/pkg/sidecar/webhook_server.go
+++ b/sidecar/pkg/sidecar/webhook_server.go
@@ -17,6 +17,8 @@
 package sidecar
 
 import (
+    "crypto/tls"
+    "fmt"
     "net/http"
     "time"
 
@@ -55,3 +57,62 @@ func configureWebhookServer(c *Controller) *http.Server {
 
     return s
 }
+
+func configureProbesServer(c *Controller, tenantTLS bool) *http.Server {
+    router := mux.NewRouter().SkipClean(true).UseEncodedPath()
+
+    router.Methods(http.MethodGet).
+        Path("/ready").
+        HandlerFunc(readinessHandler(tenantTLS))
+
+    router.NotFoundHandler = http.NotFoundHandler()
+
+    s := &http.Server{
+        Addr:           "0.0.0.0:4444",
+        Handler:        router,
+        ReadTimeout:    time.Minute,
+        WriteTimeout:   time.Minute,
+        MaxHeaderBytes: 1 << 20,
+    }
+
+    return s
+}
+
+func readinessHandler(tenantTLS bool) func(w http.ResponseWriter, r *http.Request) {
+    return func(w http.ResponseWriter, r *http.Request) {
+        scheme := "https"
+        if !tenantTLS {
+            scheme = "http"
+        }
+        // we only check against the local instance of MinIO
+        url := scheme + "://localhost:9000"
+        request, err := http.NewRequest("GET", url, nil)
+        if err != nil {
+            http.Error(w, fmt.Sprintf("Failed to create request: %s", err), http.StatusInternalServerError)
+            return
+        }
+
+        // we do insecure skip verify because we are checking against the local instance and don't care about the
+        // certificate
+        client := &http.Client{
+            Timeout: time.Millisecond * 500,
+            Transport: &http.Transport{
+                TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
+            },
+        }
+
+        response, err := client.Do(request)
+        if err != nil {
+            http.Error(w, fmt.Sprintf("HTTP request failed: %s", err), http.StatusInternalServerError)
+            return
+        }
+        defer response.Body.Close()
+
+        // an unauthenticated GET against a healthy MinIO returns 403 AccessDenied, so 403 means "up and serving"
+        if response.StatusCode == http.StatusForbidden {
+            fmt.Fprintln(w, "Readiness probe succeeded.")
+        } else {
+            http.Error(w, fmt.Sprintf("Readiness probe failed: expected status 403, got %d", response.StatusCode), http.StatusServiceUnavailable)
+        }
+    }
+}
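The 403 convention that `readinessHandler` relies on can be checked against any running MinIO instance: an unauthenticated `GET /` returns `403 AccessDenied` once the server is up and serving requests. A sketch, run from inside the MinIO container:

```shell
# Prints 403 when MinIO is ready; -k skips certificate verification, just as the handler does.
curl -sk -o /dev/null -w '%{http_code}\n' https://localhost:9000
```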
diff --git a/sidecar/pkg/validator/validator.go b/sidecar/pkg/validator/validator.go
index d554a8207ae..79c9496804b 100644
--- a/sidecar/pkg/validator/validator.go
+++ b/sidecar/pkg/validator/validator.go
@@ -19,14 +19,14 @@ package validator
 import (
     "bufio"
     "context"
-    "fmt"
     "log"
     "os"
     "strings"
 
+    common2 "github.com/minio/operator/sidecar/pkg/common"
+
     miniov2 "github.com/minio/operator/pkg/apis/minio.min.io/v2"
     clientset "github.com/minio/operator/pkg/client/clientset/versioned"
-    "github.com/minio/operator/pkg/resources/statefulsets"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/client-go/rest"
     "k8s.io/klog/v2"
@@ -58,13 +58,15 @@ func Validate(tenantName string) {
 
     ctx := context.Background()
 
-    args, err := GetTenantArgs(ctx, controllerClient, tenantName, namespace)
+    // get the tenant being validated
+    tenant, err := controllerClient.MinioV2().Tenants(namespace).Get(ctx, tenantName, metav1.GetOptions{})
     if err != nil {
         log.Println(err)
-        os.Exit(1)
+        panic(err)
     }
+    tenant.EnsureDefaults()
 
-    fileContents = fileContents + fmt.Sprintf("export MINIO_ARGS=\"%s\"\n", args)
+    fileContents = common2.AttachGeneratedConfig(tenant, fileContents)
 
     if !rootUserFound || !rootPwdFound {
         log.Println("Missing root credentials in the configuration.")
@@ -78,27 +80,6 @@ func Validate(tenantName string) {
     }
 }
 
-// GetTenantArgs returns the arguments for the tenant based on the tenants they have
-func GetTenantArgs(ctx context.Context, controllerClient *clientset.Clientset, tenantName string, namespace string) (string, error) {
-    // get the only tenant in this namespace
-    tenant, err := controllerClient.MinioV2().Tenants(namespace).Get(ctx, tenantName, metav1.GetOptions{})
-    if err != nil {
-        log.Println(err)
-        return "", err
-    }
-
-    tenant.EnsureDefaults()
-
-    // Validate the MinIO Tenant
-    if err = tenant.Validate(); err != nil {
-        log.Println(err)
-        return "", err
-    }
-
-    args := strings.Join(statefulsets.GetContainerArgs(tenant, ""), " ")
-    return args, err
-}
-
 // ReadTmpConfig reads the seeded configuration from a tmp location
 func ReadTmpConfig() (bool, bool, string, error) {
     file, err := os.Open("/tmp/minio-config/config.env")