From 3b24cd35d93ec0399be4a77f75790e756b2c1340 Mon Sep 17 00:00:00 2001 From: Njal Karevoll Date: Fri, 5 Jul 2019 11:04:02 +0200 Subject: [PATCH 01/31] Store transport keys and certificates in a single shared secret. This facilitates a move to StatefulSets where the mounted secrets must be the same between all the Pods in the same StatefulSet --- .../certificates/ca_reconcile.go | 2 +- .../certificates/transport/csr.go | 4 +- .../certificates/transport/csr_test.go | 6 +- .../certificates/transport/pod_secret.go | 266 +++++++++--- .../certificates/transport/pod_secret_test.go | 258 +++++++---- .../certificates/transport/reconcile.go | 304 +++++-------- .../certificates/transport/reconcile_test.go | 405 +++--------------- .../transport/transport_fixtures_test.go | 129 ++++++ .../controller/elasticsearch/driver/pods.go | 40 +- .../elasticsearch/driver/pods_test.go | 14 +- .../elasticsearch/initcontainer/prepare_fs.go | 31 +- .../initcontainer/prepare_fs_script.go | 46 +- .../pkg/controller/elasticsearch/name/name.go | 26 +- .../pkg/controller/elasticsearch/pod/pod.go | 9 + .../elasticsearch/settings/merged_config.go | 18 +- .../elasticsearch/version/common.go | 10 +- .../controller/elasticsearch/volume/names.go | 5 + .../controller/kibana/config/settings_test.go | 2 +- .../test/e2e/test/elasticsearch/checks_k8s.go | 2 +- operators/test/e2e/test/k8s_client.go | 9 +- 20 files changed, 800 insertions(+), 786 deletions(-) create mode 100644 operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go diff --git a/operators/pkg/controller/elasticsearch/certificates/ca_reconcile.go b/operators/pkg/controller/elasticsearch/certificates/ca_reconcile.go index dd400f712a..a2c638e778 100644 --- a/operators/pkg/controller/elasticsearch/certificates/ca_reconcile.go +++ b/operators/pkg/controller/elasticsearch/certificates/ca_reconcile.go @@ -107,7 +107,7 @@ func Reconcile( } // reconcile transport certificates - result, err := transport.ReconcileTransportCertificateSecrets( + result, err := transport.ReconcileTransportCertificatesSecrets( c, scheme, transportCA, diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/csr.go b/operators/pkg/controller/elasticsearch/certificates/transport/csr.go index 3da84f70ba..b13e5cc3a4 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/csr.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/csr.go @@ -18,8 +18,8 @@ import ( corev1 "k8s.io/api/core/v1" ) -// CreateValidatedCertificateTemplate validates a CSR and creates a certificate template. -func CreateValidatedCertificateTemplate( +// createValidatedCertificateTemplate validates a CSR and creates a certificate template. 
+func createValidatedCertificateTemplate( pod corev1.Pod, cluster v1alpha1.Elasticsearch, svcs []corev1.Service, diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/csr_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/csr_test.go index dd2d3f3385..5d0953cc89 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/csr_test.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/csr_test.go @@ -35,8 +35,8 @@ func Test_createValidatedCertificateTemplate(t *testing.T) { // we expect this name to be used for both the common name as well as the es othername cn := "test-pod-name.node.test-es-name.test-namespace.es.local" - validatedCert, err := CreateValidatedCertificateTemplate( - testPod, testCluster, []corev1.Service{testSvc}, testCSR, certificates.DefaultCertValidity, + validatedCert, err := createValidatedCertificateTemplate( + testPod, testES, []corev1.Service{testSvc}, testCSR, certificates.DefaultCertValidity, ) require.NoError(t, err) @@ -86,7 +86,7 @@ func Test_buildGeneralNames(t *testing.T) { { name: "no svcs and user-provided SANs", args: args{ - cluster: testCluster, + cluster: testES, pod: testPod, }, want: []certificates.GeneralName{ diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go index 24e932eb52..2b35ca810d 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go @@ -5,80 +5,216 @@ package transport import ( + cryptorand "crypto/rand" + "crypto/rsa" + "crypto/x509" + "fmt" + "reflect" + "time" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" ) -const ( - // LabelCertificateType is a label key that specifies what type of certificates the secret contains - LabelCertificateType = "certificates.elasticsearch.k8s.elastic.co/type" - // LabelCertificateTypeTransport is the LabelCertificateType value used for transport certificates - LabelCertificateTypeTransport = "transport" -) +// PodKeyFileName returns the name of the private key entry for a specific pod in a transport certificates secret. +func PodKeyFileName(podName string) string { + return fmt.Sprintf("%s.%s", podName, certificates.KeyFileName) +} -// EnsureTransportCertificateSecretExists ensures the existence and Labels of the corev1.Secret that at a later point -// in time will contain the transport certificates. -func EnsureTransportCertificateSecretExists( - c k8s.Client, - scheme *runtime.Scheme, +// PodCertFileName returns the name of the certificates entry for a specific pod in a transport certificates secret. 
+func PodCertFileName(podName string) string { + return fmt.Sprintf("%s.%s", podName, certificates.CertFileName) +} + +// ensureTransportCertificatesSecretContentsForPod ensures that the transport certificates secret has the correct +// content for a specific pod +func ensureTransportCertificatesSecretContentsForPod( es v1alpha1.Elasticsearch, + secret *corev1.Secret, pod corev1.Pod, -) (*corev1.Secret, error) { - expected := corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: pod.Namespace, - Name: name.TransportCertsSecret(pod.Name), - - Labels: map[string]string{ - // a label that allows us to list secrets of this type - LabelCertificateType: LabelCertificateTypeTransport, - // a label referencing the related pod so we can look up the pod from this secret - label.PodNameLabelName: pod.Name, - // a label showing which cluster this pod belongs to - label.ClusterNameLabelName: es.Name, - }, - }, + svcs []corev1.Service, + ca *certificates.CA, + rotationParams certificates.RotationParams, +) error { + // verify that the secret contains a parsable private key, create if it does not exist + var privateKey *rsa.PrivateKey + needsNewPrivateKey := true + if privateKeyData, ok := secret.Data[PodKeyFileName(pod.Name)]; ok { + storedPrivateKey, err := certificates.ParsePEMPrivateKey(privateKeyData) + if err != nil { + log.Error(err, "Unable to parse stored private key", "pod", pod.Name) + } else { + needsNewPrivateKey = false + privateKey = storedPrivateKey + } + } + + // if we need a new private key, generate it + if needsNewPrivateKey { + generatedPrivateKey, err := rsa.GenerateKey(cryptorand.Reader, 2048) + if err != nil { + return err + } + + privateKey = generatedPrivateKey + secret.Data[PodKeyFileName(pod.Name)] = certificates.EncodePEMPrivateKey(*privateKey) + } + + if shouldIssueNewCertificate(es, *secret, pod, privateKey, svcs, ca, rotationParams.RotateBefore) { + log.Info( + "Issuing new certificate", + "pod", pod.Name, + ) + + csr, err := x509.CreateCertificateRequest(cryptorand.Reader, &x509.CertificateRequest{}, privateKey) + if err != nil { + return err + } + + // create a cert from the csr + parsedCSR, err := x509.ParseCertificateRequest(csr) + if err != nil { + return err + } + + validatedCertificateTemplate, err := createValidatedCertificateTemplate( + pod, es, svcs, parsedCSR, rotationParams.Validity, + ) + if err != nil { + return err + } + // sign the certificate + certData, err := ca.CreateCertificate(*validatedCertificateTemplate) + if err != nil { + return err + } + + // store the issued certificate in a secret mounted into the pod + secret.Data[PodCertFileName(pod.Name)] = certificates.EncodePEMCert(certData, ca.Cert.Raw) + } + + return nil +} + +// shouldIssueNewCertificate returns true if we should issue a new certificate. 
+//
+// Reasons for reissuing a certificate:
+// - no certificate yet
+// - certificate has the wrong format
+// - certificate is invalid or expired
+// - certificate SAN and IP do not match pod SAN and IP
+func shouldIssueNewCertificate(
+ es v1alpha1.Elasticsearch,
+ secret corev1.Secret,
+ pod corev1.Pod,
+ privateKey *rsa.PrivateKey,
+ svcs []corev1.Service,
+ ca *certificates.CA,
+ certReconcileBefore time.Duration,
+) bool {
+ certCommonName := buildCertificateCommonName(pod, es.Name, es.Namespace)
+
+ generalNames, err := buildGeneralNames(es, svcs, pod)
+ if err != nil {
+ log.Error(err, "Cannot create GeneralNames for the TLS certificate", "pod", pod.Name)
+ return true
 }
- // reconcile the secret resource
- var reconciled corev1.Secret
- if err := reconciler.ReconcileResource(reconciler.Params{
- Client: c,
- Scheme: scheme,
- Owner: &es,
- Expected: &expected,
- Reconciled: &reconciled,
- NeedsUpdate: func() bool {
- // we only care about labels, not contents at this point, and we can allow additional labels
- if reconciled.Labels == nil {
- return true
- }
-
- for k, v := range expected.Labels {
- if rv, ok := reconciled.Labels[k]; !ok || rv != v {
- return true
- }
- }
- return false
- },
- UpdateReconciled: func() {
- if reconciled.Labels == nil {
- reconciled.Labels = expected.Labels
- } else {
- for k, v := range expected.Labels {
- reconciled.Labels[k] = v
- }
- }
- },
- }); err != nil {
- return nil, err
+ cert := extractTransportCert(secret, pod, certCommonName)
+ if cert == nil {
+ return true
 }
 
- return &reconciled, nil
+ publicKey, publicKeyOk := cert.PublicKey.(*rsa.PublicKey)
+ if !publicKeyOk || publicKey.N.Cmp(privateKey.PublicKey.N) != 0 || publicKey.E != privateKey.PublicKey.E {
+ log.Info(
+ "Certificate belongs to a different public key, should issue new",
+ "subject", cert.Subject,
+ "issuer", cert.Issuer,
+ "current_ca_subject", ca.Cert.Subject,
+ "pod", pod.Name,
+ )
+ return true
+ }
+
+ pool := x509.NewCertPool()
+ pool.AddCert(ca.Cert)
+ verifyOpts := x509.VerifyOptions{
+ DNSName: certCommonName,
+ Roots: pool,
+ Intermediates: pool,
+ }
+ if _, err := cert.Verify(verifyOpts); err != nil {
+ log.Info(
+ fmt.Sprintf("Certificate was not valid, should issue new: %s", err),
+ "subject", cert.Subject,
+ "issuer", cert.Issuer,
+ "current_ca_subject", ca.Cert.Subject,
+ "pod", pod.Name,
+ )
+ return true
+ }
+
+ if time.Now().After(cert.NotAfter.Add(-certReconcileBefore)) {
+ log.Info("Certificate soon to expire, should issue new", "pod", pod.Name)
+ return true
+ }
+
+ // compare actual vs. 
expected SANs + expected, err := certificates.MarshalToSubjectAlternativeNamesData(generalNames) + if err != nil { + log.Error(err, "Cannot marshal subject alternative names", "pod", pod.Name) + return true + } + extraExtensionFound := false + for _, ext := range cert.Extensions { + if !ext.Id.Equal(certificates.SubjectAlternativeNamesObjectIdentifier) { + continue + } + extraExtensionFound = true + if !reflect.DeepEqual(ext.Value, expected) { + log.Info("Certificate SANs do not match expected one, should issue new", "pod", pod.Name) + return true + } + } + if !extraExtensionFound { + log.Info("SAN extra extension not found, should issue new certificate", "pod", pod.Name) + return true + } + + return false +} + +// extractTransportCert extracts the transport certificate for the pod with the commonName from the Secret +func extractTransportCert(secret corev1.Secret, pod corev1.Pod, commonName string) *x509.Certificate { + certData, ok := secret.Data[PodCertFileName(pod.Name)] + if !ok { + log.Info("No tls certificate found in secret", "pod", pod.Name) + return nil + } + + certs, err := certificates.ParsePEMCerts(certData) + if err != nil { + log.Error(err, "Invalid certificate data found, issuing new certificate", "pod", pod.Name) + return nil + } + + // look for the certificate based on the CommonName + var names []string + for _, c := range certs { + if c.Subject.CommonName == commonName { + return c + } + names = append(names, c.Subject.CommonName) + } + + log.Info( + "Did not find a certificate with the expected common name", + "pod", pod.Name, + "expected", commonName, + "found", names, + ) + + return nil } diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret_test.go index 19d1dfdbf2..c3bbad9d0d 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret_test.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret_test.go @@ -6,121 +6,219 @@ package transport import ( "testing" + "time" - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestEnsureTransportCertificateSecretExists(t *testing.T) { - es := v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-es", - }, +func Test_shouldIssueNewCertificate(t *testing.T) { + type args struct { + secret corev1.Secret + pod *corev1.Pod + rotateBefore time.Duration } - - pod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pod", + tests := []struct { + name string + args args + want bool + }{ + { + name: "missing cert in secret", + args: args{ + secret: corev1.Secret{}, + rotateBefore: certificates.DefaultRotateBefore, + }, + want: true, }, - } - - defaultPodSecret := &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pod-certs", - Labels: map[string]string{ - LabelCertificateType: LabelCertificateTypeTransport, - label.PodNameLabelName: pod.Name, - label.ClusterNameLabelName: es.Name, + { + 
name: "invalid cert data", + args: args{ + secret: corev1.Secret{ + Data: map[string][]byte{ + PodCertFileName(testPod.Name): []byte("invalid"), + }, + }, + rotateBefore: certificates.DefaultRotateBefore, }, + want: true, + }, + { + name: "pod name mismatch", + args: args{ + secret: corev1.Secret{ + Data: map[string][]byte{ + PodCertFileName(testPod.Name): pemCert, + }, + }, + pod: &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "different"}}, + rotateBefore: certificates.DefaultRotateBefore, + }, + want: true, + }, + { + name: "valid cert", + args: args{ + secret: corev1.Secret{ + Data: map[string][]byte{ + PodCertFileName(testPod.Name): pemCert, + }, + }, + rotateBefore: certificates.DefaultRotateBefore, + }, + want: false, + }, + { + name: "should be rotated soon", + args: args{ + secret: corev1.Secret{ + Data: map[string][]byte{ + PodCertFileName(testPod.Name): pemCert, + }, + }, + rotateBefore: certificates.DefaultCertValidity, // rotate before the same duration as total validity + }, + want: true, }, } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.args.pod == nil { + tt.args.pod = &testPod + } - defaultPodSecretWith := func(setter func(secret *corev1.Secret)) *corev1.Secret { - secret := defaultPodSecret.DeepCopy() - setter(secret) - return secret + if got := shouldIssueNewCertificate( + testES, + tt.args.secret, + *tt.args.pod, + testRSAPrivateKey, + []corev1.Service{testSvc}, + testCA, + tt.args.rotateBefore, + ); got != tt.want { + t.Errorf("shouldIssueNewCertificate() = %v, want %v", got, tt.want) + } + }) } +} - type args struct { - c k8s.Client - scheme *runtime.Scheme - owner v1alpha1.Elasticsearch - pod corev1.Pod - labels map[string]string - } +func Test_ensureTransportCertificatesSecretContentsForPod(t *testing.T) { tests := []struct { - name string - args args - want func(*testing.T, *corev1.Secret) - wantErr bool + name string + secret *corev1.Secret + pod *corev1.Pod + assertions func(t *testing.T, before corev1.Secret, after corev1.Secret) + wantErr func(t *testing.T, err error) }{ { - name: "should create a secret if it does not already exist", - args: args{ - c: k8s.WrapClient(fake.NewFakeClient()), - owner: es, - pod: pod, + name: "no private key in the secret", + secret: &corev1.Secret{ + Data: map[string][]byte{ + PodCertFileName(testPod.Name): pemCert, + }, }, - want: func(t *testing.T, secret *corev1.Secret) { - // owner references are set upon creation, so ignore for comparison - expected := defaultPodSecretWith(func(s *corev1.Secret) { - s.OwnerReferences = secret.OwnerReferences - }) - assert.Equal(t, expected, secret) + assertions: func(t *testing.T, before corev1.Secret, after corev1.Secret) { + assert.NotEmpty(t, after.Data[PodKeyFileName(testPod.Name)]) + assert.NotEmpty(t, after.Data[PodCertFileName(testPod.Name)]) + + // cert should be re-generated + assert.NotEqual(t, after.Data[PodCertFileName(testPod.Name)], before.Data[PodCertFileName(testPod.Name)]) }, }, { - name: "should update an existing secret", - args: args{ - c: k8s.WrapClient(fake.NewFakeClient(defaultPodSecretWith(func(secret *corev1.Secret) { - secret.ObjectMeta.UID = types.UID("42") - }))), - owner: es, - pod: pod, + name: "no cert in the secret", + secret: &corev1.Secret{ + Data: map[string][]byte{ + PodKeyFileName(testPod.Name): certificates.EncodePEMPrivateKey(*testRSAPrivateKey), + }, }, - want: func(t *testing.T, secret *corev1.Secret) { - // UID should be kept the same - assert.Equal(t, defaultPodSecretWith(func(secret *corev1.Secret) { - 
secret.ObjectMeta.UID = types.UID("42") - }), secret) + assertions: func(t *testing.T, before corev1.Secret, after corev1.Secret) { + assert.NotEmpty(t, after.Data[PodKeyFileName(testPod.Name)]) + assert.NotEmpty(t, after.Data[PodCertFileName(testPod.Name)]) + + // key should be re-used + assert.Equal(t, before.Data[PodKeyFileName(testPod.Name)], after.Data[PodKeyFileName(testPod.Name)]) }, }, { - name: "should allow additional labels in the secret", - args: args{ - c: k8s.WrapClient(fake.NewFakeClient(defaultPodSecretWith(func(secret *corev1.Secret) { - secret.ObjectMeta.Labels["foo"] = "bar" - }))), - owner: es, - pod: pod, + name: "cert does not belong to the key in the secret", + secret: &corev1.Secret{ + Data: map[string][]byte{ + PodKeyFileName(testPod.Name): certificates.EncodePEMPrivateKey(*testRSAPrivateKey), + PodCertFileName(testPod.Name): certificates.EncodePEMCert(testCA.Cert.Raw), + }, }, - want: func(t *testing.T, secret *corev1.Secret) { - assert.Equal(t, defaultPodSecretWith(func(secret *corev1.Secret) { - secret.ObjectMeta.Labels["foo"] = "bar" - }), secret) + assertions: func(t *testing.T, before corev1.Secret, after corev1.Secret) { + assert.NotEmpty(t, after.Data[PodKeyFileName(testPod.Name)]) + assert.NotEmpty(t, after.Data[PodCertFileName(testPod.Name)]) + + // key should be re-used + assert.Equal(t, before.Data[PodKeyFileName(testPod.Name)], after.Data[PodKeyFileName(testPod.Name)]) + assert.NotEqual(t, after.Data[PodCertFileName(testPod.Name)], before.Data[PodCertFileName(testPod.Name)]) + }, + }, + { + name: "invalid cert in the secret", + secret: &corev1.Secret{ + Data: map[string][]byte{ + PodKeyFileName(testPod.Name): certificates.EncodePEMPrivateKey(*testRSAPrivateKey), + PodCertFileName(testPod.Name): []byte("invalid"), + }, + }, + assertions: func(t *testing.T, before corev1.Secret, after corev1.Secret) { + assert.NotEmpty(t, after.Data[PodKeyFileName(testPod.Name)]) + assert.NotEmpty(t, after.Data[PodCertFileName(testPod.Name)]) + + // key should be re-used + assert.Equal(t, before.Data[PodKeyFileName(testPod.Name)], after.Data[PodKeyFileName(testPod.Name)]) + assert.NotEqual(t, after.Data[PodCertFileName(testPod.Name)], before.Data[PodCertFileName(testPod.Name)]) + }, + }, + { + name: "valid data should not require updating", + secret: &corev1.Secret{ + Data: map[string][]byte{ + PodKeyFileName(testPod.Name): certificates.EncodePEMPrivateKey(*testRSAPrivateKey), + PodCertFileName(testPod.Name): pemCert, + }, + }, + assertions: func(t *testing.T, before corev1.Secret, after corev1.Secret) { + assert.Equal(t, before, after) }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if tt.args.scheme == nil { - tt.args.scheme = scheme.Scheme + if tt.secret == nil { + tt.secret = &corev1.Secret{} } + if tt.pod == nil { + tt.pod = testPod.DeepCopy() + } + + beforeSecret := tt.secret.DeepCopy() - got, err := EnsureTransportCertificateSecretExists(tt.args.c, tt.args.scheme, tt.args.owner, tt.args.pod) - if (err != nil) != tt.wantErr { - t.Errorf("EnsureTransportCertificateSecretExists() error = %v, wantErr %v", err, tt.wantErr) + err := ensureTransportCertificatesSecretContentsForPod( + testES, + tt.secret, + *tt.pod, + []corev1.Service{testSvc}, + testCA, + certificates.RotationParams{ + Validity: certificates.DefaultCertValidity, + RotateBefore: certificates.DefaultRotateBefore, + }, + ) + if tt.wantErr != nil { + tt.wantErr(t, err) return } - tt.want(t, got) + require.NoError(t, err) + + tt.assertions(t, *beforeSecret, *tt.secret) }) } } diff 
--git a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go index 9634cada02..bb7601ac9d 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go @@ -6,19 +6,19 @@ package transport import ( "bytes" - cryptorand "crypto/rand" - "crypto/rsa" - "crypto/x509" - "fmt" "reflect" - "time" + "strings" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/annotation" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -27,9 +27,9 @@ import ( var log = logf.Log.WithName("transport") -// ReconcileTransportCertificateSecrets reconciles certificate secrets for nodes -// of the given es cluster. -func ReconcileTransportCertificateSecrets( +// ReconcileTransportCertificatesSecrets reconciles the secret containing transport certificates for all nodes in the +// cluster. +func ReconcileTransportCertificatesSecrets( c k8s.Client, scheme *runtime.Scheme, ca *certificates.CA, @@ -37,7 +37,7 @@ func ReconcileTransportCertificateSecrets( services []corev1.Service, rotationParams certificates.RotationParams, ) (reconcile.Result, error) { - log.Info("Reconciling transport certificate secrets") + log.Info("Reconciling transport certificates secrets") var pods corev1.PodList if err := c.List(&client.ListOptions{ @@ -47,235 +47,127 @@ func ReconcileTransportCertificateSecrets( return reconcile.Result{}, err } + secret, err := ensureTransportCertificatesSecretExists(c, scheme, es) + if err != nil { + return reconcile.Result{}, err + } + // defensive copy of the current secret so we can check whether we need to update later on + currentTransportCertificatesSecret := secret.DeepCopy() + for _, pod := range pods.Items { if pod.Status.PodIP == "" { log.Info("Skipping pod because it has no IP yet", "pod", pod.Name) continue } - if res, err := doReconcileTransportCertificateSecret( - c, scheme, es, pod, services, ca, rotationParams, + if err := ensureTransportCertificatesSecretContentsForPod( + es, secret, pod, services, ca, rotationParams, ); err != nil { - return res, err - } - } - - return reconcile.Result{}, nil -} - -// doReconcileTransportCertificateSecret ensures that the transport certificate secret has the correct content. 
-func doReconcileTransportCertificateSecret( - c k8s.Client, - scheme *runtime.Scheme, - es v1alpha1.Elasticsearch, - pod corev1.Pod, - svcs []corev1.Service, - ca *certificates.CA, - rotationParams certificates.RotationParams, -) (reconcile.Result, error) { - secret, err := EnsureTransportCertificateSecretExists(c, scheme, es, pod) - if err != nil { - return reconcile.Result{}, err - } - - // a placeholder secret may have nil entries, create them if needed - if secret.Data == nil { - secret.Data = make(map[string][]byte) - } - if secret.Annotations == nil { - secret.Annotations = make(map[string]string) - } - - // verify that the secret contains a parsable private key, create if it does not exist - var privateKey *rsa.PrivateKey - needsNewPrivateKey := true - if privateKeyData, ok := secret.Data[certificates.KeyFileName]; ok { - storedPrivateKey, err := certificates.ParsePEMPrivateKey(privateKeyData) - if err != nil { - log.Error(err, "Unable to parse stored private key", "secret", secret.Name) - } else { - needsNewPrivateKey = false - privateKey = storedPrivateKey - } - } - - // if we need a new private key, generate it - if needsNewPrivateKey { - generatedPrivateKey, err := rsa.GenerateKey(cryptorand.Reader, 2048) - if err != nil { return reconcile.Result{}, err } - - privateKey = generatedPrivateKey - secret.Data[certificates.KeyFileName] = certificates.EncodePEMPrivateKey(*privateKey) } - // check if the existing cert is correct - issueNewCertificate := shouldIssueNewCertificate(es, svcs, *secret, privateKey, ca, pod, rotationParams.RotateBefore) - - if issueNewCertificate { - log.Info( - "Issuing new certificate", - "pod", pod.Name, - "es", k8s.ExtractNamespacedName(&es), - ) - - csr, err := x509.CreateCertificateRequest(cryptorand.Reader, &x509.CertificateRequest{}, privateKey) - if err != nil { - return reconcile.Result{}, err + // remove certificates and keys for deleted pods + podsByName := pod.PodsByName(pods.Items) + keysToPrune := make([]string, 0) + for secretDataKey := range secret.Data { + if secretDataKey == certificates.CAFileName { + // never remove the CA file + continue } - // create a cert from the csr - parsedCSR, err := x509.ParseCertificateRequest(csr) - if err != nil { - return reconcile.Result{}, err - } + // get the pod name from the secret key name (the first segment before the ".") + podNameForKey := strings.SplitN(secretDataKey, ".", 2)[0] - validatedCertificateTemplate, err := CreateValidatedCertificateTemplate(pod, es, svcs, parsedCSR, rotationParams.Validity) - if err != nil { - return reconcile.Result{}, err - } - // sign the certificate - certData, err := ca.CreateCertificate(*validatedCertificateTemplate) - if err != nil { - return reconcile.Result{}, err + if _, ok := podsByName[podNameForKey]; !ok { + // pod no longer exists, so the element is safe to delete. + keysToPrune = append(keysToPrune, secretDataKey) } + } + if len(keysToPrune) > 0 { + log.Info("Pruning keys from certificates secret", "keys", keysToPrune) - // store the issued certificate in a secret mounted into the pod - secret.Data[certificates.CertFileName] = certificates.EncodePEMCert(certData, ca.Cert.Raw) + for _, keyToRemove := range keysToPrune { + delete(secret.Data, keyToRemove) + } } - // prepare trusted CA certs: CA of this node - trusted := certificates.EncodePEMCert(ca.Cert.Raw) + caBytes := certificates.EncodePEMCert(ca.Cert.Raw) // compare with current trusted CA certs. 
- updateTrustedCACerts := !bytes.Equal(trusted, secret.Data[certificates.CAFileName]) - if updateTrustedCACerts { - secret.Data[certificates.CAFileName] = trusted + if !bytes.Equal(caBytes, secret.Data[certificates.CAFileName]) { + secret.Data[certificates.CAFileName] = caBytes } - if needsNewPrivateKey || issueNewCertificate || updateTrustedCACerts { - log.Info("Updating transport certificate secret", "secret", secret.Name) + if !reflect.DeepEqual(secret, currentTransportCertificatesSecret) { if err := c.Update(secret); err != nil { return reconcile.Result{}, err } - annotation.MarkPodAsUpdated(c, pod) - } - - return reconcile.Result{}, nil -} - -// extractTransportCert extracts the transport certificate with the commonName from the Secret -func extractTransportCert(secret corev1.Secret, commonName string) *x509.Certificate { - certData, ok := secret.Data[certificates.CertFileName] - if !ok { - log.Info("No tls certificate found in secret", "secret", secret.Name) - return nil - } - - certs, err := certificates.ParsePEMCerts(certData) - if err != nil { - log.Error(err, "Invalid certificate data found, issuing new certificate", "secret", secret.Name) - return nil - } - - // look for the certificate based on the CommonName - var names []string - for _, c := range certs { - if c.Subject.CommonName == commonName { - return c + for _, pod := range pods.Items { + annotation.MarkPodAsUpdated(c, pod) } - names = append(names, c.Subject.CommonName) } - log.Info("Did not found a certificate with the expected common name", "secret", secret.Name, "expected", commonName, "found", names) - - return nil + return reconcile.Result{}, nil } -// shouldIssueNewCertificate returns true if we should issue a new certificate. -// -// Reasons for reissuing a certificate: -// - no certificate yet -// - certificate has the wrong format -// - certificate is invalid or expired -// - certificate SAN and IP does not match pod SAN and IP -func shouldIssueNewCertificate( - cluster v1alpha1.Elasticsearch, - svcs []corev1.Service, - secret corev1.Secret, - privateKey *rsa.PrivateKey, - ca *certificates.CA, - pod corev1.Pod, - certReconcileBefore time.Duration, -) bool { - certCommonName := buildCertificateCommonName(pod, cluster.Name, cluster.Namespace) - - generalNames, err := buildGeneralNames(cluster, svcs, pod) - if err != nil { - log.Error(err, "Cannot create GeneralNames for the TLS certificate", "pod", pod.Name) - return true - } - - cert := extractTransportCert(secret, certCommonName) - if cert == nil { - return true - } - - publicKey, publicKeyOk := cert.PublicKey.(*rsa.PublicKey) - if !publicKeyOk || publicKey.N.Cmp(privateKey.PublicKey.N) != 0 || publicKey.E != privateKey.PublicKey.E { - log.Info( - "Certificate belongs do a different public key, should issue new", - "subject", cert.Subject, - "issuer", cert.Issuer, - "current_ca_subject", ca.Cert.Subject, - ) - return true - } - - pool := x509.NewCertPool() - pool.AddCert(ca.Cert) - verifyOpts := x509.VerifyOptions{ - DNSName: certCommonName, - Roots: pool, - Intermediates: pool, - } - if _, err := cert.Verify(verifyOpts); err != nil { - log.Info( - fmt.Sprintf("Certificate was not valid, should issue new: %s", err), - "subject", cert.Subject, - "issuer", cert.Issuer, - "current_ca_subject", ca.Cert.Subject, - ) - return true - } - - if time.Now().After(cert.NotAfter.Add(-certReconcileBefore)) { - log.Info("Certificate soon to expire, should issue new", "secret", secret.Name) - return true +// ensureTransportCertificatesSecretExists ensures the existence and 
Labels of the Secret that at a later point +// in time will contain the transport certificates. +func ensureTransportCertificatesSecretExists( + c k8s.Client, + scheme *runtime.Scheme, + es v1alpha1.Elasticsearch, +) (*corev1.Secret, error) { + expected := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: es.Namespace, + Name: name.TransportCertificatesSecret(es.Name), + + Labels: map[string]string{ + // a label showing which es these certificates belongs to + label.ClusterNameLabelName: es.Name, + }, + }, + } + + // reconcile the secret resource + var reconciled corev1.Secret + if err := reconciler.ReconcileResource(reconciler.Params{ + Client: c, + Scheme: scheme, + Owner: &es, + Expected: &expected, + Reconciled: &reconciled, + NeedsUpdate: func() bool { + // we only care about labels, not contents at this point, and we can allow additional labels + if reconciled.Labels == nil { + return true + } + + for k, v := range expected.Labels { + if rv, ok := reconciled.Labels[k]; !ok || rv != v { + return true + } + } + return false + }, + UpdateReconciled: func() { + if reconciled.Labels == nil { + reconciled.Labels = expected.Labels + } else { + for k, v := range expected.Labels { + reconciled.Labels[k] = v + } + } + }, + }); err != nil { + return nil, err } - // compare actual vs. expected SANs - expected, err := certificates.MarshalToSubjectAlternativeNamesData(generalNames) - if err != nil { - log.Error(err, "Cannot marshal subject alternative names", "secret", secret.Name) - return true - } - extraExtensionFound := false - for _, ext := range cert.Extensions { - if !ext.Id.Equal(certificates.SubjectAlternativeNamesObjectIdentifier) { - continue - } - extraExtensionFound = true - if !reflect.DeepEqual(ext.Value, expected) { - log.Info("Certificate SANs do not match expected one, should issue new", "secret", secret.Name) - return true - } - } - if !extraExtensionFound { - log.Info("SAN extra extension not found, should issue new certificate", "secret", secret.Name) - return true + // a placeholder secret may have nil entries, create them if needed + if reconciled.Data == nil { + reconciled.Data = make(map[string][]byte) } - return false + return &reconciled, nil } diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile_test.go index 4724506674..7e066129e3 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile_test.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile_test.go @@ -5,397 +5,108 @@ package transport import ( - cryptorand "crypto/rand" - "crypto/rsa" - "crypto/x509" - "crypto/x509/pkix" - "encoding/pem" - "reflect" "testing" - "time" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/annotation" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" 
"k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -// fixtures -var ( - testCA *certificates.CA - testRSAPrivateKey *rsa.PrivateKey - testCSRBytes []byte - testCSR *x509.CertificateRequest - validatedCertificateTemplate *certificates.ValidatedCertificateTemplate - certData []byte - pemCert []byte - testIP = "1.2.3.4" - testCluster = v1alpha1.Elasticsearch{ObjectMeta: metav1.ObjectMeta{Name: "test-es-name", Namespace: "test-namespace"}} - testPod = corev1.Pod{ +func Test_ensureTransportCertificateSecretExists(t *testing.T) { + defaultSecret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod-name", - }, - Status: corev1.PodStatus{ - PodIP: testIP, - }, - } - testSvc = corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-service", - Namespace: "default", - }, - Spec: corev1.ServiceSpec{ - ClusterIP: "2.2.3.3", + Name: name.TransportCertificatesSecret(testES.Name), + Namespace: testES.Namespace, + Labels: map[string]string{ + label.ClusterNameLabelName: testES.Name, + }, }, - } - additionalCA = [][]byte{[]byte(testAdditionalCA)} -) - -const ( - testPemPrivateKey = ` ------BEGIN RSA PRIVATE KEY----- -MIICXAIBAAKBgQCxoeCUW5KJxNPxMp+KmCxKLc1Zv9Ny+4CFqcUXVUYH69L3mQ7v -IWrJ9GBfcaA7BPQqUlWxWM+OCEQZH1EZNIuqRMNQVuIGCbz5UQ8w6tS0gcgdeGX7 -J7jgCQ4RK3F/PuCM38QBLaHx988qG8NMc6VKErBjctCXFHQt14lerd5KpQIDAQAB -AoGAYrf6Hbk+mT5AI33k2Jt1kcweodBP7UkExkPxeuQzRVe0KVJw0EkcFhywKpr1 -V5eLMrILWcJnpyHE5slWwtFHBG6a5fLaNtsBBtcAIfqTQ0Vfj5c6SzVaJv0Z5rOd -7gQF6isy3t3w9IF3We9wXQKzT6q5ypPGdm6fciKQ8RnzREkCQQDZwppKATqQ41/R -vhSj90fFifrGE6aVKC1hgSpxGQa4oIdsYYHwMzyhBmWW9Xv/R+fPyr8ZwPxp2c12 -33QwOLPLAkEA0NNUb+z4ebVVHyvSwF5jhfJxigim+s49KuzJ1+A2RaSApGyBZiwS -rWvWkB471POAKUYt5ykIWVZ83zcceQiNTwJBAMJUFQZX5GDqWFc/zwGoKkeR49Yi -MTXIvf7Wmv6E++eFcnT461FlGAUHRV+bQQXGsItR/opIG7mGogIkVXa3E1MCQARX -AAA7eoZ9AEHflUeuLn9QJI/r0hyQQLEtrpwv6rDT1GCWaLII5HJ6NUFVf4TTcqxo -6vdM4QGKTJoO+SaCyP0CQFdpcxSAuzpFcKv0IlJ8XzS/cy+mweCMwyJ1PFEc4FX6 -wg/HcAJWY60xZTJDFN+Qfx8ZQvBEin6c2/h+zZi5IVY= ------END RSA PRIVATE KEY----- -` - testAdditionalCA = `-----BEGIN CERTIFICATE----- -MIIDKzCCAhOgAwIBAgIRAK7i/u/wsh+i2G0yUygsJckwDQYJKoZIhvcNAQELBQAw -LzEZMBcGA1UECxMQNG1jZnhjbnh0ZjZuNHA5bDESMBAGA1UEAxMJdHJ1c3Qtb25l -MB4XDTE5MDMyMDIwNDg1NloXDTIwMDMxOTIwNDk1NlowLzEZMBcGA1UECxMQNG1j -Znhjbnh0ZjZuNHA5bDESMBAGA1UEAxMJdHJ1c3Qtb25lMIIBIjANBgkqhkiG9w0B -AQEFAAOCAQ8AMIIBCgKCAQEAu/Pws5FcyJw843pNow/Y95rApWAuGanU99DEmeOG -ggtpc3qtDWWKwLZ6cU+av3u82tf0HYSpy0Z2hn3PS2dGGgHPTr/tTGYA5alu1dn5 -CgqQDBVLbkKA1lDcm8w98fRavRw6a0TX5DURqXs+smhdMztQjDNCl3kJ40JbXVAY -x5vhD2pKPCK0VIr9uYK0E/9dvrU0SJGLUlB+CY/DU7c8t22oer2T6fjCZzh3Fhwi -/aOKEwEUoE49orte0N9b1HSKlVePzIUuTTc3UU2ntWi96Uf2FesuAubU11WH4kIL -wRlofty7ewBzVmGte1fKUMjHB3mgb+WYwkEFwjpQL4LhkQIDAQABo0IwQDAOBgNV -HQ8BAf8EBAMCAoQwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMA8GA1Ud -EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAI+qczKQgkb5L5dXzn+KW92J -Sq1rrmaYUYLRTtPFH7t42REPYLs4UV0qR+6v/hJljQbAS+Vu3BioLWuxq85NsIjf -OK1KO7D8lwVI9tAetE0tKILqljTjwZpqfZLZ8fFqwzd9IM/WfoI7Z05k8BSL6XdM -FaRfSe/GJ+DR1dCwnWAVKGxAry4JSceVS9OXxYNRTcfQuT5s8h/6X5UaonTbhil7 -91fQFaX8LSuZj23/3kgDTnjPmvj2sz5nODymI4YeTHLjdlMmTufWSJj901ITp7Bw -DMO3GhRADFpMz3vjHA2rHA4AQ6nC8N4lIYTw0AF1VAOC0SDntf6YEgrhRKRFAUY= ------END CERTIFICATE-----` -) - -func init() { - if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil { - panic(err) - } - - var err error - block, _ := pem.Decode([]byte(testPemPrivateKey)) - if testRSAPrivateKey, err = x509.ParsePKCS1PrivateKey(block.Bytes); err != nil { - panic("Failed to parse private key: " + 
err.Error()) - } - - if testCA, err = certificates.NewSelfSignedCA(certificates.CABuilderOptions{ - Subject: pkix.Name{CommonName: "test-common-name"}, - PrivateKey: testRSAPrivateKey, - }); err != nil { - panic("Failed to create new self signed CA: " + err.Error()) + Data: make(map[string][]byte), } - testCSRBytes, err = x509.CreateCertificateRequest(cryptorand.Reader, &x509.CertificateRequest{}, testRSAPrivateKey) - if err != nil { - panic("Failed to create CSR:" + err.Error()) + defaultSecretWith := func(setter func(secret *corev1.Secret)) *corev1.Secret { + secret := defaultSecret.DeepCopy() + setter(secret) + return secret } - testCSR, err = x509.ParseCertificateRequest(testCSRBytes) - validatedCertificateTemplate, err = CreateValidatedCertificateTemplate( - testPod, testCluster, []corev1.Service{testSvc}, testCSR, certificates.DefaultCertValidity) - if err != nil { - panic("Failed to create validated cert template:" + err.Error()) - } - - certData, err = testCA.CreateCertificate(*validatedCertificateTemplate) - if err != nil { - panic("Failed to create cert data:" + err.Error()) - } - - pemCert = certificates.EncodePEMCert(certData, testCA.Cert.Raw) -} - -func Test_shouldIssueNewCertificate(t *testing.T) { type args struct { - secret corev1.Secret - pod corev1.Pod - cluster v1alpha1.Elasticsearch - rotateBefore time.Duration + c k8s.Client + scheme *runtime.Scheme + owner v1alpha1.Elasticsearch + labels map[string]string } tests := []struct { - name string - args args - want bool + name string + args args + want func(*testing.T, *corev1.Secret) + wantErr bool }{ { - name: "missing cert in secret", + name: "should create a secret if it does not already exist", args: args{ - secret: corev1.Secret{}, - pod: testPod, - cluster: testCluster, - rotateBefore: certificates.DefaultRotateBefore, + c: k8s.WrapClient(fake.NewFakeClient()), + owner: testES, }, - want: true, - }, - { - name: "invalid cert data", - args: args{ - secret: corev1.Secret{ - Data: map[string][]byte{ - certificates.CertFileName: []byte("invalid"), - }, - }, - pod: testPod, - cluster: testCluster, - rotateBefore: certificates.DefaultRotateBefore, + want: func(t *testing.T, secret *corev1.Secret) { + // owner references are set upon creation, so ignore for comparison + expected := defaultSecretWith(func(s *corev1.Secret) { + s.OwnerReferences = secret.OwnerReferences + }) + assert.Equal(t, expected, secret) }, - want: true, }, { - name: "pod name mismatch", + name: "should update an existing secret", args: args{ - secret: corev1.Secret{ - Data: map[string][]byte{ - certificates.CertFileName: pemCert, - }, - }, - pod: corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "different"}}, - cluster: testCluster, - rotateBefore: certificates.DefaultRotateBefore, + c: k8s.WrapClient(fake.NewFakeClient(defaultSecretWith(func(secret *corev1.Secret) { + secret.ObjectMeta.UID = types.UID("42") + }))), + owner: testES, }, - want: true, - }, - { - name: "pod name mismatch", - args: args{ - secret: corev1.Secret{ - Data: map[string][]byte{ - certificates.CertFileName: pemCert, - }, - }, - pod: corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "different"}}, - cluster: testCluster, - rotateBefore: certificates.DefaultRotateBefore, + want: func(t *testing.T, secret *corev1.Secret) { + // UID should be kept the same + assert.Equal(t, defaultSecretWith(func(secret *corev1.Secret) { + secret.ObjectMeta.UID = types.UID("42") + }), secret) }, - want: true, }, { - name: "valid cert", + name: "should allow additional labels in the secret", args: args{ - 
secret: corev1.Secret{ - Data: map[string][]byte{ - certificates.CertFileName: pemCert, - }, - }, - pod: testPod, - cluster: testCluster, - rotateBefore: certificates.DefaultRotateBefore, + c: k8s.WrapClient(fake.NewFakeClient(defaultSecretWith(func(secret *corev1.Secret) { + secret.ObjectMeta.Labels["foo"] = "bar" + }))), + owner: testES, }, - want: false, - }, - { - name: "should be rotated soon", - args: args{ - secret: corev1.Secret{ - Data: map[string][]byte{ - certificates.CertFileName: pemCert, - }, - }, - pod: testPod, - cluster: testCluster, - rotateBefore: certificates.DefaultCertValidity, // rotate before the same duration as total validity + want: func(t *testing.T, secret *corev1.Secret) { + assert.Equal(t, defaultSecretWith(func(secret *corev1.Secret) { + secret.ObjectMeta.Labels["foo"] = "bar" + }), secret) }, - want: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := shouldIssueNewCertificate(tt.args.cluster, []corev1.Service{testSvc}, tt.args.secret, testRSAPrivateKey, testCA, tt.args.pod, tt.args.rotateBefore); got != tt.want { - t.Errorf("shouldIssueNewCertificate() = %v, want %v", got, tt.want) + if tt.args.scheme == nil { + tt.args.scheme = scheme.Scheme } - }) - } -} -func Test_doReconcileTransportCertificateSecret(t *testing.T) { - objMeta := metav1.ObjectMeta{ - Namespace: "namespace", - Name: name.TransportCertsSecret(testPod.Name), - Labels: map[string]string{ - LabelCertificateType: LabelCertificateTypeTransport, - label.PodNameLabelName: testPod.Name, - label.ClusterNameLabelName: testCluster.Name, - }, - } - - tests := []struct { - name string - secret corev1.Secret - pod corev1.Pod - additionalTrustedCAsPemEncoded [][]byte - wantSecretUpdated bool - wantCertUpdateAnnotationUpdated bool - wantErr func(t *testing.T, err error) - }{ - { - name: "do not requeue without updating secret if there is an additional CA", - secret: corev1.Secret{ - ObjectMeta: objMeta, - Data: map[string][]byte{ - certificates.CertFileName: pemCert, - certificates.CAFileName: certificates.EncodePEMCert(testCA.Cert.Raw), - }, - }, - additionalTrustedCAsPemEncoded: additionalCA, - pod: testPod, - wantSecretUpdated: true, - wantCertUpdateAnnotationUpdated: false, - }, - { - name: "no private key in the secret", - secret: corev1.Secret{ - ObjectMeta: objMeta, - Data: map[string][]byte{ - certificates.CertFileName: pemCert, - certificates.CAFileName: certificates.EncodePEMCert(testCA.Cert.Raw), - }, - }, - pod: testPod, - wantSecretUpdated: true, - wantCertUpdateAnnotationUpdated: true, - }, - { - name: "no cert in the secret", - secret: corev1.Secret{ - ObjectMeta: objMeta, - Data: map[string][]byte{ - certificates.KeyFileName: certificates.EncodePEMPrivateKey(*testRSAPrivateKey), - certificates.CAFileName: certificates.EncodePEMCert(testCA.Cert.Raw), - }, - }, - pod: testPod, - wantSecretUpdated: true, - wantCertUpdateAnnotationUpdated: true, - }, - { - name: "cert does not belong to the key in the secret", - secret: corev1.Secret{ - ObjectMeta: objMeta, - Data: map[string][]byte{ - certificates.KeyFileName: certificates.EncodePEMPrivateKey(*testRSAPrivateKey), - certificates.CertFileName: certificates.EncodePEMCert(testCA.Cert.Raw), - certificates.CAFileName: certificates.EncodePEMCert(testCA.Cert.Raw), - }, - }, - pod: testPod, - wantSecretUpdated: true, - wantCertUpdateAnnotationUpdated: true, - }, - { - name: "invalid cert in the secret", - secret: corev1.Secret{ - ObjectMeta: objMeta, - Data: map[string][]byte{ - certificates.KeyFileName: 
certificates.EncodePEMPrivateKey(*testRSAPrivateKey), - certificates.CertFileName: []byte("invalid"), - certificates.CAFileName: certificates.EncodePEMCert(testCA.Cert.Raw), - }, - }, - pod: testPod, - wantSecretUpdated: true, - wantCertUpdateAnnotationUpdated: true, - }, - { - name: "no cert generated, but pod has no IP yet: requeue", - secret: corev1.Secret{ObjectMeta: objMeta}, - pod: corev1.Pod{}, - wantErr: func(t *testing.T, err error) { - assert.Contains(t, err.Error(), "pod currently has no valid IP") - }, - }, - { - name: "valid data should not require updating", - secret: corev1.Secret{ - ObjectMeta: objMeta, - Data: map[string][]byte{ - certificates.KeyFileName: certificates.EncodePEMPrivateKey(*testRSAPrivateKey), - certificates.CertFileName: pemCert, - certificates.CAFileName: certificates.EncodePEMCert(testCA.Cert.Raw), - }, - }, - pod: testPod, - wantSecretUpdated: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - secret := tt.secret.DeepCopy() - fakeClient := k8s.WrapClient(fake.NewFakeClient(secret)) - err := fakeClient.Create(&tt.pod) - require.NoError(t, err) - - _, err = doReconcileTransportCertificateSecret( - fakeClient, - scheme.Scheme, - testCluster, - tt.pod, - []corev1.Service{testSvc}, - testCA, - certificates.RotationParams{ - Validity: certificates.DefaultCertValidity, - RotateBefore: certificates.DefaultRotateBefore, - }, - ) - if tt.wantErr != nil { - tt.wantErr(t, err) + got, err := ensureTransportCertificatesSecretExists(tt.args.c, tt.args.scheme, tt.args.owner) + if (err != nil) != tt.wantErr { + t.Errorf("EnsureTransportCertificateSecretExists() error = %v, wantErr %v", err, tt.wantErr) return } - require.NoError(t, err) - - var updatedSecret corev1.Secret - err = fakeClient.Get(k8s.ExtractNamespacedName(&objMeta), &updatedSecret) - require.NoError(t, err) - - var updatedPod corev1.Pod - err = fakeClient.Get(k8s.ExtractNamespacedName(&tt.pod), &updatedPod) - - isUpdated := !reflect.DeepEqual(tt.secret, updatedSecret) - require.Equal(t, tt.wantSecretUpdated, isUpdated, "want secret updated") - - if tt.wantSecretUpdated { - assert.NotEmpty(t, updatedSecret.Data[certificates.CAFileName]) - assert.NotEmpty(t, updatedSecret.Data[certificates.CertFileName]) - if tt.wantCertUpdateAnnotationUpdated { - // check that the pod annotation has been updated - assert.NotEmpty(t, updatedPod.Annotations[annotation.UpdateAnnotation]) - lastPodUpdate, err := time.Parse(time.RFC3339Nano, updatedPod.Annotations[annotation.UpdateAnnotation]) - require.NoError(t, err) - assert.True(t, lastPodUpdate.Add(-5*time.Minute).Before(time.Now())) - } - } else { - assert.Equal(t, tt.secret.Data, updatedSecret.Data) - } + tt.want(t, got) }) } } diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go new file mode 100644 index 0000000000..03a65cdcde --- /dev/null +++ b/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go @@ -0,0 +1,129 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package transport + +import ( + cryptorand "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" +) + +// fixtures +var ( + testCA *certificates.CA + testRSAPrivateKey *rsa.PrivateKey + testCSRBytes []byte + testCSR *x509.CertificateRequest + validatedCertificateTemplate *certificates.ValidatedCertificateTemplate + certData []byte + pemCert []byte + testIP = "1.2.3.4" + testES = v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{Name: "test-es-name", Namespace: "test-namespace"}, + } + testPod = corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-name", + }, + Status: corev1.PodStatus{ + PodIP: testIP, + }, + } + testSvc = corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-service", + Namespace: "default", + }, + Spec: corev1.ServiceSpec{ + ClusterIP: "2.2.3.3", + }, + } + additionalCA = [][]byte{[]byte(testAdditionalCA)} +) + +const ( + testPemPrivateKey = ` +-----BEGIN RSA PRIVATE KEY----- +MIICXAIBAAKBgQCxoeCUW5KJxNPxMp+KmCxKLc1Zv9Ny+4CFqcUXVUYH69L3mQ7v +IWrJ9GBfcaA7BPQqUlWxWM+OCEQZH1EZNIuqRMNQVuIGCbz5UQ8w6tS0gcgdeGX7 +J7jgCQ4RK3F/PuCM38QBLaHx988qG8NMc6VKErBjctCXFHQt14lerd5KpQIDAQAB +AoGAYrf6Hbk+mT5AI33k2Jt1kcweodBP7UkExkPxeuQzRVe0KVJw0EkcFhywKpr1 +V5eLMrILWcJnpyHE5slWwtFHBG6a5fLaNtsBBtcAIfqTQ0Vfj5c6SzVaJv0Z5rOd +7gQF6isy3t3w9IF3We9wXQKzT6q5ypPGdm6fciKQ8RnzREkCQQDZwppKATqQ41/R +vhSj90fFifrGE6aVKC1hgSpxGQa4oIdsYYHwMzyhBmWW9Xv/R+fPyr8ZwPxp2c12 +33QwOLPLAkEA0NNUb+z4ebVVHyvSwF5jhfJxigim+s49KuzJ1+A2RaSApGyBZiwS +rWvWkB471POAKUYt5ykIWVZ83zcceQiNTwJBAMJUFQZX5GDqWFc/zwGoKkeR49Yi +MTXIvf7Wmv6E++eFcnT461FlGAUHRV+bQQXGsItR/opIG7mGogIkVXa3E1MCQARX +AAA7eoZ9AEHflUeuLn9QJI/r0hyQQLEtrpwv6rDT1GCWaLII5HJ6NUFVf4TTcqxo +6vdM4QGKTJoO+SaCyP0CQFdpcxSAuzpFcKv0IlJ8XzS/cy+mweCMwyJ1PFEc4FX6 +wg/HcAJWY60xZTJDFN+Qfx8ZQvBEin6c2/h+zZi5IVY= +-----END RSA PRIVATE KEY----- +` + testAdditionalCA = `-----BEGIN CERTIFICATE----- +MIIDKzCCAhOgAwIBAgIRAK7i/u/wsh+i2G0yUygsJckwDQYJKoZIhvcNAQELBQAw +LzEZMBcGA1UECxMQNG1jZnhjbnh0ZjZuNHA5bDESMBAGA1UEAxMJdHJ1c3Qtb25l +MB4XDTE5MDMyMDIwNDg1NloXDTIwMDMxOTIwNDk1NlowLzEZMBcGA1UECxMQNG1j +Znhjbnh0ZjZuNHA5bDESMBAGA1UEAxMJdHJ1c3Qtb25lMIIBIjANBgkqhkiG9w0B +AQEFAAOCAQ8AMIIBCgKCAQEAu/Pws5FcyJw843pNow/Y95rApWAuGanU99DEmeOG +ggtpc3qtDWWKwLZ6cU+av3u82tf0HYSpy0Z2hn3PS2dGGgHPTr/tTGYA5alu1dn5 +CgqQDBVLbkKA1lDcm8w98fRavRw6a0TX5DURqXs+smhdMztQjDNCl3kJ40JbXVAY +x5vhD2pKPCK0VIr9uYK0E/9dvrU0SJGLUlB+CY/DU7c8t22oer2T6fjCZzh3Fhwi +/aOKEwEUoE49orte0N9b1HSKlVePzIUuTTc3UU2ntWi96Uf2FesuAubU11WH4kIL +wRlofty7ewBzVmGte1fKUMjHB3mgb+WYwkEFwjpQL4LhkQIDAQABo0IwQDAOBgNV +HQ8BAf8EBAMCAoQwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMA8GA1Ud +EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAI+qczKQgkb5L5dXzn+KW92J +Sq1rrmaYUYLRTtPFH7t42REPYLs4UV0qR+6v/hJljQbAS+Vu3BioLWuxq85NsIjf +OK1KO7D8lwVI9tAetE0tKILqljTjwZpqfZLZ8fFqwzd9IM/WfoI7Z05k8BSL6XdM +FaRfSe/GJ+DR1dCwnWAVKGxAry4JSceVS9OXxYNRTcfQuT5s8h/6X5UaonTbhil7 +91fQFaX8LSuZj23/3kgDTnjPmvj2sz5nODymI4YeTHLjdlMmTufWSJj901ITp7Bw +DMO3GhRADFpMz3vjHA2rHA4AQ6nC8N4lIYTw0AF1VAOC0SDntf6YEgrhRKRFAUY= +-----END CERTIFICATE-----` +) + +func init() { + if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil { + panic(err) + } + + var err error + block, _ := pem.Decode([]byte(testPemPrivateKey)) + if testRSAPrivateKey, err = x509.ParsePKCS1PrivateKey(block.Bytes); 
err != nil { + panic("Failed to parse private key: " + err.Error()) + } + + if testCA, err = certificates.NewSelfSignedCA(certificates.CABuilderOptions{ + Subject: pkix.Name{CommonName: "test-common-name"}, + PrivateKey: testRSAPrivateKey, + }); err != nil { + panic("Failed to create new self signed CA: " + err.Error()) + } + + testCSRBytes, err = x509.CreateCertificateRequest(cryptorand.Reader, &x509.CertificateRequest{}, testRSAPrivateKey) + if err != nil { + panic("Failed to create CSR:" + err.Error()) + } + testCSR, err = x509.ParseCertificateRequest(testCSRBytes) + + validatedCertificateTemplate, err = createValidatedCertificateTemplate( + testPod, testES, []corev1.Service{testSvc}, testCSR, certificates.DefaultCertValidity) + if err != nil { + panic("Failed to create validated cert template:" + err.Error()) + } + + certData, err = testCA.CreateCertificate(*validatedCertificateTemplate) + if err != nil { + panic("Failed to create cert data:" + err.Error()) + } + + pemCert = certificates.EncodePEMCert(certData, testCA.Cert.Raw) +} diff --git a/operators/pkg/controller/elasticsearch/driver/pods.go b/operators/pkg/controller/elasticsearch/driver/pods.go index 7018161986..2764079072 100644 --- a/operators/pkg/controller/elasticsearch/driver/pods.go +++ b/operators/pkg/controller/elasticsearch/driver/pods.go @@ -7,16 +7,8 @@ package driver import ( "fmt" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/certificates/transport" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" @@ -24,9 +16,13 @@ import ( pvcutils "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pvc" esreconcile "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // createElasticsearchPod creates the given elasticsearch pod @@ -79,26 +75,6 @@ func createElasticsearchPod( pod = replaceVolume(pod, vol) } - // create the transport certificates secret for this pod because it must exist before we're able to create the - // pod - log.Info("Ensuring that transport certificate secret exists for pod", "pod", pod.Name) - transportCertificatesSecret, err := transport.EnsureTransportCertificateSecretExists( - c, - scheme, - es, - pod, - ) - if err != nil { - return err - } - - // we finally have the transport certificates secret made, so we can inject the secret volume into the pod - transportCertsVolume := 
volume.NewSecretVolumeWithMountPath( - transportCertificatesSecret.Name, - esvolume.TransportCertificatesSecretVolumeName, - esvolume.TransportCertificatesSecretVolumeMountPath).Volume() - pod = replaceVolume(pod, transportCertsVolume) - // create the config volume for this pod, now that we have a proper name for the pod if err := settings.ReconcileConfig(c, es, pod, podSpecCtx.Config); err != nil { return err @@ -151,6 +127,12 @@ func getOrCreatePVC(pod *corev1.Pod, if err != nil { return nil, err } + + // update the hostname if we defaulted it earlier + if pod.Spec.Hostname == pod.Name { + pod.Spec.Hostname = podName + } + pod.Name = podName log.Info("Reusing PVC", "pod", pod.Name, "pvc", pvc.Name) return pvc, nil diff --git a/operators/pkg/controller/elasticsearch/driver/pods_test.go b/operators/pkg/controller/elasticsearch/driver/pods_test.go index d21f0ff41c..80996b2431 100644 --- a/operators/pkg/controller/elasticsearch/driver/pods_test.go +++ b/operators/pkg/controller/elasticsearch/driver/pods_test.go @@ -151,19 +151,11 @@ func Test_createElasticsearchPod(t *testing.T) { err = client.Get(k8s.ExtractNamespacedName(&pod), &pod) require.NoError(t, err) - // should have a volume for transport certs (existing one replaced) - found := false - for _, v := range pod.Spec.Volumes { - if v.Name == esvolume.TransportCertificatesSecretVolumeName { - require.NotEqual(t, "should-be-replaced", v.Secret.SecretName) - found = true - } - } - require.True(t, found) // should have a volume for config (existing one replaced) - found = false + found := false + configSecretVolumeName := settings.ConfigSecretVolume(pod.Name).Name() for _, v := range pod.Spec.Volumes { - if v.Name == esvolume.TransportCertificatesSecretVolumeName { + if v.Name == configSecretVolumeName { require.NotEqual(t, "should-be-replaced", v.Secret.SecretName) found = true } diff --git a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go index ad714ec899..73496ee39a 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go @@ -5,11 +5,9 @@ package initcontainer import ( - "fmt" "path" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" - volume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" @@ -19,7 +17,7 @@ import ( ) const ( - transportCertificatesVolumeMountPath = "/mnt/elastic-internal/transport-certificates" + initContainerTransportCertificatesVolumeMountPath = "/mnt/elastic-internal/transport-certificates" ) // Volumes that are shared between the prepare-fs init container and the ES container @@ -35,7 +33,7 @@ var ( EsConfigSharedVolume = SharedVolume{ Name: "elastic-internal-elasticsearch-config-local", InitContainerMountPath: "/mnt/elastic-internal/elasticsearch-config-local", - EsContainerMountPath: "/usr/share/elasticsearch/config", + EsContainerMountPath: esvolume.ConfigVolumeMountPath, } // EsPluginsSharedVolume contains the ES plugins/ directory @@ -93,7 +91,7 @@ func NewPrepareFSInitContainer( // will attempt to move all the files under the configuration directory to a different 
volume, and it should not // be attempting to move files from this secret volume mount (any attempt to do so will be logged as errors). certificatesVolumeMount := transportCertificatesVolume.VolumeMount() - certificatesVolumeMount.MountPath = transportCertificatesVolumeMountPath + certificatesVolumeMount.MountPath = initContainerTransportCertificatesVolumeMountPath scriptsVolume := volume.NewConfigMapVolumeWithMode( name.ScriptsConfigMap(clusterName), @@ -109,6 +107,11 @@ func NewPrepareFSInitContainer( SecurityContext: &corev1.SecurityContext{ Privileged: &privileged, }, + Env: []corev1.EnvVar{ + {Name: settings.EnvPodName, Value: "", ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "metadata.name"}, + }}, + }, Command: []string{"bash", "-c", path.Join(esvolume.ScriptsVolumeMountPath, PrepareFsScriptConfigKey)}, VolumeMounts: append( PluginVolumes.InitContainerVolumeMounts(), @@ -130,9 +133,17 @@ func RenderPrepareFsScript() (string, error) { esvolume.ElasticsearchDataMountPath, esvolume.ElasticsearchLogsMountPath, }, - TransportCertificatesKeyPath: fmt.Sprintf( - "%s/%s", - transportCertificatesVolumeMountPath, - certificates.KeyFileName), + InitContainerTransportCertificatesSecretVolumeMountPath: initContainerTransportCertificatesVolumeMountPath, + InitContainerNodeTransportCertificatesKeyPath: path.Join( + EsConfigSharedVolume.InitContainerMountPath, + esvolume.NodeTransportCertificatePathSegment, + esvolume.NodeTransportCertificateKeyFile, + ), + InitContainerNodeTransportCertificatesCertPath: path.Join( + EsConfigSharedVolume.InitContainerMountPath, + esvolume.NodeTransportCertificatePathSegment, + esvolume.NodeTransportCertificateCertFile, + ), + TransportCertificatesSecretVolumeMountPath: esvolume.TransportCertificatesSecretVolumeMountPath, }) } diff --git a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs_script.go b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs_script.go index f7b56765f5..e2a277636c 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs_script.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs_script.go @@ -17,8 +17,21 @@ type TemplateParams struct { LinkedFiles LinkedFilesArray // ChownToElasticsearch are paths that need to be chowned to the Elasticsearch user/group. ChownToElasticsearch []string - // TransportCertificatesKeyPath is a path that should exist when the transport certificates have been reconciled. - TransportCertificatesKeyPath string + + // InitContainerTransportCertificatesSecretVolumeMountPath is the path to the volume in the init container that + // contains the transport certificates. + InitContainerTransportCertificatesSecretVolumeMountPath string + + // InitContainerNodeTransportCertificatesKeyPath is the path within the init container where the private key for the + // node transport certificates should be found. + InitContainerNodeTransportCertificatesKeyPath string + // InitContainerNodeTransportCertificatesCertPath is the path within the init container where the certificate for + // the node transport should be found. + InitContainerNodeTransportCertificatesCertPath string + + // TransportCertificatesSecretVolumeMountPath is the path to the volume in the es container that contains the + // transport certificates. 
+	TransportCertificatesSecretVolumeMountPath string
 }
 
 // RenderScriptTemplate renders scriptTemplate using the given TemplateParams
@@ -105,15 +118,40 @@ var scriptTemplate = template.Must(template.New("").Parse(
 	# Wait for certs #
 	######################
 
+	INIT_CONTAINER_LOCAL_KEY_PATH={{ .InitContainerTransportCertificatesSecretVolumeMountPath }}/${POD_NAME}.tls.key
+
 	# wait for the transport certificates to show up
-	echo "waiting for the transport certificates"
+	echo "waiting for the transport certificates (${INIT_CONTAINER_LOCAL_KEY_PATH})"
 	wait_start=$(date +%s)
-	while [ ! -f {{ .TransportCertificatesKeyPath }} ]
+	while [ ! -f ${INIT_CONTAINER_LOCAL_KEY_PATH} ]
 	do
 	sleep 0.2
 	done
 	echo "wait duration: $(duration wait_start) sec."
 
+	######################
+	# Certs linking #
+	######################
+
+	KEY_SOURCE_PATH={{ .TransportCertificatesSecretVolumeMountPath }}/${POD_NAME}.tls.key
+	KEY_TARGET_PATH={{ .InitContainerNodeTransportCertificatesKeyPath }}
+
+	CERT_SOURCE_PATH={{ .TransportCertificatesSecretVolumeMountPath }}/${POD_NAME}.tls.crt
+	CERT_TARGET_PATH={{ .InitContainerNodeTransportCertificatesCertPath }}
+
+	# Link individual files from their mount location into the config dir
+	# volume, to be used by the ES container
+	ln_start=$(date +%s)
+
+	echo "Linking $KEY_SOURCE_PATH to $KEY_TARGET_PATH"
+	mkdir -p $(dirname $KEY_TARGET_PATH)
+	ln -sf $KEY_SOURCE_PATH $KEY_TARGET_PATH
+	echo "Linking $CERT_SOURCE_PATH to $CERT_TARGET_PATH"
+	mkdir -p $(dirname $CERT_TARGET_PATH)
+	ln -sf $CERT_SOURCE_PATH $CERT_TARGET_PATH
+
+	echo "Certs linking duration: $(duration $ln_start) sec."
+
 	######################
 	# End #
 	######################
diff --git a/operators/pkg/controller/elasticsearch/name/name.go b/operators/pkg/controller/elasticsearch/name/name.go
index bf087bd321..73bec15cb0 100644
--- a/operators/pkg/controller/elasticsearch/name/name.go
+++ b/operators/pkg/controller/elasticsearch/name/name.go
@@ -22,17 +22,17 @@ const (
 	// podRandomSuffixLength represents the length of the random suffix that is appended in NewPodName.
 	podRandomSuffixLength = 10
 
-	configSecretSuffix          = "config"
-	secureSettingsSecretSuffix  = "secure-settings"
-	certsSecretSuffix           = "certs"
-	httpServiceSuffix           = "http"
-	elasticUserSecretSuffix     = "elastic-user"
-	xpackFileRealmSecretSuffix  = "xpack-file-realm"
-	internalUsersSecretSuffix   = "internal-users"
-	unicastHostsConfigMapSuffix = "unicast-hosts"
-	licenseSecretSuffix         = "license"
-	defaultPodDisruptionBudget  = "default"
-	scriptsConfigMapSuffix      = "scripts"
+	configSecretSuffix                = "config"
+	secureSettingsSecretSuffix        = "secure-settings"
+	httpServiceSuffix                 = "http"
+	elasticUserSecretSuffix           = "elastic-user"
+	xpackFileRealmSecretSuffix        = "xpack-file-realm"
+	internalUsersSecretSuffix         = "internal-users"
+	unicastHostsConfigMapSuffix       = "unicast-hosts"
+	licenseSecretSuffix               = "license"
+	defaultPodDisruptionBudget        = "default"
+	scriptsConfigMapSuffix            = "scripts"
+	transportCertificatesSecretSuffix = "transport-certificates"
 )
 
 // ESNamer is a Namer that is configured with the defaults for resources related to an ES cluster. 
@@ -90,8 +90,8 @@ func SecureSettingsSecret(esName string) string { return ESNamer.Suffix(esName, secureSettingsSecretSuffix) } -func TransportCertsSecret(podName string) string { - return esNoDefaultSuffixesNamer.Suffix(podName, certsSecretSuffix) +func TransportCertificatesSecret(esName string) string { + return ESNamer.Suffix(esName, transportCertificatesSecretSuffix) } func HTTPService(esName string) string { diff --git a/operators/pkg/controller/elasticsearch/pod/pod.go b/operators/pkg/controller/elasticsearch/pod/pod.go index fc50b04d6a..0b3c383624 100644 --- a/operators/pkg/controller/elasticsearch/pod/pod.go +++ b/operators/pkg/controller/elasticsearch/pod/pod.go @@ -121,3 +121,12 @@ func PodMapToNames(pods map[string]corev1.Pod) []string { } return names } + +// PodsByName returns a map of pod names to pods +func PodsByName(pods []corev1.Pod) map[string]corev1.Pod { + podMap := make(map[string]corev1.Pod, len(pods)) + for _, pod := range pods { + podMap[pod.Name] = pod + } + return podMap +} diff --git a/operators/pkg/controller/elasticsearch/settings/merged_config.go b/operators/pkg/controller/elasticsearch/settings/merged_config.go index e927e9bd63..57ef74cd21 100644 --- a/operators/pkg/controller/elasticsearch/settings/merged_config.go +++ b/operators/pkg/controller/elasticsearch/settings/merged_config.go @@ -67,10 +67,20 @@ func xpackConfig() *CanonicalConfig { XPackSecurityHttpSslCertificate: path.Join(volume.HTTPCertificatesSecretVolumeMountPath, certificates.CertFileName), // x-pack security transport settings - XPackSecurityTransportSslEnabled: "true", - XPackSecurityTransportSslKey: path.Join(volume.TransportCertificatesSecretVolumeMountPath, certificates.KeyFileName), - XPackSecurityTransportSslCertificate: path.Join(volume.TransportCertificatesSecretVolumeMountPath, certificates.CertFileName), - XPackSecurityTransportSslCertificateAuthorities: []string{path.Join(volume.TransportCertificatesSecretVolumeMountPath, certificates.CAFileName)}, + XPackSecurityTransportSslEnabled: "true", + XPackSecurityTransportSslKey: path.Join( + volume.ConfigVolumeMountPath, + volume.NodeTransportCertificatePathSegment, + volume.NodeTransportCertificateKeyFile, + ), + XPackSecurityTransportSslCertificate: path.Join( + volume.ConfigVolumeMountPath, + volume.NodeTransportCertificatePathSegment, + volume.NodeTransportCertificateCertFile, + ), + XPackSecurityTransportSslCertificateAuthorities: []string{ + path.Join(volume.TransportCertificatesSecretVolumeMountPath, certificates.CAFileName), + }, } return &CanonicalConfig{common.MustCanonicalConfig(cfg)} } diff --git a/operators/pkg/controller/elasticsearch/version/common.go b/operators/pkg/controller/elasticsearch/version/common.go index d144a32fe4..3c1f9548ad 100644 --- a/operators/pkg/controller/elasticsearch/version/common.go +++ b/operators/pkg/controller/elasticsearch/version/common.go @@ -111,6 +111,11 @@ func podSpecContext( esvolume.HTTPCertificatesSecretVolumeName, esvolume.HTTPCertificatesSecretVolumeMountPath, ) + transportCertificatesVolume := volume.NewSecretVolumeWithMountPath( + name.TransportCertificatesSecret(p.Elasticsearch.Name), + esvolume.TransportCertificatesSecretVolumeName, + esvolume.TransportCertificatesSecretVolumeMountPath, + ) // A few secret volumes will be generated based on the pod name. 
// At this point the (maybe future) pod does not have a name yet: we still want to @@ -119,11 +124,6 @@ func podSpecContext( // and secret refs in Volumes Mounts will be fixed right before pod creation, // if this spec ends up leading to a new pod creation. podNamePlaceholder := "pod-name-placeholder" - transportCertificatesVolume := volume.NewSecretVolumeWithMountPath( - name.TransportCertsSecret(podNamePlaceholder), - esvolume.TransportCertificatesSecretVolumeName, - esvolume.TransportCertificatesSecretVolumeMountPath, - ) configVolume := settings.ConfigSecretVolume(podNamePlaceholder) // append future volumes from PVCs (not resolved to a claim yet) diff --git a/operators/pkg/controller/elasticsearch/volume/names.go b/operators/pkg/controller/elasticsearch/volume/names.go index dd7f27399e..10ca54fb67 100644 --- a/operators/pkg/controller/elasticsearch/volume/names.go +++ b/operators/pkg/controller/elasticsearch/volume/names.go @@ -12,6 +12,11 @@ const ( KeystoreUserSecretMountPath = "/mnt/elastic-internal/keystore-user" KeystoreUserVolumeName = "elastic-internal-keystore-user" + ConfigVolumeMountPath = "/usr/share/elasticsearch/config" + NodeTransportCertificatePathSegment = "node-transport-cert" + NodeTransportCertificateKeyFile = "transport.tls.key" + NodeTransportCertificateCertFile = "transport.tls.crt" + TransportCertificatesSecretVolumeName = "elastic-internal-transport-certificates" TransportCertificatesSecretVolumeMountPath = "/usr/share/elasticsearch/config/transport-certs" diff --git a/operators/pkg/controller/kibana/config/settings_test.go b/operators/pkg/controller/kibana/config/settings_test.go index 2662bf02a4..63e26974c8 100644 --- a/operators/pkg/controller/kibana/config/settings_test.go +++ b/operators/pkg/controller/kibana/config/settings_test.go @@ -11,7 +11,7 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/elastic/go-ucfg" + ucfg "github.com/elastic/go-ucfg" uyaml "github.com/elastic/go-ucfg/yaml" "github.com/go-test/deep" "github.com/stretchr/testify/require" diff --git a/operators/test/e2e/test/elasticsearch/checks_k8s.go b/operators/test/e2e/test/elasticsearch/checks_k8s.go index 82a6e27420..7740b452e1 100644 --- a/operators/test/e2e/test/elasticsearch/checks_k8s.go +++ b/operators/test/e2e/test/elasticsearch/checks_k8s.go @@ -64,7 +64,7 @@ func CheckPodCertificates(b Builder, k *test.K8sClient) test.Step { return err } for _, pod := range pods { - _, _, err := k.GetTransportCert(pod.Name) + _, _, err := k.GetTransportCert(b.Elasticsearch.Name, pod.Name) if err != nil { return err } diff --git a/operators/test/e2e/test/k8s_client.go b/operators/test/e2e/test/k8s_client.go index 24fe4864da..953728c1f9 100644 --- a/operators/test/e2e/test/k8s_client.go +++ b/operators/test/e2e/test/k8s_client.go @@ -19,6 +19,7 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates/http" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/name" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/certificates/transport" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" esname "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" kblabel "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/label" @@ 
-232,11 +233,11 @@ func (k *K8sClient) GetCA(ownerName string, caType certificates.CAType) (*certif
 }
 
 // GetTransportCert retrieves the certificate of the CA and the transport certificate
-func (k *K8sClient) GetTransportCert(podName string) (caCert, transportCert []*x509.Certificate, err error) {
+func (k *K8sClient) GetTransportCert(esName, podName string) (caCert, transportCert []*x509.Certificate, err error) {
 	var secret corev1.Secret
 	key := types.NamespacedName{
 		Namespace: Namespace,
-		Name:      esname.TransportCertsSecret(podName),
+		Name:      esname.TransportCertificatesSecret(esName),
 	}
 	if err = k.Client.Get(key, &secret); err != nil {
 		return nil, nil, err
@@ -249,9 +250,9 @@ func (k *K8sClient) GetTransportCert(podName string) (caCert, transportCert []*x
 	if err != nil {
 		return nil, nil, err
 	}
-	transportCertBytes, exists := secret.Data[certificates.CertFileName]
+	transportCertBytes, exists := secret.Data[transport.PodCertFileName(podName)]
 	if !exists || len(transportCertBytes) == 0 {
-		return nil, nil, fmt.Errorf("no value found for secret %s", certificates.CertFileName)
+		return nil, nil, fmt.Errorf("no value found for secret %s", transport.PodCertFileName(podName))
 	}
 	transportCert, err = certificates.ParsePEMCerts(transportCertBytes)
 	if err != nil {

From d495796116a956d4d09c773bfd55921b8ddd0702 Mon Sep 17 00:00:00 2001
From: Sebastien Guilloux
Date: Wed, 10 Jul 2019 10:04:53 +0200
Subject: [PATCH 02/31] Replace Pod-based reconciliation by StatefulSet-based reconciliation (#1205)

Build a set of StatefulSets matching the ES CRD NodeSpec, and reconcile
them instead of reconciling pods.

This is a very first iteration. Bootstrapping an ES cluster seems to work
fine. Upgrading this cluster (and everything related to zen1/zen2) does
not.

Main bits of this change:

- Require the name field in the NodeSpec, to derive the StatefulSet name
- Build a StatefulSet per NodeSpec, along with a headless service
- Label the StatefulSet with a hash of its template, for comparison
  purposes
- Create a single Config secret per StatefulSet (instead of per pod)
- Label the pod template with a hash of the config (before zen2 initial
  master nodes), to rotate pods on config changes
- Reconcile StatefulSet, service and config in the default driver,
  commenting out most of the existing pod reconciliation logic

The code is left in a rather dirty state: I had to make a few changes deep
down in the functions that build the pod spec and config. These changes
bubble up pretty high, to the changes calculations and the default driver.
Most of the default driver code and the mutation package are probably
doomed. I chose to keep that code there, commented out, instead of
deleting it for now, since I think it may help in the short-term
refactoring ("where does that logic live in the new code? here? OK, I can
remove it."). Let me know if it still deserves more cleanup.

To be refactored and improved in follow-up commits.
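
For reference, the template-hash comparison mentioned above boils down to
storing a hash of the desired object in a label and only updating the live
object when that hash differs. The sketch below is illustrative only: it
reuses the SetTemplateHashLabel and TemplateHashLabelName helpers from the
common/hash package touched by this patch, but the withTemplateHash and
needsUpdate functions are hypothetical names for the sake of the example,
not the reconciliation code introduced here.

    package example

    import (
    	appsv1 "k8s.io/api/apps/v1"

    	"github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash"
    )

    // withTemplateHash labels the expected StatefulSet with a hash of its pod
    // template, so that it can later be compared to the object in the cluster.
    func withTemplateHash(expected appsv1.StatefulSet) appsv1.StatefulSet {
    	expected.Labels = hash.SetTemplateHashLabel(expected.Labels, expected.Spec.Template)
    	return expected
    }

    // needsUpdate returns true when the live StatefulSet does not carry the same
    // template hash as the expected one, i.e. when its spec should be updated.
    func needsUpdate(expected, actual appsv1.StatefulSet) bool {
    	return actual.Labels[hash.TemplateHashLabelName] !=
    		expected.Labels[hash.TemplateHashLabelName]
    }

Comparing hashes stored in labels avoids a deep comparison of the expected
spec against the live object, which would otherwise have to account for
fields defaulted by the API server.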
--- .../elasticsearch_v1alpha1_elasticsearch.yaml | 4 +- .../all-in-one/cluster_role.template.yaml | 1 + .../global/cluster_role.template.yaml | 1 + .../namespace/cluster_role.template.yaml | 1 + .../config/samples/apm/apm_es_kibana.yaml | 3 +- .../samples/elasticsearch/elasticsearch.yaml | 3 +- .../elasticsearch_local_volume.yaml | 3 +- .../config/samples/kibana/kibana_es.yaml | 3 +- .../v1alpha1/elasticsearch_types.go | 4 +- operators/pkg/controller/common/hash/hash.go | 7 +- .../elasticsearch/driver/default.go | 563 ++++++++------- .../elasticsearch/driver/default_test.go | 656 +++++++++--------- .../controller/elasticsearch/driver/driver.go | 4 +- .../controller/elasticsearch/driver/pods.go | 406 ++++++----- .../elasticsearch/driver/pods_test.go | 178 ----- .../controller/elasticsearch/label/label.go | 39 +- .../elasticsearch/mutation/calculate_test.go | 176 +++-- .../mutation/comparison/pod_test.go | 458 ------------ .../pkg/controller/elasticsearch/name/name.go | 9 +- .../elasticsearch/nodespec/resources.go | 65 ++ .../reconcile/resources_state.go | 2 +- .../reconcile/resources_state_test.go | 21 +- .../elasticsearch/settings/config_volume.go | 41 +- .../settings/config_volume_test.go | 134 ++-- .../controller/elasticsearch/sset/build.go | 103 +++ .../pkg/controller/elasticsearch/sset/list.go | 35 + .../elasticsearch/sset/reconcile.go | 34 + .../elasticsearch/version/common.go | 139 ++-- .../elasticsearch/version/common_test.go | 111 +-- .../version/version6/podspecs.go | 31 +- .../version/version6/podspecs_test.go | 118 +--- .../elasticsearch/version/version6/zen1.go | 9 +- .../version/version6/zen1_test.go | 33 +- .../license_controller_integration_test.go | 1 + 34 files changed, 1408 insertions(+), 1988 deletions(-) delete mode 100644 operators/pkg/controller/elasticsearch/driver/pods_test.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/comparison/pod_test.go create mode 100644 operators/pkg/controller/elasticsearch/nodespec/resources.go create mode 100644 operators/pkg/controller/elasticsearch/sset/build.go create mode 100644 operators/pkg/controller/elasticsearch/sset/list.go create mode 100644 operators/pkg/controller/elasticsearch/sset/reconcile.go diff --git a/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml b/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml index 377dab42f5..14c27efa02 100644 --- a/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml +++ b/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml @@ -123,7 +123,7 @@ spec: description: Name is a logical name for this set of nodes. Used as a part of the managed Elasticsearch node.name setting. 
maxLength: 12 - pattern: '[a-zA-Z0-9-]*' + pattern: '[a-zA-Z0-9-]+' type: string nodeCount: description: NodeCount defines how many nodes have this topology @@ -147,6 +147,8 @@ spec: items: type: object type: array + required: + - name type: object type: array podDisruptionBudget: diff --git a/operators/config/operator/all-in-one/cluster_role.template.yaml b/operators/config/operator/all-in-one/cluster_role.template.yaml index e22f9962ce..246a51ec15 100644 --- a/operators/config/operator/all-in-one/cluster_role.template.yaml +++ b/operators/config/operator/all-in-one/cluster_role.template.yaml @@ -29,6 +29,7 @@ rules: - apps resources: - deployments + - statefulsets verbs: - get - list diff --git a/operators/config/operator/global/cluster_role.template.yaml b/operators/config/operator/global/cluster_role.template.yaml index e195fbc1a2..0d4d63c401 100644 --- a/operators/config/operator/global/cluster_role.template.yaml +++ b/operators/config/operator/global/cluster_role.template.yaml @@ -28,6 +28,7 @@ rules: - apps resources: - deployments + - statefulsets verbs: - get - list diff --git a/operators/config/operator/namespace/cluster_role.template.yaml b/operators/config/operator/namespace/cluster_role.template.yaml index b8d15ce35e..6492b7e1e3 100644 --- a/operators/config/operator/namespace/cluster_role.template.yaml +++ b/operators/config/operator/namespace/cluster_role.template.yaml @@ -41,6 +41,7 @@ rules: - apps resources: - deployments + - statefulsets verbs: - get - list diff --git a/operators/config/samples/apm/apm_es_kibana.yaml b/operators/config/samples/apm/apm_es_kibana.yaml index 9c02076b06..371cb4ea15 100644 --- a/operators/config/samples/apm/apm_es_kibana.yaml +++ b/operators/config/samples/apm/apm_es_kibana.yaml @@ -7,7 +7,8 @@ metadata: spec: version: "7.1.0" nodes: - - nodeCount: 3 + - name: all + nodeCount: 3 --- apiVersion: apm.k8s.elastic.co/v1alpha1 kind: ApmServer diff --git a/operators/config/samples/elasticsearch/elasticsearch.yaml b/operators/config/samples/elasticsearch/elasticsearch.yaml index bc2176ad1b..457197d319 100644 --- a/operators/config/samples/elasticsearch/elasticsearch.yaml +++ b/operators/config/samples/elasticsearch/elasticsearch.yaml @@ -6,7 +6,8 @@ metadata: spec: version: "7.1.0" nodes: - - config: + - name: all + config: # most Elasticsearch configuration parameters are possible to set, e.g: node.attr.attr_name: attr_value podTemplate: diff --git a/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml b/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml index a3dd16f83f..b16f17010f 100644 --- a/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml +++ b/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml @@ -7,7 +7,8 @@ metadata: spec: version: "7.1.0" nodes: - - nodeCount: 3 + - name: all + nodeCount: 3 volumeClaimTemplates: - metadata: name: elasticsearch-data diff --git a/operators/config/samples/kibana/kibana_es.yaml b/operators/config/samples/kibana/kibana_es.yaml index bd41d230ef..db0f8d5498 100644 --- a/operators/config/samples/kibana/kibana_es.yaml +++ b/operators/config/samples/kibana/kibana_es.yaml @@ -6,7 +6,8 @@ metadata: spec: version: "7.1.0" nodes: - - nodeCount: 1 + - name: all + nodeCount: 1 --- apiVersion: kibana.k8s.elastic.co/v1alpha1 kind: Kibana diff --git a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go index b2fc3b88ed..7ff6c3e6d1 100644 --- 
a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go +++ b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go @@ -65,9 +65,9 @@ func (es ElasticsearchSpec) NodeCount() int32 { // NodeSpec defines a common topology for a set of Elasticsearch nodes type NodeSpec struct { // Name is a logical name for this set of nodes. Used as a part of the managed Elasticsearch node.name setting. - // +kubebuilder:validation:Pattern=[a-zA-Z0-9-]* + // +kubebuilder:validation:Pattern=[a-zA-Z0-9-]+ // +kubebuilder:validation:MaxLength=12 - Name string `json:"name,omitempty"` + Name string `json:"name"` // Config represents Elasticsearch configuration. Config *commonv1alpha1.Config `json:"config,omitempty"` diff --git a/operators/pkg/controller/common/hash/hash.go b/operators/pkg/controller/common/hash/hash.go index b595223738..b0b65f2381 100644 --- a/operators/pkg/controller/common/hash/hash.go +++ b/operators/pkg/controller/common/hash/hash.go @@ -20,11 +20,16 @@ const ( // SetTemplateHashLabel adds a label containing the hash of the given template into the // given labels. This label can then be used for template comparisons. func SetTemplateHashLabel(labels map[string]string, template interface{}) map[string]string { + return SetHashLabel(TemplateHashLabelName, labels, template) +} + +func SetHashLabel(labelName string, labels map[string]string, template interface{}) map[string]string { if labels == nil { labels = map[string]string{} } - labels[TemplateHashLabelName] = HashObject(template) + labels[labelName] = HashObject(template) return labels + } // GetTemplateHashLabel returns the template hash label value if set, or an empty string. diff --git a/operators/pkg/controller/elasticsearch/driver/default.go b/operators/pkg/controller/elasticsearch/driver/default.go index b2ec523ee5..15f076de28 100644 --- a/operators/pkg/controller/elasticsearch/driver/default.go +++ b/operators/pkg/controller/elasticsearch/driver/default.go @@ -8,13 +8,13 @@ import ( "crypto/x509" "fmt" - "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" controller "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" @@ -25,20 +25,20 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/configmap" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/license" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/migration" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/network" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pdb" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - 
"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pvc" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/restart" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/services" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/version6" esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) @@ -192,7 +192,7 @@ func (d *defaultDriver) Reconcile( return results.WithError(err) } - podsState := mutation.NewPodsState(*resourcesState, observedState) + //podsState := mutation.NewPodsState(*resourcesState, observedState) if err := d.supportedVersions.VerifySupportsExistingPods(resourcesState.CurrentPods.Pods()); err != nil { return results.WithError(err) @@ -213,58 +213,58 @@ func (d *defaultDriver) Reconcile( } namespacedName := k8s.ExtractNamespacedName(&es) - - // There might be some ongoing creations and deletions our k8s client cache - // hasn't seen yet. In such case, requeue until we are in-sync. - // Otherwise, we could end up re-creating multiple times the same pod with - // different generated names through multiple reconciliation iterations. - if !d.PodsExpectations.Fulfilled(namespacedName) { - log.Info("Pods creations and deletions expectations are not satisfied yet. Requeuing.") - return results.WithResult(defaultRequeue) - } - - changes, err := d.calculateChanges(internalUsers, es, *resourcesState) - if err != nil { - return results.WithError(err) - } - - log.Info( - "Calculated all required changes", - "to_create:", len(changes.ToCreate), - "to_keep:", len(changes.ToKeep), - "to_delete:", len(changes.ToDelete), - ) - - // restart ES processes that need to be restarted before going on with other changes - done, err := restart.HandleESRestarts( - restart.RestartContext{ - Cluster: es, - EventsRecorder: reconcileState.Recorder, - K8sClient: d.Client, - Changes: *changes, - Dialer: d.Dialer, - EsClient: esClient, - }, - ) - if err != nil { - return results.WithError(err) - } - if !done { - log.V(1).Info("Pods restart is not over yet, re-queueing.") - return results.WithResult(defaultRequeue) - } - - // figure out what changes we can perform right now - performableChanges, err := mutation.CalculatePerformableChanges(es.Spec.UpdateStrategy, *changes, podsState) - if err != nil { - return results.WithError(err) - } - - log.Info( - "Calculated performable changes", - "schedule_for_creation_count", len(performableChanges.ToCreate), - "schedule_for_deletion_count", len(performableChanges.ToDelete), - ) + // + //// There might be some ongoing creations and deletions our k8s client cache + //// hasn't seen yet. In such case, requeue until we are in-sync. + //// Otherwise, we could end up re-creating multiple times the same pod with + //// different generated names through multiple reconciliation iterations. + //if !d.PodsExpectations.Fulfilled(namespacedName) { + // log.Info("Pods creations and deletions expectations are not satisfied yet. 
Requeuing.") + // return results.WithResult(defaultRequeue) + //} + // + //changes, err := d.calculateChanges(internalUsers, es, *resourcesState) + //if err != nil { + // return results.WithError(err) + //} + // + //log.Info( + // "Calculated all required changes", + // "to_create:", len(changes.ToCreate), + // "to_keep:", len(changes.ToKeep), + // "to_delete:", len(changes.ToDelete), + //) + // + //// restart ES processes that need to be restarted before going on with other changes + //done, err := restart.HandleESRestarts( + // restart.RestartContext{ + // Cluster: es, + // EventsRecorder: reconcileState.Recorder, + // K8sClient: d.Client, + // Changes: *changes, + // Dialer: d.Dialer, + // EsClient: esClient, + // }, + //) + //if err != nil { + // return results.WithError(err) + //} + //if !done { + // log.V(1).Info("Pods restart is not over yet, re-queueing.") + // return results.WithResult(defaultRequeue) + //} + // + //// figure out what changes we can perform right now + //performableChanges, err := mutation.CalculatePerformableChanges(es.Spec.UpdateStrategy, *changes, podsState) + //if err != nil { + // return results.WithError(err) + //} + // + //log.Info( + // "Calculated performable changes", + // "schedule_for_creation_count", len(performableChanges.ToCreate), + // "schedule_for_deletion_count", len(performableChanges.ToDelete), + //) results.Apply( "reconcile-cluster-license", @@ -286,65 +286,152 @@ func (d *defaultDriver) Reconcile( return controller.Result{}, err }, ) - - if d.clusterInitialMasterNodesEnforcer != nil { - performableChanges, err = d.clusterInitialMasterNodesEnforcer(*performableChanges, *resourcesState) - if err != nil { - return results.WithError(err) - } - } + // + //if d.clusterInitialMasterNodesEnforcer != nil { + // performableChanges, err = d.clusterInitialMasterNodesEnforcer(*performableChanges, *resourcesState) + // if err != nil { + // return results.WithError(err) + // } + //} // Compute seed hosts based on current masters with a podIP if err := settings.UpdateSeedHostsConfigMap(d.Client, d.Scheme, es, resourcesState.AllPods); err != nil { return results.WithError(err) } - // Call Zen1 setting updater before new masters are created to ensure that they immediately start with the - // correct value for minimum_master_nodes. - // For instance if a 3 master nodes cluster is updated and a grow-and-shrink strategy of one node is applied then - // minimum_master_nodes is increased from 2 to 3 for new and current nodes. 
- if d.zen1SettingsUpdater != nil { - requeue, err := d.zen1SettingsUpdater( + // TODO: this is a mess, refactor and unit test correctly + podTemplateSpecBuilder := func(nodeSpec v1alpha1.NodeSpec, cfg settings.CanonicalConfig) (corev1.PodTemplateSpec, error) { + return esversion.BuildPodTemplateSpec( es, - d.Client, - esClient, - resourcesState.AllPods, - performableChanges, - reconcileState, + nodeSpec, + pod.NewPodSpecParams{ + ProbeUser: internalUsers.ProbeUser.Auth(), + KeystoreUser: internalUsers.KeystoreUser.Auth(), + UnicastHostsVolume: volume.NewConfigMapVolume( + name.UnicastHostsConfigMap(es.Name), esvolume.UnicastHostsVolumeName, esvolume.UnicastHostsVolumeMountPath, + ), + UsersSecretVolume: volume.NewSecretVolumeWithMountPath( + user.XPackFileRealmSecretName(es.Name), + esvolume.XPackFileRealmVolumeName, + esvolume.XPackFileRealmVolumeMountPath, + ), + }, + cfg, + version6.NewEnvironmentVars, + initcontainer.NewInitContainers, + d.OperatorImage, ) + } + + actualStatefulSets, err := sset.RetrieveActualStatefulSets(d.Client, namespacedName) + if err != nil { + return results.WithError(err) + } + + nodeSpecResources, err := nodespec.BuildExpectedResources(es, podTemplateSpecBuilder) + if err != nil { + return results.WithError(err) + } + // TODO: handle zen2 initial master nodes more cleanly + // should be empty once cluster is bootstraped + var initialMasters []string + // TODO: refactor/move + for _, res := range nodeSpecResources { + cfg, err := res.Config.Unpack() if err != nil { return results.WithError(err) } - - if requeue { - results.WithResult(defaultRequeue) + if cfg.Node.Master { + for i := 0; i < int(*res.StatefulSet.Spec.Replicas); i++ { + initialMasters = append(initialMasters, fmt.Sprintf("%s-%d", res.StatefulSet.Name, i)) + } } } - - // List the orphaned PVCs before the Pods are created. - // If there are some orphaned PVCs they will be adopted and remove sequentially from the list when Pods are created. 
- orphanedPVCs, err := pvc.FindOrphanedVolumeClaims(d.Client, es) - if err != nil { - return results.WithError(err) + for i := range nodeSpecResources { + if err := nodeSpecResources[i].Config.SetStrings(settings.ClusterInitialMasterNodes, initialMasters...); err != nil { + return results.WithError(err) + } } - for _, change := range performableChanges.ToCreate { - d.PodsExpectations.ExpectCreation(namespacedName) - if err := createElasticsearchPod( - d.Client, - d.Scheme, - es, - reconcileState, - change.Pod, - change.PodSpecCtx, - orphanedPVCs, - ); err != nil { - // pod was not created, cancel our expectation by marking it observed - d.PodsExpectations.CreationObserved(namespacedName) + // create or update all expected ssets + // TODO: safe upgrades + for _, nodeSpec := range nodeSpecResources { + if err := settings.ReconcileConfig(d.Client, es, nodeSpec.StatefulSet.Name, nodeSpec.Config); err != nil { + return results.WithError(err) + } + if _, err := common.ReconcileService(d.Client, d.Scheme, &nodeSpec.HeadlessService, &es); err != nil { + return results.WithError(err) + } + if err := sset.ReconcileStatefulSet(d.Client, d.Scheme, es, nodeSpec.StatefulSet); err != nil { return results.WithError(err) } } + + // delete all unexpected ssets + for _, actual := range actualStatefulSets { + if _, shouldExist := nodeSpecResources.StatefulSets().GetByName(actual.Name); !shouldExist { + // TODO: safe node removal + if err := d.Client.Delete(&actual); err != nil { + return results.WithError(err) + } + } + } + + // TODO: + // - safe sset replacement + // - safe node removal (data migration) + // - safe node upgrade (rollingUpdate.Partition + shards allocation) + // - change budget + // - zen1, zen2 + + // + //// Call Zen1 setting updater before new masters are created to ensure that they immediately start with the + //// correct value for minimum_master_nodes. + //// For instance if a 3 master nodes cluster is updated and a grow-and-shrink strategy of one node is applied then + //// minimum_master_nodes is increased from 2 to 3 for new and current nodes. + //if d.zen1SettingsUpdater != nil { + // requeue, err := d.zen1SettingsUpdater( + // es, + // d.Client, + // esClient, + // resourcesState.AllPods, + // performableChanges, + // reconcileState, + // ) + // + // if err != nil { + // return results.WithError(err) + // } + // + // if requeue { + // results.WithResult(defaultRequeue) + // } + //} + // + //// List the orphaned PVCs before the Pods are created. + //// If there are some orphaned PVCs they will be adopted and remove sequentially from the list when Pods are created. + //orphanedPVCs, err := pvc.FindOrphanedVolumeClaims(d.Client, es) + //if err != nil { + // return results.WithError(err) + //} + // + //for _, change := range performableChanges.ToCreate { + // d.PodsExpectations.ExpectCreation(namespacedName) + // if err := createElasticsearchPod( + // d.Client, + // d.Scheme, + // es, + // reconcileState, + // change.Pod, + // change.PodSpecCtx, + // orphanedPVCs, + // ); err != nil { + // // pod was not created, cancel our expectation by marking it observed + // d.PodsExpectations.CreationObserved(namespacedName) + // return results.WithError(err) + // } + //} // passed this point, any pods resource listing should check expectations first if !esReachable { @@ -358,117 +445,118 @@ func (d *defaultDriver) Reconcile( return results.WithResult(defaultRequeue) } - - if d.zen2SettingsUpdater != nil { - // TODO: would prefer to do this after MigrateData iff there's no changes? 
or is that an premature optimization? - if err := d.zen2SettingsUpdater( - esClient, - *min, - *changes, - *performableChanges, - ); err != nil { - return results.WithResult(defaultRequeue).WithError(err) - } - } - - if !changes.HasChanges() { - // Current state matches expected state - reconcileState.UpdateElasticsearchOperational(*resourcesState, observedState) - return results - } - - // Start migrating data away from all pods to be deleted - leavingNodeNames := pod.PodListToNames(performableChanges.ToDelete.Pods()) - if err = migration.MigrateData(esClient, leavingNodeNames); err != nil { - return results.WithError(errors.Wrap(err, "error during migrate data")) - } - - // Shrink clusters by deleting deprecated pods - if err = d.attemptPodsDeletion( - performableChanges, - reconcileState, - resourcesState, - observedState, - results, - esClient, - es, - ); err != nil { - return results.WithError(err) - } - // past this point, any pods resource listing should check expectations first - - if changes.HasChanges() && !performableChanges.HasChanges() { - // if there are changes we'd like to perform, but none that were performable, we try again later - results.WithResult(defaultRequeue) - } + // + //if d.zen2SettingsUpdater != nil { + // // TODO: would prefer to do this after MigrateData iff there's no changes? or is that an premature optimization? + // if err := d.zen2SettingsUpdater( + // esClient, + // *min, + // *changes, + // *performableChanges, + // ); err != nil { + // return results.WithResult(defaultRequeue).WithError(err) + // } + //} + // + //if !changes.HasChanges() { + // // Current state matches expected state + // reconcileState.UpdateElasticsearchOperational(*resourcesState, observedState) + // return results + //} + // + //// Start migrating data away from all pods to be deleted + //leavingNodeNames := pod.PodListToNames(performableChanges.ToDelete.Pods()) + //if err = migration.MigrateData(esClient, leavingNodeNames); err != nil { + // return results.WithError(errors.Wrap(err, "error during migrate data")) + //} + // + //// Shrink clusters by deleting deprecated pods + //if err = d.attemptPodsDeletion( + // performableChanges, + // reconcileState, + // resourcesState, + // observedState, + // results, + // esClient, + // es, + //); err != nil { + // return results.WithError(err) + //} + //// past this point, any pods resource listing should check expectations first + // + //if changes.HasChanges() && !performableChanges.HasChanges() { + // // if there are changes we'd like to perform, but none that were performable, we try again later + // results.WithResult(defaultRequeue) + //} reconcileState.UpdateElasticsearchState(*resourcesState, observedState) return results } -// attemptPodsDeletion deletes a list of pods after checking there is no migrating data for each of them -func (d *defaultDriver) attemptPodsDeletion( - changes *mutation.PerformableChanges, - reconcileState *reconcile.State, - resourcesState *reconcile.ResourcesState, - observedState observer.State, - results *reconciler.Results, - esClient esclient.Client, - elasticsearch v1alpha1.Elasticsearch, -) error { - newState := make([]corev1.Pod, len(resourcesState.CurrentPods)) - copy(newState, resourcesState.CurrentPods.Pods()) - for _, pod := range changes.ToDelete.Pods() { - newState = removePodFromList(newState, pod) - preDelete := func() error { - if d.zen1SettingsUpdater != nil { - requeue, err := d.zen1SettingsUpdater( - elasticsearch, - d.Client, - esClient, - newState, - changes, - reconcileState) - - if 
err != nil { - return err - } - - if requeue { - results.WithResult(defaultRequeue) - } - } - return nil - } - - // do not delete a pod or expect a deletion if a data migration is in progress - isMigratingData := migration.IsMigratingData(observedState, pod, changes.ToDelete.Pods()) - if isMigratingData { - log.Info("Skipping deletion because of migrating data", "pod", pod.Name) - reconcileState.UpdateElasticsearchMigrating(*resourcesState, observedState) - results.WithResult(defaultRequeue) - continue - } - - namespacedName := k8s.ExtractNamespacedName(&elasticsearch) - d.PodsExpectations.ExpectDeletion(namespacedName) - result, err := deleteElasticsearchPod( - d.Client, - reconcileState, - *resourcesState, - pod, - preDelete, - ) - if err != nil { - // pod was not deleted, cancel our expectation by marking it observed - d.PodsExpectations.DeletionObserved(namespacedName) - return err - } - results.WithResult(result) - } - return nil -} +// +//// attemptPodsDeletion deletes a list of pods after checking there is no migrating data for each of them +//func (d *defaultDriver) attemptPodsDeletion( +// changes *mutation.PerformableChanges, +// reconcileState *reconcile.State, +// resourcesState *reconcile.ResourcesState, +// observedState observer.State, +// results *reconciler.Results, +// esClient esclient.Client, +// elasticsearch v1alpha1.Elasticsearch, +//) error { +// newState := make([]corev1.Pod, len(resourcesState.CurrentPods)) +// copy(newState, resourcesState.CurrentPods.Pods()) +// for _, pod := range changes.ToDelete.Pods() { +// newState = removePodFromList(newState, pod) +// preDelete := func() error { +// if d.zen1SettingsUpdater != nil { +// requeue, err := d.zen1SettingsUpdater( +// elasticsearch, +// d.Client, +// esClient, +// newState, +// changes, +// reconcileState) +// +// if err != nil { +// return err +// } +// +// if requeue { +// results.WithResult(defaultRequeue) +// } +// } +// return nil +// } +// +// // do not delete a pod or expect a deletion if a data migration is in progress +// isMigratingData := migration.IsMigratingData(observedState, pod, changes.ToDelete.Pods()) +// if isMigratingData { +// log.Info("Skipping deletion because of migrating data", "pod", pod.Name) +// reconcileState.UpdateElasticsearchMigrating(*resourcesState, observedState) +// results.WithResult(defaultRequeue) +// continue +// } +// +// namespacedName := k8s.ExtractNamespacedName(&elasticsearch) +// d.PodsExpectations.ExpectDeletion(namespacedName) +// result, err := deleteElasticsearchPod( +// d.Client, +// reconcileState, +// *resourcesState, +// pod, +// preDelete, +// ) +// if err != nil { +// // pod was not deleted, cancel our expectation by marking it observed +// d.PodsExpectations.DeletionObserved(namespacedName) +// return err +// } +// results.WithResult(result) +// } +// return nil +//} // removePodFromList removes a single pod from the list, matching by pod name. func removePodFromList(pods []corev1.Pod, pod corev1.Pod) []corev1.Pod { @@ -480,41 +568,42 @@ func removePodFromList(pods []corev1.Pod, pod corev1.Pod) []corev1.Pod { return pods } -// calculateChanges calculates the changes we'd need to perform to go from the current cluster configuration to the -// desired one. 
-func (d *defaultDriver) calculateChanges( - internalUsers *user.InternalUsers, - es v1alpha1.Elasticsearch, - resourcesState reconcile.ResourcesState, -) (*mutation.Changes, error) { - expectedPodSpecCtxs, err := d.expectedPodsAndResourcesResolver( - es, - pod.NewPodSpecParams{ - ProbeUser: internalUsers.ProbeUser.Auth(), - KeystoreUser: internalUsers.KeystoreUser.Auth(), - UnicastHostsVolume: volume.NewConfigMapVolume( - name.UnicastHostsConfigMap(es.Name), esvolume.UnicastHostsVolumeName, esvolume.UnicastHostsVolumeMountPath, - ), - }, - d.OperatorImage, - ) - if err != nil { - return nil, err - } - - changes, err := mutation.CalculateChanges( - es, - expectedPodSpecCtxs, - resourcesState, - func(ctx pod.PodSpecContext) corev1.Pod { - return esversion.NewPod(es, ctx) - }, - ) - if err != nil { - return nil, err - } - return &changes, nil -} +// +//// calculateChanges calculates the changes we'd need to perform to go from the current cluster configuration to the +//// desired one. +//func (d *defaultDriver) calculateChanges( +// internalUsers *user.InternalUsers, +// es v1alpha1.Elasticsearch, +// resourcesState reconcile.ResourcesState, +//) (*mutation.Changes, error) { +// expectedPodSpecCtxs, err := d.expectedPodsAndResourcesResolver( +// es, +// pod.NewPodSpecParams{ +// ProbeUser: internalUsers.ProbeUser.Auth(), +// KeystoreUser: internalUsers.KeystoreUser.Auth(), +// UnicastHostsVolume: volume.NewConfigMapVolume( +// name.UnicastHostsConfigMap(es.Name), esvolume.UnicastHostsVolumeName, esvolume.UnicastHostsVolumeMountPath, +// ), +// }, +// d.OperatorImage, +// ) +// if err != nil { +// return nil, err +// } +// +// changes, err := mutation.CalculateChanges( +// es, +// expectedPodSpecCtxs, +// resourcesState, +// func(ctx pod.PodSpecContext) corev1.Pod { +// return esversion.NewPod(es, ctx) +// }, +// ) +// if err != nil { +// return nil, err +// } +// return &changes, nil +//} // newElasticsearchClient creates a new Elasticsearch HTTP client for this cluster using the provided user func (d *defaultDriver) newElasticsearchClient(service corev1.Service, user user.User, v version.Version, caCerts []*x509.Certificate) esclient.Client { diff --git a/operators/pkg/controller/elasticsearch/driver/default_test.go b/operators/pkg/controller/elasticsearch/driver/default_test.go index b9e9a3b632..b8ee928c56 100644 --- a/operators/pkg/controller/elasticsearch/driver/default_test.go +++ b/operators/pkg/controller/elasticsearch/driver/default_test.go @@ -4,341 +4,323 @@ package driver -import ( - "encoding/json" - "testing" +// +//const ( +// ClusterStateSample = ` +//{ +// "cluster_name": "elasticsearch-sample", +// "compressed_size_in_bytes": 10281, +// "cluster_uuid": "fW1CurdKQpa-vsEYgTwkvg", +// "version": 28, +// "state_uuid": "0_7Tkm3ERdeB5eOqEgdOcA", +// "master_node": "EizpW8QWRty_T1nJpr-dNQ", +// "nodes": { +// "EizpW8QWRty_T1nJpr-dNQ": { +// "name": "elasticsearch-sample-es-fnsgkkdl85", +// "ephemeral_id": "hd8VlWVdTlyCriXKDW-5kg", +// "transport_address": "172.17.0.10:9300", +// "attributes": { +// "xpack.installed": "true" +// } +// }, +// "NRqCLTmhTLuSxzlWcTae3A": { +// "name": "elasticsearch-sample-es-79gc6p57rs", +// "ephemeral_id": "VHAy3TOxTby3fNaPpMgfkg", +// "transport_address": "172.17.0.9:9300", +// "attributes": { +// "xpack.installed": "true" +// } +// }, +// "q--ANfDnTKW2WS9pEBuLWQ": { +// "name": "elasticsearch-sample-es-jfpqbt2s4q", +// "ephemeral_id": "USglep8YTW-4vZ9M7PyRqA", +// "transport_address": "172.17.0.7:9300", +// "attributes": { +// "xpack.installed": 
"true" +// } +// } +// }, +// "routing_table": { +// "indices": { +// "shakespeare": { +// "shards": { +// "0": [ +// { +// "state": "STARTED", +// "primary": true, +// "node": "q--ANfDnTKW2WS9pEBuLWQ", +// "relocating_node": null, +// "shard": 0, +// "index": "shakespeare", +// "allocation_id": { +// "id": "TtAx_PMwRCmanPR7XddWmg" +// } +// }, +// { +// "state": "STARTED", +// "primary": false, +// "node": "EizpW8QWRty_T1nJpr-dNQ", +// "relocating_node": null, +// "shard": 0, +// "index": "shakespeare", +// "allocation_id": { +// "id": "QddiDZTHTuStDTIKSOIk5A" +// } +// } +// ], +// "1": [ +// { +// "state": "STARTED", +// "primary": true, +// "node": "NRqCLTmhTLuSxzlWcTae3A", +// "relocating_node": null, +// "shard": 1, +// "index": "shakespeare", +// "allocation_id": { +// "id": "IzFuExmARziQWcX8RlaZdg" +// } +// }, +// { +// "state": "STARTED", +// "primary": false, +// "node": "EizpW8QWRty_T1nJpr-dNQ", +// "relocating_node": null, +// "shard": 1, +// "index": "shakespeare", +// "allocation_id": { +// "id": "XqIv4y1rQf6aL5C63Xsbhg" +// } +// } +// ], +// "2": [ +// { +// "state": "STARTED", +// "primary": false, +// "node": "q--ANfDnTKW2WS9pEBuLWQ", +// "relocating_node": null, +// "shard": 2, +// "index": "shakespeare", +// "allocation_id": { +// "id": "XCAywOULRf66CR2xugkIpg" +// } +// }, +// { +// "state": "STARTED", +// "primary": true, +// "node": "EizpW8QWRty_T1nJpr-dNQ", +// "relocating_node": null, +// "shard": 2, +// "index": "shakespeare", +// "allocation_id": { +// "id": "yNuj-Rw7QkC74opnoRQIqQ" +// } +// } +// ], +// "3": [ +// { +// "state": "STARTED", +// "primary": true, +// "node": "q--ANfDnTKW2WS9pEBuLWQ", +// "relocating_node": null, +// "shard": 3, +// "index": "shakespeare", +// "allocation_id": { +// "id": "foOkK0oWTAaFTg-M41sMgQ" +// } +// }, +// { +// "state": "STARTED", +// "primary": false, +// "node": "NRqCLTmhTLuSxzlWcTae3A", +// "relocating_node": null, +// "shard": 3, +// "index": "shakespeare", +// "allocation_id": { +// "id": "MdjjvB9KTfu4gs_skXDyXg" +// } +// } +// ], +// "4": [ +// { +// "state": "STARTED", +// "primary": false, +// "node": "q--ANfDnTKW2WS9pEBuLWQ", +// "relocating_node": null, +// "shard": 4, +// "index": "shakespeare", +// "allocation_id": { +// "id": "exBumbxRT6KY7LVmGOSIZA" +// } +// }, +// { +// "state": "STARTED", +// "primary": true, +// "node": "NRqCLTmhTLuSxzlWcTae3A", +// "relocating_node": null, +// "shard": 4, +// "index": "shakespeare", +// "allocation_id": { +// "id": "pUhEb1k5TC24EKD-OjS7Iw" +// } +// } +// ] +// } +// } +// } +// } +//} +//` +//) +// +//func newPod(name, namespace string) corev1.Pod { +// pod := corev1.Pod{ +// ObjectMeta: metav1.ObjectMeta{ +// Name: name, +// Namespace: namespace, +// }, +// } +// return pod +//} - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" - esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - k8sreconcile "sigs.k8s.io/controller-runtime/pkg/reconcile" -) - -const ( - ClusterStateSample = ` -{ - "cluster_name": "elasticsearch-sample", - "compressed_size_in_bytes": 10281, - "cluster_uuid": "fW1CurdKQpa-vsEYgTwkvg", - "version": 28, - "state_uuid": "0_7Tkm3ERdeB5eOqEgdOcA", - "master_node": "EizpW8QWRty_T1nJpr-dNQ", - "nodes": { - "EizpW8QWRty_T1nJpr-dNQ": { - "name": "elasticsearch-sample-es-fnsgkkdl85", - "ephemeral_id": "hd8VlWVdTlyCriXKDW-5kg", - "transport_address": "172.17.0.10:9300", - "attributes": { - "xpack.installed": "true" - } - }, - "NRqCLTmhTLuSxzlWcTae3A": { - "name": "elasticsearch-sample-es-79gc6p57rs", - "ephemeral_id": "VHAy3TOxTby3fNaPpMgfkg", - "transport_address": "172.17.0.9:9300", - "attributes": { - "xpack.installed": "true" - } - }, - "q--ANfDnTKW2WS9pEBuLWQ": { - "name": "elasticsearch-sample-es-jfpqbt2s4q", - "ephemeral_id": "USglep8YTW-4vZ9M7PyRqA", - "transport_address": "172.17.0.7:9300", - "attributes": { - "xpack.installed": "true" - } - } - }, - "routing_table": { - "indices": { - "shakespeare": { - "shards": { - "0": [ - { - "state": "STARTED", - "primary": true, - "node": "q--ANfDnTKW2WS9pEBuLWQ", - "relocating_node": null, - "shard": 0, - "index": "shakespeare", - "allocation_id": { - "id": "TtAx_PMwRCmanPR7XddWmg" - } - }, - { - "state": "STARTED", - "primary": false, - "node": "EizpW8QWRty_T1nJpr-dNQ", - "relocating_node": null, - "shard": 0, - "index": "shakespeare", - "allocation_id": { - "id": "QddiDZTHTuStDTIKSOIk5A" - } - } - ], - "1": [ - { - "state": "STARTED", - "primary": true, - "node": "NRqCLTmhTLuSxzlWcTae3A", - "relocating_node": null, - "shard": 1, - "index": "shakespeare", - "allocation_id": { - "id": "IzFuExmARziQWcX8RlaZdg" - } - }, - { - "state": "STARTED", - "primary": false, - "node": "EizpW8QWRty_T1nJpr-dNQ", - "relocating_node": null, - "shard": 1, - "index": "shakespeare", - "allocation_id": { - "id": "XqIv4y1rQf6aL5C63Xsbhg" - } - } - ], - "2": [ - { - "state": "STARTED", - "primary": false, - "node": "q--ANfDnTKW2WS9pEBuLWQ", - "relocating_node": null, - "shard": 2, - "index": "shakespeare", - "allocation_id": { - "id": "XCAywOULRf66CR2xugkIpg" - } - }, - { - "state": "STARTED", - "primary": true, - "node": "EizpW8QWRty_T1nJpr-dNQ", - "relocating_node": null, - "shard": 2, - "index": "shakespeare", - "allocation_id": { - "id": "yNuj-Rw7QkC74opnoRQIqQ" - } - } - ], - "3": [ - { - "state": "STARTED", - "primary": true, - "node": "q--ANfDnTKW2WS9pEBuLWQ", - "relocating_node": null, - "shard": 3, - "index": "shakespeare", - "allocation_id": { - "id": "foOkK0oWTAaFTg-M41sMgQ" - } - }, - { - "state": "STARTED", - "primary": false, - "node": "NRqCLTmhTLuSxzlWcTae3A", - "relocating_node": null, - "shard": 3, - "index": "shakespeare", - "allocation_id": { - "id": "MdjjvB9KTfu4gs_skXDyXg" - } - } - ], - "4": [ - { - "state": "STARTED", - "primary": false, - "node": "q--ANfDnTKW2WS9pEBuLWQ", - "relocating_node": null, - "shard": 4, - "index": "shakespeare", - "allocation_id": { - "id": "exBumbxRT6KY7LVmGOSIZA" - } - }, - { - "state": "STARTED", - "primary": true, - "node": "NRqCLTmhTLuSxzlWcTae3A", - "relocating_node": null, - "shard": 4, - "index": "shakespeare", - "allocation_id": { - "id": "pUhEb1k5TC24EKD-OjS7Iw" - } - } - ] - } - } - } - } -} -` -) - -func newPod(name, namespace string) corev1.Pod { - pod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - } - return pod -} - -func 
Test_defaultDriver_attemptPodsDeletion(t *testing.T) { - var clusterState esclient.ClusterState - b := []byte(ClusterStateSample) - err := json.Unmarshal(b, &clusterState) - if err != nil { - t.Error(err) - } - pod1 := newPod("elasticsearch-sample-es-79gc6p57rs", "default") - pod2 := newPod("elasticsearch-sample-es-fnsgkkdl85", "default") - pod3 := newPod("elasticsearch-sample-es-jfpqbt2s4q", "default") - pod4 := newPod("elasticsearch-sample-es-nope", "default") - - expectedResult1 := reconciler.Results{} - expectedResult1.WithResult(defaultRequeue).WithResult(defaultRequeue) - - expectedEmptyResult := reconciler.Results{} - expectedEmptyResult.WithResult(k8sreconcile.Result{}) - - elasticsearch := v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "default", - Name: "elasticsearch-sample", - }, - } - - type fields struct { - Options Options - } - - type args struct { - ToDelete *mutation.PerformableChanges - reconcileState *reconcile.State - resourcesState *reconcile.ResourcesState - observedState observer.State - results *reconciler.Results - esClient esclient.Client - elasticsearch v1alpha1.Elasticsearch - } - - type want struct { - results *reconciler.Results - fulfilledExpectation bool - } - - tests := []struct { - name string - fields fields - args args - wantErr bool - want want - }{ - { - name: "Do not delete a pod with migrating data", - args: args{ - elasticsearch: elasticsearch, - ToDelete: &mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToDelete: pod.PodsWithConfig{ - pod.PodWithConfig{Pod: pod1}, - pod.PodWithConfig{Pod: pod2}, - }, - }, - }, - resourcesState: &reconcile.ResourcesState{ - CurrentPods: pod.PodsWithConfig{ - {Pod: pod1}, - {Pod: pod2}, - {Pod: pod3}, - }, - }, - observedState: observer.State{ - ClusterState: &clusterState, - }, - reconcileState: &reconcile.State{Recorder: events.NewRecorder()}, - results: &reconciler.Results{}, - }, - fields: fields{ - Options: Options{ - PodsExpectations: reconciler.NewExpectations(), - }, - }, - wantErr: false, - want: want{ - results: &expectedResult1, - fulfilledExpectation: true, // pod deletion is delayed, do not expect anything - }, - }, - { - name: "Delete a pod with no data", - args: args{ - elasticsearch: elasticsearch, - ToDelete: &mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToDelete: pod.PodsWithConfig{ - pod.PodWithConfig{Pod: pod4}, - }, - }, - }, - resourcesState: &reconcile.ResourcesState{ - CurrentPods: pod.PodsWithConfig{ - {Pod: pod1}, - {Pod: pod2}, - {Pod: pod3}, - {Pod: pod4}, - }, - }, - observedState: observer.State{ - ClusterState: &clusterState, - }, - reconcileState: &reconcile.State{Recorder: events.NewRecorder()}, - results: &reconciler.Results{}, - }, - fields: fields{ - Options: Options{ - PodsExpectations: reconciler.NewExpectations(), - Client: k8s.WrapClient(fake.NewFakeClient()), - }, - }, - wantErr: false, - want: want{ - results: &expectedEmptyResult, - fulfilledExpectation: false, // pod4 is expected to be deleted - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - d := &defaultDriver{ - Options: tt.fields.Options, - } - if err := d.attemptPodsDeletion( - tt.args.ToDelete, tt.args.reconcileState, tt.args.resourcesState, - tt.args.observedState, tt.args.results, tt.args.esClient, tt.args.elasticsearch); (err != nil) != tt.wantErr { - t.Errorf("defaultDriver.attemptPodsDeletion() error = %v, wantErr %v", err, tt.wantErr) - } - assert.EqualValues(t, tt.want.results, tt.args.results) - nn := 
k8s.ExtractNamespacedName(&tt.args.elasticsearch) - assert.EqualValues(t, tt.want.fulfilledExpectation, tt.fields.Options.PodsExpectations.Fulfilled(nn)) - }) - } -} +// +//func Test_defaultDriver_attemptPodsDeletion(t *testing.T) { +// var clusterState esclient.ClusterState +// b := []byte(ClusterStateSample) +// err := json.Unmarshal(b, &clusterState) +// if err != nil { +// t.Error(err) +// } +// pod1 := newPod("elasticsearch-sample-es-79gc6p57rs", "default") +// pod2 := newPod("elasticsearch-sample-es-fnsgkkdl85", "default") +// pod3 := newPod("elasticsearch-sample-es-jfpqbt2s4q", "default") +// pod4 := newPod("elasticsearch-sample-es-nope", "default") +// +// expectedResult1 := reconciler.Results{} +// expectedResult1.WithResult(defaultRequeue).WithResult(defaultRequeue) +// +// expectedEmptyResult := reconciler.Results{} +// expectedEmptyResult.WithResult(k8sreconcile.Result{}) +// +// elasticsearch := v1alpha1.Elasticsearch{ +// ObjectMeta: metav1.ObjectMeta{ +// Namespace: "default", +// Name: "elasticsearch-sample", +// }, +// } +// +// type fields struct { +// Options Options +// } +// +// type args struct { +// ToDelete *mutation.PerformableChanges +// reconcileState *reconcile.State +// resourcesState *reconcile.ResourcesState +// observedState observer.State +// results *reconciler.Results +// esClient esclient.Client +// elasticsearch v1alpha1.Elasticsearch +// } +// +// type want struct { +// results *reconciler.Results +// fulfilledExpectation bool +// } +// +// tests := []struct { +// name string +// fields fields +// args args +// wantErr bool +// want want +// }{ +// { +// name: "Do not delete a pod with migrating data", +// args: args{ +// elasticsearch: elasticsearch, +// ToDelete: &mutation.PerformableChanges{ +// Changes: mutation.Changes{ +// ToDelete: pod.PodsWithConfig{ +// pod.PodWithConfig{Pod: pod1}, +// pod.PodWithConfig{Pod: pod2}, +// }, +// }, +// }, +// resourcesState: &reconcile.ResourcesState{ +// CurrentPods: pod.PodsWithConfig{ +// {Pod: pod1}, +// {Pod: pod2}, +// {Pod: pod3}, +// }, +// }, +// observedState: observer.State{ +// ClusterState: &clusterState, +// }, +// reconcileState: &reconcile.State{Recorder: events.NewRecorder()}, +// results: &reconciler.Results{}, +// }, +// fields: fields{ +// Options: Options{ +// PodsExpectations: reconciler.NewExpectations(), +// }, +// }, +// wantErr: false, +// want: want{ +// results: &expectedResult1, +// fulfilledExpectation: true, // pod deletion is delayed, do not expect anything +// }, +// }, +// { +// name: "Delete a pod with no data", +// args: args{ +// elasticsearch: elasticsearch, +// ToDelete: &mutation.PerformableChanges{ +// Changes: mutation.Changes{ +// ToDelete: pod.PodsWithConfig{ +// pod.PodWithConfig{Pod: pod4}, +// }, +// }, +// }, +// resourcesState: &reconcile.ResourcesState{ +// CurrentPods: pod.PodsWithConfig{ +// {Pod: pod1}, +// {Pod: pod2}, +// {Pod: pod3}, +// {Pod: pod4}, +// }, +// }, +// observedState: observer.State{ +// ClusterState: &clusterState, +// }, +// reconcileState: &reconcile.State{Recorder: events.NewRecorder()}, +// results: &reconciler.Results{}, +// }, +// fields: fields{ +// Options: Options{ +// PodsExpectations: reconciler.NewExpectations(), +// Client: k8s.WrapClient(fake.NewFakeClient()), +// }, +// }, +// wantErr: false, +// want: want{ +// results: &expectedEmptyResult, +// fulfilledExpectation: false, // pod4 is expected to be deleted +// }, +// }, +// } +// for _, tt := range tests { +// t.Run(tt.name, func(t *testing.T) { +// d := &defaultDriver{ +// 
Options: tt.fields.Options, +// } +// if err := d.attemptPodsDeletion( +// tt.args.ToDelete, tt.args.reconcileState, tt.args.resourcesState, +// tt.args.observedState, tt.args.results, tt.args.esClient, tt.args.elasticsearch); (err != nil) != tt.wantErr { +// t.Errorf("defaultDriver.attemptPodsDeletion() error = %v, wantErr %v", err, tt.wantErr) +// } +// assert.EqualValues(t, tt.want.results, tt.args.results) +// nn := k8s.ExtractNamespacedName(&tt.args.elasticsearch) +// assert.EqualValues(t, tt.want.fulfilledExpectation, tt.fields.Options.PodsExpectations.Fulfilled(nn)) +// }) +// } +//} diff --git a/operators/pkg/controller/elasticsearch/driver/driver.go b/operators/pkg/controller/elasticsearch/driver/driver.go index 9867f457ef..7056a84616 100644 --- a/operators/pkg/controller/elasticsearch/driver/driver.go +++ b/operators/pkg/controller/elasticsearch/driver/driver.go @@ -74,7 +74,7 @@ func NewDriver(opts Options) (Driver, error) { switch opts.Version.Major { case 7: - driver.expectedPodsAndResourcesResolver = version6.ExpectedPodSpecs + //driver.expectedPodsAndResourcesResolver = version6.ExpectedPodSpecs driver.clusterInitialMasterNodesEnforcer = version7.ClusterInitialMasterNodesEnforcer @@ -85,7 +85,7 @@ func NewDriver(opts Options) (Driver, error) { // TODO: only do this if there's 6.x masters in the cluster. driver.zen1SettingsUpdater = version6.UpdateZen1Discovery case 6: - driver.expectedPodsAndResourcesResolver = version6.ExpectedPodSpecs + //driver.expectedPodsAndResourcesResolver = version6.ExpectedPodSpecs driver.zen1SettingsUpdater = version6.UpdateZen1Discovery default: return nil, fmt.Errorf("unsupported version: %s", opts.Version) diff --git a/operators/pkg/controller/elasticsearch/driver/pods.go b/operators/pkg/controller/elasticsearch/driver/pods.go index 2764079072..c129e71d1b 100644 --- a/operators/pkg/controller/elasticsearch/driver/pods.go +++ b/operators/pkg/controller/elasticsearch/driver/pods.go @@ -4,216 +4,196 @@ package driver -import ( - "fmt" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - pvcpkg "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pvc" - pvcutils "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pvc" - esreconcile "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "sigs.k8s.io/controller-runtime/pkg/reconcile" -) - -// createElasticsearchPod creates the given elasticsearch pod -func createElasticsearchPod( - c k8s.Client, - scheme *runtime.Scheme, - es v1alpha1.Elasticsearch, - reconcileState *esreconcile.State, - pod corev1.Pod, - podSpecCtx pod.PodSpecContext, - orphanedPVCs *pvcutils.OrphanedPersistentVolumeClaims, -) error { - // when can we re-use a metav1.PersistentVolumeClaim? 
- // - It is the same size, storageclass etc, or resizable as such - // (https://kubernetes.io/docs/concepts/storage/persistent-volumes/#expanding-persistent-volumes-claims) - // - If a local volume: when we know it's going to the same node - // - How can we tell? - // - Only guaranteed if a required node affinity specifies a specific, singular node. - // - Usually they are more generic, yielding a range of possible target nodes - // - If an EBS and non-regional PDs (GCP) volume: when we know it's going to the same AZ: - // - How can we tell? - // - Only guaranteed if a required node affinity specifies a specific availability zone - // - Often - // - This is /hard/ - // - Other persistent - // - // - Limitations - // - Node-specific volume limits: https://kubernetes.io/docs/concepts/storage/storage-limits/ - // - // How to technically re-use a volume: - // - Re-use the same name for the PVC. - // - E.g, List PVCs, if a PVC we want to use exist - - for _, claimTemplate := range podSpecCtx.NodeSpec.VolumeClaimTemplates { - // TODO : we are creating PVC way too far in the process, it's almost too late to compare them with existing ones - pvc, err := getOrCreatePVC(&pod, claimTemplate, orphanedPVCs, c, scheme, es) - if err != nil { - return err - } - - vol := corev1.Volume{ - Name: claimTemplate.Name, - VolumeSource: corev1.VolumeSource{ - PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ - ClaimName: pvc.Name, - // TODO: support read only pvcs - }, - }, - } - pod = replaceVolume(pod, vol) - } - - // create the config volume for this pod, now that we have a proper name for the pod - if err := settings.ReconcileConfig(c, es, pod, podSpecCtx.Config); err != nil { - return err - } - configSecretVolume := settings.ConfigSecretVolume(pod.Name).Volume() - pod = replaceVolume(pod, configSecretVolume) - - if err := controllerutil.SetControllerReference(&es, &pod, scheme); err != nil { - return err - } - if err := c.Create(&pod); err != nil { - reconcileState.AddEvent(corev1.EventTypeWarning, events.EventReasonUnexpected, fmt.Sprintf("Cannot create pod %s: %s", pod.Name, err.Error())) - return err - } - reconcileState.AddEvent(corev1.EventTypeNormal, events.EventReasonCreated, stringsutil.Concat("Created pod ", pod.Name)) - log.Info("Created pod", "name", pod.Name, "namespace", pod.Namespace) - - return nil -} - -// replaceVolume replaces an existing volume in the pod that has the same name as the given one. -func replaceVolume(pod corev1.Pod, volume corev1.Volume) corev1.Pod { - for i, v := range pod.Spec.Volumes { - if v.Name == volume.Name { - pod.Spec.Volumes[i] = volume - break - } - } - return pod -} - -// getOrCreatePVC tries to attach a PVC that already exists or attaches a new one otherwise. 
-func getOrCreatePVC(pod *corev1.Pod, - claimTemplate corev1.PersistentVolumeClaim, - orphanedPVCs *pvcutils.OrphanedPersistentVolumeClaims, - c k8s.Client, - scheme *runtime.Scheme, - es v1alpha1.Elasticsearch, -) (*corev1.PersistentVolumeClaim, error) { - // Generate the desired PVC from the template - pvc := newPVCFromTemplate(claimTemplate, pod) - // Seek for an orphaned PVC that matches the desired one - orphanedPVC := orphanedPVCs.GetOrphanedVolumeClaim(pvc) - - if orphanedPVC != nil { - // ReUSE the orphaned PVC - pvc = orphanedPVC - // Update the name of the pod to reflect the change - podName, err := pvcutils.GetPodNameFromLabels(pvc) - if err != nil { - return nil, err - } - - // update the hostname if we defaulted it earlier - if pod.Spec.Hostname == pod.Name { - pod.Spec.Hostname = podName - } - - pod.Name = podName - log.Info("Reusing PVC", "pod", pod.Name, "pvc", pvc.Name) - return pvc, nil - } - - // No match, create a new PVC - log.Info("Creating PVC", "pod", pod.Name, "pvc", pvc.Name) - if err := controllerutil.SetControllerReference(&es, pvc, scheme); err != nil { - return nil, err - } - err := c.Create(pvc) - if err != nil && !apierrors.IsAlreadyExists(err) { - return nil, err - } - return pvc, nil -} - -func newPVCFromTemplate(claimTemplate corev1.PersistentVolumeClaim, pod *corev1.Pod) *corev1.PersistentVolumeClaim { - pvc := claimTemplate.DeepCopy() - pvc.Name = name.NewPVCName(pod.Name, claimTemplate.Name) - pvc.Namespace = pod.Namespace - // reuse some labels also applied to the pod for comparison purposes - if pvc.Labels == nil { - pvc.Labels = map[string]string{} - } - for _, k := range pvcpkg.PodLabelsInPVCs { - pvc.Labels[k] = pod.Labels[k] - } - // Add the current pod name as a label - pvc.Labels[label.PodNameLabelName] = pod.Name - pvc.Labels[label.VolumeNameLabelName] = claimTemplate.Name - return pvc -} - -// deleteElasticsearchPod deletes the given elasticsearch pod. Tests to check if the pod can be safely deleted must -// be done before the call to this function. -func deleteElasticsearchPod( - c k8s.Client, - reconcileState *esreconcile.State, - resourcesState esreconcile.ResourcesState, - pod corev1.Pod, - preDelete func() error, -) (reconcile.Result, error) { - - // delete all PVCs associated with this pod - // TODO: perhaps this is better to reconcile after the fact? - for _, volume := range pod.Spec.Volumes { - if volume.PersistentVolumeClaim == nil { - continue - } - - // TODO: perhaps not assuming all PVCs will be managed by us? and maybe we should not categorically delete? 
- pvc, err := resourcesState.FindPVCByName(volume.PersistentVolumeClaim.ClaimName) - if err != nil { - return reconcile.Result{}, err - } - - if err := c.Delete(&pvc); err != nil && !apierrors.IsNotFound(err) { - return reconcile.Result{}, err - } - } - - if err := preDelete(); err != nil { - return reconcile.Result{}, err - } - if err := c.Delete(&pod); err != nil && !apierrors.IsNotFound(err) { - return reconcile.Result{}, err - } - reconcileState.AddEvent( - corev1.EventTypeNormal, events.EventReasonDeleted, stringsutil.Concat("Deleted pod ", pod.Name), - ) - log.Info("Deleted pod", "name", pod.Name, "namespace", pod.Namespace) - - // delete configuration for that pod (would be garbage collected otherwise) - secret, err := settings.GetESConfigSecret(c, k8s.ExtractNamespacedName(&pod)) - if err != nil && !apierrors.IsNotFound(err) { - return reconcile.Result{}, err - } - if err = c.Delete(&secret); err != nil && !apierrors.IsNotFound(err) { - return reconcile.Result{}, err - } - - return reconcile.Result{}, nil -} +// +//// createElasticsearchPod creates the given elasticsearch pod +//func createElasticsearchPod( +// c k8s.Client, +// scheme *runtime.Scheme, +// es v1alpha1.Elasticsearch, +// reconcileState *esreconcile.State, +// pod corev1.Pod, +// podSpecCtx pod.PodSpecContext, +// orphanedPVCs *pvcutils.OrphanedPersistentVolumeClaims, +//) error { +// // when can we re-use a metav1.PersistentVolumeClaim? +// // - It is the same size, storageclass etc, or resizable as such +// // (https://kubernetes.io/docs/concepts/storage/persistent-volumes/#expanding-persistent-volumes-claims) +// // - If a local volume: when we know it's going to the same node +// // - How can we tell? +// // - Only guaranteed if a required node affinity specifies a specific, singular node. +// // - Usually they are more generic, yielding a range of possible target nodes +// // - If an EBS and non-regional PDs (GCP) volume: when we know it's going to the same AZ: +// // - How can we tell? +// // - Only guaranteed if a required node affinity specifies a specific availability zone +// // - Often +// // - This is /hard/ +// // - Other persistent +// // +// // - Limitations +// // - Node-specific volume limits: https://kubernetes.io/docs/concepts/storage/storage-limits/ +// // +// // How to technically re-use a volume: +// // - Re-use the same name for the PVC. 
+// // - E.g, List PVCs, if a PVC we want to use exist +// +// for _, claimTemplate := range podSpecCtx.NodeSpec.VolumeClaimTemplates { +// // TODO : we are creating PVC way too far in the process, it's almost too late to compare them with existing ones +// pvc, err := getOrCreatePVC(&pod, claimTemplate, orphanedPVCs, c, scheme, es) +// if err != nil { +// return err +// } +// +// vol := corev1.Volume{ +// Name: claimTemplate.Name, +// VolumeSource: corev1.VolumeSource{ +// PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ +// ClaimName: pvc.Name, +// // TODO: support read only pvcs +// }, +// }, +// } +// pod = replaceVolume(pod, vol) +// } +// +// // create the config volume for this pod, now that we have a proper name for the pod +// if err := settings.ReconcileConfig(c, es, pod, podSpecCtx.Config); err != nil { +// return err +// } +// configSecretVolume := settings.ConfigSecretVolume(pod.Name).Volume() +// pod = replaceVolume(pod, configSecretVolume) +// +// if err := controllerutil.SetControllerReference(&es, &pod, scheme); err != nil { +// return err +// } +// if err := c.Create(&pod); err != nil { +// reconcileState.AddEvent(corev1.EventTypeWarning, events.EventReasonUnexpected, fmt.Sprintf("Cannot create pod %s: %s", pod.Name, err.Error())) +// return err +// } +// reconcileState.AddEvent(corev1.EventTypeNormal, events.EventReasonCreated, stringsutil.Concat("Created pod ", pod.Name)) +// log.Info("Created pod", "name", pod.Name, "namespace", pod.Namespace) +// +// return nil +//} +// +//// replaceVolume replaces an existing volume in the pod that has the same name as the given one. +//func replaceVolume(pod corev1.Pod, volume corev1.Volume) corev1.Pod { +// for i, v := range pod.Spec.Volumes { +// if v.Name == volume.Name { +// pod.Spec.Volumes[i] = volume +// break +// } +// } +// return pod +//} +// +//// getOrCreatePVC tries to attach a PVC that already exists or attaches a new one otherwise. 
+//func getOrCreatePVC(pod *corev1.Pod, +// claimTemplate corev1.PersistentVolumeClaim, +// orphanedPVCs *pvcutils.OrphanedPersistentVolumeClaims, +// c k8s.Client, +// scheme *runtime.Scheme, +// es v1alpha1.Elasticsearch, +//) (*corev1.PersistentVolumeClaim, error) { +// // Generate the desired PVC from the template +// pvc := newPVCFromTemplate(claimTemplate, pod) +// // Seek for an orphaned PVC that matches the desired one +// orphanedPVC := orphanedPVCs.GetOrphanedVolumeClaim(pvc) +// +// if orphanedPVC != nil { +// // ReUSE the orphaned PVC +// pvc = orphanedPVC +// // Update the name of the pod to reflect the change +// podName, err := pvcutils.GetPodNameFromLabels(pvc) +// if err != nil { +// return nil, err +// } +// +// // update the hostname if we defaulted it earlier +// if pod.Spec.Hostname == pod.Name { +// pod.Spec.Hostname = podName +// } +// +// pod.Name = podName +// log.Info("Reusing PVC", "pod", pod.Name, "pvc", pvc.Name) +// return pvc, nil +// } +// +// // No match, create a new PVC +// log.Info("Creating PVC", "pod", pod.Name, "pvc", pvc.Name) +// if err := controllerutil.SetControllerReference(&es, pvc, scheme); err != nil { +// return nil, err +// } +// err := c.Create(pvc) +// if err != nil && !apierrors.IsAlreadyExists(err) { +// return nil, err +// } +// return pvc, nil +//} +// +//func newPVCFromTemplate(claimTemplate corev1.PersistentVolumeClaim, pod *corev1.Pod) *corev1.PersistentVolumeClaim { +// pvc := claimTemplate.DeepCopy() +// pvc.Name = name.NewPVCName(pod.Name, claimTemplate.Name) +// pvc.Namespace = pod.Namespace +// // reuse some labels also applied to the pod for comparison purposes +// if pvc.Labels == nil { +// pvc.Labels = map[string]string{} +// } +// for _, k := range pvcpkg.PodLabelsInPVCs { +// pvc.Labels[k] = pod.Labels[k] +// } +// // Add the current pod name as a label +// pvc.Labels[label.PodNameLabelName] = pod.Name +// pvc.Labels[label.VolumeNameLabelName] = claimTemplate.Name +// return pvc +//} +// +//// deleteElasticsearchPod deletes the given elasticsearch pod. Tests to check if the pod can be safely deleted must +//// be done before the call to this function. +//func deleteElasticsearchPod( +// c k8s.Client, +// reconcileState *esreconcile.State, +// resourcesState esreconcile.ResourcesState, +// pod corev1.Pod, +// preDelete func() error, +//) (reconcile.Result, error) { +// +// // delete all PVCs associated with this pod +// // TODO: perhaps this is better to reconcile after the fact? +// for _, volume := range pod.Spec.Volumes { +// if volume.PersistentVolumeClaim == nil { +// continue +// } +// +// // TODO: perhaps not assuming all PVCs will be managed by us? and maybe we should not categorically delete? 
+// pvc, err := resourcesState.FindPVCByName(volume.PersistentVolumeClaim.ClaimName) +// if err != nil { +// return reconcile.Result{}, err +// } +// +// if err := c.Delete(&pvc); err != nil && !apierrors.IsNotFound(err) { +// return reconcile.Result{}, err +// } +// } +// +// if err := preDelete(); err != nil { +// return reconcile.Result{}, err +// } +// if err := c.Delete(&pod); err != nil && !apierrors.IsNotFound(err) { +// return reconcile.Result{}, err +// } +// reconcileState.AddEvent( +// corev1.EventTypeNormal, events.EventReasonDeleted, stringsutil.Concat("Deleted pod ", pod.Name), +// ) +// log.Info("Deleted pod", "name", pod.Name, "namespace", pod.Namespace) +// +// // delete configuration for that pod (would be garbage collected otherwise) +// secret, err := settings.GetESConfigSecret(c, k8s.ExtractNamespacedName(&pod)) +// if err != nil && !apierrors.IsNotFound(err) { +// return reconcile.Result{}, err +// } +// if err = c.Delete(&secret); err != nil && !apierrors.IsNotFound(err) { +// return reconcile.Result{}, err +// } +// +// return reconcile.Result{}, nil +//} diff --git a/operators/pkg/controller/elasticsearch/driver/pods_test.go b/operators/pkg/controller/elasticsearch/driver/pods_test.go deleted file mode 100644 index 80996b2431..0000000000 --- a/operators/pkg/controller/elasticsearch/driver/pods_test.go +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package driver - -import ( - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" - "github.com/go-test/deep" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - pvcutils "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pvc" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" -) - -func Test_newPVCFromTemplate(t *testing.T) { - type args struct { - claimTemplate corev1.PersistentVolumeClaim - pod *corev1.Pod - } - tests := []struct { - name string - args args - want *corev1.PersistentVolumeClaim - }{ - { - name: "Create a simple PVC from a template and a pod", - args: args{ - claimTemplate: corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: esvolume.ElasticsearchDataVolumeName, - }, - }, - pod: &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "elasticsearch-sample-es-6bw9qkw77k", - Labels: map[string]string{ - "l1": "v1", - "l2": "v2", - common.TypeLabelName: "elasticsearch", - label.ClusterNameLabelName: "cluster-name", - string(label.NodeTypesMasterLabelName): "true", - string(label.NodeTypesMLLabelName): "true", - string(label.NodeTypesIngestLabelName): 
"true", - string(label.NodeTypesDataLabelName): "true", - label.VersionLabelName: "7.1.0", - }, - }, - }, - }, - want: &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: "elasticsearch-sample-es-6bw9qkw77k-" + esvolume.ElasticsearchDataVolumeName, - Labels: map[string]string{ - // only a subset of labels should be copied over the pvc - common.TypeLabelName: "elasticsearch", - label.ClusterNameLabelName: "cluster-name", - string(label.NodeTypesMasterLabelName): "true", - string(label.NodeTypesMLLabelName): "true", - string(label.NodeTypesIngestLabelName): "true", - string(label.NodeTypesDataLabelName): "true", - label.VersionLabelName: "7.1.0", - // additional pod name label should be there - label.PodNameLabelName: "elasticsearch-sample-es-6bw9qkw77k", - label.VolumeNameLabelName: volume.ElasticsearchDataVolumeName, - }, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if diff := deep.Equal(newPVCFromTemplate(tt.args.claimTemplate, tt.args.pod), tt.want); diff != nil { - t.Error(diff) - } - }) - } -} - -func Test_createElasticsearchPod(t *testing.T) { - client := k8s.WrapClient(fake.NewFakeClient()) - podSpecCtx := pod.PodSpecContext{ - PodTemplate: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{Name: "foo"}}, - }, - }, - NodeSpec: v1alpha1.NodeSpec{ - VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: esvolume.ElasticsearchDataVolumeName, - }, - Spec: corev1.PersistentVolumeClaimSpec{}, - }, - }, - }, - } - es := v1alpha1.Elasticsearch{} - pod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns", - Name: "name", - Labels: map[string]string{ - "a": "b", - }, - }, - Spec: corev1.PodSpec{ - Volumes: []corev1.Volume{ - { - Name: esvolume.TransportCertificatesSecretVolumeName, - VolumeSource: corev1.VolumeSource{ - Secret: &corev1.SecretVolumeSource{ - SecretName: "should-be-replaced", - }, - }, - }, - { - Name: settings.ConfigVolumeName, - VolumeSource: corev1.VolumeSource{ - Secret: &corev1.SecretVolumeSource{ - SecretName: "should-be-replaced", - }, - }, - }, - { - Name: esvolume.ElasticsearchDataVolumeName, - VolumeSource: corev1.VolumeSource{}, - }, - }, - }, - } - require.NoError(t, v1alpha1.AddToScheme(scheme.Scheme)) - err := createElasticsearchPod(client, scheme.Scheme, es, reconcile.NewState(es), pod, podSpecCtx, &pvcutils.OrphanedPersistentVolumeClaims{}) - require.NoError(t, err) - - err = client.Get(k8s.ExtractNamespacedName(&pod), &pod) - require.NoError(t, err) - - // should have a volume for config (existing one replaced) - found := false - configSecretVolumeName := settings.ConfigSecretVolume(pod.Name).Name() - for _, v := range pod.Spec.Volumes { - if v.Name == configSecretVolumeName { - require.NotEqual(t, "should-be-replaced", v.Secret.SecretName) - found = true - } - } - require.True(t, found) - // should have a PVC assigned (volume replaced) - found = false - pvcName := "" - for _, v := range pod.Spec.Volumes { - if v.Name == esvolume.ElasticsearchDataVolumeName { - pvcName = v.PersistentVolumeClaim.ClaimName - require.NotEmpty(t, pvcName) - found = true - } - } - require.True(t, found) - // PVC should be created - var pvc corev1.PersistentVolumeClaim - require.NoError(t, client.Get(types.NamespacedName{Namespace: "ns", Name: pvcName}, &pvc)) -} diff --git a/operators/pkg/controller/elasticsearch/label/label.go b/operators/pkg/controller/elasticsearch/label/label.go index 32c0c1f3f7..3c4aaa6793 100644 --- 
a/operators/pkg/controller/elasticsearch/label/label.go +++ b/operators/pkg/controller/elasticsearch/label/label.go @@ -10,7 +10,6 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -28,6 +27,8 @@ const ( VersionLabelName = "elasticsearch.k8s.elastic.co/version" // PodNameLabelName used to store the name of the pod on other objects PodNameLabelName = "elasticsearch.k8s.elastic.co/pod-name" + // StatefulSetNameLabelName used to store the name of the statefulset + StatefulSetNameLabelName = "elasticsearch.k8s.elastic.co/statefulset" // VolumeNameLabelName is the name of the volume e.g. elasticsearch-data a PVC was used for. VolumeNameLabelName = "elasticsearch.k8s.elastic.co/volume-name" @@ -40,6 +41,8 @@ const ( // NodeTypesMLLabelName is a label set to true on nodes with the ml role NodeTypesMLLabelName common.TrueFalseLabel = "elasticsearch.k8s.elastic.co/node-ml" + ConfigTemplateHashLabelName = "elasticsearch.k8s.elastic.co/config-template-hash" + // Type represents the Elasticsearch type Type = "elasticsearch" ) @@ -76,18 +79,38 @@ func NewLabels(es types.NamespacedName) map[string]string { } // NewPodLabels returns labels to apply for a new Elasticsearch pod. -func NewPodLabels(es v1alpha1.Elasticsearch, version version.Version, cfg v1alpha1.ElasticsearchSettings) map[string]string { +func NewPodLabels(es types.NamespacedName, ssetName string, version version.Version, nodeRoles v1alpha1.Node, configHash string) (map[string]string, error) { // cluster name based labels - labels := NewLabels(k8s.ExtractNamespacedName(&es)) + labels := NewLabels(es) // version label labels[VersionLabelName] = version.String() + // node types labels - NodeTypesMasterLabelName.Set(cfg.Node.Master, labels) - NodeTypesDataLabelName.Set(cfg.Node.Data, labels) - NodeTypesIngestLabelName.Set(cfg.Node.Ingest, labels) - NodeTypesMLLabelName.Set(cfg.Node.ML, labels) + NodeTypesMasterLabelName.Set(nodeRoles.Master, labels) + NodeTypesDataLabelName.Set(nodeRoles.Data, labels) + NodeTypesIngestLabelName.Set(nodeRoles.Ingest, labels) + NodeTypesMLLabelName.Set(nodeRoles.ML, labels) + + // config hash label, to rotate pods on config changes + labels[ConfigTemplateHashLabelName] = configHash + + // apply stateful set label selector + for k, v := range NewStatefulSetLabels(es, ssetName) { + labels[k] = v + } + + return labels, nil +} + +// NewConfigLabels returns labels to apply for an Elasticsearch Config secret. 
+func NewConfigLabels(es types.NamespacedName, ssetName string) map[string]string { + return NewStatefulSetLabels(es, ssetName) +} - return labels +func NewStatefulSetLabels(es types.NamespacedName, ssetName string) map[string]string { + lbls := NewLabels(es) + lbls[StatefulSetNameLabelName] = ssetName + return lbls } // NewLabelSelectorForElasticsearch returns a labels.Selector that matches the labels as constructed by NewLabels diff --git a/operators/pkg/controller/elasticsearch/mutation/calculate_test.go b/operators/pkg/controller/elasticsearch/mutation/calculate_test.go index fb9be0248c..8cda8fcd8e 100644 --- a/operators/pkg/controller/elasticsearch/mutation/calculate_test.go +++ b/operators/pkg/controller/elasticsearch/mutation/calculate_test.go @@ -5,17 +5,12 @@ package mutation import ( - "testing" - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" - "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -91,88 +86,89 @@ func ESPodSpecContext(image string, cpuLimit string) pod.PodSpecContext { } } -func TestCalculateChanges(t *testing.T) { - type args struct { - expected []pod.PodSpecContext - state reconcile.ResourcesState - } - tests := []struct { - name string - args args - want Changes - }{ - { - name: "Wait for 2 pods to be terminated, create 1", - args: args{ - expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx}, - state: reconcile.ResourcesState{DeletingPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, - }, - want: Changes{ - ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}, - ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtx}}, - }, - }, - { - name: "Do not wait for 2 pods to be terminated, create 3", - args: args{ - expected: []pod.PodSpecContext{defaultPodSpecCtxV2, defaultPodSpecCtxV2, defaultPodSpecCtxV2}, - state: reconcile.ResourcesState{DeletingPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, - }, - want: Changes{ - ToKeep: pod.PodsWithConfig{}, - ToDelete: pod.PodsWithConfig{}, - ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtxV2}, {PodSpecCtx: defaultPodSpecCtxV2}, {PodSpecCtx: defaultPodSpecCtxV2}}, - }, - }, - { - name: "no changes", - args: args{ - expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx}, - state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, - }, - want: Changes{ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, - }, - { - name: "2 new pods", - args: args{ - expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx}, - state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, - }, - want: Changes{ - ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}, - ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtx}, {PodSpecCtx: defaultPodSpecCtx}}, 
- }, - }, - { - name: "2 less pods", - args: args{ - expected: []pod.PodSpecContext{}, - state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, - }, - want: Changes{ToDelete: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, - }, - { - name: "1 pod replaced", - args: args{ - expected: []pod.PodSpecContext{defaultPodSpecCtx, ESPodSpecContext("another-image", defaultCPULimit)}, - state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, - }, - want: Changes{ - ToKeep: pod.PodsWithConfig{defaultPodWithConfig}, - ToDelete: pod.PodsWithConfig{defaultPodWithConfig}, - ToCreate: []PodToCreate{{PodSpecCtx: ESPodSpecContext("another-image", defaultCPULimit)}}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := CalculateChanges(es, tt.args.expected, tt.args.state, func(ctx pod.PodSpecContext) corev1.Pod { - return version.NewPod(es, ctx) - }) - assert.NoError(t, err) - assert.Equal(t, len(tt.want.ToKeep), len(got.ToKeep)) - assert.Equal(t, len(tt.want.ToCreate), len(got.ToCreate)) - assert.Equal(t, len(tt.want.ToDelete), len(got.ToDelete)) - }) - } -} +// +//func TestCalculateChanges(t *testing.T) { +// type args struct { +// expected []pod.PodSpecContext +// state reconcile.ResourcesState +// } +// tests := []struct { +// name string +// args args +// want Changes +// }{ +// { +// name: "Wait for 2 pods to be terminated, create 1", +// args: args{ +// expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx}, +// state: reconcile.ResourcesState{DeletingPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, +// }, +// want: Changes{ +// ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}, +// ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtx}}, +// }, +// }, +// { +// name: "Do not wait for 2 pods to be terminated, create 3", +// args: args{ +// expected: []pod.PodSpecContext{defaultPodSpecCtxV2, defaultPodSpecCtxV2, defaultPodSpecCtxV2}, +// state: reconcile.ResourcesState{DeletingPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, +// }, +// want: Changes{ +// ToKeep: pod.PodsWithConfig{}, +// ToDelete: pod.PodsWithConfig{}, +// ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtxV2}, {PodSpecCtx: defaultPodSpecCtxV2}, {PodSpecCtx: defaultPodSpecCtxV2}}, +// }, +// }, +// { +// name: "no changes", +// args: args{ +// expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx}, +// state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, +// }, +// want: Changes{ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, +// }, +// { +// name: "2 new pods", +// args: args{ +// expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx}, +// state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, +// }, +// want: Changes{ +// ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}, +// ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtx}, {PodSpecCtx: defaultPodSpecCtx}}, +// }, +// }, +// { +// name: "2 less pods", +// args: args{ +// expected: []pod.PodSpecContext{}, +// state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, +// }, +// want: Changes{ToDelete: pod.PodsWithConfig{defaultPodWithConfig, 
defaultPodWithConfig}}, +// }, +// { +// name: "1 pod replaced", +// args: args{ +// expected: []pod.PodSpecContext{defaultPodSpecCtx, ESPodSpecContext("another-image", defaultCPULimit)}, +// state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, +// }, +// want: Changes{ +// ToKeep: pod.PodsWithConfig{defaultPodWithConfig}, +// ToDelete: pod.PodsWithConfig{defaultPodWithConfig}, +// ToCreate: []PodToCreate{{PodSpecCtx: ESPodSpecContext("another-image", defaultCPULimit)}}, +// }, +// }, +// } +// for _, tt := range tests { +// t.Run(tt.name, func(t *testing.T) { +// got, err := CalculateChanges(es, tt.args.expected, tt.args.state, func(ctx pod.PodSpecContext) corev1.Pod { +// return version.NewPod(es, ctx) +// }) +// assert.NoError(t, err) +// assert.Equal(t, len(tt.want.ToKeep), len(got.ToKeep)) +// assert.Equal(t, len(tt.want.ToCreate), len(got.ToCreate)) +// assert.Equal(t, len(tt.want.ToDelete), len(got.ToDelete)) +// }) +// } +//} diff --git a/operators/pkg/controller/elasticsearch/mutation/comparison/pod_test.go b/operators/pkg/controller/elasticsearch/mutation/comparison/pod_test.go deleted file mode 100644 index 01b20df1e2..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/comparison/pod_test.go +++ /dev/null @@ -1,458 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package comparison - -import ( - "fmt" - "testing" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" -) - -var es = v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Name: "elasticsearch", - }, -} - -func ESPodWithConfig(spec pod.PodSpecContext) pod.PodWithConfig { - return pod.PodWithConfig{Pod: version.NewPod(es, spec)} -} - -func ESPodSpecContext(image string, cpuLimit string) pod.PodSpecContext { - return pod.PodSpecContext{ - PodTemplate: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - label.ClusterNameLabelName: es.Name, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Image: image, - ImagePullPolicy: corev1.PullIfNotPresent, - Name: v1alpha1.ElasticsearchContainerName, - Ports: pod.DefaultContainerPorts, - Resources: corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse(cpuLimit), - corev1.ResourceMemory: resource.MustParse("2Gi"), - }, - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("2Gi"), - }, - }, - Env: []corev1.EnvVar{ - { - Name: "var1", - Value: "value1", - }, - { - Name: "var2", - Value: "value2", - }, - }, 
- ReadinessProbe: &corev1.Probe{ - FailureThreshold: 3, - InitialDelaySeconds: 10, - PeriodSeconds: 10, - SuccessThreshold: 3, - TimeoutSeconds: 5, - Handler: corev1.Handler{ - Exec: &corev1.ExecAction{ - Command: []string{ - "sh", - "-c", - "script here", - }, - }, - }, - }, - }}, - }, - }, - } -} - -var defaultPod = ESPodWithConfig(ESPodSpecContext(defaultImage, defaultCPULimit)) - -func defaultPodWithNoHash() pod.PodWithConfig { - p := pod.PodWithConfig{ - Config: defaultPod.Config, - Pod: *defaultPod.Pod.DeepCopy(), - } - delete(p.Pod.Labels, hash.TemplateHashLabelName) - return p -} - -func defaultPodWithPatchedLabel() pod.PodWithConfig { - p := pod.PodWithConfig{ - Config: defaultPod.Config, - Pod: *defaultPod.Pod.DeepCopy(), - } - p.Pod.Labels[label.ClusterNameLabelName] = "patched" - return p -} - -var defaultSpecCtx = ESPodSpecContext(defaultImage, defaultCPULimit) - -var defaultCPULimit = "800m" -var defaultImage = "image" - -// withPVCs is a small utility function to add PVCs to a Pod spec, the varargs argument is the volume name and claim names. -func withPVCs(p pod.PodSpecContext, nameAndClaimNames ...string) pod.PodSpecContext { - lenNameAndClaimNames := len(nameAndClaimNames) - - if lenNameAndClaimNames%2 != 0 { - panic(fmt.Sprintf("odd number of arguments passed as key-value pairs to withPVCs")) - } - - for i := 0; i < lenNameAndClaimNames; i += 2 { - volumeName := nameAndClaimNames[i] - claimName := nameAndClaimNames[i+1] - - p.PodTemplate.Spec.Volumes = append(p.PodTemplate.Spec.Volumes, corev1.Volume{ - Name: volumeName, - VolumeSource: corev1.VolumeSource{ - PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ - ClaimName: claimName, - }, - }, - }) - } - return p -} - -func Test_PodMatchesSpec(t *testing.T) { - fs := corev1.PersistentVolumeFilesystem - block := corev1.PersistentVolumeBlock - type args struct { - pod pod.PodWithConfig - spec pod.PodSpecContext - state reconcile.ResourcesState - } - tests := []struct { - name string - args args - want bool - expectedMismatches []string - expectedMismatchesContain string - }{ - { - name: "Matching pod should match", - args: args{ - pod: defaultPod, - spec: defaultSpecCtx, - }, - want: true, - }, - { - name: "Pod is missing the hash label", - args: args{ - pod: defaultPodWithNoHash(), - spec: defaultSpecCtx, - }, - want: false, - expectedMismatchesContain: fmt.Sprintf("No %s label set on the existing pod", hash.TemplateHashLabelName), - }, - { - name: "Pod label was patched by a user: should still match", - args: args{ - pod: defaultPodWithPatchedLabel(), - spec: defaultSpecCtx, - }, - want: true, - }, - { - name: "Non-matching image should not match", - args: args{ - pod: defaultPod, - spec: ESPodSpecContext("another-image", defaultCPULimit), - }, - want: false, - expectedMismatches: []string{"Spec hash and running pod spec hash are not equal"}, - }, - { - name: "Spec has different NodeSpec.Name", - args: args{ - pod: pod.PodWithConfig{ - Pod: corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name.NewPodName(es.Name, v1alpha1.NodeSpec{ - Name: "foo", - }), - }, - Spec: defaultSpecCtx.PodTemplate.Spec, - }, - }, - spec: pod.PodSpecContext{ - PodTemplate: defaultSpecCtx.PodTemplate, - NodeSpec: v1alpha1.NodeSpec{ - Name: "bar", - }, - }, - }, - want: false, - expectedMismatches: []string{"Pod base name mismatch: expected elasticsearch-es-bar, actual elasticsearch-es-foo"}, - }, - { - name: "Pod has empty NodeSpec.Name", - args: args{ - pod: pod.PodWithConfig{ - Pod: corev1.Pod{ - ObjectMeta: 
metav1.ObjectMeta{ - Name: name.NewPodName(es.Name, v1alpha1.NodeSpec{}), - }, - Spec: defaultSpecCtx.PodTemplate.Spec, - }, - }, - spec: pod.PodSpecContext{ - PodTemplate: defaultSpecCtx.PodTemplate, - NodeSpec: v1alpha1.NodeSpec{ - Name: "bar", - }, - }, - }, - want: false, - expectedMismatches: []string{"Pod base name mismatch: expected elasticsearch-es-bar, actual elasticsearch-es"}, - }, - { - name: "Non-matching resources should not match", - args: args{ - pod: defaultPod, - spec: ESPodSpecContext(defaultImage, "600m"), - }, - want: false, - expectedMismatchesContain: "Spec hash and running pod spec hash are not equal", - }, - { - name: "Pod is missing a PVC", - args: args{ - pod: defaultPod, - spec: pod.PodSpecContext{ - PodTemplate: defaultSpecCtx.PodTemplate, - NodeSpec: v1alpha1.NodeSpec{ - VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - }, - }, - }, - }, - }, - want: false, - expectedMismatchesContain: "Unmatched volumeClaimTemplate: test has no match in volumes []", - }, - { - name: "Pod is missing a PVC, but has another", - args: args{ - pod: ESPodWithConfig(withPVCs( - defaultSpecCtx, "foo", "claim-foo")), - spec: pod.PodSpecContext{ - PodTemplate: withPVCs(defaultSpecCtx).PodTemplate, - NodeSpec: v1alpha1.NodeSpec{ - VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - }, - }, - }, - }, - state: reconcile.ResourcesState{ - PVCs: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{Name: "claim-foo"}, - }, - }, - }, - }, - want: false, - expectedMismatchesContain: "Spec hash and running pod spec hash are not equal", - }, - { - name: "Pod has a PVC with an empty VolumeMode", - args: args{ - pod: ESPodWithConfig( - withPVCs( - defaultSpecCtx, - volume.ElasticsearchDataVolumeName, - "claim-name", - )), - spec: pod.PodSpecContext{ - PodTemplate: withPVCs( - defaultSpecCtx, - volume.ElasticsearchDataVolumeName, - "claim-name").PodTemplate, - NodeSpec: v1alpha1.NodeSpec{ - VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: volume.ElasticsearchDataVolumeName, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - VolumeMode: nil, - }, - }, - }, - }, - }, - state: reconcile.ResourcesState{ - PVCs: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "claim-name", - }, - Spec: corev1.PersistentVolumeClaimSpec{ - VolumeMode: &fs, - }, - }, - }, - }, - }, - want: true, - }, - { - name: "Pod has a PVC with a VolumeMode set to something else than default setting", - args: args{ - pod: ESPodWithConfig(withPVCs( - defaultSpecCtx, - volume.ElasticsearchDataVolumeName, - "claim-name", - )), - spec: pod.PodSpecContext{ - PodTemplate: withPVCs( - defaultSpecCtx, - volume.ElasticsearchDataVolumeName, - "claim-name").PodTemplate, - NodeSpec: v1alpha1.NodeSpec{ - VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: volume.ElasticsearchDataVolumeName, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - VolumeMode: &block, - }, - }, - }, - }, - }, - state: reconcile.ResourcesState{ - PVCs: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "claim-name", - }, - Spec: corev1.PersistentVolumeClaimSpec{ - VolumeMode: &block, - }, - }, - }, - }, - }, - want: true, - }, - { - name: "Pod has matching PVC", - args: args{ - pod: ESPodWithConfig(withPVCs( - defaultSpecCtx, - "volume-name", "claim-name"), - ), - spec: pod.PodSpecContext{ 
- PodTemplate: withPVCs(defaultSpecCtx, - "volume-name", "claim-name").PodTemplate, - NodeSpec: v1alpha1.NodeSpec{ - VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "volume-name", - }, - }, - }, - }, - }, - state: reconcile.ResourcesState{ - PVCs: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{Name: "claim-name"}, - }, - }, - }, - }, - want: true, - }, - { - name: "Pod has matching PVC, but spec does not match", - args: args{ - pod: ESPodWithConfig( - withPVCs( - defaultSpecCtx, - "volume-name", "claim-name"), - ), - spec: pod.PodSpecContext{ - PodTemplate: withPVCs( - defaultSpecCtx, - "volume-name", "claim-name").PodTemplate, - NodeSpec: v1alpha1.NodeSpec{ - VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "volume-name", - }, - Spec: corev1.PersistentVolumeClaimSpec{ - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceStorage: resource.MustParse("2Gi"), - }, - }, - }, - }, - }, - }, - }, - state: reconcile.ResourcesState{ - PVCs: []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{Name: "claim-name"}, - }, - }, - }, - }, - want: false, - expectedMismatchesContain: "Unmatched volumeClaimTemplate: volume-name has no match in volumes [ volume-name]", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - match, mismatchReasons, err := PodMatchesSpec(es, tt.args.pod, tt.args.spec, tt.args.state) - assert.NoError(t, err, "No container named elasticsearch in the given pod") - assert.Equal(t, tt.want, match, mismatchReasons) - if tt.expectedMismatches != nil { - assert.EqualValues(t, tt.expectedMismatches, mismatchReasons) - } - if tt.expectedMismatchesContain != "" { - assert.Contains(t, mismatchReasons[0], tt.expectedMismatchesContain) - } - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/name/name.go b/operators/pkg/controller/elasticsearch/name/name.go index 73bec15cb0..838842a1f6 100644 --- a/operators/pkg/controller/elasticsearch/name/name.go +++ b/operators/pkg/controller/elasticsearch/name/name.go @@ -71,6 +71,11 @@ func Basename(podName string) string { return podName[0:idx] } +// StatefulSet returns the name of the StatefulSet corresponding to the given NodeSpec. +func StatefulSet(esName string, nodeSpecName string) string { + return ESNamer.Suffix(esName, nodeSpecName) +} + // NewPVCName returns a unique PVC name given a pod name and a PVC template name. // Uniqueness is guaranteed by the pod name that contains a random id. // The PVC template name is trimmed so that the PVC name does not exceed the max @@ -82,8 +87,8 @@ func NewPVCName(podName string, pvcTemplateName string) string { return esNoDefaultSuffixesNamer.Suffix(podName, pvcTemplateName) } -func ConfigSecret(podName string) string { - return esNoDefaultSuffixesNamer.Suffix(podName, configSecretSuffix) +func ConfigSecret(ssetName string) string { + return ESNamer.Suffix(ssetName, configSecretSuffix) } func SecureSettingsSecret(esName string) string { diff --git a/operators/pkg/controller/elasticsearch/nodespec/resources.go b/operators/pkg/controller/elasticsearch/nodespec/resources.go new file mode 100644 index 0000000000..112afdc81d --- /dev/null +++ b/operators/pkg/controller/elasticsearch/nodespec/resources.go @@ -0,0 +1,65 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package nodespec + +import ( + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" +) + +// Resources contain per-NodeSpec resources to be created. +type Resources struct { + StatefulSet appsv1.StatefulSet + HeadlessService corev1.Service + Config settings.CanonicalConfig + // TLS certs +} + +type ResourcesList []Resources + +func (l ResourcesList) StatefulSets() sset.StatefulSetList { + ssetList := make(sset.StatefulSetList, 0, len(l)) + for _, nodeSpec := range l { + ssetList = append(ssetList, nodeSpec.StatefulSet) + } + return ssetList +} + +func BuildExpectedResources(es v1alpha1.Elasticsearch, podTemplateBuilder version.PodTemplateSpecBuilder) (ResourcesList, error) { + nodesResources := make(ResourcesList, 0, len(es.Spec.Nodes)) + + for _, nodeSpec := range es.Spec.Nodes { + // build es config + userCfg := commonv1alpha1.Config{} + if nodeSpec.Config != nil { + userCfg = *nodeSpec.Config + } + cfg, err := settings.NewMergedESConfig(es.Name, userCfg) + if err != nil { + return nil, err + } + + // build stateful set and associated headless service + statefulSet, err := sset.BuildStatefulSet(k8s.ExtractNamespacedName(&es), nodeSpec, cfg, podTemplateBuilder) + if err != nil { + return nil, err + } + headlessSvc := sset.HeadlessService(k8s.ExtractNamespacedName(&es), statefulSet.Name) + + nodesResources = append(nodesResources, Resources{ + StatefulSet: statefulSet, + HeadlessService: headlessSvc, + Config: cfg, + }) + } + return nodesResources, nil +} diff --git a/operators/pkg/controller/elasticsearch/reconcile/resources_state.go b/operators/pkg/controller/elasticsearch/reconcile/resources_state.go index 0c3074f8e0..af3f820ce3 100644 --- a/operators/pkg/controller/elasticsearch/reconcile/resources_state.go +++ b/operators/pkg/controller/elasticsearch/reconcile/resources_state.go @@ -55,7 +55,7 @@ func NewResourcesStateFromAPI(c k8s.Client, es v1alpha1.Elasticsearch) (*Resourc // filter out pods scheduled for deletion for _, p := range allPods { // retrieve es configuration - config, err := settings.GetESConfigContent(c, k8s.ExtractNamespacedName(&p)) + config, err := settings.GetESConfigContent(c, p.Namespace, p.Labels[label.StatefulSetNameLabelName]) if err != nil { if apierrors.IsNotFound(err) { // We have an ES pod for which no configuration secret can be found. diff --git a/operators/pkg/controller/elasticsearch/reconcile/resources_state_test.go b/operators/pkg/controller/elasticsearch/reconcile/resources_state_test.go index f15ec9345e..bc56a91b5a 100644 --- a/operators/pkg/controller/elasticsearch/reconcile/resources_state_test.go +++ b/operators/pkg/controller/elasticsearch/reconcile/resources_state_test.go @@ -27,6 +27,7 @@ func TestNewResourcesStateFromAPI_MissingPodConfiguration(t *testing.T) { // This test focuses on the edge case where // no configuration secret is found for a given pod. 
 	v1alpha1.AddToScheme(scheme.Scheme)
+	ssetName := "sset"
 	cluster := v1alpha1.Elasticsearch{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace: "ns",
@@ -48,16 +49,22 @@ func TestNewResourcesStateFromAPI_MissingPodConfiguration(t *testing.T) {
 	}
 	oldPod := corev1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
-			Namespace:         "ns",
-			Name:              "pod",
+			Namespace: "ns",
+			Name:      "pod",
+			Labels: map[string]string{
+				label.StatefulSetNameLabelName: ssetName,
+			},
 			CreationTimestamp: metav1.NewTime(time.Now().Add(-cleanup.DeleteAfter).Add(-1 * time.Minute)),
 		},
 	}
 	deletionTimestamp := metav1.NewTime(time.Now().Add(1 * time.Hour))
 	deletingPod := corev1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
-			Namespace:         "ns",
-			Name:              "pod",
+			Namespace: "ns",
+			Name:      "pod",
+			Labels: map[string]string{
+				label.StatefulSetNameLabelName: ssetName,
+			},
 			CreationTimestamp: metav1.NewTime(time.Now().Add(-cleanup.DeleteAfter).Add(-1 * time.Minute)),
 			DeletionTimestamp: &deletionTimestamp,
 		},
@@ -68,10 +75,10 @@ func TestNewResourcesStateFromAPI_MissingPodConfiguration(t *testing.T) {
 	configSecret := corev1.Secret{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace: "ns",
-			Name:      settings.ConfigSecretName(oldPod.Name),
+			Name:      settings.ConfigSecretName(ssetName),
 			Labels: map[string]string{
-				label.ClusterNameLabelName: cluster.Name,
-				label.PodNameLabelName:     oldPod.Name,
+				label.ClusterNameLabelName:     cluster.Name,
+				label.StatefulSetNameLabelName: oldPod.Name,
 			},
 		},
 		Data: map[string][]byte{
diff --git a/operators/pkg/controller/elasticsearch/settings/config_volume.go b/operators/pkg/controller/elasticsearch/settings/config_volume.go
index aca6b09a88..04c07a2f29 100644
--- a/operators/pkg/controller/elasticsearch/settings/config_volume.go
+++ b/operators/pkg/controller/elasticsearch/settings/config_volume.go
@@ -29,33 +29,33 @@ const (
 	ConfigVolumeMountPath = "/mnt/elastic-internal/elasticsearch-config"
 )
 
-// ConfigSecretName is the name of the secret that holds the ES config for the given pod.
-func ConfigSecretName(podName string) string {
-	return name.ConfigSecret(podName)
+// ConfigSecretName is the name of the secret that holds the ES config for the given StatefulSet.
+func ConfigSecretName(ssetName string) string {
+	return name.ConfigSecret(ssetName)
 }
 
-// ConfigSecretVolume returns a SecretVolume to hold the config of the given pod.
-func ConfigSecretVolume(podName string) volume.SecretVolume {
+// ConfigSecretVolume returns a SecretVolume to hold the config of nodes in the given StatefulSet.
+func ConfigSecretVolume(ssetName string) volume.SecretVolume {
 	return volume.NewSecretVolumeWithMountPath(
-		ConfigSecretName(podName),
+		ConfigSecretName(ssetName),
 		ConfigVolumeName,
 		ConfigVolumeMountPath,
 	)
 }
 
-// GetESConfigContent retrieves the configuration secret of the given pod,
+// GetESConfigContent retrieves the configuration secret of the given StatefulSet,
 // and returns the corresponding CanonicalConfig.
-func GetESConfigContent(client k8s.Client, esPod types.NamespacedName) (CanonicalConfig, error) { - secret, err := GetESConfigSecret(client, esPod) +func GetESConfigContent(client k8s.Client, namespace string, ssetName string) (CanonicalConfig, error) { + secret, err := GetESConfigSecret(client, namespace, ssetName) if err != nil { return CanonicalConfig{}, err } if len(secret.Data) == 0 { - return CanonicalConfig{}, fmt.Errorf("no configuration found in secret %s", ConfigSecretName(esPod.Name)) + return CanonicalConfig{}, fmt.Errorf("no configuration found in secret %s", ConfigSecretName(ssetName)) } content := secret.Data[ConfigFileName] if len(content) == 0 { - return CanonicalConfig{}, fmt.Errorf("no configuration found in secret %s", ConfigSecretName(esPod.Name)) + return CanonicalConfig{}, fmt.Errorf("no configuration found in secret %s", ConfigSecretName(ssetName)) } cfg, err := common.ParseConfig(content) @@ -66,11 +66,11 @@ func GetESConfigContent(client k8s.Client, esPod types.NamespacedName) (Canonica } // GetESConfigSecret returns the secret holding the ES configuration for the given pod -func GetESConfigSecret(client k8s.Client, esPod types.NamespacedName) (corev1.Secret, error) { +func GetESConfigSecret(client k8s.Client, namespace string, ssetName string) (corev1.Secret, error) { var secret corev1.Secret if err := client.Get(types.NamespacedName{ - Namespace: esPod.Namespace, - Name: ConfigSecretName(esPod.Name), + Namespace: namespace, + Name: ConfigSecretName(ssetName), }, &secret); err != nil { return corev1.Secret{}, err } @@ -78,19 +78,16 @@ func GetESConfigSecret(client k8s.Client, esPod types.NamespacedName) (corev1.Se } // ReconcileConfig ensures the ES config for the pod is set in the apiserver. -func ReconcileConfig(client k8s.Client, cluster v1alpha1.Elasticsearch, pod corev1.Pod, config CanonicalConfig) error { +func ReconcileConfig(client k8s.Client, es v1alpha1.Elasticsearch, ssetName string, config CanonicalConfig) error { rendered, err := config.Render() if err != nil { return err } expected := corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Namespace: pod.Namespace, - Name: ConfigSecretName(pod.Name), - Labels: map[string]string{ - label.ClusterNameLabelName: cluster.Name, - label.PodNameLabelName: pod.Name, - }, + Namespace: es.Namespace, + Name: ConfigSecretName(ssetName), + Labels: label.NewConfigLabels(k8s.ExtractNamespacedName(&es), ssetName), }, Data: map[string][]byte{ ConfigFileName: rendered, @@ -103,7 +100,7 @@ func ReconcileConfig(client k8s.Client, cluster v1alpha1.Elasticsearch, pod core NeedsUpdate: func() bool { return !reflect.DeepEqual(reconciled.Data, expected.Data) }, - Owner: &cluster, + Owner: &es, Reconciled: &reconciled, Scheme: scheme.Scheme, UpdateReconciled: func() { reconciled.Data = expected.Data }, diff --git a/operators/pkg/controller/elasticsearch/settings/config_volume_test.go b/operators/pkg/controller/elasticsearch/settings/config_volume_test.go index e0615d045d..b55b3e1d31 100644 --- a/operators/pkg/controller/elasticsearch/settings/config_volume_test.go +++ b/operators/pkg/controller/elasticsearch/settings/config_volume_test.go @@ -16,24 +16,21 @@ import ( "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) func TestConfigSecretName(t *testing.T) { - require.Equal(t, "mypod-config", ConfigSecretName("mypod")) + require.Equal(t, 
"ssetName-es-config", ConfigSecretName("ssetName")) } func TestGetESConfigContent(t *testing.T) { - pod := types.NamespacedName{ - Name: "pod", - Namespace: "namespace", - } + namespace := "namespace" + ssetName := "sset" secret := corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod-config", - Namespace: "namespace", + Name: ConfigSecretName(ssetName), + Namespace: namespace, }, Data: map[string][]byte{ ConfigFileName: []byte("a: b\nc: d\n"), @@ -41,45 +38,49 @@ func TestGetESConfigContent(t *testing.T) { } secretInvalid := corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod-config", - Namespace: "namespace", + Name: ConfigSecretName(ssetName), + Namespace: namespace, }, Data: map[string][]byte{ ConfigFileName: []byte("yolo"), }, } tests := []struct { - name string - client k8s.Client - esPod types.NamespacedName - want CanonicalConfig - wantErr bool + name string + client k8s.Client + namespace string + ssetName string + want CanonicalConfig + wantErr bool }{ { - name: "valid config exists", - client: k8s.WrapClient(fake.NewFakeClient(&secret)), - esPod: pod, - want: CanonicalConfig{common.MustCanonicalConfig(map[string]string{"a": "b", "c": "d"})}, - wantErr: false, + name: "valid config exists", + client: k8s.WrapClient(fake.NewFakeClient(&secret)), + namespace: namespace, + ssetName: ssetName, + want: CanonicalConfig{common.MustCanonicalConfig(map[string]string{"a": "b", "c": "d"})}, + wantErr: false, }, { - name: "config does not exist", - client: k8s.WrapClient(fake.NewFakeClient()), - esPod: pod, - want: CanonicalConfig{}, - wantErr: true, + name: "config does not exist", + client: k8s.WrapClient(fake.NewFakeClient()), + namespace: namespace, + ssetName: ssetName, + want: CanonicalConfig{}, + wantErr: true, }, { - name: "stored config is invalid", - client: k8s.WrapClient(fake.NewFakeClient(&secretInvalid)), - esPod: pod, - want: CanonicalConfig{}, - wantErr: true, + name: "stored config is invalid", + client: k8s.WrapClient(fake.NewFakeClient(&secretInvalid)), + namespace: namespace, + ssetName: ssetName, + want: CanonicalConfig{}, + wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := GetESConfigContent(tt.client, tt.esPod) + got, err := GetESConfigContent(tt.client, tt.namespace, tt.ssetName) if (err != nil) != tt.wantErr { t.Errorf("GetESConfigContent() error = %v, wantErr %v", err, tt.wantErr) return @@ -94,28 +95,23 @@ func TestGetESConfigContent(t *testing.T) { func TestReconcileConfig(t *testing.T) { err := v1alpha1.AddToScheme(scheme.Scheme) assert.NoError(t, err) - cluster := v1alpha1.Elasticsearch{ + es := v1alpha1.Elasticsearch{ ObjectMeta: metav1.ObjectMeta{ - Namespace: "namespace", + Namespace: "ns", Name: "cluster", }, } - pod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "namespace", - Name: "pod", - }, - } + ssetName := "sset" config := CanonicalConfig{common.MustCanonicalConfig(map[string]string{"a": "b", "c": "d"})} rendered, err := config.Render() require.NoError(t, err) configSecret := corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Namespace: pod.Namespace, - Name: ConfigSecretName(pod.Name), + Namespace: es.Namespace, + Name: ConfigSecretName(ssetName), Labels: map[string]string{ - label.ClusterNameLabelName: cluster.Name, - label.PodNameLabelName: pod.Name, + label.ClusterNameLabelName: es.Name, + label.StatefulSetNameLabelName: ssetName, }, }, Data: map[string][]byte{ @@ -123,45 +119,45 @@ func TestReconcileConfig(t *testing.T) { }, } tests := []struct { - name string - client 
k8s.Client - cluster v1alpha1.Elasticsearch - pod corev1.Pod - config CanonicalConfig - wantErr bool + name string + client k8s.Client + es v1alpha1.Elasticsearch + ssetName string + config CanonicalConfig + wantErr bool }{ { - name: "config does not exist", - client: k8s.WrapClient(fake.NewFakeClient()), - cluster: cluster, - pod: pod, - config: config, - wantErr: false, + name: "config does not exist", + client: k8s.WrapClient(fake.NewFakeClient()), + es: es, + ssetName: ssetName, + config: config, + wantErr: false, }, { - name: "config already exists", - client: k8s.WrapClient(fake.NewFakeClient(&configSecret)), - cluster: cluster, - pod: pod, - config: config, - wantErr: false, + name: "config already exists", + client: k8s.WrapClient(fake.NewFakeClient(&configSecret)), + es: es, + ssetName: ssetName, + config: config, + wantErr: false, }, { - name: "config should be updated", - client: k8s.WrapClient(fake.NewFakeClient(&configSecret)), - cluster: cluster, - pod: pod, - config: CanonicalConfig{common.MustCanonicalConfig(map[string]string{"a": "b", "c": "different"})}, - wantErr: false, + name: "config should be updated", + client: k8s.WrapClient(fake.NewFakeClient(&configSecret)), + es: es, + ssetName: ssetName, + config: CanonicalConfig{common.MustCanonicalConfig(map[string]string{"a": "b", "c": "different"})}, + wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if err := ReconcileConfig(tt.client, tt.cluster, tt.pod, tt.config); (err != nil) != tt.wantErr { + if err := ReconcileConfig(tt.client, tt.es, tt.ssetName, tt.config); (err != nil) != tt.wantErr { t.Errorf("ReconcileConfig() error = %v, wantErr %v", err, tt.wantErr) } // config in the apiserver should be the expected one - parsed, err := GetESConfigContent(tt.client, k8s.ExtractNamespacedName(&pod)) + parsed, err := GetESConfigContent(tt.client, tt.es.Namespace, tt.ssetName) require.NoError(t, err) require.Equal(t, tt.config, parsed) }) diff --git a/operators/pkg/controller/elasticsearch/sset/build.go b/operators/pkg/controller/elasticsearch/sset/build.go new file mode 100644 index 0000000000..36a8163388 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/sset/build.go @@ -0,0 +1,103 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package sset + +import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" + esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +// HeadlessServiceName returns the name of the headless service for the given StatefulSet. 
+func HeadlessServiceName(ssetName string) string { + // just use the sset name + return ssetName +} + +// HeadlessService returns a headless service for the given StatefulSet +func HeadlessService(es types.NamespacedName, ssetName string) corev1.Service { + return corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: es.Namespace, + Name: HeadlessServiceName(ssetName), + Labels: label.NewStatefulSetLabels(es, ssetName), + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + ClusterIP: corev1.ClusterIPNone, + Selector: label.NewStatefulSetLabels(es, ssetName), + }, + } +} + +func BuildStatefulSet(es types.NamespacedName, nodes v1alpha1.NodeSpec, cfg settings.CanonicalConfig, podTemplateBuilder version.PodTemplateSpecBuilder) (appsv1.StatefulSet, error) { + statefulSetName := name.StatefulSet(es.Name, nodes.Name) + + // ssetSelector is used to match the sset pods + ssetSelector := label.NewStatefulSetLabels(es, statefulSetName) + + // add default PVCs to the node spec + nodes.VolumeClaimTemplates = defaults.AppendDefaultPVCs( + nodes.VolumeClaimTemplates, nodes.PodTemplate.Spec, esvolume.DefaultVolumeClaimTemplates..., + ) + // build pod template + podTemplate, err := podTemplateBuilder(nodes, cfg) + if err != nil { + return appsv1.StatefulSet{}, err + } + + // build sset labels on top of the selector + // TODO: inherit user-provided labels and annotations from the CRD? + ssetLabels := make(map[string]string) + for k, v := range ssetSelector { + ssetLabels[k] = v + } + + sset := appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: es.Namespace, + Name: statefulSetName, + Labels: ssetLabels, + }, + Spec: appsv1.StatefulSetSpec{ + // we manage the partition ordinal to orchestrate nodes upgrades + UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.RollingUpdateStatefulSetStrategyType, + RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{ + Partition: &nodes.NodeCount, + }, + }, + // we don't care much about pods creation ordering, and manage deletion ordering ourselves, + // so we're fine with the StatefulSet controller spawning all pods in parallel + PodManagementPolicy: appsv1.ParallelPodManagement, + // use default revision history limit + RevisionHistoryLimit: nil, + // build a headless service per StatefulSet, matching the StatefulSet labels + ServiceName: HeadlessServiceName(statefulSetName), + Selector: &metav1.LabelSelector{ + MatchLabels: ssetSelector, + }, + + Replicas: &nodes.NodeCount, + VolumeClaimTemplates: nodes.VolumeClaimTemplates, + Template: podTemplate, + }, + } + + // store a hash of the sset resource in its labels for comparison purposes + sset.Labels = hash.SetTemplateHashLabel(sset.Labels, sset.Spec) + + return sset, nil +} diff --git a/operators/pkg/controller/elasticsearch/sset/list.go b/operators/pkg/controller/elasticsearch/sset/list.go new file mode 100644 index 0000000000..9c357436f7 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/sset/list.go @@ -0,0 +1,35 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package sset + +import ( + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" +) + +type StatefulSetList []appsv1.StatefulSet + +// RetrieveActualStatefulSets returns the list of existing StatefulSets labeled for the given es cluster. +func RetrieveActualStatefulSets(c k8s.Client, es types.NamespacedName) (StatefulSetList, error) { + var ssets appsv1.StatefulSetList + err := c.List(&client.ListOptions{ + Namespace: es.Namespace, + LabelSelector: label.NewLabelSelectorForElasticsearchClusterName(es.Name), + }, &ssets) + return StatefulSetList(ssets.Items), err +} + +func (l StatefulSetList) GetByName(ssetName string) (appsv1.StatefulSet, bool) { + for _, sset := range l { + if sset.Name == ssetName { + return sset, true + } + } + return appsv1.StatefulSet{}, false +} diff --git a/operators/pkg/controller/elasticsearch/sset/reconcile.go b/operators/pkg/controller/elasticsearch/sset/reconcile.go new file mode 100644 index 0000000000..bfb096bf2c --- /dev/null +++ b/operators/pkg/controller/elasticsearch/sset/reconcile.go @@ -0,0 +1,34 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package sset + +import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +func ReconcileStatefulSet(c k8s.Client, scheme *runtime.Scheme, es v1alpha1.Elasticsearch, expected appsv1.StatefulSet) error { + var reconciled appsv1.StatefulSet + return reconciler.ReconcileResource(reconciler.Params{ + Client: c, + Scheme: scheme, + Owner: &es, + Expected: &expected, + Reconciled: &reconciled, + NeedsUpdate: func() bool { + if len(reconciled.Labels) == 0 { + return true + } + return expected.Labels[hash.TemplateHashLabelName] != reconciled.Labels[hash.TemplateHashLabelName] + }, + UpdateReconciled: func() { + expected.DeepCopyInto(&reconciled) + }, + }) +} diff --git a/operators/pkg/controller/elasticsearch/version/common.go b/operators/pkg/controller/elasticsearch/version/common.go index 3c1f9548ad..69e00a5180 100644 --- a/operators/pkg/controller/elasticsearch/version/common.go +++ b/operators/pkg/controller/elasticsearch/version/common.go @@ -6,13 +6,14 @@ package version import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" 
"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" @@ -37,61 +38,53 @@ var ( } ) -// NewExpectedPodSpecs creates PodSpecContexts for all Elasticsearch nodes in the given Elasticsearch cluster -func NewExpectedPodSpecs( +// TODO: refactor +type PodTemplateSpecBuilder func(v1alpha1.NodeSpec, settings.CanonicalConfig) (corev1.PodTemplateSpec, error) + +// TODO: refactor to avoid all the params mess +func BuildPodTemplateSpec( es v1alpha1.Elasticsearch, + nodeSpec v1alpha1.NodeSpec, paramsTmpl pod.NewPodSpecParams, + cfg settings.CanonicalConfig, newEnvironmentVarsFn func(p pod.NewPodSpecParams, certs, creds, securecommon volume.SecretVolume) []corev1.EnvVar, - newESConfigFn func(clusterName string, config commonv1alpha1.Config) (settings.CanonicalConfig, error), newInitContainersFn func(imageName string, operatorImage string, setVMMaxMapCount *bool, transportCerts volume.SecretVolume, clusterName string) ([]corev1.Container, error), operatorImage string, -) ([]pod.PodSpecContext, error) { - podSpecs := make([]pod.PodSpecContext, 0, es.Spec.NodeCount()) - - for _, node := range es.Spec.Nodes { - // add default PVCs to the node spec - node.VolumeClaimTemplates = defaults.AppendDefaultPVCs( - node.VolumeClaimTemplates, node.PodTemplate.Spec, esvolume.DefaultVolumeClaimTemplates..., - ) - - for i := int32(0); i < node.NodeCount; i++ { - params := pod.NewPodSpecParams{ - // cluster-wide params - Elasticsearch: es, - // volumes - UsersSecretVolume: paramsTmpl.UsersSecretVolume, - ProbeUser: paramsTmpl.ProbeUser, - KeystoreUser: paramsTmpl.KeystoreUser, - UnicastHostsVolume: paramsTmpl.UnicastHostsVolume, - // pod params - NodeSpec: node, - } - podSpecCtx, err := podSpecContext( - params, - operatorImage, - newEnvironmentVarsFn, - newESConfigFn, - newInitContainersFn, - ) - if err != nil { - return nil, err - } - - podSpecs = append(podSpecs, podSpecCtx) - } +) (corev1.PodTemplateSpec, error) { + params := pod.NewPodSpecParams{ + // cluster-wide params + Elasticsearch: es, + // volumes + UsersSecretVolume: paramsTmpl.UsersSecretVolume, + ProbeUser: paramsTmpl.ProbeUser, + KeystoreUser: paramsTmpl.KeystoreUser, + UnicastHostsVolume: paramsTmpl.UnicastHostsVolume, + // pod params + NodeSpec: nodeSpec, } - - return podSpecs, nil + podSpecCtx, err := podSpecContext( + params, + operatorImage, + cfg, + newEnvironmentVarsFn, + newInitContainersFn, + ) + if err != nil { + return corev1.PodTemplateSpec{}, err + } + return podSpecCtx.PodTemplate, nil } // podSpecContext creates a new PodSpecContext for an Elasticsearch node func podSpecContext( p pod.NewPodSpecParams, operatorImage string, + cfg settings.CanonicalConfig, newEnvironmentVarsFn func(p pod.NewPodSpecParams, certs, creds, keystore volume.SecretVolume) []corev1.EnvVar, - newESConfigFn func(clusterName string, config commonv1alpha1.Config) (settings.CanonicalConfig, error), newInitContainersFn func(elasticsearchImage string, operatorImage string, setVMMaxMapCount *bool, transportCerts volume.SecretVolume, clusterName string) ([]corev1.Container, error), ) (pod.PodSpecContext, error) { + statefulSetName := name.StatefulSet(p.Elasticsearch.Name, p.NodeSpec.Name) + // setup volumes probeSecret := volume.NewSelectiveSecretVolumeWithMountPath( user.ElasticInternalUsersSecretName(p.Elasticsearch.Name), esvolume.ProbeUserVolumeName, @@ -117,14 +110,8 @@ func podSpecContext( esvolume.TransportCertificatesSecretVolumeMountPath, ) - // A few secret volumes will be generated based on the pod name. 
- // At this point the (maybe future) pod does not have a name yet: we still want to - // create corresponding volumes and volume mounts for pod spec comparisons. - // Let's create them with a placeholder for the pod name. Volume mounts will be correct, - // and secret refs in Volumes Mounts will be fixed right before pod creation, - // if this spec ends up leading to a new pod creation. - podNamePlaceholder := "pod-name-placeholder" - configVolume := settings.ConfigSecretVolume(podNamePlaceholder) + ssetName := name.StatefulSet(p.Elasticsearch.Name, p.NodeSpec.Name) + configVolume := settings.ConfigSecretVolume(ssetName) // append future volumes from PVCs (not resolved to a claim yet) persistentVolumes := make([]corev1.Volume, 0, len(p.NodeSpec.VolumeClaimTemplates)) @@ -206,66 +193,30 @@ func podSpecContext( WithInitContainerDefaults(). WithInitContainers(initContainers...) - // generate the configuration - // actual volumes to propagate it will be created later on - config := p.NodeSpec.Config - if config == nil { - config = &commonv1alpha1.Config{} - } - esConfig, err := newESConfigFn(p.Elasticsearch.Name, *config) + // set labels + version, err := version.Parse(p.Elasticsearch.Spec.Version) if err != nil { return pod.PodSpecContext{}, err } - unpackedCfg, err := esConfig.Unpack() + unpackedCfg, err := cfg.Unpack() if err != nil { return pod.PodSpecContext{}, err } - - // set labels - version, err := version.Parse(p.Elasticsearch.Spec.Version) + nodeRoles := unpackedCfg.Node + // label with a hash of the config to rotate the pod on config changes + cfgHash := hash.HashObject(cfg) + podLabels, err := label.NewPodLabels(k8s.ExtractNamespacedName(&p.Elasticsearch), statefulSetName, *version, nodeRoles, cfgHash) if err != nil { return pod.PodSpecContext{}, err } - builder = builder.WithLabels(label.NewPodLabels(p.Elasticsearch, *version, unpackedCfg)) + builder = builder.WithLabels(podLabels) return pod.PodSpecContext{ NodeSpec: p.NodeSpec, PodTemplate: builder.PodTemplate, - Config: esConfig, }, nil } -// NewPod constructs a pod from the given parameters. 
-func NewPod( - es v1alpha1.Elasticsearch, - podSpecCtx pod.PodSpecContext, -) corev1.Pod { - // build a pod based on the podSpecCtx template - template := *podSpecCtx.PodTemplate.DeepCopy() - pod := corev1.Pod{ - ObjectMeta: template.ObjectMeta, - Spec: template.Spec, - } - - // label the pod with a hash of its template, for comparison purpose, - // before it gets assigned a name - pod.Labels = hash.SetTemplateHashLabel(pod.Labels, template) - - // set name & namespace - pod.Name = name.NewPodName(es.Name, podSpecCtx.NodeSpec) - pod.Namespace = es.Namespace - - // set hostname and subdomain based on pod and cluster names - if pod.Spec.Hostname == "" { - pod.Spec.Hostname = pod.Name - } - if pod.Spec.Subdomain == "" { - pod.Spec.Subdomain = es.Name - } - - return pod -} - // quantityToMegabytes returns the megabyte value of the provided resource.Quantity func quantityToMegabytes(q resource.Quantity) int { return int(q.Value()) / 1024 / 1024 diff --git a/operators/pkg/controller/elasticsearch/version/common_test.go b/operators/pkg/controller/elasticsearch/version/common_test.go index 70c6822e19..69ab6050db 100644 --- a/operators/pkg/controller/elasticsearch/version/common_test.go +++ b/operators/pkg/controller/elasticsearch/version/common_test.go @@ -13,7 +13,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" @@ -44,92 +43,6 @@ func Test_quantityToMegabytes(t *testing.T) { } } -func TestNewPod(t *testing.T) { - esMeta := metav1.ObjectMeta{ - Namespace: "ns", - Name: "name", - } - es := v1alpha1.Elasticsearch{ - ObjectMeta: esMeta, - Spec: v1alpha1.ElasticsearchSpec{ - Version: "7.1.0", - }, - } - podTemplate := corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "a": "b", - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "container1", - }, - }, - }, - } - withCustomHostnameSubdomains := corev1.PodTemplateSpec{ - ObjectMeta: podTemplate.ObjectMeta, - Spec: corev1.PodSpec{ - Containers: podTemplate.Spec.Containers, - Hostname: "custom-hostname", - Subdomain: "custom-subdomain", - }, - } - - tests := []struct { - name string - es v1alpha1.Elasticsearch - podSpecCtx pod.PodSpecContext - want func() corev1.Pod - }{ - { - name: "happy path", - es: es, - podSpecCtx: pod.PodSpecContext{PodTemplate: podTemplate}, - want: func() corev1.Pod { - p := corev1.Pod{ - ObjectMeta: *podTemplate.ObjectMeta.DeepCopy(), - Spec: *podTemplate.Spec.DeepCopy(), - } - p.Namespace = esMeta.Namespace - p.Labels[hash.TemplateHashLabelName] = hash.HashObject(podTemplate) - p.Spec.Subdomain = es.Name - return p - }, - }, - { - name: "with custom hostname and subdomain", - es: es, - podSpecCtx: pod.PodSpecContext{PodTemplate: withCustomHostnameSubdomains}, - want: func() corev1.Pod { - p := corev1.Pod{ - ObjectMeta: *withCustomHostnameSubdomains.ObjectMeta.DeepCopy(), - Spec: *withCustomHostnameSubdomains.Spec.DeepCopy(), - } - p.Namespace = esMeta.Namespace - p.Labels[hash.TemplateHashLabelName] = hash.HashObject(withCustomHostnameSubdomains) - return p - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := NewPod(tt.es, tt.podSpecCtx) - // since the name is random, don't 
test its equality and inject it to the expected output - require.NotEmpty(t, got.Name) - require.NotEmpty(t, got.Spec.Hostname) - want := tt.want() - want.Name = got.Name - if tt.podSpecCtx.PodTemplate.Spec.Hostname == "" { - want.Spec.Hostname = got.Spec.Hostname - } - require.Equal(t, want, got) - }) - } -} - func Test_podSpec(t *testing.T) { // this test focuses on testing user-provided pod template overrides // setup mocks for env vars func, es config func and init-containers func @@ -145,9 +58,6 @@ func Test_podSpec(t *testing.T) { }, } } - newESConfigFn := func(clusterName string, config commonv1alpha1.Config) (settings.CanonicalConfig, error) { - return settings.NewCanonicalConfig(), nil - } newInitContainersFn := func(elasticsearchImage string, operatorImage string, setVMMaxMapCount *bool, nodeCertificatesVolume volume.SecretVolume, clusterName string) ([]corev1.Container, error) { return []corev1.Container{ { @@ -162,6 +72,10 @@ func Test_podSpec(t *testing.T) { varTrue := true varInt64 := int64(12) es71 := v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns", + Name: "es71", + }, Spec: v1alpha1.ElasticsearchSpec{ Version: "7.1.0", }, @@ -463,6 +377,7 @@ func Test_podSpec(t *testing.T) { params: pod.NewPodSpecParams{ Elasticsearch: es71, NodeSpec: v1alpha1.NodeSpec{ + Name: "node-spec-name", PodTemplate: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ @@ -478,19 +393,21 @@ func Test_podSpec(t *testing.T) { "a": "b", "c": "d", "common.k8s.elastic.co/type": "elasticsearch", - "elasticsearch.k8s.elastic.co/cluster-name": "", - "elasticsearch.k8s.elastic.co/node-data": "true", - "elasticsearch.k8s.elastic.co/node-ingest": "true", - "elasticsearch.k8s.elastic.co/node-master": "true", - "elasticsearch.k8s.elastic.co/node-ml": "true", - "elasticsearch.k8s.elastic.co/version": "7.1.0", + "elasticsearch.k8s.elastic.co/cluster-name": "es71", + "elasticsearch.k8s.elastic.co/config-template-hash": hash.HashObject(settings.NewCanonicalConfig()), + "elasticsearch.k8s.elastic.co/node-data": "true", + "elasticsearch.k8s.elastic.co/node-ingest": "true", + "elasticsearch.k8s.elastic.co/node-master": "true", + "elasticsearch.k8s.elastic.co/node-ml": "true", + "elasticsearch.k8s.elastic.co/statefulset": "es71-es-node-spec-name", + "elasticsearch.k8s.elastic.co/version": "7.1.0", }, specCtx.PodTemplate.Labels) }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - spec, err := podSpecContext(tt.params, "operator-image", newEnvVarsFn, newESConfigFn, newInitContainersFn) + spec, err := podSpecContext(tt.params, "operator-image", settings.NewCanonicalConfig(), newEnvVarsFn, newInitContainersFn) require.NoError(t, err) tt.assertions(t, spec) }) diff --git a/operators/pkg/controller/elasticsearch/version/version6/podspecs.go b/operators/pkg/controller/elasticsearch/version/version6/podspecs.go index 30efe2ce1e..7045338ea6 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/podspecs.go +++ b/operators/pkg/controller/elasticsearch/version/version6/podspecs.go @@ -7,45 +7,18 @@ package version6 import ( "path" - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/keystore" 
"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/processmanager" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" corev1 "k8s.io/api/core/v1" ) -// ExpectedPodSpecs returns a list of pod specs with context that we would expect to find in the Elasticsearch cluster. -func ExpectedPodSpecs( - es v1alpha1.Elasticsearch, - paramsTmpl pod.NewPodSpecParams, - operatorImage string, -) ([]pod.PodSpecContext, error) { - // the contents of the file realm volume needs to be symlinked into place - paramsTmpl.UsersSecretVolume = volume.NewSecretVolumeWithMountPath( - user.XPackFileRealmSecretName(es.Name), - esvolume.XPackFileRealmVolumeName, - esvolume.XPackFileRealmVolumeMountPath, - ) - - return version.NewExpectedPodSpecs( - es, - paramsTmpl, - newEnvironmentVars, - settings.NewMergedESConfig, - initcontainer.NewInitContainers, - operatorImage, - ) -} - -// newEnvironmentVars returns the environment vars to be associated to a pod -func newEnvironmentVars( +// NewEnvironmentVars returns the environment vars to be associated to a pod +func NewEnvironmentVars( p pod.NewPodSpecParams, httpCertificatesVolume volume.SecretVolume, keystoreUserSecretVolume volume.SecretVolume, diff --git a/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go b/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go index 33e14784b1..5ab52c36d4 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go +++ b/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go @@ -12,13 +12,11 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/keystore" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/processmanager" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" @@ -46,7 +44,7 @@ func TestNewEnvironmentVars(t *testing.T) { wantEnv []corev1.EnvVar }{ { - name: "2 nodes", + name: "sample cluster", args: args{ p: pod.NewPodSpecParams{ ProbeUser: testProbeUser, @@ -89,121 +87,9 @@ func TestNewEnvironmentVars(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := newEnvironmentVars(tt.args.p, tt.args.httpCertificatesVolume, + got := NewEnvironmentVars(tt.args.p, tt.args.httpCertificatesVolume, tt.args.keystoreUserVolume, tt.args.secureSettingsVolume) assert.Equal(t, tt.wantEnv, got) }) } } - -func TestCreateExpectedPodSpecsReturnsCorrectNodeCount(t *testing.T) { - tests := []struct { - name string - es v1alpha1.Elasticsearch - expectedPodCount int - 
}{ - { - name: "2 nodes es", - es: v1alpha1.Elasticsearch{ - ObjectMeta: testObjectMeta, - Spec: v1alpha1.ElasticsearchSpec{ - Version: "7.1.0", - Nodes: []v1alpha1.NodeSpec{ - { - NodeCount: 2, - }, - }, - }, - }, - expectedPodCount: 2, - }, - { - name: "1 master 2 data", - es: v1alpha1.Elasticsearch{ - ObjectMeta: testObjectMeta, - Spec: v1alpha1.ElasticsearchSpec{ - Version: "7.1.0", - Nodes: []v1alpha1.NodeSpec{ - { - NodeCount: 1, - Config: &commonv1alpha1.Config{ - Data: map[string]interface{}{ - v1alpha1.NodeMaster: "true", - }, - }, - }, - { - NodeCount: 2, - Config: &commonv1alpha1.Config{ - Data: map[string]interface{}{ - v1alpha1.NodeData: "true", - }, - }, - }, - }, - }, - }, - expectedPodCount: 3, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - podSpecs, err := ExpectedPodSpecs( - tt.es, - pod.NewPodSpecParams{ProbeUser: testProbeUser}, - "operator-image-dummy", - ) - assert.NoError(t, err) - assert.Equal(t, tt.expectedPodCount, len(podSpecs)) - }) - } -} - -func TestCreateExpectedPodSpecsReturnsCorrectPodSpec(t *testing.T) { - es := v1alpha1.Elasticsearch{ - ObjectMeta: testObjectMeta, - Spec: v1alpha1.ElasticsearchSpec{ - Version: "1.2.3", - Image: "custom-image", - Nodes: []v1alpha1.NodeSpec{ - { - NodeCount: 1, - Config: &commonv1alpha1.Config{ - Data: map[string]interface{}{ - v1alpha1.NodeMaster: "true", - }, - }, - }, - }, - }, - } - podSpec, err := ExpectedPodSpecs( - es, - pod.NewPodSpecParams{ - ProbeUser: testProbeUser, - UsersSecretVolume: volume.NewSecretVolumeWithMountPath("", "user-secret-vol", "/mount/path"), - UnicastHostsVolume: volume.NewConfigMapVolume( - name.UnicastHostsConfigMap(es.Name), - esvolume.UnicastHostsVolumeName, - esvolume.UnicastHostsVolumeMountPath, - ), - }, - "operator-image-dummy", - ) - assert.NoError(t, err) - assert.Equal(t, 1, len(podSpec)) - - esPodSpec := podSpec[0].PodTemplate.Spec - assert.Equal(t, 1, len(esPodSpec.Containers)) - assert.Equal(t, 3, len(esPodSpec.InitContainers)) - assert.Equal(t, 15, len(esPodSpec.Volumes)) - - esContainer := esPodSpec.Containers[0] - assert.Equal(t, 15, len(esContainer.VolumeMounts)) - assert.NotEqual(t, 0, esContainer.Env) - // esContainer.Env actual values are tested in environment_test.go - assert.Equal(t, "custom-image", esContainer.Image) - assert.NotNil(t, esContainer.ReadinessProbe) - assert.ElementsMatch(t, pod.DefaultContainerPorts, esContainer.Ports) - assert.NotEmpty(t, esContainer.ReadinessProbe.Handler.Exec.Command) -} diff --git a/operators/pkg/controller/elasticsearch/version/version6/zen1.go b/operators/pkg/controller/elasticsearch/version/version6/zen1.go index 781357d7dc..6bc4acb22f 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/zen1.go +++ b/operators/pkg/controller/elasticsearch/version/version6/zen1.go @@ -66,7 +66,7 @@ func UpdateZen1Discovery( "minimum_master_nodes", minimumMasterNodes, ) for _, p := range allPods { - config, err := settings.GetESConfigContent(c, k8s.ExtractNamespacedName(&p)) + config, err := settings.GetESConfigContent(c, p.Namespace, p.Labels[label.StatefulSetNameLabelName]) if err != nil { return false, err } @@ -79,9 +79,10 @@ func UpdateZen1Discovery( if err != nil { return false, err } - if err := settings.ReconcileConfig(c, cluster, p, config); err != nil { - return false, err - } + // TODO: fix for sset + //if err := settings.ReconcileConfig(c, cluster, p, config); err != nil { + // return false, err + //} } // Update the current value for each new pod that is about to be created diff --git 
a/operators/pkg/controller/elasticsearch/version/version6/zen1_test.go b/operators/pkg/controller/elasticsearch/version/version6/zen1_test.go index 088b5f014e..c186756e19 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/zen1_test.go +++ b/operators/pkg/controller/elasticsearch/version/version6/zen1_test.go @@ -52,12 +52,15 @@ func fakeEsClient(raiseError bool) client.Client { }) } -func newMasterPod(name, namespace string) corev1.Pod { +func newMasterPod(name, namespace, ssetName string) corev1.Pod { pod := corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, - Labels: map[string]string{string(label.NodeTypesMasterLabelName): strconv.FormatBool(true)}, + Labels: map[string]string{ + string(label.NodeTypesMasterLabelName): strconv.FormatBool(true), + label.StatefulSetNameLabelName: ssetName, + }, }, Status: corev1.PodStatus{ Conditions: []corev1.PodCondition{ @@ -75,11 +78,11 @@ func newMasterPod(name, namespace string) corev1.Pod { return pod } -func podConfig(podName, namespace string) *corev1.Secret { +func ssetConfig(namespace, ssetName string) *corev1.Secret { return &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Namespace: namespace, - Name: settings.ConfigSecretName(podName), + Name: settings.ConfigSecretName(ssetName), }, Data: map[string][]byte{ settings.ConfigFileName: []byte("a: b\nc: d\n"), @@ -102,6 +105,7 @@ func TestUpdateZen1Discovery(t *testing.T) { Name: "es1", }, } + ssetName := "master-nodes" type args struct { cluster v1alpha1.Elasticsearch c k8s.Client @@ -121,12 +125,12 @@ func TestUpdateZen1Discovery(t *testing.T) { name: "Update a one master node cluster", args: args{ esClient: fakeEsClient(true), // second master is not created, raise an error if API is called - c: k8s.WrapClient(fake.NewFakeClientWithScheme(s, podConfig("master1", "ns1"))), + c: k8s.WrapClient(fake.NewFakeClientWithScheme(s, ssetConfig("ns1", ssetName))), performableChanges: &mutation.PerformableChanges{ Changes: mutation.Changes{ ToCreate: []mutation.PodToCreate{ { - Pod: newMasterPod("master2", "ns1"), + Pod: newMasterPod("master2", "ns1", ssetName), PodSpecCtx: pod.PodSpecContext{ Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, }, @@ -135,7 +139,7 @@ func TestUpdateZen1Discovery(t *testing.T) { }, }, allPods: []corev1.Pod{ - newMasterPod("master1", "ns1"), + newMasterPod("master1", "ns1", ssetName), }, state: reconcile.NewState(cluster), }, @@ -149,17 +153,14 @@ func TestUpdateZen1Discovery(t *testing.T) { esClient: fakeEsClient(false), // a majority of master is available, call the API c: k8s.WrapClient(fake.NewFakeClientWithScheme( s, - podConfig("master1", "ns1"), - podConfig("master2", "ns1"), - podConfig("master3", "ns1"), - podConfig("master4", "ns1"), + ssetConfig("ns1", ssetName), ), ), performableChanges: &mutation.PerformableChanges{ Changes: mutation.Changes{ ToCreate: []mutation.PodToCreate{ { - Pod: newMasterPod("master5", "ns1"), + Pod: newMasterPod("master5", "ns1", ssetName), PodSpecCtx: pod.PodSpecContext{ Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, }, @@ -168,10 +169,10 @@ func TestUpdateZen1Discovery(t *testing.T) { }, }, allPods: []corev1.Pod{ - newMasterPod("master1", "ns1"), - newMasterPod("master2", "ns1"), - newMasterPod("master3", "ns1"), - newMasterPod("master4", "ns1"), + newMasterPod("master1", "ns1", ssetName), + newMasterPod("master2", "ns1", ssetName), + newMasterPod("master3", "ns1", ssetName), + newMasterPod("master4", "ns1", ssetName), }, state: 
reconcile.NewState(cluster), }, diff --git a/operators/pkg/controller/license/license_controller_integration_test.go b/operators/pkg/controller/license/license_controller_integration_test.go index d7f345245b..93b6594040 100644 --- a/operators/pkg/controller/license/license_controller_integration_test.go +++ b/operators/pkg/controller/license/license_controller_integration_test.go @@ -100,6 +100,7 @@ func TestReconcile(t *testing.T) { SetVMMaxMapCount: &varFalse, Nodes: []v1alpha1.NodeSpec{ { + Name: "all", NodeCount: 3, }, }, From a15245a432580698eed28878e5b87bc801b993c1 Mon Sep 17 00:00:00 2001 From: Sebastien Guilloux Date: Wed, 10 Jul 2019 15:50:35 +0200 Subject: [PATCH 03/31] Handle StatefulSets scale up/down/replacement (#1218) Add support for scaling a StatefulSet up (just update sset replicas) or down (migrate data away then remove nodes when ready). Which also adds support for renaming a StatefulSet, by creating the new sset, then slowly remove the existing one with data migration. It ignores any zen1/zen2 consideration for now (works fine with zen2 in most cases). It ignores any change budget consideration for now. Unit tests and E2E tests are missing, but a refactoring is to be expected to handle zen2/zen2 and the changeBudget. I'd prefer waiting for this refactoring to happen before dealing with large unit testing. Consider this as still work in progress. The PR also modifies the way pods and ssets are watched by the Elasticsearch controller. --- .../elasticsearch_v1alpha1_elasticsearch.yaml | 2 +- .../config/samples/apm/apm_es_kibana.yaml | 2 +- .../samples/elasticsearch/elasticsearch.yaml | 2 +- .../elasticsearch_local_volume.yaml | 2 +- .../config/samples/kibana/kibana_es.yaml | 2 +- .../v1alpha1/elasticsearch_types.go | 2 +- .../elasticsearch/driver/default.go | 229 +++++++++++++----- .../elasticsearch/elasticsearch_controller.go | 72 +++--- .../elasticsearch/migration/migrate_data.go | 12 +- .../migration/migrate_data_test.go | 54 ++++- .../elasticsearch/nodespec/resources.go | 5 +- .../controller/elasticsearch/sset/getter.go | 17 ++ .../pkg/controller/elasticsearch/sset/pod.go | 11 + 13 files changed, 301 insertions(+), 111 deletions(-) create mode 100644 operators/pkg/controller/elasticsearch/sset/getter.go create mode 100644 operators/pkg/controller/elasticsearch/sset/pod.go diff --git a/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml b/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml index 14c27efa02..70c9306d9a 100644 --- a/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml +++ b/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml @@ -122,7 +122,7 @@ spec: name: description: Name is a logical name for this set of nodes. Used as a part of the managed Elasticsearch node.name setting. 
- maxLength: 12 + maxLength: 19 pattern: '[a-zA-Z0-9-]+' type: string nodeCount: diff --git a/operators/config/samples/apm/apm_es_kibana.yaml b/operators/config/samples/apm/apm_es_kibana.yaml index 371cb4ea15..d0a12c0557 100644 --- a/operators/config/samples/apm/apm_es_kibana.yaml +++ b/operators/config/samples/apm/apm_es_kibana.yaml @@ -7,7 +7,7 @@ metadata: spec: version: "7.1.0" nodes: - - name: all + - name: default nodeCount: 3 --- apiVersion: apm.k8s.elastic.co/v1alpha1 diff --git a/operators/config/samples/elasticsearch/elasticsearch.yaml b/operators/config/samples/elasticsearch/elasticsearch.yaml index 457197d319..2264698d39 100644 --- a/operators/config/samples/elasticsearch/elasticsearch.yaml +++ b/operators/config/samples/elasticsearch/elasticsearch.yaml @@ -6,7 +6,7 @@ metadata: spec: version: "7.1.0" nodes: - - name: all + - name: default config: # most Elasticsearch configuration parameters are possible to set, e.g: node.attr.attr_name: attr_value diff --git a/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml b/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml index b16f17010f..6ed411ac9d 100644 --- a/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml +++ b/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml @@ -7,7 +7,7 @@ metadata: spec: version: "7.1.0" nodes: - - name: all + - name: default nodeCount: 3 volumeClaimTemplates: - metadata: diff --git a/operators/config/samples/kibana/kibana_es.yaml b/operators/config/samples/kibana/kibana_es.yaml index db0f8d5498..44caaf58dd 100644 --- a/operators/config/samples/kibana/kibana_es.yaml +++ b/operators/config/samples/kibana/kibana_es.yaml @@ -6,7 +6,7 @@ metadata: spec: version: "7.1.0" nodes: - - name: all + - name: default nodeCount: 1 --- apiVersion: kibana.k8s.elastic.co/v1alpha1 diff --git a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go index 7ff6c3e6d1..913f53510e 100644 --- a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go +++ b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go @@ -66,7 +66,7 @@ func (es ElasticsearchSpec) NodeCount() int32 { type NodeSpec struct { // Name is a logical name for this set of nodes. Used as a part of the managed Elasticsearch node.name setting. // +kubebuilder:validation:Pattern=[a-zA-Z0-9-]+ - // +kubebuilder:validation:MaxLength=12 + // +kubebuilder:validation:MaxLength=19 Name string `json:"name"` // Config represents Elasticsearch configuration. 
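For context on how this node set name propagates into generated resource names (which is what the length limit guards), here is a minimal illustrative sketch. The "-es-" infix and the "-es-config" suffix are inferred from the test fixtures earlier in this series ("es71-es-node-spec-name", "ssetName-es-config"); the helpers are stand-ins for name.StatefulSet, name.ConfigSecret and sset.PodName, not the real implementations.

package main

import "fmt"

// statefulSetName sketches name.StatefulSet: <clusterName>-es-<nodeSpecName>,
// as expected by the podSpecContext label tests in this series.
func statefulSetName(esName, nodeSpecName string) string {
	return fmt.Sprintf("%s-es-%s", esName, nodeSpecName)
}

// configSecretName sketches name.ConfigSecret: the per-StatefulSet config
// secret, per TestConfigSecretName ("ssetName" -> "ssetName-es-config").
func configSecretName(ssetName string) string {
	return ssetName + "-es-config"
}

// podName sketches sset.PodName: StatefulSet pods are simply <ssetName>-<ordinal>.
func podName(ssetName string, ordinal int) string {
	return fmt.Sprintf("%s-%d", ssetName, ordinal)
}

func main() {
	sset := statefulSetName("es71", "default") // "es71-es-default"
	fmt.Println(sset, configSecretName(sset), podName(sset, 0))
}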
diff --git a/operators/pkg/controller/elasticsearch/driver/default.go b/operators/pkg/controller/elasticsearch/driver/default.go index 15f076de28..b162617920 100644 --- a/operators/pkg/controller/elasticsearch/driver/default.go +++ b/operators/pkg/controller/elasticsearch/driver/default.go @@ -8,11 +8,15 @@ import ( "crypto/x509" "fmt" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" controller "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/migration" + esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" @@ -39,7 +43,6 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/version6" - esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) @@ -212,7 +215,6 @@ func (d *defaultDriver) Reconcile( return results.WithError(err) } - namespacedName := k8s.ExtractNamespacedName(&es) // //// There might be some ongoing creations and deletions our k8s client cache //// hasn't seen yet. In such case, requeue until we are in-sync. @@ -323,68 +325,11 @@ func (d *defaultDriver) Reconcile( ) } - actualStatefulSets, err := sset.RetrieveActualStatefulSets(d.Client, namespacedName) - if err != nil { - return results.WithError(err) - } - - nodeSpecResources, err := nodespec.BuildExpectedResources(es, podTemplateSpecBuilder) - if err != nil { - return results.WithError(err) - } - - // TODO: handle zen2 initial master nodes more cleanly - // should be empty once cluster is bootstraped - var initialMasters []string - // TODO: refactor/move - for _, res := range nodeSpecResources { - cfg, err := res.Config.Unpack() - if err != nil { - return results.WithError(err) - } - if cfg.Node.Master { - for i := 0; i < int(*res.StatefulSet.Spec.Replicas); i++ { - initialMasters = append(initialMasters, fmt.Sprintf("%s-%d", res.StatefulSet.Name, i)) - } - } - } - for i := range nodeSpecResources { - if err := nodeSpecResources[i].Config.SetStrings(settings.ClusterInitialMasterNodes, initialMasters...); err != nil { - return results.WithError(err) - } - } - - // create or update all expected ssets - // TODO: safe upgrades - for _, nodeSpec := range nodeSpecResources { - if err := settings.ReconcileConfig(d.Client, es, nodeSpec.StatefulSet.Name, nodeSpec.Config); err != nil { - return results.WithError(err) - } - if _, err := common.ReconcileService(d.Client, d.Scheme, &nodeSpec.HeadlessService, &es); err != nil { - return results.WithError(err) - } - if err := sset.ReconcileStatefulSet(d.Client, d.Scheme, es, nodeSpec.StatefulSet); err != nil { - return results.WithError(err) - } - } - - // delete all unexpected ssets - for _, actual := range actualStatefulSets { - if _, shouldExist := nodeSpecResources.StatefulSets().GetByName(actual.Name); !shouldExist { - // TODO: safe node removal - if err := d.Client.Delete(&actual); err != nil { - return results.WithError(err) - } - } + res = d.reconcileNodeSpecs(es, podTemplateSpecBuilder, 
esClient, observedState) + if results.WithResults(res).HasError() { + return results } - // TODO: - // - safe sset replacement - // - safe node removal (data migration) - // - safe node upgrade (rollingUpdate.Partition + shards allocation) - // - change budget - // - zen1, zen2 - // //// Call Zen1 setting updater before new masters are created to ensure that they immediately start with the //// correct value for minimum_master_nodes. @@ -568,6 +513,166 @@ func removePodFromList(pods []corev1.Pod, pod corev1.Pod) []corev1.Pod { return pods } +func (d *defaultDriver) reconcileNodeSpecs( + es v1alpha1.Elasticsearch, + podSpecBuilder esversion.PodTemplateSpecBuilder, + esClient esclient.Client, + observedState observer.State, +) *reconciler.Results { + results := &reconciler.Results{} + + actualStatefulSets, err := sset.RetrieveActualStatefulSets(d.Client, k8s.ExtractNamespacedName(&es)) + if err != nil { + return results.WithError(err) + } + + nodeSpecResources, err := nodespec.BuildExpectedResources(es, podSpecBuilder) + if err != nil { + return results.WithError(err) + } + + // TODO: handle zen2 initial master nodes more cleanly + // should be empty once cluster is bootstraped + var initialMasters []string + // TODO: refactor/move + for _, res := range nodeSpecResources { + cfg, err := res.Config.Unpack() + if err != nil { + return results.WithError(err) + } + if cfg.Node.Master { + for i := 0; i < int(*res.StatefulSet.Spec.Replicas); i++ { + initialMasters = append(initialMasters, fmt.Sprintf("%s-%d", res.StatefulSet.Name, i)) + } + } + } + for i := range nodeSpecResources { + if err := nodeSpecResources[i].Config.SetStrings(settings.ClusterInitialMasterNodes, initialMasters...); err != nil { + return results.WithError(err) + } + } + + // Phase 1: apply expected StatefulSets resources, but don't scale down. + // The goal is to: + // 1. scale sset up (eg. go from 3 to 5 replicas). + // 2. apply configuration changes on the sset resource, to be used for future pods creation/recreation, + // but do not rotate pods yet. + // 3. do **not** apply replicas scale down, otherwise nodes would be deleted before + // we handle a clean deletion. + for _, nodeSpecRes := range nodeSpecResources { + // always reconcile config (will apply to new & recreated pods) + if err := settings.ReconcileConfig(d.Client, es, nodeSpecRes.StatefulSet.Name, nodeSpecRes.Config); err != nil { + return results.WithError(err) + } + if _, err := common.ReconcileService(d.Client, d.Scheme, &nodeSpecRes.HeadlessService, &es); err != nil { + return results.WithError(err) + } + ssetToApply := *nodeSpecRes.StatefulSet.DeepCopy() + actual, exists := actualStatefulSets.GetByName(ssetToApply.Name) + if exists && sset.Replicas(ssetToApply) < sset.Replicas(actual) { + // sset needs to be scaled down + // update the sset to use the new spec but don't scale replicas down for now + ssetToApply.Spec.Replicas = actual.Spec.Replicas + } + if err := sset.ReconcileStatefulSet(d.Client, d.Scheme, es, ssetToApply); err != nil { + return results.WithError(err) + } + } + + // Phase 2: handle sset scale down. + // We want to safely remove nodes from the cluster, either because the sset requires less replicas, + // or because it should be removed entirely. 
+ for i, actual := range actualStatefulSets { + expected, shouldExist := nodeSpecResources.StatefulSets().GetByName(actual.Name) + switch { + // stateful set removal + case !shouldExist: + target := int32(0) + removalResult := d.scaleStatefulSetDown(&actualStatefulSets[i], target, esClient, observedState) + results.WithResults(removalResult) + if removalResult.HasError() { + return results + } + // stateful set downscale + case actual.Spec.Replicas != nil && sset.Replicas(expected) < sset.Replicas(actual): + target := sset.Replicas(expected) + downscaleResult := d.scaleStatefulSetDown(&actualStatefulSets[i], target, esClient, observedState) + if downscaleResult.HasError() { + return results + } + } + } + + // TODO: + // - safe node upgrade (rollingUpdate.Partition + shards allocation) + // - change budget + // - zen1, zen2 + return results +} + +func (d *defaultDriver) scaleStatefulSetDown( + statefulSet *appsv1.StatefulSet, + targetReplicas int32, + esClient esclient.Client, + observedState observer.State, +) *reconciler.Results { + results := &reconciler.Results{} + logger := log.WithValues("statefulset", k8s.ExtractNamespacedName(statefulSet)) + + if sset.Replicas(*statefulSet) == 0 && targetReplicas == 0 { + // we don't expect any new replicas in this statefulset, remove it + logger.Info("Deleting statefulset") + if err := d.Client.Delete(statefulSet); err != nil { + return results.WithError(err) + } + } + // copy the current replicas, to be decremented with nodes to remove + initialReplicas := sset.Replicas(*statefulSet) + updatedReplicas := initialReplicas + + // leaving nodes names can be built from StatefulSet name and ordinals + // nodes are ordered by highest ordinal first + var leavingNodes []string + for i := initialReplicas - 1; i > targetReplicas-1; i-- { + leavingNodes = append(leavingNodes, sset.PodName(statefulSet.Name, int(i))) + } + + // TODO: don't remove last master/last data nodes? + // TODO: detect cases where data migration cannot happen since no nodes to host shards? + + // migrate data away from these nodes before removing them + logger.V(1).Info("Migrating data away from nodes", "nodes", leavingNodes) + if err := migration.MigrateData(esClient, leavingNodes); err != nil { + return results.WithError(err) + } + + for _, node := range leavingNodes { + if migration.IsMigratingData(observedState, node, leavingNodes) { + // data migration not over yet: schedule a requeue + logger.V(1).Info("Data migration not over yet, skipping node deletion", "node", node) + results.WithResult(defaultRequeue) + // no need to check other nodes since we remove them in order and this one isn't ready anyway + break + } + // data migration over: allow pod to be removed + updatedReplicas-- + } + + if updatedReplicas != initialReplicas { + // update cluster coordination settings to account for nodes deletion + // TODO: update zen1/zen2 + + // trigger deletion of nodes whose data migration is over + logger.V(1).Info("Scaling replicas down", "from", initialReplicas, "to", updatedReplicas) + statefulSet.Spec.Replicas = &updatedReplicas + if err := d.Client.Update(statefulSet); err != nil { + return results.WithError(err) + } + } + + return nil +} + // //// calculateChanges calculates the changes we'd need to perform to go from the current cluster configuration to the //// desired one. 
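To make the downscale logic above concrete, here is a self-contained sketch of how scaleStatefulSetDown enumerates the nodes that must leave: highest ordinals first, counting down to the target replica count, with each one only dropped from Spec.Replicas once its data migration has finished. The StatefulSet name below is made up for illustration.

package main

import "fmt"

// leavingNodes mirrors the ordinal loop in scaleStatefulSetDown: scaling from
// initial to target replicas marks the highest ordinals as leaving first.
func leavingNodes(ssetName string, initial, target int32) []string {
	var nodes []string
	for i := initial - 1; i > target-1; i-- {
		nodes = append(nodes, fmt.Sprintf("%s-%d", ssetName, i))
	}
	return nodes
}

func main() {
	// Scaling a hypothetical "es-sample-es-default" StatefulSet from 5 to 3
	// replicas yields [es-sample-es-default-4 es-sample-es-default-3].
	fmt.Println(leavingNodes("es-sample-es-default", 5, 3))
}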
diff --git a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go index 40b2589f3c..a863144ff6 100644 --- a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go +++ b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go @@ -9,6 +9,19 @@ import ( "sync/atomic" "time" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" + "sigs.k8s.io/controller-runtime/pkg/source" + elasticsearchv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates/http" @@ -25,16 +38,6 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/validation" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" - "sigs.k8s.io/controller-runtime/pkg/source" ) const name = "elasticsearch-controller" @@ -87,29 +90,36 @@ func addWatches(c controller.Controller, r *ReconcileElasticsearch) error { return err } - // Watch pods - if err := c.Watch(&source.Kind{Type: &corev1.Pod{}}, r.dynamicWatches.Pods); err != nil { + // Watch StatefulSets + if err := c.Watch( + &source.Kind{Type: &appsv1.StatefulSet{}}, &handler.EnqueueRequestForOwner{ + IsController: true, + OwnerType: &elasticsearchv1alpha1.Elasticsearch{}, + }, + ); err != nil { return err } - if err := r.dynamicWatches.Pods.AddHandlers( - // trigger reconciliation loop on ES pods owned by this controller - &watches.OwnerWatch{ - EnqueueRequestForOwner: handler.EnqueueRequestForOwner{ - IsController: true, - OwnerType: &elasticsearchv1alpha1.Elasticsearch{}, - }, - }, - // Reconcile pods expectations. - // This does not technically need to be part of a dynamic watch, since it will - // stay there forever (nothing dynamic here). - // Turns out our dynamic watch mechanism happens to be a pretty nice way to - // setup multiple "static" handlers for a single watch. 
- watches.NewExpectationsWatch( - "pods-expectations", - r.podsExpectations, - // retrieve cluster name from pod labels - label.ClusterFromResourceLabels, - )); err != nil { + + // Watch pods belonging to ES clusters + if err := c.Watch(&source.Kind{Type: &corev1.Pod{}}, + &handler.EnqueueRequestsFromMapFunc{ + ToRequests: handler.ToRequestsFunc( + func(object handler.MapObject) []reconcile.Request { + labels := object.Meta.GetLabels() + clusterName, isSet := labels[label.ClusterNameLabelName] + if !isSet { + return nil + } + return []reconcile.Request{ + { + NamespacedName: types.NamespacedName{ + Namespace: object.Meta.GetNamespace(), + Name: clusterName, + }, + }, + } + }), + }); err != nil { return err } diff --git a/operators/pkg/controller/elasticsearch/migration/migrate_data.go b/operators/pkg/controller/elasticsearch/migration/migrate_data.go index 866e677e41..885c5a84e0 100644 --- a/operators/pkg/controller/elasticsearch/migration/migrate_data.go +++ b/operators/pkg/controller/elasticsearch/migration/migrate_data.go @@ -8,8 +8,6 @@ import ( "context" "strings" - corev1 "k8s.io/api/core/v1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" ) @@ -66,16 +64,16 @@ func nodeIsMigratingData(nodeName string, shards []client.Shard, exclusions map[ // IsMigratingData looks only at the presence of shards on a given node // and checks if there is at least one other copy of the shard in the cluster // that is started and not relocating. -func IsMigratingData(state observer.State, pod corev1.Pod, exclusions []corev1.Pod) bool { +func IsMigratingData(state observer.State, podName string, exclusions []string) bool { clusterState := state.ClusterState - if clusterState.IsEmpty() { + if clusterState == nil || clusterState.IsEmpty() { return true // we don't know if the request timed out or the cluster has not formed yet } excludedNodes := make(map[string]struct{}, len(exclusions)) - for _, n := range exclusions { - excludedNodes[n.Name] = struct{}{} + for _, name := range exclusions { + excludedNodes[name] = struct{}{} } - return nodeIsMigratingData(pod.Name, clusterState.GetShards(), excludedNodes) + return nodeIsMigratingData(podName, clusterState.GetShards(), excludedNodes) } // AllocationSettings captures Elasticsearch API calls around allocation filtering. 
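A short usage sketch of the new IsMigratingData signature, mirroring how the downscale loop calls it with pod names instead of Pod objects; the cluster states below are stand-ins of the kind exercised by the tests that follow.

package main

import (
	"fmt"

	"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client"
	"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/migration"
	"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer"
)

func main() {
	leaving := []string{"es-sample-es-default-4", "es-sample-es-default-3"}

	// A nil (or empty) observed cluster state is treated conservatively:
	// the node is assumed to still hold data, so its deletion is postponed.
	unknown := observer.State{ClusterState: nil}
	fmt.Println(migration.IsMigratingData(unknown, leaving[0], leaving)) // true

	// With a formed cluster state reporting no shards on the node,
	// the node is considered safe to remove.
	formed := observer.State{ClusterState: &client.ClusterState{ClusterName: "es-sample"}}
	fmt.Println(migration.IsMigratingData(formed, leaving[0], leaving)) // false
}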
diff --git a/operators/pkg/controller/elasticsearch/migration/migrate_data_test.go b/operators/pkg/controller/elasticsearch/migration/migrate_data_test.go index a02c627a06..b8f18aaec1 100644 --- a/operators/pkg/controller/elasticsearch/migration/migrate_data_test.go +++ b/operators/pkg/controller/elasticsearch/migration/migrate_data_test.go @@ -8,10 +8,10 @@ import ( "context" "testing" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" ) func TestEnoughRedundancy(t *testing.T) { @@ -128,3 +128,53 @@ func TestMigrateData(t *testing.T) { assert.Contains(t, esClient.getAndReset(), tt.want) } } + +func TestIsMigratingData(t *testing.T) { + type args struct { + state observer.State + podName string + exclusions []string + } + tests := []struct { + name string + args args + want bool + }{ + { + name: "cluster state is nil", + args: args{ + state: observer.State{ClusterState: nil}, + podName: "pod", + exclusions: nil, + }, + want: true, + }, + { + name: "cluster state is empty", + args: args{ + state: observer.State{ClusterState: &client.ClusterState{}}, + podName: "pod", + exclusions: nil, + }, + want: true, + }, + { + name: "no data migration in progress", + args: args{ + state: observer.State{ClusterState: &client.ClusterState{ + ClusterName: "name", + }}, + podName: "pod", + exclusions: nil, + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsMigratingData(tt.args.state, tt.args.podName, tt.args.exclusions); got != tt.want { + t.Errorf("IsMigratingData() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/operators/pkg/controller/elasticsearch/nodespec/resources.go b/operators/pkg/controller/elasticsearch/nodespec/resources.go index 112afdc81d..847e98b73e 100644 --- a/operators/pkg/controller/elasticsearch/nodespec/resources.go +++ b/operators/pkg/controller/elasticsearch/nodespec/resources.go @@ -21,15 +21,14 @@ type Resources struct { StatefulSet appsv1.StatefulSet HeadlessService corev1.Service Config settings.CanonicalConfig - // TLS certs } type ResourcesList []Resources func (l ResourcesList) StatefulSets() sset.StatefulSetList { ssetList := make(sset.StatefulSetList, 0, len(l)) - for _, nodeSpec := range l { - ssetList = append(ssetList, nodeSpec.StatefulSet) + for _, resource := range l { + ssetList = append(ssetList, resource.StatefulSet) } return ssetList } diff --git a/operators/pkg/controller/elasticsearch/sset/getter.go b/operators/pkg/controller/elasticsearch/sset/getter.go new file mode 100644 index 0000000000..6771adb7e0 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/sset/getter.go @@ -0,0 +1,17 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package sset + +import ( + appsv1 "k8s.io/api/apps/v1" +) + +// Replicas returns the replicas configured for this StatefulSet, or 0 if nil. 
+func Replicas(statefulSet appsv1.StatefulSet) int32 { + if statefulSet.Spec.Replicas != nil { + return *statefulSet.Spec.Replicas + } + return 0 +} diff --git a/operators/pkg/controller/elasticsearch/sset/pod.go b/operators/pkg/controller/elasticsearch/sset/pod.go new file mode 100644 index 0000000000..9af5d8074f --- /dev/null +++ b/operators/pkg/controller/elasticsearch/sset/pod.go @@ -0,0 +1,11 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package sset + +import "fmt" + +func PodName(ssetName string, ordinal int) string { + return fmt.Sprintf("%s-%d", ssetName, ordinal) +} From 0546f81fa5b0ebaeba0e3adea87bea3572950d77 Mon Sep 17 00:00:00 2001 From: Sebastien Guilloux Date: Fri, 12 Jul 2019 16:11:01 +0200 Subject: [PATCH 04/31] Merge master branch into statefulset-refactoring (#1232) * Support for APM server configuration (#1181) * Add a config section to the APM server configuration * APM: Add support for keystore * Factorize ElasticsearchAuthSettings * Update dev setup doc + fix GKE bootstrap script (#1203) * Update dev setup doc + fix GKE bootstrap script * Update wording of container registry authentication * Ensure disks removal after removing cluster in GKE (#1163) * Update gke-cluster.sh * Implement cleanup for unused disks in GCP * Update Makefile * Update CI jobs to do proper cleanup * Normalize the raw config when creating canonical configs (#1208) This aims at counteracting the difference between JSON-centric serialization and the use of YAML as the serialization format in canonical configs. Without normalization, numeric values like 1 would differ when comparing configs, because JSON deserializes integer numbers to float64 while YAML deserializes them to uint64. * Homogenize logs (#1168) * Don't run tests if only docs are changed (#1216) * Update Jenkinsfile * Simplify notOnlyDocs() * Update Jenkinsfile * Push snapshot ECK release on successful PR build (#1184) * Update makefile's to support snapshots * Add snapshot releases to Jenkins pipelines * Cleanup * Rename RELEASE to USE_ELASTIC_DOCKER_REGISTRY * Update Jenkinsfile * Add a note on EKS inbound traffic & validating webhook (#1211) EKS users must explicitly allow communication from the k8s control plane to the nodes on port 443 in order for the control plane to reach the validating webhook. Should help with https://github.com/elastic/cloud-on-k8s/issues/896. * Update PodSpec with Hostname from PVC when re-using (#1204) * Bind the Debug HTTP server to localhost by default (#1220) * Run e2e tests against custom Docker image (#1135) * Add implementation * Update makefile's * Update Makefile * Rename Jenkisnfile * Fix review comments * Update e2e-custom.yml * Update e2e-custom.yml * Return deploy-all-in-one to normal * Delete GKE cluster only if changes not in docs (#1223) * Add operator version to resources (#1224) * Warn if unsupported distribution (#1228) The operator only works with the official ES distributions, which enable the security features available with the basic (free), gold and platinum licenses, ensuring that all clusters launched are secured by default. A check is done in the prepare-fs script by looking for the existence of the Elastic License. If it is not present, the script exits with a custom exit code. The ES reconciliation loop then sends an event of type warning if it detects that a prepare-fs init container terminated with this exit code.
* Document Elasticsearch update strategy change budget & groups (#1210) Add documentation for the `updateStrategy` section of the Elasticsearch spec. It documents how (and why) `changeBudget` and `groups` are used by ECK, and how both settings can be specified by the user. --- .ci/jobs/e2e-custom.yml | 30 +++ build/ci/Makefile | 38 ++- build/ci/delete_unused_disks.py | 25 ++ build/ci/e2e/GKE_k8s_versions.jenkinsfile | 2 +- build/ci/e2e/Jenkinsfile | 2 +- .../ci/e2e/custom_operator_image.jenkinsfile | 53 ++++ build/ci/pr/Jenkinsfile | 39 ++- build/ci/release/Jenkinsfile | 1 + docs/elasticsearch-spec.asciidoc | 138 ++++++++++ docs/k8s-quickstart.asciidoc | 2 + operators/Makefile | 25 +- operators/cmd/manager/main.go | 8 +- .../config/crds/apm_v1alpha1_apmserver.yaml | 20 ++ .../elasticsearch_v1alpha1_elasticsearch.yaml | 4 + .../config/crds/kibana_v1alpha1_kibana.yaml | 6 + operators/dev-setup.md | 9 +- operators/hack/gke-cluster.sh | 9 +- .../pkg/apis/apm/v1alpha1/apmserver_types.go | 48 ++-- .../apm/v1alpha1/zz_generated.deepcopy.go | 52 +--- .../pkg/apis/common/v1alpha1/association.go | 16 ++ .../apis/common/v1alpha1/authentication.go | 28 ++ .../common/v1alpha1/zz_generated.deepcopy.go | 46 ++++ .../v1alpha1/elasticsearch_types.go | 2 + .../pkg/apis/kibana/v1alpha1/kibana_types.go | 37 +-- .../apis/kibana/v1alpha1/kibana_types_test.go | 12 +- .../kibana/v1alpha1/zz_generated.deepcopy.go | 43 --- .../apmserver/apmserver_controller.go | 229 +++++++++------- .../pkg/controller/apmserver/config/config.go | 248 ++++++------------ .../controller/apmserver/config/reconcile.go | 81 ++++++ .../pkg/controller/apmserver/deployment.go | 14 - .../apmserver/deployment_control.go | 7 +- .../apmserver/{ => labels}/labels.go | 2 +- .../pkg/controller/apmserver/name/name.go | 43 +++ operators/pkg/controller/apmserver/pod.go | 36 ++- .../pkg/controller/apmserver/pod_test.go | 17 +- .../pkg/controller/apmserver/services.go | 7 +- operators/pkg/controller/apmserver/state.go | 8 + ...rverelasticsearchassociation_controller.go | 12 +- .../apmserverelasticsearchassociation/user.go | 4 +- .../pkg/controller/common/annotation/pod.go | 10 +- .../common/association/association.go | 37 +++ .../association/association_test.go} | 42 ++- .../association/keystore}/initcontainer.go | 53 +++- .../common/association/keystore/resources.go | 64 +++++ .../association/keystore/resources_test.go | 154 +++++++++++ .../association/keystore}/user_secret.go | 63 +++-- .../association/keystore}/user_secret_test.go | 9 +- .../common/association/keystore/volumes.go | 30 +++ .../common/certificates/ca_reconcile.go | 20 +- .../common/certificates/http/reconcile.go | 24 +- .../common/certificates/x509_othername.go | 3 +- .../controller/common/finalizer/handler.go | 13 +- .../common/reconciler/reconciler.go | 4 +- .../controller/common/reconciler/results.go | 7 +- .../common/settings/canonical_config.go | 14 +- .../common/settings/canonical_config_test.go | 39 +++ .../common/watches/expectations_watch.go | 4 +- .../pkg/controller/common/watches/handler.go | 4 +- .../controller/common/watches/named_watch.go | 10 +- .../certificates/transport/reconcile.go | 6 +- .../elasticsearch/driver/default.go | 21 +- .../elasticsearch/elasticsearch_controller.go | 14 +- .../initcontainer/initcontainer.go | 2 +- .../elasticsearch/initcontainer/prepare_fs.go | 2 +- .../initcontainer/prepare_fs_script.go | 13 +- .../elasticsearch/keystore/updater.go | 2 +- .../elasticsearch/mutation/change_group.go | 18 +- .../elasticsearch/mutation/performable.go | 2 +- 
.../elasticsearch/observer/manager.go | 2 +- .../elasticsearch/observer/observer.go | 6 +- .../elasticsearch/observer/state.go | 13 +- .../elasticsearch/processmanager/process.go | 2 +- .../elasticsearch/processmanager/state.go | 1 + .../elasticsearch/reconcile/state.go | 10 + .../elasticsearch/restart/annotations.go | 6 +- .../elasticsearch/restart/coordinated.go | 8 +- .../elasticsearch/restart/process_manager.go | 12 +- .../elasticsearch/restart/restart.go | 4 +- .../elasticsearch/settings/masters.go | 4 +- .../elasticsearch/settings/secure_settings.go | 2 +- .../elasticsearch/version/version6/zen1.go | 9 +- .../pkg/controller/kibana/config/settings.go | 32 +-- .../controller/kibana/config/settings_test.go | 2 + operators/pkg/controller/kibana/driver.go | 28 +- .../pkg/controller/kibana/driver_test.go | 2 +- .../controller/kibana/kibana_controller.go | 18 +- operators/pkg/controller/kibana/name/name.go | 7 +- operators/pkg/controller/kibana/pod/pod.go | 20 +- .../pkg/controller/kibana/pod/pod_test.go | 27 +- .../kibana/securesettings/securesettings.go | 40 --- .../securesettings/securesettings_test.go | 101 ------- operators/pkg/controller/kibana/state.go | 10 + .../pkg/controller/kibana/volume/volumes.go | 3 - .../association_controller.go | 20 +- .../controller/license/license_controller.go | 8 +- .../license/trial/trial_controller.go | 4 +- .../pkg/dev/portforward/pod_forwarder.go | 5 +- operators/test/e2e/apm/configuration_test.go | 210 +++++++++++++++ operators/test/e2e/kb/keystore_test.go | 12 +- operators/test/e2e/test/apmserver/builder.go | 32 +++ .../test/e2e/test/apmserver/checks_k8s.go | 4 +- .../test/e2e/test/elasticsearch/steps_init.go | 2 +- operators/test/e2e/test/k8s_client.go | 6 +- .../test/e2e/test/kibana/checks_keystore.go | 54 ---- operators/test/e2e/test/params.go | 2 +- operators/test/e2e/test/utils.go | 39 +++ 106 files changed, 1949 insertions(+), 905 deletions(-) create mode 100644 .ci/jobs/e2e-custom.yml create mode 100755 build/ci/delete_unused_disks.py create mode 100644 build/ci/e2e/custom_operator_image.jenkinsfile create mode 100644 operators/pkg/apis/common/v1alpha1/authentication.go create mode 100644 operators/pkg/controller/apmserver/config/reconcile.go delete mode 100644 operators/pkg/controller/apmserver/deployment.go rename operators/pkg/controller/apmserver/{ => labels}/labels.go (97%) create mode 100644 operators/pkg/controller/apmserver/name/name.go create mode 100644 operators/pkg/controller/common/association/association.go rename operators/pkg/controller/{apmserver/config/config_test.go => common/association/association_test.go} (64%) rename operators/pkg/controller/{kibana/securesettings => common/association/keystore}/initcontainer.go (50%) create mode 100644 operators/pkg/controller/common/association/keystore/resources.go create mode 100644 operators/pkg/controller/common/association/keystore/resources_test.go rename operators/pkg/controller/{kibana/securesettings => common/association/keystore}/user_secret.go (59%) rename operators/pkg/controller/{kibana/securesettings => common/association/keystore}/user_secret_test.go (95%) create mode 100644 operators/pkg/controller/common/association/keystore/volumes.go delete mode 100644 operators/pkg/controller/kibana/securesettings/securesettings.go delete mode 100644 operators/pkg/controller/kibana/securesettings/securesettings_test.go create mode 100644 operators/test/e2e/apm/configuration_test.go delete mode 100644 operators/test/e2e/test/kibana/checks_keystore.go diff --git 
a/.ci/jobs/e2e-custom.yml b/.ci/jobs/e2e-custom.yml new file mode 100644 index 0000000000..146f5c1b17 --- /dev/null +++ b/.ci/jobs/e2e-custom.yml @@ -0,0 +1,30 @@ +--- +- job: + description: Job that runs e2e tests against custom ECK image running in a dedicated k8s cluster in GKE. This Job is managed by JJB. + logrotate: + daysToKeep: 7 + numToKeep: 100 + artifactDaysToKeep: 5 + artifactNumToKeep: 10 + name: cloud-on-k8s-e2e-tests-custom + project-type: pipeline + parameters: + - string: + name: IMAGE + description: "Docker image with ECK" + - string: + name: VERSION + default: 1.12 + description: "Kubernetes version, default is 1.12" + concurrent: true + pipeline-scm: + scm: + - git: + url: https://github.com/elastic/cloud-on-k8s + branches: + - master + credentials-id: 'f6c7695a-671e-4f4f-a331-acdce44ff9ba' + script-path: build/ci/e2e/custom_operator_image.jenkinsfile + lightweight-checkout: true + wrappers: + - ansicolor diff --git a/build/ci/Makefile b/build/ci/Makefile index a0ec04ea30..c329b1a404 100644 --- a/build/ci/Makefile +++ b/build/ci/Makefile @@ -97,7 +97,8 @@ ci-release: vault-public-key vault-docker-creds -e "REPOSITORY=$(REPOSITORY)" \ -e "ELASTIC_DOCKER_LOGIN=$(DOCKER_LOGIN)" \ -e "ELASTIC_DOCKER_PASSWORD=$(shell cat $(DOCKER_CREDENTIALS_FILE))" \ - -e "RELEASE=true" \ + -e "USE_ELASTIC_DOCKER_REGISTRY=true" \ + -e "SNAPSHOT_RELEASE=$(SNAPSHOT_RELEASE)" \ cloud-on-k8s-ci-release \ bash -c "make -C operators ci-release" @@ -130,7 +131,26 @@ ci-e2e: vault-gke-creds -e "TESTS_MATCH=$(TESTS_MATCH)" \ -e "GKE_CLUSTER_VERSION=$(GKE_CLUSTER_VERSION)" \ cloud-on-k8s-ci-e2e \ - bash -c "make -C operators ci-e2e GKE_MACHINE_TYPE=n1-standard-8" + bash -c "make -C operators ci-e2e" + +# Run e2e tests in GKE against provided ECK image +ci-e2e-rc: vault-gke-creds + docker build -f Dockerfile -t cloud-on-k8s-ci-e2e . 
+ docker run --rm -t \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ + -w $(GO_MOUNT_PATH) \ + -e "IMG_SUFFIX=-ci" \ + -e "GCLOUD_PROJECT=$(GCLOUD_PROJECT)" \ + -e "REGISTRY=$(REGISTRY)" \ + -e "REPOSITORY=$(GCLOUD_PROJECT)" \ + -e "GKE_CLUSTER_NAME=$(GKE_CLUSTER_NAME)" \ + -e "GKE_SERVICE_ACCOUNT_KEY_FILE=$(GO_MOUNT_PATH)/build/ci/$(GKE_CREDS_FILE)" \ + -e "TESTS_MATCH=$(TESTS_MATCH)" \ + -e "GKE_CLUSTER_VERSION=$(GKE_CLUSTER_VERSION)" \ + -e "OPERATOR_IMAGE=$(OPERATOR_IMAGE)" \ + cloud-on-k8s-ci-e2e \ + bash -c "make -C operators ci-e2e-rc" # Remove k8s cluster ci-e2e-delete-cluster: vault-gke-creds @@ -145,10 +165,22 @@ ci-e2e-delete-cluster: vault-gke-creds cloud-on-k8s-ci-e2e \ bash -c "make -C operators set-context-gke delete-gke" +# Remove all unused resources in GKE +ci-gke-cleanup: ci-e2e-delete-cluster + docker run --rm -t \ + -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ + -w $(GO_MOUNT_PATH) \ + -e "GCLOUD_PROJECT=$(GCLOUD_PROJECT)" \ + -e "GKE_CLUSTER_NAME=$(GKE_CLUSTER_NAME)" \ + -e "GKE_SERVICE_ACCOUNT_KEY_FILE=$(GO_MOUNT_PATH)/build/ci/$(GKE_CREDS_FILE)" \ + cloud-on-k8s-ci-e2e \ + bash -c "GKE_CLUSTER_VERSION=1.11 $(GO_MOUNT_PATH)/operators/hack/gke-cluster.sh auth && \ + $(GO_MOUNT_PATH)/build/ci/delete_unused_disks.py" + # Run docs build ci-build-docs: docker run --rm -t \ - -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ + -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ docker.elastic.co/docs/build:1 \ bash -c "git clone https://github.com/elastic/docs.git && \ /docs/build_docs.pl --doc $(GO_MOUNT_PATH)/docs/index.asciidoc --out $(GO_MOUNT_PATH)/docs/html --chunk 1 && \ diff --git a/build/ci/delete_unused_disks.py b/build/ci/delete_unused_disks.py new file mode 100755 index 0000000000..c90249acfd --- /dev/null +++ b/build/ci/delete_unused_disks.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +import os +import json + +project = os.environ['GCLOUD_PROJECT'] + +os.system('gcloud compute disks list --filter="-users:*" --format="json" --project {} > unused_disks.json' + .format(project)) + +with open('unused_disks.json', 'r') as f: + content = f.read() + try: + parsed_json_dict = json.loads(content) + if len(parsed_json_dict) == 0: + print("There is no unused disks. 
Congratulations!") + else: + for entry in parsed_json_dict: + name = entry['name'] + head, tail = os.path.split(entry['zone']) + os.system('gcloud compute disks delete {} --project {} --zone {} --quiet' + .format(name, project, tail)) + except: + print("Can't parse JSON:") + print(content) diff --git a/build/ci/e2e/GKE_k8s_versions.jenkinsfile b/build/ci/e2e/GKE_k8s_versions.jenkinsfile index 18742e7dbe..c917d409f1 100644 --- a/build/ci/e2e/GKE_k8s_versions.jenkinsfile +++ b/build/ci/e2e/GKE_k8s_versions.jenkinsfile @@ -76,7 +76,7 @@ pipeline { for (int i = 0; i < clusters.size(); i++) { sh """ export GKE_CLUSTER_NAME=${clusters[i]} - make -C build/ci ci-e2e-delete-cluster + make -C build/ci ci-gke-cleanup """ } } diff --git a/build/ci/e2e/Jenkinsfile b/build/ci/e2e/Jenkinsfile index a8c4481bae..e24318dae6 100644 --- a/build/ci/e2e/Jenkinsfile +++ b/build/ci/e2e/Jenkinsfile @@ -43,7 +43,7 @@ pipeline { } } cleanup { - sh 'make -C build/ci ci-e2e-delete-cluster' + sh 'make -C build/ci ci-gke-cleanup' cleanWs() } } diff --git a/build/ci/e2e/custom_operator_image.jenkinsfile b/build/ci/e2e/custom_operator_image.jenkinsfile new file mode 100644 index 0000000000..7bbfe0af60 --- /dev/null +++ b/build/ci/e2e/custom_operator_image.jenkinsfile @@ -0,0 +1,53 @@ +pipeline { + + agent { + label 'linux' + } + + options { + timeout(time: 150, unit: 'MINUTES') + } + + environment { + VAULT_ADDR = credentials('vault-addr') + VAULT_ROLE_ID = credentials('vault-role-id') + VAULT_SECRET_ID = credentials('vault-secret-id') + REGISTRY = "eu.gcr.io" + GCLOUD_PROJECT = credentials('k8s-operators-gcloud-project') + GKE_CLUSTER_VERSION = "${VERSION}" + GKE_CLUSTER_NAME = "${BUILD_TAG}" + OPERATOR_IMAGE = "${IMAGE}" + LATEST_RELEASED_IMG = "${IMAGE}" + } + + stages { + stage('Checkout from GitHub') { + steps { + checkout scm + } + } + stage("Run E2E tests") { + steps { + sh 'make -C build/ci ci-e2e-rc' + } + } + } + + post { + unsuccessful { + script { + def msg = "E2E tests failed!\r\n" + env.BUILD_URL + slackSend botUser: true, + channel: '#cloud-k8s', + color: 'danger', + message: msg, + tokenCredentialId: 'cloud-ci-slack-integration-token' + } + } + cleanup { + sh 'make -C build/ci ci-e2e-delete-cluster' + cleanWs() + } + } + +} diff --git a/build/ci/pr/Jenkinsfile b/build/ci/pr/Jenkinsfile index ea56930e7e..2aca2b1d45 100644 --- a/build/ci/pr/Jenkinsfile +++ b/build/ci/pr/Jenkinsfile @@ -23,11 +23,16 @@ pipeline { stage('Run tests in parallel') { parallel { stage("Run unit and integration tests") { + when { + expression { + checkout scm + notOnlyDocs() + } + } agent { label 'linux' } steps { - checkout scm sh 'make -C build/ci ci-pr' } } @@ -38,11 +43,16 @@ pipeline { } } stage("Run smoke E2E tests") { + when { + expression { + checkout scm + notOnlyDocs() + } + } agent { label 'linux' } steps { - checkout scm sh 'make -C build/ci ci-e2e' } } @@ -51,10 +61,33 @@ pipeline { } post { + success { + withEnv([ + 'REGISTRY=push.docker.elastic.co', + 'REPOSITORY=eck-snapshots', + 'IMG_SUFFIX=', + 'SNAPSHOT_RELEASE=true', + 'TAG_NAME=${ghprbPullId}' + ]) { + sh 'make -C build/ci ci-release' + } + } cleanup { - sh 'make -C build/ci ci-e2e-delete-cluster' + script { + if (notOnlyDocs()) { + sh 'make -C build/ci ci-gke-cleanup' + } + } cleanWs() } } } + +def notOnlyDocs() { + // grep succeeds if there is at least one line without docs/ + return sh ( + script: "git diff --name-status HEAD~1 HEAD | grep -v docs/", + returnStatus: true + ) == 0 +} diff --git a/build/ci/release/Jenkinsfile b/build/ci/release/Jenkinsfile 
index 27322e769d..db7ecd5905 100644 --- a/build/ci/release/Jenkinsfile +++ b/build/ci/release/Jenkinsfile @@ -13,6 +13,7 @@ pipeline { IMG_NAME = 'eck-operator' IMG_SUFFIX = '' LATEST_RELEASED_IMG = "docker.elastic.co/${REPOSITORY}/${IMG_NAME}:${TAG_NAME}" + SNAPSHOT_RELEASE = 'false' } options { diff --git a/docs/elasticsearch-spec.asciidoc b/docs/elasticsearch-spec.asciidoc index 7b1c435c33..ac826c98eb 100644 --- a/docs/elasticsearch-spec.asciidoc +++ b/docs/elasticsearch-spec.asciidoc @@ -100,3 +100,141 @@ Example to create a Kubernetes TLS secret with a self-signed certificate: $ openssl req -x509 -newkey rsa:4096 -keyout tls.key -out tls.crt -days 365 -nodes $ kubectl create secret tls my-cert --cert tls.crt --key tls.key ---- + +[id="{p}-update-strategy"] +=== Update strategy + +The Elasticsearch cluster configuration can be updated at any time: + +* add new nodes +* remove some nodes +* change Elasticsearch configuration +* change pod resources (example: memory limits, cpu limit, environment variables, etc.) + +On any change, ECK reconciles Kubernetes resources towards the desired cluster definition. Changes are done in a rolling fashion: the state of the cluster is continuously monitored, to allow addition of new nodes and removal of deprecated nodes. + +[id="{p}-change-budget"] +==== Change budget + +No downtime should be expected when the cluster topology changes. Shards on deprecated nodes are migrated away so the node can be safely removed. + +For example, in order to mutate a 3-nodes cluster with 16GB memory limit on each node to a 3-nodes cluster with 32GB memory limit on each node, ECK will: + +1. add a new 32GB node: the cluster temporarily has 4 nodes +2. migrate data away from the first 16GB node +3. once data is migrated, remove the first 16GB node +4. follow the same steps for the 2 other 16GB nodes + +The cluster health stays green during the entire process. +By default, only one extra node can be added on top of the expected ones. In the example above, a 3-nodes cluster may temporarily be composed of 4 nodes while data migration is in progress. + +This behaviour can be controlled through the `changeBudget` section of the Cluster specification `updateStrategy`. If not specified, it defaults to the following: + +[source,yaml] +---- +spec: + updateStrategy: + changeBudget: + maxSurge: 1 + maxUnavailable: 0 +---- + +* `maxSurge` specifies the number of pods that can be added to the cluster, on top of the desired number of nodes in the spec during cluster updates +* `maxUnavailable` specifies the number of pods that can be made unavailable during cluster updates + +The default of `maxSurge: 1; maxUnavailable: 0` spins up an additional Elasticsearch node during cluster updates. +It is possible to speed up cluster topology changes by increasing `maxSurge`. For example, setting `maxSurge: 3` would allow 3 new nodes to be created while the original 3 migrate data in parallel. +The cluster would then temporarily have 6 nodes. + +Setting `maxSurge` to 0 and `maxUnavailable` to a positive value only allows a maximum number of pods to exist on the Kubernetes cluster. +For example, `maxSurge: 0; maxUnavailable: 1` would perform the 3 nodes upgrade this way: + +1. migrate data away from the first 16GB node +2. once data is migrated, remove the 16GB node: the cluster temporarily has 2 nodes +3. add a new 32GB node: the cluster grows to 3 nodes +4. 
follow the same steps for the 2 other 16GB nodes + +Even though any `changeBudget` can be specified, ECK will make sure some invariants are respected while a mutation is in progress: + +* there must be at least one master node alive in the cluster +* there must be at least one data node alive in the cluster + +Under certain circumstances, ECK will therefore ignore the change budget. For example, a safe migration from a 1-node cluster to another 1-node cluster can only be done by temporarily setting up a 2-nodes cluster. + +It is possible to configure the `changeBudget` to optimize for reusing Persistent Volumes instead of migrating data across nodes. This feature is not supported yet: more details to come in the next release. + +[id="{p}-group-definitions"] +==== Group definitions + +To optimize upgrades for highly available setups, ECK can take arbitrary node groupings into account. It prioritizes recovery of entire availability zones in catastrophic scenarios. + +For example, let's create a zone-aware Elasticsearch cluster. Some nodes will be created in `europe-west3-a`, and some others in `europe-west3-b`: + +[source,yaml] +---- +apiVersion: elasticsearch.k8s.elastic.co/v1alpha1 +kind: Elasticsearch +metadata: + name: quickstart +spec: + version: 7.1.0 + nodes: + - nodeCount: 3 + config: + node.attr.zone: europe-west3-a + cluster.routing.allocation.awareness.attributes: zone + podTemplate: + meta: + labels: + nodesGroup: group-a + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: failure-domain.beta.kubernetes.io/zone + operator: In + values: + - europe-west3-a + - nodeCount: 3 + config: + node.attr.zone: europe-west3-b + cluster.routing.allocation.awareness.attributes: zone + podTemplate: + meta: + labels: + nodesGroup: group-b + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: failure-domain.beta.kubernetes.io/zone + operator: In + values: + - europe-west3-b + updateStrategy: + changeBudget: + maxSurge: 1 + maxUnavailable: 0 + groups: + - selector: + matchLabels: + nodesGroup: group-a + - selector: + matchLabels: + nodesGroup: group-b +---- + +If a modification is applied to the Elasticsearch configuration of these 6 nodes, ECK will slowly upgrade the cluster nodes, taking the provided `changeBudget` into account. +In this example, it will spawn one additional node at a time, and migrate data away from one node at a time. + +Imagine a catastrophic situation occurs while the mutation is in progress: all nodes in `europe-west3-b` suddenly disappear. +ECK will detect it, and recreate the 3 missing nodes as expected. However, since a cluster upgrade is already in progress, the current `changeBudget` may already be maxed out, preventing new nodes from being created in `europe-west3-b`. + +In this situation, it would be preferable to first recreate the missing nodes in `europe-west3-b`, then continue the cluster upgrade. + +In order to do so, ECK must know about the logical grouping of nodes. Since this is an arbitrary setting (can represent availability zones, but also node roles, hot-warm topologies, etc.), it must be specified in the `updateStrategy.groups` section of the Elasticsearch specification. +Node grouping is expressed through labels on the resources. In the example above, 3 pods are labeled with `group-a`, and the 3 other pods with `group-b`.
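As a quick illustration (an assumption based on the example above, not a documented guarantee), the pods belonging to each logical group can be listed with a standard Kubernetes label selector:

[source,sh]
----
# Label keys and values come from the podTemplate metadata in the example above.
kubectl get pods -l nodesGroup=group-a
kubectl get pods -l nodesGroup=group-b
----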
\ No newline at end of file diff --git a/docs/k8s-quickstart.asciidoc b/docs/k8s-quickstart.asciidoc index 3d4f4e239a..bbaf0c274b 100644 --- a/docs/k8s-quickstart.asciidoc +++ b/docs/k8s-quickstart.asciidoc @@ -22,6 +22,8 @@ Make sure that you have link:https://kubernetes.io/docs/tasks/tools/install-kube NOTE: If you are using GKE, make sure your user has `cluster-admin` permissions. For more information, see link:https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control#iam-rolebinding-bootstrap[Prerequisites for using Kubernetes RBAC on GKE]. +NOTE: If you are using Amazon EKS, make sure the Kubernetes control plane is allowed to communicate with nodes port 443. This is required for communication with the Validating Webhook. For more information, see link:https://docs.aws.amazon.com/eks/latest/userguide/sec-group-reqs.html[Recommended inbound traffic]. + . Install link:https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/[custom resource definitions] and the operator with its RBAC rules: + [source,sh] diff --git a/operators/Makefile b/operators/Makefile index 50ec5fcc09..e83c756545 100644 --- a/operators/Makefile +++ b/operators/Makefile @@ -38,7 +38,10 @@ endif IMG_SUFFIX ?= -$(subst _,,$(USER)) IMG ?= $(REGISTRY)/$(REPOSITORY)/$(NAME)$(IMG_SUFFIX) TAG ?= $(shell git rev-parse --short --verify HEAD) -OPERATOR_IMAGE ?= $(IMG):$(TAG) +OPERATOR_IMAGE ?= $(IMG):$(VERSION)-$(TAG) +ifeq ($(SNAPSHOT_RELEASE), false) + OPERATOR_IMAGE = $(IMG):$(TAG) +endif OPERATOR_IMAGE_LATEST ?= $(IMG):latest GO_LDFLAGS := -X github.com/elastic/cloud-on-k8s/operators/pkg/about.version=$(VERSION) \ @@ -46,6 +49,9 @@ GO_LDFLAGS := -X github.com/elastic/cloud-on-k8s/operators/pkg/about.version=$(V -X github.com/elastic/cloud-on-k8s/operators/pkg/about.buildDate=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \ -X github.com/elastic/cloud-on-k8s/operators/pkg/about.buildSnapshot=$(SNAPSHOT) +# Setting for CI, if set to true will prevent building and using local Docker image +SKIP_DOCKER_COMMAND ?= false + ## -- Namespaces # namespace in which the global operator is deployed (see config/global-operator) @@ -146,7 +152,11 @@ endif endif # Deploy both the global and namespace operators against the current k8s cluster -deploy: check-gke install-crds docker-build docker-push apply-operators +deploy: check-gke install-crds +ifeq ($(SKIP_DOCKER_COMMAND), false) + $(MAKE) docker-build docker-push +endif + $(MAKE) apply-operators apply-operators: OPERATOR_IMAGE=$(OPERATOR_IMAGE) \ @@ -235,8 +245,10 @@ bootstrap-gke: require-gcloud-project ifeq ($(PSP), 1) kubectl apply -f config/dev/elastic-psp.yaml endif +ifeq ($(SKIP_DOCKER_COMMAND), false) # push "latest" operator image to be used for init containers when running the operator locally $(MAKE) docker-build docker-push OPERATOR_IMAGE=$(OPERATOR_IMAGE_LATEST) +endif delete-gke: require-gcloud-project GKE_CLUSTER_VERSION=$(GKE_CLUSTER_VERSION) ./hack/gke-cluster.sh delete @@ -269,7 +281,7 @@ docker-build: -t $(OPERATOR_IMAGE) docker-push: -ifeq ($(RELEASE), true) +ifeq ($(USE_ELASTIC_DOCKER_REGISTRY), true) @ docker login -u $(ELASTIC_DOCKER_LOGIN) -p $(ELASTIC_DOCKER_PASSWORD) push.docker.elastic.co endif ifeq ($(KUBECTL_CLUSTER), minikube) @@ -341,14 +353,19 @@ ci: dep-vendor-only check-fmt generate check-local-changes unit integration e2e- # Let's use n1-standard-8 machine to have enough room for multiple pods on a single node. 
ci-e2e: ci-bootstrap-gke e2e +# Run e2e tests in gke using custom operator image +ci-e2e-rc: export SKIP_DOCKER_COMMAND=true +ci-e2e-rc: ci-bootstrap-gke e2e + ci-bootstrap-gke: PSP=1 GKE_MACHINE_TYPE=n1-standard-8 $(MAKE) bootstrap-gke ci-release: export GO_TAGS = release ci-release: export LICENSE_PUBKEY = $(ROOT_DIR)/build/ci/license.key -ci-release: export LATEST_RELEASED_IMG = docker.elastic.co/eck/eck-operator:$(TAG) +ci-release: export LATEST_RELEASED_IMG = docker.elastic.co/$(REPOSITORY)/eck-operator:$(TAG) ci-release: @ $(MAKE) dep-vendor-only generate docker-build docker-push + @ echo $(OPERATOR_IMAGE) was pushed! ########################## diff --git a/operators/cmd/manager/main.go b/operators/cmd/manager/main.go index c9f8995711..5b108ade60 100644 --- a/operators/cmd/manager/main.go +++ b/operators/cmd/manager/main.go @@ -135,11 +135,10 @@ func init() { ) Cmd.Flags().String( DebugHTTPServerListenAddressFlag, - ":6060", + "localhost:6060", "Listen address for debug HTTP server (only available in development mode)", ) - viper.BindPFlags(Cmd.Flags()) // enable using dashed notation in flags and underscores in env viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) @@ -212,11 +211,14 @@ func execute() { // restrict the operator to watch resources within a single namespace, unless empty Namespace: viper.GetString(NamespaceFlagName), } + + // only expose prometheus metrics if provided a specific port metricsPort := viper.GetInt(MetricsPortFlag) if metricsPort != 0 { + log.Info("Exposing Prometheus metrics on /metrics", "port", metricsPort) opts.MetricsBindAddress = fmt.Sprintf(":%d", metricsPort) - log.Info(fmt.Sprintf("Exposing Prometheus metrics on /metrics%s", opts.MetricsBindAddress)) } + mgr, err := manager.New(cfg, opts) if err != nil { log.Error(err, "unable to set up overall controller manager") diff --git a/operators/config/crds/apm_v1alpha1_apmserver.yaml b/operators/config/crds/apm_v1alpha1_apmserver.yaml index 1f57e3dadd..deee499398 100644 --- a/operators/config/crds/apm_v1alpha1_apmserver.yaml +++ b/operators/config/crds/apm_v1alpha1_apmserver.yaml @@ -47,6 +47,9 @@ spec: type: object spec: properties: + config: + description: Config represents the APM configuration. + type: object featureFlags: description: FeatureFlags are apm-specific flags that enable or disable specific experimental features @@ -130,6 +133,9 @@ spec: username: description: User is the username to use. type: string + required: + - username + - password type: object secret: description: SecretKeyRef is a secret that contains the @@ -173,12 +179,26 @@ spec: variables, affinity, resources, etc. for the pods created from this NodeSpec. type: object + secureSettings: + description: SecureSettings reference a secret containing secure settings, + to be injected into the APM keystore on each node. Each individual + key/value entry in the referenced secret is considered as an individual + secure setting to be injected. The secret must exist in the same namespace + as the APM resource. 
+ properties: + secretName: + type: string + type: object version: description: Version represents the version of the APM Server type: string type: object status: properties: + controllerVersion: + description: ControllerVersion is the version of the controller that + last updated the ApmServer instance + type: string health: type: string secretTokenSecret: diff --git a/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml b/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml index 70c9306d9a..5e6d467f9b 100644 --- a/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml +++ b/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml @@ -246,6 +246,10 @@ spec: properties: clusterUUID: type: string + controllerVersion: + description: ControllerVersion is the version of the controller that + last updated the Elasticsearch cluster + type: string health: type: string masterNode: diff --git a/operators/config/crds/kibana_v1alpha1_kibana.yaml b/operators/config/crds/kibana_v1alpha1_kibana.yaml index 31b68ed907..77750a3464 100644 --- a/operators/config/crds/kibana_v1alpha1_kibana.yaml +++ b/operators/config/crds/kibana_v1alpha1_kibana.yaml @@ -72,6 +72,8 @@ spec: - password type: object secret: + description: SecretKeyRef is a secret that contains the credentials + to use. type: object type: object certificateAuthorities: @@ -187,6 +189,10 @@ spec: properties: associationStatus: type: string + controllerVersion: + description: ControllerVersion is the version of the controller that + last updated the Kibana instance + type: string health: type: string type: object diff --git a/operators/dev-setup.md b/operators/dev-setup.md index b227794025..9859cff7bb 100644 --- a/operators/dev-setup.md +++ b/operators/dev-setup.md @@ -30,7 +30,9 @@ Run `make check-requisites` to check that all dependencies are installed. ## Development -1. Get a working development Kubernetes cluster. You can either use: +1. Run `make dep-vendor-only` to download extra Go libraries needed to compile the project and store them in the vendor directory. + +2. Get a working development Kubernetes cluster. You can either use: [Minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/#install-minikube) @@ -41,15 +43,16 @@ Run `make check-requisites` to check that all dependencies are installed. or [GKE](https://cloud.google.com/kubernetes-engine/) + Make sure that container registry authentication is correctly configured as described [here](https://cloud.google.com/container-registry/docs/advanced-authentication). + ```bash export GCLOUD_PROJECT=my-project-id make bootstrap-gke # Sets up GKE cluster with required resources ``` -2. Deploy the operator. +3. Deploy the operator. - * `make dep-vendor-only` to download extra Go libraries needed to compile the project and stores them in the vendor directory. * `make run` to run the operator locally, or `make deploy` to deploy the operators into the configured k8s cluster. * `make samples` to apply a sample stack resource. diff --git a/operators/hack/gke-cluster.sh b/operators/hack/gke-cluster.sh index 63c4aaf7ce..969ceda131 100755 --- a/operators/hack/gke-cluster.sh +++ b/operators/hack/gke-cluster.sh @@ -8,7 +8,7 @@ # of the necessary default settings so that no environment variable has to # be specified. 
# -# Usage: gke-cluster.sh (create|delete|name|registry|credentials) +# Usage: gke-cluster.sh (create|delete|name|registry|credentials|auth) # set -eu @@ -58,7 +58,7 @@ create_cluster() { exit 0 fi - local PSP_OPTION + local PSP_OPTION="" if [ "$PSP" == "1" ]; then PSP_OPTION="--enable-pod-security-policy" fi @@ -120,8 +120,11 @@ main() { auth_service_account export_credentials ;; + auth) + auth_service_account + ;; *) - echo "Usage: gke-cluster.sh (create|delete|name|registry|credentials)"; exit 1 + echo "Usage: gke-cluster.sh (create|delete|name|registry|credentials|auth)"; exit 1 ;; esac } diff --git a/operators/pkg/apis/apm/v1alpha1/apmserver_types.go b/operators/pkg/apis/apm/v1alpha1/apmserver_types.go index 1cf041f91a..150ab0e4a0 100644 --- a/operators/pkg/apis/apm/v1alpha1/apmserver_types.go +++ b/operators/pkg/apis/apm/v1alpha1/apmserver_types.go @@ -7,7 +7,6 @@ package v1alpha1 import ( commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" corev1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -24,6 +23,9 @@ type ApmServerSpec struct { // NodeCount defines how many nodes the Apm Server deployment must have. NodeCount int32 `json:"nodeCount,omitempty"` + // Config represents the APM configuration. + Config *commonv1alpha1.Config `json:"config,omitempty"` + // HTTP contains settings for HTTP. HTTP commonv1alpha1.HTTPConfig `json:"http,omitempty"` @@ -36,6 +38,13 @@ type ApmServerSpec struct { // +optional PodTemplate corev1.PodTemplateSpec `json:"podTemplate,omitempty"` + // SecureSettings reference a secret containing secure settings, to be injected + // into the APM keystore on each node. + // Each individual key/value entry in the referenced secret is considered as an + // individual secure setting to be injected. + // The secret must exist in the same namespace as the APM resource. + SecureSettings *commonv1alpha1.SecretRef `json:"secureSettings,omitempty"` + // FeatureFlags are apm-specific flags that enable or disable specific experimental features FeatureFlags commonv1alpha1.FeatureFlags `json:"featureFlags,omitempty"` } @@ -56,7 +65,7 @@ type ElasticsearchOutput struct { Hosts []string `json:"hosts,omitempty"` // Auth configures authentication for APM Server to use. - Auth ElasticsearchAuth `json:"auth,omitempty"` + Auth commonv1alpha1.ElasticsearchAuth `json:"auth,omitempty"` // SSL configures TLS-related configuration for Elasticsearch SSL ElasticsearchOutputSSL `json:"ssl,omitempty"` @@ -89,6 +98,8 @@ type ApmServerStatus struct { SecretTokenSecretName string `json:"secretTokenSecret,omitempty"` // Association is the status of any auto-linking to Elasticsearch clusters. Association commonv1alpha1.AssociationStatus + // ControllerVersion is the version of the controller that last updated the ApmServer instance + ControllerVersion string `json:"controllerVersion,omitempty"` } // IsDegraded returns true if the current status is worse than the previous. @@ -101,23 +112,6 @@ func (e ElasticsearchOutput) IsConfigured() bool { return len(e.Hosts) > 0 } -// ElasticsearchAuth contains auth config for APM Server to use with an Elasticsearch cluster -// TODO: this is a good candidate for sharing/reuse between this and Kibana due to association reuse potential. -type ElasticsearchAuth struct { - // Inline is auth provided as plaintext inline credentials. - Inline *ElasticsearchInlineAuth `json:"inline,omitempty"` - // SecretKeyRef is a secret that contains the credentials to use. 
- SecretKeyRef *v1.SecretKeySelector `json:"secret,omitempty"` -} - -// ElasticsearchInlineAuth is a basic username/password combination. -type ElasticsearchInlineAuth struct { - // User is the username to use. - Username string `json:"username,omitempty"` - // Password is the password to use. - Password string `json:"password,omitempty"` -} - // +genclient // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -149,3 +143,19 @@ type ApmServerList struct { func init() { SchemeBuilder.Register(&ApmServer{}, &ApmServerList{}) } + +// IsMarkedForDeletion returns true if the APM is going to be deleted +func (as *ApmServer) IsMarkedForDeletion() bool { + if as.DeletionTimestamp.IsZero() { // already handles nil pointer + return false + } + return true +} + +func (as *ApmServer) ElasticsearchAuth() commonv1alpha1.ElasticsearchAuth { + return as.Spec.Output.Elasticsearch.Auth +} + +func (as *ApmServer) SecureSettings() *commonv1alpha1.SecretRef { + return as.Spec.SecureSettings +} diff --git a/operators/pkg/apis/apm/v1alpha1/zz_generated.deepcopy.go b/operators/pkg/apis/apm/v1alpha1/zz_generated.deepcopy.go index 7d818712b6..d9a2ed652b 100644 --- a/operators/pkg/apis/apm/v1alpha1/zz_generated.deepcopy.go +++ b/operators/pkg/apis/apm/v1alpha1/zz_generated.deepcopy.go @@ -10,7 +10,6 @@ package v1alpha1 import ( commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" - v1 "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -78,9 +77,18 @@ func (in *ApmServerList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ApmServerSpec) DeepCopyInto(out *ApmServerSpec) { *out = *in + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = (*in).DeepCopy() + } in.HTTP.DeepCopyInto(&out.HTTP) in.Output.DeepCopyInto(&out.Output) in.PodTemplate.DeepCopyInto(&out.PodTemplate) + if in.SecureSettings != nil { + in, out := &in.SecureSettings, &out.SecureSettings + *out = new(commonv1alpha1.SecretRef) + **out = **in + } if in.FeatureFlags != nil { in, out := &in.FeatureFlags, &out.FeatureFlags *out = make(commonv1alpha1.FeatureFlags, len(*in)) @@ -118,48 +126,6 @@ func (in *ApmServerStatus) DeepCopy() *ApmServerStatus { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ElasticsearchAuth) DeepCopyInto(out *ElasticsearchAuth) { - *out = *in - if in.Inline != nil { - in, out := &in.Inline, &out.Inline - *out = new(ElasticsearchInlineAuth) - **out = **in - } - if in.SecretKeyRef != nil { - in, out := &in.SecretKeyRef, &out.SecretKeyRef - *out = new(v1.SecretKeySelector) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticsearchAuth. -func (in *ElasticsearchAuth) DeepCopy() *ElasticsearchAuth { - if in == nil { - return nil - } - out := new(ElasticsearchAuth) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ElasticsearchInlineAuth) DeepCopyInto(out *ElasticsearchInlineAuth) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticsearchInlineAuth. 
-func (in *ElasticsearchInlineAuth) DeepCopy() *ElasticsearchInlineAuth { - if in == nil { - return nil - } - out := new(ElasticsearchInlineAuth) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ElasticsearchOutput) DeepCopyInto(out *ElasticsearchOutput) { *out = *in diff --git a/operators/pkg/apis/common/v1alpha1/association.go b/operators/pkg/apis/common/v1alpha1/association.go index 4bb8088352..37858fcca1 100644 --- a/operators/pkg/apis/common/v1alpha1/association.go +++ b/operators/pkg/apis/common/v1alpha1/association.go @@ -4,6 +4,11 @@ package v1alpha1 +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + // AssociationStatus is the status of an assocation resource. type AssociationStatus string @@ -12,3 +17,14 @@ const ( AssociationEstablished AssociationStatus = "Established" AssociationFailed AssociationStatus = "Failed" ) + +// Associated interface represents a Elastic stack application that is associated with an Elasticsearch cluster. +// An associated object needs some credentials to establish a connection to the Elasticsearch cluster and usually it +// offers a keystore which in ECK is represented with an underlying Secret. +// Kibana and the APM server are two examples of associated objects. +type Associated interface { + metav1.Object + runtime.Object + ElasticsearchAuth() ElasticsearchAuth + SecureSettings() *SecretRef +} diff --git a/operators/pkg/apis/common/v1alpha1/authentication.go b/operators/pkg/apis/common/v1alpha1/authentication.go new file mode 100644 index 0000000000..878b71ed3a --- /dev/null +++ b/operators/pkg/apis/common/v1alpha1/authentication.go @@ -0,0 +1,28 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v1alpha1 + +import v1 "k8s.io/api/core/v1" + +// ElasticsearchAuth contains auth config for Kibana to use with an Elasticsearch cluster +type ElasticsearchAuth struct { + // Inline is auth provided as plaintext inline credentials. + Inline *ElasticsearchInlineAuth `json:"inline,omitempty"` + // SecretKeyRef is a secret that contains the credentials to use. + SecretKeyRef *v1.SecretKeySelector `json:"secret,omitempty"` +} + +// IsConfigured returns true if one of the possible auth mechanisms is configured. +func (ea ElasticsearchAuth) IsConfigured() bool { + return ea.Inline != nil || ea.SecretKeyRef != nil +} + +// ElasticsearchInlineAuth is a basic username/password combination. +type ElasticsearchInlineAuth struct { + // User is the username to use. + Username string `json:"username"` + // Password is the password to use. + Password string `json:"password"` +} diff --git a/operators/pkg/apis/common/v1alpha1/zz_generated.deepcopy.go b/operators/pkg/apis/common/v1alpha1/zz_generated.deepcopy.go index d5d044d36e..08e0e99834 100644 --- a/operators/pkg/apis/common/v1alpha1/zz_generated.deepcopy.go +++ b/operators/pkg/apis/common/v1alpha1/zz_generated.deepcopy.go @@ -8,6 +8,10 @@ package v1alpha1 +import ( + v1 "k8s.io/api/core/v1" +) + // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config. 
func (in *Config) DeepCopy() *Config { if in == nil { @@ -18,6 +22,48 @@ func (in *Config) DeepCopy() *Config { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ElasticsearchAuth) DeepCopyInto(out *ElasticsearchAuth) { + *out = *in + if in.Inline != nil { + in, out := &in.Inline, &out.Inline + *out = new(ElasticsearchInlineAuth) + **out = **in + } + if in.SecretKeyRef != nil { + in, out := &in.SecretKeyRef, &out.SecretKeyRef + *out = new(v1.SecretKeySelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticsearchAuth. +func (in *ElasticsearchAuth) DeepCopy() *ElasticsearchAuth { + if in == nil { + return nil + } + out := new(ElasticsearchAuth) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ElasticsearchInlineAuth) DeepCopyInto(out *ElasticsearchInlineAuth) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticsearchInlineAuth. +func (in *ElasticsearchInlineAuth) DeepCopy() *ElasticsearchInlineAuth { + if in == nil { + return nil + } + out := new(ElasticsearchInlineAuth) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *FeatureFlagState) DeepCopyInto(out *FeatureFlagState) { *out = *in diff --git a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go index 913f53510e..5ba8616fe7 100644 --- a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go +++ b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go @@ -219,6 +219,8 @@ type ElasticsearchStatus struct { MasterNode string `json:"masterNode,omitempty"` ExternalService string `json:"service,omitempty"` ZenDiscovery ZenDiscoveryStatus `json:"zenDiscovery,omitempty"` + // ControllerVersion is the version of the controller that last updated the Elasticsearch cluster + ControllerVersion string `json:"controllerVersion,omitempty"` } type ZenDiscoveryStatus struct { diff --git a/operators/pkg/apis/kibana/v1alpha1/kibana_types.go b/operators/pkg/apis/kibana/v1alpha1/kibana_types.go index 4b48091c53..7f3c4de325 100644 --- a/operators/pkg/apis/kibana/v1alpha1/kibana_types.go +++ b/operators/pkg/apis/kibana/v1alpha1/kibana_types.go @@ -6,7 +6,6 @@ package v1alpha1 import ( corev1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" @@ -62,7 +61,7 @@ type BackendElasticsearch struct { URL string `json:"url"` // Auth configures authentication for Kibana to use. - Auth ElasticsearchAuth `json:"auth,omitempty"` + Auth commonv1alpha1.ElasticsearchAuth `json:"auth,omitempty"` // CertificateAuthorities names a secret that contains a CA file entry to use. 
CertificateAuthorities commonv1alpha1.SecretRef `json:"certificateAuthorities,omitempty"` @@ -73,26 +72,6 @@ func (b BackendElasticsearch) IsConfigured() bool { return b.URL != "" && b.Auth.IsConfigured() && b.CertificateAuthorities.SecretName != "" } -// ElasticsearchAuth contains auth config for Kibana to use with an Elasticsearch cluster -type ElasticsearchAuth struct { - // Inline is auth provided as plaintext inline credentials. - Inline *ElasticsearchInlineAuth `json:"inline,omitempty"` - SecretKeyRef *v1.SecretKeySelector `json:"secret,omitempty"` -} - -// IsConfigured returns true if one of the possible auth mechanisms is configured. -func (ea ElasticsearchAuth) IsConfigured() bool { - return ea.Inline != nil || ea.SecretKeyRef != nil -} - -// ElasticsearchInlineAuth is a basic username/password combination. -type ElasticsearchInlineAuth struct { - // User is the username to use. - Username string `json:"username"` - // Password is the password to use. - Password string `json:"password"` -} - // KibanaHealth expresses the status of the Kibana instances. type KibanaHealth string @@ -108,6 +87,8 @@ type KibanaStatus struct { commonv1alpha1.ReconcilerStatus Health KibanaHealth `json:"health,omitempty"` AssociationStatus commonv1alpha1.AssociationStatus `json:"associationStatus,omitempty"` + // ControllerVersion is the version of the controller that last updated the Kibana instance + ControllerVersion string `json:"controllerVersion,omitempty"` } // IsDegraded returns true if the current status is worse than the previous. @@ -116,13 +97,21 @@ func (ks KibanaStatus) IsDegraded(prev KibanaStatus) bool { } // IsMarkedForDeletion returns true if the Kibana is going to be deleted -func (e Kibana) IsMarkedForDeletion() bool { - if e.DeletionTimestamp.IsZero() { // already handles nil pointer +func (k Kibana) IsMarkedForDeletion() bool { + if k.DeletionTimestamp.IsZero() { // already handles nil pointer return false } return true } +func (k *Kibana) ElasticsearchAuth() commonv1alpha1.ElasticsearchAuth { + return k.Spec.Elasticsearch.Auth +} + +func (k *Kibana) SecureSettings() *commonv1alpha1.SecretRef { + return k.Spec.SecureSettings +} + // +genclient // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/operators/pkg/apis/kibana/v1alpha1/kibana_types_test.go b/operators/pkg/apis/kibana/v1alpha1/kibana_types_test.go index fedf466662..e490cff985 100644 --- a/operators/pkg/apis/kibana/v1alpha1/kibana_types_test.go +++ b/operators/pkg/apis/kibana/v1alpha1/kibana_types_test.go @@ -14,7 +14,7 @@ func TestBackendElasticsearch_IsConfigured(t *testing.T) { caSecretName := "ca-dummy" type fields struct { URL string - Auth ElasticsearchAuth + Auth v1alpha1.ElasticsearchAuth CertificateAuthorities v1alpha1.SecretRef } tests := []struct { @@ -25,7 +25,7 @@ func TestBackendElasticsearch_IsConfigured(t *testing.T) { { name: "empty backend is not configured", fields: fields{ - Auth: ElasticsearchAuth{}, + Auth: v1alpha1.ElasticsearchAuth{}, }, want: false, }, @@ -33,8 +33,8 @@ func TestBackendElasticsearch_IsConfigured(t *testing.T) { name: "some fields missing is not configured", fields: fields{ URL: "i am an url", - Auth: ElasticsearchAuth{ - Inline: &ElasticsearchInlineAuth{ + Auth: v1alpha1.ElasticsearchAuth{ + Inline: &v1alpha1.ElasticsearchInlineAuth{ Username: "foo", Password: "bar", }, @@ -46,8 +46,8 @@ func TestBackendElasticsearch_IsConfigured(t *testing.T) { name: "all fields configured", fields: fields{ URL: "i am an url", - Auth: ElasticsearchAuth{ - Inline: 
&ElasticsearchInlineAuth{ + Auth: v1alpha1.ElasticsearchAuth{ + Inline: &v1alpha1.ElasticsearchInlineAuth{ Username: "foo", Password: "bar", }, diff --git a/operators/pkg/apis/kibana/v1alpha1/zz_generated.deepcopy.go b/operators/pkg/apis/kibana/v1alpha1/zz_generated.deepcopy.go index 795d5c8a4a..609a388217 100644 --- a/operators/pkg/apis/kibana/v1alpha1/zz_generated.deepcopy.go +++ b/operators/pkg/apis/kibana/v1alpha1/zz_generated.deepcopy.go @@ -10,7 +10,6 @@ package v1alpha1 import ( commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" - v1 "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -32,48 +31,6 @@ func (in *BackendElasticsearch) DeepCopy() *BackendElasticsearch { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ElasticsearchAuth) DeepCopyInto(out *ElasticsearchAuth) { - *out = *in - if in.Inline != nil { - in, out := &in.Inline, &out.Inline - *out = new(ElasticsearchInlineAuth) - **out = **in - } - if in.SecretKeyRef != nil { - in, out := &in.SecretKeyRef, &out.SecretKeyRef - *out = new(v1.SecretKeySelector) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticsearchAuth. -func (in *ElasticsearchAuth) DeepCopy() *ElasticsearchAuth { - if in == nil { - return nil - } - out := new(ElasticsearchAuth) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ElasticsearchInlineAuth) DeepCopyInto(out *ElasticsearchInlineAuth) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticsearchInlineAuth. -func (in *ElasticsearchInlineAuth) DeepCopy() *ElasticsearchInlineAuth { - if in == nil { - return nil - } - out := new(ElasticsearchInlineAuth) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Kibana) DeepCopyInto(out *Kibana) { *out = *in diff --git a/operators/pkg/controller/apmserver/apmserver_controller.go b/operators/pkg/controller/apmserver/apmserver_controller.go index afe3916cb9..c80565a7be 100644 --- a/operators/pkg/controller/apmserver/apmserver_controller.go +++ b/operators/pkg/controller/apmserver/apmserver_controller.go @@ -7,10 +7,26 @@ package apmserver import ( "crypto/sha256" "fmt" + "path/filepath" "reflect" "sync/atomic" "time" + apmv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/config" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/labels" + apmname "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/name" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/association/keystore" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/finalizer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/operator" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -25,53 +41,58 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" "sigs.k8s.io/controller-runtime/pkg/source" - "sigs.k8s.io/yaml" - - apmv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/config" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/operator" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) const ( name = "apmserver-controller" esCAChecksumLabelName = "apm.k8s.elastic.co/es-ca-file-checksum" configChecksumLabelName = "apm.k8s.elastic.co/config-file-checksum" + + // ApmBaseDir is the base directory of the APM server + ApmBaseDir = "/usr/share/apm-server" ) -var log = logf.Log.WithName(name) +var ( + log = logf.Log.WithName(name) + + // ApmServerBin is the apm server binary file + ApmServerBin = filepath.Join(ApmBaseDir, "apm-server") + + initContainerParameters = keystore.InitContainerParameters{ + KeystoreCreateCommand: ApmServerBin + " keystore create --force", + KeystoreAddCommand: ApmServerBin + " keystore add", + SecureSettingsVolumeMountPath: keystore.SecureSettingsVolumeMountPath, + DataVolumePath: DataVolumePath, + } +) // Add creates a new ApmServer Controller and adds it to the Manager with default RBAC. 
The Manager will set fields on the Controller // and Start it when the Manager is Started. func Add(mgr manager.Manager, params operator.Parameters) error { - return add(mgr, newReconciler(mgr)) + reconciler := newReconciler(mgr, params) + c, err := add(mgr, reconciler) + if err != nil { + return err + } + return addWatches(c, reconciler) } // newReconciler returns a new reconcile.Reconciler -func newReconciler(mgr manager.Manager) reconcile.Reconciler { +func newReconciler(mgr manager.Manager, params operator.Parameters) *ReconcileApmServer { + client := k8s.WrapClient(mgr.GetClient()) return &ReconcileApmServer{ - Client: k8s.WrapClient(mgr.GetClient()), - scheme: mgr.GetScheme(), - recorder: mgr.GetRecorder(name), + Client: client, + scheme: mgr.GetScheme(), + recorder: mgr.GetRecorder(name), + dynamicWatches: watches.NewDynamicWatches(), + finalizers: finalizer.NewHandler(client), + Parameters: params, } } -// add adds a new Controller to mgr with r as the reconcile.Reconciler -func add(mgr manager.Manager, r reconcile.Reconciler) error { - // Create a new controller - c, err := controller.New(name, mgr, controller.Options{Reconciler: r}) - if err != nil { - return err - } - +func addWatches(c controller.Controller, r *ReconcileApmServer) error { // Watch for changes to ApmServer - err = c.Watch(&source.Kind{Type: &apmv1alpha1.ApmServer{}}, &handler.EnqueueRequestForObject{}) + err := c.Watch(&source.Kind{Type: &apmv1alpha1.ApmServer{}}, &handler.EnqueueRequestForObject{}) if err != nil { return err } @@ -100,17 +121,30 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error { return err } + // dynamically watch referenced secrets to connect to Elasticsearch + if err := c.Watch(&source.Kind{Type: &corev1.Secret{}}, r.dynamicWatches.Secrets); err != nil { + return err + } + return nil } +// add adds a new Controller to mgr with r as the reconcile.Reconciler +func add(mgr manager.Manager, r reconcile.Reconciler) (controller.Controller, error) { + // Create a new controller + return controller.New(name, mgr, controller.Options{Reconciler: r}) +} + var _ reconcile.Reconciler = &ReconcileApmServer{} // ReconcileApmServer reconciles an ApmServer object type ReconcileApmServer struct { k8s.Client - scheme *runtime.Scheme - recorder record.EventRecorder - + scheme *runtime.Scheme + recorder record.EventRecorder + dynamicWatches watches.DynamicWatches + finalizers finalizer.Handler + operator.Parameters // iteration is the number of times this controller has run its Reconcile method iteration int64 } @@ -121,20 +155,15 @@ func (r *ReconcileApmServer) Reconcile(request reconcile.Request) (reconcile.Res // atomically update the iteration to support concurrent runs. 
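Note on the iteration bookkeeping above: Reconcile may be invoked concurrently, so the per-controller counter is bumped atomically and the request's namespace/name are attached to every log line so interleaved iterations can be told apart. A minimal stand-alone sketch of that pattern (stdlib only, names are illustrative, not the operator's types):

package main

import (
	"log"
	"sync/atomic"
	"time"
)

type reconciler struct {
	iteration int64 // bumped atomically on every Reconcile call
}

func (r *reconciler) reconcile(namespace, name string) {
	it := atomic.AddInt64(&r.iteration, 1)
	start := time.Now()
	log.Println("Start reconcile iteration", it, namespace, name)
	defer func() {
		log.Println("End reconcile iteration", it, "took", time.Since(start), namespace, name)
	}()
	// actual reconciliation work would run here
}

func main() {
	r := &reconciler{}
	r.reconcile("default", "apmserver-sample")
}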
currentIteration := atomic.AddInt64(&r.iteration, 1) iterationStartTime := time.Now() - log.Info("Start reconcile iteration", "iteration", currentIteration) + log.Info("Start reconcile iteration", "iteration", currentIteration, "namespace", request.Namespace, "as_name", request.Name) defer func() { - log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime)) + log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime), "namespace", request.Namespace, "as_name", request.Name) }() // Fetch the ApmServer resource as := &apmv1alpha1.ApmServer{} err := r.Get(request.NamespacedName, as) - if common.IsPaused(as.ObjectMeta) { - log.Info("Paused : skipping reconciliation", "iteration", currentIteration) - return common.PauseRequeue, nil - } - if err != nil { if errors.IsNotFound(err) { // Object not found, return. Created objects are automatically garbage collected. @@ -145,10 +174,33 @@ func (r *ReconcileApmServer) Reconcile(request reconcile.Request) (reconcile.Res return reconcile.Result{}, err } + if common.IsPaused(as.ObjectMeta) { + log.Info("Object is paused. Skipping reconciliation", "namespace", as.Namespace, "as_name", as.Name, "iteration", currentIteration) + return common.PauseRequeue, nil + } + + if err := r.finalizers.Handle(as, r.finalizersFor(*as)...); err != nil { + if errors.IsConflict(err) { + log.V(1).Info("Conflict while handling secret watch finalizer") + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, err + } + + if as.IsMarkedForDeletion() { + // APM server will be deleted nothing to do other than run finalizers + return reconcile.Result{}, nil + } + state := NewState(request, as) + state.UpdateApmServerControllerVersion(r.OperatorInfo.BuildInfo.Version) state, err = r.reconcileApmServerDeployment(state, as) if err != nil { + if errors.IsConflict(err) { + log.V(1).Info("Conflict while updating status") + return reconcile.Result{Requeue: true}, nil + } return state.Result, err } @@ -169,18 +221,18 @@ func (r *ReconcileApmServer) reconcileApmServerDeployment( as *apmv1alpha1.ApmServer, ) (State, error) { if !as.Spec.Output.Elasticsearch.IsConfigured() { - log.Info("Aborting ApmServer deployment reconciliation as no Elasticsearch output is configured") + log.Info("Aborting ApmServer deployment reconciliation as no Elasticsearch output is configured", + "namespace", as.Namespace, "as_name", as.Name) return state, nil } - // TODO: move server and config secrets into separate methods + // TODO: move server secret into separate method expectedApmServerSecret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Namespace: as.Namespace, - // TODO: suffix+trim properly - Name: as.Name + "-apm-server", - Labels: NewLabels(as.Name), + Name: apmname.SecretToken(as.Name), + Labels: labels.NewLabels(as.Name), }, Data: map[string][]byte{ SecretTokenKey: []byte(rand.String(24)), @@ -222,65 +274,32 @@ func (r *ReconcileApmServer) reconcileApmServerDeployment( reconciledApmServerSecret.Data = expectedApmServerSecret.Data }, PreCreate: func() { - log.Info("Creating apm server secret", "name", expectedApmServerSecret.Name) + log.Info("Creating apm server secret", "namespace", expectedApmServerSecret.Namespace, "secret_name", expectedApmServerSecret.Name, "as_name", as.Name) }, PreUpdate: func() { - log.Info("Updating apm server secret", "name", expectedApmServerSecret.Name) + log.Info("Updating apm server secret", "namespace", expectedApmServerSecret.Namespace, "secret_name", 
expectedApmServerSecret.Name, "as_name", as.Name) }, }, ); err != nil { return state, err } - cfg, err := config.FromResourceSpec(r.Client, *as) + reconciledConfigSecret, err := config.Reconcile(r.Client, r.scheme, as) if err != nil { return state, err } - cfgBytes, err := yaml.Marshal(cfg) + keystoreResources, err := keystore.NewResources( + r.Client, + r.recorder, + r.dynamicWatches, + as, + initContainerParameters, + ) if err != nil { return state, err } - expectedConfigSecret := &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: as.Namespace, - // TODO: suffix+trim properly - Name: as.Name + "-config", - Labels: NewLabels(as.Name), - }, - Data: map[string][]byte{ - "apm-server.yml": cfgBytes, - }, - } - reconciledConfigSecret := &corev1.Secret{} - if err := reconciler.ReconcileResource( - reconciler.Params{ - Client: r.Client, - Scheme: r.scheme, - - Owner: as, - Expected: expectedConfigSecret, - Reconciled: reconciledConfigSecret, - - NeedsUpdate: func() bool { - return true - }, - UpdateReconciled: func() { - reconciledConfigSecret.Labels = expectedConfigSecret.Labels - reconciledConfigSecret.Data = expectedConfigSecret.Data - }, - PreCreate: func() { - log.Info("Creating config secret", "name", expectedConfigSecret.Name) - }, - PreUpdate: func() { - log.Info("Updating config secret", "name", expectedConfigSecret.Name) - }, - }, - ); err != nil { - return state, err - } - apmServerPodSpecParams := PodSpecParams{ Version: as.Spec.Version, CustomImageName: as.Spec.Image, @@ -289,13 +308,21 @@ func (r *ReconcileApmServer) reconcileApmServerDeployment( ApmServerSecret: *reconciledApmServerSecret, ConfigSecret: *reconciledConfigSecret, + + keystoreResources: keystoreResources, } - podSpec := NewPodSpec(apmServerPodSpecParams) + podSpec := newPodSpec(as, apmServerPodSpecParams) + + podLabels := labels.NewLabels(as.Name) - podLabels := NewLabels(as.Name) - // add the config file checksum to the pod labels so a change triggers a rolling update - podLabels[configChecksumLabelName] = fmt.Sprintf("%x", sha256.Sum224(cfgBytes)) + // Build a checksum of the configuration, add it to the pod labels so a change triggers a rolling update + configChecksum := sha256.New224() + configChecksum.Write(reconciledConfigSecret.Data[config.ApmCfgSecretKey]) + if keystoreResources != nil { + configChecksum.Write([]byte(keystoreResources.Version)) + } + podLabels[configChecksumLabelName] = fmt.Sprintf("%x", configChecksum.Sum(nil)) esCASecretName := as.Spec.Output.Elasticsearch.SSL.CertificateAuthorities.SecretName if esCASecretName != "" { @@ -305,7 +332,7 @@ func (r *ReconcileApmServer) reconcileApmServerDeployment( esCAVolume := volume.NewSecretVolumeWithMountPath( esCASecretName, "elasticsearch-certs", - "/usr/share/apm-server/config/elasticsearch-certs", + filepath.Join(ApmBaseDir, config.CertificatesDir), ) // build a checksum of the cert file used by ES, which we can use to cause the Deployment to roll the Apm Server @@ -337,12 +364,11 @@ func (r *ReconcileApmServer) reconcileApmServerDeployment( // TODO: also need to hash secret token? - deploymentLabels := NewLabels(as.Name) + deploymentLabels := labels.NewLabels(as.Name) podSpec.Labels = defaults.SetDefaultLabels(podSpec.Labels, podLabels) deploy := NewDeployment(DeploymentParams{ - // TODO: revisit naming? 
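The config-checksum label used above can be read as a small helper: assuming cfgBytes is the rendered apm-server.yml and keystoreVersion is the secure settings secret's resource version, any change to either value changes the pod template label, which makes the Deployment roll the APM Server pods. A sketch under those assumptions:

package main

import (
	"crypto/sha256"
	"fmt"
)

// configChecksum returns a stable hex digest of the rendered configuration plus the
// keystore version, suitable for use as a pod template label value.
func configChecksum(cfgBytes []byte, keystoreVersion string) string {
	h := sha256.New224()
	h.Write(cfgBytes)
	if keystoreVersion != "" {
		h.Write([]byte(keystoreVersion))
	}
	return fmt.Sprintf("%x", h.Sum(nil))
}

func main() {
	fmt.Println(configChecksum([]byte("apm-server:\n  host: \":8200\"\n"), "1234"))
}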
- Name: PseudoNamespacedResourceName(*as), + Name: apmname.Deployment(as.Name), Namespace: as.Namespace, Replicas: as.Spec.NodeCount, Selector: deploymentLabels, @@ -365,6 +391,19 @@ func (r *ReconcileApmServer) updateStatus(state State) (reconcile.Result, error) if state.ApmServer.Status.IsDegraded(current.Status) { r.recorder.Event(current, corev1.EventTypeWarning, events.EventReasonUnhealthy, "Apm Server health degraded") } - log.Info("Updating status", "iteration", atomic.LoadInt64(&r.iteration)) - return state.Result, r.Status().Update(state.ApmServer) + log.Info("Updating status", "namespace", state.ApmServer.Namespace, "as_name", state.ApmServer.Name, "iteration", atomic.LoadInt64(&r.iteration)) + err := r.Status().Update(state.ApmServer) + if err != nil && errors.IsConflict(err) { + log.V(1).Info("Conflict while updating status") + return reconcile.Result{Requeue: true}, nil + } + + return state.Result, err +} + +// finalizersFor returns the list of finalizers applying to a given APM deployment +func (r *ReconcileApmServer) finalizersFor(as apmv1alpha1.ApmServer) []finalizer.Finalizer { + return []finalizer.Finalizer{ + keystore.Finalizer(k8s.ExtractNamespacedName(&as), r.dynamicWatches, &as), + } } diff --git a/operators/pkg/controller/apmserver/config/config.go b/operators/pkg/controller/apmserver/config/config.go index 6e3366e554..a8612e1727 100644 --- a/operators/pkg/controller/apmserver/config/config.go +++ b/operators/pkg/controller/apmserver/config/config.go @@ -5,189 +5,101 @@ package config import ( - "encoding/json" "fmt" + "path/filepath" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/association" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" ) -// DefaultHTTPPort is the (default) port used by ApmServer -const DefaultHTTPPort = 8200 +const ( + // DefaultHTTPPort is the (default) port used by ApmServer + DefaultHTTPPort = 8200 -// FromResourceSpec resolves the ApmServer configuration to use based on the provided spec. -// TODO: missing test -func FromResourceSpec(c k8s.Client, as v1alpha1.ApmServer) (*Config, error) { - // TODO: consider scaling the default values provided based on the apm server resources - // these defaults are taken (without scaling) from a defaulted ECE install + // Certificates + CertificatesDir = "config/elasticsearch-certs" +) - username, password, err := getCredentials(c, as) - if err != nil { - return nil, err +// DefaultConfiguration is the default configuration of an APM server. 
+// These defaults are taken (without scaling) from a defaulted ECE install +// TODO: consider scaling the default values provided based on the apm server resources +var DefaultConfiguration = []byte(` +apm-server: + concurrent_requests: 1 + max_unzipped_size: 5242880 + read_timeout: 3600 + rum: + enabled: true + rate_limit: 10 + shutdown_timeout: 30s + ssl: + enabled: false +logging: + json: true + metrics.enabled: true +output: + elasticsearch: + compression_level: 5 + max_bulk_size: 267 + worker: 5 +queue: + mem: + events: 2000 + flush: + min_events: 267 + timeout: 1s +setup.template.settings.index: + auto_expand_replicas: 0-2 + number_of_replicas: 1 + number_of_shards: 1 +xpack.monitoring.enabled: true +`) + +func NewConfigFromSpec(c k8s.Client, as v1alpha1.ApmServer) (*settings.CanonicalConfig, error) { + specConfig := as.Spec.Config + if specConfig == nil { + specConfig = &commonv1alpha1.Config{} } - return &Config{ - Name: "${POD_NAME}", - ApmServer: ApmServerConfig{ - Host: fmt.Sprintf(":%d", DefaultHTTPPort), - SecretToken: "${SECRET_TOKEN}", - ReadTimeout: 3600, - ShutdownTimeout: "30s", - Rum: RumConfig{Enabled: true, RateLimit: 10}, - ConcurrentRequests: 1, - MaxUnzippedSize: 5242880, - // TODO: TLS support for the server itself - SSL: TLSConfig{ - Enabled: false, - }, - }, - XPackMonitoringEnabled: true, - - Logging: LoggingConfig{ - JSON: true, - MetricsEnabled: true, - }, - Queue: QueueConfig{ - Mem: QueueMemConfig{ - Events: 2000, - Flush: FlushConfig{ - MinEvents: 267, - Timeout: "1s", - }, - }, - }, - SetupTemplateSettingsIndex: SetupTemplateSettingsIndex{ - NumberOfShards: 1, - NumberOfReplicas: 1, - AutoExpandReplicas: "0-2", - }, - Output: OutputConfig{ - Elasticsearch: ElasticsearchOutputConfig{ - Worker: 5, - MaxBulkSize: 267, - CompressionLevel: 5, - Hosts: as.Spec.Output.Elasticsearch.Hosts, - Username: username, - Password: password, - // TODO: optional TLS - SSL: TLSConfig{ - Enabled: true, - // TODO: hardcoded path - CertificateAuthorities: []string{"config/elasticsearch-certs/" + certificates.CertFileName}, - }, - // TODO: include indices? or will they be defaulted fine? - }, - }, - }, nil -} - -func getCredentials(c k8s.Client, as v1alpha1.ApmServer) (username, password string, err error) { - auth := as.Spec.Output.Elasticsearch.Auth - - if auth.Inline != nil { - return auth.Inline.Username, auth.Inline.Password, nil + userSettings, err := settings.NewCanonicalConfigFrom(specConfig.Data) + if err != nil { + return nil, err } - // if auth is provided via a secret, resolve credentials from it. 
- if auth.SecretKeyRef != nil { - secretObjKey := types.NamespacedName{Namespace: as.Namespace, Name: auth.SecretKeyRef.Name} - var secret v1.Secret - if err := c.Get(secretObjKey, &secret); err != nil { - return "", "", err - } - return auth.SecretKeyRef.Key, string(secret.Data[auth.SecretKeyRef.Key]), nil + // Get username and password + username, password, err := association.ElasticsearchAuthSettings(c, &as) + if err != nil { + return nil, err } - // no authentication method provided, return an empty credential - return "", "", nil -} - -type Config struct { - Name string `json:"name,omitempty"` - ApmServer ApmServerConfig `json:"apm-server,omitempty"` - XPackMonitoringEnabled bool `json:"xpack.monitoring.enabled,omitempty"` - Logging LoggingConfig `json:"logging,omitempty"` - Queue QueueConfig `json:"queue,omitempty"` - Output OutputConfig `json:"output,omitempty"` - SetupTemplateSettingsIndex SetupTemplateSettingsIndex `json:"setup.template.settings.index,omitempty"` -} - -type OutputConfig struct { - Elasticsearch ElasticsearchOutputConfig `json:"elasticsearch,omitempty"` - // TODO support other outputs. -} - -type SetupTemplateSettingsIndex struct { - NumberOfShards int `json:"number_of_shards,omitempty"` - NumberOfReplicas int `json:"number_of_replicas,omitempty"` - AutoExpandReplicas string `json:"auto_expand_replicas,omitempty"` -} - -type ApmServerConfig struct { - Host string `json:"host,omitempty"` - ReadTimeout int `json:"read_timeout,omitempty"` - ShutdownTimeout string `json:"shutdown_timeout,omitempty"` - SecretToken string `json:"secret_token,omitempty"` - SSL TLSConfig `json:"ssl,omitempty"` - Rum RumConfig `json:"rum,omitempty"` - ConcurrentRequests int `json:"concurrent_requests,omitempty"` - MaxUnzippedSize int `json:"max_unzipped_size,omitempty"` -} - -type RumConfig struct { - Enabled bool `json:"enabled,omitempty"` - RateLimit int `json:"rate_limit,omitempty"` -} - -type TLSConfig struct { - Enabled bool `json:"enabled"` - Certificate string `json:"certificate,omitempty"` - Key string `json:"key,omitempty"` - CertificateAuthorities []string `json:"certificate_authorities,omitempty"` -} + // Create a base configuration. 
+ cfg := settings.MustCanonicalConfig(map[string]interface{}{ + "apm-server.host": fmt.Sprintf(":%d", DefaultHTTPPort), + "apm-server.secret_token": "${SECRET_TOKEN}", + }) -type LoggingConfig struct { - Level string `json:"level,omitempty"` - ToFiles bool `json:"to_files,omitempty"` - JSON bool `json:"json,omitempty"` - MetricsEnabled bool `json:"metrics.enabled,omitempty"` -} - -type LoggingFilesConfig struct { - Path string `json:"path,omitempty"` - Name string `json:"name,omitempty"` - Keepfiles int `json:"keepfiles,omitempty"` -} - -type LoggingMetricsConfig struct { - Enabled bool `json:"enabled,omitempty"` -} - -type QueueConfig struct { - Mem QueueMemConfig `json:"mem,omitempty"` -} - -type QueueMemConfig struct { - Events int `json:"events,omitempty"` - Flush FlushConfig `json:"flush,omitempty"` -} - -type FlushConfig struct { - MinEvents int `json:"min_events,omitempty"` - Timeout string `json:"timeout,omitempty"` -} + // Build the default configuration + defaultCfg, err := settings.ParseConfig(DefaultConfiguration) + if err != nil { + return nil, err + } -type ElasticsearchOutputConfig struct { - Hosts []string `json:"hosts,omitempty"` - SSL TLSConfig `json:"ssl,omitempty"` - Username string `json:"username,omitempty"` - Password string `json:"password,omitempty"` - Headers map[string]string `json:"headers,omitempty"` - Worker int `json:"worker,omitempty"` - MaxBulkSize int `json:"max_bulk_size,omitempty"` - CompressionLevel int `json:"compression_level,omitempty"` - Indices []json.RawMessage `json:"indices,omitempty"` + // Merge the configuration with userSettings last so they take precedence. + err = cfg.MergeWith( + defaultCfg, + settings.MustCanonicalConfig( + map[string]interface{}{ + "output.elasticsearch.hosts": as.Spec.Output.Elasticsearch.Hosts, + "output.elasticsearch.username": username, + "output.elasticsearch.password": password, + "output.elasticsearch.ssl.certificate_authorities": []string{filepath.Join(CertificatesDir, certificates.CertFileName)}, + }, + ), + userSettings, + ) + return cfg, nil } diff --git a/operators/pkg/controller/apmserver/config/reconcile.go b/operators/pkg/controller/apmserver/config/reconcile.go new file mode 100644 index 0000000000..5557c0030a --- /dev/null +++ b/operators/pkg/controller/apmserver/config/reconcile.go @@ -0,0 +1,81 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package config + +import ( + "reflect" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/labels" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/name" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" +) + +const ApmCfgSecretKey = "apm-server.yml" + +var log = logf.Log.WithName("apmserver-config") + +// Reconcile reconciles the configuration of the APM server: it first creates the configuration from the APM +// specification and then reconcile the underlying secret. 
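The merge order in NewConfigFromSpec matters: defaults first, operator-computed Elasticsearch output next, user settings last so they win on conflicting keys. A rough stand-in using flat maps (the real code goes through settings.CanonicalConfig, which this sketch does not attempt to reproduce):

package main

import "fmt"

// merge applies layers left to right; keys in later layers overwrite earlier ones,
// mirroring the precedence of MergeWith(defaultCfg, computed, userSettings).
func merge(layers ...map[string]interface{}) map[string]interface{} {
	out := map[string]interface{}{}
	for _, layer := range layers {
		for k, v := range layer {
			out[k] = v
		}
	}
	return out
}

func main() {
	defaults := map[string]interface{}{"output.elasticsearch.compression_level": 5}
	computed := map[string]interface{}{"output.elasticsearch.hosts": []string{"https://es:9200"}}
	user := map[string]interface{}{"output.elasticsearch.compression_level": 1}
	fmt.Println(merge(defaults, computed, user)) // user value wins for compression_level
}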
+func Reconcile(client k8s.Client, scheme *runtime.Scheme, as *v1alpha1.ApmServer) (*corev1.Secret, error) { + + // Create a new configuration from the APM object spec. + cfg, err := NewConfigFromSpec(client, *as) + if err != nil { + return nil, err + } + + cfgBytes, err := cfg.Render() + if err != nil { + return nil, err + } + + // Reconcile the configuration in a secret + expectedConfigSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: as.Namespace, + Name: name.Config(as.Name), + Labels: labels.NewLabels(as.Name), + }, + Data: map[string][]byte{ + ApmCfgSecretKey: cfgBytes, + }, + } + + reconciledConfigSecret := &corev1.Secret{} + if err := reconciler.ReconcileResource( + reconciler.Params{ + Client: client, + Scheme: scheme, + + Owner: as, + Expected: expectedConfigSecret, + Reconciled: reconciledConfigSecret, + + NeedsUpdate: func() bool { + return !reflect.DeepEqual(reconciledConfigSecret.Data, expectedConfigSecret.Data) || + !reflect.DeepEqual(reconciledConfigSecret.Labels, expectedConfigSecret.Labels) + }, + UpdateReconciled: func() { + reconciledConfigSecret.Labels = expectedConfigSecret.Labels + reconciledConfigSecret.Data = expectedConfigSecret.Data + }, + PreCreate: func() { + log.Info("Creating config secret", "name", expectedConfigSecret.Name) + }, + PreUpdate: func() { + log.Info("Updating config secret", "name", expectedConfigSecret.Name) + }, + }, + ); err != nil { + return nil, err + } + return reconciledConfigSecret, nil +} diff --git a/operators/pkg/controller/apmserver/deployment.go b/operators/pkg/controller/apmserver/deployment.go deleted file mode 100644 index 3227359f22..0000000000 --- a/operators/pkg/controller/apmserver/deployment.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package apmserver - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" -) - -func PseudoNamespacedResourceName(as v1alpha1.ApmServer) string { - return stringsutil.Concat(as.Name, "-apm-server") -} diff --git a/operators/pkg/controller/apmserver/deployment_control.go b/operators/pkg/controller/apmserver/deployment_control.go index c6d421818d..4bf7975f99 100644 --- a/operators/pkg/controller/apmserver/deployment_control.go +++ b/operators/pkg/controller/apmserver/deployment_control.go @@ -63,10 +63,9 @@ func (r *ReconcileApmServer) ReconcileDeployment(expected appsv1.Deployment, own !reflect.DeepEqual(expected.Spec.Template.ObjectMeta, reconciled.Spec.Template.ObjectMeta) || !reflect.DeepEqual(expected.Spec.Template.Spec.Containers[0].Name, reconciled.Spec.Template.Spec.Containers[0].Name) || !reflect.DeepEqual(expected.Spec.Template.Spec.Containers[0].Env, reconciled.Spec.Template.Spec.Containers[0].Env) || - !reflect.DeepEqual(expected.Spec.Template.Spec.Containers[0].Image, reconciled.Spec.Template.Spec.Containers[0].Image) - // TODO: do something better than reflect.DeepEqual above? - // TODO: containers[0] is a bit flaky - // TODO: technically not only the Spec may be different, but deployment labels etc. 
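The create-or-update logic for the config secret hinges on two small callbacks; reduced to plain structs, with the Kubernetes client omitted and the heavy lifting left to reconciler.ReconcileResource, the contract looks roughly like this:

package main

import (
	"fmt"
	"reflect"
)

type secret struct {
	Labels map[string]string
	Data   map[string][]byte
}

// needsUpdate mirrors the predicate used for the rendered config secret:
// only update when data or labels drifted from the expected state.
func needsUpdate(expected, reconciled secret) bool {
	return !reflect.DeepEqual(expected.Data, reconciled.Data) ||
		!reflect.DeepEqual(expected.Labels, reconciled.Labels)
}

// updateReconciled copies the expected state over the observed object before writing it back.
func updateReconciled(expected secret, reconciled *secret) {
	reconciled.Labels = expected.Labels
	reconciled.Data = expected.Data
}

func main() {
	expected := secret{Labels: map[string]string{"app": "apm-server"}, Data: map[string][]byte{"apm-server.yml": []byte("a")}}
	observed := secret{Labels: expected.Labels, Data: map[string][]byte{"apm-server.yml": []byte("b")}}
	if needsUpdate(expected, observed) {
		updateReconciled(expected, &observed)
	}
	fmt.Println(string(observed.Data["apm-server.yml"])) // "a"
}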
+ !reflect.DeepEqual(expected.Spec.Template.Spec.Containers[0].Image, reconciled.Spec.Template.Spec.Containers[0].Image) || + !reflect.DeepEqual(expected.Spec.Template.Spec.InitContainers, reconciled.Spec.Template.Spec.InitContainers) + // TODO: use a hash }, UpdateReconciled: func() { // Update the found object and write the result back if there are any changes diff --git a/operators/pkg/controller/apmserver/labels.go b/operators/pkg/controller/apmserver/labels/labels.go similarity index 97% rename from operators/pkg/controller/apmserver/labels.go rename to operators/pkg/controller/apmserver/labels/labels.go index 7af9b5622e..296d68e71e 100644 --- a/operators/pkg/controller/apmserver/labels.go +++ b/operators/pkg/controller/apmserver/labels/labels.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package apmserver +package labels import "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" diff --git a/operators/pkg/controller/apmserver/name/name.go b/operators/pkg/controller/apmserver/name/name.go new file mode 100644 index 0000000000..0a5ed03ef3 --- /dev/null +++ b/operators/pkg/controller/apmserver/name/name.go @@ -0,0 +1,43 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package name + +import ( + common_name "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/name" +) + +const ( + // APM name, used as prefix, is limited to 36 characters, + MaxAPMNameLength = 36 + // this leaves common_name.MaxNameLength - 36 characters for a suffix. + MaxSuffixLength = common_name.MaxNameLength - MaxAPMNameLength + + secretTokenSuffix = "token" + httpServiceSuffix = "http" + configSuffix = "config" + deploymentSuffix = "server" +) + +// APMNamer is a Namer that is configured with the defaults for resources related to an APM resource.
+var APMNamer = common_name.Namer{ + MaxSuffixLength: MaxSuffixLength, + DefaultSuffixes: []string{"apm"}, +} + +func SecretToken(apmName string) string { + return APMNamer.Suffix(apmName, secretTokenSuffix) +} + +func HTTPService(apmName string) string { + return APMNamer.Suffix(apmName, httpServiceSuffix) +} + +func Deployment(apmName string) string { + return APMNamer.Suffix(apmName, deploymentSuffix) +} + +func Config(apmName string) string { + return APMNamer.Suffix(apmName, configSuffix) +} diff --git a/operators/pkg/controller/apmserver/pod.go b/operators/pkg/controller/apmserver/pod.go index 682485fef9..1fc57ba92e 100644 --- a/operators/pkg/controller/apmserver/pod.go +++ b/operators/pkg/controller/apmserver/pod.go @@ -5,14 +5,17 @@ package apmserver import ( - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/util/intstr" + "path/filepath" + "strings" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/config" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/association/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" ) const ( @@ -22,6 +25,9 @@ const ( defaultImageRepositoryAndName string = "docker.elastic.co/apm/apm-server" SecretTokenKey string = "secret-token" + + DataVolumePath = ApmBaseDir + "/data" + ConfigVolumePath = ApmBaseDir + "/config" ) var readinessProbe = corev1.Probe{ @@ -50,7 +56,7 @@ var command = []string{ "-c", "config/config-secret/apm-server.yml", } -var configVolume = volume.NewEmptyDirVolume("config-volume", "/usr/share/apm-server/config") +var configVolume = volume.NewEmptyDirVolume("config-volume", ConfigVolumePath) type PodSpecParams struct { Version string @@ -60,17 +66,19 @@ type PodSpecParams struct { ApmServerSecret corev1.Secret ConfigSecret corev1.Secret + + keystoreResources *keystore.Resources } func imageWithVersion(image string, version string) string { return stringsutil.Concat(image, ":", version) } -func NewPodSpec(p PodSpecParams) corev1.PodTemplateSpec { +func newPodSpec(as *v1alpha1.ApmServer, p PodSpecParams) corev1.PodTemplateSpec { configSecretVolume := volume.NewSecretVolumeWithMountPath( p.ConfigSecret.Name, "config", - "/usr/share/apm-server/config/config-secret", + filepath.Join(ConfigVolumePath, "config-secret"), ) env := []corev1.EnvVar{ @@ -91,7 +99,7 @@ func NewPodSpec(p PodSpecParams) corev1.PodTemplateSpec { }, } - return defaults.NewPodTemplateBuilder( + builder := defaults.NewPodTemplateBuilder( p.PodTemplate, v1alpha1.APMServerContainerName). WithDockerImage(p.CustomImageName, imageWithVersion(defaultImageRepositoryAndName, p.Version)). WithReadinessProbe(readinessProbe). @@ -99,6 +107,18 @@ func NewPodSpec(p PodSpecParams) corev1.PodTemplateSpec { WithCommand(command). WithVolumes(configVolume.Volume(), configSecretVolume.Volume()). WithVolumeMounts(configVolume.VolumeMount(), configSecretVolume.VolumeMount()). - WithEnv(env...). - PodTemplate + WithEnv(env...) + + if p.keystoreResources != nil { + dataVolume := keystore.DataVolume( + strings.ToLower(as.Kind), + DataVolumePath, + ) + builder.WithInitContainers(p.keystoreResources.InitContainer). + WithVolumes(p.keystoreResources.Volume, dataVolume.Volume()). + WithVolumeMounts(dataVolume.VolumeMount()). 
+ WithInitContainerDefaults() + } + + return builder.PodTemplate } diff --git a/operators/pkg/controller/apmserver/pod_test.go b/operators/pkg/controller/apmserver/pod_test.go index 5a5a8af3ce..3a087045a0 100644 --- a/operators/pkg/controller/apmserver/pod_test.go +++ b/operators/pkg/controller/apmserver/pod_test.go @@ -8,11 +8,10 @@ import ( "reflect" "testing" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func TestNewPodSpec(t *testing.T) { @@ -24,11 +23,21 @@ func TestNewPodSpec(t *testing.T) { varFalse := false tests := []struct { name string + as v1alpha1.ApmServer p PodSpecParams want corev1.PodTemplateSpec }{ { name: "create default pod spec", + as: v1alpha1.ApmServer{ + TypeMeta: metav1.TypeMeta{ + Kind: "ApmServer", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-apm", + Namespace: "default", + }, + }, p: PodSpecParams{ Version: "7.0.1", ConfigSecret: corev1.Secret{ @@ -84,7 +93,7 @@ func TestNewPodSpec(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := NewPodSpec(tt.p); !reflect.DeepEqual(got, tt.want) { + if got := newPodSpec(&tt.as, tt.p); !reflect.DeepEqual(got, tt.want) { t.Errorf("NewPodSpec() = %v, want %v", got, tt.want) } }) diff --git a/operators/pkg/controller/apmserver/services.go b/operators/pkg/controller/apmserver/services.go index 5ffe36a42c..3bb720abf6 100644 --- a/operators/pkg/controller/apmserver/services.go +++ b/operators/pkg/controller/apmserver/services.go @@ -6,8 +6,9 @@ package apmserver import ( "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/labels" + apmname "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" - corev1 "k8s.io/api/core/v1" ) @@ -18,9 +19,9 @@ func NewService(as v1alpha1.ApmServer) *corev1.Service { } svc.ObjectMeta.Namespace = as.Namespace - svc.ObjectMeta.Name = PseudoNamespacedResourceName(as) + svc.ObjectMeta.Name = apmname.HTTPService(as.Name) - labels := NewLabels(as.Name) + labels := labels.NewLabels(as.Name) ports := []corev1.ServicePort{ { Protocol: corev1.ProtocolTCP, diff --git a/operators/pkg/controller/apmserver/state.go b/operators/pkg/controller/apmserver/state.go index f6b82273ea..64ba2f9fb5 100644 --- a/operators/pkg/controller/apmserver/state.go +++ b/operators/pkg/controller/apmserver/state.go @@ -43,3 +43,11 @@ func (s State) UpdateApmServerState(deployment v1.Deployment, apmServerSecret co func (s State) UpdateApmServerExternalService(svc corev1.Service) { s.ApmServer.Status.ExternalService = svc.Name } + +func (s *State) UpdateApmServerControllerVersion(version string) { + s.ApmServer.Status.ControllerVersion = version +} + +func (s *State) GetApmServerControllerVersion() string { + return s.ApmServer.Status.ControllerVersion +} diff --git a/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller.go b/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller.go index a8512d2eef..b03e6f6886 100644 --- a/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller.go +++ 
b/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller.go @@ -111,9 +111,9 @@ func (r *ReconcileApmServerElasticsearchAssociation) Reconcile(request reconcile // atomically update the iteration to support concurrent runs. currentIteration := atomic.AddInt64(&r.iteration, 1) iterationStartTime := time.Now() - log.Info("Start reconcile iteration", "iteration", currentIteration) + log.Info("Start reconcile iteration", "iteration", currentIteration, "namespace", request.Namespace, "as_name", request.Name) defer func() { - log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime)) + log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime), "namespace", request.Namespace, "as_name", request.Name) }() var apmServer apmtype.ApmServer @@ -127,7 +127,7 @@ func (r *ReconcileApmServerElasticsearchAssociation) Reconcile(request reconcile } if common.IsPaused(apmServer.ObjectMeta) { - log.Info("Paused : skipping reconciliation", "iteration", currentIteration) + log.Info("Object is paused. Skipping reconciliation", "namespace", apmServer.Namespace, "as_name", apmServer.Name, "iteration", currentIteration) return common.PauseRequeue, nil } @@ -247,14 +247,14 @@ func (r *ReconcileApmServerElasticsearchAssociation) reconcileInternal(apmServer // TODO: this is a bit rough if !reflect.DeepEqual(apmServer.Spec.Output.Elasticsearch, expectedEsConfig) { apmServer.Spec.Output.Elasticsearch = expectedEsConfig - log.Info("Updating Apm Server spec with Elasticsearch output configuration") + log.Info("Updating Apm Server spec with Elasticsearch output configuration", "namespace", apmServer.Namespace, "as_name", apmServer.Name) if err := r.Update(&apmServer); err != nil { return commonv1alpha1.AssociationPending, err } } if err := deleteOrphanedResources(r, apmServer); err != nil { - log.Error(err, "Error while trying to delete orphaned resources. Continuing.") + log.Error(err, "Error while trying to delete orphaned resources. 
Continuing.", "namespace", apmServer.Namespace, "as_name", apmServer.Name) } return commonv1alpha1.AssociationEstablished, nil @@ -274,7 +274,7 @@ func deleteOrphanedResources(c k8s.Client, apm apmtype.ApmServer) error { for _, s := range secrets.Items { controlledBy := metav1.IsControlledBy(&s, &apm) if controlledBy && !apm.Spec.Output.Elasticsearch.ElasticsearchRef.IsDefined() { - log.Info("Deleting", "secret", k8s.ExtractNamespacedName(&s)) + log.Info("Deleting secret", "namespace", s.Namespace, "secret_name", s.Name, "as_name", apm.Name) if err := c.Delete(&s); err != nil { return err } diff --git a/operators/pkg/controller/apmserverelasticsearchassociation/user.go b/operators/pkg/controller/apmserverelasticsearchassociation/user.go index e0fd4a3c62..e5c8af5704 100644 --- a/operators/pkg/controller/apmserverelasticsearchassociation/user.go +++ b/operators/pkg/controller/apmserverelasticsearchassociation/user.go @@ -9,7 +9,7 @@ import ( apmtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/labels" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" common "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/user" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" @@ -73,7 +73,7 @@ func reconcileEsUser(c k8s.Client, s *runtime.Scheme, apm apmtype.ApmServer, es // TODO: more flexible user-name (suffixed-trimmed?) so multiple associations do not conflict pw := common.RandomPasswordBytes() // the secret will be on the Apm side of the association so we are applying the Apm labels here - secretLabels := apmserver.NewLabels(apm.Name) + secretLabels := labels.NewLabels(apm.Name) secretLabels[AssociationLabelName] = apm.Name // add ES labels for k, v := range label.NewLabels(apm.Spec.Output.Elasticsearch.ElasticsearchRef.NamespacedName()) { diff --git a/operators/pkg/controller/common/annotation/pod.go b/operators/pkg/controller/common/annotation/pod.go index 5896343fd9..44282b5bc1 100644 --- a/operators/pkg/controller/common/annotation/pod.go +++ b/operators/pkg/controller/common/annotation/pod.go @@ -15,6 +15,7 @@ import ( ) const ( + // UpdateAnnotation is the name of the annotation applied to pods to force kubelet to resync secrets UpdateAnnotation = "update.k8s.elastic.co/timestamp" ) @@ -52,10 +53,10 @@ func MarkPodAsUpdated( pod corev1.Pod, ) { log.V(1).Info( - "Update annotation on pod", + "Updating annotation on pod", "annotation", UpdateAnnotation, "namespace", pod.Namespace, - "pod", pod.Name, + "pod_name", pod.Name, ) if pod.Annotations == nil { pod.Annotations = map[string]string{} @@ -64,12 +65,13 @@ func MarkPodAsUpdated( time.Now().Format(time.RFC3339Nano) // nano should be enough to avoid collisions and keep it readable by a human. 
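MarkPodAsUpdated, referenced above, amounts to setting a single timestamp annotation so the kubelet notices the pod object changed and re-syncs mounted secrets. A minimal stand-alone sketch (the annotation key is the one from this patch, the rest is illustrative):

package main

import (
	"fmt"
	"time"
)

const updateAnnotation = "update.k8s.elastic.co/timestamp"

// markUpdated stamps the annotation map with the current time; RFC3339Nano keeps the
// value human-readable while making collisions between successive updates unlikely.
func markUpdated(annotations map[string]string) map[string]string {
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations[updateAnnotation] = time.Now().Format(time.RFC3339Nano)
	return annotations
}

func main() {
	fmt.Println(markUpdated(nil))
}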
if err := c.Update(&pod); err != nil { if errors.IsConflict(err) { - log.V(1).Info("Conflict while updating pod annotation") + // Conflicts are expected and will be handled on the next reconcile loop, no need to error out here + log.V(1).Info("Conflict while updating pod annotation", "namespace", pod.Namespace, "pod_name", pod.Name) } else { log.Error(err, "failed to update pod annotation", "annotation", UpdateAnnotation, "namespace", pod.Namespace, - "pod", pod.Name) + "pod_name", pod.Name) } } } diff --git a/operators/pkg/controller/common/association/association.go b/operators/pkg/controller/common/association/association.go new file mode 100644 index 0000000000..ccaee86588 --- /dev/null +++ b/operators/pkg/controller/common/association/association.go @@ -0,0 +1,37 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package association + +import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" +) + +// ElasticsearchAuthSettings returns the user and the password to be used by an associated object to authenticate +// against an Elasticsearch cluster. +func ElasticsearchAuthSettings( + c k8s.Client, + associated v1alpha1.Associated, +) (username, password string, err error) { + auth := associated.ElasticsearchAuth() + if auth.Inline != nil { + return auth.Inline.Username, auth.Inline.Password, nil + } + + // if auth is provided via a secret, resolve credentials from it. + if auth.SecretKeyRef != nil { + secretObjKey := types.NamespacedName{Namespace: associated.GetNamespace(), Name: auth.SecretKeyRef.Name} + var secret v1.Secret + if err := c.Get(secretObjKey, &secret); err != nil { + return "", "", err + } + return auth.SecretKeyRef.Key, string(secret.Data[auth.SecretKeyRef.Key]), nil + } + + // no authentication method provided, return an empty credential + return "", "", nil +} diff --git a/operators/pkg/controller/apmserver/config/config_test.go b/operators/pkg/controller/common/association/association_test.go similarity index 64% rename from operators/pkg/controller/apmserver/config/config_test.go rename to operators/pkg/controller/common/association/association_test.go index f0e61ccddf..82d34803fb 100644 --- a/operators/pkg/controller/apmserver/config/config_test.go +++ b/operators/pkg/controller/common/association/association_test.go @@ -2,18 +2,24 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
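ElasticsearchAuthSettings above follows a simple precedence: inline credentials first, otherwise a referenced secret key, otherwise empty. A compressed illustration with the Kubernetes secret lookup replaced by a map (types and names here are stand-ins, not the operator's API):

package main

import "fmt"

type inlineAuth struct{ Username, Password string }

type secretKeyRef struct{ SecretName, Key string }

type elasticsearchAuth struct {
	Inline       *inlineAuth
	SecretKeyRef *secretKeyRef
}

// resolve returns the username/password to use against Elasticsearch.
// For secret-based auth the key name doubles as the username, as in the patch.
func resolve(a elasticsearchAuth, secrets map[string]map[string]string) (string, string) {
	if a.Inline != nil {
		return a.Inline.Username, a.Inline.Password
	}
	if a.SecretKeyRef != nil {
		return a.SecretKeyRef.Key, secrets[a.SecretKeyRef.SecretName][a.SecretKeyRef.Key]
	}
	return "", "" // no auth configured
}

func main() {
	secrets := map[string]map[string]string{"apm-user-secret": {"apm-user": "changeme"}}
	fmt.Println(resolve(elasticsearchAuth{SecretKeyRef: &secretKeyRef{SecretName: "apm-user-secret", Key: "apm-user"}}, secrets))
}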
-package config +package association import ( "testing" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +var elasticsearhInlineAuth = commonv1alpha1.ElasticsearchInlineAuth{ + Username: "foo_username", + Password: "foo_password", +} + func Test_getCredentials(t *testing.T) { type args struct { c k8s.Client @@ -45,7 +51,7 @@ func Test_getCredentials(t *testing.T) { Output: v1alpha1.Output{ Elasticsearch: v1alpha1.ElasticsearchOutput{ Hosts: []string{"https://elasticsearch-sample-es-http.default.svc:9200"}, - Auth: v1alpha1.ElasticsearchAuth{ + Auth: commonv1alpha1.ElasticsearchAuth{ SecretKeyRef: &corev1.SecretKeySelector{ Key: "elastic-internal-apm", LocalObjectReference: corev1.LocalObjectReference{ @@ -61,10 +67,40 @@ func Test_getCredentials(t *testing.T) { wantUsername: "elastic-internal-apm", wantPassword: "a2s1Nmt0N3Nwdmg4cmpqdDlucWhsN3cy", }, + { + name: "Test inline credentials", + args: args{ + c: k8s.WrapClient(fake.NewFakeClient(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apmelasticsearchassociation-sample-elastic-internal-apm", + Namespace: "default", + }, + Data: map[string][]byte{"elastic-internal-apm": []byte("a2s1Nmt0N3Nwdmg4cmpqdDlucWhsN3cy")}, + })), + as: v1alpha1.ApmServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apm-server-sample", + Namespace: "default", + }, + Spec: v1alpha1.ApmServerSpec{ + Output: v1alpha1.Output{ + Elasticsearch: v1alpha1.ElasticsearchOutput{ + Hosts: []string{"https://elasticsearch-sample-es-http.default.svc:9200"}, + Auth: commonv1alpha1.ElasticsearchAuth{ + Inline: &elasticsearhInlineAuth, + }, + }, + }, + }, + }, + }, + wantUsername: "foo_username", + wantPassword: "foo_password", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotUsername, gotPassword, err := getCredentials(tt.args.c, tt.args.as) + gotUsername, gotPassword, err := ElasticsearchAuthSettings(tt.args.c, &tt.args.as) if (err != nil) != tt.wantErr { t.Errorf("getCredentials() error = %v, wantErr %v", err, tt.wantErr) return diff --git a/operators/pkg/controller/kibana/securesettings/initcontainer.go b/operators/pkg/controller/common/association/keystore/initcontainer.go similarity index 50% rename from operators/pkg/controller/kibana/securesettings/initcontainer.go rename to operators/pkg/controller/common/association/keystore/initcontainer.go index 9ab87f42da..18d4000850 100644 --- a/operators/pkg/controller/kibana/securesettings/initcontainer.go +++ b/operators/pkg/controller/common/association/keystore/initcontainer.go @@ -2,11 +2,13 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package securesettings +package keystore import ( + "bytes" + "text/template" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" - kbvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/volume" corev1 "k8s.io/api/core/v1" ) @@ -14,32 +16,55 @@ const ( InitContainerName = "init-keystore" ) -// script is a small bash script to create a Kibana keystore, +// InitContainerParameters helps to create a valid keystore init script for Kibana or the APM server. 
+type InitContainerParameters struct { + // Where the user provided secured settings should be mounted + SecureSettingsVolumeMountPath string + // Where the data will be copied + DataVolumePath string + // Keystore add command + KeystoreAddCommand string + // Keystore create command + KeystoreCreateCommand string +} + +// script is a small bash script to create a Kibana or APM keystore, // then add all entries from the secure settings secret volume into it. const script = `#!/usr/bin/env bash -set -eu +set -eux -echo "Initializing Kibana keystore." +echo "Initializing keystore." # create a keystore in the default data path -./bin/kibana-keystore create +{{ .KeystoreCreateCommand }} # add all existing secret entries into it -for filename in ` + kbvolume.SecureSettingsVolumeMountPath + `/*; do +for filename in {{ .SecureSettingsVolumeMountPath }}/*; do [[ -e "$filename" ]] || continue # glob does not match key=$(basename "$filename") echo "Adding "$key" to the keystore." - ./bin/kibana-keystore add "$key" --stdin < "$filename" + {{ .KeystoreAddCommand }} "$key" --stdin < "$filename" done echo "Keystore initialization successful." ` +var scriptTemplate = template.Must(template.New("").Parse(script)) + // initContainer returns an init container that executes a bash script -// to create the Kibana Keystore. -func initContainer(secureSettingsSecret volume.SecretVolume) corev1.Container { +// to create the APM Keystore. +func initContainer( + secureSettingsSecret volume.SecretVolume, + volumePrefix string, + parameters InitContainerParameters, +) (corev1.Container, error) { privileged := false + tplBuffer := bytes.Buffer{} + if err := scriptTemplate.Execute(&tplBuffer, parameters); err != nil { + return corev1.Container{}, err + } + return corev1.Container{ // Image will be inherited from pod template defaults Kibana Docker image ImagePullPolicy: corev1.PullIfNotPresent, @@ -47,12 +72,12 @@ func initContainer(secureSettingsSecret volume.SecretVolume) corev1.Container { SecurityContext: &corev1.SecurityContext{ Privileged: &privileged, }, - Command: []string{"/usr/bin/env", "bash", "-c", script}, + Command: []string{"/usr/bin/env", "bash", "-c", tplBuffer.String()}, VolumeMounts: []corev1.VolumeMount{ // access secure settings secureSettingsSecret.VolumeMount(), - // write the keystore in Kibana data volume - kbvolume.KibanaDataVolume.VolumeMount(), + // write the keystore in the data volume + DataVolume(volumePrefix, parameters.DataVolumePath).VolumeMount(), }, - } + }, nil } diff --git a/operators/pkg/controller/common/association/keystore/resources.go b/operators/pkg/controller/common/association/keystore/resources.go new file mode 100644 index 0000000000..e114b7fa86 --- /dev/null +++ b/operators/pkg/controller/common/association/keystore/resources.go @@ -0,0 +1,64 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
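The keystore init script is now rendered from InitContainerParameters with text/template, which is what lets Kibana and the APM server share one init container implementation. A self-contained sketch of that rendering (commands and mount path are the APM defaults from this patch, the script is shortened):

package main

import (
	"bytes"
	"fmt"
	"text/template"
)

type initContainerParameters struct {
	KeystoreCreateCommand         string
	KeystoreAddCommand            string
	SecureSettingsVolumeMountPath string
}

const scriptTemplate = `#!/usr/bin/env bash
set -eux
{{ .KeystoreCreateCommand }}
for filename in {{ .SecureSettingsVolumeMountPath }}/*; do
  [[ -e "$filename" ]] || continue
  {{ .KeystoreAddCommand }} "$(basename "$filename")" --stdin < "$filename"
done
`

func main() {
	tpl := template.Must(template.New("keystore-init").Parse(scriptTemplate))
	var buf bytes.Buffer
	if err := tpl.Execute(&buf, initContainerParameters{
		KeystoreCreateCommand:         "/usr/share/apm-server/apm-server keystore create --force",
		KeystoreAddCommand:            "/usr/share/apm-server/apm-server keystore add",
		SecureSettingsVolumeMountPath: "/mnt/elastic-internal/secure-settings",
	}); err != nil {
		panic(err)
	}
	fmt.Println(buf.String())
}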
+ +package keystore + +import ( + "strings" + + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/record" + logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" +) + +var log = logf.Log.WithName("keystore") + +// Resources holds all the resources needed to create a keystore in Kibana or in the APM server. +type Resources struct { + // volume which contains the keystore data as provided by the user + Volume corev1.Volume + // init container used to create the keystore + InitContainer corev1.Container + // version of the secret provided by the user + Version string +} + +// NewResources optionally returns a volume and init container to include in pods, +// in order to create a Keystore from secure settings referenced in the Kibana spec. +func NewResources( + c k8s.Client, + recorder record.EventRecorder, + watches watches.DynamicWatches, + associated commonv1alpha1.Associated, + initContainerParams InitContainerParameters, +) (*Resources, error) { + // setup a volume from the user-provided secure settings secret + secretVolume, version, err := secureSettingsVolume(c, recorder, watches, associated) + if err != nil { + return nil, err + } + if secretVolume == nil { + // nothing to do + return nil, nil + } + + // build an init container to create Kibana keystore from the secure settings volume + initContainer, err := initContainer( + *secretVolume, + strings.ToLower(associated.GetObjectKind().GroupVersionKind().Kind), + initContainerParams, + ) + if err != nil { + return nil, err + } + + return &Resources{ + Volume: secretVolume.Volume(), + InitContainer: initContainer, + Version: version, + }, nil +} diff --git a/operators/pkg/controller/common/association/keystore/resources_test.go b/operators/pkg/controller/common/association/keystore/resources_test.go new file mode 100644 index 0000000000..7b4c502dde --- /dev/null +++ b/operators/pkg/controller/common/association/keystore/resources_test.go @@ -0,0 +1,154 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
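Downstream, a non-nil keystore.Resources contributes a volume and an init container to the pod template (see newPodSpec earlier in this patch), while a nil value means no secure settings were referenced. A reduced sketch of that wiring with local stand-in types, since the full pod template builder is out of scope here:

package main

import "fmt"

type volume struct{ Name string }
type container struct{ Name string }

type keystoreResources struct {
	Volume        volume
	InitContainer container
	Version       string
}

type podSpec struct {
	Volumes        []volume
	InitContainers []container
}

// withKeystore adds the secure settings volume and the init-keystore container
// only when secure settings were provided (resources is non-nil).
func withKeystore(spec podSpec, resources *keystoreResources) podSpec {
	if resources == nil {
		return spec // nothing to do, as when NewResources returns nil
	}
	spec.Volumes = append(spec.Volumes, resources.Volume)
	spec.InitContainers = append(spec.InitContainers, resources.InitContainer)
	return spec
}

func main() {
	spec := withKeystore(podSpec{}, &keystoreResources{
		Volume:        volume{Name: "elastic-internal-secure-settings"},
		InitContainer: container{Name: "init-keystore"},
	})
	fmt.Println(len(spec.Volumes), len(spec.InitContainers))
}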
+ +package keystore + +import ( + "testing" + + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" + watches2 "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/magiconair/properties/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var ( + initContainersParameters = InitContainerParameters{ + KeystoreCreateCommand: "/keystore/bin/keystore create", + KeystoreAddCommand: "/keystore/bin/keystore add", + SecureSettingsVolumeMountPath: "/foo/secret", + DataVolumePath: "/bar/data", + } + + testSecureSettingsSecretName = "secure-settings-secret" + testSecureSettingsSecret = corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "namespace", + Name: testSecureSettingsSecretName, + ResourceVersion: "resource-version", + }, + } + testSecureSettingsSecretRef = commonv1alpha1.SecretRef{ + SecretName: testSecureSettingsSecretName, + } + testKibana = v1alpha1.Kibana{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "namespace", + Name: "kibana", + }, + } + testKibanaWithSecureSettings = v1alpha1.Kibana{ + TypeMeta: metav1.TypeMeta{ + Kind: "kibana", + }, + ObjectMeta: testKibana.ObjectMeta, + Spec: v1alpha1.KibanaSpec{ + SecureSettings: &testSecureSettingsSecretRef, + }, + } +) + +func TestResources(t *testing.T) { + varFalse := false + tests := []struct { + name string + client k8s.Client + kb v1alpha1.Kibana + wantNil bool + wantContainers *corev1.Container + wantVersion string + }{ + { + name: "no secure settings specified: no resources", + client: k8s.WrapClient(fake.NewFakeClient()), + kb: v1alpha1.Kibana{}, + wantContainers: nil, + wantVersion: "", + wantNil: true, + }, + { + name: "secure settings specified: return volume, init container and version", + client: k8s.WrapClient(fake.NewFakeClient(&testSecureSettingsSecret)), + kb: testKibanaWithSecureSettings, + wantContainers: &corev1.Container{ + Command: []string{ + "/usr/bin/env", + "bash", + "-c", + `#!/usr/bin/env bash + +set -eux + +echo "Initializing keystore." + +# create a keystore in the default data path +/keystore/bin/keystore create + +# add all existing secret entries into it +for filename in /foo/secret/*; do + [[ -e "$filename" ]] || continue # glob does not match + key=$(basename "$filename") + echo "Adding "$key" to the keystore." + /keystore/bin/keystore add "$key" --stdin < "$filename" +done + +echo "Keystore initialization successful." 
+`, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "elastic-internal-secure-settings", + ReadOnly: true, + MountPath: "/mnt/elastic-internal/secure-settings", + }, + { + Name: "kibana-data", + ReadOnly: false, + MountPath: "/bar/data", + }, + }, + SecurityContext: &corev1.SecurityContext{ + Privileged: &varFalse, + }, + }, + wantVersion: testSecureSettingsSecret.ResourceVersion, + wantNil: false, + }, + { + name: "secure settings specified but secret not there: no resources", + client: k8s.WrapClient(fake.NewFakeClient()), + kb: testKibanaWithSecureSettings, + wantContainers: nil, + wantVersion: "", + wantNil: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + recorder := record.NewFakeRecorder(1000) + watches := watches2.NewDynamicWatches() + require.NoError(t, watches.InjectScheme(scheme.Scheme)) + resources, err := NewResources(tt.client, recorder, watches, &tt.kb, initContainersParameters) + require.NoError(t, err) + if tt.wantNil { + require.Nil(t, resources) + } else { + require.NotNil(t, resources) + assert.Equal(t, resources.InitContainer.Name, "init-keystore") + assert.Equal(t, resources.InitContainer.Command, tt.wantContainers.Command) + assert.Equal(t, resources.InitContainer.VolumeMounts, tt.wantContainers.VolumeMounts) + assert.Equal(t, resources.InitContainer.SecurityContext, tt.wantContainers.SecurityContext) + assert.Equal(t, resources.Version, tt.wantVersion) + } + + }) + } +} diff --git a/operators/pkg/controller/kibana/securesettings/user_secret.go b/operators/pkg/controller/common/association/keystore/user_secret.go similarity index 59% rename from operators/pkg/controller/kibana/securesettings/user_secret.go rename to operators/pkg/controller/common/association/keystore/user_secret.go index 5cd0192ea9..d6179f51e2 100644 --- a/operators/pkg/controller/kibana/securesettings/user_secret.go +++ b/operators/pkg/controller/common/association/keystore/user_secret.go @@ -2,29 +2,29 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package securesettings +package keystore import ( "fmt" + "strings" commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/finalizer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" - kbvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" ) // secureSettingsVolume creates a volume from the optional user-provided secure settings secret. // -// Secure settings are provided by the user in the Kibana Spec through a secret reference. -// This secret is mounted into Kibana pods for secure settings to be injected into Kibana keystore. +// Secure settings are provided by the user in the APM or Kibana Spec through a secret reference. +// This secret is mounted into the pods for secure settings to be injected into a keystore. // The user-provided secret is watched to reconcile on any change. 
// The user secret resource version is returned along with the volume, so that // any change in the user secret leads to pod rotation. @@ -32,22 +32,21 @@ func secureSettingsVolume( c k8s.Client, recorder record.EventRecorder, watches watches.DynamicWatches, - kb v1alpha1.Kibana, + associated commonv1alpha1.Associated, ) (*volume.SecretVolume, string, error) { // setup (or remove) watches for the user-provided secret to reconcile on any change - userSecretRef := kb.Spec.SecureSettings - err := watchSecureSettings(watches, userSecretRef, k8s.ExtractNamespacedName(&kb)) + err := watchSecureSettings(watches, associated.SecureSettings(), k8s.ExtractNamespacedName(associated)) if err != nil { return nil, "", err } - if userSecretRef == nil { + if associated.SecureSettings() == nil { // no secure settings secret specified return nil, "", nil } - // retrieve the secret referenced by the user in the Kibana namespace - userSecret, exists, err := retrieveUserSecret(c, kb, recorder, kb.Namespace, userSecretRef.SecretName) + // retrieve the secret referenced by the user in the same namespace + userSecret, exists, err := retrieveUserSecret(c, associated, recorder) if err != nil { return nil, "", err } @@ -59,24 +58,26 @@ func secureSettingsVolume( // build a volume from that secret secureSettingsVolume := volume.NewSecretVolumeWithMountPath( userSecret.Name, - kbvolume.SecureSettingsVolumeName, - kbvolume.SecureSettingsVolumeMountPath, + SecureSettingsVolumeName, + SecureSettingsVolumeMountPath, ) - // resource version will be included in Kibana pod labels, + // resource version will be included in pod labels, // to recreate pods on any secret change. resourceVersion := userSecret.GetResourceVersion() return &secureSettingsVolume, resourceVersion, nil } -func retrieveUserSecret(c k8s.Client, kibana v1alpha1.Kibana, recorder record.EventRecorder, namespace string, name string) (*corev1.Secret, bool, error) { +func retrieveUserSecret(c k8s.Client, associated commonv1alpha1.Associated, recorder record.EventRecorder) (*corev1.Secret, bool, error) { + secretName := associated.SecureSettings().SecretName + namespace := associated.GetNamespace() userSecret := corev1.Secret{} - err := c.Get(types.NamespacedName{Namespace: namespace, Name: name}, &userSecret) + err := c.Get(types.NamespacedName{Namespace: namespace, Name: secretName}, &userSecret) if err != nil && apierrors.IsNotFound(err) { msg := "Secure settings secret not found" - log.Info(msg, "name", name) - recorder.Event(&kibana, corev1.EventTypeWarning, events.EventReasonUnexpected, msg+": "+name) + log.Info(msg, "namespace", namespace, "secret_name", secretName) + recorder.Event(associated, corev1.EventTypeWarning, events.EventReasonUnexpected, msg+": "+secretName) return nil, false, nil } else if err != nil { return nil, false, err @@ -84,10 +85,10 @@ func retrieveUserSecret(c k8s.Client, kibana v1alpha1.Kibana, recorder record.Ev return &userSecret, true, nil } -// secureSettingsWatchName returns the watch name according to the Kibana deployment name. -// It is unique per Kibana deployment. -func secureSettingsWatchName(kibana types.NamespacedName) string { - return fmt.Sprintf("%s-%s-secure-settings", kibana.Namespace, kibana.Name) +// secureSettingsWatchName returns the watch name according to the deployment name. +// It is unique per APM or Kibana deployment. 
+func secureSettingsWatchName(namespacedName types.NamespacedName) string { + return fmt.Sprintf("%s-%s-secure-settings", namespacedName.Namespace, namespacedName.Name) } // watchSecureSettings registers a watch for the given secure settings. @@ -95,8 +96,8 @@ func secureSettingsWatchName(kibana types.NamespacedName) string { // Only one watch per cluster is registered: // - if it already exists with a different secret, it is replaced to watch the new secret. // - if the given user secret is nil, the watch is removed. -func watchSecureSettings(watched watches.DynamicWatches, secureSettingsRef *commonv1alpha1.SecretRef, kibana types.NamespacedName) error { - watchName := secureSettingsWatchName(kibana) +func watchSecureSettings(watched watches.DynamicWatches, secureSettingsRef *commonv1alpha1.SecretRef, nn types.NamespacedName) error { + watchName := secureSettingsWatchName(nn) if secureSettingsRef == nil { watched.Secrets.RemoveHandlerForKey(watchName) return nil @@ -104,19 +105,23 @@ func watchSecureSettings(watched watches.DynamicWatches, secureSettingsRef *comm return watched.Secrets.AddHandler(watches.NamedWatch{ Name: watchName, Watched: types.NamespacedName{ - Namespace: kibana.Namespace, + Namespace: nn.Namespace, Name: secureSettingsRef.SecretName, }, - Watcher: kibana, + Watcher: nn, }) } +func getKind(object runtime.Object) string { + return strings.ToLower(object.GetObjectKind().GroupVersionKind().Kind) +} + // Finalizer removes any dynamic watches on external user created secret. -func Finalizer(kibana types.NamespacedName, watched watches.DynamicWatches) finalizer.Finalizer { +func Finalizer(namespacedName types.NamespacedName, watched watches.DynamicWatches, object runtime.Object) finalizer.Finalizer { return finalizer.Finalizer{ - Name: "secure-settings.finalizers.kibana.k8s.elastic.co", + Name: "secure-settings.finalizers." + getKind(object) + ".k8s.elastic.co", Execute: func() error { - watched.Secrets.RemoveHandlerForKey(secureSettingsWatchName(kibana)) + watched.Secrets.RemoveHandlerForKey(secureSettingsWatchName(namespacedName)) return nil }, } diff --git a/operators/pkg/controller/kibana/securesettings/user_secret_test.go b/operators/pkg/controller/common/association/keystore/user_secret_test.go similarity index 95% rename from operators/pkg/controller/kibana/securesettings/user_secret_test.go rename to operators/pkg/controller/common/association/keystore/user_secret_test.go index a00d6e20e3..c9513e196f 100644 --- a/operators/pkg/controller/kibana/securesettings/user_secret_test.go +++ b/operators/pkg/controller/common/association/keystore/user_secret_test.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
-package securesettings +package keystore import ( "reflect" @@ -11,7 +11,6 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" - kbvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/types" @@ -27,8 +26,8 @@ func Test_secureSettingsWatchName(t *testing.T) { func Test_secureSettingsVolume(t *testing.T) { expectedSecretVolume := volume.NewSecretVolumeWithMountPath( testSecureSettingsSecret.Name, - kbvolume.SecureSettingsVolumeName, - kbvolume.SecureSettingsVolumeMountPath, + SecureSettingsVolumeName, + SecureSettingsVolumeMountPath, ) createWatches := func(handlerName string) watches.DynamicWatches { w := watches.NewDynamicWatches() @@ -91,7 +90,7 @@ func Test_secureSettingsVolume(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { recorder := record.NewFakeRecorder(1000) - vol, version, err := secureSettingsVolume(tt.c, recorder, tt.w, tt.kb) + vol, version, err := secureSettingsVolume(tt.c, recorder, tt.w, &tt.kb) require.NoError(t, err) if !reflect.DeepEqual(vol, tt.wantVolume) { diff --git a/operators/pkg/controller/common/association/keystore/volumes.go b/operators/pkg/controller/common/association/keystore/volumes.go new file mode 100644 index 0000000000..66d46d7464 --- /dev/null +++ b/operators/pkg/controller/common/association/keystore/volumes.go @@ -0,0 +1,30 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package keystore + +import ( + "fmt" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" +) + +const ( + DataVolumeNamePattern = "%s-data" + + SecureSettingsVolumeName = "elastic-internal-secure-settings" + SecureSettingsVolumeMountPath = "/mnt/elastic-internal/secure-settings" +) + +// dataVolumeName returns the volume name in which the keystore will be stored. +func dataVolumeName(prefix string) string { + return fmt.Sprintf(DataVolumeNamePattern, prefix) +} + +// DataVolume returns the volume used to propagate the keystore file from the init container to +// the server running in the main container. +// Since the APM server or Kibana are stateless and the keystore is created on pod start, an EmptyDir is fine here. 
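
[Editor's sketch, not part of the patch] The shared EmptyDir described in the comment above can be illustrated with a small, self-contained Go snippet: the init container writes the keystore into the data volume and the main container reads it from the same mount path. The container image and mount path below are assumptions for illustration; only the volume and init-container names come from the tests in this patch.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// keystorePodSpec shows the shared EmptyDir pattern used to propagate the
// keystore from the init container to the main container.
func keystorePodSpec() corev1.PodSpec {
	dataVolume := corev1.Volume{
		Name:         "kibana-data",
		VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}},
	}
	// hypothetical mount path, for illustration only
	dataMount := corev1.VolumeMount{Name: dataVolume.Name, MountPath: "/usr/share/kibana/data"}

	return corev1.PodSpec{
		Volumes: []corev1.Volume{dataVolume},
		InitContainers: []corev1.Container{{
			Name:         "init-keystore",
			Image:        "docker.elastic.co/kibana/kibana:7.2.0", // assumed image
			VolumeMounts: []corev1.VolumeMount{dataMount},
		}},
		Containers: []corev1.Container{{
			Name:         "kibana",
			Image:        "docker.elastic.co/kibana/kibana:7.2.0", // assumed image
			VolumeMounts: []corev1.VolumeMount{dataMount},
		}},
	}
}

func main() {
	spec := keystorePodSpec()
	fmt.Println("containers sharing the data volume:", len(spec.InitContainers)+len(spec.Containers))
}
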
+func DataVolume(prefix string, dataVolumePath string) volume.EmptyDirVolume { + return volume.NewEmptyDirVolume(dataVolumeName(prefix), dataVolumePath) +} diff --git a/operators/pkg/controller/common/certificates/ca_reconcile.go b/operators/pkg/controller/common/certificates/ca_reconcile.go index 588583c64a..12d2df14c3 100644 --- a/operators/pkg/controller/common/certificates/ca_reconcile.go +++ b/operators/pkg/controller/common/certificates/ca_reconcile.go @@ -54,7 +54,6 @@ func ReconcileCAForOwner( caType CAType, rotationParams RotationParams, ) (*CA, error) { - ownerNsn := k8s.ExtractNamespacedName(owner) // retrieve current CA secret caInternalSecret := corev1.Secret{} @@ -67,25 +66,25 @@ func ReconcileCAForOwner( return nil, err } if apierrors.IsNotFound(err) { - log.Info("No internal CA certificate Secret found, creating a new one", "owner", ownerNsn, "ca_type", caType) + log.Info("No internal CA certificate Secret found, creating a new one", "owner_namespace", owner.GetNamespace(), "owner_name", owner.GetName(), "ca_type", caType) return renewCA(cl, namer, owner, labels, rotationParams.Validity, scheme, caType) } // build CA ca := buildCAFromSecret(caInternalSecret) if ca == nil { - log.Info("Cannot build CA from secret, creating a new one", "owner", ownerNsn, "ca_type", caType) + log.Info("Cannot build CA from secret, creating a new one", "owner_namespace", owner.GetNamespace(), "owner_name", owner.GetName(), "ca_type", caType) return renewCA(cl, namer, owner, labels, rotationParams.Validity, scheme, caType) } // renew if cannot reuse if !canReuseCA(ca, rotationParams.RotateBefore) { - log.Info("Cannot reuse existing CA, creating a new one", "owner", ownerNsn, "ca_type", caType) + log.Info("Cannot reuse existing CA, creating a new one", "owner_namespace", owner.GetNamespace(), "owner_name", owner.GetName(), "ca_type", caType) return renewCA(cl, namer, owner, labels, rotationParams.Validity, scheme, caType) } // reuse existing CA - log.V(1).Info("Reusing existing CA", "owner", ownerNsn, "ca_type", caType) + log.V(1).Info("Reusing existing CA", "owner_namespace", owner.GetNamespace(), "owner_name", owner.GetName(), "ca_type", caType) return ca, nil } @@ -138,11 +137,11 @@ func canReuseCA(ca *CA, expirationSafetyMargin time.Duration) bool { func certIsValid(cert x509.Certificate, expirationSafetyMargin time.Duration) bool { now := time.Now() if now.Before(cert.NotBefore) { - log.Info("CA cert is not valid yet, will create a new one") + log.Info("CA cert is not valid yet", "subject", cert.Subject) return false } if now.After(cert.NotAfter.Add(-expirationSafetyMargin)) { - log.Info("CA cert expired or soon to expire, will create a new one", "expiration", cert.NotAfter) + log.Info("CA cert expired or soon to expire", "subject", cert.Subject, "expiration", cert.NotAfter) return false } return true @@ -181,7 +180,7 @@ func buildCAFromSecret(caInternalSecret corev1.Secret) *CA { } certs, err := ParsePEMCerts(caBytes) if err != nil { - log.Info("Cannot parse PEM cert from CA secret, will create a new one", "err", err) + log.Error(err, "Cannot parse PEM cert from CA secret, will create a new one", "namespace", caInternalSecret.Namespace, "secret_name", caInternalSecret.Name) return nil } if len(certs) == 0 { @@ -190,7 +189,8 @@ func buildCAFromSecret(caInternalSecret corev1.Secret) *CA { if len(certs) > 1 { log.Info( "More than 1 certificate in the CA secret, continuing with the first one", - "secret", caInternalSecret.Name, + "namespace", caInternalSecret.Namespace, + "secret_name", 
caInternalSecret.Name, ) } cert := certs[0] @@ -201,7 +201,7 @@ func buildCAFromSecret(caInternalSecret corev1.Secret) *CA { } privateKey, err := ParsePEMPrivateKey(privateKeyBytes) if err != nil { - log.Info("Cannot parse PEM private key from CA secret, will create a new one", "err", err) + log.Error(err, "Cannot parse PEM private key from CA secret, will create a new one", "namespace", caInternalSecret.Namespace, "secret_name", caInternalSecret.Name) return nil } return NewCA(privateKey, cert) diff --git a/operators/pkg/controller/common/certificates/http/reconcile.go b/operators/pkg/controller/common/certificates/http/reconcile.go index b7ec56d63e..cd422ea594 100644 --- a/operators/pkg/controller/common/certificates/http/reconcile.go +++ b/operators/pkg/controller/common/certificates/http/reconcile.go @@ -9,7 +9,6 @@ import ( "crypto/rsa" "crypto/x509" "crypto/x509/pkix" - "fmt" "net" "reflect" "strings" @@ -135,12 +134,12 @@ func reconcileHTTPInternalCertificatesSecret( if needsUpdate { if shouldCreateSecret { - log.Info("Creating HTTP internal certificate secret", "secret", secret.Name) + log.Info("Creating HTTP internal certificate secret", "namespace", secret.Namespace, "secret_name", secret.Name) if err := c.Create(&secret); err != nil { return nil, err } } else { - log.Info("Updating HTTP internal certificate secret", "secret", secret.Name) + log.Info("Updating HTTP internal certificate secret", "namespace", secret.Namespace, "secret_name", secret.Name) if err := c.Update(&secret); err != nil { return nil, err } @@ -172,7 +171,7 @@ func ensureInternalSelfSignedCertificateSecretContents( if privateKeyData, ok := secret.Data[certificates.KeyFileName]; ok { storedPrivateKey, err := certificates.ParsePEMPrivateKey(privateKeyData) if err != nil { - log.Error(err, "Unable to parse stored private key", "secret", secret.Name) + log.Error(err, "Unable to parse stored private key", "namespace", secret.Namespace, "secret_name", secret.Name) } else { needsNewPrivateKey = false privateKey = storedPrivateKey @@ -195,9 +194,10 @@ func ensureInternalSelfSignedCertificateSecretContents( if shouldIssueNewHTTPCertificate(owner, namer, tls, secret, svcs, ca, rotationParam.RotateBefore) { log.Info( "Issuing new HTTP certificate", - "secret", secret.Name, - "namespace", owner.Namespace, - "name", owner.Name, + "namespace", secret.Namespace, + "secret_name", secret.Name, + "owner_namespace", owner.Namespace, + "owner_name", owner.Name, ) csr, err := x509.CreateCertificateRequest(cryptorand.Reader, &x509.CertificateRequest{}, privateKey) @@ -263,7 +263,7 @@ func shouldIssueNewHTTPCertificate( } else { certs, err := certificates.ParsePEMCerts(certData) if err != nil { - log.Error(err, "Invalid certificate data found, issuing new certificate", "secret", secret.Name) + log.Error(err, "Invalid certificate data found, issuing new certificate", "namespace", secret.Namespace, "secret_name", secret.Name) return true } @@ -289,16 +289,20 @@ func shouldIssueNewHTTPCertificate( } if _, err := certificate.Verify(verifyOpts); err != nil { log.Info( - fmt.Sprintf("Certificate was not valid, should issue new: %s", err), + "Certificate was not valid, should issue new", + "validation_failure", err, "subject", certificate.Subject, "issuer", certificate.Issuer, "current_ca_subject", ca.Cert.Subject, + "secret_name", secret.Name, + "namespace", secret.Namespace, + "owner_name", owner.Name, ) return true } if time.Now().After(certificate.NotAfter.Add(-certReconcileBefore)) { - log.Info("Certificate soon to expire, should 
issue new", "secret", secret.Name) + log.Info("Certificate soon to expire, should issue new", "namespace", secret.Namespace, "secret_name", secret.Name) return true } diff --git a/operators/pkg/controller/common/certificates/x509_othername.go b/operators/pkg/controller/common/certificates/x509_othername.go index 0b470f9ec6..c8f0edd643 100644 --- a/operators/pkg/controller/common/certificates/x509_othername.go +++ b/operators/pkg/controller/common/certificates/x509_othername.go @@ -8,7 +8,6 @@ import ( "crypto/x509" "encoding/asn1" "errors" - "fmt" logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" ) @@ -209,7 +208,7 @@ func ParseSANGeneralNamesOtherNamesOnly(c *x509.Certificate) ([]GeneralName, err }, }) default: - log.Info(fmt.Sprintf("Ignoring unsupported GeneralNames tag [%d]", generalName.Tag)) + log.Info("Ignoring unsupported GeneralNames tag", "tag", generalName.Tag, "subject", c.Subject) } } } diff --git a/operators/pkg/controller/common/finalizer/handler.go b/operators/pkg/controller/common/finalizer/handler.go index f09e12a7eb..4cdd959129 100644 --- a/operators/pkg/controller/common/finalizer/handler.go +++ b/operators/pkg/controller/common/finalizer/handler.go @@ -49,7 +49,7 @@ func (h *Handler) Handle(resource runtime.Object, finalizers ...Finalizer) error var finalizerErr error if metaObject.GetDeletionTimestamp().IsZero() { // resource is not being deleted, make sure all finalizers are there - needUpdate = h.reconcileFinalizers(finalizers, metaObject, resource) + needUpdate = h.reconcileFinalizers(finalizers, metaObject) } else { // resource is being deleted, let's execute finalizers needUpdate, finalizerErr = h.executeFinalizers(finalizers, metaObject, resource) @@ -62,15 +62,14 @@ func (h *Handler) Handle(resource runtime.Object, finalizers ...Finalizer) error return finalizerErr } -// ReconcileFinalizers makes sure all finalizers exist in the given objectMeta. -// If some finalizers need to be added to objectMeta, -// an update to the apiserver will be issued for the given resource. -func (h *Handler) reconcileFinalizers(finalizers []Finalizer, object metav1.Object, resource runtime.Object) bool { +// reconcileFinalizers ensures all finalizers exist in the given objectMeta. 
+// Returns a bool indicating if an update is required to the object +func (h *Handler) reconcileFinalizers(finalizers []Finalizer, object metav1.Object) bool { needUpdate := false for _, finalizer := range finalizers { // add finalizer if not already there if !stringsutil.StringInSlice(finalizer.Name, object.GetFinalizers()) { - log.Info("Registering finalizer", "name", finalizer.Name) + log.Info("Registering finalizer", "finalizer_name", finalizer.Name, "namespace", object.GetNamespace(), "name", object.GetName()) object.SetFinalizers(append(object.GetFinalizers(), finalizer.Name)) needUpdate = true } @@ -87,7 +86,7 @@ func (h *Handler) executeFinalizers(finalizers []Finalizer, object metav1.Object for _, finalizer := range finalizers { // for each registered finalizer, execute it, then remove from the list if stringsutil.StringInSlice(finalizer.Name, object.GetFinalizers()) { - log.Info("Executing finalizer", "name", finalizer.Name) + log.Info("Executing finalizer", "finalizer_name", finalizer.Name, "namespace", object.GetNamespace(), "name", object.GetName()) if finalizerErr = finalizer.Execute(); finalizerErr != nil { break } diff --git a/operators/pkg/controller/common/reconciler/reconciler.go b/operators/pkg/controller/common/reconciler/reconciler.go index 0bd129e7d3..3a3020ff4f 100644 --- a/operators/pkg/controller/common/reconciler/reconciler.go +++ b/operators/pkg/controller/common/reconciler/reconciler.go @@ -94,7 +94,7 @@ func ReconcileResource(params Params) error { err = params.Client.Get(types.NamespacedName{Name: name, Namespace: namespace}, params.Reconciled) if err != nil && apierrors.IsNotFound(err) { // Create if needed - log.Info(fmt.Sprintf("Creating %s %s/%s", kind, namespace, name)) + log.Info("Creating resource", "kind", kind, "namespace", namespace, "name", name) if params.PreCreate != nil { params.PreCreate() } @@ -118,7 +118,7 @@ func ReconcileResource(params Params) error { // Update if needed if params.NeedsUpdate() { - log.Info(fmt.Sprintf("Updating %s %s/%s ", kind, namespace, name)) + log.Info("Updating resource", "kind", kind, "namespace", namespace, "name", name) if params.PreUpdate != nil { params.PreUpdate() } diff --git a/operators/pkg/controller/common/reconciler/results.go b/operators/pkg/controller/common/reconciler/results.go index 9f2e7a7af9..218c22bc23 100644 --- a/operators/pkg/controller/common/reconciler/results.go +++ b/operators/pkg/controller/common/reconciler/results.go @@ -5,10 +5,7 @@ package reconciler import ( - "fmt" - k8serrors "k8s.io/apimachinery/pkg/util/errors" - "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -54,7 +51,7 @@ func (r *Results) WithResult(res reconcile.Result) *Results { func (r *Results) Apply(step string, recoverableStep func() (reconcile.Result, error)) *Results { result, err := recoverableStep() if err != nil { - log.Error(err, fmt.Sprintf("Error during %s, continuing", step)) + log.Error(err, "Recoverable error during step, continuing", "step", step) } return r.WithError(err).WithResult(result) } @@ -69,7 +66,7 @@ func (r *Results) Aggregate() (reconcile.Result, error) { current = next } } - log.Info(fmt.Sprintf("Aggregate reconcile result is %+v", current)) + log.Info("Aggregated reconciliation results complete", "result", current) return current, k8serrors.NewAggregate(r.errors) } diff --git a/operators/pkg/controller/common/settings/canonical_config.go b/operators/pkg/controller/common/settings/canonical_config.go index 0124c157f5..6a395fcf36 100644 --- 
a/operators/pkg/controller/common/settings/canonical_config.go +++ b/operators/pkg/controller/common/settings/canonical_config.go @@ -30,9 +30,19 @@ func NewCanonicalConfig() *CanonicalConfig { return fromConfig(ucfg.New()) } -// NewCanonicalConfigFrom creates a new config from the API type. +// NewCanonicalConfigFrom creates a new config from the API type after normalizing the data. func NewCanonicalConfigFrom(data untypedDict) (*CanonicalConfig, error) { - config, err := ucfg.NewFrom(data, Options...) + // not great: round trip through yaml to normalize untyped dict before creating config + // to avoid numeric differences in configs due to JSON marshalling/deep copies being restricted to float + bytes, err := yaml.Marshal(data) + if err != nil { + return nil, err + } + var normalized untypedDict + if err := yaml.Unmarshal(bytes, &normalized); err != nil { + return nil, err + } + config, err := ucfg.NewFrom(normalized, Options...) if err != nil { return nil, err } diff --git a/operators/pkg/controller/common/settings/canonical_config_test.go b/operators/pkg/controller/common/settings/canonical_config_test.go index f6d43b917d..1d60816614 100644 --- a/operators/pkg/controller/common/settings/canonical_config_test.go +++ b/operators/pkg/controller/common/settings/canonical_config_test.go @@ -409,3 +409,42 @@ func TestCanonicalConfig_SetStrings(t *testing.T) { }) } } + +func TestNewCanonicalConfigFrom(t *testing.T) { + type args struct { + data untypedDict + } + tests := []struct { + name string + args args + want *CanonicalConfig + wantErr bool + }{ + { + name: "should normalize numeric types", + args: args{ + data: map[string]interface{}{ + "a": float64(1), // after json round trip or deep copy typically a float + "b": 1.2, + }, + }, + want: MustCanonicalConfig(map[string]interface{}{ + "a": 1, + "b": 1.2, + }), + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := NewCanonicalConfigFrom(tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("NewCanonicalConfigFrom() error = %v, wantErr %v", err, tt.wantErr) + return + } + if diff := got.Diff(tt.want, nil); len(diff) > 0 { + t.Error(diff) + } + }) + } +} diff --git a/operators/pkg/controller/common/watches/expectations_watch.go b/operators/pkg/controller/common/watches/expectations_watch.go index 14eb57c8fb..9f91cecae5 100644 --- a/operators/pkg/controller/common/watches/expectations_watch.go +++ b/operators/pkg/controller/common/watches/expectations_watch.go @@ -54,7 +54,7 @@ func (p *ExpectationsWatch) Create(evt event.CreateEvent, q workqueue.RateLimiti resource, exists := p.resourceRetriever(evt.Meta) if exists { p.expectations.CreationObserved(resource) - log.V(4).Info("Marking creation observed in expectations", "resource", resource) + log.V(1).Info("Marking creation observed in expectations", "name", resource.Name, "namespace", resource.Namespace) } } @@ -63,7 +63,7 @@ func (p *ExpectationsWatch) Delete(evt event.DeleteEvent, q workqueue.RateLimiti resource, exists := p.resourceRetriever(evt.Meta) if exists { p.expectations.DeletionObserved(resource) - log.V(4).Info("Marking deletion observed in expectations", "resource", resource) + log.V(1).Info("Marking deletion observed in expectations", "name", resource.Name, "namespace", resource.Namespace) } } diff --git a/operators/pkg/controller/common/watches/handler.go b/operators/pkg/controller/common/watches/handler.go index 3c0fd6dad7..e782173d44 100644 --- a/operators/pkg/controller/common/watches/handler.go +++ 
b/operators/pkg/controller/common/watches/handler.go @@ -63,7 +63,7 @@ func (d *DynamicEnqueueRequest) AddHandler(handler HandlerRegistration) error { defer d.mutex.Unlock() inject.SchemeInto(d.scheme, handler) d.registrations[handler.Key()] = handler - log.V(4).Info("Added new handler registration", "Now", d.registrations) + log.V(1).Info("Added new handler registration", "current_registrations", d.registrations) return nil } @@ -77,7 +77,7 @@ func (d *DynamicEnqueueRequest) RemoveHandlerForKey(key string) { d.mutex.Lock() defer d.mutex.Unlock() delete(d.registrations, key) - log.V(4).Info("Removed handler registration", "removed", key, "now", d.registrations) + log.V(1).Info("Removed handler registration", "removed", key, "current_registrations", d.registrations) } // Registrations returns the list of registered handler names. diff --git a/operators/pkg/controller/common/watches/named_watch.go b/operators/pkg/controller/common/watches/named_watch.go index f9705d2fa9..4103f971ef 100644 --- a/operators/pkg/controller/common/watches/named_watch.go +++ b/operators/pkg/controller/common/watches/named_watch.go @@ -28,32 +28,32 @@ var _ handler.EventHandler = &NamedWatch{} func (w NamedWatch) Create(evt event.CreateEvent, q workqueue.RateLimitingInterface) { for _, req := range w.toReconcileRequest(evt.Meta) { - log.V(4).Info("Create event transformed", "key", w.Key()) + log.V(1).Info("Create event transformed", "key", w.Key()) q.Add(req) } } func (w NamedWatch) Update(evt event.UpdateEvent, q workqueue.RateLimitingInterface) { for _, req := range w.toReconcileRequest(evt.MetaOld) { - log.V(4).Info("Update event transformed (old)", "key", w.Key()) + log.V(1).Info("Update event transformed (old)", "key", w.Key()) q.Add(req) } for _, req := range w.toReconcileRequest(evt.MetaNew) { - log.V(4).Info("Update event transformed (new)", "key", w.Key()) + log.V(1).Info("Update event transformed (new)", "key", w.Key()) q.Add(req) } } func (w NamedWatch) Delete(evt event.DeleteEvent, q workqueue.RateLimitingInterface) { for _, req := range w.toReconcileRequest(evt.Meta) { - log.V(4).Info("Delete event transformed", "key", w.Key()) + log.V(1).Info("Delete event transformed", "key", w.Key()) q.Add(req) } } func (w NamedWatch) Generic(evt event.GenericEvent, q workqueue.RateLimitingInterface) { for _, req := range w.toReconcileRequest(evt.Meta) { - log.V(4).Info("Generic event transformed", "key", w.Key()) + log.V(1).Info("Generic event transformed", "key", w.Key()) q.Add(req) } } diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go index bb7601ac9d..a0a6a4d31d 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go @@ -37,7 +37,7 @@ func ReconcileTransportCertificatesSecrets( services []corev1.Service, rotationParams certificates.RotationParams, ) (reconcile.Result, error) { - log.Info("Reconciling transport certificates secrets") + log.Info("Reconciling transport certificate secrets", "namespace", es.Namespace, "es_name", es.Name) var pods corev1.PodList if err := c.List(&client.ListOptions{ @@ -56,7 +56,7 @@ func ReconcileTransportCertificatesSecrets( for _, pod := range pods.Items { if pod.Status.PodIP == "" { - log.Info("Skipping pod because it has no IP yet", "pod", pod.Name) + log.Info("Skipping pod because it has no IP yet", "namespace", pod.Namespace, "pod_name", pod.Name) 
continue } @@ -85,7 +85,7 @@ func ReconcileTransportCertificatesSecrets( } } if len(keysToPrune) > 0 { - log.Info("Pruning keys from certificates secret", "keys", keysToPrune) + log.Info("Pruning keys from certificates secret", "namespace", es.Namespace, "secret_name", secret.Name, "keys", keysToPrune) for _, keyToRemove := range keysToPrune { delete(secret.Data, keyToRemove) diff --git a/operators/pkg/controller/elasticsearch/driver/default.go b/operators/pkg/controller/elasticsearch/driver/default.go index b162617920..0557e25725 100644 --- a/operators/pkg/controller/elasticsearch/driver/default.go +++ b/operators/pkg/controller/elasticsearch/driver/default.go @@ -176,6 +176,8 @@ func (d *defaultDriver) Reconcile( min = &d.Version } + warnUnsupportedDistro(resourcesState.AllPods, reconcileState.Recorder) + observedState := d.observedStateResolver( k8s.ExtractNamespacedName(&es), certificateResources.TrustedHTTPCertificates, @@ -384,7 +386,7 @@ func (d *defaultDriver) Reconcile( // cannot be reached, hence we cannot delete pods. // Probably it was just created and is not ready yet. // Let's retry in a while. - log.Info("ES external service not ready yet for shard migration reconciliation. Requeuing.") + log.Info("ES external service not ready yet for shard migration reconciliation. Requeuing.", "namespace", es.Namespace, "es_name", es.Name) reconcileState.UpdateElasticsearchPending(resourcesState.CurrentPods.Pods()) @@ -621,7 +623,7 @@ func (d *defaultDriver) scaleStatefulSetDown( if sset.Replicas(*statefulSet) == 0 && targetReplicas == 0 { // we don't expect any new replicas in this statefulset, remove it - logger.Info("Deleting statefulset") + logger.Info("Deleting statefulset", "namespace", statefulSet.Namespace, "name", statefulSet.Name) if err := d.Client.Delete(statefulSet); err != nil { return results.WithError(err) } @@ -735,3 +737,18 @@ func reconcileScriptsConfigMap(c k8s.Client, scheme *runtime.Scheme, es v1alpha1 return nil } + +// warnUnsupportedDistro sends an event of type warning if the Elasticsearch Docker image is not a supported +// distribution by looking at if the prepare fs init container terminated with the UnsupportedDistro exit code. +func warnUnsupportedDistro(pods []corev1.Pod, recorder *events.Recorder) { + for _, p := range pods { + for _, s := range p.Status.InitContainerStatuses { + state := s.LastTerminationState.Terminated + if s.Name == initcontainer.PrepareFilesystemContainerName && + state != nil && state.ExitCode == initcontainer.UnsupportedDistroExitCode { + recorder.AddEvent(corev1.EventTypeWarning, events.EventReasonUnexpected, + "Unsupported distribution") + } + } + } +} diff --git a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go index a863144ff6..002075b076 100644 --- a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go +++ b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go @@ -5,7 +5,6 @@ package elasticsearch import ( - "fmt" "sync/atomic" "time" @@ -181,9 +180,9 @@ func (r *ReconcileElasticsearch) Reconcile(request reconcile.Request) (reconcile // atomically update the iteration to support concurrent runs. 
currentIteration := atomic.AddInt64(&r.iteration, 1) iterationStartTime := time.Now() - log.Info("Start reconcile iteration", "iteration", currentIteration, "request", request) + log.Info("Start reconcile iteration", "iteration", currentIteration, "namespace", request.Namespace, "es_name", request.Name) defer func() { - log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime), "request", request) + log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime), "namespace", request.Namespace, "es_name", request.Name) }() // Fetch the Elasticsearch instance @@ -200,15 +199,16 @@ } if common.IsPaused(es.ObjectMeta) { - log.Info("Paused : skipping reconciliation", "iteration", currentIteration) + log.Info("Object is paused. Skipping reconciliation", "namespace", es.Namespace, "es_name", es.Name, "iteration", currentIteration) return common.PauseRequeue, nil } state := esreconcile.NewState(es) + state.UpdateElasticsearchControllerVersion(r.OperatorInfo.BuildInfo.Version) results := r.internalReconcile(es, state) err = r.updateStatus(es, state) if err != nil && apierrors.IsConflict(err) { - log.V(1).Info("Conflict while updating status") + log.V(1).Info("Conflict while updating status", "namespace", es.Namespace, "es_name", es.Name) return reconcile.Result{Requeue: true}, nil } return results.WithError(err).Aggregate() @@ -266,10 +266,10 @@ func (r *ReconcileElasticsearch) updateStatus( es elasticsearchv1alpha1.Elasticsearch, reconcileState *esreconcile.State, ) error { - log.Info("Updating status", "iteration", atomic.LoadInt64(&r.iteration)) + log.Info("Updating status", "iteration", atomic.LoadInt64(&r.iteration), "namespace", es.Namespace, "es_name", es.Name) events, cluster := reconcileState.Apply() for _, evt := range events { - log.Info(fmt.Sprintf("Recording event %+v", evt)) + log.V(1).Info("Recording event", "event", evt) r.recorder.Event(&es, evt.EventType, evt.Reason, evt.Message) } if cluster == nil { diff --git a/operators/pkg/controller/elasticsearch/initcontainer/initcontainer.go b/operators/pkg/controller/elasticsearch/initcontainer/initcontainer.go index 639100a7f5..3dcb682d48 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/initcontainer.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/initcontainer.go @@ -18,7 +18,7 @@ const ( // osSettingsContainerName is the name of the container that tweaks os-level settings osSettingsContainerName = "elastic-internal-init-os-settings" // prepareFilesystemContainerName is the name of the container that prepares the filesystem - prepareFilesystemContainerName = "elastic-internal-init-filesystem" + PrepareFilesystemContainerName = "elastic-internal-init-filesystem" ) // NewInitContainers creates init containers according to the given parameters diff --git a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go index 73496ee39a..3250a57267 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go @@ -103,7 +103,7 @@ func NewPrepareFSInitContainer( container := corev1.Container{ Image: imageName, ImagePullPolicy: corev1.PullIfNotPresent, - Name: prepareFilesystemContainerName, + Name: PrepareFilesystemContainerName, SecurityContext: &corev1.SecurityContext{ Privileged: 
&privileged, }, diff --git a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs_script.go b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs_script.go index e2a277636c..58642185dc 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs_script.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs_script.go @@ -6,6 +6,7 @@ package initcontainer import ( "bytes" + "fmt" "html/template" ) @@ -43,7 +44,10 @@ func RenderScriptTemplate(params TemplateParams) (string, error) { return tplBuffer.String(), nil } -const PrepareFsScriptConfigKey = "prepare-fs.sh" +const ( + PrepareFsScriptConfigKey = "prepare-fs.sh" + UnsupportedDistroExitCode = 42 +) // scriptTemplate is the main script to be run // in the prepare-fs init container before ES starts @@ -52,6 +56,13 @@ var scriptTemplate = template.Must(template.New("").Parse( set -eu + # the operator only works with the default ES distribution + license=/usr/share/elasticsearch/LICENSE.txt + if [[ ! -f $license || $(grep -Fxc "ELASTIC LICENSE AGREEMENT" $license) -ne 1 ]]; then + >&2 echo "unsupported_distribution" + exit ` + fmt.Sprintf("%d", UnsupportedDistroExitCode) + ` + fi + # compute time in seconds since the given start time function duration() { local start=$1 diff --git a/operators/pkg/controller/elasticsearch/keystore/updater.go b/operators/pkg/controller/elasticsearch/keystore/updater.go index 730456603c..aaa4e5f680 100644 --- a/operators/pkg/controller/elasticsearch/keystore/updater.go +++ b/operators/pkg/controller/elasticsearch/keystore/updater.go @@ -167,7 +167,7 @@ func (u *Updater) updateKeystore() (error, string) { for _, file := range fileInfos { if strings.HasPrefix(file.Name(), ".") { - log.Info(fmt.Sprintf("Ignoring %s", file.Name())) + log.Info("Ignoring file", "file_name", file.Name()) continue } diff --git a/operators/pkg/controller/elasticsearch/mutation/change_group.go b/operators/pkg/controller/elasticsearch/mutation/change_group.go index 4893cb183f..08e6922be0 100644 --- a/operators/pkg/controller/elasticsearch/mutation/change_group.go +++ b/operators/pkg/controller/elasticsearch/mutation/change_group.go @@ -90,16 +90,11 @@ func (s ChangeGroup) calculatePerformableChanges( ) error { changeStats := s.ChangeStats() - log.V(3).Info( + log.V(1).Info( "Calculating performable changes for group", "group_name", s.Name, "change_stats", changeStats, "pods_state_status", s.PodsState.Status(), - ) - - log.V(4).Info( - "Calculating performable changes for group", - "group_name", s.Name, "pods_state_summary", s.PodsState.Summary(), ) @@ -144,9 +139,10 @@ func (s ChangeGroup) calculatePerformableChanges( // schedule for creation as many pods as we can for _, newPodToCreate := range s.Changes.ToCreate { if changeStats.CurrentSurge >= maxSurge { - log.V(4).Info( + log.V(1).Info( "Hit the max surge limit in a group.", "group_name", s.Name, + "namespace", newPodToCreate.Pod.Namespace, "change_stats", changeStats, ) result.MaxSurgeGroups = append(result.MaxSurgeGroups, s.Name) @@ -160,6 +156,8 @@ func (s ChangeGroup) calculatePerformableChanges( "Scheduling a pod for creation", "group_name", s.Name, "change_stats", changeStats, + "pod_name", newPodToCreate.Pod.Name, + "namespace", newPodToCreate.Pod.Namespace, "mismatch_reasons", newPodToCreate.MismatchReasons, ) @@ -181,7 +179,7 @@ func (s ChangeGroup) calculatePerformableChanges( } if changeStats.CurrentUnavailable >= maxUnavailable { - log.V(4).Info( + log.V(1).Info( "Hit the max unavailable limit in a group.", 
"group_name", s.Name, "change_stats", changeStats, @@ -194,9 +192,11 @@ func (s ChangeGroup) calculatePerformableChanges( changeStats.CurrentUnavailable++ changeStats.CurrentRunningReadyPods-- - log.V(4).Info( + log.V(1).Info( "Scheduling a pod for deletion", "group_name", s.Name, + "pod_name", pod.Pod.Name, + "namespace", pod.Pod.Namespace, "change_stats", changeStats, ) diff --git a/operators/pkg/controller/elasticsearch/mutation/performable.go b/operators/pkg/controller/elasticsearch/mutation/performable.go index 223d217a1d..a3563b26b8 100644 --- a/operators/pkg/controller/elasticsearch/mutation/performable.go +++ b/operators/pkg/controller/elasticsearch/mutation/performable.go @@ -59,7 +59,7 @@ func CalculatePerformableChanges( if err != nil { return nil, err } - log.V(3).Info("Created change groups", "count", len(changeGroups)) + log.V(1).Info("Created change groups", "name", AllGroupName, "count", len(changeGroups)) podRestrictions := NewPodRestrictions(allPodsState) diff --git a/operators/pkg/controller/elasticsearch/observer/manager.go b/operators/pkg/controller/elasticsearch/observer/manager.go index f847f5f36c..df2a1054e8 100644 --- a/operators/pkg/controller/elasticsearch/observer/manager.go +++ b/operators/pkg/controller/elasticsearch/observer/manager.go @@ -53,7 +53,7 @@ func (m *Manager) Observe(cluster types.NamespacedName, caCerts []*x509.Certific case !exists: return m.createObserver(cluster, caCerts, esClient) case exists && (!observer.esClient.Equal(esClient) || !reflect.DeepEqual(observer.caCerts, caCerts)): - log.Info("Replacing observer HTTP client", "cluster", cluster) + log.Info("Replacing observer HTTP client", "namespace", cluster.Namespace, "es_name", cluster.Name) m.StopObserving(cluster) return m.createObserver(cluster, caCerts, esClient) default: diff --git a/operators/pkg/controller/elasticsearch/observer/observer.go b/operators/pkg/controller/elasticsearch/observer/observer.go index 1aaa2d99fd..eb28afb3da 100644 --- a/operators/pkg/controller/elasticsearch/observer/observer.go +++ b/operators/pkg/controller/elasticsearch/observer/observer.go @@ -93,7 +93,7 @@ func NewObserver( } observer.pmClientFactory = observer.createProcessManagerClient - log.Info("Creating observer", "cluster", cluster) + log.Info("Creating observer for cluster", "namespace", cluster.Namespace, "es_name", cluster.Name) return &observer } @@ -140,7 +140,7 @@ func (o *Observer) runPeriodically(ctx context.Context) { case <-ticker.C: o.retrieveState(ctx) case <-ctx.Done(): - log.Info("Stopping observer", "cluster", o.cluster) + log.Info("Stopping observer for cluster", "namespace", o.cluster.Namespace, "es_name", o.cluster.Name) return } } @@ -149,7 +149,7 @@ func (o *Observer) runPeriodically(ctx context.Context) { // retrieveState retrieves the current ES state, executes onObservation, // and stores the new state func (o *Observer) retrieveState(ctx context.Context) { - log.V(4).Info("Retrieving state", "cluster", o.cluster) + log.V(1).Info("Retrieving cluster state", "es_name", o.cluster.Name, "namespace", o.cluster.Namespace) timeoutCtx, cancel := context.WithTimeout(ctx, o.settings.RequestTimeout) defer cancel() diff --git a/operators/pkg/controller/elasticsearch/observer/state.go b/operators/pkg/controller/elasticsearch/observer/state.go index 8342f24fb4..ecf2931584 100644 --- a/operators/pkg/controller/elasticsearch/observer/state.go +++ b/operators/pkg/controller/elasticsearch/observer/state.go @@ -47,7 +47,8 @@ func RetrieveState( go func() { clusterState, err := 
esClient.GetClusterState(ctx) if err != nil { - log.V(3).Info("Unable to retrieve cluster state", "error", err) + // This is expected to happen from time to time + log.V(1).Info("Unable to retrieve cluster state", "error", err, "namespace", cluster.Namespace, "es_name", cluster.Name) clusterStateChan <- nil return } @@ -57,7 +58,7 @@ func RetrieveState( go func() { health, err := esClient.GetClusterHealth(ctx) if err != nil { - log.V(3).Info("Unable to retrieve cluster health", "error", err) + log.V(1).Info("Unable to retrieve cluster health", "error", err, "namespace", cluster.Namespace, "es_name", cluster.Name) healthChan <- nil return } @@ -67,7 +68,7 @@ func RetrieveState( go func() { license, err := esClient.GetLicense(ctx) if err != nil { - log.V(3).Info("Unable to retrieve cluster license", "error", err) + log.V(1).Info("Unable to retrieve cluster license", "error", err, "namespace", cluster.Namespace, "es_name", cluster.Name) licenseChan <- nil return } @@ -109,7 +110,7 @@ func RetrieveState( func getKeystoreStatus(ctx context.Context, pmClientFactory pmClientFactory, pod corev1.Pod) keystore.Status { if !k8s.IsPodReady(pod) { - log.V(3).Info("Pod not ready to retrieve keystore status", "pod_name", pod.Name) + log.V(1).Info("Pod not ready to retrieve keystore status", "namespace", pod.Namespace, "pod_name", pod.Name) return keystore.Status{State: keystore.WaitingState, Reason: "Pod not ready"} } @@ -117,10 +118,10 @@ func getKeystoreStatus(ctx context.Context, pmClientFactory pmClientFactory, pod defer client.Close() status, err := client.KeystoreStatus(ctx) if err != nil { - log.V(3).Info("Unable to retrieve keystore status", "pod_name", pod.Name, "error", err) + log.Error(err, "Unable to retrieve keystore status", "namespace", pod.Namespace, "pod_name", pod.Name) return keystore.Status{State: keystore.FailedState, Reason: "Unable to retrieve keystore status"} } - log.V(3).Info("Keystore updater", "pod_name", pod.Name, "status", status) + log.V(1).Info("Keystore status retrieved successfully", "namespace", pod.Namespace, "pod_name", pod.Name, "status", status) return status } diff --git a/operators/pkg/controller/elasticsearch/processmanager/process.go b/operators/pkg/controller/elasticsearch/processmanager/process.go index 8730b64642..3e338d1ec8 100644 --- a/operators/pkg/controller/elasticsearch/processmanager/process.go +++ b/operators/pkg/controller/elasticsearch/processmanager/process.go @@ -216,7 +216,7 @@ func exitCode(err error) int { exitCode = waitStatus.ExitStatus() } } else { - log.Info("Failed to terminate process", "err", err.Error()) + log.Error(err, "Failed to terminate process") exitCode = 1 } } diff --git a/operators/pkg/controller/elasticsearch/processmanager/state.go b/operators/pkg/controller/elasticsearch/processmanager/state.go index 9b4492f955..d4f8294f56 100644 --- a/operators/pkg/controller/elasticsearch/processmanager/state.go +++ b/operators/pkg/controller/elasticsearch/processmanager/state.go @@ -101,6 +101,7 @@ func (p *Process) updateState(action string, signal syscall.Signal, lastErr erro err := p.state.Write() if err != nil { + log.Error(err, "Failed to write process state, exiting") Exit(fmt.Sprintf("Failed to write process state: %s", err), 1) } diff --git a/operators/pkg/controller/elasticsearch/reconcile/state.go b/operators/pkg/controller/elasticsearch/reconcile/state.go index b02db91547..85c5bfc738 100644 --- a/operators/pkg/controller/elasticsearch/reconcile/state.go +++ b/operators/pkg/controller/elasticsearch/reconcile/state.go @@ -157,3 
+157,13 @@ func (s *State) UpdateElasticsearchInvalid(results []validation.Result) { s.AddEvent(corev1.EventTypeWarning, events.EventReasonValidation, r.Reason) } } + +// UpdateElasticsearchControllerVersion sets the elasticsearch controller version that last updated the ES cluster +func (s *State) UpdateElasticsearchControllerVersion(version string) { + s.status.ControllerVersion = version +} + +// GetElasticsearchControllerVersion returns the elasticsearch controller version that last updated the ES cluster +func (s *State) GetElasticsearchControllerVersion() string { + return s.status.ControllerVersion +} diff --git a/operators/pkg/controller/elasticsearch/restart/annotations.go b/operators/pkg/controller/elasticsearch/restart/annotations.go index 0cebd30426..b14aa758aa 100644 --- a/operators/pkg/controller/elasticsearch/restart/annotations.go +++ b/operators/pkg/controller/elasticsearch/restart/annotations.go @@ -82,7 +82,8 @@ func isAnnotatedForRestart(pod corev1.Pod) bool { func setPhase(client k8s.Client, pod corev1.Pod, phase Phase) error { log.V(1).Info( "Setting restart phase", - "pod", pod.Name, + "namespace", pod.Namespace, + "pod_name", pod.Name, "phase", phase, ) if pod.Annotations == nil { @@ -116,7 +117,8 @@ func getStrategy(pod corev1.Pod) Strategy { func setScheduleRestartAnnotations(client k8s.Client, pod corev1.Pod, strategy Strategy, startTime time.Time) error { log.V(1).Info( "Scheduling restart", - "pod", pod.Name, + "namespace", pod.Namespace, + "pod_name", pod.Name, "strategy", strategy, ) if pod.Annotations == nil { diff --git a/operators/pkg/controller/elasticsearch/restart/coordinated.go b/operators/pkg/controller/elasticsearch/restart/coordinated.go index 489b3f2cf1..7f062ef444 100644 --- a/operators/pkg/controller/elasticsearch/restart/coordinated.go +++ b/operators/pkg/controller/elasticsearch/restart/coordinated.go @@ -189,7 +189,7 @@ func (c *CoordinatedRestart) start() Step { } if podsDone != len(pods) { - log.V(1).Info("Some pods are not started yet", "expected", len(pods), "actual", podsDone) + log.V(1).Info("Some pods are not started yet", "namespace", c.Cluster.Namespace, "es_name", c.Cluster.Name, "expected", len(pods), "actual", podsDone) return false, nil // requeue } @@ -202,12 +202,12 @@ func (c *CoordinatedRestart) start() Step { return false, err } if !esReachable { - log.V(1).Info("Cluster is not ready to receive requests yet") + log.V(1).Info("Cluster is not ready to receive requests yet", "namespace", c.Cluster.Namespace, "es_name", c.Cluster.Name) return false, nil // requeue } // re-enable shard allocation - log.V(1).Info("Enabling shards allocation") + log.V(1).Info("Enabling shards allocation", "namespace", c.Cluster.Namespace, "es_name", c.Cluster.Name) ctx, cancel := context.WithTimeout(context.Background(), client.DefaultReqTimeout) defer cancel() if err := c.EsClient.EnableShardAllocation(ctx); err != nil { @@ -225,7 +225,7 @@ func (c *CoordinatedRestart) start() Step { corev1.EventTypeNormal, events.EventReasonRestart, fmt.Sprintf("Coordinated restart complete for cluster %s", c.Cluster.Name), ) - log.Info("Coordinated restart complete", "cluster", c.Cluster.Name) + log.Info("Coordinated restart complete", "es_name", c.Cluster.Name) return true, nil }, diff --git a/operators/pkg/controller/elasticsearch/restart/process_manager.go b/operators/pkg/controller/elasticsearch/restart/process_manager.go index b9791b5560..006594b56e 100644 --- a/operators/pkg/controller/elasticsearch/restart/process_manager.go +++ 
b/operators/pkg/controller/elasticsearch/restart/process_manager.go @@ -51,7 +51,7 @@ func ensureESProcessStopped(pmClient processmanager.Client, podName string) (boo // request ES process stop (idempotent) ctx, cancel := context.WithTimeout(context.Background(), processmanager.DefaultReqTimeout) defer cancel() - log.V(1).Info("Requesting ES process stop", "pod", podName) + log.V(1).Info("Requesting ES process stop", "pod_name", podName) status, err := pmClient.Stop(ctx) if err != nil { return false, err @@ -59,12 +59,12 @@ func ensureESProcessStopped(pmClient processmanager.Client, podName string) (boo // we got the current status back, check if the process is stopped if status.State != processmanager.Stopped { - log.V(1).Info("ES process is not stopped yet", "pod", podName, "state", status.State) + log.V(1).Info("ES process is not stopped yet", "pod_name", podName, "state", status.State) // not stopped yet, requeue return false, nil } - log.V(1).Info("ES process successfully stopped", "pod", podName) + log.V(1).Info("ES process successfully stopped", "pod_name", podName) return true, nil } @@ -73,7 +73,7 @@ func ensureESProcessStarted(pmClient processmanager.Client, podName string) (boo // request ES process start (idempotent) ctx, cancel := context.WithTimeout(context.Background(), processmanager.DefaultReqTimeout) defer cancel() - log.V(1).Info("Requesting ES process start", "pod", podName) + log.V(1).Info("Requesting ES process start", "pod_name", podName) status, err := pmClient.Start(ctx) if err != nil { return false, err @@ -81,11 +81,11 @@ func ensureESProcessStarted(pmClient processmanager.Client, podName string) (boo // we got the current status back, check if the process is started if status.State != processmanager.Started { - log.V(1).Info("ES process is not started yet", "pod", podName, "state", status.State) + log.V(1).Info("ES process is not started yet", "pod_name", podName, "state", status.State) // not started yet, requeue return false, nil } - log.V(1).Info("ES process successfully started", "pod", podName) + log.V(1).Info("ES process successfully started", "pod_name", podName) return true, nil } diff --git a/operators/pkg/controller/elasticsearch/restart/restart.go b/operators/pkg/controller/elasticsearch/restart/restart.go index 046bf6b40e..495c4d607e 100644 --- a/operators/pkg/controller/elasticsearch/restart/restart.go +++ b/operators/pkg/controller/elasticsearch/restart/restart.go @@ -80,7 +80,7 @@ func processOngoingRestarts(restartContext RestartContext) (done bool, err error return true, nil } - log.V(1).Info("Pods annotated for restart") + log.V(1).Info("Pods annotated for restart", "namespace", restartContext.Cluster.Namespace, "es_name", restartContext.Cluster.Name) if len(annotatedPods[StrategyCoordinated]) > 0 { // run the coordinated restart @@ -129,7 +129,7 @@ func schedulePodsRestart(c k8s.Client, pods pod.PodsWithConfig, strategy Strateg count := 0 for _, p := range pods { if isAnnotatedForRestart(p.Pod) { - log.V(1).Info("Pod already in a restart phase", "pod", p.Pod.Name) + log.V(1).Info("Pod already in a restart phase", "namespace", p.Pod.Namespace, "pod_name", p.Pod.Name) continue } if err := setScheduleRestartAnnotations(c, p.Pod, strategy, time.Now()); err != nil { diff --git a/operators/pkg/controller/elasticsearch/settings/masters.go b/operators/pkg/controller/elasticsearch/settings/masters.go index 00fb940431..81ad7b7783 100644 --- a/operators/pkg/controller/elasticsearch/settings/masters.go +++ 
b/operators/pkg/controller/elasticsearch/settings/masters.go @@ -88,10 +88,10 @@ func UpdateSeedHostsConfigMap( reconciled.Data = expected.Data }, PreCreate: func() { - log.Info("Creating seed hosts", "hosts", seedHosts) + log.Info("Creating seed hosts", "namespace", es.Namespace, "es_name", es.Name, "hosts", seedHosts) }, PostUpdate: func() { - log.Info("Seed hosts updated", "hosts", seedHosts) + log.Info("Seed hosts updated", "namespace", es.Namespace, "es_name", es.Name, "hosts", seedHosts) annotation.MarkPodsAsUpdated(c, client.ListOptions{ Namespace: es.Namespace, diff --git a/operators/pkg/controller/elasticsearch/settings/secure_settings.go b/operators/pkg/controller/elasticsearch/settings/secure_settings.go index 9506062d83..9307fdfb25 100644 --- a/operators/pkg/controller/elasticsearch/settings/secure_settings.go +++ b/operators/pkg/controller/elasticsearch/settings/secure_settings.go @@ -84,7 +84,7 @@ func retrieveUserSecret(c k8s.Client, eventsRecorder *events.Recorder, namespace err := c.Get(types.NamespacedName{Namespace: namespace, Name: name}, &userSecret) if err != nil && apierrors.IsNotFound(err) { msg := "Secure settings secret not found" - log.Info(msg, "name", name) + log.Info(msg, "namespace", namespace, "secret_name", name) eventsRecorder.AddEvent(corev1.EventTypeWarning, events.EventReasonUnexpected, msg+": "+name) } else if err != nil { return nil, err diff --git a/operators/pkg/controller/elasticsearch/version/version6/zen1.go b/operators/pkg/controller/elasticsearch/version/version6/zen1.go index 6bc4acb22f..1a9b5426d9 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/zen1.go +++ b/operators/pkg/controller/elasticsearch/version/version6/zen1.go @@ -61,6 +61,8 @@ func UpdateZen1Discovery( // Update the current value in the configuration of existing pods log.V(1).Info("Set minimum master nodes", "how", "configuration", + "namespace", cluster.Namespace, + "es_name", cluster.Name, "currentMasterCount", currentMasterCount, "nextMasterCount", nextMasterCount, "minimum_master_nodes", minimumMasterNodes, @@ -106,7 +108,10 @@ func UpdateZen1Discovery( // Do not attempt to make an API call if there is not enough available masters if currentAvailableMasterCount < minimumMasterNodes { + // This is expected to happen from time to time log.V(1).Info("Not enough masters to update the API", + "namespace", cluster.Namespace, + "es_name", cluster.Name, "current", currentAvailableMasterCount, "required", minimumMasterNodes) // We can't update the minimum master nodes right now, it is the case if a new master node is not created yet. 
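
[Editor's sketch, not part of the patch] For context on the minimum master nodes handling touched above: Zen1 discovery expects a strict majority of master-eligible nodes. The helper below is an assumption added for illustration, not a function from this patch.

package main

import "fmt"

// zen1Quorum returns the discovery.zen.minimum_master_nodes value for a given
// number of master-eligible nodes: a strict majority, n/2 + 1.
func zen1Quorum(masterEligibleNodes int) int {
	return masterEligibleNodes/2 + 1
}

func main() {
	// With 3 master-eligible nodes the quorum is 2, so the API update above is
	// skipped until at least 2 masters are actually available.
	fmt.Println(zen1Quorum(3)) // 2
}
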
@@ -114,8 +119,10 @@ func UpdateZen1Discovery( return true, nil } - log.Info("Update minimum master nodes", + log.Info("Updating minimum master nodes", "how", "api", + "namespace", cluster.Namespace, + "es_name", cluster.Name, "currentMasterCount", currentMasterCount, "nextMasterCount", nextMasterCount, "minimum_master_nodes", minimumMasterNodes, diff --git a/operators/pkg/controller/kibana/config/settings.go b/operators/pkg/controller/kibana/config/settings.go index 557649bc9d..705c7232d2 100644 --- a/operators/pkg/controller/kibana/config/settings.go +++ b/operators/pkg/controller/kibana/config/settings.go @@ -9,6 +9,7 @@ import ( commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/association" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/es" @@ -37,7 +38,7 @@ func NewConfigSettings(client k8s.Client, kb v1alpha1.Kibana) (CanonicalConfig, return CanonicalConfig{}, err } - esAuthSettings, err := elasticsearchAuthSettings(client, kb) + username, password, err := association.ElasticsearchAuthSettings(client, &kb) if err != nil { return CanonicalConfig{}, err } @@ -48,7 +49,12 @@ func NewConfigSettings(client k8s.Client, kb v1alpha1.Kibana) (CanonicalConfig, err = cfg.MergeWith( settings.MustCanonicalConfig(kibanaTLSSettings(kb)), settings.MustCanonicalConfig(elasticsearchTLSSettings(kb)), - settings.MustCanonicalConfig(esAuthSettings), + settings.MustCanonicalConfig( + map[string]interface{}{ + ElasticsearchUsername: username, + ElasticsearchPassword: password, + }, + ), userSettings, ) if err != nil { @@ -85,25 +91,3 @@ func elasticsearchTLSSettings(kb v1alpha1.Kibana) map[string]interface{} { ElasticsearchSslVerificationMode: "certificate", } } - -func elasticsearchAuthSettings(client k8s.Client, kb v1alpha1.Kibana) (map[string]interface{}, error) { - authSettings := map[string]interface{}{} - auth := kb.Spec.Elasticsearch.Auth - if auth.Inline != nil { - authSettings = map[string]interface{}{ - ElasticsearchUsername: auth.Inline.Username, - ElasticsearchPassword: auth.Inline.Password, - } - } - if auth.SecretKeyRef != nil { - secret, err := es.GetAuthSecret(client, kb) - if err != nil { - return nil, err - } - authSettings = map[string]interface{}{ - ElasticsearchUsername: auth.SecretKeyRef.Key, - ElasticsearchPassword: string(secret.Data[auth.SecretKeyRef.Key]), - } - } - return authSettings, nil -} diff --git a/operators/pkg/controller/kibana/config/settings_test.go b/operators/pkg/controller/kibana/config/settings_test.go index 63e26974c8..a9a0807f5c 100644 --- a/operators/pkg/controller/kibana/config/settings_test.go +++ b/operators/pkg/controller/kibana/config/settings_test.go @@ -21,6 +21,8 @@ var defaultConfig = []byte(` elasticsearch: hosts: - "" + username: "" + password: "" ssl: certificateAuthorities: /usr/share/kibana/config/elasticsearch-certs/tls.crt verificationMode: certificate diff --git a/operators/pkg/controller/kibana/driver.go b/operators/pkg/controller/kibana/driver.go index d3a558e904..a2b96faa48 100644 --- a/operators/pkg/controller/kibana/driver.go +++ b/operators/pkg/controller/kibana/driver.go @@ -10,6 +10,7 @@ import ( kbtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" 
"github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/association/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/finalizer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/operator" @@ -23,9 +24,9 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/label" kbname "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/securesettings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/version/version6" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/version/version7" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" @@ -33,6 +34,14 @@ import ( "k8s.io/client-go/tools/record" ) +// initContainersParameters is used to generate the init container that will load the secure settings into a keystore +var initContainersParameters = keystore.InitContainerParameters{ + KeystoreCreateCommand: "/usr/share/kibana/bin/kibana-keystore create", + KeystoreAddCommand: "/usr/share/kibana/bin/kibana-keystore add", + SecureSettingsVolumeMountPath: keystore.SecureSettingsVolumeMountPath, + DataVolumePath: volume.DataVolumeMountPath, +} + type driver struct { client k8s.Client scheme *runtime.Scheme @@ -57,18 +66,27 @@ func secretWatchFinalizer(kibana kbtype.Kibana, watches watches.DynamicWatches) func (d *driver) deploymentParams(kb *kbtype.Kibana) (*DeploymentParams, error) { // setup a keystore with secure settings in an init container, if specified by the user - volumes, initContainers, secureSettingsVersion, err := securesettings.Resources(d.client, d.recorder, d.dynamicWatches, *kb) + //volumes, initContainers, secureSettingsVersion, err := securesettings.Resources(d.client, d.recorder, d.dynamicWatches, *kb) + keystoreResources, err := keystore.NewResources( + d.client, + d.recorder, + d.dynamicWatches, + kb, + initContainersParameters, + ) if err != nil { return nil, err } - kibanaPodSpec := pod.NewPodTemplateSpec(*kb, volumes, initContainers) + kibanaPodSpec := pod.NewPodTemplateSpec(*kb, keystoreResources) // Build a checksum of the configuration, which we can use to cause the Deployment to roll Kibana // instances in case of any change in the CA file, secure settings or credentials contents. // This is done because Kibana does not support updating those without restarting the process. 
configChecksum := sha256.New224() - configChecksum.Write([]byte(secureSettingsVersion)) + if keystoreResources != nil { + configChecksum.Write([]byte(keystoreResources.Version)) + } // we need to deref the secret here (if any) to include it in the checksum otherwise Kibana will not be rolled on contents changes if kb.Spec.Elasticsearch.Auth.SecretKeyRef != nil { @@ -178,7 +196,7 @@ func (d *driver) Reconcile( ) *reconciler.Results { results := reconciler.Results{} if !kb.Spec.Elasticsearch.IsConfigured() { - log.Info("Aborting Kibana deployment reconciliation as no Elasticsearch backend is configured") + log.Info("Aborting Kibana deployment reconciliation as no Elasticsearch backend is configured", "namespace", kb.Namespace, "kibana_name", kb.Name) return &results } diff --git a/operators/pkg/controller/kibana/driver_test.go b/operators/pkg/controller/kibana/driver_test.go index 279fb11f6e..a803ad9593 100644 --- a/operators/pkg/controller/kibana/driver_test.go +++ b/operators/pkg/controller/kibana/driver_test.go @@ -145,7 +145,7 @@ func Test_driver_deploymentParams(t *testing.T) { NodeCount: 1, Elasticsearch: kbtype.BackendElasticsearch{ URL: "https://localhost:9200", - Auth: kbtype.ElasticsearchAuth{ + Auth: v1alpha1.ElasticsearchAuth{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ Name: "test-auth", diff --git a/operators/pkg/controller/kibana/kibana_controller.go b/operators/pkg/controller/kibana/kibana_controller.go index 75a316a0b4..64d851d6cc 100644 --- a/operators/pkg/controller/kibana/kibana_controller.go +++ b/operators/pkg/controller/kibana/kibana_controller.go @@ -11,12 +11,12 @@ import ( kibanav1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/association/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/finalizer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/operator" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/securesettings" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -129,9 +129,9 @@ func (r *ReconcileKibana) Reconcile(request reconcile.Request) (reconcile.Result // atomically update the iteration to support concurrent runs. currentIteration := atomic.AddInt64(&r.iteration, 1) iterationStartTime := time.Now() - log.Info("Start reconcile iteration", "iteration", currentIteration) + log.Info("Start reconcile iteration", "iteration", currentIteration, "namespace", request.Namespace, "kibana_name", request.Name) defer func() { - log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime)) + log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime), "namespace", request.Namespace, "kibana_name", request.Name) }() // Fetch the Kibana instance @@ -148,13 +148,14 @@ func (r *ReconcileKibana) Reconcile(request reconcile.Request) (reconcile.Result } if common.IsPaused(kb.ObjectMeta) { - log.Info("Paused : skipping reconciliation", "iteration", currentIteration) + log.Info("Object is paused. 
Skipping reconciliation", "namespace", kb.Namespace, "kibana_name", kb.Name, "iteration", currentIteration) return common.PauseRequeue, nil } if err := r.finalizers.Handle(kb, r.finalizersFor(*kb)...); err != nil { if errors.IsConflict(err) { - log.V(1).Info("Conflict while handling secret watch finalizer") + // Conflicts are expected and should be resolved on next loop + log.V(1).Info("Conflict while handling secret watch finalizer", "namespace", kb.Namespace, "kibana_name", kb.Name) return reconcile.Result{Requeue: true}, nil } return reconcile.Result{}, err @@ -170,6 +171,7 @@ func (r *ReconcileKibana) Reconcile(request reconcile.Request) (reconcile.Result return reconcile.Result{}, err } state := NewState(request, kb) + state.UpdateKibanaControllerVersion(r.params.OperatorInfo.BuildInfo.Version) driver, err := newDriver(r, r.scheme, *ver, r.dynamicWatches, r.recorder) if err != nil { return reconcile.Result{}, err @@ -179,7 +181,7 @@ func (r *ReconcileKibana) Reconcile(request reconcile.Request) (reconcile.Result // update status err = r.updateStatus(state) if err != nil && errors.IsConflict(err) { - log.V(1).Info("Conflict while updating status") + log.V(1).Info("Conflict while updating status", "namespace", kb.Namespace, "kibana_name", kb.Name) return reconcile.Result{Requeue: true}, nil } return results.WithError(err).Aggregate() @@ -193,7 +195,7 @@ func (r *ReconcileKibana) updateStatus(state State) error { if state.Kibana.Status.IsDegraded(current.Status) { r.recorder.Event(current, corev1.EventTypeWarning, events.EventReasonUnhealthy, "Kibana health degraded") } - log.Info("Updating status", "iteration", atomic.LoadInt64(&r.iteration)) + log.Info("Updating status", "iteration", atomic.LoadInt64(&r.iteration), "namespace", state.Kibana.Namespace, "kibana_name", state.Kibana.Name) return r.Status().Update(state.Kibana) } @@ -201,6 +203,6 @@ func (r *ReconcileKibana) updateStatus(state State) error { func (r *ReconcileKibana) finalizersFor(kb kibanav1alpha1.Kibana) []finalizer.Finalizer { return []finalizer.Finalizer{ secretWatchFinalizer(kb, r.dynamicWatches), - securesettings.Finalizer(k8s.ExtractNamespacedName(&kb), r.dynamicWatches), + keystore.Finalizer(k8s.ExtractNamespacedName(&kb), r.dynamicWatches, &kb), } } diff --git a/operators/pkg/controller/kibana/name/name.go b/operators/pkg/controller/kibana/name/name.go index cd31b926a9..35d0be62e0 100644 --- a/operators/pkg/controller/kibana/name/name.go +++ b/operators/pkg/controller/kibana/name/name.go @@ -16,8 +16,7 @@ const ( // this leaves 63 - 36 = 27 characters for a suffix. MaxSuffixLength = MaxLabelLength - MaxElasticsearchNameLength - httpServiceSuffix = "http" - secureSettingsSecretSuffix = "secure-settings" + httpServiceSuffix = "http" ) // KBNamer is a Namer that is configured with the defaults for resources related to a Kibana resource. 
@@ -33,7 +32,3 @@ func HTTPService(kbName string) string { func Deployment(kbName string) string { return KBNamer.Suffix(kbName) } - -func SecureSettingsSecret(kbName string) string { - return KBNamer.Suffix(kbName, secureSettingsSecretSuffix) -} diff --git a/operators/pkg/controller/kibana/pod/pod.go b/operators/pkg/controller/kibana/pod/pod.go index 6991d14d8b..ce9900a039 100644 --- a/operators/pkg/controller/kibana/pod/pod.go +++ b/operators/pkg/controller/kibana/pod/pod.go @@ -6,6 +6,7 @@ package pod import ( "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/association/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/volume" @@ -52,17 +53,22 @@ func imageWithVersion(image string, version string) string { return stringsutil.Concat(image, ":", version) } -func NewPodTemplateSpec(kb v1alpha1.Kibana, additionalVolumes []corev1.Volume, initContainers []corev1.Container) corev1.PodTemplateSpec { - return defaults.NewPodTemplateBuilder(kb.Spec.PodTemplate, v1alpha1.KibanaContainerName). +func NewPodTemplateSpec(kb v1alpha1.Kibana, keystore *keystore.Resources) corev1.PodTemplateSpec { + builder := defaults.NewPodTemplateBuilder(kb.Spec.PodTemplate, v1alpha1.KibanaContainerName). WithLabels(label.NewLabels(kb.Name)). WithDockerImage(kb.Spec.Image, imageWithVersion(defaultImageRepositoryAndName, kb.Spec.Version)). WithReadinessProbe(readinessProbe(kb.Spec.HTTP.TLS.Enabled())). WithPorts(ports). - WithVolumes(append(additionalVolumes, volume.KibanaDataVolume.Volume())...). - WithVolumeMounts(volume.KibanaDataVolume.VolumeMount()). - WithInitContainers(initContainers...). - WithInitContainerDefaults(). - PodTemplate + WithVolumes(volume.KibanaDataVolume.Volume()). + WithVolumeMounts(volume.KibanaDataVolume.VolumeMount()) + + if keystore != nil { + builder.WithVolumes(keystore.Volume). + WithInitContainers(keystore.InitContainer). + WithInitContainerDefaults() + } + + return builder.PodTemplate } // GetKibanaContainer returns the Kibana container from the given podSpec. 
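For reference, the intended call site for the new NewPodTemplateSpec signature is the Kibana driver change earlier in this patch: keystore.NewResources may return nil when there is nothing to set up (for instance, no secure settings referenced), and a nil value simply means no keystore volume or init container is added. A condensed sketch of that pairing, with error handling trimmed and names taken from the driver hunk above:

    keystoreResources, err := keystore.NewResources(
        d.client, d.recorder, d.dynamicWatches, kb, initContainersParameters,
    )
    if err != nil {
        return nil, err
    }
    // nil keystoreResources: plain Kibana pod, no extra volume or init container
    kibanaPodSpec := pod.NewPodTemplateSpec(*kb, keystoreResources)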
diff --git a/operators/pkg/controller/kibana/pod/pod_test.go b/operators/pkg/controller/kibana/pod/pod_test.go index 3e6361baa8..e8bde21630 100644 --- a/operators/pkg/controller/kibana/pod/pod_test.go +++ b/operators/pkg/controller/kibana/pod/pod_test.go @@ -8,8 +8,8 @@ import ( "testing" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/association/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/label" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -45,11 +45,10 @@ func Test_imageWithVersion(t *testing.T) { func TestNewPodTemplateSpec(t *testing.T) { tests := []struct { - name string - kb v1alpha1.Kibana - additionalVolumes []corev1.Volume - initContainers []corev1.Container - assertions func(pod corev1.PodTemplateSpec) + name string + kb v1alpha1.Kibana + keystore *keystore.Resources + assertions func(pod corev1.PodTemplateSpec) }{ { name: "defaults", @@ -58,6 +57,7 @@ func TestNewPodTemplateSpec(t *testing.T) { Version: "7.1.0", }, }, + keystore: nil, assertions: func(pod corev1.PodTemplateSpec) { assert.Equal(t, false, *pod.Spec.AutomountServiceAccountToken) assert.Len(t, pod.Spec.Containers, 1) @@ -72,14 +72,16 @@ func TestNewPodTemplateSpec(t *testing.T) { }, }, { - name: "with additional volumes and init containers", + name: "with additional volumes and init containers for the Keystore", kb: v1alpha1.Kibana{ Spec: v1alpha1.KibanaSpec{ Version: "7.1.0", }, }, - additionalVolumes: []corev1.Volume{{Name: "vol"}}, - initContainers: []corev1.Container{{Name: "init"}}, + keystore: &keystore.Resources{ + InitContainer: corev1.Container{Name: "init"}, + Volume: corev1.Volume{Name: "vol"}, + }, assertions: func(pod corev1.PodTemplateSpec) { assert.Len(t, pod.Spec.InitContainers, 1) assert.Len(t, pod.Spec.Volumes, 2) @@ -91,6 +93,7 @@ func TestNewPodTemplateSpec(t *testing.T) { Image: "my-custom-image:1.0.0", Version: "7.1.0", }}, + keystore: nil, assertions: func(pod corev1.PodTemplateSpec) { assert.Equal(t, "my-custom-image:1.0.0", GetKibanaContainer(pod.Spec).Image) }, @@ -108,12 +111,14 @@ func TestNewPodTemplateSpec(t *testing.T) { }, }, }}, + keystore: nil, assertions: func(pod corev1.PodTemplateSpec) { assert.Len(t, pod.Spec.InitContainers, 1) }, }, { - name: "with user-provided labels", + name: "with user-provided labels", + keystore: nil, kb: v1alpha1.Kibana{ ObjectMeta: metav1.ObjectMeta{ Name: "kibana-name", @@ -191,7 +196,7 @@ func TestNewPodTemplateSpec(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := NewPodTemplateSpec(tt.kb, tt.additionalVolumes, tt.initContainers) + got := NewPodTemplateSpec(tt.kb, tt.keystore) tt.assertions(got) }) } diff --git a/operators/pkg/controller/kibana/securesettings/securesettings.go b/operators/pkg/controller/kibana/securesettings/securesettings.go deleted file mode 100644 index 5ff2d980f0..0000000000 --- a/operators/pkg/controller/kibana/securesettings/securesettings.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package securesettings - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - corev1 "k8s.io/api/core/v1" - "k8s.io/client-go/tools/record" - logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" -) - -var log = logf.Log.WithName("secure-settings") - -// Resources optionally returns a volume and init container to include in Kibana pods, -// in order to create a Keystore from secure settings referenced in the Kibana spec. -func Resources( - c k8s.Client, - recorder record.EventRecorder, - watches watches.DynamicWatches, - kb v1alpha1.Kibana, -) ([]corev1.Volume, []corev1.Container, string, error) { - // setup a volume from the user-provided secure settings secret - secretVolume, version, err := secureSettingsVolume(c, recorder, watches, kb) - if err != nil { - return nil, nil, "", err - } - if secretVolume == nil { - // nothing to do - return nil, nil, "", nil - } - - // build an init container to create Kibana keystore from the secure settings volume - initContainer := initContainer(*secretVolume) - - return []corev1.Volume{secretVolume.Volume()}, []corev1.Container{initContainer}, version, nil -} diff --git a/operators/pkg/controller/kibana/securesettings/securesettings_test.go b/operators/pkg/controller/kibana/securesettings/securesettings_test.go deleted file mode 100644 index a391d7d61e..0000000000 --- a/operators/pkg/controller/kibana/securesettings/securesettings_test.go +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package securesettings - -import ( - "reflect" - "testing" - - commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" - watches2 "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes/scheme" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -var ( - testSecureSettingsSecretName = "secure-settings-secret" - testSecureSettingsSecret = corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "namespace", - Name: testSecureSettingsSecretName, - ResourceVersion: "resource-version", - }, - } - testSecureSettingsSecretRef = commonv1alpha1.SecretRef{ - SecretName: testSecureSettingsSecretName, - } - testKibana = v1alpha1.Kibana{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "namespace", - Name: "kibana", - }, - } - testKibanaWithSecureSettings = v1alpha1.Kibana{ - ObjectMeta: testKibana.ObjectMeta, - Spec: v1alpha1.KibanaSpec{ - SecureSettings: &testSecureSettingsSecretRef, - }, - } -) - -func TestResources(t *testing.T) { - tests := []struct { - name string - client k8s.Client - kb v1alpha1.Kibana - wantVolumes int - wantContainers int - wantVersion string - }{ - { - name: "no secure settings specified: no resources", - client: k8s.WrapClient(fake.NewFakeClient()), - kb: v1alpha1.Kibana{}, - wantVolumes: 0, - wantContainers: 0, - wantVersion: "", - }, - { - name: "secure settings specified: return volume, init container and version", - client: k8s.WrapClient(fake.NewFakeClient(&testSecureSettingsSecret)), - kb: testKibanaWithSecureSettings, - wantVolumes: 1, - wantContainers: 1, - wantVersion: testSecureSettingsSecret.ResourceVersion, - }, - { - name: "secure settings specified but secret not there: no resources", - client: k8s.WrapClient(fake.NewFakeClient()), - kb: testKibanaWithSecureSettings, - wantVolumes: 0, - wantContainers: 0, - wantVersion: "", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - recorder := record.NewFakeRecorder(1000) - watches := watches2.NewDynamicWatches() - require.NoError(t, watches.InjectScheme(scheme.Scheme)) - wantVolumes, wantContainers, wantVersion, err := Resources(tt.client, recorder, watches, tt.kb) - require.NoError(t, err) - if !reflect.DeepEqual(len(wantVolumes), tt.wantVolumes) { - t.Errorf("Resources() got = %v, want %v", wantVolumes, tt.wantVolumes) - } - if !reflect.DeepEqual(len(wantContainers), tt.wantContainers) { - t.Errorf("Resources() got1 = %v, want %v", wantContainers, tt.wantContainers) - } - if wantVersion != tt.wantVersion { - t.Errorf("Resources() got2 = %v, want %v", wantVersion, tt.wantVersion) - } - }) - } -} diff --git a/operators/pkg/controller/kibana/state.go b/operators/pkg/controller/kibana/state.go index 072628d0a1..f0d66fc722 100644 --- a/operators/pkg/controller/kibana/state.go +++ b/operators/pkg/controller/kibana/state.go @@ -36,3 +36,13 @@ func (s State) UpdateKibanaState(deployment v1.Deployment) { } } } + +// UpdateKibanaControllerVersion updates the Kibana status with the controller version that last updated the Kibana instance +func (s *State) UpdateKibanaControllerVersion(version string) { + s.Kibana.Status.ControllerVersion = version +} + +// GetKibanaControllerVersion returns the controller version that last updated the 
Kibana instance +func (s *State) GetKibanaControllerVersion() string { + return s.Kibana.Status.ControllerVersion +} diff --git a/operators/pkg/controller/kibana/volume/volumes.go b/operators/pkg/controller/kibana/volume/volumes.go index 9e21898588..ecfacc61a4 100644 --- a/operators/pkg/controller/kibana/volume/volumes.go +++ b/operators/pkg/controller/kibana/volume/volumes.go @@ -12,9 +12,6 @@ const ( DataVolumeName = "kibana-data" DataVolumeMountPath = "/usr/share/kibana/data" - SecureSettingsVolumeName = "elastic-internal-secure-settings" - SecureSettingsVolumeMountPath = "/mnt/elastic-internal/secure-settings" - HTTPCertificatesSecretVolumeName = "elastic-internal-http-certificates" HTTPCertificatesSecretVolumeMountPath = "/mnt/elastic-internal/http-certs" ) diff --git a/operators/pkg/controller/kibanaassociation/association_controller.go b/operators/pkg/controller/kibanaassociation/association_controller.go index bfab59820e..fd8771fdea 100644 --- a/operators/pkg/controller/kibanaassociation/association_controller.go +++ b/operators/pkg/controller/kibanaassociation/association_controller.go @@ -109,9 +109,9 @@ func (r *ReconcileAssociation) Reconcile(request reconcile.Request) (reconcile.R // atomically update the iteration to support concurrent runs. currentIteration := atomic.AddInt64(&r.iteration, 1) iterationStartTime := time.Now() - log.Info("Start reconcile iteration", "iteration", currentIteration, "request", request) + log.Info("Start reconcile iteration", "iteration", currentIteration, "namespace", request.Namespace, "kibana_name", request.Name) defer func() { - log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime)) + log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime), "namespace", request.Namespace, "kibana_name", request.Name) }() // retrieve Kibana resource @@ -134,6 +134,7 @@ func (r *ReconcileAssociation) Reconcile(request reconcile.Request) (reconcile.R ) if err != nil { if apierrors.IsConflict(err) { + // Conflicts are expected here and should be resolved on next loop log.V(1).Info("Conflict while handling finalizer") return reconcile.Result{Requeue: true}, nil } @@ -147,7 +148,7 @@ func (r *ReconcileAssociation) Reconcile(request reconcile.Request) (reconcile.R } if common.IsPaused(kibana.ObjectMeta) { - log.Info("Paused : skipping reconciliation", "iteration", currentIteration) + log.Info("Object is paused. Skipping reconciliation", "namespace", kibana.Namespace, "kibana_name", kibana.Name, "iteration", currentIteration) return common.PauseRequeue, nil } @@ -157,7 +158,8 @@ func (r *ReconcileAssociation) Reconcile(request reconcile.Request) (reconcile.R kibana.Status.AssociationStatus = newStatus if err := r.Status().Update(&kibana); err != nil { if apierrors.IsConflict(err) { - log.V(1).Info("Conflict while updating status") + // Conflicts are expected and will be resolved on next loop + log.V(1).Info("Conflict while updating status", "namespace", kibana.Namespace, "kibana_name", kibana.Name) return reconcile.Result{Requeue: true}, nil } @@ -183,7 +185,7 @@ func (r *ReconcileAssociation) reconcileInternal(kibana kbtype.Kibana) (commonv1 // garbage collect leftover resources that are not required anymore if err := deleteOrphanedResources(r, kibana); err != nil { - log.Error(err, "Error while trying to delete orphaned resources. Continuing.") + log.Error(err, "Error while trying to delete orphaned resources. 
Continuing.", "namespace", kibana.Namespace, "kibana_name", kibana.Name) } if kibana.Spec.ElasticsearchRef.Name == "" { @@ -230,7 +232,7 @@ func (r *ReconcileAssociation) reconcileInternal(kibana kbtype.Kibana) (commonv1 // remove connection details if they are set if (kibana.Spec.Elasticsearch != kbtype.BackendElasticsearch{}) { kibana.Spec.Elasticsearch = kbtype.BackendElasticsearch{} - log.Info("Removing Elasticsearch configuration from managed association", "kibana", kibana.Name) + log.Info("Removing Elasticsearch configuration from managed association", "namespace", kibana.Namespace, "kibana_name", kibana.Name) if err := r.Update(&kibana); err != nil { return commonv1alpha1.AssociationPending, err } @@ -257,7 +259,7 @@ func (r *ReconcileAssociation) reconcileInternal(kibana kbtype.Kibana) (commonv1 if !reflect.DeepEqual(kibana.Spec.Elasticsearch, expectedEsConfig) { kibana.Spec.Elasticsearch = expectedEsConfig - log.Info("Updating Kibana spec with Elasticsearch backend configuration") + log.Info("Updating Kibana spec with Elasticsearch backend configuration", "namespace", kibana.Namespace, "kibana_name", kibana.Name) if err := r.Update(&kibana); err != nil { return commonv1alpha1.AssociationPending, err } @@ -288,14 +290,14 @@ func deleteOrphanedResources(c k8s.Client, kibana kbtype.Kibana) error { if !kibana.Spec.ElasticsearchRef.IsDefined() { // look for association secrets owned by this kibana instance // which should not exist since no ES referenced in the spec - log.Info("Deleting", "secret", k8s.ExtractNamespacedName(&s)) + log.Info("Deleting secret", "namespace", s.Namespace, "secret_name", s.Name, "kibana_name", kibana.Name) if err := c.Delete(&s); err != nil && !apierrors.IsNotFound(err) { return err } } else if value, ok := s.Labels[common.TypeLabelName]; ok && value == user.UserType && esRefNamespace != s.Namespace { // User secret may live in an other namespace, check if it has changed - log.Info("Deleting", "secret", k8s.ExtractNamespacedName(&s)) + log.Info("Deleting secret", "namespace", s.Namespace, "secretname", s.Name, "kibana_name", kibana.Name) if err := c.Delete(&s); err != nil && !apierrors.IsNotFound(err) { return err } diff --git a/operators/pkg/controller/license/license_controller.go b/operators/pkg/controller/license/license_controller.go index b1d28369bb..096a06e342 100644 --- a/operators/pkg/controller/license/license_controller.go +++ b/operators/pkg/controller/license/license_controller.go @@ -51,16 +51,16 @@ func (r *ReconcileLicenses) Reconcile(request reconcile.Request) (reconcile.Resu // atomically update the iteration to support concurrent runs. 
currentIteration := atomic.AddInt64(&r.iteration, 1) iterationStartTime := time.Now() - log.Info("Start reconcile iteration", "iteration", currentIteration, "request", request) + log.Info("Start reconcile iteration", "iteration", currentIteration, "namespace", request.Namespace, "es_name", request.Name) defer func() { - log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime)) + log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime), "namespace", request.Namespace, "es_name", request.Name) }() result, err := r.reconcileInternal(request) if result.Requeue { - log.Info("Re-queuing new license check immediately (rate-limited)", "cluster", request.NamespacedName) + log.Info("Re-queuing new license check immediately (rate-limited)", "namespace", request.Namespace, "es_name", request.Name) } if result.RequeueAfter > 0 { - log.Info("Re-queuing new license check", "cluster", request.NamespacedName, "RequeueAfter", result.RequeueAfter) + log.Info("Re-queuing new license check", "namespace", request.Namespace, "es_name", request.Name, "RequeueAfter", result.RequeueAfter) } return result, err } diff --git a/operators/pkg/controller/license/trial/trial_controller.go b/operators/pkg/controller/license/trial/trial_controller.go index 5eaa8b067c..854949af5f 100644 --- a/operators/pkg/controller/license/trial/trial_controller.go +++ b/operators/pkg/controller/license/trial/trial_controller.go @@ -56,9 +56,9 @@ func (r *ReconcileTrials) Reconcile(request reconcile.Request) (reconcile.Result // atomically update the iteration to support concurrent runs. currentIteration := atomic.AddInt64(&r.iteration, 1) iterationStartTime := time.Now() - log.Info("Start reconcile iteration", "iteration", currentIteration, "request", request) + log.Info("Start reconcile iteration", "iteration", currentIteration, "namespace", request.Namespace, "secret_name", request.Name) defer func() { - log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime)) + log.Info("End reconcile iteration", "iteration", currentIteration, "took", time.Since(iterationStartTime), "namespace", request.Namespace, "secret_name", request.Name) }() secret, license, err := licensing.TrialLicense(r, request.NamespacedName) diff --git a/operators/pkg/dev/portforward/pod_forwarder.go b/operators/pkg/dev/portforward/pod_forwarder.go index 26d8d0f2fa..71436e3986 100644 --- a/operators/pkg/dev/portforward/pod_forwarder.go +++ b/operators/pkg/dev/portforward/pod_forwarder.go @@ -201,7 +201,7 @@ func (f *podForwarder) Run(ctx context.Context) error { defer runCtxCancel() if f.clientset != nil { - log.Info("Watching pod for changes", "pod", f.podNSN) + log.Info("Watching pod for changes", "namespace", f.podNSN.Namespace, "pod_name", f.podNSN.Name) w, err := f.clientset.CoreV1().Pods(f.podNSN.Namespace).Watch(metav1.ListOptions{ FieldSelector: fields.OneTermEqualSelector("metadata.name", f.podNSN.Name).String(), }) @@ -217,7 +217,8 @@ func (f *podForwarder) Run(ctx context.Context) error { if evt.Type == watch.Deleted || evt.Type == watch.Error || evt.Type == "" { log.Info( "Pod is deleted or watch failed/closed, closing pod forwarder", - "pod", f.podNSN, + "namespace", f.podNSN.Namespace, + "pod_name", f.podNSN.Name, ) runCtxCancel() return diff --git a/operators/test/e2e/apm/configuration_test.go b/operators/test/e2e/apm/configuration_test.go new file mode 100644 index 0000000000..8e7c4e021b --- /dev/null +++ 
b/operators/test/e2e/apm/configuration_test.go @@ -0,0 +1,210 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package apm + +import ( + "fmt" + "testing" + + apmtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" + "github.com/elastic/cloud-on-k8s/operators/test/e2e/test/apmserver" + "github.com/elastic/cloud-on-k8s/operators/test/e2e/test/elasticsearch" + "github.com/pkg/errors" + "github.com/stretchr/testify/require" + yaml "gopkg.in/yaml.v2" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +const ( + APMKeystoreBin = "/usr/share/apm-server/apm-server" + APMKeystoreOption = "keystore" +) + +var APMKeystoreCmd = []string{APMKeystoreBin, APMKeystoreOption} + +type PartialApmConfiguration struct { + Output struct { + Elasticsearch struct { + CompressionLevel int `yaml:"compression_level"` + } `yaml:"elasticsearch"` + } `yaml:"output"` +} + +func TestUpdateConfiguration(t *testing.T) { + + // user-provided secure settings secret + secureSettingsSecretName := "secure-settings-secret" + secureSettings := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secureSettingsSecretName, + Namespace: test.Namespace, + }, + Data: map[string][]byte{ + "logging.verbose": []byte("true"), + }, + } + + name := "test-apm-configuration" + esBuilder := elasticsearch.NewBuilder(name). + WithESMasterDataNodes(1, elasticsearch.DefaultResources) + apmBuilder := apmserver.NewBuilder(name). + WithNamespace(test.Namespace). + WithVersion(test.ElasticStackVersion). 
+ WithRestrictedSecurityContext() + + var previousPodUID *types.UID + + initStepsFn := func(k *test.K8sClient) test.StepList { + return test.StepList{ + { + Name: "Create secure settings secret", + Test: func(t *testing.T) { + // remove if already exists (ignoring errors) + _ = k.Client.Delete(&secureSettings) + // and create a fresh one + err := k.Client.Create(&secureSettings) + require.NoError(t, err) + }, + }, + // Keystore should be empty + test.CheckKeystoreEntries(k, test.ApmServerPodListOptions(name), APMKeystoreCmd, nil), + } + } + apmNamespacedName := types.NamespacedName{ + Name: name, + Namespace: test.Namespace, + } + + stepsFn := func(k *test.K8sClient) test.StepList { + return test.StepList{ + { + Name: "Check the value of a parameter in the configuration", + Test: func(t *testing.T) { + config, err := partialAPMConfiguration(k, name) + require.NoError(t, err) + require.Equal(t, config.Output.Elasticsearch.CompressionLevel, 5) // 5 is the expected default value + }, + }, + test.Step{ + Name: "Add a Keystore to the APM server", + Test: func(t *testing.T) { + // get current pod id + pods, err := k.GetPods(test.ApmServerPodListOptions(name)) + require.NoError(t, err) + require.True(t, len(pods) == 1) + previousPodUID = &pods[0].UID + + var apm apmtype.ApmServer + require.NoError(t, k.Client.Get(apmNamespacedName, &apm)) + apm.Spec.SecureSettings = &v1alpha1.SecretRef{ + SecretName: secureSettingsSecretName, + } + require.NoError(t, k.Client.Update(&apm)) + }, + }, + test.Step{ + Name: "APM Pod should be recreated", + Test: test.Eventually(func() error { + // get current pod id + pods, err := k.GetPods(test.ApmServerPodListOptions(name)) + if err != nil { + return err + } + if len(pods) != 1 { + return fmt.Errorf("1 APM pod expected, got %d", len(pods)) + } + if pods[0].UID == *previousPodUID { + return fmt.Errorf("APM pod is still the same, uid: %s", pods[0].UID) + } + return nil + }), + }, + + test.CheckKeystoreEntries(k, test.ApmServerPodListOptions(name), APMKeystoreCmd, []string{"logging.verbose"}), + + test.Step{ + Name: "Customize configuration of the APM server", + Test: func(t *testing.T) { + // get current pod id + pods, err := k.GetPods(test.ApmServerPodListOptions(name)) + require.NoError(t, err) + require.True(t, len(pods) == 1) + previousPodUID = &pods[0].UID + + var apm apmtype.ApmServer + require.NoError(t, k.Client.Get(apmNamespacedName, &apm)) + customConfig := commonv1alpha1.Config{ + Data: map[string]interface{}{"output.elasticsearch.compression_level": 1}, + } + apm.Spec.Config = &customConfig + require.NoError(t, k.Client.Update(&apm)) + }, + }, + test.Step{ + Name: "APM Pod should be recreated", + Test: test.Eventually(func() error { + // get current pod id + pods, err := k.GetPods(test.ApmServerPodListOptions(name)) + if err != nil { + return err + } + if len(pods) != 1 { + return fmt.Errorf("1 APM pod expected, got %d", len(pods)) + } + if pods[0].UID == *previousPodUID { + return fmt.Errorf("APM pod is still the same, uid: %s", pods[0].UID) + } + return nil + }), + }, + + test.Step{ + Name: "Check the value of a parameter in the configuration", + Test: func(t *testing.T) { + config, err := partialAPMConfiguration(k, name) + require.NoError(t, err) + require.Equal(t, config.Output.Elasticsearch.CompressionLevel, 1) // value should be updated to 1 + }, + }, + + // cleanup extra resources + test.Step{ + Name: "Delete secure settings secret", + Test: func(t *testing.T) { + err := k.Client.Delete(&secureSettings) + require.NoError(t, err) + }, + }, + } + } + 
+ test.Sequence(initStepsFn, stepsFn, esBuilder, apmBuilder).RunSequential(t) + +} + +func partialAPMConfiguration(k *test.K8sClient, name string) (PartialApmConfiguration, error) { + var config PartialApmConfiguration + // get current pod id + pods, err := k.GetPods(test.ApmServerPodListOptions(name)) + if err != nil { + return config, err + } + // exec into the pod to list keystore entries + stdout, stderr, err := k.Exec(k8s.ExtractNamespacedName(&pods[0]), []string{"cat", "/usr/share/apm-server/config/config-secret/apm-server.yml"}) + if err != nil { + return config, errors.Wrap(err, fmt.Sprintf("stdout:\n%s\nstderr:\n%s", stdout, stderr)) + } + err = yaml.Unmarshal([]byte(stdout), &config) + if err != nil { + return config, err + } + return config, nil +} diff --git a/operators/test/e2e/kb/keystore_test.go b/operators/test/e2e/kb/keystore_test.go index df0d054c75..27096bb84e 100644 --- a/operators/test/e2e/kb/keystore_test.go +++ b/operators/test/e2e/kb/keystore_test.go @@ -17,6 +17,12 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + KibanaKeystoreBin = "/usr/share/kibana/bin/kibana-keystore" +) + +var KibanaKeystoreCmd = []string{KibanaKeystoreBin} + func TestUpdateKibanaSecureSettings(t *testing.T) { // user-provided secure settings secret secureSettingsSecretName := "secure-settings-secret" @@ -55,7 +61,7 @@ func TestUpdateKibanaSecureSettings(t *testing.T) { } stepsFn := func(k *test.K8sClient) test.StepList { return test.StepList{ - kibana.CheckKibanaKeystoreEntries(k, kbBuilder.Kibana, []string{"logging.verbose"}), + test.CheckKeystoreEntries(k, test.KibanaPodListOptions(name), KibanaKeystoreCmd, []string{"logging.verbose"}), // modify the secure settings secret test.Step{ Name: "Modify secure settings secret", @@ -71,7 +77,7 @@ func TestUpdateKibanaSecureSettings(t *testing.T) { }, // keystore should be updated accordingly - kibana.CheckKibanaKeystoreEntries(k, kbBuilder.Kibana, []string{"logging.json", "logging.verbose"}), + test.CheckKeystoreEntries(k, test.KibanaPodListOptions(name), KibanaKeystoreCmd, []string{"logging.json", "logging.verbose"}), // remove the secure settings reference test.Step{ @@ -89,7 +95,7 @@ func TestUpdateKibanaSecureSettings(t *testing.T) { }, // keystore should be updated accordingly - kibana.CheckKibanaKeystoreEntries(k, kbBuilder.Kibana, nil), + test.CheckKeystoreEntries(k, test.KibanaPodListOptions(name), KibanaKeystoreCmd, nil), // cleanup extra resources test.Step{ diff --git a/operators/test/e2e/test/apmserver/builder.go b/operators/test/e2e/test/apmserver/builder.go index 072697744f..5594d98413 100644 --- a/operators/test/e2e/test/apmserver/builder.go +++ b/operators/test/e2e/test/apmserver/builder.go @@ -7,7 +7,10 @@ package apmserver import ( apmtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" common "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -16,6 +19,35 @@ type Builder struct { ApmServer apmtype.ApmServer } +func NewBuilder(name string) Builder { + meta := metav1.ObjectMeta{ + Name: name, + Namespace: test.Namespace, + } + return Builder{ + ApmServer: apmtype.ApmServer{ + ObjectMeta: meta, + Spec: apmtype.ApmServerSpec{ + NodeCount: 1, + Version: test.ElasticStackVersion, + Output: apmtype.Output{ + Elasticsearch: 
apmtype.ElasticsearchOutput{ + ElasticsearchRef: &commonv1alpha1.ObjectSelector{ + Name: name, + Namespace: test.Namespace, + }, + }, + }, + PodTemplate: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + SecurityContext: test.DefaultSecurityContext(), + }, + }, + }, + }, + } +} + func (b Builder) WithRestrictedSecurityContext() Builder { b.ApmServer.Spec.PodTemplate.Spec.SecurityContext = test.DefaultSecurityContext() return b diff --git a/operators/test/e2e/test/apmserver/checks_k8s.go b/operators/test/e2e/test/apmserver/checks_k8s.go index d26c97f8a4..5b60eb0faa 100644 --- a/operators/test/e2e/test/apmserver/checks_k8s.go +++ b/operators/test/e2e/test/apmserver/checks_k8s.go @@ -83,7 +83,7 @@ func CheckServices(b Builder, k *test.K8sClient) test.Step { Name: "ApmServer services should be created", Test: test.Eventually(func() error { for _, s := range []string{ - b.ApmServer.Name + "-apm-server", + b.ApmServer.Name + "-apm-http", } { if _, err := k.GetService(s); err != nil { return err @@ -100,7 +100,7 @@ func CheckServicesEndpoints(b Builder, k *test.K8sClient) test.Step { Name: "ApmServer services should have endpoints", Test: test.Eventually(func() error { for endpointName, addrCount := range map[string]int{ - b.ApmServer.Name + "-apm-server": int(b.ApmServer.Spec.NodeCount), + b.ApmServer.Name + "-apm-http": int(b.ApmServer.Spec.NodeCount), } { endpoints, err := k.GetEndpoints(endpointName) if err != nil { diff --git a/operators/test/e2e/test/elasticsearch/steps_init.go b/operators/test/e2e/test/elasticsearch/steps_init.go index f2cf9785b9..a7afcd6731 100644 --- a/operators/test/e2e/test/elasticsearch/steps_init.go +++ b/operators/test/e2e/test/elasticsearch/steps_init.go @@ -67,7 +67,7 @@ func (b Builder) InitTestSteps(k *test.K8sClient) test.StepList { return err } if err == nil { - return fmt.Errorf("elasticsearch %s is still there", b.Elasticsearch.Name) + return fmt.Errorf("elasticsearch %s is still there", k8s.ExtractNamespacedName(&b.Elasticsearch)) } return nil })(t) diff --git a/operators/test/e2e/test/k8s_client.go b/operators/test/e2e/test/k8s_client.go index 953728c1f9..c1c1d7d69f 100644 --- a/operators/test/e2e/test/k8s_client.go +++ b/operators/test/e2e/test/k8s_client.go @@ -14,7 +14,7 @@ import ( assoctype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/associations/v1alpha1" estype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" kbtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver" + apmlabels "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/labels" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates/http" @@ -324,8 +324,8 @@ func ApmServerPodListOptions(apmName string) client.ListOptions { return client.ListOptions{ Namespace: Namespace, LabelSelector: labels.SelectorFromSet(labels.Set(map[string]string{ - common.TypeLabelName: apmserver.Type, - apmserver.ApmServerNameLabelName: apmName, + common.TypeLabelName: apmlabels.Type, + apmlabels.ApmServerNameLabelName: apmName, }))} } diff --git a/operators/test/e2e/test/kibana/checks_keystore.go b/operators/test/e2e/test/kibana/checks_keystore.go deleted file mode 100644 index b2be9b29a1..0000000000 --- a/operators/test/e2e/test/kibana/checks_keystore.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright Elasticsearch 
B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package kibana - -import ( - "fmt" - "reflect" - "strings" - - kbtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" - "github.com/pkg/errors" - corev1 "k8s.io/api/core/v1" -) - -const ( - keystoreBin = "/usr/share/kibana/bin/kibana-keystore" -) - -func CheckKibanaKeystoreEntries(k *test.K8sClient, kb kbtype.Kibana, expectedKeys []string) test.Step { - return test.Step{ - Name: "Kibana secure settings should eventually be set in all nodes keystore", - Test: test.Eventually(func() error { - pods, err := k.GetPods(test.KibanaPodListOptions(kb.Name)) - if err != nil { - return err - } - return test.OnAllPods(pods, func(p corev1.Pod) error { - // exec into the pod to list keystore entries - stdout, stderr, err := k.Exec(k8s.ExtractNamespacedName(&p), []string{keystoreBin, "list"}) - if err != nil { - return errors.Wrap(err, fmt.Sprintf("stdout:\n%s\nstderr:\n%s", stdout, stderr)) - } - - // parse entries from stdout - var entries []string - // remove trailing newlines and whitespaces - trimmed := strings.TrimSpace(stdout) - // split by lines, unless no output - if trimmed != "" { - entries = strings.Split(trimmed, "\n") - } - - if !reflect.DeepEqual(expectedKeys, entries) { - return fmt.Errorf("invalid keystore entries. Expected: %s. Actual: %s", expectedKeys, entries) - } - return nil - }) - }), - } -} diff --git a/operators/test/e2e/test/params.go b/operators/test/e2e/test/params.go index 6b2bfb79da..2a4b02a124 100644 --- a/operators/test/e2e/test/params.go +++ b/operators/test/e2e/test/params.go @@ -33,5 +33,5 @@ func init() { flag.Parse() logf.SetLogger(logf.ZapLogger(true)) - log.Info("Info", "version", ElasticStackVersion, "ns", Namespace) + log.Info("Info", "version", ElasticStackVersion, "namespace", Namespace) } diff --git a/operators/test/e2e/test/utils.go b/operators/test/e2e/test/utils.go index bbf66cbc20..5b020fcfe9 100644 --- a/operators/test/e2e/test/utils.go +++ b/operators/test/e2e/test/utils.go @@ -7,11 +7,17 @@ package test import ( "fmt" "os" + "reflect" + "strings" "testing" "time" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/retry" + "github.com/pkg/errors" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" ) const ( @@ -19,6 +25,39 @@ const ( defaultTimeout = 5 * time.Minute ) +func CheckKeystoreEntries(k *K8sClient, listOption client.ListOptions, KeystoreCmd []string, expectedKeys []string) Step { + return Step{ + Name: "secure settings should eventually be set in all nodes keystore", + Test: Eventually(func() error { + pods, err := k.GetPods(listOption) + if err != nil { + return err + } + return OnAllPods(pods, func(p corev1.Pod) error { + // exec into the pod to list keystore entries + stdout, stderr, err := k.Exec(k8s.ExtractNamespacedName(&p), append(KeystoreCmd, "list")) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("stdout:\n%s\nstderr:\n%s", stdout, stderr)) + } + + // parse entries from stdout + var entries []string + // remove trailing newlines and whitespaces + trimmed := strings.TrimSpace(stdout) + // split by lines, unless no output + if trimmed != "" { + entries = 
strings.Split(trimmed, "\n") + } + + if !reflect.DeepEqual(expectedKeys, entries) { + return fmt.Errorf("invalid keystore entries. Expected: %s. Actual: %s", expectedKeys, entries) + } + return nil + }) + }), + } +} + // ExitOnErr exits with code 1 if the given error is not nil func ExitOnErr(err error) { if err != nil { From bb7e511de6578b9f813612f226e24d9c55d10901 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 15 Jul 2019 10:13:48 +0200 Subject: [PATCH 05/31] Add sset upgrade expectations based on Generation --- .../elasticsearch/driver/default.go | 2 ++ .../controller/elasticsearch/driver/driver.go | 7 ++-- .../elasticsearch/driver/generation.go | 34 +++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 operators/pkg/controller/elasticsearch/driver/generation.go diff --git a/operators/pkg/controller/elasticsearch/driver/default.go b/operators/pkg/controller/elasticsearch/driver/default.go index 0557e25725..56dd6cf279 100644 --- a/operators/pkg/controller/elasticsearch/driver/default.go +++ b/operators/pkg/controller/elasticsearch/driver/default.go @@ -51,6 +51,8 @@ type defaultDriver struct { // Options are the options that the driver was created with. Options + expectations *Expectations + // supportedVersions verifies whether we can support upgrading from the current pods. supportedVersions esversion.LowestHighestSupportedVersions diff --git a/operators/pkg/controller/elasticsearch/driver/driver.go b/operators/pkg/controller/elasticsearch/driver/driver.go index 7056a84616..a3e4703e9b 100644 --- a/operators/pkg/controller/elasticsearch/driver/driver.go +++ b/operators/pkg/controller/elasticsearch/driver/driver.go @@ -52,9 +52,9 @@ type Options struct { Observers *observer.Manager // DynamicWatches are handles to currently registered dynamic watches. DynamicWatches watches.DynamicWatches - // PodsExpectations control ongoing pod creations and deletions - // that might not be in-sync yet with our k8s client cache - PodsExpectations *reconciler.Expectations + // Expectations control some expectations set on resources in the cache, in order to + // avoid doing certain operations if the cache hasn't seen an up-to-date resource yet. + Expectations *Expectations } // NewDriver returns a Driver that can operate the provided version @@ -66,6 +66,7 @@ func NewDriver(opts Options) (Driver, error) { driver := &defaultDriver{ Options: opts, + expectations: NewGenerationExpectations(), observedStateResolver: opts.Observers.ObservedStateResolver, resourcesStateResolver: esreconcile.NewResourcesStateFromAPI, usersReconciler: user.ReconcileUsers, diff --git a/operators/pkg/controller/elasticsearch/driver/generation.go b/operators/pkg/controller/elasticsearch/driver/generation.go new file mode 100644 index 0000000000..5bedbb063e --- /dev/null +++ b/operators/pkg/controller/elasticsearch/driver/generation.go @@ -0,0 +1,34 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package driver + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +// TODO: garbage collect/finalize deprecated UIDs +type Expectations struct { + generations map[types.UID]int64 +} + +func NewGenerationExpectations() *Expectations { + return &Expectations{ + generations: make(map[types.UID]int64), + } +} + +func (e *Expectations) ExpectGeneration(meta metav1.ObjectMeta) { + e.generations[meta.UID] = meta.Generation +} + +func (e *Expectations) GenerationExpected(metaObjs ...metav1.ObjectMeta) bool { + for _, meta := range metaObjs { + if expectedGen, exists := e.generations[meta.UID]; exists && meta.Generation < expectedGen { + return false + } + } + return true +} From eda6051bf81c77620c122df4688d0e2c406a6750 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 15 Jul 2019 10:15:02 +0200 Subject: [PATCH 06/31] Improve es client routing allocation settings calls --- .../controller/elasticsearch/client/client.go | 6 +- .../controller/elasticsearch/client/model.go | 35 +++++++++++- .../elasticsearch/client/model_test.go | 11 ++++ .../pkg/controller/elasticsearch/client/v6.go | 55 ++++++++++++++++--- .../elasticsearch/restart/elasticsearch.go | 2 +- 5 files changed, 94 insertions(+), 15 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/client/client.go b/operators/pkg/controller/elasticsearch/client/client.go index e7e2a79218..3f629783dd 100644 --- a/operators/pkg/controller/elasticsearch/client/client.go +++ b/operators/pkg/controller/elasticsearch/client/client.go @@ -69,13 +69,15 @@ type Client interface { GetClusterInfo(ctx context.Context) (Info, error) // GetClusterState returns the current cluster state GetClusterState(ctx context.Context) (ClusterState, error) + // GetClusterRoutingAllocation retrieves the cluster routing allocation settings. + GetClusterRoutingAllocation(ctx context.Context) (ClusterRoutingAllocation, error) // UpdateSettings updates the settings of a cluster. UpdateSettings(ctx context.Context, settings Settings) error // ExcludeFromShardAllocation takes a comma-separated string of node names and // configures transient allocation excludes for the given nodes. ExcludeFromShardAllocation(ctx context.Context, nodes string) error - // DisableShardAllocation disables shards allocation on the cluster. - DisableShardAllocation(ctx context.Context) error + // DisableReplicaShardsAllocation disables shards allocation on the cluster (only primaries are allocated). + DisableReplicaShardsAllocation(ctx context.Context) error // EnableShardAllocation enables shards allocation on the cluster. EnableShardAllocation(ctx context.Context) error // SyncedFlush requests a synced flush on the cluster. diff --git a/operators/pkg/controller/elasticsearch/client/model.go b/operators/pkg/controller/elasticsearch/client/model.go index d967abdc32..1fadb6eb3d 100644 --- a/operators/pkg/controller/elasticsearch/client/model.go +++ b/operators/pkg/controller/elasticsearch/client/model.go @@ -52,6 +52,14 @@ type Nodes struct { Nodes map[string]Node `json:"nodes"` } +func (n Nodes) Names() []string { + names := make([]string, 0, len(n.Nodes)) + for _, node := range n.Nodes { + names = append(names, node.Name) + } + return names +} + // Node partially models an Elasticsearch node retrieved from /_nodes type Node struct { Name string `json:"name"` @@ -187,13 +195,34 @@ func (s Shard) Key() string { // AllocationSettings model a subset of the supported attributes for dynamic Elasticsearch cluster settings. 
type AllocationSettings struct { - ExcludeName string `json:"cluster.routing.allocation.exclude._name"` - Enable string `json:"cluster.routing.allocation.enable"` + Cluster ClusterRoutingSettings `json:"cluster,omitempty"` } // TODO awareness settings +type ClusterRoutingSettings struct { + Routing RoutingSettings `json:"routing,omitempty"` +} + +type RoutingSettings struct { + Allocation RoutingAllocationSettings `json:"allocation,omitempty"` +} + +type RoutingAllocationSettings struct { + Exclude AllocationExclude `json:"exclude,omitempty"` + Enable string `json:"enable,omitempty"` +} + +type AllocationExclude struct { + Name string `json:"_name,omitempty"` +} + +func (s AllocationSettings) IsShardsAllocationEnabled() bool { + enable := s.Cluster.Routing.Allocation.Enable + return enable == "" || enable == "all" +} + // ClusterRoutingAllocation models a subset of transient allocation settings for an Elasticsearch cluster. type ClusterRoutingAllocation struct { - Transient AllocationSettings `json:"transient"` + Transient AllocationSettings `json:"transient,omitempty"` } // DiscoveryZen set minimum number of master eligible nodes that must be visible to form a cluster. diff --git a/operators/pkg/controller/elasticsearch/client/model_test.go b/operators/pkg/controller/elasticsearch/client/model_test.go index 27173e9d1b..f183323a6d 100644 --- a/operators/pkg/controller/elasticsearch/client/model_test.go +++ b/operators/pkg/controller/elasticsearch/client/model_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestModel_RemoteCluster(t *testing.T) { @@ -56,3 +57,13 @@ func TestModel_RemoteCluster(t *testing.T) { }) } } + +func TestClusterRoutingAllocation(t *testing.T) { + clusterSettingsSample := `{"persistent":{},"transient":{"cluster":{"routing":{"allocation":{"enable":"none","exclude":{"_name":"excluded"}}}}}}` + expected := ClusterRoutingAllocation{Transient: AllocationSettings{Cluster: ClusterRoutingSettings{Routing: RoutingSettings{Allocation: RoutingAllocationSettings{Enable: "none", Exclude: AllocationExclude{Name: "excluded"}}}}}} + + var settings ClusterRoutingAllocation + require.NoError(t, json.Unmarshal([]byte(clusterSettingsSample), &settings)) + require.Equal(t, expected, settings) + require.Equal(t, false, settings.Transient.IsShardsAllocationEnabled()) +} diff --git a/operators/pkg/controller/elasticsearch/client/v6.go b/operators/pkg/controller/elasticsearch/client/v6.go index 4ba5baec06..6b740dd35e 100644 --- a/operators/pkg/controller/elasticsearch/client/v6.go +++ b/operators/pkg/controller/elasticsearch/client/v6.go @@ -19,6 +19,11 @@ func (c *clientV6) GetClusterInfo(ctx context.Context) (Info, error) { return info, c.get(ctx, "/", &info) } +func (c *clientV6) GetClusterRoutingAllocation(ctx context.Context) (ClusterRoutingAllocation, error) { + var settings ClusterRoutingAllocation + return settings, c.get(ctx, "/_cluster/settings", &settings) +} + func (c *clientV6) GetClusterState(ctx context.Context) (ClusterState, error) { var clusterState ClusterState return clusterState, c.get(ctx, "/_cluster/state/dispatcher,master_node,nodes,routing_table", &clusterState) @@ -29,18 +34,50 @@ func (c *clientV6) UpdateSettings(ctx context.Context, settings Settings) error } func (c *clientV6) ExcludeFromShardAllocation(ctx context.Context, nodes string) error { - allocationSetting := ClusterRoutingAllocation{AllocationSettings{ExcludeName: nodes, Enable: "all"}} - return c.put(ctx, 
"/_cluster/settings", allocationSetting, nil) + allocationSettings := ClusterRoutingAllocation{ + Transient: AllocationSettings{ + Cluster: ClusterRoutingSettings{ + Routing: RoutingSettings{ + Allocation: RoutingAllocationSettings{ + Exclude: AllocationExclude{ + Name: nodes, + }, + }, + }, + }, + }, + } + return c.put(ctx, "/_cluster/settings", allocationSettings, nil) } func (c *clientV6) EnableShardAllocation(ctx context.Context) error { - allocationSetting := ClusterRoutingAllocation{AllocationSettings{Enable: "all"}} - return c.put(ctx, "/_cluster/settings", allocationSetting, nil) -} - -func (c *clientV6) DisableShardAllocation(ctx context.Context) error { - allocationSetting := ClusterRoutingAllocation{AllocationSettings{Enable: "none"}} - return c.put(ctx, "/_cluster/settings", allocationSetting, nil) + allocationSettings := ClusterRoutingAllocation{ + Transient: AllocationSettings{ + Cluster: ClusterRoutingSettings{ + Routing: RoutingSettings{ + Allocation: RoutingAllocationSettings{ + Enable: "all", + }, + }, + }, + }, + } + return c.put(ctx, "/_cluster/settings", allocationSettings, nil) +} + +func (c *clientV6) DisableReplicaShardsAllocation(ctx context.Context) error { + allocationSettings := ClusterRoutingAllocation{ + Transient: AllocationSettings{ + Cluster: ClusterRoutingSettings{ + Routing: RoutingSettings{ + Allocation: RoutingAllocationSettings{ + Enable: "primaries", + }, + }, + }, + }, + } + return c.put(ctx, "/_cluster/settings", allocationSettings, nil) } func (c *clientV6) SyncedFlush(ctx context.Context) error { diff --git a/operators/pkg/controller/elasticsearch/restart/elasticsearch.go b/operators/pkg/controller/elasticsearch/restart/elasticsearch.go index 4a2e46f2d0..a4ef705d5c 100644 --- a/operators/pkg/controller/elasticsearch/restart/elasticsearch.go +++ b/operators/pkg/controller/elasticsearch/restart/elasticsearch.go @@ -18,7 +18,7 @@ func prepareClusterForStop(esClient client.Client) error { log.V(1).Info("Disabling shards allocation for coordinated restart") ctx, cancel := context.WithTimeout(context.Background(), client.DefaultReqTimeout) defer cancel() - if err := esClient.DisableShardAllocation(ctx); err != nil { + if err := esClient.DisableReplicaShardsAllocation(ctx); err != nil { return err } From 242411c449d6ba9aa6f5c291abae4a4c0aec8c98 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 15 Jul 2019 10:15:28 +0200 Subject: [PATCH 07/31] Add a helper struct to lazily call the ES cluster for upgrade needs --- .../elasticsearch/driver/esstate.go | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 operators/pkg/controller/elasticsearch/driver/esstate.go diff --git a/operators/pkg/controller/elasticsearch/driver/esstate.go b/operators/pkg/controller/elasticsearch/driver/esstate.go new file mode 100644 index 0000000000..0b45dcc513 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/driver/esstate.go @@ -0,0 +1,129 @@ +package driver + +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +import ( + "context" + "sync" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" +) + +type ESState interface { + NodesInCluster(nodeNames []string) (bool, error) + ShardAllocationsEnabled() (bool, error) + GreenHealth() (bool, error) +} + +type LazyESState struct { + esClient esclient.Client + *lazyNodes + *lazyShardsAllocationEnabled + *lazyGreenHealth +} + +func NewLazyESState(esClient esclient.Client) ESState { + return &LazyESState{ + esClient: esClient, + lazyNodes: &lazyNodes{esClient: esClient}, + lazyShardsAllocationEnabled: &lazyShardsAllocationEnabled{esClient: esClient}, + lazyGreenHealth: &lazyGreenHealth{esClient: esClient}, + } +} + +func initOnce(once *sync.Once, f func() error) error { + var err error + once.Do(func() { + err = f() + }) + return err +} + +// -- Nodes + +type lazyNodes struct { + once sync.Once + esClient esclient.Client + nodes []string +} + +func (n *lazyNodes) initialize() error { + ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) + defer cancel() + nodes, err := n.esClient.GetNodes(ctx) + if err != nil { + return err + } + n.nodes = nodes.Names() + return nil +} + +func (n *lazyNodes) nodeInCluster(nodeName string) (bool, error) { + if err := initOnce(&n.once, n.initialize); err != nil { + return false, err + } + return stringsutil.StringInSlice(nodeName, n.nodes), nil +} + +func (n *lazyNodes) NodesInCluster(nodeNames []string) (bool, error) { + if err := initOnce(&n.once, n.initialize); err != nil { + return false, err + } + return stringsutil.StringsInSlice(nodeNames, n.nodes), nil +} + +// -- Shards allocation enabled + +type lazyShardsAllocationEnabled struct { + once sync.Once + esClient esclient.Client + enabled bool +} + +func (s *lazyShardsAllocationEnabled) initialize() error { + ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) + defer cancel() + allocationSettings, err := s.esClient.GetClusterRoutingAllocation(ctx) + if err != nil { + return err + } + s.enabled = allocationSettings.Transient.IsShardsAllocationEnabled() + return nil +} + +func (s *lazyShardsAllocationEnabled) ShardAllocationsEnabled() (bool, error) { + if err := initOnce(&s.once, s.initialize); err != nil { + return false, err + } + return s.enabled, nil +} + +// -- Green health + +type lazyGreenHealth struct { + once sync.Once + esClient esclient.Client + greenHealth bool +} + +func (h *lazyGreenHealth) initialize() error { + ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) + defer cancel() + health, err := h.esClient.GetClusterHealth(ctx) + if err != nil { + return err + } + h.greenHealth = health.Status == string(v1alpha1.ElasticsearchGreenHealth) + return nil +} + +func (h *lazyGreenHealth) GreenHealth() (bool, error) { + if err := initOnce(&h.once, h.initialize); err != nil { + return false, err + } + return h.greenHealth, nil +} From a08537c7591fa9268a4f9ddd53d074db29560e6c Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 15 Jul 2019 10:16:39 +0200 Subject: [PATCH 08/31] Add StatefulSet helper functions --- .../pkg/controller/elasticsearch/sset/list.go | 44 +++++++++++++-- .../pkg/controller/elasticsearch/sset/pod.go | 53 ++++++++++++++++++- 2 files changed, 92 insertions(+), 5 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/sset/list.go 
b/operators/pkg/controller/elasticsearch/sset/list.go index 9c357436f7..60e5279c87 100644 --- a/operators/pkg/controller/elasticsearch/sset/list.go +++ b/operators/pkg/controller/elasticsearch/sset/list.go @@ -5,12 +5,12 @@ package sset import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) type StatefulSetList []appsv1.StatefulSet @@ -33,3 +33,41 @@ func (l StatefulSetList) GetByName(ssetName string) (appsv1.StatefulSet, bool) { } return appsv1.StatefulSet{}, false } + +func (l StatefulSetList) ObjectMetas() []metav1.ObjectMeta { + objs := make([]metav1.ObjectMeta, len(l)) + for _, sset := range l { + objs = append(objs, sset.ObjectMeta) + } + return objs +} + +// RevisionUpdateScheduled returns true if at least one revision update is scheduled. +func (l StatefulSetList) RevisionUpdateScheduled() bool { + for _, s := range l { + if s.Status.UpdateRevision != "" && s.Status.UpdateRevision != s.Status.CurrentRevision { + return true + } + } + return false +} + +// PodNames returns the names of the pods for all StatefulSets in the list. +func (l StatefulSetList) PodNames() []string { + var names []string + for _, s := range l { + names = append(names, PodNames(s)...) + } + return names +} + +// GetUpdatePartition returns the updateStrategy.Partition index, or falls back to the number of replicas if not set. +func GetUpdatePartition(statefulSet appsv1.StatefulSet) int32 { + if statefulSet.Spec.UpdateStrategy.RollingUpdate.Partition != nil { + return *statefulSet.Spec.UpdateStrategy.RollingUpdate.Partition + } + if statefulSet.Spec.Replicas != nil { + return *statefulSet.Spec.Replicas + } + return 0 +} diff --git a/operators/pkg/controller/elasticsearch/sset/pod.go b/operators/pkg/controller/elasticsearch/sset/pod.go index 9af5d8074f..bd6ec6fe64 100644 --- a/operators/pkg/controller/elasticsearch/sset/pod.go +++ b/operators/pkg/controller/elasticsearch/sset/pod.go @@ -4,8 +4,57 @@ package sset -import "fmt" +import ( + "fmt" -func PodName(ssetName string, ordinal int) string { + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" +) + +func PodName(ssetName string, ordinal int32) string { return fmt.Sprintf("%s-%d", ssetName, ordinal) } + +func PodNames(sset appsv1.StatefulSet) []string { + names := make([]string, 0, Replicas(sset)) + for i := int32(0); i < Replicas(sset); i++ { + names = append(names, PodName(sset.Name, i)) + } + return names +} + +func PodRevision(pod corev1.Pod) string { + return pod.Labels[appsv1.StatefulSetRevisionLabel] +} + +// ScheduledUpgradesDone returns true if all pods scheduled for upgrade have been upgraded. +// This is done by checking the revision of pods whose ordinal is higher or equal than the StatefulSet +// rollingUpdate.Partition index. 
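+// A pod that cannot be found is assumed to be in the process of being terminated and recreated
+// by the StatefulSet controller, so the upgrade is not considered done for it yet.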
+func ScheduledUpgradesDone(c k8s.Client, statefulSets StatefulSetList) (bool, error) { + for _, s := range statefulSets { + if s.Status.UpdateRevision == "" { + // no upgrade scheduled + continue + } + partition := GetUpdatePartition(s) + for i := Replicas(s) - 1; i >= partition; i-- { + var pod corev1.Pod + err := c.Get(types.NamespacedName{Namespace: s.Namespace, Name: PodName(s.Name, i)}, &pod) + if errors.IsNotFound(err) { + // pod probably being terminated + return false, nil + } + if err != nil { + return false, err + } + if PodRevision(pod) != s.Status.UpdateRevision { + return false, nil + } + } + } + return true, nil +} From 89c2078c87fec59c7583201eb9602d193ff850f3 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 15 Jul 2019 10:17:02 +0200 Subject: [PATCH 09/31] Get rid of the previous expectations implementation --- .../common/reconciler/expectations.go | 327 ------------------ .../common/reconciler/expectations_test.go | 164 --------- .../common/watches/expectations_watch.go | 74 ---- .../common/watches/expectations_watch_test.go | 143 -------- 4 files changed, 708 deletions(-) delete mode 100644 operators/pkg/controller/common/reconciler/expectations.go delete mode 100644 operators/pkg/controller/common/reconciler/expectations_test.go delete mode 100644 operators/pkg/controller/common/watches/expectations_watch.go delete mode 100644 operators/pkg/controller/common/watches/expectations_watch_test.go diff --git a/operators/pkg/controller/common/reconciler/expectations.go b/operators/pkg/controller/common/reconciler/expectations.go deleted file mode 100644 index 4ac0c5452b..0000000000 --- a/operators/pkg/controller/common/reconciler/expectations.go +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package reconciler - -// Expectations are a way for controllers to mitigate effects of -// the K8s client cache lagging behind the apiserver. -// -// ## Context: client cache might be out-of-sync -// -// The default K8s client implementation does use a cache for all resources we get or list. -// Listing pods effectively returns pods that have been observed in the cache, relying on a -// watch being set up by the client behind the scenes. -// Hence, resources we get from a list operation may slightly lag behind resources in the apiserver. -// The cache is not invalidated on resource creation. The following can happen in a controller: -// -// * list pods: we get 2 -// * create a new pod -// * list pods again: we still get 2 (cache in not in sync yet) -// -// This could lead to creating the pod a second time (with a different generated name) at the -// next iteration of the reconciliation loop. -// The same goes for deletions. -// -// This is only a problem for resources whose name is non-deterministic. Creating twice the same -// resource with the same name is considered OK, since the second time would simply fail. -// -// ## Expectations as a solution to mitigate cache inconsistencies -// -// ReplicaSets implementation in K8s does rely on runtime expectations to mitigate those inconsistencies. 
-// See the expectations implementation: https://github.com/kubernetes/kubernetes/blob/v1.13.2/pkg/controller/controller_utils.go -// And its usage for ReplicaSets: https://github.com/kubernetes/kubernetes/blob/v1.13.2/pkg/controller/replicaset/replica_set.go#L90 -// -// The idea is the following: -// -// * When a resource is created, increase the expected creations for this resource. -// Example: "expect 1 pod creation for this ElasticsearchCluster". Note that expectations -// are associated to the ElasticsearchCluster resource here, but effectively observe pods. -// * Once the resource creation event is observed, decrease the expected creations (expectation observed). -// * Expectations are considered satisfied when the count is equal to 0: we can consider our cache in-sync. -// * Checking whether expectations are satisfied within a reconciliation loop iteration is a way to know -// whether we can move forward with an up-to-date cache to next reconciliation steps. -// * The same goes for deletions. -// -// Expectations have a time-to-live (5 minutes). Once reached, we consider an expectation to be fulfilled, even -// though its internal counters may not be 0. This is to avoid staying stuck with inconsistent expectation events. -// -// ## Why not reusing K8s expectations implementations? -// -// We could absolutely reuse the existing `controller.Expectations` implementations. -// Doing so forces us to vendor the whole `kubernetes` package tree, which in turns -// requires vendoring the apiserver package tree. That's a lot of imports. -// -// Also, the Expectations API is not very user-friendly. -// -// A common usage in our situation is to increment expectations whenever we create a pod. -// Two ways to do that with K8s Expectations API: -// -// * `expectations.ExpectCreations(controllerKey string, adds int)`: overrides any previous value. -// * `expectations.RaiseExpectations(controllerKey string, add, del int)`: only works if expectations exist, -// meaning `expectations.SetExpectations was called at least once before. -// -// This is replaced in our implementation by a simpler `expectations.ExpectCreations(controllerKey)`, -// that does increment the creation counter, and creates it if it doesn't exist yet. -// -// A few other things that differ in our implementation from the K8s one: -// -// * We don't accept negative counters as a correct value: it does not make sense to set the creations -// counter to -1 if it was already at 0 (could be a leftover creation from a previous controller that -// we don't care about, since we don't have expectations for it). -// * Once an expectations TTL is reached, we consider we probably missed an event, hence we choose to -// reset expectations to 0 explicitely, instead of keeping counters value but still consider expectations -// to be fulfilled. -// * `controller.UIDTrackingControllerExpectations` is an extended expectations implementation meant to handle -// update events that have a non-zero DeletionTimestamp (can be issued multiple times but should be counted -// only once). Since we do rely on controller-runtime deletion events instead, but don't need this here. -// * We only use atomic int64 here, no generic `cache.Store`: no need to deal with error handling in the caller. 
-// -// ## Usage -// -// Expected usage pseudo-code: -// ``` -// if !expectations.fulfilled(responsibleResourceID) { -// // expected creations and deletions are not fulfilled yet, -// // let's requeue -// return -// } -// for _, res := range resourcesToCreate { -// // expect a creation -// expectations.ExpectCreation(responsibleResourceID) -// if err := client.Create(res); err != nil { -// // cancel our expectation, since resource wasn't created -// expectations.CreationObserved(responsibleResourceID) -// return err -// } -// } -// // same mechanism for deletions -// ``` -// -// Note that the `responsibleResourceID` in this context does not map to resources we create -// or delete. For instance, it would be the ID of our ElasticsearchCluster, even though the -// resources that we effectively create and deletes are pods associated with this cluster. -// - -import ( - "sync" - "sync/atomic" - "time" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/finalizer" - "k8s.io/apimachinery/pkg/types" -) - -const ( - // ExpectationsTTLNanosec is the default expectations time-to-live, - // for cases where we expect an event (creation or deletion) that never happens. - // - // Set to 5 minutes similar to https://github.com/kubernetes/kubernetes/blob/v1.13.2/pkg/controller/controller_utils.go - ExpectationsTTLNanosec = 5 * time.Minute // time is internally represented as int64 nanoseconds - - // ExpectationsFinalizerName designates a finalizer to clean up expectations on es cluster deletion. - ExpectationsFinalizerName = "expectations.finalizers.elasticsearch.k8s.elastic.co" -) - -// NewExpectations creates expectations with the default TTL. -func NewExpectations() *Expectations { - return &Expectations{ - mutex: sync.RWMutex{}, - counters: map[types.NamespacedName]*expectationsCounters{}, - ttl: ExpectationsTTLNanosec, - } -} - -// Expectations holds our creation and deletions expectations for -// various resources, up to the configured TTL. -// Safe for concurrent use. -type Expectations struct { - mutex sync.RWMutex - counters map[types.NamespacedName]*expectationsCounters - ttl time.Duration -} - -// ExpectCreation marks a creation for the given resource as expected. -func (e *Expectations) ExpectCreation(namespacedName types.NamespacedName) { - e.getOrCreateCounters(namespacedName).AddCreations(1) -} - -// ExpectDeletion marks a deletion for the given resource as expected. -func (e *Expectations) ExpectDeletion(namespacedName types.NamespacedName) { - e.getOrCreateCounters(namespacedName).AddDeletions(1) -} - -// CreationObserved marks a creation event for the given resource as observed, -// cancelling the effect of a previous call to e.ExpectCreation. -func (e *Expectations) CreationObserved(namespacedName types.NamespacedName) { - e.getOrCreateCounters(namespacedName).AddCreations(-1) -} - -// DeletionObserved marks a deletion event for the given resource as observed, -// cancelling the effect of a previous call to e.ExpectDeletion. -func (e *Expectations) DeletionObserved(namespacedName types.NamespacedName) { - e.getOrCreateCounters(namespacedName).AddDeletions(-1) -} - -// Fulfilled returns true if all the expectations for the given resource -// are fulfilled (both creations and deletions). Meaning we can consider -// the controller is in-sync with resources in the apiserver. 
-func (e *Expectations) Fulfilled(namespacedName types.NamespacedName) bool { - creations, deletions := e.get(namespacedName) - if creations == 0 && deletions == 0 { - return true - } - return false -} - -// get creations and deletions expectations for the expected resource. -func (e *Expectations) get(namespacedName types.NamespacedName) (creations int64, deletions int64) { - return e.getOrCreateCounters(namespacedName).Get() -} - -// getOrCreateCounters returns the counters associated to the given resource. -// They may not exist yet: in such case we create and initialize them first. -func (e *Expectations) getOrCreateCounters(namespacedName types.NamespacedName) *expectationsCounters { - e.mutex.RLock() - counters, exists := e.counters[namespacedName] - e.mutex.RUnlock() - if !exists { - counters = e.createCounters(namespacedName) - } - return counters -} - -func (e *Expectations) createCounters(namespacedName types.NamespacedName) *expectationsCounters { - e.mutex.Lock() - defer e.mutex.Unlock() - // if this method is called, counters probably don't exist yet - // still re-check with lock acquired in case they would be created - // in-between 2 concurrent calls to e.getOrCreateCounters - counters, exists := e.counters[namespacedName] - if exists { - return counters - } - counters = newExpectationsCounters(e.ttl) - e.counters[namespacedName] = counters - return counters -} - -// expectationsCounters hold creations and deletions counters, -// and manages counters TTL through their last activity timestamp. -// Counters that would go below 0 will be reset to 0. -// Expectations that would exceed their TTL will be reset to 0. -// Safe for concurrent use. -type expectationsCounters struct { - creations *int64 // atomic int64 counter - deletions *int64 // atomic int64 counter - timestamp *int64 // unix timestamp in nanoseconds - ttl int64 // duration in nanoseconds -} - -// newExpectationsCounters returns an initiliazed expectationsCounters -// with the given ttl, and timestamp set to now. -func newExpectationsCounters(ttl time.Duration) *expectationsCounters { - creations := int64(0) - deletions := int64(0) - timestamp := timestampNow() - return &expectationsCounters{ - creations: &creations, - deletions: &deletions, - timestamp: ×tamp, - ttl: ttl.Nanoseconds(), - } -} - -// Get returns the current creations and deletions counters. -// If counters are expired, they are reset to 0 beforehand. -func (e *expectationsCounters) Get() (creations, deletions int64) { - if e.isExpired() { - e.reset() - } - return e.getPtrValue(e.creations), e.getPtrValue(e.deletions) -} - -// AddCreations increments the creations counter with the given value, -// which can be negative for substractions. -// If the value goes below 0, it will be reset to 0. -func (e *expectationsCounters) AddCreations(value int64) { - e.add(e.creations, value) -} - -// AddDeletions increments the deletions counter with the given value, -// which can be negative for substractions. -// If the value goes below 0, it will be reset to 0. -func (e *expectationsCounters) AddDeletions(value int64) { - e.add(e.deletions, value) -} - -// isExpired returns true if the last operation on the counters -// exceeds the configured TTL. -func (e *expectationsCounters) isExpired() bool { - if e.timestamp == nil { - return false - } - timestamp := atomic.LoadInt64(e.timestamp) - if timestampNow()-timestamp > e.ttl { - return true - } - return false -} - -// resetTimestamp sets the timestamp value to the current time. 
-func (e *expectationsCounters) resetTimestamp() { - atomic.StoreInt64(e.timestamp, timestampNow()) -} - -// reset sets counters values to 0, and the -// timestamp value to the current time. -func (e *expectationsCounters) reset() { - atomic.StoreInt64(e.creations, 0) - atomic.StoreInt64(e.deletions, 0) - e.resetTimestamp() -} - -// getPtrValue returns the int64 value stored at the given pointer. -// Meant to be used for internal values, eg. `getPtrValue(e.creations)`. -func (e *expectationsCounters) getPtrValue(ptr *int64) int64 { - value := atomic.LoadInt64(ptr) - if value < 0 { - // In-between situation where we have a negative value, - // return 0 instead (see `e.add` implementation). - return 0 - } - return value -} - -// add increments the int64 stored at the given pointer with the given value, -// which can be negative for substractions. -// Meant to be used for internal values, eg. `add(e.creations, -1)`. -// If the value goes below 0, it will be reset to 0. -func (e *expectationsCounters) add(ptr *int64, value int64) { - e.resetTimestamp() - newValue := atomic.AddInt64(ptr, value) - if newValue < 0 && value < 0 { - // We are reaching a negative value after a substraction: - // cancel what we just did. - // The value is still negative in-between these 2 atomic ops. - atomic.AddInt64(ptr, -value) - } -} - -// timestampNow returns the current unix timestamp in nanoseconds -func timestampNow() int64 { - return time.Now().UnixNano() -} - -// ExpectationsFinalizer removes the given cluster entry from the expectations map. -func ExpectationsFinalizer(cluster types.NamespacedName, expectations *Expectations) finalizer.Finalizer { - return finalizer.Finalizer{ - Name: ExpectationsFinalizerName, - Execute: func() error { - expectations.mutex.Lock() - defer expectations.mutex.Unlock() - delete(expectations.counters, cluster) - return nil - }, - } -} diff --git a/operators/pkg/controller/common/reconciler/expectations_test.go b/operators/pkg/controller/common/reconciler/expectations_test.go deleted file mode 100644 index 322f089197..0000000000 --- a/operators/pkg/controller/common/reconciler/expectations_test.go +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package reconciler - -import ( - "testing" - "time" - - "github.com/stretchr/testify/require" - "k8s.io/apimachinery/pkg/types" -) - -var nsn1 = types.NamespacedName{ - Namespace: "namespace", - Name: "name", -} - -var nsn2 = types.NamespacedName{ - Namespace: "namespace", - Name: "name2", -} - -func checkExpectations(t *testing.T, e *Expectations, namespacedName types.NamespacedName, expectedCreations int64, expectedDeletions int64) { - // check creations and deletions counters - actualCreations, actualDeletions := e.get(namespacedName) - require.Equal(t, expectedCreations, actualCreations) - require.Equal(t, expectedDeletions, actualDeletions) - // check expectations fulfilled - expectedFulfilled := false - if expectedCreations == 0 && expectedDeletions == 0 { - expectedFulfilled = true - } - require.Equal(t, expectedFulfilled, e.Fulfilled(namespacedName)) -} - -func TestExpectationsTTL(t *testing.T) { - // validate default behaviour with default TTL - exp := NewExpectations() - exp.ExpectCreation(nsn1) - checkExpectations(t, exp, nsn1, 1, 0) - // same test, but with a custom short TTL - exp = NewExpectations() - exp.ttl = 1 * time.Nanosecond - exp.ExpectCreation(nsn1) - // counters should be reset and expectations fulfilled - // once TTL is reached - time.Sleep(2 * time.Nanosecond) - checkExpectations(t, exp, nsn1, 0, 0) -} - -func TestExpectations(t *testing.T) { - // tests are performing operations and checks on the same expectations object, - // with state preserved between tests - e := NewExpectations() - tests := []struct { - name string - events func(e *Expectations) - expected map[types.NamespacedName][2]int64 // namespacedName -> [creations, deletions] - }{ - { - name: "empty", - events: func(e *Expectations) {}, - expected: map[types.NamespacedName][2]int64{ - nsn1: [2]int64{0, 0}, - nsn2: [2]int64{0, 0}, - }, - }, - { - name: "add an expected creation for nsn1", - events: func(e *Expectations) { - e.ExpectCreation(nsn1) - }, - expected: map[types.NamespacedName][2]int64{ - nsn1: [2]int64{1, 0}, - nsn2: [2]int64{0, 0}, - }, - }, - { - name: "add 2 more expected creations for nsn1", - events: func(e *Expectations) { - e.ExpectCreation(nsn1) - e.ExpectCreation(nsn1) - }, - expected: map[types.NamespacedName][2]int64{ - nsn1: [2]int64{3, 0}, - nsn2: [2]int64{0, 0}, - }, - }, - { - name: "add an expected creation for nsn2", - events: func(e *Expectations) { - e.ExpectCreation(nsn2) - }, - expected: map[types.NamespacedName][2]int64{ - nsn1: [2]int64{3, 0}, - nsn2: [2]int64{1, 0}, - }, - }, - { - name: "observe creation for nsn1", - events: func(e *Expectations) { - e.CreationObserved(nsn1) - }, - expected: map[types.NamespacedName][2]int64{ - nsn1: [2]int64{2, 0}, - nsn2: [2]int64{1, 0}, - }, - }, - { - name: "observe 2 creations for nsn1", - events: func(e *Expectations) { - e.CreationObserved(nsn1) - e.CreationObserved(nsn1) - }, - expected: map[types.NamespacedName][2]int64{ - nsn1: [2]int64{0, 0}, - nsn2: [2]int64{1, 0}, - }, - }, - { - name: "observe creation for nsn2", - events: func(e *Expectations) { - e.CreationObserved(nsn2) - }, - expected: map[types.NamespacedName][2]int64{ - nsn1: [2]int64{0, 0}, - nsn2: [2]int64{0, 0}, - }, - }, - { - name: "observe creation when counter is already at 0 should be a no-op", - events: func(e *Expectations) { - e.CreationObserved(nsn1) - }, - expected: map[types.NamespacedName][2]int64{ - nsn1: [2]int64{0, 0}, - nsn2: [2]int64{0, 0}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tt.events(e) - for 
nsn, expectationsSlice := range tt.expected { - checkExpectations(t, e, nsn, expectationsSlice[0], expectationsSlice[1]) - } - }) - } -} - -func TestExpectationsFinalizer(t *testing.T) { - expectations := NewExpectations() - expectations.ExpectCreation(nsn1) - require.Contains(t, expectations.counters, nsn1) - // applying finalizer should remove the entry from the map - err := ExpectationsFinalizer(nsn1, expectations).Execute() - require.NoError(t, err) - require.NotContains(t, expectations.counters, nsn1) - // applying finalizer on non-existing entry should be fine - err = ExpectationsFinalizer(nsn1, expectations).Execute() - require.NoError(t, err) -} diff --git a/operators/pkg/controller/common/watches/expectations_watch.go b/operators/pkg/controller/common/watches/expectations_watch.go deleted file mode 100644 index 9f91cecae5..0000000000 --- a/operators/pkg/controller/common/watches/expectations_watch.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package watches - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/workqueue" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" -) - -// ExpectationsResourceRetriever is a function that allows retrieving, from a given resource, -// the associated resource that holds expectations resources. -// For instance, from a given pod, we might want to retrieve the ElasticsearchCluster associated -// to it (see `label.ClusterFromResourceLabels`). -type ExpectationsResourceRetriever func(metaObject metav1.Object) (types.NamespacedName, bool) - -// ExpectationsWatch is an event handler for watches that markes resources creations and deletions -// as observed for the given reconciler expectations. -type ExpectationsWatch struct { - handlerKey string - expectations *reconciler.Expectations - resourceRetriever ExpectationsResourceRetriever -} - -// Make sure our ExpectationsWatch implements HandlerRegistration. -var _ HandlerRegistration = &ExpectationsWatch{} - -// NewExpectationsWatch creates an ExpectationsWatch from the given arguments. -func NewExpectationsWatch(handlerKey string, expectations *reconciler.Expectations, resourceRetriever ExpectationsResourceRetriever) *ExpectationsWatch { - return &ExpectationsWatch{ - handlerKey: handlerKey, - expectations: expectations, - resourceRetriever: resourceRetriever, - } -} - -// Key returns the key associated to this handler. -func (p *ExpectationsWatch) Key() string { - return p.handlerKey -} - -// EventHandler returns the ExpectationsWatch as an handler.EventHandler. -func (p *ExpectationsWatch) EventHandler() handler.EventHandler { - return p -} - -// Create marks a resource creation as observed in the expectations. -func (p *ExpectationsWatch) Create(evt event.CreateEvent, q workqueue.RateLimitingInterface) { - resource, exists := p.resourceRetriever(evt.Meta) - if exists { - p.expectations.CreationObserved(resource) - log.V(1).Info("Marking creation observed in expectations", "name", resource.Name, "namespace", resource.Namespace) - } -} - -// Delete marks a resource deletion as observed in the expectations. 
-func (p *ExpectationsWatch) Delete(evt event.DeleteEvent, q workqueue.RateLimitingInterface) { - resource, exists := p.resourceRetriever(evt.Meta) - if exists { - p.expectations.DeletionObserved(resource) - log.V(1).Info("Marking deletion observed in expectations", "name", resource.Name, "namespace", resource.Namespace) - } -} - -// Update is a no-op operation in this context. -func (p *ExpectationsWatch) Update(evt event.UpdateEvent, q workqueue.RateLimitingInterface) {} - -// Generic is a no-op operation in this context. -func (p *ExpectationsWatch) Generic(evt event.GenericEvent, q workqueue.RateLimitingInterface) {} diff --git a/operators/pkg/controller/common/watches/expectations_watch_test.go b/operators/pkg/controller/common/watches/expectations_watch_test.go deleted file mode 100644 index e921a076e7..0000000000 --- a/operators/pkg/controller/common/watches/expectations_watch_test.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package watches - -import ( - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/event" -) - -const testHandlerKey = "pod-expectations" - -var testCluster = types.NamespacedName{ - Namespace: "namespace", - Name: "cluster", -} - -func TestExpectationsWatch_Key(t *testing.T) { - w := NewExpectationsWatch(testHandlerKey, nil, label.ClusterFromResourceLabels) - require.Equal(t, testHandlerKey, w.Key()) -} - -func createPodMetaObject(t *testing.T, name string) metav1.Object { - pod1 := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: testCluster.Namespace, - Labels: map[string]string{ - label.ClusterNameLabelName: testCluster.Name, - }, - }, - } - asMetaObj, err := meta.Accessor(pod1) - require.NoError(t, err) - return asMetaObj -} - -func TestExpectationsWatch_Create(t *testing.T) { - expectations := reconciler.NewExpectations() - w := NewExpectationsWatch(testHandlerKey, expectations, label.ClusterFromResourceLabels) - - tests := []struct { - name string - events func() - expectedFulfilled bool - }{ - { - name: "initially fulfilled", - events: func() {}, - expectedFulfilled: true, - }, - { - name: "expect 2 creations", - events: func() { - expectations.ExpectCreation(testCluster) - expectations.ExpectCreation(testCluster) - }, - expectedFulfilled: false, - }, - { - name: "observe 1 creation", - events: func() { - w.Create(event.CreateEvent{ - Meta: createPodMetaObject(t, "pod1"), - }, nil) - }, - expectedFulfilled: false, - }, - { - name: "observe the 2nd creation", - events: func() { - w.Create(event.CreateEvent{ - Meta: createPodMetaObject(t, "pod2"), - }, nil) - }, - expectedFulfilled: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tt.events() - require.Equal(t, tt.expectedFulfilled, expectations.Fulfilled(testCluster)) - }) - } -} - -func TestExpectationsWatch_Delete(t *testing.T) { - expectations := reconciler.NewExpectations() - w := NewExpectationsWatch(testHandlerKey, expectations, label.ClusterFromResourceLabels) - - tests := 
[]struct { - name string - events func() - expectedFulfilled bool - }{ - { - name: "initially fulfilled", - events: func() {}, - expectedFulfilled: true, - }, - { - name: "expect 2 deletions", - events: func() { - expectations.ExpectDeletion(testCluster) - expectations.ExpectDeletion(testCluster) - }, - expectedFulfilled: false, - }, - { - name: "observe 1 deletion", - events: func() { - w.Delete(event.DeleteEvent{ - Meta: createPodMetaObject(t, "pod1"), - }, nil) - }, - expectedFulfilled: false, - }, - { - name: "observe the 2nd deletions", - events: func() { - w.Delete(event.DeleteEvent{ - Meta: createPodMetaObject(t, "pod2"), - }, nil) - }, - expectedFulfilled: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tt.events() - require.Equal(t, tt.expectedFulfilled, expectations.Fulfilled(testCluster)) - }) - } -} From ee2974d89b520b96ec6e85a7ed51075fce5d2b44 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 15 Jul 2019 10:17:31 +0200 Subject: [PATCH 10/31] Handle StatefulSet rolling upgrades --- .../elasticsearch/driver/default.go | 245 ++------------ .../elasticsearch/driver/upgrade.go | 305 ++++++++++++++++++ .../elasticsearch/elasticsearch_controller.go | 21 +- operators/pkg/utils/stringsutil/strings.go | 14 + 4 files changed, 348 insertions(+), 237 deletions(-) create mode 100644 operators/pkg/controller/elasticsearch/driver/upgrade.go diff --git a/operators/pkg/controller/elasticsearch/driver/default.go b/operators/pkg/controller/elasticsearch/driver/default.go index 56dd6cf279..a363ff9c61 100644 --- a/operators/pkg/controller/elasticsearch/driver/default.go +++ b/operators/pkg/controller/elasticsearch/driver/default.go @@ -199,8 +199,6 @@ func (d *defaultDriver) Reconcile( return results.WithError(err) } - //podsState := mutation.NewPodsState(*resourcesState, observedState) - if err := d.supportedVersions.VerifySupportsExistingPods(resourcesState.CurrentPods.Pods()); err != nil { return results.WithError(err) } @@ -219,59 +217,6 @@ func (d *defaultDriver) Reconcile( return results.WithError(err) } - // - //// There might be some ongoing creations and deletions our k8s client cache - //// hasn't seen yet. In such case, requeue until we are in-sync. - //// Otherwise, we could end up re-creating multiple times the same pod with - //// different generated names through multiple reconciliation iterations. - //if !d.PodsExpectations.Fulfilled(namespacedName) { - // log.Info("Pods creations and deletions expectations are not satisfied yet. 
Requeuing.") - // return results.WithResult(defaultRequeue) - //} - // - //changes, err := d.calculateChanges(internalUsers, es, *resourcesState) - //if err != nil { - // return results.WithError(err) - //} - // - //log.Info( - // "Calculated all required changes", - // "to_create:", len(changes.ToCreate), - // "to_keep:", len(changes.ToKeep), - // "to_delete:", len(changes.ToDelete), - //) - // - //// restart ES processes that need to be restarted before going on with other changes - //done, err := restart.HandleESRestarts( - // restart.RestartContext{ - // Cluster: es, - // EventsRecorder: reconcileState.Recorder, - // K8sClient: d.Client, - // Changes: *changes, - // Dialer: d.Dialer, - // EsClient: esClient, - // }, - //) - //if err != nil { - // return results.WithError(err) - //} - //if !done { - // log.V(1).Info("Pods restart is not over yet, re-queueing.") - // return results.WithResult(defaultRequeue) - //} - // - //// figure out what changes we can perform right now - //performableChanges, err := mutation.CalculatePerformableChanges(es.Spec.UpdateStrategy, *changes, podsState) - //if err != nil { - // return results.WithError(err) - //} - // - //log.Info( - // "Calculated performable changes", - // "schedule_for_creation_count", len(performableChanges.ToCreate), - // "schedule_for_deletion_count", len(performableChanges.ToDelete), - //) - results.Apply( "reconcile-cluster-license", func() (controller.Result, error) { @@ -329,7 +274,7 @@ func (d *defaultDriver) Reconcile( ) } - res = d.reconcileNodeSpecs(es, podTemplateSpecBuilder, esClient, observedState) + res = d.reconcileNodeSpecs(es, esReachable, podTemplateSpecBuilder, esClient, observedState) if results.WithResults(res).HasError() { return results } @@ -357,31 +302,6 @@ func (d *defaultDriver) Reconcile( // results.WithResult(defaultRequeue) // } //} - // - //// List the orphaned PVCs before the Pods are created. - //// If there are some orphaned PVCs they will be adopted and remove sequentially from the list when Pods are created. 
- //orphanedPVCs, err := pvc.FindOrphanedVolumeClaims(d.Client, es) - //if err != nil { - // return results.WithError(err) - //} - // - //for _, change := range performableChanges.ToCreate { - // d.PodsExpectations.ExpectCreation(namespacedName) - // if err := createElasticsearchPod( - // d.Client, - // d.Scheme, - // es, - // reconcileState, - // change.Pod, - // change.PodSpecCtx, - // orphanedPVCs, - // ); err != nil { - // // pod was not created, cancel our expectation by marking it observed - // d.PodsExpectations.CreationObserved(namespacedName) - // return results.WithError(err) - // } - //} - // passed this point, any pods resource listing should check expectations first if !esReachable { // We cannot manipulate ES allocation exclude settings if the ES cluster @@ -406,119 +326,15 @@ func (d *defaultDriver) Reconcile( // return results.WithResult(defaultRequeue).WithError(err) // } //} - // - //if !changes.HasChanges() { - // // Current state matches expected state - // reconcileState.UpdateElasticsearchOperational(*resourcesState, observedState) - // return results - //} - // - //// Start migrating data away from all pods to be deleted - //leavingNodeNames := pod.PodListToNames(performableChanges.ToDelete.Pods()) - //if err = migration.MigrateData(esClient, leavingNodeNames); err != nil { - // return results.WithError(errors.Wrap(err, "error during migrate data")) - //} - // - //// Shrink clusters by deleting deprecated pods - //if err = d.attemptPodsDeletion( - // performableChanges, - // reconcileState, - // resourcesState, - // observedState, - // results, - // esClient, - // es, - //); err != nil { - // return results.WithError(err) - //} - //// past this point, any pods resource listing should check expectations first - // - //if changes.HasChanges() && !performableChanges.HasChanges() { - // // if there are changes we'd like to perform, but none that were performable, we try again later - // results.WithResult(defaultRequeue) - //} reconcileState.UpdateElasticsearchState(*resourcesState, observedState) return results } -// -//// attemptPodsDeletion deletes a list of pods after checking there is no migrating data for each of them -//func (d *defaultDriver) attemptPodsDeletion( -// changes *mutation.PerformableChanges, -// reconcileState *reconcile.State, -// resourcesState *reconcile.ResourcesState, -// observedState observer.State, -// results *reconciler.Results, -// esClient esclient.Client, -// elasticsearch v1alpha1.Elasticsearch, -//) error { -// newState := make([]corev1.Pod, len(resourcesState.CurrentPods)) -// copy(newState, resourcesState.CurrentPods.Pods()) -// for _, pod := range changes.ToDelete.Pods() { -// newState = removePodFromList(newState, pod) -// preDelete := func() error { -// if d.zen1SettingsUpdater != nil { -// requeue, err := d.zen1SettingsUpdater( -// elasticsearch, -// d.Client, -// esClient, -// newState, -// changes, -// reconcileState) -// -// if err != nil { -// return err -// } -// -// if requeue { -// results.WithResult(defaultRequeue) -// } -// } -// return nil -// } -// -// // do not delete a pod or expect a deletion if a data migration is in progress -// isMigratingData := migration.IsMigratingData(observedState, pod, changes.ToDelete.Pods()) -// if isMigratingData { -// log.Info("Skipping deletion because of migrating data", "pod", pod.Name) -// reconcileState.UpdateElasticsearchMigrating(*resourcesState, observedState) -// results.WithResult(defaultRequeue) -// continue -// } -// -// namespacedName := 
k8s.ExtractNamespacedName(&elasticsearch) -// d.PodsExpectations.ExpectDeletion(namespacedName) -// result, err := deleteElasticsearchPod( -// d.Client, -// reconcileState, -// *resourcesState, -// pod, -// preDelete, -// ) -// if err != nil { -// // pod was not deleted, cancel our expectation by marking it observed -// d.PodsExpectations.DeletionObserved(namespacedName) -// return err -// } -// results.WithResult(result) -// } -// return nil -//} - -// removePodFromList removes a single pod from the list, matching by pod name. -func removePodFromList(pods []corev1.Pod, pod corev1.Pod) []corev1.Pod { - for i, p := range pods { - if p.Name == pod.Name { - return append(pods[:i], pods[i+1:]...) - } - } - return pods -} - func (d *defaultDriver) reconcileNodeSpecs( es v1alpha1.Elasticsearch, + esReachable bool, podSpecBuilder esversion.PodTemplateSpecBuilder, esClient esclient.Client, observedState observer.State, @@ -583,6 +399,11 @@ func (d *defaultDriver) reconcileNodeSpecs( } } + if !esReachable { + // cannot perform downscale or rolling upgrade if we cannot request Elasticsearch + return results.WithResult(defaultRequeue) + } + // Phase 2: handle sset scale down. // We want to safely remove nodes from the cluster, either because the sset requires less replicas, // or because it should be removed entirely. @@ -607,8 +428,15 @@ func (d *defaultDriver) reconcileNodeSpecs( } } + // Phase 3: handle rolling upgrades. + // Control nodes restart (upgrade) by manually decrementing rollingUpdate.Partition. + rollingUpgradesRes := d.handleRollingUpgrades(es, esClient, actualStatefulSets) + results.WithResults(rollingUpgradesRes) + if rollingUpgradesRes.HasError() { + return results + } + // TODO: - // - safe node upgrade (rollingUpdate.Partition + shards allocation) // - change budget // - zen1, zen2 return results @@ -638,7 +466,7 @@ func (d *defaultDriver) scaleStatefulSetDown( // nodes are ordered by highest ordinal first var leavingNodes []string for i := initialReplicas - 1; i > targetReplicas-1; i-- { - leavingNodes = append(leavingNodes, sset.PodName(statefulSet.Name, int(i))) + leavingNodes = append(leavingNodes, sset.PodName(statefulSet.Name, i)) } // TODO: don't remove last master/last data nodes? @@ -674,45 +502,10 @@ func (d *defaultDriver) scaleStatefulSetDown( } } - return nil -} + // TODO: clear allocation excludes -// -//// calculateChanges calculates the changes we'd need to perform to go from the current cluster configuration to the -//// desired one. 
-//func (d *defaultDriver) calculateChanges( -// internalUsers *user.InternalUsers, -// es v1alpha1.Elasticsearch, -// resourcesState reconcile.ResourcesState, -//) (*mutation.Changes, error) { -// expectedPodSpecCtxs, err := d.expectedPodsAndResourcesResolver( -// es, -// pod.NewPodSpecParams{ -// ProbeUser: internalUsers.ProbeUser.Auth(), -// KeystoreUser: internalUsers.KeystoreUser.Auth(), -// UnicastHostsVolume: volume.NewConfigMapVolume( -// name.UnicastHostsConfigMap(es.Name), esvolume.UnicastHostsVolumeName, esvolume.UnicastHostsVolumeMountPath, -// ), -// }, -// d.OperatorImage, -// ) -// if err != nil { -// return nil, err -// } -// -// changes, err := mutation.CalculateChanges( -// es, -// expectedPodSpecCtxs, -// resourcesState, -// func(ctx pod.PodSpecContext) corev1.Pod { -// return esversion.NewPod(es, ctx) -// }, -// ) -// if err != nil { -// return nil, err -// } -// return &changes, nil -//} + return results +} // newElasticsearchClient creates a new Elasticsearch HTTP client for this cluster using the provided user func (d *defaultDriver) newElasticsearchClient(service corev1.Service, user user.User, v version.Version, caCerts []*x509.Certificate) esclient.Client { diff --git a/operators/pkg/controller/elasticsearch/driver/upgrade.go b/operators/pkg/controller/elasticsearch/driver/upgrade.go new file mode 100644 index 0000000000..3f49b3a025 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/driver/upgrade.go @@ -0,0 +1,305 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package driver + +import ( + "context" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" + esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" +) + +func (d *defaultDriver) handleRollingUpgrades( + es v1alpha1.Elasticsearch, + esClient esclient.Client, + statefulSets sset.StatefulSetList, +) *reconciler.Results { + results := &reconciler.Results{} + + // We need an up-to-date ES state, but avoid requesting information we may not need. + esState := NewLazyESState(esClient) + + // Maybe upgrade some of the nodes. + res := d.doRollingUpgrade(es, statefulSets, esClient, esState) + results.WithResults(res) + + // Maybe re-enable shards allocation if upgraded nodes are back into the cluster. + res = d.MaybeEnableShardsAllocation(es, esClient, esState, statefulSets) + results.WithResults(res) + + return results +} + +func (d *defaultDriver) doRollingUpgrade( + es v1alpha1.Elasticsearch, + statefulSets sset.StatefulSetList, + esClient esclient.Client, + esState ESState, +) *reconciler.Results { + results := &reconciler.Results{} + + if !d.expectations.GenerationExpected(statefulSets.ObjectMetas()...) { + // Our cache of SatefulSets is out of date compared to previous reconciliation operations. + // It does not matter much here since operations are idempotent, but we might as well avoid + // useless operations that would end up in a resource update conflict anyway. 
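+		// Note that these expectations track StatefulSet generation updates made by the operator
+		// itself (see ExpectGeneration further down in this file), not individual pod creations or deletions.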
+ log.V(1).Info("StatefulSet cache out-of-date, re-queueing") + return results.WithResult(defaultRequeue) + } + + if !statefulSets.RevisionUpdateScheduled() { + // nothing to upgrade + return results + } + + // TODO: deal with multiple restarts at once, taking the changeBudget into account. + // We'd need to stop checking cluster health and do something smarter, since cluster health green check + // should be done **in between** restarts to make sense, which is pretty hard to do since we don't + // trigger restarts but just allow the sset controller to do it at its own pace. + // Instead of green health, we could look at shards status, taking into account nodes + // we scheduled for a restart (maybe not restarted yet). + + // TODO: don't upgrade more than 1 master concurrently (ok for now since we upgrade 1 node at a time anyway) + + maxConcurrentUpgrades := 1 + scheduledUpgrades := 0 + + for i, statefulSet := range statefulSets { + // Inspect each pod, starting from the highest ordinal, and decrement the partition to allow + // pod upgrades to go through, controlled by the StatefulSet controller. + for partition := sset.GetUpdatePartition(statefulSet); partition >= 0; partition-- { + if scheduledUpgrades >= maxConcurrentUpgrades { + return results.WithResult(defaultRequeue) + } + if partition >= sset.Replicas(statefulSet) { + continue + } + + // Do we need to upgrade that pod? + podName := sset.PodName(statefulSet.Name, partition) + podRef := types.NamespacedName{Namespace: statefulSet.Namespace, Name: podName} + alreadyUpgraded, err := podUpgradeDone(d.Client, esState, podRef, statefulSet.Status.UpdateRevision) + if err != nil { + return results.WithError(err) + } + if alreadyUpgraded { + continue + } + + // An upgrade is required for that pod. + scheduledUpgrades++ + + // Is the pod upgrade already scheduled? + if partition == sset.GetUpdatePartition(statefulSet) { + continue + } + + // Is the cluster ready for the node upgrade? + clusterReady, err := clusterReadyForNodeRestart(es, esState) + if err != nil { + return results.WithError(err) + } + if !clusterReady { + // retry later + return results.WithResult(defaultRequeue) + } + + log.Info("Preparing cluster for node restart", "namespace", es.Namespace, "name", es.Name) + if err := prepareClusterForNodeRestart(esClient, esState); err != nil { + return results.WithError(err) + } + + // Upgrade the pod. + if err := d.upgradeStatefulSetPartition(es, &statefulSets[i], esClient, partition); err != nil { + return results.WithError(err) + } + scheduledUpgrades++ + } + } + return results +} + +func (d *defaultDriver) upgradeStatefulSetPartition( + es v1alpha1.Elasticsearch, + statefulSet *appsv1.StatefulSet, + esClient esclient.Client, + newPartition int32, +) error { + // TODO: zen1, zen2 + + // Node can be removed, update the StatefulSet rollingUpdate.Partition ordinal. + log.Info("Updating rollingUpdate.Partition", + "namespace", statefulSet.Namespace, + "name", statefulSet.Name, + "from", statefulSet.Spec.UpdateStrategy.RollingUpdate.Partition, + "to", &newPartition, + ) + statefulSet.Spec.UpdateStrategy.RollingUpdate = &appsv1.RollingUpdateStatefulSetStrategy{ + Partition: &newPartition, + } + if err := d.Client.Update(statefulSet); err != nil { + return err + } + + // Register the updated sset generation to deal with out-of-date sset cache. 
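+	// Until the cached StatefulSet reflects this update, GenerationExpected returns false and the
+	// next reconciliation requeues rather than acting on a stale rollingUpdate.Partition value.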
+ d.Expectations.ExpectGeneration(statefulSet.ObjectMeta) + + return nil +} + +func prepareClusterForNodeRestart(esClient esclient.Client, esState ESState) error { + // Disable shard allocations to avoid shards moving around while the node is temporarily down + shardsAllocationEnabled, err := esState.ShardAllocationsEnabled() + if err != nil { + return err + } + if shardsAllocationEnabled { + if err := disableShardsAllocation(esClient); err != nil { + return err + } + } + + // Request a sync flush to optimize indices recovery when the node restarts. + if err := doSyncFlush(esClient); err != nil { + return err + } + + // TODO: halt ML jobs on that node + return nil +} + +// clusterReadyForNodeRestart returns true if the ES cluster allows a node to be restarted +// with minimized downtime and no unexpected data loss. +func clusterReadyForNodeRestart(es v1alpha1.Elasticsearch, esState ESState) (bool, error) { + // Check the cluster health: only allow node restart if health is green. + // This would cause downtime if some shards have 0 replicas, but we consider that's on the user. + // TODO: we could technically still restart a node if the cluster is yellow, + // as long as there are other copies of the shards in-sync on other nodes + // TODO: the fact we rely on a cached health here would prevent more than 1 restart + // in a single reconciliation + green, err := esState.GreenHealth() + if err != nil { + return false, err + } + if !green { + log.Info("Skipping node rolling upgrade since cluster is not green", "namespace", es.Namespace, "name", es.Name) + return false, nil + } + return true, nil +} + +// podUpgradeDone inspects the given pod and returns true if it was successfully upgraded. +func podUpgradeDone(c k8s.Client, esState ESState, podRef types.NamespacedName, expectedRevision string) (bool, error) { + if expectedRevision == "" { + // no upgrade scheduled for the sset + return false, nil + } + // retrieve pod to inspect its revision label + var pod corev1.Pod + err := c.Get(podRef, &pod) + if err != nil && !errors.IsNotFound(err) { + return false, err + } + if errors.IsNotFound(err) || !pod.DeletionTimestamp.IsZero() { + // pod is terminating + return false, nil + } + if sset.PodRevision(pod) != expectedRevision { + // pod revision does not match the sset upgrade revision + return false, nil + } + // is the pod ready? + if !k8s.IsPodReady(pod) { + return false, nil + } + // has the node joined the cluster yet? + inCluster, err := esState.NodesInCluster([]string{podRef.Name}) + if err != nil { + return false, err + } + if !inCluster { + log.V(1).Info("Node has not joined the cluster yet", "namespace", podRef.Namespace, "name", podRef.Name) + return false, err + } + return true, nil +} + +func disableShardsAllocation(esClient esclient.Client) error { + ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) + defer cancel() + return esClient.DisableReplicaShardsAllocation(ctx) +} + +func doSyncFlush(esClient esclient.Client) error { + ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) + defer cancel() + return esClient.SyncedFlush(ctx) +} + +func (d *defaultDriver) MaybeEnableShardsAllocation( + es v1alpha1.Elasticsearch, + esClient esclient.Client, + esState ESState, + statefulSets sset.StatefulSetList, +) *reconciler.Results { + results := &reconciler.Results{} + // Since we rely on sset rollingUpdate.Partition, requeue in case our cache hasn't seen a sset update yet. 
+ // Otherwise we could re-enable shards allocation while a pod was just scheduled for termination, + // with the partition in the sset cache being outdated. + if !d.Expectations.GenerationExpected(statefulSets.ObjectMetas()...) { + return results.WithResult(defaultRequeue) + } + + alreadyEnabled, err := esState.ShardAllocationsEnabled() + if err != nil { + return results.WithError(err) + } + if alreadyEnabled { + return results + } + + // Make sure all pods scheduled for upgrade have been upgraded. + scheduledUpgradesDone, err := sset.ScheduledUpgradesDone(d.Client, statefulSets) + if err != nil { + return results.WithError(err) + } + if !scheduledUpgradesDone { + log.V(1).Info( + "Rolling upgrade not over yet, some pods don't have the updated revision, keeping shard allocations disabled", + "namespace", es.Namespace, + "name", es.Name, + ) + return results.WithResult(defaultRequeue) + } + + // Make sure all nodes scheduled for upgrade are back into the cluster. + nodesInCluster, err := esState.NodesInCluster(statefulSets.PodNames()) + if err != nil { + return results.WithError(err) + } + if !nodesInCluster { + log.V(1).Info( + "Some upgraded nodes are not back in the cluster yet, keeping shard allocations disabled", + "namespace", es.Namespace, + "name", es.Name, + ) + return results.WithResult(defaultRequeue) + } + + log.Info("Enabling shards allocation", "namespace", es.Namespace, "name", es.Name) + ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) + defer cancel() + if err := esClient.EnableShardAllocation(ctx); err != nil { + return results.WithError(err) + } + + return results +} diff --git a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go index 002075b076..f4ebf7c7d0 100644 --- a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go +++ b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go @@ -67,9 +67,9 @@ func newReconciler(mgr manager.Manager, params operator.Parameters) (*ReconcileE esObservers: observer.NewManager(params.Dialer, client, observer.DefaultSettings), - finalizers: finalizer.NewHandler(client), - dynamicWatches: watches.NewDynamicWatches(), - podsExpectations: reconciler.NewExpectations(), + finalizers: finalizer.NewHandler(client), + dynamicWatches: watches.NewDynamicWatches(), + expectations: driver.NewGenerationExpectations(), Parameters: params, }, nil @@ -166,9 +166,9 @@ type ReconcileElasticsearch struct { dynamicWatches watches.DynamicWatches - // podsExpectations help dealing with inconsistencies in our client cache, - // by marking Pods creation/deletion as expected, and waiting til they are effectively observed. - podsExpectations *reconciler.Expectations + // expectations help dealing with inconsistencies in our client cache, + // by marking resources updates as expected, and skipping some operations if the cache is not up-to-date. 
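+	// A typical example is a StatefulSet spec update: the updated generation is recorded as expected,
+	// and the rolling upgrade logic requeues while the cached StatefulSet still reports an older one.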
+ expectations *driver.Expectations // iteration is the number of times this controller has run its Reconcile method iteration int64 @@ -250,10 +250,10 @@ func (r *ReconcileElasticsearch) internalReconcile( Version: *ver, - Observers: r.esObservers, - DynamicWatches: r.dynamicWatches, - PodsExpectations: r.podsExpectations, - Parameters: r.Parameters, + Expectations: r.expectations, + Observers: r.esObservers, + DynamicWatches: r.dynamicWatches, + Parameters: r.Parameters, }) if err != nil { return results.WithError(err) @@ -285,7 +285,6 @@ func (r *ReconcileElasticsearch) finalizersFor( ) []finalizer.Finalizer { clusterName := k8s.ExtractNamespacedName(&es) return []finalizer.Finalizer{ - reconciler.ExpectationsFinalizer(clusterName, r.podsExpectations), r.esObservers.Finalizer(clusterName), settings.SecureSettingsFinalizer(clusterName, watched), http.DynamicWatchesFinalizer(r.dynamicWatches, es.Name, esname.ESNamer), diff --git a/operators/pkg/utils/stringsutil/strings.go b/operators/pkg/utils/stringsutil/strings.go index fa713eff27..2d9a95ff30 100644 --- a/operators/pkg/utils/stringsutil/strings.go +++ b/operators/pkg/utils/stringsutil/strings.go @@ -29,6 +29,20 @@ func StringInSlice(str string, list []string) bool { return false } +// StringsInSlice returns true if the given strings are found in the provided slice, else returns false +func StringsInSlice(strings []string, slice []string) bool { + asMap := make(map[string]struct{}, len(slice)) + for _, s := range slice { + asMap[s] = struct{}{} + } + for _, s := range strings { + if _, exists := asMap[s]; !exists { + return false + } + } + return true +} + // RemoveStringInSlice returns a new slice with all occurrences of s removed, // keeping the given slice unmodified func RemoveStringInSlice(s string, slice []string) []string { From 443cde739f90bf0216628202558a4d6671f27d34 Mon Sep 17 00:00:00 2001 From: sebgl Date: Wed, 17 Jul 2019 14:59:05 +0200 Subject: [PATCH 11/31] Fix linter warnings --- .../certificates/transport/pod_secret.go | 2 +- .../certificates/transport/reconcile_test.go | 1 - .../transport/transport_fixtures_test.go | 23 +-- .../elasticsearch/driver/default.go | 8 +- .../elasticsearch/driver/esstate.go | 11 +- .../elasticsearch/driver/upgrade.go | 4 +- .../elasticsearch/mutation/calculate_test.go | 132 +++++++----------- .../elasticsearch/mutation/changes_test.go | 2 - .../pkg/controller/elasticsearch/sset/list.go | 2 +- .../version/version6/podspecs_test.go | 5 - 10 files changed, 64 insertions(+), 126 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go index 2b35ca810d..e0ac9262dd 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go @@ -201,7 +201,7 @@ func extractTransportCert(secret corev1.Secret, pod corev1.Pod, commonName strin } // look for the certificate based on the CommonName - var names []string + names := make([]string, 0, len(certs)) for _, c := range certs { if c.Subject.CommonName == commonName { return c diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile_test.go index 7e066129e3..e072fbd76a 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile_test.go +++ 
b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile_test.go @@ -43,7 +43,6 @@ func Test_ensureTransportCertificateSecretExists(t *testing.T) { c k8s.Client scheme *runtime.Scheme owner v1alpha1.Elasticsearch - labels map[string]string } tests := []struct { name string diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go index 03a65cdcde..e38a6e298f 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go @@ -48,7 +48,6 @@ var ( ClusterIP: "2.2.3.3", }, } - additionalCA = [][]byte{[]byte(testAdditionalCA)} ) const ( @@ -69,25 +68,6 @@ AAA7eoZ9AEHflUeuLn9QJI/r0hyQQLEtrpwv6rDT1GCWaLII5HJ6NUFVf4TTcqxo wg/HcAJWY60xZTJDFN+Qfx8ZQvBEin6c2/h+zZi5IVY= -----END RSA PRIVATE KEY----- ` - testAdditionalCA = `-----BEGIN CERTIFICATE----- -MIIDKzCCAhOgAwIBAgIRAK7i/u/wsh+i2G0yUygsJckwDQYJKoZIhvcNAQELBQAw -LzEZMBcGA1UECxMQNG1jZnhjbnh0ZjZuNHA5bDESMBAGA1UEAxMJdHJ1c3Qtb25l -MB4XDTE5MDMyMDIwNDg1NloXDTIwMDMxOTIwNDk1NlowLzEZMBcGA1UECxMQNG1j -Znhjbnh0ZjZuNHA5bDESMBAGA1UEAxMJdHJ1c3Qtb25lMIIBIjANBgkqhkiG9w0B -AQEFAAOCAQ8AMIIBCgKCAQEAu/Pws5FcyJw843pNow/Y95rApWAuGanU99DEmeOG -ggtpc3qtDWWKwLZ6cU+av3u82tf0HYSpy0Z2hn3PS2dGGgHPTr/tTGYA5alu1dn5 -CgqQDBVLbkKA1lDcm8w98fRavRw6a0TX5DURqXs+smhdMztQjDNCl3kJ40JbXVAY -x5vhD2pKPCK0VIr9uYK0E/9dvrU0SJGLUlB+CY/DU7c8t22oer2T6fjCZzh3Fhwi -/aOKEwEUoE49orte0N9b1HSKlVePzIUuTTc3UU2ntWi96Uf2FesuAubU11WH4kIL -wRlofty7ewBzVmGte1fKUMjHB3mgb+WYwkEFwjpQL4LhkQIDAQABo0IwQDAOBgNV -HQ8BAf8EBAMCAoQwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMA8GA1Ud -EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAI+qczKQgkb5L5dXzn+KW92J -Sq1rrmaYUYLRTtPFH7t42REPYLs4UV0qR+6v/hJljQbAS+Vu3BioLWuxq85NsIjf -OK1KO7D8lwVI9tAetE0tKILqljTjwZpqfZLZ8fFqwzd9IM/WfoI7Z05k8BSL6XdM -FaRfSe/GJ+DR1dCwnWAVKGxAry4JSceVS9OXxYNRTcfQuT5s8h/6X5UaonTbhil7 -91fQFaX8LSuZj23/3kgDTnjPmvj2sz5nODymI4YeTHLjdlMmTufWSJj901ITp7Bw -DMO3GhRADFpMz3vjHA2rHA4AQ6nC8N4lIYTw0AF1VAOC0SDntf6YEgrhRKRFAUY= ------END CERTIFICATE-----` ) func init() { @@ -113,6 +93,9 @@ func init() { panic("Failed to create CSR:" + err.Error()) } testCSR, err = x509.ParseCertificateRequest(testCSRBytes) + if err != nil { + panic("Failed to parse CSR:" + err.Error()) + } validatedCertificateTemplate, err = createValidatedCertificateTemplate( testPod, testES, []corev1.Service{testSvc}, testCSR, certificates.DefaultCertValidity) diff --git a/operators/pkg/controller/elasticsearch/driver/default.go b/operators/pkg/controller/elasticsearch/driver/default.go index 71b42f8fc1..7cb36f1629 100644 --- a/operators/pkg/controller/elasticsearch/driver/default.go +++ b/operators/pkg/controller/elasticsearch/driver/default.go @@ -76,10 +76,10 @@ type defaultDriver struct { // Elasticsearch cluster. 
// // paramsTmpl argument is a partially filled NewPodSpecParams (TODO: refactor into its own params struct) - expectedPodsAndResourcesResolver func( - es v1alpha1.Elasticsearch, - paramsTmpl pod.NewPodSpecParams, - ) ([]pod.PodSpecContext, error) + //expectedPodsAndResourcesResolver func( + // es v1alpha1.Elasticsearch, + // paramsTmpl pod.NewPodSpecParams, + //) ([]pod.PodSpecContext, error) // observedStateResolver resolves the currently observed state of Elasticsearch from the ES API observedStateResolver func(clusterName types.NamespacedName, esClient esclient.Client) observer.State diff --git a/operators/pkg/controller/elasticsearch/driver/esstate.go b/operators/pkg/controller/elasticsearch/driver/esstate.go index 0b45dcc513..ad719d32f2 100644 --- a/operators/pkg/controller/elasticsearch/driver/esstate.go +++ b/operators/pkg/controller/elasticsearch/driver/esstate.go @@ -62,13 +62,6 @@ func (n *lazyNodes) initialize() error { return nil } -func (n *lazyNodes) nodeInCluster(nodeName string) (bool, error) { - if err := initOnce(&n.once, n.initialize); err != nil { - return false, err - } - return stringsutil.StringInSlice(nodeName, n.nodes), nil -} - func (n *lazyNodes) NodesInCluster(nodeNames []string) (bool, error) { if err := initOnce(&n.once, n.initialize); err != nil { return false, err @@ -79,9 +72,9 @@ func (n *lazyNodes) NodesInCluster(nodeNames []string) (bool, error) { // -- Shards allocation enabled type lazyShardsAllocationEnabled struct { + enabled bool once sync.Once esClient esclient.Client - enabled bool } func (s *lazyShardsAllocationEnabled) initialize() error { @@ -105,9 +98,9 @@ func (s *lazyShardsAllocationEnabled) ShardAllocationsEnabled() (bool, error) { // -- Green health type lazyGreenHealth struct { + greenHealth bool once sync.Once esClient esclient.Client - greenHealth bool } func (h *lazyGreenHealth) initialize() error { diff --git a/operators/pkg/controller/elasticsearch/driver/upgrade.go b/operators/pkg/controller/elasticsearch/driver/upgrade.go index 3f49b3a025..6002daed6f 100644 --- a/operators/pkg/controller/elasticsearch/driver/upgrade.go +++ b/operators/pkg/controller/elasticsearch/driver/upgrade.go @@ -118,7 +118,7 @@ func (d *defaultDriver) doRollingUpgrade( } // Upgrade the pod. 
- if err := d.upgradeStatefulSetPartition(es, &statefulSets[i], esClient, partition); err != nil { + if err := d.upgradeStatefulSetPartition(&statefulSets[i], partition); err != nil { return results.WithError(err) } scheduledUpgrades++ @@ -128,9 +128,7 @@ func (d *defaultDriver) doRollingUpgrade( } func (d *defaultDriver) upgradeStatefulSetPartition( - es v1alpha1.Elasticsearch, statefulSet *appsv1.StatefulSet, - esClient esclient.Client, newPartition int32, ) error { // TODO: zen1, zen2 diff --git a/operators/pkg/controller/elasticsearch/mutation/calculate_test.go b/operators/pkg/controller/elasticsearch/mutation/calculate_test.go index 8cda8fcd8e..f9091f9c18 100644 --- a/operators/pkg/controller/elasticsearch/mutation/calculate_test.go +++ b/operators/pkg/controller/elasticsearch/mutation/calculate_test.go @@ -4,87 +4,59 @@ package mutation -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -var defaultCPULimit = "800m" -var defaultImage = "image" -var defaultPodSpecCtxV2 = ESPodSpecContext(defaultImage, "1000m") - -var es = v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Name: "elasticsearch", - }, -} - -func ESPodWithConfig(image string, cpuLimit string) pod.PodWithConfig { - tpl := ESPodSpecContext(image, cpuLimit).PodTemplate - return pod.PodWithConfig{ - Pod: corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name.NewPodName(es.Name, v1alpha1.NodeSpec{}), - Labels: hash.SetTemplateHashLabel(nil, tpl), - }, - Spec: tpl.Spec, - }, - } -} +// +//var es = v1alpha1.Elasticsearch{ +// ObjectMeta: metav1.ObjectMeta{ +// Name: "elasticsearch", +// }, +//} -func ESPodSpecContext(image string, cpuLimit string) pod.PodSpecContext { - return pod.PodSpecContext{ - PodTemplate: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - label.ClusterNameLabelName: es.Name, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Image: image, - ImagePullPolicy: corev1.PullIfNotPresent, - Name: v1alpha1.ElasticsearchContainerName, - Ports: pod.DefaultContainerPorts, - // TODO: Hardcoded resource limits and requests - Resources: corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse(cpuLimit), - corev1.ResourceMemory: resource.MustParse("2Gi"), - }, - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("2Gi"), - }, - }, - ReadinessProbe: &corev1.Probe{ - FailureThreshold: 3, - InitialDelaySeconds: 10, - PeriodSeconds: 5, - SuccessThreshold: 1, - TimeoutSeconds: 5, - Handler: corev1.Handler{ - Exec: &corev1.ExecAction{ - Command: []string{ - "sh", - "-c", - "script here", - }, - }, - }, - }, - }}, - }, - }, - } -} +//func ESPodSpecContext(image string, cpuLimit string) pod.PodSpecContext { +// return pod.PodSpecContext{ +// PodTemplate: corev1.PodTemplateSpec{ +// ObjectMeta: metav1.ObjectMeta{ +// Labels: map[string]string{ +// label.ClusterNameLabelName: es.Name, +// }, +// }, +// Spec: corev1.PodSpec{ +// Containers: []corev1.Container{{ +// Image: image, +// 
ImagePullPolicy: corev1.PullIfNotPresent, +// Name: v1alpha1.ElasticsearchContainerName, +// Ports: pod.DefaultContainerPorts, +// // TODO: Hardcoded resource limits and requests +// Resources: corev1.ResourceRequirements{ +// Limits: corev1.ResourceList{ +// corev1.ResourceCPU: resource.MustParse(cpuLimit), +// corev1.ResourceMemory: resource.MustParse("2Gi"), +// }, +// Requests: corev1.ResourceList{ +// corev1.ResourceCPU: resource.MustParse("100m"), +// corev1.ResourceMemory: resource.MustParse("2Gi"), +// }, +// }, +// ReadinessProbe: &corev1.Probe{ +// FailureThreshold: 3, +// InitialDelaySeconds: 10, +// PeriodSeconds: 5, +// SuccessThreshold: 1, +// TimeoutSeconds: 5, +// Handler: corev1.Handler{ +// Exec: &corev1.ExecAction{ +// Command: []string{ +// "sh", +// "-c", +// "script here", +// }, +// }, +// }, +// }, +// }}, +// }, +// }, +// } +//} // //func TestCalculateChanges(t *testing.T) { diff --git a/operators/pkg/controller/elasticsearch/mutation/changes_test.go b/operators/pkg/controller/elasticsearch/mutation/changes_test.go index 99135c586e..bcb54c6a36 100644 --- a/operators/pkg/controller/elasticsearch/mutation/changes_test.go +++ b/operators/pkg/controller/elasticsearch/mutation/changes_test.go @@ -16,9 +16,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -var defaultPodWithConfig = ESPodWithConfig(defaultImage, defaultCPULimit) var emptyPodWithConfig = pod.PodWithConfig{Pod: corev1.Pod{}} -var defaultPodSpecCtx = ESPodSpecContext(defaultImage, defaultCPULimit) func namedPod(name string) pod.PodWithConfig { return pod.PodWithConfig{ diff --git a/operators/pkg/controller/elasticsearch/sset/list.go b/operators/pkg/controller/elasticsearch/sset/list.go index 60e5279c87..8c8450c4ea 100644 --- a/operators/pkg/controller/elasticsearch/sset/list.go +++ b/operators/pkg/controller/elasticsearch/sset/list.go @@ -54,7 +54,7 @@ func (l StatefulSetList) RevisionUpdateScheduled() bool { // PodNames returns the names of the pods for all StatefulSets in the list. func (l StatefulSetList) PodNames() []string { - var names []string + names := make([]string, 0, len(l)) for _, s := range l { names = append(names, PodNames(s)...) 
} diff --git a/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go b/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go index c47371755b..585038136e 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go +++ b/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go @@ -10,7 +10,6 @@ import ( "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" @@ -20,10 +19,6 @@ import ( ) var testProbeUser = client.UserAuth{Name: "username1", Password: "supersecure"} -var testObjectMeta = metav1.ObjectMeta{ - Name: "my-es", - Namespace: "default", -} func TestNewEnvironmentVars(t *testing.T) { type args struct { From c0194f6ec7e82beaff47b3d26ab50b03e7dd025b Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Mon, 22 Jul 2019 14:28:08 +0200 Subject: [PATCH 12/31] sset: small optimization/fix (#1322) --- operators/pkg/controller/elasticsearch/sset/list.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/sset/list.go b/operators/pkg/controller/elasticsearch/sset/list.go index 8c8450c4ea..6eaedc8dba 100644 --- a/operators/pkg/controller/elasticsearch/sset/list.go +++ b/operators/pkg/controller/elasticsearch/sset/list.go @@ -36,8 +36,8 @@ func (l StatefulSetList) GetByName(ssetName string) (appsv1.StatefulSet, bool) { func (l StatefulSetList) ObjectMetas() []metav1.ObjectMeta { objs := make([]metav1.ObjectMeta, len(l)) - for _, sset := range l { - objs = append(objs, sset.ObjectMeta) + for i, sset := range l { + objs[i] = sset.ObjectMeta } return objs } From 8b75c62c779d819461341f4ab21bebd6fa425e0d Mon Sep 17 00:00:00 2001 From: Sebastien Guilloux Date: Wed, 24 Jul 2019 11:01:45 +0200 Subject: [PATCH 13/31] Merge master into statefulset-refactoring (#1358) * Use the setvmmaxmapcount initcontainer by default in E2E tests (#1300) Let's keep our default defaults :) The setting is disabled explicitly for E2E tests where we enable a restricted security context. * Add docs for plugins, custom configuration files and secure settings (#1298) * Allow license secret webhook to fail (#1301) Webhooks on core k8s objects are just too debilitating in case our webhook service fails. This sets the failure policy for the secret webhook to ignore to strike a balance between UX (immediate feedback) and keeping the users k8s cluster in a working state. Also we have an additional validation run on controller level so this does not allow circumventing our validation logic. * Revert "Use the setvmmaxmapcount initcontainer by default in E2E tests (#1300)" (#1302) This reverts commit fff15269be1c431121fdefd8ca8c6ad93db8c9df. This commit is breaking our E2E tests chain, which deploy a PodSecurityPolicy by default. Any privileged init container will not work. I'll open an issue for a longer-term fix to properly handle this. * Update quickstart (#1307) * Update the name of the secret for the elastic user * Bump the Elastic Stack version from 7.1.0 to 7.2.0 * Change Kibana readiness endpoint to return a 200 OK (#1309) The previous endpoint returned an http code 302. While this is fine for Kubernetes, some derived systems like GCP LoadBalancers mimic the container readiness check for their own readiness check. Except GCP Loadbalancers only work with status 200. 
It's not up to us to adapt GCP LoadBalancers to K8s, but this is a fairly trivial fix. * Fix pod_forwarder to support two part DNS names, adjust e2e http_client (#1297) * Fix pod_forwarder to support two part DNS names, adjust e2e http_client url * Revert removing .svc in e2e http_client * [DOC] Resources management and volume claim template (#1252) * Add resources and persistent volume templates documentation * Ignore resources reconciled by older controllers (#1286) * Document PodDisruptionBudget section of the ES spec (#1306) * Document PodDisruptionBudget section of the ES spec I suspect this might slightly change in the feature depending on how we handle the readiness check, so I'm keeping this doc minimal for now: * what is a PDB, briefly (with a link) * default PDB we apply * how to set a different PDB * how to disable the default PDB * Move version out from Makefile (#1312) * Add release note generation tool (#1314) * no external dependencies * inspects PRs by version label * generates structured release notes in asciidoc grouped by type label * Add console output to standalone apm sample (#1321) * Update Quickstart to 0.9.0 (#1317) * Update doc (#1319) * Update persistent storage section * Update kibana localhost url to use https * Update k8s resources names in accessing-services doc * Mention SSL browser warning * Fix bulleted list * Add CI job for nightly builds (#1248) * Move version to a file * Add CI implementation * Update VERSION * Depend on another PR for moving out version from Makefile * Update Jenkinsfile * Don't build and push operator image in bootstrap-gke (#1332) We don't need to do that anymore, since we don't use an init container based on the operator image. * Remove Docker image publishing from devops-ci (#1339) * Suppress output of certain commands from Makefile (#1342) * Document how to disable TLS (#1341) * Use new credentials for Docker registry (#1346) * Workaround controller-runtime webhook upsert bug (#1337) * Fix docs build on PR job (#1351) * Fix docs build on PR job * Cleanup workspace before doing other steps * APM: remove "output" element and add elasticsearchRef (#1345) * Don't rely on buggy metaObject Kind (#1324) * Don't rely on buggy metaObject Kind A bug in our client implementation may clear the object's Kind on certain scenarios. See https://github.com/kubernetes-sigs/controller-runtime/issues/406. Let's avoid that by fixing a constant Kind returned by a method call on the resource. 
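
For illustration, the workaround boils down to something like the sketch below (a minimal, self-contained example; the type and method names here are not the actual ones used in the operator): the resource exposes its kind through a constant-returning method instead of relying on TypeMeta.Kind, so callers keep working even when the cached client hands back an object with an empty Kind.

    package main

    import "fmt"

    // Sketch only: the resource type knows its own kind and returns it from
    // a method, rather than trusting TypeMeta.Kind which the cached client
    // may have cleared (controller-runtime issue 406).
    type Elasticsearch struct {
        Kind string // may come back empty from the cache
    }

    // GetKind always returns the known, constant kind for this resource.
    func (es Elasticsearch) GetKind() string { return "Elasticsearch" }

    func main() {
        var es Elasticsearch      // Kind field left empty, as with the buggy cache
        fmt.Println(es.GetKind()) // still prints "Elasticsearch"
    }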
--- .ci/jobs/gke-e2e-versions.yml | 6 +- build/ci/Makefile | 27 +- build/ci/e2e/GKE_k8s_versions.jenkinsfile | 8 +- build/ci/nightly/Jenkinsfile | 59 ++++ build/ci/pr/Jenkinsfile | 23 +- docs/accessing-services.asciidoc | 36 ++- docs/elasticsearch-spec.asciidoc | 150 ++++++++++ docs/index.asciidoc | 2 + docs/k8s-quickstart.asciidoc | 26 +- docs/managing-compute-resources.asciidoc | 117 ++++++++ operators/Makefile | 6 +- operators/VERSION | 1 + .../config/crds/apm_v1alpha1_apmserver.yaml | 113 ++++---- .../config/samples/apm/apm_es_kibana.yaml | 13 +- operators/config/samples/apm/apmserver.yaml | 5 +- operators/hack/release_notes.go | 266 ++++++++++++++++++ operators/hack/release_notes_test.go | 263 +++++++++++++++++ .../pkg/apis/apm/v1alpha1/apmserver_types.go | 29 +- .../apm/v1alpha1/zz_generated.deepcopy.go | 25 +- .../v1alpha1/elasticsearch_types.go | 11 +- .../pkg/apis/kibana/v1alpha1/kibana_types.go | 11 +- .../apmserver/apmserver_controller.go | 15 +- .../pkg/controller/apmserver/config/config.go | 4 +- operators/pkg/controller/apmserver/pod.go | 2 +- ...rverelasticsearchassociation_controller.go | 54 ++-- ...lasticsearchassociation_controller_test.go | 8 +- .../apmserverelasticsearchassociation/user.go | 17 +- .../user_test.go | 22 +- .../common/annotation/controller_version.go | 95 +++++++ .../annotation/controller_version_test.go | 152 +++++++++- .../common/association/association_test.go | 26 +- .../controller/common/keystore/resources.go | 12 +- .../controller/common/keystore/user_secret.go | 3 +- .../elasticsearch/elasticsearch_controller.go | 13 +- .../pkg/controller/kibana/driver_test.go | 2 +- .../controller/kibana/kibana_controller.go | 14 +- operators/pkg/controller/kibana/pod/pod.go | 2 +- .../association_controller.go | 30 +- .../pkg/dev/portforward/pod_forwarder.go | 4 +- .../pkg/dev/portforward/pod_forwarder_test.go | 9 +- operators/pkg/webhook/server.go | 16 +- operators/test/e2e/apm/standalone_test.go | 5 +- operators/test/e2e/test/apmserver/builder.go | 22 +- .../e2e/test/elasticsearch/http_client.go | 2 +- 44 files changed, 1441 insertions(+), 285 deletions(-) create mode 100644 build/ci/nightly/Jenkinsfile create mode 100644 docs/managing-compute-resources.asciidoc create mode 100644 operators/VERSION create mode 100644 operators/hack/release_notes.go create mode 100644 operators/hack/release_notes_test.go diff --git a/.ci/jobs/gke-e2e-versions.yml b/.ci/jobs/gke-e2e-versions.yml index 58c220917a..10f9c49566 100644 --- a/.ci/jobs/gke-e2e-versions.yml +++ b/.ci/jobs/gke-e2e-versions.yml @@ -8,8 +8,10 @@ artifactNumToKeep: 10 name: cloud-on-k8s-versions-gke project-type: pipeline - triggers: - - timed: '0 0 * * 1-5' + parameters: + - string: + name: IMAGE + description: "Docker image with ECK" pipeline-scm: scm: - git: diff --git a/build/ci/Makefile b/build/ci/Makefile index 77fba62175..71919f2f56 100644 --- a/build/ci/Makefile +++ b/build/ci/Makefile @@ -11,7 +11,7 @@ VAULT_GKE_CREDS_SECRET ?= secret/cloud-team/cloud-ci/ci-gcp-k8s-operator GKE_CREDS_FILE ?= credentials.json VAULT_PUBLIC_KEY ?= secret/release/license PUBLIC_KEY_FILE ?= license.key -VAULT_DOCKER_CREDENTIALS ?= secret/cloud-team/cloud-ci/cloudadmin +VAULT_DOCKER_CREDENTIALS ?= secret/devops-ci/cloud-on-k8s/eckadmin DOCKER_CREDENTIALS_FILE ?= docker_credentials.file VAULT_AWS_CREDS ?= secret/cloud-team/cloud-ci/eck-release VAULT_AWS_ACCESS_KEY_FILE ?= aws_access_key.file @@ -48,7 +48,7 @@ vault-docker-creds: @ VAULT_TOKEN=$(VAULT_TOKEN) \ vault read \ -address=$(VAULT_ADDR) \ - -field=password \ + 
-field=value \ $(VAULT_DOCKER_CREDENTIALS) \ > $(DOCKER_CREDENTIALS_FILE) @@ -71,7 +71,7 @@ vault-aws-creds: ci-pr: check-license-header docker build -f Dockerfile -t cloud-on-k8s-ci-pr . - docker run --rm -t \ + @ docker run --rm -t \ -v /var/run/docker.sock:/var/run/docker.sock \ -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ -w $(GO_MOUNT_PATH) \ @@ -86,7 +86,7 @@ ci-pr: check-license-header ci-release: vault-public-key vault-docker-creds docker build -f Dockerfile -t cloud-on-k8s-ci-release . - docker run --rm -t \ + @ docker run --rm -t \ -v /var/run/docker.sock:/var/run/docker.sock \ -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ -w $(GO_MOUNT_PATH) \ @@ -105,7 +105,7 @@ ci-release: vault-public-key vault-docker-creds # Will be uploaded to https://download.elastic.co/downloads/eck/$TAG_NAME/all-in-one.yaml yaml-upload: vault-aws-creds docker build -f Dockerfile -t cloud-on-k8s-ci-release . - docker run --rm -t \ + @ docker run --rm -t \ -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ -w $(GO_MOUNT_PATH) \ -e "AWS_ACCESS_KEY_ID=$(shell cat $(VAULT_AWS_ACCESS_KEY_FILE))" \ @@ -119,7 +119,7 @@ yaml-upload: vault-aws-creds # Spawn a k8s cluster, and run e2e tests against it ci-e2e: vault-gke-creds docker build -f Dockerfile -t cloud-on-k8s-ci-e2e . - docker run --rm -t \ + @ docker run --rm -t \ -v /var/run/docker.sock:/var/run/docker.sock \ -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ -w $(GO_MOUNT_PATH) \ @@ -137,7 +137,7 @@ ci-e2e: vault-gke-creds # Run e2e tests in GKE against provided ECK image ci-e2e-rc: vault-gke-creds docker build -f Dockerfile -t cloud-on-k8s-ci-e2e . - docker run --rm -t \ + @ docker run --rm -t \ -v /var/run/docker.sock:/var/run/docker.sock \ -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ -w $(GO_MOUNT_PATH) \ @@ -156,7 +156,7 @@ ci-e2e-rc: vault-gke-creds # Remove k8s cluster ci-e2e-delete-cluster: vault-gke-creds docker build -f Dockerfile -t cloud-on-k8s-ci-e2e . 
- docker run --rm -t \ + @ docker run --rm -t \ -v /var/run/docker.sock:/var/run/docker.sock \ -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ -w $(GO_MOUNT_PATH) \ @@ -168,7 +168,7 @@ ci-e2e-delete-cluster: vault-gke-creds # Remove all unused resources in GKE ci-gke-cleanup: ci-e2e-delete-cluster - docker run --rm -t \ + @ docker run --rm -t \ -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ -w $(GO_MOUNT_PATH) \ -e "GCLOUD_PROJECT=$(GCLOUD_PROJECT)" \ @@ -177,12 +177,3 @@ ci-gke-cleanup: ci-e2e-delete-cluster cloud-on-k8s-ci-e2e \ bash -c "GKE_CLUSTER_VERSION=1.11 $(GO_MOUNT_PATH)/operators/hack/gke-cluster.sh auth && \ $(GO_MOUNT_PATH)/build/ci/delete_unused_disks.py" - -# Run docs build -ci-build-docs: - docker run --rm -t \ - -v $(ROOT_DIR):$(GO_MOUNT_PATH) \ - docker.elastic.co/docs/build:1 \ - bash -c "git clone https://github.com/elastic/docs.git && \ - /docs/build_docs.pl --doc $(GO_MOUNT_PATH)/docs/index.asciidoc --out $(GO_MOUNT_PATH)/docs/html --chunk 1 && \ - test -e $(GO_MOUNT_PATH)/docs/html/index.html" diff --git a/build/ci/e2e/GKE_k8s_versions.jenkinsfile b/build/ci/e2e/GKE_k8s_versions.jenkinsfile index 0df4eca802..567c828ba5 100644 --- a/build/ci/e2e/GKE_k8s_versions.jenkinsfile +++ b/build/ci/e2e/GKE_k8s_versions.jenkinsfile @@ -14,6 +14,8 @@ pipeline { VAULT_SECRET_ID = credentials('vault-secret-id') REGISTRY = "eu.gcr.io" GCLOUD_PROJECT = credentials('k8s-operators-gcloud-project') + OPERATOR_IMAGE = "${IMAGE}" + LATEST_RELEASED_IMG = "${IMAGE}" } stages { @@ -26,7 +28,7 @@ pipeline { } steps { checkout scm - sh 'make -C build/ci ci-e2e' + sh 'make -C build/ci ci-e2e-rc' } } stage("1.12") { @@ -39,7 +41,7 @@ pipeline { } steps { checkout scm - sh 'make -C build/ci ci-e2e' + sh 'make -C build/ci ci-e2e-rc' } } stage("1.13") { @@ -52,7 +54,7 @@ pipeline { } steps { checkout scm - sh 'make -C build/ci ci-e2e' + sh 'make -C build/ci ci-e2e-rc' } } } diff --git a/build/ci/nightly/Jenkinsfile b/build/ci/nightly/Jenkinsfile new file mode 100644 index 0000000000..9d7b616423 --- /dev/null +++ b/build/ci/nightly/Jenkinsfile @@ -0,0 +1,59 @@ +pipeline { + + agent { + label 'linux' + } + + options { + timeout(time: 1, unit: 'HOURS') + } + + environment { + VAULT_ADDR = credentials('vault-addr') + VAULT_ROLE_ID = credentials('vault-role-id') + VAULT_SECRET_ID = credentials('vault-secret-id') + GCLOUD_PROJECT = credentials('k8s-operators-gcloud-project') + REGISTRY = "push.docker.elastic.co" + REPOSITORY = "eck-snapshots" + IMG_NAME = "eck-operator" + SNAPSHOT = "true" + DOCKER_IMAGE_NO_TAG = "docker.elastic.co/${REPOSITORY}/${IMG_NAME}" + } + + stages { + stage('Run unit and integration tests') { + steps { + sh 'make -C build/ci ci-pr' + } + } + stage('Build and push Docker image') { + steps { + sh """ + export VERSION=\$(cat $WORKSPACE/operators/VERSION)-\$(date +%F)-\$(git rev-parse --short --verify HEAD) + export OPERATOR_IMAGE=${REGISTRY}/${REPOSITORY}/${IMG_NAME}:\$VERSION + make -C build/ci ci-release + """ + } + } + } + + post { + success { + script { + def version = sh(returnStdout: true, script: 'cat $WORKSPACE/operators/VERSION') + def hash = sh(returnStdout: true, script: 'git rev-parse --short --verify HEAD') + def date = new Date() + def image = env.DOCKER_IMAGE_NO_TAG + ":" + version + "-" + date.format("yyyy-MM-dd") + "-" + hash + currentBuild.description = image + + build job: 'cloud-on-k8s-versions-gke', + parameters: [string(name: 'IMAGE', value: image)], + wait: false + } + } + cleanup { + cleanWs() + } + } + +} diff --git a/build/ci/pr/Jenkinsfile b/build/ci/pr/Jenkinsfile index 
e2822e8c63..94159b5f51 100644 --- a/build/ci/pr/Jenkinsfile +++ b/build/ci/pr/Jenkinsfile @@ -38,8 +38,16 @@ pipeline { } stage("Run docs build") { steps { - checkout scm - sh 'make -C build/ci ci-build-docs' + cleanWs() + sh 'git clone git@github.com:elastic/docs.git' + sh 'git clone git@github.com:elastic/cloud-on-k8s.git' + sh """ + $WORKSPACE/docs/build_docs \ + --doc $WORKSPACE/cloud-on-k8s/docs/index.asciidoc \ + --out $WORKSPACE/cloud-on-k8s/docs/html \ + --chunk 1 + """ + sh 'test -e $WORKSPACE/cloud-on-k8s/docs/html/index.html' } } stage("Run smoke E2E tests") { @@ -61,17 +69,6 @@ pipeline { } post { - success { - withEnv([ - 'REGISTRY=push.docker.elastic.co', - 'REPOSITORY=eck-snapshots', - 'IMG_SUFFIX=', - 'SNAPSHOT_RELEASE=true', - 'TAG_NAME=${ghprbPullId}' - ]) { - sh 'make -C build/ci ci-release' - } - } cleanup { script { if (notOnlyDocs()) { diff --git a/docs/accessing-services.asciidoc b/docs/accessing-services.asciidoc index 1c789edb0f..940aa8e682 100644 --- a/docs/accessing-services.asciidoc +++ b/docs/accessing-services.asciidoc @@ -25,7 +25,7 @@ To access Elasticsearch, Kibana or APM Server, the operator manages a default us [source,sh] ---- -> kubectl get secret hulk-elastic-user -o go-template='{{.data.elastic | base64decode }}' +> kubectl get secret hulk-es-elastic-user -o go-template='{{.data.elastic | base64decode }}' 42xyz42citsale42xyz42 ---- @@ -46,6 +46,7 @@ For each resource, `Elasticsearch`, `Kibana` or `ApmServer`, the operator manage > kubectl get svc NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +hulk-apm-http ClusterIP 10.19.212.105 8200:31000/TCP 1m hulk-es-http ClusterIP 10.19.252.160 9200:31320/TCP 1m hulk-kb-http ClusterIP 10.19.247.151 5601:31380/TCP 1m ---- @@ -76,6 +77,7 @@ spec: > kubectl get svc NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +hulk-apm-http ClusterIP 10.19.212.105 35.176.227.106 8200:31000/TCP 1m hulk-es-http LoadBalancer 10.19.252.160 35.198.131.115 9200:31320/TCP 1m hulk-kb-http LoadBalancer 10.19.247.151 35.242.197.228 5601:31380/TCP 1m ---- @@ -141,8 +143,9 @@ spec: You can bring your own certificate to configure TLS to ensure that communication between HTTP clients and the cluster is encrypted. Create a Kubernetes secret with: -. tls.crt: the certificate (or a chain). -. tls.key: the private key to the first certificate in the certificate chain. + +- tls.crt: the certificate (or a chain). +- tls.key: the private key to the first certificate in the certificate chain. [source,sh] ---- @@ -160,6 +163,23 @@ spec: secretName: my-cert ---- +[float] +[id="{p}-disable-tls"] +==== Disable TLS + +You can explicitly disable TLS for Kibana or APM Server if you want to. + +[source,yaml] +---- +spec: + http: + tls: + selfSignedCertificate: + disabled: true +---- + +TLS cannot be disabled for Elasticsearch. 
+ [float] [id="{p}-request-elasticsearch-endpoint"] === Requesting the Elasticsearch endpoint @@ -178,7 +198,7 @@ NAME=hulk kubectl get secret "$NAME-ca" -o go-template='{{index .data "ca.pem" | base64decode }}' > ca.pem PW=$(kubectl get secret "$NAME-elastic-user" -o go-template='{{.data.elastic | base64decode }}') -curl --cacert ca.pem -u elastic:$PW https://$NAME-es:9200/ +curl --cacert ca.pem -u elastic:$PW https://$NAME-es-http:9200/ ---- *Outside the Kubernetes cluster* @@ -191,11 +211,11 @@ curl --cacert ca.pem -u elastic:$PW https://$NAME-es:9200/ ---- NAME=hulk -kubectl get secret "$NAME-ca" -o go-template='{{index .data "ca.pem" | base64decode }}' > ca.pem -IP=$(kubectl get svc "$NAME-es" -o jsonpath='{.status.loadBalancer.ingress[].ip}') -PW=$(kubectl get secret "$NAME-elastic-user" -o go-template='{{.data.elastic | base64decode }}') +kubectl get secret "$NAME-es-http-certs-public" -o go-template='{{index .data "tls.crt" | base64decode }}' > tls.crt +IP=$(kubectl get svc "$NAME-es-http" -o jsonpath='{.status.loadBalancer.ingress[].ip}') +PW=$(kubectl get secret "$NAME-es-elastic-user" -o go-template='{{.data.elastic | base64decode }}') -curl --cacert ca.pem -u elastic:$PW https://$IP:9200/ +curl --cacert tls.crt -u elastic:$PW https://$IP:9200/ ---- Now you should get this message: diff --git a/docs/elasticsearch-spec.asciidoc b/docs/elasticsearch-spec.asciidoc index 8935e873e3..463366a4e1 100644 --- a/docs/elasticsearch-spec.asciidoc +++ b/docs/elasticsearch-spec.asciidoc @@ -50,6 +50,45 @@ spec: For more information on Elasticsearch settings, see https://www.elastic.co/guide/en/elasticsearch/reference/current/settings.html[Configuring Elasticsearch]. +[id="{p}-volume-claim-templates"] +=== Volume Claim Templates + +By default the operator creates a https://kubernetes.io/docs/concepts/storage/persistent-volumes/[`PersistentVolumeClaim`] with a capacity of 1Gi for every Pod in an Elasticsearch cluster. This is to ensure that there is no data loss if a Pod is deleted. + +You can customize the volume claim templates used by Elasticsearch to adjust the storage to your needs, the name in the template must be `elasticsearch-data`: + +[source,yaml] +---- +spec: + nodes: + - volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: standard +---- + +For some reasons you may want to use an `emptyDir` volume, this can be done by specifying the `elasticsearch-data` volume in the `podTemplate`: + +[source,yaml] +---- +spec: + nodes: + - config: + podTemplate: + spec: + volumes: + - name: elasticsearch-data + emptyDir: {} +---- + +Keep in mind that using `emptyDir` may result in data loss and is not recommended. + [id="{p}-http-settings-tls-sans"] === HTTP settings & TLS SANs @@ -101,6 +140,75 @@ $ openssl req -x509 -newkey rsa:4096 -keyout tls.key -out tls.crt -days 365 -nod $ kubectl create secret tls my-cert --cert tls.crt --key tls.key ---- +[id="{p}-es-secure-settings"] +=== Secure Settings + +link:https://www.elastic.co/guide/en/elasticsearch/reference/current/secure-settings.html[Secure settings] can be specified via a Kubernetes secret. +The secret should contain a key-value pair for each secure setting you want to add. Reference that secret in the Elasticsearch +resource spec for ECK to automatically inject those settings into the keystore on each node before it starts Elasticsearch. 
+ +[source,yaml] +---- +spec: + secureSettings: + secretName: your-secure-settings-secret +---- + +See link:snapshots.asciidoc[How to create automated snapshots] for an example use case. + + +[id="{p}-bundles-plugins"] +=== Custom Configuration Files and Plugins + +To run Elasticsearch with specific plugins or configurations files installed on ECK you have two options: + +1. create a custom Docker image with the plugins or files pre-installed +2. install the plugins or configuration files at pod startup time + +NOTE: The first option has the advantage that you can verify the correctness of the image before rolling it out to your ECK installation, while the second option gives you +maximum flexibility. But the second option also means you might catch any errors only at runtime. Plugin installation at runtime has another drawback in that it needs access to the Internet from your cluster +and downloads each plugin multiple times, once for each Elasticsearch node. + +Building your custom Docker images is outside the scope of this documentation despite being the better solution for most users. + +The following therefore describes option 2 using a repository plugin as the example. To install the plugin before the Elasticsearch +nodes start, use an init container to run the link:https://www.elastic.co/guide/en/elasticsearch/plugins/current/installation.html[plugin installation tool]. + +[source,yaml] +---- +podTemplate: + spec: + initContainers: + - name: install-plugins + command: + - sh + - -c + - | + bin/elasticsearch-plugin install --batch repository-azure +---- + +To install custom configuration files you can use volumes and volume mounts. The next example shows how to add a synonyms file for the +link:https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-tokenfilter.html[synonym token filter] in Elasticsearch. +But you can use the same approach for any kind of file you want to mount into the configuration directory of Elasticsearch. + +[source,yaml] +---- +podTemplate: + spec: + containers: + - name: elasticsearch <1> + volumeMounts: + - name: synonyms + mountPath: /usr/share/elasticsearch/config/dictionaries + volumes: + - name: synonyms + configMap: + name: synonyms <2> +---- + +<1> Elasticsearch runs by convention in a container called 'elasticsearch' +<2> assuming you have created a config map in the same namespace as Elasticsearch with the name 'synonyms' containing the synonyms file(s) + [id="{p}-virtual-memory"] === Virtual memory @@ -271,3 +379,45 @@ In this situation, it would be preferable to first recreate the missing nodes in In order to do so, ECK must know about the logical grouping of nodes. Since this is an arbitrary setting (can represent availability zones, but also nodes roles, hot-warm topologies, etc.), it must be specified in the `updateStrategy.groups` section of the Elasticsearch specification. Nodes grouping is expressed through labels on the resources. In the example above, 3 pods are labeled with `group-a`, and the 3 other pods with `group-b`. +[id="{p}-pod-disruption-budget"] +=== Pod disruption budget + +A link:https://kubernetes.io/docs/tasks/run-application/configure-pdb/[Pod Disruption Budget] allows limiting disruptions on an existing set of pods while the Kubernetes cluster administrator manages cluster nodes. +With Elasticsearch, we'd like to make sure some indices don't become unavailable. + +A default PDB of 1 `maxUnavailable` pod on the entire cluster is enforced by default. 
+ +This default can be tweaked in the Elasticsearch specification: + +[source,yaml] +---- +apiVersion: elasticsearch.k8s.elastic.co/v1alpha1 +kind: Elasticsearch +metadata: + name: quickstart +spec: + version: 7.2.0 + nodes: + - nodeCount: 3 + podDisruptionBudget: + spec: + maxUnavailable: 2 + selector: + matchLabels: + elasticsearch.k8s.elastic.co/cluster-name: quickstart +---- + +It can also be explicitly disabled: + +[source,yaml] +---- +apiVersion: elasticsearch.k8s.elastic.co/v1alpha1 +kind: Elasticsearch +metadata: + name: quickstart +spec: + version: 7.2.0 + nodes: + - nodeCount: 3 + podDisruptionBudget: {} +---- diff --git a/docs/index.asciidoc b/docs/index.asciidoc index fcd147c800..73198d3b31 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -10,4 +10,6 @@ include::overview.asciidoc[] include::k8s-quickstart.asciidoc[] include::accessing-services.asciidoc[] include::advanced-node-scheduling.asciidoc[] +include::managing-compute-resources.asciidoc[] include::snapshots.asciidoc[] +include::elasticsearch-spec.asciidoc[] diff --git a/docs/k8s-quickstart.asciidoc b/docs/k8s-quickstart.asciidoc index bbaf0c274b..0b3aafdef3 100644 --- a/docs/k8s-quickstart.asciidoc +++ b/docs/k8s-quickstart.asciidoc @@ -28,7 +28,7 @@ NOTE: If you are using Amazon EKS, make sure the Kubernetes control plane is all + [source,sh] ---- -kubectl apply -f https://download.elastic.co/downloads/eck/0.8.0/all-in-one.yaml +kubectl apply -f https://download.elastic.co/downloads/eck/0.9.0/all-in-one.yaml ---- . Monitor the operator logs: @@ -54,7 +54,7 @@ kind: Elasticsearch metadata: name: quickstart spec: - version: 7.1.0 + version: 7.2.0 nodes: - nodeCount: 1 config: @@ -79,7 +79,7 @@ kubectl get elasticsearch [source,sh] ---- NAME HEALTH NODES VERSION PHASE AGE -quickstart green 1 7.1.0 Operational 1m +quickstart green 1 7.2.0 Operational 1m ---- When you create the cluster, there is no `HEALTH` status and the `PHASE` is `Pending`. After a while, the `PHASE` turns into `Operational`, and `HEALTH` becomes `green`. @@ -126,7 +126,7 @@ A default user named `elastic` is automatically created. Its password is stored + [source,sh] ---- -PASSWORD=$(kubectl get secret quickstart-elastic-user -o=jsonpath='{.data.elastic}' | base64 --decode) +PASSWORD=$(kubectl get secret quickstart-es-elastic-user -o=jsonpath='{.data.elastic}' | base64 --decode) ---- . Request the Elasticsearch endpoint. @@ -161,7 +161,7 @@ NOTE: For testing purposes only, you can specify the `-k` option to turn off cer "cluster_name" : "quickstart", "cluster_uuid" : "XqWg0xIiRmmEBg4NMhnYPg", "version" : { - "number" : "7.1.0", + "number" : "7.2.0", "build_flavor" : "default", "build_type" : "docker", "build_hash" : "04116c9", @@ -191,7 +191,7 @@ kind: Kibana metadata: name: quickstart spec: - version: 7.1.0 + version: 7.2.0 nodeCount: 1 elasticsearchRef: name: quickstart @@ -230,13 +230,13 @@ Use `kubectl port-forward` to access Kibana from your local workstation: kubectl port-forward service/quickstart-kb-http 5601 ---- + -Open `http://localhost:5601` in your browser. +Open `https://localhost:5601` in your browser. Your browser will show a warning because the self-signed certificate configured by default is not verified by a third party certificate authority and not trusted by your browser. You can either configure a link:k8s-accessing-elastic-services.html#k8s-setting-up-your-own-certificate[valid certificate] or acknowledge the warning for the purposes of this quick start. + Login with the `elastic` user. 
Retrieve its password with: + [source,sh] ---- -echo $(kubectl get secret quickstart-elastic-user -o=jsonpath='{.data.elastic}' | base64 --decode) +echo $(kubectl get secret quickstart-es-elastic-user -o=jsonpath='{.data.elastic}' | base64 --decode) ---- [float] @@ -255,7 +255,7 @@ kind: Elasticsearch metadata: name: quickstart spec: - version: 7.1.0 + version: 7.2.0 nodes: - nodeCount: 3 config: @@ -267,11 +267,11 @@ EOF [float] [id="{p}-persistent-storage"] -=== Use persistent storage +=== Update persistent storage -Now that you have completed the quickstart, you can try out more features like using persistent storage. The cluster that you deployed in this quickstart uses a default persistent volume claim of 1GiB, without a storage class set. This means that the default storage class defined in the Kubernetes cluster is the one that will be provisioned. +Now that you have completed the quickstart, you can try out more features like tweaking persistent storage. The cluster that you deployed in this quickstart uses a default persistent volume claim of 1GiB, without a storage class set. This means that the default storage class defined in the Kubernetes cluster is the one that will be provisioned. -You can request a `PersistentVolumeClaim` in the cluster specification, to target any `PersistentVolume` class available in your Kubernetes cluster: +You can request a `PersistentVolumeClaim` with a larger size in the Elasticsearch specification or target any `PersistentVolume` class available in your Kubernetes cluster: [source,yaml] ---- @@ -281,7 +281,7 @@ kind: Elasticsearch metadata: name: quickstart spec: - version: 7.1.0 + version: 7.2.0 nodes: - nodeCount: 3 config: diff --git a/docs/managing-compute-resources.asciidoc b/docs/managing-compute-resources.asciidoc new file mode 100644 index 0000000000..1b337bae8f --- /dev/null +++ b/docs/managing-compute-resources.asciidoc @@ -0,0 +1,117 @@ +[id="{p}-managing-compute-resources"] +== Managing compute resources + +When a Pod is created it may request CPU and RAM resources. It may also specify the maximum resources that the containers are allowed to consume. Both Pod `limits` and `requests` can be set in the specification of any object managed by the operator (Elasticsearch, Kibana or the APM server). For more information about how this is used by Kubernetes please see https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/[Managing Compute Resources for Containers]. + +[float] +[id="{p}-custom-resources"] +=== Set custom resources + +The `resources` can be customized in the `podTemplate` of an object. + +Here is an example for Elasticsearch: + +[source,yaml] +---- +spec: + nodes: + - podTemplate: + spec: + containers: + - name: elasticsearch + env: + - name: ES_JAVA_OPTS + value: -Xms2048M -Xmx2048M + resources: + requests: + memory: 2Gi + cpu: 1 + limits: + memory: 4Gi + cpu: 2 +---- + +This example also demonstrates how to set the JVM memory options accordingly using the `ES_JAVA_OPTS` environment variable. 
+ +The same applies for every object managed by the operator, here is how to set some custom resources for Kibana: + +[source,yaml] +---- +spec: + podTemplate: + spec: + containers: + - name: kibana + resources: + requests: + memory: 1Gi + cpu: 1 + limits: + memory: 2Gi + cpu: 2 +---- + +And here is how to set custom resources on the APM server: + +[source,yaml] +---- +spec: + podTemplate: + spec: + containers: + - name: apm-server + resources: + requests: + memory: 1Gi + cpu: 1 + limits: + memory: 2Gi + cpu: 2 +---- + +[float] +[id="{p}-default-behavior"] +=== Default behavior + +If there's no `resources` set in the specification of an object then no `requests` or `limits` will be applied on the containers, with the notable exception of Elasticsearch. +It is important to understand that by default, if no memory requirement is set in the specification of Elasticsearch then the operator will apply a default memory request of 2Gi. The reason is that it is critical for Elasticsearch to have a minimum amount of memory to perform correctly. But this can be a problem if resources are https://kubernetes.io/docs/tasks/administer-cluster/manage-resources/memory-default-namespace/[managed with some LimitRanges at the namespace level] and if a minimum memory constraint is imposed. + +For example you may want to apply a default request of 3Gi and enforce it as a minimum with a constraint: + +[source,yaml] +---- +apiVersion: v1 +kind: LimitRange +metadata: + name: default-mem-per-container +spec: + limits: + - min: + memory: "3Gi" + defaultRequest: + memory: "3Gi" + type: Container +---- + +But if there is no `resources` declared in the specification then the Pod can't be created and the following event is generated: + +................................... +default 0s Warning Unexpected elasticsearch/elasticsearch-sample Cannot create pod elasticsearch-sample-es-ldbgj48c7r: pods "elasticsearch-sample-es-ldbgj48c7r" is forbidden: minimum memory usage per Container is 3Gi, but request is 2Gi +................................... + +In order to solve this situation you can specify an empty `limits` section in the specification: + +[source,yaml] +---- +spec: + nodes: + - podTemplate: + spec: + containers: + - name: elasticsearch + resources: + # specify empty limits + limits: {} +---- + +The default `requests` will not be set by the operator and the Pod will be created. 
\ No newline at end of file diff --git a/operators/Makefile b/operators/Makefile index fbb346c375..a7a9c1a1c2 100644 --- a/operators/Makefile +++ b/operators/Makefile @@ -17,7 +17,7 @@ GKE_CLUSTER_VERSION ?= 1.12 REPOSITORY ?= eck NAME ?= eck-operator -VERSION ?= 0.9.0-SNAPSHOT +VERSION ?= $(shell cat VERSION) SNAPSHOT ?= true LATEST_RELEASED_IMG ?= "docker.elastic.co/eck/$(NAME):0.8.0" @@ -241,10 +241,6 @@ bootstrap-gke: require-gcloud-project ifeq ($(PSP), 1) kubectl apply -f config/dev/elastic-psp.yaml endif -ifeq ($(SKIP_DOCKER_COMMAND), false) - # push "latest" operator image to be used for init containers when running the operator locally - $(MAKE) docker-build docker-push -endif delete-gke: require-gcloud-project GKE_CLUSTER_VERSION=$(GKE_CLUSTER_VERSION) ./hack/gke-cluster.sh delete diff --git a/operators/VERSION b/operators/VERSION new file mode 100644 index 0000000000..5ea35de7ad --- /dev/null +++ b/operators/VERSION @@ -0,0 +1 @@ +0.9.0-SNAPSHOT diff --git a/operators/config/crds/apm_v1alpha1_apmserver.yaml b/operators/config/crds/apm_v1alpha1_apmserver.yaml index aa93cc5473..9776fc7f58 100644 --- a/operators/config/crds/apm_v1alpha1_apmserver.yaml +++ b/operators/config/crds/apm_v1alpha1_apmserver.yaml @@ -50,6 +50,60 @@ spec: config: description: Config represents the APM configuration. type: object + elasticsearch: + description: Elasticsearch configures how the APM server connects to + Elasticsearch + properties: + auth: + description: Auth configures authentication for APM Server to use. + properties: + inline: + description: Inline is auth provided as plaintext inline credentials. + properties: + password: + description: Password is the password to use. + type: string + username: + description: User is the username to use. + type: string + required: + - username + - password + type: object + secret: + description: SecretKeyRef is a secret that contains the credentials + to use. + type: object + type: object + hosts: + description: Hosts are the URLs of the output Elasticsearch nodes. + items: + type: string + type: array + ssl: + description: SSL configures TLS-related configuration for Elasticsearch + properties: + certificateAuthorities: + description: CertificateAuthorities is a secret that contains + a `tls.crt` entry that contain certificates for server verifications. + properties: + secretName: + type: string + type: object + type: object + type: object + elasticsearchRef: + description: ElasticsearchRef references an Elasticsearch resource in + the Kubernetes cluster. If the namespace is not specified, the current + resource namespace will be used. + properties: + name: + type: string + namespace: + type: string + required: + - name + type: object featureFlags: description: FeatureFlags are apm-specific flags that enable or disable specific experimental features @@ -114,65 +168,6 @@ spec: must have. format: int32 type: integer - output: - properties: - elasticsearch: - description: Elasticsearch configures the Elasticsearch output - properties: - auth: - description: Auth configures authentication for APM Server to - use. - properties: - inline: - description: Inline is auth provided as plaintext inline - credentials. - properties: - password: - description: Password is the password to use. - type: string - username: - description: User is the username to use. - type: string - required: - - username - - password - type: object - secret: - description: SecretKeyRef is a secret that contains the - credentials to use. 
- type: object - type: object - hosts: - description: Hosts are the URLs of the output Elasticsearch - nodes. - items: - type: string - type: array - ref: - description: ElasticsearchRef allows users to reference a Elasticsearch - cluster inside k8s to automatically derive the other fields. - properties: - name: - type: string - namespace: - type: string - required: - - name - type: object - ssl: - description: SSL configures TLS-related configuration for Elasticsearch - properties: - certificateAuthorities: - description: CertificateAuthorities is a secret that contains - a `tls.crt` entry that contain certificates for server - verifications. - properties: - secretName: - type: string - type: object - type: object - type: object - type: object podTemplate: description: PodTemplate can be used to propagate configuration to APM Server pods. This allows specifying custom annotations, labels, environment diff --git a/operators/config/samples/apm/apm_es_kibana.yaml b/operators/config/samples/apm/apm_es_kibana.yaml index d0a12c0557..ca5c6f0bc4 100644 --- a/operators/config/samples/apm/apm_es_kibana.yaml +++ b/operators/config/samples/apm/apm_es_kibana.yaml @@ -5,7 +5,7 @@ kind: Elasticsearch metadata: name: elasticsearch-sample spec: - version: "7.1.0" + version: "7.2.0" nodes: - name: default nodeCount: 3 @@ -15,20 +15,17 @@ kind: ApmServer metadata: name: apm-server-sample spec: - version: "7.1.0" + version: "7.2.0" nodeCount: 1 - output: - elasticsearch: - ref: - name: elasticsearch-sample - namespace: default + elasticsearchRef: + name: "elasticsearch-sample" --- apiVersion: kibana.k8s.elastic.co/v1alpha1 kind: Kibana metadata: name: kibana-sample spec: - version: "7.1.0" + version: "7.2.0" nodeCount: 1 elasticsearchRef: name: "elasticsearch-sample" diff --git a/operators/config/samples/apm/apmserver.yaml b/operators/config/samples/apm/apmserver.yaml index e66e4a837c..8348ebd85e 100644 --- a/operators/config/samples/apm/apmserver.yaml +++ b/operators/config/samples/apm/apmserver.yaml @@ -3,5 +3,8 @@ kind: ApmServer metadata: name: apmserver-sample spec: - version: "7.1.0" + version: "7.2.0" nodeCount: 1 + config: + output.console: + pretty: true diff --git a/operators/hack/release_notes.go b/operators/hack/release_notes.go new file mode 100644 index 0000000000..f5ef7ad8f7 --- /dev/null +++ b/operators/hack/release_notes.go @@ -0,0 +1,266 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package main + +import ( + "encoding/json" + "errors" + "fmt" + "html/template" + "io" + "net/http" + "os" + "regexp" + "sort" + "strconv" + "strings" +) + +const ( + baseURL = "https://api.github.com/repos/" + repo = "elastic/cloud-on-k8s/" + releaseNoteTemplate = `:issue: https://github.com/{{.Repo}}issues/ +:pull: https://github.com/{{.Repo}}pull/ + +[[release-notes-{{.Version}}]] +== {n} version {{.Version}} +{{range $group, $prs := .Groups}} +[[{{- id $group -}}-{{$.Version}}]] +[float] +=== {{index $.GroupLabels $group}} +{{range $prs}} +* {{.Title}} {pull}{{.Number}}[#{{.Number}}]{{with .RelatedIssues -}} +{{$length := len .}} (issue{{if gt $length 1}}s{{end}}: {{range $idx, $el := .}}{{if $idx}}, {{end}}{issue}{{$el}}[#{{$el}}]{{end}}) +{{- end}} +{{- end}} +{{end}} +` +) + +var ( + groupLabels = map[string]string{ + ">breaking": "Breaking changes", + ">deprecation": "Deprecations", + ">feature": "New features", + ">enhancement": "Enhancements", + ">bug": "Bug fixes", + "nogroup": "Misc", + } + + ignore = map[string]bool{ + ">non-issue": true, + ">refactoring": true, + ">docs": true, + ">test": true, + ":ci": true, + "backport": true, + } +) + +// Label models a subset of a GitHub label. +type Label struct { + Name string `json:"name"` +} + +// Issue models a subset of a Github issue. +type Issue struct { + Labels []Label `json:"labels"` + Body string `json:"body"` + Title string `json:"title"` + Number int `json:"number"` + PullRequest map[string]string `json:"pull_request,omitempty"` + RelatedIssues []int +} + +type GroupedIssues = map[string][]Issue + +type TemplateParams struct { + Version string + Repo string + GroupLabels map[string]string + Groups GroupedIssues +} + +func fetch(url string, out interface{}) (string, error) { + resp, err := http.Get(url) + if err != nil { + return "", err + } + defer resp.Body.Close() + + nextLink := extractNextLink(resp.Header) + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return "", errors.New(fmt.Sprintf("%s: %d %s ", url, resp.StatusCode, resp.Status)) + } + + if err = json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", err + } + return nextLink, nil +} + +func extractNextLink(headers http.Header) string { + var nextLink string + nextRe := regexp.MustCompile(`<([^>]+)>; rel="next"`) + links := headers["Link"] + for _, lnk := range links { + matches := nextRe.FindAllStringSubmatch(lnk, 1) + if matches != nil && matches[0][1] != "" { + nextLink = matches[0][1] + break + } + } + return nextLink +} + +func fetchVersionLabels() ([]string, error) { + var versionLabels []string + url := fmt.Sprintf("%s%slabels?page=1", baseURL, repo) +FETCH: + var labels []Label + next, err := fetch(url, &labels) + if err != nil { + return nil, err + } + for _, l := range labels { + if strings.HasPrefix(l.Name, "v") { + versionLabels = append(versionLabels, l.Name) + } + } + if next != "" { + url = next + goto FETCH + } + + return versionLabels, nil +} + +func fetchIssues(version string) (GroupedIssues, error) { + url := fmt.Sprintf("%s%sissues?labels=%s&pagesize=100&state=all&page=1", baseURL, repo, version) + var prs []Issue +FETCH: + var tranche []Issue + next, err := fetch(url, &tranche) + if err != nil { + return nil, err + } + for _, issue := range tranche { + // only look at PRs + if issue.PullRequest != nil { + prs = append(prs, issue) + } + } + if next != "" { + url = next + goto FETCH + } + result := make(GroupedIssues) + noGroup := "nogroup" +PR: + for _, pr := range prs { + prLabels := make(map[string]bool) + for _, lbl 
:= range pr.Labels { + // remove PRs that have labels to be ignored + if ignore[lbl.Name] { + continue PR + } + // build a lookup table of all labels for this PR + prLabels[lbl.Name] = true + } + + // extract related issues from PR body + if err := extractRelatedIssues(&pr); err != nil { + return nil, err + } + + // group PRs by type label + for typeLabel := range groupLabels { + if prLabels[typeLabel] { + result[typeLabel] = append(result[typeLabel], pr) + continue PR + } + } + // or fall back to a default group + result[noGroup] = append(result[noGroup], pr) + } + return result, nil +} + +func extractRelatedIssues(issue *Issue) error { + re := regexp.MustCompile(fmt.Sprintf(`https://github.com/%sissues/(\d+)`, repo)) + matches := re.FindAllStringSubmatch(issue.Body, -1) + issues := map[int]struct{}{} + for _, capture := range matches { + issueNum, err := strconv.Atoi(capture[1]) + if err != nil { + return err + } + issues[issueNum] = struct{}{} + + } + for rel := range issues { + issue.RelatedIssues = append(issue.RelatedIssues, rel) + } + sort.Ints(issue.RelatedIssues) + return nil +} + +func dumpIssues(params TemplateParams, out io.Writer) { + funcs := template.FuncMap{ + "id": func(s string) string { + return strings.TrimPrefix(s, ">") + }, + } + tpl := template.Must(template.New("release_notes").Funcs(funcs).Parse(releaseNoteTemplate)) + err := tpl.Execute(out, params) + if err != nil { + println(err) + } +} + +func main() { + labels, err := fetchVersionLabels() + if err != nil { + panic(err) + } + + if len(os.Args) != 2 { + usage(labels) + } + + version := os.Args[1] + found := false + for _, l := range labels { + if l == version { + found = true + } + } + if !found { + usage(labels) + } + + groupedIssues, err := fetchIssues(version) + if err != nil { + panic(err) + } + dumpIssues(TemplateParams{ + Version: strings.TrimPrefix(version, "v"), + Repo: repo, + GroupLabels: groupLabels, + Groups: groupedIssues, + }, os.Stdout) + +} + +func usage(labels []string) { + println(fmt.Sprintf("USAGE: %s version > outfile", os.Args[0])) + println("Known versions:") + sort.Strings(labels) + for _, l := range labels { + println(l) + } + os.Exit(1) +} diff --git a/operators/hack/release_notes_test.go b/operators/hack/release_notes_test.go new file mode 100644 index 0000000000..e7bf002206 --- /dev/null +++ b/operators/hack/release_notes_test.go @@ -0,0 +1,263 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package main + +import ( + "bytes" + "net/http" + "reflect" + "testing" +) + +func Test_dumpIssues(t *testing.T) { + type args struct { + params TemplateParams + } + tests := []struct { + name string + args args + wantOut string + }{ + { + name: "two issues--no related", + args: args{ + params: TemplateParams{ + Version: "0.9.0", + Repo: "me/my-repo/", + GroupLabels: map[string]string{ + ">bugs": "Bug Fixes", + }, + Groups: GroupedIssues{ + ">bugs": []Issue{ + { + Labels: nil, + Body: "body", + Title: "title", + Number: 123, + PullRequest: nil, + RelatedIssues: nil, + }, + { + Labels: nil, + Body: "body2", + Title: "title2", + Number: 456, + PullRequest: nil, + RelatedIssues: nil, + }, + }, + }, + }, + }, + wantOut: `:issue: https://github.com/me/my-repo/issues/ +:pull: https://github.com/me/my-repo/pull/ + +[[release-notes-0.9.0]] +== {n} version 0.9.0 + +[[bugs-0.9.0]] +[float] +=== Bug Fixes + +* title {pull}123[#123] +* title2 {pull}456[#456] + +`, + }, + { + name: "single issue with related", + args: args{ + params: TemplateParams{ + Version: "0.9.0", + Repo: "me/my-repo/", + GroupLabels: map[string]string{ + ">bugs": "Bug Fixes", + }, + Groups: GroupedIssues{ + ">bugs": []Issue{ + { + Labels: nil, + Body: "body", + Title: "title", + Number: 123, + PullRequest: nil, + RelatedIssues: []int{456}, + }, + }, + }, + }, + }, + wantOut: `:issue: https://github.com/me/my-repo/issues/ +:pull: https://github.com/me/my-repo/pull/ + +[[release-notes-0.9.0]] +== {n} version 0.9.0 + +[[bugs-0.9.0]] +[float] +=== Bug Fixes + +* title {pull}123[#123] (issue: {issue}456[#456]) + +`, + }, + { + name: "single issue--two related", + args: args{ + params: TemplateParams{ + Version: "0.9.0", + Repo: "me/my-repo/", + GroupLabels: map[string]string{ + ">bugs": "Bug Fixes", + }, + Groups: GroupedIssues{ + ">bugs": []Issue{ + { + Labels: nil, + Body: "body", + Title: "title", + Number: 123, + PullRequest: nil, + RelatedIssues: []int{456, 789}, + }, + }, + }, + }, + }, + wantOut: `:issue: https://github.com/me/my-repo/issues/ +:pull: https://github.com/me/my-repo/pull/ + +[[release-notes-0.9.0]] +== {n} version 0.9.0 + +[[bugs-0.9.0]] +[float] +=== Bug Fixes + +* title {pull}123[#123] (issues: {issue}456[#456], {issue}789[#789]) + +`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + out := &bytes.Buffer{} + dumpIssues(tt.args.params, out) + if gotOut := out.String(); gotOut != tt.wantOut { + t.Errorf("dumpIssues() = %v, want %v", gotOut, tt.wantOut) + } + }) + } +} + +func Test_extractRelatedIssues(t *testing.T) { + type args struct { + issue *Issue + } + tests := []struct { + name string + args args + want []int + wantErr bool + }{ + { + name: "single issue", + args: args{ + issue: &Issue{ + Body: "Resolves https://github.com/elastic/cloud-on-k8s/issues/1241\r\n\r\n* If there is no existing annotation on a resource", + }, + }, + want: []int{1241}, + wantErr: false, + }, + { + name: "multi issue", + args: args{ + issue: &Issue{ + Body: "Resolves https://github.com/elastic/cloud-on-k8s/issues/1241\r\n\r\nRelated https://github.com/elastic/cloud-on-k8s/issues/1245\r\n\r\n", + }, + }, + want: []int{1241, 1245}, + wantErr: false, + }, + { + name: "non issue", + args: args{ + issue: &Issue{ + Body: "Resolves https://github.com/elastic/cloud-on-k8s/issues/1241\r\n\r\nSee all issues https://github.com/elastic/cloud-on-k8s/issues/\r\n\r\n", + }, + }, + want: []int{1241}, + wantErr: false, + }, + { + name: "duplicate issue", + args: args{ + issue: &Issue{ + Body: "Resolves 
https://github.com/elastic/cloud-on-k8s/issues/1241\r\n\r\nRelated https://github.com/elastic/cloud-on-k8s/issues/1241\r\n\r\n", + }, + }, + want: []int{1241}, + wantErr: false, + }, + { + name: "ordered", + args: args{ + issue: &Issue{ + Body: "Resolves https://github.com/elastic/cloud-on-k8s/issues/1245\r\n\r\nRelated https://github.com/elastic/cloud-on-k8s/issues/1241\r\n\r\n", + }, + }, + want: []int{1241, 1245}, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := extractRelatedIssues(tt.args.issue); (err != nil) != tt.wantErr { + t.Errorf("extractRelatedIssues() error = %v, wantErr %v", err, tt.wantErr) + } + if !reflect.DeepEqual(tt.want, tt.args.issue.RelatedIssues) { + t.Errorf("extractRelatedIssues() got = %v, want %v", tt.args.issue.RelatedIssues, tt.want) + } + }) + } +} + +func Test_extractNextLink(t *testing.T) { + type args struct { + headers http.Header + } + tests := []struct { + name string + args args + want string + }{ + { + name: "no link", + args: args{ + headers: http.Header{}, + }, + want: "", + }, + { + name: "with next link", + args: args{ + headers: http.Header{ + "Link": []string{ + `; rel="next", ; rel="last"`, + }, + }, + }, + want: "https://api.github.com/repositories/155368246/issues?page=2", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := extractNextLink(tt.args.headers); got != tt.want { + t.Errorf("extractNextLink() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/operators/pkg/apis/apm/v1alpha1/apmserver_types.go b/operators/pkg/apis/apm/v1alpha1/apmserver_types.go index f6be41f9cd..dd202fce2b 100644 --- a/operators/pkg/apis/apm/v1alpha1/apmserver_types.go +++ b/operators/pkg/apis/apm/v1alpha1/apmserver_types.go @@ -10,7 +10,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const APMServerContainerName = "apm-server" +const ( + APMServerContainerName = "apm-server" + Kind = "ApmServer" +) // ApmServerSpec defines the desired state of ApmServer type ApmServerSpec struct { @@ -29,8 +32,13 @@ type ApmServerSpec struct { // HTTP contains settings for HTTP. HTTP commonv1alpha1.HTTPConfig `json:"http,omitempty"` + // ElasticsearchRef references an Elasticsearch resource in the Kubernetes cluster. + // If the namespace is not specified, the current resource namespace will be used. + ElasticsearchRef commonv1alpha1.ObjectSelector `json:"elasticsearchRef,omitempty"` + + // Elasticsearch configures how the APM server connects to Elasticsearch // +optional - Output Output `json:"output,omitempty"` + Elasticsearch ElasticsearchOutput `json:"elasticsearch,omitempty"` // PodTemplate can be used to propagate configuration to APM Server pods. // This allows specifying custom annotations, labels, environment variables, @@ -49,17 +57,8 @@ type ApmServerSpec struct { FeatureFlags commonv1alpha1.FeatureFlags `json:"featureFlags,omitempty"` } -// Output contains output configuration for supported outputs -type Output struct { - // Elasticsearch configures the Elasticsearch output - // +optional - Elasticsearch ElasticsearchOutput `json:"elasticsearch,omitempty"` -} - // Elasticsearch contains configuration for the Elasticsearch output type ElasticsearchOutput struct { - // ElasticsearchRef allows users to reference a Elasticsearch cluster inside k8s to automatically derive the other fields. - ElasticsearchRef *commonv1alpha1.ObjectSelector `json:"ref,omitempty"` // Hosts are the URLs of the output Elasticsearch nodes. 
Hosts []string `json:"hosts,omitempty"` @@ -148,9 +147,15 @@ func (as *ApmServer) IsMarkedForDeletion() bool { } func (as *ApmServer) ElasticsearchAuth() commonv1alpha1.ElasticsearchAuth { - return as.Spec.Output.Elasticsearch.Auth + return as.Spec.Elasticsearch.Auth } func (as *ApmServer) SecureSettings() *commonv1alpha1.SecretRef { return as.Spec.SecureSettings } + +// Kind can technically be retrieved from metav1.Object, but there is a bug preventing us to retrieve it +// see https://github.com/kubernetes-sigs/controller-runtime/issues/406 +func (as *ApmServer) Kind() string { + return Kind +} diff --git a/operators/pkg/apis/apm/v1alpha1/zz_generated.deepcopy.go b/operators/pkg/apis/apm/v1alpha1/zz_generated.deepcopy.go index d9a2ed652b..90db926ef0 100644 --- a/operators/pkg/apis/apm/v1alpha1/zz_generated.deepcopy.go +++ b/operators/pkg/apis/apm/v1alpha1/zz_generated.deepcopy.go @@ -82,7 +82,8 @@ func (in *ApmServerSpec) DeepCopyInto(out *ApmServerSpec) { *out = (*in).DeepCopy() } in.HTTP.DeepCopyInto(&out.HTTP) - in.Output.DeepCopyInto(&out.Output) + out.ElasticsearchRef = in.ElasticsearchRef + in.Elasticsearch.DeepCopyInto(&out.Elasticsearch) in.PodTemplate.DeepCopyInto(&out.PodTemplate) if in.SecureSettings != nil { in, out := &in.SecureSettings, &out.SecureSettings @@ -129,11 +130,6 @@ func (in *ApmServerStatus) DeepCopy() *ApmServerStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ElasticsearchOutput) DeepCopyInto(out *ElasticsearchOutput) { *out = *in - if in.ElasticsearchRef != nil { - in, out := &in.ElasticsearchRef, &out.ElasticsearchRef - *out = new(commonv1alpha1.ObjectSelector) - **out = **in - } if in.Hosts != nil { in, out := &in.Hosts, &out.Hosts *out = make([]string, len(*in)) @@ -170,20 +166,3 @@ func (in *ElasticsearchOutputSSL) DeepCopy() *ElasticsearchOutputSSL { in.DeepCopyInto(out) return out } - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Output) DeepCopyInto(out *Output) { - *out = *in - in.Elasticsearch.DeepCopyInto(&out.Elasticsearch) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Output. 
-func (in *Output) DeepCopy() *Output { - if in == nil { - return nil - } - out := new(Output) - in.DeepCopyInto(out) - return out -} diff --git a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go index 53e5f39a62..55877db6a5 100644 --- a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go +++ b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go @@ -10,7 +10,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const ElasticsearchContainerName = "elasticsearch" +const ( + ElasticsearchContainerName = "elasticsearch" + Kind = "Elasticsearch" +) // ElasticsearchSpec defines the desired state of Elasticsearch type ElasticsearchSpec struct { @@ -260,6 +263,12 @@ func (e Elasticsearch) SecureSettings() *commonv1alpha1.SecretRef { return e.Spec.SecureSettings } +// Kind can technically be retrieved from metav1.Object, but there is a bug preventing us to retrieve it +// see https://github.com/kubernetes-sigs/controller-runtime/issues/406 +func (e Elasticsearch) Kind() string { + return Kind +} + // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object // ElasticsearchList contains a list of Elasticsearch clusters diff --git a/operators/pkg/apis/kibana/v1alpha1/kibana_types.go b/operators/pkg/apis/kibana/v1alpha1/kibana_types.go index 8acfe3e511..8b926cdfd9 100644 --- a/operators/pkg/apis/kibana/v1alpha1/kibana_types.go +++ b/operators/pkg/apis/kibana/v1alpha1/kibana_types.go @@ -11,7 +11,10 @@ import ( commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" ) -const KibanaContainerName = "kibana" +const ( + KibanaContainerName = "kibana" + Kind = "Kibana" +) // KibanaSpec defines the desired state of Kibana type KibanaSpec struct { @@ -107,6 +110,12 @@ func (k *Kibana) SecureSettings() *commonv1alpha1.SecretRef { return k.Spec.SecureSettings } +// Kind can technically be retrieved from metav1.Object, but there is a bug preventing us to retrieve it +// see https://github.com/kubernetes-sigs/controller-runtime/issues/406 +func (k *Kibana) Kind() string { + return Kind +} + // +genclient // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/operators/pkg/controller/apmserver/apmserver_controller.go b/operators/pkg/controller/apmserver/apmserver_controller.go index 5f3797a8fd..e970bccb3e 100644 --- a/operators/pkg/controller/apmserver/apmserver_controller.go +++ b/operators/pkg/controller/apmserver/apmserver_controller.go @@ -35,6 +35,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8slabels "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/rand" @@ -196,6 +197,16 @@ func (r *ReconcileApmServer) Reconcile(request reconcile.Request) (reconcile.Res return reconcile.Result{}, nil } + selector := k8slabels.Set(map[string]string{labels.ApmServerNameLabelName: as.Name}).AsSelector() + compat, err := annotation.ReconcileCompatibility(r.Client, as, selector, r.OperatorInfo.BuildInfo.Version) + if err != nil { + return reconcile.Result{}, err + } + if !compat { + // this resource is not able to be reconciled by this version of the controller, so we will skip it and not requeue + return reconcile.Result{}, nil + } + err = annotation.UpdateControllerVersion(r.Client, as, r.OperatorInfo.BuildInfo.Version) if err != nil { return reconcile.Result{}, err @@ -296,7 +307,7 
@@ func (r *ReconcileApmServer) deploymentParams( _, _ = configChecksum.Write([]byte(params.keystoreResources.Version)) } - esCASecretName := as.Spec.Output.Elasticsearch.SSL.CertificateAuthorities.SecretName + esCASecretName := as.Spec.Elasticsearch.SSL.CertificateAuthorities.SecretName if esCASecretName != "" { // TODO: use apmServerCa to generate cert for deployment @@ -439,6 +450,6 @@ func (r *ReconcileApmServer) updateStatus(state State) (reconcile.Result, error) // finalizersFor returns the list of finalizers applying to a given APM deployment func (r *ReconcileApmServer) finalizersFor(as apmv1alpha1.ApmServer) []finalizer.Finalizer { return []finalizer.Finalizer{ - keystore.Finalizer(k8s.ExtractNamespacedName(&as), r.dynamicWatches, "apmserver"), + keystore.Finalizer(k8s.ExtractNamespacedName(&as), r.dynamicWatches, as.Kind()), } } diff --git a/operators/pkg/controller/apmserver/config/config.go b/operators/pkg/controller/apmserver/config/config.go index b17f9c5096..8921aa9466 100644 --- a/operators/pkg/controller/apmserver/config/config.go +++ b/operators/pkg/controller/apmserver/config/config.go @@ -45,7 +45,7 @@ func NewConfigFromSpec(c k8s.Client, as v1alpha1.ApmServer) (*settings.Canonical } outputCfg := settings.NewCanonicalConfig() - if as.Spec.Output.Elasticsearch.IsConfigured() { + if as.Spec.Elasticsearch.IsConfigured() { // Get username and password username, password, err := association.ElasticsearchAuthSettings(c, &as) if err != nil { @@ -53,7 +53,7 @@ func NewConfigFromSpec(c k8s.Client, as v1alpha1.ApmServer) (*settings.Canonical } outputCfg = settings.MustCanonicalConfig( map[string]interface{}{ - "output.elasticsearch.hosts": as.Spec.Output.Elasticsearch.Hosts, + "output.elasticsearch.hosts": as.Spec.Elasticsearch.Hosts, "output.elasticsearch.username": username, "output.elasticsearch.password": password, "output.elasticsearch.ssl.certificate_authorities": []string{filepath.Join(CertificatesDir, certificates.CertFileName)}, diff --git a/operators/pkg/controller/apmserver/pod.go b/operators/pkg/controller/apmserver/pod.go index b40f94c0a4..7f483fb904 100644 --- a/operators/pkg/controller/apmserver/pod.go +++ b/operators/pkg/controller/apmserver/pod.go @@ -117,7 +117,7 @@ func newPodSpec(as *v1alpha1.ApmServer, p PodSpecParams) corev1.PodTemplateSpec if p.keystoreResources != nil { dataVolume := keystore.DataVolume( - strings.ToLower(as.Kind), + strings.ToLower(as.Kind()), DataVolumePath, ) builder.WithInitContainers(p.keystoreResources.InitContainer). 
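
A minimal sketch of the flattened APM Server spec that the apmserver_types.go and config.go hunks above introduce; the resource names and host URL below are made-up example values, while the field names, types and the IsDefined/IsConfigured helpers are taken from this patch (the snippet itself is not part of the patch):

package main

import (
	"fmt"

	apmtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1"
	commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	as := apmtype.ApmServer{
		ObjectMeta: metav1.ObjectMeta{Name: "apm-sample", Namespace: "default"},
		Spec: apmtype.ApmServerSpec{
			// previously as.Spec.Output.Elasticsearch.ElasticsearchRef (a pointer);
			// now a value type at the top level of the spec
			ElasticsearchRef: commonv1alpha1.ObjectSelector{Name: "es-sample", Namespace: "default"},
			// previously nested under as.Spec.Output; the Output wrapper is removed
			Elasticsearch: apmtype.ElasticsearchOutput{
				Hosts: []string{"https://es-sample-es-http.default.svc:9200"},
			},
		},
	}
	// the controllers below rely on these two helpers to decide whether an
	// association is requested and whether an output is configured
	fmt.Println(as.Spec.ElasticsearchRef.IsDefined(), as.Spec.Elasticsearch.IsConfigured())
}

The association controller and the APM Server config reconciliation that follow read ElasticsearchRef and Elasticsearch directly from the spec instead of going through the removed Output struct.
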
diff --git a/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller.go b/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller.go index 9f6e0a3238..ed249dd1af 100644 --- a/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller.go +++ b/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller.go @@ -12,7 +12,9 @@ import ( apmtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" estype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/apmserver/labels" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/annotation" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates/http" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/finalizer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/operator" @@ -24,6 +26,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8slabels "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -45,8 +48,8 @@ var ( // Add creates a new ApmServerElasticsearchAssociation Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller // and Start it when the Manager is Started. -func Add(mgr manager.Manager, _ operator.Parameters) error { - r := newReconciler(mgr) +func Add(mgr manager.Manager, params operator.Parameters) error { + r := newReconciler(mgr, params) c, err := add(mgr, r) if err != nil { return err @@ -55,13 +58,14 @@ func Add(mgr manager.Manager, _ operator.Parameters) error { } // newReconciler returns a new reconcile.Reconciler -func newReconciler(mgr manager.Manager) *ReconcileApmServerElasticsearchAssociation { +func newReconciler(mgr manager.Manager, params operator.Parameters) *ReconcileApmServerElasticsearchAssociation { client := k8s.WrapClient(mgr.GetClient()) return &ReconcileApmServerElasticsearchAssociation{ - Client: client, - scheme: mgr.GetScheme(), - watches: watches.NewDynamicWatches(), - recorder: mgr.GetRecorder(name), + Client: client, + scheme: mgr.GetScheme(), + watches: watches.NewDynamicWatches(), + recorder: mgr.GetRecorder(name), + Parameters: params, } } @@ -97,7 +101,7 @@ type ReconcileApmServerElasticsearchAssociation struct { scheme *runtime.Scheme recorder record.EventRecorder watches watches.DynamicWatches - + operator.Parameters // iteration is the number of times this controller has run its Reconcile method iteration int64 } @@ -144,6 +148,21 @@ func (r *ReconcileApmServerElasticsearchAssociation) Reconcile(request reconcile return reconcile.Result{}, nil } + selector := k8slabels.Set(map[string]string{labels.ApmServerNameLabelName: apmServer.Name}).AsSelector() + compat, err := annotation.ReconcileCompatibility(r.Client, &apmServer, selector, r.OperatorInfo.BuildInfo.Version) + if err != nil { + return reconcile.Result{}, err + } + if !compat { + // this resource is not able to be reconciled by this version of the controller, so we will skip it and not requeue + return reconcile.Result{}, nil + } + + err = 
annotation.UpdateControllerVersion(r.Client, &apmServer, r.OperatorInfo.BuildInfo.Version) + if err != nil { + return reconcile.Result{}, err + } + newStatus, err := r.reconcileInternal(apmServer) // maybe update status origStatus := apmServer.Status.DeepCopy() @@ -185,13 +204,16 @@ func resultFromStatus(status commonv1alpha1.AssociationStatus) reconcile.Result } func (r *ReconcileApmServerElasticsearchAssociation) reconcileInternal(apmServer apmtype.ApmServer) (commonv1alpha1.AssociationStatus, error) { - assocKey := k8s.ExtractNamespacedName(&apmServer) // no auto-association nothing to do - elasticsearchRef := apmServer.Spec.Output.Elasticsearch.ElasticsearchRef - if elasticsearchRef == nil { + elasticsearchRef := apmServer.Spec.ElasticsearchRef + if !elasticsearchRef.IsDefined() { return "", nil } - + if elasticsearchRef.Namespace == "" { + // no namespace provided: default to the APM server namespace + elasticsearchRef.Namespace = apmServer.Namespace + } + assocKey := k8s.ExtractNamespacedName(&apmServer) // Make sure we see events from Elasticsearch using a dynamic watch // will become more relevant once we refactor user handling to CRDs and implement // syncing of user credentials across namespaces @@ -221,8 +243,6 @@ func (r *ReconcileApmServerElasticsearchAssociation) reconcileInternal(apmServer } var expectedEsConfig apmtype.ElasticsearchOutput - expectedEsConfig.ElasticsearchRef = apmServer.Spec.Output.Elasticsearch.ElasticsearchRef - // TODO: look up public certs secret name from the ES cluster resource instead of relying on naming convention var publicCertsSecret corev1.Secret publicCertsSecretKey := http.PublicCertsSecretRef( @@ -238,8 +258,8 @@ func (r *ReconcileApmServerElasticsearchAssociation) reconcileInternal(apmServer expectedEsConfig.Auth.SecretKeyRef = clearTextSecretKeySelector(apmServer) // TODO: this is a bit rough - if !reflect.DeepEqual(apmServer.Spec.Output.Elasticsearch, expectedEsConfig) { - apmServer.Spec.Output.Elasticsearch = expectedEsConfig + if !reflect.DeepEqual(apmServer.Spec.Elasticsearch, expectedEsConfig) { + apmServer.Spec.Elasticsearch = expectedEsConfig log.Info("Updating Apm Server spec with Elasticsearch output configuration", "namespace", apmServer.Namespace, "as_name", apmServer.Name) if err := r.Update(&apmServer); err != nil { return commonv1alpha1.AssociationPending, err @@ -266,7 +286,7 @@ func deleteOrphanedResources(c k8s.Client, apm apmtype.ApmServer) error { for _, s := range secrets.Items { controlledBy := metav1.IsControlledBy(&s, &apm) - if controlledBy && !apm.Spec.Output.Elasticsearch.ElasticsearchRef.IsDefined() { + if controlledBy && !apm.Spec.ElasticsearchRef.IsDefined() { log.Info("Deleting secret", "namespace", s.Namespace, "secret_name", s.Name, "as_name", apm.Name) if err := c.Delete(&s); err != nil { return err diff --git a/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller_test.go b/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller_test.go index d54e11beef..b3d0a330d6 100644 --- a/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller_test.go +++ b/operators/pkg/controller/apmserverelasticsearchassociation/apmserverelasticsearchassociation_controller_test.go @@ -77,13 +77,7 @@ func Test_deleteOrphanedResources(t *testing.T) { Name: "as", Namespace: "default", }, - Spec: apmtype.ApmServerSpec{ - Output: apmtype.Output{ - Elasticsearch: apmtype.ElasticsearchOutput{ - 
ElasticsearchRef: nil, - }, - }, - }, + Spec: apmtype.ApmServerSpec{}, }, initialObjects: []runtime.Object{ &corev1.Secret{ diff --git a/operators/pkg/controller/apmserverelasticsearchassociation/user.go b/operators/pkg/controller/apmserverelasticsearchassociation/user.go index e349c2bbe6..fdcba2fe13 100644 --- a/operators/pkg/controller/apmserverelasticsearchassociation/user.go +++ b/operators/pkg/controller/apmserverelasticsearchassociation/user.go @@ -34,13 +34,18 @@ func apmUserObjectName(assocName string) string { // userKey is the namespaced name to identify the customer user resource created by the controller. func userKey(apm apmtype.ApmServer) *types.NamespacedName { - - ref := apm.Spec.Output.Elasticsearch.ElasticsearchRef - if ref == nil { + esRef := apm.Spec.ElasticsearchRef + if !esRef.IsDefined() { return nil } + + esNamespace := esRef.Namespace + if esNamespace == "" { + // no namespace given, default to APM's one + esNamespace = apm.Namespace + } return &types.NamespacedName{ - Namespace: ref.Namespace, + Namespace: esNamespace, Name: userName(apm), } } @@ -76,7 +81,7 @@ func reconcileEsUser(c k8s.Client, s *runtime.Scheme, apm apmtype.ApmServer, es secretLabels := labels.NewLabels(apm.Name) secretLabels[AssociationLabelName] = apm.Name // add ES labels - for k, v := range label.NewLabels(apm.Spec.Output.Elasticsearch.ElasticsearchRef.NamespacedName()) { + for k, v := range label.NewLabels(apm.Spec.ElasticsearchRef.NamespacedName()) { secretLabels[k] = v } secKey := secretKey(apm) @@ -120,7 +125,7 @@ func reconcileEsUser(c k8s.Client, s *runtime.Scheme, apm apmtype.ApmServer, es } // analogous to the secret: the user goes on the Elasticsearch side of the association, we apply the ES labels for visibility - userLabels := common.NewLabels(apm.Spec.Output.Elasticsearch.ElasticsearchRef.NamespacedName()) + userLabels := common.NewLabels(apm.Spec.ElasticsearchRef.NamespacedName()) userLabels[AssociationLabelName] = apm.Name userLabels[AssociationLabelNamespace] = apm.Namespace expectedEsUser := &corev1.Secret{ diff --git a/operators/pkg/controller/apmserverelasticsearchassociation/user_test.go b/operators/pkg/controller/apmserverelasticsearchassociation/user_test.go index 842c8870e7..1704ec408d 100644 --- a/operators/pkg/controller/apmserverelasticsearchassociation/user_test.go +++ b/operators/pkg/controller/apmserverelasticsearchassociation/user_test.go @@ -38,14 +38,11 @@ var apmFixture = apmtype.ApmServer{ Namespace: "default", }, Spec: apmtype.ApmServerSpec{ - Output: apmtype.Output{ - Elasticsearch: apmtype.ElasticsearchOutput{ - ElasticsearchRef: &commonv1alpha1.ObjectSelector{ - Name: "es", - Namespace: "default", - }, - }, + ElasticsearchRef: commonv1alpha1.ObjectSelector{ + Name: "es", + Namespace: "default", }, + Elasticsearch: apmtype.ElasticsearchOutput{}, }, } @@ -186,14 +183,11 @@ func Test_reconcileEsUser(t *testing.T) { Namespace: "ns-2", }, Spec: apmtype.ApmServerSpec{ - Output: apmtype.Output{ - Elasticsearch: apmtype.ElasticsearchOutput{ - ElasticsearchRef: &commonv1alpha1.ObjectSelector{ - Name: "es", - Namespace: "ns-1", - }, - }, + ElasticsearchRef: commonv1alpha1.ObjectSelector{ + Name: "es", + Namespace: "ns-1", }, + Elasticsearch: apmtype.ElasticsearchOutput{}, }, }, }, diff --git a/operators/pkg/controller/common/annotation/controller_version.go b/operators/pkg/controller/common/annotation/controller_version.go index 33f1c5dfb4..37e1677f51 100644 --- a/operators/pkg/controller/common/annotation/controller_version.go +++ 
b/operators/pkg/controller/common/annotation/controller_version.go @@ -5,9 +5,14 @@ package annotation import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) // ControllerVersionAnnotation is the annotation name that indicates the last controller version to update a resource @@ -50,3 +55,93 @@ func UpdateControllerVersion(client k8s.Client, obj runtime.Object, version stri log.V(1).Info("updating controller version annotation", "namespace", namespace, "name", name, "kind", obj.GetObjectKind()) return client.Update(obj) } + +// ReconcileCompatibility determines if this controller is compatible with a given resource by examining the controller version annotation +// controller versions 0.9.0+ cannot reconcile resources created with earlier controllers, so this lets our controller skip those resources until they can be manually recreated +// if an object does not have an annotation, it will determine if it is a new object or if it has been previously reconciled by an older controller version, as this annotation +// was not applied by earlier controller versions. it will update the object's annotations indicating it is incompatible if so +func ReconcileCompatibility(client k8s.Client, obj runtime.Object, selector labels.Selector, controllerVersion string) (bool, error) { + accessor := meta.NewAccessor() + namespace, err := accessor.Namespace(obj) + if err != nil { + log.Error(err, "error getting namespace", "kind", obj.GetObjectKind().GroupVersionKind().Kind) + return false, err + } + name, err := accessor.Name(obj) + if err != nil { + log.Error(err, "error getting name", "namespace", namespace, "kind", obj.GetObjectKind().GroupVersionKind().Kind) + return false, err + } + annotations, err := accessor.Annotations(obj) + if err != nil { + log.Error(err, "error getting annotations", "namespace", namespace, "name", name, "kind", obj.GetObjectKind().GroupVersionKind().Kind) + return false, err + } + + annExists := annotations != nil && annotations[ControllerVersionAnnotation] != "" + + // if the annotation does not exist, it might indicate it was reconciled by an older controller version that did not add the version annotation, + // in which case it is incompatible with the current controller, or it is a brand new resource that has not been reconciled by any controller yet + if !annExists { + exist, err := checkExistingResources(client, obj, selector) + if err != nil { + return false, err + } + if exist { + log.Info("Resource was previously reconciled by incompatible controller version and missing annotation, adding annotation", "controller_version", controllerVersion, "namespace", namespace, "name", name, "kind", obj.GetObjectKind().GroupVersionKind().Kind) + err = UpdateControllerVersion(client, obj, "0.8.0-UNKNOWN") + return false, err + } + // no annotation exists and there are no existing resources, so this has not previously been reconciled + err = UpdateControllerVersion(client, obj, controllerVersion) + return true, err + } + + currentVersion, err := version.Parse(annotations[ControllerVersionAnnotation]) + if err != nil { + return false, errors.Wrap(err, "Error parsing current version on resource") + } + minVersion, err := version.Parse("0.9.0-ALPHA") + if err != nil { + return false, 
errors.Wrap(err, "Error parsing minimum compatible version") + } + ctrlVersion, err := version.Parse(controllerVersion) + if err != nil { + return false, errors.Wrap(err, "Error parsing controller version") + } + + // if the current version is gte the minimum version then they are compatible + if currentVersion.IsSameOrAfter(*minVersion) { + log.V(1).Info("Current controller version on resource is compatible with running controller version", "controller_version", ctrlVersion, + "resource_controller_version", currentVersion, "namespace", namespace, "name", name) + return true, nil + } + + log.Info("Resource was created with older version of operator, will not take action", "controller_version", ctrlVersion, + "resource_controller_version", currentVersion, "namespace", namespace, "name", name) + return false, nil +} + +// checkExistingResources returns a bool indicating if there are existing resources created for a given resource +func checkExistingResources(client k8s.Client, obj runtime.Object, selector labels.Selector) (bool, error) { + + accessor := meta.NewAccessor() + namespace, err := accessor.Namespace(obj) + if err != nil { + log.Error(err, "error getting namespace", "kind", obj.GetObjectKind().GroupVersionKind().Kind) + return false, err + } + // if there's no controller version annotation on the object, then we need to see maybe the object has been reconciled by an older, incompatible controller version + opts := ctrlclient.ListOptions{ + LabelSelector: selector, + Namespace: namespace, + } + var svcs corev1.ServiceList + err = client.List(&opts, &svcs) + if err != nil { + return false, err + } + // if we listed any services successfully, then we know this cluster was reconciled by an old version since any objects reconciled by a 0.9.0+ operator would have a label + return len(svcs.Items) != 0, nil + +} diff --git a/operators/pkg/controller/common/annotation/controller_version_test.go b/operators/pkg/controller/common/annotation/controller_version_test.go index d844e25a2a..23598a175a 100644 --- a/operators/pkg/controller/common/annotation/controller_version_test.go +++ b/operators/pkg/controller/common/annotation/controller_version_test.go @@ -8,17 +8,21 @@ import ( "testing" kibanav1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client/fake" apmtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" assoctype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/associations/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" estype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "k8s.io/client-go/kubernetes/scheme" ) @@ -66,21 +70,145 @@ func TestAnnotationCreated(t *testing.T) { assert.Equal(t, actualKibana.GetAnnotations()[ControllerVersionAnnotation], "newversion") } -// setupScheme creates a scheme to use for our fake clients so they know about our custom resources -// TODO move this into one of the upper level common packages and make public, refactor out this code that's in a lot of our tests -func setupScheme(t *testing.T) *runtime.Scheme { - sc := 
scheme.Scheme - if err := assoctype.SchemeBuilder.AddToScheme(sc); err != nil { - assert.Fail(t, "failed to add Association types") +// TestMissingAnnotationOldVersion tests that we skip reconciling an object missing annotations that has already been reconciled by +// a previous operator version, and add an annotation indicating an old controller version +func TestMissingAnnotationOldVersion(t *testing.T) { + + es := &v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns", + Name: "es", + }, } - if err := apmtype.SchemeBuilder.AddToScheme(sc); err != nil { - assert.Fail(t, "failed to add APM types") + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns", + Name: "svc", + Labels: map[string]string{ + label.ClusterNameLabelName: "es", + }, + }, } - if err := estype.SchemeBuilder.AddToScheme(sc); err != nil { - assert.Fail(t, "failed to add ES types") + sc := setupScheme(t) + client := k8s.WrapClient(fake.NewFakeClientWithScheme(sc, es, svc)) + selector := getElasticsearchSelector(es) + compat, err := ReconcileCompatibility(client, es, selector, "0.9.0-SNAPSHOT") + require.NoError(t, err) + assert.False(t, compat) + + // check old version annotation was added + require.NotNil(t, es.Annotations) + assert.Equal(t, "0.8.0-UNKNOWN", es.Annotations[ControllerVersionAnnotation]) +} + +// TestMissingAnnotationNewObject tests that we add an annotation for new objects +func TestMissingAnnotationNewObject(t *testing.T) { + es := &v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns", + Name: "es", + }, } - if err := kibanav1alpha1.SchemeBuilder.AddToScheme(sc); err != nil { - assert.Fail(t, "failed to add Kibana types") + // TODO this is currently broken due to an upstream bug in the fake client. when we upgrade controller runtime + // to a version that contains this PR we can uncomment this and add the service to the client + + // add existing svc that is not part of cluster to make sure we have label selectors correct + // https://github.com/kubernetes-sigs/controller-runtime/pull/311 + // svc := &corev1.Service{ + // ObjectMeta: metav1.ObjectMeta{ + // Namespace: "ns", + // Name: "svc", + // Labels: map[string]string{ + // label.ClusterNameLabelName: "literallyanything", + // }, + // }, + // } + + sc := setupScheme(t) + // client := k8s.WrapClient(fake.NewFakeClientWithScheme(sc, es, svc)) + client := k8s.WrapClient(fake.NewFakeClientWithScheme(sc, es)) + selector := getElasticsearchSelector(es) + compat, err := ReconcileCompatibility(client, es, selector, "0.9.0-SNAPSHOT") + require.NoError(t, err) + assert.True(t, compat) + + // check version annotation was added + require.NotNil(t, es.Annotations) + assert.Equal(t, "0.9.0-SNAPSHOT", es.Annotations[ControllerVersionAnnotation]) +} + +// +func TestSameAnnotation(t *testing.T) { + es := &v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns", + Name: "es", + Annotations: map[string]string{ + ControllerVersionAnnotation: "0.9.0-SNAPSHOT", + }, + }, } + sc := setupScheme(t) + client := k8s.WrapClient(fake.NewFakeClientWithScheme(sc, es)) + selector := getElasticsearchSelector(es) + compat, err := ReconcileCompatibility(client, es, selector, "0.9.0-SNAPSHOT") + require.NoError(t, err) + assert.True(t, compat) + assert.Equal(t, "0.9.0-SNAPSHOT", es.Annotations[ControllerVersionAnnotation]) +} + +func TestIncompatibleAnnotation(t *testing.T) { + es := &v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns", + Name: "es", + Annotations: 
map[string]string{ + ControllerVersionAnnotation: "0.8.0-FOOBAR", + }, + }, + } + sc := setupScheme(t) + client := k8s.WrapClient(fake.NewFakeClientWithScheme(sc, es)) + selector := getElasticsearchSelector(es) + compat, err := ReconcileCompatibility(client, es, selector, "0.9.0-SNAPSHOT") + require.NoError(t, err) + assert.False(t, compat) + // check we did not update the annotation + assert.Equal(t, "0.8.0-FOOBAR", es.Annotations[ControllerVersionAnnotation]) +} + +func TestNewerAnnotation(t *testing.T) { + es := &v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "ns", + Name: "es", + Annotations: map[string]string{ + ControllerVersionAnnotation: "2.0.0", + }, + }, + } + sc := setupScheme(t) + client := k8s.WrapClient(fake.NewFakeClientWithScheme(sc, es)) + selector := getElasticsearchSelector(es) + compat, err := ReconcileCompatibility(client, es, selector, "0.9.0-SNAPSHOT") + assert.NoError(t, err) + assert.True(t, compat) +} + +// setupScheme creates a scheme to use for our fake clients so they know about our custom resources +func setupScheme(t *testing.T) *runtime.Scheme { + sc := scheme.Scheme + err := assoctype.SchemeBuilder.AddToScheme(sc) + require.NoError(t, err) + err = apmtype.SchemeBuilder.AddToScheme(sc) + require.NoError(t, err) + err = estype.SchemeBuilder.AddToScheme(sc) + require.NoError(t, err) + err = kibanav1alpha1.SchemeBuilder.AddToScheme(sc) + require.NoError(t, err) return sc } + +func getElasticsearchSelector(es *v1alpha1.Elasticsearch) labels.Selector { + return labels.Set(map[string]string{label.ClusterNameLabelName: es.Name}).AsSelector() +} diff --git a/operators/pkg/controller/common/association/association_test.go b/operators/pkg/controller/common/association/association_test.go index 82d34803fb..98302c6b57 100644 --- a/operators/pkg/controller/common/association/association_test.go +++ b/operators/pkg/controller/common/association/association_test.go @@ -48,15 +48,13 @@ func Test_getCredentials(t *testing.T) { Namespace: "default", }, Spec: v1alpha1.ApmServerSpec{ - Output: v1alpha1.Output{ - Elasticsearch: v1alpha1.ElasticsearchOutput{ - Hosts: []string{"https://elasticsearch-sample-es-http.default.svc:9200"}, - Auth: commonv1alpha1.ElasticsearchAuth{ - SecretKeyRef: &corev1.SecretKeySelector{ - Key: "elastic-internal-apm", - LocalObjectReference: corev1.LocalObjectReference{ - Name: "apmelasticsearchassociation-sample-elastic-internal-apm", - }, + Elasticsearch: v1alpha1.ElasticsearchOutput{ + Hosts: []string{"https://elasticsearch-sample-es-http.default.svc:9200"}, + Auth: commonv1alpha1.ElasticsearchAuth{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: "elastic-internal-apm", + LocalObjectReference: corev1.LocalObjectReference{ + Name: "apmelasticsearchassociation-sample-elastic-internal-apm", }, }, }, @@ -83,12 +81,10 @@ func Test_getCredentials(t *testing.T) { Namespace: "default", }, Spec: v1alpha1.ApmServerSpec{ - Output: v1alpha1.Output{ - Elasticsearch: v1alpha1.ElasticsearchOutput{ - Hosts: []string{"https://elasticsearch-sample-es-http.default.svc:9200"}, - Auth: commonv1alpha1.ElasticsearchAuth{ - Inline: &elasticsearhInlineAuth, - }, + Elasticsearch: v1alpha1.ElasticsearchOutput{ + Hosts: []string{"https://elasticsearch-sample-es-http.default.svc:9200"}, + Auth: commonv1alpha1.ElasticsearchAuth{ + Inline: &elasticsearhInlineAuth, }, }, }, diff --git a/operators/pkg/controller/common/keystore/resources.go b/operators/pkg/controller/common/keystore/resources.go index 3f0b0c4a94..7c1fd81d5d 100644 --- 
a/operators/pkg/controller/common/keystore/resources.go +++ b/operators/pkg/controller/common/keystore/resources.go @@ -7,14 +7,15 @@ package keystore import ( "strings" - commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" + + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) var log = logf.Log.WithName("keystore") @@ -35,6 +36,9 @@ type HasKeystore interface { metav1.Object runtime.Object SecureSettings() *commonv1alpha1.SecretRef + // Kind can technically be retrieved from metav1.Object, but there is a bug preventing us to retrieve it + // see https://github.com/kubernetes-sigs/controller-runtime/issues/406 + Kind() string } // NewResources optionally returns a volume and init container to include in pods, @@ -60,7 +64,7 @@ func NewResources( // build an init container to create the keystore from the secure settings volume initContainer, err := initContainer( *secretVolume, - strings.ToLower(hasKeystore.GetObjectKind().GroupVersionKind().Kind), + strings.ToLower(hasKeystore.Kind()), initContainerParams, ) if err != nil { diff --git a/operators/pkg/controller/common/keystore/user_secret.go b/operators/pkg/controller/common/keystore/user_secret.go index 7e857b596a..ee44f4a434 100644 --- a/operators/pkg/controller/common/keystore/user_secret.go +++ b/operators/pkg/controller/common/keystore/user_secret.go @@ -6,6 +6,7 @@ package keystore import ( "fmt" + "strings" commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" @@ -116,7 +117,7 @@ func watchSecureSettings(watched watches.DynamicWatches, secureSettingsRef *comm // to be reliable with controller-runtime < v0.2.0-beta.4 func Finalizer(namespacedName types.NamespacedName, watched watches.DynamicWatches, kind string) finalizer.Finalizer { return finalizer.Finalizer{ - Name: "secure-settings.finalizers." + kind + ".k8s.elastic.co", + Name: "secure-settings.finalizers." 
+ strings.ToLower(kind) + ".k8s.elastic.co", Execute: func() error { watched.Secrets.RemoveHandlerForKey(secureSettingsWatchName(namespacedName)) return nil diff --git a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go index 9db9f6e235..6ccd561da2 100644 --- a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go +++ b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go @@ -11,6 +11,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -201,6 +202,16 @@ func (r *ReconcileElasticsearch) Reconcile(request reconcile.Request) (reconcile return common.PauseRequeue, nil } + selector := labels.Set(map[string]string{label.ClusterNameLabelName: es.Name}).AsSelector() + compat, err := annotation.ReconcileCompatibility(r.Client, &es, selector, r.OperatorInfo.BuildInfo.Version) + if err != nil { + return reconcile.Result{}, err + } + if !compat { + // this resource is not able to be reconciled by this version of the controller, so we will skip it and not requeue + return reconcile.Result{}, nil + } + err = annotation.UpdateControllerVersion(r.Client, &es, r.OperatorInfo.BuildInfo.Version) if err != nil { return reconcile.Result{}, err @@ -288,7 +299,7 @@ func (r *ReconcileElasticsearch) finalizersFor( clusterName := k8s.ExtractNamespacedName(&es) return []finalizer.Finalizer{ r.esObservers.Finalizer(clusterName), - keystore.Finalizer(k8s.ExtractNamespacedName(&es), r.dynamicWatches, "elasticsearch"), + keystore.Finalizer(k8s.ExtractNamespacedName(&es), r.dynamicWatches, es.Kind()), http.DynamicWatchesFinalizer(r.dynamicWatches, es.Name, esname.ESNamer), } } diff --git a/operators/pkg/controller/kibana/driver_test.go b/operators/pkg/controller/kibana/driver_test.go index 2244030331..021b055a0c 100644 --- a/operators/pkg/controller/kibana/driver_test.go +++ b/operators/pkg/controller/kibana/driver_test.go @@ -116,7 +116,7 @@ func expectedDeploymentParams() *DeploymentParams { Handler: corev1.Handler{ HTTPGet: &corev1.HTTPGetAction{ Port: intstr.FromInt(5601), - Path: "/", + Path: "/login", Scheme: corev1.URISchemeHTTPS, }, }, diff --git a/operators/pkg/controller/kibana/kibana_controller.go b/operators/pkg/controller/kibana/kibana_controller.go index 0e7f841a17..fecf36cec0 100644 --- a/operators/pkg/controller/kibana/kibana_controller.go +++ b/operators/pkg/controller/kibana/kibana_controller.go @@ -18,10 +18,12 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/operator" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/label" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -153,6 +155,16 @@ func (r *ReconcileKibana) Reconcile(request reconcile.Request) (reconcile.Result return common.PauseRequeue, nil } + selector := labels.Set(map[string]string{label.KibanaNameLabelName: kb.Name}).AsSelector() + compat, err := annotation.ReconcileCompatibility(r.Client, 
kb, selector, r.params.OperatorInfo.BuildInfo.Version) + if err != nil { + return reconcile.Result{}, err + } + if !compat { + // this resource is not able to be reconciled by this version of the controller, so we will skip it and not requeue + return reconcile.Result{}, nil + } + if err := r.finalizers.Handle(kb, r.finalizersFor(*kb)...); err != nil { if errors.IsConflict(err) { // Conflicts are expected and should be resolved on next loop @@ -210,6 +222,6 @@ func (r *ReconcileKibana) updateStatus(state State) error { func (r *ReconcileKibana) finalizersFor(kb kibanav1alpha1.Kibana) []finalizer.Finalizer { return []finalizer.Finalizer{ secretWatchFinalizer(kb, r.dynamicWatches), - keystore.Finalizer(k8s.ExtractNamespacedName(&kb), r.dynamicWatches, "kibana"), + keystore.Finalizer(k8s.ExtractNamespacedName(&kb), r.dynamicWatches, kb.Kind()), } } diff --git a/operators/pkg/controller/kibana/pod/pod.go b/operators/pkg/controller/kibana/pod/pod.go index 98b1d3eb4c..f397b15504 100644 --- a/operators/pkg/controller/kibana/pod/pod.go +++ b/operators/pkg/controller/kibana/pod/pod.go @@ -43,7 +43,7 @@ func readinessProbe(useTLS bool) corev1.Probe { Handler: corev1.Handler{ HTTPGet: &corev1.HTTPGetAction{ Port: intstr.FromInt(HTTPPort), - Path: "/", + Path: "/login", Scheme: scheme, }, }, diff --git a/operators/pkg/controller/kibanaassociation/association_controller.go b/operators/pkg/controller/kibanaassociation/association_controller.go index 5d954af2ff..083d685803 100644 --- a/operators/pkg/controller/kibanaassociation/association_controller.go +++ b/operators/pkg/controller/kibanaassociation/association_controller.go @@ -13,15 +13,18 @@ import ( estype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" kbtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/kibana/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/annotation" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/finalizer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/operator" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/user" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/services" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/kibana/label" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" @@ -57,8 +60,8 @@ var ( // Add creates a new Association Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller // and Start it when the Manager is Started. 
-func Add(mgr manager.Manager, _ operator.Parameters) error { - r := newReconciler(mgr) +func Add(mgr manager.Manager, params operator.Parameters) error { + r := newReconciler(mgr, params) c, err := add(mgr, r) if err != nil { return err @@ -67,13 +70,14 @@ func Add(mgr manager.Manager, _ operator.Parameters) error { } // newReconciler returns a new reconcile.Reconciler -func newReconciler(mgr manager.Manager) *ReconcileAssociation { +func newReconciler(mgr manager.Manager, params operator.Parameters) *ReconcileAssociation { client := k8s.WrapClient(mgr.GetClient()) return &ReconcileAssociation{ - Client: client, - scheme: mgr.GetScheme(), - watches: watches.NewDynamicWatches(), - recorder: mgr.GetRecorder(name), + Client: client, + scheme: mgr.GetScheme(), + watches: watches.NewDynamicWatches(), + recorder: mgr.GetRecorder(name), + Parameters: params, } } @@ -95,7 +99,7 @@ type ReconcileAssociation struct { scheme *runtime.Scheme recorder record.EventRecorder watches watches.DynamicWatches - + operator.Parameters // iteration is the number of times this controller has run its Reconcile method iteration int64 } @@ -149,6 +153,16 @@ func (r *ReconcileAssociation) Reconcile(request reconcile.Request) (reconcile.R return common.PauseRequeue, nil } + selector := labels.Set(map[string]string{label.KibanaNameLabelName: kibana.Name}).AsSelector() + compat, err := annotation.ReconcileCompatibility(r.Client, &kibana, selector, r.OperatorInfo.BuildInfo.Version) + if err != nil { + return reconcile.Result{}, err + } + if !compat { + // this resource is not able to be reconciled by this version of the controller, so we will skip it and not requeue + return reconcile.Result{}, nil + } + newStatus, err := r.reconcileInternal(kibana) // maybe update status if !reflect.DeepEqual(kibana.Status.AssociationStatus, newStatus) { diff --git a/operators/pkg/dev/portforward/pod_forwarder.go b/operators/pkg/dev/portforward/pod_forwarder.go index c733bd87a7..2856b36bec 100644 --- a/operators/pkg/dev/portforward/pod_forwarder.go +++ b/operators/pkg/dev/portforward/pod_forwarder.go @@ -98,7 +98,7 @@ func newDefaultKubernetesClientset() (*kubernetes.Clientset, error) { } // podDNSRegex matches pods FQDN such as {name}.{namespace}.pod -var podDNSRegex = regexp.MustCompile(`^.+\..+\..*$`) +var podDNSRegex = regexp.MustCompile(`^.+\..+$`) // podIPRegex matches any ipv4 address. var podIPv4Regex = regexp.MustCompile(`^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$`) @@ -118,7 +118,7 @@ func parsePodAddr(addr string, clientSet *kubernetes.Clientset) (*types.Namespac // retrieve pod name and namespace from addr // TODO: subdomains in pod names would change this. 
parts := strings.SplitN(host, ".", 3) - if len(parts) <= 2 { + if len(parts) <= 1 { return nil, fmt.Errorf("unsupported pod address format: %s", host) } return &types.NamespacedName{Namespace: parts[1], Name: parts[0]}, nil diff --git a/operators/pkg/dev/portforward/pod_forwarder_test.go b/operators/pkg/dev/portforward/pod_forwarder_test.go index 555aea5230..866322fc11 100644 --- a/operators/pkg/dev/portforward/pod_forwarder_test.go +++ b/operators/pkg/dev/portforward/pod_forwarder_test.go @@ -143,10 +143,15 @@ func Test_parsePodAddr(t *testing.T) { args: args{addr: "foo.bar.pod:1234"}, want: types.NamespacedName{Namespace: "bar", Name: "foo"}, }, + { + name: "pod DNS with pod and namespace only", + args: args{addr: "foopod.barnamespace:1234"}, + want: types.NamespacedName{Namespace: "barnamespace", Name: "foopod"}, + }, { name: "invalid", - args: args{addr: "example.com:1234"}, - wantErr: errors.New("unsupported pod address format: example.com"), + args: args{addr: "foobar:1234"}, + wantErr: errors.New("unsupported pod address format: foobar"), }, } for _, tt := range tests { diff --git a/operators/pkg/webhook/server.go b/operators/pkg/webhook/server.go index 0493a75317..6032aeafcb 100644 --- a/operators/pkg/webhook/server.go +++ b/operators/pkg/webhook/server.go @@ -5,11 +5,14 @@ package webhook import ( + "context" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/webhook/elasticsearch" "github.com/elastic/cloud-on-k8s/operators/pkg/webhook/license" admission "k8s.io/api/admissionregistration/v1beta1" corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/webhook" @@ -41,7 +44,7 @@ func RegisterValidations(mgr manager.Manager, params Parameters) error { licWh, err := builder.NewWebhookBuilder(). Name("validation.license.elastic.co"). Validating(). - FailurePolicy(admission.Fail). + FailurePolicy(admission.Ignore). ForType(&corev1.Secret{}). Handlers(&license.ValidationHandler{}). WithManager(mgr). @@ -51,6 +54,17 @@ func RegisterValidations(mgr manager.Manager, params Parameters) error { } disabled := !params.AutoInstall + if params.AutoInstall { + // nasty side effect in register function + webhookSvc := corev1.Service{ + ObjectMeta: v1.ObjectMeta{ + Name: params.Bootstrap.Service.Name, + Namespace: params.Bootstrap.Service.Namespace, + }, + } + // best effort deletion attempt to handle incompatible services from previous versions + _ = mgr.GetClient().Delete(context.Background(), &webhookSvc) + } svr, err := webhook.NewServer(admissionServerName, mgr, webhook.ServerOptions{ Port: serverPort, CertDir: "/tmp/cert", diff --git a/operators/test/e2e/apm/standalone_test.go b/operators/test/e2e/apm/standalone_test.go index 2e66d6539a..7ce2175595 100644 --- a/operators/test/e2e/apm/standalone_test.go +++ b/operators/test/e2e/apm/standalone_test.go @@ -7,7 +7,6 @@ package apm import ( "testing" - apmtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" "github.com/elastic/cloud-on-k8s/operators/test/e2e/test/apmserver" @@ -16,7 +15,7 @@ import ( // TestApmStandalone runs a test suite on an APM server that is not outputting to Elasticsearch func TestApmStandalone(t *testing.T) { apmBuilder := apmserver.NewBuilder("standalone"). - WithOutput(apmtype.Output{}). 
+ WithElasticsearchRef(v1alpha1.ObjectSelector{}). WithConfig(map[string]interface{}{ "output.console": map[string]interface{}{ "pretty": true, @@ -29,7 +28,7 @@ func TestApmStandalone(t *testing.T) { func TestApmStandaloneNoTLS(t *testing.T) { apmBuilder := apmserver.NewBuilder("standalone-no-tls"). - WithOutput(apmtype.Output{}). + WithElasticsearchRef(v1alpha1.ObjectSelector{}). WithConfig(map[string]interface{}{ "output.console": map[string]interface{}{ "pretty": true, diff --git a/operators/test/e2e/test/apmserver/builder.go b/operators/test/e2e/test/apmserver/builder.go index 370f2001fa..9173bf7005 100644 --- a/operators/test/e2e/test/apmserver/builder.go +++ b/operators/test/e2e/test/apmserver/builder.go @@ -6,7 +6,6 @@ package apmserver import ( apmtype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" - common "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" corev1 "k8s.io/api/core/v1" @@ -30,13 +29,9 @@ func NewBuilder(name string) Builder { Spec: apmtype.ApmServerSpec{ NodeCount: 1, Version: test.ElasticStackVersion, - Output: apmtype.Output{ - Elasticsearch: apmtype.ElasticsearchOutput{ - ElasticsearchRef: &commonv1alpha1.ObjectSelector{ - Name: name, - Namespace: test.Namespace, - }, - }, + ElasticsearchRef: commonv1alpha1.ObjectSelector{ + Name: name, + Namespace: test.Namespace, }, PodTemplate: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ @@ -55,12 +50,7 @@ func (b Builder) WithRestrictedSecurityContext() Builder { func (b Builder) WithNamespace(namespace string) Builder { b.ApmServer.ObjectMeta.Namespace = namespace - ref := b.ApmServer.Spec.Output.Elasticsearch.ElasticsearchRef - if ref == nil { - ref = &common.ObjectSelector{} - } - ref.Namespace = namespace - b.ApmServer.Spec.Output.Elasticsearch.ElasticsearchRef = ref + b.ApmServer.Spec.ElasticsearchRef.Namespace = namespace return b } @@ -74,8 +64,8 @@ func (b Builder) WithNodeCount(count int) Builder { return b } -func (b Builder) WithOutput(out apmtype.Output) Builder { - b.ApmServer.Spec.Output = out +func (b Builder) WithElasticsearchRef(ref commonv1alpha1.ObjectSelector) Builder { + b.ApmServer.Spec.ElasticsearchRef = ref return b } diff --git a/operators/test/e2e/test/elasticsearch/http_client.go b/operators/test/e2e/test/elasticsearch/http_client.go index 154dac2409..dae9d32b37 100644 --- a/operators/test/e2e/test/elasticsearch/http_client.go +++ b/operators/test/e2e/test/elasticsearch/http_client.go @@ -28,7 +28,7 @@ func NewElasticsearchClient(es v1alpha1.Elasticsearch, k *test.K8sClient) (clien if err != nil { return nil, err } - inClusterURL := fmt.Sprintf("https://%s:9200", name.HTTPService(es.Name)) + inClusterURL := fmt.Sprintf("https://%s.%s.svc:9200", name.HTTPService(es.Name), es.Namespace) var dialer net.Dialer if test.AutoPortForward { dialer = portforward.NewForwardingDialer() From 622feedacb39ad0147812396cd36f87cb30681b1 Mon Sep 17 00:00:00 2001 From: Sebastien Guilloux Date: Thu, 1 Aug 2019 10:40:50 +0200 Subject: [PATCH 14/31] Orchestrate zen1 and zen2 settings for StatefulSets (#1262) Change the way we setup zen1 and zen2 settings to adapt to StatefulSets upscale and downscale. Also improve StatefulSet downscale code, which is moved to its own file, and add a few helper functions around pods and master nodes. 
In a nutshell:
- set minimum_master_nodes (zen1) and initial_master_nodes (zen2) in the configuration of new nodes to create, corresponding to the nodes that are expected to be created. The setting is also updated automatically in the existing nodes' configuration, where it is ignored until those nodes restart.
- update minimum_master_nodes (zen1) and voting_config_exclusions (zen2) whenever we remove a node.

For now, we don't (yet) make any changes for rolling upgrades, which are currently limited to a single node at a time. This only impacts sset scale up/down.
---
 operators/config/e2e/global_operator.yaml | 1 +
 operators/config/e2e/namespace_operator.yaml | 1 +
 .../pkg/controller/elasticsearch/client/v6.go | 22 +-
 .../elasticsearch/driver/default.go | 247 ++++-------------
 .../elasticsearch/driver/downscale.go | 136 +++++++++
 .../controller/elasticsearch/driver/driver.go | 12 +-
 .../elasticsearch/driver/esstate.go | 4 +-
 .../elasticsearch/driver/upgrade.go | 59 ++--
 .../controller/elasticsearch/label/label.go | 15 +-
 .../elasticsearch/label/label_test.go | 21 +-
 .../elasticsearch/nodespec/fixtures.go | 33 +++
 .../elasticsearch/nodespec/resources.go | 16 ++
 .../elasticsearch/nodespec/resources_test.go | 43 +++
 .../pkg/controller/elasticsearch/sset/list.go | 45 ++-
 .../elasticsearch/sset/list_test.go | 88 ++++++
 .../pkg/controller/elasticsearch/sset/pod.go | 30 ++
 .../elasticsearch/version/running_versions.go | 5 +-
 .../version/supported_versions.go | 2 +-
 .../version/version6/zen1_test.go | 214 --------------
 .../version/version7/initial_master_nodes.go | 83 ------
 .../version7/initial_master_nodes_test.go | 261 ------------------
 .../elasticsearch/version/version7/zen2.go | 58 ----
 .../version/version7/zen2_test.go | 131 ---------
 .../version/zen1/compatibility.go | 28 ++
 .../version/zen1/compatibility_test.go | 94 +++++++
 .../zen1.go => zen1/minimum_masters.go} | 124 ++++-----
 .../version/zen1/minimum_masters_test.go | 200 ++++++++++++++
 .../version/{version6 => zen1}/podspecs.go | 2 +-
 .../{version6 => zen1}/podspecs_test.go | 2 +-
 .../version/zen2/compatibility.go | 28 ++
 .../version/zen2/compatibility_test.go | 94 +++++++
 .../version/zen2/initial_master_nodes.go | 87 ++++++
 .../version/zen2/initial_master_nodes_test.go | 178 ++++++++++++
 .../version/zen2/voting_exclusions.go | 86 ++++++
 .../version/zen2/voting_exclusions_test.go | 96 +++++++
 35 files changed, 1449 insertions(+), 1097 deletions(-)
 create mode 100644 operators/pkg/controller/elasticsearch/driver/downscale.go
 create mode 100644 operators/pkg/controller/elasticsearch/nodespec/fixtures.go
 create mode 100644 operators/pkg/controller/elasticsearch/nodespec/resources_test.go
 create mode 100644 operators/pkg/controller/elasticsearch/sset/list_test.go
 delete mode 100644 operators/pkg/controller/elasticsearch/version/version6/zen1_test.go
 delete mode 100644 operators/pkg/controller/elasticsearch/version/version7/initial_master_nodes.go
 delete mode 100644 operators/pkg/controller/elasticsearch/version/version7/initial_master_nodes_test.go
 delete mode 100644 operators/pkg/controller/elasticsearch/version/version7/zen2.go
 delete mode 100644 operators/pkg/controller/elasticsearch/version/version7/zen2_test.go
 create mode 100644 operators/pkg/controller/elasticsearch/version/zen1/compatibility.go
 create mode 100644 operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go
 rename operators/pkg/controller/elasticsearch/version/{version6/zen1.go => zen1/minimum_masters.go} (50%)
 create mode 100644 
operators/pkg/controller/elasticsearch/version/zen1/minimum_masters_test.go rename operators/pkg/controller/elasticsearch/version/{version6 => zen1}/podspecs.go (98%) rename operators/pkg/controller/elasticsearch/version/{version6 => zen1}/podspecs_test.go (99%) create mode 100644 operators/pkg/controller/elasticsearch/version/zen2/compatibility.go create mode 100644 operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go create mode 100644 operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes.go create mode 100644 operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes_test.go create mode 100644 operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go create mode 100644 operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions_test.go diff --git a/operators/config/e2e/global_operator.yaml b/operators/config/e2e/global_operator.yaml index f27d59d4b5..6ce4fc0dc6 100644 --- a/operators/config/e2e/global_operator.yaml +++ b/operators/config/e2e/global_operator.yaml @@ -46,6 +46,7 @@ rules: - apps resources: - deployments + - statefulsets verbs: - get - list diff --git a/operators/config/e2e/namespace_operator.yaml b/operators/config/e2e/namespace_operator.yaml index 23146a5bfc..5d2dfc943c 100644 --- a/operators/config/e2e/namespace_operator.yaml +++ b/operators/config/e2e/namespace_operator.yaml @@ -47,6 +47,7 @@ rules: - apps resources: - deployments + - statefulsets verbs: - get - list diff --git a/operators/pkg/controller/elasticsearch/client/v6.go b/operators/pkg/controller/elasticsearch/client/v6.go index b15591192b..2b212c7cfe 100644 --- a/operators/pkg/controller/elasticsearch/client/v6.go +++ b/operators/pkg/controller/elasticsearch/client/v6.go @@ -53,34 +53,28 @@ func (c *clientV6) ExcludeFromShardAllocation(ctx context.Context, nodes string) return c.put(ctx, "/_cluster/settings", allocationSettings, nil) } -func (c *clientV6) EnableShardAllocation(ctx context.Context) error { +func (c *clientV6) updateAllocationEnable(ctx context.Context, value string) error { allocationSettings := ClusterRoutingAllocation{ Transient: AllocationSettings{ Cluster: ClusterRoutingSettings{ Routing: RoutingSettings{ Allocation: RoutingAllocationSettings{ - Enable: "all", + Enable: value, }, }, }, }, } return c.put(ctx, "/_cluster/settings", allocationSettings, nil) + +} + +func (c *clientV6) EnableShardAllocation(ctx context.Context) error { + return c.updateAllocationEnable(ctx, "all") } func (c *clientV6) DisableReplicaShardsAllocation(ctx context.Context) error { - allocationSettings := ClusterRoutingAllocation{ - Transient: AllocationSettings{ - Cluster: ClusterRoutingSettings{ - Routing: RoutingSettings{ - Allocation: RoutingAllocationSettings{ - Enable: "primaries", - }, - }, - }, - }, - } - return c.put(ctx, "/_cluster/settings", allocationSettings, nil) + return c.updateAllocationEnable(ctx, "primaries") } func (c *clientV6) SyncedFlush(ctx context.Context) error { diff --git a/operators/pkg/controller/elasticsearch/driver/default.go b/operators/pkg/controller/elasticsearch/driver/default.go index 567440b964..2822b65a28 100644 --- a/operators/pkg/controller/elasticsearch/driver/default.go +++ b/operators/pkg/controller/elasticsearch/driver/default.go @@ -8,14 +8,14 @@ import ( "crypto/x509" "fmt" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" controller 
"sigs.k8s.io/controller-runtime/pkg/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/migration" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen2" esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" @@ -30,7 +30,6 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/configmap" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/license" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/network" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" @@ -43,7 +42,7 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/version6" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen1" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) @@ -90,36 +89,6 @@ type defaultDriver struct { es v1alpha1.Elasticsearch, ) (*reconcile.ResourcesState, error) - // clusterInitialMasterNodesEnforcer enforces that cluster.initial_master_nodes is set where relevant - // this can safely be set to nil when it's not relevant (e.g for ES <= 6) - clusterInitialMasterNodesEnforcer func( - cluster v1alpha1.Elasticsearch, - clusterState observer.State, - c k8s.Client, - performableChanges mutation.PerformableChanges, - resourcesState reconcile.ResourcesState, - ) (*mutation.PerformableChanges, error) - - // zen1SettingsUpdater updates the zen1 settings for the current pods. - // this can safely be set to nil when it's not relevant (e.g when all nodes in the cluster is >= 7) - zen1SettingsUpdater func( - cluster v1alpha1.Elasticsearch, - c k8s.Client, - esClient esclient.Client, - allPods []corev1.Pod, - performableChanges *mutation.PerformableChanges, - reconcileState *reconcile.State, - ) (bool, error) - - // zen2SettingsUpdater updates the zen2 settings for the current changes. - // this can safely be set to nil when it's not relevant (e.g when all nodes in the cluster is <7) - zen2SettingsUpdater func( - esClient esclient.Client, - minVersion version.Version, - changes mutation.Changes, - performableChanges mutation.PerformableChanges, - ) error - // TODO: implement // // apiObjectsGarbageCollector garbage collects API objects for older versions once they are no longer needed. 
// apiObjectsGarbageCollector func( @@ -244,14 +213,6 @@ func (d *defaultDriver) Reconcile( }, ) - // - //if d.clusterInitialMasterNodesEnforcer != nil { - // performableChanges, err = d.clusterInitialMasterNodesEnforcer(*performableChanges, *resourcesState) - // if err != nil { - // return results.WithError(err) - // } - //} - // Compute seed hosts based on current masters with a podIP if err := settings.UpdateSeedHostsConfigMap(d.Client, d.Scheme, es, resourcesState.AllPods); err != nil { return results.WithError(err) @@ -287,64 +248,16 @@ func (d *defaultDriver) Reconcile( KeystoreResources: keystoreResources, }, cfg, - version6.NewEnvironmentVars, + zen1.NewEnvironmentVars, initcontainer.NewInitContainers, ) } - res = d.reconcileNodeSpecs(es, esReachable, podTemplateSpecBuilder, esClient, observedState) + res = d.reconcileNodeSpecs(es, esReachable, podTemplateSpecBuilder, esClient, reconcileState, observedState, *resourcesState) if results.WithResults(res).HasError() { return results } - // - //// Call Zen1 setting updater before new masters are created to ensure that they immediately start with the - //// correct value for minimum_master_nodes. - //// For instance if a 3 master nodes cluster is updated and a grow-and-shrink strategy of one node is applied then - //// minimum_master_nodes is increased from 2 to 3 for new and current nodes. - //if d.zen1SettingsUpdater != nil { - // requeue, err := d.zen1SettingsUpdater( - // es, - // d.Client, - // esClient, - // resourcesState.AllPods, - // performableChanges, - // reconcileState, - // ) - // - // if err != nil { - // return results.WithError(err) - // } - // - // if requeue { - // results.WithResult(defaultRequeue) - // } - //} - - if !esReachable { - // We cannot manipulate ES allocation exclude settings if the ES cluster - // cannot be reached, hence we cannot delete pods. - // Probably it was just created and is not ready yet. - // Let's retry in a while. - log.Info("ES external service not ready yet for shard migration reconciliation. Requeuing.", "namespace", es.Namespace, "es_name", es.Name) - - reconcileState.UpdateElasticsearchPending(resourcesState.CurrentPods.Pods()) - - return results.WithResult(defaultRequeue) - } - // - //if d.zen2SettingsUpdater != nil { - // // TODO: would prefer to do this after MigrateData iff there's no changes? or is that an premature optimization? - // if err := d.zen2SettingsUpdater( - // esClient, - // *min, - // *changes, - // *performableChanges, - // ); err != nil { - // return results.WithResult(defaultRequeue).WithError(err) - // } - //} - reconcileState.UpdateElasticsearchState(*resourcesState, observedState) return results @@ -355,7 +268,9 @@ func (d *defaultDriver) reconcileNodeSpecs( esReachable bool, podSpecBuilder esversion.PodTemplateSpecBuilder, esClient esclient.Client, + reconcileState *reconcile.State, observedState observer.State, + resourcesState reconcile.ResourcesState, ) *reconciler.Results { results := &reconciler.Results{} @@ -364,30 +279,29 @@ func (d *defaultDriver) reconcileNodeSpecs( return results.WithError(err) } + if !d.expectations.GenerationExpected(actualStatefulSets.ObjectMetas()...) { + // Our cache of StatefulSets is out of date compared to previous reconciliation operations. + // This will probably lead to conflicting sset updates (which is ok), but also to + // conflicting ES calls (set/reset zen1/zen2/allocation excludes, etc.), which may not be ok. 
+ log.V(1).Info("StatefulSet cache out-of-date, re-queueing", "namespace", es.Namespace, "es_name", es.Name) + return results.WithResult(defaultRequeue) + } + nodeSpecResources, err := nodespec.BuildExpectedResources(es, podSpecBuilder) if err != nil { return results.WithError(err) } - // TODO: handle zen2 initial master nodes more cleanly - // should be empty once cluster is bootstraped - var initialMasters []string - // TODO: refactor/move - for _, res := range nodeSpecResources { - cfg, err := res.Config.Unpack() - if err != nil { - return results.WithError(err) - } - if cfg.Node.Master { - for i := 0; i < int(*res.StatefulSet.Spec.Replicas); i++ { - initialMasters = append(initialMasters, fmt.Sprintf("%s-%d", res.StatefulSet.Name, i)) - } - } + // TODO: there is a split brain possibility here if going from 1 to 3 masters or 3 to 7. + // See https://github.com/elastic/cloud-on-k8s/issues/1281. + + // patch configs to consider zen1 minimum master nodes + if err := zen1.SetupMinimumMasterNodesConfig(nodeSpecResources); err != nil { + return results.WithError(err) } - for i := range nodeSpecResources { - if err := nodeSpecResources[i].Config.SetStrings(settings.ClusterInitialMasterNodes, initialMasters...); err != nil { - return results.WithError(err) - } + // patch configs to consider zen2 initial master nodes + if err := zen2.SetupInitialMasterNodes(es, observedState, d.Client, nodeSpecResources); err != nil { + return results.WithError(err) } // Phase 1: apply expected StatefulSets resources, but don't scale down. @@ -418,32 +332,36 @@ func (d *defaultDriver) reconcileNodeSpecs( } if !esReachable { - // cannot perform downscale or rolling upgrade if we cannot request Elasticsearch + // Cannot perform next operations if we cannot request Elasticsearch. + log.Info("ES external service not ready yet for further reconciliation, re-queuing.", "namespace", es.Namespace, "es_name", es.Name) + reconcileState.UpdateElasticsearchPending(resourcesState.CurrentPods.Pods()) return results.WithResult(defaultRequeue) } + // Update Zen1 minimum master nodes through the API, corresponding to the current nodes we have. + requeue, err := zen1.UpdateMinimumMasterNodes(d.Client, es, esClient, actualStatefulSets, reconcileState) + if err != nil { + return results.WithError(err) + } + if requeue { + results.WithResult(defaultRequeue) + } + // Maybe clear zen2 voting config exclusions. + requeue, err = zen2.ClearVotingConfigExclusions(es, d.Client, esClient, actualStatefulSets) + if err != nil { + return results.WithError(err) + } + if requeue { + results.WithResult(defaultRequeue) + } + // Phase 2: handle sset scale down. // We want to safely remove nodes from the cluster, either because the sset requires less replicas, // or because it should be removed entirely. 
- for i, actual := range actualStatefulSets { - expected, shouldExist := nodeSpecResources.StatefulSets().GetByName(actual.Name) - switch { - // stateful set removal - case !shouldExist: - target := int32(0) - removalResult := d.scaleStatefulSetDown(&actualStatefulSets[i], target, esClient, observedState) - results.WithResults(removalResult) - if removalResult.HasError() { - return results - } - // stateful set downscale - case actual.Spec.Replicas != nil && sset.Replicas(expected) < sset.Replicas(actual): - target := sset.Replicas(expected) - downscaleResult := d.scaleStatefulSetDown(&actualStatefulSets[i], target, esClient, observedState) - if downscaleResult.HasError() { - return results - } - } + downscaleRes := d.HandleDownscale(es, nodeSpecResources.StatefulSets(), actualStatefulSets, esClient, observedState, reconcileState) + results.WithResults(downscaleRes) + if downscaleRes.HasError() { + return results } // Phase 3: handle rolling upgrades. @@ -456,72 +374,7 @@ func (d *defaultDriver) reconcileNodeSpecs( // TODO: // - change budget - // - zen1, zen2 - return results -} - -func (d *defaultDriver) scaleStatefulSetDown( - statefulSet *appsv1.StatefulSet, - targetReplicas int32, - esClient esclient.Client, - observedState observer.State, -) *reconciler.Results { - results := &reconciler.Results{} - logger := log.WithValues("statefulset", k8s.ExtractNamespacedName(statefulSet)) - - if sset.Replicas(*statefulSet) == 0 && targetReplicas == 0 { - // we don't expect any new replicas in this statefulset, remove it - logger.Info("Deleting statefulset", "namespace", statefulSet.Namespace, "name", statefulSet.Name) - if err := d.Client.Delete(statefulSet); err != nil { - return results.WithError(err) - } - } - // copy the current replicas, to be decremented with nodes to remove - initialReplicas := sset.Replicas(*statefulSet) - updatedReplicas := initialReplicas - - // leaving nodes names can be built from StatefulSet name and ordinals - // nodes are ordered by highest ordinal first - var leavingNodes []string - for i := initialReplicas - 1; i > targetReplicas-1; i-- { - leavingNodes = append(leavingNodes, sset.PodName(statefulSet.Name, i)) - } - - // TODO: don't remove last master/last data nodes? - // TODO: detect cases where data migration cannot happen since no nodes to host shards? 
- - // migrate data away from these nodes before removing them - logger.V(1).Info("Migrating data away from nodes", "nodes", leavingNodes) - if err := migration.MigrateData(esClient, leavingNodes); err != nil { - return results.WithError(err) - } - - for _, node := range leavingNodes { - if migration.IsMigratingData(observedState, node, leavingNodes) { - // data migration not over yet: schedule a requeue - logger.V(1).Info("Data migration not over yet, skipping node deletion", "node", node) - results.WithResult(defaultRequeue) - // no need to check other nodes since we remove them in order and this one isn't ready anyway - break - } - // data migration over: allow pod to be removed - updatedReplicas-- - } - - if updatedReplicas != initialReplicas { - // update cluster coordination settings to account for nodes deletion - // TODO: update zen1/zen2 - - // trigger deletion of nodes whose data migration is over - logger.V(1).Info("Scaling replicas down", "from", initialReplicas, "to", updatedReplicas) - statefulSet.Spec.Replicas = &updatedReplicas - if err := d.Client.Update(statefulSet); err != nil { - return results.WithError(err) - } - } - - // TODO: clear allocation excludes - + // - grow and shrink return results } diff --git a/operators/pkg/controller/elasticsearch/driver/downscale.go b/operators/pkg/controller/elasticsearch/driver/downscale.go new file mode 100644 index 0000000000..1a86700018 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/driver/downscale.go @@ -0,0 +1,136 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package driver + +import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" + esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/migration" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen2" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + appsv1 "k8s.io/api/apps/v1" +) + +func (d *defaultDriver) HandleDownscale( + es v1alpha1.Elasticsearch, + expectedStatefulSets sset.StatefulSetList, + actualStatefulSets sset.StatefulSetList, + esClient esclient.Client, + observedState observer.State, + reconcileState *reconcile.State, +) *reconciler.Results { + results := &reconciler.Results{} + + // compute the list of nodes leaving the cluster, from which + // data should be migrated away + leavingNodes := []string{} + + // process each statefulset for downscale + for i, actual := range actualStatefulSets { + expected, shouldExist := expectedStatefulSets.GetByName(actual.Name) + targetReplicas := int32(0) // sset removal + if shouldExist { // sset downscale + targetReplicas = sset.Replicas(expected) + } + leaving, removalResult := d.scaleStatefulSetDown(es, actualStatefulSets, &actualStatefulSets[i], 
targetReplicas, esClient, observedState, reconcileState) + results.WithResults(removalResult) + if removalResult.HasError() { + return results + } + leavingNodes = append(leavingNodes, leaving...) + } + + // migrate data away from nodes leaving the cluster + log.V(1).Info("Migrating data away from nodes", "nodes", leavingNodes) + if err := migration.MigrateData(esClient, leavingNodes); err != nil { + return results.WithError(err) + } + + return results +} + +// scaleStatefulSetDown scales the given StatefulSet down to targetReplicas, if possible. +// It returns the names of the nodes that will leave the cluster. +func (d *defaultDriver) scaleStatefulSetDown( + es v1alpha1.Elasticsearch, + allStatefulSets sset.StatefulSetList, + ssetToScaleDown *appsv1.StatefulSet, + targetReplicas int32, + esClient esclient.Client, + observedState observer.State, + reconcileState *reconcile.State, +) ([]string, *reconciler.Results) { + results := &reconciler.Results{} + logger := log.WithValues("statefulset", k8s.ExtractNamespacedName(ssetToScaleDown)) + + if sset.Replicas(*ssetToScaleDown) == 0 && targetReplicas == 0 { + // no replicas expected, StatefulSet can be safely deleted + logger.Info("Deleting statefulset", "namespace", ssetToScaleDown.Namespace, "name", ssetToScaleDown.Name) + if err := d.Client.Delete(ssetToScaleDown); err != nil { + return nil, results.WithError(err) + } + } + // copy the current replicas, to be decremented with nodes to remove + initialReplicas := sset.Replicas(*ssetToScaleDown) + updatedReplicas := initialReplicas + + // leaving nodes names can be built from StatefulSet name and ordinals + // nodes are ordered by highest ordinal first + var leavingNodes []string + for i := initialReplicas - 1; i > targetReplicas-1; i-- { + leavingNodes = append(leavingNodes, sset.PodName(ssetToScaleDown.Name, i)) + } + + // TODO: don't remove last master/last data nodes? + // TODO: detect cases where data migration cannot happen since no nodes to host shards? + + for _, node := range leavingNodes { + if migration.IsMigratingData(observedState, node, leavingNodes) { + // data migration not over yet: schedule a requeue + logger.V(1).Info("Data migration not over yet, skipping node deletion", "node", node) + results.WithResult(defaultRequeue) + // no need to check other nodes since we remove them in order and this one isn't ready anyway + break + } + // data migration over: allow pod to be removed + updatedReplicas-- + } + + if updatedReplicas < initialReplicas { + // trigger deletion of nodes whose data migration is over + logger.V(1).Info("Scaling replicas down", "from", initialReplicas, "to", updatedReplicas) + ssetToScaleDown.Spec.Replicas = &updatedReplicas + + if label.IsMasterNodeSet(*ssetToScaleDown) { + // Update Zen1 minimum master nodes API, accounting for the updated downscaled replicas. + _, err := zen1.UpdateMinimumMasterNodes(d.Client, es, esClient, allStatefulSets, reconcileState) + if err != nil { + return nil, results.WithError(err) + } + // Update zen2 settings to exclude leaving master nodes from voting. 
+ excludeNodes := make([]string, 0, initialReplicas-updatedReplicas) + for i := updatedReplicas; i < initialReplicas; i++ { + excludeNodes = append(excludeNodes, sset.PodName(ssetToScaleDown.Name, i)) + } + if err := zen2.AddToVotingConfigExclusions(esClient, *ssetToScaleDown, excludeNodes); err != nil { + return nil, results.WithError(err) + } + } + + if err := d.Client.Update(ssetToScaleDown); err != nil { + return nil, results.WithError(err) + } + // Expect the updated statefulset in the cache for next reconciliation. + d.expectations.ExpectGeneration(ssetToScaleDown.ObjectMeta) + } + + return leavingNodes, results +} diff --git a/operators/pkg/controller/elasticsearch/driver/driver.go b/operators/pkg/controller/elasticsearch/driver/driver.go index 549a32ea49..0800dea20f 100644 --- a/operators/pkg/controller/elasticsearch/driver/driver.go +++ b/operators/pkg/controller/elasticsearch/driver/driver.go @@ -17,8 +17,6 @@ import ( esreconcile "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/version6" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/version7" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" @@ -78,18 +76,18 @@ func NewDriver(opts Options) (Driver, error) { switch opts.Version.Major { case 7: //driver.expectedPodsAndResourcesResolver = version6.ExpectedPodSpecs - - driver.clusterInitialMasterNodesEnforcer = version7.ClusterInitialMasterNodesEnforcer + // + //driver.clusterInitialMasterNodesEnforcer = version7.ClusterInitialMasterNodesEnforcer // version 7 uses zen2 instead of zen - driver.zen2SettingsUpdater = version7.UpdateZen2Settings + //driver.zen2SettingsUpdater = version7.UpdateZen2Settings // .. except we still have to manage minimum_master_nodes while doing a rolling upgrade from 6 -> 7 // we approximate this by also handling zen 1, even in 7 // TODO: only do this if there's 6.x masters in the cluster. - driver.zen1SettingsUpdater = version6.UpdateZen1Discovery + //driver.zen1SettingsUpdater = version6.UpdateZen1Discovery case 6: //driver.expectedPodsAndResourcesResolver = version6.ExpectedPodSpecs - driver.zen1SettingsUpdater = version6.UpdateZen1Discovery + //driver.zen1SettingsUpdater = version6.UpdateZen1Discovery default: return nil, fmt.Errorf("unsupported version: %s", opts.Version) } diff --git a/operators/pkg/controller/elasticsearch/driver/esstate.go b/operators/pkg/controller/elasticsearch/driver/esstate.go index ad719d32f2..02a60e2736 100644 --- a/operators/pkg/controller/elasticsearch/driver/esstate.go +++ b/operators/pkg/controller/elasticsearch/driver/esstate.go @@ -1,9 +1,9 @@ -package driver - // Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
+package driver + import ( "context" "sync" diff --git a/operators/pkg/controller/elasticsearch/driver/upgrade.go b/operators/pkg/controller/elasticsearch/driver/upgrade.go index 6002daed6f..f66fecae55 100644 --- a/operators/pkg/controller/elasticsearch/driver/upgrade.go +++ b/operators/pkg/controller/elasticsearch/driver/upgrade.go @@ -7,15 +7,17 @@ package driver import ( "context" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/types" ) func (d *defaultDriver) handleRollingUpgrades( @@ -47,14 +49,6 @@ func (d *defaultDriver) doRollingUpgrade( ) *reconciler.Results { results := &reconciler.Results{} - if !d.expectations.GenerationExpected(statefulSets.ObjectMetas()...) { - // Our cache of SatefulSets is out of date compared to previous reconciliation operations. - // It does not matter much here since operations are idempotent, but we might as well avoid - // useless operations that would end up in a resource update conflict anyway. - log.V(1).Info("StatefulSet cache out-of-date, re-queueing") - return results.WithResult(defaultRequeue) - } - if !statefulSets.RevisionUpdateScheduled() { // nothing to upgrade return results @@ -67,20 +61,26 @@ func (d *defaultDriver) doRollingUpgrade( // Instead of green health, we could look at shards status, taking into account nodes // we scheduled for a restart (maybe not restarted yet). - // TODO: don't upgrade more than 1 master concurrently (ok for now since we upgrade 1 node at a time anyway) - maxConcurrentUpgrades := 1 scheduledUpgrades := 0 + // Only update 1 master node at a time, for safety and zen settings convenience. + // This can slow down the upgrade, but the number of master nodes should be small anyway. + maxMasterNodeUpgrades := 1 + scheduledMasterNodeUpgrades := 0 + for i, statefulSet := range statefulSets { // Inspect each pod, starting from the highest ordinal, and decrement the partition to allow // pod upgrades to go through, controlled by the StatefulSet controller. for partition := sset.GetUpdatePartition(statefulSet); partition >= 0; partition-- { + if partition >= sset.Replicas(statefulSet) { + continue + } if scheduledUpgrades >= maxConcurrentUpgrades { return results.WithResult(defaultRequeue) } - if partition >= sset.Replicas(statefulSet) { - continue + if label.IsMasterNodeSet(statefulSet) && scheduledMasterNodeUpgrades >= maxMasterNodeUpgrades { + return results.WithResult(defaultRequeue) } // Do we need to upgrade that pod? 
@@ -112,11 +112,21 @@ func (d *defaultDriver) doRollingUpgrade( return results.WithResult(defaultRequeue) } - log.Info("Preparing cluster for node restart", "namespace", es.Namespace, "name", es.Name) + log.Info("Preparing cluster for node restart", "namespace", es.Namespace, "es_name", es.Name) if err := prepareClusterForNodeRestart(esClient, esState); err != nil { return results.WithError(err) } + if label.IsMasterNodeSet(statefulSet) { + scheduledMasterNodeUpgrades++ + // TODO if the node is a master: + // - zen1: update minimum_master_node to account for master node deletion. Otherwise upgrading a 2-masters + // cluster provokes downtime since m_m_n=2. + // Problem: how to prevent this to be reverted at the next reconciliation, before the pod gets deleted? + // - zen2: set voting config exclusions: same problem, this is not easy. But since we only delete + // one master at a time, maybe it's not required? + } + // Upgrade the pod. if err := d.upgradeStatefulSetPartition(&statefulSets[i], partition); err != nil { return results.WithError(err) @@ -131,8 +141,6 @@ func (d *defaultDriver) upgradeStatefulSetPartition( statefulSet *appsv1.StatefulSet, newPartition int32, ) error { - // TODO: zen1, zen2 - // Node can be removed, update the StatefulSet rollingUpdate.Partition ordinal. log.Info("Updating rollingUpdate.Partition", "namespace", statefulSet.Namespace, @@ -249,13 +257,6 @@ func (d *defaultDriver) MaybeEnableShardsAllocation( statefulSets sset.StatefulSetList, ) *reconciler.Results { results := &reconciler.Results{} - // Since we rely on sset rollingUpdate.Partition, requeue in case our cache hasn't seen a sset update yet. - // Otherwise we could re-enable shards allocation while a pod was just scheduled for termination, - // with the partition in the sset cache being outdated. - if !d.Expectations.GenerationExpected(statefulSets.ObjectMetas()...) 
{ - return results.WithResult(defaultRequeue) - } - alreadyEnabled, err := esState.ShardAllocationsEnabled() if err != nil { return results.WithError(err) @@ -273,7 +274,7 @@ func (d *defaultDriver) MaybeEnableShardsAllocation( log.V(1).Info( "Rolling upgrade not over yet, some pods don't have the updated revision, keeping shard allocations disabled", "namespace", es.Namespace, - "name", es.Name, + "es_name", es.Name, ) return results.WithResult(defaultRequeue) } @@ -287,12 +288,12 @@ func (d *defaultDriver) MaybeEnableShardsAllocation( log.V(1).Info( "Some upgraded nodes are not back in the cluster yet, keeping shard allocations disabled", "namespace", es.Namespace, - "name", es.Name, + "es_name", es.Name, ) return results.WithResult(defaultRequeue) } - log.Info("Enabling shards allocation", "namespace", es.Namespace, "name", es.Name) + log.Info("Enabling shards allocation", "namespace", es.Namespace, "es_name", es.Name) ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) defer cancel() if err := esClient.EnableShardAllocation(ctx); err != nil { diff --git a/operators/pkg/controller/elasticsearch/label/label.go b/operators/pkg/controller/elasticsearch/label/label.go index 5b57dff103..40dc4ebb01 100644 --- a/operators/pkg/controller/elasticsearch/label/label.go +++ b/operators/pkg/controller/elasticsearch/label/label.go @@ -12,6 +12,7 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -54,20 +55,24 @@ func IsMasterNode(pod corev1.Pod) bool { return NodeTypesMasterLabelName.HasValue(true, pod.Labels) } +func IsMasterNodeSet(statefulSet appsv1.StatefulSet) bool { + return NodeTypesMasterLabelName.HasValue(true, statefulSet.Spec.Template.Labels) +} + // IsDataNode returns true if the pod has the data node label func IsDataNode(pod corev1.Pod) bool { return NodeTypesDataLabelName.HasValue(true, pod.Labels) } -// ExtractVersion extracts the Elasticsearch version from a pod label. -func ExtractVersion(pod corev1.Pod) (*version.Version, error) { - labelValue, ok := pod.Labels[VersionLabelName] +// ExtractVersion extracts the Elasticsearch version from the given labels. 
+func ExtractVersion(labels map[string]string) (*version.Version, error) { + labelValue, ok := labels[VersionLabelName] if !ok { - return nil, fmt.Errorf("pod %s is missing the version label %s", pod.Name, VersionLabelName) + return nil, fmt.Errorf("version label %s is missing", VersionLabelName) } v, err := version.Parse(labelValue) if err != nil { - return nil, errors.Wrapf(err, "pod %s has an invalid version label", pod.Name) + return nil, errors.Wrapf(err, "version label %s is invalid: %s", VersionLabelName, labelValue) } return v, nil } diff --git a/operators/pkg/controller/elasticsearch/label/label_test.go b/operators/pkg/controller/elasticsearch/label/label_test.go index 819c0075f4..df0bcd50ed 100644 --- a/operators/pkg/controller/elasticsearch/label/label_test.go +++ b/operators/pkg/controller/elasticsearch/label/label_test.go @@ -16,7 +16,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" ) @@ -73,36 +72,28 @@ func TestNewLabelSelectorForElasticsearch(t *testing.T) { func TestExtractVersion(t *testing.T) { tests := []struct { name string - args corev1.Pod + args map[string]string want *version.Version wantErr bool }{ { name: "no version", - args: corev1.Pod{}, + args: nil, want: nil, wantErr: true, }, { name: "invalid version", - args: corev1.Pod{ - ObjectMeta: v1.ObjectMeta{ - Labels: map[string]string{ - VersionLabelName: "no a version", - }, - }, + args: map[string]string{ + VersionLabelName: "not a version", }, want: nil, wantErr: true, }, { name: "valid version", - args: corev1.Pod{ - ObjectMeta: v1.ObjectMeta{ - Labels: map[string]string{ - VersionLabelName: "1.0.0", - }, - }, + args: map[string]string{ + VersionLabelName: "1.0.0", }, want: &version.Version{ Major: 1, diff --git a/operators/pkg/controller/elasticsearch/nodespec/fixtures.go b/operators/pkg/controller/elasticsearch/nodespec/fixtures.go new file mode 100644 index 0000000000..e635d101e4 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/nodespec/fixtures.go @@ -0,0 +1,33 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package nodespec + +import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func CreateTestSset(name string, esversion string, replicas int32, master bool, data bool) appsv1.StatefulSet { + labels := map[string]string{ + label.VersionLabelName: esversion, + } + label.NodeTypesMasterLabelName.Set(master, labels) + label.NodeTypesDataLabelName.Set(data, labels) + return appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: &replicas, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + }, + }, + }, + } +} diff --git a/operators/pkg/controller/elasticsearch/nodespec/resources.go b/operators/pkg/controller/elasticsearch/nodespec/resources.go index 847e98b73e..c62da54be6 100644 --- a/operators/pkg/controller/elasticsearch/nodespec/resources.go +++ b/operators/pkg/controller/elasticsearch/nodespec/resources.go @@ -10,6 +10,7 @@ import ( commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" @@ -60,5 +61,20 @@ func BuildExpectedResources(es v1alpha1.Elasticsearch, podTemplateBuilder versio Config: cfg, }) } + return nodesResources, nil } + +// MasterNodesNames returns the names of the master nodes for this ResourcesList. +func (l ResourcesList) MasterNodesNames() []string { + var masters []string + for _, s := range l.StatefulSets() { + if label.IsMasterNodeSet(s) { + for i := int32(0); i < sset.Replicas(s); i++ { + masters = append(masters, sset.PodName(s.Name, i)) + } + } + } + + return masters +} diff --git a/operators/pkg/controller/elasticsearch/nodespec/resources_test.go b/operators/pkg/controller/elasticsearch/nodespec/resources_test.go new file mode 100644 index 0000000000..72bcad4776 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/nodespec/resources_test.go @@ -0,0 +1,43 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package nodespec + +import ( + "reflect" + "testing" +) + +func TestResourcesList_MasterNodesNames(t *testing.T) { + tests := []struct { + name string + l ResourcesList + want []string + }{ + { + name: "no nodes", + l: nil, + want: nil, + }, + { + name: "3 master-only nodes, 3 master-data nodes, 3 data nodes", + l: ResourcesList{ + {StatefulSet: CreateTestSset("master-only", "7.2.0", 3, true, false)}, + {StatefulSet: CreateTestSset("master-data", "7.2.0", 3, true, true)}, + {StatefulSet: CreateTestSset("data-only", "7.2.0", 3, false, true)}, + }, + want: []string{ + "master-only-0", "master-only-1", "master-only-2", + "master-data-0", "master-data-1", "master-data-2", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.l.MasterNodesNames(); !reflect.DeepEqual(got, tt.want) { + t.Errorf("ResourcesList.MasterNodesNames() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/operators/pkg/controller/elasticsearch/sset/list.go b/operators/pkg/controller/elasticsearch/sset/list.go index 6eaedc8dba..0fd21839d6 100644 --- a/operators/pkg/controller/elasticsearch/sset/list.go +++ b/operators/pkg/controller/elasticsearch/sset/list.go @@ -5,14 +5,20 @@ package sset import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) +var log = logf.Log.WithName("statefulset") + type StatefulSetList []appsv1.StatefulSet // RetrieveActualStatefulSets returns the list of existing StatefulSets labeled for the given es cluster. @@ -61,6 +67,19 @@ func (l StatefulSetList) PodNames() []string { return names } +// GetActualPods returns the list of pods currently existing in the StatefulSetList. +func (l StatefulSetList) GetActualPods(c k8s.Client) ([]corev1.Pod, error) { + allPods := []corev1.Pod{} + for _, statefulSet := range l { + pods, err := GetActualPods(c, statefulSet) + if err != nil { + return nil, err + } + allPods = append(allPods, pods...) + } + return allPods, nil +} + // GetUpdatePartition returns the updateStrategy.Partition index, or falls back to the number of replicas if not set. 
func GetUpdatePartition(statefulSet appsv1.StatefulSet) int32 { if statefulSet.Spec.UpdateStrategy.RollingUpdate.Partition != nil { @@ -71,3 +90,25 @@ func GetUpdatePartition(statefulSet appsv1.StatefulSet) int32 { } return 0 } + +func ForStatefulSet(statefulSet appsv1.StatefulSet) (*version.Version, error) { + return label.ExtractVersion(statefulSet.Spec.Template.Labels) +} + +func ESVersionMatch(statefulSet appsv1.StatefulSet, condition func(v version.Version) bool) bool { + v, err := ForStatefulSet(statefulSet) + if err != nil || v == nil { + log.Error(err, "cannot parse version from StatefulSet", "namespace", statefulSet.Namespace, "name", statefulSet.Name) + return false + } + return condition(*v) +} + +func AtLeastOneESVersionMatch(statefulSets StatefulSetList, condition func(v version.Version) bool) bool { + for _, s := range statefulSets { + if ESVersionMatch(s, condition) { + return true + } + } + return false +} diff --git a/operators/pkg/controller/elasticsearch/sset/list_test.go b/operators/pkg/controller/elasticsearch/sset/list_test.go new file mode 100644 index 0000000000..6c7f508b31 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/sset/list_test.go @@ -0,0 +1,88 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package sset + +import ( + "testing" + + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" +) + +var ssetv7 = appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + label.VersionLabelName: "7.1.0", + }, + }, + }, + }, +} + +func TestESVersionMatch(t *testing.T) { + require.Equal(t, true, + ESVersionMatch(ssetv7, func(v version.Version) bool { + return v.Major == 7 + }), + ) + require.Equal(t, false, + ESVersionMatch(ssetv7, func(v version.Version) bool { + return v.Major == 6 + }), + ) +} + +func TestAtLeastOneESVersionMatch(t *testing.T) { + ssetv6 := *ssetv7.DeepCopy() + ssetv6.Spec.Template.Labels[label.VersionLabelName] = "6.8.0" + + require.Equal(t, true, + AtLeastOneESVersionMatch(StatefulSetList{ssetv6, ssetv7}, func(v version.Version) bool { + return v.Major == 7 + }), + ) + require.Equal(t, false, + AtLeastOneESVersionMatch(StatefulSetList{ssetv6, ssetv6}, func(v version.Version) bool { + return v.Major == 7 + }), + ) +} + +func TestStatefulSetList_GetExistingPods(t *testing.T) { + // 2 pods that belong to the sset + pod1 := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Labels: map[string]string{ + label.StatefulSetNameLabelName: ssetv7.Name, + }, + }, + } + pod2 := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod2", + Labels: map[string]string{ + label.StatefulSetNameLabelName: ssetv7.Name, + }, + }, + } + client := k8s.WrapClient(fake.NewFakeClient(&pod1, &pod2)) + pods, err := StatefulSetList{ssetv7}.GetActualPods(client) + require.NoError(t, err) + require.Equal(t, []corev1.Pod{pod1, pod2}, pods) + // TODO: test with an additional pod that does not belong to the sset and + // 
check it is not returned. + // This cannot be done currently since the fake client does not support label list options. + // See https://github.com/kubernetes-sigs/controller-runtime/pull/311 +} diff --git a/operators/pkg/controller/elasticsearch/sset/pod.go b/operators/pkg/controller/elasticsearch/sset/pod.go index bd6ec6fe64..ceca255ca8 100644 --- a/operators/pkg/controller/elasticsearch/sset/pod.go +++ b/operators/pkg/controller/elasticsearch/sset/pod.go @@ -10,8 +10,11 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) @@ -31,6 +34,33 @@ func PodRevision(pod corev1.Pod) string { return pod.Labels[appsv1.StatefulSetRevisionLabel] } +// GetActualPods return the existing pods associated to this StatefulSet. +// The returned pods may not match the expected StatefulSet replicas in a transient situation. +func GetActualPods(c k8s.Client, sset appsv1.StatefulSet) ([]corev1.Pod, error) { + var pods corev1.PodList + if err := c.List(&client.ListOptions{ + Namespace: sset.Namespace, + LabelSelector: labels.SelectorFromSet(map[string]string{ + label.StatefulSetNameLabelName: sset.Name, + }), + }, &pods); err != nil { + return nil, err + } + return pods.Items, nil +} + +func GetActualPodsNames(c k8s.Client, sset appsv1.StatefulSet) ([]string, error) { + actualPods, err := GetActualPods(c, sset) + if err != nil { + return nil, err + } + names := make([]string, 0, len(actualPods)) + for _, p := range actualPods { + names = append(names, p.Name) + } + return names, nil +} + // ScheduledUpgradesDone returns true if all pods scheduled for upgrade have been upgraded. // This is done by checking the revision of pods whose ordinal is higher or equal than the StatefulSet // rollingUpdate.Partition index. 
diff --git a/operators/pkg/controller/elasticsearch/version/running_versions.go b/operators/pkg/controller/elasticsearch/version/running_versions.go index 6a4f0bbdf4..ddfee435b3 100644 --- a/operators/pkg/controller/elasticsearch/version/running_versions.go +++ b/operators/pkg/controller/elasticsearch/version/running_versions.go @@ -5,16 +5,17 @@ package version import ( + corev1 "k8s.io/api/core/v1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - corev1 "k8s.io/api/core/v1" ) // MinVersion extracts the currently running Elasticsearch versions from the running pods func MinVersion(pods []corev1.Pod) (*version.Version, error) { var vs []version.Version for _, pod := range pods { - v, err := label.ExtractVersion(pod) + v, err := label.ExtractVersion(pod.Labels) if err != nil { return nil, err } diff --git a/operators/pkg/controller/elasticsearch/version/supported_versions.go b/operators/pkg/controller/elasticsearch/version/supported_versions.go index e458cb4acb..f2db7e0f5e 100644 --- a/operators/pkg/controller/elasticsearch/version/supported_versions.go +++ b/operators/pkg/controller/elasticsearch/version/supported_versions.go @@ -25,7 +25,7 @@ func (lh LowestHighestSupportedVersions) VerifySupportsExistingPods( pods []corev1.Pod, ) error { for _, pod := range pods { - v, err := label.ExtractVersion(pod) + v, err := label.ExtractVersion(pod.Labels) if err != nil { return err } diff --git a/operators/pkg/controller/elasticsearch/version/version6/zen1_test.go b/operators/pkg/controller/elasticsearch/version/version6/zen1_test.go deleted file mode 100644 index d4bf96a861..0000000000 --- a/operators/pkg/controller/elasticsearch/version/version6/zen1_test.go +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package version6 - -import ( - "bytes" - "io" - "io/ioutil" - "net/http" - "strconv" - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - common "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func fakeEsClient(raiseError bool) client.Client { - return client.NewMockClient(version.MustParse("6.7.0"), func(req *http.Request) *http.Response { - var statusCode int - var respBody io.ReadCloser - - if raiseError { - respBody = ioutil.NopCloser(bytes.NewBufferString("KO")) - statusCode = 400 - } else { - respBody = ioutil.NopCloser(bytes.NewBufferString("OK")) - statusCode = 200 - } - - return &http.Response{ - StatusCode: statusCode, - Body: respBody, - Header: make(http.Header), - Request: req, - } - }) -} - -func newMasterPod(name, namespace, ssetName string) corev1.Pod { - pod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: map[string]string{ - string(label.NodeTypesMasterLabelName): strconv.FormatBool(true), - label.StatefulSetNameLabelName: ssetName, - }, - }, - Status: corev1.PodStatus{ - Conditions: []corev1.PodCondition{ - { - Type: corev1.ContainersReady, - Status: corev1.ConditionTrue, - }, - { - Type: corev1.ContainersReady, - Status: corev1.ConditionTrue, - }, - }, - }, - } - return pod -} - -func ssetConfig(namespace, ssetName string) *corev1.Secret { - return &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: namespace, - Name: settings.ConfigSecretName(ssetName), - }, - Data: map[string][]byte{ - settings.ConfigFileName: []byte("a: b\nc: d\n"), - }, - } -} -func setupScheme(t *testing.T) *runtime.Scheme { - sc := scheme.Scheme - if err := v1alpha1.SchemeBuilder.AddToScheme(sc); err != nil { - assert.Fail(t, "failed to add Es types") - } - return sc -} - -func TestUpdateZen1Discovery(t *testing.T) { - s := setupScheme(t) - cluster := v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns1", - Name: "es1", - }, - } - ssetName := "master-nodes" - type args struct { - cluster v1alpha1.Elasticsearch - c k8s.Client - esClient client.Client - allPods []corev1.Pod - performableChanges *mutation.PerformableChanges - state *reconcile.State - } - tests := []struct { - args args - name string - expectedMinimumMasterNode string - want bool - wantErr bool - }{ - { - name: "Update a one master node cluster", - args: args{ - esClient: fakeEsClient(true), // second master is not created, raise an error if API is called - c: k8s.WrapClient(fake.NewFakeClientWithScheme(s, ssetConfig("ns1", ssetName))), - performableChanges: &mutation.PerformableChanges{ - Changes: mutation.Changes{ - 
ToCreate: []mutation.PodToCreate{ - { - Pod: newMasterPod("master2", "ns1", ssetName), - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }, - }, - }, - }, - allPods: []corev1.Pod{ - newMasterPod("master1", "ns1", ssetName), - }, - state: reconcile.NewState(cluster), - }, - want: true, - wantErr: false, - expectedMinimumMasterNode: "2", - }, - { - name: "Add a master to a four master node cluster", - args: args{ - esClient: fakeEsClient(false), // a majority of master is available, call the API - c: k8s.WrapClient(fake.NewFakeClientWithScheme( - s, - ssetConfig("ns1", ssetName), - ), - ), - performableChanges: &mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToCreate: []mutation.PodToCreate{ - { - Pod: newMasterPod("master5", "ns1", ssetName), - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }, - }, - }, - }, - allPods: []corev1.Pod{ - newMasterPod("master1", "ns1", ssetName), - newMasterPod("master2", "ns1", ssetName), - newMasterPod("master3", "ns1", ssetName), - newMasterPod("master4", "ns1", ssetName), - }, - state: reconcile.NewState(cluster), - }, - want: false, // mmn should also be updated with the API - wantErr: false, - expectedMinimumMasterNode: "3", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := UpdateZen1Discovery( - tt.args.cluster, - tt.args.c, - tt.args.esClient, - tt.args.allPods, - tt.args.performableChanges, - tt.args.state, - ) - if (err != nil) != tt.wantErr { - t.Errorf("UpdateZen1Discovery() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - t.Errorf("UpdateZen1Discovery() = %v, want %v", got, tt.want) - } - // Check the mmn in the new pods - for _, newPod := range tt.args.performableChanges.ToCreate { - expectedConfiguration := - common.MustNewSingleValue(settings.DiscoveryZenMinimumMasterNodes, tt.expectedMinimumMasterNode) - if diff := newPod.PodSpecCtx.Config.Diff(expectedConfiguration, nil); diff != nil { - t.Errorf("zen1.UpdateZen1Discovery() = %v, want %v", diff, tt.want) - } - } - if !tt.want { // requeue not returned: it means that minimum_master_nodes should be saved in status - assert.Equal(t, tt.expectedMinimumMasterNode, strconv.Itoa(tt.args.state.GetZen1MinimumMasterNodes())) - } - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/version/version7/initial_master_nodes.go b/operators/pkg/controller/elasticsearch/version/version7/initial_master_nodes.go deleted file mode 100644 index e0cf320bcc..0000000000 --- a/operators/pkg/controller/elasticsearch/version/version7/initial_master_nodes.go +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package version7 - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" -) - -const ( - // ClusterUUIDAnnotationName used to store the cluster UUID as an annotation when cluster has been bootstrapped. - ClusterUUIDAnnotationName = "elasticsearch.k8s.elastic.co/cluster-uuid" -) - -// ClusterInitialMasterNodesEnforcer enforces that cluster.initial_master_nodes is set if the cluster is bootstrapping. -// It's also save the cluster UUID as an annotation to ensure that it's not set if the cluster has already been bootstrapped. -func ClusterInitialMasterNodesEnforcer( - cluster v1alpha1.Elasticsearch, - observedState observer.State, - c k8s.Client, - performableChanges mutation.PerformableChanges, - resourcesState reconcile.ResourcesState, -) (*mutation.PerformableChanges, error) { - - // Check if the cluster has an UUID, if not try to fetch it from the observer state and store it as an annotation. - _, ok := cluster.Annotations[ClusterUUIDAnnotationName] - if ok { - // existence of the annotation shows that the cluster has been bootstrapped - return &performableChanges, nil - } - - // no annotation, let see if the cluster has been bootstrapped by looking at it's UUID - if observedState.ClusterState != nil && len(observedState.ClusterState.ClusterUUID) > 0 { - // UUID is set, let's update the annotation on the Elasticsearch object - if cluster.Annotations == nil { - cluster.Annotations = make(map[string]string) - } - cluster.Annotations[ClusterUUIDAnnotationName] = observedState.ClusterState.ClusterUUID - if err := c.Update(&cluster); err != nil { - return nil, err - } - return &performableChanges, nil - } - - var masterEligibleNodeNames []string - for _, pod := range resourcesState.CurrentPods { - if label.IsMasterNode(pod.Pod) { - masterEligibleNodeNames = append(masterEligibleNodeNames, pod.Pod.Name) - } - } - - // collect the master eligible node names from the pods we're about to create - for _, change := range performableChanges.ToCreate { - if label.IsMasterNode(change.Pod) { - masterEligibleNodeNames = append(masterEligibleNodeNames, change.Pod.Name) - } - } - - // make every master node in the cluster aware of the others: - for i, change := range performableChanges.ToCreate { - if !label.IsMasterNode(change.Pod) { - // we only need to set this on master nodes - continue - } - - err := performableChanges.ToCreate[i].PodSpecCtx.Config.SetStrings( - settings.ClusterInitialMasterNodes, - masterEligibleNodeNames..., - ) - if err != nil { - return nil, err - } - } - - return &performableChanges, nil -} diff --git a/operators/pkg/controller/elasticsearch/version/version7/initial_master_nodes_test.go b/operators/pkg/controller/elasticsearch/version/version7/initial_master_nodes_test.go deleted file mode 100644 index 21ef4e75b3..0000000000 --- a/operators/pkg/controller/elasticsearch/version/version7/initial_master_nodes_test.go +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. 
Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package version7 - -import ( - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - common "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" - esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -const ( - defaultClusterUUID = "jiMyMA1hQ-WMPK3vEStZuw" -) - -func setupScheme(t *testing.T) *runtime.Scheme { - sc := scheme.Scheme - if err := v1alpha1.SchemeBuilder.AddToScheme(sc); err != nil { - assert.Fail(t, "failed to add Es types") - } - return sc -} - -var esNN = types.NamespacedName{ - Namespace: "ns1", - Name: "foo", -} - -func newElasticsearch() *v1alpha1.Elasticsearch { - return &v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: esNN.Namespace, - Name: esNN.Name, - }, - } -} - -func withAnnotation(es *v1alpha1.Elasticsearch, name, value string) *v1alpha1.Elasticsearch { - if es.Annotations == nil { - es.Annotations = make(map[string]string) - } - es.Annotations[name] = value - return es -} - -// newPod creates a new named potentially labeled as master -func newPod(name string, master bool) pod.PodWithConfig { - p := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: make(map[string]string), - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{}}, - }, - } - - label.NodeTypesMasterLabelName.Set(master, p.Labels) - - return pod.PodWithConfig{Pod: p, Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}} -} - -func assertInitialMasterNodes(t *testing.T, changes *mutation.PerformableChanges, shouldBeSet bool, nodeNames ...string) { - for _, change := range changes.ToCreate { - cfg, err := change.PodSpecCtx.Config.Unpack() - require.NoError(t, err) - nodes := cfg.Cluster.InitialMasterNodes - if !label.IsMasterNode(change.Pod) { - require.Nil(t, nodes) - } else if !shouldBeSet { - require.Nil(t, nodes) - } else { - require.NotNil(t, nodes) - require.Equal(t, nodeNames, nodes) - } - } -} - -func TestClusterInitialMasterNodesEnforcer(t *testing.T) { - s := setupScheme(t) - type args struct { - cluster *v1alpha1.Elasticsearch - clusterState observer.State - performableChanges mutation.PerformableChanges - resourcesState reconcile.ResourcesState - } - tests := []struct { - name string - args args - assertions func(t *testing.T, changes *mutation.PerformableChanges) - wantClusterUUIDAnnotation bool - wantErr bool - }{ - { - name: "not set when likely already bootstrapped", - args: args{ - cluster: withAnnotation(newElasticsearch(), 
ClusterUUIDAnnotationName, defaultClusterUUID), - clusterState: observer.State{ - ClusterState: &esclient.ClusterState{ - ClusterUUID: defaultClusterUUID, - }, - }, - performableChanges: mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToCreate: []mutation.PodToCreate{{ - Pod: newPod("b", true).Pod, - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }}, - }, - }, - resourcesState: reconcile.ResourcesState{ - CurrentPods: pod.PodsWithConfig{newPod("a", true)}, - }, - }, - assertions: func(t *testing.T, changes *mutation.PerformableChanges) { - assertInitialMasterNodes(t, changes, false) - }, - wantClusterUUIDAnnotation: true, - }, - { - name: "set when likely not bootstrapped", - args: args{ - cluster: newElasticsearch(), - clusterState: observer.State{}, - performableChanges: mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToCreate: []mutation.PodToCreate{{ - Pod: newPod("b", true).Pod, - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }}, - }, - }, - resourcesState: reconcile.ResourcesState{ - CurrentPods: pod.PodsWithConfig{newPod("a", false)}, - }, - }, - assertions: func(t *testing.T, changes *mutation.PerformableChanges) { - assertInitialMasterNodes(t, changes, true, "b") - }, - }, - { - name: "just been bootstrapped, annotation should be set", - args: args{ - cluster: newElasticsearch(), - clusterState: observer.State{ - ClusterState: &esclient.ClusterState{ - ClusterUUID: defaultClusterUUID, - }, - }, - performableChanges: mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToCreate: []mutation.PodToCreate{{ - Pod: newPod("b", true).Pod, - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }}, - }, - }, - resourcesState: reconcile.ResourcesState{ - CurrentPods: pod.PodsWithConfig{newPod("a", true)}, - }, - }, - assertions: func(t *testing.T, changes *mutation.PerformableChanges) { - assertInitialMasterNodes(t, changes, false) - }, - wantClusterUUIDAnnotation: true, - }, - { - name: "all masters are informed of all masters", - args: args{ - cluster: newElasticsearch(), - performableChanges: mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToCreate: []mutation.PodToCreate{ - { - Pod: newPod("b", true).Pod, - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }, - { - Pod: newPod("c", true).Pod, - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }, - { - Pod: newPod("d", true).Pod, - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }, - { - Pod: newPod("e", true).Pod, - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }, - // f is not master, so masters should not be informed of it - { - Pod: newPod("f", false).Pod, - PodSpecCtx: pod.PodSpecContext{ - Config: settings.CanonicalConfig{CanonicalConfig: common.NewCanonicalConfig()}, - }, - }, - }, - }, - }, - }, - assertions: func(t *testing.T, changes *mutation.PerformableChanges) { - assertInitialMasterNodes(t, changes, true, "b", "c", "d", "e") - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := k8s.WrapClient(fake.NewFakeClientWithScheme(s, tt.args.cluster)) - 
got, err := ClusterInitialMasterNodesEnforcer( - *tt.args.cluster, - tt.args.clusterState, - client, - tt.args.performableChanges, - tt.args.resourcesState, - ) - if (err != nil) != tt.wantErr { - t.Errorf("ClusterInitialMasterNodesEnforcer() error = %v, wantErr %v", err, tt.wantErr) - return - } - var es v1alpha1.Elasticsearch - err = client.Get(esNN, &es) - assert.NoError(t, err) - annotation := es.Annotations != nil && len(es.Annotations[ClusterUUIDAnnotationName]) > 0 - assert.Equal(t, tt.wantClusterUUIDAnnotation, annotation) - - tt.assertions(t, got) - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/version/version7/zen2.go b/operators/pkg/controller/elasticsearch/version/version7/zen2.go deleted file mode 100644 index b38251b1e5..0000000000 --- a/operators/pkg/controller/elasticsearch/version/version7/zen2.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package version7 - -import ( - "context" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" - - logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" -) - -var ( - log = logf.Log.WithName("version7") -) - -func UpdateZen2Settings( - esClient esclient.Client, - minVersion version.Version, - changes mutation.Changes, - performableChanges mutation.PerformableChanges, -) error { - if !minVersion.IsSameOrAfter(version.MustParse("7.0.0")) { - log.Info("not setting zen2 exclusions", "min version in cluster", minVersion) - return nil - } - if !changes.HasChanges() { - log.Info("Ensuring no voting exclusions are set") - ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) - defer cancel() - if err := esClient.DeleteVotingConfigExclusions(ctx, false); err != nil { - return err - } - return nil - } - - leavingMasters := make([]string, 0) - for _, pod := range performableChanges.ToDelete { - if label.IsMasterNode(pod.Pod) { - leavingMasters = append(leavingMasters, pod.Pod.Name) - } - } - if len(leavingMasters) != 0 { - // TODO: only update if required and remove old exclusions as well - log.Info("Setting voting config exclusions", "excluding", leavingMasters) - ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) - defer cancel() - if err := esClient.AddVotingConfigExclusions(ctx, leavingMasters, ""); err != nil { - return err - } - } - return nil -} diff --git a/operators/pkg/controller/elasticsearch/version/version7/zen2_test.go b/operators/pkg/controller/elasticsearch/version/version7/zen2_test.go deleted file mode 100644 index fcf4e9c73a..0000000000 --- a/operators/pkg/controller/elasticsearch/version/version7/zen2_test.go +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package version7 - -import ( - "bytes" - "io" - "io/ioutil" - "net/http" - "strings" - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func fakeEsClient(method string) esclient.Client { - return esclient.NewMockClient(version.MustParse("7.0.0"), func(req *http.Request) *http.Response { - var statusCode int - var respBody io.ReadCloser - - if strings.Contains(req.URL.RequestURI(), "/_cluster/voting_config_exclusions") && - req.Method == method { - respBody = ioutil.NopCloser(bytes.NewBufferString("OK")) - statusCode = 200 - - } else { - respBody = ioutil.NopCloser(bytes.NewBufferString("KO")) - statusCode = 400 - } - - return &http.Response{ - StatusCode: statusCode, - Body: respBody, - Header: make(http.Header), - Request: req, - } - }) -} - -func TestUpdateZen2Settings(t *testing.T) { - type args struct { - esClient esclient.Client - minVersion version.Version - changes mutation.Changes - performableChanges mutation.PerformableChanges - } - masterPodFixture := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: label.NodeTypesMasterLabelName.AsMap(true), - }, - } - tests := []struct { - name string - args args - wantErr bool - }{ - { - name: "Mixed clusters with pre-7.x.x nodes, don't use zen2 API", - args: args{ - esClient: fakeEsClient("No request expected"), - minVersion: version.MustParse("6.8.0"), - changes: mutation.Changes{ - ToCreate: nil, - ToKeep: nil, - ToDelete: []pod.PodWithConfig{ - {Pod: masterPodFixture}, - }, - }, - performableChanges: mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToCreate: nil, - ToKeep: nil, - ToDelete: []pod.PodWithConfig{ - {Pod: masterPodFixture}, - }, - }, - }, - }, - wantErr: false, - }, - { - name: "No changes: delete voting exclusions", - args: args{ - esClient: fakeEsClient(http.MethodDelete), - minVersion: version.MustParse("7.0.0"), - changes: mutation.Changes{}, - performableChanges: mutation.PerformableChanges{}, - }, - wantErr: false, - }, - { - name: "Delete master: set voting exclusion", - args: args{ - esClient: fakeEsClient(http.MethodPost), - minVersion: version.MustParse("7.0.0"), - changes: mutation.Changes{ - ToCreate: nil, - ToKeep: nil, - ToDelete: []pod.PodWithConfig{ - {Pod: masterPodFixture}, - }, - }, - performableChanges: mutation.PerformableChanges{ - Changes: mutation.Changes{ - ToCreate: nil, - ToKeep: nil, - ToDelete: []pod.PodWithConfig{ - {Pod: masterPodFixture}, - }, - }, - }, - }, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if err := UpdateZen2Settings(tt.args.esClient, tt.args.minVersion, tt.args.changes, tt.args.performableChanges); (err != nil) != tt.wantErr { - t.Errorf("UpdateZen2Settings() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/version/zen1/compatibility.go b/operators/pkg/controller/elasticsearch/version/zen1/compatibility.go new file mode 100644 index 0000000000..af745cc1d0 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen1/compatibility.go @@ -0,0 +1,28 @@ +// Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen1 + +import ( + appsv1 "k8s.io/api/apps/v1" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" +) + +// zen1VersionMatch returns true if the given Elasticsearch version is compatible with zen1. +func zen1VersionMatch(v version.Version) bool { + return v.Major < 7 +} + +// IsCompatibleForZen1 returns true if the given StatefulSet is compatible with zen1. +func IsCompatibleForZen1(statefulSet appsv1.StatefulSet) bool { + return sset.ESVersionMatch(statefulSet, zen1VersionMatch) +} + +// AtLeastOneNodeCompatibleForZen1 returns true if the given StatefulSetList contains +// at least one StatefulSet compatible with zen1. +func AtLeastOneNodeCompatibleForZen1(statefulSets sset.StatefulSetList) bool { + return sset.AtLeastOneESVersionMatch(statefulSets, zen1VersionMatch) +} diff --git a/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go b/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go new file mode 100644 index 0000000000..d71fda9cf8 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go @@ -0,0 +1,94 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen1 + +import ( + "testing" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" +) + +func createStatefulSetWithVersion(version string) appsv1.StatefulSet { + return appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + label.VersionLabelName: version, + }, + }, + }}} +} + +func TestIsCompatibleForZen1(t *testing.T) { + + tests := []struct { + name string + sset appsv1.StatefulSet + want bool + }{ + { + name: "version 6.8.0", + sset: createStatefulSetWithVersion("6.8.0"), + want: true, + }, + { + name: "version 7.0.0", + sset: createStatefulSetWithVersion("7.0.0"), + want: false, + }, + { + name: "no version", + sset: createStatefulSetWithVersion(""), + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsCompatibleForZen1(tt.sset); got != tt.want { + t.Errorf("IsCompatibleForZen1() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestAtLeastOneNodeCompatibleForZen1(t *testing.T) { + tests := []struct { + name string + statefulSets sset.StatefulSetList + want bool + }{ + { + name: "no sset", + statefulSets: nil, + want: false, + }, + { + name: "none compatible", + statefulSets: sset.StatefulSetList{createStatefulSetWithVersion("7.0.0"), createStatefulSetWithVersion("7.1.0")}, + want: false, + }, + { + name: "one compatible", + statefulSets: sset.StatefulSetList{createStatefulSetWithVersion("6.8.0"), createStatefulSetWithVersion("7.1.0")}, + want: true, + }, + { + name: "all compatible", + statefulSets: sset.StatefulSetList{createStatefulSetWithVersion("6.8.0"), 
createStatefulSetWithVersion("6.9.0")}, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := AtLeastOneNodeCompatibleForZen1(tt.statefulSets); got != tt.want { + t.Errorf("AtLeastOneNodeCompatibleForZen1() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/operators/pkg/controller/elasticsearch/version/version6/zen1.go b/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters.go similarity index 50% rename from operators/pkg/controller/elasticsearch/version/version6/zen1.go rename to operators/pkg/controller/elasticsearch/version/zen1/minimum_masters.go index 1a9b5426d9..8a4bc63e8b 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/zen1.go +++ b/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters.go @@ -2,45 +2,73 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package version6 +package zen1 import ( "context" "strconv" + logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" common "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - corev1 "k8s.io/api/core/v1" - logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" ) var ( - log = logf.Log.WithName("version") + log = logf.Log.WithName("zen1") ) -// UpdateZen1Discovery updates the secret that contains the configuration of the nodes with the expected value -// of discovery.zen.minimum_master_nodes. It also attempts to update this specific setting for the already existing nodes -// through the API. If an update can't be done immediately (e.g. because some master nodes are not created yet) then the -// function returns true in order to notify the caller that a new reconciliation loop should be triggered to try again later. -func UpdateZen1Discovery( - cluster v1alpha1.Elasticsearch, +// SetupMinimumMasterNodesConfig modifies the ES config of the given resources to setup +// zen1 minimum master nodes. +func SetupMinimumMasterNodesConfig(nodeSpecResources nodespec.ResourcesList) error { + masters := nodeSpecResources.MasterNodesNames() + quorum := settings.Quorum(len(masters)) + for i, res := range nodeSpecResources { + if !IsCompatibleForZen1(res.StatefulSet) { + continue + } + // patch config with the expected minimum master nodes + if err := nodeSpecResources[i].Config.MergeWith( + common.MustNewSingleValue( + settings.DiscoveryZenMinimumMasterNodes, + strconv.Itoa(quorum), + ), + ); err != nil { + return err + } + } + return nil +} + +// UpdateMinimumMasterNodes calls the ES API to update the minimum_master_nodes setting if required. +// It returns true if this should be retried later (re-queued). 
+func UpdateMinimumMasterNodes( c k8s.Client, + es v1alpha1.Elasticsearch, esClient client.Client, - allPods []corev1.Pod, - performableChanges *mutation.PerformableChanges, + actualStatefulSets sset.StatefulSetList, reconcileState *reconcile.State, ) (bool, error) { + if !AtLeastOneNodeCompatibleForZen1(actualStatefulSets) { + // nothing to do + return false, nil + } + pods, err := actualStatefulSets.GetActualPods(c) + if err != nil { + return false, err + } // Get current master nodes count currentMasterCount := 0 // Among them get the ones that are ready currentAvailableMasterCount := 0 - for _, p := range allPods { + for _, p := range pods { if label.IsMasterNode(p) { currentMasterCount++ if k8s.IsPodReady(p) { @@ -48,60 +76,9 @@ func UpdateZen1Discovery( } } } + minimumMasterNodes := settings.Quorum(currentMasterCount) - nextMasterCount := currentMasterCount - // Add masters that must be created by this reconciliation loop - for _, pod := range performableChanges.ToCreate.Pods() { - if label.IsMasterNode(pod) { - nextMasterCount++ - } - } - - minimumMasterNodes := settings.Quorum(nextMasterCount) - // Update the current value in the configuration of existing pods - log.V(1).Info("Set minimum master nodes", - "how", "configuration", - "namespace", cluster.Namespace, - "es_name", cluster.Name, - "currentMasterCount", currentMasterCount, - "nextMasterCount", nextMasterCount, - "minimum_master_nodes", minimumMasterNodes, - ) - for _, p := range allPods { - config, err := settings.GetESConfigContent(c, p.Namespace, p.Labels[label.StatefulSetNameLabelName]) - if err != nil { - return false, err - } - err = config.MergeWith( - common.MustNewSingleValue( - settings.DiscoveryZenMinimumMasterNodes, - strconv.Itoa(minimumMasterNodes), - ), - ) - if err != nil { - return false, err - } - // TODO: fix for sset - //if err := settings.ReconcileConfig(c, cluster, p, config); err != nil { - // return false, err - //} - } - - // Update the current value for each new pod that is about to be created - for _, change := range performableChanges.ToCreate { - // Update the minimum_master_nodes before pod creation in order to avoid split brain situation. - err := change.PodSpecCtx.Config.MergeWith( - common.MustNewSingleValue( - settings.DiscoveryZenMinimumMasterNodes, - strconv.Itoa(minimumMasterNodes), - ), - ) - if err != nil { - return false, err - } - } - - // Check if we really need to update minimum_master_nodes with a API call + // Check if we really need to update minimum_master_nodes with an API call if minimumMasterNodes == reconcileState.GetZen1MinimumMasterNodes() { return false, nil } @@ -110,10 +87,10 @@ func UpdateZen1Discovery( if currentAvailableMasterCount < minimumMasterNodes { // This is expected to happen from time to time log.V(1).Info("Not enough masters to update the API", - "namespace", cluster.Namespace, - "es_name", cluster.Name, + "namespace", es.Namespace, + "es_name", es.Name, "current", currentAvailableMasterCount, - "required", minimumMasterNodes) + "minimum_master_nodes", minimumMasterNodes) // We can't update the minimum master nodes right now, it is the case if a new master node is not created yet. // In that case we need to requeue later. 
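		// Returning true with a nil error is assumed to ask the caller to re-queue the
		// reconciliation and retry once more master nodes have become ready.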
return true, nil @@ -121,10 +98,9 @@ func UpdateZen1Discovery( log.Info("Updating minimum master nodes", "how", "api", - "namespace", cluster.Namespace, - "es_name", cluster.Name, - "currentMasterCount", currentMasterCount, - "nextMasterCount", nextMasterCount, + "namespace", es.Namespace, + "es_name", es.Name, + "current", currentMasterCount, "minimum_master_nodes", minimumMasterNodes, ) ctx, cancel := context.WithTimeout(context.Background(), client.DefaultReqTimeout) diff --git a/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters_test.go b/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters_test.go new file mode 100644 index 0000000000..0d11f06c0c --- /dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters_test.go @@ -0,0 +1,200 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen1 + +import ( + "context" + "testing" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + settings2 "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestSetupMinimumMasterNodesConfig(t *testing.T) { + tests := []struct { + name string + nodeSpecResources nodespec.ResourcesList + expected []settings.CanonicalConfig + }{ + { + name: "no master nodes", + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("data", "7.1.0", 3, false, true), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{settings.NewCanonicalConfig()}, + }, + { + name: "3 masters, 3 master+data, 3 data", + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("master", "6.8.0", 3, true, false), Config: settings.NewCanonicalConfig()}, + {StatefulSet: nodespec.CreateTestSset("masterdata", "6.8.0", 3, true, true), Config: settings.NewCanonicalConfig()}, + {StatefulSet: nodespec.CreateTestSset("data", "6.8.0", 3, false, true), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{ + {CanonicalConfig: settings2.MustCanonicalConfig(map[string]string{ + settings.DiscoveryZenMinimumMasterNodes: "4", + })}, + {CanonicalConfig: settings2.MustCanonicalConfig(map[string]string{ + settings.DiscoveryZenMinimumMasterNodes: "4", + })}, + {CanonicalConfig: settings2.MustCanonicalConfig(map[string]string{ + settings.DiscoveryZenMinimumMasterNodes: "4", + })}, + }, + }, + { + name: "version 7: nothing should appear in the config", + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("master", "7.1.0", 3, true, false), Config: settings.NewCanonicalConfig()}, + }, + expected: 
[]settings.CanonicalConfig{settings.NewCanonicalConfig()}, + }, + { + name: "mixed v6 & v7: include all masters but only in v6 configs", + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("masterv6", "6.8.0", 3, true, false), Config: settings.NewCanonicalConfig()}, + {StatefulSet: nodespec.CreateTestSset("masterv7", "7.1.0", 3, true, false), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{ + {CanonicalConfig: settings2.MustCanonicalConfig(map[string]string{ + settings.DiscoveryZenMinimumMasterNodes: "4", + })}, + settings.NewCanonicalConfig(), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := SetupMinimumMasterNodesConfig(tt.nodeSpecResources) + require.NoError(t, err) + for i := 0; i < len(tt.nodeSpecResources); i++ { + expected, err := tt.expected[i].Render() + require.NoError(t, err) + actual, err := tt.nodeSpecResources[i].Config.Render() + require.NoError(t, err) + require.Equal(t, expected, actual) + } + }) + } +} + +type fakeESClient struct { + called bool + calledWith int + client.Client +} + +func (f *fakeESClient) SetMinimumMasterNodes(ctx context.Context, count int) error { + f.called = true + f.calledWith = count + return nil +} + +func TestUpdateMinimumMasterNodes(t *testing.T) { + ssetSample := nodespec.CreateTestSset("nodes", "6.8.0", 3, true, true) + // simulate 3/3 pods ready + labels := map[string]string{ + label.StatefulSetNameLabelName: ssetSample.Name, + } + label.NodeTypesMasterLabelName.Set(true, labels) + label.NodeTypesDataLabelName.Set(true, labels) + podsReady3 := make([]corev1.Pod, 0, 3) + for _, podName := range sset.PodNames(ssetSample) { + podsReady3 = append(podsReady3, corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ssetSample.Namespace, + Name: podName, + Labels: labels, + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ + { + Status: corev1.ConditionTrue, + Type: corev1.ContainersReady, + }, + { + Status: corev1.ConditionTrue, + Type: corev1.PodReady, + }, + }, + }, + }) + } + // simulate 1/3 pods ready + podsReady1 := make([]corev1.Pod, 3) + podsReady1[0] = *podsReady3[0].DeepCopy() + podsReady1[0].Status.Conditions[0].Status = corev1.ConditionFalse + podsReady1[1] = *podsReady3[1].DeepCopy() + podsReady1[1].Status.Conditions[0].Status = corev1.ConditionFalse + podsReady1[2] = *podsReady3[2].DeepCopy() + + tests := []struct { + wantCalled bool + wantRequeue bool + wantCalledWith int + c k8s.Client + name string + actualStatefulSets sset.StatefulSetList + reconcileState *reconcile.State + }{ + { + name: "no v6 nodes", + actualStatefulSets: sset.StatefulSetList{nodespec.CreateTestSset("nodes", "7.1.0", 3, true, true)}, + wantCalled: false, + }, + { + name: "correct mmn already set in ES status", + c: k8s.WrapClient(fake.NewFakeClient(&podsReady3[0], &podsReady3[1], &podsReady3[2])), + actualStatefulSets: sset.StatefulSetList{ssetSample}, + reconcileState: reconcile.NewState(v1alpha1.Elasticsearch{Status: v1alpha1.ElasticsearchStatus{ZenDiscovery: v1alpha1.ZenDiscoveryStatus{MinimumMasterNodes: 2}}}), + wantCalled: false, + }, + { + name: "mmn should be updated, it's different in the ES status", + c: k8s.WrapClient(fake.NewFakeClient(&podsReady3[0], &podsReady3[1], &podsReady3[2])), + actualStatefulSets: sset.StatefulSetList{ssetSample}, + reconcileState: reconcile.NewState(v1alpha1.Elasticsearch{Status: v1alpha1.ElasticsearchStatus{ZenDiscovery: v1alpha1.ZenDiscoveryStatus{MinimumMasterNodes: 1}}}), + wantCalled: true, 
+ wantCalledWith: 2, + }, + { + name: "mmn should be updated, it isn't set in the ES status", + c: k8s.WrapClient(fake.NewFakeClient(&podsReady3[0], &podsReady3[1], &podsReady3[2])), + actualStatefulSets: sset.StatefulSetList{ssetSample}, + reconcileState: reconcile.NewState(v1alpha1.Elasticsearch{}), + wantCalled: true, + wantCalledWith: 2, + }, + { + name: "cannot update since not enough masters available", + c: k8s.WrapClient(fake.NewFakeClient(&podsReady1[0], &podsReady1[1], &podsReady1[2])), + actualStatefulSets: sset.StatefulSetList{ssetSample}, + reconcileState: reconcile.NewState(v1alpha1.Elasticsearch{}), + wantCalled: false, + wantRequeue: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + esClient := &fakeESClient{} + requeue, err := UpdateMinimumMasterNodes(tt.c, v1alpha1.Elasticsearch{}, esClient, tt.actualStatefulSets, tt.reconcileState) + require.NoError(t, err) + require.Equal(t, tt.wantRequeue, requeue) + require.Equal(t, tt.wantCalled, esClient.called) + require.Equal(t, tt.wantCalledWith, esClient.calledWith) + }) + } +} diff --git a/operators/pkg/controller/elasticsearch/version/version6/podspecs.go b/operators/pkg/controller/elasticsearch/version/zen1/podspecs.go similarity index 98% rename from operators/pkg/controller/elasticsearch/version/version6/podspecs.go rename to operators/pkg/controller/elasticsearch/version/zen1/podspecs.go index b9d89c749c..9fa8a23495 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/podspecs.go +++ b/operators/pkg/controller/elasticsearch/version/zen1/podspecs.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package version6 +package zen1 import ( "path" diff --git a/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go b/operators/pkg/controller/elasticsearch/version/zen1/podspecs_test.go similarity index 99% rename from operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go rename to operators/pkg/controller/elasticsearch/version/zen1/podspecs_test.go index 585038136e..c48bd2854e 100644 --- a/operators/pkg/controller/elasticsearch/version/version6/podspecs_test.go +++ b/operators/pkg/controller/elasticsearch/version/zen1/podspecs_test.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package version6 +package zen1 import ( "path" diff --git a/operators/pkg/controller/elasticsearch/version/zen2/compatibility.go b/operators/pkg/controller/elasticsearch/version/zen2/compatibility.go new file mode 100644 index 0000000000..a15f37570d --- /dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen2/compatibility.go @@ -0,0 +1,28 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen2 + +import ( + appsv1 "k8s.io/api/apps/v1" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" +) + +// zen2VersionMatch returns true if the given Elasticsearch version is compatible with zen2. 
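// Zen2 cluster coordination shipped with Elasticsearch 7.0, so compatibility is keyed
// off the major version exposed by each StatefulSet's pod template (the tests below set
// it through label.VersionLabelName).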
+func zen2VersionMatch(v version.Version) bool { + return v.Major >= 7 +} + +// IsCompatibleForZen2 returns true if the given StatefulSet is compatible with zen2. +func IsCompatibleForZen2(statefulSet appsv1.StatefulSet) bool { + return sset.ESVersionMatch(statefulSet, zen2VersionMatch) +} + +// AtLeastOneNodeCompatibleForZen2 returns true if the given StatefulSetList contains +// at least one StatefulSet compatible with zen2. +func AtLeastOneNodeCompatibleForZen2(statefulSets sset.StatefulSetList) bool { + return sset.AtLeastOneESVersionMatch(statefulSets, zen2VersionMatch) +} diff --git a/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go b/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go new file mode 100644 index 0000000000..73cd250a40 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go @@ -0,0 +1,94 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen2 + +import ( + "testing" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" +) + +func createStatefulSetWithESVersion(version string) appsv1.StatefulSet { + return appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + label.VersionLabelName: version, + }, + }, + }}} +} + +func TestIsCompatibleForZen2(t *testing.T) { + + tests := []struct { + name string + sset appsv1.StatefulSet + want bool + }{ + { + name: "version 6.8.0", + sset: createStatefulSetWithESVersion("6.8.0"), + want: false, + }, + { + name: "version 7.0.0", + sset: createStatefulSetWithESVersion("7.0.0"), + want: true, + }, + { + name: "no version", + sset: createStatefulSetWithESVersion(""), + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsCompatibleForZen2(tt.sset); got != tt.want { + t.Errorf("IsCompatibleForZen2() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestAtLeastOneNodeCompatibleForZen2(t *testing.T) { + tests := []struct { + name string + statefulSets sset.StatefulSetList + want bool + }{ + { + name: "no sset", + statefulSets: nil, + want: false, + }, + { + name: "none compatible", + statefulSets: sset.StatefulSetList{createStatefulSetWithESVersion("6.8.0"), createStatefulSetWithESVersion("6.8.1")}, + want: false, + }, + { + name: "one compatible", + statefulSets: sset.StatefulSetList{createStatefulSetWithESVersion("6.8.0"), createStatefulSetWithESVersion("7.1.0")}, + want: true, + }, + { + name: "all compatible", + statefulSets: sset.StatefulSetList{createStatefulSetWithESVersion("7.1.0"), createStatefulSetWithESVersion("7.2.0")}, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := AtLeastOneNodeCompatibleForZen2(tt.statefulSets); got != tt.want { + t.Errorf("AtLeastOneNodeCompatibleForZen2() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes.go b/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes.go new file mode 100644 index 0000000000..57ee428469 --- 
/dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes.go @@ -0,0 +1,87 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen2 + +import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" +) + +const ( + // ClusterUUIDAnnotationName used to store the cluster UUID as an annotation when cluster has been bootstrapped. + ClusterUUIDAnnotationName = "elasticsearch.k8s.elastic.co/cluster-uuid" +) + +// annotatedForBootstrap returns true if the cluster has been annotated with the UUID already. +func annotatedForBootstrap(cluster v1alpha1.Elasticsearch) bool { + _, bootstrapped := cluster.Annotations[ClusterUUIDAnnotationName] + return bootstrapped +} + +// clusterIsBootstrapped returns true if the cluster has formed and has a UUID. +func clusterIsBootstrapped(observedState observer.State) bool { + return observedState.ClusterState != nil && len(observedState.ClusterState.ClusterUUID) > 0 +} + +// annotateWithUUID annotates the cluster with its UUID, to mark it as "bootstrapped". +func annotateWithUUID(cluster v1alpha1.Elasticsearch, observedState observer.State, c k8s.Client) error { + log.Info("Annotating bootstrapped cluster with its UUID", "namespace", cluster.Namespace, "es_name", cluster.Name) + if cluster.Annotations == nil { + cluster.Annotations = make(map[string]string) + } + cluster.Annotations[ClusterUUIDAnnotationName] = observedState.ClusterState.ClusterUUID + if err := c.Update(&cluster); err != nil { + return err + } + return nil +} + +// SetupInitialMasterNodes modifies the ES config of the given resources to setup +// cluster initial master nodes. +// It also saves the cluster UUID as an annotation to ensure that it's not set +// if the cluster has already been bootstrapped. +func SetupInitialMasterNodes( + cluster v1alpha1.Elasticsearch, + observedState observer.State, + c k8s.Client, + nodeSpecResources nodespec.ResourcesList, +) error { + if annotatedForBootstrap(cluster) { + // Cluster already bootstrapped, nothing to do. + return nil + } + + if clusterIsBootstrapped(observedState) { + // Cluster is not annotated for bootstrap, but should be. + if err := annotateWithUUID(cluster, observedState, c); err != nil { + return err + } + return nil + } + + // Cluster is not bootstrapped yet, set initial_master_nodes setting in each master node config. 
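	// For example, with the "master" and "masterdata" StatefulSets of 3 nodes each used
	// in the tests below, each master node's configuration is expected to render roughly as:
	//
	//	cluster.initial_master_nodes: [master-0, master-1, master-2, masterdata-0, masterdata-1, masterdata-2]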
+ masters := nodeSpecResources.MasterNodesNames() + if len(masters) == 0 { + return nil + } + for i, res := range nodeSpecResources { + if !IsCompatibleForZen2(res.StatefulSet) { + continue + } + if !label.IsMasterNodeSet(res.StatefulSet) { + // we only care about master nodes config here + continue + } + // patch config with the expected initial master nodes + if err := nodeSpecResources[i].Config.SetStrings(settings.ClusterInitialMasterNodes, masters...); err != nil { + return err + } + } + return nil +} diff --git a/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes_test.go b/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes_test.go new file mode 100644 index 0000000000..d0f3391f81 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes_test.go @@ -0,0 +1,178 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen2 + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + settings2 "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" +) + +const ( + defaultClusterUUID = "jiMyMA1hQ-WMPK3vEStZuw" +) + +func setupScheme(t *testing.T) *runtime.Scheme { + sc := scheme.Scheme + if err := v1alpha1.SchemeBuilder.AddToScheme(sc); err != nil { + assert.Fail(t, "failed to add Es types") + } + return sc +} + +var esNN = types.NamespacedName{ + Namespace: "ns1", + Name: "foo", +} + +func newElasticsearch() v1alpha1.Elasticsearch { + return v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: esNN.Namespace, + Name: esNN.Name, + }, + } +} + +func withAnnotation(es v1alpha1.Elasticsearch, name, value string) v1alpha1.Elasticsearch { + if es.Annotations == nil { + es.Annotations = make(map[string]string) + } + es.Annotations[name] = value + return es +} + +func TestSetupInitialMasterNodes_AlreadyBootstrapped(t *testing.T) { + s := setupScheme(t) + tests := []struct { + name string + es v1alpha1.Elasticsearch + observedState observer.State + nodeSpecResources nodespec.ResourcesList + expected []settings.CanonicalConfig + expectedEs v1alpha1.Elasticsearch + }{ + { + name: "cluster already annotated for bootstrap: no changes", + es: withAnnotation(newElasticsearch(), ClusterUUIDAnnotationName, defaultClusterUUID), + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("data", "7.1.0", 3, false, true), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{settings.NewCanonicalConfig()}, + expectedEs: withAnnotation(newElasticsearch(), ClusterUUIDAnnotationName, defaultClusterUUID), + }, + { + name: "cluster 
bootstrapped but not annotated: should be annotated", + es: newElasticsearch(), + observedState: observer.State{ClusterState: &client.ClusterState{ClusterUUID: defaultClusterUUID}}, + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("data", "7.1.0", 3, false, true), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{settings.NewCanonicalConfig()}, + expectedEs: withAnnotation(newElasticsearch(), ClusterUUIDAnnotationName, defaultClusterUUID), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := k8s.WrapClient(fake.NewFakeClientWithScheme(s, &tt.es)) + err := SetupInitialMasterNodes(tt.es, tt.observedState, client, tt.nodeSpecResources) + require.NoError(t, err) + // check if the ES resource was annotated + var es v1alpha1.Elasticsearch + err = client.Get(esNN, &es) + assert.NoError(t, err) + require.Equal(t, tt.expectedEs, es) + // check if nodespec config were modified + for i := 0; i < len(tt.nodeSpecResources); i++ { + expected, err := tt.expected[i].Render() + require.NoError(t, err) + actual, err := tt.nodeSpecResources[i].Config.Render() + require.NoError(t, err) + require.Equal(t, expected, actual) + } + }) + } +} + +func TestSetupInitialMasterNodes_NotBootstrapped(t *testing.T) { + tests := []struct { + name string + nodeSpecResources nodespec.ResourcesList + expected []settings.CanonicalConfig + }{ + { + name: "no master nodes", + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("data", "7.1.0", 3, false, true), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{settings.NewCanonicalConfig()}, + }, + { + name: "3 masters, 3 master+data, 3 data", + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("master", "7.1.0", 3, true, false), Config: settings.NewCanonicalConfig()}, + {StatefulSet: nodespec.CreateTestSset("masterdata", "7.1.0", 3, true, true), Config: settings.NewCanonicalConfig()}, + {StatefulSet: nodespec.CreateTestSset("data", "7.1.0", 3, false, true), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{ + {CanonicalConfig: settings2.MustCanonicalConfig(map[string][]string{ + settings.ClusterInitialMasterNodes: {"master-0", "master-1", "master-2", "masterdata-0", "masterdata-1", "masterdata-2"}, + })}, + {CanonicalConfig: settings2.MustCanonicalConfig(map[string][]string{ + settings.ClusterInitialMasterNodes: {"master-0", "master-1", "master-2", "masterdata-0", "masterdata-1", "masterdata-2"}, + })}, + // no config set on non-master nodes + {CanonicalConfig: settings2.NewCanonicalConfig()}, + }, + }, + { + name: "version <7: nothing should appear in the config", + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("master", "6.8.0", 3, true, false), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{settings.NewCanonicalConfig()}, + }, + { + name: "mixed v6 & v7: include all masters but only in v7 configs", + nodeSpecResources: nodespec.ResourcesList{ + {StatefulSet: nodespec.CreateTestSset("masterv6", "6.8.0", 3, true, false), Config: settings.NewCanonicalConfig()}, + {StatefulSet: nodespec.CreateTestSset("masterv7", "7.1.0", 3, true, false), Config: settings.NewCanonicalConfig()}, + }, + expected: []settings.CanonicalConfig{ + settings.NewCanonicalConfig(), + {CanonicalConfig: settings2.MustCanonicalConfig(map[string][]string{ + settings.ClusterInitialMasterNodes: 
{"masterv6-0", "masterv6-1", "masterv6-2", "masterv7-0", "masterv7-1", "masterv7-2"}, + })}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := SetupInitialMasterNodes(v1alpha1.Elasticsearch{}, observer.State{}, k8s.WrapClient(fake.NewFakeClient()), tt.nodeSpecResources) + require.NoError(t, err) + for i := 0; i < len(tt.nodeSpecResources); i++ { + expected, err := tt.expected[i].Render() + require.NoError(t, err) + actual, err := tt.nodeSpecResources[i].Config.Render() + require.NoError(t, err) + require.Equal(t, expected, actual) + } + }) + } +} diff --git a/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go b/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go new file mode 100644 index 0000000000..f7d6a77065 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go @@ -0,0 +1,86 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen2 + +import ( + "context" + + appsv1 "k8s.io/api/apps/v1" + logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" +) + +var ( + log = logf.Log.WithName("zen2") +) + +// AddToVotingConfigExclusions adds the given node names to exclude from voting config exclusions. +func AddToVotingConfigExclusions(esClient client.Client, sset appsv1.StatefulSet, excludeNodes []string) error { + if !IsCompatibleForZen2(sset) { + return nil + } + log.Info("Setting voting config exclusions", "namespace", sset.Namespace, "nodes", excludeNodes) + ctx, cancel := context.WithTimeout(context.Background(), client.DefaultReqTimeout) + defer cancel() + if err := esClient.AddVotingConfigExclusions(ctx, excludeNodes, ""); err != nil { + return err + } + return nil +} + +// canClearVotingConfigExclusions returns true if it is safe to clear voting config exclusions. +func canClearVotingConfigExclusions(c k8s.Client, actualStatefulSets sset.StatefulSetList) (bool, error) { + // Voting config exclusions are set before master nodes are removed on sset downscale. + // They can be cleared when: + // - nodes are effectively removed + // - nodes are expected to be in the cluster (shouldn't be removed anymore) + // They cannot be cleared when: + // - expected nodes to remove are not removed yet + for _, s := range actualStatefulSets { + if label.IsMasterNodeSet(s) { + actualPods, err := sset.GetActualPodsNames(c, s) + if err != nil { + return false, err + } + expectedPods := sset.PodNames(s) + if !stringsutil.StringsInSlice(actualPods, expectedPods) { + // some of the actual pods are not expected: they are probably not deleted yet + return false, nil + } + } + } + + return true, nil +} + +// ClearVotingConfigExclusions resets the voting config exclusions if all excluded nodes are properly removed. +// It returns true if this should be retried later (re-queued). 
+func ClearVotingConfigExclusions(es v1alpha1.Elasticsearch, c k8s.Client, esClient client.Client, actualStatefulSets sset.StatefulSetList) (bool, error) { + if !AtLeastOneNodeCompatibleForZen2(actualStatefulSets) { + return false, nil + } + canClear, err := canClearVotingConfigExclusions(c, actualStatefulSets) + if err != nil { + return false, err + } + if !canClear { + log.V(1).Info("Cannot clear voting exclusions yet", "namespace", es.Namespace, "name", es.Name) + return true, nil // requeue + } + + ctx, cancel := context.WithTimeout(context.Background(), client.DefaultReqTimeout) + defer cancel() + log.Info("Ensuring no voting exclusions are set", "namespace", es.Namespace, "name", es.Name) + if err := esClient.DeleteVotingConfigExclusions(ctx, false); err != nil { + return false, err + } + return false, nil +} diff --git a/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions_test.go b/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions_test.go new file mode 100644 index 0000000000..32ab7dfdb6 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions_test.go @@ -0,0 +1,96 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package zen2 + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" +) + +type fakeESClient struct { + called bool + client.Client +} + +func (f *fakeESClient) DeleteVotingConfigExclusions(ctx context.Context, waitForRemoval bool) error { + f.called = true + return nil +} + +func Test_ClearVotingConfigExclusions(t *testing.T) { + // dummy statefulset with 3 pods + statefulSet3rep := nodespec.CreateTestSset("nodes", "7.2.0", 3, true, true) + pods := make([]corev1.Pod, 0, *statefulSet3rep.Spec.Replicas) + for _, podName := range sset.PodNames(statefulSet3rep) { + pods = append(pods, corev1.Pod{ObjectMeta: metav1.ObjectMeta{ + Namespace: statefulSet3rep.Namespace, + Name: podName, + Labels: map[string]string{ + label.StatefulSetNameLabelName: statefulSet3rep.Name, + }, + }}) + } + // simulate 2 pods out of the 3 + statefulSet2rep := nodespec.CreateTestSset("nodes", "7.2.0", 2, true, true) + tests := []struct { + name string + c k8s.Client + actualStatefulSets sset.StatefulSetList + wantCall bool + wantRequeue bool + }{ + { + name: "no v7 nodes", + c: k8s.WrapClient(fake.NewFakeClient()), + actualStatefulSets: sset.StatefulSetList{ + createStatefulSetWithESVersion("6.8.0"), + }, + wantCall: false, + wantRequeue: false, + }, + { + name: "3/3 nodes there: can clear", + c: k8s.WrapClient(fake.NewFakeClient(&statefulSet3rep, &pods[0], &pods[1], &pods[2])), + actualStatefulSets: sset.StatefulSetList{statefulSet3rep}, + wantCall: true, + wantRequeue: false, + }, + { + name: "2/3 nodes there: can clear", + c: 
k8s.WrapClient(fake.NewFakeClient(&statefulSet3rep, &pods[0], &pods[1])), + actualStatefulSets: sset.StatefulSetList{statefulSet3rep}, + wantCall: true, + wantRequeue: false, + }, + { + name: "3/2 nodes there: cannot clear, should requeue", + c: k8s.WrapClient(fake.NewFakeClient(&statefulSet2rep, &pods[0], &pods[1], &pods[2])), + actualStatefulSets: sset.StatefulSetList{statefulSet2rep}, + wantCall: false, + wantRequeue: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + clientMock := &fakeESClient{} + requeue, err := ClearVotingConfigExclusions(v1alpha1.Elasticsearch{}, tt.c, clientMock, tt.actualStatefulSets) + require.NoError(t, err) + require.Equal(t, tt.wantRequeue, requeue) + require.Equal(t, tt.wantCall, clientMock.called) + }) + } +} From 1fce57a6be50a71170a15b07710de93477d263dc Mon Sep 17 00:00:00 2001 From: Sebastien Guilloux Date: Fri, 2 Aug 2019 09:45:29 +0200 Subject: [PATCH 15/31] Fix E2E tests to work with StatefulSets (#1459) * Add mandatory NodeSpec name * Rename apm samples resources to something unique Otherwise, volumes of resources from other tests are reused for this one. Should be solved once we remove PVCs in https://github.com/elastic/cloud-on-k8s/issues/1288. --- operators/config/samples/apm/apm_es_kibana.yaml | 10 +++++----- operators/test/e2e/test/elasticsearch/builder.go | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/operators/config/samples/apm/apm_es_kibana.yaml b/operators/config/samples/apm/apm_es_kibana.yaml index 0c4b3892de..2ffc444f01 100644 --- a/operators/config/samples/apm/apm_es_kibana.yaml +++ b/operators/config/samples/apm/apm_es_kibana.yaml @@ -3,7 +3,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1alpha1 kind: Elasticsearch metadata: - name: elasticsearch-sample + name: es-apm-sample spec: version: 7.2.0 nodes: @@ -13,19 +13,19 @@ spec: apiVersion: apm.k8s.elastic.co/v1alpha1 kind: ApmServer metadata: - name: apm-server-sample + name: apm-apm-sample spec: version: 7.2.0 nodeCount: 1 elasticsearchRef: - name: "elasticsearch-sample" + name: "es-apm-sample" --- apiVersion: kibana.k8s.elastic.co/v1alpha1 kind: Kibana metadata: - name: kibana-sample + name: kb-apm-sample spec: version: 7.2.0 nodeCount: 1 elasticsearchRef: - name: "elasticsearch-sample" + name: "es-apm-sample" diff --git a/operators/test/e2e/test/elasticsearch/builder.go b/operators/test/e2e/test/elasticsearch/builder.go index 6331ea8ab0..8328fe589c 100644 --- a/operators/test/e2e/test/elasticsearch/builder.go +++ b/operators/test/e2e/test/elasticsearch/builder.go @@ -92,6 +92,7 @@ func (b Builder) WithNoESTopology() Builder { func (b Builder) WithESMasterNodes(count int, resources corev1.ResourceRequirements) Builder { return b.withESTopologyElement(estype.NodeSpec{ + Name: "master", NodeCount: int32(count), Config: &commonv1alpha1.Config{ Data: map[string]interface{}{ @@ -104,6 +105,7 @@ func (b Builder) WithESMasterNodes(count int, resources corev1.ResourceRequireme func (b Builder) WithESDataNodes(count int, resources corev1.ResourceRequirements) Builder { return b.withESTopologyElement(estype.NodeSpec{ + Name: "data", NodeCount: int32(count), Config: &commonv1alpha1.Config{ Data: map[string]interface{}{ @@ -116,6 +118,7 @@ func (b Builder) WithESDataNodes(count int, resources corev1.ResourceRequirement func (b Builder) WithESMasterDataNodes(count int, resources corev1.ResourceRequirements) Builder { return b.withESTopologyElement(estype.NodeSpec{ + Name: "masterdata", NodeCount: int32(count), Config: &commonv1alpha1.Config{ 
Data: map[string]interface{}{}, From 58b28fb3a233b0b0ff96ca0c9248b59bd233aadf Mon Sep 17 00:00:00 2001 From: Sebastien Guilloux Date: Fri, 2 Aug 2019 11:41:57 +0200 Subject: [PATCH 16/31] Refactor driver & pod creation logic (#1295) Refactor the main driver and ES pod creation logic, towards simplification of the codebase. Main changes: StatefulSet and PodSpec creation logic lives in the NodeSpec package No more catch-all version/common.go. The only difference between versions for now is explicitly zen1 vs. zen2. No more pod package (with conflicting variable/package name) No more nested indirection from driver -> defaultDriver -> NewPodSpecParams -> podSpec(func, func, func, func) -> buildPodTemplateSpec(func, func, func, func). Default driver code attempts to be minimalistic (while still being a sequential list of function calls) No more "generic resources" since that was only for the external service anyway Remove deprecated code in mutation, pvc, pod package (a lot of the comparison/CalculateChanges/PVC reuse/PodWithConfig/PodSpecContext mechanism we had before) * Move supported version check from driver to version pkg * Simplify driver creation * Extract driver nodes reconciliation to its own file * Move expectations to the common pkg and add unit tests * Move configmap reconciliation to its own file * Move DynamicEnvVars to defaults pkg and use it in more places * Extract es env vars away from the version pkg * Move keystore init container params away from the main driver * Move sset build logic into the nodespec pkg * Move readiness probe in the nodespec pkg * Move some es pod consts into the nodespec pkg * Refactor pod template build * Remove concept of generic resources since used for a single service * Remove deprecated pod creation + PVC reuse code * Remove most of the mutation package (RIP) * Remove the PVC package (RIP) * Remove PodWithConfig and PodSpecContext (yay!) * Get rid of the pod pkg (yay!) 
* Fix supported versions initialization * Add missing license header * Remove deprecated pv reuse e2e test * Remove a bunch of unused functions * Make linter happy with appendAssign * Fix downward env defaults in apm unit tests * Move PodsByName to the utils pkg * Fix typo * Fix reference to the keystore initcontainer and add a unit test * Remove dead code * Address PR review --- .../apmserver/apmserver_controller_test.go | 46 +- operators/pkg/controller/apmserver/pod.go | 22 +- .../pkg/controller/apmserver/pod_test.go | 8 + .../common/defaults/pod_template.go | 19 +- .../common/defaults/pod_template_test.go | 9 +- .../reconciler/expectations.go} | 4 +- .../common/reconciler/expectations_test.go | 35 ++ .../certificates/transport/reconcile.go | 3 +- .../elasticsearch/configmap/configmap.go | 25 + .../elasticsearch/driver/default.go | 420 --------------- .../elasticsearch/driver/default_test.go | 326 ------------ .../elasticsearch/driver/downscale.go | 17 +- .../controller/elasticsearch/driver/driver.go | 280 +++++++--- .../elasticsearch/driver/driver_test.go | 66 --- .../elasticsearch/driver/generic_resources.go | 42 -- .../controller/elasticsearch/driver/nodes.go | 134 +++++ .../controller/elasticsearch/driver/pods.go | 199 ------- .../elasticsearch/driver/upgrade.go | 21 +- .../elasticsearch/elasticsearch_controller.go | 43 +- .../pkg/controller/elasticsearch/env/vars.go | 21 - .../initcontainer/initcontainer.go | 7 +- .../initcontainer/initcontainer_test.go | 13 + .../elasticsearch/initcontainer/keystore.go | 18 + .../elasticsearch/initcontainer/prepare_fs.go | 7 +- .../elasticsearch/mutation/calculate.go | 137 ----- .../elasticsearch/mutation/calculate_test.go | 146 ------ .../elasticsearch/mutation/change_group.go | 281 ---------- .../mutation/change_group_test.go | 280 ---------- .../elasticsearch/mutation/changes.go | 157 ------ .../elasticsearch/mutation/changes_test.go | 349 ------------- .../mutation/comparison/comparison.go | 32 -- .../mutation/comparison/config.go | 32 -- .../mutation/comparison/config_test.go | 132 ----- .../elasticsearch/mutation/comparison/pod.go | 73 --- .../elasticsearch/mutation/comparison/pvc.go | 134 ----- .../elasticsearch/mutation/mutation.go | 11 - .../elasticsearch/mutation/performable.go | 120 ----- .../mutation/performable_test.go | 354 ------------- .../elasticsearch/mutation/podrestrictions.go | 74 --- .../mutation/podrestrictions_test.go | 165 ------ .../elasticsearch/mutation/pods_state.go | 314 ----------- .../elasticsearch/mutation/pods_state_test.go | 203 -------- .../elasticsearch/mutation/sorting.go | 74 --- .../elasticsearch/mutation/sorting_test.go | 130 ----- .../elasticsearch/nodespec/defaults.go | 77 +++ .../elasticsearch/nodespec/podspec.go | 113 ++++ .../elasticsearch/nodespec/podspec_test.go | 182 +++++++ .../{pod => nodespec}/readiness_probe.go | 2 +- .../elasticsearch/nodespec/resources.go | 8 +- .../build.go => nodespec/statefulset.go} | 28 +- .../elasticsearch/nodespec/volumes.go | 93 ++++ .../pkg/controller/elasticsearch/pod/pod.go | 131 ----- .../pkg/controller/elasticsearch/pvc/pvc.go | 185 ------- .../controller/elasticsearch/pvc/pvc_test.go | 489 ------------------ .../controller/elasticsearch/reconcile/log.go | 9 - .../reconcile/resources_state.go | 110 +--- .../reconcile/resources_state_test.go | 148 ------ .../elasticsearch/reconcile/state.go | 2 +- .../validation/upgrade_checks.go | 4 +- .../elasticsearch/validation/validations.go | 4 +- .../elasticsearch/version/common.go | 220 -------- 
.../elasticsearch/version/common_test.go | 422 --------------- .../version/supported_versions.go | 21 + .../version/supported_versions_test.go | 56 ++ .../version/zen1/compatibility.go | 8 +- .../version/zen1/compatibility_test.go | 8 +- .../version/zen1/minimum_masters.go | 4 +- .../elasticsearch/version/zen1/podspecs.go | 29 -- .../version/zen1/podspecs_test.go | 63 --- .../version/zen2/compatibility.go | 8 +- .../version/zen2/compatibility_test.go | 8 +- .../version/zen2/initial_master_nodes.go | 2 +- .../version/zen2/voting_exclusions.go | 4 +- operators/pkg/utils/k8s/k8sutils.go | 9 + operators/test/e2e/es/failure_test.go | 124 ----- 75 files changed, 1140 insertions(+), 6414 deletions(-) rename operators/pkg/controller/{elasticsearch/driver/generation.go => common/reconciler/expectations.go} (93%) create mode 100644 operators/pkg/controller/common/reconciler/expectations_test.go delete mode 100644 operators/pkg/controller/elasticsearch/driver/default.go delete mode 100644 operators/pkg/controller/elasticsearch/driver/default_test.go delete mode 100644 operators/pkg/controller/elasticsearch/driver/driver_test.go delete mode 100644 operators/pkg/controller/elasticsearch/driver/generic_resources.go create mode 100644 operators/pkg/controller/elasticsearch/driver/nodes.go delete mode 100644 operators/pkg/controller/elasticsearch/driver/pods.go delete mode 100644 operators/pkg/controller/elasticsearch/env/vars.go create mode 100644 operators/pkg/controller/elasticsearch/initcontainer/keystore.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/calculate.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/calculate_test.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/change_group.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/change_group_test.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/changes.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/changes_test.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/comparison/comparison.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/comparison/config.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/comparison/config_test.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/comparison/pod.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/comparison/pvc.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/mutation.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/performable.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/performable_test.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/podrestrictions.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/podrestrictions_test.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/pods_state.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/pods_state_test.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/sorting.go delete mode 100644 operators/pkg/controller/elasticsearch/mutation/sorting_test.go create mode 100644 operators/pkg/controller/elasticsearch/nodespec/defaults.go create mode 100644 operators/pkg/controller/elasticsearch/nodespec/podspec.go create mode 100644 operators/pkg/controller/elasticsearch/nodespec/podspec_test.go rename operators/pkg/controller/elasticsearch/{pod => nodespec}/readiness_probe.go (98%) rename 
operators/pkg/controller/elasticsearch/{sset/build.go => nodespec/statefulset.go} (78%) create mode 100644 operators/pkg/controller/elasticsearch/nodespec/volumes.go delete mode 100644 operators/pkg/controller/elasticsearch/pod/pod.go delete mode 100644 operators/pkg/controller/elasticsearch/pvc/pvc.go delete mode 100644 operators/pkg/controller/elasticsearch/pvc/pvc_test.go delete mode 100644 operators/pkg/controller/elasticsearch/reconcile/log.go delete mode 100644 operators/pkg/controller/elasticsearch/reconcile/resources_state_test.go delete mode 100644 operators/pkg/controller/elasticsearch/version/common.go delete mode 100644 operators/pkg/controller/elasticsearch/version/common_test.go delete mode 100644 operators/pkg/controller/elasticsearch/version/zen1/podspecs.go delete mode 100644 operators/pkg/controller/elasticsearch/version/zen1/podspecs_test.go diff --git a/operators/pkg/controller/apmserver/apmserver_controller_test.go b/operators/pkg/controller/apmserver/apmserver_controller_test.go index 736b1b231c..1264ed0e08 100644 --- a/operators/pkg/controller/apmserver/apmserver_controller_test.go +++ b/operators/pkg/controller/apmserver/apmserver_controller_test.go @@ -10,6 +10,7 @@ import ( apmv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates/http" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" @@ -69,25 +70,7 @@ func (tp testParams) withInitContainer() testParams { }, Name: "", Image: "docker.elastic.co/apm/apm-server:1.0", - Env: []corev1.EnvVar{{ - Name: "POD_NAME", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - APIVersion: "v1", - FieldPath: "metadata.name", - }, - }, - }, - { - Name: "POD_IP", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - APIVersion: "v1", - FieldPath: "status.podIP", - }, - }, - }, - }, + Env: defaults.PodDownwardEnvVars, }, } return tp @@ -137,7 +120,6 @@ func expectedDeploymentParams() testParams { }, Containers: []corev1.Container{{ VolumeMounts: []corev1.VolumeMount{ - { Name: "config", ReadOnly: true, @@ -163,27 +145,17 @@ func expectedDeploymentParams() testParams { "-c", "config/config-secret/apm-server.yml", }, - Env: []corev1.EnvVar{{ - Name: "POD_NAME", + Env: append(defaults.PodDownwardEnvVars, corev1.EnvVar{ + Name: "SECRET_TOKEN", ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - APIVersion: "v1", - FieldPath: "metadata.name", - }, - }, - }, - { - Name: "SECRET_TOKEN", - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: "test-apm-server-apm-token", - }, - Key: "secret-token", + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test-apm-server-apm-token", }, + Key: "secret-token", }, }, - }, + }), Ports: []corev1.ContainerPort{ {Name: "http", ContainerPort: int32(8200), Protocol: corev1.ProtocolTCP}, }, diff --git a/operators/pkg/controller/apmserver/pod.go b/operators/pkg/controller/apmserver/pod.go index 7f483fb904..d2f08eb0ff 100644 --- a/operators/pkg/controller/apmserver/pod.go +++ 
b/operators/pkg/controller/apmserver/pod.go @@ -87,23 +87,15 @@ func newPodSpec(as *v1alpha1.ApmServer, p PodSpecParams) corev1.PodTemplateSpec filepath.Join(ConfigVolumePath, "config-secret"), ) - env := []corev1.EnvVar{ - { - Name: "POD_NAME", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "metadata.name"}, + env := append(defaults.PodDownwardEnvVars, corev1.EnvVar{ + Name: "SECRET_TOKEN", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: p.ApmServerSecret.Name}, + Key: SecretTokenKey, }, }, - { - Name: "SECRET_TOKEN", - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{Name: p.ApmServerSecret.Name}, - Key: SecretTokenKey, - }, - }, - }, - } + }) builder := defaults.NewPodTemplateBuilder( p.PodTemplate, v1alpha1.APMServerContainerName). diff --git a/operators/pkg/controller/apmserver/pod_test.go b/operators/pkg/controller/apmserver/pod_test.go index 875e2660c3..4a38957a78 100644 --- a/operators/pkg/controller/apmserver/pod_test.go +++ b/operators/pkg/controller/apmserver/pod_test.go @@ -10,6 +10,8 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -64,6 +66,12 @@ func TestNewPodSpec(t *testing.T) { Name: v1alpha1.APMServerContainerName, Image: imageWithVersion(defaultImageRepositoryAndName, "7.0.1"), Env: []corev1.EnvVar{ + { + Name: settings.EnvPodIP, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "status.podIP"}, + }, + }, { Name: "POD_NAME", ValueFrom: &corev1.EnvVarSource{ diff --git a/operators/pkg/controller/common/defaults/pod_template.go b/operators/pkg/controller/common/defaults/pod_template.go index f5cfc36415..9d1e673b3d 100644 --- a/operators/pkg/controller/common/defaults/pod_template.go +++ b/operators/pkg/controller/common/defaults/pod_template.go @@ -7,8 +7,21 @@ package defaults import ( "sort" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/env" corev1 "k8s.io/api/core/v1" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" +) + +var ( + // PodDownwardEnvVars inject the runtime Pod Name and IP as environment variables. + PodDownwardEnvVars = []corev1.EnvVar{ + {Name: settings.EnvPodIP, Value: "", ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "status.podIP"}, + }}, + {Name: settings.EnvPodName, Value: "", ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "metadata.name"}, + }}, + } ) // PodTemplateBuilder helps with building a pod template inheriting values @@ -184,7 +197,7 @@ func (b *PodTemplateBuilder) envExists(name string) bool { return false } -// WithEnv appends the given en vars to the Container, unless already provided in the template. +// WithEnv appends the given env vars to the Container, unless already provided in the template. 
+// WithEnv appends the given env vars to the Container, unless already provided in the template.
func (b *PodTemplateBuilder) WithEnv(vars ...corev1.EnvVar) *PodTemplateBuilder { for _, v := range vars { if !b.envExists(v.Name) { @@ -246,7 +259,7 @@ func (b *PodTemplateBuilder) WithInitContainerDefaults() *PodTemplateBuilder { } // append the dynamic pod name and IP env vars - c.Env = append(c.Env, env.DynamicPodEnvVars...) + c.Env = append(c.Env, PodDownwardEnvVars...) } return b } diff --git a/operators/pkg/controller/common/defaults/pod_template_test.go b/operators/pkg/controller/common/defaults/pod_template_test.go index afe4468552..936ddc95db 100644 --- a/operators/pkg/controller/common/defaults/pod_template_test.go +++ b/operators/pkg/controller/common/defaults/pod_template_test.go @@ -8,7 +8,6 @@ import ( "reflect" "testing" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/env" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -755,13 +754,13 @@ func TestPodTemplateBuilder_WithInitContainerDefaults(t *testing.T) { { Name: "user-init-container1", Image: "user-image", - Env: env.DynamicPodEnvVars, + Env: PodDownwardEnvVars, VolumeMounts: defaultVolumeMounts, }, { Name: "user-init-container2", Image: "default-image", - Env: env.DynamicPodEnvVars, + Env: PodDownwardEnvVars, VolumeMounts: []corev1.VolumeMount{{ Name: "foo", MountPath: "/foo", @@ -771,7 +770,7 @@ func TestPodTemplateBuilder_WithInitContainerDefaults(t *testing.T) { { Name: "user-init-container3", Image: "default-image", - Env: env.DynamicPodEnvVars, + Env: PodDownwardEnvVars, // uses the same mount path as a default mount, so default mount should not be used VolumeMounts: []corev1.VolumeMount{{ Name: "bar", @@ -781,7 +780,7 @@ func TestPodTemplateBuilder_WithInitContainerDefaults(t *testing.T) { { Name: "user-init-container4", Image: "default-image", - Env: env.DynamicPodEnvVars, + Env: PodDownwardEnvVars, // uses the same name as a default mount, so default mount should not be used VolumeMounts: []corev1.VolumeMount{{ Name: defaultVolumeMount.Name, diff --git a/operators/pkg/controller/elasticsearch/driver/generation.go b/operators/pkg/controller/common/reconciler/expectations.go similarity index 93% rename from operators/pkg/controller/elasticsearch/driver/generation.go rename to operators/pkg/controller/common/reconciler/expectations.go index 5bedbb063e..b92b4e9927 100644 --- a/operators/pkg/controller/elasticsearch/driver/generation.go +++ b/operators/pkg/controller/common/reconciler/expectations.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package driver +package reconciler import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -14,7 +14,7 @@ type Expectations struct { generations map[types.UID]int64 } -func NewGenerationExpectations() *Expectations { +func NewExpectations() *Expectations { return &Expectations{ generations: make(map[types.UID]int64), } diff --git a/operators/pkg/controller/common/reconciler/expectations_test.go b/operators/pkg/controller/common/reconciler/expectations_test.go new file mode 100644 index 0000000000..52d687414c --- /dev/null +++ b/operators/pkg/controller/common/reconciler/expectations_test.go @@ -0,0 +1,35 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package reconciler + +import ( + "testing" + + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +func TestExpectations(t *testing.T) { + expectations := NewExpectations() + // check expectations that were not set + obj := metav1.ObjectMeta{ + UID: types.UID("abc"), + Name: "name", + Namespace: "namespace", + Generation: 2, + } + require.True(t, expectations.GenerationExpected(obj)) + // set expectations + expectations.ExpectGeneration(obj) + // check expectations are met for this object + require.True(t, expectations.GenerationExpected(obj)) + // but not for the same object with a smaller generation + obj.Generation = 1 + require.False(t, expectations.GenerationExpected(obj)) + // a different object (different UID) should have expectations met + obj.UID = types.UID("another") + require.True(t, expectations.GenerationExpected(obj)) +} diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go index a0a6a4d31d..2130a156b1 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go @@ -15,7 +15,6 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -68,7 +67,7 @@ func ReconcileTransportCertificatesSecrets( } // remove certificates and keys for deleted pods - podsByName := pod.PodsByName(pods.Items) + podsByName := k8s.PodsByName(pods.Items) keysToPrune := make([]string, 0) for secretDataKey := range secret.Data { if secretDataKey == certificates.CAFileName { diff --git a/operators/pkg/controller/elasticsearch/configmap/configmap.go b/operators/pkg/controller/elasticsearch/configmap/configmap.go index a3520662cb..afea378314 100644 --- a/operators/pkg/controller/elasticsearch/configmap/configmap.go +++ b/operators/pkg/controller/elasticsearch/configmap/configmap.go @@ -5,9 +5,15 @@ package configmap import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ) @@ -22,3 +28,22 @@ func NewConfigMapWithData(es types.NamespacedName, data map[string]string) corev Data: data, } } + +// ReconcileScriptsConfigMap reconciles a configmap containing scripts used by +// init containers and readiness probe. 
+func ReconcileScriptsConfigMap(c k8s.Client, scheme *runtime.Scheme, es v1alpha1.Elasticsearch) error { + fsScript, err := initcontainer.RenderPrepareFsScript() + if err != nil { + return err + } + + scriptsConfigMap := NewConfigMapWithData( + types.NamespacedName{Namespace: es.Namespace, Name: name.ScriptsConfigMap(es.Name)}, + map[string]string{ + nodespec.ReadinessProbeScriptConfigKey: nodespec.ReadinessProbeScript, + initcontainer.PrepareFsScriptConfigKey: fsScript, + }, + ) + + return ReconcileConfigMap(c, scheme, es, scriptsConfigMap) +} diff --git a/operators/pkg/controller/elasticsearch/driver/default.go b/operators/pkg/controller/elasticsearch/driver/default.go deleted file mode 100644 index 2822b65a28..0000000000 --- a/operators/pkg/controller/elasticsearch/driver/default.go +++ /dev/null @@ -1,420 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package driver - -import ( - "crypto/x509" - "fmt" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - controller "sigs.k8s.io/controller-runtime/pkg/reconcile" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen2" - esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/certificates" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/cleanup" - esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/configmap" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/license" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/network" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pdb" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/services" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" - esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" - 
"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen1" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" -) - -// initContainerParams is used to generate the init container that will load the secure settings into a keystore -var initContainerParams = keystore.InitContainerParameters{ - KeystoreCreateCommand: "/usr/share/elasticsearch/bin/elasticsearch-keystore create", - KeystoreAddCommand: `/usr/share/elasticsearch/bin/elasticsearch-keystore add-file "$key" "$filename"`, - SecureSettingsVolumeMountPath: keystore.SecureSettingsVolumeMountPath, - DataVolumePath: esvolume.ElasticsearchDataMountPath, -} - -// defaultDriver is the default Driver implementation -type defaultDriver struct { - // Options are the options that the driver was created with. - Options - - expectations *Expectations - - // supportedVersions verifies whether we can support upgrading from the current pods. - supportedVersions esversion.LowestHighestSupportedVersions - - // usersReconciler reconciles external and internal users and returns the current internal users. - usersReconciler func( - c k8s.Client, - scheme *runtime.Scheme, - es v1alpha1.Elasticsearch, - ) (*user.InternalUsers, error) - - // expectedPodsAndResourcesResolver returns a list of pod specs with context that we would expect to find in the - // Elasticsearch cluster. - // - // paramsTmpl argument is a partially filled NewPodSpecParams (TODO: refactor into its own params struct) - //expectedPodsAndResourcesResolver func( - // es v1alpha1.Elasticsearch, - // paramsTmpl pod.NewPodSpecParams, - //) ([]pod.PodSpecContext, error) - - // observedStateResolver resolves the currently observed state of Elasticsearch from the ES API - observedStateResolver func(clusterName types.NamespacedName, esClient esclient.Client) observer.State - - // resourcesStateResolver resolves the current state of the K8s resources from the K8s API - resourcesStateResolver func( - c k8s.Client, - es v1alpha1.Elasticsearch, - ) (*reconcile.ResourcesState, error) - - // TODO: implement - // // apiObjectsGarbageCollector garbage collects API objects for older versions once they are no longer needed. - // apiObjectsGarbageCollector func( - // c k8s.Client, - // es v1alpha1.Elasticsearch, - // version version.Version, - // state mutation.PodsState, - // ) (reconcile.Result, error) // could get away with one impl -} - -// Reconcile fulfills the Driver interface and reconciles the cluster resources. 
-func (d *defaultDriver) Reconcile( - es v1alpha1.Elasticsearch, - reconcileState *reconcile.State, -) *reconciler.Results { - results := &reconciler.Results{} - - // garbage collect secrets attached to this cluster that we don't need anymore - if err := cleanup.DeleteOrphanedSecrets(d.Client, es); err != nil { - return results.WithError(err) - } - - if err := reconcileScriptsConfigMap(d.Client, d.Scheme, es); err != nil { - return results.WithError(err) - } - - genericResources, res := reconcileGenericResources( - d.Client, - d.Scheme, - es, - ) - if results.WithResults(res).HasError() { - return results - } - - certificateResources, res := certificates.Reconcile( - d.Client, - d.Scheme, - d.DynamicWatches, - es, - []corev1.Service{genericResources.ExternalService}, - d.Parameters.CACertRotation, - d.Parameters.CertRotation, - ) - if results.WithResults(res).HasError() { - return results - } - - internalUsers, err := d.usersReconciler(d.Client, d.Scheme, es) - if err != nil { - return results.WithError(err) - } - - resourcesState, err := d.resourcesStateResolver(d.Client, es) - if err != nil { - return results.WithError(err) - } - min, err := esversion.MinVersion(resourcesState.CurrentPods.Pods()) - if err != nil { - return results.WithError(err) - } - if min == nil { - min = &d.Version - } - - warnUnsupportedDistro(resourcesState.AllPods, reconcileState.Recorder) - - observedState := d.observedStateResolver( - k8s.ExtractNamespacedName(&es), - d.newElasticsearchClient( - genericResources.ExternalService, - internalUsers.ControllerUser, - *min, - certificateResources.TrustedHTTPCertificates, - )) - - // always update the elasticsearch state bits - if observedState.ClusterState != nil && observedState.ClusterHealth != nil { - reconcileState.UpdateElasticsearchState(*resourcesState, observedState) - } - - if err := pdb.Reconcile(d.Client, d.Scheme, es); err != nil { - return results.WithError(err) - } - - if err := d.supportedVersions.VerifySupportsExistingPods(resourcesState.CurrentPods.Pods()); err != nil { - return results.WithError(err) - } - - // TODO: support user-supplied certificate (non-ca) - esClient := d.newElasticsearchClient( - genericResources.ExternalService, - internalUsers.ControllerUser, - *min, - certificateResources.TrustedHTTPCertificates, - ) - defer esClient.Close() - - esReachable, err := services.IsServiceReady(d.Client, genericResources.ExternalService) - if err != nil { - return results.WithError(err) - } - - results.Apply( - "reconcile-cluster-license", - func() (controller.Result, error) { - err := license.Reconcile( - d.Client, - es, - esClient, - observedState.ClusterLicense, - ) - if err != nil && esReachable { - reconcileState.AddEvent( - corev1.EventTypeWarning, - events.EventReasonUnexpected, - fmt.Sprintf("Could not update cluster license: %s", err.Error()), - ) - return defaultRequeue, err - } - return controller.Result{}, err - }, - ) - - // Compute seed hosts based on current masters with a podIP - if err := settings.UpdateSeedHostsConfigMap(d.Client, d.Scheme, es, resourcesState.AllPods); err != nil { - return results.WithError(err) - } - - // setup a keystore with secure settings in an init container, if specified by the user - keystoreResources, err := keystore.NewResources( - d.Client, - d.Recorder, - d.DynamicWatches, - &es, - initContainerParams, - ) - if err != nil { - return results.WithError(err) - } - - // TODO: this is a mess, refactor and unit test correctly - podTemplateSpecBuilder := func(nodeSpec v1alpha1.NodeSpec, cfg 
settings.CanonicalConfig) (corev1.PodTemplateSpec, error) { - return esversion.BuildPodTemplateSpec( - es, - nodeSpec, - pod.NewPodSpecParams{ - ProbeUser: internalUsers.ProbeUser.Auth(), - UnicastHostsVolume: volume.NewConfigMapVolume( - name.UnicastHostsConfigMap(es.Name), esvolume.UnicastHostsVolumeName, esvolume.UnicastHostsVolumeMountPath, - ), - UsersSecretVolume: volume.NewSecretVolumeWithMountPath( - user.XPackFileRealmSecretName(es.Name), - esvolume.XPackFileRealmVolumeName, - esvolume.XPackFileRealmVolumeMountPath, - ), - KeystoreResources: keystoreResources, - }, - cfg, - zen1.NewEnvironmentVars, - initcontainer.NewInitContainers, - ) - } - - res = d.reconcileNodeSpecs(es, esReachable, podTemplateSpecBuilder, esClient, reconcileState, observedState, *resourcesState) - if results.WithResults(res).HasError() { - return results - } - - reconcileState.UpdateElasticsearchState(*resourcesState, observedState) - - return results -} - -func (d *defaultDriver) reconcileNodeSpecs( - es v1alpha1.Elasticsearch, - esReachable bool, - podSpecBuilder esversion.PodTemplateSpecBuilder, - esClient esclient.Client, - reconcileState *reconcile.State, - observedState observer.State, - resourcesState reconcile.ResourcesState, -) *reconciler.Results { - results := &reconciler.Results{} - - actualStatefulSets, err := sset.RetrieveActualStatefulSets(d.Client, k8s.ExtractNamespacedName(&es)) - if err != nil { - return results.WithError(err) - } - - if !d.expectations.GenerationExpected(actualStatefulSets.ObjectMetas()...) { - // Our cache of StatefulSets is out of date compared to previous reconciliation operations. - // This will probably lead to conflicting sset updates (which is ok), but also to - // conflicting ES calls (set/reset zen1/zen2/allocation excludes, etc.), which may not be ok. - log.V(1).Info("StatefulSet cache out-of-date, re-queueing", "namespace", es.Namespace, "es_name", es.Name) - return results.WithResult(defaultRequeue) - } - - nodeSpecResources, err := nodespec.BuildExpectedResources(es, podSpecBuilder) - if err != nil { - return results.WithError(err) - } - - // TODO: there is a split brain possibility here if going from 1 to 3 masters or 3 to 7. - // See https://github.com/elastic/cloud-on-k8s/issues/1281. - - // patch configs to consider zen1 minimum master nodes - if err := zen1.SetupMinimumMasterNodesConfig(nodeSpecResources); err != nil { - return results.WithError(err) - } - // patch configs to consider zen2 initial master nodes - if err := zen2.SetupInitialMasterNodes(es, observedState, d.Client, nodeSpecResources); err != nil { - return results.WithError(err) - } - - // Phase 1: apply expected StatefulSets resources, but don't scale down. - // The goal is to: - // 1. scale sset up (eg. go from 3 to 5 replicas). - // 2. apply configuration changes on the sset resource, to be used for future pods creation/recreation, - // but do not rotate pods yet. - // 3. do **not** apply replicas scale down, otherwise nodes would be deleted before - // we handle a clean deletion. 
- for _, nodeSpecRes := range nodeSpecResources { - // always reconcile config (will apply to new & recreated pods) - if err := settings.ReconcileConfig(d.Client, es, nodeSpecRes.StatefulSet.Name, nodeSpecRes.Config); err != nil { - return results.WithError(err) - } - if _, err := common.ReconcileService(d.Client, d.Scheme, &nodeSpecRes.HeadlessService, &es); err != nil { - return results.WithError(err) - } - ssetToApply := *nodeSpecRes.StatefulSet.DeepCopy() - actual, exists := actualStatefulSets.GetByName(ssetToApply.Name) - if exists && sset.Replicas(ssetToApply) < sset.Replicas(actual) { - // sset needs to be scaled down - // update the sset to use the new spec but don't scale replicas down for now - ssetToApply.Spec.Replicas = actual.Spec.Replicas - } - if err := sset.ReconcileStatefulSet(d.Client, d.Scheme, es, ssetToApply); err != nil { - return results.WithError(err) - } - } - - if !esReachable { - // Cannot perform next operations if we cannot request Elasticsearch. - log.Info("ES external service not ready yet for further reconciliation, re-queuing.", "namespace", es.Namespace, "es_name", es.Name) - reconcileState.UpdateElasticsearchPending(resourcesState.CurrentPods.Pods()) - return results.WithResult(defaultRequeue) - } - - // Update Zen1 minimum master nodes through the API, corresponding to the current nodes we have. - requeue, err := zen1.UpdateMinimumMasterNodes(d.Client, es, esClient, actualStatefulSets, reconcileState) - if err != nil { - return results.WithError(err) - } - if requeue { - results.WithResult(defaultRequeue) - } - // Maybe clear zen2 voting config exclusions. - requeue, err = zen2.ClearVotingConfigExclusions(es, d.Client, esClient, actualStatefulSets) - if err != nil { - return results.WithError(err) - } - if requeue { - results.WithResult(defaultRequeue) - } - - // Phase 2: handle sset scale down. - // We want to safely remove nodes from the cluster, either because the sset requires less replicas, - // or because it should be removed entirely. - downscaleRes := d.HandleDownscale(es, nodeSpecResources.StatefulSets(), actualStatefulSets, esClient, observedState, reconcileState) - results.WithResults(downscaleRes) - if downscaleRes.HasError() { - return results - } - - // Phase 3: handle rolling upgrades. - // Control nodes restart (upgrade) by manually decrementing rollingUpdate.Partition. 
- rollingUpgradesRes := d.handleRollingUpgrades(es, esClient, actualStatefulSets) - results.WithResults(rollingUpgradesRes) - if rollingUpgradesRes.HasError() { - return results - } - - // TODO: - // - change budget - // - grow and shrink - return results -} - -// newElasticsearchClient creates a new Elasticsearch HTTP client for this cluster using the provided user -func (d *defaultDriver) newElasticsearchClient(service corev1.Service, user user.User, v version.Version, caCerts []*x509.Certificate) esclient.Client { - url := fmt.Sprintf("https://%s.%s.svc:%d", service.Name, service.Namespace, network.HTTPPort) - return esclient.NewElasticsearchClient(d.Dialer, url, user.Auth(), v, caCerts) -} - -func reconcileScriptsConfigMap(c k8s.Client, scheme *runtime.Scheme, es v1alpha1.Elasticsearch) error { - fsScript, err := initcontainer.RenderPrepareFsScript() - if err != nil { - return err - } - - scriptsConfigMap := configmap.NewConfigMapWithData( - types.NamespacedName{Namespace: es.Namespace, Name: name.ScriptsConfigMap(es.Name)}, - map[string]string{ - pod.ReadinessProbeScriptConfigKey: pod.ReadinessProbeScript, - initcontainer.PrepareFsScriptConfigKey: fsScript, - }) - - if err := configmap.ReconcileConfigMap(c, scheme, es, scriptsConfigMap); err != nil { - return err - } - - return nil -} - -// warnUnsupportedDistro sends an event of type warning if the Elasticsearch Docker image is not a supported -// distribution by looking at if the prepare fs init container terminated with the UnsupportedDistro exit code. -func warnUnsupportedDistro(pods []corev1.Pod, recorder *events.Recorder) { - for _, p := range pods { - for _, s := range p.Status.InitContainerStatuses { - state := s.LastTerminationState.Terminated - if s.Name == initcontainer.PrepareFilesystemContainerName && - state != nil && state.ExitCode == initcontainer.UnsupportedDistroExitCode { - recorder.AddEvent(corev1.EventTypeWarning, events.EventReasonUnexpected, - "Unsupported distribution") - } - } - } -} diff --git a/operators/pkg/controller/elasticsearch/driver/default_test.go b/operators/pkg/controller/elasticsearch/driver/default_test.go deleted file mode 100644 index b8ee928c56..0000000000 --- a/operators/pkg/controller/elasticsearch/driver/default_test.go +++ /dev/null @@ -1,326 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package driver - -// -//const ( -// ClusterStateSample = ` -//{ -// "cluster_name": "elasticsearch-sample", -// "compressed_size_in_bytes": 10281, -// "cluster_uuid": "fW1CurdKQpa-vsEYgTwkvg", -// "version": 28, -// "state_uuid": "0_7Tkm3ERdeB5eOqEgdOcA", -// "master_node": "EizpW8QWRty_T1nJpr-dNQ", -// "nodes": { -// "EizpW8QWRty_T1nJpr-dNQ": { -// "name": "elasticsearch-sample-es-fnsgkkdl85", -// "ephemeral_id": "hd8VlWVdTlyCriXKDW-5kg", -// "transport_address": "172.17.0.10:9300", -// "attributes": { -// "xpack.installed": "true" -// } -// }, -// "NRqCLTmhTLuSxzlWcTae3A": { -// "name": "elasticsearch-sample-es-79gc6p57rs", -// "ephemeral_id": "VHAy3TOxTby3fNaPpMgfkg", -// "transport_address": "172.17.0.9:9300", -// "attributes": { -// "xpack.installed": "true" -// } -// }, -// "q--ANfDnTKW2WS9pEBuLWQ": { -// "name": "elasticsearch-sample-es-jfpqbt2s4q", -// "ephemeral_id": "USglep8YTW-4vZ9M7PyRqA", -// "transport_address": "172.17.0.7:9300", -// "attributes": { -// "xpack.installed": "true" -// } -// } -// }, -// "routing_table": { -// "indices": { -// "shakespeare": { -// "shards": { -// "0": [ -// { -// "state": "STARTED", -// "primary": true, -// "node": "q--ANfDnTKW2WS9pEBuLWQ", -// "relocating_node": null, -// "shard": 0, -// "index": "shakespeare", -// "allocation_id": { -// "id": "TtAx_PMwRCmanPR7XddWmg" -// } -// }, -// { -// "state": "STARTED", -// "primary": false, -// "node": "EizpW8QWRty_T1nJpr-dNQ", -// "relocating_node": null, -// "shard": 0, -// "index": "shakespeare", -// "allocation_id": { -// "id": "QddiDZTHTuStDTIKSOIk5A" -// } -// } -// ], -// "1": [ -// { -// "state": "STARTED", -// "primary": true, -// "node": "NRqCLTmhTLuSxzlWcTae3A", -// "relocating_node": null, -// "shard": 1, -// "index": "shakespeare", -// "allocation_id": { -// "id": "IzFuExmARziQWcX8RlaZdg" -// } -// }, -// { -// "state": "STARTED", -// "primary": false, -// "node": "EizpW8QWRty_T1nJpr-dNQ", -// "relocating_node": null, -// "shard": 1, -// "index": "shakespeare", -// "allocation_id": { -// "id": "XqIv4y1rQf6aL5C63Xsbhg" -// } -// } -// ], -// "2": [ -// { -// "state": "STARTED", -// "primary": false, -// "node": "q--ANfDnTKW2WS9pEBuLWQ", -// "relocating_node": null, -// "shard": 2, -// "index": "shakespeare", -// "allocation_id": { -// "id": "XCAywOULRf66CR2xugkIpg" -// } -// }, -// { -// "state": "STARTED", -// "primary": true, -// "node": "EizpW8QWRty_T1nJpr-dNQ", -// "relocating_node": null, -// "shard": 2, -// "index": "shakespeare", -// "allocation_id": { -// "id": "yNuj-Rw7QkC74opnoRQIqQ" -// } -// } -// ], -// "3": [ -// { -// "state": "STARTED", -// "primary": true, -// "node": "q--ANfDnTKW2WS9pEBuLWQ", -// "relocating_node": null, -// "shard": 3, -// "index": "shakespeare", -// "allocation_id": { -// "id": "foOkK0oWTAaFTg-M41sMgQ" -// } -// }, -// { -// "state": "STARTED", -// "primary": false, -// "node": "NRqCLTmhTLuSxzlWcTae3A", -// "relocating_node": null, -// "shard": 3, -// "index": "shakespeare", -// "allocation_id": { -// "id": "MdjjvB9KTfu4gs_skXDyXg" -// } -// } -// ], -// "4": [ -// { -// "state": "STARTED", -// "primary": false, -// "node": "q--ANfDnTKW2WS9pEBuLWQ", -// "relocating_node": null, -// "shard": 4, -// "index": "shakespeare", -// "allocation_id": { -// "id": "exBumbxRT6KY7LVmGOSIZA" -// } -// }, -// { -// "state": "STARTED", -// "primary": true, -// "node": "NRqCLTmhTLuSxzlWcTae3A", -// "relocating_node": null, -// "shard": 4, -// "index": "shakespeare", -// "allocation_id": { -// "id": "pUhEb1k5TC24EKD-OjS7Iw" -// } -// } -// ] -// } -// } -// } -// } 
-//} -//` -//) -// -//func newPod(name, namespace string) corev1.Pod { -// pod := corev1.Pod{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: name, -// Namespace: namespace, -// }, -// } -// return pod -//} - -// -//func Test_defaultDriver_attemptPodsDeletion(t *testing.T) { -// var clusterState esclient.ClusterState -// b := []byte(ClusterStateSample) -// err := json.Unmarshal(b, &clusterState) -// if err != nil { -// t.Error(err) -// } -// pod1 := newPod("elasticsearch-sample-es-79gc6p57rs", "default") -// pod2 := newPod("elasticsearch-sample-es-fnsgkkdl85", "default") -// pod3 := newPod("elasticsearch-sample-es-jfpqbt2s4q", "default") -// pod4 := newPod("elasticsearch-sample-es-nope", "default") -// -// expectedResult1 := reconciler.Results{} -// expectedResult1.WithResult(defaultRequeue).WithResult(defaultRequeue) -// -// expectedEmptyResult := reconciler.Results{} -// expectedEmptyResult.WithResult(k8sreconcile.Result{}) -// -// elasticsearch := v1alpha1.Elasticsearch{ -// ObjectMeta: metav1.ObjectMeta{ -// Namespace: "default", -// Name: "elasticsearch-sample", -// }, -// } -// -// type fields struct { -// Options Options -// } -// -// type args struct { -// ToDelete *mutation.PerformableChanges -// reconcileState *reconcile.State -// resourcesState *reconcile.ResourcesState -// observedState observer.State -// results *reconciler.Results -// esClient esclient.Client -// elasticsearch v1alpha1.Elasticsearch -// } -// -// type want struct { -// results *reconciler.Results -// fulfilledExpectation bool -// } -// -// tests := []struct { -// name string -// fields fields -// args args -// wantErr bool -// want want -// }{ -// { -// name: "Do not delete a pod with migrating data", -// args: args{ -// elasticsearch: elasticsearch, -// ToDelete: &mutation.PerformableChanges{ -// Changes: mutation.Changes{ -// ToDelete: pod.PodsWithConfig{ -// pod.PodWithConfig{Pod: pod1}, -// pod.PodWithConfig{Pod: pod2}, -// }, -// }, -// }, -// resourcesState: &reconcile.ResourcesState{ -// CurrentPods: pod.PodsWithConfig{ -// {Pod: pod1}, -// {Pod: pod2}, -// {Pod: pod3}, -// }, -// }, -// observedState: observer.State{ -// ClusterState: &clusterState, -// }, -// reconcileState: &reconcile.State{Recorder: events.NewRecorder()}, -// results: &reconciler.Results{}, -// }, -// fields: fields{ -// Options: Options{ -// PodsExpectations: reconciler.NewExpectations(), -// }, -// }, -// wantErr: false, -// want: want{ -// results: &expectedResult1, -// fulfilledExpectation: true, // pod deletion is delayed, do not expect anything -// }, -// }, -// { -// name: "Delete a pod with no data", -// args: args{ -// elasticsearch: elasticsearch, -// ToDelete: &mutation.PerformableChanges{ -// Changes: mutation.Changes{ -// ToDelete: pod.PodsWithConfig{ -// pod.PodWithConfig{Pod: pod4}, -// }, -// }, -// }, -// resourcesState: &reconcile.ResourcesState{ -// CurrentPods: pod.PodsWithConfig{ -// {Pod: pod1}, -// {Pod: pod2}, -// {Pod: pod3}, -// {Pod: pod4}, -// }, -// }, -// observedState: observer.State{ -// ClusterState: &clusterState, -// }, -// reconcileState: &reconcile.State{Recorder: events.NewRecorder()}, -// results: &reconciler.Results{}, -// }, -// fields: fields{ -// Options: Options{ -// PodsExpectations: reconciler.NewExpectations(), -// Client: k8s.WrapClient(fake.NewFakeClient()), -// }, -// }, -// wantErr: false, -// want: want{ -// results: &expectedEmptyResult, -// fulfilledExpectation: false, // pod4 is expected to be deleted -// }, -// }, -// } -// for _, tt := range tests { -// t.Run(tt.name, func(t 
*testing.T) { -// d := &defaultDriver{ -// Options: tt.fields.Options, -// } -// if err := d.attemptPodsDeletion( -// tt.args.ToDelete, tt.args.reconcileState, tt.args.resourcesState, -// tt.args.observedState, tt.args.results, tt.args.esClient, tt.args.elasticsearch); (err != nil) != tt.wantErr { -// t.Errorf("defaultDriver.attemptPodsDeletion() error = %v, wantErr %v", err, tt.wantErr) -// } -// assert.EqualValues(t, tt.want.results, tt.args.results) -// nn := k8s.ExtractNamespacedName(&tt.args.elasticsearch) -// assert.EqualValues(t, tt.want.fulfilledExpectation, tt.fields.Options.PodsExpectations.Fulfilled(nn)) -// }) -// } -//} diff --git a/operators/pkg/controller/elasticsearch/driver/downscale.go b/operators/pkg/controller/elasticsearch/driver/downscale.go index 1a86700018..e78db93a81 100644 --- a/operators/pkg/controller/elasticsearch/driver/downscale.go +++ b/operators/pkg/controller/elasticsearch/driver/downscale.go @@ -5,7 +5,6 @@ package driver import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" @@ -15,15 +14,14 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen2" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" appsv1 "k8s.io/api/apps/v1" ) func (d *defaultDriver) HandleDownscale( - es v1alpha1.Elasticsearch, expectedStatefulSets sset.StatefulSetList, actualStatefulSets sset.StatefulSetList, esClient esclient.Client, + resourcesState reconcile.ResourcesState, observedState observer.State, reconcileState *reconcile.State, ) *reconciler.Results { @@ -40,7 +38,7 @@ func (d *defaultDriver) HandleDownscale( if shouldExist { // sset downscale targetReplicas = sset.Replicas(expected) } - leaving, removalResult := d.scaleStatefulSetDown(es, actualStatefulSets, &actualStatefulSets[i], targetReplicas, esClient, observedState, reconcileState) + leaving, removalResult := d.scaleStatefulSetDown(actualStatefulSets, &actualStatefulSets[i], targetReplicas, esClient, resourcesState, observedState, reconcileState) results.WithResults(removalResult) if removalResult.HasError() { return results @@ -60,16 +58,16 @@ func (d *defaultDriver) HandleDownscale( // scaleStatefulSetDown scales the given StatefulSet down to targetReplicas, if possible. // It returns the names of the nodes that will leave the cluster. 
func (d *defaultDriver) scaleStatefulSetDown( - es v1alpha1.Elasticsearch, allStatefulSets sset.StatefulSetList, ssetToScaleDown *appsv1.StatefulSet, targetReplicas int32, esClient esclient.Client, + resourcesState reconcile.ResourcesState, observedState observer.State, reconcileState *reconcile.State, ) ([]string, *reconciler.Results) { results := &reconciler.Results{} - logger := log.WithValues("statefulset", k8s.ExtractNamespacedName(ssetToScaleDown)) + logger := log.WithValues("namespace", ssetToScaleDown.Namespace, "statefulset", ssetToScaleDown.Name) if sset.Replicas(*ssetToScaleDown) == 0 && targetReplicas == 0 { // no replicas expected, StatefulSet can be safely deleted @@ -96,6 +94,7 @@ func (d *defaultDriver) scaleStatefulSetDown( if migration.IsMigratingData(observedState, node, leavingNodes) { // data migration not over yet: schedule a requeue logger.V(1).Info("Data migration not over yet, skipping node deletion", "node", node) + reconcileState.UpdateElasticsearchMigrating(resourcesState, observedState) results.WithResult(defaultRequeue) // no need to check other nodes since we remove them in order and this one isn't ready anyway break @@ -106,12 +105,12 @@ func (d *defaultDriver) scaleStatefulSetDown( if updatedReplicas < initialReplicas { // trigger deletion of nodes whose data migration is over - logger.V(1).Info("Scaling replicas down", "from", initialReplicas, "to", updatedReplicas) + logger.Info("Scaling replicas down", "from", initialReplicas, "to", updatedReplicas) ssetToScaleDown.Spec.Replicas = &updatedReplicas if label.IsMasterNodeSet(*ssetToScaleDown) { // Update Zen1 minimum master nodes API, accounting for the updated downscaled replicas. - _, err := zen1.UpdateMinimumMasterNodes(d.Client, es, esClient, allStatefulSets, reconcileState) + _, err := zen1.UpdateMinimumMasterNodes(d.Client, d.ES, esClient, allStatefulSets, reconcileState) if err != nil { return nil, results.WithError(err) } @@ -129,7 +128,7 @@ func (d *defaultDriver) scaleStatefulSetDown( return nil, results.WithError(err) } // Expect the updated statefulset in the cache for next reconciliation. 
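The scale-down above removes nodes one at a time, starting from the highest StatefulSet ordinal, and stops at the first node whose data migration is not finished; only then are replicas decremented and zen1 minimum master nodes adjusted. A minimal, self-contained sketch of that walk (computeDownscale and the map-based migration check are illustrative stand-ins, not the real migration package):

package main

import "fmt"

// isMigratingData is a hypothetical stand-in for migration.IsMigratingData:
// it reports whether the named node still holds shard data that must move off first.
func isMigratingData(node string, migrating map[string]bool) bool {
	return migrating[node]
}

// computeDownscale walks ordinals from the highest down to the target and stops
// at the first node whose data migration is not over yet.
func computeDownscale(ssetName string, initialReplicas, targetReplicas int32, migrating map[string]bool) (int32, []string) {
	updatedReplicas := initialReplicas
	var leaving []string
	for i := initialReplicas - 1; i >= targetReplicas; i-- {
		node := fmt.Sprintf("%s-%d", ssetName, i) // StatefulSet pods are named <sset>-<ordinal>
		if isMigratingData(node, migrating) {
			// data migration not over yet: keep this node (and all lower ordinals) for now
			break
		}
		leaving = append(leaving, node)
		updatedReplicas--
	}
	return updatedReplicas, leaving
}

func main() {
	migrating := map[string]bool{"es-masters-1": true}
	replicas, leaving := computeDownscale("es-masters", 3, 1, migrating)
	fmt.Println(replicas, leaving) // 2 [es-masters-2]: ordinal 1 still migrates data, so the walk stops there
}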
- d.expectations.ExpectGeneration(ssetToScaleDown.ObjectMeta) + d.Expectations.ExpectGeneration(ssetToScaleDown.ObjectMeta) } return leavingNodes, results diff --git a/operators/pkg/controller/elasticsearch/driver/driver.go b/operators/pkg/controller/elasticsearch/driver/driver.go index 0800dea20f..daf0fb6c8f 100644 --- a/operators/pkg/controller/elasticsearch/driver/driver.go +++ b/operators/pkg/controller/elasticsearch/driver/driver.go @@ -5,113 +5,247 @@ package driver import ( + "crypto/x509" "fmt" "time" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" + controller "sigs.k8s.io/controller-runtime/pkg/reconcile" + logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/events" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/operator" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/watches" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/certificates" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/cleanup" + esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/configmap" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/license" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/network" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" - esreconcile "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pdb" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/services" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" ) var ( - log = logf.Log.WithName("driver") - - defaultRequeue = reconcile.Result{Requeue: true, RequeueAfter: 10 * time.Second} + log = logf.Log.WithName("driver") + defaultRequeue = controller.Result{Requeue: true, RequeueAfter: 10 * time.Second} ) -// Driver is something that can reconcile an Elasticsearch resource +// Driver orchestrates the reconciliation of an Elasticsearch resource. +// Its lifecycle is bound to a single reconciliation attempt. type Driver interface { - Reconcile( - es v1alpha1.Elasticsearch, - reconcileState *esreconcile.State, - ) *reconciler.Results + Reconcile() *reconciler.Results +} + +// NewDefaultDriver returns the default driver implementation. 
+func NewDefaultDriver(parameters DefaultDriverParameters) Driver { + return &defaultDriver{DefaultDriverParameters: parameters} } -// Options are used to create a driver. See NewDriver -type Options struct { - operator.Parameters - // Version is the version of Elasticsearch we want to reconcile towards +// DefaultDriverParameters contain parameters for this driver. +// Most of them are persisted across driver creations. +type DefaultDriverParameters struct { + // OperatorParameters contain global parameters about the operator. + OperatorParameters operator.Parameters + + // ES is the Elasticsearch resource to reconcile + ES v1alpha1.Elasticsearch + // SupportedVersions verifies whether we can support upgrading from the current pods. + SupportedVersions esversion.LowestHighestSupportedVersions + + // Version is the version of Elasticsearch we want to reconcile towards. Version version.Version - // Client is used to access the Kubernetes API + // Client is used to access the Kubernetes API. Client k8s.Client Scheme *runtime.Scheme Recorder record.EventRecorder - // Observers that observe es clusters state + // State holds the accumulated state during the reconcile loop + ReconcileState *reconcile.State + // Observers that observe es clusters state. Observers *observer.Manager // DynamicWatches are handles to currently registered dynamic watches. DynamicWatches watches.DynamicWatches // Expectations control some expectations set on resources in the cache, in order to // avoid doing certain operations if the cache hasn't seen an up-to-date resource yet. - Expectations *Expectations + Expectations *reconciler.Expectations } -// NewDriver returns a Driver that can operate the provided version -func NewDriver(opts Options) (Driver, error) { - supported := SupportedVersions(opts.Version) - if supported == nil { - return nil, fmt.Errorf("unsupported version: %s", opts.Version) - } - driver := &defaultDriver{ - Options: opts, - - expectations: NewGenerationExpectations(), - observedStateResolver: opts.Observers.ObservedStateResolver, - resourcesStateResolver: esreconcile.NewResourcesStateFromAPI, - usersReconciler: user.ReconcileUsers, - supportedVersions: *supported, - } - - switch opts.Version.Major { - case 7: - //driver.expectedPodsAndResourcesResolver = version6.ExpectedPodSpecs - // - //driver.clusterInitialMasterNodesEnforcer = version7.ClusterInitialMasterNodesEnforcer - - // version 7 uses zen2 instead of zen - //driver.zen2SettingsUpdater = version7.UpdateZen2Settings - // .. except we still have to manage minimum_master_nodes while doing a rolling upgrade from 6 -> 7 - // we approximate this by also handling zen 1, even in 7 - // TODO: only do this if there's 6.x masters in the cluster. - //driver.zen1SettingsUpdater = version6.UpdateZen1Discovery - case 6: - //driver.expectedPodsAndResourcesResolver = version6.ExpectedPodSpecs - //driver.zen1SettingsUpdater = version6.UpdateZen1Discovery - default: - return nil, fmt.Errorf("unsupported version: %s", opts.Version) - } - - return driver, nil +// defaultDriver is the default Driver implementation +type defaultDriver struct { + DefaultDriverParameters } -func SupportedVersions(v version.Version) *esversion.LowestHighestSupportedVersions { - var res *esversion.LowestHighestSupportedVersions - switch v.Major { - case 6: - res = &esversion.LowestHighestSupportedVersions{ - // Min. version is 6.7.0 for now. Will be 6.8.0 soon. 
- LowestSupportedVersion: version.MustParse("6.7.0"), - // higher may be possible, but not proven yet, lower may also be a requirement... - HighestSupportedVersion: version.MustParse("6.99.99"), - } - case 7: - res = &esversion.LowestHighestSupportedVersions{ - // 6.7.0 is the lowest wire compatibility version for 7.x - LowestSupportedVersion: version.MustParse("6.7.0"), - // higher may be possible, but not proven yet, lower may also be a requirement... - HighestSupportedVersion: version.MustParse("7.99.99"), +// Reconcile fulfills the Driver interface and reconciles the cluster resources. +func (d *defaultDriver) Reconcile() *reconciler.Results { + results := &reconciler.Results{} + + // garbage collect secrets attached to this cluster that we don't need anymore + if err := cleanup.DeleteOrphanedSecrets(d.Client, d.ES); err != nil { + return results.WithError(err) + } + + if err := configmap.ReconcileScriptsConfigMap(d.Client, d.Scheme, d.ES); err != nil { + return results.WithError(err) + } + + externalService, err := common.ReconcileService(d.Client, d.Scheme, services.NewExternalService(d.ES), &d.ES) + if err != nil { + return results.WithError(err) + } + + certificateResources, res := certificates.Reconcile( + d.Client, + d.Scheme, + d.DynamicWatches, + d.ES, + []corev1.Service{*externalService}, + d.OperatorParameters.CACertRotation, + d.OperatorParameters.CertRotation, + ) + if results.WithResults(res).HasError() { + return results + } + + internalUsers, err := user.ReconcileUsers(d.Client, d.Scheme, d.ES) + if err != nil { + return results.WithError(err) + } + + resourcesState, err := reconcile.NewResourcesStateFromAPI(d.Client, d.ES) + if err != nil { + return results.WithError(err) + } + min, err := esversion.MinVersion(resourcesState.CurrentPods) + if err != nil { + return results.WithError(err) + } + if min == nil { + min = &d.Version + } + + warnUnsupportedDistro(resourcesState.AllPods, d.ReconcileState.Recorder) + + observedState := d.Observers.ObservedStateResolver( + k8s.ExtractNamespacedName(&d.ES), + d.newElasticsearchClient( + *externalService, + internalUsers.ControllerUser, + *min, + certificateResources.TrustedHTTPCertificates, + )) + + // always update the elasticsearch state bits + if observedState.ClusterState != nil && observedState.ClusterHealth != nil { + d.ReconcileState.UpdateElasticsearchState(*resourcesState, observedState) + } + + if err := pdb.Reconcile(d.Client, d.Scheme, d.ES); err != nil { + return results.WithError(err) + } + + if err := d.SupportedVersions.VerifySupportsExistingPods(resourcesState.CurrentPods); err != nil { + return results.WithError(err) + } + + // TODO: support user-supplied certificate (non-ca) + esClient := d.newElasticsearchClient( + *externalService, + internalUsers.ControllerUser, + *min, + certificateResources.TrustedHTTPCertificates, + ) + defer esClient.Close() + + esReachable, err := services.IsServiceReady(d.Client, *externalService) + if err != nil { + return results.WithError(err) + } + + results.Apply( + "reconcile-cluster-license", + func() (controller.Result, error) { + err := license.Reconcile( + d.Client, + d.ES, + esClient, + observedState.ClusterLicense, + ) + if err != nil && esReachable { + d.ReconcileState.AddEvent( + corev1.EventTypeWarning, + events.EventReasonUnexpected, + fmt.Sprintf("Could not update cluster license: %s", err.Error()), + ) + return defaultRequeue, err + } + return controller.Result{}, err + }, + ) + + // Compute seed hosts based on current masters with a podIP + if err := 
settings.UpdateSeedHostsConfigMap(d.Client, d.Scheme, d.ES, resourcesState.AllPods); err != nil { + return results.WithError(err) + } + + // setup a keystore with secure settings in an init container, if specified by the user + keystoreResources, err := keystore.NewResources( + d.Client, + d.Recorder, + d.DynamicWatches, + &d.ES, + initcontainer.KeystoreParams, + ) + if err != nil { + return results.WithError(err) + } + + res = d.reconcileNodeSpecs(esReachable, esClient, d.ReconcileState, observedState, *resourcesState, keystoreResources) + if results.WithResults(res).HasError() { + return results + } + + d.ReconcileState.UpdateElasticsearchState(*resourcesState, observedState) + + return results +} + +// newElasticsearchClient creates a new Elasticsearch HTTP client for this cluster using the provided user +func (d *defaultDriver) newElasticsearchClient( + service corev1.Service, + user user.User, + v version.Version, + caCerts []*x509.Certificate, +) esclient.Client { + url := fmt.Sprintf("https://%s.%s.svc:%d", service.Name, service.Namespace, network.HTTPPort) + return esclient.NewElasticsearchClient(d.OperatorParameters.Dialer, url, user.Auth(), v, caCerts) +} + +// warnUnsupportedDistro sends an event of type warning if the Elasticsearch Docker image is not a supported +// distribution by looking at if the prepare fs init container terminated with the UnsupportedDistro exit code. +func warnUnsupportedDistro(pods []corev1.Pod, recorder *events.Recorder) { + for _, p := range pods { + for _, s := range p.Status.InitContainerStatuses { + state := s.LastTerminationState.Terminated + if s.Name == initcontainer.PrepareFilesystemContainerName && + state != nil && state.ExitCode == initcontainer.UnsupportedDistroExitCode { + recorder.AddEvent(corev1.EventTypeWarning, events.EventReasonUnexpected, + "Unsupported distribution") + } } } - return res } diff --git a/operators/pkg/controller/elasticsearch/driver/driver_test.go b/operators/pkg/controller/elasticsearch/driver/driver_test.go deleted file mode 100644 index 085d2d8caf..0000000000 --- a/operators/pkg/controller/elasticsearch/driver/driver_test.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
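Reconcile above threads a single results value through every step, accumulating errors and requeue hints instead of returning at the first problem. A toy model of that aggregation pattern (a simplified illustration, not the actual reconciler.Results API):

package main

import (
	"errors"
	"fmt"
	"time"
)

// result is a simplified stand-in for the controller-runtime reconcile.Result.
type result struct {
	Requeue      bool
	RequeueAfter time.Duration
}

// results accumulates errors and the strongest requeue hint seen so far.
type results struct {
	errs    []error
	requeue result
}

func (r *results) WithError(err error) *results {
	if err != nil {
		r.errs = append(r.errs, err)
	}
	return r
}

func (r *results) WithResult(res result) *results {
	if res.Requeue || res.RequeueAfter > 0 {
		r.requeue = res
	}
	return r
}

func (r *results) HasError() bool { return len(r.errs) > 0 }

func main() {
	r := &results{}
	r.WithResult(result{Requeue: true, RequeueAfter: 10 * time.Second}) // e.g. ES not reachable yet
	r.WithError(errors.New("could not update cluster license"))
	if r.HasError() {
		fmt.Println("errors:", r.errs, "requeue:", r.requeue)
	}
}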
- -package driver - -import ( - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - "github.com/stretchr/testify/require" -) - -func TestSupportedVersions(t *testing.T) { - type args struct { - v version.Version - } - tests := []struct { - name string - args args - supported []version.Version - unsupported []version.Version - }{ - { - name: "6.x", - args: args{ - v: version.MustParse("6.8.0"), - }, - supported: []version.Version{ - version.MustParse("6.7.0"), - version.MustParse("6.8.0"), - version.MustParse("6.99.99"), - }, - unsupported: []version.Version{ - version.MustParse("6.5.0"), - version.MustParse("7.0.0"), - }, - }, - { - name: "7.x", - args: args{ - v: version.MustParse("7.1.0"), - }, - supported: []version.Version{ - version.MustParse("6.7.0"), //wire compat - version.MustParse("7.2.0"), - version.MustParse("7.99.99"), - }, - unsupported: []version.Version{ - version.MustParse("6.6.0"), - version.MustParse("8.0.0"), - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - vs := SupportedVersions(tt.args.v) - for _, v := range tt.supported { - require.NoError(t, vs.Supports(v)) - } - for _, v := range tt.unsupported { - require.Error(t, vs.Supports(v)) - } - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/driver/generic_resources.go b/operators/pkg/controller/elasticsearch/driver/generic_resources.go deleted file mode 100644 index 4959ab8f9a..0000000000 --- a/operators/pkg/controller/elasticsearch/driver/generic_resources.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package driver - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/services" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -// GenericResources are resources that all clusters have. -type GenericResources struct { - // ExternalService is the user-facing service - ExternalService corev1.Service -} - -// reconcileGenericResources reconciles the expected generic resources of a cluster. -func reconcileGenericResources( - c k8s.Client, - scheme *runtime.Scheme, - es v1alpha1.Elasticsearch, -) (*GenericResources, *reconciler.Results) { - // TODO: these reconciles do not necessarily use the services as in-out params. - // TODO: consider removing the "res" bits of the ReconcileService signature? - results := &reconciler.Results{} - - externalService := services.NewExternalService(es) - _, err := common.ReconcileService(c, scheme, externalService, &es) - if err != nil { - return nil, results.WithError(err) - } - - return &GenericResources{ - ExternalService: *externalService, - }, results -} diff --git a/operators/pkg/controller/elasticsearch/driver/nodes.go b/operators/pkg/controller/elasticsearch/driver/nodes.go new file mode 100644 index 0000000000..437b67ab03 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/driver/nodes.go @@ -0,0 +1,134 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package driver + +import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/reconciler" + esclient "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/nodespec" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version/zen2" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" +) + +func (d *defaultDriver) reconcileNodeSpecs( + esReachable bool, + esClient esclient.Client, + reconcileState *reconcile.State, + observedState observer.State, + resourcesState reconcile.ResourcesState, + keystoreResources *keystore.Resources, +) *reconciler.Results { + results := &reconciler.Results{} + + actualStatefulSets, err := sset.RetrieveActualStatefulSets(d.Client, k8s.ExtractNamespacedName(&d.ES)) + if err != nil { + return results.WithError(err) + } + + if !d.Expectations.GenerationExpected(actualStatefulSets.ObjectMetas()...) { + // Our cache of StatefulSets is out of date compared to previous reconciliation operations. + // This will probably lead to conflicting sset updates (which is ok), but also to + // conflicting ES calls (set/reset zen1/zen2/allocation excludes, etc.), which may not be ok. + log.V(1).Info("StatefulSet cache out-of-date, re-queueing", "namespace", d.ES.Namespace, "es_name", d.ES.Name) + return results.WithResult(defaultRequeue) + } + + nodeSpecResources, err := nodespec.BuildExpectedResources(d.ES, keystoreResources) + if err != nil { + return results.WithError(err) + } + + // TODO: there is a split brain possibility here if going from 1 to 3 masters or 3 to 7. + // See https://github.com/elastic/cloud-on-k8s/issues/1281. + + // patch configs to consider zen1 minimum master nodes + if err := zen1.SetupMinimumMasterNodesConfig(nodeSpecResources); err != nil { + return results.WithError(err) + } + // patch configs to consider zen2 initial master nodes + if err := zen2.SetupInitialMasterNodes(d.ES, observedState, d.Client, nodeSpecResources); err != nil { + return results.WithError(err) + } + + // Phase 1: apply expected StatefulSets resources, but don't scale down. + // The goal is to: + // 1. scale sset up (eg. go from 3 to 5 replicas). + // 2. apply configuration changes on the sset resource, to be used for future pods creation/recreation, + // but do not rotate pods yet. + // 3. do **not** apply replicas scale down, otherwise nodes would be deleted before + // we handle a clean deletion. 
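A compact sketch of the phase-1 replica decision outlined in the comment above (desiredReplicas is an illustrative helper, not part of the codebase):

package main

import "fmt"

// desiredReplicas applies the phase-1 rule: apply the expected spec, but never
// scale replicas down at this stage. Downscales are deferred to phase 2, where
// data migration is handled first.
func desiredReplicas(expected, actual int32, exists bool) int32 {
	if exists && expected < actual {
		// sset needs to be scaled down later; keep the current replica count for now
		return actual
	}
	return expected
}

func main() {
	fmt.Println(desiredReplicas(5, 3, true))  // 5: scale up is applied immediately
	fmt.Println(desiredReplicas(3, 5, true))  // 5: scale down is deferred to phase 2
	fmt.Println(desiredReplicas(3, 0, false)) // 3: new StatefulSet, created with expected replicas
}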
+ for _, nodeSpecRes := range nodeSpecResources { + // always reconcile config (will apply to new & recreated pods) + if err := settings.ReconcileConfig(d.Client, d.ES, nodeSpecRes.StatefulSet.Name, nodeSpecRes.Config); err != nil { + return results.WithError(err) + } + if _, err := common.ReconcileService(d.Client, d.Scheme, &nodeSpecRes.HeadlessService, &d.ES); err != nil { + return results.WithError(err) + } + ssetToApply := *nodeSpecRes.StatefulSet.DeepCopy() + actual, exists := actualStatefulSets.GetByName(ssetToApply.Name) + if exists && sset.Replicas(ssetToApply) < sset.Replicas(actual) { + // sset needs to be scaled down + // update the sset to use the new spec but don't scale replicas down for now + ssetToApply.Spec.Replicas = actual.Spec.Replicas + } + if err := sset.ReconcileStatefulSet(d.Client, d.Scheme, d.ES, ssetToApply); err != nil { + return results.WithError(err) + } + } + + if !esReachable { + // Cannot perform next operations if we cannot request Elasticsearch. + log.Info("ES external service not ready yet for further reconciliation, re-queuing.", "namespace", d.ES.Namespace, "es_name", d.ES.Name) + reconcileState.UpdateElasticsearchPending(resourcesState.CurrentPods) + return results.WithResult(defaultRequeue) + } + + // Update Zen1 minimum master nodes through the API, corresponding to the current nodes we have. + requeue, err := zen1.UpdateMinimumMasterNodes(d.Client, d.ES, esClient, actualStatefulSets, reconcileState) + if err != nil { + return results.WithError(err) + } + if requeue { + results.WithResult(defaultRequeue) + } + // Maybe clear zen2 voting config exclusions. + requeue, err = zen2.ClearVotingConfigExclusions(d.ES, d.Client, esClient, actualStatefulSets) + if err != nil { + return results.WithError(err) + } + if requeue { + results.WithResult(defaultRequeue) + } + + // Phase 2: handle sset scale down. + // We want to safely remove nodes from the cluster, either because the sset requires less replicas, + // or because it should be removed entirely. + downscaleRes := d.HandleDownscale(nodeSpecResources.StatefulSets(), actualStatefulSets, esClient, resourcesState, observedState, reconcileState) + results.WithResults(downscaleRes) + if downscaleRes.HasError() { + return results + } + + // Phase 3: handle rolling upgrades. + // Control nodes restart (upgrade) by manually decrementing rollingUpdate.Partition. + rollingUpgradesRes := d.handleRollingUpgrades(esClient, actualStatefulSets) + results.WithResults(rollingUpgradesRes) + if rollingUpgradesRes.HasError() { + return results + } + + // TODO: + // - change budget + // - grow and shrink + return results +} diff --git a/operators/pkg/controller/elasticsearch/driver/pods.go b/operators/pkg/controller/elasticsearch/driver/pods.go deleted file mode 100644 index c129e71d1b..0000000000 --- a/operators/pkg/controller/elasticsearch/driver/pods.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package driver - -// -//// createElasticsearchPod creates the given elasticsearch pod -//func createElasticsearchPod( -// c k8s.Client, -// scheme *runtime.Scheme, -// es v1alpha1.Elasticsearch, -// reconcileState *esreconcile.State, -// pod corev1.Pod, -// podSpecCtx pod.PodSpecContext, -// orphanedPVCs *pvcutils.OrphanedPersistentVolumeClaims, -//) error { -// // when can we re-use a metav1.PersistentVolumeClaim? -// // - It is the same size, storageclass etc, or resizable as such -// // (https://kubernetes.io/docs/concepts/storage/persistent-volumes/#expanding-persistent-volumes-claims) -// // - If a local volume: when we know it's going to the same node -// // - How can we tell? -// // - Only guaranteed if a required node affinity specifies a specific, singular node. -// // - Usually they are more generic, yielding a range of possible target nodes -// // - If an EBS and non-regional PDs (GCP) volume: when we know it's going to the same AZ: -// // - How can we tell? -// // - Only guaranteed if a required node affinity specifies a specific availability zone -// // - Often -// // - This is /hard/ -// // - Other persistent -// // -// // - Limitations -// // - Node-specific volume limits: https://kubernetes.io/docs/concepts/storage/storage-limits/ -// // -// // How to technically re-use a volume: -// // - Re-use the same name for the PVC. -// // - E.g, List PVCs, if a PVC we want to use exist -// -// for _, claimTemplate := range podSpecCtx.NodeSpec.VolumeClaimTemplates { -// // TODO : we are creating PVC way too far in the process, it's almost too late to compare them with existing ones -// pvc, err := getOrCreatePVC(&pod, claimTemplate, orphanedPVCs, c, scheme, es) -// if err != nil { -// return err -// } -// -// vol := corev1.Volume{ -// Name: claimTemplate.Name, -// VolumeSource: corev1.VolumeSource{ -// PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ -// ClaimName: pvc.Name, -// // TODO: support read only pvcs -// }, -// }, -// } -// pod = replaceVolume(pod, vol) -// } -// -// // create the config volume for this pod, now that we have a proper name for the pod -// if err := settings.ReconcileConfig(c, es, pod, podSpecCtx.Config); err != nil { -// return err -// } -// configSecretVolume := settings.ConfigSecretVolume(pod.Name).Volume() -// pod = replaceVolume(pod, configSecretVolume) -// -// if err := controllerutil.SetControllerReference(&es, &pod, scheme); err != nil { -// return err -// } -// if err := c.Create(&pod); err != nil { -// reconcileState.AddEvent(corev1.EventTypeWarning, events.EventReasonUnexpected, fmt.Sprintf("Cannot create pod %s: %s", pod.Name, err.Error())) -// return err -// } -// reconcileState.AddEvent(corev1.EventTypeNormal, events.EventReasonCreated, stringsutil.Concat("Created pod ", pod.Name)) -// log.Info("Created pod", "name", pod.Name, "namespace", pod.Namespace) -// -// return nil -//} -// -//// replaceVolume replaces an existing volume in the pod that has the same name as the given one. -//func replaceVolume(pod corev1.Pod, volume corev1.Volume) corev1.Pod { -// for i, v := range pod.Spec.Volumes { -// if v.Name == volume.Name { -// pod.Spec.Volumes[i] = volume -// break -// } -// } -// return pod -//} -// -//// getOrCreatePVC tries to attach a PVC that already exists or attaches a new one otherwise. 
-//func getOrCreatePVC(pod *corev1.Pod, -// claimTemplate corev1.PersistentVolumeClaim, -// orphanedPVCs *pvcutils.OrphanedPersistentVolumeClaims, -// c k8s.Client, -// scheme *runtime.Scheme, -// es v1alpha1.Elasticsearch, -//) (*corev1.PersistentVolumeClaim, error) { -// // Generate the desired PVC from the template -// pvc := newPVCFromTemplate(claimTemplate, pod) -// // Seek for an orphaned PVC that matches the desired one -// orphanedPVC := orphanedPVCs.GetOrphanedVolumeClaim(pvc) -// -// if orphanedPVC != nil { -// // ReUSE the orphaned PVC -// pvc = orphanedPVC -// // Update the name of the pod to reflect the change -// podName, err := pvcutils.GetPodNameFromLabels(pvc) -// if err != nil { -// return nil, err -// } -// -// // update the hostname if we defaulted it earlier -// if pod.Spec.Hostname == pod.Name { -// pod.Spec.Hostname = podName -// } -// -// pod.Name = podName -// log.Info("Reusing PVC", "pod", pod.Name, "pvc", pvc.Name) -// return pvc, nil -// } -// -// // No match, create a new PVC -// log.Info("Creating PVC", "pod", pod.Name, "pvc", pvc.Name) -// if err := controllerutil.SetControllerReference(&es, pvc, scheme); err != nil { -// return nil, err -// } -// err := c.Create(pvc) -// if err != nil && !apierrors.IsAlreadyExists(err) { -// return nil, err -// } -// return pvc, nil -//} -// -//func newPVCFromTemplate(claimTemplate corev1.PersistentVolumeClaim, pod *corev1.Pod) *corev1.PersistentVolumeClaim { -// pvc := claimTemplate.DeepCopy() -// pvc.Name = name.NewPVCName(pod.Name, claimTemplate.Name) -// pvc.Namespace = pod.Namespace -// // reuse some labels also applied to the pod for comparison purposes -// if pvc.Labels == nil { -// pvc.Labels = map[string]string{} -// } -// for _, k := range pvcpkg.PodLabelsInPVCs { -// pvc.Labels[k] = pod.Labels[k] -// } -// // Add the current pod name as a label -// pvc.Labels[label.PodNameLabelName] = pod.Name -// pvc.Labels[label.VolumeNameLabelName] = claimTemplate.Name -// return pvc -//} -// -//// deleteElasticsearchPod deletes the given elasticsearch pod. Tests to check if the pod can be safely deleted must -//// be done before the call to this function. -//func deleteElasticsearchPod( -// c k8s.Client, -// reconcileState *esreconcile.State, -// resourcesState esreconcile.ResourcesState, -// pod corev1.Pod, -// preDelete func() error, -//) (reconcile.Result, error) { -// -// // delete all PVCs associated with this pod -// // TODO: perhaps this is better to reconcile after the fact? -// for _, volume := range pod.Spec.Volumes { -// if volume.PersistentVolumeClaim == nil { -// continue -// } -// -// // TODO: perhaps not assuming all PVCs will be managed by us? and maybe we should not categorically delete? 
-// pvc, err := resourcesState.FindPVCByName(volume.PersistentVolumeClaim.ClaimName) -// if err != nil { -// return reconcile.Result{}, err -// } -// -// if err := c.Delete(&pvc); err != nil && !apierrors.IsNotFound(err) { -// return reconcile.Result{}, err -// } -// } -// -// if err := preDelete(); err != nil { -// return reconcile.Result{}, err -// } -// if err := c.Delete(&pod); err != nil && !apierrors.IsNotFound(err) { -// return reconcile.Result{}, err -// } -// reconcileState.AddEvent( -// corev1.EventTypeNormal, events.EventReasonDeleted, stringsutil.Concat("Deleted pod ", pod.Name), -// ) -// log.Info("Deleted pod", "name", pod.Name, "namespace", pod.Namespace) -// -// // delete configuration for that pod (would be garbage collected otherwise) -// secret, err := settings.GetESConfigSecret(c, k8s.ExtractNamespacedName(&pod)) -// if err != nil && !apierrors.IsNotFound(err) { -// return reconcile.Result{}, err -// } -// if err = c.Delete(&secret); err != nil && !apierrors.IsNotFound(err) { -// return reconcile.Result{}, err -// } -// -// return reconcile.Result{}, nil -//} diff --git a/operators/pkg/controller/elasticsearch/driver/upgrade.go b/operators/pkg/controller/elasticsearch/driver/upgrade.go index f66fecae55..67201e6158 100644 --- a/operators/pkg/controller/elasticsearch/driver/upgrade.go +++ b/operators/pkg/controller/elasticsearch/driver/upgrade.go @@ -21,7 +21,6 @@ import ( ) func (d *defaultDriver) handleRollingUpgrades( - es v1alpha1.Elasticsearch, esClient esclient.Client, statefulSets sset.StatefulSetList, ) *reconciler.Results { @@ -31,18 +30,17 @@ func (d *defaultDriver) handleRollingUpgrades( esState := NewLazyESState(esClient) // Maybe upgrade some of the nodes. - res := d.doRollingUpgrade(es, statefulSets, esClient, esState) + res := d.doRollingUpgrade(statefulSets, esClient, esState) results.WithResults(res) // Maybe re-enable shards allocation if upgraded nodes are back into the cluster. - res = d.MaybeEnableShardsAllocation(es, esClient, esState, statefulSets) + res = d.MaybeEnableShardsAllocation(esClient, esState, statefulSets) results.WithResults(res) return results } func (d *defaultDriver) doRollingUpgrade( - es v1alpha1.Elasticsearch, statefulSets sset.StatefulSetList, esClient esclient.Client, esState ESState, @@ -103,7 +101,7 @@ func (d *defaultDriver) doRollingUpgrade( } // Is the cluster ready for the node upgrade? 
- clusterReady, err := clusterReadyForNodeRestart(es, esState) + clusterReady, err := clusterReadyForNodeRestart(d.ES, esState) if err != nil { return results.WithError(err) } @@ -112,7 +110,7 @@ func (d *defaultDriver) doRollingUpgrade( return results.WithResult(defaultRequeue) } - log.Info("Preparing cluster for node restart", "namespace", es.Namespace, "es_name", es.Name) + log.Info("Preparing cluster for node restart", "namespace", d.ES.Namespace, "es_name", d.ES.Name) if err := prepareClusterForNodeRestart(esClient, esState); err != nil { return results.WithError(err) } @@ -251,7 +249,6 @@ func doSyncFlush(esClient esclient.Client) error { } func (d *defaultDriver) MaybeEnableShardsAllocation( - es v1alpha1.Elasticsearch, esClient esclient.Client, esState ESState, statefulSets sset.StatefulSetList, @@ -273,8 +270,8 @@ func (d *defaultDriver) MaybeEnableShardsAllocation( if !scheduledUpgradesDone { log.V(1).Info( "Rolling upgrade not over yet, some pods don't have the updated revision, keeping shard allocations disabled", - "namespace", es.Namespace, - "es_name", es.Name, + "namespace", d.ES.Namespace, + "es_name", d.ES.Name, ) return results.WithResult(defaultRequeue) } @@ -287,13 +284,13 @@ func (d *defaultDriver) MaybeEnableShardsAllocation( if !nodesInCluster { log.V(1).Info( "Some upgraded nodes are not back in the cluster yet, keeping shard allocations disabled", - "namespace", es.Namespace, - "es_name", es.Name, + "namespace", d.ES.Namespace, + "es_name", d.ES.Name, ) return results.WithResult(defaultRequeue) } - log.Info("Enabling shards allocation", "namespace", es.Namespace, "es_name", es.Name) + log.Info("Enabling shards allocation", "namespace", d.ES.Namespace, "es_name", d.ES.Name) ctx, cancel := context.WithTimeout(context.Background(), esclient.DefaultReqTimeout) defer cancel() if err := esClient.EnableShardAllocation(ctx); err != nil { diff --git a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go index 6ccd561da2..da9787511c 100644 --- a/operators/pkg/controller/elasticsearch/elasticsearch_controller.go +++ b/operators/pkg/controller/elasticsearch/elasticsearch_controller.go @@ -5,6 +5,7 @@ package elasticsearch import ( + "fmt" "sync/atomic" "time" @@ -22,6 +23,8 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" "sigs.k8s.io/controller-runtime/pkg/source" + esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" + elasticsearchv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/annotation" @@ -68,7 +71,7 @@ func newReconciler(mgr manager.Manager, params operator.Parameters) *ReconcileEl finalizers: finalizer.NewHandler(client), dynamicWatches: watches.NewDynamicWatches(), - expectations: driver.NewGenerationExpectations(), + expectations: reconciler.NewExpectations(), Parameters: params, } @@ -167,7 +170,7 @@ type ReconcileElasticsearch struct { // expectations help dealing with inconsistencies in our client cache, // by marking resources updates as expected, and skipping some operations if the cache is not up-to-date. 
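The expectations mentioned here can be pictured as a map from resource name to the last generation the operator wrote; cluster-mutating work only resumes once the cached generation has caught up. A self-contained sketch under that assumption (the real reconciler.Expectations implementation may differ):

package main

import "fmt"

// expectations is a toy model of generation-based expectations: after updating a
// StatefulSet we remember the generation we wrote, and subsequent reconciliations
// skip sensitive operations until the cache reflects at least that generation.
type expectations struct {
	generations map[string]int64
}

func newExpectations() *expectations {
	return &expectations{generations: map[string]int64{}}
}

// expectGeneration records the generation of a resource we just updated.
func (e *expectations) expectGeneration(name string, generation int64) {
	e.generations[name] = generation
}

// generationExpected reports whether the cached generation is at least as recent
// as the one we expect for every resource we know about.
func (e *expectations) generationExpected(cached map[string]int64) bool {
	for name, expected := range e.generations {
		if cached[name] < expected {
			return false
		}
	}
	return true
}

func main() {
	e := newExpectations()
	e.expectGeneration("es-masters", 4) // we just bumped the sset spec

	fmt.Println(e.generationExpected(map[string]int64{"es-masters": 3})) // false: cache is stale, requeue
	fmt.Println(e.generationExpected(map[string]int64{"es-masters": 4})) // true: safe to proceed
}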
- expectations *driver.Expectations + expectations *reconciler.Expectations // iteration is the number of times this controller has run its Reconcile method iteration int64 @@ -243,11 +246,6 @@ func (r *ReconcileElasticsearch) internalReconcile( return results } - ver, err := commonversion.Parse(es.Spec.Version) - if err != nil { - return results.WithError(err) - } - violations, err := validation.Validate(es) if err != nil { return results.WithError(err) @@ -257,23 +255,28 @@ func (r *ReconcileElasticsearch) internalReconcile( return results } - driver, err := driver.NewDriver(driver.Options{ - Client: r.Client, - Scheme: r.scheme, - Recorder: r.recorder, - - Version: *ver, - - Expectations: r.expectations, - Observers: r.esObservers, - DynamicWatches: r.dynamicWatches, - Parameters: r.Parameters, - }) + ver, err := commonversion.Parse(es.Spec.Version) if err != nil { return results.WithError(err) } + supported := esversion.SupportedVersions(*ver) + if supported == nil { + return results.WithError(fmt.Errorf("unsupported version: %s", ver)) + } - return driver.Reconcile(es, reconcileState) + return driver.NewDefaultDriver(driver.DefaultDriverParameters{ + OperatorParameters: r.Parameters, + ES: es, + ReconcileState: reconcileState, + Client: r.Client, + Scheme: r.scheme, + Recorder: r.recorder, + Version: *ver, + Expectations: r.expectations, + Observers: r.esObservers, + DynamicWatches: r.dynamicWatches, + SupportedVersions: *supported, + }).Reconcile() } func (r *ReconcileElasticsearch) updateStatus( diff --git a/operators/pkg/controller/elasticsearch/env/vars.go b/operators/pkg/controller/elasticsearch/env/vars.go deleted file mode 100644 index 37cc03e223..0000000000 --- a/operators/pkg/controller/elasticsearch/env/vars.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package env - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - corev1 "k8s.io/api/core/v1" -) - -// DynamicPodEnvVars are environment variables to dynamically inject pod name and IP, -// to be referenced in Elasticsearch configuration file -var DynamicPodEnvVars = []corev1.EnvVar{ - {Name: settings.EnvPodName, Value: "", ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "metadata.name"}, - }}, - {Name: settings.EnvPodIP, Value: "", ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "status.podIP"}, - }}, -} diff --git a/operators/pkg/controller/elasticsearch/initcontainer/initcontainer.go b/operators/pkg/controller/elasticsearch/initcontainer/initcontainer.go index ed2f26418a..82397c2308 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/initcontainer.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/initcontainer.go @@ -5,6 +5,7 @@ package initcontainer import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" corev1 "k8s.io/api/core/v1" ) @@ -25,6 +26,7 @@ func NewInitContainers( setVMMaxMapCount *bool, transportCertificatesVolume volume.SecretVolume, clusterName string, + keystoreResources *keystore.Resources, ) ([]corev1.Container, error) { var containers []corev1.Container // create the privileged init container if not explicitly disabled by the user @@ -39,8 +41,11 @@ func NewInitContainers( if err != nil { return nil, err } - containers = append(containers, prepareFsContainer) + if keystoreResources != nil { + containers = append(containers, keystoreResources.InitContainer) + } + return containers, nil } diff --git a/operators/pkg/controller/elasticsearch/initcontainer/initcontainer_test.go b/operators/pkg/controller/elasticsearch/initcontainer/initcontainer_test.go index 327bc61921..bf1c02f459 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/initcontainer_test.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/initcontainer_test.go @@ -7,6 +7,7 @@ package initcontainer import ( "testing" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" "github.com/stretchr/testify/assert" ) @@ -18,6 +19,7 @@ func TestNewInitContainers(t *testing.T) { elasticsearchImage string operatorImage string SetVMMaxMapCount *bool + keystoreResources *keystore.Resources } tests := []struct { name string @@ -51,6 +53,16 @@ func TestNewInitContainers(t *testing.T) { }, expectedNumberOfContainers: 1, }, + { + name: "with keystore resources", + args: args{ + elasticsearchImage: "es-image", + operatorImage: "op-image", + SetVMMaxMapCount: nil, + keystoreResources: &keystore.Resources{}, + }, + expectedNumberOfContainers: 3, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -59,6 +71,7 @@ func TestNewInitContainers(t *testing.T) { tt.args.SetVMMaxMapCount, volume.SecretVolume{}, "clusterName", + tt.args.keystoreResources, ) assert.NoError(t, err) assert.Equal(t, tt.expectedNumberOfContainers, len(containers)) diff --git a/operators/pkg/controller/elasticsearch/initcontainer/keystore.go b/operators/pkg/controller/elasticsearch/initcontainer/keystore.go new file mode 100644 index 0000000000..ba4f2dddda --- /dev/null +++ b/operators/pkg/controller/elasticsearch/initcontainer/keystore.go @@ -0,0 
+1,18 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package initcontainer + +import ( + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" + esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" +) + +// KeystoreParams is used to generate the init container that will load the secure settings into a keystore. +var KeystoreParams = keystore.InitContainerParameters{ + KeystoreCreateCommand: "/usr/share/elasticsearch/bin/elasticsearch-keystore create", + KeystoreAddCommand: "/usr/share/elasticsearch/bin/elasticsearch-keystore add", + SecureSettingsVolumeMountPath: keystore.SecureSettingsVolumeMountPath, + DataVolumePath: esvolume.ElasticsearchDataMountPath, +} diff --git a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go index 3250a57267..c3a77f66ef 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/prepare_fs.go @@ -7,6 +7,7 @@ package initcontainer import ( "path" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" @@ -107,11 +108,7 @@ func NewPrepareFSInitContainer( SecurityContext: &corev1.SecurityContext{ Privileged: &privileged, }, - Env: []corev1.EnvVar{ - {Name: settings.EnvPodName, Value: "", ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "metadata.name"}, - }}, - }, + Env: defaults.PodDownwardEnvVars, Command: []string{"bash", "-c", path.Join(esvolume.ScriptsVolumeMountPath, PrepareFsScriptConfigKey)}, VolumeMounts: append( PluginVolumes.InitContainerVolumeMounts(), diff --git a/operators/pkg/controller/elasticsearch/mutation/calculate.go b/operators/pkg/controller/elasticsearch/mutation/calculate.go deleted file mode 100644 index e6060379aa..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/calculate.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package mutation - -import ( - "sort" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/mutation/comparison" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - corev1 "k8s.io/api/core/v1" -) - -// PodBuilder is a function that is able to create pods from a PodSpecContext, -// mostly used by the various supported versions -type PodBuilder func(ctx pod.PodSpecContext) corev1.Pod - -// PodComparisonResult holds information about pod comparison result -type PodComparisonResult struct { - IsMatch bool - MatchingPod pod.PodWithConfig - MismatchReasonsPerPod map[string][]string - RemainingPods pod.PodsWithConfig -} - -// CalculateChanges returns Changes to perform by comparing actual pods to expected pods spec -func CalculateChanges( - es v1alpha1.Elasticsearch, - expectedPodSpecCtxs []pod.PodSpecContext, - state reconcile.ResourcesState, - podBuilder PodBuilder, -) (Changes, error) { - // work on copies of the arrays, on which we can safely remove elements - expectedCopy := make([]pod.PodSpecContext, len(expectedPodSpecCtxs)) - copy(expectedCopy, expectedPodSpecCtxs) - actualCopy := make(pod.PodsWithConfig, len(state.CurrentPods)) - copy(actualCopy, state.CurrentPods) - deletingCopy := make(pod.PodsWithConfig, len(state.DeletingPods)) - copy(deletingCopy, state.DeletingPods) - - return mutableCalculateChanges(es, expectedCopy, actualCopy, state, podBuilder, deletingCopy) -} - -func mutableCalculateChanges( - es v1alpha1.Elasticsearch, - expectedPodSpecCtxs []pod.PodSpecContext, - actualPods pod.PodsWithConfig, - state reconcile.ResourcesState, - podBuilder PodBuilder, - deletingPods pod.PodsWithConfig, -) (Changes, error) { - changes := EmptyChanges() - - for _, expectedPodSpecCtx := range expectedPodSpecCtxs { - - // look for a matching pod in the current ones - actualComparisonResult, err := getAndRemoveMatchingPod(es, expectedPodSpecCtx, actualPods, state) - if err != nil { - return changes, err - } - if actualComparisonResult.IsMatch { - // matching pod already exists, keep it - changes.ToKeep = append(changes.ToKeep, actualComparisonResult.MatchingPod) - // one less pod to compare with - actualPods = actualComparisonResult.RemainingPods - continue - } - - // look for a matching pod in the ones that are being deleted - deletingComparisonResult, err := getAndRemoveMatchingPod(es, expectedPodSpecCtx, deletingPods, state) - if err != nil { - return changes, err - } - if deletingComparisonResult.IsMatch { - // a matching pod is terminating, wait in order to reuse its resources - changes.ToKeep = append(changes.ToKeep, deletingComparisonResult.MatchingPod) - // one less pod to compare with - deletingPods = deletingComparisonResult.RemainingPods - continue - } - - // no matching pod, a new one should be created - pod := podBuilder(expectedPodSpecCtx) - - changes.ToCreate = append(changes.ToCreate, PodToCreate{ - Pod: pod, - PodSpecCtx: expectedPodSpecCtx, - MismatchReasons: actualComparisonResult.MismatchReasonsPerPod, - }) - } - // remaining actual pods should be deleted - changes.ToDelete = actualPods - - // sort changes for idempotent processing - sort.SliceStable(changes.ToKeep, sortPodByCreationTimestampAsc(changes.ToKeep)) - sort.SliceStable(changes.ToDelete, sortPodByCreationTimestampAsc(changes.ToDelete)) - - return changes, nil -} - -func getAndRemoveMatchingPod( - es 
v1alpha1.Elasticsearch, - podSpecCtx pod.PodSpecContext, - podsWithConfig pod.PodsWithConfig, - state reconcile.ResourcesState, -) (PodComparisonResult, error) { - mismatchReasonsPerPod := map[string][]string{} - - for i, podWithConfig := range podsWithConfig { - pod := podWithConfig.Pod - - isMatch, mismatchReasons, err := comparison.PodMatchesSpec(es, podWithConfig, podSpecCtx, state) - if err != nil { - return PodComparisonResult{}, err - } - if isMatch { - // matching pod found - // remove it from the remaining pods - return PodComparisonResult{ - IsMatch: true, - MatchingPod: podWithConfig, - MismatchReasonsPerPod: mismatchReasonsPerPod, - RemainingPods: append(podsWithConfig[:i], podsWithConfig[i+1:]...), - }, nil - } - mismatchReasonsPerPod[pod.Name] = mismatchReasons - } - // no matching pod found - return PodComparisonResult{ - IsMatch: false, - MismatchReasonsPerPod: mismatchReasonsPerPod, - RemainingPods: podsWithConfig, - }, nil -} diff --git a/operators/pkg/controller/elasticsearch/mutation/calculate_test.go b/operators/pkg/controller/elasticsearch/mutation/calculate_test.go deleted file mode 100644 index f9091f9c18..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/calculate_test.go +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package mutation - -// -//var es = v1alpha1.Elasticsearch{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "elasticsearch", -// }, -//} - -//func ESPodSpecContext(image string, cpuLimit string) pod.PodSpecContext { -// return pod.PodSpecContext{ -// PodTemplate: corev1.PodTemplateSpec{ -// ObjectMeta: metav1.ObjectMeta{ -// Labels: map[string]string{ -// label.ClusterNameLabelName: es.Name, -// }, -// }, -// Spec: corev1.PodSpec{ -// Containers: []corev1.Container{{ -// Image: image, -// ImagePullPolicy: corev1.PullIfNotPresent, -// Name: v1alpha1.ElasticsearchContainerName, -// Ports: pod.DefaultContainerPorts, -// // TODO: Hardcoded resource limits and requests -// Resources: corev1.ResourceRequirements{ -// Limits: corev1.ResourceList{ -// corev1.ResourceCPU: resource.MustParse(cpuLimit), -// corev1.ResourceMemory: resource.MustParse("2Gi"), -// }, -// Requests: corev1.ResourceList{ -// corev1.ResourceCPU: resource.MustParse("100m"), -// corev1.ResourceMemory: resource.MustParse("2Gi"), -// }, -// }, -// ReadinessProbe: &corev1.Probe{ -// FailureThreshold: 3, -// InitialDelaySeconds: 10, -// PeriodSeconds: 5, -// SuccessThreshold: 1, -// TimeoutSeconds: 5, -// Handler: corev1.Handler{ -// Exec: &corev1.ExecAction{ -// Command: []string{ -// "sh", -// "-c", -// "script here", -// }, -// }, -// }, -// }, -// }}, -// }, -// }, -// } -//} - -// -//func TestCalculateChanges(t *testing.T) { -// type args struct { -// expected []pod.PodSpecContext -// state reconcile.ResourcesState -// } -// tests := []struct { -// name string -// args args -// want Changes -// }{ -// { -// name: "Wait for 2 pods to be terminated, create 1", -// args: args{ -// expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx}, -// state: reconcile.ResourcesState{DeletingPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, -// }, -// want: Changes{ -// ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}, -// ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtx}}, -// }, -// }, 
-// { -// name: "Do not wait for 2 pods to be terminated, create 3", -// args: args{ -// expected: []pod.PodSpecContext{defaultPodSpecCtxV2, defaultPodSpecCtxV2, defaultPodSpecCtxV2}, -// state: reconcile.ResourcesState{DeletingPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, -// }, -// want: Changes{ -// ToKeep: pod.PodsWithConfig{}, -// ToDelete: pod.PodsWithConfig{}, -// ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtxV2}, {PodSpecCtx: defaultPodSpecCtxV2}, {PodSpecCtx: defaultPodSpecCtxV2}}, -// }, -// }, -// { -// name: "no changes", -// args: args{ -// expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx}, -// state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, -// }, -// want: Changes{ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, -// }, -// { -// name: "2 new pods", -// args: args{ -// expected: []pod.PodSpecContext{defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx, defaultPodSpecCtx}, -// state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, -// }, -// want: Changes{ -// ToKeep: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}, -// ToCreate: []PodToCreate{{PodSpecCtx: defaultPodSpecCtx}, {PodSpecCtx: defaultPodSpecCtx}}, -// }, -// }, -// { -// name: "2 less pods", -// args: args{ -// expected: []pod.PodSpecContext{}, -// state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, -// }, -// want: Changes{ToDelete: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, -// }, -// { -// name: "1 pod replaced", -// args: args{ -// expected: []pod.PodSpecContext{defaultPodSpecCtx, ESPodSpecContext("another-image", defaultCPULimit)}, -// state: reconcile.ResourcesState{CurrentPods: pod.PodsWithConfig{defaultPodWithConfig, defaultPodWithConfig}}, -// }, -// want: Changes{ -// ToKeep: pod.PodsWithConfig{defaultPodWithConfig}, -// ToDelete: pod.PodsWithConfig{defaultPodWithConfig}, -// ToCreate: []PodToCreate{{PodSpecCtx: ESPodSpecContext("another-image", defaultCPULimit)}}, -// }, -// }, -// } -// for _, tt := range tests { -// t.Run(tt.name, func(t *testing.T) { -// got, err := CalculateChanges(es, tt.args.expected, tt.args.state, func(ctx pod.PodSpecContext) corev1.Pod { -// return version.NewPod(es, ctx) -// }) -// assert.NoError(t, err) -// assert.Equal(t, len(tt.want.ToKeep), len(got.ToKeep)) -// assert.Equal(t, len(tt.want.ToCreate), len(got.ToCreate)) -// assert.Equal(t, len(tt.want.ToDelete), len(got.ToDelete)) -// }) -// } -//} diff --git a/operators/pkg/controller/elasticsearch/mutation/change_group.go b/operators/pkg/controller/elasticsearch/mutation/change_group.go deleted file mode 100644 index 08e6922be0..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/change_group.go +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package mutation - -import ( - "fmt" - "sort" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" -) - -const ( - // AllGroupName is the name used in ChangeGroups that is used for - // changes that have not been partitioned into groups - AllGroupName = "all" - - // UnmatchedGroupName is the name used in ChangeGroups for - // a group that was not selected by the user-specified groups - UnmatchedGroupName = "unmatched" - - // indexedGroupNamePrefix is the prefix used for dynamically named ChangeGroups - indexedGroupNamePrefix = "group-" -) - -// empty is used internally when referring to an empty struct instance -var empty struct{} - -// indexedGroupName returns the group name to use for the given indexed group -func indexedGroupName(index int) string { - return fmt.Sprintf("%s%d", indexedGroupNamePrefix, index) -} - -// ChangeGroup holds changes for a specific group of pods -type ChangeGroup struct { - // Name is a logical name for these changes - Name string - // Changes contains the changes in this group - Changes Changes - // PodsState contains the state of all the pods in this group - PodsState PodsState -} - -// ChangeStats contains key numbers for a ChangeGroup, used to execute an upgrade budget -type ChangeStats struct { - // TargetPods is the number of pods we should have in the final state - TargetPods int `json:"targetPods"` - // CurrentPods is the current number of pods in the cluster that might be using resources - CurrentPods int `json:"currentPods"` - // CurrentSurge is the number of pods above the target the cluster is using - CurrentSurge int `json:"currentSurge"` - // CurrentRunningReadyPods is the number of pods that are running and have joined the current master - CurrentRunningReadyPods int `json:"currentRunningReady"` - // CurrentUnavailable is the number of pods below the target the cluster is currently using - CurrentUnavailable int `json:"currentUnavailable"` -} - -// ChangeStats calculates and returns the ChangeStats for this ChangeGroup -func (s ChangeGroup) ChangeStats() ChangeStats { - // when we're done, we should have ToKeep + ToCreate pods in the group. 
-	targetPodsCount := len(s.Changes.ToKeep) + len(s.Changes.ToCreate)
-
-	currentPodsCount := s.PodsState.CurrentPodsCount()
-
-	// surge is the number of pods potentially consuming any resources we currently have above the target
-	currentSurge := currentPodsCount - targetPodsCount
-
-	currentRunningReadyPods := len(s.PodsState.RunningReady)
-
-	// unavailable is the number of "running and ready" pods that are missing compared to the target,
-	// in other words pods that are not (or not yet) running and ready
-	currentUnavailable := targetPodsCount - currentRunningReadyPods
-
-	return ChangeStats{
-		TargetPods:              targetPodsCount,
-		CurrentPods:             currentPodsCount,
-		CurrentSurge:            currentSurge,
-		CurrentRunningReadyPods: currentRunningReadyPods,
-		CurrentUnavailable:      currentUnavailable,
-	}
-}
-
-// calculatePerformableChanges calculates the PerformableChanges for this group with the given budget
-func (s ChangeGroup) calculatePerformableChanges(
-	budget v1alpha1.ChangeBudget,
-	podRestrictions *PodRestrictions,
-	result *PerformableChanges,
-) error {
-	changeStats := s.ChangeStats()
-
-	log.V(1).Info(
-		"Calculating performable changes for group",
-		"group_name", s.Name,
-		"change_stats", changeStats,
-		"pods_state_status", s.PodsState.Status(),
-		"pods_state_summary", s.PodsState.Summary(),
-	)
-
-	// ensure we consider removing terminal pods first and the master node last in these changes
-	sort.SliceStable(
-		s.Changes.ToDelete,
-		sortPodsByTerminalFirstMasterNodeLastAndCreationTimestampAsc(
-			s.PodsState.Terminal,
-			s.PodsState.MasterNodePod,
-			s.Changes.ToDelete,
-		),
-	)
-
-	// ensure we create master nodes first in this group
-	sort.SliceStable(
-		s.Changes.ToCreate,
-		sortPodsToCreateByMasterNodesFirstThenNameAsc(s.Changes.ToCreate),
-	)
-
-	// TODO: MaxUnavailable and MaxSurge would be great to have as intstrs, but due to
-	// https://github.com/kubernetes-sigs/kubebuilder/issues/442 this is not currently an option.
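// Illustrative example (editorial note, not part of the original file): with the stats from the
// walkthrough above and a budget of MaxSurge=1/MaxUnavailable=1, the creation loop below stops
// right away because CurrentSurge (1) already meets MaxSurge, and the deletion loop can only
// remove terminal pods because CurrentUnavailable (1) already meets MaxUnavailable.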
- maxSurge := budget.MaxSurge - //maxSurge, err := intstr.GetValueFromIntOrPercent( - // &s.Definition.ChangeBudget.MaxSurge, - // targetPodsCount, - // true, - //) - //if err != nil { - // return err - //} - - maxUnavailable := budget.MaxUnavailable - //maxUnavailable, err := intstr.GetValueFromIntOrPercent( - // &s.Definition.ChangeBudget.MaxUnavailable, - // targetPodsCount, - // false, - //) - //if err != nil { - // return err - //} - - // schedule for creation as many pods as we can - for _, newPodToCreate := range s.Changes.ToCreate { - if changeStats.CurrentSurge >= maxSurge { - log.V(1).Info( - "Hit the max surge limit in a group.", - "group_name", s.Name, - "namespace", newPodToCreate.Pod.Namespace, - "change_stats", changeStats, - ) - result.MaxSurgeGroups = append(result.MaxSurgeGroups, s.Name) - break - } - - changeStats.CurrentSurge++ - changeStats.CurrentPods++ - - log.V(1).Info( - "Scheduling a pod for creation", - "group_name", s.Name, - "change_stats", changeStats, - "pod_name", newPodToCreate.Pod.Name, - "namespace", newPodToCreate.Pod.Namespace, - "mismatch_reasons", newPodToCreate.MismatchReasons, - ) - - result.ToCreate = append(result.ToCreate, newPodToCreate) - } - - // schedule for deletion as many pods as we can - for _, pod := range s.Changes.ToDelete { - if _, ok := s.PodsState.Terminal[pod.Pod.Name]; ok { - // removing terminal pods do not affect our availability budget, so we can always delete - result.ToDelete = append(result.ToDelete, pod) - continue - } - - if err := podRestrictions.CanDelete(pod.Pod); err != nil { - // cannot remove pod due to restrictions - result.RestrictedPods[pod.Pod.Name] = err - continue - } - - if changeStats.CurrentUnavailable >= maxUnavailable { - log.V(1).Info( - "Hit the max unavailable limit in a group.", - "group_name", s.Name, - "change_stats", changeStats, - ) - - result.MaxUnavailableGroups = append(result.MaxUnavailableGroups, s.Name) - break - } - - changeStats.CurrentUnavailable++ - changeStats.CurrentRunningReadyPods-- - - log.V(1).Info( - "Scheduling a pod for deletion", - "group_name", s.Name, - "pod_name", pod.Pod.Name, - "namespace", pod.Pod.Namespace, - "change_stats", changeStats, - ) - - podRestrictions.Remove(pod.Pod) - result.ToDelete = append(result.ToDelete, pod) - } - - return nil -} - -// simulatePerformableChangesApplied applies the performable changes to the ChangeGroup -func (s *ChangeGroup) simulatePerformableChangesApplied( - performableChanges PerformableChanges, -) { - // convert the scheduled for deletion pods to a map for faster lookup - toDeleteByName := make(map[string]struct{}, len(performableChanges.ToDelete)) - for _, pod := range performableChanges.ToDelete { - toDeleteByName[pod.Pod.Name] = empty - } - - // for each pod we intend to remove, simulate a deletion - for i := len(s.Changes.ToDelete) - 1; i >= 0; i-- { - podToDelete := s.Changes.ToDelete[i] - if _, ok := toDeleteByName[podToDelete.Pod.Name]; ok { - // pop from list of pods to delete - s.Changes.ToDelete = append(s.Changes.ToDelete[:i], s.Changes.ToDelete[i+1:]...) 
- } - } - - // convert the scheduled for creation pods to a map for faster lookup - toCreateByName := make(map[string]struct{}, len(performableChanges.ToCreate)) - for _, podToCreate := range performableChanges.ToCreate { - toCreateByName[podToCreate.Pod.Name] = empty - } - - // for each pod we intend to create, simulate the creation - for i := len(s.Changes.ToCreate) - 1; i >= 0; i-- { - podToCreate := s.Changes.ToCreate[i] - if _, ok := toCreateByName[podToCreate.Pod.Name]; ok { - // pop from list of pods to create - s.Changes.ToCreate = append(s.Changes.ToCreate[:i], s.Changes.ToCreate[i+1:]...) - // pretend we created it, which would move it to Pending - s.PodsState.Pending[podToCreate.Pod.Name] = podToCreate.Pod - // also pretend we're intending to keep it instead of creating it - s.Changes.ToKeep = append(s.Changes.ToKeep, pod.PodWithConfig{Pod: podToCreate.Pod, Config: podToCreate.PodSpecCtx.Config}) - } - } - - var remaining PodsState - // update the current pod states to match the simulated changes - s.PodsState, remaining = s.PodsState.Partition(s.Changes) - // The partition above removes any pods not part of the .Changes from the PodsState, which includes pods that have - // been deleted by an external process or another reconciliation iteration. These should still exist in the - // simulated PodsState, so we need to add these back in specifically. - for _, pod := range remaining.Deleting { - s.PodsState.Deleting[pod.Name] = pod - } - - // deleted pods should eventually go into a Deleting state, - // simulate that for deleted pods to be counted as unavailable - for _, pod := range performableChanges.ToDelete { - s.PodsState.Deleting[pod.Pod.Name] = pod.Pod - } -} - -// ChangeGroups is a list of ChangeGroup -type ChangeGroups []ChangeGroup - -// calculatePerformableChanges calculates the PerformableChanges for each group with the given budget -func (s ChangeGroups) calculatePerformableChanges( - budget v1alpha1.ChangeBudget, - podRestrictions *PodRestrictions, - result *PerformableChanges, -) error { - for _, group := range s { - if err := group.calculatePerformableChanges(budget, podRestrictions, result); err != nil { - return err - } - } - - return nil -} diff --git a/operators/pkg/controller/elasticsearch/mutation/change_group_test.go b/operators/pkg/controller/elasticsearch/mutation/change_group_test.go deleted file mode 100644 index a2897a1247..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/change_group_test.go +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package mutation - -import ( - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestChangeGroups_CalculatePerformableChanges(t *testing.T) { - tests := []struct { - name string - s ChangeGroups - budget v1alpha1.ChangeBudget - podRestrictions PodRestrictions - performableChanges *PerformableChanges - want *PerformableChanges - wantErr bool - }{ - { - name: "empty", - s: ChangeGroups{}, - performableChanges: &PerformableChanges{}, - want: &PerformableChanges{}, - }, - { - name: "can only create if unavailable budget is maxed out", - s: ChangeGroups{ - ChangeGroup{ - Name: "foo", - Changes: Changes{ - ToCreate: []PodToCreate{{Pod: namedPod("1").Pod}}, - ToDelete: pod.PodsWithConfig{namedPod("2")}, - }, - PodsState: initializePodsState(PodsState{ - RunningReady: map[string]corev1.Pod{"2": namedPod("2").Pod}, - }), - }, - }, - performableChanges: &PerformableChanges{}, - budget: v1alpha1.ChangeBudget{ - MaxSurge: 1, - MaxUnavailable: 0, - }, - want: &PerformableChanges{ - Changes: Changes{ - ToCreate: []PodToCreate{ - {Pod: namedPod("1").Pod, PodSpecCtx: pod.PodSpecContext{}}, - }, - }, - MaxUnavailableGroups: []string{"foo"}, - }, - }, - { - name: "can only delete if surge budget is maxed out", - s: ChangeGroups{ - ChangeGroup{ - Name: "foo", - Changes: Changes{ - ToCreate: []PodToCreate{{Pod: namedPod("1").Pod}}, - ToDelete: pod.PodsWithConfig{namedPod("2")}, - }, - PodsState: initializePodsState(PodsState{ - RunningReady: map[string]corev1.Pod{"2": namedPod("2").Pod, "3": namedPod("3").Pod}, - }), - }, - }, - performableChanges: &PerformableChanges{}, - budget: v1alpha1.ChangeBudget{ - MaxSurge: 1, - MaxUnavailable: 1, - }, - want: &PerformableChanges{ - Changes: Changes{ - ToDelete: pod.PodsWithConfig{ - namedPod("2"), - }, - }, - MaxSurgeGroups: []string{"foo"}, - }, - }, - { - name: "can both delete and create up to the surge and unavailability budgets are exhausted", - s: ChangeGroups{ - ChangeGroup{ - Name: "foo", - Changes: Changes{ - ToCreate: []PodToCreate{{Pod: namedPod("create-1").Pod}, {Pod: namedPod("create-2").Pod}}, - ToKeep: pod.PodsWithConfig{namedPod("keep-3")}, - ToDelete: pod.PodsWithConfig{namedPod("delete-1"), namedPod("delete-2")}, - }, - PodsState: initializePodsState(PodsState{ - RunningReady: map[string]corev1.Pod{ - "delete-1": namedPod("delete-1").Pod, - "delete-2": namedPod("delete-2").Pod, - "keep-3": namedPod("keep-3").Pod, - }, - }), - }, - }, - performableChanges: &PerformableChanges{}, - budget: v1alpha1.ChangeBudget{ - MaxSurge: 1, - MaxUnavailable: 1, - }, - want: &PerformableChanges{ - Changes: Changes{ - ToCreate: []PodToCreate{ - {Pod: namedPod("create-1").Pod, PodSpecCtx: pod.PodSpecContext{}}, - }, - ToDelete: pod.PodsWithConfig{ - namedPod("delete-1"), - }, - }, - MaxSurgeGroups: []string{"foo"}, - MaxUnavailableGroups: []string{"foo"}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := tt.s.calculatePerformableChanges(tt.budget, &tt.podRestrictions, tt.performableChanges) - if (err != nil) != tt.wantErr { - t.Errorf("ChangeGroups.calculatePerformableChanges() error = %v, wantErr %v", err, tt.wantErr) - return - } - - assert.Equal(t, tt.want, tt.performableChanges) - }) - } -} - -func TestChangeGroups_ChangeStats(t *testing.T) { - type fields struct { - Name string 
- Definition v1alpha1.GroupingDefinition - Changes Changes - PodsState PodsState - } - tests := []struct { - name string - fields fields - want ChangeStats - }{ - { - name: "sample", - fields: fields{ - Definition: v1alpha1.GroupingDefinition{ - Selector: metav1.LabelSelector{}, - }, - Changes: Changes{ - ToCreate: []PodToCreate{{Pod: namedPod("create-1").Pod}, {Pod: namedPod("create-2").Pod}}, - ToKeep: pod.PodsWithConfig{namedPod("keep-3")}, - ToDelete: pod.PodsWithConfig{namedPod("delete-1"), namedPod("delete-2")}, - }, - PodsState: initializePodsState(PodsState{ - RunningReady: map[string]corev1.Pod{ - "delete-1": namedPod("delete-1").Pod, - "delete-2": namedPod("delete-2").Pod, - "keep-3": namedPod("keep-3").Pod, - }, - }), - }, - want: ChangeStats{ - TargetPods: 3, - CurrentPods: 3, - CurrentSurge: 0, - CurrentRunningReadyPods: 3, - CurrentUnavailable: 0, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := ChangeGroup{ - Name: tt.fields.Name, - Changes: tt.fields.Changes, - PodsState: tt.fields.PodsState, - } - - assert.Equal(t, tt.want, s.ChangeStats()) - }) - } -} - -func TestChangeGroups_simulatePerformableChangesApplied(t *testing.T) { - type fields struct { - Name string - Changes Changes - PodsState PodsState - } - type args struct { - performableChanges PerformableChanges - } - tests := []struct { - name string - fields fields - args args - want ChangeGroup - }{ - { - name: "deletion", - fields: fields{ - Changes: Changes{ - ToKeep: pod.PodsWithConfig{namedPod("bar")}, - ToDelete: pod.PodsWithConfig{namedPod("foo"), namedPod("baz")}, - }, - PodsState: initializePodsState(PodsState{ - Deleting: map[string]corev1.Pod{"baz": namedPod("baz").Pod}, - RunningReady: map[string]corev1.Pod{"foo": namedPod("foo").Pod, "bar": namedPod("bar").Pod}, - }), - }, - args: args{ - performableChanges: PerformableChanges{ - Changes: Changes{ - ToDelete: pod.PodsWithConfig{namedPod("foo")}, - }, - }, - }, - want: ChangeGroup{ - Changes: Changes{ - ToKeep: pod.PodsWithConfig{namedPod("bar")}, - ToDelete: pod.PodsWithConfig{namedPod("baz")}, - }, - PodsState: initializePodsState(PodsState{ - RunningReady: map[string]corev1.Pod{"bar": namedPod("bar").Pod}, - Deleting: map[string]corev1.Pod{"foo": namedPod("foo").Pod, "baz": namedPod("baz").Pod}, - }), - }, - }, - { - name: "creation", - fields: fields{ - Changes: Changes{ - ToKeep: pod.PodsWithConfig{namedPod("bar")}, - ToCreate: []PodToCreate{{Pod: namedPod("foo").Pod}, {Pod: namedPod("baz").Pod}}, - }, - PodsState: initializePodsState(PodsState{ - RunningReady: map[string]corev1.Pod{"bar": namedPod("bar").Pod}, - }), - }, - args: args{ - performableChanges: PerformableChanges{ - Changes: Changes{ - ToCreate: []PodToCreate{{Pod: namedPod("foo").Pod}}, - }, - }, - }, - want: ChangeGroup{ - Changes: Changes{ - ToCreate: []PodToCreate{{Pod: namedPod("baz").Pod}}, - ToKeep: pod.PodsWithConfig{namedPod("bar"), namedPod("foo")}, - }, - PodsState: initializePodsState(PodsState{ - RunningReady: map[string]corev1.Pod{"bar": namedPod("bar").Pod}, - Pending: map[string]corev1.Pod{"foo": namedPod("foo").Pod}, - }), - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := &ChangeGroup{ - Name: tt.fields.Name, - Changes: tt.fields.Changes, - PodsState: tt.fields.PodsState, - } - s.simulatePerformableChangesApplied(tt.args.performableChanges) - - assert.Equal(t, &tt.want, s) - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/mutation/changes.go 
b/operators/pkg/controller/elasticsearch/mutation/changes.go
deleted file mode 100644
index 445e21e9a3..0000000000
--- a/operators/pkg/controller/elasticsearch/mutation/changes.go
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-// or more contributor license agreements. Licensed under the Elastic License;
-// you may not use this file except in compliance with the Elastic License.
-
-package mutation
-
-import (
-	"github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1"
-	"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label"
-	"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod"
-	corev1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/labels"
-)
-
-// Changes represents the changes to perform on the Elasticsearch pods
-type Changes struct {
-	ToCreate PodsToCreate
-	ToKeep   pod.PodsWithConfig
-	ToDelete pod.PodsWithConfig
-}
-
-// PodToCreate defines a pod to be created, along with
-// the reasons why it doesn't match any existing pod
-type PodToCreate struct {
-	Pod             corev1.Pod
-	PodSpecCtx      pod.PodSpecContext
-	MismatchReasons map[string][]string
-}
-
-// PodsToCreate is simply a list of PodToCreate
-type PodsToCreate []PodToCreate
-
-// Pods is a helper method to retrieve pods only (no spec context or mismatch reasons)
-func (p PodsToCreate) Pods() []corev1.Pod {
-	pods := make([]corev1.Pod, len(p))
-	for i, pod := range p {
-		pods[i] = pod.Pod
-	}
-	return pods
-}
-
-// EmptyChanges creates an empty Changes with empty arrays (not nil)
-func EmptyChanges() Changes {
-	return Changes{
-		ToCreate: []PodToCreate{},
-		ToKeep:   pod.PodsWithConfig{},
-		ToDelete: pod.PodsWithConfig{},
-	}
-}
-
-// HasChanges returns true if there are topology changes to perform
-func (c Changes) HasChanges() bool {
-	return len(c.ToCreate) > 0 || len(c.ToDelete) > 0
-}
-
-// HasMasterChanges returns true if some masters are involved in the topology changes.
-func (c Changes) HasMasterChanges() bool {
-	for _, pod := range c.ToCreate {
-		if label.IsMasterNode(pod.Pod) {
-			return true
-		}
-	}
-	for _, pod := range c.ToDelete {
-		if label.IsMasterNode(pod.Pod) {
-			return true
-		}
-	}
-	return false
-}
-
-// IsEmpty returns true if this set has no deletion, creation or kept pods
-func (c Changes) IsEmpty() bool {
-	return len(c.ToCreate) == 0 && len(c.ToDelete) == 0 && len(c.ToKeep) == 0
-}
-
-// Copy copies this Changes.
It copies the underlying slices and maps, but not their contents -func (c Changes) Copy() Changes { - res := Changes{ - ToCreate: append([]PodToCreate{}, c.ToCreate...), - ToKeep: append(pod.PodsWithConfig{}, c.ToKeep...), - ToDelete: append(pod.PodsWithConfig{}, c.ToDelete...), - } - return res -} - -// Group groups the current changes into groups based on the GroupingDefinitions -func (c Changes) Group( - groupingDefinitions []v1alpha1.GroupingDefinition, - remainingPodsState PodsState, -) (ChangeGroups, error) { - remainingChanges := c.Copy() - groups := make([]ChangeGroup, 0, len(groupingDefinitions)+1) - - for i, gd := range groupingDefinitions { - group := ChangeGroup{ - Name: indexedGroupName(i), - } - selector, err := metav1.LabelSelectorAsSelector(&gd.Selector) - if err != nil { - return nil, err - } - - group.Changes, remainingChanges = remainingChanges.Partition(selector) - if group.Changes.IsEmpty() { - // selector does not match anything - continue - } - group.PodsState, remainingPodsState = remainingPodsState.Partition(group.Changes) - groups = append(groups, group) - } - - if !remainingChanges.IsEmpty() { - // remaining changes do not match any group definition selector, group them together as a single group - groups = append(groups, ChangeGroup{ - Name: UnmatchedGroupName, - PodsState: remainingPodsState, - Changes: remainingChanges, - }) - } - - return groups, nil -} - -// Partition divides changes into 2 changes based on the given selector: -// changes that match the selector, and changes that don't -func (c Changes) Partition(selector labels.Selector) (Changes, Changes) { - matchingChanges := EmptyChanges() - remainingChanges := EmptyChanges() - - matchingChanges.ToKeep, remainingChanges.ToKeep = partitionPodsBySelector(selector, c.ToKeep) - matchingChanges.ToDelete, remainingChanges.ToDelete = partitionPodsBySelector(selector, c.ToDelete) - for _, toCreate := range c.ToCreate { - if selector.Matches(labels.Set(toCreate.Pod.Labels)) { - matchingChanges.ToCreate = append(matchingChanges.ToCreate, toCreate) - } else { - remainingChanges.ToCreate = append(remainingChanges.ToCreate, toCreate) - } - } - - return matchingChanges, remainingChanges -} - -// partitionPodsBySelector partitions pods into two sets: one for pods matching the selector and one for the rest. it -// guarantees that the order of the pods are not changed. -func partitionPodsBySelector(selector labels.Selector, pods pod.PodsWithConfig) (pod.PodsWithConfig, pod.PodsWithConfig) { - matchingPods := make(pod.PodsWithConfig, 0, len(pods)) - remainingPods := make(pod.PodsWithConfig, 0, len(pods)) - for _, p := range pods { - if selector.Matches(labels.Set(p.Pod.Labels)) { - matchingPods = append(matchingPods, p) - } else { - remainingPods = append(remainingPods, p) - } - } - return matchingPods, remainingPods -} diff --git a/operators/pkg/controller/elasticsearch/mutation/changes_test.go b/operators/pkg/controller/elasticsearch/mutation/changes_test.go deleted file mode 100644 index bcb54c6a36..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/changes_test.go +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package mutation - -import ( - "testing" - "time" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -var emptyPodWithConfig = pod.PodWithConfig{Pod: corev1.Pod{}} - -func namedPod(name string) pod.PodWithConfig { - return pod.PodWithConfig{ - Pod: corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - }, - }, - Config: settings.CanonicalConfig{}, - } -} - -func namedPodWithCreationTimestamp(name string, creationTimestamp time.Time) pod.PodWithConfig { - return pod.PodWithConfig{ - Pod: corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - CreationTimestamp: metav1.Time{Time: creationTimestamp}, - }, - }, - Config: settings.CanonicalConfig{}, - } -} - -func withLabels(p pod.PodWithConfig, labels map[string]string) pod.PodWithConfig { - p.Pod.Labels = labels - return p -} - -func TestChanges_HasChanges(t *testing.T) { - type fields struct { - ToCreate []PodToCreate - ToKeep pod.PodsWithConfig - ToDelete pod.PodsWithConfig - } - tests := []struct { - name string - fields fields - want bool - }{ - { - name: "empty has no changes", - fields: fields{}, - want: false, - }, - { - name: "something to keep still has no changes", - fields: fields{ - ToKeep: pod.PodsWithConfig{emptyPodWithConfig}, - }, - want: false, - }, - { - name: "something to create has changes", - fields: fields{ - ToCreate: []PodToCreate{{}}, - }, - want: true, - }, - { - name: "something to delete has changes", - fields: fields{ - ToDelete: pod.PodsWithConfig{emptyPodWithConfig}, - }, - want: true, - }, - { - name: "create and delete has changes", - fields: fields{ - ToCreate: []PodToCreate{{}}, - ToDelete: pod.PodsWithConfig{emptyPodWithConfig}, - }, - want: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - c := Changes{ - ToCreate: tt.fields.ToCreate, - ToKeep: tt.fields.ToKeep, - ToDelete: tt.fields.ToDelete, - } - if got := c.HasChanges(); got != tt.want { - t.Errorf("Changes.HasChanges() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestChanges_IsEmpty(t *testing.T) { - type fields struct { - ToCreate []PodToCreate - ToKeep pod.PodsWithConfig - ToDelete pod.PodsWithConfig - } - tests := []struct { - name string - fields fields - want bool - }{ - { - name: "no inner list should be empty", - fields: fields{}, - want: true, - }, - { - name: "empty inner lists should be empty", - fields: fields{ - ToCreate: []PodToCreate{}, - ToKeep: pod.PodsWithConfig{}, - ToDelete: pod.PodsWithConfig{}, - }, - want: true, - }, - { - name: "with pod to create should not be empty", - fields: fields{ - ToCreate: []PodToCreate{{}}, - ToKeep: pod.PodsWithConfig{}, - ToDelete: pod.PodsWithConfig{}, - }, - want: false, - }, - { - name: "with pod to keep not be empty", - fields: fields{ - ToCreate: []PodToCreate{}, - ToKeep: pod.PodsWithConfig{{}}, - ToDelete: pod.PodsWithConfig{}, - }, - want: false, - }, - { - name: "with pod to delete should not empty", - fields: fields{ - ToCreate: []PodToCreate{}, - ToKeep: pod.PodsWithConfig{}, - ToDelete: pod.PodsWithConfig{{}}, - }, - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - c := Changes{ - ToCreate: tt.fields.ToCreate, - ToKeep: tt.fields.ToKeep, - ToDelete: tt.fields.ToDelete, - } - if got 
:= c.IsEmpty(); got != tt.want { - t.Errorf("Changes.IsEmpty() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestChanges_Group(t *testing.T) { - fooMatchingGroupingDefinition := v1alpha1.GroupingDefinition{ - Selector: metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, - } - - fooPod := withLabels(namedPod("1"), map[string]string{"foo": "bar"}) - barPod := withLabels(namedPod("2"), map[string]string{"bar": "bar"}) - bazPodToCreate := PodToCreate{ - Pod: withLabels(namedPod("3"), map[string]string{"baz": "bar"}).Pod, - PodSpecCtx: pod.PodSpecContext{PodTemplate: corev1.PodTemplateSpec{Spec: corev1.PodSpec{Hostname: "baz"}}}, - } - - foobarPod := withLabels(namedPod("4"), map[string]string{"foo": "bar", "bar": "baz"}) - - type args struct { - groupingDefinitions []v1alpha1.GroupingDefinition - remainingPodsState PodsState - } - tests := []struct { - name string - changes Changes - args args - want ChangeGroups - wantErr bool - }{ - { - name: "empty", - changes: Changes{}, - args: args{ - remainingPodsState: NewEmptyPodsState()}, - want: ChangeGroups{}, - }, - { - name: "no group definitions should result in a defaulted group", - changes: Changes{ToKeep: pod.PodsWithConfig{namedPod("1")}}, - args: args{ - remainingPodsState: NewEmptyPodsState(), - }, - want: ChangeGroups{ - ChangeGroup{ - Name: UnmatchedGroupName, - Changes: Changes{ - ToKeep: pod.PodsWithConfig{namedPod("1")}, - ToCreate: []PodToCreate{}, - ToDelete: pod.PodsWithConfig{}, - }, - PodsState: NewEmptyPodsState(), - }, - }, - }, - { - name: "non-matching group definitions should result in a defaulted group", - changes: Changes{ToKeep: pod.PodsWithConfig{namedPod("1")}}, - args: args{ - groupingDefinitions: []v1alpha1.GroupingDefinition{ - fooMatchingGroupingDefinition, - }, - remainingPodsState: NewEmptyPodsState(), - }, - want: ChangeGroups{ - ChangeGroup{ - Name: UnmatchedGroupName, - Changes: Changes{ - ToKeep: pod.PodsWithConfig{namedPod("1")}, - ToCreate: []PodToCreate{}, - ToDelete: pod.PodsWithConfig{}, - }, - PodsState: NewEmptyPodsState(), - }, - }, - }, - { - name: "pods should be bucketed into the groups based on the selector and include relevant PodsState", - changes: Changes{ - ToCreate: []PodToCreate{bazPodToCreate}, - ToKeep: pod.PodsWithConfig{fooPod}, - ToDelete: pod.PodsWithConfig{barPod}, - }, - args: args{ - groupingDefinitions: []v1alpha1.GroupingDefinition{ - fooMatchingGroupingDefinition, - }, - remainingPodsState: initializePodsState(PodsState{ - Pending: map[string]corev1.Pod{fooPod.Pod.Name: fooPod.Pod}, - RunningJoining: map[string]corev1.Pod{barPod.Pod.Name: barPod.Pod}, - }), - }, - want: ChangeGroups{ - ChangeGroup{ - Name: indexedGroupName(0), - Changes: Changes{ - ToCreate: []PodToCreate{}, - ToKeep: pod.PodsWithConfig{fooPod}, - ToDelete: pod.PodsWithConfig{}, - }, - PodsState: initializePodsState(PodsState{ - Pending: map[string]corev1.Pod{fooPod.Pod.Name: fooPod.Pod}, - }), - }, - ChangeGroup{ - Name: UnmatchedGroupName, - Changes: Changes{ - ToKeep: pod.PodsWithConfig{}, - ToDelete: pod.PodsWithConfig{barPod}, - ToCreate: []PodToCreate{bazPodToCreate}, - }, - PodsState: initializePodsState(PodsState{ - RunningJoining: map[string]corev1.Pod{barPod.Pod.Name: barPod.Pod}, - }), - }, - }, - }, - { - name: "should match when there are multiple labels", - changes: Changes{ - ToCreate: []PodToCreate{bazPodToCreate}, - ToKeep: pod.PodsWithConfig{fooPod}, - ToDelete: pod.PodsWithConfig{foobarPod}, - }, - args: args{ - groupingDefinitions: []v1alpha1.GroupingDefinition{ - { - 
Selector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "foo": "bar", - "bar": "baz", - }, - }, - }, - }, - remainingPodsState: initializePodsState(PodsState{ - Pending: map[string]corev1.Pod{fooPod.Pod.Name: fooPod.Pod}, - RunningJoining: map[string]corev1.Pod{foobarPod.Pod.Name: foobarPod.Pod}, - }), - }, - want: ChangeGroups{ - ChangeGroup{ - Name: indexedGroupName(0), - Changes: Changes{ - ToCreate: []PodToCreate{}, - ToKeep: pod.PodsWithConfig{}, - ToDelete: pod.PodsWithConfig{foobarPod}, - }, - PodsState: initializePodsState(PodsState{ - RunningJoining: map[string]corev1.Pod{foobarPod.Pod.Name: foobarPod.Pod}, - }), - }, - ChangeGroup{ - Name: UnmatchedGroupName, - Changes: Changes{ - ToKeep: pod.PodsWithConfig{fooPod}, - ToDelete: pod.PodsWithConfig{}, - ToCreate: []PodToCreate{bazPodToCreate}, - }, - PodsState: initializePodsState(PodsState{ - Pending: map[string]corev1.Pod{fooPod.Pod.Name: fooPod.Pod}, - }), - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := tt.changes - got, err := s.Group(tt.args.groupingDefinitions, tt.args.remainingPodsState) - if (err != nil) != tt.wantErr { - t.Errorf("Changes.Group() error = %v, wantErr %v", err, tt.wantErr) - return - } - - assert.Equal(t, tt.want, got) - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/mutation/comparison/comparison.go b/operators/pkg/controller/elasticsearch/mutation/comparison/comparison.go deleted file mode 100644 index 1f406fd775..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/comparison/comparison.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package comparison - -import ( - "fmt" - - logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" -) - -var log = logf.Log.WithName("mutation") - -type Comparison struct { - Match bool - MismatchReasons []string -} - -func NewComparison(match bool, mismatchReasons ...string) Comparison { - return Comparison{Match: match, MismatchReasons: mismatchReasons} -} - -var ComparisonMatch = NewComparison(true) - -func ComparisonMismatch(mismatchReasons ...string) Comparison { - return NewComparison(false, mismatchReasons...) -} - -func NewStringComparison(expected string, actual string, name string) Comparison { - return NewComparison(expected == actual, fmt.Sprintf("%s mismatch: expected %s, actual %s", name, expected, actual)) -} diff --git a/operators/pkg/controller/elasticsearch/mutation/comparison/config.go b/operators/pkg/controller/elasticsearch/mutation/comparison/config.go deleted file mode 100644 index ef354e2456..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/comparison/config.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package comparison - -import ( - "fmt" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" -) - -func compareConfigs(actual settings.CanonicalConfig, expected settings.CanonicalConfig) Comparison { - // check for settings in actual that do not match expected - diff := actual.Diff(expected.CanonicalConfig, toIgnore) - if len(diff) == 0 { - return ComparisonMatch - } - - reasons := make([]string, len(diff)) - for i, mismatch := range diff { - reasons[i] = fmt.Sprintf("Configuration setting mismatch: %s.", mismatch) - } - return ComparisonMismatch(reasons...) -} - -var toIgnore = []string{ - settings.NodeName, - settings.DiscoveryZenMinimumMasterNodes, - settings.ClusterInitialMasterNodes, - settings.NetworkPublishHost, -} diff --git a/operators/pkg/controller/elasticsearch/mutation/comparison/config_test.go b/operators/pkg/controller/elasticsearch/mutation/comparison/config_test.go deleted file mode 100644 index 3f7487160a..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/comparison/config_test.go +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package comparison - -import ( - "encoding/json" - "reflect" - "testing" - - common "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/stretchr/testify/require" -) - -func Test_compareConfigs(t *testing.T) { - var intSlice map[string]interface{} - require.NoError(t, json.Unmarshal([]byte(`{"b": [1, 2, 3]}`), &intSlice)) - tests := []struct { - name string - expected settings.CanonicalConfig - actual settings.CanonicalConfig - want Comparison - }{ - { - name: "same config", - expected: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - "c": "d", - })}, - actual: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - "c": "d", - })}, - want: ComparisonMatch, - }, - { - name: "different config item", - expected: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - "c": "d", - })}, - actual: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - "c": "eee", - })}, - want: ComparisonMismatch("Configuration setting mismatch: c."), - }, - { - name: "one more item in expected", - expected: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - "c": "d", - "e": "f", - })}, - actual: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - "c": "d", - })}, - want: ComparisonMismatch("Configuration setting mismatch: e."), - }, - { - name: "one more item in actual", - expected: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - "c": "d", - })}, - actual: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - "c": "d", - "e": "f", - })}, - want: ComparisonMismatch("Configuration setting mismatch: e."), - }, - { - name: "some fields should be ignored", - expected: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - 
"a": "b", - settings.NodeName: "expected-node", - settings.DiscoveryZenMinimumMasterNodes: 1, - settings.ClusterInitialMasterNodes: []string{"x"}, - settings.NetworkPublishHost: "1.2.3.4", - })}, - actual: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - settings.NodeName: "actual-node", - settings.DiscoveryZenMinimumMasterNodes: 12, - settings.ClusterInitialMasterNodes: []string{"x", "y", "z"}, - settings.NetworkPublishHost: "1.2.3.45", - })}, - want: ComparisonMatch, - }, - { - name: "some fields should be ignored but should not prevent mismatch", - expected: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "b", - settings.NodeName: "expected-node", - settings.DiscoveryZenMinimumMasterNodes: 1, - settings.ClusterInitialMasterNodes: []string{"x"}, - settings.NetworkPublishHost: "1.2.3.4", - })}, - actual: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": "mismatch", - settings.NodeName: "actual-node", - settings.DiscoveryZenMinimumMasterNodes: 12, - settings.ClusterInitialMasterNodes: []string{"x", "y", "z"}, - settings.NetworkPublishHost: "1.2.3.45", - })}, - want: ComparisonMismatch("Configuration setting mismatch: a."), - }, - { - name: "int config", - expected: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": intSlice, - "b": 2, - })}, - actual: settings.CanonicalConfig{CanonicalConfig: common.MustCanonicalConfig(map[string]interface{}{ - "a": intSlice, - "b": 3, - })}, - want: ComparisonMismatch("Configuration setting mismatch: b."), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := compareConfigs(tt.actual, tt.expected); !reflect.DeepEqual(got, tt.want) { - t.Errorf("compareConfigs() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/mutation/comparison/pod.go b/operators/pkg/controller/elasticsearch/mutation/comparison/pod.go deleted file mode 100644 index 11863d92d9..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/comparison/pod.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package comparison - -import ( - "fmt" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - corev1 "k8s.io/api/core/v1" -) - -// PodMatchesSpec compares an existing pod and its config with an expected pod spec, and returns true if the -// existing pod matches the expected pod spec, or returns a list of reasons why it does not match. 
-//
-// A pod matches the spec if:
-// - it has the same namespace and base name
-// - it has the same configuration
-// - it has the same PVC spec
-// - it was created using the same pod template (whose hash is stored in the pod annotations)
-func PodMatchesSpec(
-	es v1alpha1.Elasticsearch,
-	podWithConfig pod.PodWithConfig,
-	spec pod.PodSpecContext,
-	state reconcile.ResourcesState,
-) (bool, []string, error) {
-	pod := podWithConfig.Pod
-	config := podWithConfig.Config
-
-	comparisons := []Comparison{
-		// require same namespace
-		NewStringComparison(es.Namespace, pod.Namespace, "Pod namespace"),
-		// require same base pod name
-		NewStringComparison(name.Basename(name.NewPodName(es.Name, spec.NodeSpec)), name.Basename(pod.Name), "Pod base name"),
-		// require strict template equality
-		ComparePodTemplate(spec.PodTemplate, pod),
-		// require pvc compatibility
-		comparePersistentVolumeClaims(pod.Spec.Volumes, spec.NodeSpec.VolumeClaimTemplates, state),
-		// require strict config equality
-		compareConfigs(config, spec.Config),
-	}
-
-	for _, c := range comparisons {
-		if !c.Match {
-			return false, c.MismatchReasons, nil
-		}
-	}
-
-	return true, nil, nil
-}
-
-// ComparePodTemplate returns a ComparisonMatch if the given template matches the template used to create the given pod.
-// Comparison is based on the hash of the pod template (computed before resource creation), stored in a label in the pod.
-//
-// Since the hash was computed from the existing pod template, before its creation, it only accounts
-// for fields in the pod that were set by the operator.
-// Any defaulted environment variables, resources, or containers added by Kubernetes or a mutating webhook are ignored.
-// Any label or annotation set by something external (user, webhook, defaulted value) is also ignored.
-func ComparePodTemplate(template corev1.PodTemplateSpec, existingPod corev1.Pod) Comparison {
-	existingPodHash := hash.GetTemplateHashLabel(existingPod.Labels)
-	if existingPodHash == "" {
-		return ComparisonMismatch(fmt.Sprintf("No %s label set on the existing pod", hash.TemplateHashLabelName))
-	}
-	if hash.HashObject(template) != existingPodHash {
-		return ComparisonMismatch("Spec hash and running pod spec hash are not equal")
-	}
-	return ComparisonMatch
-}
diff --git a/operators/pkg/controller/elasticsearch/mutation/comparison/pvc.go b/operators/pkg/controller/elasticsearch/mutation/comparison/pvc.go
deleted file mode 100644
index a22a07ad54..0000000000
--- a/operators/pkg/controller/elasticsearch/mutation/comparison/pvc.go
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-// or more contributor license agreements. Licensed under the Elastic License;
-// you may not use this file except in compliance with the Elastic License.
- -package comparison - -import ( - "fmt" - "reflect" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - corev1 "k8s.io/api/core/v1" -) - -// volumeAndPVC holds a volume and a PVC -type volumeAndPVC struct { - volume corev1.Volume - pvc corev1.PersistentVolumeClaim -} - -// comparePersistentVolumeClaims returns true if the expected persistent volume claims are found in the list of volumes -func comparePersistentVolumeClaims( - actual []corev1.Volume, - expected []corev1.PersistentVolumeClaim, - state reconcile.ResourcesState, -) Comparison { - // TODO: handle extra PVCs that are in volumes, but not in expected claim templates - - var volumeAndPVCs []volumeAndPVC - for _, volume := range actual { - if volume.PersistentVolumeClaim == nil { - continue - } - claimName := volume.PersistentVolumeClaim.ClaimName - - pvc, err := state.FindPVCByName(claimName) - if err != nil { - // this is rather unexpected, and we have two options: - // 1. return the error to our caller (possibly changing our signature a little) - // 2. consider the pod not matching - // we usually expect all claims from a pod to exist, so we consider this case exceptional, but we chose to - // go with option 2 because we'd rather see the pod be replaced than potentially getting stuck in the - // reconciliation loop without being able to reconcile further. we also chose to log it as an error level - // to call more attention to the fact this was occurring because we would like to try to get a better - // understanding of the scenarios in which this may happen. - msg := "Volume is referring to unknown PVC" - log.Error(err, msg) - return ComparisonMismatch(fmt.Sprintf("%s: %s", msg, err)) - } - - volumeAndPVCs = append(volumeAndPVCs, volumeAndPVC{volume: volume, pvc: pvc}) - } - -ExpectedTemplates: - for _, pvcTemplate := range expected { - for i, actualVolumeAndPVC := range volumeAndPVCs { - if templateMatchesActualVolumeAndPvc(pvcTemplate, actualVolumeAndPVC) { - // remove the current from the remaining volumes so it cannot be used to match another template - volumeAndPVCs = append(volumeAndPVCs[:i], volumeAndPVCs[i+1:]...) 
- - // continue the outer loop because this pvc template had a match - continue ExpectedTemplates - } - } - - // at this point, we were unable to match the template with any of the volumes, so the comparison should not - // match - - volumeNames := make([]string, len(volumeAndPVCs)) - for _, avp := range volumeAndPVCs { - volumeNames = append(volumeNames, avp.volume.Name) - } - - return ComparisonMismatch(fmt.Sprintf( - "Unmatched volumeClaimTemplate: %s has no match in volumes %v", - pvcTemplate.Name, - volumeNames, - )) - } - - return ComparisonMatch -} - -// templateMatchesActualVolumeAndPvc returns true if the pvc matches the volumeAndPVC -func templateMatchesActualVolumeAndPvc(pvcTemplate corev1.PersistentVolumeClaim, actualVolumeAndPVC volumeAndPVC) bool { - - if actualVolumeAndPVC.pvc.DeletionTimestamp != nil { - // PVC is being deleted - return false - } - - if pvcTemplate.Name != actualVolumeAndPVC.volume.Name { - // name from template does not match actual, no match - return false - } - - // labels - for templateLabelKey, templateLabelValue := range pvcTemplate.Labels { - if actualValue, ok := actualVolumeAndPVC.pvc.Labels[templateLabelKey]; !ok { - // actual is missing a key, no match - return false - } else if templateLabelValue != actualValue { - // values differ, no match - return false - } - } - - if !reflect.DeepEqual(pvcTemplate.Spec.AccessModes, actualVolumeAndPVC.pvc.Spec.AccessModes) { - return false - } - - if !reflect.DeepEqual(pvcTemplate.Spec.Resources, actualVolumeAndPVC.pvc.Spec.Resources) { - return false - } - - // this may be set to nil to be defaulted, so here we're assuming that the storage class name - // may have been defaulted. this may cause an unintended match, which can be worked around by - // being explicit in the pvc template spec. - if pvcTemplate.Spec.StorageClassName != nil && - !reflect.DeepEqual(pvcTemplate.Spec.StorageClassName, actualVolumeAndPVC.pvc.Spec.StorageClassName) { - return false - } - - if pvcTemplate.Spec.VolumeMode != nil && - !reflect.DeepEqual(pvcTemplate.Spec.VolumeMode, actualVolumeAndPVC.pvc.Spec.VolumeMode) { - return false - } - - if !reflect.DeepEqual(pvcTemplate.Spec.Selector, actualVolumeAndPVC.pvc.Spec.Selector) { - return false - } - - return true -} diff --git a/operators/pkg/controller/elasticsearch/mutation/mutation.go b/operators/pkg/controller/elasticsearch/mutation/mutation.go deleted file mode 100644 index 10f488eecf..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/mutation.go +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package mutation - -import logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" - -var ( - log = logf.Log.WithName("mutation") -) diff --git a/operators/pkg/controller/elasticsearch/mutation/performable.go b/operators/pkg/controller/elasticsearch/mutation/performable.go deleted file mode 100644 index a3563b26b8..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/performable.go +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
-
-package mutation
-
-import (
-	"github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1"
-)
-
-var (
-	// pass1ChangeBudget is a very restrictive change budget
-	// used for the first pass when calculating performable changes
-	pass1ChangeBudget = v1alpha1.ChangeBudget{}
-)
-
-// PerformableChanges contains changes that can be performed to pod resources
-type PerformableChanges struct {
-	// Changes that can be safely performed
-	Changes
-
-	// informational values
-	// RestrictedPods are pods that were prevented from being scheduled for deletion
-	RestrictedPods map[string]error
-	// MaxSurgeGroups are groups that hit their max surge.
-	MaxSurgeGroups []string
-	// MaxUnavailableGroups are groups that hit their max unavailable number.
-	MaxUnavailableGroups []string
-}
-
-// initializePerformableChanges initializes nil values in PerformableChanges
-func initializePerformableChanges(changes PerformableChanges) PerformableChanges {
-	if changes.RestrictedPods == nil {
-		changes.RestrictedPods = make(map[string]error)
-	}
-	return changes
-}
-
-// CalculatePerformableChanges calculates which changes can be performed in the current state.
-func CalculatePerformableChanges(
-	strategy v1alpha1.UpdateStrategy,
-	allPodChanges Changes,
-	allPodsState PodsState,
-) (*PerformableChanges, error) {
-	performableChanges := initializePerformableChanges(PerformableChanges{})
-
-	// resolve the change budget
-	budget := strategy.ResolveChangeBudget()
-
-	// allChanges is a ChangeGroup that contains all the changes in a single group
-	allChanges := ChangeGroup{
-		Name:      AllGroupName,
-		Changes:   allPodChanges,
-		PodsState: allPodsState,
-	}
-
-	// group all our changes into groups based on the potentially user-specified groups
-	changeGroups, err := allPodChanges.Group(strategy.Groups, allPodsState)
-	if err != nil {
-		return nil, err
-	}
-	log.V(1).Info("Created change groups", "name", AllGroupName, "count", len(changeGroups))
-
-	podRestrictions := NewPodRestrictions(allPodsState)
-
-	// pass 1:
-	// - give every group a chance to perform changes, but do not allow for any surge or unavailability. this is
-	//   intended to ensure that we're able to recover from larger failures (e.g. a pod failing or a failure domain
-	//   falling apart). this is to ensure that the surge/unavailability room that's created by the failing pods does
-	//   not get eaten up by other, simultaneous changes.
-	if err := changeGroups.calculatePerformableChanges(
-		pass1ChangeBudget,
-		&podRestrictions,
-		&performableChanges,
-	); err != nil {
-		return nil, err
-	}
-
-	// apply the performable changes to the "all" (ungrouped) change group. this is done in order to account for the
-	// changes pass 1 is intending to do.
-	allChanges.simulatePerformableChangesApplied(performableChanges)
-
-	// pass 2:
-	// - calculate the performable changes using the proper budget.
-	if err := allChanges.calculatePerformableChanges(
-		budget,
-		&podRestrictions,
-		&performableChanges,
-	); err != nil {
-		return nil, err
-	}
-
-	// pass 3:
-	// - in which we allow breaking the surge budget if we have changes we would like to apply, but were not allowed to
-	//   due to the surge budget
-	// - this is required for scenarios such as converting from one MasterData node to one Master and one Data node. In
-	//   this situation we *must* create both new nodes before we delete the existing one.
-	// TODO: consider requiring this to be enabled in the update strategy?
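// Illustrative example (editorial note, not part of the original file): going from a single
// master+data pod to one dedicated master pod and one dedicated data pod means two creations
// before any deletion is safe; if the regular budget (e.g. MaxSurge=1) left pass 2 with no
// performable changes and no pods are in transient states, the block below retries with
// MaxSurge raised to CurrentSurge+1 so that one additional pod can be created.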
- if !allChanges.Changes.IsEmpty() && - !performableChanges.HasChanges() && - !allPodsState.HasPodsInTransientStates() { - - changeStats := allChanges.ChangeStats() - newBudget := v1alpha1.ChangeBudget{ - MaxSurge: changeStats.CurrentSurge + 1, - } - - // - here we do not have to simulate performing changes because we know it has no changes - - if err := allChanges.calculatePerformableChanges( - newBudget, - &podRestrictions, - &performableChanges, - ); err != nil { - return nil, err - } - } - - return &performableChanges, nil -} diff --git a/operators/pkg/controller/elasticsearch/mutation/performable_test.go b/operators/pkg/controller/elasticsearch/mutation/performable_test.go deleted file mode 100644 index 787ac7fa0f..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/performable_test.go +++ /dev/null @@ -1,354 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package mutation - -import ( - "fmt" - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestPerformableChanges_HasChanges(t *testing.T) { - tests := []struct { - name string - changes PerformableChanges - want bool - }{ - {name: "empty", changes: PerformableChanges{}, want: false}, - { - name: "creation", - changes: PerformableChanges{Changes: Changes{ToCreate: []PodToCreate{{}}}}, - want: true, - }, - { - name: "deletion", - changes: PerformableChanges{Changes: Changes{ToDelete: pod.PodsWithConfig{{}}}}, - want: true, - }, - { - name: "creation and deletion", - changes: PerformableChanges{ - Changes: Changes{ - ToCreate: []PodToCreate{{}}, - ToDelete: pod.PodsWithConfig{{}}, - }, - }, - want: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - c := tt.changes - if got := c.HasChanges(); got != tt.want { - t.Errorf("PerformableChanges.HasChanges() = %v, want %v", got, tt.want) - } - }) - } -} - -func generatePodsN(n int, namePrefix string, labels map[string]string) pod.PodsWithConfig { - pods := make(pod.PodsWithConfig, n) - for i := range pods { - pods[i] = withLabels(namedPod(fmt.Sprintf("%s%d", namePrefix, i)), labels) - } - return pods -} - -func podListToMap(pods []corev1.Pod) map[string]corev1.Pod { - result := make(map[string]corev1.Pod) - for _, pod := range pods { - result[pod.Name] = pod - } - return result -} - -func concatPodList(podLists ...pod.PodsWithConfig) pod.PodsWithConfig { - res := make(pod.PodsWithConfig, 0) - for _, pods := range podLists { - res = append(res, pods...) 
- } - return res -} - -func podToCreateList(pods []corev1.Pod) []PodToCreate { - res := make([]PodToCreate, 0, len(pods)) - for _, p := range pods { - res = append(res, PodToCreate{Pod: p}) - } - return res -} - -func TestCalculatePerformableChanges(t *testing.T) { - podsA := generatePodsN(4, "a-", map[string]string{"zone": "a"}) - podsB := generatePodsN(4, "b-", map[string]string{"zone": "b"}) - podsC := generatePodsN(4, "c-", map[string]string{"zone": "c"}) - - updateStrategyWithZonesAsGroups := v1alpha1.UpdateStrategy{ - Groups: []v1alpha1.GroupingDefinition{ - {Selector: metav1.LabelSelector{MatchLabels: map[string]string{"zone": "a"}}}, - {Selector: metav1.LabelSelector{MatchLabels: map[string]string{"zone": "b"}}}, - {Selector: metav1.LabelSelector{MatchLabels: map[string]string{"zone": "c"}}}, - }, - } - - masterDataLabels := label.NodeTypesDataLabelName.AsMap(true) - label.NodeTypesMasterLabelName.Set(true, masterDataLabels) - - masterDataPods := generatePodsN(2, "master-data-", masterDataLabels) - masterPods := generatePodsN(2, "master-", label.NodeTypesMasterLabelName.AsMap(true)) - dataPods := generatePodsN(2, "data-", label.NodeTypesDataLabelName.AsMap(true)) - - type args struct { - strategy v1alpha1.UpdateStrategy - allPodChanges Changes - allPodsState PodsState - } - - tests := []struct { - name string - args args - want PerformableChanges - wantErr bool - }{ - { - name: "3 dying pods", - args: args{ - strategy: v1alpha1.UpdateStrategy{}, - allPodChanges: Changes{ - ToCreate: podToCreateList(generatePodsN(3, "new-", map[string]string{"zone": "a"}).Pods()), - }, - allPodsState: initializePodsState(PodsState{ - Deleting: podListToMap(generatePodsN(3, "old-", map[string]string{"zone": "a"}).Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - ToCreate: podToCreateList(generatePodsN(1, "new-", map[string]string{"zone": "a"}).Pods()), - }, - MaxSurgeGroups: []string{UnmatchedGroupName, AllGroupName}, - }), - }, - { - name: "scale down two pods", - args: args{ - strategy: v1alpha1.UpdateStrategy{}, - allPodChanges: Changes{ - ToKeep: concatPodList(podsA[:2], podsC[:2]), - ToDelete: concatPodList(podsB[:2]), - }, - allPodsState: initializePodsState(PodsState{ - RunningReady: podListToMap(concatPodList(podsA[:2], podsB[:2], podsC[:2]).Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - ToDelete: concatPodList(podsB[:2]), - }, - }), - }, - { - name: "basic scale-down with a failed zone", - args: args{ - strategy: v1alpha1.UpdateStrategy{}, - allPodChanges: Changes{ - ToKeep: concatPodList(podsA[:2], podsC[:2]), - ToDelete: concatPodList(podsB[:2]), - }, - allPodsState: initializePodsState(PodsState{ - RunningReady: podListToMap(concatPodList(podsA[:2], podsC[:2]).Pods()), - Terminal: podListToMap(podsB[:2].Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - ToDelete: concatPodList(podsB[:2]), - }, - }), - }, - { - name: "scale-down with groups", - args: args{ - strategy: updateStrategyWithZonesAsGroups, - allPodChanges: Changes{ - ToKeep: concatPodList(podsA[:2], podsC[:2]), - ToDelete: concatPodList(podsB[:2]), - }, - allPodsState: initializePodsState(PodsState{ - RunningReady: podListToMap(concatPodList(podsA[:2], podsC[:2]).Pods()), - Terminal: podListToMap(podsB[:2].Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - ToDelete: concatPodList(podsB[:2]), - }, - }), - }, - { - name: "multi-zone 
failure recovery during rolling change without groups", - args: args{ - strategy: v1alpha1.UpdateStrategy{}, - allPodChanges: Changes{ - ToCreate: podToCreateList(concatPodList(podsA[2:4], podsB[2:4], podsC[2:4]).Pods()), - ToKeep: concatPodList(), - ToDelete: concatPodList(podsA[:2], podsB[:2], podsC[:2]), - }, - allPodsState: initializePodsState(PodsState{ - RunningReady: podListToMap(concatPodList(podsA[:2], podsC[:2]).Pods()), - Terminal: podListToMap(podsB[:2].Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - // note that this is not an optimal solution, as zone B is now completely down and we used our change - // budget trying to rotate nodes in A - // but since no groups where specified, we have no knowledge of a "zone B" - ToCreate: []PodToCreate{{Pod: podsA[2].Pod}, {Pod: podsA[3].Pod}}, - ToDelete: concatPodList(podsB[:2]), - }, - MaxSurgeGroups: []string{UnmatchedGroupName, AllGroupName}, - MaxUnavailableGroups: []string{UnmatchedGroupName, AllGroupName}, - }), - }, - { - name: "multi-zone failure recovery during rolling change with groups", - args: args{ - strategy: updateStrategyWithZonesAsGroups, - allPodChanges: Changes{ - ToCreate: podToCreateList(concatPodList(podsA[2:4], podsB[2:4], podsC[2:4]).Pods()), - ToKeep: concatPodList(), - ToDelete: concatPodList(podsA[:2], podsB[:2], podsC[:2]), - }, - allPodsState: initializePodsState(PodsState{ - RunningReady: podListToMap(concatPodList(podsA[:2], podsC[:2]).Pods()), - Terminal: podListToMap(podsB[:2].Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - // we might have expected podsA[2] be be created here, but it can't be. why? - // trivia: which phase does a terminal pod (failed/succeeded) go to when a delete issued? 
- ToCreate: []PodToCreate{{Pod: podsB[2].Pod}, {Pod: podsB[3].Pod}}, - ToDelete: concatPodList(podsB[:2]), - }, - - MaxSurgeGroups: []string{indexedGroupName(0), indexedGroupName(2), AllGroupName}, - MaxUnavailableGroups: []string{indexedGroupName(0), indexedGroupName(2), AllGroupName}, - }), - }, - { - name: "cannot end up without master or data nodes when removing nodes", - args: args{ - strategy: updateStrategyWithZonesAsGroups, - allPodChanges: Changes{ - ToKeep: concatPodList(), - ToDelete: concatPodList(masterPods, dataPods), - }, - allPodsState: initializePodsState(PodsState{ - RunningReady: podListToMap(concatPodList(masterPods, dataPods).Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - ToDelete: concatPodList(masterPods[:1], dataPods[:1]), - }, - RestrictedPods: map[string]error{ - masterPods[1].Pod.Name: ErrNotEnoughMasterEligiblePods, - dataPods[1].Pod.Name: ErrNotEnoughDataEligiblePods, - }, - }), - }, - { - name: "going from mdi node to dedicated m/d nodes", - args: args{ - strategy: updateStrategyWithZonesAsGroups, - allPodChanges: Changes{ - ToCreate: podToCreateList(concatPodList(masterPods[:1], dataPods[:1]).Pods()), - ToKeep: concatPodList(), - ToDelete: concatPodList(masterDataPods[:1]), - }, - allPodsState: initializePodsState(PodsState{ - RunningReady: podListToMap(concatPodList(masterDataPods[:1]).Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - ToCreate: []PodToCreate{{Pod: masterPods[0].Pod}, {Pod: dataPods[0].Pod}}, - }, - RestrictedPods: map[string]error{ - masterDataPods[0].Pod.Name: ErrNotEnoughMasterEligiblePods, - }, - MaxSurgeGroups: []string{UnmatchedGroupName}, - }), - }, - { - name: "going from dedicated m/d nodes to mdi node", - args: args{ - strategy: updateStrategyWithZonesAsGroups, - allPodChanges: Changes{ - ToCreate: podToCreateList(concatPodList(masterDataPods[:1]).Pods()), - ToKeep: concatPodList(), - ToDelete: concatPodList(masterPods[:1], dataPods[:1]), - }, - allPodsState: initializePodsState(PodsState{ - RunningReady: podListToMap(concatPodList(masterPods[:1], dataPods[:1]).Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{ - ToCreate: []PodToCreate{{Pod: masterDataPods[0].Pod}}, - }, - RestrictedPods: map[string]error{ - masterPods[0].Pod.Name: ErrNotEnoughMasterEligiblePods, - dataPods[0].Pod.Name: ErrNotEnoughDataEligiblePods, - }, - MaxSurgeGroups: []string{UnmatchedGroupName, AllGroupName}, - }), - }, - { - name: "going from dedicated m/d nodes to mdi node with an existing mdi node", - args: args{ - strategy: updateStrategyWithZonesAsGroups, - allPodChanges: Changes{ - ToCreate: podToCreateList(concatPodList(masterDataPods[:1]).Pods()), - ToKeep: concatPodList(masterDataPods[1:]), - ToDelete: concatPodList(masterPods[:1], dataPods[:1]), - }, - allPodsState: initializePodsState(PodsState{ - RunningJoining: podListToMap(concatPodList(masterDataPods[1:]).Pods()), - RunningReady: podListToMap(concatPodList(masterPods[:1], dataPods[:1]).Pods()), - }), - }, - want: initializePerformableChanges(PerformableChanges{ - Changes: Changes{}, - // we have to wait for the mdi node to join before we can start deleting master/data nodes - RestrictedPods: map[string]error{ - masterPods[0].Pod.Name: ErrNotEnoughMasterEligiblePods, - dataPods[0].Pod.Name: ErrNotEnoughDataEligiblePods, - }, - MaxSurgeGroups: []string{UnmatchedGroupName, AllGroupName}, - }), - }, - } - - for _, tt := range tests { - t.Run(tt.name, 
func(t *testing.T) { - got, err := CalculatePerformableChanges(tt.args.strategy, tt.args.allPodChanges, tt.args.allPodsState) - if (err != nil) != tt.wantErr { - t.Errorf("CalculatePerformableChanges() error = %v, wantErr %v", err, tt.wantErr) - return - } - - assert.Equal(t, tt.want, *got) - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/mutation/podrestrictions.go b/operators/pkg/controller/elasticsearch/mutation/podrestrictions.go deleted file mode 100644 index a43dce2b4c..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/podrestrictions.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package mutation - -import ( - "errors" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - corev1 "k8s.io/api/core/v1" -) - -var ( - // ErrNotEnoughMasterEligiblePods is an error used if a master-eligible pod cannot be deleted. - ErrNotEnoughMasterEligiblePods = errors.New("not enough master eligible pods left") - // ErrNotEnoughDataEligiblePods is an error used if a data-eligible pod cannot be deleted. - ErrNotEnoughDataEligiblePods = errors.New("not enough data eligible pods left") -) - -// PodRestrictions can be used to verify that invariants around available pods are not broken. -type PodRestrictions struct { - MasterNodeNames map[string]struct{} - DataNodeNames map[string]struct{} -} - -// NewPodRestrictions creates a new PodRestrictions by looking at the current state of pods. -func NewPodRestrictions(podsState PodsState) PodRestrictions { - masterEligiblePods := make(map[string]struct{}) - dataEligiblePods := make(map[string]struct{}) - - // restrictions should only count master / data nodes that are known good - // this has the drawback of only being able to delete nodes when there is an elected master in the cluster. - for name, pod := range podsState.RunningReady { - if label.IsMasterNode(pod) { - masterEligiblePods[name] = empty - } - if label.IsDataNode(pod) { - dataEligiblePods[name] = empty - } - } - - return PodRestrictions{ - MasterNodeNames: masterEligiblePods, - DataNodeNames: dataEligiblePods, - } -} - -// CanDelete returns an error if the pod cannot be safely deleted -func (r *PodRestrictions) CanDelete(pod corev1.Pod) error { - switch { - case label.IsMasterNode(pod) && isTheOnly(pod.Name, r.MasterNodeNames): - return ErrNotEnoughMasterEligiblePods - case label.IsDataNode(pod) && isTheOnly(pod.Name, r.DataNodeNames): - return ErrNotEnoughDataEligiblePods - default: - return nil - } -} - -// isTheOnly returns true if the name is the only entry in the map -func isTheOnly(name string, fromMap map[string]struct{}) bool { - _, exists := fromMap[name] - if len(fromMap) == 1 && exists { - return true - } - return false -} - -// Remove removes the pod from the restrictions. -func (r *PodRestrictions) Remove(pod corev1.Pod) { - delete(r.MasterNodeNames, pod.Name) - delete(r.DataNodeNames, pod.Name) -} diff --git a/operators/pkg/controller/elasticsearch/mutation/podrestrictions_test.go b/operators/pkg/controller/elasticsearch/mutation/podrestrictions_test.go deleted file mode 100644 index 5a517e0444..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/podrestrictions_test.go +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package mutation - -import ( - "reflect" - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" -) - -func podListToSetLike(pods []corev1.Pod) map[string]struct{} { - result := make(map[string]struct{}) - for _, pod := range pods { - result[pod.Name] = empty - } - return result -} - -func TestNewPodRestrictions(t *testing.T) { - masterPod := withLabels(namedPod("master"), label.NodeTypesMasterLabelName.AsMap(true)).Pod - dataPod := withLabels(namedPod("data"), label.NodeTypesDataLabelName.AsMap(true)).Pod - - type args struct { - podsState PodsState - } - tests := []struct { - name string - args args - want PodRestrictions - }{ - { - name: "uses RunningReady state", - args: args{ - podsState: initializePodsState(PodsState{ - RunningReady: podListToMap([]corev1.Pod{ - namedPod("foo").Pod, - masterPod, - dataPod, - }), - }), - }, - want: PodRestrictions{ - MasterNodeNames: podListToSetLike([]corev1.Pod{masterPod}), - DataNodeNames: podListToSetLike([]corev1.Pod{dataPod}), - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := NewPodRestrictions(tt.args.podsState); !reflect.DeepEqual(got, tt.want) { - t.Errorf("NewPodRestrictions() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestPodRestrictions_CanDelete(t *testing.T) { - masterPod := withLabels(namedPod("master"), label.NodeTypesMasterLabelName.AsMap(true)).Pod - dataPod := withLabels(namedPod("data"), label.NodeTypesDataLabelName.AsMap(true)).Pod - - type args struct { - pod corev1.Pod - } - tests := []struct { - name string - podRestrictions PodRestrictions - args args - wantErr error - }{ - { - name: "cant delete last master node", - podRestrictions: PodRestrictions{ - MasterNodeNames: podListToSetLike([]corev1.Pod{masterPod}), - }, - args: args{ - pod: masterPod, - }, - wantErr: ErrNotEnoughMasterEligiblePods, - }, - { - name: "can delete non-last master node", - podRestrictions: PodRestrictions{ - MasterNodeNames: podListToSetLike([]corev1.Pod{masterPod, namedPod("bar").Pod}), - }, - args: args{ - pod: masterPod, - }, - }, - { - name: "cant delete last data node", - podRestrictions: PodRestrictions{ - DataNodeNames: podListToSetLike([]corev1.Pod{dataPod}), - }, - args: args{ - pod: dataPod, - }, - wantErr: ErrNotEnoughDataEligiblePods, - }, - { - name: "can delete non-last data node", - podRestrictions: PodRestrictions{ - DataNodeNames: podListToSetLike([]corev1.Pod{dataPod, namedPod("bar").Pod}), - }, - args: args{ - pod: dataPod, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := tt.podRestrictions.CanDelete(tt.args.pod) - - if tt.wantErr == nil { - assert.NoError(t, err) - } else { - assert.Equal(t, tt.wantErr, err) - } - }) - } -} - -func TestPodRestrictions_Remove(t *testing.T) { - type args struct { - pod corev1.Pod - } - tests := []struct { - name string - podRestrictions PodRestrictions - args args - want PodRestrictions - }{ - { - name: "can delete", - podRestrictions: PodRestrictions{ - MasterNodeNames: podListToSetLike([]corev1.Pod{namedPod("foo").Pod, namedPod("bar").Pod}), - DataNodeNames: podListToSetLike([]corev1.Pod{namedPod("foo").Pod, namedPod("bar").Pod}), - }, - args: args{ - pod: namedPod("foo").Pod, - }, - want: PodRestrictions{ - MasterNodeNames: 
podListToSetLike([]corev1.Pod{namedPod("bar").Pod}), - DataNodeNames: podListToSetLike([]corev1.Pod{namedPod("bar").Pod}), - }, - }, - { - name: "can delete nonexistent without failing", - args: args{ - pod: namedPod("foo").Pod, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tt.podRestrictions.Remove(tt.args.pod) - - assert.Equal(t, tt.want, tt.podRestrictions) - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/mutation/pods_state.go b/operators/pkg/controller/elasticsearch/mutation/pods_state.go deleted file mode 100644 index 858f00f721..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/pods_state.go +++ /dev/null @@ -1,314 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package mutation - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - corev1 "k8s.io/api/core/v1" -) - -// PodsState contains state about different pods related to a cluster. -type PodsState struct { - // Pending contains pods in the PodPending phase - Pending map[string]corev1.Pod - // RunningJoining contains pods in the PodRunning phase that are NOT part of the cluster - RunningJoining map[string]corev1.Pod - // RunningReady contains pods in the PodRunning phase that are part of the cluster - RunningReady map[string]corev1.Pod - // RunningUnknown contains pods in the PodRunning phase that may or may not be part of the cluster. This usually - // happens because we were unable to determine the current cluster state. - RunningUnknown map[string]corev1.Pod - // Unknown contains pods in the PodUnknown phase (e.g Kubelet is not reporting their status) - Unknown map[string]corev1.Pod - // Terminal contains pods in a PodFailed or PodSucceeded state. - Terminal map[string]corev1.Pod - // Deleting contains pods that have been deleted, but have not yet been fully processed for deletion. - Deleting map[string]corev1.Pod - - // MasterNodePod if non-nil is the Pod that currently is the elected master. A master might still be elected even - // if this is nil, it just means that we were unable to get it from the current observed cluster state. - MasterNodePod *corev1.Pod -} - -// NewPodsState creates a new PodsState categorizing pods based on the provided state and intended changes. -func NewPodsState( - resourcesState reconcile.ResourcesState, - observedState observer.State, -) PodsState { - podsState := NewEmptyPodsState() - - // pending Pods are pods that have been created in the API but are not scheduled or running yet. 
- for _, pod := range resourcesState.CurrentPodsByPhase[corev1.PodPending] { - podsState.Pending[pod.Pod.Name] = pod.Pod - } - - if observedState.ClusterState != nil { - // since we have a cluster state, attempt to categorize pods further into Joining/Ready and capture the - // MasterNodePod - nodesByName := observedState.ClusterState.NodesByNodeName() - masterNodeName := observedState.ClusterState.MasterNodeName() - - for _, pod := range resourcesState.CurrentPodsByPhase[corev1.PodRunning] { - if _, ok := nodesByName[pod.Pod.Name]; ok { - // the pod is found in the cluster state, so count it as ready - podsState.RunningReady[pod.Pod.Name] = pod.Pod - } else { - // if the pod is not found in the cluster state, we assume it's supposed to join - podsState.RunningJoining[pod.Pod.Name] = pod.Pod - } - - if pod.Pod.Name == masterNodeName { - // create a new reference here, otherwise we would be setting the master node pod to the iterator - masterNodePod := pod - podsState.MasterNodePod = &masterNodePod.Pod - } - } - } else { - // no cluster state was available, so all the pods go into the RunningUnknown state - for _, pod := range resourcesState.CurrentPodsByPhase[corev1.PodRunning] { - podsState.RunningUnknown[pod.Pod.Name] = pod.Pod - } - } - - for _, pod := range resourcesState.CurrentPodsByPhase[corev1.PodSucceeded] { - podsState.Terminal[pod.Pod.Name] = pod.Pod - } - for _, pod := range resourcesState.CurrentPodsByPhase[corev1.PodFailed] { - podsState.Terminal[pod.Pod.Name] = pod.Pod - } - for _, pod := range resourcesState.CurrentPodsByPhase[corev1.PodUnknown] { - podsState.Unknown[pod.Pod.Name] = pod.Pod - } - - // deletingPods are pods we have issued a delete request for, but haven't disappeared from the API yet - for _, pod := range resourcesState.DeletingPods { - podsState.Deleting[pod.Pod.Name] = pod.Pod - } - - return podsState -} - -// NewEmptyPodsState initializes a PodsState with empty maps. -func NewEmptyPodsState() PodsState { - return initializePodsState(PodsState{}) -} - -// initializePodsState ensures that all maps in the PodsState are non-nil -func initializePodsState(state PodsState) PodsState { - if state.Pending == nil { - state.Pending = make(map[string]corev1.Pod) - } - if state.RunningJoining == nil { - state.RunningJoining = make(map[string]corev1.Pod) - } - if state.RunningReady == nil { - state.RunningReady = make(map[string]corev1.Pod) - } - if state.RunningUnknown == nil { - state.RunningUnknown = make(map[string]corev1.Pod) - } - if state.Unknown == nil { - state.Unknown = make(map[string]corev1.Pod) - } - if state.Terminal == nil { - state.Terminal = make(map[string]corev1.Pod) - } - if state.Deleting == nil { - state.Deleting = make(map[string]corev1.Pod) - } - return state -} - -// CurrentPodsCount returns the count of pods that might be consuming resources in the Kubernetes cluster. -func (s PodsState) CurrentPodsCount() int { - return len(s.Pending) + - len(s.RunningJoining) + - len(s.RunningReady) + - len(s.RunningUnknown) + - len(s.Unknown) + - len(s.Deleting) -} - -// Partition partitions the PodsState into two: one set that contains pods in the provided Changes, and one set -// containing the rest. 
-func (s PodsState) Partition(changes Changes) (PodsState, PodsState) { - selected := NewEmptyPodsState() - selected.MasterNodePod = s.MasterNodePod - - remaining := s - - // no need to consider changes.ToCreate here, as they will not exist in a PodsState - for _, pods := range []pod.PodsWithConfig{changes.ToDelete, changes.ToKeep} { - var partialState PodsState - partialState, remaining = remaining.partitionByPods(pods.Pods()) - selected.mergeFrom(partialState) - } - return selected, remaining -} - -// partitionByPods partitions the PodsState into two: -// - one set that contains pods in the provided list of pods -// - one set containing the rest -func (s PodsState) partitionByPods(pods []corev1.Pod) (PodsState, PodsState) { - source := s.Copy() - - selected := NewEmptyPodsState() - selected.MasterNodePod = source.MasterNodePod - - for _, pod := range pods { - switch { - case movePodToFrom(pod, selected.Pending, source.Pending): - case movePodToFrom(pod, selected.RunningJoining, source.RunningJoining): - case movePodToFrom(pod, selected.RunningReady, source.RunningReady): - case movePodToFrom(pod, selected.RunningUnknown, source.RunningUnknown): - case movePodToFrom(pod, selected.Unknown, source.Unknown): - case movePodToFrom(pod, selected.Terminal, source.Terminal): - case movePodToFrom(pod, selected.Deleting, source.Deleting): - default: - log.Info("Unable to find pod in pods state", "pod_name", pod.Name) - } - } - - return selected, source -} - -// movePodToFrom moves a pod from one map to another if it existed in from, returning true if the pod was moved -func movePodToFrom(pod corev1.Pod, to, from map[string]corev1.Pod) bool { - if _, ok := from[pod.Name]; ok { - to[pod.Name] = pod - delete(from, pod.Name) - return true - } - return false -} - -// mergeFrom merges the provided PodsState into this one. If some pods exist in both, values in "other" take precedence. -func (s *PodsState) mergeFrom(other PodsState) { - if other.MasterNodePod != nil { - s.MasterNodePod = other.MasterNodePod - } - - mapCopy(s.Pending, other.Pending) - mapCopy(s.RunningJoining, other.RunningJoining) - mapCopy(s.RunningReady, other.RunningReady) - mapCopy(s.RunningUnknown, other.RunningUnknown) - mapCopy(s.Unknown, other.Unknown) - mapCopy(s.Terminal, other.Terminal) - mapCopy(s.Deleting, other.Deleting) -} - -// PodsStateSummary contains a shorter summary of a PodsState -type PodsStateSummary struct { - Pending []string `json:"pending,omitempty"` - RunningJoining []string `json:"runningJoining,omitempty"` - RunningReady []string `json:"runningReady,omitempty"` - RunningUnknown []string `json:"runningUnknown,omitempty"` - Unknown []string `json:"unknown,omitempty"` - Terminal []string `json:"terminal,omitempty"` - Deleting []string `json:"deleting,omitempty"` - - MasterNodeName string `json:"masterNodeName,omitEmpty"` -} - -// Summary creates a summary of PodsState, useful for debug-level printing and troubleshooting. Beware that for large -// clusters this may still be very verbose and you might consider looking at Status() instead. 
-func (s PodsState) Summary() PodsStateSummary { - summary := PodsStateSummary{} - - if s.MasterNodePod != nil { - summary.MasterNodeName = s.MasterNodePod.Name - } - - summary.Pending = pod.PodMapToNames(s.Pending) - summary.RunningJoining = pod.PodMapToNames(s.RunningJoining) - summary.RunningReady = pod.PodMapToNames(s.RunningReady) - summary.RunningUnknown = pod.PodMapToNames(s.RunningUnknown) - summary.Unknown = pod.PodMapToNames(s.Unknown) - summary.Terminal = pod.PodMapToNames(s.Terminal) - summary.Deleting = pod.PodMapToNames(s.Deleting) - - return summary -} - -// PodsStateStatus is a short status of a PodsState. -type PodsStateStatus struct { - Pending int `json:"pending,omitempty"` - RunningJoining int `json:"runningJoining,omitempty"` - RunningReady int `json:"runningReady,omitempty"` - RunningUnknown int `json:"runningUnknown,omitempty"` - Unknown int `json:"unknown,omitempty"` - Terminal int `json:"terminal,omitempty"` - Deleting int `json:"deleting,omitempty"` - - MasterNodeName string `json:"masterNodeName,omitEmpty"` -} - -// Status returns a short status of the state. -func (s PodsState) Status() PodsStateStatus { - status := PodsStateStatus{ - Pending: len(s.Pending), - RunningJoining: len(s.RunningJoining), - RunningReady: len(s.RunningReady), - RunningUnknown: len(s.RunningUnknown), - Unknown: len(s.Unknown), - Terminal: len(s.Terminal), - Deleting: len(s.Deleting), - } - - if s.MasterNodePod != nil { - status.MasterNodeName = s.MasterNodePod.Name - } - - return status -} - -// Copy copies the PodsState. It copies the underlying maps, but not their contents. -func (s PodsState) Copy() PodsState { - newState := PodsState{ - MasterNodePod: s.MasterNodePod, - - Pending: make(map[string]corev1.Pod, len(s.Pending)), - RunningJoining: make(map[string]corev1.Pod, len(s.RunningJoining)), - RunningReady: make(map[string]corev1.Pod, len(s.RunningReady)), - RunningUnknown: make(map[string]corev1.Pod, len(s.RunningUnknown)), - Unknown: make(map[string]corev1.Pod, len(s.Unknown)), - Terminal: make(map[string]corev1.Pod, len(s.Terminal)), - Deleting: make(map[string]corev1.Pod, len(s.Deleting)), - } - - mapCopy(newState.Pending, s.Pending) - mapCopy(newState.RunningJoining, s.RunningJoining) - mapCopy(newState.RunningReady, s.RunningReady) - mapCopy(newState.RunningUnknown, s.RunningUnknown) - mapCopy(newState.Unknown, s.Unknown) - mapCopy(newState.Terminal, s.Terminal) - mapCopy(newState.Deleting, s.Deleting) - - return newState -} - -// HasPodsInTransientStates returns true if there are pods in transient states. -// -// Transient states are: Pending, RunningJoining, RunningUnknown, Unknown, Deleting -// Non-transient states are: RunningReady, Terminal. -func (s PodsState) HasPodsInTransientStates() bool { - if len(s.Pending) > 0 || - len(s.RunningJoining) > 0 || - len(s.RunningUnknown) > 0 || - len(s.Unknown) > 0 || - len(s.Deleting) > 0 { - return true - } - return false -} - -// mapCopy copies all key/value pairs in src into dst -func mapCopy(dst, src map[string]corev1.Pod) { - for k, v := range src { - dst[k] = v - } -} diff --git a/operators/pkg/controller/elasticsearch/mutation/pods_state_test.go b/operators/pkg/controller/elasticsearch/mutation/pods_state_test.go deleted file mode 100644 index 738a888458..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/pods_state_test.go +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. 
Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package mutation - -import ( - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/observer" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/reconcile" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" -) - -func TestNewPodsState(t *testing.T) { - exampleMasterNodePod := namedPod("master") - - type args struct { - resourcesState reconcile.ResourcesState - observedState observer.State - } - tests := []struct { - name string - args args - want PodsState - }{ - { - name: "should bucket pods into the expected states", - args: args{ - resourcesState: reconcile.ResourcesState{ - CurrentPodsByPhase: map[corev1.PodPhase]pod.PodsWithConfig{ - corev1.PodPending: {namedPod("1")}, - corev1.PodRunning: {exampleMasterNodePod, namedPod("2"), namedPod("3")}, - corev1.PodUnknown: {namedPod("5")}, - corev1.PodFailed: {namedPod("6")}, - corev1.PodSucceeded: {namedPod("7")}, - }, - DeletingPods: pod.PodsWithConfig{namedPod("8")}, - }, - observedState: observer.State{ - ClusterState: &client.ClusterState{ - MasterNode: "master-node-id", - Nodes: map[string]client.ClusterStateNode{ - "master-node-id": {Name: exampleMasterNodePod.Pod.Name}, - "a": {Name: "3"}, - }, - }, - }, - }, - want: PodsState{ - Pending: map[string]corev1.Pod{"1": namedPod("1").Pod}, - RunningJoining: map[string]corev1.Pod{"2": namedPod("2").Pod}, - RunningReady: map[string]corev1.Pod{"master": exampleMasterNodePod.Pod, "3": namedPod("3").Pod}, - RunningUnknown: map[string]corev1.Pod{}, - Unknown: map[string]corev1.Pod{"5": namedPod("5").Pod}, - Terminal: map[string]corev1.Pod{"6": namedPod("6").Pod, "7": namedPod("7").Pod}, - Deleting: map[string]corev1.Pod{"8": namedPod("8").Pod}, - - MasterNodePod: &exampleMasterNodePod.Pod, - }, - }, - { - name: "should bucket pods into the expected states when no cluster state is available", - args: args{ - resourcesState: reconcile.ResourcesState{ - CurrentPodsByPhase: map[corev1.PodPhase]pod.PodsWithConfig{ - corev1.PodPending: {namedPod("1")}, - corev1.PodRunning: {exampleMasterNodePod, namedPod("2"), namedPod("3")}, - corev1.PodUnknown: {namedPod("5")}, - corev1.PodFailed: {namedPod("6")}, - corev1.PodSucceeded: {namedPod("7")}, - }, - DeletingPods: pod.PodsWithConfig{namedPod("8")}, - }, - observedState: observer.State{}, - }, - want: PodsState{ - Pending: map[string]corev1.Pod{"1": namedPod("1").Pod}, - RunningJoining: map[string]corev1.Pod{}, - RunningReady: map[string]corev1.Pod{}, - RunningUnknown: map[string]corev1.Pod{ - "2": namedPod("2").Pod, - "master": exampleMasterNodePod.Pod, - "3": namedPod("3").Pod, - }, - Unknown: map[string]corev1.Pod{"5": namedPod("5").Pod}, - Terminal: map[string]corev1.Pod{"6": namedPod("6").Pod, "7": namedPod("7").Pod}, - Deleting: map[string]corev1.Pod{"8": namedPod("8").Pod}, - - MasterNodePod: nil, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := NewPodsState(tt.args.resourcesState, tt.args.observedState) - - assert.Equal(t, tt.want, got) - }) - } -} - -func Test_NewEmptyPodsState(t *testing.T) { - s := NewEmptyPodsState() - - assert.Nil(t, s.MasterNodePod) - - assert.NotNil(t, s.Pending) - assert.NotNil(t, s.RunningJoining) - assert.NotNil(t, s.RunningReady) - 
assert.NotNil(t, s.RunningUnknown) - assert.NotNil(t, s.Unknown) - assert.NotNil(t, s.Terminal) - assert.NotNil(t, s.Deleting) -} - -func TestPodsState_CurrentPodsCount(t *testing.T) { - tests := []struct { - name string - podsState PodsState - want int - }{ - { - name: "should count all non-terminal pods", - podsState: PodsState{ - Pending: map[string]corev1.Pod{"1": {}}, - RunningJoining: map[string]corev1.Pod{"2": {}}, - RunningReady: map[string]corev1.Pod{"3": {}}, - RunningUnknown: map[string]corev1.Pod{"4": {}}, - Unknown: map[string]corev1.Pod{"5": {}}, - Terminal: map[string]corev1.Pod{"6": {}, "6.1": {}, "6.2": {}, "6.3": {}, "6.4": {}, "6.5": {}}, - Deleting: map[string]corev1.Pod{"7": {}}, - }, - want: 6, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := tt.podsState - if got := s.CurrentPodsCount(); got != tt.want { - t.Errorf("PodsState.CurrentPodsCount() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestPodsState_Partition(t *testing.T) { - type args struct { - changes Changes - } - tests := []struct { - name string - podsState PodsState - args args - want PodsState - want1 PodsState - }{ - { - name: "a sample set", - podsState: PodsState{ - Pending: map[string]corev1.Pod{"1": namedPod("1").Pod}, - RunningJoining: map[string]corev1.Pod{"2": namedPod("2").Pod}, - RunningReady: map[string]corev1.Pod{"3": namedPod("3").Pod}, - RunningUnknown: map[string]corev1.Pod{"4": namedPod("4").Pod}, - Unknown: map[string]corev1.Pod{"5": namedPod("5").Pod}, - Terminal: map[string]corev1.Pod{"6": namedPod("6").Pod}, - Deleting: map[string]corev1.Pod{"7": namedPod("7").Pod}, - }, - args: args{ - changes: Changes{ - ToDelete: pod.PodsWithConfig{namedPod("2")}, - ToKeep: pod.PodsWithConfig{namedPod("3")}, - // expecting this to be ignored, and just kept in the remainder. - ToCreate: []PodToCreate{{Pod: namedPod("4").Pod}}, - }, - }, - want: initializePodsState(PodsState{ - RunningJoining: map[string]corev1.Pod{"2": namedPod("2").Pod}, - RunningReady: map[string]corev1.Pod{"3": namedPod("3").Pod}, - }), - want1: initializePodsState(PodsState{ - Pending: map[string]corev1.Pod{"1": namedPod("1").Pod}, - RunningUnknown: map[string]corev1.Pod{"4": namedPod("4").Pod}, - Unknown: map[string]corev1.Pod{"5": namedPod("5").Pod}, - Terminal: map[string]corev1.Pod{"6": namedPod("6").Pod}, - Deleting: map[string]corev1.Pod{"7": namedPod("7").Pod}, - }), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := tt.podsState - got, got1 := s.Partition(tt.args.changes) - - assert.Equal(t, tt.want, got, "PodsState.Partition() got") - assert.Equal(t, tt.want1, got1, "PodsState.Partition() got1") - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/mutation/sorting.go b/operators/pkg/controller/elasticsearch/mutation/sorting.go deleted file mode 100644 index c1e5f53756..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/sorting.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package mutation - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - corev1 "k8s.io/api/core/v1" -) - -// sortPodsByTerminalFirstMasterNodeLastAndCreationTimestampAsc sorts pods in a preferred deletion order: -// - terminal pods first -// - current master node always last -// - remaining pods by oldest first. -func sortPodsByTerminalFirstMasterNodeLastAndCreationTimestampAsc( - terminalPods map[string]corev1.Pod, - masterNode *corev1.Pod, - pods pod.PodsWithConfig, -) func(i, j int) bool { - return func(i, j int) bool { - iPod := pods[i].Pod - jPod := pods[j].Pod - - _, iIsTerminal := terminalPods[iPod.Name] - _, jIsTerminal := terminalPods[jPod.Name] - - switch { - case iIsTerminal && !jIsTerminal: - return true - case !iIsTerminal && jIsTerminal: - return false - case masterNode != nil && iPod.Name == masterNode.Name: - return false - case masterNode != nil && jPod.Name == masterNode.Name: - return true - default: - // if neither is the master node, fall back to sorting by creation timestamp, removing the oldest first. - return iPod.CreationTimestamp.Before(&jPod.CreationTimestamp) - } - } -} - -func comparePodByMasterNodesFirstThenNameAsc(iPod corev1.Pod, jPod corev1.Pod) bool { - iIsMaster := label.NodeTypesMasterLabelName.HasValue(true, iPod.Labels) - jIsMaster := label.NodeTypesMasterLabelName.HasValue(true, jPod.Labels) - - switch { - case iIsMaster && !jIsMaster: - // i is master, j is not, so i should come first - return true - case jIsMaster && !iIsMaster: - // i is not master, j is master, so j should come first - return false - default: - // neither or both are masters, sort by names - return iPod.Name < jPod.Name - } -} - -// sortPodsToCreateByMasterNodesFirstThenNameAsc sorts podToCreate in a preferred creation order: -// - master nodes first -// - by name otherwise, which is used to ensure a stable sort order. -func sortPodsToCreateByMasterNodesFirstThenNameAsc(podsToCreate []PodToCreate) func(i, j int) bool { - return func(i, j int) bool { - return comparePodByMasterNodesFirstThenNameAsc(podsToCreate[i].Pod, podsToCreate[j].Pod) - } -} - -// sortPodByCreationTimestampAsc is a sort function for a list of pods -func sortPodByCreationTimestampAsc(pods pod.PodsWithConfig) func(i, j int) bool { - return func(i, j int) bool { return pods[i].Pod.CreationTimestamp.Before(&pods[j].Pod.CreationTimestamp) } -} diff --git a/operators/pkg/controller/elasticsearch/mutation/sorting_test.go b/operators/pkg/controller/elasticsearch/mutation/sorting_test.go deleted file mode 100644 index 724e2a5aaa..0000000000 --- a/operators/pkg/controller/elasticsearch/mutation/sorting_test.go +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package mutation - -import ( - "sort" - "testing" - "time" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" -) - -func Test_sortPodsByMasterNodeLastAndCreationTimestampAsc(t *testing.T) { - masterNode := namedPodWithCreationTimestamp("master", time.Unix(5, 0)) - - type args struct { - terminal map[string]corev1.Pod - masterNode *pod.PodWithConfig - pods pod.PodsWithConfig - } - tests := []struct { - name string - args args - want pod.PodsWithConfig - }{ - { - name: "sample", - args: args{ - masterNode: &masterNode, - pods: pod.PodsWithConfig{ - masterNode, - namedPodWithCreationTimestamp("4", time.Unix(4, 0)), - namedPodWithCreationTimestamp("3", time.Unix(3, 0)), - namedPodWithCreationTimestamp("6", time.Unix(6, 0)), - }, - }, - want: pod.PodsWithConfig{ - namedPodWithCreationTimestamp("3", time.Unix(3, 0)), - namedPodWithCreationTimestamp("4", time.Unix(4, 0)), - namedPodWithCreationTimestamp("6", time.Unix(6, 0)), - masterNode, - }, - }, - { - name: "terminal pods first", - args: args{ - masterNode: &masterNode, - pods: pod.PodsWithConfig{ - masterNode, - namedPodWithCreationTimestamp("4", time.Unix(4, 0)), - namedPodWithCreationTimestamp("3", time.Unix(3, 0)), - namedPodWithCreationTimestamp("6", time.Unix(6, 0)), - }, - terminal: map[string]corev1.Pod{"6": namedPod("6").Pod}, - }, - want: pod.PodsWithConfig{ - namedPodWithCreationTimestamp("6", time.Unix(6, 0)), - namedPodWithCreationTimestamp("3", time.Unix(3, 0)), - namedPodWithCreationTimestamp("4", time.Unix(4, 0)), - masterNode, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - sort.SliceStable( - tt.args.pods, - sortPodsByTerminalFirstMasterNodeLastAndCreationTimestampAsc( - tt.args.terminal, - &tt.args.masterNode.Pod, - tt.args.pods, - ), - ) - - assert.Equal(t, tt.want, tt.args.pods) - }) - } -} - -func Test_sortPodsToCreateByMasterNodesFirstThenNameAsc(t *testing.T) { - masterNode5 := PodToCreate{Pod: namedPodWithCreationTimestamp("master5", time.Unix(5, 0)).Pod} - masterNode5.Pod.Labels = label.NodeTypesMasterLabelName.AsMap(true) - masterNode6 := PodToCreate{Pod: namedPodWithCreationTimestamp("master6", time.Unix(6, 0)).Pod} - masterNode6.Pod.Labels = label.NodeTypesMasterLabelName.AsMap(true) - - type args struct { - pods []PodToCreate - } - tests := []struct { - name string - args args - want []PodToCreate - }{ - { - name: "sample", - args: args{ - pods: []PodToCreate{ - {Pod: namedPodWithCreationTimestamp("4", time.Unix(4, 0)).Pod}, - masterNode6, - {Pod: namedPodWithCreationTimestamp("3", time.Unix(3, 0)).Pod}, - masterNode5, - {Pod: namedPodWithCreationTimestamp("6", time.Unix(6, 0)).Pod}, - }, - }, - want: []PodToCreate{ - masterNode5, - masterNode6, - {Pod: namedPodWithCreationTimestamp("3", time.Unix(3, 0)).Pod}, - {Pod: namedPodWithCreationTimestamp("4", time.Unix(4, 0)).Pod}, - {Pod: namedPodWithCreationTimestamp("6", time.Unix(6, 0)).Pod}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - sort.SliceStable( - tt.args.pods, - sortPodsToCreateByMasterNodesFirstThenNameAsc(tt.args.pods), - ) - - assert.Equal(t, tt.want, tt.args.pods) - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/nodespec/defaults.go b/operators/pkg/controller/elasticsearch/nodespec/defaults.go new file mode 100644 index 0000000000..23f7440924 --- /dev/null +++ 
b/operators/pkg/controller/elasticsearch/nodespec/defaults.go @@ -0,0 +1,77 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package nodespec + +import ( + "path" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/network" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" + esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" +) + +const ( + // DefaultImageRepository is the default image name without a tag + DefaultImageRepository string = "docker.elastic.co/elasticsearch/elasticsearch" + + // DefaultTerminationGracePeriodSeconds is the termination grace period for the Elasticsearch containers + DefaultTerminationGracePeriodSeconds int64 = 120 +) + +var ( + // DefaultContainerPorts are the default Elasticsearch port mappings + DefaultContainerPorts = []corev1.ContainerPort{ + {Name: "http", ContainerPort: network.HTTPPort, Protocol: corev1.ProtocolTCP}, + {Name: "transport", ContainerPort: network.TransportPort, Protocol: corev1.ProtocolTCP}, + } + + // DefaultResources for the Elasticsearch container. The JVM default heap size is 1Gi, so we + // request at least 2Gi for the container to make sure ES can work properly. + // Not applying this minimum default would make ES randomly crash (OOM) on small machines. + DefaultResources = corev1.ResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + } + + // EnvVars are environment variables injected into Elasticsearch pods. + EnvVars = append( + defaults.PodDownwardEnvVars, + []corev1.EnvVar{ + {Name: settings.EnvProbePasswordFile, Value: path.Join(esvolume.ProbeUserSecretMountPath, user.InternalProbeUserName)}, + {Name: settings.EnvProbeUsername, Value: user.InternalProbeUserName}, + {Name: settings.EnvReadinessProbeProtocol, Value: "https"}, + }..., + ) +) + +// DefaultAffinity returns the default affinity for pods in a cluster. +func DefaultAffinity(esName string) *corev1.Affinity { + return &corev1.Affinity{ + // prefer to avoid two pods in the same cluster being co-located on a single node + PodAntiAffinity: &corev1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + label.ClusterNameLabelName: esName, + }, + }, + }, + }, + }, + }, + } +} diff --git a/operators/pkg/controller/elasticsearch/nodespec/podspec.go b/operators/pkg/controller/elasticsearch/nodespec/podspec.go new file mode 100644 index 0000000000..55faf05e17 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/nodespec/podspec.go @@ -0,0 +1,113 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package nodespec + +import ( + "crypto/sha256" + "fmt" + + corev1 "k8s.io/api/core/v1" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" +) + +// BuildPodTemplateSpec builds a new PodTemplateSpec for an Elasticsearch node. +func BuildPodTemplateSpec( + es v1alpha1.Elasticsearch, + nodeSpec v1alpha1.NodeSpec, + cfg settings.CanonicalConfig, + keystoreResources *keystore.Resources, +) (corev1.PodTemplateSpec, error) { + volumes, volumeMounts := buildVolumes(es.Name, nodeSpec, keystoreResources) + labels, err := buildLabels(es, cfg, nodeSpec, keystoreResources) + if err != nil { + return corev1.PodTemplateSpec{}, err + } + + builder := defaults.NewPodTemplateBuilder(nodeSpec.PodTemplate, v1alpha1.ElasticsearchContainerName). + WithDockerImage(es.Spec.Image, stringsutil.Concat(DefaultImageRepository, ":", es.Spec.Version)) + + initContainers, err := initcontainer.NewInitContainers( + builder.Container.Image, + es.Spec.SetVMMaxMapCount, + transportCertificatesVolume(es.Name), + es.Name, + keystoreResources, + ) + if err != nil { + return corev1.PodTemplateSpec{}, err + } + + builder = builder. + WithResources(DefaultResources). + WithTerminationGracePeriod(DefaultTerminationGracePeriodSeconds). + WithPorts(DefaultContainerPorts). + WithReadinessProbe(*NewReadinessProbe()). + WithAffinity(DefaultAffinity(es.Name)). + WithEnv(EnvVars...). + WithVolumes(volumes...). + WithVolumeMounts(volumeMounts...). + WithLabels(labels). + WithInitContainers(initContainers...). 
+ WithInitContainerDefaults() + + return builder.PodTemplate, nil +} + +func transportCertificatesVolume(esName string) volume.SecretVolume { + return volume.NewSecretVolumeWithMountPath( + name.TransportCertificatesSecret(esName), + esvolume.TransportCertificatesSecretVolumeName, + esvolume.TransportCertificatesSecretVolumeMountPath, + ) +} + +func buildLabels( + es v1alpha1.Elasticsearch, + cfg settings.CanonicalConfig, + nodeSpec v1alpha1.NodeSpec, + keystoreResources *keystore.Resources, +) (map[string]string, error) { + // label with a hash of the config to rotate the pod on config changes + unpackedCfg, err := cfg.Unpack() + if err != nil { + return nil, err + } + nodeRoles := unpackedCfg.Node + cfgHash := hash.HashObject(cfg) + + // label with version + ver, err := version.Parse(es.Spec.Version) + if err != nil { + return nil, err + } + + podLabels, err := label.NewPodLabels(k8s.ExtractNamespacedName(&es), name.StatefulSet(es.Name, nodeSpec.Name), *ver, nodeRoles, cfgHash) + if err != nil { + return nil, err + } + + if keystoreResources != nil { + // label with a checksum of the secure settings to rotate the pod on secure settings change + // TODO: use hash.HashObject instead && fix the config checksum label name? + configChecksum := sha256.New224() + _, _ = configChecksum.Write([]byte(keystoreResources.Version)) + podLabels[label.ConfigChecksumLabelName] = fmt.Sprintf("%x", configChecksum.Sum(nil)) + } + + return podLabels, nil +} diff --git a/operators/pkg/controller/elasticsearch/nodespec/podspec_test.go b/operators/pkg/controller/elasticsearch/nodespec/podspec_test.go new file mode 100644 index 0000000000..91c58c760d --- /dev/null +++ b/operators/pkg/controller/elasticsearch/nodespec/podspec_test.go @@ -0,0 +1,182 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package nodespec + +import ( + "sort" + "testing" + + commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var sampleES = v1alpha1.Elasticsearch{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "namespace", + Name: "name", + Labels: map[string]string{ + "cluster-label-name": "cluster-label-value", + }, + Annotations: map[string]string{ + "cluster-annotation-name": "cluster-annotation-value", + }, + }, + Spec: v1alpha1.ElasticsearchSpec{ + Version: "7.2.0", + Nodes: []v1alpha1.NodeSpec{ + { + Name: "nodespec-1", + NodeCount: 2, + Config: &commonv1alpha1.Config{ + Data: map[string]interface{}{ + "node.attr.foo": "bar", + "node.master": "true", + "node.data": "false", + }, + }, + PodTemplate: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "pod-template-label-name": "pod-template-label-value", + }, + Annotations: map[string]string{ + "pod-template-annotation-name": "pod-template-annotation-value", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "additional-container", + }, + { + Name: "elasticsearch", + Env: []corev1.EnvVar{ + { + Name: "my-env", + Value: "my-value", + }, + }, + }, + }, + InitContainers: []corev1.Container{ + { + Name: "additional-init-container", + }, + }, + }, + }, + VolumeClaimTemplates: []corev1.PersistentVolumeClaim{}, + }, + { + Name: "nodespec-1", + NodeCount: 2, + }, + }, + }, +} + +func TestBuildPodTemplateSpec(t *testing.T) { + nodeSpec := sampleES.Spec.Nodes[0] + cfg, err := settings.NewMergedESConfig(sampleES.Name, *nodeSpec.Config) + require.NoError(t, err) + + actual, err := BuildPodTemplateSpec(sampleES, sampleES.Spec.Nodes[0], cfg, nil) + require.NoError(t, err) + + // build expected PodTemplateSpec + + terminationGracePeriodSeconds := DefaultTerminationGracePeriodSeconds + varFalse := false + + volumes, volumeMounts := buildVolumes(sampleES.Name, nodeSpec, nil) + // should be sorted + sort.Slice(volumes, func(i, j int) bool { return volumes[i].Name < volumes[j].Name }) + sort.Slice(volumeMounts, func(i, j int) bool { return volumeMounts[i].Name < volumeMounts[j].Name }) + + initContainers, err := initcontainer.NewInitContainers( + "docker.elastic.co/elasticsearch/elasticsearch:7.2.0", + nil, + transportCertificatesVolume(sampleES.Name), + sampleES.Name, + nil, + ) + require.NoError(t, err) + // should be patched with volume and env + for i := range initContainers { + initContainers[i].Env = append(initContainers[i].Env, defaults.PodDownwardEnvVars...) + initContainers[i].VolumeMounts = append(initContainers[i].VolumeMounts, volumeMounts...) + } + + // remove the prepare-fs init-container from comparison, it has its own volume mount logic + // that is harder to test + for i, c := range initContainers { + if c.Name == initcontainer.PrepareFilesystemContainerName { + initContainers = append(initContainers[:i], initContainers[i+1:]...) 
+ } + } + for i, c := range actual.Spec.InitContainers { + if c.Name == initcontainer.PrepareFilesystemContainerName { + actual.Spec.InitContainers = append(actual.Spec.InitContainers[:i], actual.Spec.InitContainers[i+1:]...) + } + } + + expected := corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "common.k8s.elastic.co/type": "elasticsearch", + "elasticsearch.k8s.elastic.co/cluster-name": "name", + "elasticsearch.k8s.elastic.co/config-template-hash": "349152269", + "elasticsearch.k8s.elastic.co/node-data": "false", + "elasticsearch.k8s.elastic.co/node-ingest": "true", + "elasticsearch.k8s.elastic.co/node-master": "true", + "elasticsearch.k8s.elastic.co/node-ml": "true", + "elasticsearch.k8s.elastic.co/statefulset": "name-es-nodespec-1", + "elasticsearch.k8s.elastic.co/version": "7.2.0", + "pod-template-label-name": "pod-template-label-value", + }, + Annotations: map[string]string{ + "pod-template-annotation-name": "pod-template-annotation-value", + }, + }, + Spec: corev1.PodSpec{ + Volumes: volumes, + InitContainers: append(initContainers, corev1.Container{ + Name: "additional-init-container", + Image: "docker.elastic.co/elasticsearch/elasticsearch:7.2.0", + Env: defaults.PodDownwardEnvVars, + VolumeMounts: volumeMounts, + }), + Containers: []corev1.Container{ + { + Name: "additional-container", + }, + { + Name: "elasticsearch", + Image: "docker.elastic.co/elasticsearch/elasticsearch:7.2.0", + Ports: []corev1.ContainerPort{ + {Name: "http", HostPort: 0, ContainerPort: 9200, Protocol: "TCP", HostIP: ""}, + {Name: "transport", HostPort: 0, ContainerPort: 9300, Protocol: "TCP", HostIP: ""}, + }, + Env: append(EnvVars, corev1.EnvVar{Name: "my-env", Value: "my-value"}), + Resources: DefaultResources, + VolumeMounts: volumeMounts, + ReadinessProbe: NewReadinessProbe(), + }, + }, + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + AutomountServiceAccountToken: &varFalse, + Affinity: DefaultAffinity(sampleES.Name), + }, + } + + require.Equal(t, expected, actual) +} diff --git a/operators/pkg/controller/elasticsearch/pod/readiness_probe.go b/operators/pkg/controller/elasticsearch/nodespec/readiness_probe.go similarity index 98% rename from operators/pkg/controller/elasticsearch/pod/readiness_probe.go rename to operators/pkg/controller/elasticsearch/nodespec/readiness_probe.go index b16baafd4c..0ba5681422 100644 --- a/operators/pkg/controller/elasticsearch/pod/readiness_probe.go +++ b/operators/pkg/controller/elasticsearch/nodespec/readiness_probe.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
-package pod +package nodespec import ( "path" diff --git a/operators/pkg/controller/elasticsearch/nodespec/resources.go b/operators/pkg/controller/elasticsearch/nodespec/resources.go index c62da54be6..0ca681f0f7 100644 --- a/operators/pkg/controller/elasticsearch/nodespec/resources.go +++ b/operators/pkg/controller/elasticsearch/nodespec/resources.go @@ -10,10 +10,10 @@ import ( commonv1alpha1 "github.com/elastic/cloud-on-k8s/operators/pkg/apis/common/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/sset" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" ) @@ -34,7 +34,7 @@ func (l ResourcesList) StatefulSets() sset.StatefulSetList { return ssetList } -func BuildExpectedResources(es v1alpha1.Elasticsearch, podTemplateBuilder version.PodTemplateSpecBuilder) (ResourcesList, error) { +func BuildExpectedResources(es v1alpha1.Elasticsearch, keystoreResources *keystore.Resources) (ResourcesList, error) { nodesResources := make(ResourcesList, 0, len(es.Spec.Nodes)) for _, nodeSpec := range es.Spec.Nodes { @@ -49,11 +49,11 @@ func BuildExpectedResources(es v1alpha1.Elasticsearch, podTemplateBuilder versio } // build stateful set and associated headless service - statefulSet, err := sset.BuildStatefulSet(k8s.ExtractNamespacedName(&es), nodeSpec, cfg, podTemplateBuilder) + statefulSet, err := BuildStatefulSet(es, nodeSpec, cfg, keystoreResources) if err != nil { return nil, err } - headlessSvc := sset.HeadlessService(k8s.ExtractNamespacedName(&es), statefulSet.Name) + headlessSvc := HeadlessService(k8s.ExtractNamespacedName(&es), statefulSet.Name) nodesResources = append(nodesResources, Resources{ StatefulSet: statefulSet, diff --git a/operators/pkg/controller/elasticsearch/sset/build.go b/operators/pkg/controller/elasticsearch/nodespec/statefulset.go similarity index 78% rename from operators/pkg/controller/elasticsearch/sset/build.go rename to operators/pkg/controller/elasticsearch/nodespec/statefulset.go index 36a8163388..9f3ef902e0 100644 --- a/operators/pkg/controller/elasticsearch/sset/build.go +++ b/operators/pkg/controller/elasticsearch/nodespec/statefulset.go @@ -2,17 +2,18 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
-package sset +package nodespec import ( "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -42,18 +43,23 @@ func HeadlessService(es types.NamespacedName, ssetName string) corev1.Service { } } -func BuildStatefulSet(es types.NamespacedName, nodes v1alpha1.NodeSpec, cfg settings.CanonicalConfig, podTemplateBuilder version.PodTemplateSpecBuilder) (appsv1.StatefulSet, error) { - statefulSetName := name.StatefulSet(es.Name, nodes.Name) +func BuildStatefulSet( + es v1alpha1.Elasticsearch, + nodeSpec v1alpha1.NodeSpec, + cfg settings.CanonicalConfig, + keystoreResources *keystore.Resources, +) (appsv1.StatefulSet, error) { + statefulSetName := name.StatefulSet(es.Name, nodeSpec.Name) // ssetSelector is used to match the sset pods - ssetSelector := label.NewStatefulSetLabels(es, statefulSetName) + ssetSelector := label.NewStatefulSetLabels(k8s.ExtractNamespacedName(&es), statefulSetName) // add default PVCs to the node spec - nodes.VolumeClaimTemplates = defaults.AppendDefaultPVCs( - nodes.VolumeClaimTemplates, nodes.PodTemplate.Spec, esvolume.DefaultVolumeClaimTemplates..., + nodeSpec.VolumeClaimTemplates = defaults.AppendDefaultPVCs( + nodeSpec.VolumeClaimTemplates, nodeSpec.PodTemplate.Spec, esvolume.DefaultVolumeClaimTemplates..., ) // build pod template - podTemplate, err := podTemplateBuilder(nodes, cfg) + podTemplate, err := BuildPodTemplateSpec(es, nodeSpec, cfg, keystoreResources) if err != nil { return appsv1.StatefulSet{}, err } @@ -76,7 +82,7 @@ func BuildStatefulSet(es types.NamespacedName, nodes v1alpha1.NodeSpec, cfg sett UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ Type: appsv1.RollingUpdateStatefulSetStrategyType, RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{ - Partition: &nodes.NodeCount, + Partition: &nodeSpec.NodeCount, }, }, // we don't care much about pods creation ordering, and manage deletion ordering ourselves, @@ -90,8 +96,8 @@ func BuildStatefulSet(es types.NamespacedName, nodes v1alpha1.NodeSpec, cfg sett MatchLabels: ssetSelector, }, - Replicas: &nodes.NodeCount, - VolumeClaimTemplates: nodes.VolumeClaimTemplates, + Replicas: &nodeSpec.NodeCount, + VolumeClaimTemplates: nodeSpec.VolumeClaimTemplates, Template: podTemplate, }, } diff --git a/operators/pkg/controller/elasticsearch/nodespec/volumes.go b/operators/pkg/controller/elasticsearch/nodespec/volumes.go new file mode 100644 index 0000000000..df4decfe52 --- /dev/null +++ b/operators/pkg/controller/elasticsearch/nodespec/volumes.go @@ -0,0 +1,93 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package nodespec + +import ( + corev1 "k8s.io/api/core/v1" + + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" + esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" +) + +func buildVolumes(esName string, nodeSpec v1alpha1.NodeSpec, keystoreResources *keystore.Resources) ([]corev1.Volume, []corev1.VolumeMount) { + + configVolume := settings.ConfigSecretVolume(name.StatefulSet(esName, nodeSpec.Name)) + probeSecret := volume.NewSelectiveSecretVolumeWithMountPath( + user.ElasticInternalUsersSecretName(esName), esvolume.ProbeUserVolumeName, + esvolume.ProbeUserSecretMountPath, []string{user.InternalProbeUserName}, + ) + httpCertificatesVolume := volume.NewSecretVolumeWithMountPath( + certificates.HTTPCertsInternalSecretName(name.ESNamer, esName), + esvolume.HTTPCertificatesSecretVolumeName, + esvolume.HTTPCertificatesSecretVolumeMountPath, + ) + transportCertificatesVolume := transportCertificatesVolume(esName) + unicastHostsVolume := volume.NewConfigMapVolume( + name.UnicastHostsConfigMap(esName), esvolume.UnicastHostsVolumeName, esvolume.UnicastHostsVolumeMountPath, + ) + usersSecretVolume := volume.NewSecretVolumeWithMountPath( + user.XPackFileRealmSecretName(esName), + esvolume.XPackFileRealmVolumeName, + esvolume.XPackFileRealmVolumeMountPath, + ) + scriptsVolume := volume.NewConfigMapVolumeWithMode( + name.ScriptsConfigMap(esName), + esvolume.ScriptsVolumeName, + esvolume.ScriptsVolumeMountPath, + 0755) + + // append future volumes from PVCs (not resolved to a claim yet) + persistentVolumes := make([]corev1.Volume, 0, len(nodeSpec.VolumeClaimTemplates)) + for _, claimTemplate := range nodeSpec.VolumeClaimTemplates { + persistentVolumes = append(persistentVolumes, corev1.Volume{ + Name: claimTemplate.Name, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + // actual claim name will be resolved and fixed right before pod creation + ClaimName: "claim-name-placeholder", + }, + }, + }) + } + + volumes := append( + persistentVolumes, // includes the data volume, unless specified differently in the pod template + append( + initcontainer.PluginVolumes.Volumes(), + esvolume.DefaultLogsVolume, + usersSecretVolume.Volume(), + unicastHostsVolume.Volume(), + probeSecret.Volume(), + transportCertificatesVolume.Volume(), + httpCertificatesVolume.Volume(), + scriptsVolume.Volume(), + configVolume.Volume(), + )...) 
+ if keystoreResources != nil { + volumes = append(volumes, keystoreResources.Volume) + } + + volumeMounts := append( + initcontainer.PluginVolumes.EsContainerVolumeMounts(), + esvolume.DefaultDataVolumeMount, + esvolume.DefaultLogsVolumeMount, + usersSecretVolume.VolumeMount(), + unicastHostsVolume.VolumeMount(), + probeSecret.VolumeMount(), + transportCertificatesVolume.VolumeMount(), + httpCertificatesVolume.VolumeMount(), + scriptsVolume.VolumeMount(), + configVolume.VolumeMount(), + ) + + return volumes, volumeMounts +} diff --git a/operators/pkg/controller/elasticsearch/pod/pod.go b/operators/pkg/controller/elasticsearch/pod/pod.go deleted file mode 100644 index bc0c7fc590..0000000000 --- a/operators/pkg/controller/elasticsearch/pod/pod.go +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package pod - -import ( - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/keystore" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/network" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" -) - -const ( - // DefaultImageRepository is the default image name without a tag - DefaultImageRepository string = "docker.elastic.co/elasticsearch/elasticsearch" - - // DefaultTerminationGracePeriodSeconds is the termination grace period for the Elasticsearch containers - DefaultTerminationGracePeriodSeconds int64 = 120 -) - -var ( - // DefaultContainerPorts are the default Elasticsearch port mappings - DefaultContainerPorts = []corev1.ContainerPort{ - {Name: "http", ContainerPort: network.HTTPPort, Protocol: corev1.ProtocolTCP}, - {Name: "transport", ContainerPort: network.TransportPort, Protocol: corev1.ProtocolTCP}, - } -) - -// DefaultAffinity returns the default affinity for pods in a cluster. 
-func DefaultAffinity(esName string) *corev1.Affinity { - return &corev1.Affinity{ - // prefer to avoid two pods in the same cluster being co-located on a single node - PodAntiAffinity: &corev1.PodAntiAffinity{ - PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ - { - Weight: 100, - PodAffinityTerm: corev1.PodAffinityTerm{ - TopologyKey: "kubernetes.io/hostname", - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - label.ClusterNameLabelName: esName, - }, - }, - }, - }, - }, - }, - } -} - -// PodWithConfig contains a pod and its configuration -type PodWithConfig struct { - Pod corev1.Pod - Config settings.CanonicalConfig -} - -// PodsWithConfig is simply a list of PodWithConfig -type PodsWithConfig []PodWithConfig - -// Pods is a helper method to retrieve pods only (no configuration) -func (p PodsWithConfig) Pods() []corev1.Pod { - pods := make([]corev1.Pod, len(p)) - for i, withConfig := range p { - pods[i] = withConfig.Pod - } - return pods -} - -// NewPodSpecParams is used to build resources associated with an Elasticsearch Cluster -type NewPodSpecParams struct { - // Elasticsearch is the Elasticsearch cluster specification. - Elasticsearch v1alpha1.Elasticsearch - - // DiscoveryZenMinimumMasterNodes is the setting for minimum master node in Zen Discovery - DiscoveryZenMinimumMasterNodes int - - // NodeSpec is the user-provided spec to apply on the target pod - NodeSpec v1alpha1.NodeSpec - - // ESConfigVolume is the secret volume that contains elasticsearch.yml configuration - ESConfigVolume volume.SecretVolume - // UsersSecretVolume is the volume that contains x-pack configuration (users, users_roles) - UsersSecretVolume volume.SecretVolume - // ProbeUser is the user that should be used for the readiness probes. - ProbeUser client.UserAuth - // UnicastHostsVolume contains a file with the seed hosts. - UnicastHostsVolume volume.ConfigMapVolume - // KeystoreResources are k8s resources to load user-provided secure settings in the Elastisearch keystore - KeystoreResources *keystore.Resources -} - -// PodSpecContext contains a pod template and some additional context pertaining to its creation. -type PodSpecContext struct { - PodTemplate corev1.PodTemplateSpec - NodeSpec v1alpha1.NodeSpec - Config settings.CanonicalConfig -} - -// PodListToNames returns a list of pod names from the list of pods. -func PodListToNames(pods []corev1.Pod) []string { - names := make([]string, len(pods)) - for i, pod := range pods { - names[i] = pod.Name - } - return names -} - -// PodMapToNames returns a list of pod names from a map of pod names to pods -func PodMapToNames(pods map[string]corev1.Pod) []string { - names := make([]string, 0, len(pods)) - for podName := range pods { - names = append(names, podName) - } - return names -} - -// PodsByName returns a map of pod names to pods -func PodsByName(pods []corev1.Pod) map[string]corev1.Pod { - podMap := make(map[string]corev1.Pod, len(pods)) - for _, pod := range pods { - podMap[pod.Name] = pod - } - return podMap -} diff --git a/operators/pkg/controller/elasticsearch/pvc/pvc.go b/operators/pkg/controller/elasticsearch/pvc/pvc.go deleted file mode 100644 index 985c12c491..0000000000 --- a/operators/pkg/controller/elasticsearch/pvc/pvc.go +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package pvc - -import ( - "errors" - "reflect" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - corev1 "k8s.io/api/core/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" -) - -var ( - log = logf.Log.WithName("pvc") - ErrNotNodeNameLabelNotFound = errors.New("node name not found as a label on the PVC") - // PodLabelsInPVCs is the list of labels PVCs inherit from pods they are associated with - PodLabelsInPVCs = []string{ - label.ClusterNameLabelName, - common.TypeLabelName, - string(label.NodeTypesMasterLabelName), - string(label.NodeTypesIngestLabelName), - string(label.NodeTypesDataLabelName), - string(label.NodeTypesMLLabelName), - label.VersionLabelName, - } - // requiredLabelMatch is the list of labels for which PVC values must match their reference values to trigger PVC reuse - requiredLabelMatch = []string{ - label.ClusterNameLabelName, - common.TypeLabelName, - string(label.NodeTypesMasterLabelName), - string(label.NodeTypesDataLabelName), - string(label.VolumeNameLabelName), - } -) - -type OrphanedPersistentVolumeClaims struct { - orphanedPersistentVolumeClaims []corev1.PersistentVolumeClaim -} - -// ListVolumeClaims lists the persistent volume claims for the given Elasticsearch cluster. -func ListVolumeClaims(c k8s.Client, es v1alpha1.Elasticsearch) ([]corev1.PersistentVolumeClaim, error) { - labelSelector := label.NewLabelSelectorForElasticsearch(es) - // List PVC - listPVCOptions := client.ListOptions{ - Namespace: es.Namespace, - LabelSelector: labelSelector, - } - - var persistentVolumeClaims corev1.PersistentVolumeClaimList - if err := c.List(&listPVCOptions, &persistentVolumeClaims); err != nil { - return nil, err - } - return persistentVolumeClaims.Items, nil - -} - -// FindOrphanedVolumeClaims returns PVC which are not used in any Pod within a given namespace -func FindOrphanedVolumeClaims( - c k8s.Client, - es v1alpha1.Elasticsearch, -) (*OrphanedPersistentVolumeClaims, error) { - - persistentVolumeClaims, err := ListVolumeClaims(c, es) - if err != nil { - return nil, err - } - - // Maintain a map of the retrieved PVCs - pvcByName := map[string]corev1.PersistentVolumeClaim{} - for _, p := range persistentVolumeClaims { - if p.DeletionTimestamp != nil { - continue // PVC is being deleted, ignore it - } - pvcByName[p.Name] = p - } - - // List running pods - labelSelector := label.NewLabelSelectorForElasticsearch(es) - listPodSOptions := client.ListOptions{ - Namespace: es.Namespace, - LabelSelector: labelSelector, - } - - var pods corev1.PodList - if err := c.List(&listPodSOptions, &pods); err != nil { - return nil, err - } - - // Remove the PVCs that are attached - for _, p := range pods.Items { - for _, v := range p.Spec.Volumes { - if v.PersistentVolumeClaim != nil { - delete(pvcByName, v.PersistentVolumeClaim.ClaimName) - } - } - } - - // The result is the remaining list of PVC - orphanedPVCs := make([]corev1.PersistentVolumeClaim, 0, len(pvcByName)) - for _, pvc := range pvcByName { - orphanedPVCs = append(orphanedPVCs, pvc) - } - - return &OrphanedPersistentVolumeClaims{orphanedPersistentVolumeClaims: orphanedPVCs}, nil -} - -// GetOrphanedVolumeClaim extract and remove a matching existing and 
orphaned PVC, returns nil if none is found -func (o *OrphanedPersistentVolumeClaims) GetOrphanedVolumeClaim( - claim *corev1.PersistentVolumeClaim, -) *corev1.PersistentVolumeClaim { - for i := 0; i < len(o.orphanedPersistentVolumeClaims); i++ { - candidate := o.orphanedPersistentVolumeClaims[i] - if compareLabels(claim.Labels, candidate.Labels) && - compareStorageClass(claim, &candidate) && - compareResources(claim, &candidate) { - o.orphanedPersistentVolumeClaims = append(o.orphanedPersistentVolumeClaims[:i], o.orphanedPersistentVolumeClaims[i+1:]...) - return &candidate - } - } - return nil -} - -// TODO : Should we accept a storage with more space than needed ? -func compareResources(claim, candidate *corev1.PersistentVolumeClaim) bool { - claimStorage := claim.Spec.Resources.Requests["storage"] - candidateStorage := candidate.Spec.Resources.Requests["storage"] - return claimStorage.Cmp(candidateStorage) == 0 -} - -func compareStorageClass(claim, candidate *corev1.PersistentVolumeClaim) bool { - if claim.Spec.StorageClassName == nil { - // volumeClaimTemplate has no storageClass set: it should use the k8s cluster default - // since we don't know that default, we fallback to reusing any available volume - // from the same cluster (whatever the storage class actually is) - return true - } - return reflect.DeepEqual(claim.Spec.StorageClassName, candidate.Spec.StorageClassName) -} - -// compareLabels returns true if pvc labels match expectd pvc labels. -// It does not perform a strict comparison, but just compares the expected labels. -// Both expected pvc and existing pvc are allowed to have more labels than the expected ones. -// It also explicitly compares the Elasticsearch version, to make sure we don't -// run a old ES version with data from a newer ES version. -func compareLabels(expectedLabels map[string]string, actualLabels map[string]string) bool { - // compare subset of labels that must match - for _, k := range requiredLabelMatch { - valueInActual, existsInActual := actualLabels[k] - valueInExpected, existsInExpected := expectedLabels[k] - if !existsInExpected || !existsInActual || valueInExpected != valueInActual { - return false - } - } - // only allow pvc to be used for a same or higher version of Elasticsearch - expectedVersion, err := version.Parse(expectedLabels[label.VersionLabelName]) - if err != nil { - log.Error(err, "Invalid version in labels", "key", label.VersionLabelName, "value", label.VersionLabelName) - return false - } - actualVersion, err := version.Parse(actualLabels[label.VersionLabelName]) - if err != nil { - log.Error(err, "Invalid version in labels", "key", label.VersionLabelName, "value", label.VersionLabelName) - return false - } - if !expectedVersion.IsSameOrAfter(*actualVersion) { - // we are trying to run Elasticsearch with data from a newer version - return false - } - return true -} - -func GetPodNameFromLabels(pvc *corev1.PersistentVolumeClaim) (string, error) { - if name, ok := pvc.Labels[label.PodNameLabelName]; ok { - return name, nil - } - return "", ErrNotNodeNameLabelNotFound -} diff --git a/operators/pkg/controller/elasticsearch/pvc/pvc_test.go b/operators/pkg/controller/elasticsearch/pvc/pvc_test.go deleted file mode 100644 index 415419adaa..0000000000 --- a/operators/pkg/controller/elasticsearch/pvc/pvc_test.go +++ /dev/null @@ -1,489 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. 
Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package pvc - -import ( - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/go-test/deep" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -var ( - fastStorageClassname = "fast" - sampleLabels1 = map[string]string{ - common.TypeLabelName: "elasticsearch", - label.ClusterNameLabelName: "cluster-name", - string(label.NodeTypesMasterLabelName): "true", - string(label.NodeTypesMLLabelName): "true", - string(label.NodeTypesIngestLabelName): "true", - string(label.NodeTypesDataLabelName): "true", - label.VersionLabelName: "7.1.0", - label.VolumeNameLabelName: volume.ElasticsearchDataVolumeName, - } - sampleLabels2 = map[string]string{ - common.TypeLabelName: "elasticsearch", - label.ClusterNameLabelName: "another-cluster", - string(label.NodeTypesMasterLabelName): "true", - string(label.NodeTypesMLLabelName): "true", - string(label.NodeTypesIngestLabelName): "true", - string(label.NodeTypesDataLabelName): "true", - label.VersionLabelName: "7.1.0", - } -) - -func newPVC(podName string, volumeName string, sourceLabels map[string]string, - storageQty string, storageClassName *string) *corev1.PersistentVolumeClaim { - labels := make(map[string]string) - for k, v := range sourceLabels { - labels[k] = v - } - labels[label.PodNameLabelName] = podName - return &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: name.NewPVCName(podName, volumeName), - Labels: sourceLabels, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - StorageClassName: storageClassName, - Resources: corev1.ResourceRequirements{ - Limits: map[corev1.ResourceName]resource.Quantity{ - "storage": resource.MustParse(storageQty), - }, - }, - }, - } -} - -func deletePVC(pvc *corev1.PersistentVolumeClaim) *corev1.PersistentVolumeClaim { - now := metav1.Now() - pvc.DeletionTimestamp = &now - return pvc -} - -func newPod(name string, sourceLabels map[string]string) *corev1.Pod { - return &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: newPodLabel(name, sourceLabels), - }, - } -} - -func newPodLabel(podName string, sourceLabels map[string]string) map[string]string { - newMap := make(map[string]string) - for key, value := range sourceLabels { - newMap[key] = value - } - newMap[label.PodNameLabelName] = podName - return newMap -} - -func withPVC(pod *corev1.Pod, volumeName string, claimName string) *corev1.Pod { - pod.Spec.Volumes = []corev1.Volume{ - { - Name: volumeName, - VolumeSource: corev1.VolumeSource{ - PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ - ClaimName: claimName, - ReadOnly: false, - }, - }, - }, - } - return pod -} - -func TestFindOrphanedVolumeClaims(t *testing.T) { - pvc1 := newPVC( - "elasticsearch-sample-es-2l59jptdq6", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - nil, - ) - pvc2 := newPVC( - 
"elasticsearch-sample-es-6bw9qkw77k", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - nil, - ) - pvc3 := newPVC( - "elasticsearch-sample-es-6qg4hmd9dj", - volume.ElasticsearchDataVolumeName, - sampleLabels2, - "1Gi", - nil, - ) - type args struct { - initialObjects []runtime.Object - es v1alpha1.Elasticsearch - } - tests := []struct { - name string - args args - want *OrphanedPersistentVolumeClaims - wantErr bool - }{ - { - name: "Simple", - args: args{ - initialObjects: []runtime.Object{ - // create 1 Pod - withPVC( - newPod("elasticsearch-sample-es-2l59jptdq6", sampleLabels1), - volume.ElasticsearchDataVolumeName, - "elasticsearch-sample-es-2l59jptdq6-"+volume.ElasticsearchDataVolumeName, - ), - // create 3 PVCs - pvc1, - pvc2, - pvc3, - }, - es: v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Name: "elasticsearch-sample", - }, - }, - }, - want: &OrphanedPersistentVolumeClaims{[]corev1.PersistentVolumeClaim{*pvc2, *pvc3}}, - wantErr: false, - }, - { - name: "With a deleted PVC", - args: args{ - initialObjects: []runtime.Object{ - // create 1 Pod - withPVC( - newPod("elasticsearch-sample-es-2l59jptdq6", sampleLabels1), - volume.ElasticsearchDataVolumeName, - "elasticsearch-sample-es-2l59jptdq6-"+volume.ElasticsearchDataVolumeName, - ), - // create 3 PVCs, but one of them is scheduled to be deleted - pvc1, - pvc2, - deletePVC(pvc3.DeepCopy()), - }, - es: v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Name: "elasticsearch-sample", - }, - }, - }, - want: &OrphanedPersistentVolumeClaims{[]corev1.PersistentVolumeClaim{*pvc2}}, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fakeClient := k8s.WrapClient(fake.NewFakeClient(tt.args.initialObjects...)) - got, err := FindOrphanedVolumeClaims(fakeClient, tt.args.es) - if (err != nil) != tt.wantErr { - t.Errorf("FindOrphanedVolumeClaims() error = %v, wantErr %v", err, tt.wantErr) - return - } - if !assert.ElementsMatch(t, got.orphanedPersistentVolumeClaims, tt.want.orphanedPersistentVolumeClaims) { - t.Errorf("FindOrphanedVolumeClaims() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestOrphanedPersistentVolumeClaims_GetOrphanedVolumeClaim(t *testing.T) { - type fields struct { - orphanedPersistentVolumeClaims []corev1.PersistentVolumeClaim - } - type args struct { - claim *corev1.PersistentVolumeClaim - } - tests := []struct { - name string - fields fields - args args - want *corev1.PersistentVolumeClaim - }{ - { - name: "Simple test with a standard storage class and 1Gi of storage", - fields: fields{ - []corev1.PersistentVolumeClaim{ - *newPVC( - "elasticsearch-sample-es-6bw9qkw77k", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - nil, - ), - *newPVC( - "elasticsearch-sample-es-6qg4hmd9dj", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - nil, - ), - }}, - args: args{ - claim: &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Labels: sampleLabels1, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - Resources: corev1.ResourceRequirements{ - Limits: map[corev1.ResourceName]resource.Quantity{ - "storage": resource.MustParse("1Gi"), - }, - }, - }, - }, - }, - want: newPVC( - "elasticsearch-sample-es-6bw9qkw77k", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - nil, - ), - }, { - name: "Labels mismatch", - fields: fields{ - []corev1.PersistentVolumeClaim{ - *newPVC( - "elasticsearch-sample-es-6bw9qkw77k", - volume.ElasticsearchDataVolumeName, - sampleLabels2, - "1Gi", - nil, - ), - *newPVC( - 
"elasticsearch-sample-es-6qg4hmd9dj", - volume.ElasticsearchDataVolumeName, - sampleLabels2, - "1Gi", - nil, - ), - }}, - args: args{ - claim: &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Labels: sampleLabels1, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - Resources: corev1.ResourceRequirements{ - Limits: map[corev1.ResourceName]resource.Quantity{ - "storage": resource.MustParse("1Gi"), - }, - }, - }, - }, - }, - want: nil, - }, { - name: "Matching storage class", - fields: fields{ - []corev1.PersistentVolumeClaim{ - *newPVC( - "elasticsearch-sample-es-6bw9qkw77k", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - &fastStorageClassname, - ), - *newPVC( - "elasticsearch-sample-es-6qg4hmd9dj", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - &fastStorageClassname, - ), - }}, - args: args{ - claim: &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Labels: sampleLabels1, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - StorageClassName: &fastStorageClassname, - Resources: corev1.ResourceRequirements{ - Limits: map[corev1.ResourceName]resource.Quantity{ - "storage": resource.MustParse("1024Mi"), - }, - }, - }, - }, - }, - want: newPVC( - "elasticsearch-sample-es-6bw9qkw77k", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - &fastStorageClassname, - ), - }, - { - name: "Storage class mismatch", - fields: fields{ - []corev1.PersistentVolumeClaim{ - *newPVC( - "elasticsearch-sample-es-6bw9qkw77k", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - nil, - ), - *newPVC( - "elasticsearch-sample-es-6qg4hmd9dj", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - nil, - ), - }}, - args: args{ - claim: &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Labels: sampleLabels1, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - StorageClassName: &fastStorageClassname, - Resources: corev1.ResourceRequirements{ - Limits: map[corev1.ResourceName]resource.Quantity{ - "storage": resource.MustParse("1Gi"), - }, - }, - }, - }, - }, - want: nil, - }, - { - name: "Volume name mismatch", - fields: fields{ - []corev1.PersistentVolumeClaim{ - *newPVC( - "elasticsearch-sample-es-6qg4hmd9dj", - volume.ElasticsearchDataVolumeName, - sampleLabels1, - "1Gi", - nil, - ), - }, - }, - args: args{ - claim: &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Labels: func() map[string]string { - labels := make(map[string]string) - for k, v := range sampleLabels1 { - labels[k] = v - } - labels[label.VolumeNameLabelName] = "other-data" - return labels - }(), - }, - Spec: corev1.PersistentVolumeClaimSpec{ - Resources: corev1.ResourceRequirements{ - Limits: map[corev1.ResourceName]resource.Quantity{ - "storage": resource.MustParse("1Gi"), - }, - }, - }, - }, - }, - want: nil, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - o := &OrphanedPersistentVolumeClaims{ - orphanedPersistentVolumeClaims: tt.fields.orphanedPersistentVolumeClaims, - } - if diff := deep.Equal(o.GetOrphanedVolumeClaim(tt.args.claim), tt.want); diff != nil { - t.Error(diff) - } - }) - } -} - -func Test_compareLabels(t *testing.T) { - mergeLabels := func(labels map[string]string, mergeWith map[string]string) func() map[string]string { - return func() map[string]string { - merged := map[string]string{} - for k, v := range labels { - merged[k] = v - } - for k, v := range mergeWith { - merged[k] = v - } - return merged - } - } - tests := []struct { - name string - pvcLabels func() 
map[string]string - podLabels func() map[string]string - want bool - }{ - { - name: "same labels", - pvcLabels: mergeLabels(sampleLabels1, nil), - podLabels: mergeLabels(sampleLabels1, nil), - want: true, - }, - { - name: "same labels, with more on pvc", - pvcLabels: mergeLabels(sampleLabels1, map[string]string{"foo": "bar"}), - podLabels: mergeLabels(sampleLabels1, nil), - want: true, - }, - { - name: "same labels, with more on pod", - pvcLabels: mergeLabels(sampleLabels1, nil), - podLabels: mergeLabels(sampleLabels1, map[string]string{"foo": "bar"}), - want: true, - }, - { - name: "different cluster name", - pvcLabels: mergeLabels(sampleLabels1, map[string]string{label.ClusterNameLabelName: "cluster-name"}), - podLabels: mergeLabels(sampleLabels1, map[string]string{label.ClusterNameLabelName: "another-cluster"}), - want: false, - }, - { - name: "ingest vs. not ingest: ok", - pvcLabels: mergeLabels(sampleLabels1, map[string]string{string(label.NodeTypesIngestLabelName): "true"}), - podLabels: mergeLabels(sampleLabels1, map[string]string{string(label.NodeTypesIngestLabelName): "false"}), - want: true, - }, - { - name: "version on pod is higher than version on pvc: ok", - pvcLabels: mergeLabels(sampleLabels1, map[string]string{label.VersionLabelName: "7.1.0"}), - podLabels: mergeLabels(sampleLabels1, map[string]string{label.VersionLabelName: "7.2.0"}), - want: true, - }, - { - name: "version on pvc is higher than version on pod: not ok", - pvcLabels: mergeLabels(sampleLabels1, map[string]string{label.VersionLabelName: "7.2.0"}), - podLabels: mergeLabels(sampleLabels1, map[string]string{label.VersionLabelName: "7.1.0"}), - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := compareLabels(tt.podLabels(), tt.pvcLabels()); got != tt.want { - t.Errorf("compareLabels() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/reconcile/log.go b/operators/pkg/controller/elasticsearch/reconcile/log.go deleted file mode 100644 index cc2defa9b8..0000000000 --- a/operators/pkg/controller/elasticsearch/reconcile/log.go +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package reconcile - -import logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" - -var log = logf.Log.WithName("reconcile") diff --git a/operators/pkg/controller/elasticsearch/reconcile/resources_state.go b/operators/pkg/controller/elasticsearch/reconcile/resources_state.go index cc7dfe698f..b44c3b9dfc 100644 --- a/operators/pkg/controller/elasticsearch/reconcile/resources_state.go +++ b/operators/pkg/controller/elasticsearch/reconcile/resources_state.go @@ -5,22 +5,15 @@ package reconcile import ( - "errors" - "fmt" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - common "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/cleanup" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/services" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/fields" - "k8s.io/apimachinery/pkg/labels" - "sigs.k8s.io/controller-runtime/pkg/client" ) // ResourcesState contains information about a deployments resources. @@ -29,13 +22,11 @@ type ResourcesState struct { // DeletionTimestamp tombstone set. AllPods []corev1.Pod // CurrentPods are all non-deleted Elasticsearch pods. - CurrentPods pod.PodsWithConfig + CurrentPods []corev1.Pod // CurrentPodsByPhase are all non-deleted Elasticsearch indexed by their PodPhase - CurrentPodsByPhase map[corev1.PodPhase]pod.PodsWithConfig + CurrentPodsByPhase map[corev1.PodPhase][]corev1.Pod // DeletingPods are all deleted Elasticsearch pods. - DeletingPods pod.PodsWithConfig - // PVCs are all the PVCs related to this deployment. - PVCs []corev1.PersistentVolumeClaim + DeletingPods []corev1.Pod // ExternalService is the user-facing service related to the Elasticsearch cluster. ExternalService corev1.Service } @@ -49,67 +40,27 @@ func NewResourcesStateFromAPI(c k8s.Client, es v1alpha1.Elasticsearch) (*Resourc return nil, err } - deletingPods := make(pod.PodsWithConfig, 0) - currentPods := make(pod.PodsWithConfig, 0, len(allPods)) - currentPodsByPhase := make(map[corev1.PodPhase]pod.PodsWithConfig) + deletingPods := make([]corev1.Pod, 0) + currentPods := make([]corev1.Pod, 0, len(allPods)) + currentPodsByPhase := make(map[corev1.PodPhase][]corev1.Pod) // filter out pods scheduled for deletion for _, p := range allPods { - // retrieve es configuration - config, err := settings.GetESConfigContent(c, p.Namespace, p.Labels[label.StatefulSetNameLabelName]) - if err != nil { - if apierrors.IsNotFound(err) { - // We have an ES pod for which no configuration secret can be found. - // This is rather unfortunate, since the config secret is supposed to - // be created before the pod, and we cannot take any decision if the pod - // does not have any config attached. - // - // 3 possibilities here: - if p.DeletionTimestamp != nil { - // 1. the pod was recently deleted along with its config. - // The pod is not terminated yet, but the config isn't there anymore. - // That's ok: just give it a dummy config, it will be deleted anyway. 
- config = settings.CanonicalConfig{CanonicalConfig: common.MustNewSingleValue("pod.deletion", "in.progress")} - } else if cleanup.IsTooYoungForGC(&p) { - // 2. the pod was created recently and the config is not there yet - // in our client cache: let's just requeue. - return nil, fmt.Errorf("configuration secret for pod %s not yet in the cache, re-queueing", p.Name) - } else { - // 3. the pod was created a while ago, and its config was deleted. - // There is no point in keeping that pod around in an inconsistent state. - // Let's return it with a dummy configuration: it should then be safely - // replaced since it will not match any expected pod. - errMsg := "no configuration secret volume found for that pod, scheduling it for deletion" - log.Error(errors.New(errMsg), "Missing secret, replacing pod", "pod", p.Name) - config = settings.CanonicalConfig{CanonicalConfig: common.MustNewSingleValue("error.pod.to.replace", errMsg)} - - } - } else { - return nil, err - } - } - podWithConfig := pod.PodWithConfig{Pod: p, Config: config} - if p.DeletionTimestamp != nil { - deletingPods = append(deletingPods, podWithConfig) + deletingPods = append(deletingPods, p) continue } - currentPods = append(currentPods, podWithConfig) + currentPods = append(currentPods, p) podsInPhase, ok := currentPodsByPhase[p.Status.Phase] if !ok { - podsInPhase = pod.PodsWithConfig{podWithConfig} + podsInPhase = []corev1.Pod{p} } else { - podsInPhase = append(podsInPhase, podWithConfig) + podsInPhase = append(podsInPhase, p) } currentPodsByPhase[p.Status.Phase] = podsInPhase } - pvcs, err := getPersistentVolumeClaims(c, es, labelSelector, nil) - if err != nil { - return nil, err - } - externalService, err := services.GetExternalService(c, es) if err != nil { return nil, err @@ -120,23 +71,12 @@ func NewResourcesStateFromAPI(c k8s.Client, es v1alpha1.Elasticsearch) (*Resourc CurrentPods: currentPods, CurrentPodsByPhase: currentPodsByPhase, DeletingPods: deletingPods, - PVCs: pvcs, ExternalService: externalService, } return &state, nil } -// FindPVCByName looks up a PVC by claim name. -func (state ResourcesState) FindPVCByName(name string) (corev1.PersistentVolumeClaim, error) { - for _, pvc := range state.PVCs { - if pvc.Name == name { - return pvc, nil - } - } - return corev1.PersistentVolumeClaim{}, fmt.Errorf("no PVC named %s found", name) -} - // getPods returns list of pods in the current namespace with a specific set of selectors. func getPods( c k8s.Client, @@ -158,25 +98,3 @@ func getPods( return podList.Items, nil } - -// getPersistentVolumeClaims returns a list of PVCs in the current namespace with a specific set of selectors. -func getPersistentVolumeClaims( - c k8s.Client, - es v1alpha1.Elasticsearch, - labelSelectors labels.Selector, - fieldSelectors fields.Selector, -) ([]corev1.PersistentVolumeClaim, error) { - var pvcs corev1.PersistentVolumeClaimList - - listOpts := client.ListOptions{ - Namespace: es.Namespace, - LabelSelector: labelSelectors, - FieldSelector: fieldSelectors, - } - - if err := c.List(&listOpts, &pvcs); err != nil { - return nil, err - } - - return pvcs.Items, nil -} diff --git a/operators/pkg/controller/elasticsearch/reconcile/resources_state_test.go b/operators/pkg/controller/elasticsearch/reconcile/resources_state_test.go deleted file mode 100644 index 2bb5ec385d..0000000000 --- a/operators/pkg/controller/elasticsearch/reconcile/resources_state_test.go +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package reconcile - -import ( - "testing" - "time" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - common "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/cleanup" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/services" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestNewResourcesStateFromAPI_MissingPodConfiguration(t *testing.T) { - // This test focuses on the edge case where - // no configuration secret is found for a given pod. - require.NoError(t, v1alpha1.AddToScheme(scheme.Scheme)) - ssetName := "sset" - cluster := v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns", - Name: "es", - }, - } - externalService := corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns", - Name: services.ExternalServiceName(cluster.Name), - }, - } - recentPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns", - Name: "pod", - CreationTimestamp: metav1.NewTime(time.Now()), - }, - } - oldPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns", - Name: "pod", - Labels: map[string]string{ - label.StatefulSetNameLabelName: ssetName, - }, - CreationTimestamp: metav1.NewTime(time.Now().Add(-cleanup.DeleteAfter).Add(-1 * time.Minute)), - }, - } - deletionTimestamp := metav1.NewTime(time.Now().Add(1 * time.Hour)) - deletingPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns", - Name: "pod", - Labels: map[string]string{ - label.StatefulSetNameLabelName: ssetName, - }, - CreationTimestamp: metav1.NewTime(time.Now().Add(-cleanup.DeleteAfter).Add(-1 * time.Minute)), - DeletionTimestamp: &deletionTimestamp, - }, - } - config := settings.CanonicalConfig{CanonicalConfig: common.MustNewSingleValue("a", "b")} - rendered, err := config.Render() - require.NoError(t, err) - configSecret := corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns", - Name: settings.ConfigSecretName(ssetName), - Labels: map[string]string{ - label.ClusterNameLabelName: cluster.Name, - label.StatefulSetNameLabelName: oldPod.Name, - }, - }, - Data: map[string][]byte{ - settings.ConfigFileName: rendered, - }, - } - - tests := []struct { - name string - c k8s.Client - es v1alpha1.Elasticsearch - wantCurrentPods pod.PodsWithConfig - wantDeletingPods pod.PodsWithConfig - wantErr string - }{ - { - name: "configuration found", - c: k8s.WrapClient(fake.NewFakeClient(&cluster, &externalService, &oldPod, &configSecret)), - es: cluster, - wantCurrentPods: pod.PodsWithConfig{{Pod: oldPod, Config: config}}, - wantErr: "", - }, - { - name: "no configuration found, pod is terminating: continue with a dummy config", - c: k8s.WrapClient(fake.NewFakeClient(&cluster, &externalService, &deletingPod)), - es: cluster, - wantDeletingPods: pod.PodsWithConfig{{Pod: deletingPod, Config: 
settings.CanonicalConfig{CanonicalConfig: common.MustNewSingleValue( - "pod.deletion", "in.progress", - )}}}, - wantErr: "", - }, - { - name: "no configuration found, pod is recent: requeue", - c: k8s.WrapClient(fake.NewFakeClient(&cluster, &externalService, &recentPod)), - es: cluster, - wantCurrentPods: nil, - wantErr: "configuration secret for pod pod not yet in the cache, re-queueing", - }, - { - name: "no configuration found, pod is old: should be associated a dummy config for replacement", - c: k8s.WrapClient(fake.NewFakeClient(&cluster, &externalService, &oldPod)), - es: cluster, - wantCurrentPods: pod.PodsWithConfig{{Pod: oldPod, Config: settings.CanonicalConfig{CanonicalConfig: common.MustNewSingleValue( - "error.pod.to.replace", "no configuration secret volume found for that pod, scheduling it for deletion", - )}}}, - wantErr: "", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := NewResourcesStateFromAPI(tt.c, tt.es) - if tt.wantErr != "" { - require.EqualError(t, err, tt.wantErr) - } else { - require.NoError(t, err) - require.Equal(t, len(tt.wantCurrentPods), len(got.CurrentPods)) - if len(tt.wantCurrentPods) > 0 { - require.Equal(t, tt.wantCurrentPods[0].Config, got.CurrentPods[0].Config) - } - require.Equal(t, len(tt.wantDeletingPods), len(got.DeletingPods)) - if len(tt.wantDeletingPods) > 0 { - require.Equal(t, tt.wantDeletingPods[0].Config, got.DeletingPods[0].Config) - } - } - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/reconcile/state.go b/operators/pkg/controller/elasticsearch/reconcile/state.go index b02db91547..c35a81c418 100644 --- a/operators/pkg/controller/elasticsearch/reconcile/state.go +++ b/operators/pkg/controller/elasticsearch/reconcile/state.go @@ -50,7 +50,7 @@ func (s *State) updateWithPhase( s.status.MasterNode = observedState.ClusterState.MasterNodeName() } - s.status.AvailableNodes = len(AvailableElasticsearchNodes(resourcesState.CurrentPods.Pods())) + s.status.AvailableNodes = len(AvailableElasticsearchNodes(resourcesState.CurrentPods)) s.status.Phase = phase s.status.ExternalService = resourcesState.ExternalService.Name diff --git a/operators/pkg/controller/elasticsearch/validation/upgrade_checks.go b/operators/pkg/controller/elasticsearch/validation/upgrade_checks.go index fe4fe9db28..49a7bde88b 100644 --- a/operators/pkg/controller/elasticsearch/validation/upgrade_checks.go +++ b/operators/pkg/controller/elasticsearch/validation/upgrade_checks.go @@ -9,7 +9,7 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/validation" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/driver" + esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" ) const ( @@ -39,7 +39,7 @@ func validUpgradePath(ctx Context) validation.Result { return validation.OK } - v := driver.SupportedVersions(ctx.Proposed.Version) + v := esversion.SupportedVersions(ctx.Proposed.Version) if v == nil { return validation.Result{Allowed: false, Reason: unsupportedVersion(&ctx.Proposed.Version)} } diff --git a/operators/pkg/controller/elasticsearch/validation/validations.go b/operators/pkg/controller/elasticsearch/validation/validations.go index 45370692c5..b1642b74a6 100644 --- a/operators/pkg/controller/elasticsearch/validation/validations.go +++ b/operators/pkg/controller/elasticsearch/validation/validations.go @@ -14,9 +14,9 @@ import ( 
"github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" common "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/settings" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/validation" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/driver" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" + esversion "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/version" netutil "github.com/elastic/cloud-on-k8s/operators/pkg/utils/net" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/set" ) @@ -43,7 +43,7 @@ func nameLength(ctx Context) validation.Result { // supportedVersion checks if the version is supported. func supportedVersion(ctx Context) validation.Result { - if v := driver.SupportedVersions(ctx.Proposed.Version); v == nil { + if v := esversion.SupportedVersions(ctx.Proposed.Version); v == nil { return validation.Result{Allowed: false, Reason: unsupportedVersion(&ctx.Proposed.Version)} } return validation.OK diff --git a/operators/pkg/controller/elasticsearch/version/common.go b/operators/pkg/controller/elasticsearch/version/common.go deleted file mode 100644 index 5120b0773f..0000000000 --- a/operators/pkg/controller/elasticsearch/version/common.go +++ /dev/null @@ -1,220 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package version - -import ( - "crypto/sha256" - "fmt" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/certificates" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/defaults" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/initcontainer" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/user" - esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" -) - -var ( - // DefaultResources for the Elasticsearch container. The JVM default heap size is 1Gi, so we - // request at least 2Gi for the container to make sure ES can work properly. - // Not applying this minimum default would make ES randomly crash (OOM) on small machines. 
- DefaultResources = corev1.ResourceRequirements{ - Requests: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceMemory: resource.MustParse("2Gi"), - }, - } -) - -// TODO: refactor -type PodTemplateSpecBuilder func(v1alpha1.NodeSpec, settings.CanonicalConfig) (corev1.PodTemplateSpec, error) - -// TODO: refactor to avoid all the params mess -func BuildPodTemplateSpec( - es v1alpha1.Elasticsearch, - nodeSpec v1alpha1.NodeSpec, - paramsTmpl pod.NewPodSpecParams, - cfg settings.CanonicalConfig, - newEnvironmentVarsFn func(p pod.NewPodSpecParams) []corev1.EnvVar, - newInitContainersFn func(imageName string, setVMMaxMapCount *bool, transportCerts volume.SecretVolume, clusterName string) ([]corev1.Container, error), -) (corev1.PodTemplateSpec, error) { - params := pod.NewPodSpecParams{ - // cluster-wide params - Elasticsearch: es, - // volumes - UsersSecretVolume: paramsTmpl.UsersSecretVolume, - ProbeUser: paramsTmpl.ProbeUser, - UnicastHostsVolume: paramsTmpl.UnicastHostsVolume, - // volume and init container for the keystore - KeystoreResources: paramsTmpl.KeystoreResources, - // pod params - NodeSpec: nodeSpec, - } - podSpecCtx, err := podSpecContext( - params, - cfg, - newEnvironmentVarsFn, - newInitContainersFn, - ) - if err != nil { - return corev1.PodTemplateSpec{}, err - } - return podSpecCtx.PodTemplate, nil -} - -// podSpecContext creates a new PodSpecContext for an Elasticsearch node -func podSpecContext( - p pod.NewPodSpecParams, - cfg settings.CanonicalConfig, - newEnvironmentVarsFn func(p pod.NewPodSpecParams) []corev1.EnvVar, - newInitContainersFn func(elasticsearchImage string, setVMMaxMapCount *bool, transportCerts volume.SecretVolume, clusterName string) ([]corev1.Container, error), -) (pod.PodSpecContext, error) { - statefulSetName := name.StatefulSet(p.Elasticsearch.Name, p.NodeSpec.Name) - - // setup volumes - probeSecret := volume.NewSelectiveSecretVolumeWithMountPath( - user.ElasticInternalUsersSecretName(p.Elasticsearch.Name), esvolume.ProbeUserVolumeName, - esvolume.ProbeUserSecretMountPath, []string{p.ProbeUser.Name}, - ) - httpCertificatesVolume := volume.NewSecretVolumeWithMountPath( - certificates.HTTPCertsInternalSecretName(name.ESNamer, p.Elasticsearch.Name), - esvolume.HTTPCertificatesSecretVolumeName, - esvolume.HTTPCertificatesSecretVolumeMountPath, - ) - transportCertificatesVolume := volume.NewSecretVolumeWithMountPath( - name.TransportCertificatesSecret(p.Elasticsearch.Name), - esvolume.TransportCertificatesSecretVolumeName, - esvolume.TransportCertificatesSecretVolumeMountPath, - ) - - ssetName := name.StatefulSet(p.Elasticsearch.Name, p.NodeSpec.Name) - configVolume := settings.ConfigSecretVolume(ssetName) - - // append future volumes from PVCs (not resolved to a claim yet) - persistentVolumes := make([]corev1.Volume, 0, len(p.NodeSpec.VolumeClaimTemplates)) - for _, claimTemplate := range p.NodeSpec.VolumeClaimTemplates { - persistentVolumes = append(persistentVolumes, corev1.Volume{ - Name: claimTemplate.Name, - VolumeSource: corev1.VolumeSource{ - PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ - // actual claim name will be resolved and fixed right before pod creation - ClaimName: "claim-name-placeholder", - }, - }, - }) - } - - // build on top of the user-provided pod template spec - builder := defaults.NewPodTemplateBuilder(p.NodeSpec.PodTemplate, v1alpha1.ElasticsearchContainerName). - WithDockerImage(p.Elasticsearch.Spec.Image, stringsutil.Concat(pod.DefaultImageRepository, ":", p.Elasticsearch.Spec.Version)). 
- WithResources(DefaultResources). - WithTerminationGracePeriod(pod.DefaultTerminationGracePeriodSeconds). - WithPorts(pod.DefaultContainerPorts). - WithReadinessProbe(*pod.NewReadinessProbe()). - WithAffinity(pod.DefaultAffinity(p.Elasticsearch.Name)). - WithEnv(newEnvironmentVarsFn(p)...) - - // setup init containers - initContainers, err := newInitContainersFn( - builder.Container.Image, - p.Elasticsearch.Spec.SetVMMaxMapCount, - transportCertificatesVolume, - p.Elasticsearch.Name) - - if err != nil { - return pod.PodSpecContext{}, err - } - - scriptsVolume := volume.NewConfigMapVolumeWithMode( - name.ScriptsConfigMap(p.Elasticsearch.Name), - esvolume.ScriptsVolumeName, - esvolume.ScriptsVolumeMountPath, - 0755) - - builder = builder. - WithVolumes( - append( - persistentVolumes, // includes the data volume, unless specified differently in the pod template - append( - initcontainer.PluginVolumes.Volumes(), - esvolume.DefaultLogsVolume, - p.UsersSecretVolume.Volume(), - p.UnicastHostsVolume.Volume(), - probeSecret.Volume(), - transportCertificatesVolume.Volume(), - httpCertificatesVolume.Volume(), - scriptsVolume.Volume(), - configVolume.Volume(), - )...)...). - WithVolumeMounts( - append( - initcontainer.PluginVolumes.EsContainerVolumeMounts(), - esvolume.DefaultDataVolumeMount, - esvolume.DefaultLogsVolumeMount, - p.UsersSecretVolume.VolumeMount(), - p.UnicastHostsVolume.VolumeMount(), - probeSecret.VolumeMount(), - transportCertificatesVolume.VolumeMount(), - httpCertificatesVolume.VolumeMount(), - scriptsVolume.VolumeMount(), - configVolume.VolumeMount(), - )...) - - if p.KeystoreResources != nil { - builder = builder. - WithVolumes(p.KeystoreResources.Volume). - WithInitContainers(p.KeystoreResources.InitContainer) - } - - builder = builder. - WithInitContainers(initContainers...). - WithInitContainerDefaults() - - // set labels - version, err := version.Parse(p.Elasticsearch.Spec.Version) - if err != nil { - return pod.PodSpecContext{}, err - } - unpackedCfg, err := cfg.Unpack() - if err != nil { - return pod.PodSpecContext{}, err - } - nodeRoles := unpackedCfg.Node - // label with a hash of the config to rotate the pod on config changes - cfgHash := hash.HashObject(cfg) - podLabels, err := label.NewPodLabels(k8s.ExtractNamespacedName(&p.Elasticsearch), statefulSetName, *version, nodeRoles, cfgHash) - if err != nil { - return pod.PodSpecContext{}, err - } - if p.KeystoreResources != nil { - // label with a checksum of the secure settings to rotate the pod on secure settings change - // TODO: use hash.HashObject instead && fix the config checksum label name? - configChecksum := sha256.New224() - _, _ = configChecksum.Write([]byte(p.KeystoreResources.Version)) - podLabels[label.ConfigChecksumLabelName] = fmt.Sprintf("%x", configChecksum.Sum(nil)) - } - builder = builder.WithLabels(podLabels) - - return pod.PodSpecContext{ - NodeSpec: p.NodeSpec, - PodTemplate: builder.PodTemplate, - }, nil -} - -// quantityToMegabytes returns the megabyte value of the provided resource.Quantity -func quantityToMegabytes(q resource.Quantity) int { - return int(q.Value()) / 1024 / 1024 -} diff --git a/operators/pkg/controller/elasticsearch/version/common_test.go b/operators/pkg/controller/elasticsearch/version/common_test.go deleted file mode 100644 index 9fbfb96598..0000000000 --- a/operators/pkg/controller/elasticsearch/version/common_test.go +++ /dev/null @@ -1,422 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. 
Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package version - -import ( - "fmt" - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/env" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/hash" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" -) - -func Test_quantityToMegabytes(t *testing.T) { - type args struct { - q resource.Quantity - } - tests := []struct { - name string - args args - want int - }{ - {name: "simple", args: args{q: resource.MustParse("2Gi")}, want: 2 * 1024}, - {name: "large", args: args{q: resource.MustParse("9Ti")}, want: 9 * 1024 * 1024}, - {name: "small", args: args{q: resource.MustParse("0.25Gi")}, want: 256}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := quantityToMegabytes(tt.args.q); got != tt.want { - t.Errorf("quantityToMegabytes() = %v, want %v", got, tt.want) - } - }) - } -} - -func Test_podSpec(t *testing.T) { - // this test focuses on testing user-provided pod template overrides - // setup mocks for env vars func, es config func and init-containers func - newEnvVarsFn := func(p pod.NewPodSpecParams) []corev1.EnvVar { - return []corev1.EnvVar{ - { - Name: "var1", - Value: "value1", - }, - { - Name: "var2", - Value: "value2", - }, - } - } - newInitContainersFn := func(elasticsearchImage string, setVMMaxMapCount *bool, nodeCertificatesVolume volume.SecretVolume, clusterName string) ([]corev1.Container, error) { - return []corev1.Container{ - { - Name: "init-container1", - }, - { - Name: "init-container2", - }, - }, nil - } - varFalse := false - varTrue := true - varInt64 := int64(12) - es71 := v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "ns", - Name: "es71", - }, - Spec: v1alpha1.ElasticsearchSpec{ - Version: "7.1.0", - }, - } - - tests := []struct { - name string - params pod.NewPodSpecParams - assertions func(t *testing.T, specCtx pod.PodSpecContext) - }{ - { - name: "no podTemplate: default happy path", - params: pod.NewPodSpecParams{ - Elasticsearch: es71, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - podSpec := specCtx.PodTemplate.Spec - require.Equal(t, fmt.Sprintf("%s:%s", pod.DefaultImageRepository, "7.1.0"), podSpec.Containers[0].Image) - require.Equal(t, pod.DefaultTerminationGracePeriodSeconds, *podSpec.TerminationGracePeriodSeconds) - require.Equal(t, &varFalse, podSpec.AutomountServiceAccountToken) - require.NotEmpty(t, podSpec.Volumes) - require.Len(t, podSpec.InitContainers, 2) - require.Len(t, podSpec.Containers, 1) - esContainer := podSpec.Containers[0] - require.NotEmpty(t, esContainer.VolumeMounts) - require.Len(t, esContainer.Env, 2) - require.Equal(t, DefaultResources, esContainer.Resources) - require.Equal(t, pod.DefaultContainerPorts, esContainer.Ports) - require.Equal(t, pod.NewReadinessProbe(), esContainer.ReadinessProbe) - }, - }, - { - name: "custom image", - params: pod.NewPodSpecParams{ - Elasticsearch: v1alpha1.Elasticsearch{ - Spec: v1alpha1.ElasticsearchSpec{ - Image: "customImageName", - Version: 
"7.1.0", - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - require.Equal(t, "customImageName", specCtx.PodTemplate.Spec.Containers[0].Image) - }, - }, - { - name: "custom termination grace period & automount sa token", - params: pod.NewPodSpecParams{ - Elasticsearch: es71, - NodeSpec: v1alpha1.NodeSpec{ - PodTemplate: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - TerminationGracePeriodSeconds: &varInt64, - AutomountServiceAccountToken: &varTrue, - }, - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - require.Equal(t, &varInt64, specCtx.PodTemplate.Spec.TerminationGracePeriodSeconds) - require.Equal(t, &varTrue, specCtx.PodTemplate.Spec.AutomountServiceAccountToken) - }, - }, - { - name: "user-provided volumes & volume mounts", - params: pod.NewPodSpecParams{ - Elasticsearch: es71, - NodeSpec: v1alpha1.NodeSpec{ - PodTemplate: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Volumes: []corev1.Volume{ - { - Name: "user-volume-1", - }, - { - Name: "user-volume-2", - }, - }, - Containers: []corev1.Container{ - { - Name: v1alpha1.ElasticsearchContainerName, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "user-volume-mount-1", - }, - { - Name: "user-volume-mount-2", - }, - }, - }, - }, - }, - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - podSpec := specCtx.PodTemplate.Spec - require.True(t, len(podSpec.Volumes) > 1) - foundUserVolumes := 0 - for _, v := range podSpec.Volumes { - if v.Name == "user-volume-1" || v.Name == "user-volume-2" { - foundUserVolumes++ - } - } - require.Equal(t, 2, foundUserVolumes) - foundUserVolumeMounts := 0 - for _, v := range podSpec.Containers[0].VolumeMounts { - if v.Name == "user-volume-mount-1" || v.Name == "user-volume-mount-2" { - foundUserVolumeMounts++ - } - } - require.Equal(t, 2, foundUserVolumeMounts) - }, - }, - { - name: "user-provided init containers", - params: pod.NewPodSpecParams{ - Elasticsearch: es71, - NodeSpec: v1alpha1.NodeSpec{ - PodTemplate: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - InitContainers: []corev1.Container{ - { - Name: "user-init-container-1", - Image: "my-custom-image", - }, - { - Name: "user-init-container-2", - VolumeMounts: []corev1.VolumeMount{{ - Name: "foo", - MountPath: "/foo", - }}, - }, - }, - }, - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - podSpec := specCtx.PodTemplate.Spec - require.Equal(t, []corev1.Container{ - { - Name: "init-container1", - Image: podSpec.Containers[0].Image, - Env: env.DynamicPodEnvVars, - VolumeMounts: podSpec.Containers[0].VolumeMounts, - }, - { - Name: "init-container2", - Image: podSpec.Containers[0].Image, - Env: env.DynamicPodEnvVars, - VolumeMounts: podSpec.Containers[0].VolumeMounts, - }, - { - Name: "user-init-container-1", - Image: "my-custom-image", - Env: env.DynamicPodEnvVars, - VolumeMounts: podSpec.Containers[0].VolumeMounts, - }, - { - Name: "user-init-container-2", - Image: podSpec.Containers[0].Image, - Env: env.DynamicPodEnvVars, - VolumeMounts: append( - []corev1.VolumeMount{{ - Name: "foo", - MountPath: "/foo", - }}, - podSpec.Containers[0].VolumeMounts..., - ), - }, - }, podSpec.InitContainers) - }, - }, - { - name: "user-provided environment", - params: pod.NewPodSpecParams{ - Elasticsearch: es71, - NodeSpec: v1alpha1.NodeSpec{ - PodTemplate: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: v1alpha1.ElasticsearchContainerName, - Env: []corev1.EnvVar{ - { - Name: "user-env-1", - Value: 
"user-env-1-value", - }, - { - Name: "user-env-2", - Value: "user-env-2-value", - }, - }, - }, - }, - }, - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - require.Equal(t, []corev1.EnvVar{ - { - Name: "user-env-1", - Value: "user-env-1-value", - }, - { - Name: "user-env-2", - Value: "user-env-2-value", - }, - { - Name: "var1", - Value: "value1", - }, - { - Name: "var2", - Value: "value2", - }, - }, specCtx.PodTemplate.Spec.Containers[0].Env) - }, - }, - { - name: "user-provided environment overrides", - params: pod.NewPodSpecParams{ - Elasticsearch: es71, - NodeSpec: v1alpha1.NodeSpec{ - PodTemplate: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: v1alpha1.ElasticsearchContainerName, - Env: []corev1.EnvVar{ - { - Name: "var1", - Value: "user-overridden-var-1", - }, - { - Name: "user-env-2", - Value: "user-env-2-value", - }, - }, - }, - }, - }, - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - require.Equal(t, []corev1.EnvVar{ - { - Name: "user-env-2", - Value: "user-env-2-value", - }, - { - Name: "var1", - Value: "user-overridden-var-1", - }, - { - Name: "var2", - Value: "value2", - }, - }, specCtx.PodTemplate.Spec.Containers[0].Env) - }, - }, - { - name: "default affinity", - params: pod.NewPodSpecParams{ - Elasticsearch: v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-cluster", - }, - Spec: v1alpha1.ElasticsearchSpec{ - Version: "7.1.0", - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - require.Equal(t, pod.DefaultAffinity("my-cluster"), specCtx.PodTemplate.Spec.Affinity) - }, - }, - { - name: "custom affinity", - params: pod.NewPodSpecParams{ - Elasticsearch: v1alpha1.Elasticsearch{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-cluster", - }, - Spec: v1alpha1.ElasticsearchSpec{ - Version: "7.1.0", - }, - }, - NodeSpec: v1alpha1.NodeSpec{ - PodTemplate: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Affinity: &corev1.Affinity{}, - }, - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - require.Equal(t, &corev1.Affinity{}, specCtx.PodTemplate.Spec.Affinity) - }, - }, - { - name: "user-provided labels", - params: pod.NewPodSpecParams{ - Elasticsearch: es71, - NodeSpec: v1alpha1.NodeSpec{ - Name: "node-spec-name", - PodTemplate: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "a": "b", - "c": "d", - }, - }, - }, - }, - }, - assertions: func(t *testing.T, specCtx pod.PodSpecContext) { - require.Equal(t, map[string]string{ - "a": "b", - "c": "d", - "common.k8s.elastic.co/type": "elasticsearch", - "elasticsearch.k8s.elastic.co/cluster-name": "es71", - "elasticsearch.k8s.elastic.co/config-template-hash": hash.HashObject(settings.NewCanonicalConfig()), - "elasticsearch.k8s.elastic.co/node-data": "true", - "elasticsearch.k8s.elastic.co/node-ingest": "true", - "elasticsearch.k8s.elastic.co/node-master": "true", - "elasticsearch.k8s.elastic.co/node-ml": "true", - "elasticsearch.k8s.elastic.co/statefulset": "es71-es-node-spec-name", - "elasticsearch.k8s.elastic.co/version": "7.1.0", - }, specCtx.PodTemplate.Labels) - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - spec, err := podSpecContext(tt.params, settings.NewCanonicalConfig(), newEnvVarsFn, newInitContainersFn) - require.NoError(t, err) - tt.assertions(t, spec) - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/version/supported_versions.go 
b/operators/pkg/controller/elasticsearch/version/supported_versions.go index f2db7e0f5e..7e0c564cab 100644 --- a/operators/pkg/controller/elasticsearch/version/supported_versions.go +++ b/operators/pkg/controller/elasticsearch/version/supported_versions.go @@ -20,6 +20,27 @@ type LowestHighestSupportedVersions struct { HighestSupportedVersion version.Version } +func SupportedVersions(v version.Version) *LowestHighestSupportedVersions { + switch v.Major { + case 6: + return &LowestHighestSupportedVersions{ + // Min. version is 6.7.0 for now. Will be 6.8.0 soon. + LowestSupportedVersion: version.MustParse("6.7.0"), + // higher may be possible, but not proven yet, lower may also be a requirement... + HighestSupportedVersion: version.MustParse("6.99.99"), + } + case 7: + return &LowestHighestSupportedVersions{ + // 6.7.0 is the lowest wire compatibility version for 7.x + LowestSupportedVersion: version.MustParse("6.7.0"), + // higher may be possible, but not proven yet, lower may also be a requirement... + HighestSupportedVersion: version.MustParse("7.99.99"), + } + default: + return nil + } +} + // VerifySupportsExistingPods checks the given pods against the supported version range in lh. func (lh LowestHighestSupportedVersions) VerifySupportsExistingPods( pods []corev1.Pod, diff --git a/operators/pkg/controller/elasticsearch/version/supported_versions_test.go b/operators/pkg/controller/elasticsearch/version/supported_versions_test.go index be0b2acf25..83d3c760ce 100644 --- a/operators/pkg/controller/elasticsearch/version/supported_versions_test.go +++ b/operators/pkg/controller/elasticsearch/version/supported_versions_test.go @@ -7,6 +7,8 @@ package version import ( "testing" + "github.com/stretchr/testify/require" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/version" @@ -18,6 +20,60 @@ var ( testPodWithoutVersionLabel = corev1.Pod{} ) +func TestSupportedVersions(t *testing.T) { + type args struct { + v version.Version + } + tests := []struct { + name string + args args + supported []version.Version + unsupported []version.Version + }{ + { + name: "6.x", + args: args{ + v: version.MustParse("6.8.0"), + }, + supported: []version.Version{ + version.MustParse("6.7.0"), + version.MustParse("6.8.0"), + version.MustParse("6.99.99"), + }, + unsupported: []version.Version{ + version.MustParse("6.5.0"), + version.MustParse("7.0.0"), + }, + }, + { + name: "7.x", + args: args{ + v: version.MustParse("7.1.0"), + }, + supported: []version.Version{ + version.MustParse("6.7.0"), //wire compat + version.MustParse("7.2.0"), + version.MustParse("7.99.99"), + }, + unsupported: []version.Version{ + version.MustParse("6.6.0"), + version.MustParse("8.0.0"), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + vs := SupportedVersions(tt.args.v) + for _, v := range tt.supported { + require.NoError(t, vs.Supports(v)) + } + for _, v := range tt.unsupported { + require.Error(t, vs.Supports(v)) + } + }) + } +} + func Test_lowestHighestSupportedVersions_VerifySupportsExistingPods(t *testing.T) { newPodWithVersionLabel := func(v version.Version) corev1.Pod { return corev1.Pod{ diff --git a/operators/pkg/controller/elasticsearch/version/zen1/compatibility.go b/operators/pkg/controller/elasticsearch/version/zen1/compatibility.go index af745cc1d0..a6b8a88ab8 100644 --- a/operators/pkg/controller/elasticsearch/version/zen1/compatibility.go +++ 
b/operators/pkg/controller/elasticsearch/version/zen1/compatibility.go @@ -16,13 +16,13 @@ func zen1VersionMatch(v version.Version) bool { return v.Major < 7 } -// IsCompatibleForZen1 returns true if the given StatefulSet is compatible with zen1. -func IsCompatibleForZen1(statefulSet appsv1.StatefulSet) bool { +// IsCompatibleWithZen1 returns true if the given StatefulSet is compatible with zen1. +func IsCompatibleWithZen1(statefulSet appsv1.StatefulSet) bool { return sset.ESVersionMatch(statefulSet, zen1VersionMatch) } -// AtLeastOneNodeCompatibleForZen1 returns true if the given StatefulSetList contains +// AtLeastOneNodeCompatibleWithZen1 returns true if the given StatefulSetList contains // at least one StatefulSet compatible with zen1. -func AtLeastOneNodeCompatibleForZen1(statefulSets sset.StatefulSetList) bool { +func AtLeastOneNodeCompatibleWithZen1(statefulSets sset.StatefulSetList) bool { return sset.AtLeastOneESVersionMatch(statefulSets, zen1VersionMatch) } diff --git a/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go b/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go index d71fda9cf8..ef509df008 100644 --- a/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go +++ b/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go @@ -50,8 +50,8 @@ func TestIsCompatibleForZen1(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := IsCompatibleForZen1(tt.sset); got != tt.want { - t.Errorf("IsCompatibleForZen1() = %v, want %v", got, tt.want) + if got := IsCompatibleWithZen1(tt.sset); got != tt.want { + t.Errorf("IsCompatibleWithZen1() = %v, want %v", got, tt.want) } }) } @@ -86,8 +86,8 @@ func TestAtLeastOneNodeCompatibleForZen1(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := AtLeastOneNodeCompatibleForZen1(tt.statefulSets); got != tt.want { - t.Errorf("AtLeastOneNodeCompatibleForZen1() = %v, want %v", got, tt.want) + if got := AtLeastOneNodeCompatibleWithZen1(tt.statefulSets); got != tt.want { + t.Errorf("AtLeastOneNodeCompatibleWithZen1() = %v, want %v", got, tt.want) } }) } diff --git a/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters.go b/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters.go index 8a4bc63e8b..8d95608d76 100644 --- a/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters.go +++ b/operators/pkg/controller/elasticsearch/version/zen1/minimum_masters.go @@ -31,7 +31,7 @@ func SetupMinimumMasterNodesConfig(nodeSpecResources nodespec.ResourcesList) err masters := nodeSpecResources.MasterNodesNames() quorum := settings.Quorum(len(masters)) for i, res := range nodeSpecResources { - if !IsCompatibleForZen1(res.StatefulSet) { + if !IsCompatibleWithZen1(res.StatefulSet) { continue } // patch config with the expected minimum master nodes @@ -56,7 +56,7 @@ func UpdateMinimumMasterNodes( actualStatefulSets sset.StatefulSetList, reconcileState *reconcile.State, ) (bool, error) { - if !AtLeastOneNodeCompatibleForZen1(actualStatefulSets) { + if !AtLeastOneNodeCompatibleWithZen1(actualStatefulSets) { // nothing to do return false, nil } diff --git a/operators/pkg/controller/elasticsearch/version/zen1/podspecs.go b/operators/pkg/controller/elasticsearch/version/zen1/podspecs.go deleted file mode 100644 index 9fa8a23495..0000000000 --- a/operators/pkg/controller/elasticsearch/version/zen1/podspecs.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package zen1 - -import ( - "path" - - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/env" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" - corev1 "k8s.io/api/core/v1" -) - -// NewEnvironmentVars returns the environment vars to be associated to a pod -func NewEnvironmentVars( - p pod.NewPodSpecParams, -) []corev1.EnvVar { - vars := []corev1.EnvVar{ - {Name: settings.EnvReadinessProbeProtocol, Value: "https"}, - {Name: settings.EnvProbeUsername, Value: p.ProbeUser.Name}, - {Name: settings.EnvProbePasswordFile, Value: path.Join(esvolume.ProbeUserSecretMountPath, p.ProbeUser.Name)}, - } - vars = append(vars, env.DynamicPodEnvVars...) - - return vars -} diff --git a/operators/pkg/controller/elasticsearch/version/zen1/podspecs_test.go b/operators/pkg/controller/elasticsearch/version/zen1/podspecs_test.go deleted file mode 100644 index c48bd2854e..0000000000 --- a/operators/pkg/controller/elasticsearch/version/zen1/podspecs_test.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package zen1 - -import ( - "path" - "testing" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pod" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - esvolume "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" -) - -var testProbeUser = client.UserAuth{Name: "username1", Password: "supersecure"} - -func TestNewEnvironmentVars(t *testing.T) { - type args struct { - p pod.NewPodSpecParams - } - tests := []struct { - name string - args args - wantEnv []corev1.EnvVar - }{ - { - name: "sample cluster", - args: args{ - p: pod.NewPodSpecParams{ - ProbeUser: testProbeUser, - Elasticsearch: v1alpha1.Elasticsearch{ - Spec: v1alpha1.ElasticsearchSpec{ - Version: "7.1.0", - }, - }, - }, - }, - wantEnv: []corev1.EnvVar{ - {Name: settings.EnvReadinessProbeProtocol, Value: "https"}, - {Name: settings.EnvProbeUsername, Value: "username1"}, - {Name: settings.EnvProbePasswordFile, Value: path.Join(esvolume.ProbeUserSecretMountPath, "username1")}, - {Name: settings.EnvPodName, Value: "", ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "metadata.name"}, - }}, - {Name: settings.EnvPodIP, Value: "", ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{APIVersion: "v1", FieldPath: "status.podIP"}, - }}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := NewEnvironmentVars(tt.args.p) - assert.Equal(t, tt.wantEnv, got) - }) - } -} diff --git a/operators/pkg/controller/elasticsearch/version/zen2/compatibility.go 
b/operators/pkg/controller/elasticsearch/version/zen2/compatibility.go index a15f37570d..ee8a9acaba 100644 --- a/operators/pkg/controller/elasticsearch/version/zen2/compatibility.go +++ b/operators/pkg/controller/elasticsearch/version/zen2/compatibility.go @@ -16,13 +16,13 @@ func zen2VersionMatch(v version.Version) bool { return v.Major >= 7 } -// IsCompatibleForZen2 returns true if the given StatefulSet is compatible with zen2. -func IsCompatibleForZen2(statefulSet appsv1.StatefulSet) bool { +// IsCompatibleWithZen2 returns true if the given StatefulSet is compatible with zen2. +func IsCompatibleWithZen2(statefulSet appsv1.StatefulSet) bool { return sset.ESVersionMatch(statefulSet, zen2VersionMatch) } -// AtLeastOneNodeCompatibleForZen2 returns true if the given StatefulSetList contains +// AtLeastOneNodeCompatibleWithZen2 returns true if the given StatefulSetList contains // at least one StatefulSet compatible with zen2. -func AtLeastOneNodeCompatibleForZen2(statefulSets sset.StatefulSetList) bool { +func AtLeastOneNodeCompatibleWithZen2(statefulSets sset.StatefulSetList) bool { return sset.AtLeastOneESVersionMatch(statefulSets, zen2VersionMatch) } diff --git a/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go b/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go index 73cd250a40..37dc0b1cb3 100644 --- a/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go +++ b/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go @@ -50,8 +50,8 @@ func TestIsCompatibleForZen2(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := IsCompatibleForZen2(tt.sset); got != tt.want { - t.Errorf("IsCompatibleForZen2() = %v, want %v", got, tt.want) + if got := IsCompatibleWithZen2(tt.sset); got != tt.want { + t.Errorf("IsCompatibleWithZen2() = %v, want %v", got, tt.want) } }) } @@ -86,8 +86,8 @@ func TestAtLeastOneNodeCompatibleForZen2(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := AtLeastOneNodeCompatibleForZen2(tt.statefulSets); got != tt.want { - t.Errorf("AtLeastOneNodeCompatibleForZen2() = %v, want %v", got, tt.want) + if got := AtLeastOneNodeCompatibleWithZen2(tt.statefulSets); got != tt.want { + t.Errorf("AtLeastOneNodeCompatibleWithZen2() = %v, want %v", got, tt.want) } }) } diff --git a/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes.go b/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes.go index 57ee428469..44a333dd30 100644 --- a/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes.go +++ b/operators/pkg/controller/elasticsearch/version/zen2/initial_master_nodes.go @@ -71,7 +71,7 @@ func SetupInitialMasterNodes( return nil } for i, res := range nodeSpecResources { - if !IsCompatibleForZen2(res.StatefulSet) { + if !IsCompatibleWithZen2(res.StatefulSet) { continue } if !label.IsMasterNodeSet(res.StatefulSet) { diff --git a/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go b/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go index f7d6a77065..84f384802e 100644 --- a/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go +++ b/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go @@ -24,7 +24,7 @@ var ( // AddToVotingConfigExclusions adds the given node names to exclude from voting config exclusions. 
func AddToVotingConfigExclusions(esClient client.Client, sset appsv1.StatefulSet, excludeNodes []string) error { - if !IsCompatibleForZen2(sset) { + if !IsCompatibleWithZen2(sset) { return nil } log.Info("Setting voting config exclusions", "namespace", sset.Namespace, "nodes", excludeNodes) @@ -64,7 +64,7 @@ func canClearVotingConfigExclusions(c k8s.Client, actualStatefulSets sset.Statef // ClearVotingConfigExclusions resets the voting config exclusions if all excluded nodes are properly removed. // It returns true if this should be retried later (re-queued). func ClearVotingConfigExclusions(es v1alpha1.Elasticsearch, c k8s.Client, esClient client.Client, actualStatefulSets sset.StatefulSetList) (bool, error) { - if !AtLeastOneNodeCompatibleForZen2(actualStatefulSets) { + if !AtLeastOneNodeCompatibleWithZen2(actualStatefulSets) { return false, nil } canClear, err := canClearVotingConfigExclusions(c, actualStatefulSets) diff --git a/operators/pkg/utils/k8s/k8sutils.go b/operators/pkg/utils/k8s/k8sutils.go index fb1003910a..d93bd08931 100644 --- a/operators/pkg/utils/k8s/k8sutils.go +++ b/operators/pkg/utils/k8s/k8sutils.go @@ -64,6 +64,15 @@ func GetPods( return podList.Items, nil } +// PodsByName returns a map of pod names to pods +func PodsByName(pods []corev1.Pod) map[string]corev1.Pod { + podMap := make(map[string]corev1.Pod, len(pods)) + for _, pod := range pods { + podMap[pod.Name] = pod + } + return podMap +} + // GetServiceDNSName returns the fully qualified DNS name for a service func GetServiceDNSName(svc corev1.Service) []string { return []string{ diff --git a/operators/test/e2e/es/failure_test.go b/operators/test/e2e/es/failure_test.go index 6d8712e139..528b88186d 100644 --- a/operators/test/e2e/es/failure_test.go +++ b/operators/test/e2e/es/failure_test.go @@ -5,19 +5,14 @@ package es import ( - "fmt" "testing" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" esname "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/pvc" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/volume" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/stringsutil" "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" "github.com/elastic/cloud-on-k8s/operators/test/e2e/test/elasticsearch" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" ) @@ -64,125 +59,6 @@ func TestKillSingleNodeReusePV(t *testing.T) { b) } -// TestKillCorrectPVReuse sets up a cluster with multiple PVs per node, kills a node, then makes sure that: -// - PVC are reused with the correct volume (eg. do not bind the "data" PVC to a "non-data" volume) -// - if no PVC is available, a new one is created -func TestKillCorrectPVReuse(t *testing.T) { - test.MinVersionOrSkip(t, "v1.12.0") - - k := test.NewK8sClientOrFatal() - - b := elasticsearch.NewBuilder("test-failure-pvc"). - WithESMasterDataNodes(3, elasticsearch.DefaultResources). - WithPersistentVolumes("not-data"). - WithPersistentVolumes(volume.ElasticsearchDataVolumeName) - - var clusterUUID string - var deletedPVC corev1.PersistentVolumeClaim - var seenPVCs []string - var killedPod corev1.Pod - - test.StepList{}. - WithSteps(b.InitTestSteps(k)). - WithSteps(b.CreationTestSteps(k)). - WithSteps(test.CheckTestSteps(b, k)). - WithStep(elasticsearch.RetrieveClusterUUIDStep(b.Elasticsearch, k, &clusterUUID)). 
- // Simulate a pod deletion - WithStep(elasticsearch.PauseReconciliation(b.Elasticsearch, k)). - WithSteps(test.StepList{ - { - Name: "Kill a node", - Test: func(t *testing.T) { - pods, err := k.GetPods(test.ESPodListOptions(b.Elasticsearch.Name)) - require.NoError(t, err) - require.True(t, len(pods) > 0, "need at least one pod to kill") - for i, pod := range pods { - if i == 0 { - killedPod = pod - } - } - err = k.DeletePod(killedPod) - require.NoError(t, err) - }, - }, - { - Name: "Wait for pod to be deleted", - Test: test.Eventually(func() error { - pod, err := k.GetPod(killedPod.Name) - if err != nil && !apierrors.IsNotFound(err) { - return err - } - if apierrors.IsNotFound(err) || killedPod.UID != pod.UID { - return nil - } - return fmt.Errorf("pod %s not deleted yet", killedPod.Name) - }), - }, - { - Name: "Delete one of the es-data PVCs", - Test: test.Eventually(func() error { - pvcs, err := pvc.ListVolumeClaims(k.Client, b.Elasticsearch) - if err != nil { - return err - } - for _, pvc := range pvcs { - seenPVCs = append(seenPVCs, string(pvc.UID)) - if pvc.Labels[label.VolumeNameLabelName] == volume.ElasticsearchDataVolumeName && - pvc.Labels[label.PodNameLabelName] == killedPod.Name { - // this should ensure that when we resume reconciliation the operator creates a new PVC - // we also test correct reuse by keeping the non-data volume claim around and unchanged - deletedPVC = pvc - if err := k.Client.Delete(&pvc); err != nil { - return err - } - } - } - return nil - }), - }, - elasticsearch.ResumeReconciliation(b.Elasticsearch, k), - }). - // Check we recover - WithSteps(test.CheckTestSteps(b, k)). - // Check PVCs have been reused correctly - WithStep(test.Step{ - Name: "No PVC should have been reused for elasticsearch-data", - Test: func(t *testing.T) { - // should be resurrected with same name due to second PVC still around and forcing the pods name - // back to the old one - pod, err := k.GetPod(killedPod.Name) - require.NoError(t, err) - var checkedVolumes bool - for _, v := range pod.Spec.Volumes { - // find the volumes sourced from PVCs - pvcSrc := v.VolumeSource.PersistentVolumeClaim - if pvcSrc == nil { - // we have a few non-PVC volumes - continue - } - checkedVolumes = true - // fetch the corresponding claim - var pvc corev1.PersistentVolumeClaim - require.NoError(t, k.Client.Get(types.NamespacedName{Namespace: pod.Namespace, Name: pvcSrc.ClaimName}, &pvc)) - - // for elasticsearch-data ensure it's a new one (we deleted the old one above) - if v.Name == volume.ElasticsearchDataVolumeName && deletedPVC.UID == pvc.UID { - t.Errorf("expected new PVC but was reused %v, %v, seen: %v", pvc.Name, pvc.UID, deletedPVC.UID) - // for all the other volumes expect reuse - } else if v.Name != volume.ElasticsearchDataVolumeName && !stringsutil.StringInSlice(string(pvc.UID), seenPVCs) { - t.Errorf("expected reused PVC but %v is new, %v , seen: %v", pvc.Name, pvc.UID, seenPVCs) - } - } - require.True(t, checkedVolumes, "unexpected: no persistent volume claims where found") - }, - }, - ). - // And that the cluster UUID has not changed - WithStep(elasticsearch.CompareClusterUUIDStep(b.Elasticsearch, k, &clusterUUID)). - WithSteps(b.DeletionTestSteps(k)). - RunSequential(t) -} - func TestDeleteServices(t *testing.T) { b := elasticsearch.NewBuilder("test-failure-delete-services"). 
WithESMasterDataNodes(1, elasticsearch.DefaultResources) From 1b89f47f001a1fa30fdf50ce0169d552876e8046 Mon Sep 17 00:00:00 2001 From: Sebastien Guilloux Date: Mon, 5 Aug 2019 09:43:10 +0200 Subject: [PATCH 17/31] Remove PVCs at the end of E2E tests (#1464) To clean up resources and avoid any PVC reuse between different E2E stacks, let's remove PVCs at the end of E2E tests. This should probably be done by the operator directly, see https://github.com/elastic/cloud-on-k8s/issues/1288. --- .../e2e/test/elasticsearch/steps_deletion.go | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/operators/test/e2e/test/elasticsearch/steps_deletion.go b/operators/test/e2e/test/elasticsearch/steps_deletion.go index 99467d3669..64a0968ceb 100644 --- a/operators/test/e2e/test/elasticsearch/steps_deletion.go +++ b/operators/test/e2e/test/elasticsearch/steps_deletion.go @@ -7,13 +7,16 @@ package elasticsearch import ( "testing" + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" "github.com/pkg/errors" "github.com/stretchr/testify/require" - "k8s.io/apimachinery/pkg/api/meta" - + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/client" ) func (b Builder) DeletionTestSteps(k *test.K8sClient) test.StepList { @@ -54,5 +57,23 @@ func (b Builder) DeletionTestSteps(k *test.K8sClient) test.StepList { return k.CheckPodCount(test.ESPodListOptions(b.Elasticsearch.Name), 0) }), }, + { + Name: "Remove leftover PVCs", + // TODO: remove when https://github.com/elastic/cloud-on-k8s/issues/1288 is fixed.
+ Test: func(t *testing.T) { + var pvcs corev1.PersistentVolumeClaimList + err := k.Client.List(&client.ListOptions{ + Namespace: b.Elasticsearch.Namespace, + LabelSelector: labels.SelectorFromSet(map[string]string{ + label.ClusterNameLabelName: b.Elasticsearch.Name, + }), + }, &pvcs) + require.NoError(t, err) + for _, pvc := range pvcs.Items { + err := k.Client.Delete(&pvc) + require.NoError(t, err) + } + }, + }, } } From 612f3a60331998f6b5ebe23075ffb46287d881da Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 09:47:54 +0200 Subject: [PATCH 18/31] Remove local-volume es sample --- .../elasticsearch_local_volume.yaml | 21 ------------------- 1 file changed, 21 deletions(-) delete mode 100644 operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml diff --git a/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml b/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml deleted file mode 100644 index 6ed411ac9d..0000000000 --- a/operators/config/samples/elasticsearch/elasticsearch_local_volume.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# This sample sets up an Elasticsearch cluster with 3 nodes, -# using the elastic-local persistent volume provider -apiVersion: elasticsearch.k8s.elastic.co/v1alpha1 -kind: Elasticsearch -metadata: - name: es-local-volume-sample -spec: - version: "7.1.0" - nodes: - - name: default - nodeCount: 3 - volumeClaimTemplates: - - metadata: - name: elasticsearch-data - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10M - storageClassName: elastic-local From b29dd50442ff40993c3cec0e9f8b9e38ebe4d37a Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 09:53:51 +0200 Subject: [PATCH 19/31] Improve log fields names --- .../pkg/controller/apmserver/pod_test.go | 6 +-- .../certificates/transport/pod_secret.go | 37 ++++++++++++------- .../version/zen2/voting_exclusions.go | 4 +- .../e2e/test/elasticsearch/steps_deletion.go | 7 ++-- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/operators/pkg/controller/apmserver/pod_test.go b/operators/pkg/controller/apmserver/pod_test.go index 4a38957a78..59682dc094 100644 --- a/operators/pkg/controller/apmserver/pod_test.go +++ b/operators/pkg/controller/apmserver/pod_test.go @@ -8,12 +8,12 @@ import ( "reflect" "testing" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/elastic/cloud-on-k8s/operators/pkg/apis/apm/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/volume" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/settings" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func TestNewPodSpec(t *testing.T) { diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go index e0ac9262dd..5dce676960 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go @@ -43,7 +43,8 @@ func ensureTransportCertificatesSecretContentsForPod( if privateKeyData, ok := secret.Data[PodKeyFileName(pod.Name)]; ok { storedPrivateKey, err := certificates.ParsePEMPrivateKey(privateKeyData) if err != nil { - log.Error(err, "Unable to parse stored private key", "pod", pod.Name) + log.Error(err, "Unable to parse stored private key", + "namespace", pod.Namespace, "pod_name", pod.Name) } else { needsNewPrivateKey = false 
privateKey = storedPrivateKey @@ -64,7 +65,7 @@ func ensureTransportCertificatesSecretContentsForPod( if shouldIssueNewCertificate(es, *secret, pod, privateKey, svcs, ca, rotationParams.RotateBefore) { log.Info( "Issuing new certificate", - "pod", pod.Name, + "pod_name", pod.Name, ) csr, err := x509.CreateCertificateRequest(cryptorand.Reader, &x509.CertificateRequest{}, privateKey) @@ -117,7 +118,8 @@ func shouldIssueNewCertificate( generalNames, err := buildGeneralNames(es, svcs, pod) if err != nil { - log.Error(err, "Cannot create GeneralNames for the TLS certificate", "pod", pod.Name) + log.Error(err, "Cannot create GeneralNames for the TLS certificate", + "namespace", pod.Namespace, "pod_name", pod.Name) return true } @@ -130,10 +132,11 @@ func shouldIssueNewCertificate( if !publicKeyOk || publicKey.N.Cmp(privateKey.PublicKey.N) != 0 || publicKey.E != privateKey.PublicKey.E { log.Info( "Certificate belongs to a different public key, should issue new", + "namespace", pod.Namespace, "subject", cert.Subject, "issuer", cert.Issuer, "current_ca_subject", ca.Cert.Subject, - "pod", pod.Name, + "pod_name", pod.Name, ) return true } @@ -148,6 +151,7 @@ func shouldIssueNewCertificate( if _, err := cert.Verify(verifyOpts); err != nil { log.Info( fmt.Sprintf("Certificate was not valid, should issue new: %s", err), + "namespace", pod.Namespace, "subject", cert.Subject, "issuer", cert.Issuer, "current_ca_subject", ca.Cert.Subject, @@ -157,14 +161,16 @@ func shouldIssueNewCertificate( } if time.Now().After(cert.NotAfter.Add(-certReconcileBefore)) { - log.Info("Certificate soon to expire, should issue new", "pod", pod.Name) + log.Info("Certificate soon to expire, should issue new", + "namespace", pod.Namespace, "pod", pod.Name) return true } // compare actual vs. 
expected SANs expected, err := certificates.MarshalToSubjectAlternativeNamesData(generalNames) if err != nil { - log.Error(err, "Cannot marshal subject alternative names", "pod", pod.Name) + log.Error(err, "Cannot marshal subject alternative names", + "namespace", pod.Namespace, "pod_name", pod.Name) return true } extraExtensionFound := false @@ -174,12 +180,14 @@ func shouldIssueNewCertificate( } extraExtensionFound = true if !reflect.DeepEqual(ext.Value, expected) { - log.Info("Certificate SANs do not match expected one, should issue new", "pod", pod.Name) + log.Info("Certificate SANs do not match expected one, should issue new", + "namespace", pod.Namespace, "pod_name", pod.Name) return true } } if !extraExtensionFound { - log.Info("SAN extra extension not found, should issue new certificate", "pod", pod.Name) + log.Info("SAN extra extension not found, should issue new certificate", + "namespace", pod.Namespace, "pod_name", pod.Name) return true } @@ -190,13 +198,15 @@ func shouldIssueNewCertificate( func extractTransportCert(secret corev1.Secret, pod corev1.Pod, commonName string) *x509.Certificate { certData, ok := secret.Data[PodCertFileName(pod.Name)] if !ok { - log.Info("No tls certificate found in secret", "pod", pod.Name) + log.Info("No tls certificate found in secret", + "namespace", pod.Namespace, "pod_name", pod.Name) return nil } certs, err := certificates.ParsePEMCerts(certData) if err != nil { - log.Error(err, "Invalid certificate data found, issuing new certificate", "pod", pod.Name) + log.Error(err, "Invalid certificate data found, issuing new certificate", + "namespace", pod.Namespace, "pod_name", pod.Name) return nil } @@ -211,9 +221,10 @@ func extractTransportCert(secret corev1.Secret, pod corev1.Pod, commonName strin log.Info( "Did not find a certificate with the expected common name", - "pod", pod.Name, - "expected", commonName, - "found", names, + "namespace", pod.Namespace, + "pod_name", pod.Name, + "expected_name", commonName, + "actual_name", names, ) return nil diff --git a/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go b/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go index 84f384802e..a1ce4432bd 100644 --- a/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go +++ b/operators/pkg/controller/elasticsearch/version/zen2/voting_exclusions.go @@ -72,13 +72,13 @@ func ClearVotingConfigExclusions(es v1alpha1.Elasticsearch, c k8s.Client, esClie return false, err } if !canClear { - log.V(1).Info("Cannot clear voting exclusions yet", "namespace", es.Namespace, "name", es.Name) + log.V(1).Info("Cannot clear voting exclusions yet", "namespace", es.Namespace, "es_name", es.Name) return true, nil // requeue } ctx, cancel := context.WithTimeout(context.Background(), client.DefaultReqTimeout) defer cancel() - log.Info("Ensuring no voting exclusions are set", "namespace", es.Namespace, "name", es.Name) + log.Info("Ensuring no voting exclusions are set", "namespace", es.Namespace, "es_name", es.Name) if err := esClient.DeleteVotingConfigExclusions(ctx, false); err != nil { return false, err } diff --git a/operators/test/e2e/test/elasticsearch/steps_deletion.go b/operators/test/e2e/test/elasticsearch/steps_deletion.go index 64a0968ceb..a421a67027 100644 --- a/operators/test/e2e/test/elasticsearch/steps_deletion.go +++ b/operators/test/e2e/test/elasticsearch/steps_deletion.go @@ -7,9 +7,6 @@ package elasticsearch import ( "testing" - 
"github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" - "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" - "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" "github.com/pkg/errors" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -17,6 +14,10 @@ import ( "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/labels" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/operators/pkg/utils/k8s" + "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" ) func (b Builder) DeletionTestSteps(k *test.K8sClient) test.StepList { From 4294ca74b98a9df79a75ef2fe5a28e0ddaaf7062 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 09:55:54 +0200 Subject: [PATCH 20/31] Fix min version comment --- .../pkg/controller/elasticsearch/version/supported_versions.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operators/pkg/controller/elasticsearch/version/supported_versions.go b/operators/pkg/controller/elasticsearch/version/supported_versions.go index 7e0c564cab..872f07084f 100644 --- a/operators/pkg/controller/elasticsearch/version/supported_versions.go +++ b/operators/pkg/controller/elasticsearch/version/supported_versions.go @@ -24,7 +24,7 @@ func SupportedVersions(v version.Version) *LowestHighestSupportedVersions { switch v.Major { case 6: return &LowestHighestSupportedVersions{ - // Min. version is 6.7.0 for now. Will be 6.8.0 soon. + // Min. version is 6.7.0. LowestSupportedVersion: version.MustParse("6.7.0"), // higher may be possible, but not proven yet, lower may also be a requirement... HighestSupportedVersion: version.MustParse("6.99.99"), From 63e3664e4026e2886e3a94bd78515472d5837d22 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 10:34:36 +0200 Subject: [PATCH 21/31] Fix wrong keystore command from merge conflict --- .../pkg/controller/elasticsearch/initcontainer/keystore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operators/pkg/controller/elasticsearch/initcontainer/keystore.go b/operators/pkg/controller/elasticsearch/initcontainer/keystore.go index ba4f2dddda..ffbbe4d9a0 100644 --- a/operators/pkg/controller/elasticsearch/initcontainer/keystore.go +++ b/operators/pkg/controller/elasticsearch/initcontainer/keystore.go @@ -12,7 +12,7 @@ import ( // KeystoreParams is used to generate the init container that will load the secure settings into a keystore. 
var KeystoreParams = keystore.InitContainerParameters{ KeystoreCreateCommand: "/usr/share/elasticsearch/bin/elasticsearch-keystore create", - KeystoreAddCommand: "/usr/share/elasticsearch/bin/elasticsearch-keystore add", + KeystoreAddCommand: `/usr/share/elasticsearch/bin/elasticsearch-keystore add-file "$key" "$filename"`, SecureSettingsVolumeMountPath: keystore.SecureSettingsVolumeMountPath, DataVolumePath: esvolume.ElasticsearchDataMountPath, } From c7952b86d23e8aa163747b00876e23ee8646e8b2 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 10:43:50 +0200 Subject: [PATCH 22/31] Fix missing dependency in Gopkg.lock --- operators/Gopkg.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/operators/Gopkg.lock b/operators/Gopkg.lock index 30d669c51d..fc695add6c 100644 --- a/operators/Gopkg.lock +++ b/operators/Gopkg.lock @@ -1246,6 +1246,7 @@ "github.com/ghodss/yaml", "github.com/go-logr/logr", "github.com/go-test/deep", + "github.com/imdario/mergo", "github.com/magiconair/properties/assert", "github.com/pkg/errors", "github.com/spf13/cobra", From a85d0676d7402cb61827c02c576f9e29eb3cb8f3 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 10:55:14 +0200 Subject: [PATCH 23/31] Remove pod name creation helpers --- .../v1alpha1/elasticsearch_types.go | 1 + .../pkg/controller/elasticsearch/name/name.go | 45 ----- .../elasticsearch/name/name_test.go | 186 ------------------ 3 files changed, 1 insertion(+), 231 deletions(-) delete mode 100644 operators/pkg/controller/elasticsearch/name/name_test.go diff --git a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go index 55877db6a5..2144e2c918 100644 --- a/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go +++ b/operators/pkg/apis/elasticsearch/v1alpha1/elasticsearch_types.go @@ -70,6 +70,7 @@ type NodeSpec struct { // Name is a logical name for this set of nodes. Used as a part of the managed Elasticsearch node.name setting. // +kubebuilder:validation:Pattern=[a-zA-Z0-9-]+ // +kubebuilder:validation:MaxLength=19 + // TODO: refactor and explain name length conventions Name string `json:"name"` // Config represents Elasticsearch configuration. diff --git a/operators/pkg/controller/elasticsearch/name/name.go b/operators/pkg/controller/elasticsearch/name/name.go index 838842a1f6..ad3a94fdda 100644 --- a/operators/pkg/controller/elasticsearch/name/name.go +++ b/operators/pkg/controller/elasticsearch/name/name.go @@ -5,11 +5,7 @@ package name import ( - "strings" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/common/name" - "k8s.io/apimachinery/pkg/util/rand" ) const ( @@ -19,8 +15,6 @@ const ( MaxElasticsearchNameLength = 36 // this leaves 63 - 36 = 27 characters for a suffix. MaxSuffixLength = MaxLabelLength - MaxElasticsearchNameLength - // podRandomSuffixLength represents the length of the random suffix that is appended in NewPodName. - podRandomSuffixLength = 10 configSecretSuffix = "config" secureSettingsSecretSuffix = "secure-settings" @@ -43,50 +37,11 @@ var ESNamer = name.Namer{ var esNoDefaultSuffixesNamer = ESNamer.WithDefaultSuffixes() -// NewPodName returns a unique name to be used for the pod name and the -// Elasticsearch cluster node name. -// The generated pod name follows the pattern "{esName}-es-[{nodeSpec.Name}-]{random suffix}". 
-func NewPodName(esName string, nodeSpec v1alpha1.NodeSpec) string { - var sfx strings.Builder - - // it's safe to ignore the result here as strings.Builder cannot error on sfx.WriteString - if nodeSpec.Name != "" { - sfx.WriteString(nodeSpec.Name) // #nosec G104 - sfx.WriteString("-") // #nosec G104 - } - - sfx.WriteString(rand.String(podRandomSuffixLength)) // #nosec G104 - - return ESNamer.Suffix(esName, sfx.String()) -} - -// Basename returns the base name (without the random suffix) for the provided pod. -// E.g: A pod named foo-bar-baz-{suffix} has a basename of "foo-bar-baz". -func Basename(podName string) string { - idx := strings.LastIndex(podName, "-") - if idx == -1 { - // no segments in the provided pod name, so return the full pod name - return podName - } - return podName[0:idx] -} - // StatefulSet returns the name of the StatefulSet corresponding to the given NodeSpec. func StatefulSet(esName string, nodeSpecName string) string { return ESNamer.Suffix(esName, nodeSpecName) } -// NewPVCName returns a unique PVC name given a pod name and a PVC template name. -// Uniqueness is guaranteed by the pod name that contains a random id. -// The PVC template name is trimmed so that the PVC name does not exceed the max -// length for a label. -func NewPVCName(podName string, pvcTemplateName string) string { - if len(pvcTemplateName) > MaxSuffixLength { - pvcTemplateName = pvcTemplateName[:MaxSuffixLength-1] - } - return esNoDefaultSuffixesNamer.Suffix(podName, pvcTemplateName) -} - func ConfigSecret(ssetName string) string { return ESNamer.Suffix(ssetName, configSecretSuffix) } diff --git a/operators/pkg/controller/elasticsearch/name/name_test.go b/operators/pkg/controller/elasticsearch/name/name_test.go deleted file mode 100644 index 16007bf6c0..0000000000 --- a/operators/pkg/controller/elasticsearch/name/name_test.go +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package name - -import ( - "fmt" - "testing" - - "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" - "github.com/stretchr/testify/assert" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -var es = v1alpha1.Elasticsearch{ - ObjectMeta: v1.ObjectMeta{Name: "elasticsearch"}, -} - -func TestNewNodeName(t *testing.T) { - type args struct { - clusterName string - nodeSpec v1alpha1.NodeSpec - } - tests := []struct { - name string - args args - want string - }{ - { - name: "Generates a random name from a short elasticsearch name", - args: args{ - clusterName: "some-es-name", - }, - want: "some-es-name-es-(.*)", - }, - { - name: "Generates a random name from a long elasticsearch name", - args: args{ - clusterName: "some-es-name-that-is-quite-long-and-will-be-trimmed", - }, - want: "some-es-name-that-is-quite-long-and-will-be-trimm-es-(.*)", - }, - { - name: "Generates a random name from a short elasticsearch name with a nodeSpec.Name", - args: args{ - clusterName: "some-es-name", - nodeSpec: v1alpha1.NodeSpec{ - Name: "foo", - }, - }, - want: "some-es-name-es-foo-(.*)", - }, - { - name: "Generates a random name from a long elasticsearch name with a nodeSpec.Name", - args: args{ - clusterName: "some-es-name-that-is-quite-long-and-will-be-trimmed", - nodeSpec: v1alpha1.NodeSpec{ - Name: "foo", - }, - }, - want: "some-es-name-that-is-quite-long-and-will-be-t-es-foo-(.*)", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := NewPodName(tt.args.clusterName, tt.args.nodeSpec) - if len(got) > MaxLabelLength { - assert.Len(t, got, MaxLabelLength, - got, fmt.Sprintf("should be maximum %d characters long", MaxLabelLength)) - } - - assert.Regexp(t, tt.want, got) - }) - } -} - -func TestNewPVCName(t *testing.T) { - type args struct { - podName string - pvcTemplateName string - } - tests := []struct { - name string - args args - want string - }{ - { - name: "Generates a random name from a short pvc template name", - args: args{ - podName: "some-es-name-xxxxxxxxx-es-2qnjmqsv4s", - pvcTemplateName: "a-pvc-name", - }, - want: "some-es-name-xxxxxxxxx-es-2qnjmqsv4s-a-pvc-name", - }, - { - name: "Generates a random name from a long pod name (should not happen)", - args: args{ - podName: "some-es-name-that-is-quite-long-and-will-be-trimmed-es-2qnjmqsv4s", - pvcTemplateName: "a-pvc-name", - }, - want: "some-es-name-that-is-quite-long-and-will-be-trimmed--a-pvc-name", - }, - { - name: "Generates a random name from a long pvc template name", - args: args{ - podName: "some-es-name-xxxxxxxxx-es-2qnjmqsv4s", - pvcTemplateName: "some-pvc-name-that-is-quite-loooooong", - }, - want: "some-es-name-xxxxxxxxx-es-2qnjmqsv4s-some-pvc-name-that-is-quit", - }, - { - name: "Generates a random name from a long pod name (should not happen) and a long pvc template name", - args: args{ - podName: "some-es-name-that-is-quite-long-and-will-be-trimmed-es-2qnjmqsv4s", - pvcTemplateName: "some-pvc-name-that-is-quite-loooooong", - }, - want: "some-es-name-that-is-quite-long-and--some-pvc-name-that-is-quit", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := NewPVCName(tt.args.podName, tt.args.pvcTemplateName) - if len(got) > MaxLabelLength { - assert.Len(t, got, MaxLabelLength, - got, fmt.Sprintf("should be maximum %d characters long", MaxLabelLength)) - } - - assert.Equal(t, tt.want, got) - }) - } -} - -func TestBasename(t *testing.T) { - type args struct { - podName string - } - tests := []struct { - name string - args args - want string - }{ - { - 
name: "pod name with no segments", - args: args{ - podName: "foo", - }, - want: "foo", - }, - { - name: "sample pod name", - args: args{ - podName: "sample-1-es-mqjcddtv6g", - }, - want: "sample-1-es", - }, - { - name: "sample pod name with nodespec name", - args: args{ - podName: "sample-1-es-foo-mqjcddtv6g", - }, - want: "sample-1-es-foo", - }, - { - name: "new pod", - args: args{ - podName: NewPodName(es.Name, v1alpha1.NodeSpec{}), - }, - want: "elasticsearch-es", - }, - { - name: "new pod with nodespec name", - args: args{ - podName: NewPodName(es.Name, v1alpha1.NodeSpec{Name: "foo"}), - }, - want: "elasticsearch-es-foo", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := Basename(tt.args.podName); got != tt.want { - t.Errorf("Basename() = %v, want %v", got, tt.want) - } - }) - } -} From 57187c14ed3d7b598c576aff6e45c4c3d7e16aba Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 11:02:46 +0200 Subject: [PATCH 24/31] Remove useless services in transport certs generation --- .../elasticsearch/certificates/ca_reconcile.go | 1 - .../elasticsearch/certificates/transport/csr.go | 4 +--- .../elasticsearch/certificates/transport/csr_test.go | 5 ++--- .../certificates/transport/pod_secret.go | 8 +++----- .../certificates/transport/pod_secret_test.go | 2 -- .../elasticsearch/certificates/transport/reconcile.go | 3 +-- .../certificates/transport/transport_fixtures_test.go | 11 +---------- 7 files changed, 8 insertions(+), 26 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/certificates/ca_reconcile.go b/operators/pkg/controller/elasticsearch/certificates/ca_reconcile.go index a2c638e778..498dea8569 100644 --- a/operators/pkg/controller/elasticsearch/certificates/ca_reconcile.go +++ b/operators/pkg/controller/elasticsearch/certificates/ca_reconcile.go @@ -112,7 +112,6 @@ func Reconcile( scheme, transportCA, es, - services, certRotation, ) if results.WithResult(result).WithError(err).HasError() { diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/csr.go b/operators/pkg/controller/elasticsearch/certificates/transport/csr.go index b13e5cc3a4..bea7b8cc5c 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/csr.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/csr.go @@ -22,11 +22,10 @@ import ( func createValidatedCertificateTemplate( pod corev1.Pod, cluster v1alpha1.Elasticsearch, - svcs []corev1.Service, csr *x509.CertificateRequest, certValidity time.Duration, ) (*certificates.ValidatedCertificateTemplate, error) { - generalNames, err := buildGeneralNames(cluster, svcs, pod) + generalNames, err := buildGeneralNames(cluster, pod) if err != nil { return nil, err } @@ -64,7 +63,6 @@ func createValidatedCertificateTemplate( func buildGeneralNames( cluster v1alpha1.Elasticsearch, - svcs []corev1.Service, pod corev1.Pod, ) ([]certificates.GeneralName, error) { podIP := net.ParseIP(pod.Status.PodIP) diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/csr_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/csr_test.go index 484c705e6a..738457d5ed 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/csr_test.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/csr_test.go @@ -36,7 +36,7 @@ func Test_createValidatedCertificateTemplate(t *testing.T) { cn := "test-pod-name.node.test-es-name.test-namespace.es.local" validatedCert, err := createValidatedCertificateTemplate( - testPod, testES, 
[]corev1.Service{testSvc}, testCSR, certificates.DefaultCertValidity, + testPod, testES, testCSR, certificates.DefaultCertValidity, ) require.NoError(t, err) @@ -74,7 +74,6 @@ func Test_buildGeneralNames(t *testing.T) { type args struct { cluster v1alpha1.Elasticsearch - svcs []corev1.Service pod corev1.Pod } tests := []struct { @@ -98,7 +97,7 @@ func Test_buildGeneralNames(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := buildGeneralNames(tt.args.cluster, tt.args.svcs, tt.args.pod) + got, err := buildGeneralNames(tt.args.cluster, tt.args.pod) require.NoError(t, err) require.Equal(t, tt.want, got) }) diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go index 5dce676960..19b5fb984a 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go @@ -33,7 +33,6 @@ func ensureTransportCertificatesSecretContentsForPod( es v1alpha1.Elasticsearch, secret *corev1.Secret, pod corev1.Pod, - svcs []corev1.Service, ca *certificates.CA, rotationParams certificates.RotationParams, ) error { @@ -62,7 +61,7 @@ func ensureTransportCertificatesSecretContentsForPod( secret.Data[PodKeyFileName(pod.Name)] = certificates.EncodePEMPrivateKey(*privateKey) } - if shouldIssueNewCertificate(es, *secret, pod, privateKey, svcs, ca, rotationParams.RotateBefore) { + if shouldIssueNewCertificate(es, *secret, pod, privateKey, ca, rotationParams.RotateBefore) { log.Info( "Issuing new certificate", "pod_name", pod.Name, @@ -80,7 +79,7 @@ func ensureTransportCertificatesSecretContentsForPod( } validatedCertificateTemplate, err := createValidatedCertificateTemplate( - pod, es, svcs, parsedCSR, rotationParams.Validity, + pod, es, parsedCSR, rotationParams.Validity, ) if err != nil { return err @@ -110,13 +109,12 @@ func shouldIssueNewCertificate( secret corev1.Secret, pod corev1.Pod, privateKey *rsa.PrivateKey, - svcs []corev1.Service, ca *certificates.CA, certReconcileBefore time.Duration, ) bool { certCommonName := buildCertificateCommonName(pod, es.Name, es.Namespace) - generalNames, err := buildGeneralNames(es, svcs, pod) + generalNames, err := buildGeneralNames(es, pod) if err != nil { log.Error(err, "Cannot create GeneralNames for the TLS certificate", "namespace", pod.Namespace, "pod_name", pod.Name) diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret_test.go index c3bbad9d0d..f3356b22bd 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret_test.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret_test.go @@ -95,7 +95,6 @@ func Test_shouldIssueNewCertificate(t *testing.T) { tt.args.secret, *tt.args.pod, testRSAPrivateKey, - []corev1.Service{testSvc}, testCA, tt.args.rotateBefore, ); got != tt.want { @@ -205,7 +204,6 @@ func Test_ensureTransportCertificatesSecretContentsForPod(t *testing.T) { testES, tt.secret, *tt.pod, - []corev1.Service{testSvc}, testCA, certificates.RotationParams{ Validity: certificates.DefaultCertValidity, diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go index 2130a156b1..9d08d2ce2e 100644 --- 
a/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/reconcile.go @@ -33,7 +33,6 @@ func ReconcileTransportCertificatesSecrets( scheme *runtime.Scheme, ca *certificates.CA, es v1alpha1.Elasticsearch, - services []corev1.Service, rotationParams certificates.RotationParams, ) (reconcile.Result, error) { log.Info("Reconciling transport certificate secrets", "namespace", es.Namespace, "es_name", es.Name) @@ -60,7 +59,7 @@ func ReconcileTransportCertificatesSecrets( } if err := ensureTransportCertificatesSecretContentsForPod( - es, secret, pod, services, ca, rotationParams, + es, secret, pod, ca, rotationParams, ); err != nil { return reconcile.Result{}, err } diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go b/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go index e38a6e298f..15174fecf0 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/transport_fixtures_test.go @@ -39,15 +39,6 @@ var ( PodIP: testIP, }, } - testSvc = corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-service", - Namespace: "default", - }, - Spec: corev1.ServiceSpec{ - ClusterIP: "2.2.3.3", - }, - } ) const ( @@ -98,7 +89,7 @@ func init() { } validatedCertificateTemplate, err = createValidatedCertificateTemplate( - testPod, testES, []corev1.Service{testSvc}, testCSR, certificates.DefaultCertValidity) + testPod, testES, testCSR, certificates.DefaultCertValidity) if err != nil { panic("Failed to create validated cert template:" + err.Error()) } From 478e791fb5f58bb1ae248d0dfdfcc634ca67f346 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 11:08:20 +0200 Subject: [PATCH 25/31] Minor fixes in transport secrets logs and comments --- .../elasticsearch/certificates/transport/pod_secret.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go index 19b5fb984a..1809d11e13 100644 --- a/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go +++ b/operators/pkg/controller/elasticsearch/certificates/transport/pod_secret.go @@ -8,6 +8,7 @@ import ( cryptorand "crypto/rand" "crypto/rsa" "crypto/x509" + "errors" "fmt" "reflect" "time" @@ -103,6 +104,7 @@ func ensureTransportCertificatesSecretContentsForPod( // - no certificate yet // - certificate has the wrong format // - certificate is invalid or expired +// - certificate has no SAN extra extension // - certificate SAN and IP does not match pod SAN and IP func shouldIssueNewCertificate( es v1alpha1.Elasticsearch, @@ -167,7 +169,7 @@ func shouldIssueNewCertificate( // compare actual vs. 
expected SANs expected, err := certificates.MarshalToSubjectAlternativeNamesData(generalNames) if err != nil { - log.Error(err, "Cannot marshal subject alternative names", + log.Error(err, "Cannot marshal subject alternative names, will issue new certificate", "namespace", pod.Namespace, "pod_name", pod.Name) return true } @@ -184,7 +186,8 @@ func shouldIssueNewCertificate( } } if !extraExtensionFound { - log.Info("SAN extra extension not found, should issue new certificate", + log.Error(errors.New("no SAN extra extension"), + "SAN extra extension not found, should issue new certificate", "namespace", pod.Namespace, "pod_name", pod.Name) return true } @@ -203,7 +206,7 @@ func extractTransportCert(secret corev1.Secret, pod corev1.Pod, commonName strin certs, err := certificates.ParsePEMCerts(certData) if err != nil { - log.Error(err, "Invalid certificate data found, issuing new certificate", + log.Error(err, "Invalid certificate data found", "namespace", pod.Namespace, "pod_name", pod.Name) return nil } From f87f4b42bd64174555cf43a3a5f9ab198a39500d Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 11:15:36 +0200 Subject: [PATCH 26/31] Update scheduledUpgrades only once per iteration --- operators/pkg/controller/elasticsearch/driver/upgrade.go | 1 - 1 file changed, 1 deletion(-) diff --git a/operators/pkg/controller/elasticsearch/driver/upgrade.go b/operators/pkg/controller/elasticsearch/driver/upgrade.go index 67201e6158..3a4d850da9 100644 --- a/operators/pkg/controller/elasticsearch/driver/upgrade.go +++ b/operators/pkg/controller/elasticsearch/driver/upgrade.go @@ -129,7 +129,6 @@ func (d *defaultDriver) doRollingUpgrade( if err := d.upgradeStatefulSetPartition(&statefulSets[i], partition); err != nil { return results.WithError(err) } - scheduledUpgrades++ } } return results From 8402c7bcbeb59d9300e74672da5d9a93e26eba69 Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 11:31:09 +0200 Subject: [PATCH 27/31] Remove basename comparisons in E2E tests --- operators/pkg/controller/elasticsearch/name/name.go | 2 -- operators/test/e2e/test/elasticsearch/checks_es.go | 7 +------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/name/name.go b/operators/pkg/controller/elasticsearch/name/name.go index ad3a94fdda..106d49ea07 100644 --- a/operators/pkg/controller/elasticsearch/name/name.go +++ b/operators/pkg/controller/elasticsearch/name/name.go @@ -35,8 +35,6 @@ var ESNamer = name.Namer{ DefaultSuffixes: []string{"es"}, } -var esNoDefaultSuffixesNamer = ESNamer.WithDefaultSuffixes() - // StatefulSet returns the name of the StatefulSet corresponding to the given NodeSpec. 
func StatefulSet(esName string, nodeSpecName string) string { return ESNamer.Suffix(esName, nodeSpecName) diff --git a/operators/test/e2e/test/elasticsearch/checks_es.go b/operators/test/e2e/test/elasticsearch/checks_es.go index 976a17bbd5..04df4eac19 100644 --- a/operators/test/e2e/test/elasticsearch/checks_es.go +++ b/operators/test/e2e/test/elasticsearch/checks_es.go @@ -13,7 +13,6 @@ import ( "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" estype "github.com/elastic/cloud-on-k8s/operators/pkg/apis/elasticsearch/v1alpha1" "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/operators/pkg/controller/elasticsearch/name" "github.com/elastic/cloud-on-k8s/operators/test/e2e/test" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -146,8 +145,6 @@ func (e *esClusterChecks) CheckESNodesTopology(es estype.Elasticsearch) test.Ste return err } - podNameExample := name.NewPodName(es.Name, topoElem) - // ES returns a string, parse it as an int64, base10: cgroupMemoryLimitsInBytes, err := strconv.ParseInt( nodeStats.OS.CGroup.Memory.LimitInBytes, 10, 64, @@ -157,9 +154,7 @@ func (e *esClusterChecks) CheckESNodesTopology(es estype.Elasticsearch) test.Ste } if cfg.Node == nodeRoles && - compareMemoryLimit(topoElem, cgroupMemoryLimitsInBytes) && - // compare the base names of the pod and topology to ensure they're from the same nodespec - name.Basename(node.Name) == name.Basename(podNameExample) { + compareMemoryLimit(topoElem, cgroupMemoryLimitsInBytes) { // no need to match this topology anymore expectedTopology = append(expectedTopology[:i], expectedTopology[i+1:]...) break From b55bc084bfaebdac3ab3af9dd08ef12d691142fd Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 11:59:50 +0200 Subject: [PATCH 28/31] Run make generate for missing comment --- .../config/crds/elasticsearch_v1alpha1_elasticsearch.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml b/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml index 70c9306d9a..e0ac182491 100644 --- a/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml +++ b/operators/config/crds/elasticsearch_v1alpha1_elasticsearch.yaml @@ -120,8 +120,9 @@ spec: description: Config represents Elasticsearch configuration. type: object name: - description: Name is a logical name for this set of nodes. Used - as a part of the managed Elasticsearch node.name setting. + description: 'Name is a logical name for this set of nodes. Used + as a part of the managed Elasticsearch node.name setting. 
TODO: + refactor and explain name length conventions' maxLength: 19 pattern: '[a-zA-Z0-9-]+' type: string From bef28676b4cdfc1259d5e974186614af50b1fd0e Mon Sep 17 00:00:00 2001 From: sebgl Date: Mon, 5 Aug 2019 14:42:16 +0200 Subject: [PATCH 29/31] Work with the list of filtered ssets to update --- .../pkg/controller/elasticsearch/driver/upgrade.go | 7 +------ operators/pkg/controller/elasticsearch/sset/list.go | 11 ++++++----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/driver/upgrade.go b/operators/pkg/controller/elasticsearch/driver/upgrade.go index 3a4d850da9..c70f892ec7 100644 --- a/operators/pkg/controller/elasticsearch/driver/upgrade.go +++ b/operators/pkg/controller/elasticsearch/driver/upgrade.go @@ -47,11 +47,6 @@ func (d *defaultDriver) doRollingUpgrade( ) *reconciler.Results { results := &reconciler.Results{} - if !statefulSets.RevisionUpdateScheduled() { - // nothing to upgrade - return results - } - // TODO: deal with multiple restarts at once, taking the changeBudget into account. // We'd need to stop checking cluster health and do something smarter, since cluster health green check // should be done **in between** restarts to make sense, which is pretty hard to do since we don't @@ -67,7 +62,7 @@ func (d *defaultDriver) doRollingUpgrade( maxMasterNodeUpgrades := 1 scheduledMasterNodeUpgrades := 0 - for i, statefulSet := range statefulSets { + for i, statefulSet := range statefulSets.ToUpdate() { // Inspect each pod, starting from the highest ordinal, and decrement the partition to allow // pod upgrades to go through, controlled by the StatefulSet controller. for partition := sset.GetUpdatePartition(statefulSet); partition >= 0; partition-- { diff --git a/operators/pkg/controller/elasticsearch/sset/list.go b/operators/pkg/controller/elasticsearch/sset/list.go index 0fd21839d6..84ff9f171a 100644 --- a/operators/pkg/controller/elasticsearch/sset/list.go +++ b/operators/pkg/controller/elasticsearch/sset/list.go @@ -48,14 +48,15 @@ func (l StatefulSetList) ObjectMetas() []metav1.ObjectMeta { return objs } -// RevisionUpdateScheduled returns true if at least one revision update is scheduled. -func (l StatefulSetList) RevisionUpdateScheduled() bool { +// ToUpdate filters the StatefulSetList to the ones having an update revision scheduled. +func (l StatefulSetList) ToUpdate() StatefulSetList { + toUpdate := StatefulSetList{} for _, s := range l { - if s.Status.UpdateRevision != "" && s.Status.UpdateRevision != s.Status.CurrentRevision { - return true + if s.Status.UpdateRevision != "" && (s.Status.UpdateRevision != s.Status.CurrentRevision) { + toUpdate = append(toUpdate, s) } } - return false + return toUpdate } // PodNames returns the names of the pods for all StatefulSets in the list. From ff405dcf5b85306bbebc8566c2368123b17ed40e Mon Sep 17 00:00:00 2001 From: sebgl Date: Wed, 7 Aug 2019 09:22:38 +0200 Subject: [PATCH 30/31] Clarify expectations comment --- operators/pkg/controller/elasticsearch/driver/nodes.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/driver/nodes.go b/operators/pkg/controller/elasticsearch/driver/nodes.go index 437b67ab03..ee6ec1ee54 100644 --- a/operators/pkg/controller/elasticsearch/driver/nodes.go +++ b/operators/pkg/controller/elasticsearch/driver/nodes.go @@ -36,8 +36,10 @@ func (d *defaultDriver) reconcileNodeSpecs( if !d.Expectations.GenerationExpected(actualStatefulSets.ObjectMetas()...) 
{ // Our cache of StatefulSets is out of date compared to previous reconciliation operations. - // This will probably lead to conflicting sset updates (which is ok), but also to - // conflicting ES calls (set/reset zen1/zen2/allocation excludes, etc.), which may not be ok. + // Continuing with the reconciliation at this point may lead to: + // - errors on rejected sset updates (conflict since cached resource out of date): that's ok + // - calling ES orchestration settings (zen1/zen2/allocation excludes) with wrong assumptions: that's not ok + // Hence we choose to abort the reconciliation early: will run again later with an updated cache. log.V(1).Info("StatefulSet cache out-of-date, re-queueing", "namespace", d.ES.Namespace, "es_name", d.ES.Name) return results.WithResult(defaultRequeue) } From f013f2412042da665af7d2e89380ceb08edf909a Mon Sep 17 00:00:00 2001 From: sebgl Date: Wed, 7 Aug 2019 09:22:52 +0200 Subject: [PATCH 31/31] Fix unit test name to match function name --- .../elasticsearch/version/zen1/compatibility_test.go | 4 ++-- .../elasticsearch/version/zen2/compatibility_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go b/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go index ef509df008..9916a7e3c4 100644 --- a/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go +++ b/operators/pkg/controller/elasticsearch/version/zen1/compatibility_test.go @@ -25,7 +25,7 @@ func createStatefulSetWithVersion(version string) appsv1.StatefulSet { }}} } -func TestIsCompatibleForZen1(t *testing.T) { +func TestIsCompatibleWithZen1(t *testing.T) { tests := []struct { name string @@ -57,7 +57,7 @@ func TestIsCompatibleForZen1(t *testing.T) { } } -func TestAtLeastOneNodeCompatibleForZen1(t *testing.T) { +func TestAtLeastOneNodeCompatibleWithZen1(t *testing.T) { tests := []struct { name string statefulSets sset.StatefulSetList diff --git a/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go b/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go index 37dc0b1cb3..47b6f34afd 100644 --- a/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go +++ b/operators/pkg/controller/elasticsearch/version/zen2/compatibility_test.go @@ -25,7 +25,7 @@ func createStatefulSetWithESVersion(version string) appsv1.StatefulSet { }}} } -func TestIsCompatibleForZen2(t *testing.T) { +func TestIsCompatibleWithZen2(t *testing.T) { tests := []struct { name string @@ -57,7 +57,7 @@ func TestIsCompatibleForZen2(t *testing.T) { } } -func TestAtLeastOneNodeCompatibleForZen2(t *testing.T) { +func TestAtLeastOneNodeCompatibleWithZen2(t *testing.T) { tests := []struct { name string statefulSets sset.StatefulSetList