From fb7726fd53d5ccdecb284bcc073cfe12ce503380 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Wed, 15 Mar 2023 11:42:14 -0400 Subject: [PATCH 01/18] Call _nodes/shutdown from pre-stop hook --- .../elasticsearch/configmap/configmap.go | 8 +- .../elasticsearch/nodespec/defaults.go | 2 +- .../elasticsearch/nodespec/lifecycle_hook.go | 193 +++++++++++++++++- .../elasticsearch/nodespec/podspec_test.go | 2 +- .../elasticsearch/nodespec/volumes.go | 2 +- .../elasticsearch/user/predefined.go | 7 +- .../elasticsearch/user/predefined_test.go | 16 +- .../elasticsearch/user/reconcile_test.go | 6 +- pkg/controller/elasticsearch/user/roles.go | 9 +- pkg/controller/elasticsearch/volume/names.go | 4 +- 10 files changed, 224 insertions(+), 25 deletions(-) diff --git a/pkg/controller/elasticsearch/configmap/configmap.go b/pkg/controller/elasticsearch/configmap/configmap.go index c4ff823a1a..e3d97ebd8d 100644 --- a/pkg/controller/elasticsearch/configmap/configmap.go +++ b/pkg/controller/elasticsearch/configmap/configmap.go @@ -17,6 +17,7 @@ import ( "github.com/elastic/cloud-on-k8s/v2/pkg/controller/elasticsearch/initcontainer" "github.com/elastic/cloud-on-k8s/v2/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/v2/pkg/controller/elasticsearch/nodespec" + "github.com/elastic/cloud-on-k8s/v2/pkg/controller/elasticsearch/services" "github.com/elastic/cloud-on-k8s/v2/pkg/utils/k8s" ) @@ -43,11 +44,16 @@ func ReconcileScriptsConfigMap(ctx context.Context, c k8s.Client, es esv1.Elasti return err } + preStopScript, err := nodespec.RenderPreStopHookScript(services.InternalServiceURL(es)) + if err != nil { + return err + } + scriptsConfigMap := NewConfigMapWithData( types.NamespacedName{Namespace: es.Namespace, Name: esv1.ScriptsConfigMap(es.Name)}, map[string]string{ nodespec.ReadinessProbeScriptConfigKey: nodespec.ReadinessProbeScript, - nodespec.PreStopHookScriptConfigKey: nodespec.PreStopHookScript, + nodespec.PreStopHookScriptConfigKey: preStopScript, initcontainer.PrepareFsScriptConfigKey: fsScript, initcontainer.SuspendScriptConfigKey: initcontainer.SuspendScript, initcontainer.SuspendedHostsFile: initcontainer.RenderSuspendConfiguration(es), diff --git a/pkg/controller/elasticsearch/nodespec/defaults.go b/pkg/controller/elasticsearch/nodespec/defaults.go index 750bb13e57..3ffd511190 100644 --- a/pkg/controller/elasticsearch/nodespec/defaults.go +++ b/pkg/controller/elasticsearch/nodespec/defaults.go @@ -45,7 +45,7 @@ var ( func DefaultEnvVars(httpCfg commonv1.HTTPConfig, headlessServiceName string) []corev1.EnvVar { return defaults.ExtendPodDownwardEnvVars( []corev1.EnvVar{ - {Name: settings.EnvProbePasswordPath, Value: path.Join(esvolume.ProbeUserSecretMountPath, user.ProbeUserName)}, + {Name: settings.EnvProbePasswordPath, Value: path.Join(esvolume.PodMountedUsersSecretMountPath, user.ProbeUserName)}, {Name: settings.EnvProbeUsername, Value: user.ProbeUserName}, {Name: settings.EnvReadinessProbeProtocol, Value: httpCfg.Protocol()}, {Name: settings.HeadlessServiceName, Value: headlessServiceName}, diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 1191bdb089..a0b7d33608 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -5,10 +5,15 @@ package nodespec import ( + "bytes" "path" + "path/filepath" + "text/template" v1 "k8s.io/api/core/v1" + "github.com/elastic/cloud-on-k8s/v2/pkg/controller/elasticsearch/label" + 
"github.com/elastic/cloud-on-k8s/v2/pkg/controller/elasticsearch/user" "github.com/elastic/cloud-on-k8s/v2/pkg/controller/elasticsearch/volume" ) @@ -19,8 +24,22 @@ func NewPreStopHook() *v1.LifecycleHandler { } } +/* + - use probe user: + - need to elevate permissions + - if permission elevations fails because ES not operational: status quo + - maintenance issue: misleading user name/pw path + - add new user + - somewhat unnecessary, does not improve security posture + - can use meaningful name + - probably best compromise + - rename probe user and extend permissions + - transition issue: readiness probe will fail for existing nodes: this is a blocker +*/ + const PreStopHookScriptConfigKey = "pre-stop-hook-script.sh" -const PreStopHookScript = `#!/usr/bin/env bash + +var preStopHookScriptTemplate = template.Must(template.New("pre-stop").Parse(`#!/usr/bin/env bash set -euo pipefail @@ -36,5 +55,173 @@ set -euo pipefail # target the Pod IP before Elasticsearch stops. PRE_STOP_ADDITIONAL_WAIT_SECONDS=${PRE_STOP_ADDITIONAL_WAIT_SECONDS:=50} -sleep $PRE_STOP_ADDITIONAL_WAIT_SECONDS -` +# capture response bodies in a temp file for better error messages and to extract necessary information for subsequent requests +resp_body=$(mktemp) +trap "rm -f $resp_body" EXIT + +script_start=$(date +%s) + +# compute time in seconds since the given start time +function duration() { + local start=$1 + end=$(date +%s) + echo $((end-start)) +} + +# use DNS errors as a proxy to abort this script early if there is no chance of successful completion +# DNS errors are for example expected when the whole cluster including its service is being deleted +# and the service URL can no longer be resolved even though we still have running Pods. +max_dns_errors=2 +global_dns_error_cnt=0 + +function request() { + local status exit + status=$(curl -k -sS -o $resp_body -w "%{http_code}" "$@") + exit=$? + if [ "$exit" -ne 0 ] || [ "$status" -lt 200 ] || [ "$status" -gt 299 ]; then + # track curl DNS errors separately + if [ "$exit" -eq 6 ]; then ((global_dns_error_cnt++)); fi + echo $status $resp_body + return $exit + fi + global_dns_error_cnt=0 + return 0 +} + +function retry() { + local retries=$1 + shift + + local count=0 + until "$@"; do + exit=$? 
+ wait=$((2 ** count)) + count=$((count + 1)) + if [ $global_dns_error_cnt -gt $max_dns_errors ]; then + error_exit "too many DNS errors, giving up" + fi + if [ $count -lt "$retries" ]; then + printf "Retry %s/%s exited %s, retrying in %s seconds...\n" "$count" "$retries" "$exit" "$wait" >&2 + sleep $wait + else + printf "Retry %s/%s exited %s, no more retries left.\n" "$count" "$retries" "$exit" >&2 + return $exit + fi + done + return 0 +} + +function error_exit() { + echo $1 1>&2 + exit 1 +} + +function delayed_exit() { + local elapsed=$(duration $script_start) + sleep $(($PRE_STOP_ADDITIONAL_WAIT_SECONDS - $elapsed)) + exit 0 +} + +function is_master(){ + labels="{{.LabelsFile}}" + grep 'master="true"' $labels +} + +function supports_node_shutdown() { + local version="$1" + version=${version#[vV]} + major="${version%%\.*}" + minor="${version#*.}" + minor="${minor%.*}" + patch="${version##*.}" + # node shutdown is supported as of 7.15.2 + if [ "$major" -lt 7 ] || ([ "$major" -eq 7 ] && [ "$minor" -eq 15 ] && [ "$patch" -lt 2 ]); then + return 1 + fi + return 0 +} + +version="" +if [[ -f "{{.LabelsFile}}" ]]; then + # get Elasticsearch version from the downward API + version=$(grep "{{.VersionLabelName}}" {{.LabelsFile}} | cut -d '=' -f 2) + # remove quotes + version=$(echo "${version}" | tr -d '"') +fi + +# if ES version does not support node shutdown exit early TODO bash regex +if ! supports_node_shutdown $version; then + delayed_exit +fi + +# setup basic auth if credentials are available TODO dedicated user? +if [ -f "{{.PreStopUserPasswordPath}}" ]; then + PROBE_PASSWORD=$(<{{.PreStopUserPasswordPath}}) + BASIC_AUTH="-u {{.PreStopUserName}}:${PROBE_PASSWORD}" +else + BASIC_AUTH='' +fi + +ES_URL={{.ServiceURL}} + +if is_master; then + retry 10 request -X POST "$ES_URL/_cluster/voting_config_exclusions?node_names=$POD_NAME" $BASIC_AUTH + # we ignore the error here and try to call at least node shutdown +fi + +echo "retrieving node ID" +retry 10 request -X GET "$ES_URL/_cat/nodes?full_id=true&h=id,name" $BASIC_AUTH +if [ "$?" -ne 0 ]; then + error_exit $status +fi + +NODE_ID=$(grep $POD_NAME $resp_body | cut -f 1 -d ' ') + +# check if there is an ongoing shutdown request +request -X GET $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH +if grep -q -v '"nodes":\[\]' $resp_body; then + delayed_exit +fi + +echo "initiating node shutdown" +retry 10 request -X PUT $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH -H 'Content-Type: application/json' -d' +{ + "type": "restart", + "reason": "pre-stop hook" +} +' +if [ "$?" -ne 0 ]; then + error_exit "Failed to call node shutdown API" $resp_body +fi + +while : +do + echo "waiting for node shutdown to complete" + request -X GET $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH + if [ "$?" -ne 0 ]; then + continue + fi + if grep -q -v 'IN_PROGRESS\|STALLED' $resp_body; then + break + fi + sleep 10 +done + +delayed_exit +`)) + +func RenderPreStopHookScript(svcURL string) (string, error) { + vars := map[string]string{ + "PreStopUserName": user.PreStopUserName, + "PreStopUserPasswordPath": filepath.Join(volume.PodMountedUsersSecretMountPath, user.PreStopUserName), + // edge case: protocol change (http/https) combined with external node shutdown might not work out well due to + // script propagation delays. 
But it is not a legitimate production use case as users are not expected to change + // protocol on production systems + "ServiceURL": svcURL, + "LabelsFile": filepath.Join(volume.DownwardAPIMountPath, volume.LabelsFile), + "VersionLabelName": label.VersionLabelName, + } + var script bytes.Buffer + err := preStopHookScriptTemplate.Execute(&script, vars) + return script.String(), err +} diff --git a/pkg/controller/elasticsearch/nodespec/podspec_test.go b/pkg/controller/elasticsearch/nodespec/podspec_test.go index 9b8d3f25dc..a77fafeaf1 100644 --- a/pkg/controller/elasticsearch/nodespec/podspec_test.go +++ b/pkg/controller/elasticsearch/nodespec/podspec_test.go @@ -248,7 +248,7 @@ func TestBuildPodTemplateSpec(t *testing.T) { initContainerEnv := defaults.ExtendPodDownwardEnvVars( []corev1.EnvVar{ {Name: "my-env", Value: "my-value"}, - {Name: settings.EnvProbePasswordPath, Value: path.Join(esvolume.ProbeUserSecretMountPath, user.ProbeUserName)}, + {Name: settings.EnvProbePasswordPath, Value: path.Join(esvolume.PodMountedUsersSecretMountPath, user.ProbeUserName)}, {Name: settings.EnvProbeUsername, Value: user.ProbeUserName}, {Name: settings.EnvReadinessProbeProtocol, Value: sampleES.Spec.HTTP.Protocol()}, {Name: settings.HeadlessServiceName, Value: HeadlessServiceName(esv1.StatefulSet(sampleES.Name, nodeSet.Name))}, diff --git a/pkg/controller/elasticsearch/nodespec/volumes.go b/pkg/controller/elasticsearch/nodespec/volumes.go index 81bc168380..d8a51e1fb9 100644 --- a/pkg/controller/elasticsearch/nodespec/volumes.go +++ b/pkg/controller/elasticsearch/nodespec/volumes.go @@ -29,7 +29,7 @@ func buildVolumes( configVolume := settings.ConfigSecretVolume(esv1.StatefulSet(esName, nodeSpec.Name)) probeSecret := volume.NewSelectiveSecretVolumeWithMountPath( esv1.InternalUsersSecret(esName), esvolume.ProbeUserVolumeName, - esvolume.ProbeUserSecretMountPath, []string{user.ProbeUserName}, + esvolume.PodMountedUsersSecretMountPath, []string{user.ProbeUserName, user.PreStopUserName}, ) httpCertificatesVolume := volume.NewSecretVolumeWithMountPath( certificates.InternalCertsSecretName(esv1.ESNamer, esName), diff --git a/pkg/controller/elasticsearch/user/predefined.go b/pkg/controller/elasticsearch/user/predefined.go index b2f7fe719a..6c08a6c0b1 100644 --- a/pkg/controller/elasticsearch/user/predefined.go +++ b/pkg/controller/elasticsearch/user/predefined.go @@ -29,10 +29,12 @@ const ( // ControllerUserName is the controller user to interact with ES. ControllerUserName = "elastic-internal" - // ProbeUserName is used for the Elasticsearch readiness probe. - ProbeUserName = "elastic-internal-probe" // MonitoringUserName is used for the Elasticsearch monitoring. MonitoringUserName = "elastic-internal-monitoring" + // PreStopUserName is used for API interactions from the pre-stop Pod lifecycle hook + PreStopUserName = "elastic-internal-pre-stop" + // ProbeUserName is used for the Elasticsearch readiness probe. + ProbeUserName = "elastic-internal-probe" ) // reconcileElasticUser reconciles a single secret holding the "elastic" user password. 
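Note: the hunk above introduces the dedicated `elastic-internal-pre-stop` user, and the volumes change earlier in this patch mounts its password under the shared pod-mounted-users directory. A minimal sketch of how the pre-stop hook's password path is derived from those two constants (the values are copied from the diff; the snippet itself is illustrative and not part of the patch):

[source,go]
----
package main

import (
	"fmt"
	"path/filepath"
)

// Values mirrored from volume.PodMountedUsersSecretMountPath and user.PreStopUserName above.
const (
	podMountedUsersSecretMountPath = "/mnt/elastic-internal/pod-mounted-users"
	preStopUserName                = "elastic-internal-pre-stop"
)

func main() {
	// The file the pre-stop hook reads its password from:
	// /mnt/elastic-internal/pod-mounted-users/elastic-internal-pre-stop
	fmt.Println(filepath.Join(podMountedUsersSecretMountPath, preStopUserName))
}
----

The rendered script reads that file into PROBE_PASSWORD and builds the curl basic-auth flag from it, as shown in lifecycle_hook.go above.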
@@ -84,6 +86,7 @@ func reconcileInternalUsers( existingFileRealm, users{ {Name: ControllerUserName, Roles: []string{SuperUserBuiltinRole}}, + {Name: PreStopUserName, Roles: []string{ClusterManageRole}}, {Name: ProbeUserName, Roles: []string{ProbeUserRole}}, {Name: MonitoringUserName, Roles: []string{RemoteMonitoringCollectorBuiltinRole}}, }, diff --git a/pkg/controller/elasticsearch/user/predefined_test.go b/pkg/controller/elasticsearch/user/predefined_test.go index d60946b76e..8845ad4de4 100644 --- a/pkg/controller/elasticsearch/user/predefined_test.go +++ b/pkg/controller/elasticsearch/user/predefined_test.go @@ -205,8 +205,8 @@ func Test_reconcileInternalUsers(t *testing.T) { // passwords and hashes should be reused require.Equal(t, []byte("controllerUserPassword"), u[0].Password) require.Equal(t, []byte("$2a$10$lUuxZpa.ByS.Tid3PcMII.PrELwGjti3Mx1WRT0itwy.Ajpf.BsEG"), u[0].PasswordHash) - require.Equal(t, []byte("probeUserPassword"), u[1].Password) - require.Equal(t, []byte("$2a$10$8.9my2W7FVDqDnh.E1RwouN5RzkZGulQ3ZMgmoy3CH4xRvr5uYPbS"), u[1].PasswordHash) + require.Equal(t, []byte("probeUserPassword"), u[2].Password) + require.Equal(t, []byte("$2a$10$8.9my2W7FVDqDnh.E1RwouN5RzkZGulQ3ZMgmoy3CH4xRvr5uYPbS"), u[2].PasswordHash) }, }, { @@ -229,9 +229,9 @@ func Test_reconcileInternalUsers(t *testing.T) { require.Equal(t, []byte("controllerUserPassword"), u[0].Password) require.Equal(t, []byte("$2a$10$lUuxZpa.ByS.Tid3PcMII.PrELwGjti3Mx1WRT0itwy.Ajpf.BsEG"), u[0].PasswordHash) // password of probe user should be reused, but hash should be re-computed - require.Equal(t, []byte("probeUserPassword"), u[1].Password) + require.Equal(t, []byte("probeUserPassword"), u[2].Password) require.NotEmpty(t, u[1].PasswordHash) - require.NotEqual(t, "does-not-match-password", u[1].PasswordHash) + require.NotEqual(t, "does-not-match-password", u[2].PasswordHash) }, }, { @@ -254,8 +254,8 @@ func Test_reconcileInternalUsers(t *testing.T) { require.Equal(t, []byte("controllerUserPassword"), u[0].Password) require.Equal(t, []byte("$2a$10$lUuxZpa.ByS.Tid3PcMII.PrELwGjti3Mx1WRT0itwy.Ajpf.BsEG"), u[0].PasswordHash) // password of probe user should be reused, and hash should be re-computed - require.Equal(t, []byte("probeUserPassword"), u[1].Password) - require.NotEmpty(t, u[1].PasswordHash) + require.Equal(t, []byte("probeUserPassword"), u[2].Password) + require.NotEmpty(t, u[2].PasswordHash) }, }, } @@ -265,9 +265,9 @@ func Test_reconcileInternalUsers(t *testing.T) { got, err := reconcileInternalUsers(context.Background(), c, es, tt.existingFileRealm, testPasswordHasher) require.NoError(t, err) // check returned users - require.Len(t, got, 3) + require.Len(t, got, 4) controllerUser := got[0] - probeUser := got[1] + probeUser := got[2] // names and roles are always the same require.Equal(t, ControllerUserName, controllerUser.Name) require.Equal(t, []string{SuperUserBuiltinRole}, controllerUser.Roles) diff --git a/pkg/controller/elasticsearch/user/reconcile_test.go b/pkg/controller/elasticsearch/user/reconcile_test.go index 3b8b925679..5dd147c620 100644 --- a/pkg/controller/elasticsearch/user/reconcile_test.go +++ b/pkg/controller/elasticsearch/user/reconcile_test.go @@ -89,13 +89,13 @@ func Test_aggregateFileRealm(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, controllerUser.Password) actualUsers := fileRealm.UserNames() - require.ElementsMatch(t, []string{"elastic", "elastic-internal", "elastic-internal-probe", "elastic-internal-monitoring", "user1", "user2", "user3"}, actualUsers) + 
require.ElementsMatch(t, []string{"elastic", "elastic-internal", "elastic-internal-pre-stop", "elastic-internal-probe", "elastic-internal-monitoring", "user1", "user2", "user3"}, actualUsers) } func Test_aggregateRoles(t *testing.T) { c := k8s.NewFakeClient(sampleUserProvidedRolesSecret...) roles, err := aggregateRoles(context.Background(), c, sampleEsWithAuth, initDynamicWatches(), record.NewFakeRecorder(10)) require.NoError(t, err) - require.Len(t, roles, 52) - require.Contains(t, roles, ProbeUserRole, "role1", "role2") + require.Len(t, roles, 53) + require.Contains(t, roles, ProbeUserRole, ClusterManageRole, "role1", "role2") } diff --git a/pkg/controller/elasticsearch/user/roles.go b/pkg/controller/elasticsearch/user/roles.go index 4688dea3b1..80bafd8dcb 100644 --- a/pkg/controller/elasticsearch/user/roles.go +++ b/pkg/controller/elasticsearch/user/roles.go @@ -7,10 +7,10 @@ package user import ( "fmt" + "gopkg.in/yaml.v3" + beatv1beta1 "github.com/elastic/cloud-on-k8s/v2/pkg/apis/beat/v1beta1" esclient "github.com/elastic/cloud-on-k8s/v2/pkg/controller/elasticsearch/client" - - "gopkg.in/yaml.v3" ) const ( @@ -19,6 +19,8 @@ const ( // SuperUserBuiltinRole is the name of the built-in superuser role. SuperUserBuiltinRole = "superuser" + // ClusterManageRole is the name of a custom role to manage the cluster. + ClusterManageRole = "elastic-internal_cluster_manage" // ProbeUserRole is the name of the role used by the internal probe user. ProbeUserRole = "elastic_internal_probe_user" // RemoteMonitoringCollectorBuiltinRole is the name of the built-in remote_monitoring_collector role. @@ -58,7 +60,8 @@ const ( var ( // PredefinedRoles to create for internal needs. PredefinedRoles = RolesFileContent{ - ProbeUserRole: esclient.Role{Cluster: []string{"monitor"}}, + ProbeUserRole: esclient.Role{Cluster: []string{"monitor"}}, + ClusterManageRole: esclient.Role{Cluster: []string{"manage"}}, ApmUserRoleV6: esclient.Role{ Cluster: []string{"monitor", "manage_index_templates"}, Indices: []esclient.IndexRole{ diff --git a/pkg/controller/elasticsearch/volume/names.go b/pkg/controller/elasticsearch/volume/names.go index 547fcb080a..3205751f81 100644 --- a/pkg/controller/elasticsearch/volume/names.go +++ b/pkg/controller/elasticsearch/volume/names.go @@ -6,8 +6,8 @@ package volume // Default values for the volume name and paths const ( - ProbeUserSecretMountPath = "/mnt/elastic-internal/probe-user" //nolint:gosec - ProbeUserVolumeName = "elastic-internal-probe-user" + PodMountedUsersSecretMountPath = "/mnt/elastic-internal/pod-mounted-users" //nolint:gosec + ProbeUserVolumeName = "elastic-internal-probe-user" ConfigVolumeMountPath = "/usr/share/elasticsearch/config" NodeTransportCertificatePathSegment = "node-transport-cert" From c7c97573b633e5a6a81539c1977794cd3d9870ce Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Fri, 17 Mar 2023 11:42:31 -0400 Subject: [PATCH 02/18] Remove design note --- .../elasticsearch/nodespec/lifecycle_hook.go | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index a0b7d33608..3171ae3390 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -24,19 +24,6 @@ func NewPreStopHook() *v1.LifecycleHandler { } } -/* - - use probe user: - - need to elevate permissions - - if permission elevations fails because ES not operational: status quo - - maintenance issue: misleading 
user name/pw path - - add new user - - somewhat unnecessary, does not improve security posture - - can use meaningful name - - probably best compromise - - rename probe user and extend permissions - - transition issue: readiness probe will fail for existing nodes: this is a blocker -*/ - const PreStopHookScriptConfigKey = "pre-stop-hook-script.sh" var preStopHookScriptTemplate = template.Must(template.New("pre-stop").Parse(`#!/usr/bin/env bash From 6110ca36fa8f6c6e4accc88b53a03185cbb00b45 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Fri, 17 Mar 2023 12:16:24 -0400 Subject: [PATCH 03/18] Document and add overrides via env vars --- .../elasticsearch/prestop.asciidoc | 7 +++++++ pkg/controller/elasticsearch/nodespec/lifecycle_hook.go | 9 +++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc b/docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc index 5fd77771d8..bd36bab6c1 100644 --- a/docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc +++ b/docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc @@ -36,3 +36,10 @@ spec: - name: PRE_STOP_ADDITIONAL_WAIT_SECONDS value: "5" ---- + +The pre-stop lifecycle hook also tries to gracefully shut down the Elasticsearch node in case of a termination that is not caused by the ECK operator. Examples of such terminations could be Kubernetes node maintenance or a Kubernetes upgrade. In these cases the script will try to interact with the Elasticsearch API to notify Elasticsearch of the impending termination of the node. The intent is to avoid relocation and recovery of shards while the Elasticsearch node is only temporarily unavailable. + +This is done on a best effort basis. In particular requests to an Elasticsearch cluster already in the process of shutting down might fail if the Kubernetes service has already been removed. +The script allows for `PRE_STOP_MAX_DNS_ERRORS` which default to 2 before giving up. + +When using local persistent volumes a different behaviour might be desirable because the Elasticsearch node's associated storage will not be available anymore on the new Kuberenetes node. `PRE_STOP_SHUTDOWN_TYPE` allows to override the default shutdown type to one of the link:https://www.elastic.co/guide/en/elasticsearch/reference/current/put-shutdown.html[possible values]. Please be aware that setting it to anything other than `restart` might mean that the pre-stop hook will run longer than `terminationGracePeriodSeconds` of the Pod while moving data out of the terminating Pod and will not be able to complete unless you also adjust that value in the `podTemplate`. \ No newline at end of file diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 3171ae3390..2874f32f50 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -42,6 +42,11 @@ set -euo pipefail # target the Pod IP before Elasticsearch stops. PRE_STOP_ADDITIONAL_WAIT_SECONDS=${PRE_STOP_ADDITIONAL_WAIT_SECONDS:=50} +# PRE_STOP_SHUTDOWN_TYPE controls the type of shutdown that will be communicated to Elasticsearch. This should not be +# changed to anything but restart. Specifically setting remove can lead to extensive data migration that might exceed the +# terminationGracePeriodSeconds and lead to an incomplete shutdown. 
+shutdown_type=${PRE_STOP_SHUTDOWN_TYPE:=restart} + # capture response bodies in a temp file for better error messages and to extract necessary information for subsequent requests resp_body=$(mktemp) trap "rm -f $resp_body" EXIT @@ -58,7 +63,7 @@ function duration() { # use DNS errors as a proxy to abort this script early if there is no chance of successful completion # DNS errors are for example expected when the whole cluster including its service is being deleted # and the service URL can no longer be resolved even though we still have running Pods. -max_dns_errors=2 +max_dns_errors=${PRE_STOP_MAX_DNS_ERRORS:=2} global_dns_error_cnt=0 function request() { @@ -173,7 +178,7 @@ fi echo "initiating node shutdown" retry 10 request -X PUT $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH -H 'Content-Type: application/json' -d' { - "type": "restart", + "type": "$shutdown_type", "reason": "pre-stop hook" } ' From 42b7f667959a2981220015a98f70e3c593b12050 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Fri, 17 Mar 2023 12:36:34 -0400 Subject: [PATCH 04/18] Exit code on error and templating --- .../elasticsearch/nodespec/lifecycle_hook.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 2874f32f50..09ac3ba8f3 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -73,6 +73,8 @@ function request() { if [ "$exit" -ne 0 ] || [ "$status" -lt 200 ] || [ "$status" -gt 299 ]; then # track curl DNS errors separately if [ "$exit" -eq 6 ]; then ((global_dns_error_cnt++)); fi + # make sure we have a non-zero exit code in the presence of errors + if [ "$exit" -eq 0 ]; then exit=1; fi echo $status $resp_body return $exit fi @@ -176,12 +178,12 @@ if grep -q -v '"nodes":\[\]' $resp_body; then fi echo "initiating node shutdown" -retry 10 request -X PUT $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH -H 'Content-Type: application/json' -d' +retry 10 request -X PUT $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH -H 'Content-Type: application/json' -d" { - "type": "$shutdown_type", - "reason": "pre-stop hook" + \"type\": \"$shutdown_type\", + \"reason\": \"pre-stop hook\" } -' +" if [ "$?" -ne 0 ]; then error_exit "Failed to call node shutdown API" $resp_body fi From 3fa9381579fc997ea95b9ff43560297588b643fe Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Fri, 17 Mar 2023 12:41:42 -0400 Subject: [PATCH 05/18] indentation --- .../elasticsearch/nodespec/lifecycle_hook.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 09ac3ba8f3..d53ace7449 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -67,7 +67,7 @@ max_dns_errors=${PRE_STOP_MAX_DNS_ERRORS:=2} global_dns_error_cnt=0 function request() { - local status exit + local status exit status=$(curl -k -sS -o $resp_body -w "%{http_code}" "$@") exit=$? if [ "$exit" -ne 0 ] || [ "$status" -lt 200 ] || [ "$status" -gt 299 ]; then @@ -91,8 +91,8 @@ function retry() { exit=$? 
wait=$((2 ** count)) count=$((count + 1)) - if [ $global_dns_error_cnt -gt $max_dns_errors ]; then - error_exit "too many DNS errors, giving up" + if [ $global_dns_error_cnt -gt $max_dns_errors ]; then + error_exit "too many DNS errors, giving up" fi if [ $count -lt "$retries" ]; then printf "Retry %s/%s exited %s, retrying in %s seconds...\n" "$count" "$retries" "$exit" "$wait" >&2 @@ -111,9 +111,9 @@ function error_exit() { } function delayed_exit() { - local elapsed=$(duration $script_start) + local elapsed=$(duration $script_start) sleep $(($PRE_STOP_ADDITIONAL_WAIT_SECONDS - $elapsed)) - exit 0 + exit 0 } function is_master(){ @@ -148,7 +148,7 @@ if ! supports_node_shutdown $version; then delayed_exit fi -# setup basic auth if credentials are available TODO dedicated user? +# setup basic auth if credentials are available if [ -f "{{.PreStopUserPasswordPath}}" ]; then PROBE_PASSWORD=$(<{{.PreStopUserPasswordPath}}) BASIC_AUTH="-u {{.PreStopUserName}}:${PROBE_PASSWORD}" From 429dbfb90c0612e28c86f6e9b0330b8e46abac6d Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Fri, 17 Mar 2023 12:45:58 -0400 Subject: [PATCH 06/18] more indentation --- .../elasticsearch/nodespec/lifecycle_hook.go | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index d53ace7449..164265c1e2 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -55,9 +55,9 @@ script_start=$(date +%s) # compute time in seconds since the given start time function duration() { - local start=$1 - end=$(date +%s) - echo $((end-start)) + local start=$1 + end=$(date +%s) + echo $((end-start)) } # use DNS errors as a proxy to abort this script early if there is no chance of successful completion @@ -67,19 +67,19 @@ max_dns_errors=${PRE_STOP_MAX_DNS_ERRORS:=2} global_dns_error_cnt=0 function request() { - local status exit - status=$(curl -k -sS -o $resp_body -w "%{http_code}" "$@") - exit=$? - if [ "$exit" -ne 0 ] || [ "$status" -lt 200 ] || [ "$status" -gt 299 ]; then - # track curl DNS errors separately - if [ "$exit" -eq 6 ]; then ((global_dns_error_cnt++)); fi - # make sure we have a non-zero exit code in the presence of errors - if [ "$exit" -eq 0 ]; then exit=1; fi - echo $status $resp_body - return $exit - fi - global_dns_error_cnt=0 - return 0 + local status exit + status=$(curl -k -sS -o $resp_body -w "%{http_code}" "$@") + exit=$? 
+ if [ "$exit" -ne 0 ] || [ "$status" -lt 200 ] || [ "$status" -gt 299 ]; then + # track curl DNS errors separately + if [ "$exit" -eq 6 ]; then ((global_dns_error_cnt++)); fi + # make sure we have a non-zero exit code in the presence of errors + if [ "$exit" -eq 0 ]; then exit=1; fi + echo $status $resp_body + return $exit + fi + global_dns_error_cnt=0 + return 0 } function retry() { @@ -111,9 +111,9 @@ function error_exit() { } function delayed_exit() { - local elapsed=$(duration $script_start) - sleep $(($PRE_STOP_ADDITIONAL_WAIT_SECONDS - $elapsed)) - exit 0 + local elapsed=$(duration $script_start) + sleep $(($PRE_STOP_ADDITIONAL_WAIT_SECONDS - $elapsed)) + exit 0 } function is_master(){ @@ -122,17 +122,17 @@ function is_master(){ } function supports_node_shutdown() { - local version="$1" - version=${version#[vV]} - major="${version%%\.*}" - minor="${version#*.}" - minor="${minor%.*}" - patch="${version##*.}" - # node shutdown is supported as of 7.15.2 - if [ "$major" -lt 7 ] || ([ "$major" -eq 7 ] && [ "$minor" -eq 15 ] && [ "$patch" -lt 2 ]); then - return 1 - fi - return 0 + local version="$1" + version=${version#[vV]} + major="${version%%\.*}" + minor="${version#*.}" + minor="${minor%.*}" + patch="${version##*.}" + # node shutdown is supported as of 7.15.2 + if [ "$major" -lt 7 ] || ([ "$major" -eq 7 ] && [ "$minor" -eq 15 ] && [ "$patch" -lt 2 ]); then + return 1 + fi + return 0 } version="" From 428b883230deb5cd61f515df0cf97890f2edd698 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Fri, 17 Mar 2023 12:47:28 -0400 Subject: [PATCH 07/18] left-over TODO removed --- pkg/controller/elasticsearch/nodespec/lifecycle_hook.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 164265c1e2..9eb9d34a7a 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -143,7 +143,7 @@ if [[ -f "{{.LabelsFile}}" ]]; then version=$(echo "${version}" | tr -d '"') fi -# if ES version does not support node shutdown exit early TODO bash regex +# if ES version does not support node shutdown exit early if ! 
supports_node_shutdown $version; then delayed_exit fi From c626900bc655d7ff0a171913958bec85ebc571fd Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Fri, 17 Mar 2023 18:13:41 -0400 Subject: [PATCH 08/18] Add missing e2e secret check update --- test/e2e/test/elasticsearch/checks_k8s.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/test/elasticsearch/checks_k8s.go b/test/e2e/test/elasticsearch/checks_k8s.go index b0cc7b114b..889e15c17f 100644 --- a/test/e2e/test/elasticsearch/checks_k8s.go +++ b/test/e2e/test/elasticsearch/checks_k8s.go @@ -126,7 +126,7 @@ func CheckSecrets(b Builder, k *test.K8sClient) test.Step { }, { Name: esName + "-es-internal-users", - Keys: []string{"elastic-internal", "elastic-internal-monitoring", "elastic-internal-probe"}, + Keys: []string{"elastic-internal", "elastic-internal-monitoring", "elastic-internal-pre-stop", "elastic-internal-probe"}, Labels: map[string]string{ "common.k8s.elastic.co/type": "elasticsearch", "eck.k8s.elastic.co/credentials": "true", From 1bc0e91effbc5c7f1daba7d2ad927dc3449b1904 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Mon, 20 Mar 2023 15:52:55 +0100 Subject: [PATCH 09/18] review/shellcheck input --- .../elasticsearch/nodespec/lifecycle_hook.go | 51 +++++++++++-------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 9eb9d34a7a..8ba9f8be30 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -49,6 +49,7 @@ shutdown_type=${PRE_STOP_SHUTDOWN_TYPE:=restart} # capture response bodies in a temp file for better error messages and to extract necessary information for subsequent requests resp_body=$(mktemp) +# shellcheck disable=SC2064 trap "rm -f $resp_body" EXIT script_start=$(date +%s) @@ -68,14 +69,14 @@ global_dns_error_cnt=0 function request() { local status exit - status=$(curl -k -sS -o $resp_body -w "%{http_code}" "$@") + status=$(curl -k -sS -o "$resp_body" -w "%{http_code}" "$@") exit=$? if [ "$exit" -ne 0 ] || [ "$status" -lt 200 ] || [ "$status" -gt 299 ]; then # track curl DNS errors separately if [ "$exit" -eq 6 ]; then ((global_dns_error_cnt++)); fi # make sure we have a non-zero exit code in the presence of errors if [ "$exit" -eq 0 ]; then exit=1; fi - echo $status $resp_body + echo "$status" "$resp_body" return $exit fi global_dns_error_cnt=0 @@ -91,7 +92,7 @@ function retry() { exit=$? 
wait=$((2 ** count)) count=$((count + 1)) - if [ $global_dns_error_cnt -gt $max_dns_errors ]; then + if [ $global_dns_error_cnt -gt "$max_dns_errors" ]; then error_exit "too many DNS errors, giving up" fi if [ $count -lt "$retries" ]; then @@ -105,19 +106,28 @@ function retry() { return 0 } +function log() { + local timestamp + timestamp=$(date --iso-8601=seconds) + echo "{\"@timestamp\": \"${timestamp}\", \"message\": \"$1\", \"ecs.version\": \"1.2.0\", \"event.dataset\": \"elasticsearch.pre-stop-hook\"}" | tee /proc/1/fd/2 2> /dev/null +} + function error_exit() { - echo $1 1>&2 + log "$1" exit 1 } function delayed_exit() { - local elapsed=$(duration $script_start) - sleep $(($PRE_STOP_ADDITIONAL_WAIT_SECONDS - $elapsed)) + local elapsed + elapsed=$(duration "$script_start") + local remaining=$((PRE_STOP_ADDITIONAL_WAIT_SECONDS - elapsed)) + log "delaying termination for $remaining seconds" + sleep $remaining exit 0 } function is_master(){ - labels="{{.LabelsFile}}" + local labels="{{.LabelsFile}}" grep 'master="true"' $labels } @@ -129,7 +139,7 @@ function supports_node_shutdown() { minor="${minor%.*}" patch="${version##*.}" # node shutdown is supported as of 7.15.2 - if [ "$major" -lt 7 ] || ([ "$major" -eq 7 ] && [ "$minor" -eq 15 ] && [ "$patch" -lt 2 ]); then + if [ "$major" -lt 7 ] || { [ "$major" -eq 7 ] && [ "$minor" -eq 15 ] && [ "$patch" -lt 2 ]; }; then return 1 fi return 0 @@ -144,7 +154,7 @@ if [[ -f "{{.LabelsFile}}" ]]; then fi # if ES version does not support node shutdown exit early -if ! supports_node_shutdown $version; then +if ! supports_node_shutdown "$version"; then delayed_exit fi @@ -163,39 +173,40 @@ if is_master; then # we ignore the error here and try to call at least node shutdown fi -echo "retrieving node ID" +log "retrieving node ID" retry 10 request -X GET "$ES_URL/_cat/nodes?full_id=true&h=id,name" $BASIC_AUTH if [ "$?" -ne 0 ]; then - error_exit $status + error_exit "$status" fi -NODE_ID=$(grep $POD_NAME $resp_body | cut -f 1 -d ' ') +NODE_ID=$(grep "$POD_NAME" "$resp_body" | cut -f 1 -d ' ') # check if there is an ongoing shutdown request -request -X GET $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH -if grep -q -v '"nodes":\[\]' $resp_body; then +request -X GET $ES_URL/_nodes/"$NODE_ID"/shutdown $BASIC_AUTH +if grep -q -v '"nodes":\[\]' "$resp_body"; then + log "shutdown managed by ECK operator" delayed_exit fi -echo "initiating node shutdown" -retry 10 request -X PUT $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH -H 'Content-Type: application/json' -d" +log "initiating node shutdown" +retry 10 request -X PUT $ES_URL/_nodes/"$NODE_ID"/shutdown $BASIC_AUTH -H 'Content-Type: application/json' -d" { \"type\": \"$shutdown_type\", \"reason\": \"pre-stop hook\" } " if [ "$?" -ne 0 ]; then - error_exit "Failed to call node shutdown API" $resp_body + error_exit "Failed to call node shutdown API" "$resp_body" fi while : do - echo "waiting for node shutdown to complete" - request -X GET $ES_URL/_nodes/$NODE_ID/shutdown $BASIC_AUTH + log "waiting for node shutdown to complete" + request -X GET $ES_URL/_nodes/"$NODE_ID"/shutdown $BASIC_AUTH if [ "$?" 
-ne 0 ]; then continue fi - if grep -q -v 'IN_PROGRESS\|STALLED' $resp_body; then + if grep -q -v 'IN_PROGRESS\|STALLED' "$resp_body"; then break fi sleep 10 From 7f687a8fadae415ec4e0c21c7ce48bd42a95d0e3 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Mon, 20 Mar 2023 15:54:54 +0100 Subject: [PATCH 10/18] update ECK docs to mention restart requirement --- docs/operating-eck/upgrading-eck.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/operating-eck/upgrading-eck.asciidoc b/docs/operating-eck/upgrading-eck.asciidoc index ac1c3ec95c..d7173346b9 100644 --- a/docs/operating-eck/upgrading-eck.asciidoc +++ b/docs/operating-eck/upgrading-eck.asciidoc @@ -97,7 +97,7 @@ This will update the ECK installation to the latest binary and update the CRDs a Upgrading the operator results in a one-time update to existing managed resources in the cluster. This potentially triggers a rolling restart of pods by Kubernetes to apply those changes. The following list contains the ECK operator versions that would cause a rolling restart after they have been installed. - 1.6, 1.9, 2.0, 2.1, 2.2, 2.4, 2.5 + 1.6, 1.9, 2.0, 2.1, 2.2, 2.4, 2.5, 2.8 If you have a very large Elasticsearch cluster or multiple Elastic Stack deployments, this rolling restart might be disruptive or inconvenient. To have more control over when the pods belonging to a particular deployment should be restarted, you can <<{p}-exclude-resource,add an annotation>> to the corresponding resources to temporarily exclude them from being managed by the operator. When the time is convenient, you can remove the annotation and let the rolling restart go through. From 229b14c646114f2f34b715cb8971fef3ddcf7c57 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Mon, 20 Mar 2023 16:31:51 +0100 Subject: [PATCH 11/18] replace echo with log --- pkg/controller/elasticsearch/nodespec/lifecycle_hook.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 8ba9f8be30..5e13201289 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -76,7 +76,7 @@ function request() { if [ "$exit" -eq 6 ]; then ((global_dns_error_cnt++)); fi # make sure we have a non-zero exit code in the presence of errors if [ "$exit" -eq 0 ]; then exit=1; fi - echo "$status" "$resp_body" + log "$status" "$3" #by convention the third arg contains the URL return $exit fi global_dns_error_cnt=0 @@ -109,7 +109,7 @@ function retry() { function log() { local timestamp timestamp=$(date --iso-8601=seconds) - echo "{\"@timestamp\": \"${timestamp}\", \"message\": \"$1\", \"ecs.version\": \"1.2.0\", \"event.dataset\": \"elasticsearch.pre-stop-hook\"}" | tee /proc/1/fd/2 2> /dev/null + echo "{\"@timestamp\": \"${timestamp}\", \"message\": \"$@\", \"ecs.version\": \"1.2.0\", \"event.dataset\": \"elasticsearch.pre-stop-hook\"}" | tee /proc/1/fd/2 2> /dev/null } function error_exit() { From 81d7d16a05a74bece669d19c9bc91ac1d954cf3c Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Mon, 20 Mar 2023 21:17:27 +0100 Subject: [PATCH 12/18] Do not cancel pre-stop hook originated shutdowns --- .../elasticsearch/driver/downscale.go | 3 +- .../elasticsearch/driver/upgrade.go | 41 +++++++++++--- .../elasticsearch/migration/migrate_data.go | 2 +- .../elasticsearch/shutdown/interface.go | 6 +-- pkg/controller/elasticsearch/shutdown/node.go | 15 +++++- 
.../elasticsearch/shutdown/node_test.go | 53 ++++++++++++++++--- pkg/utils/k8s/k8sutils.go | 12 +++++ 7 files changed, 109 insertions(+), 23 deletions(-) diff --git a/pkg/controller/elasticsearch/driver/downscale.go b/pkg/controller/elasticsearch/driver/downscale.go index 5bcc88e1ce..ea3ec75e40 100644 --- a/pkg/controller/elasticsearch/driver/downscale.go +++ b/pkg/controller/elasticsearch/driver/downscale.go @@ -66,7 +66,8 @@ func HandleDownscale( // initiate shutdown of nodes that should be removed // if leaving nodes is empty this should cancel any ongoing shutdowns leavingNodes := leavingNodeNames(downscales) - if err := downscaleCtx.nodeShutdown.ReconcileShutdowns(downscaleCtx.parentCtx, leavingNodes); err != nil { + terminatingNodes := k8s.PodNames(k8s.TerminatingPods(actualPods)) + if err := downscaleCtx.nodeShutdown.ReconcileShutdowns(downscaleCtx.parentCtx, leavingNodes, terminatingNodes); err != nil { return results.WithError(err) } diff --git a/pkg/controller/elasticsearch/driver/upgrade.go b/pkg/controller/elasticsearch/driver/upgrade.go index d7aaa44eeb..fb8582188d 100644 --- a/pkg/controller/elasticsearch/driver/upgrade.go +++ b/pkg/controller/elasticsearch/driver/upgrade.go @@ -273,6 +273,14 @@ func podsToUpgrade( return toUpgrade, nil } +func terminatingPodNames(client k8s.Client, statefulSets sset.StatefulSetList) ([]string, error) { + pods, err := statefulSets.GetActualPods(client) + if err != nil { + return nil, err + } + return k8s.PodNames(k8s.TerminatingPods(pods)), nil +} + func doFlush(ctx context.Context, es esv1.Elasticsearch, esClient esclient.Client) error { log := ulog.FromContext(ctx) targetEsVersion, err := version.Parse(es.Spec.Version) @@ -319,17 +327,31 @@ func (d *defaultDriver) maybeCompleteNodeUpgrades( reason := fmt.Sprintf("Completing node upgrade: %s", reason) return results.WithReconciliationState(defaultRequeue.WithReason(reason)) } + + statefulSets, err := sset.RetrieveActualStatefulSets(d.Client, k8s.ExtractNamespacedName(&d.ES)) + if err != nil { + return results.WithError(err) + } + // Also make sure that when cleaning up node shutdowns we don't remove shutdown records for terminating Pods. + // The expectation mechanism covers planned spec changes for Pods. However, Pods might also be deleted due to external factors + // like Kubernetes node upgrades or manual admin intervention. We orchestrate node shutdown in these cases via a pre-stop hook + // and don't want to interrupt that process until it completes. This a best-effort attempt as observation of shutdown records in + // Elasticsearch and Pod deletion might not be in sync due to cache lag. + terminating, err := terminatingPodNames(d.Client, statefulSets) + if err != nil { + return results.WithError(err) + } + // once expectations are satisfied we can already delete shutdowns that are complete and where the node // is back in the cluster to avoid completed shutdowns from accumulating and affecting node availability calculations // in Elasticsearch for example for indices with `auto_expand_replicas` setting. 
if supportsNodeShutdown(esClient.Version()) { // clear all shutdowns of type restart that have completed - results = results.WithError(nodeShutdown.Clear(ctx, esclient.ShutdownComplete.Applies, nodeShutdown.OnlyNodesInCluster)) - } - - statefulSets, err := sset.RetrieveActualStatefulSets(d.Client, k8s.ExtractNamespacedName(&d.ES)) - if err != nil { - return results.WithError(err) + results = results.WithError(nodeShutdown.Clear(ctx, + esclient.ShutdownComplete.Applies, + nodeShutdown.OnlyNodesInCluster, + nodeShutdown.OnlyNonTerminatingNodes(terminating), + )) } // Make sure all nodes scheduled for upgrade are back into the cluster. @@ -352,7 +374,10 @@ func (d *defaultDriver) maybeCompleteNodeUpgrades( if supportsNodeShutdown(esClient.Version()) { // clear all shutdowns of type restart that have completed including those where the node is no longer in the cluster // or node state was lost due to an external event - results = results.WithError(nodeShutdown.Clear(ctx, esclient.ShutdownComplete.Applies)) + results = results.WithError(nodeShutdown.Clear(ctx, + esclient.ShutdownComplete.Applies, + nodeShutdown.OnlyNonTerminatingNodes(terminating), + )) } return results } @@ -414,7 +439,7 @@ func (ctx *upgradeCtx) requestNodeRestarts(podsToRestart []corev1.Pod) error { } // Note that ReconcileShutdowns would cancel ongoing shutdowns when called with no podNames // this is however not the case in the rolling upgrade logic where we exit early if no pod needs to be rotated. - return ctx.nodeShutdown.ReconcileShutdowns(ctx.parentCtx, podNames) + return ctx.nodeShutdown.ReconcileShutdowns(ctx.parentCtx, podNames, k8s.PodNames(k8s.TerminatingPods(ctx.currentPods))) } func (ctx *upgradeCtx) prepareClusterForNodeRestart(podsToUpgrade []corev1.Pod) error { diff --git a/pkg/controller/elasticsearch/migration/migrate_data.go b/pkg/controller/elasticsearch/migration/migrate_data.go index 9f01563c2c..11888181f2 100644 --- a/pkg/controller/elasticsearch/migration/migrate_data.go +++ b/pkg/controller/elasticsearch/migration/migrate_data.go @@ -34,7 +34,7 @@ func NewShardMigration(es esv1.Elasticsearch, c esclient.Client, s esclient.Shar } // ReconcileShutdowns migrates data away from the leaving nodes or removes any allocation filtering if no nodes are leaving. -func (sm *ShardMigration) ReconcileShutdowns(ctx context.Context, leavingNodes []string) error { +func (sm *ShardMigration) ReconcileShutdowns(ctx context.Context, leavingNodes, _ []string) error { return migrateData(ctx, sm.es, sm.c, leavingNodes) } diff --git a/pkg/controller/elasticsearch/shutdown/interface.go b/pkg/controller/elasticsearch/shutdown/interface.go index 11e3680125..1930188947 100644 --- a/pkg/controller/elasticsearch/shutdown/interface.go +++ b/pkg/controller/elasticsearch/shutdown/interface.go @@ -25,7 +25,7 @@ type NodeShutdownStatus struct { type Interface interface { // ReconcileShutdowns retrieves ongoing shutdowns and based on the given node names either cancels or creates new // shutdowns. - ReconcileShutdowns(ctx context.Context, leavingNodes []string) error + ReconcileShutdowns(ctx context.Context, leavingNodes []string, terminatingNodes []string) error // ShutdownStatus returns the current shutdown status for the given node. It returns an error if no shutdown is in // progress. 
ShutdownStatus(ctx context.Context, podName string) (NodeShutdownStatus, error) @@ -48,11 +48,11 @@ type observed struct { observer Observer } -func (o *observed) ReconcileShutdowns(ctx context.Context, leavingNodes []string) error { +func (o *observed) ReconcileShutdowns(ctx context.Context, leavingNodes []string, terminatingNodes []string) error { if o.observer != nil { o.observer.OnReconcileShutdowns(leavingNodes) } - return o.Interface.ReconcileShutdowns(ctx, leavingNodes) + return o.Interface.ReconcileShutdowns(ctx, leavingNodes, terminatingNodes) } func (o *observed) ShutdownStatus(ctx context.Context, podName string) (NodeShutdownStatus, error) { diff --git a/pkg/controller/elasticsearch/shutdown/node.go b/pkg/controller/elasticsearch/shutdown/node.go index 1f8bac24dd..3c24365565 100644 --- a/pkg/controller/elasticsearch/shutdown/node.go +++ b/pkg/controller/elasticsearch/shutdown/node.go @@ -79,13 +79,13 @@ func (ns *NodeShutdown) nodeInCluster(nodeID string) bool { // ReconcileShutdowns retrieves ongoing shutdowns and based on the given node names either cancels or creates new // shutdowns. -func (ns *NodeShutdown) ReconcileShutdowns(ctx context.Context, leavingNodes []string) error { +func (ns *NodeShutdown) ReconcileShutdowns(ctx context.Context, leavingNodes []string, terminatingNodes []string) error { if err := ns.initOnce(ctx); err != nil { return err } // cancel all ongoing shutdowns for the current shutdown type if len(leavingNodes) == 0 { - return ns.Clear(ctx) + return ns.Clear(ctx, ns.OnlyNonTerminatingNodes(terminatingNodes)) } for _, node := range leavingNodes { @@ -157,6 +157,17 @@ func (ns *NodeShutdown) OnlyNodesInCluster(s esclient.NodeShutdown) bool { return ns.nodeInCluster(s.NodeID) } +// OnlyNonTerminatingNodes is a function to generate a predicate to delete only those shutdowns which don't affect currently terminating Pods/ES nodes. +func (ns *NodeShutdown) OnlyNonTerminatingNodes(terminatingNodes []string) func(s esclient.NodeShutdown) bool { + terminatingNodeIDs := map[string]bool{} + for _, n := range terminatingNodes { + terminatingNodeIDs[ns.podToNodeID[n]] = true + } + return func(s esclient.NodeShutdown) bool { + return !terminatingNodeIDs[s.NodeID] + } +} + // Clear deletes shutdown requests matching the type of the NodeShutdown field typ and the given optional status. // Depending on the progress of the shutdown in question this means either a cancellation of the shutdown or a clean-up // after shutdown completion. 
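Note: the new OnlyNonTerminatingNodes predicate above is what keeps Clear from deleting shutdown records that belong to Pods still going through graceful termination. A simplified, standalone restatement of that filtering logic with made-up Pod names and node IDs (not part of the patch):

[source,go]
----
package main

import "fmt"

// onlyNonTerminating mirrors NodeShutdown.OnlyNonTerminatingNodes: collect the node IDs of
// terminating Pods and only allow clearing shutdown records for the remaining nodes.
func onlyNonTerminating(podToNodeID map[string]string, terminatingPods []string) func(nodeID string) bool {
	terminatingNodeIDs := map[string]bool{}
	for _, p := range terminatingPods {
		terminatingNodeIDs[podToNodeID[p]] = true
	}
	return func(nodeID string) bool { return !terminatingNodeIDs[nodeID] }
}

func main() {
	podToNodeID := map[string]string{
		"es-default-0": "node-id-a",
		"es-default-1": "node-id-b",
	}
	canClear := onlyNonTerminating(podToNodeID, []string{"es-default-1"})
	fmt.Println(canClear("node-id-a")) // true: no terminating Pod owns this node, its completed shutdown can be removed
	fmt.Println(canClear("node-id-b")) // false: es-default-1 is terminating, keep the shutdown created by its pre-stop hook
}
----

In upgrade.go above this predicate is passed to Clear together with ShutdownComplete.Applies and OnlyNodesInCluster, so completed restarts are still cleaned up without cancelling a shutdown that a pre-stop hook has just registered.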
diff --git a/pkg/controller/elasticsearch/shutdown/node_test.go b/pkg/controller/elasticsearch/shutdown/node_test.go index cdc1ffa7b8..a7a05bd7fc 100644 --- a/pkg/controller/elasticsearch/shutdown/node_test.go +++ b/pkg/controller/elasticsearch/shutdown/node_test.go @@ -336,9 +336,10 @@ func TestNodeShutdown_ShutdownStatus(t *testing.T) { func TestNodeShutdown_ReconcileShutdowns(t *testing.T) { type args struct { - typ esclient.ShutdownType - podToNodeID map[string]string - leavingNodes []string + typ esclient.ShutdownType + podToNodeID map[string]string + leavingNodes []string + terminatingNodes []string } tests := []struct { name string @@ -350,9 +351,10 @@ func TestNodeShutdown_ReconcileShutdowns(t *testing.T) { { name: "no node leaving", args: args{ - typ: esclient.Remove, - podToNodeID: nil, - leavingNodes: nil, + typ: esclient.Remove, + podToNodeID: nil, + leavingNodes: nil, + terminatingNodes: nil, }, fixtures: []string{ noShutdownFixture, @@ -367,7 +369,8 @@ func TestNodeShutdown_ReconcileShutdowns(t *testing.T) { podToNodeID: map[string]string{ "pod-1": "txXw-Kd2Q6K0PbYMAPzH-Q", }, - leavingNodes: []string{"pod-1"}, + leavingNodes: []string{"pod-1"}, + terminatingNodes: nil, }, fixtures: []string{ noShutdownFixture, @@ -426,6 +429,40 @@ func TestNodeShutdown_ReconcileShutdowns(t *testing.T) { wantErr: true, wantMethods: []string{"GET"}, }, + { + name: "should clean up shutdowns", + args: args{ + typ: esclient.Restart, + podToNodeID: map[string]string{ + "pod-1": "txXw-Kd2Q6K0PbYMAPzH-Q", + }, + leavingNodes: nil, + terminatingNodes: nil, + }, + fixtures: []string{ + singleRestartShutdownFixture, + ackFixture, + }, + wantErr: false, + wantMethods: []string{"GET", "DELETE"}, + }, + { + + name: "should not clean up shutdowns on terminating nodes", + args: args{ + typ: esclient.Restart, + podToNodeID: map[string]string{ + "pod-1": "txXw-Kd2Q6K0PbYMAPzH-Q", + }, + leavingNodes: nil, + terminatingNodes: []string{"pod-1"}, + }, + fixtures: []string{ + singleRestartShutdownFixture, + }, + wantErr: false, + wantMethods: []string{"GET"}, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -449,7 +486,7 @@ func TestNodeShutdown_ReconcileShutdowns(t *testing.T) { podToNodeID: tt.args.podToNodeID, log: log.Log.WithName("test"), } - if err := ns.ReconcileShutdowns(context.Background(), tt.args.leavingNodes); (err != nil) != tt.wantErr { + if err := ns.ReconcileShutdowns(context.Background(), tt.args.leavingNodes, tt.args.terminatingNodes); (err != nil) != tt.wantErr { t.Errorf("ReconcileShutdowns() error = %v, wantErr %v", err, tt.wantErr) } if !reflect.DeepEqual(methodsCalled, tt.wantMethods) { diff --git a/pkg/utils/k8s/k8sutils.go b/pkg/utils/k8s/k8sutils.go index a5649770d4..bd79cb5b86 100644 --- a/pkg/utils/k8s/k8sutils.go +++ b/pkg/utils/k8s/k8sutils.go @@ -74,6 +74,18 @@ func IsPodReady(pod corev1.Pod) bool { return conditionsTrue == 2 } +// TerminatingPods filters pods for Pods that are in the process of (graceful) termination. 
+func TerminatingPods(pods []corev1.Pod) []corev1.Pod { + var terminating []corev1.Pod //nolint:prealloc + for _, p := range pods { + if p.DeletionTimestamp.IsZero() { + continue + } + terminating = append(terminating, p) + } + return terminating +} + // PodsByName returns a map of pod names to pods func PodsByName(pods []corev1.Pod) map[string]corev1.Pod { podMap := make(map[string]corev1.Pod, len(pods)) From 9b4054fd58e359cff821a2d0205d5155ece8591b Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Mon, 20 Mar 2023 21:19:24 +0100 Subject: [PATCH 13/18] Do not set voting config exclusions in pre-stop hook --- .../elasticsearch/nodespec/lifecycle_hook.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 5e13201289..cdbc1069f8 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -126,11 +126,6 @@ function delayed_exit() { exit 0 } -function is_master(){ - local labels="{{.LabelsFile}}" - grep 'master="true"' $labels -} - function supports_node_shutdown() { local version="$1" version=${version#[vV]} @@ -168,11 +163,6 @@ fi ES_URL={{.ServiceURL}} -if is_master; then - retry 10 request -X POST "$ES_URL/_cluster/voting_config_exclusions?node_names=$POD_NAME" $BASIC_AUTH - # we ignore the error here and try to call at least node shutdown -fi - log "retrieving node ID" retry 10 request -X GET "$ES_URL/_cat/nodes?full_id=true&h=id,name" $BASIC_AUTH if [ "$?" -ne 0 ]; then From 7ff13722e279700ec22ab941b25ad4a5602e62c0 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Mon, 20 Mar 2023 22:30:55 +0100 Subject: [PATCH 14/18] Update docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc Co-authored-by: Thibault Richard --- .../elasticsearch/prestop.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc b/docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc index bd36bab6c1..5ea6e86d29 100644 --- a/docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc +++ b/docs/orchestrating-elastic-stack-applications/elasticsearch/prestop.asciidoc @@ -42,4 +42,4 @@ The pre-stop lifecycle hook also tries to gracefully shut down the Elasticsearch This is done on a best effort basis. In particular requests to an Elasticsearch cluster already in the process of shutting down might fail if the Kubernetes service has already been removed. The script allows for `PRE_STOP_MAX_DNS_ERRORS` which default to 2 before giving up. -When using local persistent volumes a different behaviour might be desirable because the Elasticsearch node's associated storage will not be available anymore on the new Kuberenetes node. `PRE_STOP_SHUTDOWN_TYPE` allows to override the default shutdown type to one of the link:https://www.elastic.co/guide/en/elasticsearch/reference/current/put-shutdown.html[possible values]. Please be aware that setting it to anything other than `restart` might mean that the pre-stop hook will run longer than `terminationGracePeriodSeconds` of the Pod while moving data out of the terminating Pod and will not be able to complete unless you also adjust that value in the `podTemplate`. 
\ No newline at end of file +When using local persistent volumes a different behaviour might be desirable because the Elasticsearch node's associated storage will not be available anymore on the new Kubernetes node. `PRE_STOP_SHUTDOWN_TYPE` allows to override the default shutdown type to one of the link:https://www.elastic.co/guide/en/elasticsearch/reference/current/put-shutdown.html[possible values]. Please be aware that setting it to anything other than `restart` might mean that the pre-stop hook will run longer than `terminationGracePeriodSeconds` of the Pod while moving data out of the terminating Pod and will not be able to complete unless you also adjust that value in the `podTemplate`. \ No newline at end of file From 00ca6ed007c070850aee9da797feae154aa49aa7 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Tue, 21 Mar 2023 15:55:42 +0100 Subject: [PATCH 15/18] include retry messages in log --- pkg/controller/elasticsearch/nodespec/lifecycle_hook.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index cdbc1069f8..ef2f9f3036 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -96,10 +96,10 @@ function retry() { error_exit "too many DNS errors, giving up" fi if [ $count -lt "$retries" ]; then - printf "Retry %s/%s exited %s, retrying in %s seconds...\n" "$count" "$retries" "$exit" "$wait" >&2 + log "Retry $count/$retries exited $exit, retrying in $wait seconds" sleep $wait else - printf "Retry %s/%s exited %s, no more retries left.\n" "$count" "$retries" "$exit" >&2 + log "Retry $count/$retries exited $exit, no more retries left." 
return $exit fi done From 2cd09e6c41f5fd113d2baf1a99e6e51811d4f36a Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Tue, 21 Mar 2023 17:51:30 +0100 Subject: [PATCH 16/18] Do no automatically exit on error and do not hot loop retries while waiting --- .../elasticsearch/nodespec/lifecycle_hook.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index ef2f9f3036..46ca475ec0 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -28,7 +28,7 @@ const PreStopHookScriptConfigKey = "pre-stop-hook-script.sh" var preStopHookScriptTemplate = template.Must(template.New("pre-stop").Parse(`#!/usr/bin/env bash -set -euo pipefail +set -uo pipefail # This script will wait for up to $PRE_STOP_ADDITIONAL_WAIT_SECONDS before allowing termination of the Pod # This slows down the process shutdown and allows to make changes to the pool gracefully, without blackholing traffic when DNS @@ -109,11 +109,11 @@ function retry() { function log() { local timestamp timestamp=$(date --iso-8601=seconds) - echo "{\"@timestamp\": \"${timestamp}\", \"message\": \"$@\", \"ecs.version\": \"1.2.0\", \"event.dataset\": \"elasticsearch.pre-stop-hook\"}" | tee /proc/1/fd/2 2> /dev/null + echo "{\"@timestamp\": \"${timestamp}\", \"message\": \"$*\", \"ecs.version\": \"1.2.0\", \"event.dataset\": \"elasticsearch.pre-stop-hook\"}" | tee /proc/1/fd/2 2> /dev/null } function error_exit() { - log "$1" + log "$@" exit 1 } @@ -165,7 +165,7 @@ ES_URL={{.ServiceURL}} log "retrieving node ID" retry 10 request -X GET "$ES_URL/_cat/nodes?full_id=true&h=id,name" $BASIC_AUTH -if [ "$?" -ne 0 ]; then +if [ "$?" -ne 0 ]; then error_exit "$status" fi @@ -186,17 +186,14 @@ retry 10 request -X PUT $ES_URL/_nodes/"$NODE_ID"/shutdown $BASIC_AUTH -H 'Conte } " if [ "$?" -ne 0 ]; then - error_exit "Failed to call node shutdown API" "$resp_body" + error_exit "Failed to call node shutdown API" "$status" fi while : do log "waiting for node shutdown to complete" request -X GET $ES_URL/_nodes/"$NODE_ID"/shutdown $BASIC_AUTH - if [ "$?" -ne 0 ]; then - continue - fi - if grep -q -v 'IN_PROGRESS\|STALLED' "$resp_body"; then + if [ "$?" 
-eq 0 ] && grep -q -v 'IN_PROGRESS\|STALLED' "$resp_body"; then break fi sleep 10 From bf17123fa69760b9ece17688428a2259aba7ef3a Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Tue, 21 Mar 2023 20:22:12 +0100 Subject: [PATCH 17/18] Always honor PRE_STOP_ADDITIONAL_WAIT_SECONDS, do not log $status we already do that --- pkg/controller/elasticsearch/nodespec/lifecycle_hook.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index 46ca475ec0..bfa899fd58 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -114,7 +114,7 @@ function log() { function error_exit() { log "$@" - exit 1 + delayed_exit 1 } function delayed_exit() { @@ -123,7 +123,7 @@ function delayed_exit() { local remaining=$((PRE_STOP_ADDITIONAL_WAIT_SECONDS - elapsed)) log "delaying termination for $remaining seconds" sleep $remaining - exit 0 + exit ${1-0} } function supports_node_shutdown() { @@ -166,7 +166,7 @@ ES_URL={{.ServiceURL}} log "retrieving node ID" retry 10 request -X GET "$ES_URL/_cat/nodes?full_id=true&h=id,name" $BASIC_AUTH if [ "$?" -ne 0 ]; then - error_exit "$status" + error_exit "failed to retrieve node ID" fi NODE_ID=$(grep "$POD_NAME" "$resp_body" | cut -f 1 -d ' ') @@ -186,7 +186,7 @@ retry 10 request -X PUT $ES_URL/_nodes/"$NODE_ID"/shutdown $BASIC_AUTH -H 'Conte } " if [ "$?" -ne 0 ]; then - error_exit "Failed to call node shutdown API" "$status" + error_exit "failed to call node shutdown API" fi while : From 72569e8be0500709717cefc6fb46348e2838b593 Mon Sep 17 00:00:00 2001 From: Peter Brachwitz Date: Wed, 22 Mar 2023 09:56:19 +0100 Subject: [PATCH 18/18] Improve log message consistency --- pkg/controller/elasticsearch/nodespec/lifecycle_hook.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go index bfa899fd58..cdace1fd6d 100644 --- a/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go +++ b/pkg/controller/elasticsearch/nodespec/lifecycle_hook.go @@ -96,10 +96,10 @@ function retry() { error_exit "too many DNS errors, giving up" fi if [ $count -lt "$retries" ]; then - log "Retry $count/$retries exited $exit, retrying in $wait seconds" + log "retry $count/$retries exited $exit, retrying in $wait seconds" sleep $wait else - log "Retry $count/$retries exited $exit, no more retries left." + log "retry $count/$retries exited $exit, no more retries left" return $exit fi done
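RenderPreStopHookScript, added in the first patch of this series, is a plain template render and can be covered by a small unit test. A possible sketch in the nodespec package, assuming the layout shown above (the test name and the sample URL are illustrative and not part of the series):

[source,go]
----
package nodespec

import (
	"strings"
	"testing"
)

func TestRenderPreStopHookScript(t *testing.T) {
	// Any URL works here; the real caller passes services.InternalServiceURL(es).
	svcURL := "https://sample-es-internal-http.default.svc:9200"

	script, err := RenderPreStopHookScript(svcURL)
	if err != nil {
		t.Fatal(err)
	}
	// The template interpolates the internal service URL ...
	if !strings.Contains(script, "ES_URL="+svcURL) {
		t.Errorf("rendered script does not set ES_URL to %s", svcURL)
	}
	// ... and the pre-stop user name used for basic auth.
	if !strings.Contains(script, "elastic-internal-pre-stop") {
		t.Error("rendered script does not reference the pre-stop user")
	}
}
----

Such a test would only exercise the template rendering; the shutdown orchestration itself is covered by the shutdown package tests updated in patch 12 and the e2e secret check in patch 08.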