From 15abdd7816d36314630640235d0a0d7de78fe23b Mon Sep 17 00:00:00 2001 From: Dave McCormick Date: Sun, 23 Dec 2018 13:04:41 +0000 Subject: [PATCH] Improve etcd scalability. Bump etcd to 3.3.10, enable auto-compact every 1 hour, add 'compact' and 'defrag' operations to 'etcdadm' utility. (#1517) --- builtin/files/etcdadm/README.md | 5 ++++- builtin/files/etcdadm/etcdadm | 16 +++++++++++++++- builtin/files/userdata/cloud-config-etcd | 4 ++++ pkg/api/etcd.go | 2 +- pkg/api/etcd_test.go | 4 ++-- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/builtin/files/etcdadm/README.md b/builtin/files/etcdadm/README.md index b38231b10..10537d6a3 100644 --- a/builtin/files/etcdadm/README.md +++ b/builtin/files/etcdadm/README.md @@ -14,7 +14,7 @@ AWS_SECRET_ACCESS_KEY=... \ ETCDADM_AWSCLI_DOCKER_IMAGE=quay.io/coreos/awscli \ # Required settings AWS_DEFAULT_REGION=ap-northeast-1 \ -ETCD_VERSION=3.2.13 \ +ETCD_VERSION=3.3.10 \ ETCD_DATA_DIR=/var/lib/etcd \ ETCD_INITIAL_CLUSTER=etcd0=http://127.0.0.1:3080,etcd1=http://127.0.0.1:3180,etcd2=http://127.0.0.1:3280 \ ETCDCTL_ENDPOINTS=http://127.0.0.1:3079,etcd1=http://127.0.0.1:3179,etcd2=http://127.0.0.1:3279, \ @@ -39,6 +39,9 @@ save it in S3 * `etcdadm replace` is used to manually recover from an etcd member from a permanent failure. It resets the etcd member running on the same node as etcdadm by: 1. clearing the contents of the etcd data dir 2. removing and then re-adding the etcd member by running `etcdctl member remove` and then `etcdctl memer add` +* `etcdadm compact` performs a compaction of the etcd cluster (i.e. removes all version history of the keys leaving the last one) - warning, the operation can adversely affect etcd cluster performance whilst it is running. +* `etcdadm defrag` performs a de-fragmentation operation on the current etcd server's datastore (does not perform this cluster-wide) - warning, the operation can adversely affect etcd cluster performance whilst it is running. 
+ ## Pre-requisites diff --git a/builtin/files/etcdadm/etcdadm b/builtin/files/etcdadm/etcdadm index 97d201e72..5a2338d8c 100755 --- a/builtin/files/etcdadm/etcdadm +++ b/builtin/files/etcdadm/etcdadm @@ -85,7 +85,7 @@ config_etcd_endpoints() { echo "${ETCD_ENDPOINTS}" } -etcd_version=${ETCD_VERSION:-3.2.13} +etcd_version=${ETCD_VERSION:-3.3.10} etcd_aci_url="https://github.com/coreos/etcd/releases/download/v$etcd_version/etcd-v$etcd_version-linux-amd64.aci" member_count="${ETCDADM_MEMBER_COUNT:?missing required env}" @@ -223,6 +223,14 @@ cluster_check() { fi } +cluster_compact() { + member_etcdctl compact 1 +} + +member_defrag() { + member_etcdctl defrag --command-timeout=60s --debug +} + member_next_index() { echo $(( ($(config_member_index) + 1) % member_count )) } @@ -915,6 +923,12 @@ etcdadm_main() { "check" ) cluster_check ;; + "compact" ) + cluster_compact + ;; + "defrag" ) + member_defrag + ;; * ) if [ "$(type -t "$cmd")" == "function" ]; then "$cmd" "${@:2}" diff --git a/builtin/files/userdata/cloud-config-etcd b/builtin/files/userdata/cloud-config-etcd index b1e0ca131..f517615de 100644 --- a/builtin/files/userdata/cloud-config-etcd +++ b/builtin/files/userdata/cloud-config-etcd @@ -409,6 +409,10 @@ coreos: content: | [Service] Environment="ETCD_IMAGE_TAG=v{{.Etcd.Version}}" + - name: 40-auto-compaction.conf + content: | + [Service] + Environment="ETCD_AUTO_COMPACTION_RETENTION=1" {{end}} enable: true command: start diff --git a/pkg/api/etcd.go b/pkg/api/etcd.go index f671be0c7..e003582d9 100644 --- a/pkg/api/etcd.go +++ b/pkg/api/etcd.go @@ -205,7 +205,7 @@ func (e Etcd) Version() EtcdVersion { if e.Cluster.Version != "" { return e.Cluster.Version } - return "3.2.13" + return "3.3.10" } func (v EtcdVersion) Is3() bool { diff --git a/pkg/api/etcd_test.go b/pkg/api/etcd_test.go index 0642f67ef..e96359d0a 100644 --- a/pkg/api/etcd_test.go +++ b/pkg/api/etcd_test.go @@ -36,8 +36,8 @@ func TestEtcd(t *testing.T) { t.Errorf("name tag key incorrect, expected: 
kube-aws:etcd:name, got: %s", etcdTest.NameTagKey()) } - if etcdTest.Version() != "3.2.13" { - t.Errorf("etcd version incorrect, epxected: 3.2.13, got: %s", etcdTest.Version()) + if etcdTest.Version() != "3.3.10" { + t.Errorf("etcd version incorrect, epxected: 3.3.10, got: %s", etcdTest.Version()) } if !etcdTest.NodeShouldHaveEIP() {