-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathtest_etcd.py
332 lines (279 loc) · 11.6 KB
/
test_etcd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
import asyncio
import logging
import pytest
import re
from pathlib import Path
from ..utils import juju_run, juju_run_action
# Set the "websockets.protocol" logger to INFO (presumably to keep
# debug-level websocket chatter out of the test output -- TODO confirm).
ws_logger = logging.getLogger("websockets.protocol")
ws_logger.setLevel(logging.INFO)
async def test_etcd_actions(model, tools):
    """Test the etcd charm's alarm, compact and defrag actions.

    The first etcd unit has its backend quota lowered to 16MB and its
    database filled until a write fails, which is expected to raise a
    NOSPACE alarm.  The test then compacts and defragments the database,
    disarms the alarm and confirms the alarm list is empty.

    The quota is always reset to 0 (unlimited, the default) and etcd
    restarted afterwards, even when an assertion fails, so a failure here
    does not leave the unit with a tiny quota for later tests.
    """

    async def assert_action(unit, action, output_regex=None, **action_params):
        """Run a charm action; optionally match its "output" result.

        Args:
            unit: unit to run the action on.
            action: name of the charm action.
            output_regex: if truthy, pattern that must be found (via
                ``re.search``) in ``results["output"]``.
            **action_params: forwarded to ``juju_run_action``.
        """
        result = await juju_run_action(unit, action, **action_params)
        if output_regex:
            output = result.results["output"]
            assert re.search(output_regex, output)

    etcd = model.applications["etcd"].units[0]

    # set db size limit to 16MB so we can fill it quickly
    await juju_run(
        etcd,
        "sed -i 's/^quota-backend-bytes:.*$/quota-backend-bytes: 16777216/' /var/snap/etcd/common/etcd.conf.yml",
    )
    try:
        await juju_run(etcd, "sudo systemctl restart snap.etcd.etcd.service")

        # fill the db to cause an alarm; the loop ends on the first failed put
        await juju_run(
            etcd,
            "while [ 1 ]; do dd if=/dev/urandom bs=1024 count=1024 | ETCDCTL_API=3 /snap/bin/etcd.etcdctl --endpoints :4001 put key || break; done",
        )

        # confirm alarm is raised
        await assert_action(etcd, "alarm-list", r"alarm:NOSPACE")

        # compact and defrag db, then disarm alarm
        await assert_action(etcd, "compact", r"compacted revision", physical=True)
        await assert_action(etcd, "defrag", r"Finished defragmenting")
        await assert_action(etcd, "alarm-disarm")

        # confirm alarm is gone
        await assert_action(etcd, "alarm-list", r"^$")
    finally:
        # reset db size to unlimited (default), whatever happened above
        await juju_run(
            etcd,
            "sed -i 's/^quota-backend-bytes:.*$/quota-backend-bytes: 0/' /var/snap/etcd/common/etcd.conf.yml",
        )
        await juju_run(etcd, "sudo systemctl restart snap.etcd.etcd.service")
async def test_etcd_scaling(model, tools):
    """Remove the etcd leader, then add a unit, checking health after each.

    Scale-down destroys the first unit that reports itself as leader and
    blocks until the callback registered with ``unit.on_remove`` fires.
    After each scaling step the test waits via ``tools.juju_wait`` and
    re-runs ``test_cluster_health``.
    """
    unit_gone = asyncio.Event()

    async def on_unit_removed(delta, old_obj, new_obj, model):
        # Wake up the test once the removal callback is invoked.
        unit_gone.set()

    app = model.applications["etcd"]

    # Scale down
    for candidate in app.units:
        if not await candidate.is_leader_from_status():
            continue
        candidate.on_remove(on_unit_removed)
        await app.destroy_unit(candidate.name)
        await unit_gone.wait()
        break
    await tools.juju_wait()
    await test_cluster_health(model, tools)

    # Scale up
    await app.add_units(count=1)
    await tools.juju_wait()
    await test_cluster_health(model, tools)
@pytest.mark.skip("Need to manually verify result tarball")
async def test_snapshot_restore(model, tools):
    """Trigger the snapshot and restore actions on the etcd leader.

    For each dataset version the test loads dummy data, takes a snapshot,
    copies the snapshot tarball locally, deletes the data, attaches the
    tarball back as the ``snapshot`` resource, runs ``restore`` and checks
    the data is present again.

    Only the "v3" dataset is exercised.  Snapshot and restore iterate the
    same dataset list, so restore never looks up a snapshot that was not
    taken.
    """
    from sh import juju, ls

    datasets = ["v3"]

    etcd = model.applications["etcd"]
    for unit in etcd.units:
        leader = await unit.is_leader_from_status()
        if not leader:
            continue

        # Load dummy data
        await load_data(unit)
        for ver in datasets:
            assert await is_data_present(unit, ver)

        filenames = {}
        for dataset in datasets:
            # Take snapshot of data
            action = await juju_run_action(
                unit, "snapshot", **{"keys-version": dataset}
            )
            src = Path(action.results["snapshot"]["path"])
            # Copy into the current directory under the same file name.
            dst = src.name
            await unit.scp_from(
                str(src),
                str(dst),
                tools.controller_name,
                tools.connection,
                proxy=tools.juju_ssh_proxy,
            )
            filenames[dataset] = str(dst)
            # sh does not expand shell globs, so list the copied file
            # explicitly instead of passing a wildcard pattern.
            out = ls("-l", str(dst))
            print(out.stdout.decode().strip())

        await delete_data(unit)
        for ver in datasets:
            assert await is_data_present(unit, ver) is False

        for dataset in datasets:
            # Restore the snapshot taken for this dataset.
            # Note: libjuju does not implement attach yet.
            # No shell is involved, so the path must not be wrapped in
            # quotes: they would be passed to juju literally.
            juju(
                "attach",
                "-m",
                "{}:{}".format(tools.controller_name, model.info.name),
                "etcd",
                "snapshot=./{}".format(filenames[dataset]),
            )
            await juju_run_action(unit, "restore")
            for ver in datasets:
                assert await is_data_present(unit, ver) is True
async def test_leader_status(model, tools):
    """Check that the etcd leader unit reports the etcd daemon as active."""
    for unit in model.applications["etcd"].units:
        if not await unit.is_leader_from_status():
            continue
        # check=False: the reported state is inspected below rather than
        # relying on the command's exit status.
        result = await juju_run(
            unit, "systemctl is-active snap.etcd.etcd", check=False
        )
        state = result.stdout.strip()
        # "inactive" contains "active", so rule it out first.
        assert "inactive" not in state
        assert "active" in state
async def test_config_snapd_refresh(model, tools):
    """Check the initial snap refresh timer and that config can change it."""
    app = model.applications["etcd"]
    for unit in app.units:
        if not await unit.is_leader_from_status():
            continue

        # The default timer should be some day of the week followed by a
        # number, i.e. the same length as "dayX".
        initial = await juju_run(unit, "snap get core refresh.timer")
        assert len(initial.stdout.strip()) == len("dayX")

        # Change the timer through charm config and read it back.
        await app.set_config({"snapd_refresh": "fri5"})
        updated = await juju_run(unit, "snap get core refresh.timer")
        assert updated.stdout.strip() == "fri5"
async def test_cluster_health(model, tools):
    """Check on every etcd unit that the daemon is active and healthy.

    Each unit must report the etcd service as active, and the output of
    ``etcdctl cluster-health`` must not mention "unhealthy" or
    "unavailable".
    """
    tls_env = " ".join(
        [
            "ETCDCTL_KEY_FILE=/var/snap/etcd/common/client.key",
            "ETCDCTL_CERT_FILE=/var/snap/etcd/common/client.crt",
            "ETCDCTL_CA_FILE=/var/snap/etcd/common/ca.crt",
        ]
    )
    # The command is the same for every unit, so build it once.
    health_cmd = "{} /snap/bin/etcdctl cluster-health".format(tls_env)

    for unit in model.applications["etcd"].units:
        service = await juju_run(unit, "systemctl is-active snap.etcd.etcd")
        service_state = service.stdout.strip()
        # "inactive" contains "active", so rule it out first.
        assert "inactive" not in service_state
        assert "active" in service_state

        report = await juju_run(unit, health_cmd)
        report_text = report.stdout.strip()
        assert "unhealthy" not in report_text
        assert "unavailable" not in report_text
async def test_leader_knows_all_members(model, tools):
    """Check that the leader's member list matches the deployed units.

    Runs ``etcdctl member list`` (v2 API) on the leader unit and asserts
    that no output line reports an unavailable cluster and that there is
    exactly one line per etcd unit.
    """
    # The spacing here is semi-important as it is a string of ENV exports.
    # These paths hard-code the defaults; if the defaults in layer.yaml
    # change, this will need to change.
    certs = (
        "ETCDCTL_KEY_FILE=/var/snap/etcd/common/client.key "
        "ETCDCTL_CERT_FILE=/var/snap/etcd/common/client.crt "
        "ETCDCTL_CA_FILE=/var/snap/etcd/common/ca.crt"
    )

    # format the command, and execute on the leader
    cmd = "{} ETCDCTL_API=2 /snap/bin/etcd.etcdctl member list".format(certs)

    etcd = model.applications["etcd"]
    for unit in etcd.units:
        is_leader = await unit.is_leader_from_status()
        if not is_leader:
            continue

        out = await juju_run(unit, cmd)
        # turn the output into a list so we can iterate
        members = out.stdout.strip().split("\n")
        for item in members:
            # This message is returned when TLS is enabled and we don't
            # have proper keys.  This is kind of an "ssl works" test, but
            # of the worst variety... assuming the full stack completed.
            # Check each line: a substring test against the list itself
            # would only match a line that equals the message exactly.
            assert "etcd cluster is unavailable" not in item
        assert len(members) == len(etcd.units)
# TODO: Can we remove these?
# async def validate_etcd_fixture_data(etcd):
# """ Recall data set by set_etcd_fixture_data to ensure it persisted
# through the upgrade """
# # The spacing here is semi-important as its a string of ENV exports
# # also, this is hard coding for the defaults. if the defaults in
# # layer.yaml change, this will need to change.
# certs = (
# "ETCDCTL_KEY_FILE=/var/snap/etcd/common/client.key "
# "ETCDCTL_CERT_FILE=/var/snap/etcd/common/client.crt "
# "ETCDCTL_CA_FILE=/var/snap/etcd/common/ca.crt"
# )
# etcd = model.applications["etcd"]
# for unit in etcd.units:
# is_leader = await unit.is_leader_from_status()
# if is_leader:
# await juju_run(unit, "{} /snap/bin/etcd.etcdctl set juju rocks".format(certs))
# await juju_run(unit,
# "{} /snap/bin/etcd.etcdctl set nested/data works".format(certs)
# )
# juju_key = await juju_run(unit,
# "{} /snap/bin/etcd.etcdctl get juju rocks".format(certs)
# )
# nested_key = await juju_run(unit,
# "{} /snap/bin/etcd.etcdctl get nested/data works".format(certs)
# )
# assert "rocks" in juju_key.stdout.strip()
# assert "works" in nested_key.stdout.strip()
# async def validate_running_snap_daemon(etcd):
# """ Validate the snap based etcd daemon is running after an op """
# etcd = model.applications["etcd"]
# for unit in etcd.units:
# is_leader = await unit.is_leader_from_status()
# if is_leader:
# daemon_status = await juju_run(unit, "systemctl is-active snap.etcd.etcd")
# assert "active" in daemon_status.stdout.strip()
async def load_data(leader):
    """Write one dummy key through each etcdctl API version on ``leader``.

    Sets ``/etcd2key`` with the v2 API and puts ``etcd3key`` with the v3
    API, in that order.
    """
    tls_env = " ".join(
        [
            "ETCDCTL_KEY_FILE=/var/snap/etcd/common/client.key",
            "ETCDCTL_CERT_FILE=/var/snap/etcd/common/client.crt",
            "ETCDCTL_CA_FILE=/var/snap/etcd/common/ca.crt",
        ]
    )
    templates = (
        "{} ETCDCTL_API=2 /snap/bin/etcd.etcdctl set /etcd2key etcd2value",
        "{} ETCDCTL_API=3 /snap/bin/etcd.etcdctl --endpoints=http://localhost:4001 "
        "put etcd3key etcd3value",
    )
    for template in templates:
        await juju_run(leader, template.format(tls_env))
async def is_data_present(leader, version):
    """Report whether the dummy data exists in the given etcd datastore.

    Args:
        leader: unit on which etcdctl is run.
        version: datastore version; only "v3" is actually checked.

    Returns: True if "etcd3key" is listed for "v3"; False for "v3" when
    the key is missing and for every other version value.
    """
    # Anything other than the v3 datastore is reported as absent without
    # running a command on the unit.
    if version != "v3":
        return False

    tls_env = " ".join(
        [
            "ETCDCTL_KEY_FILE=/var/snap/etcd/common/client.key",
            "ETCDCTL_CERT_FILE=/var/snap/etcd/common/client.crt",
            "ETCDCTL_CA_FILE=/var/snap/etcd/common/ca.crt",
        ]
    )
    list_keys = (
        "{} ETCDCTL_API=3 /snap/bin/etcd.etcdctl --endpoints=http://localhost:4001 "
        'get "" --prefix --keys-only'
    ).format(tls_env)
    listing = await juju_run(leader, list_keys)
    return "etcd3key" in listing.stdout.strip()
async def delete_data(leader):
    """Remove the dummy keys from etcd on ``leader``.

    Removes ``/etcd2key`` with the v2 API and deletes ``etcd3key`` with
    the v3 API, in that order.
    """
    tls_env = " ".join(
        [
            "ETCDCTL_KEY_FILE=/var/snap/etcd/common/client.key",
            "ETCDCTL_CERT_FILE=/var/snap/etcd/common/client.crt",
            "ETCDCTL_CA_FILE=/var/snap/etcd/common/ca.crt",
        ]
    )
    templates = (
        "{} ETCDCTL_API=2 /snap/bin/etcd.etcdctl rm /etcd2key",
        "{} ETCDCTL_API=3 /snap/bin/etcdctl --endpoints=http://localhost:4001 "
        "del etcd3key",
    )
    for template in templates:
        await juju_run(leader, template.format(tls_env))