# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Integration tests for Firecracker's huge pages support"""
import signal
import time
import pytest
from framework import utils
from framework.microvm import HugePagesConfig
from framework.properties import global_props
from framework.utils_ftrace import ftrace_events
from integration_tests.functional.test_uffd import SOCKET_PATH, spawn_pf_handler


def check_hugetlbfs_in_use(pid: int, allocation_name: str):
    """Asserts that the process with the given `pid` is using hugetlbfs pages somewhere.

    `allocation_name` should be the name of the smaps entry for which we want to verify that huge pages are used.
    For memfd-backed guest memory, this would be "memfd:guest_mem" (the `guest_mem` part originating from the name
    we give the memfd in memory.rs); for anonymous memory, it would be "/anon_hugepage".

    Note: our tests do not currently configure vhost-user-blk devices, so we only exercise
    the "/anon_hugepage" case.
    """
# Format of a sample smaps entry:
# 7fc2bc400000-7fc2cc400000 rw-s 00000000 00:10 25488401 /anon_hugepage
# Size: 262144 kB
# KernelPageSize: 2048 kB
# MMUPageSize: 2048 kB
# Rss: 0 kB
# Pss: 0 kB
# Pss_Dirty: 0 kB
# Shared_Clean: 0 kB
# Shared_Dirty: 0 kB
# Private_Clean: 0 kB
# Private_Dirty: 0 kB
# Referenced: 0 kB
# Anonymous: 0 kB
# LazyFree: 0 kB
# AnonHugePages: 0 kB
# ShmemPmdMapped: 0 kB
# FilePmdMapped: 0 kB
# Shared_Hugetlb: 0 kB
# Private_Hugetlb: 92160 kB
# Swap: 0 kB
# SwapPss: 0 kB
# Locked: 0 kB
# THPeligible: 0
# ProtectionKey: 0
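    # `-A 23` prints the 23 field lines following the matched mapping header
    # (exactly the fields listed above), so the KernelPageSize line belonging
    # to that mapping is always included in the grep window.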
cmd = f"cat /proc/{pid}/smaps | grep {allocation_name} -A 23 | grep KernelPageSize"
_, stdout, _ = utils.check_output(cmd)
kernel_page_size_kib = int(stdout.split()[1])
assert kernel_page_size_kib > 4
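

# A minimal sketch (not used by the tests in this file): parse /proc/<pid>/smaps
# directly instead of shelling out, avoiding the assumption that KernelPageSize
# appears within a fixed 23-line grep window. `smaps_kernel_page_sizes` is a
# hypothetical helper name, not part of the test framework.
def smaps_kernel_page_sizes(pid: int, allocation_name: str):
    """Returns the KernelPageSize values (in KiB) of all smaps entries whose
    mapping header contains `allocation_name`."""
    sizes = []
    current_entry_matches = False
    with open(f"/proc/{pid}/smaps", encoding="utf-8") as smaps:
        for line in smaps:
            fields = line.split()
            if not fields:
                continue
            if not fields[0].endswith(":"):
                # A mapping header line, e.g.
                # 7fc2bc400000-7fc2cc400000 rw-s 00000000 00:10 25488401 /anon_hugepage
                current_entry_matches = allocation_name in line
            elif fields[0] == "KernelPageSize:" and current_entry_matches:
                sizes.append(int(fields[1]))  # reported in kB
    return sizes

# Usage sketch: sizes = smaps_kernel_page_sizes(pid, allocation_name)
#               assert sizes and min(sizes) > 4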


def test_hugetlbfs_boot(uvm_plain):
"""Tests booting a microvm with guest memory backed by 2MB hugetlbfs pages"""
uvm_plain.spawn()
uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB, mem_size_mib=128)
uvm_plain.add_net_iface()
uvm_plain.start()
check_hugetlbfs_in_use(
uvm_plain.firecracker_pid,
"/anon_hugepage",
)


def test_hugetlbfs_snapshot(
microvm_factory, guest_kernel_linux_5_10, rootfs, uffd_handler_paths
):
"""
Test hugetlbfs snapshot restore via uffd
"""
### Create Snapshot ###
vm = microvm_factory.build(guest_kernel_linux_5_10, rootfs)
vm.memory_monitor = None
vm.spawn()
vm.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB, mem_size_mib=128)
vm.add_net_iface()
vm.start()
check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage")
snapshot = vm.snapshot_full()
vm.kill()
### Restore Snapshot ###
vm = microvm_factory.build()
vm.spawn()
# Spawn page fault handler process.
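    # A hugetlbfs-backed snapshot cannot be restored by simply mmap-ing the
    # memory file; Firecracker supports restoring huge-page memory only via a
    # userfaultfd handler, which is why one is required here.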
_pf_handler = spawn_pf_handler(
vm, uffd_handler_paths["valid_handler"], snapshot.mem
)
vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH)
check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage")


def test_hugetlbfs_diff_snapshot(microvm_factory, uvm_plain, uffd_handler_paths):
"""
Test hugetlbfs differential snapshot support.
Despite guest memory being backed by huge pages, differential snapshots still work at 4K granularity.
"""
### Create Snapshot ###
uvm_plain.memory_monitor = None
uvm_plain.spawn()
uvm_plain.basic_config(
huge_pages=HugePagesConfig.HUGETLBFS_2MB,
mem_size_mib=128,
track_dirty_pages=True,
)
uvm_plain.add_net_iface()
uvm_plain.start()
    # Wait for microvm to boot (any successful SSH command implies boot completed)
    uvm_plain.ssh.check_output("true")
    base_snapshot = uvm_plain.snapshot_diff()
    uvm_plain.resume()
    # Run a command to dirty some pages
    uvm_plain.ssh.check_output("sync")
snapshot_diff = uvm_plain.snapshot_diff()
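    # Merge the diff's dirty 4K pages on top of the base, producing a memory
    # file equivalent to a full snapshot taken at this point.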
snapshot_merged = snapshot_diff.rebase_snapshot(base_snapshot)
uvm_plain.kill()
vm = microvm_factory.build()
vm.spawn()
# Spawn page fault handler process.
_pf_handler = spawn_pf_handler(
vm, uffd_handler_paths["valid_handler"], snapshot_merged.mem
)
vm.restore_from_snapshot(snapshot_merged, resume=True, uffd_path=SOCKET_PATH)
    # Verify that the restored microvm works.
    vm.ssh.check_output("true")


@pytest.mark.parametrize("huge_pages", HugePagesConfig)
def test_ept_violation_count(
microvm_factory,
guest_kernel_linux_5_10,
rootfs,
uffd_handler_paths,
metrics,
huge_pages,
):
"""
Tests hugetlbfs snapshot restore with a UFFD handler that pre-faults the entire guest memory
on the first page fault. Records metrics about the number of EPT_VIOLATIONS encountered by KVM.
"""
### Create Snapshot ###
vm = microvm_factory.build(guest_kernel_linux_5_10, rootfs)
vm.memory_monitor = None
vm.spawn()
vm.basic_config(huge_pages=huge_pages, mem_size_mib=256)
vm.add_net_iface()
vm.start()
metrics.set_dimensions(
{
"performance_test": "test_hugetlbfs_snapshot",
"huge_pages_config": str(huge_pages),
**vm.dimensions,
}
)
    # Wait for the microvm to boot, then spawn fast_page_fault_helper to set up an
    # environment in which we can trigger a lot of fast page faults after restoring
    # the snapshot.
vm.ssh.check_output(
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
)
_, pid, _ = vm.ssh.check_output("pidof fast_page_fault_helper")
# Give the helper time to initialize
time.sleep(5)
snapshot = vm.snapshot_full()
vm.kill()
### Restore Snapshot ###
vm = microvm_factory.build()
vm.jailer.daemonize = False
vm.jailer.extra_args.update({"no-seccomp": None})
vm.spawn()
# Spawn page fault handler process.
_pf_handler = spawn_pf_handler(
vm, uffd_handler_paths["fault_all_handler"], snapshot.mem
)
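    # Record all KVM tracepoints while the snapshot is restored and while the
    # helper re-touches its memory, so the resulting fault exits can be
    # counted from the trace buffer below.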
with ftrace_events("kvm:*"):
vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH)
        # Verify that the guest can run commands, and wake up the fast page fault
        # helper so it triggers its page faults.
vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
# Give the helper time to touch all its pages
time.sleep(5)
if global_props.cpu_architecture == "x86_64":
trace_entry = "reason EPT_VIOLATION"
metric = "ept_violations"
else:
# On ARM, KVM does not differentiate why it got a guest page fault.
# However, even in this slightly more general metric, we see a significant
# difference between 4K and 2M pages.
trace_entry = "kvm_guest_fault"
metric = "guest_page_faults"
_, metric_value, _ = utils.check_output(
f"cat /sys/kernel/tracing/trace | grep '{trace_entry}' | wc -l"
)
metrics.put_metric(metric, int(metric_value), "Count")


def test_negative_huge_pages_plus_balloon(uvm_plain):
"""Tests that huge pages and memory ballooning cannot be used together"""
uvm_plain.memory_monitor = None
uvm_plain.spawn()
# Ensure setting huge pages and then adding a balloon device doesn't work
uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB)
with pytest.raises(
RuntimeError,
match="Firecracker's huge pages support is incompatible with memory ballooning.",
):
uvm_plain.api.balloon.put(amount_mib=0, deflate_on_oom=False)
# Ensure adding a balloon device and then setting huge pages doesn't work
uvm_plain.basic_config(huge_pages=HugePagesConfig.NONE)
uvm_plain.api.balloon.put(amount_mib=0, deflate_on_oom=False)
with pytest.raises(
RuntimeError,
match="Machine config error: Firecracker's huge pages support is incompatible with memory ballooning.",
):
uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB)


def test_negative_huge_pages_plus_initrd(uvm_with_initrd):
"""Tests that huge pages and initrd cannot be used together"""
uvm_with_initrd.jailer.daemonize = False
uvm_with_initrd.spawn()
uvm_with_initrd.memory_monitor = None
# Ensure setting huge pages and then telling FC to boot an initrd does not work
with pytest.raises(
RuntimeError,
match="Boot source error: Firecracker's huge pages support is incompatible with initrds.",
):
# `basic_config` first does a PUT to /machine-config, which will apply the huge pages configuration,
# and then a PUT to /boot-source, which will register the initrd
uvm_with_initrd.basic_config(
boot_args="console=ttyS0 reboot=k panic=1 pci=off",
use_initrd=True,
huge_pages=HugePagesConfig.HUGETLBFS_2MB,
add_root_device=False,
vcpu_count=1,
)
# Ensure telling FC about the initrd first and then setting huge pages doesn't work
# This first does a PUT to /machine-config to reset the huge pages configuration, before doing a
# PUT to /boot-source to register the initrd
uvm_with_initrd.basic_config(
huge_pages=HugePagesConfig.NONE,
boot_args="console=ttyS0 reboot=k panic=1 pci=off",
use_initrd=True,
)
with pytest.raises(
RuntimeError,
match="Machine config error: Firecracker's huge pages support is incompatible with initrds.",
):
uvm_with_initrd.api.machine_config.patch(
huge_pages=HugePagesConfig.HUGETLBFS_2MB
)