diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8516d4ec0a47..7e7b71e77822 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -253,7 +253,7 @@ jobs: - uses: actions/cache@v2 with: path: ${{ runner.tool_cache }}/qemu - key: qemu-${{ matrix.target }}-${{ env.QEMU_BUILD_VERSION }} + key: qemu-${{ matrix.target }}-${{ env.QEMU_BUILD_VERSION }}-patchmadvise2 if: matrix.target != '' && matrix.os == 'ubuntu-latest' - name: Install cross-compilation tools run: | @@ -286,6 +286,7 @@ jobs: # quickly. curl https://download.qemu.org/qemu-$QEMU_BUILD_VERSION.tar.xz | tar xJf - cd qemu-$QEMU_BUILD_VERSION + patch -p1 < $GITHUB_WORKSPACE/ci/qemu-madvise.patch ./configure --target-list=${{ matrix.qemu_target }} --prefix=${{ runner.tool_cache}}/qemu --disable-tools --disable-slirp --disable-fdt --disable-capstone --disable-docs ninja -C build install touch ${{ runner.tool_cache }}/qemu/built diff --git a/ci/qemu-madvise.patch b/ci/qemu-madvise.patch new file mode 100644 index 000000000000..d2eb9fcd33e1 --- /dev/null +++ b/ci/qemu-madvise.patch @@ -0,0 +1,61 @@ +From 1ec3de1634195a4d4410cc33fdc66c68057e16a3 Mon Sep 17 00:00:00 2001 +From: Chris Fallin +Date: Sat, 5 Feb 2022 22:45:58 -0800 +Subject: [PATCH] Emulate Linux madvise() properly when possible. + +Curently madvise() is not emulated for Linux targets because it is not +trivial to emulate when the guest and host page sizes differ -- in this +case, mmap()s are not passed straight through, so the semantics of +various MADV_* flags are not trivial to replicate. + +However, if the guest and host are both Linux, and the page sizes are +the same on both ends (which is often the case, e.g. 4KiB for x86-64, +aarch64, s390x, and possibly others), then the mmap()s are in fact +passed straight through. Furthermore, the MADV_* flags are defined in +target-independent headers, so we can pass the base, length, and +`advice` arugments to `madvise()` straight through. + +This patch alters the Linux-userspace syscall emulation to do just that, +passing through the `madvise()` calls when possible and returning +`EINVAL` otherwise so the guest is properly informed that the desired +semantics (e.g., MADV_DONTNEED to clear memory) are not available. +--- + linux-user/syscall.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 5950222a77..836e39df5f 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -11853,12 +11853,22 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + + #ifdef TARGET_NR_madvise + case TARGET_NR_madvise: +- /* A straight passthrough may not be safe because qemu sometimes +- turns private file-backed mappings into anonymous mappings. +- This will break MADV_DONTNEED. +- This is a hint, so ignoring and returning success is ok. */ +- return 0; +-#endif ++#ifdef __linux__ ++ /* If the host is Linux, and the guest and host page sizes are the ++ * same, then mmaps will have been passed through one-to-one, so we can ++ * rely on the madvise semantics of the host. Note that the advice ++ * argument (arg3) is fully architecture-independent. */ ++ if (TARGET_PAGE_SIZE == sysconf(_SC_PAGESIZE)) { ++ return get_errno(madvise(g2h_untagged(arg1), (size_t)arg2, (int)arg3)); ++ } else { ++ return -TARGET_EINVAL; ++ } ++#else // __linux__ ++ /* We will not be able to emulate the Linux-specific semantics, so we ++ * raise an error. */ ++ return -TARGET_EINVAL; ++#endif // !__linux__ ++#endif // TARGET_NR_madvise + #ifdef TARGET_NR_fcntl64 + case TARGET_NR_fcntl64: + { +-- +2.34.1 + diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index 9990e797d819..bab7a47f3f9f 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -1748,11 +1748,6 @@ mod test { #[cfg(all(unix, target_pointer_width = "64", feature = "async"))] #[test] fn test_stack_zeroed() -> Result<()> { - // https://github.com/bytecodealliance/wasmtime/pull/2518#issuecomment-747280133 - if std::env::var("WASMTIME_TEST_NO_HOG_MEMORY").is_ok() { - return Ok(()); - } - let allocator = PoolingInstanceAllocator::new( PoolingAllocationStrategy::NextAvailable, ModuleLimits { diff --git a/crates/runtime/src/memfd.rs b/crates/runtime/src/memfd.rs index cf0da3e0dff1..53ba2490c118 100644 --- a/crates/runtime/src/memfd.rs +++ b/crates/runtime/src/memfd.rs @@ -554,10 +554,6 @@ mod test { #[test] fn instantiate_no_image() { - if skip_tests_due_to_qemu_madvise_semantics() { - return; - } - // 4 MiB mmap'd area, not accessible let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap(); // Create a MemFdSlot on top of it @@ -590,10 +586,6 @@ mod test { #[test] fn instantiate_image() { - if skip_tests_due_to_qemu_madvise_semantics() { - return; - } - // 4 MiB mmap'd area, not accessible let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap(); // Create a MemFdSlot on top of it @@ -637,19 +629,4 @@ mod test { let slice = mmap.as_slice(); assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]); } - - /// qemu's madvise implementation does not implement the - /// "flash-reset back to zero or CoW backing" semantics that Linux - /// does. Our CI setup uses qemu (in usermode-binary mode, not - /// whole-system mode) to run tests on aarch64 and s390x. We want - /// to skip these tests when under qemu, but not when someone is - /// developing natively on one of these architectures. So instead, - /// we dynamically detect an environment variable that our CI - /// setup sets. - /// - /// See `skip_pooling_allocator_tests()` in `tests/all/main.rs` - /// for more. - fn skip_tests_due_to_qemu_madvise_semantics() -> bool { - std::env::var("WASMTIME_TEST_NO_HOG_MEMORY").is_ok() - } }