From 2de3845b25ad0d25e258405df8b89dc447accdb0 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 24 Dec 2024 11:34:42 -0800 Subject: [PATCH] Build tool for hunting down flakes --- libc/calls/shm_path_np.c | 3 +- libc/intrin/pthread_mutex_lock.c | 1 - libc/thread/thread.h | 3 - test/libc/calls/cachestat_test.c | 7 +- test/libc/calls/raise_test.c | 3 +- test/libc/calls/shm_open_test.c | 14 ++- test/posix/mutex_async_signal_safety_test.c | 114 ------------------ tool/build/BUILD.mk | 4 +- tool/build/{dlopen_test.c => dlopen_tester.c} | 0 tool/scripts/flakes | 60 +++++++++ 10 files changed, 78 insertions(+), 131 deletions(-) delete mode 100644 test/posix/mutex_async_signal_safety_test.c rename tool/build/{dlopen_test.c => dlopen_tester.c} (100%) create mode 100755 tool/scripts/flakes diff --git a/libc/calls/shm_path_np.c b/libc/calls/shm_path_np.c index 42df957c419..dc5813b8a9f 100644 --- a/libc/calls/shm_path_np.c +++ b/libc/calls/shm_path_np.c @@ -35,9 +35,8 @@ void shm_path_np(const char *name, char buf[hasatleast 78]) { const char *a; uint8_t digest[BLAKE2B256_DIGEST_LENGTH]; a = "/tmp/", n = 5; - if (IsLinux() && isdirectory("/dev/shm")) { + if (IsLinux() && isdirectory("/dev/shm")) a = "/dev/shm/", n = 9; - } BLAKE2B256(name, strlen(name), digest); p = mempcpy(buf, a, n); p = hexpcpy(p, digest, BLAKE2B256_DIGEST_LENGTH); diff --git a/libc/intrin/pthread_mutex_lock.c b/libc/intrin/pthread_mutex_lock.c index e3dc8eca7e1..af9f1836aeb 100644 --- a/libc/intrin/pthread_mutex_lock.c +++ b/libc/intrin/pthread_mutex_lock.c @@ -242,7 +242,6 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex, * * - `PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP` * - `PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP` - * - `PTHREAD_SIGNAL_SAFE_MUTEX_INITIALIZER_NP` * - `PTHREAD_NORMAL_MUTEX_INITIALIZER_NP` * * Locking a mutex that's already locked by the calling thread will make diff --git a/libc/thread/thread.h b/libc/thread/thread.h index f45d880951f..533f15bc30e 100644 --- a/libc/thread/thread.h +++ b/libc/thread/thread.h @@ -52,9 +52,6 @@ COSMOPOLITAN_C_START_ #define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_RECURSIVE} #define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_ERRORCHECK} -#define PTHREAD_SIGNAL_SAFE_MUTEX_INITIALIZER_NP \ - {0, PTHREAD_MUTEX_RECURSIVE | PTHREAD_PROCESS_SHARED} - #ifndef __cplusplus #define _PTHREAD_ATOMIC(x) _Atomic(x) #else diff --git a/test/libc/calls/cachestat_test.c b/test/libc/calls/cachestat_test.c index b756d852d48..92805dfee6f 100644 --- a/test/libc/calls/cachestat_test.c +++ b/test/libc/calls/cachestat_test.c @@ -29,6 +29,7 @@ #include "libc/runtime/runtime.h" #include "libc/runtime/sysconf.h" #include "libc/stdio/rand.h" +#include "libc/stdio/stdio.h" #include "libc/sysv/consts/auxv.h" #include "libc/sysv/consts/o.h" #include "libc/testlib/testlib.h" @@ -104,12 +105,14 @@ TEST(cachestat, testCachestatSyncNoDirty) { } TEST(cachestat, testCachestatShmem) { + char name[64]; + sprintf(name, "/cachestat_test-%ld", _rand64()); size_t filesize = 512 * 2 * pagesize; // 2 2MB huge pages. size_t compute_len = 512 * pagesize; unsigned long num_pages = compute_len / pagesize; char *data = gc(xmalloc(filesize)); ASSERT_SYS(0, filesize, getrandom(data, filesize, 0)); - ASSERT_SYS(0, 3, shm_open("tmpshmcstat", O_CREAT | O_RDWR, 0600)); + ASSERT_SYS(0, 3, shm_open(name, O_CREAT | O_RDWR, 0600)); ASSERT_SYS(0, 0, ftruncate(3, filesize)); ASSERT_SYS(0, filesize, write(3, data, filesize)); struct cachestat_range range = {pagesize, compute_len}; @@ -117,6 +120,6 @@ TEST(cachestat, testCachestatShmem) { ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0)); ASSERT_EQ(num_pages, cs.nr_cache + cs.nr_evicted, "total number of cached and evicted pages is off.\n"); - ASSERT_SYS(0, 0, shm_unlink("tmpshmcstat")); + ASSERT_SYS(0, 0, shm_unlink(name)); ASSERT_SYS(0, 0, close(3)); } diff --git a/test/libc/calls/raise_test.c b/test/libc/calls/raise_test.c index ee891715af3..481f207c336 100644 --- a/test/libc/calls/raise_test.c +++ b/test/libc/calls/raise_test.c @@ -56,9 +56,8 @@ int threadid; void WorkerQuit(int sig, siginfo_t *si, void *ctx) { ASSERT_EQ(SIGILL, sig); - if (!IsXnu() && !IsOpenbsd()) { + if (!IsXnu() && !IsOpenbsd()) ASSERT_EQ(SI_TKILL, si->si_code); - } ASSERT_EQ(threadid, gettid()); } diff --git a/test/libc/calls/shm_open_test.c b/test/libc/calls/shm_open_test.c index 3a83ea29836..1d8f71a2b24 100644 --- a/test/libc/calls/shm_open_test.c +++ b/test/libc/calls/shm_open_test.c @@ -9,6 +9,7 @@ #include "libc/dce.h" #include "libc/errno.h" #include "libc/runtime/runtime.h" +#include "libc/stdio/rand.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/sysv/consts/map.h" @@ -18,7 +19,6 @@ #include "libc/sysv/consts/sig.h" #include "libc/thread/semaphore.h" -#define SHM_PATH "/fc7261622dd420d8" #define STRING_SEND "hello" #define STRING_RECV "HELLO" @@ -29,13 +29,14 @@ struct shmbuf { char buf[256]; /* Data being transferred */ }; +char shm_path[64]; atomic_bool *ready; wontreturn void Bouncer(void) { /* Create shared memory object and set its size to the size of our structure. */ - int fd = shm_open(SHM_PATH, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR); + int fd = shm_open(shm_path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR); if (fd == -1) { perror("shm_open(bouncer)"); exit(1); @@ -96,7 +97,7 @@ wontreturn void Sender(void) { /* Open the existing shared memory object and map it into the caller's address space. */ - int fd = shm_open(SHM_PATH, O_RDWR, 0); + int fd = shm_open(shm_path, O_RDWR, 0); if (fd == -1) { perror("shm_open(sender)"); exit(1); @@ -136,7 +137,7 @@ wontreturn void Sender(void) { /* Unlink the shared memory object. Even if the peer process is still using the object, this is okay. The object will be removed only after all open references are closed. */ - if (shm_unlink(SHM_PATH)) { + if (shm_unlink(shm_path)) { if (IsWindows() && errno == EACCES) { // TODO(jart): Make unlink() work better on Windows. } else { @@ -154,7 +155,7 @@ int pid2; void OnExit(void) { kill(pid1, SIGKILL); kill(pid2, SIGKILL); - shm_unlink(SHM_PATH); + shm_unlink(shm_path); } void OnTimeout(int sig) { @@ -164,6 +165,9 @@ void OnTimeout(int sig) { int main(int argc, char *argv[]) { + // create random shared memory name + sprintf(shm_path, "/shm_open_test-%ld", _rand64()); + // create synchronization object ready = _mapshared(1); diff --git a/test/posix/mutex_async_signal_safety_test.c b/test/posix/mutex_async_signal_safety_test.c deleted file mode 100644 index da6d2020b25..00000000000 --- a/test/posix/mutex_async_signal_safety_test.c +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2024 Justine Alexandra Roberts Tunney -// -// Permission to use, copy, modify, and/or distribute this software for -// any purpose with or without fee is hereby granted, provided that the -// above copyright notice and this permission notice appear in all copies. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL -// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL -// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR -// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -// PERFORMANCE OF THIS SOFTWARE. - -#include -#include -#include -#include -#include -#include -#include - -// tests that recursive mutexes are implemented atomically -// -// glibc fails this test -// musl passes this test -// cosmo only guarantees this in process-shared non-debug mode - -atomic_bool done; -atomic_bool ready; -pthread_mutex_t lock; - -void hand(int sig) { - if (pthread_mutex_lock(&lock)) - _Exit(50); - if (pthread_mutex_unlock(&lock)) - _Exit(51); -} - -void* work(void* arg) { - ready = true; - while (!done) { - if (pthread_mutex_lock(&lock)) - _Exit(60); - if (pthread_mutex_unlock(&lock)) - _Exit(61); - } - return 0; -} - -int main() { - - if (IsQemuUser()) { - // qemu is believed to be the one at fault - kprintf("mutex_async_signal_safety_test flakes on qemu\n"); - return 0; - } - - if (IsModeDbg()) { - // the deadlock detector gets in the way of our glorious spin lock - kprintf("mutex_async_signal_safety_test not feasible in debug mode\n"); - return 0; - } - - struct sigaction sa; - sa.sa_handler = hand; - sa.sa_flags = SA_NODEFER; - sigemptyset(&sa.sa_mask); - if (sigaction(SIGUSR1, &sa, 0)) - _Exit(1); - - pthread_mutexattr_t attr; - if (pthread_mutexattr_init(&attr)) - _Exit(2); - if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) - _Exit(3); - if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) - _Exit(3); - if (pthread_mutex_init(&lock, &attr)) - _Exit(4); - if (pthread_mutexattr_destroy(&attr)) - _Exit(5); - - pthread_t th; - pthread_attr_t tattr; - if (pthread_attr_init(&tattr)) - _Exit(6); - if (pthread_attr_setstacksize(&tattr, 8 * 1024 * 1024)) - _Exit(7); - if (pthread_attr_setguardsize(&tattr, 64 * 1024)) - _Exit(8); - if (pthread_create(&th, &tattr, work, 0)) - _Exit(9); - if (pthread_attr_destroy(&tattr)) - _Exit(10); - for (;;) - if (ready) - break; - - for (int i = 0; i < 100; ++i) { - if (pthread_kill(th, SIGUSR1)) - _Exit(11); - if (pthread_kill(th, SIGUSR1)) - _Exit(12); - usleep(1); - } - - done = true; - if (pthread_join(th, 0)) - _Exit(13); - if (pthread_mutex_destroy(&lock)) - _Exit(14); -} diff --git a/tool/build/BUILD.mk b/tool/build/BUILD.mk index afd949f857e..2d37a2bd0ed 100644 --- a/tool/build/BUILD.mk +++ b/tool/build/BUILD.mk @@ -138,8 +138,8 @@ o/$(MODE)/tool/build/dso/dlopen_helper.so: \ o/$(MODE)/tool/build/dso/dlopen_helper.o \ $(OUTPUT_OPTION) -o/$(MODE)/tool/build/dlopen_test.runs: \ - o/$(MODE)/tool/build/dlopen_test \ +o/$(MODE)/tool/build/dlopen_tester.runs: \ + o/$(MODE)/tool/build/dlopen_tester \ o/$(MODE)/tool/build/dso/dlopen_helper.so $< o/$(MODE)/tool/build/dso/dlopen_helper.so diff --git a/tool/build/dlopen_test.c b/tool/build/dlopen_tester.c similarity index 100% rename from tool/build/dlopen_test.c rename to tool/build/dlopen_tester.c diff --git a/tool/scripts/flakes b/tool/scripts/flakes new file mode 100755 index 00000000000..315cb24c401 --- /dev/null +++ b/tool/scripts/flakes @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +import os +import sys +import subprocess +import concurrent.futures +from collections import Counter +from typing import List, Dict, Tuple + +NUM_PARALLEL = int(os.cpu_count() * 1.5) + +def find_test_files(root_dir: str) -> List[str]: + """Find all executable files ending with _test recursively.""" + test_files = [] + for root, _, files in os.walk(root_dir): + for file in files: + if file.endswith('_test'): + file_path = os.path.join(root, file) + if os.access(file_path, os.X_OK): + test_files.append(file_path) + return test_files + +def run_single_test(test_path: str) -> int: + """Run a single test and return its exit code.""" + try: + result = subprocess.run([test_path], capture_output=False) + return result.returncode + except Exception as e: + print(f"Error running {test_path}: {e}") + return -1 + +def run_test_multiple_times(test_path: str, iterations: int = NUM_PARALLEL) -> List[int]: + """Run a test multiple times in parallel and collect exit codes.""" + with concurrent.futures.ProcessPoolExecutor() as executor: + futures = [executor.submit(run_single_test, test_path) for _ in range(iterations)] + return [f.result() for f in concurrent.futures.as_completed(futures)] + +def analyze_results(test_path: str, exit_codes: List[int]) -> Tuple[bool, Dict[int, int]]: + """Analyze test results and return if it flaked and error distribution.""" + error_counts = Counter(code for code in exit_codes if code != 0) + return bool(error_counts), dict(error_counts) + +def print_flaky_report(test_path: str, error_distribution: Dict[int, int], total_runs: int): + """Print a report for a flaky test.""" + print(f"{test_path} flaked!") + for exit_code, count in error_distribution.items(): + print(f"* {count}/{total_runs} processes died with exit code {exit_code}") + +def main(directory = "o"): + test_files = find_test_files(directory) + for i, test_path in enumerate(test_files): + print("testing [%d/%d] %s..." % (i, len(test_files), test_path)) + sys.stdout.flush() + exit_codes = run_test_multiple_times(test_path) + is_flaky, error_distribution = analyze_results(test_path, exit_codes) + if is_flaky: + print_flaky_report(test_path, error_distribution, len(exit_codes)) + sys.exit(1) + +if __name__ == "__main__": + main(*sys.argv[1:])