Skip to content

Commit

Permalink
Build tool for hunting down flakes
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed Dec 24, 2024
1 parent 93e22c5 commit 2de3845
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 131 deletions.
3 changes: 1 addition & 2 deletions libc/calls/shm_path_np.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@ void shm_path_np(const char *name, char buf[hasatleast 78]) {
const char *a;
uint8_t digest[BLAKE2B256_DIGEST_LENGTH];
a = "/tmp/", n = 5;
if (IsLinux() && isdirectory("/dev/shm")) {
if (IsLinux() && isdirectory("/dev/shm"))
a = "/dev/shm/", n = 9;
}
BLAKE2B256(name, strlen(name), digest);
p = mempcpy(buf, a, n);
p = hexpcpy(p, digest, BLAKE2B256_DIGEST_LENGTH);
Expand Down
1 change: 0 additions & 1 deletion libc/intrin/pthread_mutex_lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,6 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex,
*
* - `PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP`
* - `PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP`
* - `PTHREAD_SIGNAL_SAFE_MUTEX_INITIALIZER_NP`
* - `PTHREAD_NORMAL_MUTEX_INITIALIZER_NP`
*
* Locking a mutex that's already locked by the calling thread will make
Expand Down
3 changes: 0 additions & 3 deletions libc/thread/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@ COSMOPOLITAN_C_START_
#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_RECURSIVE}
#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_ERRORCHECK}

#define PTHREAD_SIGNAL_SAFE_MUTEX_INITIALIZER_NP \
{0, PTHREAD_MUTEX_RECURSIVE | PTHREAD_PROCESS_SHARED}

#ifndef __cplusplus
#define _PTHREAD_ATOMIC(x) _Atomic(x)
#else
Expand Down
7 changes: 5 additions & 2 deletions test/libc/calls/cachestat_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/o.h"
#include "libc/testlib/testlib.h"
Expand Down Expand Up @@ -104,19 +105,21 @@ TEST(cachestat, testCachestatSyncNoDirty) {
}

TEST(cachestat, testCachestatShmem) {
char name[64];
sprintf(name, "/cachestat_test-%ld", _rand64());
size_t filesize = 512 * 2 * pagesize; // 2 2MB huge pages.
size_t compute_len = 512 * pagesize;
unsigned long num_pages = compute_len / pagesize;
char *data = gc(xmalloc(filesize));
ASSERT_SYS(0, filesize, getrandom(data, filesize, 0));
ASSERT_SYS(0, 3, shm_open("tmpshmcstat", O_CREAT | O_RDWR, 0600));
ASSERT_SYS(0, 3, shm_open(name, O_CREAT | O_RDWR, 0600));
ASSERT_SYS(0, 0, ftruncate(3, filesize));
ASSERT_SYS(0, filesize, write(3, data, filesize));
struct cachestat_range range = {pagesize, compute_len};
struct cachestat cs;
ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0));
ASSERT_EQ(num_pages, cs.nr_cache + cs.nr_evicted,
"total number of cached and evicted pages is off.\n");
ASSERT_SYS(0, 0, shm_unlink("tmpshmcstat"));
ASSERT_SYS(0, 0, shm_unlink(name));
ASSERT_SYS(0, 0, close(3));
}
3 changes: 1 addition & 2 deletions test/libc/calls/raise_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,8 @@ int threadid;

void WorkerQuit(int sig, siginfo_t *si, void *ctx) {
ASSERT_EQ(SIGILL, sig);
if (!IsXnu() && !IsOpenbsd()) {
if (!IsXnu() && !IsOpenbsd())
ASSERT_EQ(SI_TKILL, si->si_code);
}
ASSERT_EQ(threadid, gettid());
}

Expand Down
14 changes: 9 additions & 5 deletions test/libc/calls/shm_open_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
Expand All @@ -18,7 +19,6 @@
#include "libc/sysv/consts/sig.h"
#include "libc/thread/semaphore.h"

#define SHM_PATH "/fc7261622dd420d8"
#define STRING_SEND "hello"
#define STRING_RECV "HELLO"

Expand All @@ -29,13 +29,14 @@ struct shmbuf {
char buf[256]; /* Data being transferred */
};

char shm_path[64];
atomic_bool *ready;

wontreturn void Bouncer(void) {

/* Create shared memory object and set its size to the size
of our structure. */
int fd = shm_open(SHM_PATH, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
int fd = shm_open(shm_path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror("shm_open(bouncer)");
exit(1);
Expand Down Expand Up @@ -96,7 +97,7 @@ wontreturn void Sender(void) {

/* Open the existing shared memory object and map it
into the caller's address space. */
int fd = shm_open(SHM_PATH, O_RDWR, 0);
int fd = shm_open(shm_path, O_RDWR, 0);
if (fd == -1) {
perror("shm_open(sender)");
exit(1);
Expand Down Expand Up @@ -136,7 +137,7 @@ wontreturn void Sender(void) {
/* Unlink the shared memory object. Even if the peer process
is still using the object, this is okay. The object will
be removed only after all open references are closed. */
if (shm_unlink(SHM_PATH)) {
if (shm_unlink(shm_path)) {
if (IsWindows() && errno == EACCES) {
// TODO(jart): Make unlink() work better on Windows.
} else {
Expand All @@ -154,7 +155,7 @@ int pid2;
void OnExit(void) {
kill(pid1, SIGKILL);
kill(pid2, SIGKILL);
shm_unlink(SHM_PATH);
shm_unlink(shm_path);
}

void OnTimeout(int sig) {
Expand All @@ -164,6 +165,9 @@ void OnTimeout(int sig) {

int main(int argc, char *argv[]) {

// create random shared memory name
sprintf(shm_path, "/shm_open_test-%ld", _rand64());

// create synchronization object
ready = _mapshared(1);

Expand Down
114 changes: 0 additions & 114 deletions test/posix/mutex_async_signal_safety_test.c

This file was deleted.

4 changes: 2 additions & 2 deletions tool/build/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ o/$(MODE)/tool/build/dso/dlopen_helper.so: \
o/$(MODE)/tool/build/dso/dlopen_helper.o \
$(OUTPUT_OPTION)

o/$(MODE)/tool/build/dlopen_test.runs: \
o/$(MODE)/tool/build/dlopen_test \
o/$(MODE)/tool/build/dlopen_tester.runs: \
o/$(MODE)/tool/build/dlopen_tester \
o/$(MODE)/tool/build/dso/dlopen_helper.so
$< o/$(MODE)/tool/build/dso/dlopen_helper.so

Expand Down
File renamed without changes.
60 changes: 60 additions & 0 deletions tool/scripts/flakes
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
import os
import sys
import subprocess
import concurrent.futures
from collections import Counter
from typing import List, Dict, Tuple

# Number of concurrent runs launched per test: 1.5x the reported CPU count.
# os.cpu_count() returns None when the count cannot be determined, so fall
# back to a single CPU instead of crashing with a TypeError at import time.
NUM_PARALLEL = max(1, int((os.cpu_count() or 1) * 1.5))

def find_test_files(root_dir: str) -> List[str]:
    """Find all executable regular files ending with ``_test`` under a tree.

    Args:
        root_dir: Directory to search recursively. A missing directory
            yields an empty result, because ``os.walk`` ignores errors by
            default.

    Returns:
        Paths (each prefixed with ``root_dir``) of every regular file whose
        name ends with ``_test`` and that the current user may execute,
        sorted so repeated runs visit the tests in the same order.
    """
    test_files = []
    for root, _, files in os.walk(root_dir):
        for name in files:
            if not name.endswith('_test'):
                continue
            file_path = os.path.join(root, name)
            # os.walk lists every non-directory entry under `files`, so
            # filter out FIFOs, sockets and dangling symlinks explicitly.
            if os.path.isfile(file_path) and os.access(file_path, os.X_OK):
                test_files.append(file_path)
    # os.walk order depends on the filesystem; sort for reproducible runs.
    test_files.sort()
    return test_files

def run_single_test(test_path: str) -> int:
    """Execute one test binary and report how it exited.

    The child inherits this process's stdout/stderr (output is not
    captured). Returns the child's return code, or -1 if the process could
    not be run at all, in which case the error is printed.
    """
    try:
        completed = subprocess.run([test_path], capture_output=False)
    except Exception as exc:
        # Best effort: a test that cannot even be launched counts as a
        # failure rather than aborting the whole hunt.
        print(f"Error running {test_path}: {exc}")
        return -1
    else:
        return completed.returncode

def run_test_multiple_times(test_path: str, iterations: int = NUM_PARALLEL) -> List[int]:
    """Run a test ``iterations`` times concurrently and collect exit codes.

    Args:
        test_path: Path of the test executable to launch.
        iterations: Number of simultaneous runs to start.

    Returns:
        One exit code per run, in completion order (not submission order).
        Empty when ``iterations`` is not positive.
    """
    if iterations <= 0:
        # An executor cannot be created with zero workers; nothing to run.
        return []
    # Each job only blocks waiting on a child process, so threads suffice.
    # Sizing the pool to `iterations` guarantees every run starts at once;
    # the default pool size would cap concurrency near the CPU count and
    # serialize the surplus runs.
    with concurrent.futures.ThreadPoolExecutor(max_workers=iterations) as executor:
        futures = [executor.submit(run_single_test, test_path) for _ in range(iterations)]
        return [f.result() for f in concurrent.futures.as_completed(futures)]

def analyze_results(test_path: str, exit_codes: List[int]) -> Tuple[bool, Dict[int, int]]:
    """Summarize the exit codes gathered from repeated runs of one test.

    Returns a pair ``(flaked, distribution)`` where ``distribution`` maps
    each nonzero exit code to the number of runs that produced it and
    ``flaked`` is True when at least one run exited nonzero. ``test_path``
    is accepted for interface symmetry but is not consulted.
    """
    failures = [code for code in exit_codes if code != 0]
    distribution = dict(Counter(failures))
    flaked = len(distribution) > 0
    return flaked, distribution

def print_flaky_report(test_path: str, error_distribution: Dict[int, int], total_runs: int) -> None:
    """Write a short flake summary for one test to stdout.

    Emits a headline naming the test, then one bullet per nonzero exit code
    stating how many of the ``total_runs`` runs ended with that code.
    """
    report = [f"{test_path} flaked!"]
    report.extend(
        f"* {count}/{total_runs} processes died with exit code {exit_code}"
        for exit_code, count in error_distribution.items()
    )
    print(*report, sep="\n")

def main(directory: str = "o") -> None:
    """Hunt for flaky tests under ``directory``.

    Each discovered test is run many times concurrently. The first test
    that produces any nonzero exit code is reported and the script exits
    with status 1; if no test flakes, the function returns normally.

    Args:
        directory: Root of the tree to scan for ``*_test`` executables.
    """
    test_files = find_test_files(directory)
    total = len(test_files)
    # Count from 1 so progress reads [1/N] .. [N/N] rather than [0/N] ..
    for index, test_path in enumerate(test_files, start=1):
        # Flush so the progress line appears before the children's output,
        # which is written directly to the inherited stdout.
        print("testing [%d/%d] %s..." % (index, total, test_path), flush=True)
        exit_codes = run_test_multiple_times(test_path)
        is_flaky, error_distribution = analyze_results(test_path, exit_codes)
        if is_flaky:
            print_flaky_report(test_path, error_distribution, len(exit_codes))
            # Stop at the first flake so its output stays at the bottom.
            sys.exit(1)

# Script entry point: positional command-line arguments are forwarded to
# main(), so an optional first argument selects the directory to scan.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(*cli_args)

0 comments on commit 2de3845

Please sign in to comment.