From 327863d5db4bad30864410689b05d64a371afcae Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 25 Dec 2024 19:43:43 -0800 Subject: [PATCH] Reduce stack virtual memory consumption on Linux --- examples/greenbean.c | 2 +- examples/stackexplorer.c | 18 +- examples/thread.c | 17 ++ libc/calls/getrlimit.c | 4 +- libc/calls/setrlimit.c | 5 +- libc/cosmo.h | 4 +- libc/intrin/describemapflags.c | 26 ++- libc/intrin/describeprotflags.c | 2 + libc/intrin/getmainstack.c | 16 +- libc/{calls => intrin}/isqemu.c | 0 libc/intrin/lockless.h | 50 +++++ libc/intrin/maps.h | 10 +- libc/intrin/mmap.c | 50 +++-- libc/intrin/rlimit.h | 10 + libc/intrin/rlimitstack.c | 76 +++++++ libc/intrin/sig.c | 5 +- libc/intrin/stack.c | 257 +++++++++++++++++++----- libc/proc/proc.c | 3 +- libc/thread/itimer.c | 5 +- libc/thread/mapstack.c | 4 +- libc/thread/pthread_attr_getguardsize.c | 2 +- libc/thread/pthread_attr_getstack.c | 8 +- libc/thread/pthread_attr_init.c | 2 +- libc/thread/pthread_attr_setguardsize.c | 8 +- libc/thread/pthread_attr_setstack.c | 60 ++---- libc/thread/pthread_attr_setstacksize.c | 17 +- libc/thread/thread.h | 6 +- test/libc/intrin/stack_test.c | 75 +++++++ test/libc/thread/pthread_cancel_test.c | 2 + test/libc/thread/pthread_create_test.c | 1 + test/posix/signal_latency_test.c | 4 + 31 files changed, 583 insertions(+), 166 deletions(-) create mode 100644 examples/thread.c rename libc/{calls => intrin}/isqemu.c (100%) create mode 100644 libc/intrin/lockless.h create mode 100644 libc/intrin/rlimit.h create mode 100644 libc/intrin/rlimitstack.c create mode 100644 test/libc/intrin/stack_test.c diff --git a/examples/greenbean.c b/examples/greenbean.c index fda9ae99930..eca939a7ba3 100644 --- a/examples/greenbean.c +++ b/examples/greenbean.c @@ -337,7 +337,7 @@ int main(int argc, char *argv[]) { sigaddset(&block, SIGQUIT); pthread_attr_t attr; unassert(!pthread_attr_init(&attr)); - unassert(!pthread_attr_setstacksize(&attr, 65536)); + 
unassert(!pthread_attr_setstacksize(&attr, 65536 - getpagesize())); unassert(!pthread_attr_setguardsize(&attr, getpagesize())); unassert(!pthread_attr_setsigmask_np(&attr, &block)); unassert(!pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0)); diff --git a/examples/stackexplorer.c b/examples/stackexplorer.c index 5b5d9add9ad..96c34114c03 100644 --- a/examples/stackexplorer.c +++ b/examples/stackexplorer.c @@ -7,9 +7,13 @@ │ • http://creativecommons.org/publicdomain/zero/1.0/ │ ╚─────────────────────────────────────────────────────────────────*/ #endif +#include "libc/dce.h" +#include "libc/intrin/maps.h" #include "libc/mem/alg.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" +#include "libc/runtime/stack.h" +#include "libc/runtime/winargs.internal.h" #include "libc/stdio/stdio.h" #include "libc/x/xasprintf.h" @@ -67,8 +71,18 @@ int main(int argc, char *argv[]) { Append((uintptr_t)&__auxv[i + 1], xasprintf("&auxv[%d] = %#lx", i + 1, __auxv[i + 1])); } + if (!IsWindows()) { + struct AddrSize stak = __get_main_stack(); + Append((intptr_t)stak.addr + stak.size, "top of stack"); + Append((intptr_t)stak.addr, "bottom of stack"); + } else { +#ifdef __x86_64__ + Append(GetStaticStackAddr(0) + GetStaticStackSize(), "top of stack"); + Append(GetStaticStackAddr(0) + GetGuardSize(), "bottom of stack"); + Append(GetStaticStackAddr(0), "bottom of guard region"); +#endif + } qsort(things.p, things.n, sizeof(*things.p), Compare); - for (int i = 0; i < things.n; ++i) { + for (int i = 0; i < things.n; ++i) printf("%012lx %s\n", things.p[i].i, things.p[i].s); - } } diff --git a/examples/thread.c b/examples/thread.c new file mode 100644 index 00000000000..283c2f8b09c --- /dev/null +++ b/examples/thread.c @@ -0,0 +1,17 @@ +#include <pthread.h> +#include <stdio.h> + +// how to spawn a thread + +void *my_thread(void *arg) { + printf("my_thread(%p) is running\n", arg); + return (void *)0x456L; +} + +int main(int argc, char *argv[]) { + void *res; + pthread_t th; + pthread_create(&th, 0, 
my_thread, (void *)0x123L); + pthread_join(th, &res); + printf("my_thread() returned %p\n", res); +} diff --git a/libc/calls/getrlimit.c b/libc/calls/getrlimit.c index de7df079e71..d2a826edacd 100644 --- a/libc/calls/getrlimit.c +++ b/libc/calls/getrlimit.c @@ -21,6 +21,7 @@ #include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" #include "libc/intrin/describeflags.h" +#include "libc/intrin/rlimit.h" #include "libc/intrin/strace.h" #include "libc/runtime/runtime.h" #include "libc/runtime/stack.h" @@ -47,8 +48,7 @@ int getrlimit(int resource, struct rlimit *rlim) { } else if (!IsWindows()) { rc = sys_getrlimit(resource, rlim); } else if (resource == RLIMIT_STACK) { - rlim->rlim_cur = GetStaticStackSize(); - rlim->rlim_max = GetStaticStackSize(); + *rlim = __rlimit_stack_get(); rc = 0; } else if (resource == RLIMIT_AS) { rlim->rlim_cur = __virtualmax; diff --git a/libc/calls/setrlimit.c b/libc/calls/setrlimit.c index 6b832848941..0a2b12ffa7c 100644 --- a/libc/calls/setrlimit.c +++ b/libc/calls/setrlimit.c @@ -23,6 +23,7 @@ #include "libc/dce.h" #include "libc/errno.h" #include "libc/intrin/describeflags.h" +#include "libc/intrin/rlimit.h" #include "libc/intrin/strace.h" #include "libc/macros.h" #include "libc/runtime/runtime.h" @@ -88,10 +89,12 @@ int setrlimit(int resource, const struct rlimit *rlim) { } else if (!IsWindows() && !(IsNetbsd() && resource == RLIMIT_AS)) { rc = sys_setrlimit(resource, rlim); } else if (resource == RLIMIT_STACK) { - rc = enotsup(); + rc = 0; } else { rc = einval(); } + if (!rc && resource == RLIMIT_STACK) + __rlimit_stack_set(*rlim); // so __rlimit_stack_get() works on all OSes if (resource == RLIMIT_AS) { __virtualmax = rlim->rlim_cur; errno = olde; diff --git a/libc/cosmo.h b/libc/cosmo.h index d53c3045f34..e2691587a57 100644 --- a/libc/cosmo.h +++ b/libc/cosmo.h @@ -25,8 +25,8 @@ int cosmo_futex_wake(_COSMO_ATOMIC(int) *, int, char); int cosmo_futex_wait(_COSMO_ATOMIC(int) *, int, char, int, const struct timespec *); 
-errno_t cosmo_stack_alloc(unsigned *, unsigned *, void **) libcesque; -errno_t cosmo_stack_free(void *, unsigned, unsigned) libcesque; +errno_t cosmo_stack_alloc(size_t *, size_t *, void **) libcesque; +errno_t cosmo_stack_free(void *, size_t, size_t) libcesque; void cosmo_stack_clear(void) libcesque; void cosmo_stack_setmaxstacks(int) libcesque; int cosmo_stack_getmaxstacks(void) libcesque; diff --git a/libc/intrin/describemapflags.c b/libc/intrin/describemapflags.c index 9367ee08328..7d6461b1946 100644 --- a/libc/intrin/describemapflags.c +++ b/libc/intrin/describemapflags.c @@ -16,25 +16,29 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" #include "libc/intrin/describeflags.h" #include "libc/macros.h" #include "libc/nt/enum/consolemodeflags.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" +#define MAP_GROWSDOWN_LINUX 0x00000100 + const char *_DescribeMapFlags(char buf[64], int x) { const struct DescribeFlags kMapFlags[] = { - {MAP_PRIVATE, "PRIVATE"}, // - {MAP_ANONYMOUS, "ANONYMOUS"}, // - {MAP_SHARED, "SHARED"}, // - {MAP_FIXED, "FIXED"}, // - {MAP_FIXED_NOREPLACE, "FIXED_NOREPLACE"}, // - {MAP_HUGETLB, "HUGETLB"}, // - {MAP_CONCEAL, "CONCEAL"}, // - {MAP_LOCKED, "LOCKED"}, // - {MAP_NORESERVE, "NORESERVE"}, // - {MAP_NONBLOCK, "NONBLOCK"}, // - {MAP_POPULATE, "POPULATE"}, // + {MAP_PRIVATE, "PRIVATE"}, // + {MAP_ANONYMOUS, "ANONYMOUS"}, // + {MAP_SHARED, "SHARED"}, // + {MAP_FIXED, "FIXED"}, // + {MAP_FIXED_NOREPLACE, "FIXED_NOREPLACE"}, // + {MAP_HUGETLB, "HUGETLB"}, // + {MAP_CONCEAL, "CONCEAL"}, // + {MAP_LOCKED, "LOCKED"}, // + {MAP_NORESERVE, "NORESERVE"}, // + {MAP_NONBLOCK, "NONBLOCK"}, // + {MAP_POPULATE, "POPULATE"}, // + {IsLinux() ? 
MAP_GROWSDOWN_LINUX : 0, "GROWSDOWN"}, // }; return _DescribeFlags(buf, 64, kMapFlags, ARRAYLEN(kMapFlags), "MAP_", x); } diff --git a/libc/intrin/describeprotflags.c b/libc/intrin/describeprotflags.c index 44008757bab..9fad2bd3214 100644 --- a/libc/intrin/describeprotflags.c +++ b/libc/intrin/describeprotflags.c @@ -21,6 +21,8 @@ #include "libc/sysv/consts/prot.h" const char *_DescribeProtFlags(char buf[48], int x) { + if (!x) + return "PROT_NONE"; const struct DescribeFlags kProtFlags[] = { {PROT_READ, "READ"}, // {PROT_WRITE, "WRITE"}, // diff --git a/libc/intrin/getmainstack.c b/libc/intrin/getmainstack.c index 5aa21a6d6f8..afcf18e5a4b 100644 --- a/libc/intrin/getmainstack.c +++ b/libc/intrin/getmainstack.c @@ -17,16 +17,13 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/struct/rlimit.h" -#include "libc/calls/struct/rlimit.internal.h" -#include "libc/dce.h" #include "libc/intrin/getauxval.h" -#include "libc/intrin/kprintf.h" #include "libc/intrin/maps.h" +#include "libc/intrin/rlimit.h" #include "libc/macros.h" #include "libc/runtime/runtime.h" +#include "libc/stdio/sysparam.h" #include "libc/sysv/consts/auxv.h" -#include "libc/sysv/consts/rlim.h" -#include "libc/sysv/consts/rlimit.h" // Hack for guessing boundaries of _start()'s stack // @@ -91,12 +88,9 @@ static uintptr_t __get_main_top(int pagesz) { } static size_t __get_stack_size(int pagesz, uintptr_t start, uintptr_t top) { - size_t size, max = 8 * 1024 * 1024; - struct rlimit rlim = {RLIM_INFINITY}; - sys_getrlimit(RLIMIT_STACK, &rlim); - if ((size = rlim.rlim_cur) > max) - size = max; - return MAX(ROUNDUP(size, pagesz), ROUNDUP(top - start, pagesz)); + size_t stacksz = __rlimit_stack_get().rlim_cur; + stacksz = MIN(stacksz, 1024ul * 1024 * 1024 * 1024); + return MAX(ROUNDDOWN(stacksz, pagesz), ROUNDUP(top - start, pagesz)); } /** diff --git a/libc/calls/isqemu.c b/libc/intrin/isqemu.c similarity index 100% rename 
from libc/calls/isqemu.c rename to libc/intrin/isqemu.c diff --git a/libc/intrin/lockless.h b/libc/intrin/lockless.h new file mode 100644 index 00000000000..7855f16c234 --- /dev/null +++ b/libc/intrin/lockless.h @@ -0,0 +1,50 @@ +#ifndef COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_ +#define COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_ +#include "libc/atomic.h" +#include "libc/intrin/atomic.h" +COSMOPOLITAN_C_START_ + +// lockless memory transactions +// +// - one writer +// - many readers +// - generation is monotonic +// - even numbers mean memory is ready +// - odd numbers mean memory is actively being changed +// - always use acquire semantics inside your read transaction +// +// let's say you want to be able to atomically read and write to 128-bit +// values, but you've only got a 64-bit system. if you expect that it'll +// frequently written, then you should use a mutex. but if you expect it +// to be frequently read and rarely written, then it's possible to do it +// without a mutex; in fact you don't even need the x86 lock instruction +// prefix; all that is required is a series of carefully ordered mov ops +// which are designed to exploit the strong ordering of the architecture + +static inline unsigned lockless_write_begin(atomic_uint* genptr) { + unsigned gen = atomic_load_explicit(genptr, memory_order_acquire); + atomic_store_explicit(genptr, gen + 1, memory_order_release); + return gen; +} + +static inline void lockless_write_end(atomic_uint* genptr, unsigned gen) { + atomic_store_explicit(genptr, gen + 2, memory_order_release); +} + +static inline unsigned lockless_read_begin(atomic_uint* genptr) { + return atomic_load_explicit(genptr, memory_order_acquire); +} + +static inline bool lockless_read_end(atomic_uint* genptr, unsigned* want) { + unsigned gen1 = *want; + unsigned gen2 = atomic_load_explicit(genptr, memory_order_acquire); + unsigned is_being_actively_changed = gen1 & 1; + unsigned we_lost_race_with_writers = gen1 ^ gen2; + if (!(is_being_actively_changed | 
we_lost_race_with_writers)) + return true; + *want = gen2; + return false; +} + +COSMOPOLITAN_C_END_ +#endif /* COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_ */ diff --git a/libc/intrin/maps.h b/libc/intrin/maps.h index ad439448d2e..c8291f6ac9b 100644 --- a/libc/intrin/maps.h +++ b/libc/intrin/maps.h @@ -57,7 +57,8 @@ void *__maps_randaddr(void); void __maps_add(struct Map *); void __maps_free(struct Map *); void __maps_insert(struct Map *); -bool __maps_track(char *, size_t); +int __maps_untrack(char *, size_t); +bool __maps_track(char *, size_t, int, int); struct Map *__maps_alloc(void); struct Map *__maps_floor(const char *); void __maps_stack(char *, int, int, size_t, int, intptr_t); @@ -78,6 +79,13 @@ static inline struct Map *__maps_next(struct Map *map) { return 0; } +static inline struct Map *__maps_prev(struct Map *map) { + struct Tree *node; + if ((node = tree_prev(&map->tree))) + return MAP_TREE_CONTAINER(node); + return 0; +} + static inline struct Map *__maps_first(void) { struct Tree *node; if ((node = tree_first(__maps.maps))) diff --git a/libc/intrin/mmap.c b/libc/intrin/mmap.c index c35e83466e4..af4870ee967 100644 --- a/libc/intrin/mmap.c +++ b/libc/intrin/mmap.c @@ -41,7 +41,7 @@ #include "libc/sysv/consts/prot.h" #include "libc/sysv/errfuns.h" -#define MMDEBUG 0 +#define MMDEBUG 1 #define MAX_SIZE 0x0ff800000000ul #define MAX_TRIES 50 @@ -85,7 +85,8 @@ privileged optimizespeed struct Map *__maps_floor(const char *addr) { return 0; } -static bool __maps_overlaps(const char *addr, size_t size, int pagesz) { +static bool __maps_overlaps(const char *addr, size_t size) { + int pagesz = __pagesize; struct Map *map, *floor = __maps_floor(addr); for (map = floor; map && map->addr <= addr + size; map = __maps_next(map)) if (MAX(addr, map->addr) < @@ -305,27 +306,39 @@ void __maps_insert(struct Map *map) { } static void __maps_track_insert(struct Map *map, char *addr, size_t size, - uintptr_t map_handle) { + uintptr_t map_handle, int prot, int flags) { map->addr = 
addr; map->size = size; - map->prot = PROT_READ | PROT_WRITE; - map->flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK; + map->prot = prot; + map->flags = flags; map->hand = map_handle; __maps_lock(); + ASSERT(!__maps_overlaps(addr, size)); __maps_insert(map); __maps_unlock(); } -bool __maps_track(char *addr, size_t size) { +// adds interval to rbtree (no sys_mmap) +bool __maps_track(char *addr, size_t size, int prot, int flags) { struct Map *map; do { if (!(map = __maps_alloc())) return false; } while (map == MAPS_RETRY); - __maps_track_insert(map, addr, size, -1); + __maps_track_insert(map, addr, size, -1, prot, flags); return true; } +// removes interval from rbtree (no sys_munmap) +int __maps_untrack(char *addr, size_t size) { + struct Map *deleted = 0; + __maps_lock(); + int rc = __muntrack(addr, size, __pagesize, &deleted); + __maps_unlock(); + __maps_free_all(deleted); + return rc; +} + struct Map *__maps_alloc(void) { struct Map *map; uintptr_t tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed); @@ -342,7 +355,9 @@ struct Map *__maps_alloc(void) { if (sys.addr == MAP_FAILED) return 0; map = sys.addr; - __maps_track_insert(map, sys.addr, gransz, sys.maphandle); + __maps_track_insert(map, sys.addr, gransz, sys.maphandle, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK); for (int i = 1; i < gransz / sizeof(struct Map); ++i) __maps_free(map + i); return MAPS_RETRY; @@ -370,7 +385,7 @@ static int __munmap(char *addr, size_t size) { size_t pgup_size = (size + pagesz - 1) & -pagesz; size_t grup_size = (size + gransz - 1) & -gransz; if (grup_size > pgup_size) - if (__maps_overlaps(addr + pgup_size, grup_size - pgup_size, pagesz)) { + if (__maps_overlaps(addr + pgup_size, grup_size - pgup_size)) { __maps_unlock(); return einval(); } @@ -420,7 +435,7 @@ static void *__maps_pickaddr(size_t size) { __maps.pick = 0; if (!addr) addr = __maps_randaddr(); - if (!__maps_overlaps(addr, size, __pagesize)) { + if (!__maps_overlaps(addr, size)) 
{ __maps.pick = addr + ((size + __gransize - 1) & -__gransize); __maps_unlock(); return addr; @@ -455,7 +470,7 @@ static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd, sysflags |= MAP_FIXED_NOREPLACE_linux; } else if (IsFreebsd() || IsNetbsd()) { sysflags |= MAP_FIXED; - if (__maps_overlaps(addr, size, pagesz)) { + if (__maps_overlaps(addr, size)) { __maps_free(map); return (void *)eexist(); } @@ -508,11 +523,8 @@ static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd, } // untrack mapping we blew away - if (!IsWindows() && should_untrack) { - struct Map *deleted = 0; - __muntrack(res.addr, size, pagesz, &deleted); - __maps_free_all(deleted); - } + if (!IsWindows() && should_untrack) + __maps_untrack(res.addr, size); // track map object map->addr = res.addr; @@ -599,8 +611,8 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size, size_t pgup_old_size = (old_size + pagesz - 1) & -pagesz; size_t grup_old_size = (old_size + gransz - 1) & -gransz; if (grup_old_size > pgup_old_size) - if (__maps_overlaps(old_addr + pgup_old_size, grup_old_size - pgup_old_size, - pagesz)) + if (__maps_overlaps(old_addr + pgup_old_size, + grup_old_size - pgup_old_size)) return (void *)einval(); old_size = pgup_old_size; @@ -611,7 +623,7 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size, size_t grup_new_size = (new_size + gransz - 1) & -gransz; if (grup_new_size > pgup_new_size) if (__maps_overlaps(new_addr + pgup_new_size, - grup_new_size - pgup_new_size, pagesz)) + grup_new_size - pgup_new_size)) return (void *)einval(); } diff --git a/libc/intrin/rlimit.h b/libc/intrin/rlimit.h new file mode 100644 index 00000000000..05d0fb96ee3 --- /dev/null +++ b/libc/intrin/rlimit.h @@ -0,0 +1,10 @@ +#ifndef COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_ +#define COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_ +#include "libc/calls/struct/rlimit.h" +COSMOPOLITAN_C_START_ + +void __rlimit_stack_set(struct rlimit); +struct 
rlimit __rlimit_stack_get(void); + +COSMOPOLITAN_C_END_ +#endif /* COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_ */ diff --git a/libc/intrin/rlimitstack.c b/libc/intrin/rlimitstack.c new file mode 100644 index 00000000000..66f47c64a9f --- /dev/null +++ b/libc/intrin/rlimitstack.c @@ -0,0 +1,76 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/atomic.h" +#include "libc/calls/struct/rlimit.h" +#include "libc/calls/struct/rlimit.internal.h" +#include "libc/cosmo.h" +#include "libc/dce.h" +#include "libc/intrin/cxaatexit.h" +#include "libc/intrin/lockless.h" +#include "libc/intrin/rlimit.h" +#include "libc/runtime/stack.h" +#include "libc/sysv/consts/rlim.h" +#include "libc/sysv/consts/rlimit.h" + +struct atomic_rlimit { + atomic_ulong cur; + atomic_ulong max; + atomic_uint once; + atomic_uint gen; +}; + +static struct atomic_rlimit __rlimit_stack; + +static void __rlimit_stack_init(void) { + struct rlimit rlim; + if (IsWindows()) { + rlim.rlim_cur = GetStaticStackSize(); + rlim.rlim_max = -1; // RLIM_INFINITY in consts.sh + } else { + sys_getrlimit(RLIMIT_STACK, &rlim); + } + atomic_init(&__rlimit_stack.cur, rlim.rlim_cur); + atomic_init(&__rlimit_stack.max, rlim.rlim_max); +} + +struct rlimit __rlimit_stack_get(void) { + unsigned gen; + unsigned long cur, max; + cosmo_once(&__rlimit_stack.once, __rlimit_stack_init); + gen = lockless_read_begin(&__rlimit_stack.gen); + do { + cur = atomic_load_explicit(&__rlimit_stack.cur, memory_order_acquire); + max = atomic_load_explicit(&__rlimit_stack.max, memory_order_acquire); + } while (!lockless_read_end(&__rlimit_stack.gen, &gen)); + return (struct rlimit){cur, max}; +} + +void __rlimit_stack_set(struct rlimit rlim) { + unsigned gen; + unsigned long cur, max; + cosmo_once(&__rlimit_stack.once, __rlimit_stack_init); + __cxa_lock(); + cur = rlim.rlim_cur; + max = rlim.rlim_max; + gen = lockless_write_begin(&__rlimit_stack.gen); + atomic_store_explicit(&__rlimit_stack.cur, cur, memory_order_release); + atomic_store_explicit(&__rlimit_stack.max, max, memory_order_release); + lockless_write_end(&__rlimit_stack.gen, gen); + __cxa_unlock(); +} diff --git a/libc/intrin/sig.c b/libc/intrin/sig.c index 5a77cfe9bc1..b49356a5398 100644 --- a/libc/intrin/sig.c +++ 
b/libc/intrin/sig.c @@ -53,6 +53,8 @@ #include "libc/runtime/internal.h" #include "libc/runtime/symbols.internal.h" #include "libc/str/str.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/sa.h" #include "libc/sysv/consts/sicode.h" #include "libc/sysv/consts/ss.h" @@ -680,7 +682,8 @@ textwindows dontinstrument static uint32_t __sig_worker(void *arg) { __bootstrap_tls(&tls, __builtin_frame_address(0)); char *sp = __builtin_frame_address(0); __maps_track((char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STKSZ, - STKSZ); + STKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK); for (;;) { _pthread_mutex_lock(&__sig_worker_lock); diff --git a/libc/intrin/stack.c b/libc/intrin/stack.c index d1a1320a68f..9a1e666450b 100644 --- a/libc/intrin/stack.c +++ b/libc/intrin/stack.c @@ -23,9 +23,16 @@ #include "libc/calls/syscall-sysv.internal.h" #include "libc/cosmo.h" #include "libc/dce.h" +#include "libc/dlopen/dlfcn.h" #include "libc/errno.h" +#include "libc/intrin/describeflags.h" #include "libc/intrin/dll.h" +#include "libc/intrin/maps.h" +#include "libc/intrin/rlimit.h" +#include "libc/intrin/strace.h" +#include "libc/intrin/weaken.h" #include "libc/runtime/runtime.h" +#include "libc/sock/internal.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" #include "libc/thread/posixthread.internal.h" @@ -35,6 +42,11 @@ * @fileoverview cosmo stack memory manager */ +#define MAP_GROWSDOWN_LINUX 0x00000100 +#define MAP_ANONYMOUS_LINUX 0x00000020 +#define MAP_NOREPLACE_LINUX 0x08000000 +#define MAP_NORESERVE_LINUX 0x00004000 + #define MAP_ANON_OPENBSD 0x1000 #define MAP_STACK_OPENBSD 0x4000 @@ -43,8 +55,8 @@ struct CosmoStack { struct Dll elem; void *stackaddr; - unsigned stacksize; - unsigned guardsize; + size_t stacksize; + size_t guardsize; }; struct CosmoStacks { @@ -79,10 +91,133 @@ void cosmo_stack_wipe(void) { _pthread_mutex_wipe_np(&cosmo_stacks.lock); } -static errno_t 
cosmo_stack_munmap(void *addr, size_t size) { +// map_growsdown will not grow more than rlimit_stack +static size_t cosmo_stack_maxgrow(void) { + return __rlimit_stack_get().rlim_cur & -__pagesize; +} + +// allocates private anonymous fixed noreplace memory on linux +static void *flixmap(void *addr, size_t size, int prot, int flags) { + flags |= MAP_PRIVATE | MAP_ANONYMOUS_LINUX | MAP_NOREPLACE_LINUX; + void *res = __sys_mmap(addr, size, prot, flags, -1, 0, 0); + if (res != MAP_FAILED) { + if (res != addr) { + sys_munmap(res, size); // free the misplaced map, never what's at addr + errno = EEXIST; // polyfill linux 4.17+ behavior + res = 0; + } + } else { + res = 0; + } + STRACE("mmap(%p, %'zu, %s, %s) → %p% m", addr, size, DescribeProtFlags(prot), + DescribeMapFlags(flags), res); + return res; +} + +// maps stack on linux +static void *slackmap(size_t stacksize, size_t guardsize) { + int olde = errno; + struct Map *prev, *map; + char *max = (char *)0x7fffffffffff; + size_t need = guardsize + stacksize; + __maps_lock(); + for (;;) { + + // look for empty space beneath higher mappings + char *region = 0; + for (map = __maps_floor(max); map; map = prev) { + char *min = (char *)(intptr_t)__pagesize; + if ((prev = __maps_prev(map))) + min = prev->addr + prev->size; + if (map->addr - min >= need) { + region = map->addr - need; + max = region - 1; + break; + } + } + if (!region) + break; + + // track intended memory in rbtree + if (!__maps_track(region, guardsize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS_LINUX)) + break; + if (!__maps_track(region + guardsize, stacksize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS_LINUX)) { + __maps_untrack(region, need); + break; + } + __maps_unlock(); + + // ask kernel to create guard region + // taking special care to not clobber untracked mappings + // + // it's important that this call happen first, since it limits how + // much memory map_growsdown will secretly consume. 
if there's + // nothing beneath a map_growsdown mapping, then the kernel reserves + // (and this isn't listed /proc/PID/maps so don't bother looking) + // `rlimit_stack.rlim_cur & -__pagesize` bytes of memory including + // this top-most page, and another 1mb of guard pages beneath that. + // but by mapping our guard pages manually, we ensure the guard + // region and the stack itself will be exactly as big as we want. + // + // you'd think we could mmap(0, pagesz, growsdown) to let the kernel + // pick an address and then we could just upscale the user's stack + // size request to whatever rlimit_stack is if it's bigger. but the + // linux kernel will actually choose addresses between existing maps + // where the hole is smaller than rlimit_stack. + // + // to use map_growsdown, we must use map_fixed. normally when we use + // map_fixed, we reserve an entire kernel-assigned region beforehand + // to ensure there isn't any overlap with existing mappings. however + // since growsdown stops growing when it encounters another mapping, + // you can't map it on top of a reservation mapping. so we must take + // a leap of faith there aren't any mystery mappings twixt the guard + // region and growsdown page below. + char *guard_region = + flixmap(region, guardsize, PROT_NONE, MAP_NORESERVE_LINUX); + if (!guard_region) { + RecoverFromMmapFailure: + if (errno != EEXIST) { + // mmap() probably raised enomem due to rlimit_as etc. + __maps_untrack(region, need); + return 0; + } else { + // we've encountered a mystery mapping. it's hard to imagine + // this happening, since we don't use map_growsdown when + // cosmo_dlopen() is linked in the binary. 
in that case, the + // tracker we created covers at least some of the rogue map, + // therefore this issue should fix itself if we keep going + errno = olde; + __maps_lock(); + ++max; + continue; + } + } + + // ask kernel to create stack pages + // taking special care to not clobber untracked mappings + char *top_page = flixmap(region + need - __pagesize, __pagesize, + PROT_READ | PROT_WRITE, MAP_GROWSDOWN_LINUX); + if (!top_page) { + sys_munmap(region, guardsize); + goto RecoverFromMmapFailure; + } + + // return address to bottom of stack + return region + guardsize; + } + __maps_unlock(); + errno = ENOMEM; + return 0; +} + +static errno_t cosmo_stack_munmap(char *stackaddr, size_t stacksize, + size_t guardsize) { errno_t r = 0; errno_t e = errno; - if (!munmap(addr, size)) { + if (munmap(stackaddr - guardsize, // nonzero return means failure + guardsize + stacksize)) { r = errno; errno = e; } @@ -119,7 +254,8 @@ static void cosmo_stack_rehabilitate(struct Dll *stacks) { struct Dll *e; for (e = dll_first(stacks); e; e = dll_next(stacks, e)) cosmo_stack_munmap(THREADSTACK_CONTAINER(e)->stackaddr, - THREADSTACK_CONTAINER(e)->stacksize); + THREADSTACK_CONTAINER(e)->stacksize, + THREADSTACK_CONTAINER(e)->guardsize); cosmo_stack_lock(); dll_make_first(&cosmo_stacks.objects, stacks); cosmo_stack_unlock(); @@ -193,39 +329,41 @@ void cosmo_stack_setmaxstacks(int maxstacks) { * abstract all the gory details of gaining authorized memory, and * additionally implements caching for lightning fast performance. * - * The stack size must be nonzero. It is rounded up to the granularity - * of the underlying system allocator, which is normally the page size. - * Your parameter will be updated with the selected value upon success. + * The stack size must be nonzero. It specifies the minimum amount of + * stack space that will be available for use. The provided value is + * rounded up to the system page size. It may be increased further for + * various reasons. 
Your stack size parameter will be updated with the + * chosen value upon success. * - * The guard size specifies how much memory should be protected at the - * bottom of your stack. This is helpful for ensuring stack overflows - * will result in a segmentation fault, rather than corrupting memory - * silently. This may be set to zero, in which case no guard pages will - * be protected. This value is rounded up to the system page size. The - * corrected value will be returned upon success. Your guard size needs - * to be small enough to leave room for at least one memory page in your - * stack size i.e. `guardsize + pagesize <= stacksize` must be the case. - * Otherwise this function will return an `EINVAL` error. + * The guard size specifies the minimum amount of memory that should be + * protected beneath your stack. This helps ensure stack overflows cause + * a segfault rather than corrupting memory silently. This may be set to + * zero in which case no guard pages will be made. This value is rounded + * up to the system page size. The corrected value will be returned upon + * success. The guard region is mapped beneath the stack, in addition + * to the requested stack size, so it never reduces the usable stack. + * A stack size of zero is rejected by this function, which will then + * return an `EINVAL` error. * * When you're done using your stack, pass it to cosmo_stack_free() so * it can be recycled. Stacks are only recycled when the `stacksize` and - * `guardsize` parameters are an exact match after correction. Otherwise - * they'll likely be freed eventually, in a least-recently used fashion, - * based upon the configurable cosmo_stack_setmaxstacks() setting. + * `guardsize` parameters match the constraints described above. Stacks + * that don't end up getting reused will be freed eventually, in a least + * recently used way based upon your cosmo_stack_setmaxstacks() setting. * * This function returns 0 on success, or an errno on error. 
See the * documentation of mmap() for a list possible errors that may occur. */ -errno_t cosmo_stack_alloc(unsigned *inout_stacksize, // - unsigned *inout_guardsize, // +errno_t cosmo_stack_alloc(size_t *inout_stacksize, // + size_t *inout_guardsize, // void **out_addr) { // validate arguments - unsigned stacksize = *inout_stacksize; - unsigned guardsize = *inout_guardsize; - stacksize = (stacksize + __gransize - 1) & -__gransize; + size_t stacksize = *inout_stacksize; + size_t guardsize = *inout_guardsize; + stacksize = (stacksize + __pagesize - 1) & -__pagesize; guardsize = (guardsize + __pagesize - 1) & -__pagesize; - if (guardsize + __pagesize > stacksize) + if (!stacksize) return EINVAL; // recycle stack @@ -236,8 +374,10 @@ errno_t cosmo_stack_alloc(unsigned *inout_stacksize, // struct CosmoStack *ts = THREADSTACK_CONTAINER(e); if (ts->stacksize == stacksize && // ts->guardsize == guardsize) { - dll_remove(&cosmo_stacks.stacks, e); stackaddr = ts->stackaddr; + stacksize = ts->stacksize; + guardsize = ts->guardsize; + dll_remove(&cosmo_stacks.stacks, e); dll_make_first(&cosmo_stacks.objects, e); --cosmo_stacks.count; break; @@ -247,20 +387,37 @@ errno_t cosmo_stack_alloc(unsigned *inout_stacksize, // // create stack if (!stackaddr) { - errno_t e = errno; - stackaddr = mmap(0, stacksize, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (stackaddr == MAP_FAILED) { - errno_t err = errno; - errno = e; - return err; + errno_t olde = errno; + if (!IsTiny() && IsLinux() && guardsize && !_weaken(cosmo_dlopen) && + stacksize <= cosmo_stack_maxgrow() && !IsQemuUser()) { + // this special linux-only stack allocator significantly reduces + // the consumption of virtual memory. 
+ if (!(stackaddr = slackmap(stacksize, guardsize))) { + errno_t err = errno; + errno = olde; + return err; + } + } else { + char *map = mmap(0, guardsize + stacksize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (map == MAP_FAILED) { + errno_t err = errno; + errno = olde; + return err; + } + stackaddr = map + guardsize; + if (IsOpenbsd()) + if (!TellOpenbsdThisIsStackMemory(stackaddr, stacksize)) + notpossible; + if (guardsize) { + if (mprotect(map, guardsize, PROT_NONE | PROT_GUARD)) { + errno_t err = errno; + munmap(map, guardsize + stacksize); + errno = olde; + return err; + } + } } - if (IsOpenbsd()) - if (!TellOpenbsdThisIsStackMemory(stackaddr, stacksize)) - notpossible; - if (guardsize) - if (mprotect(stackaddr, guardsize, PROT_NONE | PROT_GUARD)) - notpossible; } // return stack @@ -277,20 +434,22 @@ static void cosmo_stack_setup(void) { /** * Frees stack memory. * - * While not strictly required, it's assumed these three values would be - * those returned by an earlier call to cosmo_stack_alloc(). + * While not strictly required, it's assumed the three parameters are + * those returned by an earlier call to cosmo_stack_alloc(). If they + * aren't page aligned and rounded, this function will return EINVAL. * * This function returns 0 on success, or an errno on error. The `errno` * variable is never clobbered. 
You can only dependably count on this to * return an error on failure when you say `cosmo_stack_setmaxstacks(0)` */ -errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize, - unsigned guardsize) { - stacksize = (stacksize + __gransize - 1) & -__gransize; - guardsize = (guardsize + __pagesize - 1) & -__pagesize; - if (guardsize + __pagesize > stacksize) +errno_t cosmo_stack_free(void *stackaddr, size_t stacksize, size_t guardsize) { + if (!stacksize) + return EINVAL; + if (stacksize & (__pagesize - 1)) + return EINVAL; + if (guardsize & (__pagesize - 1)) return EINVAL; - if ((uintptr_t)stackaddr & (__gransize - 1)) + if ((uintptr_t)stackaddr & (__pagesize - 1)) return EINVAL; cosmo_stack_lock(); struct Dll *surplus = 0; @@ -318,7 +477,7 @@ errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize, cosmo_stack_rehabilitate(surplus); errno_t err = 0; if (stackaddr) - err = cosmo_stack_munmap(stackaddr, stacksize); + err = cosmo_stack_munmap(stackaddr, stacksize, guardsize); return err; } diff --git a/libc/proc/proc.c b/libc/proc/proc.c index 325b7645704..5163d265ab1 100644 --- a/libc/proc/proc.c +++ b/libc/proc/proc.c @@ -141,7 +141,8 @@ static textwindows dontinstrument uint32_t __proc_worker(void *arg) { __bootstrap_tls(&tls, __builtin_frame_address(0)); __maps_track( (char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STACK_SIZE, - STACK_SIZE); + STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK); for (;;) { // assemble a group of processes to wait on. 
if more than 64 diff --git a/libc/thread/itimer.c b/libc/thread/itimer.c index 6a7cf2b8afe..a820f9151d8 100644 --- a/libc/thread/itimer.c +++ b/libc/thread/itimer.c @@ -30,6 +30,8 @@ #include "libc/nt/thread.h" #include "libc/str/str.h" #include "libc/sysv/consts/clock.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/sicode.h" #include "libc/sysv/consts/sig.h" #include "libc/sysv/errfuns.h" @@ -47,7 +49,8 @@ static textwindows dontinstrument uint32_t __itimer_worker(void *arg) { __bootstrap_tls(&tls, sp); __maps_track( (char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STACK_SIZE, - STACK_SIZE); + STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK); for (;;) { bool dosignal = false; struct timeval now, waituntil; diff --git a/libc/thread/mapstack.c b/libc/thread/mapstack.c index 28a3fd56e74..470ab58a63b 100644 --- a/libc/thread/mapstack.c +++ b/libc/thread/mapstack.c @@ -35,8 +35,8 @@ */ void *NewCosmoStack(void) { void *stackaddr; - unsigned stacksize = GetStackSize(); - unsigned guardsize = GetGuardSize(); + size_t stacksize = GetStackSize(); + size_t guardsize = GetGuardSize(); errno_t err = cosmo_stack_alloc(&stacksize, &guardsize, &stackaddr); if (!err) return stackaddr; diff --git a/libc/thread/pthread_attr_getguardsize.c b/libc/thread/pthread_attr_getguardsize.c index fd4524efb3a..ba10c30141a 100644 --- a/libc/thread/pthread_attr_getguardsize.c +++ b/libc/thread/pthread_attr_getguardsize.c @@ -19,7 +19,7 @@ #include "libc/thread/thread.h" /** - * Returns size of protected region at bottom of thread stack. + * Returns size of protected region beneath thread stack. 
* * @param guardsize will be set to guard size in bytes * @return 0 on success, or errno on error diff --git a/libc/thread/pthread_attr_getstack.c b/libc/thread/pthread_attr_getstack.c index 8b9a9c06d33..27c744d812b 100644 --- a/libc/thread/pthread_attr_getstack.c +++ b/libc/thread/pthread_attr_getstack.c @@ -20,15 +20,13 @@ #include "libc/thread/thread.h" /** - * Returns configuration for thread stack. + * Returns configuration for custom thread stack. * - * This is a getter for a configuration attribute. By default, zeros are - * returned. If pthread_attr_setstack() was called earlier, then this'll - * return those earlier supplied values. + * If zero is returned to `*stackaddr` then a custom stack hasn't been + * specified by a previous call to pthread_attr_setstack(). * * @param stackaddr will be set to stack address in bytes * @return 0 on success, or errno on error - * @see pthread_attr_setstacksize() */ errno_t pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t *stacksize) { diff --git a/libc/thread/pthread_attr_init.c b/libc/thread/pthread_attr_init.c index ec5fa47b116..4ef1e92077f 100644 --- a/libc/thread/pthread_attr_init.c +++ b/libc/thread/pthread_attr_init.c @@ -40,7 +40,7 @@ errno_t pthread_attr_init(pthread_attr_t *attr) { *attr = (pthread_attr_t){ .__stacksize = GetStackSize(), - .__guardsize = __pagesize, + .__guardsize = GetGuardSize(), }; return 0; } diff --git a/libc/thread/pthread_attr_setguardsize.c b/libc/thread/pthread_attr_setguardsize.c index e404ea04f63..4b776cdd937 100644 --- a/libc/thread/pthread_attr_setguardsize.c +++ b/libc/thread/pthread_attr_setguardsize.c @@ -19,13 +19,7 @@ #include "libc/thread/thread.h" /** - * Sets size of protected region at bottom of thread stack. - * - * Cosmopolitan sets this value to `sysconf(_SC_PAGESIZE)` by default. - * - * You may set `guardsize` to disable the stack guard feature and gain a - * slight performance advantage by avoiding mprotect() calls. 
Note that - * it could make your code more prone to silent unreported corruption. + * Sets minimum size of protected region beneath thread stack. * * @param guardsize contains guard size in bytes, which is implicitly * rounded up to `sysconf(_SC_PAGESIZE)`, or zero to disable diff --git a/libc/thread/pthread_attr_setstack.c b/libc/thread/pthread_attr_setstack.c index 8bfaed866ef..9017362af7b 100644 --- a/libc/thread/pthread_attr_setstack.c +++ b/libc/thread/pthread_attr_setstack.c @@ -16,64 +16,42 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" #include "libc/errno.h" -#include "libc/limits.h" +#include "libc/runtime/stack.h" #include "libc/thread/thread.h" /** - * Configures custom allocated stack for thread, e.g. + * Configures custom stack for thread. * - * pthread_t id; - * pthread_attr_t attr; - * char *stk = NewCosmoStack(); - * pthread_attr_init(&attr); - * pthread_attr_setstack(&attr, stk, GetStackSize()); - * pthread_create(&id, &attr, func, 0); - * pthread_attr_destroy(&attr); - * pthread_join(id, 0); - * FreeCosmoStack(stk); + * Normally you want to use pthread_attr_setstacksize() and + * pthread_attr_setguardsize() to configure how pthread_create() + * allocates stack memory for newly created threads. Cosmopolitan is + * very good at managing stack memory. However if you still want to + * allocate stack memory on your own, POSIX defines this function. * - * Your stack must have at least `PTHREAD_STACK_MIN` bytes, which - * Cosmpolitan Libc defines as `GetStackSize()`. It's a link-time - * constant used by Actually Portable Executable that's 128 kb by - * default. See libc/runtime/stack.h for docs on your stack limit - * since the APE ELF phdrs are the one true source of truth here. + * Your `stackaddr` points to the byte at the very bottom of your stack. + * You are responsible for this memory. 
Your POSIX threads runtime will + * not free or unmap this allocation when the thread has terminated. If + * `stackaddr` is null then `stacksize` is ignored and default behavior + * is restored, i.e. pthread_create() will manage stack allocations. * - * Cosmpolitan Libc runtime magic (e.g. ftrace) and memory safety - * (e.g. kprintf) assumes that stack sizes are two-powers and are - * aligned to that two-power. Conformance isn't required since we - * say caveat emptor to those who don't maintain these invariants - * please consider using NewCosmoStack(), which is always perfect - * or use `mmap(0, GetStackSize() << 1, ...)` for a bigger stack. + * Your `stackaddr` could be created by malloc(). On OpenBSD, + * pthread_create() will augment your custom allocation so it's + * permissible by the kernel to use as a stack. You may also call + * Cosmopolitan APIs such as NewCosmoStack() and cosmo_stack_alloc(). + * Static memory can be used, but it won't reduce pthread footprint. * - * Unlike pthread_attr_setstacksize(), this function permits just - * about any parameters and will change the values and allocation - * as needed to conform to the mandatory requirements of the host - * operating system even if it doesn't meet the stricter needs of - * Cosmopolitan Libc userspace libraries. For example with malloc - * allocations, things like page size alignment, shall be handled - * automatically for compatibility with existing codebases. - * - * The same stack shouldn't be used for two separate threads. Use - * fresh stacks for each thread so that ASAN can be much happier.
- * - * @param stackaddr is address of stack allocated by caller, and - * may be NULL in which case default behavior is restored - * @param stacksize is size of caller allocated stack * @return 0 on success, or errno on error - * @raise EINVAL if parameters were unacceptable + * @raise EINVAL if `stacksize` is less than `PTHREAD_STACK_MIN` * @see pthread_attr_setstacksize() */ errno_t pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize) { if (!stackaddr) { attr->__stackaddr = 0; - attr->__stacksize = 0; + attr->__stacksize = GetStackSize(); return 0; } - if (stacksize > INT_MAX) - return EINVAL; if (stacksize < PTHREAD_STACK_MIN) return EINVAL; attr->__stackaddr = stackaddr; diff --git a/libc/thread/pthread_attr_setstacksize.c b/libc/thread/pthread_attr_setstacksize.c index 58e69eb15e9..7b7eed9dad5 100644 --- a/libc/thread/pthread_attr_setstacksize.c +++ b/libc/thread/pthread_attr_setstacksize.c @@ -17,19 +17,28 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/errno.h" -#include "libc/limits.h" #include "libc/thread/thread.h" /** - * Defines minimum stack size for thread. + * Specifies minimum stack size for thread. + * + * On Linux, if you're not using `cosmocc -mtiny`, and you're not using + * cosmo_dlopen(), and guard size is nonzero, then `MAP_GROWSDOWN` will + * be used to create your stack memory. This helps minimize virtual + * memory consumption. Please note this is only possible if `stacksize` + * is no larger than the current `RLIMIT_STACK`, otherwise the runtime + * will map your stack using plain old mmap(). + * + * Non-custom stacks may be recycled by the cosmo runtime. You can + * control this behavior by calling cosmo_stack_setmaxstacks(). It's + * useful for both tuning performance and hardening security. See also + * pthread_attr_setguardsize() which is important for security too. 
* * @param stacksize contains stack size in bytes * @return 0 on success, or errno on error * @raise EINVAL if `stacksize` is less than `PTHREAD_STACK_MIN` */ errno_t pthread_attr_setstacksize(pthread_attr_t *a, size_t stacksize) { - if (stacksize > INT_MAX) - return EINVAL; if (stacksize < PTHREAD_STACK_MIN) return EINVAL; a->__stacksize = stacksize; diff --git a/libc/thread/thread.h b/libc/thread/thread.h index 533f15bc30e..af797cb28f2 100644 --- a/libc/thread/thread.h +++ b/libc/thread/thread.h @@ -2,7 +2,7 @@ #define COSMOPOLITAN_LIBC_THREAD_THREAD_H_ #define PTHREAD_KEYS_MAX 46 -#define PTHREAD_STACK_MIN 65536 +#define PTHREAD_STACK_MIN 32768 #define PTHREAD_USE_NSYNC 1 #define PTHREAD_DESTRUCTOR_ITERATIONS 4 @@ -129,8 +129,8 @@ typedef struct pthread_attr_s { int __contentionscope; int __sigaltstacksize; uint64_t __sigmask; - unsigned __guardsize; - unsigned __stacksize; + size_t __guardsize; + size_t __stacksize; void *__stackaddr; void *__sigaltstackaddr; } pthread_attr_t; diff --git a/test/libc/intrin/stack_test.c b/test/libc/intrin/stack_test.c new file mode 100644 index 00000000000..e07a2d7fcc9 --- /dev/null +++ b/test/libc/intrin/stack_test.c @@ -0,0 +1,75 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" +#include "libc/calls/calls.h" +#include "libc/calls/syscall-sysv.internal.h" +#include "libc/cosmo.h" +#include "libc/dce.h" +#include "libc/errno.h" +#include "libc/runtime/runtime.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" +#include "libc/testlib/testlib.h" + +// returns true if byte at memory address is readable +bool readable(void *addr) { + return testlib_pokememory(addr); +} + +// returns true if page is reserved by linux memory manager +// it can be true for addresses that aren't listed in /proc/PID/maps +bool occupied(void *addr) { + int olde = errno; + char *want = (char *)((uintptr_t)addr & -__pagesize); + char *got = + __sys_mmap(want, __pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0, 0); + if (got == MAP_FAILED) { + unassert(errno == IsFreebsd() ? 
EINVAL : EEXIST); + errno = olde; + return true; + } + sys_munmap(got, __pagesize); + return got != want; +} + +TEST(stack, test) { + if (IsWindows()) + return; + + void *vstackaddr; + size_t stacksize = 65536; + size_t guardsize = 4096; + unassert(!cosmo_stack_alloc(&stacksize, &guardsize, &vstackaddr)); + char *stackaddr = vstackaddr; + + /* check memory reservation */ + unassert(occupied(stackaddr + stacksize - 1)); // top stack + unassert(occupied(stackaddr)); // bot stack + unassert(occupied(stackaddr - 1)); // top guard + unassert(occupied(stackaddr - guardsize)); // bot guard + + /* check memory accessibility */ + unassert(readable(stackaddr + stacksize - 1)); // top stack + unassert(readable(stackaddr)); // bot stack + unassert(!readable(stackaddr - 1)); // top guard + unassert(!readable(stackaddr - guardsize)); // bot guard + + unassert(!cosmo_stack_free(stackaddr, stacksize, guardsize)); +} diff --git a/test/libc/thread/pthread_cancel_test.c b/test/libc/thread/pthread_cancel_test.c index 7c7b4739b7e..06fb5093e77 100644 --- a/test/libc/thread/pthread_cancel_test.c +++ b/test/libc/thread/pthread_cancel_test.c @@ -19,6 +19,7 @@ #include "libc/atomic.h" #include "libc/calls/calls.h" #include "libc/calls/struct/sigaction.h" +#include "libc/calls/struct/sigaltstack.h" #include "libc/dce.h" #include "libc/errno.h" #include "libc/intrin/kprintf.h" @@ -27,6 +28,7 @@ #include "libc/nexgen32e/nexgen32e.h" #include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" #include "libc/sysv/consts/sig.h" #include "libc/testlib/testlib.h" #include "libc/thread/thread.h" diff --git a/test/libc/thread/pthread_create_test.c b/test/libc/thread/pthread_create_test.c index 92b6c28db8e..c4daf45ff66 100644 --- a/test/libc/thread/pthread_create_test.c +++ b/test/libc/thread/pthread_create_test.c @@ -70,6 +70,7 @@ void OnUsr1(int sig, siginfo_t *si, void *vctx) { void SetUpOnce(void) { cosmo_stack_setmaxstacks((_rand64() & 7) - 1); + 
cosmo_stack_setmaxstacks(100); } void SetUp(void) { diff --git a/test/posix/signal_latency_test.c b/test/posix/signal_latency_test.c index c9ee5c2692d..080e1fd97bb 100644 --- a/test/posix/signal_latency_test.c +++ b/test/posix/signal_latency_test.c @@ -129,6 +129,10 @@ int compare(const void *a, const void *b) { int main() { + // this test probably exposes a bug in openbsd + if (IsOpenbsd()) + return 0; + // TODO(jart): Why is this test flaky on Windows? if (IsWindows()) return 0;