Give every thread its own warray buffer.

The shared pool (l_free/l_used arrays claimed through s_mp_cmpexch_n) is
replaced by a thread-local st_warray holding a single w_free/w_used pair.
mp_warray_init(), s_mp_cmpexch_n() and s_mp_warray_free() are removed, and the
demo test threads now call mp_warray_free() after running their test function.
mp_thread falls back to C11 _Thread_local, or to nothing, on compilers that are
neither MSVC nor GNU-compatible, so the declarations still compile there.

diff --git a/demo/test.c b/demo/test.c
index 5ab9686c..2fa6e08d 100644
--- a/demo/test.c
+++ b/demo/test.c
@@ -2502,20 +2502,24 @@ struct thread_info {
    int ret;
 };
 
-static void *run_pthread(void *arg)
+static void run(struct thread_info *tinfo)
 {
-   struct thread_info *tinfo = arg;
-
    tinfo->ret = tinfo->t->fn();
 
+   if (mp_warray_free() == -2)
+      tinfo->ret = EXIT_FAILURE;
+}
+
+static void *run_pthread(void *arg)
+{
+   run(arg);
+
    return arg;
 }
 
 static unsigned long run_msvc(void *arg)
 {
-   struct thread_info *tinfo = arg;
-
-   tinfo->ret = tinfo->t->fn();
+   run(arg);
 
    return 0;
 }
@@ -2609,7 +2613,6 @@ static int unit_tests(int argc, char **argv)
    };
    struct thread_info test_threads[sizeof(test)/sizeof(test[0])], *res;
    unsigned long i, ok, fail, nop;
-   size_t n_threads = MP_HAS(MULTI_THREADED) ? sizeof(test) / sizeof(test[0]) : 1;
    uint64_t t;
    int j;
    ok = fail = nop = 0;
@@ -2620,8 +2623,7 @@ static int unit_tests(int argc, char **argv)
    mp_rand_source(s_mp_rand_jenkins);
 
    if (MP_HAS(MP_SMALL_STACK_SIZE)) {
-      printf("Small-stack enabled with %zu warray buffers\n\n", n_threads);
-      DO(mp_warray_init(n_threads, 1));
+      printf("Small-stack enabled\n\n");
    }
 
    if (MP_HAS(MULTI_THREADED)) {
diff --git a/mp_warray_free.c b/mp_warray_free.c
index 088efefc..f7470f81 100644
--- a/mp_warray_free.c
+++ b/mp_warray_free.c
@@ -9,19 +9,13 @@ MP_STATIC_ASSERT(warray_free_sz_does_not_overflow, (sizeof(mp_word) * MP_WARRAY)
 static int s_warray_free(void)
 {
    int ret = 0;
-   size_t n;
-   for (n = 0; n < s_mp_warray.allocated; ++n) {
-      if (s_mp_warray.l_used[n].warray) {
-         ret = -2;
-         goto ERR_OUT;
-      }
+   if (s_mp_warray.w_used)
+      return -2;
+   if (s_mp_warray.w_free) {
+      s_mp_zero_buf(s_mp_warray.w_free, sizeof(mp_word) * MP_WARRAY);
+      MP_FREE(s_mp_warray.w_free, sizeof(mp_word) * MP_WARRAY);
+      s_mp_warray.w_free = NULL;
    }
-   for (n = 0; n < s_mp_warray.allocated; ++n) {
-      MP_FREE(s_mp_warray.l_free[n].warray, sizeof(mp_word) * MP_WARRAY);
-      s_mp_warray.l_free[n].warray = NULL;
-   }
-   s_mp_warray_free(s_mp_warray.usable);
-ERR_OUT:
    return ret;
 }
 
diff --git a/mp_warray_init.c b/mp_warray_init.c
deleted file mode 100644
index c2509886..00000000
--- a/mp_warray_init.c
+++ /dev/null
@@ -1,46 +0,0 @@
-#include "tommath_private.h"
-#ifdef MP_WARRAY_INIT_C
-/* LibTomMath, multiple-precision integer library -- Tom St Denis */
-/* SPDX-License-Identifier: Unlicense */
-
-static mp_err s_warray_init(size_t n_alloc, bool preallocate)
-{
-   size_t n;
-   if (s_mp_warray.l_free != NULL || s_mp_warray.l_used != NULL) {
-      return MP_VAL;
-   }
-
-   s_mp_warray.l_free = MP_CALLOC(n_alloc, sizeof(*(s_mp_warray.l_free)));
-   s_mp_warray.l_used = MP_CALLOC(n_alloc, sizeof(*(s_mp_warray.l_used)));
-   if (s_mp_warray.l_free == NULL || s_mp_warray.l_used == NULL) {
-      s_mp_warray_free(n_alloc);
-      return MP_MEM;
-   }
-
-   if (preallocate) {
-      for (n = 0; n < n_alloc; ++n) {
-         s_mp_warray.l_free[n].warray = MP_CALLOC(MP_WARRAY, sizeof(mp_word));
-         if (s_mp_warray.l_free[n].warray == NULL) {
-            while (n > 0) {
-               n--;
-               MP_FREE(s_mp_warray.l_free[n].warray, MP_WARRAY * sizeof(mp_word));
-               s_mp_warray.l_free[n].warray = NULL;
-            }
-            s_mp_warray_free(n_alloc);
-            return MP_MEM;
-         }
-      }
-      s_mp_warray.allocated = n_alloc;
-   }
-
-   s_mp_warray.usable = n_alloc;
-   return MP_OKAY;
-}
-
-mp_err mp_warray_init(size_t n_alloc, bool preallocate)
-{
-   if (MP_HAS(MP_SMALL_STACK_SIZE)) return s_warray_init(n_alloc, preallocate);
-   return MP_ERR;
-}
-
-#endif
diff --git a/s_mp_cmpexch_n.c b/s_mp_cmpexch_n.c
deleted file mode 100644
index e8ef969c..00000000
--- a/s_mp_cmpexch_n.c
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "tommath_private.h"
-#ifdef S_MP_CMPEXCH_N_C
-/* LibTomMath, multiple-precision integer library -- Tom St Denis */
-/* SPDX-License-Identifier: Unlicense */
-
-#ifdef __GNUC__
-#define S_CMPEXCH_N_GCC_C
-static bool s_cmpexch_n_gcc(void **ptr, void **expected, void *desired)
-{
-   return __atomic_compare_exchange_n(ptr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
-}
-#endif
-
-#ifdef _MSC_VER
-#define S_CMPEXCH_N_MSVC_C
-
-#ifndef _WIN32_WINNT
-#define _WIN32_WINNT 0x0501
-#endif
-#ifndef WINVER
-#define WINVER 0x0501
-#endif
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-static bool s_cmpexch_n_msvc(void **ptr, void **expected, void *desired)
-{
-   return InterlockedCompareExchangePointer(ptr, desired, *(expected));
-}
-#endif
-
-bool s_cmpexch_n_gcc(void **ptr, void **expected, void *desired);
-bool s_cmpexch_n_msvc(void **ptr, void **expected, void *desired);
-
-bool s_mp_cmpexch_n(void **ptr, void **expected, void *desired)
-{
-   if (MP_HAS(S_CMPEXCH_N_GCC)) return s_cmpexch_n_gcc(ptr, expected, desired);
-   if (MP_HAS(S_CMPEXCH_N_MSVC)) return s_cmpexch_n_msvc(ptr, expected, desired);
-   return false;
-}
-
-#endif
diff --git a/s_mp_warray.c b/s_mp_warray.c
index d181057c..1b8b068b 100644
--- a/s_mp_warray.c
+++ b/s_mp_warray.c
@@ -3,6 +3,6 @@
 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
 /* SPDX-License-Identifier: Unlicense */
 
-st_warray s_mp_warray;
+mp_thread st_warray s_mp_warray = { 0 };
 
 #endif
diff --git a/s_mp_warray_free.c b/s_mp_warray_free.c
deleted file mode 100644
index 9d8b75eb..00000000
--- a/s_mp_warray_free.c
+++ /dev/null
@@ -1,17 +0,0 @@
-#include "tommath_private.h"
-#ifdef S_MP_WARRAY_FREE_C
-/* LibTomMath, multiple-precision integer library -- Tom St Denis */
-/* SPDX-License-Identifier: Unlicense */
-
-void s_mp_warray_free(size_t n)
-{
-   (void)n;
-   MP_FREE(s_mp_warray.l_free, n * sizeof(*(s_mp_warray.l_free)));
-   MP_FREE(s_mp_warray.l_used, n * sizeof(*(s_mp_warray.l_used)));
-   s_mp_warray.l_free = NULL;
-   s_mp_warray.l_used = NULL;
-   s_mp_warray.allocated = 0;
-   s_mp_warray.usable = 0;
-}
-
-#endif
diff --git a/s_mp_warray_get.c b/s_mp_warray_get.c
index 39176eb2..26b0d7c1 100644
--- a/s_mp_warray_get.c
+++ b/s_mp_warray_get.c
@@ -5,34 +5,14 @@
 
 void *s_mp_warray_get(void)
 {
-   void *ret = NULL;
-   size_t n;
-   if (s_mp_warray.usable == 0) {
-      if (mp_warray_init(1, false) != MP_OKAY)
-         return NULL;
+   if (s_mp_warray.w_used)
+      return NULL;
+   if (s_mp_warray.w_free == NULL) {
+      s_mp_warray.w_free = MP_CALLOC(MP_WARRAY, sizeof(mp_word));
    }
-   for (n = 0; n < s_mp_warray.allocated;) {
-      if (s_mp_warray.l_free[n].warray == NULL) {
-         n++;
-         continue;
-      }
-      ret = s_mp_warray.l_free[n].warray;
-      if (s_mp_cmpexch_n(&s_mp_warray.l_free[n].warray, &ret, NULL)) {
-         s_mp_warray.l_used[n].warray = ret;
-         goto LBL_OUT;
-      }
-      /* restart from the beginning if we missed a potential slot */
-      n = 0;
-   }
-   ret = NULL;
-   if (s_mp_warray.allocated + 1 > s_mp_warray.usable)
-      goto LBL_OUT;
-   ret = MP_CALLOC(MP_WARRAY, sizeof(mp_word));
-   if (ret != NULL)
-      s_mp_warray.l_used[s_mp_warray.allocated++].warray = ret;
-
-LBL_OUT:
-   return ret;
+   s_mp_warray.w_used = s_mp_warray.w_free;
+   s_mp_warray.w_free = NULL;
+   return s_mp_warray.w_used;
 }
 
 #endif
diff --git a/s_mp_warray_put.c b/s_mp_warray_put.c
index 4cf413d6..79e014ac 100644
--- a/s_mp_warray_put.c
+++ b/s_mp_warray_put.c
@@ -5,14 +5,10 @@
 
 void s_mp_warray_put(void *w)
 {
-   size_t n, allocated = s_mp_warray.allocated;
-   for (n = 0; n < allocated; ++n) {
-      if (s_mp_warray.l_used[n].warray == w) {
-         s_mp_warray.l_used[n].warray = NULL;
-         s_mp_warray.l_free[n].warray = w;
-         break;
-      }
-   }
+   if (s_mp_warray.w_free || s_mp_warray.w_used != w)
+      return;
+   s_mp_warray.w_free = w;
+   s_mp_warray.w_used = NULL;
 }
 
 #endif
diff --git a/tommath.h b/tommath.h
index 7da36d0e..1820d243 100644
--- a/tommath.h
+++ b/tommath.h
@@ -588,7 +588,6 @@ mp_err mp_fread(mp_int *a, int radix, FILE *stream) MP_WUR;
 mp_err mp_fwrite(const mp_int *a, int radix, FILE *stream) MP_WUR;
 #endif
 
-mp_err mp_warray_init(size_t n_alloc, bool preallocate);
 int mp_warray_free(void);
 
 #define mp_to_binary(M, S, N) mp_to_radix((M), (S), (N), NULL, 2)
diff --git a/tommath_private.h b/tommath_private.h
index 36291a06..9c25f330 100644
--- a/tommath_private.h
+++ b/tommath_private.h
@@ -234,8 +234,6 @@ MP_PRIVATE mp_err s_mp_radix_size_overestimate(const mp_int *a, const int radix,
 MP_PRIVATE mp_err s_mp_fp_log(const mp_int *a, mp_int *c) MP_WUR;
 MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR;
 
-MP_PRIVATE bool s_mp_cmpexch_n(void **ptr, void **expected, void *desired);
-
 #ifdef MP_SMALL_STACK_SIZE
 #define MP_SMALL_STACK_SIZE_C
 #define MP_ALLOC_WARRAY(name) *name = s_mp_warray_get()
@@ -247,19 +245,25 @@ MP_PRIVATE bool s_mp_cmpexch_n(void **ptr, void **expected, void *desired);
 #define MP_CHECK_WARRAY(name)
 #endif
 
-struct warray {
-   void *warray;
-};
+#if defined(_MSC_VER)
+#define mp_thread __declspec(thread)
+#elif defined(__GNUC__)
+#define mp_thread __thread
+#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
+#define mp_thread _Thread_local
+#else
+/* no known thread-local storage class: s_mp_warray is then shared by all threads */
+#define mp_thread
+#endif
+
 typedef struct {
-   struct warray *l_free, *l_used;
-   size_t allocated, usable;
+   void *w_free, *w_used;
 } st_warray;
 
-extern MP_PRIVATE st_warray s_mp_warray;
+extern MP_PRIVATE mp_thread st_warray s_mp_warray;
 
 MP_PRIVATE void *s_mp_warray_get(void);
 MP_PRIVATE void s_mp_warray_put(void *w);
-MP_PRIVATE void s_mp_warray_free(size_t n);
 
 #define MP_RADIX_MAP_REVERSE_SIZE 80u
 extern MP_PRIVATE const char s_mp_radix_map[];