diff --git a/ci/test_files_to_stash.txt b/ci/test_files_to_stash.txt index a0b2ce45386..f41929bdac5 100755 --- a/ci/test_files_to_stash.txt +++ b/ci/test_files_to_stash.txt @@ -40,6 +40,8 @@ build/*/*/src/common/tests/umem_test, build/*/*/src/common/tests/umem_test_bmem, build/*/*/src/bio/smd/tests/smd_ut, build/*/*/src/tests/rpc/rpc_tests, +build/*/*/src/engine/tests/abt_perf, +build/*/*/src/engine/tests/abt_stack, src/common/tests/btree.sh, src/control/run_go_tests.sh, src/rdb/raft_tests/raft_tests.py, diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 7b665316af0..f972f01f5a0 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -461,7 +461,6 @@ def __init__(self, env, opts): opts.Add('USE_INSTALLED', 'Comma separated list of preinstalled dependencies', 'none') opts.Add(('MPI_PKG', 'Specifies name of pkg-config to load for MPI', None)) opts.Add(BoolVariable('FIRMWARE_MGMT', 'Build in device firmware management.', False)) - opts.Add(BoolVariable('STACK_MMAP', 'Allocate ABT ULTs stacks with mmap()', False)) opts.Add(BoolVariable('STATIC_FUSE', "Build with static libfuse library", False)) opts.Add(EnumVariable('BUILD_TYPE', "Set the build type", 'release', ['dev', 'debug', 'release'], ignorecase=1)) diff --git a/src/common/SConscript b/src/common/SConscript index 9d4c522536e..1ea1ceeced9 100644 --- a/src/common/SConscript +++ b/src/common/SConscript @@ -15,9 +15,9 @@ COMMON_FILES = ['debug.c', 'mem.c', 'fail_loc.c', 'lru.c', def build_daos_common(denv, client): """ Building non-pmem version for client's common lib""" benv = denv.Clone() - stack_mmap_files = [] ad_mem_files = [] dav_src = [] + ult_files = [] common_libs = ['isal', 'isal_crypto', 'cart', 'gurt', 'lz4', 'protobuf-c', 'uuid', 'pthread'] if client: @@ -30,6 +30,7 @@ def build_daos_common(denv, client): 'dav/ravl_interval.c', 'dav/recycler.c', 'dav/stats.c', 'dav/tx.c', 'dav/ulog.c', 'dav/util.c', 'dav/wal_tx.c'] ad_mem_files = 
['ad_mem.c', 'ad_tx.c'] + ult_files = ['ult_errno.c'] common_libs.extend(['pmemobj', 'abt', 'dav_v2']) benv.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) benv.Append(CPPDEFINES=['-DDAOS_PMEM_BUILD']) @@ -37,11 +38,7 @@ def build_daos_common(denv, client): libname = 'daos_common_pmem' benv.require('pmdk', 'argobots') - if benv["STACK_MMAP"] == 1: - stack_mmap_files = ['stack_mmap.c'] - benv.Append(CCFLAGS=['-DULT_MMAP_STACK']) - - common = benv.d_library(libname, COMMON_FILES + dav_src + ad_mem_files + stack_mmap_files, + common = benv.d_library(libname, COMMON_FILES + dav_src + ad_mem_files + ult_files, LIBS=common_libs) benv.Install('$PREFIX/lib64/', common) return common diff --git a/src/common/stack_mmap.c b/src/common/stack_mmap.c deleted file mode 100644 index e0a409395d2..00000000000 --- a/src/common/stack_mmap.c +++ /dev/null @@ -1,342 +0,0 @@ -/** - * (C) Copyright 2021-2022 Intel Corporation. - * - * SPDX-License-Identifier: BSD-2-Clause-Patent - */ -/** - * \file - * - * This file implements an alternate/external way for ULTs stacks allocation. - * It is based on mmap() of MAP_STACK|MAP_GROWSDOWN regions, in order to - * allow overrun detection along with automatic growth capability. - */ - -#ifdef ULT_MMAP_STACK -#define D_LOGFAC DD_FAC(stack) - -#include -#include -#include -#include - -/* ABT_key for mmap()'ed ULT stacks */ -ABT_key stack_key; - -/* engine's (ie including all XStreams/stack-pools) max number of mmap()'ed - * ULTs stacks, to be based on vm.max_map_count minus an estimate of the - * non-stacks mmap()'ed regions required (where malloc() itself will use - * mmap() when allocating chunks of size > M_MMAP_THRESHOLD, and there is - * a M_MMAP_MAX maximum for such number of chunks, both can be updated - * dynamically using mallopt() !!...) for engine operations (including - * pre-reqs ...). 
- */ -int max_nb_mmap_stacks; - -/* engine's (ie including all XStreams/stack-pools) current number of mmap()'ed - * ULTs stacks, to be [in,de]cremented atomically and compared to - * max_nb_mmap_stacks - */ -ATOMIC int nb_mmap_stacks; - -/* engine's (ie including all XStreams/stack-pools) current number of free/queued - * mmap()'ed ULTs stacks, to be [in,de]cremented atomically and compared to - * max_nb_mmap_stacks - */ -ATOMIC int nb_free_stacks; - -/* mmap()'ed or Argobot's legacy/internal allocation method for ULT stacks ? */ -bool daos_ult_mmap_stack = true; - -/* one per supported ABT_thread_create[_...] API type */ -enum AbtThreadCreateType { - MAIN, - ON_XSTREAM -}; - -static int -call_abt_method(void *arg, enum AbtThreadCreateType flag, - void (*thread_func)(void *), void *thread_arg, - ABT_thread_attr attr, ABT_thread *newthread) -{ - int rc; - - if (flag == MAIN) { - rc = ABT_thread_create((ABT_pool)arg, thread_func, thread_arg, - attr, newthread); - } else if (flag == ON_XSTREAM) { - rc = ABT_thread_create_on_xstream((ABT_xstream)arg, thread_func, - thread_arg, attr, - newthread); - } else { - rc = ABT_ERR_INV_ARG; - D_ERROR("unsupported ABT_thread_create[_...]() API type\n"); - } - return rc; -} - -/* wrapper for ULT main function, mainly to register mmap()'ed stack - * descriptor as ABT_key to ensure stack pooling or munmap() upon ULT exit - */ -void mmap_stack_wrapper(void *arg) -{ - mmap_stack_desc_t *desc = (mmap_stack_desc_t *)arg; - - ABT_key_set(stack_key, desc); - - D_DEBUG(DB_MEM, - "New ULT with stack_desc %p running on CPU=%d\n", - desc, sched_getcpu()); - desc->thread_func(desc->thread_arg); -} - -static int -mmap_stack_thread_create_common(struct stack_pool *sp_alloc, void (*free_stack_cb)(void *), - enum AbtThreadCreateType flag, void *arg, - void (*thread_func)(void *), void *thread_arg, - ABT_thread_attr attr, ABT_thread *newthread) -{ - ABT_thread_attr new_attr = ABT_THREAD_ATTR_NULL; - int rc; - void *stack; - mmap_stack_desc_t 
*mmap_stack_desc = NULL; - size_t stack_size = MMAPED_ULT_STACK_SIZE, usable_stack_size; - - if (daos_ult_mmap_stack == false) { - /* let's use Argobots standard way ... */ - rc = call_abt_method(arg, flag, thread_func, thread_arg, - attr, newthread); - if (unlikely(rc != ABT_SUCCESS)) - D_ERROR("Failed to create ULT : %d\n", rc); - D_GOTO(out_err, rc); - } - - if (attr != ABT_THREAD_ATTR_NULL) { - ABT_thread_attr_get_stack(attr, &stack, &stack_size); - if (stack != NULL) { - /* an other external stack allocation method is being - * used, nothing to do, let's try Argobots standard way ... - */ - rc = call_abt_method(arg, flag, thread_func, thread_arg, - attr, newthread); - if (unlikely(rc != ABT_SUCCESS)) - D_ERROR("Failed to create ULT : %d\n", rc); - D_GOTO(out_err, rc); - } - /* only one mmap()'ed stack size allowed presently */ - if (stack_size > MMAPED_ULT_STACK_SIZE) - D_WARN("We do not support stacks > %u\n", MMAPED_ULT_STACK_SIZE); - stack_size = MMAPED_ULT_STACK_SIZE; - } else { - rc = ABT_thread_attr_create(&new_attr); - if (rc != ABT_SUCCESS) { - D_ERROR("Create ABT thread attr failed: %d\n", rc); - return rc; - } - attr = new_attr; - } - - /* XXX a stack is allocated from the creating XStream's stack pool - * but will be freed on the running one ... 
- */ - - if ((mmap_stack_desc = d_list_pop_entry(&sp_alloc->sp_stack_free_list, - mmap_stack_desc_t, - stack_list)) != NULL) { - D_ASSERT(sp_alloc->sp_free_stacks != 0); - --sp_alloc->sp_free_stacks; - atomic_fetch_sub(&nb_free_stacks, 1); - stack = mmap_stack_desc->stack; - stack_size = mmap_stack_desc->stack_size; - D_DEBUG(DB_MEM, - "mmap()'ed stack %p of size %zd from free list, in pool=%p, remaining free stacks in pool="DF_U64", on CPU=%d\n", - stack, stack_size, sp_alloc, sp_alloc->sp_free_stacks, sched_getcpu()); - } else { - /* XXX this test is racy, but if max_nb_mmap_stacks value is - * high enough it does not matter as we do not expect so many - * concurrent ULTs creations during mmap() syscall to cause - * nb_mmap_stacks to significantly exceed max_nb_mmap_stacks ... - */ - if (nb_mmap_stacks >= max_nb_mmap_stacks) { - D_INFO("nb_mmap_stacks (%d) > max_nb_mmap_stacks (%d), so using Argobots standard method for stack allocation\n", - nb_mmap_stacks, max_nb_mmap_stacks); - /* let's try Argobots standard way ... */ - rc = call_abt_method(arg, flag, thread_func, thread_arg, - attr, newthread); - if (unlikely(rc != ABT_SUCCESS)) - D_ERROR("Failed to create ULT : %d\n", rc); - D_GOTO(out_err, rc); - } - - stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK | MAP_GROWSDOWN, - -1, 0); - if (stack == MAP_FAILED) { - D_ERROR("Failed to mmap() stack of size %zd : %s, in pool=%p, on CPU=%d\n", - stack_size, strerror(errno), sp_alloc, sched_getcpu()); - /* let's try Argobots standard way ... 
*/ - rc = call_abt_method(arg, flag, thread_func, thread_arg, - attr, newthread); - if (unlikely(rc != ABT_SUCCESS)) - D_ERROR("Failed to create ULT : %d\n", rc); - D_GOTO(out_err, rc); - } - - atomic_fetch_add(&nb_mmap_stacks, 1); - - /* put descriptor at bottom of mmap()'ed stack */ - mmap_stack_desc = (mmap_stack_desc_t *)(stack + stack_size - - sizeof(mmap_stack_desc_t)); - - /* start to fill descriptor */ - mmap_stack_desc->stack = stack; - mmap_stack_desc->stack_size = stack_size; - /* store target XStream */ - mmap_stack_desc->sp = sp_alloc; - D_INIT_LIST_HEAD(&mmap_stack_desc->stack_list); - D_DEBUG(DB_MEM, - "mmap()'ed stack %p of size %zd has been allocated, in pool=%p, on CPU=%d\n", - stack, stack_size, sp_alloc, sched_getcpu()); - } - - /* continue to fill/update descriptor */ - mmap_stack_desc->thread_func = thread_func; - mmap_stack_desc->thread_arg = thread_arg; - mmap_stack_desc->free_stack_cb = free_stack_cb; - - /* usable stack size */ - usable_stack_size = stack_size - sizeof(mmap_stack_desc_t); - - rc = ABT_thread_attr_set_stack(attr, stack, usable_stack_size); - if (rc != ABT_SUCCESS) { - D_ERROR("Failed to set stack attrs : %d\n", rc); - D_GOTO(out_err, rc); - } - - /* XXX if newthread is set, we may need to use - * ABT_thread_set_specific() ?? 
- */ - rc = call_abt_method(arg, flag, mmap_stack_wrapper, mmap_stack_desc, - attr, newthread); - if (unlikely(rc != ABT_SUCCESS)) { - D_ERROR("Failed to create ULT : %d\n", rc); - D_GOTO(out_err, rc); - } -out_err: - if (rc && mmap_stack_desc != NULL) - free_stack(mmap_stack_desc); - /* free local attr if used */ - if (new_attr != ABT_THREAD_ATTR_NULL) - ABT_thread_attr_free(&new_attr); - return rc; -} - -/* XXX - * presently ABT_thread_create_[to,many]() are not used in DAOS code, but if it - * becomes we will also have to introduce a corresponding wrapper - */ - -int -mmap_stack_thread_create(struct stack_pool *sp_alloc, void (*free_stack_cb)(void *), - ABT_pool pool, void (*thread_func)(void *), void *thread_arg, - ABT_thread_attr attr, ABT_thread *newthread) -{ - return mmap_stack_thread_create_common(sp_alloc, free_stack_cb, MAIN, (void *)pool, thread_func, - thread_arg, attr, newthread); -} - -int -mmap_stack_thread_create_on_xstream(struct stack_pool *sp_alloc, void (*free_stack_cb)(void *), - ABT_xstream xstream, void (*thread_func)(void *), - void *thread_arg, ABT_thread_attr attr, - ABT_thread *newthread) -{ - return mmap_stack_thread_create_common(sp_alloc, free_stack_cb, ON_XSTREAM, (void *)xstream, - thread_func, thread_arg, attr, newthread); -} - -/* callback to free stack upon ULT exit during stack_key deregister */ -void -free_stack(void *arg) -{ - mmap_stack_desc_t *desc = (mmap_stack_desc_t *)arg; - struct stack_pool *sp; - int rc; - - if (desc->free_stack_cb != NULL) - desc->free_stack_cb(arg); - - /* callback may have re-evaluated pool where to free stack */ - sp = desc->sp; - - /* XXX - * We may need to reevaluate stack size since a growth may - * have occurred during previous context life time, if initial - * stack size has overflowed when there was no previous mapping - * in address space to trigger Kernel's stack guard gap - * (stack_guard_gap) ? This for both munmap() or queuing in - * free pool cases. 
- */ - - /* too many free stacks in pool ? */ - if (sp->sp_free_stacks > MAX_NUMBER_FREE_STACKS && - sp->sp_free_stacks / nb_mmap_stacks * 100 > MAX_PERCENT_FREE_STACKS) { - rc = munmap(desc->stack, desc->stack_size); - if (rc != 0) { - D_ERROR("Failed to munmap() %p stack of size %zd : %s\n", - desc->stack, desc->stack_size, strerror(errno)); - /* re-queue it on free list instead to leak it */ - d_list_add_tail(&desc->stack_list, &sp->sp_stack_free_list); - ++sp->sp_free_stacks; - atomic_fetch_add(&nb_free_stacks, 1); - } else { - atomic_fetch_sub(&nb_mmap_stacks, 1); - D_DEBUG(DB_MEM, - "mmap()'ed stack %p of size %zd munmap()'ed, in pool=%p, remaining free stacks in pool="DF_U64", on CPU=%d\n", - desc->stack, desc->stack_size, sp, sp->sp_free_stacks, - sched_getcpu()); - } - } else { - d_list_add_tail(&desc->stack_list, &sp->sp_stack_free_list); - ++sp->sp_free_stacks; - atomic_fetch_add(&nb_free_stacks, 1); - D_DEBUG(DB_MEM, - "mmap()'ed stack %p of size %zd on free list, in pool=%p, remaining free stacks in pool="DF_U64", on CPU=%d\n", - desc->stack, desc->stack_size, sp, sp->sp_free_stacks, - sched_getcpu()); - } -} - -int -stack_pool_create(struct stack_pool **sp) -{ - D_ALLOC(*sp, sizeof(struct stack_pool)); - if (*sp == NULL) { - D_DEBUG(DB_MEM, "unable to allocate a stack pool\n"); - return -DER_NOMEM; - } - (*sp)->sp_free_stacks = 0; - D_INIT_LIST_HEAD(&(*sp)->sp_stack_free_list); - D_DEBUG(DB_MEM, "pool %p has been allocated\n", *sp); - return 0; -} - -void stack_pool_destroy(struct stack_pool *sp) -{ - mmap_stack_desc_t *desc; - int rc; - - while ((desc = d_list_pop_entry(&sp->sp_stack_free_list, mmap_stack_desc_t, stack_list)) != NULL) { - D_DEBUG(DB_MEM, "munmap() of pool %p, desc %p, stack %p of size %zu, ", - sp, desc, desc->stack, desc->stack_size); - rc = munmap(desc->stack, desc->stack_size); - D_DEBUG(DB_MEM, "has been %ssuccessfully munmap()'ed%s%s\n", - (rc ? "un" : ""), (rc ? " : " : ""), (rc ? 
strerror(errno) : "")); - --sp->sp_free_stacks; - atomic_fetch_sub(&nb_mmap_stacks, 1); - atomic_fetch_sub(&nb_free_stacks, 1); - } - D_ASSERT(sp->sp_free_stacks == 0); - D_DEBUG(DB_MEM, "pool %p has been freed\n", sp); - D_FREE(sp); -} -#endif diff --git a/src/common/tests/SConscript b/src/common/tests/SConscript index a99dc6d4801..529954f0898 100644 --- a/src/common/tests/SConscript +++ b/src/common/tests/SConscript @@ -25,11 +25,6 @@ def scons(): tenv.d_test_program('lru', 'lru.c', LIBS=['daos_common_pmem', 'gurt', 'cart']) tenv.d_test_program('sched', 'sched.c', LIBS=['daos_common', 'gurt', 'cart', 'cmocka', 'pthread']) - new_env = tenv.Clone() - if tenv["STACK_MMAP"] == 1: - new_env.Append(CCFLAGS=['-DULT_MMAP_STACK']) - new_env.d_test_program('abt_perf', 'abt_perf.c', - LIBS=['daos_common_pmem', 'gurt', 'abt']) tenv.d_test_program('acl_real_tests', 'acl_util_real_tests.c', LIBS=['daos_common', 'gurt', 'cmocka']) tenv.d_test_program('prop_tests', 'prop_tests.c', diff --git a/src/common/ult_errno.c b/src/common/ult_errno.c new file mode 100644 index 00000000000..f376da7ecd2 --- /dev/null +++ b/src/common/ult_errno.c @@ -0,0 +1,145 @@ +/** + * (C) Copyright 2016-2024 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#define D_LOGFAC DD_FAC(server) + +#include + +const char * +dss_abterr2str(int rc) +{ + static const char *err_str[] = {"ABT_SUCCESS", + "ABT_ERR_UNINITIALIZED", + "ABT_ERR_MEM", + "ABT_ERR_OTHER", + "ABT_ERR_INV_XSTREAM", + "ABT_ERR_INV_XSTREAM_RANK", + "ABT_ERR_INV_XSTREAM_BARRIER", + "ABT_ERR_INV_SCHED", + "ABT_ERR_INV_SCHED_KIND", + "ABT_ERR_INV_SCHED_PREDEF", + "ABT_ERR_INV_SCHED_TYPE", + "ABT_ERR_INV_SCHED_CONFIG", + "ABT_ERR_INV_POOL", + "ABT_ERR_INV_POOL_KIND", + "ABT_ERR_INV_POOL_ACCESS", + "ABT_ERR_INV_UNIT", + "ABT_ERR_INV_THREAD", + "ABT_ERR_INV_THREAD_ATTR", + "ABT_ERR_INV_TASK", + "ABT_ERR_INV_KEY", + "ABT_ERR_INV_MUTEX", + "ABT_ERR_INV_MUTEX_ATTR", + "ABT_ERR_INV_COND", + "ABT_ERR_INV_RWLOCK", + "ABT_ERR_INV_EVENTUAL", + "ABT_ERR_INV_FUTURE", + "ABT_ERR_INV_BARRIER", + "ABT_ERR_INV_TIMER", + "ABT_ERR_INV_QUERY_KIND", + "ABT_ERR_XSTREAM", + "ABT_ERR_XSTREAM_STATE", + "ABT_ERR_XSTREAM_BARRIER", + "ABT_ERR_SCHED", + "ABT_ERR_SCHED_CONFIG", + "ABT_ERR_POOL", + "ABT_ERR_UNIT", + "ABT_ERR_THREAD", + "ABT_ERR_TASK", + "ABT_ERR_KEY", + "ABT_ERR_MUTEX", + "ABT_ERR_MUTEX_LOCKED", + "ABT_ERR_COND", + "ABT_ERR_COND_TIMEDOUT", + "ABT_ERR_RWLOCK", + "ABT_ERR_EVENTUAL", + "ABT_ERR_FUTURE", + "ABT_ERR_BARRIER", + "ABT_ERR_TIMER", + "ABT_ERR_MIGRATION_TARGET", + "ABT_ERR_MIGRATION_NA", + "ABT_ERR_MISSING_JOIN", + "ABT_ERR_FEATURE_NA", + "ABT_ERR_INV_TOOL_CONTEXT", + "ABT_ERR_INV_ARG", + "ABT_ERR_SYS", + "ABT_ERR_CPUID", + "ABT_ERR_INV_POOL_CONFIG", + "ABT_ERR_INV_POOL_USER_DEF"}; + + if (rc < 0 || rc >= sizeof(err_str) / sizeof(err_str[0])) + rc = ABT_ERR_OTHER; + + return err_str[rc]; +} + +const char * +dss_abterr2desc(int rc) +{ + static const char *err_desc[] = {"The routine returns successfully", + "Argobots is not initialized", + "Memory allocation failure", + "Other error", + "Invalid execution stream", + "Invalid execution stream rank", + "Invalid execution stream barrier", + "Invalid scheduler", + 
"Invalid scheduler kind", + "Invalid predefined scheduler type", + "Deprecated error code", + "Invalid scheduler configuration", + "Invalid pool", + "Invalid pool kind", + "Invalid pool access type", + "Invalid work unit for scheduling", + "Invalid work unit", + "Invalid ULT attribute", + "Invalid work unit", + "Invalid work-unit-specific data key", + "Invalid mutex", + "Invalid mutex attribute", + "Invalid condition variable", + "Invalid readers-writer lock", + "Invalid eventual", + "Invalid future", + "Invalid barrier", + "Invalid timer", + "Invalid query kind", + "Error related to an execution stream", + "Error related to an execution stream state", + "Error related to an execution stream", + "Error related to a scheduler", + "Error related to a scheduler configuration", + "Error related to a pool", + "Error related to a work unit for scheduling", + "Error related to a work unit", + "Error related to a work unit", + "Error related to a work-unit-specific data key", + "Error related to a mutex", + "A return value when a mutex is locked", + "Error related to a condition variable", + "A return value when a condition variable is timed out", + "Error related to a readers-writer lock", + "Error related to an eventual", + "Error related to a future", + "Error related to a barrier", + "Error related to a timer", + "Error related to a migration target", + "Migration is not supported", + "Deprecated error code", + "Unsupported feature", + "Invalid tool context", + "Invalid user argument", + "Error related to system calls and standard libraries", + "Error related to CPU ID", + "Invalid pool configuration", + "Invalid pool definition"}; + + if (rc < 0 || rc >= sizeof(err_desc) / sizeof(err_desc[0])) + rc = ABT_ERR_OTHER; + + return err_desc[rc]; +} diff --git a/src/engine/SConscript b/src/engine/SConscript index c837e83022f..c7580742e0a 100644 --- a/src/engine/SConscript +++ b/src/engine/SConscript @@ -34,9 +34,6 @@ def scons(): 'sched.c', 'ult.c', 'event.pb-c.c', 
'srv_metrics.c'] + libdaos_tgts - if denv["STACK_MMAP"] == 1: - denv.Append(CCFLAGS=['-DULT_MMAP_STACK']) - # Generate I/O Engine program engine = denv.d_program('daos_engine', sources, LIBS=libraries) denv.Install('$PREFIX/bin', engine) diff --git a/src/engine/init.c b/src/engine/init.c index 5b8ba437327..43a401c0c90 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -579,51 +579,6 @@ abt_init(int argc, char *argv[]) return dss_abterr2der(rc); } -#ifdef ULT_MMAP_STACK - FILE *fp; - - /* read vm.max_map_count from /proc instead of using sysctl() API - * as it seems the preferred way ... - */ - fp = fopen("/proc/sys/vm/max_map_count", "r"); - if (fp == NULL) { - D_ERROR("Unable to open /proc/sys/vm/max_map_count: %s\n", - strerror(errno)); - } else { - int n; - - n = fscanf(fp, "%d", &max_nb_mmap_stacks); - if (n == EOF) { - D_ERROR("Unable to read vm.max_map_count value: %s\n", - strerror(errno)); - /* just in case, to ensure value can be later safely - * compared and thus no ULT stack be mmap()'ed - */ - max_nb_mmap_stacks = 0; - } else { - /* need a minimum value to start mmap() ULT stacks */ - if (max_nb_mmap_stacks < MIN_VM_MAX_MAP_COUNT) { - D_WARN("vm.max_map_count (%d) value is too low (< %d) to start mmap() ULT stacks\n", - max_nb_mmap_stacks, MIN_VM_MAX_MAP_COUNT); - max_nb_mmap_stacks = 0; - } else { - /* consider half can be used to mmap() ULT - * stacks - */ - max_nb_mmap_stacks /= 2; - D_INFO("Will be able to mmap() %d ULT stacks\n", - max_nb_mmap_stacks); - } - } - } - - rc = ABT_key_create(free_stack, &stack_key); - if (rc != ABT_SUCCESS) { - D_ERROR("ABT key for stack create failed: %d\n", rc); - ABT_finalize(); - return dss_abterr2der(rc); - } -#endif dss_abt_init = true; return 0; @@ -632,9 +587,6 @@ abt_init(int argc, char *argv[]) static void abt_fini(void) { -#ifdef ULT_MMAP_STACK - ABT_key_free(&stack_key); -#endif dss_abt_init = false; ABT_finalize(); } diff --git a/src/engine/sched.c b/src/engine/sched.c index 49a46ca3618..e030bc8f74b 
100644 --- a/src/engine/sched.c +++ b/src/engine/sched.c @@ -2161,10 +2161,7 @@ sched_watchdog_prep(struct dss_xstream *dx, ABT_unit unit) { struct sched_info *info = &dx->dx_sched_info; ABT_thread thread; - void (*thread_func)(void *); -#ifdef ULT_MMAP_STACK - mmap_stack_desc_t *desc; -#endif + void (*thread_func)(void *); int rc; if (!watchdog_enabled(dx)) @@ -2175,18 +2172,6 @@ sched_watchdog_prep(struct dss_xstream *dx, ABT_unit unit) D_ASSERT(rc == ABT_SUCCESS); rc = ABT_thread_get_thread_func(thread, &thread_func); D_ASSERT(rc == ABT_SUCCESS); -#ifdef ULT_MMAP_STACK - /* has ULT stack been allocated using mmap() or using - * Argobots standard way ? With the later case the ULT - * argument could not be used to address the mmap()'ed - * stack descriptor ! - */ - if (likely(thread_func == mmap_stack_wrapper)) { - rc = ABT_thread_get_arg(thread, (void **)&desc); - D_ASSERT(rc == ABT_SUCCESS); - thread_func = desc->thread_func; - } -#endif info->si_ult_func = thread_func; } diff --git a/src/engine/srv.c b/src/engine/srv.c index 9461a18e9d9..a85234b50ca 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -512,8 +512,8 @@ dss_srv_handler(void *arg) D_GOTO(nvme_fini, rc = dss_abterr2der(rc)); } - rc = daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_NVME_POLL], - dss_nvme_poll_ult, NULL, attr, NULL); + rc = ABT_thread_create(dx->dx_pools[DSS_POOL_NVME_POLL], dss_nvme_poll_ult, NULL, + attr, NULL); ABT_thread_attr_free(&attr); if (rc != ABT_SUCCESS) { D_ERROR("create NVMe poll ULT failed: %d\n", rc); @@ -622,16 +622,6 @@ dss_xstream_alloc(hwloc_cpuset_t cpus) return NULL; } -#ifdef ULT_MMAP_STACK - if (daos_ult_mmap_stack == true) { - rc = stack_pool_create(&dx->dx_sp); - if (rc != 0) { - D_ERROR("failed to create stack pool\n"); - D_GOTO(err_free, rc); - } - } -#endif - dx->dx_stopping = ABT_FUTURE_NULL; dx->dx_shutdown = ABT_FUTURE_NULL; @@ -675,14 +665,6 @@ dss_xstream_alloc(hwloc_cpuset_t cpus) static inline void 
dss_xstream_free(struct dss_xstream *dx) { -#ifdef ULT_MMAP_STACK - struct stack_pool *sp = dx->dx_sp; - - if (daos_ult_mmap_stack == true) { - stack_pool_destroy(sp); - dx->dx_sp = NULL; - } -#endif hwloc_bitmap_free(dx->dx_cpuset); D_FREE(dx); } @@ -850,9 +832,8 @@ dss_start_one_xstream(hwloc_cpuset_t cpus, int tag, int xs_id) } /** start progress ULT */ - rc = daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_NET_POLL], - dss_srv_handler, dx, attr, - &dx->dx_progress); + rc = ABT_thread_create(dx->dx_pools[DSS_POOL_NET_POLL], dss_srv_handler, dx, attr, + &dx->dx_progress); if (rc != ABT_SUCCESS) { D_ERROR("create progress ULT failed: %d\n", rc); D_GOTO(out_xstream, rc = dss_abterr2der(rc)); @@ -1086,12 +1067,6 @@ dss_xstreams_init(void) if (sched_prio_disabled) D_INFO("ULT prioritizing is disabled.\n"); -#ifdef ULT_MMAP_STACK - d_getenv_bool("DAOS_ULT_MMAP_STACK", &daos_ult_mmap_stack); - if (daos_ult_mmap_stack == false) - D_INFO("ULT mmap()'ed stack allocation is disabled.\n"); -#endif - d_getenv_uint("DAOS_SCHED_RELAX_INTVL", &sched_relax_intvl); if (sched_relax_intvl == 0 || sched_relax_intvl > SCHED_RELAX_INTVL_MAX) { diff --git a/src/engine/srv_internal.h b/src/engine/srv_internal.h index 222f07e4906..4e602d05111 100644 --- a/src/engine/srv_internal.h +++ b/src/engine/srv_internal.h @@ -7,7 +7,6 @@ #define __DAOS_SRV_INTERNAL__ #include -#include #include #include @@ -105,11 +104,7 @@ struct dss_xstream { bool dx_comm; /* true with cart context */ bool dx_iofw; /* true for DSS_XS_IOFW XS */ bool dx_dsc_started; /* DSC progress ULT started */ - struct mem_stats dx_mem_stats; /* memory usages stats on this xstream */ -#ifdef ULT_MMAP_STACK - /* per-xstream pool/list of free stacks */ - struct stack_pool *dx_sp; -#endif + struct mem_stats dx_mem_stats; /* memory usages stats on this xstream */ bool dx_progress_started; /* Network poll started */ int dx_tag; /** tag for xstream */ struct dss_chore_queue dx_chore_queue; @@ -288,43 
+283,14 @@ sched_create_task(struct dss_xstream *dx, void (*func)(void *), void *arg, return dss_abterr2der(rc); } -#ifdef ULT_MMAP_STACK -/* callback to ensure stack will be freed in exiting-ULT/current-XStream pool */ -static inline void -dss_free_stack_cb(void *arg) -{ - mmap_stack_desc_t *desc = (mmap_stack_desc_t *)arg; - /* main thread doesn't have TLS and XS */ - struct dss_xstream *dx = dss_tls_get() ? dss_current_xstream() : NULL; - - /* ensure pool where to free stack is from current-XStream/ULT-exiting */ - if (dx != NULL) - desc->sp = dx->dx_sp; - -} -#else -#define dss_free_stack_cb NULL -#endif - static inline int sched_create_thread(struct dss_xstream *dx, void (*func)(void *), void *arg, ABT_thread_attr t_attr, ABT_thread *thread, unsigned int flags) { - ABT_pool abt_pool = dx->dx_pools[DSS_POOL_GENERIC]; - struct sched_info *info = &dx->dx_sched_info; - int rc; -#ifdef ULT_MMAP_STACK - bool tls_set = dss_tls_get() ? true : false; - struct dss_xstream *cur_dx = NULL; - - if (tls_set) - cur_dx = dss_current_xstream(); - - /* if possible,stack should be allocated from launching XStream pool */ - if (cur_dx == NULL) - cur_dx = dx; -#endif + ABT_pool abt_pool = dx->dx_pools[DSS_POOL_GENERIC]; + struct sched_info *info = &dx->dx_sched_info; + int rc; if (sched_xstream_stopping()) return -DER_SHUTDOWN; @@ -334,7 +300,7 @@ sched_create_thread(struct dss_xstream *dx, void (*func)(void *), void *arg, /* Atomic integer assignment from different xstream */ info->si_stats.ss_busy_ts = info->si_cur_ts; - rc = daos_abt_thread_create(cur_dx->dx_sp, dss_free_stack_cb, abt_pool, func, arg, t_attr, thread); + rc = ABT_thread_create(abt_pool, func, arg, t_attr, thread); return dss_abterr2der(rc); } diff --git a/src/engine/tests/SConscript b/src/engine/tests/SConscript index b5560a71b96..9f7da2117f9 100644 --- a/src/engine/tests/SConscript +++ b/src/engine/tests/SConscript @@ -34,6 +34,16 @@ def scons(): LIBS=['daos_common', 'protobuf-c', 'gurt', 'cmocka', 'uuid', 
'pthread', 'abt', 'cart']) + abt_tenv = denv.Clone() + abt_tenv.AppendUnique(OBJPREFIX='utest_') + abt_tenv.AppendUnique(CPPDEFINES=['-DDAOS_PMEM_BUILD']) + libraries = ['abt', 'daos_common_pmem', 'gurt'] + abt_tenv.require('argobots') + + abt_tenv.d_test_program('abt_perf', 'abt_perf.c', LIBS=libraries) + + abt_tenv.d_test_program('abt_stack', 'abt_stack.c', LIBS=libraries) + if __name__ == "SCons.Script": scons() diff --git a/src/common/tests/abt_perf.c b/src/engine/tests/abt_perf.c similarity index 55% rename from src/common/tests/abt_perf.c rename to src/engine/tests/abt_perf.c index b6c1adb7cdf..c4df750930f 100644 --- a/src/common/tests/abt_perf.c +++ b/src/engine/tests/abt_perf.c @@ -1,38 +1,63 @@ /** - * (C) Copyright 2017-2021 Intel Corporation. + * (C) Copyright 2017-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ -#define D_LOGFAC DD_FAC(tests) +#define D_LOGFAC DD_FAC(tests) -#include -#include #include #include -#ifdef ULT_MMAP_STACK -#include -#endif - -static unsigned long abt_cntr; -static int abt_ults; -static bool abt_waiting; -static bool abt_exiting; - -static ABT_pool abt_pool; -static ABT_cond abt_cond; -static ABT_mutex abt_lock; -static ABT_xstream abt_xstream; -static ABT_thread_attr abt_attr = ABT_THREAD_ATTR_NULL; -static char *abt_name; - -static int opt_concur = 1; -static int opt_secs; -static int opt_stack; -static int opt_cr_type; -#ifdef ULT_MMAP_STACK -static int opt_mmap; -static struct stack_pool *sp; -#endif +#include + +#include +#include +#include + +static unsigned long abt_cntr; +static int abt_ults; +static bool abt_waiting; +static bool abt_exiting; + +static ABT_pool abt_pool; +static ABT_cond abt_cond; +static ABT_mutex abt_lock; +static ABT_xstream abt_xstream; +static ABT_thread_attr abt_attr = ABT_THREAD_ATTR_NULL; +static char *abt_name; + +static int opt_concur = 1; +static int opt_secs = 0; +static ssize_t opt_stack = -1; +static int opt_cr_type; + +static void +usage(char *name, FILE 
*out) +{ + fprintf(out, + "Usage:\n" + "\t%s -t test_id -s sec [-n num_ult] [-S stack_size]\n" + "\t%s -h\n" + "\n" + "Options:\n" + "\t--test=, -t \n" + "\t\tIdentifier of the test to run:\n" + "\t\t\tc: ULT creation test\n" + "\t\t\ts: ULT scheduling test\n" + "\t\t\tm: mutex creation test\n" + "\t\t\tw: rwlock creation test\n" + "\t\t\te: eventual creation test\n" + "\t\t\td: condition creation test\n" + "\t--sec=, -s \n" + "\t\tDuration in seconds of the test\n" + "\t--num=, -n \n" + "\t\tNumber of concurrent creation for ULT creation test\n" + "\t\tNumber of ULT to schedule for ULT scheduling test\n" + "\t--stack=, -S \n" + "\t\tULT stack size\n" + "\t--help, -h\n" + "\t\tPrint this description\n", + name, name); +} static inline uint64_t abt_current_ms(void) @@ -53,12 +78,6 @@ abt_thread_1(void *arg) abt_cntr++; ABT_mutex_unlock(abt_lock); -#ifdef ULT_MMAP_STACK - if (opt_mmap) - mmap_stack_thread_create(sp, NULL, abt_pool, abt_thread_1, NULL, - abt_attr, NULL); - else -#endif ABT_thread_create(abt_pool, abt_thread_1, NULL, abt_attr, NULL); ABT_mutex_lock(abt_lock); @@ -79,11 +98,11 @@ abt_thread_1(void *arg) static void abt_ult_create_rate(void) { - uint64_t then; - uint64_t now; - uint64_t prt; - int rc; - int nsec = 0; + uint64_t then; + uint64_t now; + uint64_t prt; + int rc; + int nsec = 0; prt = now = then = abt_current_ms(); while (1) { @@ -116,23 +135,15 @@ abt_ult_create_rate(void) abt_cntr++; ABT_mutex_unlock(abt_lock); -#ifdef ULT_MMAP_STACK - if (opt_mmap) - rc = mmap_stack_thread_create(sp, NULL, abt_pool, abt_thread_1, - NULL, abt_attr, NULL); - else -#endif - rc = ABT_thread_create(abt_pool, abt_thread_1, NULL, - abt_attr, NULL); + rc = ABT_thread_create(abt_pool, abt_thread_1, NULL, abt_attr, NULL); if (rc != ABT_SUCCESS) { - printf("ABT thread create failed: %d\n", rc); + fprintf(stderr, "ABT thread create failed: " AF_RC "\n", AP_RC(rc)); return; } if (now - prt >= 1000) { nsec++; - printf("Created %lu threads in %d seconds\n", - abt_cntr, 
nsec); + printf("Created %lu threads in %d seconds\n", abt_cntr, nsec); prt = now; } ABT_thread_yield(); @@ -162,9 +173,9 @@ abt_thread_2(void *arg) static void abt_sched_rate(void) { - uint64_t then = 0; - uint64_t now; - int rc; + uint64_t then = 0; + uint64_t now; + int rc; while (1) { if (then && !abt_exiting) { @@ -199,16 +210,7 @@ abt_sched_rate(void) abt_ults++; ABT_mutex_unlock(abt_lock); -#ifdef ULT_MMAP_STACK - if (opt_mmap) - rc = mmap_stack_thread_create(sp, NULL, abt_pool, abt_thread_2, - NULL, - ABT_THREAD_ATTR_NULL, - NULL); - else -#endif - rc = ABT_thread_create(abt_pool, abt_thread_2, NULL, - ABT_THREAD_ATTR_NULL, NULL); + rc = ABT_thread_create(abt_pool, abt_thread_2, NULL, ABT_THREAD_ATTR_NULL, NULL); if (rc != ABT_SUCCESS) { printf("ABT thread create failed: %d\n", rc); ABT_mutex_lock(abt_lock); @@ -230,13 +232,13 @@ enum { static void abt_lock_create_rate(void *arg) { - ABT_mutex mutex; - ABT_cond cond; - ABT_rwlock rwlock; - ABT_eventual eventual; - uint64_t then; - uint64_t now; - int rc; + ABT_mutex mutex; + ABT_cond cond; + ABT_rwlock rwlock; + ABT_eventual eventual; + uint64_t then; + uint64_t now; + int rc; then = abt_current_ms(); while (1) { @@ -276,8 +278,7 @@ abt_lock_create_rate(void *arg) } abt_cntr++; } - printf("ABT %s creation rate = %lu/sec.\n", - abt_name, abt_cntr / opt_secs); + printf("ABT %s creation rate = %lu/sec.\n", abt_name, abt_cntr / opt_secs); ABT_mutex_lock(abt_lock); if (abt_waiting) { @@ -290,46 +291,26 @@ abt_lock_create_rate(void *arg) static void abt_reset(void) { - abt_cntr = 0; - abt_ults = 0; - abt_exiting = false; - abt_waiting = false; + abt_cntr = 0; + abt_ults = 0; + abt_exiting = false; + abt_waiting = false; } static struct option abt_ops[] = { - /** - * test-id: - * m = mutext creation - * e = eventual creation - * d = condition creation - */ - { "test", required_argument, NULL, 't' }, - /** - * if test-id is 'c', it is the number of concurrent creation - * if test-id is 's', it is the total number 
of running ULTs - */ - { "num", required_argument, NULL, 'n' }, - /** test duration in seconds. */ - { "sec", required_argument, NULL, 's' }, - /** stack size (kilo-bytes) */ - { "stack", required_argument, NULL, 'S' }, -#ifdef ULT_MMAP_STACK - { "mmap", no_argument, NULL, 'm' }, -#endif -}; + {"test", required_argument, NULL, 't'}, {"num", required_argument, NULL, 'n'}, + {"sec", required_argument, NULL, 's'}, {"stack", required_argument, NULL, 'S'}, + {"help", no_argument, NULL, 'h'}, {0, 0, 0, 0}}; int main(int argc, char **argv) { - char test_id = 0; - int rc; + char test_id = 0; + const char *optstr = "t:n:s:S:m:h"; + int rc; - while ((rc = getopt_long(argc, argv, "t:n:s:S:", - abt_ops, NULL)) != -1) { + while ((rc = getopt_long(argc, argv, optstr, abt_ops, NULL)) != -1) { switch (rc) { - default: - fprintf(stderr, "unknown opc=%c\n", rc); - exit(-1); case 't': test_id = *optarg; break; @@ -343,140 +324,122 @@ main(int argc, char **argv) opt_stack = atoi(optarg); opt_stack <<= 10; /* kilo-byte */ break; -#ifdef ULT_MMAP_STACK - case 'm': - opt_mmap = true; + case 'h': + usage(argv[0], stdout); + exit(EXIT_SUCCESS); + break; + default: + usage(argv[0], stderr); + exit(EXIT_FAILURE); break; -#endif } } - if (opt_secs == 0) { - printf("invalid sec=%s\n", argv[1]); - return -1; + if ((test_id == 'c' || test_id == 's') && opt_secs <= 0) { + fprintf(stderr, "Missing test duration or invalid value.\n"); + usage(argv[0], stderr); + exit(EXIT_FAILURE); } - if (opt_concur == 0) { - printf("invalid ABT threads=%s\n", argv[2]); - return -1; + if (opt_concur <= 0) { + fprintf(stderr, "Missing number of ULTs or invalid value.\n"); + usage(argv[0], stderr); + exit(EXIT_FAILURE); } - printf("Create ABT threads for %d seconds, concur=%d\n", - opt_secs, opt_concur); + rc = daos_debug_init_ex("/dev/stdout", DLOG_INFO); + if (rc != 0) { + fprintf(stderr, "unable to create DAOS debug facities: " DF_RC "\n", DP_RC(rc)); + exit(EXIT_FAILURE); + } rc = ABT_init(0, NULL); if (rc != 
ABT_SUCCESS) { - printf("ABT init failed: %d\n", rc); - return -1; + fprintf(stderr, "Failed to init ABT: " AF_RC "\n", AP_RC(rc)); + exit(EXIT_FAILURE); } rc = ABT_xstream_self(&abt_xstream); if (rc != ABT_SUCCESS) { - printf("ABT get self xstream failed: %d\n", rc); - return -1; + fprintf(stderr, "ABT get self xstream failed: " AF_RC "\n", AP_RC(rc)); + exit(EXIT_FAILURE); } rc = ABT_xstream_get_main_pools(abt_xstream, 1, &abt_pool); if (rc != ABT_SUCCESS) { - printf("ABT pool get failed: %d\n", rc); - return -1; + fprintf(stderr, "ABT pool get failed: " AF_RC "\n", AP_RC(rc)); + exit(EXIT_FAILURE); } rc = ABT_cond_create(&abt_cond); if (rc != ABT_SUCCESS) { - printf("ABT cond create failed: %d\n", rc); - return -1; + fprintf(stderr, "ABT cond create failed: " AF_RC "\n", AP_RC(rc)); + exit(EXIT_FAILURE); } rc = ABT_mutex_create(&abt_lock); if (rc != ABT_SUCCESS) { - printf("ABT mutex create failed: %d\n", rc); - return -1; + fprintf(stderr, "ABT mutex create failed: " AF_RC "\n", AP_RC(rc)); + exit(EXIT_FAILURE); } if (opt_stack > 0) { rc = ABT_thread_attr_create(&abt_attr); if (rc != ABT_SUCCESS) { - printf("ABT thread attr create failed: %d\n", rc); - return -1; + fprintf(stderr, "ABT thread attr create failed: " AF_RC "\n", AP_RC(rc)); + exit(EXIT_FAILURE); } rc = ABT_thread_attr_set_stacksize(abt_attr, opt_stack); - D_ASSERT(rc == ABT_SUCCESS); -#ifdef ULT_MMAP_STACK - if (opt_mmap) - printf("mmap()'ed ULT stack size = %d\n", - max(opt_stack, MMAPED_ULT_STACK_SIZE)); - else - printf("ULT stack size = %d\n", opt_stack); -#else - printf("ULT stack size = %d\n", opt_stack); -#endif + if (rc != ABT_SUCCESS) { + fprintf(stderr, "Setting ABT thread stack size to %zd failed: " AF_RC "\n", + opt_stack, AP_RC(rc)); + exit(EXIT_FAILURE); + } + printf("ULT stack size = %zd\n", opt_stack); } else { -#ifdef ULT_MMAP_STACK - if (opt_mmap) - printf("mmap()'ed ULT stack size = %d\n", - MMAPED_ULT_STACK_SIZE); - else - printf("ULT stack size = default ABT ULT stack size\n"); 
-#else printf("ULT stack size = default ABT ULT stack size\n"); -#endif } -#ifdef ULT_MMAP_STACK - rc = stack_pool_create(&sp); - if (rc) { - fprintf(stderr, "unable to create stack pool: %d\n", rc); - return -1; - } -#endif switch (test_id) { default: break; case 'c': - printf("ULT create rate test (concur=%d, secs=%d)\n", - opt_concur, opt_secs); + printf("ULT create rate test (concur=%d, secs=%d)\n", opt_concur, opt_secs); abt_ult_create_rate(); goto out; case 's': - printf("ULT scheduling rate test (ULTs=%d, secs=%d)\n", - opt_concur, opt_secs); + printf("ULT scheduling rate test (ULTs=%d, secs=%d)\n", opt_concur, opt_secs); abt_sched_rate(); goto out; case 'm': printf("mutex creation rate test (secs=%d)\n", opt_secs); opt_cr_type = CR_MUTEX; - abt_name = "mutex"; + abt_name = "mutex"; break; case 'w': printf("rwlock creation rate test (secs=%d)\n", opt_secs); opt_cr_type = CR_RWLOCK; - abt_name = "rwlock"; + abt_name = "rwlock"; break; case 'e': - printf("eventual creation rate test within ULT (secs=%d)\n", - opt_secs); + printf("eventual creation rate test within ULT (secs=%d)\n", opt_secs); opt_cr_type = CR_EVENTUAL; - abt_name = "eventual"; + abt_name = "eventual"; break; case 'd': - printf("condition creation rate test within ULT (secs=%d)\n", - opt_secs); + printf("condition creation rate test within ULT (secs=%d)\n", opt_secs); opt_cr_type = CR_COND; - abt_name = "cond"; + abt_name = "cond"; break; } abt_waiting = true; -#ifdef ULT_MMAP_STACK - if (opt_mmap) - rc = mmap_stack_thread_create(sp, NULL, abt_pool, abt_lock_create_rate, - NULL, ABT_THREAD_ATTR_NULL, NULL); - else -#endif - rc = ABT_thread_create(abt_pool, abt_lock_create_rate, NULL, - ABT_THREAD_ATTR_NULL, NULL); + rc = ABT_thread_create(abt_pool, abt_lock_create_rate, NULL, ABT_THREAD_ATTR_NULL, NULL); + if (rc != ABT_SUCCESS) { + fprintf(stderr, "ABT thread create failed: " AF_RC "\n", AP_RC(rc)); + exit(EXIT_FAILURE); + } ABT_mutex_lock(abt_lock); if (abt_waiting) @@ -490,8 +453,7 @@ 
main(int argc, char **argv) ABT_mutex_free(&abt_lock); ABT_cond_free(&abt_cond); ABT_finalize(); -#ifdef ULT_MMAP_STACK - stack_pool_destroy(sp); -#endif + daos_debug_fini(); + return 0; } diff --git a/src/engine/tests/abt_stack.c b/src/engine/tests/abt_stack.c new file mode 100644 index 00000000000..76783dd3f0d --- /dev/null +++ b/src/engine/tests/abt_stack.c @@ -0,0 +1,190 @@ +/** + * (C) Copyright 2017-2024 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#define D_LOGFAC DD_FAC(tests) + +#include +#include +#include +#include +#include +#include + +#include +#include + +static size_t g_stack_size = 0; +static size_t g_total_size = 0; +static void *g_stack_start = NULL; +static void *g_stack_end = NULL; +static bool g_check_overflow = false; + +static void +usage(char *name, FILE *out) +{ + fprintf(out, + "Usage:\n" + "\t%s [-c] [-p] [-t] [-s stack_size] [-S var_size]\n" + "\t%s -h\n" + "\n" + "Options:\n" + "\t--check-overflow, -c\n" + "\t\tCheck if the stack was overflowed\n" + "\t--on-pool, -p\n" + "\t\tCreate ULT thread on ABT pool\n" + "\t--unnamed-thread, -u\n" + "\t\tCreate an unnamed ULT thread\n" + "\t--stack-size=, -s \n" + "\t\tSize in kilo bytes of the ULT thread stack\n" + "\t--var-size=, -S \n" + "\t\tSize in bytes of the variable to allocate on the stack\n" + "\t--help, -h\n" + "\t\tPrint this description\n", + name, name); +} + +static void +stack_fill(void *arg) +{ + ABT_thread thread; + void *sp = NULL; + size_t var_size = (size_t)arg; + int rc; + + rc = ABT_thread_self(&thread); + D_ASSERT(rc == ABT_SUCCESS); + rc = ABT_thread_get_stacksize(thread, &g_stack_size); + D_ASSERT(rc == ABT_SUCCESS); + printf("Starting filling stack:\n" + "\t- stack size: %zu\n" + "\t- var size: %zu\n", + g_stack_size, var_size); + + g_stack_start = &sp; + for (;;) { + g_stack_end = alloca(var_size); + g_total_size += var_size; + } +} + +static void +handler_segv(int sig, siginfo_t *si, void *unused) +{ + printf("\n" + 
"--------------------------------------------------------------------------------\n" + "Signal 0x%x was handle:\n" + "\tFailed to access memory location %p\n" + "\tAllocated %zu bytes on stack: start=%p end=%p size=%zd\n" + "--------------------------------------------------------------------------------\n", + si->si_signo, si->si_addr, g_total_size, g_stack_start, g_stack_end, + g_stack_start - g_stack_end); + + if (g_check_overflow && (g_total_size > g_stack_size)) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); +} + +static void +signal_register(void) +{ + struct sigaction act = {0}; + stack_t ss; + int rc; + + ss.ss_sp = malloc(SIGSTKSZ); + D_ASSERT(ss.ss_sp != NULL); + ss.ss_size = SIGSTKSZ; + ss.ss_flags = 0; + rc = sigaltstack(&ss, NULL); + D_ASSERT(rc == 0); + + act.sa_flags = SA_SIGINFO | SA_ONSTACK; + act.sa_sigaction = handler_segv; + rc = sigaction(SIGSEGV, &act, NULL); + D_ASSERT(rc == 0); +} + +int +main(int argc, char **argv) +{ + const char *opt_cfg = "pucs:S:h"; + const struct option long_opt_cfg[] = {{"on-pool", no_argument, NULL, 'p'}, + {"unnamed-thread", no_argument, NULL, 'u'}, + {"check-overflow", no_argument, NULL, 'c'}, + {"stack-size", required_argument, NULL, 's'}, + {"var-size", required_argument, NULL, 'S'}, + {"help", no_argument, NULL, 'h'}, + {0, 0, 0, 0}}; + int opt; + bool create_on_pool; + ABT_thread named_thread = {0}; + ABT_thread *thread = &named_thread; + ABT_thread_attr attr = ABT_THREAD_ATTR_NULL; + size_t var_size = 1 << 6; + ssize_t stack_size = -1; + int rc; + + while ((opt = getopt_long(argc, argv, opt_cfg, long_opt_cfg, NULL)) != -1) { + switch (opt) { + case 'c': + g_check_overflow = true; + break; + case 'p': + create_on_pool = true; + break; + case 'u': + thread = NULL; + break; + case 's': + stack_size = (size_t)atoi(optarg) << 10; + break; + case 'S': + var_size = (size_t)atoi(optarg); + break; + case 'h': + usage(argv[0], stdout); + exit(EXIT_SUCCESS); + break; + default: + usage(argv[0], stderr); + 
exit(EXIT_FAILURE); + break; + } + } + + printf("Initializing test...\n"); + rc = daos_debug_init_ex("/dev/stdout", DLOG_INFO); + D_ASSERT(rc == 0); + rc = ABT_init(0, NULL); + D_ASSERT(rc == 0); + + if (stack_size != -1) { + rc = ABT_thread_attr_create(&attr); + D_ASSERT(rc == ABT_SUCCESS); + rc = ABT_thread_attr_set_stacksize(attr, stack_size); + D_ASSERT(rc == ABT_SUCCESS); + } + + signal_register(); + + if (create_on_pool) { + ABT_pool pool; + + rc = ABT_self_get_last_pool(&pool); + D_ASSERT(rc == ABT_SUCCESS); + ABT_thread_create(pool, stack_fill, (void *)var_size, attr, thread); + } else { + ABT_xstream xstream; + + rc = ABT_self_get_xstream(&xstream); + D_ASSERT(rc == ABT_SUCCESS); + ABT_thread_create_on_xstream(xstream, stack_fill, (void *)var_size, attr, thread); + } + + printf("Scheduling ULT test thread...\n"); + ABT_thread_yield(); + D_ASSERT(false); +} diff --git a/src/engine/ult.c b/src/engine/ult.c index fbeb3f538fa..8ea592bed4c 100644 --- a/src/engine/ult.c +++ b/src/engine/ult.c @@ -879,9 +879,8 @@ dss_chore_queue_start(struct dss_xstream *dx) struct dss_chore_queue *queue = &dx->dx_chore_queue; int rc; - rc = daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_GENERIC], - dss_chore_queue_ult, queue, ABT_THREAD_ATTR_NULL, - &queue->chq_ult); + rc = ABT_thread_create(dx->dx_pools[DSS_POOL_GENERIC], dss_chore_queue_ult, queue, + ABT_THREAD_ATTR_NULL, &queue->chq_ult); if (rc != 0) { D_ERROR("failed to create chore queue ULT: %d\n", rc); return dss_abterr2der(rc); diff --git a/src/include/daos/stack_mmap.h b/src/include/daos/stack_mmap.h deleted file mode 100644 index 96964e50915..00000000000 --- a/src/include/daos/stack_mmap.h +++ /dev/null @@ -1,113 +0,0 @@ -/** - * (C) Copyright 2016-2021 Intel Corporation. 
- * - * SPDX-License-Identifier: BSD-2-Clause-Patent - */ -/** - * This file is part of daos - * - * src/include/daos/stack_mmap.h - */ - -/* - * Implementation of an alternate and external way to allocate a stack - * area for any Argobots ULT. - * This aims to allow for a better way to detect/protect against stack - * overflow situations along with automatic growth capability. - * Each individual stack will be mmap()'ed with MAP_GROWSDOWN causing - * the Kernel to reserve stack_guard_gap number of prior additional pages - * that will be reserved for no other mapping and prevented to be accessed. - * The stacks are managed as a pool, using the mmap_stack_desc_t struct - * being located at the bottom (upper addresses) of each stack and being - * linked as a list upon ULT exit for future reuse by a new ULT, based on - * the requested stack size. - * The free stacks list is drained upon a certain number of free stacks or - * upon a certain percentage of free stacks. - * There is one stacks free-list per-engine to allow lock-less management. - */ - -#ifdef ULT_MMAP_STACK -#include -#include -#include -#include - -/* the minimum value for vm.max_map_count to allow for mmap()'ed ULT stacks - * usage. In fact, DEFAULT_MAX_MAP_COUNT, the Kernel's default value !! 
- */ -#define MIN_VM_MAX_MAP_COUNT 65530 - -/* max percent of free stacks vs total currently allocated */ -#define MAX_PERCENT_FREE_STACKS 20 - -/* max nb of free stacks per-XStream */ -#define MAX_NUMBER_FREE_STACKS 2000 - -/* per-engine max number of mmap()'ed ULTs stacks */ -extern int max_nb_mmap_stacks; - -/* engine's current number of mmap()'ed ULTs stacks */ -extern ATOMIC int nb_mmap_stacks; - -/* engine's current number of free mmap()'ed ULTs stacks */ -extern ATOMIC int nb_free_stacks; - -/* mmap()'ed stacks can allow for a bigger size with no impact on - * memory footprint if unused - */ -#define MMAPED_ULT_STACK_SIZE (1 * 1024 * 1024) - -/* ABT_key for mmap()'ed ULT stacks */ -extern ABT_key stack_key; - -extern bool daos_ult_mmap_stack; - -/* pool of free stacks */ -struct stack_pool { - /* per-xstream pool/list of free stacks */ - d_list_t sp_stack_free_list; - /* nb of free stacks in pool/list */ - uint64_t sp_free_stacks; -}; - -/* since being allocated before start of stack its size must be a - * multiple of (void *) !! 
- */ -typedef struct { - void *stack; - size_t stack_size; - /* ULT primary function */ - void (*thread_func)(void *); - /* ULT arg */ - void *thread_arg; - /* per-size free-list of stacks */ - d_list_t stack_list; - /* by default, pool where to free stack */ - struct stack_pool *sp; - /* callback to determine where to free stack at ULT exit time */ - void (*free_stack_cb)(void *); -} mmap_stack_desc_t; - -void free_stack(void *arg); - -void mmap_stack_wrapper(void *arg); - -int mmap_stack_thread_create(struct stack_pool *sp_alloc, void (*free_stack_cb)(void *), - ABT_pool pool, void (*thread_func)(void *), void *thread_arg, - ABT_thread_attr attr, ABT_thread *newthread); - -int mmap_stack_thread_create_on_xstream(struct stack_pool *sp_alloc, void (*free_stack_cb)(void *), - ABT_xstream xstream, void (*thread_func)(void *), - void *thread_arg, ABT_thread_attr attr, - ABT_thread *newthread); - -int stack_pool_create(struct stack_pool **sp); - -void stack_pool_destroy(struct stack_pool *sp); - -#define daos_abt_thread_create mmap_stack_thread_create -#define daos_abt_thread_create_on_xstream mmap_stack_thread_create_on_xstream -#else /* !defined(ULT_MMAP_STACK) */ -#define daos_abt_thread_create(sp_alloc, free_stack_cb, ...) ABT_thread_create(__VA_ARGS__) -#define daos_abt_thread_create_on_xstream(sp_alloc, free_stack_cb, ...) ABT_thread_create_on_xstream(__VA_ARGS__) -#endif diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index b5faa001176..8e1a33db208 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -32,6 +32,9 @@ /* Standard max length of addresses e.g. 
URI, PCI */ #define ADDR_STR_MAX_LEN 128 +#define AF_RC "%s(%d): '%s'" +#define AP_RC(rc) dss_abterr2str(rc), rc, dss_abterr2desc(rc) + /** DAOS system name (corresponds to crt group ID) */ extern char *daos_sysname; @@ -547,12 +550,37 @@ static inline int dss_abterr2der(int abt_errno) { switch (abt_errno) { - case ABT_SUCCESS: return 0; - case ABT_ERR_MEM: return -DER_NOMEM; - default: return -DER_INVAL; + case ABT_SUCCESS: + return 0; + case ABT_ERR_MEM: + return -DER_NOMEM; + default: + return -DER_INVAL; + } +} + +/* Convert DAOS errno to Argobots ones. */ +static inline int +dss_der2abterr(int der) +{ + switch (der) { + case -DER_SUCCESS: + return ABT_SUCCESS; + case -DER_NOMEM: + return ABT_ERR_MEM; + default: + return ABT_ERR_OTHER; } } +/** Helper converting ABT error code into human readable string */ +const char * +dss_abterr2str(int rc); + +/** Helper converting ABT error code into meaningful message */ +const char * +dss_abterr2desc(int rc); + /** RPC counter types */ enum dss_rpc_cntr_id { DSS_RC_OBJ = 0, diff --git a/src/tests/SConscript b/src/tests/SConscript index acef77d5b14..39f1aa1dded 100644 --- a/src/tests/SConscript +++ b/src/tests/SConscript @@ -38,16 +38,8 @@ def build_tests(env, prereqs): libs_server += ['vos', 'bio', 'abt', 'numa'] vos_engine = denv.StaticObject(['vos_engine.c']) - if denv["STACK_MMAP"] == 1: - new_env = denv.Clone() - new_env.Append(CCFLAGS=['-DULT_MMAP_STACK']) - vos_perf = new_env.d_program('vos_perf', - ['vos_perf.c', perf_common, vos_engine] + libdaos_tgts, - LIBS=libs_server) - else: - vos_perf = denv.d_program('vos_perf', - ['vos_perf.c', perf_common, vos_engine] + libdaos_tgts, - LIBS=libs_server) + vos_perf = denv.d_program('vos_perf', ['vos_perf.c', perf_common, vos_engine] + libdaos_tgts, + LIBS=libs_server) denv.Install('$PREFIX/bin/', vos_perf) obj_ctl = denv.d_program('obj_ctl', diff --git a/src/tests/vos_perf.c b/src/tests/vos_perf.c index 64024d7f125..8007b13137b 100644 --- a/src/tests/vos_perf.c +++ 
b/src/tests/vos_perf.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -24,7 +24,6 @@ #include #include #include "perf_internal.h" -#include uint64_t ts_flags; bool ts_flat = false; @@ -38,10 +37,6 @@ daos_unit_oid_t *ts_uoids; /* object shard IDs */ bool ts_in_ult; /* Run tests in ULT mode */ static ABT_xstream abt_xstream; -#ifdef ULT_MMAP_STACK -struct stack_pool *sp; -#endif - static int ts_abt_init(void) { @@ -51,7 +46,7 @@ ts_abt_init(void) rc = ABT_init(0, NULL); if (rc != ABT_SUCCESS) { - fprintf(stderr, "ABT init failed: %d\n", rc); + fprintf(stderr, "Failed to init ABT: " AF_RC "\n", AP_RC(rc)); return -1; } @@ -63,7 +58,7 @@ ts_abt_init(void) rc = ABT_xstream_get_cpubind(abt_xstream, &cpuid); if (rc != ABT_SUCCESS) { - fprintf(stderr, "get cpubind failed: %d\n", rc); + fprintf(stderr, "get cpubind failed: " AF_RC "\n", AP_RC(rc)); fprintf(stderr, "No CPU affinity for this test.\n"); fprintf(stderr, "Build ABT by --enable-affinity if" " you want to try CPU affinity.\n"); @@ -72,7 +67,7 @@ ts_abt_init(void) rc = ABT_xstream_get_affinity(abt_xstream, 0, NULL, &num_cpus); if (rc != ABT_SUCCESS) { - fprintf(stderr, "get num_cpus: %d\n", rc); + fprintf(stderr, "get num_cpus: " AF_RC "\n", AP_RC(rc)); fprintf(stderr, "No CPU affinity for this test.\n"); fprintf(stderr, "Build ABT by --enable-affinity if" " you want to try CPU affinity.\n"); @@ -82,7 +77,7 @@ ts_abt_init(void) cpuid = (cpuid + 1) % num_cpus; rc = ABT_xstream_set_cpubind(abt_xstream, cpuid); if (rc != ABT_SUCCESS) { - fprintf(stderr, "set affinity: %d\n", rc); + fprintf(stderr, "set affinity: " AF_RC "\n", AP_RC(rc)); fprintf(stderr, "No CPU affinity for this test.\n"); fprintf(stderr, "Build ABT by --enable-affinity if" " you want to try CPU affinity.\n"); @@ -206,10 +201,8 @@ vos_update_or_fetch(int obj_idx, enum ts_op_type op_type, ult_arg.epoch = epoch; ult_arg.duration = 
duration; ult_arg.obj_idx = obj_idx; - rc = daos_abt_thread_create_on_xstream(sp, NULL, abt_xstream, - vos_update_or_fetch_ult, - &ult_arg, ABT_THREAD_ATTR_NULL, - &thread); + rc = ABT_thread_create_on_xstream(abt_xstream, vos_update_or_fetch_ult, &ult_arg, + ABT_THREAD_ATTR_NULL, &thread); if (rc != ABT_SUCCESS) return rc; @@ -759,7 +752,7 @@ const struct option perf_vos_opts[] = { {"flat_dkey", no_argument, NULL, 'f'}, {"const_akey", no_argument, NULL, 'I'}, {"abt_ult", no_argument, NULL, 'x'}, - {NULL, 0, NULL, 0}, + {NULL, -1, NULL, 0}, }; const char perf_vos_optstr[] = "D:zifIx"; @@ -912,12 +905,6 @@ main(int argc, char **argv) ts_update_or_fetch_fn = vos_update_or_fetch; -#ifdef ULT_MMAP_STACK - rc = stack_pool_create(&sp); - if (rc) - return -1; -#endif - rc = dts_ctx_init(&ts_ctx, &vos_engine); if (rc) return -1; @@ -980,9 +967,6 @@ main(int argc, char **argv) stride_buf_fini(); dts_ctx_fini(&ts_ctx); -#ifdef ULT_MMAP_STACK - stack_pool_destroy(sp); -#endif par_fini(); if (ts_uoids) diff --git a/src/vos/tests/vos_cmd.c b/src/vos/tests/vos_cmd.c index 4cba4793c52..9a91cfd182f 100644 --- a/src/vos/tests/vos_cmd.c +++ b/src/vos/tests/vos_cmd.c @@ -2,7 +2,7 @@ #include #include "vts_io.h" -#include +#include #include static pthread_once_t once_control = PTHREAD_ONCE_INIT; @@ -658,8 +658,8 @@ handle_op(struct cmd_info *cinfo, bool async) if (async) d_list_add(&ult_info->link, &active_list); - rc = daos_abt_thread_create_on_xstream(NULL, NULL, abt_xstream, ult_func, ult_info, - ABT_THREAD_ATTR_NULL, &ult_info->thread); + rc = ABT_thread_create_on_xstream(abt_xstream, ult_func, ult_info, ABT_THREAD_ATTR_NULL, + &ult_info->thread); assert_int_equal(rc, ABT_SUCCESS); if (!async) { @@ -783,14 +783,14 @@ abit_start(void) rc = ABT_init(0, NULL); if (rc != ABT_SUCCESS) { - fprintf(stderr, "ABT init failed: %d\n", rc); + fprintf(stderr, "Failed to init ABT: " AF_RC "\n", AP_RC(rc)); return -1; } rc = ABT_xstream_self(&abt_xstream); if (rc != ABT_SUCCESS) { 
ABT_finalize(); - printf("ABT get self xstream failed: %d\n", rc); + printf("ABT get self xstream failed: " AF_RC "\n", AP_RC(rc)); return -1; } diff --git a/utils/utest.yaml b/utils/utest.yaml index faf0102050d..df6ae51cf7a 100644 --- a/utils/utest.yaml +++ b/utils/utest.yaml @@ -190,3 +190,22 @@ tests: - cmd: ["src/tests/ftest/tags.py", "unit"] - cmd: ["utils/unit/include_test.py"] +- name: Argobot performance testing + base: "BUILD_DIR" + tests: + - cmd: ["src/engine/tests/abt_perf", "--test=c", "--sec=10", "--num=100", "--stack=16"] + - cmd: ["src/engine/tests/abt_perf", "--test=s", "--sec=10", "--num=100"] + - cmd: ["src/engine/tests/abt_perf", "--test=m", "--sec=10"] + - cmd: ["src/engine/tests/abt_perf", "--test=w", "--sec=10"] + - cmd: ["src/engine/tests/abt_perf", "--test=e", "--sec=10"] + - cmd: ["src/engine/tests/abt_perf", "--test=d", "--sec=10"] +- name: Argobot stack overflow testing + base: "BUILD_DIR" + memcheck: False + tests: + - cmd: ["src/engine/tests/abt_stack", "--check-overflow"] + env_vars: + ABT_STACK_OVERFLOW_CHECK: "mprotect_strict" + - cmd: ["src/engine/tests/abt_stack", "--check-overflow", "--on-pool"] + env_vars: + ABT_STACK_OVERFLOW_CHECK: "mprotect_strict"