Skip to content

Commit

Permalink
fix compiler optimize thread local variable access (#2156)
Browse files Browse the repository at this point in the history
* fix compiler optimize thread local variable access

* change __thread to BAIDU_THREAD_LOCAL

* Add NOT_ALLOW_OPTIMIZE_THREAD_LOCAL_ACCESS according to compiler and arch

* move thread local access optimization condition to thread_local.h
  • Loading branch information
ehds authored Apr 26, 2023
1 parent 8f4dd63 commit 37c31fb
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 12 deletions.
19 changes: 7 additions & 12 deletions src/bthread/task_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ const bool ALLOW_UNUSED dummy_show_per_worker_usage_in_vars =
::GFLAGS_NS::RegisterFlagValidator(&FLAGS_show_per_worker_usage_in_vars,
pass_bool);

__thread TaskGroup* tls_task_group = NULL;
BAIDU_VOLATILE_THREAD_LOCAL(TaskGroup*, tls_task_group, NULL);
// Sync with TaskMeta::local_storage when a bthread is created or destroyed.
// During running, the two fields may be inconsistent, use tls_bls as the
// groundtruth.
Expand All @@ -68,7 +68,7 @@ extern void return_keytable(bthread_keytable_pool_t*, KeyTable*);

// [Hacky] This is a special TLS set by bthread-rpc privately... to save
// overhead of creation keytable, may be removed later.
BAIDU_THREAD_LOCAL void* tls_unique_user_ptr = NULL;
BAIDU_VOLATILE_THREAD_LOCAL(void*, tls_unique_user_ptr, NULL);

const TaskStatistics EMPTY_STAT = { 0, 0 };

Expand Down Expand Up @@ -248,9 +248,6 @@ int TaskGroup::init(size_t runqueue_capacity) {
return 0;
}

#if defined(__linux__) && defined(__aarch64__) && defined(__clang__)
__attribute__((optnone))
#endif
void TaskGroup::task_runner(intptr_t skip_remained) {
// NOTE: tls_task_group is volatile since tasks are moved around
// different groups.
Expand Down Expand Up @@ -301,7 +298,7 @@ void TaskGroup::task_runner(intptr_t skip_remained) {
}

// Group is probably changed
g = tls_task_group;
g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group);

// TODO: Save thread_return
(void)thread_return;
Expand Down Expand Up @@ -570,9 +567,6 @@ void TaskGroup::sched(TaskGroup** pg) {
sched_to(pg, next_tid);
}

#if defined(__linux__) && defined(__aarch64__) && defined(__clang__)
__attribute__((optnone))
#endif
void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta) {
TaskGroup* g = *pg;
#ifndef NDEBUG
Expand Down Expand Up @@ -614,7 +608,7 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta) {
if (next_meta->stack != cur_meta->stack) {
jump_stack(cur_meta->stack, next_meta->stack);
// probably went to another group, need to assign g again.
g = tls_task_group;
g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group);
}
#ifndef NDEBUG
else {
Expand All @@ -633,12 +627,13 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta) {
RemainedFn fn = g->_last_context_remained;
g->_last_context_remained = NULL;
fn(g->_last_context_remained_arg);
g = tls_task_group;
g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group);
}

// Restore errno
errno = saved_errno;
tls_unique_user_ptr = saved_unique_user_ptr;
// tls_unique_user_ptr probably changed.
BAIDU_SET_VOLATILE_THREAD_LOCAL(tls_unique_user_ptr, saved_unique_user_ptr);

#ifndef NDEBUG
--g->_sched_recursive_guard;
Expand Down
29 changes: 29 additions & 0 deletions src/butil/thread_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,35 @@
#define BAIDU_THREAD_LOCAL __thread
#endif // _MSC_VER

#define BAIDU_VOLATILE_THREAD_LOCAL(type, var_name, default_value) \
BAIDU_THREAD_LOCAL type var_name = default_value; \
static __attribute__((noinline, unused)) type get_##var_name(void) { \
asm volatile(""); \
return var_name; \
} \
static __attribute__((noinline, unused)) type *get_ptr_##var_name(void) { \
type *ptr = &var_name; \
asm volatile("" : "+rm"(ptr)); \
return ptr; \
} \
static __attribute__((noinline, unused)) void set_##var_name(type v) { \
asm volatile(""); \
var_name = v; \
}

#if defined(__clang__) && (defined(__aarch64__) || defined(__arm64__))
// Clang compiler is incorrectly caching the address of thread_local variables
// across a suspend-point. The following macros used to disable the volatile
// thread local access optimization.
#define BAIDU_GET_VOLATILE_THREAD_LOCAL(var_name) get_##var_name()
#define BAIDU_GET_PTR_VOLATILE_THREAD_LOCAL(var_name) get_ptr_##var_name()
#define BAIDU_SET_VOLATILE_THREAD_LOCAL(var_name, value) set_##var_name(value)
#else
#define BAIDU_GET_VOLATILE_THREAD_LOCAL(var_name) var_name
#define BAIDU_GET_PTR_VOLATILE_THREAD_LOCAL(var_name) &##var_name
#define BAIDU_SET_VOLATILE_THREAD_LOCAL(var_name, value) var_name = value
#endif

namespace butil {

// Get a thread-local object typed T. The object will be default-constructed
Expand Down

0 comments on commit 37c31fb

Please sign in to comment.