Skip to content

Commit 888f452

Browse files
authored
Fix deadloack in dlopen. NFC (#18487)
When within dlopen itself we hold an exclusive lock so we need to disable the automatic code synchronization during that time. Split out from #18376
1 parent 207848f commit 888f452

File tree

4 files changed

+188
-58
lines changed

4 files changed

+188
-58
lines changed

system/lib/libc/dynlink.c

Lines changed: 92 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,82 @@ void _emscripten_dlopen_js(struct dso* handle,
4747
void __dl_vseterr(const char*, va_list);
4848

4949
static struct dso * _Atomic head, * _Atomic tail;
50+
51+
#ifdef _REENTRANT
5052
static thread_local struct dso* thread_local_tail;
5153
static pthread_rwlock_t lock;
5254

55+
static void dlsync_locked() {
56+
if (!thread_local_tail) {
57+
thread_local_tail = head;
58+
}
59+
while (thread_local_tail->next) {
60+
struct dso* p = thread_local_tail->next;
61+
dbg("dlsync_locked: %s mem_addr=%p "
62+
"mem_size=%zu table_addr=%p table_size=%zu",
63+
p->name,
64+
p->mem_addr,
65+
p->mem_size,
66+
p->table_addr,
67+
p->table_size);
68+
void* success = _dlopen_js(p);
69+
if (!success) {
70+
// If any on the libraries fails to load here then we give up.
71+
// TODO(sbc): Ideally this would never happen and we could/should
72+
// abort, but on the main thread (where we don't have sync xhr) its
73+
// often not possible to syncronously load side module.
74+
_emscripten_errf("_dlopen_js failed: %s", dlerror());
75+
break;
76+
}
77+
thread_local_tail = p;
78+
}
79+
}
80+
81+
// This function is called from emscripten_yield which itself is called whenever
82+
// we block on a futex. We need to check to avoid infinite recursion when
83+
// taking the lock below.
84+
static thread_local bool skip_dlsync = false;
85+
86+
static void ensure_init();
87+
88+
void _emscripten_thread_sync_code() {
89+
if (skip_dlsync) {
90+
return;
91+
}
92+
skip_dlsync = true;
93+
ensure_init();
94+
if (thread_local_tail != tail) {
95+
dbg("emscripten_thread_sync_code: catching up %p %p", thread_local_tail, tail);
96+
pthread_rwlock_rdlock(&lock);
97+
dlsync_locked();
98+
pthread_rwlock_unlock(&lock);
99+
dbg("emscripten_thread_sync_code: done");
100+
}
101+
skip_dlsync = false;
102+
}
103+
104+
static void do_read_lock() {
105+
skip_dlsync = true;
106+
pthread_rwlock_rdlock(&lock);
107+
}
108+
109+
static void do_write_lock() {
110+
// Once we have the lock we want to avoid automatic code sync as that would
111+
// result in a deadlock.
112+
skip_dlsync = true;
113+
pthread_rwlock_wrlock(&lock);
114+
}
115+
116+
static void do_unlock() {
117+
pthread_rwlock_unlock(&lock);
118+
skip_dlsync = false;
119+
}
120+
#else
121+
#define do_unlock()
122+
#define do_read_lock()
123+
#define do_write_lock()
124+
#endif
125+
53126
static void error(const char* fmt, ...) {
54127
va_list ap;
55128
va_start(ap, fmt);
@@ -80,13 +153,16 @@ static void load_library_done(struct dso* p) {
80153
p->table_addr,
81154
p->table_size);
82155

156+
#ifdef _REENTRANT
157+
thread_local_tail = p;
158+
#endif
159+
83160
// insert into linked list
84161
p->prev = tail;
85162
if (tail) {
86163
tail->next = p;
87164
}
88165
tail = p;
89-
thread_local_tail = p;
90166

91167
if (!head) {
92168
head = p;
@@ -114,14 +190,14 @@ static void dlopen_js_onsuccess(struct dso* dso, struct async_data* data) {
114190
dso->mem_addr,
115191
dso->mem_size);
116192
load_library_done(dso);
117-
pthread_rwlock_unlock(&lock);
193+
do_unlock();
118194
data->onsuccess(data->user_data, dso);
119195
free(data);
120196
}
121197

122198
static void dlopen_js_onerror(struct dso* dso, struct async_data* data) {
123199
dbg("dlopen_js_onerror: dso=%p", dso);
124-
pthread_rwlock_unlock(&lock);
200+
do_unlock();
125201
data->onerror(data->user_data);
126202
free(dso);
127203
free(data);
@@ -134,7 +210,7 @@ static void ensure_init() {
134210
return;
135211
}
136212
// Initialize the dso list. This happens on first run.
137-
pthread_rwlock_wrlock(&lock);
213+
do_write_lock();
138214
if (!head) {
139215
// Flags are not important since the main module is already loaded.
140216
struct dso* p = load_library_start("__main__", RTLD_NOW|RTLD_GLOBAL);
@@ -143,7 +219,7 @@ static void ensure_init() {
143219
load_library_done(p);
144220
assert(head);
145221
}
146-
pthread_rwlock_unlock(&lock);
222+
do_unlock();
147223
}
148224

149225
void* dlopen(const char* file, int flags) {
@@ -156,7 +232,11 @@ void* dlopen(const char* file, int flags) {
156232
struct dso* p;
157233
int cs;
158234
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
159-
pthread_rwlock_wrlock(&lock);
235+
do_write_lock();
236+
#ifdef _REENTRANT
237+
// Make sure we are in sync before loading any new DSOs.
238+
dlsync_locked();
239+
#endif
160240

161241
/* Search for the name to see if it's already loaded */
162242
for (p = head; p; p = p->next) {
@@ -180,7 +260,7 @@ void* dlopen(const char* file, int flags) {
180260
dbg("dlopen_js: success: %p", p);
181261
load_library_done(p);
182262
end:
183-
pthread_rwlock_unlock(&lock);
263+
do_unlock();
184264
pthread_setcancelstate(cs, 0);
185265
return p;
186266
}
@@ -192,10 +272,10 @@ void emscripten_dlopen(const char* filename, int flags, void* user_data,
192272
onsuccess(user_data, head);
193273
return;
194274
}
195-
pthread_rwlock_wrlock(&lock);
275+
do_write_lock();
196276
struct dso* p = load_library_start(filename, flags);
197277
if (!p) {
198-
pthread_rwlock_unlock(&lock);
278+
do_unlock();
199279
onerror(user_data);
200280
return;
201281
}
@@ -217,9 +297,10 @@ void* __dlsym(void* restrict p, const char* restrict s, void* restrict ra) {
217297
return 0;
218298
}
219299
void* res;
220-
pthread_rwlock_rdlock(&lock);
300+
do_read_lock();
221301
res = _dlsym_js(p, s);
222-
pthread_rwlock_unlock(&lock);
302+
do_unlock();
303+
dbg("__dlsym done dso:%p res:%p", p, res);
223304
return res;
224305
}
225306

@@ -232,50 +313,3 @@ int dladdr(const void* addr, Dl_info* info) {
232313
info->dli_saddr = NULL;
233314
return 1;
234315
}
235-
236-
#ifdef _REENTRANT
237-
void _emscripten_thread_sync_code() {
238-
// This function is called from emscripten_yeild which itself is called
239-
// whenever we block on a futex. We need to check to avoid infinite
240-
// recursion when taking the lock below.
241-
static thread_local bool syncing = false;
242-
if (syncing) {
243-
return;
244-
}
245-
syncing = true;
246-
ensure_init();
247-
if (thread_local_tail == tail) {
248-
dbg("emscripten_thread_sync_code: already in sync");
249-
goto done;
250-
}
251-
pthread_rwlock_rdlock(&lock);
252-
if (!thread_local_tail) {
253-
thread_local_tail = head;
254-
}
255-
while (thread_local_tail->next) {
256-
struct dso* p = thread_local_tail->next;
257-
dbg("emscripten_thread_sync_code: %s mem_addr=%p "
258-
"mem_size=%zu table_addr=%p table_size=%zu",
259-
p->name,
260-
p->mem_addr,
261-
p->mem_size,
262-
p->table_addr,
263-
p->table_size);
264-
void* success = _dlopen_js(p);
265-
if (!success) {
266-
// If any on the libraries fails to load here then we give up.
267-
// TODO(sbc): Ideally this would never happen and we could/should
268-
// abort, but on the main thread (where we don't have sync xhr) its
269-
// often not possible to syncronously load side module.
270-
_emscripten_errf("emscripten_thread_sync_code failed: %s", dlerror());
271-
break;
272-
}
273-
thread_local_tail = p;
274-
}
275-
pthread_rwlock_unlock(&lock);
276-
dbg("emscripten_thread_sync_code done");
277-
278-
done:
279-
syncing = false;
280-
}
281-
#endif
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include <assert.h>
2+
#include <dlfcn.h>
3+
#include <pthread.h>
4+
#include <stdio.h>
5+
6+
#ifndef NUM_THREADS
7+
#define NUM_THREADS 2
8+
#endif
9+
10+
typedef int* (*sidey_data_type)();
11+
typedef int (*func_t)();
12+
typedef func_t (*sidey_func_type)();
13+
14+
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
15+
16+
_Atomic int thread_count = 0;
17+
18+
void* thread_main(void* arg) {
19+
int num = (intptr_t)arg;
20+
printf("thread_main %d %p\n", num, pthread_self());
21+
thread_count++;
22+
23+
// busy wait until all threads are running
24+
while (thread_count != NUM_THREADS) {}
25+
26+
char filename[255];
27+
sprintf(filename, "liblib%d.so", num);
28+
printf("loading %s\n", filename);
29+
void* handle = dlopen(filename, RTLD_NOW|RTLD_GLOBAL);
30+
printf("done loading %s\n", filename);
31+
if (!handle) {
32+
printf("dlerror: %s\n", dlerror());
33+
}
34+
assert(handle);
35+
/*
36+
* TODO(sbc): We have a bug that new functions added to the table via dlsym
37+
* are not yet correctly synchronized between threads.
38+
* Uncommenting the code below will cause a "table out of sync" error.
39+
*/
40+
/*
41+
sidey_data_type p_side_data_address;
42+
sidey_func_type p_side_func_address;
43+
p_side_data_address = dlsym(handle, "side_data_address");
44+
printf("p_side_data_address=%p\n", p_side_data_address);
45+
p_side_func_address = dlsym(handle, "side_func_address");
46+
printf("p_side_func_address=%p\n", p_side_func_address);
47+
*/
48+
49+
printf("done thread_main %d\n", num);
50+
return NULL;
51+
}
52+
53+
int main() {
54+
printf("in main: %p\n", pthread_self());
55+
pthread_mutex_lock(&mutex);
56+
57+
// start a bunch of threads while holding the lock
58+
pthread_t threads[NUM_THREADS];
59+
for (int i = 0; i < NUM_THREADS; i++) {
60+
pthread_create(&threads[i], NULL, thread_main, (void*)i);
61+
}
62+
63+
// busy wait until all threads are running
64+
while (thread_count != NUM_THREADS) {}
65+
66+
for (int i = 0; i < NUM_THREADS; i++) {
67+
pthread_join(threads[i], NULL);
68+
}
69+
70+
printf("main done\n");
71+
return 0;
72+
}

test/core/pthread/test_pthread_dlopen_side.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
#include <stdio.h>
2+
#include <stdlib.h>
23

34
typedef int (*myfunc_type)();
45

56
static int mydata[10] = { 44 };
67

8+
static void dtor() {
9+
puts("side module atexit ..");
10+
}
11+
712
__attribute__((constructor)) static void ctor() {
813
puts("side module ctor");
14+
atexit(dtor);
915
}
1016

1117
static int myfunc() {

test/test_core.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9348,6 +9348,24 @@ def test_pthread_dlopen(self, do_yield):
93489348
'invalid index into function table',
93499349
assert_returncode=NON_ZERO)
93509350

9351+
@needs_dylink
9352+
@node_pthreads
9353+
def test_pthread_dlopen_many(self):
9354+
nthreads = 10
9355+
self.set_setting('USE_PTHREADS')
9356+
self.emcc_args.append('-Wno-experimental')
9357+
self.build_dlfcn_lib(test_file('core/pthread/test_pthread_dlopen_side.c'))
9358+
for i in range(nthreads):
9359+
shutil.copyfile('liblib.so', f'liblib{i}.so')
9360+
9361+
self.prep_dlfcn_main()
9362+
self.set_setting('EXIT_RUNTIME')
9363+
self.set_setting('PROXY_TO_PTHREAD')
9364+
self.do_runf(test_file('core/pthread/test_pthread_dlopen_many.c'),
9365+
['side module ctor', 'main done', 'side module atexit'],
9366+
emcc_args=[f'-DNUM_THREADS={nthreads}'],
9367+
assert_all=True)
9368+
93519369
@needs_dylink
93529370
@node_pthreads
93539371
def test_pthread_dlsym(self):

0 commit comments

Comments
 (0)