Skip to content

Commit

Permalink
threads: avoid deadlock from recursive lock acquire (PR #38487) (#38753)
Browse files Browse the repository at this point in the history
Finalizers can't safely acquire many essential locks (such as the
iolock, to cleanup libuv objects) if they are run inside another lock.
Therefore, inhibit all finalizers on the thread until all locks are
released (previously, this was only true for our internal locks).

(cherry-picked from 59aedd1)
  • Loading branch information
vtjnash authored Dec 10, 2020
1 parent 599ecd8 commit ebcc603
Show file tree
Hide file tree
Showing 12 changed files with 140 additions and 31 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Multi-threading changes
* `@threads` now allows an optional schedule argument. Use `@threads :static ...` to
ensure that the same schedule will be used as in past versions; the default schedule
is likely to change in the future.
* Locks now automatically inhibit finalizers from running, to avoid deadlock ([#38487]).

Build system changes
--------------------
Expand Down
10 changes: 10 additions & 0 deletions base/gcutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ Control whether garbage collection is enabled using a boolean argument (`true` f
"""
enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0

"""
GC.enable_finalizers(on::Bool)
Increment or decrement the counter that controls the running of finalizers on
the current Task. Finalizers will only run when the counter is at zero. (Set
`true` for enabling, `false` for disabling). They may still run concurrently on
another Task or thread.
"""
enable_finalizers(on::Bool) = ccall(:jl_gc_enable_finalizers, Cvoid, (Ptr{Cvoid}, Int32,), C_NULL, on)

"""
GC.@preserve x1 x2 ... xn expr
Expand Down
25 changes: 22 additions & 3 deletions base/lock.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,24 @@
"""
ReentrantLock()
Creates a re-entrant lock for synchronizing [`Task`](@ref)s.
The same task can acquire the lock as many times as required.
Each [`lock`](@ref) must be matched with an [`unlock`](@ref).
Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can
acquire the lock as many times as required. Each [`lock`](@ref) must be matched
with an [`unlock`](@ref).
Calling 'lock' will also inhibit running of finalizers on that thread until the
corresponding 'unlock'. Use of the standard lock pattern illustrated below
should naturally be supported, but beware of inverting the try/lock order or
missing the try block entirely (e.g. attempting to return with the lock still
held):
```
lock(l)
try
<atomic work>
finally
unlock(l)
end
```
"""
mutable struct ReentrantLock <: AbstractLock
locked_by::Union{Task, Nothing}
Expand Down Expand Up @@ -44,6 +59,7 @@ function trylock(rl::ReentrantLock)
if rl.reentrancy_cnt == 0
rl.locked_by = t
rl.reentrancy_cnt = 1
GC.enable_finalizers(false)
got = true
elseif t === notnothing(rl.locked_by)
rl.reentrancy_cnt += 1
Expand Down Expand Up @@ -71,6 +87,7 @@ function lock(rl::ReentrantLock)
if rl.reentrancy_cnt == 0
rl.locked_by = t
rl.reentrancy_cnt = 1
GC.enable_finalizers(false)
break
elseif t === notnothing(rl.locked_by)
rl.reentrancy_cnt += 1
Expand Down Expand Up @@ -111,6 +128,7 @@ function unlock(rl::ReentrantLock)
rethrow()
end
end
GC.enable_finalizers(true)
end
unlock(rl.cond_wait)
return
Expand All @@ -132,6 +150,7 @@ function unlockall(rl::ReentrantLock)
rethrow()
end
end
GC.enable_finalizers(true)
unlock(rl.cond_wait)
return n
end
Expand Down
11 changes: 10 additions & 1 deletion base/locks-mt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,12 @@ Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_vio
function lock(l::SpinLock)
while true
if _get(l) == 0
GC.enable_finalizers(false)
p = _xchg!(l, 1)
if p == 0
return
end
GC.enable_finalizers(true)
end
ccall(:jl_cpu_pause, Cvoid, ())
# Temporary solution before we have gc transition support in codegen.
Expand All @@ -74,13 +76,20 @@ end

function trylock(l::SpinLock)
if _get(l) == 0
return _xchg!(l, 1) == 0
GC.enable_finalizers(false)
p = _xchg!(l, 1)
if p == 0
return true
end
GC.enable_finalizers(true)
end
return false
end

function unlock(l::SpinLock)
_get(l) == 0 && error("unlock count must match lock count")
_set!(l, 0)
GC.enable_finalizers(true)
ccall(:jl_cpu_wake, Cvoid, ())
return
end
Expand Down
30 changes: 27 additions & 3 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,36 @@ static void run_finalizers(jl_ptls_t ptls)
arraylist_free(&copied_list);
}

JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
{
if (ptls == NULL)
ptls = jl_get_ptls_states();
return ptls->finalizers_inhibited;
}

JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on)
{
if (ptls == NULL)
ptls = jl_get_ptls_states();
int old_val = ptls->finalizers_inhibited;
int new_val = old_val + (on ? -1 : 1);
if (new_val < 0) {
JL_TRY {
jl_error(""); // get a backtrace
}
JL_CATCH {
jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
// Only print the backtrace once, to avoid spamming the logs
static int backtrace_printed = 0;
if (backtrace_printed == 0) {
backtrace_printed = 1;
jlbacktrace(); // written to STDERR_FILENO
}
}
return;
}
ptls->finalizers_inhibited = new_val;
if (!new_val && old_val && !ptls->in_finalizer) {
if (!new_val && old_val && !ptls->in_finalizer && ptls->current_task->locks.len == 0) {
ptls->in_finalizer = 1;
run_finalizers(ptls);
ptls->in_finalizer = 0;
Expand Down Expand Up @@ -1580,7 +1604,7 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset,
JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt,
jl_gc_mark_sp_t sp)
{
jl_printf(JL_STDOUT, "GC error (probable corruption) :\n");
jl_safe_printf("GC error (probable corruption) :\n");
gc_debug_print_status();
jl_(vt);
gc_debug_critical_error();
Expand Down Expand Up @@ -3121,7 +3145,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
// Only disable finalizers on current thread
// Doing this on all threads is racy (it's impossible to check
// or wait for finalizers on other threads without dead lock).
if (!ptls->finalizers_inhibited) {
if (!ptls->finalizers_inhibited && ptls->current_task && ptls->current_task->locks.len == 0) {
int8_t was_in_finalizer = ptls->in_finalizer;
ptls->in_finalizer = 1;
run_finalizers(ptls);
Expand Down
3 changes: 0 additions & 3 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1719,7 +1719,6 @@ typedef struct _jl_handler_t {
int8_t gc_state;
size_t locks_len;
sig_atomic_t defer_signal;
int finalizers_inhibited;
jl_timing_block_t *timing_stack;
size_t world_age;
} jl_handler_t;
Expand Down Expand Up @@ -1751,8 +1750,6 @@ typedef struct _jl_task_t {
jl_gcframe_t *gcstack;
// saved exception stack
jl_excstack_t *excstack;
// current world age
size_t world_age;

// id of owning thread
// does not need to be defined until the task runs
Expand Down
9 changes: 4 additions & 5 deletions src/locks.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,9 @@ static inline void jl_lock_frame_pop(void)

static inline void jl_mutex_lock(jl_mutex_t *lock)
{
jl_ptls_t ptls = jl_get_ptls_states();
JL_SIGATOMIC_BEGIN();
jl_mutex_wait(lock, 1);
jl_lock_frame_push(lock);
jl_gc_enable_finalizers(ptls, 0);
}

static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock)
Expand All @@ -116,10 +114,8 @@ static inline int jl_mutex_trylock(jl_mutex_t *lock)
{
int got = jl_mutex_trylock_nogc(lock);
if (got) {
jl_ptls_t ptls = jl_get_ptls_states();
JL_SIGATOMIC_BEGIN();
jl_lock_frame_push(lock);
jl_gc_enable_finalizers(ptls, 0);
}
return got;
}
Expand All @@ -139,9 +135,12 @@ static inline void jl_mutex_unlock(jl_mutex_t *lock)
{
jl_ptls_t ptls = jl_get_ptls_states();
jl_mutex_unlock_nogc(lock);
jl_gc_enable_finalizers(ptls, 1);
jl_lock_frame_pop();
JL_SIGATOMIC_END();
if (ptls->current_task && ptls->current_task->locks.len == 0 && ptls->finalizers_inhibited == 0) {
ptls->finalizers_inhibited = 1;
jl_gc_enable_finalizers(ptls, 1); // call run_finalizers (may GC)
}
}

static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT
Expand Down
12 changes: 8 additions & 4 deletions src/rtutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,6 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
eh->gc_state = ptls->gc_state;
eh->locks_len = current_task->locks.len;
eh->defer_signal = ptls->defer_signal;
eh->finalizers_inhibited = ptls->finalizers_inhibited;
eh->world_age = ptls->world_age;
current_task->eh = eh;
#ifdef ENABLE_TIMINGS
Expand Down Expand Up @@ -246,21 +245,26 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
current_task->eh = eh->prev;
ptls->pgcstack = eh->gcstack;
arraylist_t *locks = &current_task->locks;
if (locks->len > eh->locks_len) {
for (size_t i = locks->len;i > eh->locks_len;i--)
int unlocks = locks->len > eh->locks_len;
if (unlocks) {
for (size_t i = locks->len; i > eh->locks_len; i--)
jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
locks->len = eh->locks_len;
}
ptls->world_age = eh->world_age;
ptls->defer_signal = eh->defer_signal;
ptls->gc_state = eh->gc_state;
ptls->finalizers_inhibited = eh->finalizers_inhibited;
if (old_gc_state && !eh->gc_state) {
jl_gc_safepoint_(ptls);
}
if (old_defer_signal && !eh->defer_signal) {
jl_sigint_safepoint(ptls);
}
if (unlocks && eh->locks_len == 0 && ptls->finalizers_inhibited == 0) {
// call run_finalizers
ptls->finalizers_inhibited = 1;
jl_gc_enable_finalizers(ptls, 1);
}
}

JL_DLLEXPORT void jl_pop_handler(int n)
Expand Down
17 changes: 14 additions & 3 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,8 @@ static void ctx_switch(jl_ptls_t ptls)
}

// set up global state for new task
lastt->world_age = ptls->world_age;
ptls->pgcstack = t->gcstack;
ptls->world_age = t->world_age;
ptls->world_age = 0;
t->gcstack = NULL;
#ifdef MIGRATE_TASKS
ptls->previous_task = lastt;
Expand Down Expand Up @@ -404,8 +403,14 @@ JL_DLLEXPORT void jl_switch(void)
else if (t->tid != ptls->tid) {
jl_error("cannot switch to task running on another thread");
}

// Store old values on the stack and reset
sig_atomic_t defer_signal = ptls->defer_signal;
int8_t gc_state = jl_gc_unsafe_enter(ptls);
size_t world_age = ptls->world_age;
int finalizers_inhibited = ptls->finalizers_inhibited;
ptls->world_age = 0;
ptls->finalizers_inhibited = 0;

#ifdef ENABLE_TIMINGS
jl_timing_block_t *blk = ct->timing_stack;
Expand All @@ -427,7 +432,12 @@ JL_DLLEXPORT void jl_switch(void)
assert(ptls == refetch_ptls());
#endif

ct = ptls->current_task;
// Pop old values back off the stack
assert(ct == ptls->current_task &&
0 == ptls->world_age &&
0 == ptls->finalizers_inhibited);
ptls->world_age = world_age;
ptls->finalizers_inhibited = finalizers_inhibited;

#ifdef ENABLE_TIMINGS
assert(blk == ct->timing_stack);
Expand Down Expand Up @@ -680,6 +690,7 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
jl_ptls_t ptls = jl_get_ptls_states();
jl_task_t *t = ptls->current_task;
jl_value_t *res;
assert(ptls->finalizers_inhibited == 0);

#ifdef MIGRATE_TASKS
jl_task_t *pt = ptls->previous_task;
Expand Down
5 changes: 3 additions & 2 deletions stdlib/Distributed/src/messages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ function flush_gc_msgs(w::Worker)
end

# del_msgs gets populated by finalizers, so be very careful here about ordering of allocations
# XXX: threading requires this to be atomic
new_array = Any[]
msgs = w.del_msgs
w.del_msgs = new_array
Expand Down Expand Up @@ -178,7 +179,7 @@ function send_msg_(w::Worker, header, msg, now::Bool)
wait(w.initialized)
end
io = w.w_stream
lock(io.lock)
lock(io)
try
reset_state(w.w_serializer)
serialize_hdr_raw(io, header)
Expand All @@ -191,7 +192,7 @@ function send_msg_(w::Worker, header, msg, now::Bool)
flush(io)
end
finally
unlock(io.lock)
unlock(io)
end
end

Expand Down
10 changes: 6 additions & 4 deletions test/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ using Random, SparseArrays, InteractiveUtils

const Bottom = Union{}


# For curmod_*
include("testenv.jl")

Expand Down Expand Up @@ -2071,7 +2070,7 @@ mutable struct A6142 <: AbstractMatrix{Float64}; end
+(x::A6142, y::AbstractRange) = "AbstractRange method called" #16324 ambiguity

# issue #6175
function g6175(); print(""); (); end
function g6175(); GC.safepoint(); (); end
g6175(i::Real, I...) = g6175(I...)
g6175(i, I...) = tuple(length(i), g6175(I...)...)
@test g6175(1:5) === (5,)
Expand Down Expand Up @@ -2211,7 +2210,7 @@ day_in(obj6387)
function segfault6793(;gamma=1)
A = 1
B = 1
print()
GC.safepoint()
return
-gamma
nothing
Expand Down Expand Up @@ -3317,7 +3316,7 @@ function f11065()
if i == 1
z = "z is defined"
elseif i == 2
print(z)
print(z) # z is undefined
end
end
end
Expand Down Expand Up @@ -4234,7 +4233,10 @@ end
end
# disable GC to make sure no collection/promotion happens
# when we are constructing the objects
get_finalizers_inhibited() = ccall(:jl_gc_get_finalizers_inhibited, Int32, (Ptr{Cvoid},), C_NULL)
let gc_enabled13995 = GC.enable(false)
@assert gc_enabled13995
@assert get_finalizers_inhibited() == 0
finalized13995 = [false, false, false, false]
create_dead_object13995(finalized13995)
GC.enable(true)
Expand Down
Loading

0 comments on commit ebcc603

Please sign in to comment.