Skip to content

Commit

Permalink
Fix issue 31252 - remote globals not being updated in some cases
Browse files Browse the repository at this point in the history
  • Loading branch information
amitmurthy authored and fredrikekre committed Apr 19, 2019
1 parent c0c6f96 commit fb17eeb
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 42 deletions.
51 changes: 13 additions & 38 deletions stdlib/Distributed/src/clusterserialize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mutable struct ClusterSerializer{I<:IO} <: AbstractSerializer

pid::Int # Worker we are connected to.
tn_obj_sent::Set{UInt64} # TypeName objects sent
glbs_sent::Dict{UInt64, UInt64} # (key,value) -> (objectid, hash_value)
glbs_sent::Dict{Symbol, Tuple{UInt64, UInt64}} # (key,value) -> (symbol, (hash_value, objectid))
glbs_in_tnobj::Dict{UInt64, Vector{Symbol}} # Track globals referenced in
# anonymous functions.
anonfunc_id::UInt64
Expand Down Expand Up @@ -116,10 +116,11 @@ function serialize(s::ClusterSerializer, g::GlobalRef)
invoke(serialize, Tuple{AbstractSerializer, GlobalRef}, s, g)
end

# Send/resend a global object if
# a) has not been sent previously, i.e., we are seeing this objectid for the first time, or,
# Send/resend a global binding if
# a) has not been sent previously, i.e., we are seeing this binding for the first time, or,
# b) hash value has changed or
# c) is a bits type
# c) hash value is same but of a different object, i.e. objectid has changed or
# d) is a bits type
function syms_2b_sent(s::ClusterSerializer, identifier)
lst = Symbol[]
check_syms = get(s.glbs_in_tnobj, identifier, [])
Expand All @@ -129,10 +130,12 @@ function syms_2b_sent(s::ClusterSerializer, identifier)
if isbits(v)
push!(lst, sym)
else
oid = objectid(v)
if haskey(s.glbs_sent, oid)
# We have sent this object before, see if it has changed.
s.glbs_sent[oid] != hash(sym, hash(v)) && push!(lst, sym)
if haskey(s.glbs_sent, sym)
# We have sent this binding before, see if it has changed.
hval, oid = s.glbs_sent[sym]
if hval != hash(sym, hash(v)) || oid != objectid(v)
push!(lst, sym)
end
else
push!(lst, sym)
end
Expand All @@ -144,26 +147,9 @@ end
function serialize_global_from_main(s::ClusterSerializer, sym)
v = getfield(Main, sym)

oid = objectid(v)
record_v = true
if isbits(v)
record_v = false
elseif !haskey(s.glbs_sent, oid)
# set up a finalizer the first time this object is sent
try
finalizer(v) do x
delete_global_tracker(s,x)
end
catch ex
# Do not track objects that cannot be finalized.
if isa(ex, ErrorException)
record_v = false
else
rethrow()
end
end
if !isbits(v)
s.glbs_sent[sym] = (hash(sym, hash(v)), objectid(v))
end
record_v && (s.glbs_sent[oid] = hash(sym, hash(v)))

serialize(s, isconst(Main, sym))
serialize(s, v)
Expand All @@ -180,17 +166,6 @@ function deserialize_global_from_main(s::ClusterSerializer, sym)
return nothing
end

function delete_global_tracker(s::ClusterSerializer, v)
oid = objectid(v)
if haskey(s.glbs_sent, oid)
delete!(s.glbs_sent, oid)
end

# TODO: A global binding is released and gc'ed here but it continues
# to occupy memory on the remote node. Would be nice to release memory
# if possible.
end

function cleanup_tname_glbs(s::ClusterSerializer, identifier)
delete!(s.glbs_in_tnobj, identifier)
end
Expand Down
27 changes: 23 additions & 4 deletions stdlib/Distributed/test/distributed_exec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1214,6 +1214,29 @@ v6 = FooModEverywhere
@test remotecall_fetch(()->v5, id_other) === FooStructEverywhere
@test remotecall_fetch(()->v6, id_other) === FooModEverywhere

# hash value same but different object instance
v7 = ones(10)
oid1 = objectid(v7)
hval1 = hash(v7)
@test v7 == @fetchfrom id_other v7
remote_oid1 = @fetchfrom id_other objectid(v7)

v7 = ones(10)
@test oid1 != objectid(v7)
@test hval1 == hash(v7)
@test remote_oid1 != @fetchfrom id_other objectid(v7)


# Github issue #31252
v31252 = :a
@test :a == @fetchfrom id_other v31252

v31252 = :b
@test :b == @fetchfrom id_other v31252

v31252 = :a
@test :a == @fetchfrom id_other v31252


# Test that a global is not being repeatedly serialized when
# a) referenced multiple times in the closure
Expand Down Expand Up @@ -1311,10 +1334,6 @@ global ids_func = ()->ids_cleanup
clust_ser = (Distributed.worker_from_id(id_other)).w_serializer
@test remotecall_fetch(ids_func, id_other) == ids_cleanup

@test haskey(clust_ser.glbs_sent, objectid(ids_cleanup))
finalize(ids_cleanup)
@test !haskey(clust_ser.glbs_sent, objectid(ids_cleanup))

# TODO Add test for cleanup from `clust_ser.glbs_in_tnobj`

# reported github issues - Mostly tests with globals and various distributed macros
Expand Down

0 comments on commit fb17eeb

Please sign in to comment.