Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue 31252 - remote globals not being updated in some cases #31446

Merged
merged 1 commit into from
Apr 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 13 additions & 38 deletions stdlib/Distributed/src/clusterserialize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mutable struct ClusterSerializer{I<:IO} <: AbstractSerializer

pid::Int # Worker we are connected to.
tn_obj_sent::Set{UInt64} # TypeName objects sent
glbs_sent::Dict{UInt64, UInt64} # (key,value) -> (objectid, hash_value)
glbs_sent::Dict{Symbol, Tuple{UInt64, UInt64}} # (key,value) -> (symbol, (hash_value, objectid))
glbs_in_tnobj::Dict{UInt64, Vector{Symbol}} # Track globals referenced in
# anonymous functions.
anonfunc_id::UInt64
Expand Down Expand Up @@ -116,10 +116,11 @@ function serialize(s::ClusterSerializer, g::GlobalRef)
invoke(serialize, Tuple{AbstractSerializer, GlobalRef}, s, g)
end

# Send/resend a global object if
# a) has not been sent previously, i.e., we are seeing this objectid for the first time, or,
# Send/resend a global binding if
# a) has not been sent previously, i.e., we are seeing this binding for the first time, or,
# b) hash value has changed or
# c) is a bits type
# c) hash value is same but of a different object, i.e. objectid has changed or
# d) is a bits type
function syms_2b_sent(s::ClusterSerializer, identifier)
lst = Symbol[]
check_syms = get(s.glbs_in_tnobj, identifier, [])
Expand All @@ -129,10 +130,12 @@ function syms_2b_sent(s::ClusterSerializer, identifier)
if isbits(v)
push!(lst, sym)
else
oid = objectid(v)
if haskey(s.glbs_sent, oid)
# We have sent this object before, see if it has changed.
s.glbs_sent[oid] != hash(sym, hash(v)) && push!(lst, sym)
if haskey(s.glbs_sent, sym)
# We have sent this binding before, see if it has changed.
hval, oid = s.glbs_sent[sym]
if hval != hash(sym, hash(v)) || oid != objectid(v)
push!(lst, sym)
end
else
push!(lst, sym)
end
Expand All @@ -144,26 +147,9 @@ end
function serialize_global_from_main(s::ClusterSerializer, sym)
v = getfield(Main, sym)

oid = objectid(v)
record_v = true
if isbits(v)
record_v = false
elseif !haskey(s.glbs_sent, oid)
# set up a finalizer the first time this object is sent
try
finalizer(v) do x
delete_global_tracker(s,x)
end
catch ex
# Do not track objects that cannot be finalized.
if isa(ex, ErrorException)
record_v = false
else
rethrow()
end
end
if !isbits(v)
s.glbs_sent[sym] = (hash(sym, hash(v)), objectid(v))
end
record_v && (s.glbs_sent[oid] = hash(sym, hash(v)))

serialize(s, isconst(Main, sym))
serialize(s, v)
Expand All @@ -180,17 +166,6 @@ function deserialize_global_from_main(s::ClusterSerializer, sym)
return nothing
end

function delete_global_tracker(s::ClusterSerializer, v)
oid = objectid(v)
if haskey(s.glbs_sent, oid)
delete!(s.glbs_sent, oid)
end

# TODO: A global binding is released and gc'ed here but it continues
# to occupy memory on the remote node. Would be nice to release memory
# if possible.
end

function cleanup_tname_glbs(s::ClusterSerializer, identifier)
delete!(s.glbs_in_tnobj, identifier)
end
Expand Down
27 changes: 23 additions & 4 deletions stdlib/Distributed/test/distributed_exec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1214,6 +1214,29 @@ v6 = FooModEverywhere
@test remotecall_fetch(()->v5, id_other) === FooStructEverywhere
@test remotecall_fetch(()->v6, id_other) === FooModEverywhere

# hash value same but different object instance
v7 = ones(10)
oid1 = objectid(v7)
hval1 = hash(v7)
@test v7 == @fetchfrom id_other v7
remote_oid1 = @fetchfrom id_other objectid(v7)

v7 = ones(10)
@test oid1 != objectid(v7)
@test hval1 == hash(v7)
@test remote_oid1 != @fetchfrom id_other objectid(v7)


# Github issue #31252
v31252 = :a
@test :a == @fetchfrom id_other v31252

v31252 = :b
@test :b == @fetchfrom id_other v31252

v31252 = :a
@test :a == @fetchfrom id_other v31252


# Test that a global is not being repeatedly serialized when
# a) referenced multiple times in the closure
Expand Down Expand Up @@ -1311,10 +1334,6 @@ global ids_func = ()->ids_cleanup
clust_ser = (Distributed.worker_from_id(id_other)).w_serializer
@test remotecall_fetch(ids_func, id_other) == ids_cleanup

@test haskey(clust_ser.glbs_sent, objectid(ids_cleanup))
finalize(ids_cleanup)
@test !haskey(clust_ser.glbs_sent, objectid(ids_cleanup))

# TODO Add test for cleanup from `clust_ser.glbs_in_tnobj`

# reported github issues - Mostly tests with globals and various distributed macros
Expand Down