Skip to content

Commit

Permalink
Merge pull request #22734 from JuliaLang/yyc/foreigncall
Browse files Browse the repository at this point in the history
Eliminating allocation of ccall root in simple cases
  • Loading branch information
yuyichao authored Jul 15, 2017
2 parents 2096248 + 245868c commit 526b83e
Show file tree
Hide file tree
Showing 11 changed files with 211 additions and 83 deletions.
12 changes: 9 additions & 3 deletions base/docs/helpdb/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,14 @@ getindex(collection, key...)
"""
cconvert(T,x)
Convert `x` to a value of type `T`, typically by calling `convert(T,x)`
Convert `x` to a value to be passed to C code as type `T`, typically by calling `convert(T, x)`.
In cases where `x` cannot be safely converted to `T`, unlike [`convert`](@ref), `cconvert` may
return an object of a type different from `T`, which however is suitable for
[`unsafe_convert`](@ref) to handle.
[`unsafe_convert`](@ref) to handle. The result of this function should be kept valid (for the GC)
until the result of [`unsafe_convert`](@ref) is not needed anymore.
This can be used to allocate memory that will be accessed by the `ccall`.
If multiple objects need to be allocated, a tuple of the objects can be used as return value.
Neither `convert` nor `cconvert` should take a Julia object and turn it into a `Ptr`.
"""
Expand Down Expand Up @@ -881,7 +884,8 @@ trunc
"""
unsafe_convert(T,x)
Convert `x` to a value of type `T`
Convert `x` to a C argument of type `T`
where the input `x` must be the return value of `cconvert(T, ...)`.
In cases where [`convert`](@ref) would need to take a Julia object
and turn it into a `Ptr`, this function should be used to define and perform
Expand All @@ -895,6 +899,8 @@ but `x=[a,b,c]` is not.
The `unsafe` prefix on this function indicates that using the result of this function after
the `x` argument to this function is no longer accessible to the program may cause undefined
behavior, including program corruption or segfaults, at any later time.
See also [`cconvert`](@ref)
"""
unsafe_convert

Expand Down
74 changes: 64 additions & 10 deletions base/inference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3687,6 +3687,10 @@ function substitute!(@nospecialize(e), na::Int, argexprs::Vector{Any}, @nospecia
for argt
in e.args[3] ]
e.args[3] = svec(argtuple...)
elseif i == 4
@assert isa((e.args[4]::QuoteNode).value, Symbol)
elseif i == 5
@assert isa(e.args[5], Int)
else
e.args[i] = substitute!(e.args[i], na, argexprs, spsig, spvals, offset)
end
Expand Down Expand Up @@ -4766,16 +4770,16 @@ function inlining_pass(e::Expr, sv::InferenceState, stmts, ins)
# by the interpreter and inlining might put in something it can't handle,
# like another ccall (or try to move the variables out into the function)
if e.head === :foreigncall
# 3 is rewritten to 1 below to handle the callee.
i0 = 3
# 5 is rewritten to 1 below to handle the callee.
i0 = 5
isccall = true
elseif is_known_call(e, Core.Intrinsics.llvmcall, sv.src, sv.mod)
i0 = 5
end
has_stmts = false # needed to preserve order-of-execution
prev_stmts_length = length(stmts)
for _i = length(eargs):-1:i0
if isccall && _i == 3
if isccall && _i == 5
i = 1
isccallee = true
else
Expand Down Expand Up @@ -4830,10 +4834,21 @@ function inlining_pass(e::Expr, sv::InferenceState, stmts, ins)
end
if isccall
le = length(eargs)
for i = 4:2:(le - 1)
if eargs[i] === eargs[i + 1]
eargs[i + 1] = 0
nccallargs = eargs[5]::Int
ccallargs = ObjectIdDict()
for i in 6:(5 + nccallargs)
ccallargs[eargs[i]] = nothing
end
i = 6 + nccallargs
while i <= le
rootarg = eargs[i]
if haskey(ccallargs, rootarg)
deleteat!(eargs, i)
le -= 1
elseif i < le
ccallargs[rootarg] = nothing
end
i += 1
end
end
if e.head !== :call
Expand Down Expand Up @@ -5216,6 +5231,27 @@ function occurs_outside_getfield(@nospecialize(e), @nospecialize(sym),
if head === :(=)
return occurs_outside_getfield(e.args[2], sym, sv,
field_count, field_names)
elseif head === :foreigncall
args = e.args
nccallargs = args[5]::Int
# Only arguments escape the structure/layout of the object,
# GC root arguments do not.
# Also note that only being used in the root slot for this ccall itself
# does **not** mean that the object is not needed during the ccall.
# However, if its address is never taken
# and the object is never used in a way that escapes its layout, we can be sure
# that there's no way the user code can rely on the heap allocation of this object.
for i in 1:length(args)
a = args[i]
if i > 5 + nccallargs && symequal(a, sym)
# No need to verify indices, uninitialized members can be
# ignored in root slot.
continue
end
if occurs_outside_getfield(a, sym, sv, field_count, field_names)
return true
end
end
else
if (head === :block && isa(sym, Slot) &&
sv.src.slotflags[slot_id(sym)] & Slot_UsedUndef == 0)
Expand Down Expand Up @@ -5799,8 +5835,11 @@ end
function replace_getfield!(e::Expr, tupname, vals, field_names, sv::InferenceState)
for i = 1:length(e.args)
a = e.args[i]
if isa(a,Expr) && is_known_call(a, getfield, sv.src, sv.mod) &&
symequal(a.args[2],tupname)
if !isa(a, Expr)
continue
end
a = a::Expr
if is_known_call(a, getfield, sv.src, sv.mod) && symequal(a.args[2], tupname)
idx = if isa(a.args[3], Int)
a.args[3]
else
Expand Down Expand Up @@ -5829,8 +5868,23 @@ function replace_getfield!(e::Expr, tupname, vals, field_names, sv::InferenceSta
end
end
e.args[i] = val
elseif isa(a, Expr)
replace_getfield!(a::Expr, tupname, vals, field_names, sv)
else
if a.head === :foreigncall
args = a.args
nccallargs = args[5]::Int
le = length(args)
next_i = 6 + nccallargs
while next_i <= le
i = next_i
next_i += 1

symequal(args[i], tupname) || continue
# Replace the gc root argument with its fields
splice!(args, i, vals)
next_i += length(vals) - 1
end
end
replace_getfield!(a, tupname, vals, field_names, sv)
end
end
end
Expand Down
11 changes: 9 additions & 2 deletions base/refpointer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,16 @@ convert(::Type{Ref{T}}, x) where {T} = RefValue{T}(x)

function unsafe_convert(P::Type{Ptr{T}}, b::RefValue{T}) where T
if isbits(T)
return convert(P, data_pointer_from_objref(b))
return convert(P, pointer_from_objref(b))
elseif isleaftype(T)
return convert(P, pointer_from_objref(b.x))
else
return convert(P, data_pointer_from_objref(b.x))
# If the slot is not leaf type, it could be either isbits or not.
# If it is actually an isbits type and the type inference can infer that
# it can rebox the `b.x` if we simply call `pointer_from_objref(b.x)` on it.
# Instead, explicitly load the pointer from the `RefValue` so that the pointer
# is the same as the one rooted in the `RefValue` object.
return convert(P, pointerref(Ptr{Ptr{Void}}(pointer_from_objref(b)), 1, 0))
end
end
function unsafe_convert(P::Type{Ptr{Any}}, b::RefValue{Any})
Expand Down
2 changes: 1 addition & 1 deletion doc/src/devdocs/llvm.md
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ pointer which drops the reference to the array value. However, we of course
need to make sure that the array does stay alive while we're doing the `ccall`.
To understand how this is done, first recall the lowering of the above code:
```julia
return $(Expr(:foreigncall, :(:foo), Void, svec(Ptr{Float64}), :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(A), 0))), :(A)))
return $(Expr(:foreigncall, :(:foo), Void, svec(Ptr{Float64}), :(:ccall), 1, :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(:ccall), 1, :(A)))), :(A)))
```
The last `:(A)`, is an extra argument list inserted during lowering that informs
the code generator which Julia level values need to be kept alive for the
Expand Down
5 changes: 3 additions & 2 deletions doc/src/manual/calling-c-and-fortran-code.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,9 @@ ccall((:foo, "libfoo"), Void, (Int32, Float64),
```

[`Base.cconvert()`](@ref) normally just calls [`convert()`](@ref), but can be defined to return an
arbitrary new object more appropriate for passing to C. For example, this is used to convert an
`Array` of objects (e.g. strings) to an array of pointers.
arbitrary new object more appropriate for passing to C.
This should be used to perform all allocations of memory that will be accessed by the C code.
For example, this is used to convert an `Array` of objects (e.g. strings) to an array of pointers.

[`Base.unsafe_convert()`](@ref) handles conversion to `Ptr` types. It is considered unsafe because
converting an object to a native pointer can hide the object from the garbage collector, causing
Expand Down
Loading

6 comments on commit 526b83e

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something went wrong when running your job:

NanosoldierError: failed to run benchmarks against primary commit: failed process: Process(`sudo cset shield -e su nanosoldier -- -c ./benchscript.sh`, ProcessExited(1)) [1]

Logs and partial data can be found here
cc @ararslan

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something went wrong when running your job:

NanosoldierError: failed to run benchmarks against primary commit: failed process: Process(`sudo cset shield -e su nanosoldier -- -c ./benchscript.sh`, ProcessExited(1)) [1]

Logs and partial data can be found here
cc @ararslan

@jrevels
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you follow Nanosoldier’s links to the uploaded logs, and you look at the primary benchmark error log, you’ll see that JLD/HDF5 is erroring out trying to deserialize the benchmark parameters before the benchmarks have even run.

I'm guessing HDF5/JLD is broken on Julia master.

@ararslan
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps now is the time to go to that shiny new JLD2.

Please sign in to comment.