This repository has been archived by the owner on May 27, 2021. It is now read-only.

Commit ff3b34e

Add toggle for contextualization, and disable for tests relying on names.
maleadt committed Jan 20, 2020
1 parent 39c4b7a commit ff3b34e
Showing 5 changed files with 24 additions and 23 deletions.
2 changes: 2 additions & 0 deletions src/compiler/common.jl
@@ -7,6 +7,8 @@ Base.@kwdef struct CompilerJob
cap::VersionNumber
kernel::Bool

contextualize::Bool = true

# optional properties
minthreads::Union{Nothing,CuDim} = nothing
maxthreads::Union{Nothing,CuDim} = nothing
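Since CompilerJob is a Base.@kwdef struct, the new field becomes a keyword argument that defaults to true, so existing construction sites keep working unchanged. A minimal standalone sketch of that pattern (the struct and field set below are illustrative, not the real CompilerJob):

# Illustrative only: mirrors the @kwdef pattern used by CompilerJob above.
Base.@kwdef struct ToyJob
    kernel::Bool
    contextualize::Bool = true  # new toggle, enabled unless requested otherwise
end

ToyJob(kernel=true)                       # contextualize == true by default
ToyJob(kernel=true, contextualize=false)  # opt out of contextualization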
2 changes: 1 addition & 1 deletion src/compiler/driver.jl
@@ -62,7 +62,7 @@ function codegen(target::Symbol, job::CompilerJob;
@timeit_debug to "validation" check_method(job)

@timeit_debug to "Julia front-end" begin
f = contextualize(job.f)
f = job.contextualize ? contextualize(job.f) : job.f

# get the method instance
world = typemax(UInt)
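For background, contextualize runs the kernel through a Cassette context so that calls inside it can be overdubbed with GPU-friendly methods; the new field simply lets a job skip that step. A rough sketch of the idea, not CUDAnative's actual definition (the context name Ctx is hypothetical):

using Cassette

Cassette.@context Ctx  # hypothetical context; CUDAnative defines its own

# Every call made inside `f` is executed through the context, so
# overdubbed methods can take effect.
contextualized(f) = (args...) -> Cassette.overdub(Ctx(), f, args...)

Because the wrapper is an anonymous closure, the emitted LLVM/PTX symbols no longer carry the original function name, which is why the name-sensitive tests below pass contextualize=false.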
3 changes: 2 additions & 1 deletion src/execution.jl
@@ -9,7 +9,7 @@ export @cuda, cudaconvert, cufunction, dynamic_cufunction, nearest_warpsize
# the code it generates, or the execution
function split_kwargs(kwargs)
macro_kws = [:dynamic]
compiler_kws = [:minthreads, :maxthreads, :blocks_per_sm, :maxregs, :name]
compiler_kws = [:minthreads, :maxthreads, :blocks_per_sm, :maxregs, :name, :contextualize]
call_kws = [:cooperative, :blocks, :threads, :config, :shmem, :stream]
macro_kwargs = []
compiler_kwargs = []
@@ -351,6 +351,7 @@ The following keyword arguments are supported:
- `maxregs`: the maximum number of registers to be allocated to a single thread (only
supported on LLVM 4.0+)
- `name`: override the name that the kernel will have in the generated code
- `contextualize`: whether to contextualize functions using Cassette (default: true)
The output of this function is automatically cached, i.e. you can simply call `cufunction`
in a hot path without degrading performance. New code will be generated automatically, when
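A minimal usage sketch of the new keyword, mirroring the call sites updated in the tests below (assumes a working CUDA setup; kernel is just a placeholder):

using CUDAnative

kernel() = return

# The keyword is forwarded to the compiler from @cuda ...
@cuda threads=1 contextualize=false kernel()

# ... from cufunction ...
k = cufunction(kernel; contextualize=false)

# ... and from the reflection entry points.
CUDAnative.code_llvm(devnull, kernel, Tuple{}; contextualize=false)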
32 changes: 15 additions & 17 deletions test/codegen.jl
@@ -8,7 +8,8 @@
valid_kernel() = return
invalid_kernel() = 1

ir = sprint(io->CUDAnative.code_llvm(io, valid_kernel, Tuple{}; optimize=false, dump_module=true))
ir = sprint(io->CUDAnative.code_llvm(io, valid_kernel, Tuple{}; dump_module=true,
contextualize=false, optimize=false))

# module should contain our function + a generic call wrapper
@test occursin("define void @julia_valid_kernel", ir)
@@ -21,11 +22,6 @@
@test_throws CUDAnative.KernelError CUDAnative.code_llvm(devnull, invalid_kernel, Tuple{}; kernel=true) == nothing
end

@testset "unbound typevars" begin
invalid_kernel() where {unbound} = return
@test_throws CUDAnative.KernelError CUDAnative.code_llvm(devnull, invalid_kernel, Tuple{})
end

@testset "exceptions" begin
foobar() = throw(DivideError())
ir = sprint(io->CUDAnative.code_llvm(io, foobar, Tuple{}))
@@ -52,7 +48,7 @@ end
@noinline child(i) = sink(i)
parent(i) = child(i)

ir = sprint(io->CUDAnative.code_llvm(io, parent, Tuple{Int}))
ir = sprint(io->CUDAnative.code_llvm(io, parent, Tuple{Int}; contextualize=false))
@test occursin(r"call .+ @julia_child_", ir)
end

@@ -76,10 +72,10 @@ end
x::Int
end

ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Aggregate}))
ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Aggregate}; contextualize=false))
@test occursin(r"@julia_kernel_\d+\(({ i64 }|\[1 x i64\]) addrspace\(\d+\)?\*", ir)

ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Aggregate}; kernel=true))
ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Aggregate}; contextualize=false, kernel=true))
@test occursin(r"@ptxcall_kernel_\d+\(({ i64 }|\[1 x i64\])\)", ir)
end

@@ -135,7 +131,7 @@ end
closure = ()->return

function test_name(f, name; kwargs...)
code = sprint(io->CUDAnative.code_llvm(io, f, Tuple{}; kwargs...))
code = sprint(io->CUDAnative.code_llvm(io, f, Tuple{}; contextualize=false, kwargs...))
@test occursin(name, code)
end

@@ -221,7 +217,7 @@ end
return
end

asm = sprint(io->CUDAnative.code_ptx(io, parent, Tuple{Int64}))
asm = sprint(io->CUDAnative.code_ptx(io, parent, Tuple{Int64}; contextualize=false))
@test occursin(r"call.uni\s+julia_child_"m, asm)
end

@@ -232,7 +228,7 @@ end
return
end

asm = sprint(io->CUDAnative.code_ptx(io, entry, Tuple{Int64}; kernel=true))
asm = sprint(io->CUDAnative.code_ptx(io, entry, Tuple{Int64}; contextualize=false, kernel=true))
@test occursin(r"\.visible \.entry ptxcall_entry_", asm)
@test !occursin(r"\.visible \.func julia_nonentry_", asm)
@test occursin(r"\.func julia_nonentry_", asm)
@@ -279,15 +275,15 @@ end
return
end

asm = sprint(io->CUDAnative.code_ptx(io, parent1, Tuple{Int}))
asm = sprint(io->CUDAnative.code_ptx(io, parent1, Tuple{Int}; contextualize=false))
@test occursin(r".func julia_child_", asm)

function parent2(i)
child(i+1)
return
end

asm = sprint(io->CUDAnative.code_ptx(io, parent2, Tuple{Int}))
asm = sprint(io->CUDAnative.code_ptx(io, parent2, Tuple{Int}; contextualize=false))
@test occursin(r".func julia_child_", asm)
end

@@ -357,7 +353,7 @@ end
closure = ()->nothing

function test_name(f, name; kwargs...)
code = sprint(io->CUDAnative.code_ptx(io, f, Tuple{}; kwargs...))
code = sprint(io->CUDAnative.code_ptx(io, f, Tuple{}; contextualize=false, kwargs...))
@test occursin(name, code)
end

@@ -429,7 +425,7 @@ end
return
end

ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Float32,Ptr{Float32}}))
ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Float32,Ptr{Float32}}; contextualize=false))
@test occursin("jl_box_float32", ir)
CUDAnative.code_ptx(devnull, kernel, Tuple{Float32,Ptr{Float32}})
end
@@ -444,18 +440,20 @@ end

# some validation happens in the emit_function hook, which is called by code_llvm

# NOTE: contextualization changes order of frames
@testset "recursion" begin
@eval recurse_outer(i) = i > 0 ? i : recurse_inner(i)
@eval @noinline recurse_inner(i) = i < 0 ? i : recurse_outer(i)

@test_throws_message(CUDAnative.KernelError, CUDAnative.code_llvm(devnull, recurse_outer, Tuple{Int})) do msg
@test_throws_message(CUDAnative.KernelError, CUDAnative.code_llvm(devnull, recurse_outer, Tuple{Int}; contextualize=false)) do msg
occursin("recursion is currently not supported", msg) &&
occursin("[1] recurse_outer", msg) &&
occursin("[2] recurse_inner", msg) &&
occursin("[3] recurse_outer", msg)
end
end

# FIXME: contextualization removes all frames here -- changed inlining behavior?
@testset "base intrinsics" begin
foobar(i) = sin(i)

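The pattern in this file: any test that greps the generated code for the original Julia function name opts out of contextualization, since the compiled entry point is otherwise the Cassette wrapper. A hypothetical illustration of the failure mode these edits avoid (not part of the test suite):

ir = sprint(io->CUDAnative.code_llvm(io, valid_kernel, Tuple{}; dump_module=true))
occursin("julia_valid_kernel", ir)  # may be false when contextualization is enabled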
8 changes: 4 additions & 4 deletions test/device/execution.jl
@@ -70,9 +70,9 @@ end
@test_throws ErrorException @device_code_lowered nothing

# make sure kernel name aliases are preserved in the generated code
@test occursin("ptxcall_dummy", sprint(io->(@device_code_llvm io=io @cuda dummy())))
@test occursin("ptxcall_dummy", sprint(io->(@device_code_ptx io=io @cuda dummy())))
@test occursin("ptxcall_dummy", sprint(io->(@device_code_sass io=io @cuda dummy())))
@test occursin("ptxcall_dummy", sprint(io->(@device_code_llvm io=io @cuda contextualize=false dummy())))
@test occursin("ptxcall_dummy", sprint(io->(@device_code_ptx io=io @cuda contextualize=false dummy())))
@test occursin("ptxcall_dummy", sprint(io->(@device_code_sass io=io @cuda contextualize=false dummy())))

# make sure invalid kernels can be partially reflected upon
let
@@ -96,7 +96,7 @@

# set name of kernel
@test occursin("ptxcall_mykernel", sprint(io->(@device_code_llvm io=io begin
k = cufunction(dummy, name="mykernel")
k = cufunction(dummy; name="mykernel", contextualize=false)
k()
end)))
end