Skip to content
This repository has been archived by the owner on May 17, 2020. It is now read-only.

Commit

Permalink
add prefetch
Browse files Browse the repository at this point in the history
  • Loading branch information
vchuravy committed Feb 1, 2019
1 parent ca04699 commit 01683e7
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/GPUifyLoops.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ using Requires

export @setup, @loop, @synchronize
export @scratch, @shmem
export @prefetch, prefetch

@init @require CUDAnative="be33ccc6-a3ff-5ff2-a52e-74243cff1e17" begin
using .CUDAnative
Expand Down Expand Up @@ -68,6 +69,7 @@ end

include("scratch.jl")
include("shmem.jl")
include("prefetch.jl")

end

54 changes: 54 additions & 0 deletions src/prefetch.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
@prefetch A[1, 2]
Prefetch the memory location of the element `A[1, 2]`.
TODO:
- What to do about StructArray
"""
macro prefetch(expr)
    # Validate explicitly rather than with `@assert`: assertions are meant for
    # internal invariants and may be disabled, and a non-`Expr` argument
    # (e.g. `@prefetch x`) has no `head` field to inspect in the first place.
    if !(expr isa Expr && expr.head === :ref)
        throw(ArgumentError(
            "@prefetch expects an indexing expression such as `A[i, j]`, got `$expr`"
        ))
    end

    # `A[i, j, ...]` parses to `Expr(:ref, A, i, j, ...)`.
    A = expr.args[1]
    I = expr.args[2:end]

    # `$prefetch` splices in the `prefetch` function visible from this macro's
    # definition, so the generated call does not depend on what `prefetch`
    # means at the call site; `A` and the indices are evaluated in the caller's
    # scope because the whole quote is escaped. The index expressions are
    # forwarded verbatim as call arguments, so `end` inside the brackets is
    # not translated.
    return esc(quote
        $prefetch($A, $(I...))
    end)
end

"""
    prefetch(A, I...)

Issue a prefetch for the element of `A` addressed by the indices `I...`.

The indices are converted to a linear index through `LinearIndices(A)`, the
element address is obtained with `pointer(A, linear_index)`, and the request
is forwarded to `__prefetch` as a read (`Val(:read)`) of data (`Val(:data)`)
with locality `Val(3)`. Returns whatever `__prefetch` returns.
"""
Base.@propagate_inbounds function prefetch(A, I...)
    address = pointer(A, LinearIndices(A)[I...])
    return __prefetch(address, Val(:read), Val(3), Val(:data))
end

# Emit a call to the `llvm.prefetch` intrinsic for the address held in `ptr`.
#
# Arguments
# - `ptr::T`: the address to prefetch. It is handed to the IR as an `i64` and
#   turned into an `i8*` with `inttoptr`, so `T` must be a 64-bit bits value.
#   NOTE(review): the pointer width is hard-coded to 64 bits.
# - `Val{RW}`: `:read` (encoded as 0) or `:write` (encoded as 1).
# - `Val{Locality}`: integer in `0:3`, spliced in as the locality operand.
# - `Val{Cache}`: `:data` (encoded as 1) or `:instruction` (encoded as 0).
#
# Returns `nothing`. Calling it with an unsupported selector throws an
# `ArgumentError` instead of failing inside the generator with an undefined
# variable or producing invalid IR.
@generated function __prefetch(ptr::T, ::Val{RW}, ::Val{Locality}, ::Val{Cache}) where {T, RW, Locality, Cache}
    # Unsupported selectors are reported by returning a body that throws, so
    # the generator itself never fails with an unrelated error.
    if RW === :read
        f_rw = 0
    elseif RW === :write
        f_rw = 1
    else
        msg = "unsupported prefetch access kind $(repr(RW)); expected :read or :write"
        return :(throw(ArgumentError($msg)))
    end

    if !(Locality isa Integer && 0 <= Locality <= 3)
        msg = "unsupported prefetch locality $(repr(Locality)); expected an integer in 0:3"
        return :(throw(ArgumentError($msg)))
    end
    # Normalise to `Int` so the IR always receives a decimal literal.
    f_locality = Int(Locality)

    if Cache === :data
        f_cache = 1
    elseif Cache === :instruction
        f_cache = 0
    else
        msg = "unsupported prefetch cache type $(repr(Cache)); expected :data or :instruction"
        return :(throw(ArgumentError($msg)))
    end

    decls = """
        declare void @llvm.prefetch(i8*, i32, i32, i32)
        """

    # `%0` is the single argument (`ptr`) as passed by `llvmcall`.
    ir = """
        %ptr = inttoptr i64 %0 to i8*
        call void @llvm.prefetch(i8* %ptr, i32 $f_rw, i32 $f_locality, i32 $f_cache)
        ret void
        """

    return quote
        Base.@_inline_meta
        Base.llvmcall(($decls, $ir), Nothing, Tuple{T}, ptr)
    end
end

0 comments on commit 01683e7

Please sign in to comment.