Skip to content

Commit

Permalink
Merge pull request #361 from JuliaGPU/tb/opaque_pointers
Browse files Browse the repository at this point in the history
Add support for opaque pointers
  • Loading branch information
maleadt authored Jun 8, 2024
2 parents 23919a2 + 1f0bccc commit 2057418
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 18 deletions.
38 changes: 26 additions & 12 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ steps:
julia: "nightly"
soft_fail: true

# Test Storage modes Shared and Managed
- group: ":floppy_disk: Storage mode"
key: "storage"
# special tests
- group: ":eyes: Special"
depends_on: "julia"
steps:
- label: "MtlArray with {{matrix.storage}} storage"
- label: "{{matrix.storage}} array storage"
plugins:
- JuliaCI/julia#v1:
version: "1.10"
Expand All @@ -48,7 +48,7 @@ steps:
queue: "juliaecosystem"
os: "macos"
arch: "aarch64"
if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip storage\]/ && !build.pull_request.draft
if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
timeout_in_minutes: 60
matrix:
setup:
Expand All @@ -57,13 +57,7 @@ steps:
- "Managed"
commands: |
echo -e "[Metal]\ndefault_storage = \"{{matrix.storage}}\"" >LocalPreferences.toml
# special tests
- group: ":eyes: Special"
depends_on: "julia"
steps:
- label: "Metal API Validation"
- label: "API validation"
plugins:
- JuliaCI/julia#v1:
version: 1.8
Expand All @@ -85,3 +79,23 @@ steps:
arch: "aarch64"
if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
timeout_in_minutes: 60
- label: "Opaque pointers"
plugins:
- JuliaCI/julia#v1:
version: "1.10"
- JuliaCI/julia-test#v1:
test_args: "--quickfail"
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
- lib
- examples
env:
JULIA_LLVM_ARGS: '--opaque-pointers'
agents:
queue: "juliaecosystem"
os: "macos"
arch: "aarch64"
if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
timeout_in_minutes: 60
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ ExprTools = "0.1"
GPUArrays = "10.1"
GPUCompiler = "0.26"
KernelAbstractions = "0.9.1"
LLVM = "6, 7"
LLVMDowngrader_jll = "0.2"
LLVM = "7.2"
LLVMDowngrader_jll = "0.3"
ObjectFile = "0.4"
ObjectiveC = "2.1, 3"
Preferences = "1"
Expand Down
44 changes: 41 additions & 3 deletions src/compiler/compilation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,47 @@ GPUCompiler.runtime_module(::MetalCompilerJob) = Metal
GPUCompiler.method_table(::MetalCompilerJob) = method_table


# Metal-specific IR finalization: run the generic `finish_ir!` for the Metal target, then
# attach pointer element type metadata to the typed `air.*` intrinsic declarations that
# are present in `mod`. Returns the entry function produced by the generic implementation.
function GPUCompiler.finish_ir!(@nospecialize(job::MetalCompilerJob),
                                mod::LLVM.Module, entry::LLVM.Function)
    # explicitly call the method for the generic Metal-target job signature
    entry = invoke(GPUCompiler.finish_ir!,
                   Tuple{CompilerJob{MetalCompilerTarget}, LLVM.Module, LLVM.Function},
                   job, mod, entry)

    # pointer type information for typed intrinsics
    # (this is consumed by the LLVM IR downgrader)
    eltypes = (Int32 => :i32, Int64 => :i64, Float16 => :f16, Float32 => :f32)
    addrspaces = (AS.Device => "global", AS.ThreadGroup => "local")
    for (jltyp, llvmtyp) in eltypes, (as, asname) in addrspaces
        # map of intrinsics to pointer operand indices and eltypes,
        # rebuilt for every (element type, address space) combination
        intrinsics = Dict()

        ## simd
        intrinsics["simdgroup_matrix_8x8_load.v64$llvmtyp.p$as$llvmtyp"] = (1 => jltyp,)
        intrinsics["simdgroup_matrix_8x8_store.v64$llvmtyp.p$as$llvmtyp"] = (2 => jltyp,)

        ## atomics
        for op in [:store, :load, :xchg, :add, :sub, :min, :max, :and, :or, :xor]
            intrinsics["atomic.$asname.$op.$llvmtyp"] = (1 => jltyp,)
        end
        intrinsics["atomic.$asname.cmpxchg.weak.$llvmtyp"] = (1 => jltyp, 2 => jltyp)

        # apply metadata to the function declarations; names that do not occur in
        # this module are simply skipped
        for (intr, ptrargs) in intrinsics
            fn = "air.$intr"
            if haskey(functions(mod), fn)
                decl = functions(mod)[fn]

                # flat list of pairs: the operand index minus one, followed by a
                # null value of the LLVM type corresponding to the element type
                mds = []
                for (argidx, eltyp) in ptrargs
                    push!(mds, ConstantInt(Int32(argidx - 1)))
                    push!(mds, null(convert(LLVMType, eltyp)))
                end
                metadata(decl)["arg_eltypes"] = MDNode(mds)
            end
        end
    end

    return entry
end


## compiler implementation (cache, configure, compile, and link)

# cache of compilation caches, per device
Expand Down Expand Up @@ -70,9 +111,6 @@ function compile(@nospecialize(job::CompilerJob))
log = Pipe()

cmd = `$(LLVMDowngrader_jll.llvm_as()) --bitcode-version=5.0 -o -`
if LLVM.version() >= v"16"
cmd = `$cmd --opaque-pointers=0`
end
proc = run(pipeline(cmd, stdout=output, stderr=log, stdin=input); wait=false)
close(output.in)
close(log.in)
Expand Down
2 changes: 1 addition & 1 deletion src/device/intrinsics/simd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ function convert_origin(origin::NTuple{2, Int64})
return (VecElement{Int64}(origin[1]-1), VecElement{Int64}(origin[2]-1))
end

for (jltype, suffix) in ((Float16, "f16"), (Float32, "f32"))
for (jltype, suffix) in ((:Float16, "f16"), (:Float32, "f32"))
for as in (AS.Device, AS.ThreadGroup)
@eval begin
@device_function simdgroup_load(
Expand Down

0 comments on commit 2057418

Please sign in to comment.