-
-
Notifications
You must be signed in to change notification settings - Fork 122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
error for scatter with Metal arrays #534
Comments
This is likely because of atomic operations, which rely on Atomix: Line 112 in 83df642
IIUC, Metal does not yet support Atomix. |
That is correct; I think we have the necessary intrinsics, but nobody has implemented the Atomix.jl interface yet. The compiler shouldn't crash like that, though, unless of course Atomix.jl is falling back to LLVM atomics, which aren't supported by the back-end. |
I tried this with Atomix 1.0 and on top of JuliaGPU/KernelAbstractions.jl#545:
julia> using Metal, NNlib, Flux
julia> Metal.versioninfo()
macOS 15.0.1, Darwin 24.0.0
Toolchain:
- Julia: 1.11.1
- LLVM: 16.0.6
Julia packages:
- Metal.jl: 1.4.2
- GPUArrays: 10.3.1
- GPUCompiler: 0.27.8
- KernelAbstractions: 0.9.29
- ObjectiveC: 3.1.0
- LLVM: 9.1.3
- LLVMDowngrader_jll: 0.3.0+2
1 device:
- Apple M1 Pro (384.000 KiB allocated)
julia> device = Flux.get_device()
(::MetalDevice) (generic function with 4 methods)
julia> NNlib.gather([1 2 3; 4 5 6] |> device, [1,3,1,3,1] |> device)
2×5 MtlMatrix{Int64, Metal.PrivateStorage}:
1 3 1 3 1
4 6 4 6 4
julia> NNlib.scatter(+, [1 2 3 4; 5 6 7 8] |> device, [2,1,1,5] |> device)
ERROR: Compilation to native code failed; see below for details.
If you think this is a bug, please file an issue and attach /var/folders/z_/n_d2vxmx4jj95q7hzmwngnyc0000gn/T/jl_xsBWFuVaFy.metallib
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/compilation.jl:195 [inlined]
[3] macro expansion
@ ~/.julia/packages/ObjectiveC/C7BVt/src/os.jl:264 [inlined]
[4] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/compilation.jl:178 [inlined]
[5] (::Metal.var"#171#172"{Bool, GPUCompiler.CompilerJob{…}, @NamedTuple{…}})()
@ Metal ~/.julia/packages/ObjectiveC/C7BVt/src/foundation.jl:637
[6] macro expansion
@ ~/.julia/packages/ObjectiveC/C7BVt/src/foundation.jl:565 [inlined]
[7] macro expansion
@ ./lock.jl:273 [inlined]
[8] ObjectiveC.Foundation.NSAutoreleasePool(f::Metal.var"#171#172"{Bool, GPUCompiler.CompilerJob{…}, @NamedTuple{…}})
@ ObjectiveC.Foundation ~/.julia/packages/ObjectiveC/C7BVt/src/foundation.jl:557
[9] link(job::GPUCompiler.CompilerJob, compiled::@NamedTuple{image::Vector{UInt8}, entry::String}; return_function::Bool)
@ Metal ~/.julia/packages/ObjectiveC/C7BVt/src/foundation.jl:636
[10] actual_compilation(cache::Dict{…}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{…}, compiler::typeof(Metal.compile), linker::typeof(Metal.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:262
[11] cached_compilation(cache::Dict{…}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{…}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:151
[12] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:189 [inlined]
[13] macro expansion
@ ./lock.jl:273 [inlined]
[14] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{…}}; name::Nothing, kwargs::@Kwargs{})
@ Metal ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:184
[15] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{…}})
@ Metal ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:182
[16] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:85 [inlined]
[17] (::KernelAbstractions.Kernel{…})(::Function, ::Vararg{…}; ndrange::Int64, workgroupsize::Nothing)
@ Metal.MetalKernels ~/.julia/packages/Metal/JtmpJ/src/MetalKernels.jl:110
[18] Kernel
@ ~/.julia/packages/Metal/JtmpJ/src/MetalKernels.jl:106 [inlined]
[19] scatter!
@ ~/.julia/dev/NNlib/src/scatter.jl:104 [inlined]
[20] scatter(op::typeof(+), src::MtlMatrix{…}, idx::MtlVector{…}; init::Nothing, dstsize::Nothing)
@ NNlib ~/.julia/dev/NNlib/src/scatter.jl:183
[21] scatter(op::typeof(+), src::MtlMatrix{Int64, Metal.PrivateStorage}, idx::MtlVector{Int64, Metal.PrivateStorage})
@ NNlib ~/.julia/dev/NNlib/src/scatter.jl:174
[22] top-level scope
@ REPL[22]:1
caused by: NSError: Failed to materializeAll. (AGXMetalG13X, code 3)
Stacktrace:
[1] Metal.MTL.MTLComputePipelineState(dev::Metal.MTL.MTLDeviceInstance, fun::Metal.MTL.MTLFunctionInstance)
@ Metal.MTL ~/.julia/packages/Metal/JtmpJ/lib/mtl/compute_pipeline.jl:60
[2] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/compilation.jl:183 [inlined]
[3] macro expansion
@ ~/.julia/packages/ObjectiveC/C7BVt/src/os.jl:264 [inlined]
[4] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/compilation.jl:178 [inlined]
[5] (::Metal.var"#171#172"{Bool, GPUCompiler.CompilerJob{…}, @NamedTuple{…}})()
@ Metal ~/.julia/packages/ObjectiveC/C7BVt/src/foundation.jl:637
[6] macro expansion
@ ~/.julia/packages/ObjectiveC/C7BVt/src/foundation.jl:565 [inlined]
[7] macro expansion
@ ./lock.jl:273 [inlined]
[8] ObjectiveC.Foundation.NSAutoreleasePool(f::Metal.var"#171#172"{Bool, GPUCompiler.CompilerJob{…}, @NamedTuple{…}})
@ ObjectiveC.Foundation ~/.julia/packages/ObjectiveC/C7BVt/src/foundation.jl:557
[9] link(job::GPUCompiler.CompilerJob, compiled::@NamedTuple{image::Vector{UInt8}, entry::String}; return_function::Bool)
@ Metal ~/.julia/packages/ObjectiveC/C7BVt/src/foundation.jl:636
[10] actual_compilation(cache::Dict{…}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{…}, compiler::typeof(Metal.compile), linker::typeof(Metal.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:262
[11] cached_compilation(cache::Dict{…}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{…}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:151
[12] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:189 [inlined]
[13] macro expansion
@ ./lock.jl:273 [inlined]
[14] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{…}}; name::Nothing, kwargs::@Kwargs{})
@ Metal ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:184
[15] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{…}})
@ Metal ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:182
[16] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:85 [inlined]
[17] (::KernelAbstractions.Kernel{…})(::Function, ::Vararg{…}; ndrange::Int64, workgroupsize::Nothing)
@ Metal.MetalKernels ~/.julia/packages/Metal/JtmpJ/src/MetalKernels.jl:110
[18] Kernel
@ ~/.julia/packages/Metal/JtmpJ/src/MetalKernels.jl:106 [inlined]
[19] scatter!
@ ~/.julia/dev/NNlib/src/scatter.jl:104 [inlined]
[20] scatter(op::typeof(+), src::MtlMatrix{…}, idx::MtlVector{…}; init::Nothing, dstsize::Nothing)
@ NNlib ~/.julia/dev/NNlib/src/scatter.jl:183
[21] scatter(op::typeof(+), src::MtlMatrix{Int64, Metal.PrivateStorage}, idx::MtlVector{Int64, Metal.PrivateStorage})
@ NNlib ~/.julia/dev/NNlib/src/scatter.jl:174
[22] top-level scope
@ REPL[22]:1
Some type information was truncated. Use `show(err)` to see complete types. |
Can you verify in the LLVM IR that this is emitting AIR atomics and not native LLVM atomics? x-ref JuliaConcurrent/Atomix.jl#39 (comment) |
@CarloLucibello Does it work if your |
With julia> import Metal, NNlib, Flux
julia> dev = Flux.get_device();
julia> src, idx = Int32[1 2 3 4; 5 6 7 8], Int32[2,1,1,5];
julia> srcd, idxd = dev(x), dev(idx)
julia> y = NNlib.scatter(+, src, idx); # CPU
julia> yd = dev(zero(y));
julia> NNlib.scatter!(+, yd, srcd, idxd) # GPU
ERROR: InvalidIRError: compiling MethodInstance for NNlib.gpu__scatter!(::KernelAbstractions.CompilerMetadata{…}, ::typeof(+), ::MtlDeviceMatrix{…}, ::MtlDeviceMatrix{…}, ::MtlDeviceVector{…}, ::CartesianIndices{…}, ::Int64) resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to gpu_malloc)
Stacktrace:
[1] malloc
@ ~/.julia/packages/GPUCompiler/2CW9L/src/runtime.jl:85
[2] gc_pool_alloc
@ ~/.julia/packages/GPUCompiler/2CW9L/src/runtime.jl:116
[3] modify!
@ ~/.julia/packages/Atomix/g4H61/ext/AtomixMetalExt.jl:38
[4] modify!
@ ~/.julia/packages/Atomix/g4H61/src/generic.jl:120
[5] macro expansion
@ ~/.julia/packages/NNlib/mRRJu/src/scatter.jl:123
[6] gpu__scatter!
@ ~/.julia/dev/KernelAbstractions/src/macros.jl:97
[7] gpu__scatter!
@ ./none:0
Reason: unsupported call to an unknown function (call to gpu_malloc)
Stacktrace:
[1] malloc
@ ~/.julia/packages/GPUCompiler/2CW9L/src/runtime.jl:85
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/2CW9L/src/runtime.jl:180
[3] macro expansion
@ ./none:0
[4] box
@ ./none:0
[5] box_int64
@ ~/.julia/packages/GPUCompiler/2CW9L/src/runtime.jl:209
[6] modify!
@ ~/.julia/packages/Atomix/g4H61/ext/AtomixMetalExt.jl:38
[7] modify!
@ ~/.julia/packages/Atomix/g4H61/src/generic.jl:120
[8] macro expansion
@ ~/.julia/packages/NNlib/mRRJu/src/scatter.jl:123
[9] gpu__scatter!
@ ~/.julia/dev/KernelAbstractions/src/macros.jl:97
[10] gpu__scatter!
@ ./none:0
Reason: unsupported dynamic function invocation (call to atomic_fetch_add_explicit)
Stacktrace:
[1] modify!
@ ~/.julia/packages/Atomix/g4H61/ext/AtomixMetalExt.jl:38
[2] modify!
@ ~/.julia/packages/Atomix/g4H61/src/generic.jl:120
[3] macro expansion
@ ~/.julia/packages/NNlib/mRRJu/src/scatter.jl:123
[4] gpu__scatter!
@ ~/.julia/dev/KernelAbstractions/src/macros.jl:97
[5] gpu__scatter!
@ ./none:0
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}, args::LLVM.Module)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/validation.jl:147
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:382 [inlined]
[3] macro expansion
@ ~/.julia/packages/TimerOutputs/NRdsv/src/TimerOutput.jl:253 [inlined]
[4] macro expansion
@ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:381 [inlined]
[5]
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/utils.jl:108
[6]
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:100
[7] codegen
@ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:82 [inlined]
[8] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:79
[9] compile
@ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:74 [inlined]
[10] (::Metal.var"#154#162"{GPUCompiler.CompilerJob{…}})(ctx::LLVM.Context)
@ Metal ~/.julia/packages/Metal/JtmpJ/src/compiler/compilation.jl:108
[11] JuliaContext(f::Metal.var"#154#162"{GPUCompiler.CompilerJob{…}}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:34
[12] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:25
[13] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/compilation.jl:107 [inlined]
[14] macro expansion
@ ~/.julia/packages/ObjectiveC/C7BVt/src/os.jl:264 [inlined]
[15] compile(job::GPUCompiler.CompilerJob)
@ Metal ~/.julia/packages/Metal/JtmpJ/src/compiler/compilation.jl:105
[16] actual_compilation(cache::Dict{…}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{…}, compiler::typeof(Metal.compile), linker::typeof(Metal.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:237
[17] cached_compilation(cache::Dict{…}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{…}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:151
[18] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:189 [inlined]
[19] macro expansion
@ ./lock.jl:273 [inlined]
[20] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{…}}; name::Nothing, kwargs::@Kwargs{})
@ Metal ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:184
[21] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{…}})
@ Metal ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:182
[22] macro expansion
@ ~/.julia/packages/Metal/JtmpJ/src/compiler/execution.jl:85 [inlined]
[23] (::KernelAbstractions.Kernel{…})(::Function, ::Vararg{…}; ndrange::Int64, workgroupsize::Nothing)
@ Metal.MetalKernels ~/.julia/packages/Metal/JtmpJ/src/MetalKernels.jl:110
[24] Kernel
@ ~/.julia/packages/Metal/JtmpJ/src/MetalKernels.jl:106 [inlined]
[25] scatter!(op::typeof(+), dst::MtlMatrix{…}, src::MtlMatrix{…}, idx::MtlVector{…})
@ NNlib ~/.julia/packages/NNlib/mRRJu/src/scatter.jl:104
[26] top-level scope
@ REPL[47]:1
[27] top-level scope
@ ~/.julia/packages/Metal/JtmpJ/src/initialization.jl:72
Some type information was truncated. Use `show(err)` to see complete types. |
@maleadt here is the details``` julia> Metal.@device_code_llvm NNlib.scatter!(+, yd, srcd, idxd) ; GPUCompiler.CompilerJob{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}(MethodInstance for NNlib.gpu__scatter!(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, ::typeof(+), ::MtlDeviceMatrix{Int32, 1}, ::MtlDeviceMatrix{Float32, 1}, ::MtlDeviceVector{Int32, 1}, ::CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, ::Int64), CompilerConfig for GPUCompiler.MetalCompilerTarget, 0x00000000000068bc) ; @ none within `gpu__scatter!` define void @_Z13gpu__scatter_16CompilerMetadataI11DynamicSize12DynamicCheckv16CartesianIndicesILl1E5TupleI5OneToI5Int64EEE7NDRangeILl1ES0_S0_S2_ILl1ES3_IS4_IS5_EEES2_ILl1ES3_IS4_IS5_EEEEE1_14MtlDeviceArrayI5Int32Ll2ELl1EES8_I7Float32Ll2ELl1EES8_IS9_Ll1ELl1EES2_ILl1ES3_IS4_IS5_EEES5_({ [1 x [1 x [1 x i64]]], [2 x [1 x [1 x [1 x i64]]]] } addrspace(1)* %0, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %1, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %2, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %3, [1 x [1 x [1 x i64]]] addrspace(1)* %4, i64 addrspace(1)* %5, i32 %thread_position_in_threadgroup, i32 %threadgroup_position_in_grid, i32 %thread_position_in_grid) local_unnamed_addr { conversion: %6 = getelementptr inbounds { [1 x [1 x [1 x i64]]], [2 x [1 x [1 x [1 x i64]]]] }, { [1 x [1 x [1 x i64]]], [2 x [1 x [1 x [1 x i64]]]] } addrspace(1)* %0, i64 0, i32 0, i64 0, i64 0, i64 0 %.unpack.unpack.unpack.unpack = load i64, i64 addrspace(1)* %6, align 8 %7 = getelementptr inbounds { [1 x [1 x [1 x i64]]], [2 x [1 x [1 x [1 x i64]]]] }, { [1 x [1 x [1 x i64]]], [2 x [1 x [1 x [1 x 
i64]]]] } addrspace(1)* %0, i64 0, i32 1, i64 1, i64 0, i64 0, i64 0 %.unpack18.unpack23.unpack.unpack.unpack = load i64, i64 addrspace(1)* %7, align 8 %.elt = getelementptr inbounds { i8 addrspace(1)*, [2 x i64] }, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %1, i64 0, i32 0 %.unpack = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %.elt, align 8 %.unpack32.elt = getelementptr inbounds { i8 addrspace(1)*, [2 x i64] }, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %1, i64 0, i32 1, i64 0 %.unpack32.unpack = load i64, i64 addrspace(1)* %.unpack32.elt, align 8 %.unpack32.elt33 = getelementptr inbounds { i8 addrspace(1)*, [2 x i64] }, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %1, i64 0, i32 1, i64 1 %.unpack32.unpack34 = load i64, i64 addrspace(1)* %.unpack32.elt33, align 8 %8 = bitcast { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %2 to float addrspace(1)* addrspace(1)* %.unpack3743 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* %8, align 8 %9 = bitcast { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %3 to i32 addrspace(1)* addrspace(1)* %.unpack4549 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %9, align 8 %10 = load i64, i64 addrspace(1)* %5, align 8 ; @ none within `gpu__scatter!` @ /Users/carlo/.julia/dev/KernelAbstractions/src/macros.jl:96 ; ┌ @ /Users/carlo/.julia/packages/Metal/JtmpJ/src/MetalKernels.jl:161 within `#__validindex` ; │┌ @ /Users/carlo/.julia/packages/Metal/JtmpJ/src/device/intrinsics/arguments.jl:49 within `#threadgroup_position_in_grid_1d` ; ││┌ @ int.jl:87 within `+` %11 = add i32 %threadgroup_position_in_grid, 1 ; │└└ ; │┌ @ /Users/carlo/.julia/packages/Metal/JtmpJ/src/device/intrinsics/arguments.jl:49 within `#thread_position_in_threadgroup_1d` ; ││┌ @ int.jl:87 within `+` %12 = add i32 %thread_position_in_threadgroup, 1 ; │└└ ; │┌ @ /Users/carlo/.julia/dev/KernelAbstractions/src/nditeration.jl:84 within `expand` ; ││┌ @ abstractarray.jl:1312 within `getindex` ; │││┌ @ indices.jl:365 within `to_indices` 
@ indices.jl:368 ; ││││┌ @ indices.jl:292 within `to_index` @ indices.jl:307 ; │││││┌ @ number.jl:7 within `convert` ; ││││││┌ @ boot.jl:892 within `Int64` ; │││││││┌ @ boot.jl:816 within `toInt64` %13 = zext i32 %11 to i64 %14 = zext i32 %12 to i64 ; ││└└└└└└ ; ││ @ /Users/carlo/.julia/dev/KernelAbstractions/src/nditeration.jl:84 within `expand` @ /Users/carlo/.julia/dev/KernelAbstractions/src/nditeration.jl:74 ; ││┌ @ ntuple.jl:48 within `ntuple` ; │││┌ @ /Users/carlo/.julia/dev/KernelAbstractions/src/nditeration.jl:78 within `#1` ; ││││┌ @ int.jl:86 within `-` %15 = add nsw i64 %13, -1 ; ││││└ ; ││││┌ @ int.jl:88 within `*` %16 = mul i64 %.unpack18.unpack23.unpack.unpack.unpack, %15 ; ││││└ ; ││││┌ @ int.jl:87 within `+` %17 = add i64 %16, %14 ; │└└└└ ; │ @ /Users/carlo/.julia/packages/Metal/JtmpJ/src/MetalKernels.jl:163 within `#__validindex` ; │┌ @ multidimensional.jl:477 within `in` ; ││┌ @ tuple.jl:382 within `map` ; │││┌ @ range.jl:1426 within `in` ; ││││┌ @ int.jl:514 within `<=` %18 = icmp slt i64 %17, 1 %19 = icmp sgt i64 %17, %.unpack.unpack.unpack.unpack ; └└└└└ %.not2 = or i1 %18, %19 br i1 %.not2, label %L288, label %L107L107: ; preds = %conversion L211: ; preds = %pass L217: ; preds = %L211 L219: ; preds = %pass, %L211 L247: ; preds = %L217 L250: ; preds = %L217 L288: ; preds = %L250, %conversion fail: ; preds = %L107 pass: ; preds = %L107
|
`gather` works fine on Apple Silicon, but with `scatter` I get an error. The scatter kernel works fine with CUDA and AMDGPU arrays.
cc @maleadt @pxl-th
The text was updated successfully, but these errors were encountered: