From 63e3df3a49f946cfb20fa6cc0d699bce5df4fdb8 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 16 Mar 2018 11:29:08 +0100 Subject: [PATCH] Revert the changes to CuDeviceArray. I'm not convinced by the design, this probably needs to be part of a buffer hierarchy. But let's keep it as a part of the history, for future reference. --- src/device/array.jl | 37 +++++++++++++------------------------ test/array.jl | 36 +++++++++++++++++------------------- test/codegen.jl | 2 +- 3 files changed, 31 insertions(+), 44 deletions(-) diff --git a/src/device/array.jl b/src/device/array.jl index 7505722b..b9956ac0 100644 --- a/src/device/array.jl +++ b/src/device/array.jl @@ -1,7 +1,7 @@ # Contiguous on-device arrays export - CuDeviceArray, CuDeviceVector, CuDeviceMatrix, CuBoundsError, Cached + CuDeviceArray, CuDeviceVector, CuDeviceMatrix, CuBoundsError ## construction @@ -11,38 +11,33 @@ export CuDeviceArray{T}(dims, ptr) CuDeviceArray{T,A}(dims, ptr) CuDeviceArray{T,A,N}(dims, ptr) - CuDeviceArray{T,A,N,C}(dims, ptr) Construct an `N`-dimensional dense CUDA device array with element type `T` wrapping a pointer, where `N` is determined from the length of `dims` and `T` is determined from the type of `ptr`. `dims` may be a single scalar, or a tuple of integers corresponding to the lengths in each dimension). If the rank `N` is supplied explicitly as in `Array{T,N}(dims)`, then it must match the length of `dims`. The same applies to the element type `T`, which -should match the type of the pointer `ptr`. The `C` param indicates whether accesses should -be cached, defaulting to false. +should match the type of the pointer `ptr`. """ CuDeviceArray # NOTE: we can't support the typical `tuple or series of integer` style construction, # because we're currently requiring a trailing pointer argument. -struct CuDeviceArray{T,N,A,C} <: AbstractArray{T,N} +struct CuDeviceArray{T,N,A} <: AbstractArray{T,N} shape::NTuple{N,Int} ptr::DevicePtr{T,A} # inner constructors, fully parameterized, exact types (ie. Int not <:Integer) - CuDeviceArray{T,N,A,C}(shape::NTuple{N,Int}, ptr::DevicePtr{T,A}) where {T,A,N,C} = new(shape,ptr) + CuDeviceArray{T,N,A}(shape::NTuple{N,Int}, ptr::DevicePtr{T,A}) where {T,A,N} = new(shape,ptr) end -const CuDeviceVector = CuDeviceArray{T,1,A,C} where {T,A,C} -const CuDeviceMatrix = CuDeviceArray{T,2,A,C} where {T,A,C} - -@inline Cached(::Type{CuDeviceArray{T,N,A,C}}) where {T,N,A,C} = CuDeviceArray{T,N,A,true} -@inline Cached(a::T) where {T<:CuDeviceArray} = Cached(T)(a.shape, a.ptr) +const CuDeviceVector = CuDeviceArray{T,1,A} where {T,A} +const CuDeviceMatrix = CuDeviceArray{T,2,A} where {T,A} # outer constructors, non-parameterized -CuDeviceArray(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A}(dims, p) -CuDeviceArray(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A}((len,), p) +CuDeviceArray(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A}(dims, p) +CuDeviceArray(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A}((len,), p) # outer constructors, partially parameterized CuDeviceArray{T}(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A}(dims, p) @@ -51,8 +46,8 @@ CuDeviceArray{T,N}(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceVector{T}(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A}((len,), p) # outer constructors, fully parameterized -CuDeviceArray{T,N,A}(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A,false}(Int.(dims), p) -CuDeviceVector{T,A}(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A,false}((Int(len),), p) +CuDeviceArray{T,N,A}(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A}(Int.(dims), p) +CuDeviceVector{T,A}(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A}((Int(len),), p) ## getters @@ -77,16 +72,10 @@ cudaconvert(a::CuArray{T,N}) where {T,N} = convert(CuDeviceArray{T,N,AS.Global}, ## indexing -@inline function Base.getindex(a::CuDeviceArray{T}, index::Integer) where {T} - @boundscheck checkbounds(a, index) - align = datatype_align(T) - Base.unsafe_load(pointer(a), index, Val(align))::T -end - -@inline function Base.getindex(a::CuDeviceArray{T,N,A,true}, index::Integer) where {T,N,A} - @boundscheck checkbounds(a, index) +@inline function Base.getindex(A::CuDeviceArray{T}, index::Integer) where {T} + @boundscheck checkbounds(A, index) align = datatype_align(T) - unsafe_cached_load(pointer(a), index, Val(align))::T + Base.unsafe_load(pointer(A), index, Val(align))::T end @inline function Base.setindex!(A::CuDeviceArray{T}, x, index::Integer) where {T} diff --git a/test/array.jl b/test/array.jl index dd162fa8..281e49ca 100644 --- a/test/array.jl +++ b/test/array.jl @@ -1,5 +1,7 @@ @testset "device arrays" begin +############################################################################################ + @testset "constructors" begin # inner constructors let @@ -47,6 +49,10 @@ end end + + +############################################################################################ + @testset "basics" begin # argument passing, get and setindex, length dims = (16, 16) len = prod(dims) @@ -93,6 +99,8 @@ end @test sum(input) ≈ output[1] end +############################################################################################ + @testset "bounds checking" begin @eval function array_oob_1d(array) return array[1] @@ -100,17 +108,21 @@ end # NOTE: these tests verify that bounds checking is _disabled_ (see #4) - ir = sprint(io->CUDAnative.code_llvm(io, array_oob_1d, (CuDeviceArray{Int,1,AS.Global,false},))) + ir = sprint(io->CUDAnative.code_llvm(io, array_oob_1d, (CuDeviceArray{Int,1,AS.Global},))) @test !contains(ir, "trap") @eval function array_oob_2d(array) return array[1, 1] end - ir = sprint(io->CUDAnative.code_llvm(io, array_oob_2d, (CuDeviceArray{Int,2,AS.Global,false},))) + ir = sprint(io->CUDAnative.code_llvm(io, array_oob_2d, (CuDeviceArray{Int,2,AS.Global},))) @test !contains(ir, "trap") end + + +############################################################################################ + @testset "views" begin @eval function array_view(array) i = (blockIdx().x-1) * blockDim().x + threadIdx().x @@ -135,6 +147,9 @@ end @test array == Array(array_dev) end +############################################################################################ + + @testset "bug: non-Int index to unsafe_load" begin @eval function array_load_index(a) return a[UInt64(1)] @@ -147,21 +162,4 @@ end array_load_index(da) end -@testset "cached access" begin - @eval function array_cached_load(a, b, i) - b[i] = Cached(a)[i] - return nothing - end - - buf = IOBuffer() - - a = CuTestArray([0]) - b = CuTestArray([0]) - @device_code_ptx io=buf @cuda array_cached_load(a, b, 1) - @test Array(a) == Array(b) - - asm = String(buf) - @test contains(asm, "ld.global.nc") -end - end diff --git a/test/codegen.jl b/test/codegen.jl index 36cab1e5..467f3895 100644 --- a/test/codegen.jl +++ b/test/codegen.jl @@ -127,7 +127,7 @@ end @eval llvm_D32593(arr) = arr[1].foo - CUDAnative.code_llvm(DevNull, llvm_D32593, Tuple{CuDeviceVector{llvm_D32593_struct,AS.Global,false}}) + CUDAnative.code_llvm(DevNull, llvm_D32593, Tuple{CuDeviceVector{llvm_D32593_struct,AS.Global}}) end @testset "julia calling convention" begin