Skip to content
This repository has been archived by the owner on May 27, 2021. It is now read-only.

Commit

Permalink
Revert the changes to CuDeviceArray.
Browse files Browse the repository at this point in the history
I'm not convinced by the design, this probably needs to be part of a
buffer hierarchy. But let's keep it as a part of the history,
for future reference.
  • Loading branch information
maleadt committed Mar 16, 2018
1 parent 706a2ae commit 63e3df3
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 44 deletions.
37 changes: 13 additions & 24 deletions src/device/array.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Contiguous on-device arrays

export
CuDeviceArray, CuDeviceVector, CuDeviceMatrix, CuBoundsError, Cached
CuDeviceArray, CuDeviceVector, CuDeviceMatrix, CuBoundsError


## construction
Expand All @@ -11,38 +11,33 @@ export
CuDeviceArray{T}(dims, ptr)
CuDeviceArray{T,A}(dims, ptr)
CuDeviceArray{T,A,N}(dims, ptr)
CuDeviceArray{T,A,N,C}(dims, ptr)
Construct an `N`-dimensional dense CUDA device array with element type `T` wrapping a
pointer, where `N` is determined from the length of `dims` and `T` is determined from the
type of `ptr`. `dims` may be a single scalar, or a tuple of integers corresponding to the
lengths in each dimension. If the rank `N` is supplied explicitly as in `Array{T,N}(dims)`,
then it must match the length of `dims`. The same applies to the element type `T`, which
should match the type of the pointer `ptr`. The `C` param indicates whether accesses should
be cached, defaulting to false.
should match the type of the pointer `ptr`.
"""
CuDeviceArray

# NOTE: we can't support the typical `tuple or series of integer` style construction,
# because we're currently requiring a trailing pointer argument.

struct CuDeviceArray{T,N,A,C} <: AbstractArray{T,N}
struct CuDeviceArray{T,N,A} <: AbstractArray{T,N}
shape::NTuple{N,Int}
ptr::DevicePtr{T,A}

# inner constructors, fully parameterized, exact types (ie. Int not <:Integer)
CuDeviceArray{T,N,A,C}(shape::NTuple{N,Int}, ptr::DevicePtr{T,A}) where {T,A,N,C} = new(shape,ptr)
CuDeviceArray{T,N,A}(shape::NTuple{N,Int}, ptr::DevicePtr{T,A}) where {T,A,N} = new(shape,ptr)
end

const CuDeviceVector = CuDeviceArray{T,1,A,C} where {T,A,C}
const CuDeviceMatrix = CuDeviceArray{T,2,A,C} where {T,A,C}

@inline Cached(::Type{CuDeviceArray{T,N,A,C}}) where {T,N,A,C} = CuDeviceArray{T,N,A,true}
@inline Cached(a::T) where {T<:CuDeviceArray} = Cached(T)(a.shape, a.ptr)
const CuDeviceVector = CuDeviceArray{T,1,A} where {T,A}
const CuDeviceMatrix = CuDeviceArray{T,2,A} where {T,A}

# outer constructors, non-parameterized
CuDeviceArray(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A}(dims, p)
CuDeviceArray(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A}((len,), p)
CuDeviceArray(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A}(dims, p)
CuDeviceArray(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A}((len,), p)

# outer constructors, partially parameterized
CuDeviceArray{T}(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A}(dims, p)
Expand All @@ -51,8 +46,8 @@ CuDeviceArray{T,N}(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} =
CuDeviceVector{T}(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A}((len,), p)

# outer constructors, fully parameterized
CuDeviceArray{T,N,A}(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A,false}(Int.(dims), p)
CuDeviceVector{T,A}(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A,false}((Int(len),), p)
CuDeviceArray{T,N,A}(dims::NTuple{N,<:Integer}, p::DevicePtr{T,A}) where {T,A,N} = CuDeviceArray{T,N,A}(Int.(dims), p)
CuDeviceVector{T,A}(len::Integer, p::DevicePtr{T,A}) where {T,A} = CuDeviceVector{T,A}((Int(len),), p)


## getters
Expand All @@ -77,16 +72,10 @@ cudaconvert(a::CuArray{T,N}) where {T,N} = convert(CuDeviceArray{T,N,AS.Global},

## indexing

@inline function Base.getindex(a::CuDeviceArray{T}, index::Integer) where {T}
@boundscheck checkbounds(a, index)
align = datatype_align(T)
Base.unsafe_load(pointer(a), index, Val(align))::T
end

@inline function Base.getindex(a::CuDeviceArray{T,N,A,true}, index::Integer) where {T,N,A}
@boundscheck checkbounds(a, index)
@inline function Base.getindex(A::CuDeviceArray{T}, index::Integer) where {T}
@boundscheck checkbounds(A, index)
align = datatype_align(T)
unsafe_cached_load(pointer(a), index, Val(align))::T
Base.unsafe_load(pointer(A), index, Val(align))::T
end

@inline function Base.setindex!(A::CuDeviceArray{T}, x, index::Integer) where {T}
Expand Down
36 changes: 17 additions & 19 deletions test/array.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
@testset "device arrays" begin

############################################################################################

@testset "constructors" begin
# inner constructors
let
Expand Down Expand Up @@ -47,6 +49,10 @@
end
end



############################################################################################

@testset "basics" begin # argument passing, get and setindex, length
dims = (16, 16)
len = prod(dims)
Expand Down Expand Up @@ -93,24 +99,30 @@ end
@test sum(input) ≈ output[1]
end

############################################################################################

@testset "bounds checking" begin
@eval function array_oob_1d(array)
return array[1]
end

# NOTE: these tests verify that bounds checking is _disabled_ (see #4)

ir = sprint(io->CUDAnative.code_llvm(io, array_oob_1d, (CuDeviceArray{Int,1,AS.Global,false},)))
ir = sprint(io->CUDAnative.code_llvm(io, array_oob_1d, (CuDeviceArray{Int,1,AS.Global},)))
@test !contains(ir, "trap")

@eval function array_oob_2d(array)
return array[1, 1]
end

ir = sprint(io->CUDAnative.code_llvm(io, array_oob_2d, (CuDeviceArray{Int,2,AS.Global,false},)))
ir = sprint(io->CUDAnative.code_llvm(io, array_oob_2d, (CuDeviceArray{Int,2,AS.Global},)))
@test !contains(ir, "trap")
end



############################################################################################

@testset "views" begin
@eval function array_view(array)
i = (blockIdx().x-1) * blockDim().x + threadIdx().x
Expand All @@ -135,6 +147,9 @@ end
@test array == Array(array_dev)
end

############################################################################################


@testset "bug: non-Int index to unsafe_load" begin
@eval function array_load_index(a)
return a[UInt64(1)]
Expand All @@ -147,21 +162,4 @@ end
array_load_index(da)
end

@testset "cached access" begin
@eval function array_cached_load(a, b, i)
b[i] = Cached(a)[i]
return nothing
end

buf = IOBuffer()

a = CuTestArray([0])
b = CuTestArray([0])
@device_code_ptx io=buf @cuda array_cached_load(a, b, 1)
@test Array(a) == Array(b)

asm = String(buf)
@test contains(asm, "ld.global.nc")
end

end
2 changes: 1 addition & 1 deletion test/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ end

@eval llvm_D32593(arr) = arr[1].foo

CUDAnative.code_llvm(DevNull, llvm_D32593, Tuple{CuDeviceVector{llvm_D32593_struct,AS.Global,false}})
CUDAnative.code_llvm(DevNull, llvm_D32593, Tuple{CuDeviceVector{llvm_D32593_struct,AS.Global}})
end

@testset "julia calling convention" begin
Expand Down

0 comments on commit 63e3df3

Please sign in to comment.