-
Notifications
You must be signed in to change notification settings - Fork 124
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NDTensors] Add
AMDGPU.jl
(ROCm) based extension for NDTensors (#1325)
- Loading branch information
Showing
26 changed files
with
273 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Package extension module: pulls in every source file of the AMDGPU (ROCm) backend.
module NDTensorsAMDGPUExt

# The files are included in this fixed order.
# NOTE(review): some files appear to use names (e.g. `Adjoint`, `LinearAlgebra`) that
# are only imported by files included earlier — confirm before reordering.
include("copyto.jl")
include("set_types.jl")
include("adapt.jl")
include("indexing.jl")
include("linearalgebra.jl")
include("mul.jl")
include("permutedims.jl")

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
using NDTensors: NDTensors, EmptyStorage, adapt_storagetype, emptytype | ||
using NDTensors.AMDGPUExtensions: AMDGPUExtensions, ROCArrayAdaptor | ||
using NDTensors.GPUArraysCoreExtensions: storagemode | ||
using NDTensors.TypeParameterAccessors: | ||
default_type_parameter, | ||
set_type_parameter, | ||
set_type_parameters, | ||
type_parameter, | ||
type_parameters | ||
using Adapt: Adapt, adapt | ||
using AMDGPU: AMDGPU, ROCArray, ROCVector | ||
using Functors: fmap | ||
|
||
# Convert `xs` by applying `adapt` with a `ROCArrayAdaptor{storagemode}` to every
# leaf that `fmap` visits. The `storagemode` keyword defaults to the default
# storage-mode type parameter registered for `ROCArray`.
function AMDGPUExtensions.roc(xs; storagemode=default_type_parameter(ROCArray, storagemode))
  adaptor = ROCArrayAdaptor{storagemode}()
  return fmap(leaf -> adapt(adaptor, leaf), xs)
end
|
||
# Adapt an array through a `ROCArrayAdaptor`. The target `ROCArray` type keeps the
# element type and dimensionality of `xs` and takes its storage mode from `adaptor`.
# Inputs for which `isbits(xs)` holds are returned unchanged.
function Adapt.adapt_storage(adaptor::ROCArrayAdaptor, xs::AbstractArray)
  accessors = (eltype, ndims, storagemode)
  parameters = (type_parameters(xs, (eltype, ndims))..., storagemode(adaptor))
  roctype = set_type_parameters(ROCArray, accessors, parameters)
  if isbits(xs)
    return xs
  end
  return adapt(roctype, xs)
end
|
||
# Storage-type adaptation for `EmptyStorage`: build a `ROCVector` type with element
# type `ElT` and the adaptor's storage mode, adapt the wrapped storage type `StoreT`
# to it, and return the `emptytype` of the result.
function NDTensors.adapt_storagetype(
  adaptor::ROCArrayAdaptor, xs::Type{EmptyStorage{ElT,StoreT}}
) where {ElT,StoreT}
  mode = storagemode(adaptor)
  rocvectortype = set_type_parameters(ROCVector, (eltype, storagemode), (ElT, mode))
  adapted_storetype = adapt_storagetype(rocvectortype, StoreT)
  return emptytype(adapted_storetype)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
using NDTensors.Expose: Exposed, expose, parent, unexpose | ||
using LinearAlgebra: LinearAlgebra, Adjoint | ||
using AMDGPU: ROCArray | ||
|
||
# Same definition as `MtlArray`.
# Copy the parent of the reshaped array, then reshape that copy to the size of the
# unexposed wrapper.
function Base.copy(src::Exposed{<:ROCArray,<:Base.ReshapedArray})
  parent_copy = copy(parent(src))
  return reshape(parent_copy, size(unexpose(src)))
end
|
||
# Copy of a view into a reshaped adjoint: first copy the exposed parent, then take
# the same view (using the original `parentindices`) of that copy and copy it.
function Base.copy(
  src::Exposed{
    <:ROCArray,<:SubArray{<:Any,<:Any,<:Base.ReshapedArray{<:Any,<:Any,<:Adjoint}}
  },
)
  parent_copy = copy(expose(parent(src)))
  inds = parentindices(unexpose(src))
  return copy(view(parent_copy, inds...))
end
|
||
# `copyto!` from a `SubArray` source: copy the source first, then copy that result
# into `dest`. Returns the unexposed destination.
function Base.copyto!(dest::Exposed{<:ROCArray}, src::Exposed{<:ROCArray,<:SubArray})
  materialized = copy(src)
  copyto!(dest, expose(materialized))
  return unexpose(dest)
end
|
||
# `copyto!` from a reshaped source: copy from the parent of the reshaped array
# instead of the wrapper. Returns the unexposed destination.
function Base.copyto!(
  dest::Exposed{<:ROCArray}, src::Exposed{<:ROCArray,<:Base.ReshapedArray}
)
  src_parent = expose(parent(src))
  copyto!(dest, src_parent)
  return unexpose(dest)
end
|
||
# `copyto!` from a transposed source: copy the source's parent into the transpose
# of `dest`, so no transposed wrapper is read. Returns the unexposed destination.
function Base.copyto!(
  dest::Exposed{<:ROCArray}, src::Exposed{<:ROCArray,<:LinearAlgebra.Transpose}
)
  dest_transposed = expose(transpose(dest))
  src_parent = expose(parent(src))
  copyto!(dest_transposed, src_parent)
  return unexpose(dest)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
using NDTensors.Expose: Exposed, expose, parent, unexpose | ||
using NDTensors.GPUArraysCoreExtensions: cpu | ||
using AMDGPU: AMDGPU, ROCArray | ||
using GPUArraysCore: @allowscalar | ||
|
||
# Zero-index `getindex` (`E[]`): read from the unexposed array with scalar
# indexing explicitly allowed.
function Base.getindex(E::Exposed{<:ROCArray})
  array = unexpose(E)
  return @allowscalar array[]
end
|
||
# Zero-index `setindex!` (`E[] = x`): write `x` into the unexposed array with scalar
# indexing explicitly allowed, and return the unexposed array.
function Base.setindex!(E::Exposed{<:ROCArray}, x::Number)
  array = unexpose(E)
  @allowscalar array[] = x
  return array
end
|
||
# Two-index `getindex` on an adjoint wrapper: read element `[j, i]` of the parent
# and return its adjoint.
function Base.getindex(E::Exposed{<:ROCArray,<:Adjoint}, i, j)
  parent_element = expose(parent(E))[j, i]
  return adjoint(parent_element)
end
|
||
# `any` on an exposed tensor: apply the predicate `f` to the tensor's underlying
# data. `data` is qualified as `NDTensors.data` because no visible `using`
# statement brings an unqualified `data` into scope.
function Base.any(f, E::Exposed{<:ROCArray,<:NDTensors.Tensor})
  return any(f, NDTensors.data(unexpose(E)))
end
|
||
# Print by first converting with `cpu` and then delegating to `Base.print_array`
# on the exposed result.
function Base.print_array(io::IO, E::Exposed{<:ROCArray})
  host = cpu(E)
  return Base.print_array(io, expose(host))
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
using NDTensors.AMDGPUExtensions: roc | ||
using NDTensors.Expose: Expose, Exposed, expose, ql, ql_positive | ||
using NDTensors.GPUArraysCoreExtensions: cpu | ||
using NDTensors.TypeParameterAccessors: unwrap_array_type | ||
using LinearAlgebra: svd | ||
using Adapt: adapt | ||
using AMDGPU: ROCMatrix | ||
|
||
# SVD of an exposed `ROCMatrix`: convert with `cpu`, run `svd` on the result, and
# convert the three factors back with `roc`.
# Keyword arguments (e.g. `alg`, `full`) are forwarded to the CPU `svd`; previously
# they were accepted but silently ignored.
function LinearAlgebra.svd(A::Exposed{<:ROCMatrix}; kwargs...)
  U, S, V = svd(cpu(A); kwargs...)
  return roc.((U, S, V))
end
|
||
## TODO currently AMDGPU doesn't have ql so make a ql function
# Until then: convert with `cpu`, run `ql` there, and adapt both factors back to the
# unwrapped array type of `A` (`Q` is materialized as a `Matrix` first).
function Expose.ql(A::Exposed{<:ROCMatrix})
  Q, L = ql(expose(cpu(A)))
  Q_adapted = adapt(unwrap_array_type(A), Matrix(Q))
  L_adapted = adapt(unwrap_array_type(A), L)
  return Q_adapted, L_adapted
end
# Same approach as `ql`, using `ql_positive`: convert with `cpu`, factorize there,
# and adapt both factors back to the unwrapped array type of `A`.
function Expose.ql_positive(A::Exposed{<:ROCMatrix})
  Q, L = ql_positive(expose(cpu(A)))
  Q_adapted = adapt(unwrap_array_type(A), Matrix(Q))
  L_adapted = adapt(unwrap_array_type(A), L)
  return Q_adapted, L_adapted
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
using NDTensors.Expose: Exposed, expose, parent, unexpose | ||
using LinearAlgebra: LinearAlgebra, Adjoint, Transpose, mul! | ||
using AMDGPU: ROCArray | ||
|
||
# Without this method the call was falling back to generic matrix multiplication.
# For a transposed destination, use transpose(A * B) == transpose(B) * transpose(A):
# multiply the transposed operands in swapped order into the transposed destination.
function LinearAlgebra.mul!(
  CM::Exposed{<:ROCArray,<:LinearAlgebra.Transpose},
  AM::Exposed{<:ROCArray},
  BM::Exposed{<:ROCArray},
  α,
  β,
)
  Ct, Bt, At = transpose(CM), transpose(BM), transpose(AM)
  mul!(Ct, Bt, At, α, β)
  return unexpose(CM)
end
|
||
# Without this method the call was falling back to generic matrix multiplication.
# For an adjoint destination, use (A * B)' == B' * A': multiply the adjoint operands
# in swapped order into the adjoint of the destination.
function LinearAlgebra.mul!(
  CM::Exposed{<:ROCArray,<:LinearAlgebra.Adjoint},
  AM::Exposed{<:ROCArray},
  BM::Exposed{<:ROCArray},
  α,
  β,
)
  Ca, Ba, Aa = adjoint(CM), adjoint(BM), adjoint(AM)
  mul!(Ca, Ba, Aa, α, β)
  return unexpose(CM)
end
|
||
# Works around an issue in AMDGPU.jl: a Transpose{Reshape{Adjoint{ROCArray}}} is not
# recognized as a ROCArray, so generic matmul gets called.
# Here the reshaped-adjoint parent of `BM` is copied first and the transpose of that
# copy is used as the right-hand operand.
function LinearAlgebra.mul!(
  CM::Exposed{<:ROCArray},
  AM::Exposed{<:ROCArray},
  BM::Exposed{
    <:ROCArray,
    <:LinearAlgebra.Transpose{
      <:Any,<:Base.ReshapedArray{<:Any,<:Any,<:LinearAlgebra.Adjoint}
    },
  },
  α,
  β,
)
  B_parent_copy = copy(expose(parent(BM)))
  B_operand = expose(transpose(B_parent_copy))
  mul!(CM, AM, B_operand, α, β)
  return unexpose(CM)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
using NDTensors.Expose: Exposed, expose, parent, unexpose | ||
using AMDGPU: ROCArray | ||
|
||
# `permutedims!` into a reshaped destination: compute the permuted source with
# `permutedims`, then copy it into the parent of the destination.
# Returns the unexposed destination.
function Base.permutedims!(
  Edest::Exposed{<:ROCArray,<:Base.ReshapedArray}, Esrc::Exposed{<:ROCArray}, perm
)
  permuted = permutedims(Esrc, perm)
  dest_parent = expose(parent(Edest))
  copyto!(dest_parent, expose(permuted))
  return unexpose(Edest)
end
|
||
# AMDGPU has an issue where `.=` can fail when Edest is a reshaped{<:Adjoint}.
# To avoid it, the permuted source is reshaped to the size of parent(Edest) and the
# broadcast `f.(dest, src)` is written into that parent directly.
function Base.permutedims!(
  Edest::Exposed{<:ROCArray,<:Base.ReshapedArray{<:Any,<:Any,<:Adjoint}},
  Esrc::Exposed{<:ROCArray},
  perm,
  f,
)
  permuted = permutedims(Esrc, perm)
  src_reshaped = reshape(permuted, size(parent(Edest)))
  parent(Edest) .= f.(parent(Edest), src_reshaped)
  return unexpose(Edest)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# TypeParameterAccessors definitions | ||
using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position | ||
using NDTensors.GPUArraysCoreExtensions: storagemode | ||
using AMDGPU: AMDGPU, ROCArray | ||
|
||
# Default type parameters for `ROCArray`, in positional order:
# element type, number of dimensions, storage mode (see the `position` methods).
function TypeParameterAccessors.default_type_parameters(::Type{<:ROCArray})
  default_eltype = Float64
  default_ndims = 1
  default_storagemode = AMDGPU.Mem.HIPBuffer
  return (default_eltype, default_ndims, default_storagemode)
end
# Positions of the `ROCArray` type parameters: 1 = eltype, 2 = ndims, 3 = storagemode.
function TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(eltype))
  return Position(1)
end
function TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(ndims))
  return Position(2)
end
function TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(storagemode))
  return Position(3)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
style = "blue" | ||
indent = 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Submodule holding the AMDGPU-related definitions loaded from "roc.jl".
module AMDGPUExtensions

include("roc.jl")

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position | ||
using NDTensors.GPUArraysCoreExtensions: storagemode | ||
"""
    roc(xs; storagemode=...)

Generic function stub with no methods defined here.
Methods are implemented in the `NDTensorsAMDGPUExt` package extension.
"""
function roc end
|
||
## Here we need a `ROCArrayAdaptor` to prevent conversion of 64 bit numbers to 32 bit.
## We cannot write `adapt(ROCVector, x)` because this
## will not allow us to properly utilize the buffer preference without changing the value of
## default_buffertype. Also `adapt(ROCArray{<:Any, <:Any, Buffertype})` fails to work properly
"""
    ROCArrayAdaptor{B}

Field-less adaptor type. Its single type parameter `B` is the storage mode,
registered at type-parameter position 1.
"""
struct ROCArrayAdaptor{B}
end
|
||
# The storage mode is the first (and only) type parameter of `ROCArrayAdaptor`.
function TypeParameterAccessors.position(
  ::Type{<:ROCArrayAdaptor}, ::typeof(storagemode)
)
  storagemode_position = Position(1)
  return storagemode_position
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# The tests live in a freshly generated, uniquely named module.
@eval module $(gensym())
using Test: @testset, @test
using NDTensors.AMDGPUExtensions: roc, ROCArrayAdaptor
using NDTensors.GPUArraysCoreExtensions: storagemode
@testset "roc and ROCArrayAdaptor" begin
  # `roc` must exist as a generic function even when no methods are defined for it.
  @test roc isa Function
  # The adaptor's type parameter is what `storagemode` reports.
  @test storagemode(ROCArrayAdaptor{1}) == 1
end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.