Skip to content

Commit

Permalink
update Yao
Browse files Browse the repository at this point in the history
  • Loading branch information
GiggleLiu committed Feb 16, 2022
1 parent 78f7954 commit e112d96
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 62 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ docs/src/tutorial/QCBM.md
_*.dat
*.swp
__pycache__/
.vscode/

Manifest.toml

Expand Down
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "CuYao"
uuid = "b48ca7a8-dd42-11e8-2b8e-1b7706800275"
version = "0.3.1"
version = "0.3.2"

[deps]
BitBasis = "50ba71b6-fa0f-514d-ae9a-0916efc90dcf"
Expand All @@ -16,13 +16,13 @@ Yao = "5872b779-8223-5990-8dd0-5abbb0748c8c"

[compat]
BitBasis = "0.7"
CUDA = "3.6"
CUDA = "3.8"
LuxurySparse = "0.6"
Reexport = "0.2, 1"
StaticArrays = "0.12, 1"
StatsBase = "0.33"
TupleTools = "1"
Yao = "0.6"
Yao = "0.7"
julia = "1"

[extras]
Expand Down
63 changes: 35 additions & 28 deletions src/GPUReg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,40 @@ import Yao: expect

export cpu, cu, GPUReg

cu(reg::ArrayReg{B}) where B = ArrayReg{B}(CuArray(reg.state))
cpu(reg::ArrayReg{B}) where B = ArrayReg{B}(collect(reg.state))
const GPUReg{B, T, MT} = ArrayReg{B, T, MT} where MT<:CuArray
cu(reg::ArrayReg{D}) where D = ArrayReg{D}(CuArray(reg.state))
cpu(reg::ArrayReg{D}) where D = ArrayReg{D}(Array(reg.state))
cu(reg::BatchedArrayReg{D}) where D = BatchedArrayReg{D}(CuArray(reg.state), reg.nbatch)
cpu(reg::BatchedArrayReg{D}) where D = BatchedArrayReg{D}(Array(reg.state), reg.nbatch)
const GPUReg{D, T, MT} = AbstractArrayReg{D, T, MT} where MT<:DenseCuArray

function batch_normalize!(s::DenseCuArray, p::Real=2)
p!=2 && throw(ArgumentError("p must be 2!"))
s./=norm2(s, dims=1)
s
return s
end

@inline function tri2ij(l::Int)
i = ceil(Int, sqrt(2*l+0.25)-0.5)
j = l-i*(i-1)÷2
i+1,j
return i+1,j
end

############### MEASURE ##################
function measure(::ComputationalBasis, reg::GPUReg{1}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG, nshots::Int=1)
function measure(::ComputationalBasis, reg::ArrayReg{D, T, MT} where MT<:DenseCuArray, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG, nshots::Int=1) where {D,T}
_measure(rng, reg |> probs |> Vector, nshots)
end

# TODO: optimize the batch dimension using parallel sampling
function measure(::ComputationalBasis, reg::GPUReg{B}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG, nshots::Int=1) where B
function measure(::ComputationalBasis, reg::BatchedArrayReg{D, T, MT} where MT<:DenseCuArray, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG, nshots::Int=1) where {D,T}
regm = reg |> rank3
pl = dropdims(mapreduce(abs2, +, regm, dims=2), dims=2)
_measure(rng, pl |> Matrix, nshots)
return _measure(rng, pl |> Matrix, nshots)
end

function measure!(::RemoveMeasured, ::ComputationalBasis, reg::GPUReg{B}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where B
function measure!(::RemoveMeasured, ::ComputationalBasis, reg::GPUReg{D}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where D
regm = reg |> rank3
nregm = similar(regm, 1<<nremain(reg), B)
B = size(regm, 3)
nregm = similar(regm, D ^ nremain(reg), B)
pl = dropdims(mapreduce(abs2, +, regm, dims=2), dims=2)
pl_cpu = pl |> Matrix
res_cpu = map(ib->_measure(rng, view(pl_cpu, :, ib), 1)[], 1:B)
Expand All @@ -51,11 +54,12 @@ function measure!(::RemoveMeasured, ::ComputationalBasis, reg::GPUReg{B}, ::AllL
end
gpu_call(kernel, nregm, regm, res, pl)
reg.state = reshape(nregm,1,:)
B == 1 ? Array(res)[] : res
return reg isa ArrayReg ? Array(res)[] : res
end

function measure!(::NoPostProcess, ::ComputationalBasis, reg::GPUReg{B, T}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where {B, T}
function measure!(::NoPostProcess, ::ComputationalBasis, reg::GPUReg{D, T}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where {D, T}
regm = reg |> rank3
B = size(regm, 3)
pl = dropdims(mapreduce(abs2, +, regm, dims=2), dims=2)
pl_cpu = pl |> Matrix
res_cpu = map(ib->_measure(rng, view(pl_cpu, :, ib), 1)[], 1:B)
Expand All @@ -70,17 +74,18 @@ function measure!(::NoPostProcess, ::ComputationalBasis, reg::GPUReg{B, T}, ::Al
return
end
gpu_call(kernel, regm, res, pl)
B == 1 ? Array(res)[] : res
return reg isa ArrayReg ? Array(res)[] : res
end

function YaoBase.measure!(
::NoPostProcess,
bb::BlockedBasis,
reg::GPUReg{B,T},
reg::GPUReg{D,T},
::AllLocs;
rng::AbstractRNG = Random.GLOBAL_RNG,
) where {B,T}
) where {D,T}
state = @inbounds (reg|>rank3)[bb.perm, :, :] # permute to make eigen values sorted
B = size(state, 3)
pl = dropdims(mapreduce(abs2, +, state, dims=2), dims=2)
pl_cpu = pl |> Matrix
pl_block = zeros(eltype(pl), nblocks(bb), B)
Expand All @@ -106,11 +111,12 @@ function YaoBase.measure!(
_state = reshape(state, 1 << nactive(reg), :)
rstate = reshape(reg.state, 1 << nactive(reg), :)
@inbounds rstate[bb.perm, :] .= _state
return B == 1 ? bb.values[res_cpu[]] : CuArray(bb.values[res_cpu])
return reg isa ArrayReg ? bb.values[res_cpu[]] : CuArray(bb.values[res_cpu])
end

function measure!(rst::ResetTo, ::ComputationalBasis, reg::GPUReg{B, T}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where {B, T}
function measure!(rst::ResetTo, ::ComputationalBasis, reg::GPUReg{D, T}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where {D, T}
regm = reg |> rank3
B = size(regm, 3)
pl = dropdims(mapreduce(abs2, +, regm, dims=2), dims=2)
pl_cpu = pl |> Matrix
res_cpu = map(ib->_measure(rng, view(pl_cpu, :, ib), 1)[], 1:B)
Expand All @@ -128,10 +134,10 @@ function measure!(rst::ResetTo, ::ComputationalBasis, reg::GPUReg{B, T}, ::AllLo
end

gpu_call(kernel, regm, res, pl, rst.x)
B == 1 ? Array(res)[] : res
return reg isa ArrayReg ? Array(res)[] : res
end

import Yao.YaoArrayRegister: insert_qubits!, join
import Yao.YaoArrayRegister: insert_qudits!, join
function YaoBase.batched_kron(A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1 ,T2}
res = CUDA.zeros(promote_type(T1,T2), size(A,1)*size(B, 1), size(A,2)*size(B,2), size(A, 3))
CI = Base.CartesianIndices(res)
Expand All @@ -145,7 +151,7 @@ function YaoBase.batched_kron(A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T
end

gpu_call(kernel, res, A, B)
res
return res
end

"""
Expand All @@ -154,7 +160,7 @@ end
Performs batched Kronecker products in-place on the GPU.
The results are stored in 'C', overwriting the existing values of 'C'.
"""
function YaoBase.batched_kron!(C::CuArray{T3, 3}, A::DenseCuArray, B::DenseCuArray) where {T1 ,T2, T3}
function YaoBase.batched_kron!(C::CuArray{T3, 3}, A::DenseCuArray, B::DenseCuArray) where {T3}
@boundscheck (size(C) == (size(A,1)*size(B,1), size(A,2)*size(B,2), size(A,3))) || throw(DimensionMismatch())
@boundscheck (size(A,3) == size(B,3) == size(C,3)) || throw(DimensionMismatch())
CI = Base.CartesianIndices(C)
Expand All @@ -168,23 +174,24 @@ function YaoBase.batched_kron!(C::CuArray{T3, 3}, A::DenseCuArray, B::DenseCuArr
end

gpu_call(kernel, C, A, B)
C
return C
end

function join(reg1::GPUReg{B}, reg2::GPUReg{B}) where {B}
function join(reg1::GPUReg{D}, reg2::GPUReg{D}) where {D}
@assert nbatch(reg1) == nbatch(reg2)
s1 = reg1 |> rank3
s2 = reg2 |> rank3
state = YaoBase.batched_kron(s1, s2)
ArrayReg{B}(copy(reshape(state, size(state, 1), :)))
return arrayreg(copy(reshape(state, size(state, 1), :)); nlevel=D, nbatch=nbatch(reg1))
end

export insert_qubits!
function insert_qubits!(reg::GPUReg{B}, loc::Int; nqubits::Int=1) where B
export insert_qudits!
function insert_qudits!(reg::GPUReg{D}, loc::Int; nqudits::Int=1) where D
na = nactive(reg)
focus!(reg, 1:loc-1)
reg2 = join(zero_state(nqubits; nbatch=B) |> cu, reg) |> relax! |> focus!((1:na+nqubits)...)
reg2 = join(zero_state(nqudits; nbatch=nbatch(reg)) |> cu, reg) |> relax! |> focus!((1:na+nqudits)...)
reg.state = reg2.state
reg
return reg
end

#=
Expand Down
23 changes: 12 additions & 11 deletions src/gpuapplys.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,26 @@ gpu_compatible(A::AbstractVecOrMat) = A |> staticize
gpu_compatible(A::StaticArray) = A

###################### unapply! ############################
function instruct!(state::DenseCuVecOrMat, U0::AbstractMatrix, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M}
function instruct!(::Val{2}, state::DenseCuVecOrMat, U0::AbstractMatrix, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M}
U0 = gpu_compatible(U0)
# reorder a unirary matrix.
D, kf = un_kernel(log2dim1(state), clocs, cvals, U0, locs)

gpu_call(kf, state; elements=D*size(state,2))
state
end
instruct!(state::DenseCuVecOrMat, U0::IMatrix, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M} = state
instruct!(state::DenseCuVecOrMat, U0::SDSparseMatrixCSC, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M} = instruct!(state, U0 |> Matrix, locs, clocs, cvals)
instruct!(::Val{2}, state::DenseCuVecOrMat, U0::IMatrix, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M} = state
instruct!(::Val{2}, state::DenseCuVecOrMat, U0::SDSparseMatrixCSC, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M} = instruct!(Val(2), state, U0 |> Matrix, locs, clocs, cvals)

################## General U1 apply! ###################
for MT in [:SDDiagonal, :SDPermMatrix, :AbstractMatrix, :SDSparseMatrixCSC]
@eval function instruct!(state::DenseCuVecOrMat, U1::$MT, ibit::Tuple{Int})
@eval function instruct!(::Val{2}, state::DenseCuVecOrMat, U1::$MT, ibit::Tuple{Int})
D,kf = u1_kernel(log2dim1(state), U1, ibit...)
gpu_call(kf, state; elements=D*size(state,2))
state
end
end
instruct!(state::DenseCuVecOrMat, U::IMatrix, locs::Tuple{Int}) = state
instruct!(::Val{2}, state::DenseCuVecOrMat, U::IMatrix, locs::Tuple{Int}) = state

################## XYZ #############
using Yao.ConstGate: S, T, Sdag, Tdag
Expand All @@ -50,26 +50,26 @@ for G in [:X, :Y, :Z, :S, :T, :Sdag, :Tdag]
end

@eval begin
function YaoBase.instruct!(state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::NTuple{C,Int}) where C
function YaoBase.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::NTuple{C,Int}) where C
_instruct!(state, g, locs)
end

function YaoBase.instruct!(state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::Tuple{Int})
function YaoBase.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::Tuple{Int})
_instruct!(state, g, locs)
end

function YaoBase.instruct!(state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, loc::Tuple{Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where C
function YaoBase.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, loc::Tuple{Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where C
_instruct!(state, g, loc, clocs, cvals)
end

function YaoBase.instruct!(state::DenseCuVecOrMat, vg::Val{$(QuoteNode(G))}, loc::Tuple{Int}, cloc::Tuple{Int}, cval::Tuple{Int})
function YaoBase.instruct!(::Val{2}, state::DenseCuVecOrMat, vg::Val{$(QuoteNode(G))}, loc::Tuple{Int}, cloc::Tuple{Int}, cval::Tuple{Int})
_instruct!(state, vg, loc, cloc, cval)
end
end

end

function instruct!(state::DenseCuVecOrMat, ::Val{:SWAP}, locs::Tuple{Int,Int})
function instruct!(::Val{2}, state::DenseCuVecOrMat, ::Val{:SWAP}, locs::Tuple{Int,Int})
b1, b2 = locs
mask1 = bmask(b1)
mask2 = bmask(b2)
Expand All @@ -92,7 +92,7 @@ end
# parametrized swap gate
using Yao.ConstGate: SWAPGate

function instruct!(state::DenseCuVecOrMat, ::Val{:PSWAP}, locs::Tuple{Int, Int}, θ::Real)
function instruct!(::Val{2}, state::DenseCuVecOrMat, ::Val{:PSWAP}, locs::Tuple{Int, Int}, θ::Real)
D, kf = pswap_kernel(log2dim1(state),locs..., θ)
gpu_call(kf, state; elements=D*size(state,2))
state
Expand All @@ -107,6 +107,7 @@ end

for RG in [:Rx, :Ry, :Rz]
@eval function instruct!(
::Val{2},
state::DenseCuVecOrMat{T},
::Val{$(QuoteNode(RG))},
locs::Tuple{Int},
Expand Down
12 changes: 6 additions & 6 deletions test/GPUReg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ end
res = measure!(ResetTo(3), greg1)
@test all(measure(greg1, nshots=10) .== 3)
@test greg1 |> isnormalized
@test all(select.(greg0 |> cpu, res |> myvec) .|> normalize! .≈ select.(greg1 |> cpu, 3))
@test all(select.(BatchedArrayReg(greg0 |> cpu), res |> myvec) .|> normalize! .≈ select.(BatchedArrayReg(greg1 |> cpu), 3))

greg1 = greg |> copy |> focus!(1,4,3)
greg0 = copy(greg1)
res = measure!(greg1)
@test all(select.(greg0 |> cpu, res |> myvec) .|> normalize! .≈ select.(greg1 |> cpu, res|>myvec))
@test all(select.(BatchedArrayReg(greg0 |> cpu), res |> myvec) .|> normalize! .≈ select.(BatchedArrayReg(greg1 |> cpu), res|>myvec))
end

@test join(rand_state(3) |> cu, rand_state(3) |> cu) |> nactive == 6
Expand All @@ -64,12 +64,12 @@ end

@testset "insert_qubits!" begin
reg = rand_state(5; nbatch=10)
res = insert_qubits!(reg |> cu, 3; nqubits=2) |> cpu
@test insert_qubits!(reg, 3; nqubits=2) res
res = insert_qudits!(reg |> cu, 3; nqudits=2) |> cpu
@test insert_qudits!(reg, 3; nqudits=2) res

reg = rand_state(5, nbatch=10) |>focus!(2,3)
res = insert_qubits!(reg |> cu, 3; nqubits=2) |> cpu
@test insert_qubits!(reg, 3; nqubits=2) res
res = insert_qudits!(reg |> cu, 3; nqudits=2) |> cpu
@test insert_qudits!(reg, 3; nqudits=2) res
end

@testset "cuda-op-measures" begin
Expand Down
28 changes: 14 additions & 14 deletions test/gpuapplys.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ using CUDA
mat(ComplexF32, put(2,2=>P0))
]

@test instruct!(v1 |> CuArray, UN, (3,1)) |> Vector instruct!(v1 |> copy, UN, (3,1))
@test instruct!(vn |> CuArray, UN, (3,1)) |> Matrix instruct!(vn |> copy, UN, (3,1))
@test instruct!(Val(2),v1 |> CuArray, UN, (3,1)) |> Vector instruct!(Val(2),v1 |> copy, UN, (3,1))
@test instruct!(Val(2),vn |> CuArray, UN, (3,1)) |> Matrix instruct!(Val(2),vn |> copy, UN, (3,1))
end
# sparse matrix like P0, P1 et. al. are not implemented.
end
Expand All @@ -34,8 +34,8 @@ end
v1 = randn(ComplexF32, N)
vn = randn(ComplexF32, N, 333)

@test instruct!(v1 |> CuArray, Val(:SWAP), (3,5)) |> Vector instruct!(v1 |> copy, Val(:SWAP), (3,5))
@test instruct!(vn |> CuArray, Val(:SWAP), (3,5)) |> Matrix instruct!(vn |> copy, Val(:SWAP), (3,5))
@test instruct!(Val(2),v1 |> CuArray, Val(:SWAP), (3,5)) |> Vector instruct!(Val(2),v1 |> copy, Val(:SWAP), (3,5))
@test instruct!(Val(2),vn |> CuArray, Val(:SWAP), (3,5)) |> Matrix instruct!(Val(2),vn |> copy, Val(:SWAP), (3,5))
end

@testset "gpu instruct! 1bit" begin
Expand All @@ -46,8 +46,8 @@ end
vn = randn(ComplexF32, N, 333)

for U1 in [mat(H), mat(Z), mat(I2), mat(ConstGate.P0), mat(X), mat(Y)]
@test instruct!(v1 |> CuArray, U1, (3,)) |> Vector instruct!(v1 |> copy, U1, (3,))
@test instruct!(vn |> CuArray, U1, (3,)) |> Matrix instruct!(vn |> copy, U1, (3,))
@test instruct!(Val(2),v1 |> CuArray, U1, (3,)) |> Vector instruct!(Val(2),v1 |> copy, U1, (3,))
@test instruct!(Val(2),vn |> CuArray, U1, (3,)) |> Matrix instruct!(Val(2),vn |> copy, U1, (3,))
end
# sparse matrix like P0, P1 et. al. are not implemented.
end
Expand All @@ -60,11 +60,11 @@ end
vn = randn(ComplexF32, N, 333)

for G in [:X, :Y, :Z, :T, :H, :Tdag, :S, :Sdag]
@test instruct!(v1 |> CuArray, Val(G), (3,)) |> Vector instruct!(v1 |> copy, Val(G), (3,))
@test instruct!(vn |> CuArray, Val(G), (3,)) |> Matrix instruct!(vn |> copy, Val(G), (3,))
@test instruct!(Val(2),v1 |> CuArray, Val(G), (3,)) |> Vector instruct!(Val(2),v1 |> copy, Val(G), (3,))
@test instruct!(Val(2),vn |> CuArray, Val(G), (3,)) |> Matrix instruct!(Val(2),vn |> copy, Val(G), (3,))
if G != :H
@test instruct!(v1 |> CuArray, Val(G), (1,3,4)) |> Vector instruct!(v1 |> copy, Val(G), (1,3,4))
@test instruct!(vn |> CuArray, Val(G),(1,3,4)) |> Matrix instruct!(vn |> copy, Val(G), (1,3,4))
@test instruct!(Val(2),v1 |> CuArray, Val(G), (1,3,4)) |> Vector instruct!(Val(2),v1 |> copy, Val(G), (1,3,4))
@test instruct!(Val(2),vn |> CuArray, Val(G),(1,3,4)) |> Matrix instruct!(Val(2),vn |> copy, Val(G), (1,3,4))
end
end
end
Expand All @@ -77,10 +77,10 @@ end
vn = randn(ComplexF32, N, 333)

for G in [:X, :Y, :Z, :T, :Tdag, :S, :Sdag]
@test instruct!(v1 |> CuArray, Val(G), (3,), (4,5), (0, 1)) |> Vector instruct!(v1 |> copy, Val(G), (3,), (4,5), (0, 1))
@test instruct!(vn |> CuArray, Val(G), (3,), (4,5), (0, 1)) |> Matrix instruct!(vn |> copy, Val(G), (3,), (4,5), (0, 1))
@test instruct!(v1 |> CuArray, Val(G), (3,), (1,), (1,)) |> Vector instruct!(v1 |> copy, Val(G),(3,), (1,), (1,))
@test instruct!(vn |> CuArray, Val(G), (3,), (1,), (1,)) |> Matrix instruct!(vn |> copy, Val(G),(3,), (1,), (1,))
@test instruct!(Val(2),v1 |> CuArray, Val(G), (3,), (4,5), (0, 1)) |> Vector instruct!(Val(2),v1 |> copy, Val(G), (3,), (4,5), (0, 1))
@test instruct!(Val(2),vn |> CuArray, Val(G), (3,), (4,5), (0, 1)) |> Matrix instruct!(Val(2),vn |> copy, Val(G), (3,), (4,5), (0, 1))
@test instruct!(Val(2),v1 |> CuArray, Val(G), (3,), (1,), (1,)) |> Vector instruct!(Val(2),v1 |> copy, Val(G),(3,), (1,), (1,))
@test instruct!(Val(2),vn |> CuArray, Val(G), (3,), (1,), (1,)) |> Matrix instruct!(Val(2),vn |> copy, Val(G),(3,), (1,), (1,))
end
end

Expand Down

0 comments on commit e112d96

Please sign in to comment.