Error with GroupNorm on GPU #1247

Closed
a-r-n-o-l-d opened this issue Jun 23, 2020 · 6 comments · Fixed by #1397

Comments

@a-r-n-o-l-d

When I try this:

using Statistics
using Flux
using Flux: flatten, binarycrossentropy
using CuArrays
CuArrays.allowscalar(false)

imsize = (32,32)
group = 4

model = Chain(Conv((3,3), 3=>32, pad=(1,1)),
              GroupNorm(32, group, relu),
              MaxPool((2,2)),
              flatten,
              Dense(Int(prod(imsize) / 4 * 32), 128, relu),
              Dense(128, 1, sigmoid))

x1 = Float32.(randn(32, 32, 3, 8));
y1 = Float32.([1 0 0 0 0 0 0 0]);

bce(ŷ, y) = mean(binarycrossentropy.(ŷ, y))
loss(x, y) = bce(model(x), y)

opt = Descent();
ps = params(model);
Flux.train!(loss, ps, [(x1,y1)], opt)

It works on the CPU, but when I move the data and the model to the GPU:

model = gpu(model);
opt = Descent();
ps = params(model);
Flux.train!(loss, ps, gpu.([(x1,y1)]), opt)

ERROR: scalar getindex is disallowed
Stacktrace:
 [1] error(::String) at ./error.jl:33
 [2] assertscalar(::String) at /home/afertin/.julia/packages/GPUArrays/JqOUg/src/host/indexing.jl:41
 [3] getindex at /home/afertin/.julia/packages/GPUArrays/JqOUg/src/host/indexing.jl:96 [inlined]
 [4] _getindex at ./abstractarray.jl:1003 [inlined]
 [5] getindex at ./abstractarray.jl:980 [inlined]
 [6] _broadcast_getindex at ./broadcast.jl:597 [inlined]
 [7] _getindex at ./broadcast.jl:628 [inlined]
 [8] _broadcast_getindex at ./broadcast.jl:603 [inlined]
 [9] getindex at ./broadcast.jl:564 [inlined]
 [10] copy at ./broadcast.jl:854 [inlined]
 [11] materialize(::Base.Broadcast.Broadcasted{CuArrays.CuArrayStyle{2},Nothing,Zygote.var"#1161#1164"{CuArrays.var"#68#69"{Float32}},Tuple{CuArray{Float32,2,CuArray{Float32,5,Nothing}}}}) at ./broadcast.jl:820
 [12] broadcast_forward(::Function, ::CuArray{Float32,2,CuArray{Float32,5,Nothing}}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/broadcast.jl:181
 [13] adjoint at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/broadcast.jl:197 [inlined]
 [14] _pullback at /home/afertin/.julia/packages/ZygoteRules/6nssF/src/adjoint.jl:47 [inlined]
 [15] adjoint at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/lib.jl:179 [inlined]
 [16] _pullback at /home/afertin/.julia/packages/ZygoteRules/6nssF/src/adjoint.jl:47 [inlined]
 [17] broadcasted at ./broadcast.jl:1232 [inlined]
 [18] _pullback(::Zygote.Context, ::typeof(Base.Broadcast.broadcasted), ::Type{Float32}, ::CuArray{Float32,2,CuArray{Float32,5,Nothing}}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [19] GroupNorm at /home/afertin/.julia/packages/Flux/Fj3bt/src/layers/normalise.jl:394 [inlined]
 [20] _pullback(::Zygote.Context, ::GroupNorm{typeof(relu),CuArray{Float32,1,Nothing},CuArray{Float32,2,Nothing},Float32,Int64}, ::CuArray{Float32,4,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [21] applychain at /home/afertin/.julia/packages/Flux/Fj3bt/src/layers/basic.jl:36 [inlined]
 [22] _pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{GroupNorm{typeof(relu),CuArray{Float32,1,Nothing},CuArray{Float32,2,Nothing},Float32,Int64},MaxPool{2,4},typeof(flatten),Dense{typeof(relu),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}},Dense{typeof(σ),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}}}, ::CuArray{Float32,4,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [23] applychain at /home/afertin/.julia/packages/Flux/Fj3bt/src/layers/basic.jl:36 [inlined]
 [24] _pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{Conv{2,2,typeof(identity),CuArray{Float32,4,Nothing},CuArray{Float32,1,Nothing}},GroupNorm{typeof(relu),CuArray{Float32,1,Nothing},CuArray{Float32,2,Nothing},Float32,Int64},MaxPool{2,4},typeof(flatten),Dense{typeof(relu),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}},Dense{typeof(σ),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}}}, ::CuArray{Float32,4,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [25] Chain at /home/afertin/.julia/packages/Flux/Fj3bt/src/layers/basic.jl:38 [inlined]
 [26] _pullback(::Zygote.Context, ::Chain{Tuple{Conv{2,2,typeof(identity),CuArray{Float32,4,Nothing},CuArray{Float32,1,Nothing}},GroupNorm{typeof(relu),CuArray{Float32,1,Nothing},CuArray{Float32,2,Nothing},Float32,Int64},MaxPool{2,4},typeof(flatten),Dense{typeof(relu),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}},Dense{typeof(σ),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}}}}, ::CuArray{Float32,4,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [27] loss at ./REPL[12]:1 [inlined]
 [28] _pullback(::Zygote.Context, ::typeof(loss), ::CuArray{Float32,4,Nothing}, ::CuArray{Float32,2,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [29] adjoint at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/lib.jl:179 [inlined]
 [30] _pullback at /home/afertin/.julia/packages/ZygoteRules/6nssF/src/adjoint.jl:47 [inlined]
 [31] #17 at /home/afertin/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:89 [inlined]
 [32] _pullback(::Zygote.Context, ::Flux.Optimise.var"#17#25"{typeof(loss),Tuple{CuArray{Float32,4,Nothing},CuArray{Float32,2,Nothing}}}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [33] pullback(::Function, ::Zygote.Params) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface.jl:172
 [34] gradient(::Function, ::Zygote.Params) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface.jl:53
 [35] macro expansion at /home/afertin/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:88 [inlined]
 [36] macro expansion at /home/afertin/.julia/packages/Juno/tLMZd/src/progress.jl:134 [inlined]
 [37] train!(::typeof(loss), ::Zygote.Params, ::Array{Tuple{CuArray{Float32,4,Nothing},CuArray{Float32,2,Nothing}},1}, ::Descent; cb::Flux.Optimise.var"#18#26") at /home/afertin/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:81
 [38] train!(::Function, ::Zygote.Params, ::Array{Tuple{CuArray{Float32,4,Nothing},CuArray{Float32,2,Nothing}},1}, ::Descent) at /home/afertin/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:79
 [39] top-level scope at REPL[19]:1
@a-r-n-o-l-d
Author

And with allowscalar(true):

CuArrays.allowscalar(true)
Flux.train!(loss, ps, gpu.([(x1,y1)]), opt)

┌ Warning: Performing scalar operations on GPU arrays: This is very slow, consider disallowing these operations with `allowscalar(false)`
└ @ GPUArrays ~/.julia/packages/GPUArrays/JqOUg/src/host/indexing.jl:43
ERROR: MethodError: no method matching Float32(::ForwardDiff.Dual{Nothing,Float32,1})
Closest candidates are:
  Float32(::Real, ::RoundingMode) where T<:AbstractFloat at rounding.jl:200
  Float32(::T) where T<:Number at boot.jl:715
  Float32(::Int8) at float.jl:60
  ...
Stacktrace:
 [1] (::CuArrays.var"#68#69"{Float32})(::ForwardDiff.Dual{Nothing,Float32,1}) at /home/afertin/.julia/packages/CuArrays/YFdj7/src/broadcast.jl:21
 [2] (::Zygote.var"#1161#1164"{CuArrays.var"#68#69"{Float32}})(::Float32) at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/broadcast.jl:175
 [3] _broadcast_getindex_evalf at ./broadcast.jl:631 [inlined]
 [4] _broadcast_getindex at ./broadcast.jl:604 [inlined]
 [5] getindex at ./broadcast.jl:564 [inlined]
 [6] copy at ./broadcast.jl:854 [inlined]
 [7] materialize(::Base.Broadcast.Broadcasted{CuArrays.CuArrayStyle{2},Nothing,Zygote.var"#1161#1164"{CuArrays.var"#68#69"{Float32}},Tuple{CuArray{Float32,2,CuArray{Float32,5,Nothing}}}}) at ./broadcast.jl:820
 [8] broadcast_forward(::Function, ::CuArray{Float32,2,CuArray{Float32,5,Nothing}}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/broadcast.jl:181
 [9] adjoint at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/broadcast.jl:197 [inlined]
 [10] _pullback at /home/afertin/.julia/packages/ZygoteRules/6nssF/src/adjoint.jl:47 [inlined]
 [11] adjoint at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/lib.jl:179 [inlined]
 [12] _pullback at /home/afertin/.julia/packages/ZygoteRules/6nssF/src/adjoint.jl:47 [inlined]
 [13] broadcasted at ./broadcast.jl:1232 [inlined]
 [14] _pullback(::Zygote.Context, ::typeof(Base.Broadcast.broadcasted), ::Type{Float32}, ::CuArray{Float32,2,CuArray{Float32,5,Nothing}}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [15] GroupNorm at /home/afertin/.julia/packages/Flux/Fj3bt/src/layers/normalise.jl:394 [inlined]
 [16] _pullback(::Zygote.Context, ::GroupNorm{typeof(relu),CuArray{Float32,1,Nothing},CuArray{Float32,2,Nothing},Float32,Int64}, ::CuArray{Float32,4,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [17] applychain at /home/afertin/.julia/packages/Flux/Fj3bt/src/layers/basic.jl:36 [inlined]
 [18] _pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{GroupNorm{typeof(relu),CuArray{Float32,1,Nothing},CuArray{Float32,2,Nothing},Float32,Int64},MaxPool{2,4},typeof(flatten),Dense{typeof(relu),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}},Dense{typeof(σ),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}}}, ::CuArray{Float32,4,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [19] applychain at /home/afertin/.julia/packages/Flux/Fj3bt/src/layers/basic.jl:36 [inlined]
 [20] _pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{Conv{2,2,typeof(identity),CuArray{Float32,4,Nothing},CuArray{Float32,1,Nothing}},GroupNorm{typeof(relu),CuArray{Float32,1,Nothing},CuArray{Float32,2,Nothing},Float32,Int64},MaxPool{2,4},typeof(flatten),Dense{typeof(relu),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}},Dense{typeof(σ),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}}}, ::CuArray{Float32,4,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [21] Chain at /home/afertin/.julia/packages/Flux/Fj3bt/src/layers/basic.jl:38 [inlined]
 [22] _pullback(::Zygote.Context, ::Chain{Tuple{Conv{2,2,typeof(identity),CuArray{Float32,4,Nothing},CuArray{Float32,1,Nothing}},GroupNorm{typeof(relu),CuArray{Float32,1,Nothing},CuArray{Float32,2,Nothing},Float32,Int64},MaxPool{2,4},typeof(flatten),Dense{typeof(relu),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}},Dense{typeof(σ),CuArray{Float32,2,Nothing},CuArray{Float32,1,Nothing}}}}, ::CuArray{Float32,4,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [23] loss at ./REPL[12]:1 [inlined]
 [24] _pullback(::Zygote.Context, ::typeof(loss), ::CuArray{Float32,4,Nothing}, ::CuArray{Float32,2,Nothing}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [25] adjoint at /home/afertin/.julia/packages/Zygote/1GXzF/src/lib/lib.jl:179 [inlined]
 [26] _pullback at /home/afertin/.julia/packages/ZygoteRules/6nssF/src/adjoint.jl:47 [inlined]
 [27] #17 at /home/afertin/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:89 [inlined]
 [28] _pullback(::Zygote.Context, ::Flux.Optimise.var"#17#25"{typeof(loss),Tuple{CuArray{Float32,4,Nothing},CuArray{Float32,2,Nothing}}}) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface2.jl:0
 [29] pullback(::Function, ::Zygote.Params) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface.jl:172
 [30] gradient(::Function, ::Zygote.Params) at /home/afertin/.julia/packages/Zygote/1GXzF/src/compiler/interface.jl:53
 [31] macro expansion at /home/afertin/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:88 [inlined]
 [32] macro expansion at /home/afertin/.julia/packages/Juno/tLMZd/src/progress.jl:134 [inlined]
 [33] train!(::typeof(loss), ::Zygote.Params, ::Array{Tuple{CuArray{Float32,4,Nothing},CuArray{Float32,2,Nothing}},1}, ::Descent; cb::Flux.Optimise.var"#18#26") at /home/afertin/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:81
 [34] train!(::Function, ::Zygote.Params, ::Array{Tuple{CuArray{Float32,4,Nothing},CuArray{Float32,2,Nothing}},1}, ::Descent) at /home/afertin/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:79
 [35] top-level scope at REPL[21]:1

@deveshjawla

I think you should check this: your loss function is causing the problem.
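
One quick way to test that claim (a sketch, not from this thread — the shapes are arbitrary): differentiate the loss by itself on GPU arrays, with no model in the loop. If this runs cleanly, the loss is probably not the culprit:

using Statistics
using Flux
using Flux: binarycrossentropy
using CuArrays
CuArrays.allowscalar(false)

ŷ = gpu(rand(Float32, 1, 8));          # stand-in for the model output
y = gpu(Float32.([1 0 0 0 0 0 0 0]));  # same labels as the original example
bce(ŷ, y) = mean(binarycrossentropy.(ŷ, y))

Flux.gradient(p -> bce(p, y), ŷ)       # loss only, no GroupNorm involved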

@CarloLucibello
Member

Looks like the GroupNorm adjoint is not GPU-friendly.
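
For context, both traces above point at a Float32.(...) broadcast inside GroupNorm's forward pass (normalise.jl:394), applied to a reshaped CuArray (the wrapped type CuArray{Float32,2,CuArray{Float32,5,Nothing}}). A minimal sketch that may reproduce the same failure outside the model — the shapes here are illustrative, not taken from the layer's source:

using CuArrays, Zygote
CuArrays.allowscalar(false)

x = cu(rand(Float32, 8, 8, 8, 4, 2));  # a 5-d CuArray, like the one GroupNorm builds
y = reshape(x, 8, :);                  # a wrapped 2-d view of it

# Zygote handles this broadcast with its ForwardDiff-based forward-mode
# rule (broadcast_forward), which appears to index the wrapped array
# element by element — hence "scalar getindex is disallowed":
Zygote.gradient(a -> sum(Float32.(a)), y)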

@a-r-n-o-l-d
Author

@deveshjawla I get the same error when I simulate a multiclass problem with crossentropy as the loss:

using Statistics
using Flux
using Flux: flatten, crossentropy
using CuArrays
CuArrays.allowscalar(false)

imsize = (32,32)
group = 4
nclasses = 4

model = Chain(Conv((3,3), 3=>32, pad=(1,1)),
              GroupNorm(32, group, relu),
              MaxPool((2,2)),
              flatten,
              Dense(Int(prod(imsize) / 4 * 32), 128, relu),
              Dense(128, nclasses),
              softmax)

x1 = Float32.(randn(32, 32, 3, 5));
y1 = Float32.(hcat([1;0;0;0], [0;1;0;0], [1;0;0;0], [0;0;1;0], [0;0;0;1]));

loss(x, y) = crossentropy(model(x), y)

opt = Descent();
ps = params(model);
Flux.train!(loss, ps, [(x1,y1)], opt)

model = gpu(model);
opt = Descent();
ps = params(model);
Flux.train!(loss, ps, gpu.([(x1,y1)]), opt)

@a-r-n-o-l-d
Author

@CarloLucibello: Is the problem coming from Zygote? I have no knowledge of automatic differentiation, so I am not able to propose a patch myself.
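
One way to narrow it down (a sketch — the layer and input sizes here are arbitrary, not from the model above): differentiate a GroupNorm layer by itself on the GPU. If this alone fails with the same trace, the problem is in the layer's pullback on the Flux/Zygote side rather than in the surrounding model:

using Flux, CuArrays
CuArrays.allowscalar(false)

gn = gpu(GroupNorm(32, 4, relu));      # 32 channels in 4 groups, as above
x  = gpu(rand(Float32, 8, 8, 32, 2));  # small WHCN batch

Flux.gradient(() -> sum(gn(x)), Flux.params(gn))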

@deveshjawla

(Quoting the multiclass example above.)

Noted.
