FluxML · bors · Feb 11, 2021 · Feb 3, 2021 · Feb 3, 2021 · Feb 3, 2021
diff --git a/src/utils.jl b/src/utils.jl
@@ -174,6 +174,67 @@ end
 kaiming_normal(dims...; kwargs...) = kaiming_normal(Random.GLOBAL_RNG, dims...; kwargs...)
 kaiming_normal(rng::AbstractRNG; kwargs...) = (dims...; kwargs...) -> kaiming_normal(rng, dims...; kwargs...)
 
+"""
+ orthogonal([rng=GLOBAL_RNG], dims...; gain = 1)
+
+Return an `Array` of size `dims` which is a (semi) orthogonal matrix, as described in *Exact solutions to the nonlinear dynamics of learning in deep linear neural networks - Saxe, A. et al. (2013)*. 
+
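+`dims` must contain at least 2 dimensions. With more than 2, every dimension after the first is flattened into the columns of the matrix that is orthogonalized; the result is reshaped to `dims` and multiplied by `gain`.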
+
+# Examples
+```jldoctest; setup = :(using Random; Random.seed!(0));
+julia> using LinearAlgebra
+
+julia> W = Flux.orthogonal(5, 7);
+
+julia> summary(W)
+"5×7 Array{Float32,2}"
+
+julia> W * W'
+5×5 Array{Float32,2}:
+ 1.0 -2.42898f-8 6.32759f-8 -1.37195f-7 -2.19659f-8
+ -2.42898f-8 1.0 4.03295f-8 -1.34284f-7 1.06978f-7
+ 6.32759f-8 4.03295f-8 1.0 7.93047f-8 2.6339f-7
+ -1.37195f-7 -1.34284f-7 7.93047f-8 1.0 6.60169f-8
+ -2.19659f-8 1.06978f-7 2.6339f-7 6.60169f-8 1.0
+
+julia> W * W' ≈ I(5)
+true
+
+julia> W2 = Flux.orthogonal(7, 5);
+
+julia> W2 * W2' ≈ I(7)
+false
+
+julia> W2' * W2 ≈ I(5)
+true
+```
+
+# References
+[1] Saxe, McClelland, Ganguli. "Exact solutions to the nonlinear dynamics of learning in deep linear neural networks", ICLR 2014, https://arxiv.org/abs/1312.6120
+
+"""
+function orthogonal(rng::AbstractRNG, dims...; gain = 1)
+  if length(dims) < 2
+    throw(ArgumentError("Only Arrays with 2 or more dimensions are supported"))
+  end
+
+  rows = dims[1]
+  cols = div(prod(dims), rows)  # every dimension after the first is flattened into columns
+  # Sample from the caller's `rng`, always as a tall (or square) matrix, so the thin Q has orthonormal columns.
+  mat = rows > cols ? randn(rng, Float32, rows, cols) : randn(rng, Float32, cols, rows)
+  Q, R = LinearAlgebra.qr(mat)
+  Q = Array(Q) * sign.(LinearAlgebra.Diagonal(R))  # flip each column of Q by the sign of the matching diagonal entry of R
+  if rows < cols
+    Q = transpose(Q)  # sampled as cols×rows above; transpose back to rows×cols
+  end
+
+  return gain * reshape(Q, dims)
+end
+
+orthogonal(dims...; kwargs...) = orthogonal(Random.GLOBAL_RNG, dims...; kwargs...)  # no rng given: use the global RNG
+orthogonal(rng::AbstractRNG; init_kwargs...) = (dims...; kwargs...) -> orthogonal(rng, dims...; init_kwargs..., kwargs...)  # partial application: keywords given here are kept, call-time keywords override them
+
 """
  sparse_init([rng=GLOBAL_RNG], dims...; sparsity, std = 0.01)
 

diff --git a/test/utils.jl b/test/utils.jl
@@ -1,5 +1,5 @@
 using Flux
-using Flux: throttle, nfan, glorot_uniform, glorot_normal, kaiming_normal, kaiming_uniform, sparse_init, stack, unstack, Zeros
+using Flux: throttle, nfan, glorot_uniform, glorot_normal, kaiming_normal, kaiming_uniform, orthogonal, sparse_init, stack, unstack, Zeros
 using StatsBase: var, std
 using Random
 using Test
@@ -96,6 +96,14 @@ end
  end
  end
 
+ @testset "orthogonal" begin
+   # For a non-square (m, n) the result is only semi-orthogonal: the Gram matrix over the smaller dimension (W'W if m > n, WW' if m < n) must be the identity.
+   for (nrows, ncols) in ((5, 3), (3, 5))
+     W = orthogonal(nrows, ncols)
+     @test (nrows < ncols ? W * W' : W' * W) ≈ I(min(nrows, ncols))
+   end
+ end
+
  @testset "sparse_init" begin
  # sparse_init should yield an error for non 2-d dimensions
  # sparse_init should yield no zero elements if sparsity < 0