From a39eba84609ec9d2883d3e563b6567892489cbcb Mon Sep 17 00:00:00 2001
From: CarloLucibello
Date: Fri, 26 Jan 2018 15:35:36 +0100
Subject: [PATCH 1/3] add elu and selu activations

---
 src/Knet.jl  |  2 +-
 src/unary.jl | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/Knet.jl b/src/Knet.jl
index 79fa94717..9e3caa34c 100644
--- a/src/Knet.jl
+++ b/src/Knet.jl
@@ -21,7 +21,7 @@ include("uva.jl")
 include("kptr.jl"); export knetgc # KnetPtr
 include("karray.jl"); export KnetArray
 include("unfuse.jl"); # julia6 broadcast fixes
-include("unary.jl"); export relu, sigm, invx
+include("unary.jl"); export relu, sigm, invx, elu, selu
 include("broadcast.jl"); # elementwise broadcasting operations
 include("reduction.jl"); # sum, max, mean, etc.
 include("linalg.jl"); export mat # matmul, axpy!, transpose, (i)permutedims
diff --git a/src/unary.jl b/src/unary.jl
index 951e23312..7281170e6 100644
--- a/src/unary.jl
+++ b/src/unary.jl
@@ -153,3 +153,36 @@ broadcast(::typeof(+), a::KnetArray)=a
 +(a::KnetArray)=a
 -(a::KnetArray)=broadcast(-,a)
 
+"""
+    elu(x, alpha=1)
+
+Exponential Linear Unit. Returns
+`max(0,x) + alpha*(exp(min(x,0)) - 1)`.
+
+Reference:
+"Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)", ICLR 2016.
+"""
+function elu(x, alpha=1)
+    p = relu(x)
+    m = -relu(-x)
+    return p + alpha*(exp(m) - 1)
+end
+
+"""
+    selu(x)
+
+Self-Normalizing Exponential Linear Unit. Returns
+`scale*(max(0,x) + alpha*(exp(min(x,0)) - 1))`
+where `scale=1.0507009` and `alpha=1.6732632`.
+
+Reference:
+Self-Normalizing Neural Networks
+https://arxiv.org/abs/1706.02515
+"""
+function selu(x)
+    alpha = 1.6732632f
+    scale = 1.0507009f
+    p = relu(x)
+    m = -relu(-x)
+    return scale*(p + alpha*(exp(m) - 1))
+end

From 7cef95a3bb33609dcef33cb013c33a27e33dc751 Mon Sep 17 00:00:00 2001
From: CarloLucibello
Date: Fri, 26 Jan 2018 19:22:16 +0100
Subject: [PATCH 2/3] add alpha dropout

---
 src/dropout.jl | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/src/dropout.jl b/src/dropout.jl
index 4468cb219..476e8ab9e 100644
--- a/src/dropout.jl
+++ b/src/dropout.jl
@@ -92,3 +92,25 @@ function dropback!(p,x,y,dy,dx)
     return dx
 end
 
+
+"""
+    alpha_dropout(x, p)
+
+Dropout variant associated with the `selu` activation.
+
+Reference:
+Self-Normalizing Neural Networks
+https://arxiv.org/abs/1706.02515
+"""
+function alpha_dropout(x, p)
+    training = x isa Rec
+    (p == 0 || !training) && return x
+
+    alpha = Float32(-1.758099)
+    q = Float32(1-p)
+    x = q*dropout(x .- alpha, p) .+ alpha  # set dropped entries to alpha
+    a = 1 / sqrt(q + alpha^2 * q*p)
+    b = -a * alpha * p
+    return a*x + b
+end
+

From c073d391b79e6e473574698dfd8b14e3bc7555f6 Mon Sep 17 00:00:00 2001
From: CarloLucibello
Date: Fri, 23 Mar 2018 09:27:16 -0400
Subject: [PATCH 3/3] update

---
 src/broadcast.jl |  1 +
 src/unary.jl     | 21 ++++-----------------
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/src/broadcast.jl b/src/broadcast.jl
index d6fbb5c3d..cf9df0b4b 100644
--- a/src/broadcast.jl
+++ b/src/broadcast.jl
@@ -32,6 +32,7 @@ broadcast_ops = [
 # "fdim",
 ("invxback","invxback","(-xi*yi*yi)"),
 ("reluback","reluback","(yi>0?xi:0)"),
+("eluback","eluback","(yi>0?xi:xi*(1+yi))"),
 ("sigmback","sigmback","(xi*yi*(1-yi))"),
 ("tanhback","tanhback","(xi*(1-yi*yi))"),
 ("rpow","rpow","pow(yi,xi)"), # need this for Array.^Scalar
diff --git a/src/unary.jl b/src/unary.jl
index 7281170e6..4d677f154 100644
--- a/src/unary.jl
+++ b/src/unary.jl
@@ -45,6 +45,7 @@ unary_ops = [
 # "normcdfinv",
 # "rcbrt",
 ("relu", "relu", "(xi>0?xi:0)"),
+("elu", "elu", "(xi>0?xi:exp(xi)-1)"),
 # "rint",
 "round",
 # "rsqrt",
@@ -99,6 +100,7 @@ end
 for (f,g,y,dx) in
     ((:invx, :invxback, :(one(T)/xi), :(-yi*yi*dyi)),
      (:relu, :reluback, :(max(zero(T),xi)), :(ifelse(yi>0,dyi,zero(T)))),
+     (:elu, :eluback, :(ifelse(xi>0,xi,exp(xi)-1)), :(ifelse(yi>0,dyi,dyi*(1+yi)))),
      (:tanx, :tanhback, :(tanh(xi)), :(dyi*(one(T)-yi*yi))),
      (:sigm, :sigmback,
       # Numerically stable implementation from
@@ -153,21 +155,6 @@ broadcast(::typeof(+), a::KnetArray)=a
 +(a::KnetArray)=a
 -(a::KnetArray)=broadcast(-,a)
 
-"""
-    elu(x, alpha=1)
-
-Exponential Linear Unit. Returns
-`max(0,x) + alpha*(exp(min(x,0)) - 1)`.
-
-Reference:
-"Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)", ICLR 2016.
-"""
-function elu(x, alpha=1)
-    p = relu(x)
-    m = -relu(-x)
-    return p + alpha*(exp(m) - 1)
-end
-
 """
     selu(x)
 
@@ -180,8 +167,8 @@ Self-Normalizing Neural Networks
 https://arxiv.org/abs/1706.02515
 """
 function selu(x)
-    alpha = 1.6732632f
-    scale = 1.0507009f
+    alpha = 1.6732632f0
+    scale = 1.0507009f0
     p = relu(x)
     m = -relu(-x)
     return scale*(p + alpha*(exp(m) - 1))
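
A standalone way to check the activation and gradient rules introduced above is to mirror them in plain Julia, outside of Knet. The sketch below is illustrative only and is not part of the patches; `elu_ref`, `selu_ref`, and `eluback_ref` are hypothetical names. It encodes the documented forward rule `max(0,x) + alpha*(exp(min(x,0)) - 1)` and the backward rule `dx = dy` for `y > 0`, `dx = dy*(1+y)` otherwise (since `y = exp(x)-1` gives `dy/dx = exp(x) = 1+y` on the negative branch), and verifies the latter against a finite-difference estimate.

    # Plain-Julia reference versions of the activations added in PATCH 1/3.
    elu_ref(x, alpha=1) = x > 0 ? x : alpha * (exp(x) - 1)
    selu_ref(x) = 1.0507009f0 * elu_ref(x, 1.6732632f0)

    # eluback rule for alpha = 1: pass dy through where y > 0, scale by (1+y) elsewhere.
    eluback_ref(dy, y) = y > 0 ? dy : dy * (1 + y)

    # Finite-difference check of the gradient rule at a few points.
    for x in (-2.0, -0.1, 0.3, 2.0)
        h = 1e-6
        numgrad = (elu_ref(x + h) - elu_ref(x - h)) / (2h)
        angrad  = eluback_ref(1.0, elu_ref(x))
        @assert isapprox(numgrad, angrad; atol=1e-5)
    end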
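
The `alpha_dropout` correction from PATCH 2/3 can be illustrated the same way. The sketch below uses a hypothetical `alpha_dropout_ref` and an explicit keep-mask in place of the `q*dropout(x .- alpha, p) .+ alpha` trick from the patch (Knet's `dropout` is inverted, so multiplying by `q = 1-p` undoes its rescaling); the constant `-1.758099...` is `-scale*alpha` for `selu`. On roughly standard-normal input the output mean and variance stay close to 0 and 1, which is the property the Self-Normalizing Neural Networks paper relies on.

    using Statistics

    function alpha_dropout_ref(x, p)
        alpha = -1.7580993408473766        # -selu_scale * selu_alpha (~ -1.758099)
        q = 1 - p
        keep = rand(length(x)) .> p        # Bernoulli keep mask
        y = ifelse.(keep, x, alpha)        # dropped entries pinned to alpha
        a = 1 / sqrt(q + alpha^2 * q * p)  # affine correction, as in the patch
        b = -a * alpha * p
        return a .* y .+ b
    end

    x = randn(1_000_000)                   # roughly N(0,1), e.g. the output of a selu layer
    y = alpha_dropout_ref(x, 0.1)
    (mean(y), var(y))                      # both remain close to (0, 1)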