fix bug in prelu, issue 12061 (apache#12660)

* fix bug in prelu
* add unit test
vandanavk · Sep 28, 2018 · f274039 · f274039
1 parent d4e202a
commit f274039
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 28 deletions.
diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h
@@ -301,7 +301,7 @@ class LeakyReLUOp : public Operator {
  TShape result(dst.ndim());
  int s = src.ndim() - 1;
  for (int i = dst.ndim() - 1; i >= 0; i--) {
- if (s >= 0 && (dst[i] == src[s] || src[s] == 1)) {
+ if (s >= 0 && i <= 1 && (dst[i] == src[s] || src[s] == 1)) {
  result[i] = src[s];
  s--;
  } else {

diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
@@ -772,48 +772,61 @@ def test_prelu():
  def fprelu(x, gamma):
  pos_indices = x > 0
  out = x.copy()
- out = np.multiply(out, gamma)
+ if len(x.shape) == 4:
+ out = out.transpose(2,3,0,1)
+ out = np.multiply(out, gamma)
+ out = out.transpose(2,3,0,1)
+ else:
+ out = np.multiply(out, gamma)
  out[pos_indices] = x[pos_indices]
  return out
  def fprelu_grad(x, y, gamma):
  pos_indices = x > 0
- grad_x = np.multiply(np.ones(x.shape), gamma)
+ if len(x.shape) == 4:
+ grad_x = np.multiply(np.ones(x.shape).transpose(2,3,0,1), gamma)
+ grad_x = grad_x.transpose(2,3,0,1)
+ else:
+ grad_x = np.multiply(np.ones(x.shape), gamma)
  grad_gam = np.zeros(gamma.shape)
  copy_x = x.copy()
  copy_x[pos_indices] = 0.0
  grad_x[pos_indices] = 1.0
- if len(gamma.shape) > 1:
+ if len(gamma.shape) > 1 and len(x.shape) != 4:
  grad_gam = copy_x
+ elif len(gamma.shape) > 1 and len(x.shape) == 4:
+ grad_gam = np.sum(copy_x, axis=(2,3))
  elif gamma.shape[0] == 1:
  grad_gam = np.sum(np.sum(copy_x))
- elif gamma.shape[0] > 1:
+ elif gamma.shape[0] > 1 and len(x.shape) != 4:
  grad_gam = np.sum(copy_x, axis=0)
+ elif gamma.shape[0] > 1 and len(x.shape) == 4:
+ grad_gam = np.sum(copy_x, axis=(0,2,3))
  return (grad_x, grad_gam)
- shape = (3,4)
  x = mx.symbol.Variable("x")
  gamma = mx.symbol.Variable("gamma")
- for dtype in [np.float16, np.float32, np.float64]:
- for gam in [np.array([0.1, 0.2, 0.3, 0.4], dtype=dtype)]:
- gam_full = np.array([gam, gam, gam])
- xa = np.random.uniform(low=-1.0,high=1.0,size=shape).astype(dtype)
- rtol = 1e-2
- atol = 1e-3
- eps = 1e-4
- xa[abs(xa) < eps] = 1.0
- y = mx.symbol.LeakyReLU(data=x, gamma=gamma, act_type='prelu')
- ya = fprelu(xa, gam)
- ya_full = fprelu(xa, gam_full)
- g_xa, g_gam = fprelu_grad(xa, ya, gamma=gam)
- g_xa_full, g_gam_full = fprelu_grad(xa, ya_full, gamma=gam_full)
- # Skip numeric check for float16 type to get rid of flaky behavior
- if dtype is not np.float16:
- check_numeric_gradient(y, [xa, gam], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
- check_numeric_gradient(y, [xa, gam_full], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
- check_symbolic_forward(y, [xa, gam], [ya], rtol=rtol, atol=atol, dtype=dtype)
- check_symbolic_backward(y, [xa, gam], [np.ones(shape), np.ones(gam.shape)], [g_xa, g_gam], rtol=rtol, atol=atol, dtype=dtype)
- check_symbolic_forward(y, [xa, gam_full], [ya_full], rtol=rtol, atol=atol, dtype=dtype)
- check_symbolic_backward(y, [xa, gam_full], [np.ones(shape), np.ones(gam_full.shape)],
- [g_xa_full, g_gam_full], rtol=rtol, atol=atol, dtype=dtype)
+ for shape in [(3,4), (3,4,4,5)]:
+ for dtype in [np.float16, np.float32, np.float64]:
+ for gam in [np.array([0.1, 0.2, 0.3, 0.4], dtype=dtype)]:
+ gam_full = np.array([gam, gam, gam])
+ xa = np.random.uniform(low=-1.0,high=1.0,size=shape).astype(dtype)
+ rtol = 1e-2
+ atol = 1e-3
+ eps = 1e-4
+ xa[abs(xa) < eps] = 1.0
+ y = mx.symbol.LeakyReLU(data=x, gamma=gamma, act_type='prelu')
+ ya = fprelu(xa, gam)
+ ya_full = fprelu(xa, gam_full)
+ g_xa, g_gam = fprelu_grad(xa, ya, gamma=gam)
+ g_xa_full, g_gam_full = fprelu_grad(xa, ya_full, gamma=gam_full)
+ # Skip numeric check for float16 type to get rid of flaky behavior
+ if dtype is not np.float16:
+ check_numeric_gradient(y, [xa, gam], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
+ check_numeric_gradient(y, [xa, gam_full], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
+ check_symbolic_forward(y, [xa, gam], [ya], rtol=rtol, atol=atol, dtype=dtype)
+ check_symbolic_backward(y, [xa, gam], [np.ones(shape), np.ones(gam.shape)], [g_xa, g_gam], rtol=rtol, atol=atol, dtype=dtype)
+ check_symbolic_forward(y, [xa, gam_full], [ya_full], rtol=rtol, atol=atol, dtype=dtype)
+ check_symbolic_backward(y, [xa, gam_full], [np.ones(shape), np.ones(gam_full.shape)],
+ [g_xa_full, g_gam_full], rtol=rtol, atol=atol, dtype=dtype)
 
 @with_seed()
 def test_selu():