From f27403926a647c67361d3bab06535af5d455611b Mon Sep 17 00:00:00 2001
From: TianqiTang <2258792692@qq.com>
Date: Thu, 27 Sep 2018 15:15:59 +0800
Subject: [PATCH] fix bug in prelu , issue 12061 (#12660)

* fix bug in prelu

* add unit test
---
 src/operator/leaky_relu-inl.h          |  2 +-
 tests/python/unittest/test_operator.py | 67 +++++++++++++++-----------
 2 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h
index 1d2baa4b6c3f..fe2668959af1 100644
--- a/src/operator/leaky_relu-inl.h
+++ b/src/operator/leaky_relu-inl.h
@@ -301,7 +301,7 @@ class LeakyReLUOp : public Operator {
     TShape result(dst.ndim());
     int s = src.ndim() - 1;
     for (int i = dst.ndim() - 1; i >= 0; i--) {
-      if (s >= 0 && (dst[i] == src[s] || src[s] == 1)) {
+      if (s >= 0 && i <= 1 && (dst[i] == src[s] || src[s] == 1)) {
         result[i] = src[s];
         s--;
       } else {
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index a7f484e81b38..b5a7303195f1 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -772,48 +772,61 @@ def test_prelu():
     def fprelu(x, gamma):
         pos_indices = x > 0
         out = x.copy()
-        out = np.multiply(out, gamma)
+        if len(x.shape) == 4:
+            out = out.transpose(2,3,0,1)
+            out = np.multiply(out, gamma)
+            out = out.transpose(2,3,0,1)
+        else:
+            out = np.multiply(out, gamma)
         out[pos_indices] = x[pos_indices]
         return out
     def fprelu_grad(x, y, gamma):
         pos_indices = x > 0
-        grad_x = np.multiply(np.ones(x.shape), gamma)
+        if len(x.shape) == 4:
+            grad_x = np.multiply(np.ones(x.shape).transpose(2,3,0,1), gamma)
+            grad_x = grad_x.transpose(2,3,0,1)
+        else:
+            grad_x = np.multiply(np.ones(x.shape), gamma)
         grad_gam = np.zeros(gamma.shape)
         copy_x = x.copy()
         copy_x[pos_indices] = 0.0
         grad_x[pos_indices] = 1.0
-        if len(gamma.shape) > 1:
+        if len(gamma.shape) > 1 and len(x.shape) != 4:
             grad_gam = copy_x
+        elif len(gamma.shape) > 1 and len(x.shape) == 4:
+            grad_gam = np.sum(copy_x, axis=(2,3))
         elif gamma.shape[0] == 1:
             grad_gam = np.sum(np.sum(copy_x))
-        elif gamma.shape[0] > 1:
+        elif gamma.shape[0] > 1 and len(x.shape) != 4:
             grad_gam = np.sum(copy_x, axis=0)
+        elif gamma.shape[0] > 1 and len(x.shape) == 4:
+            grad_gam = np.sum(copy_x, axis=(0,2,3))
         return (grad_x, grad_gam)
-    shape = (3,4)
     x = mx.symbol.Variable("x")
     gamma = mx.symbol.Variable("gamma")
-    for dtype in [np.float16, np.float32, np.float64]:
-        for gam in [np.array([0.1, 0.2, 0.3, 0.4], dtype=dtype)]:
-            gam_full = np.array([gam, gam, gam])
-            xa = np.random.uniform(low=-1.0,high=1.0,size=shape).astype(dtype)
-            rtol = 1e-2
-            atol = 1e-3
-            eps = 1e-4
-            xa[abs(xa) < eps] = 1.0
-            y = mx.symbol.LeakyReLU(data=x, gamma=gamma, act_type='prelu')
-            ya = fprelu(xa, gam)
-            ya_full = fprelu(xa, gam_full)
-            g_xa, g_gam = fprelu_grad(xa, ya, gamma=gam)
-            g_xa_full, g_gam_full = fprelu_grad(xa, ya_full, gamma=gam_full)
-            # Skip numeric check for float16 type to get rid of flaky behavior
-            if dtype is not np.float16:
-                check_numeric_gradient(y, [xa, gam], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
-                check_numeric_gradient(y, [xa, gam_full], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
-            check_symbolic_forward(y, [xa, gam], [ya], rtol=rtol, atol=atol, dtype=dtype)
-            check_symbolic_backward(y, [xa, gam], [np.ones(shape), np.ones(gam.shape)], [g_xa, g_gam], rtol=rtol, atol=atol, dtype=dtype)
-            check_symbolic_forward(y, [xa, gam_full], [ya_full], rtol=rtol, atol=atol, dtype=dtype)
-            check_symbolic_backward(y, [xa, gam_full], [np.ones(shape), np.ones(gam_full.shape)],
-                                    [g_xa_full, g_gam_full], rtol=rtol, atol=atol, dtype=dtype)
+    for shape in [(3,4), (3,4,4,5)]:
+        for dtype in [np.float16, np.float32, np.float64]:
+            for gam in [np.array([0.1, 0.2, 0.3, 0.4], dtype=dtype)]:
+                gam_full = np.array([gam, gam, gam])
+                xa = np.random.uniform(low=-1.0,high=1.0,size=shape).astype(dtype)
+                rtol = 1e-2
+                atol = 1e-3
+                eps = 1e-4
+                xa[abs(xa) < eps] = 1.0
+                y = mx.symbol.LeakyReLU(data=x, gamma=gamma, act_type='prelu')
+                ya = fprelu(xa, gam)
+                ya_full = fprelu(xa, gam_full)
+                g_xa, g_gam = fprelu_grad(xa, ya, gamma=gam)
+                g_xa_full, g_gam_full = fprelu_grad(xa, ya_full, gamma=gam_full)
+                # Skip numeric check for float16 type to get rid of flaky behavior
+                if dtype is not np.float16:
+                    check_numeric_gradient(y, [xa, gam], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
+                    check_numeric_gradient(y, [xa, gam_full], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
+                check_symbolic_forward(y, [xa, gam], [ya], rtol=rtol, atol=atol, dtype=dtype)
+                check_symbolic_backward(y, [xa, gam], [np.ones(shape), np.ones(gam.shape)], [g_xa, g_gam], rtol=rtol, atol=atol, dtype=dtype)
+                check_symbolic_forward(y, [xa, gam_full], [ya_full], rtol=rtol, atol=atol, dtype=dtype)
+                check_symbolic_backward(y, [xa, gam_full], [np.ones(shape), np.ones(gam_full.shape)],
+                                        [g_xa_full, g_gam_full], rtol=rtol, atol=atol, dtype=dtype)
 
 @with_seed()
 def test_selu():
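
For reference, a minimal NumPy sketch (not part of the patch) of the broadcasting rule the i <= 1 guard enforces: a 1-D gamma of shape (C,) may now only match the leading batch/channel axes, so for a 4-D NCHW input it scales along the channel axis rather than a trailing spatial axis. This is the same behaviour the transpose trick in fprelu above emulates. The helper name prelu_reference is illustrative only and does not exist in MXNet.

    import numpy as np

    def prelu_reference(x, gamma):
        # PReLU: keep x where x > 0, otherwise scale x by a per-channel slope.
        if x.ndim == 4:
            gamma = gamma.reshape(1, -1, 1, 1)  # align gamma with the channel axis of NCHW input
        return np.where(x > 0, x, gamma * x)

    x = np.random.uniform(-1.0, 1.0, size=(3, 4, 4, 5))  # (N, C, H, W) with C == 4
    gamma = np.array([0.1, 0.2, 0.3, 0.4])                # one slope per channel
    assert prelu_reference(x, gamma).shape == x.shape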