[Eager] Set FLAGS_retain_grad_for_all_tensor to false by default (#43142)
* [Eager] Set FLAGS_retain_grad_for_all_tensor to false by default

* Add FLAGS_retain_grad_for_all_tensor for some tests

* Add FLAGS_retain_grad_for_all_tensor to some tests

* Modify set_flags

* Modify set_flags

* Fix windows-ci and windows-openblas-ci

* Import paddle.fluid
veyron95 authored Jun 2, 2022
1 parent 1e0ea6a commit 4d3b7d7
Showing 24 changed files with 134 additions and 1 deletion.
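The core change flips the default of FLAGS_retain_grad_for_all_tensor from true to false in paddle/fluid/eager/utils.cc, so in eager (dygraph) mode the gradients of intermediate, non-leaf tensors are no longer retained after backward unless the flag is switched back on. Each affected test therefore brackets its body with a pair of fluid.set_flags calls. The following is a minimal sketch of the behaviour those tests rely on, not part of the diff itself; it assumes eager mode and uses only APIs that already appear in this commit (paddle.to_tensor, Tensor.backward, fluid.set_flags):

import paddle
import paddle.fluid as fluid

# Re-enable the old behaviour: retain gradients for every tensor.
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)
y = paddle.tanh(x)      # intermediate (non-leaf) tensor
loss = paddle.sum(y)
loss.backward()

# While the flag is True the intermediate gradient is kept, so tests can
# assert on it; under the new default (False) y.grad would not be retained.
assert y.grad is not None
assert x.grad is not None  # leaf gradients are unaffected by the flag

# Restore the new default so the setting does not leak into other tests.
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})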
2 changes: 1 addition & 1 deletion paddle/fluid/eager/utils.cc
@@ -27,7 +27,7 @@
#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/fluid/framework/variable.h"

-PADDLE_DEFINE_EXPORTED_bool(retain_grad_for_all_tensor, true,
+PADDLE_DEFINE_EXPORTED_bool(retain_grad_for_all_tensor, false,
"retain grad for all tensor");

namespace egr {
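Every Python test below repeats the same enable/restore pair around its body. A helper along the following lines could make that toggling harder to get wrong (for instance, resetting the flag after an early return); this is a hedged sketch built only on the fluid.set_flags call used throughout the diff, not something this commit adds:

import contextlib

import paddle.fluid as fluid


@contextlib.contextmanager
def retain_grad_for_all_tensor():
    # Temporarily restore the old behaviour, then fall back to the new
    # default (False) even if the wrapped body raises or returns early.
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    try:
        yield
    finally:
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


# Hypothetical usage inside a test:
# with retain_grad_for_all_tensor():
#     self.func_double_grad_dynamic()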
Another changed file (path not shown):
@@ -17,6 +17,7 @@
import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.static as static
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
@@ -40,6 +41,7 @@


def custom_tanh_double_grad_dynamic(func, device, dtype, np_x):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
paddle.set_device(device)

t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
@@ -55,6 +57,7 @@ def custom_tanh_double_grad_dynamic(func, device, dtype, np_x):
assert out.grad is not None
assert dx[0].grad is not None
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
return dx[0].numpy(), dx[0].grad.numpy(), out.grad.numpy()


class TestCustomTanhDoubleGradJit(unittest.TestCase):
@@ -85,9 +88,11 @@ def func_double_grad_dynamic(self):
dout, pd_dout))

def test_func_double_grad_dynamic(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with _test_eager_guard():
self.func_double_grad_dynamic()
self.func_double_grad_dynamic()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


if __name__ == "__main__":
Another changed file (path not shown):
@@ -18,6 +18,7 @@

import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.compat as cpt
import paddle.nn.functional as F
from paddle.autograd.functional import _as_tensors
@@ -490,6 +491,8 @@ def func(x, y):
self.rtol, self.atol)

def func_create_graph_true(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

def func(x):
return paddle.sum(F.sigmoid(x))

@@ -501,6 +504,7 @@ def func(x):
assert hessian[:].stop_gradient == False
np.testing.assert_allclose(hessian[:].numpy(), numerical_hessian,
self.rtol, self.atol)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def func_out_not_single(self):
def func(x):
@@ -733,6 +737,8 @@ def func(x):
"does not appear") > 0

def func_create_graph_true(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

def func(x):
return paddle.sum(F.sigmoid(x))

@@ -745,6 +751,7 @@ def func(x):
self.rtol, self.atol)
triple_grad = paddle.grad(hessian, self.x)
assert triple_grad is not None
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_all_cases(self):
with _test_eager_guard():
@@ -1018,6 +1025,8 @@ def func(x, y):
self.atol)

def func_create_graph_true(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

def func(x):
return paddle.sum(F.sigmoid(x))

@@ -1034,6 +1043,7 @@ def func(x):
self.atol)
triple_grad = paddle.grad(vhp, self.x)
assert triple_grad is not None
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_all_cases(self):
with _test_eager_guard():
@@ -1102,6 +1112,8 @@ def func(x, y):
self.atol)

def func_multi_input_and_multi_output(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})

def func(x, y):
return paddle.matmul(x, y), x * y

@@ -1115,6 +1127,7 @@ def func(x, y):
np.testing.assert_allclose(jacobian[i][j].numpy(),
numerical_jacobian[i][j], self.rtol,
self.atol)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def func_allow_unused_false(self):
def func(x, y):
16 changes: 16 additions & 0 deletions python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
@@ -67,8 +67,10 @@ def func(self, place):
x_arr[np.abs(x_arr) < 0.005] = 0.002
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.sigmoid_wrapper, [x], y, x_init=x_arr, place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
@@ -95,8 +97,10 @@ def func(self, place):
x_arr[np.abs(x_arr) < 0.005] = 0.002
gradient_checker.triple_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.triple_grad_check_for_dygraph(
self.tanh_wrapper, [x], y, x_init=x_arr, place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
@@ -123,8 +127,10 @@ def func(self, place):
x_arr[np.abs(x_arr) < 0.005] = 0.002
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.tanh_wrapper, [x], y, x_init=x_arr, place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
@@ -151,8 +157,10 @@ def func(self, place):
x_arr[np.abs(x_arr) < 0.005] = 0.002
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.abs_wrapper, [x], y, x_init=x_arr, place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
@@ -240,8 +248,10 @@ def func(self, place):
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.elu_wrapper, [x], y, x_init=x_arr, place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
@@ -272,8 +282,10 @@ def func(self, place):
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.celu_wrapper, [x], y, x_init=x_arr, place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
@@ -362,8 +374,10 @@ def func(self, place):

gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.square_wrapper, [x], y, x_init=x_arr, place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
@@ -421,8 +435,10 @@ def func(self, place):

gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.double_grad_check_for_dygraph(
self.log_wrapper, [x], y, x_init=x_arr, place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
14 changes: 14 additions & 0 deletions python/paddle/fluid/tests/unittests/test_assign_op.py
@@ -34,10 +34,14 @@ def setUp(self):
self.outputs = {'Out': x}

def test_forward(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.check_output(check_eager=True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_backward(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.check_grad(['X'], 'Out', check_eager=True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


class TestAssignFP16Op(op_test.OpTest):
@@ -49,14 +53,19 @@ def setUp(self):
self.outputs = {'Out': x}

def test_forward(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.check_output(check_eager=True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_backward(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.check_grad(['X'], 'Out', check_eager=True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


class TestAssignOpWithLoDTensorArray(unittest.TestCase):
def test_assign_LoDTensorArray(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
main_program = Program()
startup_program = Program()
with program_guard(main_program):
@@ -71,6 +80,7 @@ def test_assign_LoDTensorArray(self):
sums = fluid.layers.array_read(array=init_array, i=i)
mean = fluid.layers.mean(sums)
append_backward(mean)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
) else fluid.CPUPlace()
@@ -173,6 +183,7 @@ def test_assign_BasicTypes(self):

def test_clone(self):
paddle.disable_static()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
self.python_api = paddle.clone

x = paddle.ones([2])
@@ -185,6 +196,7 @@ def test_clone(self):
self.assertTrue(np.array_equal(x, [1, 1]), True)
self.assertTrue(np.array_equal(clone_x.grad.numpy(), [3, 3]), True)
self.assertTrue(np.array_equal(x.grad.numpy(), [3, 3]), True)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
paddle.enable_static()

with program_guard(Program(), Program()):
@@ -201,6 +213,7 @@ def test_clone(self):

class TestAssignOpErrorApi(unittest.TestCase):
def test_errors(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with program_guard(Program(), Program()):
# The type of input must be Variable or numpy.ndarray.
x1 = fluid.create_lod_tensor(
@@ -209,6 +222,7 @@ def test_errors(self):
# When the type of input is numpy.ndarray, the dtype of input must be float32, int32.
x2 = np.array([[2.5, 2.5]], dtype='uint8')
self.assertRaises(TypeError, paddle.assign, x2)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_type_error(self):
paddle.enable_static()
2 changes: 2 additions & 0 deletions python/paddle/fluid/tests/unittests/test_detach.py
@@ -139,10 +139,12 @@ def detach_multi(self):
return x.gradient()

def test_NoDetachMulti_DetachMulti(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
array_no_detach_multi = self.no_detach_multi()
array_detach_multi = self.detach_multi()

assert not np.array_equal(array_no_detach_multi, array_detach_multi)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_NoDetachSingle_DetachMulti(self):
array_no_detach_single = self.no_detach_single()
Another changed file (path not shown):
@@ -346,11 +346,13 @@ def func(self, place):

gradient_checker.triple_grad_check(
[x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
gradient_checker.triple_grad_check_for_dygraph(
self.multiply_wrapper, [x, y],
out,
x_init=[x_arr, y_arr],
place=place)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def test_grad(self):
paddle.enable_static()
Another changed file (path not shown):
@@ -185,6 +185,7 @@ def setUp(self):
print(self.grad_res, self.grad_x, self.grad_y)

def test_grad(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
places = [fluid.CPUPlace()]
if fluid.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
Expand All @@ -200,6 +201,7 @@ def test_grad(self):
self.assertTrue(np.array_equal(res.gradient(), self.grad_res))
self.assertTrue(np.array_equal(x.gradient(), self.grad_x))
self.assertTrue(np.array_equal(y.gradient(), self.grad_y))
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


if __name__ == '__main__':
Another changed file (path not shown):
@@ -195,9 +195,11 @@ def func_auto_prune3(self):
self.assertTrue((part2.gradient() == 0).all())

def test_auto_prune3(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with _test_eager_guard():
self.func_auto_prune3()
self.func_auto_prune3()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def func_auto_prune4(self):
with fluid.dygraph.guard():
@@ -212,9 +214,11 @@ def func_auto_prune4(self):
self.assertTrue((part2.gradient() == 1).all())

def test_auto_prune4(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with _test_eager_guard():
self.func_auto_prune4()
self.func_auto_prune4()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def func_auto_prune5(self):
with fluid.dygraph.guard():
@@ -229,9 +233,11 @@ def func_auto_prune5(self):
self.assertTrue((part2.gradient() == 0).all())

def test_auto_prune5(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with _test_eager_guard():
self.func_auto_prune5()
self.func_auto_prune5()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def func_auto_prune6(self):
with fluid.dygraph.guard():
2 changes: 2 additions & 0 deletions python/paddle/fluid/tests/unittests/test_imperative_basic.py
@@ -479,9 +479,11 @@ def func_layer_in_out(self):
self.assertTrue(np.array_equal(dy_grad2, static_grad))

def test_layer_in_out(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with _test_eager_guard():
self.func_layer_in_out()
self.func_layer_in_out()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

def func_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
(The remaining changed files are not shown.)
