2020import paddle
2121import paddle .base .dygraph as dg
2222import paddle .nn .functional as F
23- from paddle import base
23+ from paddle import base , nn
2424
2525
2626def gelu (x , approximate ):
27+ if approximate == "tanh" :
28+ approximate = True
29+ if approximate == "none" :
30+ approximate = False
2731 if approximate :
2832 y_ref = (
2933 0.5
@@ -46,9 +50,14 @@ def _test_case1_cpu(self, approximate):
4650 place = base .CPUPlace ()
4751 with dg .guard (place ) as g :
4852 x_var = paddle .to_tensor (x )
49- y_var = F .gelu (x_var , approximate )
50- y_test = y_var .numpy ()
51- np .testing .assert_allclose (y_ref , y_test , rtol = 1e-05 , atol = 1e-08 )
53+ y_var1 = F .gelu (x_var , approximate )
54+ y_test1 = y_var1 .numpy ()
55+
56+ func = nn .GELU (approximate )
57+ y_var2 = func (x_var )
58+ y_test2 = y_var2 .numpy ()
59+ np .testing .assert_allclose (y_ref , y_test1 , rtol = 1e-05 , atol = 1e-08 )
60+ np .testing .assert_allclose (y_ref , y_test2 , rtol = 1e-05 , atol = 1e-08 )
5261
5362 def _test_case1_gpu (self , approximate ):
5463 x = np .random .uniform (- 1 , 1 , size = (11 , 17 )).astype (np .float32 )
@@ -57,12 +66,17 @@ def _test_case1_gpu(self, approximate):
5766 place = base .CUDAPlace (0 )
5867 with dg .guard (place ) as g :
5968 x_var = paddle .to_tensor (x )
60- y_var = F .gelu (x_var , approximate )
61- y_test = y_var .numpy ()
62- np .testing .assert_allclose (y_ref , y_test , rtol = 1e-05 , atol = 1e-08 )
69+ y_var1 = F .gelu (x_var , approximate )
70+ y_test1 = y_var1 .numpy ()
71+
72+ func = nn .GELU (approximate )
73+ y_var2 = func (x_var )
74+ y_test2 = y_var2 .numpy ()
75+ np .testing .assert_allclose (y_ref , y_test1 , rtol = 1e-05 , atol = 1e-08 )
76+ np .testing .assert_allclose (y_ref , y_test2 , rtol = 1e-05 , atol = 1e-08 )
6377
6478 def test_cases (self ):
65- for approximate in [True , False ]:
79+ for approximate in [True , False , "none" , "tanh" ]:
6680 self ._test_case1_cpu (approximate )
6781 if base .is_compiled_with_cuda ():
6882 self ._test_case1_gpu (approximate )
@@ -86,15 +100,36 @@ def run_gelu_op(approximate):
86100 x_grad = paddle .grad ([y ], [x ], [paddle .to_tensor (y_g_np )])[0 ]
87101 return y .numpy (), x_grad .numpy ()
88102
103+ def run_gelu_class (approximate ):
104+ with dg .guard ():
105+ x = paddle .to_tensor (x_np )
106+ x .stop_gradient = False
107+ func = nn .GELU (approximate = approximate )
108+ y = func (x )
109+ x_grad = paddle .grad ([y ], [x ], [paddle .to_tensor (y_g_np )])[0 ]
110+ return y .numpy (), x_grad .numpy ()
111+
89112 use_fast_math (True )
90- y_fast_math , x_g_fast_math = run_gelu_op (True )
113+ y_fast_math1 , x_g_fast_math1 = run_gelu_op (True )
114+ y_fast_math2 , x_g_fast_math2 = run_gelu_class (True )
91115 use_fast_math (False )
92116
93- y_ref , x_g_ref = run_gelu_op (True )
94- np .testing .assert_allclose (y_ref , y_fast_math , rtol = 1e-05 , atol = 0.0005 )
117+ y_ref1 , x_g_ref1 = run_gelu_op (True )
118+ y_ref2 , x_g_ref2 = run_gelu_class (True )
119+ np .testing .assert_allclose (
120+ y_ref1 , y_fast_math1 , rtol = 1e-05 , atol = 0.0005
121+ )
122+
123+ np .testing .assert_allclose (
124+ x_g_ref1 , x_g_fast_math1 , rtol = 1e-05 , atol = 0.0005
125+ )
126+
127+ np .testing .assert_allclose (
128+ y_ref2 , y_fast_math2 , rtol = 1e-05 , atol = 0.0005
129+ )
95130
96131 np .testing .assert_allclose (
97- x_g_ref , x_g_fast_math , rtol = 1e-05 , atol = 0.0005
132+ x_g_ref2 , x_g_fast_math2 , rtol = 1e-05 , atol = 0.0005
98133 )
99134
100135
@@ -105,38 +140,97 @@ def _test_case1_cpu(self, approximate):
105140
106141 place = base .CPUPlace ()
107142 with dg .guard (place ) as g :
108- x_var = paddle .to_tensor (x )
109- x_var .stop_gradient = False
110- y_var = F .gelu (x_var , approximate )
111- y_test = y_var .numpy ()
143+ x_var1 = paddle .to_tensor (x )
144+ x_var2 = paddle .to_tensor (x )
145+
146+ x_var1 .stop_gradient = False
147+ x_var2 .stop_gradient = False
148+
149+ y_var1 = F .gelu (x_var1 , approximate )
150+ y_test1 = y_var1 .numpy ()
151+
152+ func = nn .GELU (approximate )
153+ y_var2 = func (x_var2 )
154+ y_test2 = y_var2 .numpy ()
112155
113- loss = paddle .sum (y_var )
114- loss .backward ()
115- np .testing .assert_allclose (y_ref , y_test , rtol = 1e-05 , atol = 1e-08 )
116- np .testing .assert_allclose (x_var .grad .shape , x_var .shape )
156+ loss1 = paddle .sum (y_var1 )
157+ loss1 .backward ()
158+
159+ loss2 = paddle .sum (y_var2 )
160+ loss2 .backward ()
161+ np .testing .assert_allclose (y_ref , y_test1 , rtol = 1e-05 , atol = 1e-08 )
162+ np .testing .assert_allclose (x_var1 .grad .shape , x_var1 .shape )
163+
164+ np .testing .assert_allclose (y_ref , y_test2 , rtol = 1e-05 , atol = 1e-08 )
165+ np .testing .assert_allclose (x_var2 .grad .shape , x_var2 .shape )
117166
118167 def _test_case1_gpu (self , approximate ):
119168 x = np .random .uniform (- 1 , 1 , size = (0 , 17 )).astype (np .float32 )
120169 y_ref = gelu (x , approximate )
121170
122171 place = base .CUDAPlace (0 )
123172 with dg .guard (place ) as g :
124- x_var = paddle .to_tensor (x )
125- x_var .stop_gradient = False
126- y_var = F .gelu (x_var , approximate )
127- y_test = y_var .numpy ()
173+ x_var1 = paddle .to_tensor (x )
174+ x_var2 = paddle .to_tensor (x )
175+
176+ x_var1 .stop_gradient = False
177+ x_var2 .stop_gradient = False
178+
179+ y_var1 = F .gelu (x_var1 , approximate )
180+ y_test1 = y_var1 .numpy ()
128181
129- loss = paddle .sum (y_var )
130- loss .backward ()
131- np .testing .assert_allclose (y_ref , y_test , rtol = 1e-05 , atol = 1e-08 )
132- np .testing .assert_allclose (x_var .grad .shape , x_var .shape )
182+ func = nn .GELU (approximate )
183+ y_var2 = func (x_var2 )
184+ y_test2 = y_var2 .numpy ()
185+
186+ loss1 = paddle .sum (y_var1 )
187+ loss1 .backward ()
188+
189+ loss2 = paddle .sum (y_var2 )
190+ loss2 .backward ()
191+ np .testing .assert_allclose (y_ref , y_test1 , rtol = 1e-05 , atol = 1e-08 )
192+ np .testing .assert_allclose (x_var1 .grad .shape , x_var1 .shape )
193+
194+ np .testing .assert_allclose (y_ref , y_test2 , rtol = 1e-05 , atol = 1e-08 )
195+ np .testing .assert_allclose (x_var2 .grad .shape , x_var2 .shape )
133196
134197 def test_cases (self ):
135- for approximate in [True , False ]:
198+ for approximate in [True , False , "none" , "tanh" ]:
136199 self ._test_case1_cpu (approximate )
137200 if base .is_compiled_with_cuda ():
138201 self ._test_case1_gpu (approximate )
139202
140203
class TestGeluError(unittest.TestCase):
    """Invalid ``approximate`` arguments must raise ``TypeError`` for both
    the functional (``F.gelu``) and layer (``nn.GELU``) forms of GELU."""

    def setUp(self):
        # A fixed-size random input shared by all error checks.
        x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
        self.x = paddle.to_tensor(x)

    def test_gelu_op_error(self):
        """An unsupported string or a non-bool/non-str value is rejected
        by the functional API."""
        # assertRaises as a context manager replaces the previous nested
        # throwaway functions (which also left unused `y` locals).
        with self.assertRaises(TypeError):
            F.gelu(self.x, "tan")
        with self.assertRaises(TypeError):
            F.gelu(self.x, 1234)

    def test_gelu_class_error(self):
        """The layer API rejects the same invalid ``approximate`` values."""
        with self.assertRaises(TypeError):
            nn.GELU("tan")(self.x)
        with self.assertRaises(TypeError):
            nn.GELU(1234)(self.x)
233+
234+
if __name__ == '__main__':
    # Allow executing this test module directly as a script.
    unittest.main()
0 commit comments