diff --git a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py
index 1911ec3ef65af..a5f59c6d1f261 100644
--- a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py
+++ b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py
@@ -57,7 +57,30 @@ def test_check_grad(self):
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
                  "core is not compiled with CUDA")
-class TestDropoutBiasFuseOp1(unittest.TestCase):
+class TestSoftmaxMaskFuseOp1(OpTest):
+    def setUp(self):
+        self.op_type = "softmax_mask_fuse_upper_triangle"
+        x = np.random.random((1, 1, 32, 32))
+        self.inputs = {'X': x}
+        rst = _get_softmax_upper(x)
+        self.outputs = {'Out': rst}
+
+    def test_check_output(self):
+        try:
+            self.check_output_with_place(core.CPUPlace())
+        except NotImplementedError:
+            pass
+
+    def test_check_grad(self):
+        try:
+            self.check_grad_with_place(core.CPUPlace(), ["X"], "Out")
+        except NotImplementedError:
+            pass
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestDropoutBiasFuseOp2(unittest.TestCase):
     # test the python side API for softmax_mask_fuse op
     def setUp(self):
         np.random.seed(123)
diff --git a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py
index 5ebeadc02fe65..b81ad4ecdc82a 100644
--- a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py
+++ b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py
@@ -24,7 +24,7 @@ def softmax_mask_fuse_upper_triangle(x):
     Fuse softmax mask together without even give a mask.
     Under GPT model, the mask is always be a upper triangle
     so we can simply mask the upper triangle part of x to get the mask result
-    :param x: the input x
+    :param x: the input x (rst of QK)
     :return: the result of softmax mask fuse (upper triangle)
     """
     if in_dygraph_mode():
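
The new CPU test builds its expected output from the file's existing `_get_softmax_upper` reference helper, which lies outside this hunk. A minimal NumPy sketch of such a reference is shown below; the helper name, the mask fill value, and the final usage line are illustrative assumptions, not the test file's actual code.

```python
import numpy as np


def _softmax_upper_reference(x):
    """Hypothetical stand-in for the test file's _get_softmax_upper helper."""
    # x: attention scores of shape (batch, heads, seq_q, seq_k), i.e. the QK result.
    seq_q, seq_k = x.shape[-2], x.shape[-1]
    # Mask the strictly upper-triangular part (future positions) with a large
    # negative value so those positions get ~0 probability after softmax.
    mask = np.triu(np.ones((seq_q, seq_k), dtype=bool), k=1)
    masked = np.where(mask, -1e4, x)
    # Numerically stable softmax over the last axis.
    masked = masked - masked.max(axis=-1, keepdims=True)
    e = np.exp(masked)
    return e / e.sum(axis=-1, keepdims=True)


# Shape mirrors the new unit test: (1, 1, 32, 32).
rst = _softmax_upper_reference(np.random.random((1, 1, 32, 32)))
```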