From 8b1107722172cedcd07fdbe379e0f51256cab4ed Mon Sep 17 00:00:00 2001 From: liuhui29 Date: Tue, 20 Oct 2020 11:40:20 +0800 Subject: [PATCH 1/5] rename count_include_pad-->exclusive return_indices-->return_mask --- .../unittests/test_adaptive_max_pool2d.py | 2 +- .../fluid/tests/unittests/test_pool1d_api.py | 6 +- .../fluid/tests/unittests/test_pool2d_api.py | 10 +- .../fluid/tests/unittests/test_pool3d_api.py | 10 +- python/paddle/nn/functional/pooling.py | 110 +++++++++--------- python/paddle/nn/layer/pooling.py | 98 ++++++++-------- 6 files changed, 118 insertions(+), 118 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py index 14de5aa53a5f5..920ca82787b0b 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py @@ -150,7 +150,7 @@ def test_dynamic_graph(self): x = paddle.to_tensor(self.x_np) out_1 = paddle.nn.functional.adaptive_max_pool2d( - x=x, return_indices=False, output_size=[3, 3]) + x=x, return_mask=False, output_size=[3, 3]) out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5) diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index c1169dfc5210a..2241af907eca9 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -152,7 +152,7 @@ def check_avg_dygraph_padding_results(self, place): kernel_size=2, stride=2, padding=[1], - count_include_pad=True) + exclusive=True) result_np = avg_pool1D_forward_naive( input_np, ksize=[2], strides=[2], paddings=[1], exclusive=False) @@ -160,7 +160,7 @@ def check_avg_dygraph_padding_results(self, place): self.assertTrue(np.allclose(result.numpy(), result_np)) avg_pool1d_dg = paddle.nn.AvgPool1d( - kernel_size=2, stride=None, padding=1, count_include_pad=True) + kernel_size=2, stride=None, padding=1, exclusive=True) result = avg_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -200,7 +200,7 @@ def check_max_dygraph_return_index_results(self, place): input_np = np.random.random([2, 3, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) result, index = F.max_pool1d( - input, kernel_size=2, stride=2, padding=0, return_indices=True) + input, kernel_size=2, stride=2, padding=0, return_mask=True) result_np = max_pool1D_forward_naive( input_np, ksize=[2], strides=[2], paddings=[0]) diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_api.py b/python/paddle/fluid/tests/unittests/test_pool2d_api.py index 91faf78418b0d..8a8edfade9cb2 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_api.py @@ -134,7 +134,7 @@ def check_max_dygraph_results(self, place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) result = max_pool2d( - input, kernel_size=2, stride=2, padding=0, return_indices=False) + input, kernel_size=2, stride=2, padding=0, return_mask=False) result_np = pool2D_forward_naive( input_np, @@ -159,7 +159,7 @@ def check_max_dygraph_nhwc_results(self, place): kernel_size=2, stride=2, padding=0, - return_indices=False, + return_mask=False, data_format="NHWC") result_np = pool2D_forward_naive( @@ -222,7 +222,7 @@ def check_max_dygraph_stride_is_none(self, place): kernel_size=2, stride=None, padding="SAME", - 
return_indices=True) + return_mask=True) result_np = pool2D_forward_naive( input_np, @@ -269,7 +269,7 @@ def check_max_dygraph_padding(self, place): kernel_size=2, stride=2, padding=padding, - return_indices=False) + return_mask=False) result_np = pool2D_forward_naive( input_np, @@ -490,7 +490,7 @@ def run9(): padding=0, ceil_mode=False, data_format='NHWC', - return_indices=True) + return_mask=True) self.assertRaises(ValueError, run9) diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_api.py b/python/paddle/fluid/tests/unittests/test_pool3d_api.py index 505a1c7383841..7941028b3d84c 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_api.py @@ -83,7 +83,7 @@ def check_avg_dygraph_padding_results(self, place): stride=2, padding=1, ceil_mode=False, - count_include_pad=True) + exclusive=True) result_np = avg_pool3D_forward_naive( input_np, @@ -100,7 +100,7 @@ def check_avg_dygraph_padding_results(self, place): stride=None, padding=1, ceil_mode=False, - count_include_pad=True) + exclusive=True) result = avg_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -175,7 +175,7 @@ def check_max_dygraph_ndhwc_results(self, place): stride=2, padding=0, data_format="NDHWC", - return_indices=False) + return_mask=False) result_np = pool3D_forward_naive( input_np, @@ -239,7 +239,7 @@ def check_max_dygraph_stride_is_none(self, place): kernel_size=2, stride=None, padding="SAME", - return_indices=True) + return_mask=True) result_np = pool3D_forward_naive( input_np, @@ -467,7 +467,7 @@ def run10(): stride=2, padding=0, data_format='NDHWC', - return_indices=True) + return_mask=True) self.assertRaises(ValueError, run10) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 73652ff1266f5..d7d93f4b3bf7e 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -157,7 +157,7 @@ def avg_pool1d(x, kernel_size, stride=None, padding=0, - count_include_pad=True, + exclusive=True, ceil_mode=False, name=None): """ @@ -179,7 +179,7 @@ def avg_pool1d(x, 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. - count_include_pad (bool): Whether to exclude padding points in average pooling + exclusive (bool): Whether to exclude padding points in average pooling mode, default is `True`. ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width. If it is set to False, the floor function will be used. The default value is False. 
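(Illustration, not part of the patch: a minimal dygraph sketch of the renamed `exclusive` argument to `avg_pool1d`, in the same style as the docstring examples above; the random input shape is an assumption.)

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    data = paddle.to_tensor(
        np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
    # 'exclusive' is the renamed 'count_include_pad' flag
    out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=1, exclusive=True)
    # out shape: [1, 3, 17]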
@@ -230,7 +230,7 @@ def avg_pool1d(x, x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'use_mkldnn', False, 'exclusive', not exclusive, 'data_format', data_format) return squeeze(output, [2]) @@ -253,7 +253,7 @@ def avg_pool1d(x, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": not count_include_pad, + "exclusive": not exclusive, "data_format": data_format, }) @@ -265,7 +265,7 @@ def avg_pool2d(x, stride=None, padding=0, ceil_mode=False, - count_include_pad=True, + exclusive=True, divisor_override=None, data_format="NCHW", name=None): @@ -294,7 +294,7 @@ def avg_pool2d(x, 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - count_include_pad (bool): Whether to exclude padding points in average pooling + exclusive (bool): Whether to exclude padding points in average pooling mode, default is `true`. divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. @@ -338,7 +338,7 @@ def avg_pool2d(x, x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', False, 'padding_algorithm', padding_algorithm, 'strides', stride, 'paddings', padding, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'use_mkldnn', False, 'exclusive', not exclusive, 'data_format', data_format) if divisor_override is None: return output @@ -365,7 +365,7 @@ def avg_pool2d(x, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": not count_include_pad, + "exclusive": not exclusive, "data_format": data_format, }) @@ -381,7 +381,7 @@ def avg_pool3d(x, stride=None, padding=0, ceil_mode=False, - count_include_pad=True, + exclusive=True, divisor_override=None, data_format="NCDHW", name=None): @@ -408,7 +408,7 @@ def avg_pool3d(x, 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): ${ceil_mode_comment} - count_include_pad (bool): Whether to exclude padding points in average pooling + exclusive (bool): Whether to exclude padding points in average pooling mode, default is True. divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. 
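(Illustration, not part of the patch: the same `count_include_pad` to `exclusive` rename applies to the 2-D and 3-D functional variants; a sketch with an assumed random NCDHW input.)

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    x = paddle.to_tensor(
        np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
    out = F.avg_pool3d(x, kernel_size=2, stride=2, padding=1, exclusive=True)
    # out shape: [1, 3, 17, 17, 17]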
@@ -452,7 +452,7 @@ def avg_pool3d(x, x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride, 'paddings', padding, 'global_pooling', False, 'padding_algorithm', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'use_mkldnn', False, 'exclusive', not exclusive, 'data_format', data_format) if divisor_override is None: return output @@ -481,7 +481,7 @@ def avg_pool3d(x, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": not count_include_pad, + "exclusive": not exclusive, "data_format": data_format, }) @@ -497,7 +497,7 @@ def max_pool1d(x, kernel_size, stride=None, padding=0, - return_indices=False, + return_mask=False, ceil_mode=False, name=None): """ @@ -519,7 +519,7 @@ def max_pool1d(x, 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. - return_indices (bool): Whether return the max indices along with the outputs. default is `False`. + return_mask (bool): Whether return the max indices along with the outputs. default is `False`. ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. If it is set to False, the floor function will be used. Default False. name(str, optional): For detailed information, please refer @@ -542,7 +542,7 @@ def max_pool1d(x, data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) # pool_out shape: [1, 3, 16] - pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True) + pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] """ """NCL to NCHW""" @@ -563,7 +563,7 @@ def max_pool1d(x, padding = _expand_low_nd_padding(padding) if in_dygraph_mode(): - if return_indices: + if return_mask: pool_out = core.ops.max_pool2d_with_index( x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', @@ -572,7 +572,7 @@ def max_pool1d(x, data_format) return (squeeze(pool_out[0], [2]), squeeze( pool_out[1], - [2])) if return_indices else squeeze(pool_out[0], [2]) + [2])) if return_mask else squeeze(pool_out[0], [2]) else: pool_out = core.ops.pool2d( x, 'pooling_type', 'max', 'ksize', kernel_size, @@ -582,7 +582,7 @@ def max_pool1d(x, 'data_format', data_format) return squeeze(pool_out, [2]) - op_type = 'max_pool2d_with_index' if return_indices else "pool2d" + op_type = 'max_pool2d_with_index' if return_mask else "pool2d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -608,14 +608,14 @@ def max_pool1d(x, }) return (squeeze(pool_out, [2]), - squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) + squeeze(mask, [2])) if return_mask else squeeze(pool_out, [2]) def max_pool2d(x, kernel_size, stride=None, padding=0, - return_indices=False, + return_mask=False, ceil_mode=False, data_format="NCHW", name=None): @@ -643,7 +643,7 @@ def max_pool2d(x, 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. 
Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - return_indices (bool): Whether to return the max indices along with the outputs. Default False, only support `"NCHW"` data format + return_mask (bool): Whether to return the max indices along with the outputs. Default False, only support `"NCHW"` data format data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. @@ -668,12 +668,12 @@ def max_pool2d(x, kernel_size=2, stride=2, padding=0) # output.shape [1, 3, 16, 16] - # for return_indices=True + # for return_mask=True out, max_indices = F.max_pool2d(x, kernel_size=2, stride=2, padding=0, - return_indices=True) + return_mask=True) # out.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], """ check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d') @@ -693,20 +693,20 @@ def max_pool2d(x, padding, padding_algorithm = _update_padding_nd( padding, num_dims=2, channel_last=channel_last, ceil_mode=ceil_mode) - if data_format == "NHWC" and return_indices: + if data_format == "NHWC" and return_mask: raise ValueError( - "When setting return_indices to true, data_format must be set to NCHW in API:max_pool2d" + "When setting return_mask to true, data_format must be set to NCHW in API:max_pool2d" ) if in_dygraph_mode(): - if return_indices: + if return_mask: output = core.ops.max_pool2d_with_index( x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) - return output if return_indices else output[0] + return output if return_mask else output[0] else: output = core.ops.pool2d( x, 'pooling_type', 'max', 'ksize', kernel_size, @@ -716,7 +716,7 @@ def max_pool2d(x, 'data_format', data_format) return output - op_type = 'max_pool2d_with_index' if return_indices else "pool2d" + op_type = 'max_pool2d_with_index' if return_mask else "pool2d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -741,14 +741,14 @@ def max_pool2d(x, "data_format": data_format, }) - return (pool_out, mask) if return_indices else pool_out + return (pool_out, mask) if return_mask else pool_out def max_pool3d(x, kernel_size, stride=None, padding=0, - return_indices=False, + return_mask=False, ceil_mode=False, data_format="NCDHW", name=None): @@ -773,7 +773,7 @@ def max_pool3d(x, 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): ${ceil_mode_comment} - return_indices (bool): Whether to return the max indices along with the outputs. Default False. Only support "NDCHW" data_format. + return_mask (bool): Whether to return the max indices along with the outputs. Default False. Only support "NDCHW" data_format. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. The default is `"NCDHW"`. 
When it is `"NCDHW"`, the data is stored in the order of: `[batch_size, input_channels, input_depth, input_height, input_width]`. @@ -798,13 +798,13 @@ def max_pool3d(x, kernel_size=2, stride=2, padding=0) output.shape [1, 3, 16, 16, 16] - # for return_indices=True + # for return_mask=True x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) output, max_indices = paddle.nn.functional.max_pool3d(x, kernel_size = 2, stride = 2, padding=0, - return_indices=True) + return_mask=True) # output.shape [None, 3, 16, 16, 16], max_indices.shape [None, 3, 16, 16, 16], """ check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') @@ -819,20 +819,20 @@ def max_pool3d(x, padding, padding_algorithm = _update_padding_nd( padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) - if data_format == "NDHWC" and return_indices: + if data_format == "NDHWC" and return_mask: raise ValueError( - "When setting return_indices to true, data_format must be set to NCDHW in API:max_pool3d" + "When setting return_mask to true, data_format must be set to NCDHW in API:max_pool3d" ) if in_dygraph_mode(): - if return_indices: + if return_mask: output = core.ops.max_pool3d_with_index( x, 'pooling_type', 'max', 'ksize', kernel_size, 'strides', stride, 'paddings', padding, 'global_pooling', False, 'padding_algorithm', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) - return output if return_indices else output[0] + return output if return_mask else output[0] else: output = core.ops.pool3d( x, 'pooling_type', 'max', 'ksize', kernel_size, @@ -842,7 +842,7 @@ def max_pool3d(x, 'data_format', data_format) return output - op_type = "max_pool3d_with_index" if return_indices else "pool3d" + op_type = "max_pool3d_with_index" if return_mask else "pool3d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -867,7 +867,7 @@ def max_pool3d(x, "data_format": data_format, }) - return (pool_out, mask) if return_indices else pool_out + return (pool_out, mask) if return_mask else pool_out def adaptive_avg_pool1d(x, output_size, name=None): @@ -1148,7 +1148,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): return pool_out -def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): +def adaptive_max_pool1d(x, output_size, return_mask=False, name=None): """ This API implements adaptive max pooling 1d operation. See more details in :ref:`api_nn_pooling_AdaptiveMaxPool1d` . @@ -1159,7 +1159,7 @@ def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): where N is batch size, C is the number of channels, L is the length of the feature. The data type is float32 or float64. output_size (int): The pool kernel size. The value should be an integer. - return_indices (bool): If true, the index of max pooling point will be returned along + return_mask (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. 
Usually name is no need to set and @@ -1190,7 +1190,7 @@ def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) pool_out = F.adaptive_max_pool1d(data, output_size=16) # pool_out shape: [1, 3, 16]) - pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True) + pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_mask=True) # pool_out shape: [1, 3, 16] indices shape: [1, 3, 16] """ pool_type = 'max' @@ -1198,7 +1198,7 @@ def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): 'adaptive_max_pool1d') _check_input(x, 3) check_type(output_size, 'pool_size', int, 'adaptive_max_pool1d') - check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d') + check_type(return_mask, 'return_mask', bool, 'adaptive_max_pool1d') pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') @@ -1209,7 +1209,7 @@ def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): pool_out = core.ops.max_pool2d_with_index( x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True) return (squeeze(pool_out[0], [2]), squeeze( - pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + pool_out[1], [2])) if return_mask else squeeze(pool_out[0], [2]) helper = LayerHelper(l_type, **locals()) dtype = helper.input_dtype() @@ -1229,10 +1229,10 @@ def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): }) return (squeeze(pool_out, [2]), - squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) + squeeze(mask, [2])) if return_mask else squeeze(pool_out, [2]) -def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): +def adaptive_max_pool2d(x, output_size, return_mask=False, name=None): """ This operation applies a 2D adaptive max pooling on input tensor. See more details in :ref:`api_nn_pooling_AdaptiveMaxPool2d` . @@ -1240,7 +1240,7 @@ def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): Args: x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float16, float32, float64, int32 or int64. output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either a int, or None which means the size will be the same as that of the input. - return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. + return_mask (bool): If true, the index of max pooling point will be returned along with outputs. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. 
Returns: @@ -1280,7 +1280,7 @@ def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): 'adaptive_max_pool2d') _check_input(x, 4) #check_type(output_size, 'pool_size', (int), 'adaptive_max_pool2d') - check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool2d') + check_type(return_mask, 'return_mask', bool, 'adaptive_max_pool2d') in_h, in_w = x.shape[2:4] if isinstance(output_size, int): @@ -1295,7 +1295,7 @@ def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): if in_dygraph_mode(): pool_out = core.ops.max_pool2d_with_index( x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True) - return pool_out if return_indices else pool_out[0] + return pool_out if return_mask else pool_out[0] l_type = 'max_pool2d_with_index' @@ -1315,11 +1315,11 @@ def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): "ksize": output_size, "adaptive": True, }) - #return (pool_out, mask) if return_indices else pool_out + #return (pool_out, mask) if return_mask else pool_out return pool_out -def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): +def adaptive_max_pool3d(x, output_size, return_mask=False, name=None): """ This operation applies a 3D adaptive max pooling on input tensor. See more details in :ref:`api_nn_pooling_AdaptiveMaxPool3d` . @@ -1327,7 +1327,7 @@ def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): Args: x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64. output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means the size will be the same as that of the input. - return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. + return_mask (bool): If true, the index of max pooling point will be returned along with outputs. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. 
Returns: @@ -1371,7 +1371,7 @@ def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): 'adaptive_max_pool3d') _check_input(x, 5) #check_type(output_size, 'pool_size', (int), 'adaptive_max_pool3d') - check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool3d') + check_type(return_mask, 'return_mask', bool, 'adaptive_max_pool3d') in_l, in_h, in_w = x.shape[2:5] if isinstance(output_size, int): @@ -1388,7 +1388,7 @@ def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): if in_dygraph_mode(): pool_out = core.ops.max_pool3d_with_index( x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True) - return pool_out if return_indices else pool_out[0] + return pool_out if return_mask else pool_out[0] l_type = 'max_pool3d_with_index' @@ -1409,4 +1409,4 @@ def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): "adaptive": True, }) - return (pool_out, mask) if return_indices else pool_out + return (pool_out, mask) if return_mask else pool_out diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 129dae93b3832..ae2174b300795 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -35,7 +35,7 @@ class AvgPool1d(layers.Layer): """ This operation applies a 1D average pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. + of several input planes, based on the input, output_size, return_mask parameters. Input(X) and output(Out) are in NCL format, where N is batch size, C is the number of channels, L is the length of the feature. The output tensor shape will be [N, C, output_size]. @@ -61,7 +61,7 @@ class AvgPool1d(layers.Layer): 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. - count_include_pad (bool): Whether to exclude padding points in average pooling + exclusive (bool): Whether to exclude padding points in average pooling mode, default is `True`. ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width. If it is set to False, the floor function will be used. The default value is False. @@ -103,7 +103,7 @@ def __init__(self, kernel_size, stride=None, padding=0, - count_include_pad=True, + exclusive=True, ceil_mode=False, name=None): super(AvgPool1d, self).__init__() @@ -111,12 +111,12 @@ def __init__(self, self.stride = stride self.padding = padding self.ceil_mode = ceil_mode - self.count_include_pad = count_include_pad + self.exclusive = exclusive self.name = name def forward(self, x): out = F.avg_pool1d(x, self.kernel_size, self.stride, self.padding, - self.count_include_pad, self.ceil_mode, self.name) + self.exclusive, self.ceil_mode, self.name) return out @@ -156,7 +156,7 @@ class AvgPool2d(layers.Layer): 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - count_include_pad (bool): Whether to exclude padding points in average pooling + exclusive (bool): Whether to exclude padding points in average pooling mode, default is `true`. 
divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. @@ -197,7 +197,7 @@ def __init__(self, stride=None, padding=0, ceil_mode=False, - count_include_pad=True, + exclusive=True, divisor_override=None, data_format="NCHW", name=None): @@ -206,7 +206,7 @@ def __init__(self, self.stride = stride self.padding = padding self.ceil_mode = ceil_mode - self.count_include_pad = count_include_pad + self.exclusive = exclusive self.divisor = divisor_override self.data_format = data_format self.name = name @@ -218,7 +218,7 @@ def forward(self, x): stride=self.stride, padding=self.padding, ceil_mode=self.ceil_mode, - count_include_pad=self.count_include_pad, + exclusive=self.exclusive, divisor_override=self.divisor, data_format=self.data_format, name=self.name) @@ -247,7 +247,7 @@ class AvgPool3d(layers.Layer): 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): ${ceil_mode_comment} - count_include_pad (bool): Whether to exclude padding points in average pooling + exclusive (bool): Whether to exclude padding points in average pooling mode, default is True. divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. @@ -289,7 +289,7 @@ def __init__(self, stride, padding=0, ceil_mode=False, - count_include_pad=True, + exclusive=True, divisor_override=None, data_format="NCDHW", name=None): @@ -298,7 +298,7 @@ def __init__(self, self.stride = stride self.padding = padding self.ceil_mode = ceil_mode - self.count_include_pad = count_include_pad + self.exclusive = exclusive self.divisor = divisor_override self.data_format = data_format self.name = name @@ -310,7 +310,7 @@ def forward(self, x): stride=self.stride, padding=self.padding, ceil_mode=self.ceil_mode, - count_include_pad=self.count_include_pad, + exclusive=self.exclusive, divisor_override=self.divisor, data_format=self.data_format, name=self.name) @@ -319,7 +319,7 @@ def forward(self, x): class MaxPool1d(layers.Layer): """ Applies a 1D max pooling over an input signal composed of several input planes based - on the input, output_size, return_indices parameters. + on the input, output_size, return_mask parameters. Input(X) and output(Out) are in NCL format, where N is batch size, C is the number of channels, L is the length of the feature. @@ -343,7 +343,7 @@ class MaxPool1d(layers.Layer): 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. - return_indices (bool): Whether return the max indices along with the outputs. default is `False`. + return_mask (bool): Whether return the max indices along with the outputs. default is `False`. ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. If it is set to False, the floor function will be used. Default False. 
name(str, optional): For detailed information, please refer @@ -377,7 +377,7 @@ class MaxPool1d(layers.Layer): pool_out = MaxPool1d(data) # pool_out shape: [1, 3, 16] - MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True) + MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_mask=True) pool_out, indices = MaxPool1d(data) # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] @@ -387,7 +387,7 @@ def __init__(self, kernel_size, stride=None, padding=0, - return_indices=False, + return_mask=False, ceil_mode=False, name=None): super(MaxPool1d, self).__init__() @@ -395,12 +395,12 @@ def __init__(self, self.stride = stride self.padding = padding self.ceil_mode = ceil_mode - self.return_indices = return_indices + self.return_mask = return_mask self.name = name def forward(self, input): out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding, - self.return_indices, self.ceil_mode, self.name) + self.return_mask, self.ceil_mode, self.name) return out @@ -440,7 +440,7 @@ class MaxPool2d(layers.Layer): 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - return_indices (bool): Whether to return the max indices along with the outputs. + return_mask (bool): Whether to return the max indices along with the outputs. data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. @@ -473,8 +473,8 @@ class MaxPool2d(layers.Layer): output = MaxPool2d(input) # output.shape [1, 3, 16, 16] - # for return_indices=True - MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True) + # for return_mask=True + MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_mask=True) output, max_indices = MaxPool2d(input) # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], """ @@ -483,7 +483,7 @@ def __init__(self, kernel_size, stride=None, padding=0, - return_indices=False, + return_mask=False, ceil_mode=False, data_format="NCHW", name=None): @@ -491,7 +491,7 @@ def __init__(self, self.ksize = kernel_size self.stride = stride self.padding = padding - self.return_indices = return_indices + self.return_mask = return_mask self.ceil_mode = ceil_mode self.data_format = data_format self.name = name @@ -502,7 +502,7 @@ def forward(self, x): kernel_size=self.ksize, stride=self.stride, padding=self.padding, - return_indices=self.return_indices, + return_mask=self.return_mask, data_format=self.data_format, name=self.name) @@ -530,7 +530,7 @@ class MaxPool3d(layers.Layer): 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): ${ceil_mode_comment} - return_indices (bool): Whether to return the max indices along with the outputs. + return_mask (bool): Whether to return the max indices along with the outputs. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. The default is `"NCDHW"`. 
When it is `"NCDHW"`, the data is stored in the order of: `[batch_size, input_channels, input_depth, input_height, input_width]`. @@ -564,8 +564,8 @@ class MaxPool3d(layers.Layer): output = MaxPool3d(input) # output.shape [1, 2, 3, 16, 16] - # for return_indices=True - MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_indices=True) + # for return_mask=True + MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_mask=True) output, max_indices = MaxPool3d(input) # output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16], """ @@ -574,7 +574,7 @@ def __init__(self, kernel_size, stride, padding, - return_indices=False, + return_mask=False, ceil_mode=False, data_format="NCDHW", name=None): @@ -582,7 +582,7 @@ def __init__(self, self.ksize = kernel_size self.stride = stride self.padding = padding - self.return_indices = return_indices + self.return_mask = return_mask self.ceil_mode = ceil_mode self.data_format = data_format self.name = name @@ -593,7 +593,7 @@ def forward(self, x): kernel_size=self.ksize, stride=self.stride, padding=self.padding, - return_indices=self.return_indices, + return_mask=self.return_mask, data_format=self.data_format, name=self.name) @@ -602,7 +602,7 @@ class AdaptiveAvgPool1d(layers.Layer): """ This operation applies a 1D adaptive average pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. + of several input planes, based on the input, output_size, return_mask parameters. Input(X) and output(Out) are in NCL format, where N is batch size, C is the number of channels, L is the length of the feature. The output tensor shape will be [N, C, output_size]. @@ -841,7 +841,7 @@ class AdaptiveMaxPool1d(layers.Layer): """ This operation applies a 1D adaptive max pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. + of several input planes, based on the input, output_size, return_mask parameters. Input(X) and output(Out) are in NCL format, where N is batch size, C is the number of channels, L is the length of the feature. The output tensor shape will be [N, C, output_size]. @@ -859,7 +859,7 @@ class AdaptiveMaxPool1d(layers.Layer): Args: output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain one int. - return_indices (bool): If true, the index of max pooling point will be returned along + return_mask (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. 
Usually name is no need to set and @@ -898,22 +898,22 @@ class AdaptiveMaxPool1d(layers.Layer): pool_out = AdaptiveMaxPool1d(data) # pool_out shape: [1, 3, 16] - # for return_indices = true - AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True) + # for return_mask = true + AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_mask=True) pool_out, indices = AdaptiveMaxPool1d(data) # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] """ - def __init__(self, output_size, return_indices=False, name=None): + def __init__(self, output_size, return_mask=False, name=None): super(AdaptiveMaxPool1d, self).__init__() self.output_size = output_size - self.return_indices = return_indices + self.return_mask = return_mask self.name = name def forward(self, input): return F.adaptive_max_pool1d(input, self.output_size, - self.return_indices, self.name) + self.return_mask, self.name) class AdaptiveMaxPool2d(layers.Layer): @@ -932,7 +932,7 @@ class AdaptiveMaxPool2d(layers.Layer): Output(i ,j) &= max(Input[hstart:hend, wstart:wend]) Parameters: output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two element, (H, W). H and W can be either a int, or None which means the size will be the same as that of the input. - return_indices (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False. + return_mask (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -965,21 +965,21 @@ class AdaptiveMaxPool2d(layers.Layer): paddle.disable_static() input_data = np.random.rand(2, 3, 32, 32) x = paddle.to_tensor(input_data) - adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=3, return_indices=True) + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=3, return_mask=True) pool_out, indices = adaptive_max_pool(x = x) """ - def __init__(self, output_size, return_indices=False, name=None): + def __init__(self, output_size, return_mask=False, name=None): super(AdaptiveMaxPool2d, self).__init__() self._output_size = output_size - self._return_indices = return_indices + self._return_mask = return_mask self._name = name def forward(self, x): return F.adaptive_max_pool2d( x, output_size=self._output_size, - return_indices=self._return_indices, + return_mask=self._return_mask, name=self._name) @@ -1002,7 +1002,7 @@ class AdaptiveMaxPool3d(layers.Layer): Parameters: output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means the size will be the same as that of the input. - return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. + return_mask (bool): If true, the index of max pooling point will be returned along with outputs. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. 
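(Illustration, not part of the patch: a sketch of the adaptive max pooling layer with the renamed `return_mask` flag, mirroring the updated docstring example; the input shape is an assumption.)

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    x = paddle.to_tensor(np.random.rand(2, 3, 8, 32, 32).astype("float32"))
    pool = paddle.nn.AdaptiveMaxPool3d(output_size=4, return_mask=True)
    out, indices = pool(x)
    # out shape: [2, 3, 4, 4, 4], indices shape: [2, 3, 4, 4, 4]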
@@ -1040,21 +1040,21 @@ class AdaptiveMaxPool3d(layers.Layer): pool = paddle.nn.AdaptiveMaxPool3d(output_size=4) out = pool(x) # out shape: [2, 3, 4, 4, 4] - pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_indices=True) + pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_mask=True) out, indices = pool(x) # out shape: [2, 3, 4, 4, 4], indices shape: [2, 3, 4, 4, 4] """ - def __init__(self, output_size, return_indices=False, name=None): + def __init__(self, output_size, return_mask=False, name=None): super(AdaptiveMaxPool3d, self).__init__() self._output_size = output_size - self._return_indices = return_indices + self._return_mask = return_mask self._name = name def forward(self, x): return F.adaptive_max_pool3d( x, output_size=self._output_size, - return_indices=self._return_indices, + return_mask=self._return_mask, name=self._name) From ea162b43e0c1575f90fe41a68b2c784a76b12100 Mon Sep 17 00:00:00 2001 From: liuhui29 Date: Tue, 20 Oct 2020 11:45:39 +0800 Subject: [PATCH 2/5] remove track_running_stats --- .../parallel_dygraph_sync_batch_norm.py | 3 +- python/paddle/nn/layer/norm.py | 50 ++----------------- 2 files changed, 4 insertions(+), 49 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py index b7ef54a5c2a48..9e112cbc55d20 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py @@ -65,8 +65,7 @@ def __init__(self, self._sync_batch_norm2 = SyncBatchNorm( num_filters, weight_attr=False, - bias_attr=False, - track_running_stats=False) + bias_attr=False) def forward(self, inputs): y = self._conv(inputs) diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index ad8dc9b64e78a..45f5539c3a2d7 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -73,7 +73,6 @@ def __init__(self, momentum=0.9, weight_attr=None, bias_attr=None, - track_running_stats=False, data_format="NCHW", name=None): super(_InstanceNormBase, self).__init__() @@ -135,9 +134,6 @@ class InstanceNorm1d(_InstanceNormBase): epsilon(float, optional): A value added to the denominator for numerical stability. Default is 1e-5. momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. - track_running_stats(bool, optional): Whether to use global mean and - variance. In train mode, when setting track_running_stats True, the global mean - and variance are also used during train period. Default: False. weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. @@ -159,9 +155,6 @@ class InstanceNorm1d(_InstanceNormBase): Returns: None. - **Note**: - Momentum and track_running_stats is not effective. The next version will fix the problem . - Examples: @@ -214,9 +207,6 @@ class InstanceNorm2d(_InstanceNormBase): epsilon(float, optional): A value added to the denominator for numerical stability. Default is 1e-5. momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. - track_running_stats(bool, optional): Whether to use global mean and - variance. In train mode, when setting track_running_stats True, the global mean - and variance are also used during train period. 
Default: False. weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. @@ -237,8 +227,6 @@ class InstanceNorm2d(_InstanceNormBase): Returns: None. - **Note**: - Momentum and track_running_stats is not effective. The next version will fix the problem . Examples: @@ -290,9 +278,6 @@ class InstanceNorm3d(_InstanceNormBase): epsilon(float, optional): A value added to the denominator for numerical stability. Default is 1e-5. momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. - track_running_stats(bool, optional): Whether to use global mean and - variance. In train mode, when setting track_running_stats True, the global mean - and variance are also used during train period. Default: False. weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. @@ -313,8 +298,6 @@ class InstanceNorm3d(_InstanceNormBase): Returns: None. - **Note**: - Momentum and track_running_stats is not effective. The next version will fix the problem . Examples: @@ -570,7 +553,6 @@ def __init__(self, weight_attr=None, bias_attr=None, data_format='NCHW', - track_running_stats=True, name=None): super(_BatchNormBase, self).__init__() self._num_features = num_features @@ -636,7 +618,6 @@ def __init__(self, self._momentum = momentum self._epsilon = epsilon self._fuse_with_relu = False - self._track_running_stats = track_running_stats self._name = name def _check_input_dim(self, input): @@ -651,11 +632,7 @@ def forward(self, input): self._check_input_dim(input) - if not self.training and not self._track_running_stats: - raise ValueError( - 'When inference, expected track_running_stats is True.') - - if self.training and not self._track_running_stats: + if self.training: warnings.warn( "When training, we now always track global mean and variance.") @@ -720,9 +697,6 @@ class BatchNorm1d(_BatchNormBase): will create ParamAttr as bias_attr. If it is set to Fasle, the weight is not learnable. If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None. data_format(str, optional): Specify the input data format, may be "NC", "NCL" or "NLC". Defalut "NCL". - track_running_stats(bool, optional): Whether to use global mean and variance. In train period, - True will track global mean and variance used for inference. When inference, track_running_stats must be - True. Default: True. name(str, optional): Name for the BatchNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. Shape: @@ -732,9 +706,6 @@ class BatchNorm1d(_BatchNormBase): Returns: None. - - **Note**: - Now track_running_stats is actucal always true. The next version will fix the problem . Examples: @@ -817,9 +788,6 @@ class BatchNorm2d(_BatchNormBase): will create ParamAttr as bias_attr. If it is set to Fasle, the weight is not learnable. If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None. data_format(str, optional): Specify the input data format, the data format can be "NCHW" or "NHWC". Default: NCHW. - track_running_stats(bool, optional): Whether to use global mean and variance. 
In train period, - True will track global mean and variance used for inference. When inference, track_running_stats must be - True. Default: True. name(str, optional): Name for the BatchNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. Shape: @@ -830,9 +798,6 @@ class BatchNorm2d(_BatchNormBase): Returns: None - **Note**: - Now track_running_stats is actucal always true. The next version will fix the problem . - Examples: .. code-block:: python @@ -912,9 +877,6 @@ class BatchNorm3d(_BatchNormBase): will create ParamAttr as bias_attr. If it is set to Fasle, the weight is not learnable. If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None. data_format(str, optional): Specify the input data format, the data format can be "NCDHW" or "NDHWC. Default: NCDHW. - track_running_stats(bool, optional): Whether to use global mean and variance. In train period, - True will track global mean and variance used for inference. When inference, track_running_stats must be - True. Default: True. name(str, optional): Name for the BatchNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. Shape: @@ -925,9 +887,6 @@ class BatchNorm3d(_BatchNormBase): Returns: None - **Note**: - Now track_running_stats is actucal always true. The next version will fix the problem . - Examples: .. code-block:: python @@ -1024,8 +983,6 @@ class SyncBatchNorm(_BatchNormBase): will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. If it is set to False, this layer will not have trainable bias parameter. Default: None. - track_running_stats(bool, optional): Whether to compute global stats, which including running mean and - running variance. Default: True. Shapes: input: Tensor that the dimension from 2 to 5. @@ -1055,11 +1012,10 @@ def __init__(self, weight_attr=None, bias_attr=None, data_format='NCHW', - track_running_stats=True, name=None): super(SyncBatchNorm, self).__init__(num_features, momentum, epsilon, weight_attr, - bias_attr, data_format, track_running_stats, name) + bias_attr, data_format, name) def forward(self, x): # create output @@ -1150,7 +1106,7 @@ def convert_sync_batchnorm(cls, layer): layer_output = SyncBatchNorm( layer._num_features, layer._momentum, layer._epsilon, layer._weight_attr, layer._bias_attr, layer._data_format, - layer._track_running_stats, layer._name) + layer._name) if layer._weight_attr != False and layer._bias_attr != False: with no_grad(): From 59c01ff33737e0feffa5ce9e0adf2a5385ffc4d1 Mon Sep 17 00:00:00 2001 From: liuhui29 Date: Tue, 20 Oct 2020 13:26:20 +0800 Subject: [PATCH 3/5] fix typo. 
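(Illustration, not part of the patch: after the `track_running_stats` removal in PATCH 2, `SyncBatchNorm` is constructed without that flag and always tracks running statistics; a minimal dygraph sketch with an assumed input.)

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn as nn

    paddle.disable_static()
    x = paddle.to_tensor(np.random.random([2, 3, 8, 8]).astype("float32"))
    sync_bn = nn.SyncBatchNorm(3)  # no track_running_stats argument anymore
    out = sync_bn(x)
    # out shape: [2, 3, 8, 8]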
--- .../parallel_dygraph_sync_batch_norm.py | 4 +--- .../fluid/tests/unittests/test_pool1d_api.py | 6 +----- python/paddle/nn/functional/pooling.py | 18 +++++++++--------- python/paddle/nn/layer/norm.py | 8 ++++---- python/paddle/nn/layer/pooling.py | 4 ++-- 5 files changed, 17 insertions(+), 23 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py index 9e112cbc55d20..63ccbea401eb9 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py @@ -63,9 +63,7 @@ def __init__(self, bias_attr=False) self._sync_batch_norm2 = SyncBatchNorm( - num_filters, - weight_attr=False, - bias_attr=False) + num_filters, weight_attr=False, bias_attr=False) def forward(self, inputs): y = self._conv(inputs) diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index 2241af907eca9..6b3139c2c38c5 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -148,11 +148,7 @@ def check_avg_dygraph_padding_results(self, place): input_np = np.random.random([2, 3, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) result = F.avg_pool1d( - input, - kernel_size=2, - stride=2, - padding=[1], - exclusive=True) + input, kernel_size=2, stride=2, padding=[1], exclusive=True) result_np = avg_pool1D_forward_naive( input_np, ksize=[2], strides=[2], paddings=[1], exclusive=False) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index d7d93f4b3bf7e..73e3cb31221f1 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -230,8 +230,8 @@ def avg_pool1d(x, x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not exclusive, - 'data_format', data_format) + 'use_mkldnn', False, 'exclusive', not exclusive, 'data_format', + data_format) return squeeze(output, [2]) op_type = 'pool2d' @@ -338,8 +338,8 @@ def avg_pool2d(x, x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', False, 'padding_algorithm', padding_algorithm, 'strides', stride, 'paddings', padding, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not exclusive, - 'data_format', data_format) + 'use_mkldnn', False, 'exclusive', not exclusive, 'data_format', + data_format) if divisor_override is None: return output else: @@ -452,8 +452,8 @@ def avg_pool3d(x, x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride, 'paddings', padding, 'global_pooling', False, 'padding_algorithm', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not exclusive, - 'data_format', data_format) + 'use_mkldnn', False, 'exclusive', not exclusive, 'data_format', + data_format) if divisor_override is None: return output else: @@ -570,9 +570,9 @@ def max_pool1d(x, padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) - return (squeeze(pool_out[0], [2]), squeeze( - pool_out[1], - [2])) if return_mask else squeeze(pool_out[0], [2]) + return (squeeze(pool_out[0], [2]), + squeeze(pool_out[1], + [2])) if 
return_mask else squeeze(pool_out[0], [2]) else: pool_out = core.ops.pool2d( x, 'pooling_type', 'max', 'ksize', kernel_size, diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 45f5539c3a2d7..67e087b68872b 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -1103,10 +1103,10 @@ def convert_sync_batchnorm(cls, layer): """ layer_output = layer if isinstance(layer, _BatchNormBase): - layer_output = SyncBatchNorm( - layer._num_features, layer._momentum, layer._epsilon, - layer._weight_attr, layer._bias_attr, layer._data_format, - layer._name) + layer_output = SyncBatchNorm(layer._num_features, layer._momentum, + layer._epsilon, layer._weight_attr, + layer._bias_attr, layer._data_format, + layer._name) if layer._weight_attr != False and layer._bias_attr != False: with no_grad(): diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index ae2174b300795..9800bdaf720ce 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -912,8 +912,8 @@ def __init__(self, output_size, return_mask=False, name=None): self.name = name def forward(self, input): - return F.adaptive_max_pool1d(input, self.output_size, - self.return_mask, self.name) + return F.adaptive_max_pool1d(input, self.output_size, self.return_mask, + self.name) class AdaptiveMaxPool2d(layers.Layer): From 6864ea8dc5461554be63258d23d9ff3e0bbdf4b1 Mon Sep 17 00:00:00 2001 From: liuhui29 Date: Wed, 21 Oct 2020 19:51:45 +0800 Subject: [PATCH 4/5] rename xxxd-->xxxxD --- .../fluid/tests/unittests/test_pool1d_api.py | 15 +- python/paddle/nn/layer/pooling.py | 156 ++++++++++++------ 2 files changed, 111 insertions(+), 60 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index 6b3139c2c38c5..aadecf4df240f 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -105,7 +105,7 @@ def avg_pool1D_forward_naive(x, return out -class TestPool1d_API(unittest.TestCase): +class TestPool1D_API(unittest.TestCase): def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -138,7 +138,7 @@ def check_avg_dygraph_results(self, place): self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool1d_dg = paddle.nn.layer.AvgPool1d( + avg_pool1d_dg = paddle.nn.layer.AvgPool1D( kernel_size=2, stride=None, padding=0) result = avg_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -155,8 +155,13 @@ def check_avg_dygraph_padding_results(self, place): self.assertTrue(np.allclose(result.numpy(), result_np)) +<<<<<<< HEAD avg_pool1d_dg = paddle.nn.AvgPool1d( kernel_size=2, stride=None, padding=1, exclusive=True) +======= + avg_pool1d_dg = paddle.nn.AvgPool1D( + kernel_size=2, stride=None, padding=1, count_include_pad=True) +>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95 result = avg_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -186,7 +191,7 @@ def check_max_dygraph_results(self, place): self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool1d_dg = paddle.nn.layer.MaxPool1d( + max_pool1d_dg = paddle.nn.layer.MaxPool1D( kernel_size=2, stride=None, padding=0) result = max_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -203,7 +208,7 @@ def check_max_dygraph_return_index_results(self, place): self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool1d_dg = 
+        max_pool1d_dg = paddle.nn.layer.MaxPool1D(
             kernel_size=2, stride=None, padding=0)
         result = max_pool1d_dg(input)
         self.assertTrue(np.allclose(result.numpy(), result_np))
@@ -244,7 +249,7 @@ def test_pool1d(self):
             self.check_max_dygraph_return_index_results(place)


-class TestPool2dError_API(unittest.TestCase):
+class TestPool2DError_API(unittest.TestCase):
     def test_error_api(self):
         def run1():
             with fluid.dygraph.guard():
diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py
index 9800bdaf720ce..1f1b4a86cb153 100755
--- a/python/paddle/nn/layer/pooling.py
+++ b/python/paddle/nn/layer/pooling.py
@@ -17,22 +17,22 @@
 from .. import functional as F

 __all__ = [
-    'AvgPool1d',
-    'AvgPool2d',
-    'AvgPool3d',
-    'MaxPool1d',
-    'MaxPool2d',
-    'MaxPool3d',
-    'AdaptiveAvgPool1d',
-    'AdaptiveAvgPool2d',
-    'AdaptiveAvgPool3d',
-    'AdaptiveMaxPool1d',
-    'AdaptiveMaxPool2d',
-    'AdaptiveMaxPool3d',
+    'AvgPool1D',
+    'AvgPool2D',
+    'AvgPool3D',
+    'MaxPool1D',
+    'MaxPool2D',
+    'MaxPool3D',
+    'AdaptiveAvgPool1D',
+    'AdaptiveAvgPool2D',
+    'AdaptiveAvgPool3D',
+    'AdaptiveMaxPool1D',
+    'AdaptiveMaxPool2D',
+    'AdaptiveMaxPool3D',
 ]


-class AvgPool1d(layers.Layer):
+class AvgPool1D(layers.Layer):
     """
     This operation applies a 1D average pooling over an input signal composed
     of several input planes, based on the input, output_size, return_mask parameters.
@@ -93,8 +93,8 @@ class AvgPool1d(layers.Layer):
          paddle.disable_static()

          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
-         AvgPool1d = nn.AvgPool1d(kernel_size=2, stride=2, padding=0)
-         pool_out = AvgPool1d(data)
+         AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
+         pool_out = AvgPool1D(data)
          # pool_out shape: [1, 3, 16]

    """
@@ -106,7 +106,7 @@ def __init__(self,
                  exclusive=True,
                  ceil_mode=False,
                  name=None):
-        super(AvgPool1d, self).__init__()
+        super(AvgPool1D, self).__init__()
         self.kernel_size = kernel_size
         self.stride = stride
         self.padding = padding
@@ -120,7 +120,7 @@ def forward(self, x):
         return out


-class AvgPool2d(layers.Layer):
+class AvgPool2D(layers.Layer):
     """
     This operation applies 2D average pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -185,7 +185,7 @@ class AvgPool2d(layers.Layer):
          # avg pool2d
          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
-         AvgPool2d = nn.AvgPool2d(kernel_size=2,
+         AvgPool2D = nn.AvgPool2D(kernel_size=2,
                                   stride=2, padding=0)
          output = AvgPool2D(input)
          # output.shape [1, 3, 16, 16]

    """
@@ -201,7 +201,7 @@ def __init__(self,
                  divisor_override=None,
                  data_format="NCHW",
                  name=None):
-        super(AvgPool2d, self).__init__()
+        super(AvgPool2D, self).__init__()
         self.ksize = kernel_size
         self.stride = stride
         self.padding = padding
@@ -224,7 +224,7 @@ def forward(self, x):
             name=self.name)


-class AvgPool3d(layers.Layer):
+class AvgPool3D(layers.Layer):
     """
     This operation applies 3D average pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -277,9 +277,9 @@ class AvgPool3d(layers.Layer):
          # avg pool3d
          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
-         AvgPool3d = nn.AvgPool3d(kernel_size=2,
+         AvgPool3D = nn.AvgPool3D(kernel_size=2,
                                   stride=2, padding=0)
-         output = AvgPool3d(input)
+         output = AvgPool3D(input)
          # output.shape [1, 2, 3, 16, 16]

    """
@@ -293,7 +293,7 @@ def __init__(self,
                  divisor_override=None,
                  data_format="NCDHW",
                  name=None):
-        super(AvgPool3d, self).__init__()
+        super(AvgPool3D, self).__init__()
         self.ksize = kernel_size
         self.stride = stride
         self.padding = padding
@@ -316,7 +316,7 @@ def forward(self, x):
             name=self.name)


-class MaxPool1d(layers.Layer):
+class MaxPool1D(layers.Layer):
     """
     Applies a 1D max pooling over an input signal composed of several input planes based
     on the input, output_size, return_mask parameters.
@@ -373,12 +373,17 @@ class MaxPool1d(layers.Layer):
          paddle.disable_static()

          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
-         MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
-         pool_out = MaxPool1d(data)
+         MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
+         pool_out = MaxPool1D(data)
          # pool_out shape: [1, 3, 16]

+<<<<<<< HEAD
          MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_mask=True)
          pool_out, indices = MaxPool1d(data)
+=======
+         MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_indices=True)
+         pool_out, indices = MaxPool1D(data)
+>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
          # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]

    """
@@ -390,7 +395,7 @@ def __init__(self,
                  return_mask=False,
                  ceil_mode=False,
                  name=None):
-        super(MaxPool1d, self).__init__()
+        super(MaxPool1D, self).__init__()
         self.kernel_size = kernel_size
         self.stride = stride
         self.padding = padding
@@ -404,7 +409,7 @@ def forward(self, input):
         return out


-class MaxPool2d(layers.Layer):
+class MaxPool2D(layers.Layer):
     """
     This operation applies 2D max pooling over input feature based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -468,14 +473,20 @@ class MaxPool2d(layers.Layer):
          # max pool2d
          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
-         MaxPool2d = nn.MaxPool2d(kernel_size=2,
+         MaxPool2D = nn.MaxPool2D(kernel_size=2,
                                   stride=2, padding=0)
-         output = MaxPool2d(input)
+         output = MaxPool2D(input)
          # output.shape [1, 3, 16, 16]

+<<<<<<< HEAD
          # for return_mask=True
          MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_mask=True)
          output, max_indices = MaxPool2d(input)
+=======
+         # for return_indices=True
+         MaxPool2D = nn.MaxPool2D(kernel_size=2,stride=2, padding=0, return_indices=True)
+         output, max_indices = MaxPool2D(input)
+>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
          # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],

    """
@@ -487,7 +498,7 @@ def __init__(self,
                  ceil_mode=False,
                  data_format="NCHW",
                  name=None):
-        super(MaxPool2d, self).__init__()
+        super(MaxPool2D, self).__init__()
         self.ksize = kernel_size
         self.stride = stride
         self.padding = padding
@@ -507,7 +518,7 @@ def forward(self, x):
             name=self.name)


-class MaxPool3d(layers.Layer):
+class MaxPool3D(layers.Layer):
     """
     This operation applies 3D max pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -559,14 +570,20 @@ class MaxPool3d(layers.Layer):
          # max pool3d
          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
-         MaxPool3d = nn.MaxPool3d(kernel_size=2,
+         MaxPool3D = nn.MaxPool3D(kernel_size=2,
                                   stride=2, padding=0)
-         output = MaxPool3d(input)
+         output = MaxPool3D(input)
          # output.shape [1, 2, 3, 16, 16]

+<<<<<<< HEAD
          # for return_mask=True
          MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_mask=True)
          output, max_indices = MaxPool3d(input)
+=======
+         # for return_indices=True
+         MaxPool3D = nn.MaxPool3D(kernel_size=2,stride=2, padding=0, return_indices=True)
+         output, max_indices = MaxPool3D(input)
+>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
          # output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16],

    """
@@ -578,7 +595,7 @@ def __init__(self,
                  ceil_mode=False,
                  data_format="NCDHW",
                  name=None):
-        super(MaxPool3d, self).__init__()
+        super(MaxPool3D, self).__init__()
         self.ksize = kernel_size
         self.stride = stride
         self.padding = padding
@@ -598,7 +615,7 @@ def forward(self, x):
             name=self.name)


-class AdaptiveAvgPool1d(layers.Layer):
+class AdaptiveAvgPool1D(layers.Layer):
     """

     This operation applies a 1D adaptive average pooling over an input signal composed
@@ -653,13 +670,13 @@ class AdaptiveAvgPool1d(layers.Layer):
          paddle.disable_static()

          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
-         AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16)
-         pool_out = AdaptiveAvgPool1d(data)
+         AdaptiveAvgPool1D = nn.AdaptiveAvgPool1D(output_size=16)
+         pool_out = AdaptiveAvgPool1D(data)
          # pool_out shape: [1, 3, 16]
    """

    def __init__(self, output_size, name=None):
-        super(AdaptiveAvgPool1d, self).__init__()
+        super(AdaptiveAvgPool1D, self).__init__()
         self.output_size = output_size
         self.name = name

@@ -667,7 +684,7 @@ def forward(self, input):
         return F.adaptive_avg_pool1d(input, self.output_size, self.name)


-class AdaptiveAvgPool2d(layers.Layer):
+class AdaptiveAvgPool2D(layers.Layer):
     """

     This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
@@ -704,7 +721,7 @@ class AdaptiveAvgPool2d(layers.Layer):
         output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x.

     Returns:
-        A callable object of AdaptiveAvgPool2d.
+        A callable object of AdaptiveAvgPool2D.

     Examples:
         .. code-block:: python
@@ -730,13 +747,13 @@ class AdaptiveAvgPool2d(layers.Layer):
             input_data = np.random.rand(2, 3, 32, 32)
             x = paddle.to_tensor(input_data)
             # x.shape is [2, 3, 32, 32]
-            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3)
+            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3)
             pool_out = adaptive_avg_pool(x = x)
             # pool_out.shape is [2, 3, 3, 3]
    """

    def __init__(self, output_size, data_format="NCHW", name=None):
-        super(AdaptiveAvgPool2d, self).__init__()
+        super(AdaptiveAvgPool2D, self).__init__()
         self._output_size = output_size
         self._data_format = data_format
         self._name = name
@@ -749,7 +766,7 @@ def forward(self, x):
             name=self._name)


-class AdaptiveAvgPool3d(layers.Layer):
+class AdaptiveAvgPool3D(layers.Layer):
     """

     This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
@@ -789,7 +806,7 @@ class AdaptiveAvgPool3d(layers.Layer):
         output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x.

     Returns:
-        A callable object of AdaptiveAvgPool3d.
+        A callable object of AdaptiveAvgPool3D.

     Examples:
         .. code-block:: python
@@ -818,13 +835,13 @@ class AdaptiveAvgPool3d(layers.Layer):
             input_data = np.random.rand(2, 3, 8, 32, 32)
             x = paddle.to_tensor(input_data)
             # x.shape is [2, 3, 8, 32, 32]
-            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3)
+            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3)
             pool_out = adaptive_avg_pool(x = x)
             # pool_out = [2, 3, 3, 3, 3]
    """

    def __init__(self, output_size, data_format="NCDHW", name=None):
-        super(AdaptiveAvgPool3d, self).__init__()
+        super(AdaptiveAvgPool3D, self).__init__()
         self._output_size = output_size
         self._data_format = data_format
         self._name = name
@@ -837,7 +854,7 @@ def forward(self, x):
             name=self._name)


-class AdaptiveMaxPool1d(layers.Layer):
+class AdaptiveMaxPool1D(layers.Layer):
     """

     This operation applies a 1D adaptive max pooling over an input signal composed
@@ -894,19 +911,30 @@ class AdaptiveMaxPool1d(layers.Layer):
          paddle.disable_static()

          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
-         AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16)
-         pool_out = AdaptiveMaxPool1d(data)
+         AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16)
+         pool_out = AdaptiveMaxPool1D(data)
          # pool_out shape: [1, 3, 16]

+<<<<<<< HEAD
          # for return_mask = true
          AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_mask=True)
          pool_out, indices = AdaptiveMaxPool1d(data)
+=======
+         # for return_indices = true
+         AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_indices=True)
+         pool_out, indices = AdaptiveMaxPool1D(data)
+>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
          # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]

    """

+<<<<<<< HEAD
    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool1d, self).__init__()
+=======
+    def __init__(self, output_size, return_indices=False, name=None):
+        super(AdaptiveMaxPool1D, self).__init__()
+>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
         self.output_size = output_size
         self.return_mask = return_mask
         self.name = name
@@ -916,7 +944,7 @@ def forward(self, input):
                               self.name)


-class AdaptiveMaxPool2d(layers.Layer):
+class AdaptiveMaxPool2D(layers.Layer):
     """
     This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and pooling is that adaptive pooling focuses on the output size.

@@ -941,7 +969,7 @@ class AdaptiveMaxPool2d(layers.Layer):
         output (Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type is same as input x.

     Returns:
-        A callable object of AdaptiveMaxPool2d.
+        A callable object of AdaptiveMaxPool2D.

     Examples:
         .. code-block:: python
@@ -965,12 +993,21 @@ class AdaptiveMaxPool2d(layers.Layer):
             paddle.disable_static()
             input_data = np.random.rand(2, 3, 32, 32)
             x = paddle.to_tensor(input_data)
+<<<<<<< HEAD
             adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=3, return_mask=True)
             pool_out, indices = adaptive_max_pool(x = x)
    """

    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool2d, self).__init__()
+=======
+            adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_indices=True)
+            pool_out, indices = adaptive_max_pool(x = x)
+    """
+
+    def __init__(self, output_size, return_indices=False, name=None):
+        super(AdaptiveMaxPool2D, self).__init__()
+>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
         self._output_size = output_size
         self._return_mask = return_mask
         self._name = name
@@ -983,7 +1020,7 @@ def forward(self, x):
             name=self._name)


-class AdaptiveMaxPool3d(layers.Layer):
+class AdaptiveMaxPool3D(layers.Layer):
     """
     This operation applies 3D adaptive max pooling on input tensor. The h and w dimensions of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and pooling is that adaptive pooling focuses on the output size.

@@ -1010,7 +1047,7 @@ class AdaptiveMaxPool3d(layers.Layer):
         x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
         output (Tensor): The output tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type is same as input x.
     Returns:
-        A callable object of AdaptiveMaxPool3d.
+        A callable object of AdaptiveMaxPool3D.

     Examples:
         .. code-block:: python
@@ -1037,17 +1074,26 @@ class AdaptiveMaxPool3d(layers.Layer):
             paddle.disable_static()
             input_data = np.random.rand(2, 3, 8, 32, 32)
             x = paddle.to_tensor(input_data)
-            pool = paddle.nn.AdaptiveMaxPool3d(output_size=4)
+            pool = paddle.nn.AdaptiveMaxPool3D(output_size=4)
             out = pool(x)
             # out shape: [2, 3, 4, 4, 4]
+<<<<<<< HEAD
             pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_mask=True)
+=======
+            pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_indices=True)
+>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
             out, indices = pool(x)
             # out shape: [2, 3, 4, 4, 4], indices shape: [2, 3, 4, 4, 4]

    """

+<<<<<<< HEAD
    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool3d, self).__init__()
+=======
+    def __init__(self, output_size, return_indices=False, name=None):
+        super(AdaptiveMaxPool3D, self).__init__()
+>>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
         self._output_size = output_size
         self._return_mask = return_mask
         self._name = name

From 48874e832c6e048f39c16a7e8c4e9351ddb4276a Mon Sep 17 00:00:00 2001
From: liuhui29
Date: Wed, 21 Oct 2020 20:16:15 +0800
Subject: [PATCH 5/5] solve conflicts

---
 .../fluid/tests/unittests/test_pool1d_api.py |  7 +--
 python/paddle/nn/layer/pooling.py            | 58 ++-----------------
 2 files changed, 7 insertions(+), 58 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py
index aadecf4df240f..cf034b5200083 100644
--- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py
+++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py
@@ -155,13 +155,8 @@ def check_avg_dygraph_padding_results(self, place):

         self.assertTrue(np.allclose(result.numpy(), result_np))

-<<<<<<< HEAD
-        avg_pool1d_dg = paddle.nn.AvgPool1d(
-            kernel_size=2, stride=None, padding=1, exclusive=True)
-=======
         avg_pool1d_dg = paddle.nn.AvgPool1D(
-            kernel_size=2, stride=None, padding=1, count_include_pad=True)
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
+            kernel_size=2, stride=None, padding=1, exclusive=True)
         result = avg_pool1d_dg(input)
         self.assertTrue(np.allclose(result.numpy(), result_np))

diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py
index 1f1b4a86cb153..2be3d886d9fd8 100755
--- a/python/paddle/nn/layer/pooling.py
+++ b/python/paddle/nn/layer/pooling.py
@@ -377,13 +377,8 @@ class MaxPool1D(layers.Layer):
          pool_out = MaxPool1D(data)
          # pool_out shape: [1, 3, 16]

-<<<<<<< HEAD
-         MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_mask=True)
-         pool_out, indices = MaxPool1d(data)
-=======
-         MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_indices=True)
+         MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True)
          pool_out, indices = MaxPool1D(data)
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
          # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]

    """
@@ -478,15 +473,9 @@ class MaxPool2D(layers.Layer):
          output = MaxPool2D(input)
          # output.shape [1, 3, 16, 16]

-<<<<<<< HEAD
          # for return_mask=True
-         MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_mask=True)
-         output, max_indices = MaxPool2d(input)
-=======
-         # for return_indices=True
-         MaxPool2D = nn.MaxPool2D(kernel_size=2,stride=2, padding=0, return_indices=True)
+         MaxPool2D = nn.MaxPool2D(kernel_size=2,stride=2, padding=0, return_mask=True)
          output, max_indices = MaxPool2D(input)
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
          # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],

    """
@@ -575,15 +564,9 @@ class MaxPool3D(layers.Layer):
          output = MaxPool3D(input)
          # output.shape [1, 2, 3, 16, 16]

-<<<<<<< HEAD
          # for return_mask=True
-         MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_mask=True)
-         output, max_indices = MaxPool3d(input)
-=======
-         # for return_indices=True
-         MaxPool3D = nn.MaxPool3D(kernel_size=2,stride=2, padding=0, return_indices=True)
+         MaxPool3D = nn.MaxPool3D(kernel_size=2,stride=2, padding=0, return_mask=True)
          output, max_indices = MaxPool3D(input)
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
          # output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16],

    """
@@ -915,26 +898,15 @@ class AdaptiveMaxPool1D(layers.Layer):
          pool_out = AdaptiveMaxPool1D(data)
          # pool_out shape: [1, 3, 16]

-<<<<<<< HEAD
          # for return_mask = true
-         AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_mask=True)
-         pool_out, indices = AdaptiveMaxPool1d(data)
-=======
-         # for return_indices = true
-         AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_indices=True)
+         AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
          pool_out, indices = AdaptiveMaxPool1D(data)
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
          # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]

    """

-<<<<<<< HEAD
    def __init__(self, output_size, return_mask=False, name=None):
-        super(AdaptiveMaxPool1d, self).__init__()
-=======
-    def __init__(self, output_size, return_indices=False, name=None):
         super(AdaptiveMaxPool1D, self).__init__()
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
         self.output_size = output_size
         self.return_mask = return_mask
         self.name = name
@@ -993,21 +965,12 @@ class AdaptiveMaxPool2D(layers.Layer):
             paddle.disable_static()
             input_data = np.random.rand(2, 3, 32, 32)
             x = paddle.to_tensor(input_data)
-<<<<<<< HEAD
-            adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=3, return_mask=True)
+            adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
             pool_out, indices = adaptive_max_pool(x = x)
    """

    def __init__(self, output_size, return_mask=False, name=None):
-        super(AdaptiveMaxPool2d, self).__init__()
-=======
-            adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_indices=True)
-            pool_out, indices = adaptive_max_pool(x = x)
-    """
-
-    def __init__(self, output_size, return_indices=False, name=None):
         super(AdaptiveMaxPool2D, self).__init__()
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
         self._output_size = output_size
         self._return_mask = return_mask
         self._name = name
@@ -1077,23 +1040,14 @@ class AdaptiveMaxPool3D(layers.Layer):
             pool = paddle.nn.AdaptiveMaxPool3D(output_size=4)
             out = pool(x)
             # out shape: [2, 3, 4, 4, 4]
-<<<<<<< HEAD
-            pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_mask=True)
-=======
-            pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_indices=True)
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
+            pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
             out, indices = pool(x)
             # out shape: [2, 3, 4, 4, 4], indices shape: [2, 3, 4, 4, 4]

    """

-<<<<<<< HEAD
    def __init__(self, output_size, return_mask=False, name=None):
-        super(AdaptiveMaxPool3d, self).__init__()
-=======
-    def __init__(self, output_size, return_indices=False, name=None):
         super(AdaptiveMaxPool3D, self).__init__()
->>>>>>> 7c1aa0d69dd21d7db98b1c46873f3a028e344e95
         self._output_size = output_size
         self._return_mask = return_mask
         self._name = name
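
--
The snippet below is a quick end-to-end smoke test of the renamed API surface
covered by this series (1D/2D/3D class names, count_include_pad --> exclusive,
return_indices --> return_mask). It is a minimal sketch, assuming a Paddle
build that includes all five patches; the tensor shapes and values are
arbitrary and chosen only to exercise both renamed keywords.

    import numpy as np
    import paddle
    import paddle.nn as nn

    paddle.disable_static()

    # A [batch, channels, length] input, matching the 1D pooling layout.
    data = paddle.to_tensor(
        np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))

    # AvgPool1D: count_include_pad was renamed to exclusive.
    avg_pool = nn.AvgPool1D(kernel_size=2, stride=2, padding=1, exclusive=True)
    avg_out = avg_pool(data)  # shape: [1, 3, 17]

    # MaxPool1D: return_indices was renamed to return_mask.
    max_pool = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True)
    max_out, indices = max_pool(data)  # both shaped [1, 3, 16]

Note that only the Python-level keyword changes: as the avg_pool hunks above
show, the functional layer still passes 'exclusive', not exclusive down to the
underlying pool2d op attribute, so exclusive=True preserves the behavior that
count_include_pad=True had, and the existing padding tests pass unchanged.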