[Hackathon 6th No.30] Enhance paddle.nn.functional.max_pool1d/max_pool2d/max_pool3d and paddle.signal.stft #62975

Merged: 10 commits, Jul 2, 2024
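Summary: this PR threads a new ceil_mode attribute through the max_pool2d_with_index / max_pool3d_with_index operators (YAML specs, InferMeta, kernels) so that paddle.nn.functional.max_pool1d/max_pool2d/max_pool3d respect ceil_mode when return_mask=True, and makes paddle.signal.stft create its default window on the input tensor's device. A minimal sketch of the user-visible effect (judging by the diff; shapes assume the defaults stride=kernel_size and padding=0, mirroring the test added below):

import paddle
import paddle.nn.functional as F

x = paddle.randn([1, 3, 6])

# Previously, ceil_mode was dropped on the return_mask=True path.
out, mask = F.max_pool1d(x, kernel_size=5, ceil_mode=True, return_mask=True)
print(out.shape)   # [1, 3, 2]: the trailing partial window is kept (ceil)

out2 = F.max_pool1d(x, kernel_size=5, ceil_mode=False)
print(out2.shape)  # [1, 3, 1]: the trailing partial window is dropped (floor)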
8 changes: 4 additions & 4 deletions paddle/phi/api/yaml/backward.yaml
@@ -1543,17 +1543,17 @@
func : matrix_power_grad

- backward_op : max_pool2d_with_index_grad
- forward : max_pool2d_with_index(Tensor x, int[] kernel_size, int[] strides = {1, 1}, int[] paddings = {0, 0}, bool global_pooling = false, bool adaptive = false) -> Tensor(out), Tensor(mask)
- args : (Tensor x, Tensor mask, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool global_pooling, bool adaptive)
+ forward : max_pool2d_with_index(Tensor x, int[] kernel_size, int[] strides = {1, 1}, int[] paddings = {0, 0}, bool global_pooling = false, bool adaptive = false, bool ceil_mode = false) -> Tensor(out), Tensor(mask)
+ args : (Tensor x, Tensor mask, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool global_pooling, bool adaptive, bool ceil_mode = false)
output : Tensor(x_grad)
infer_meta :
func : MaxPoolWithIndexGradInferMeta
kernel :
func : max_pool2d_with_index_grad

- backward_op : max_pool3d_with_index_grad
- forward : max_pool3d_with_index(Tensor x, int[] kernel_size, int[] strides = {1, 1, 1}, int[] paddings = {0, 0, 0}, bool global_pooling = false, bool adaptive = false) -> Tensor(out), Tensor(mask)
- args : (Tensor x, Tensor mask, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool global_pooling, bool adaptive)
+ forward : max_pool3d_with_index(Tensor x, int[] kernel_size, int[] strides = {1, 1, 1}, int[] paddings = {0, 0, 0}, bool global_pooling = false, bool adaptive = false, bool ceil_mode = false) -> Tensor(out), Tensor(mask)
+ args : (Tensor x, Tensor mask, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool global_pooling, bool adaptive, bool ceil_mode = false)
output : Tensor(x_grad)
infer_meta :
func : MaxPoolWithIndexGradInferMeta
4 changes: 2 additions & 2 deletions paddle/phi/api/yaml/ops.yaml
@@ -1877,7 +1877,7 @@
backward : matrix_power_grad

- op : max_pool2d_with_index
- args : (Tensor x, int[] kernel_size, int[] strides= {1, 1}, int[] paddings = {0, 0}, bool global_pooling = false, bool adaptive = false)
+ args : (Tensor x, int[] kernel_size, int[] strides= {1, 1}, int[] paddings = {0, 0}, bool global_pooling = false, bool adaptive = false, bool ceil_mode = false)
output : Tensor(out), Tensor(mask)
infer_meta :
func : MaxPoolWithIndexInferMeta
@@ -1886,7 +1886,7 @@
backward : max_pool2d_with_index_grad

- op : max_pool3d_with_index
- args : (Tensor x, int[] kernel_size, int[] strides = {1, 1, 1}, int[] paddings = {0, 0, 0}, bool global_pooling = false, bool adaptive = false)
+ args : (Tensor x, int[] kernel_size, int[] strides = {1, 1, 1}, int[] paddings = {0, 0, 0}, bool global_pooling = false, bool adaptive = false, bool ceil_mode = false)
output : Tensor(out), Tensor(mask)
infer_meta :
func : MaxPoolWithIndexInferMeta
1 change: 1 addition & 0 deletions paddle/phi/infermeta/backward.cc
@@ -710,6 +710,7 @@ void MaxPoolWithIndexGradInferMeta(const MetaTensor& x,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode,
MetaTensor* dx) {
dx->share_meta(x);
}
1 change: 1 addition & 0 deletions paddle/phi/infermeta/backward.h
@@ -339,6 +339,7 @@ void MaxPoolWithIndexGradInferMeta(const MetaTensor& x,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode,
MetaTensor* dx);

void MeshgridGradInferMeta(const std::vector<const MetaTensor*>& inputs,
4 changes: 3 additions & 1 deletion paddle/phi/infermeta/unary.cc
@@ -2373,6 +2373,7 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode,
MetaTensor* out,
MetaTensor* mask,
MetaConfig config) {
@@ -2431,7 +2432,8 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
funcs::MaxPoolOutputSize(static_cast<int>(x_dims[i + 2]),
kernel_size_[i],
paddings_[i],
- strides[i]));
+ strides[i],
+ ceil_mode));
}
}
}
1 change: 1 addition & 0 deletions paddle/phi/infermeta/unary.h
@@ -364,6 +364,7 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode,
MetaTensor* out,
MetaTensor* mask,
MetaConfig config = MetaConfig());
13 changes: 7 additions & 6 deletions paddle/phi/kernels/funcs/pooling.h
@@ -502,17 +502,18 @@ inline int PoolOutputSize(int input_size,
return output_size;
}

-inline int MaxPoolOutputSize(int input_size,
-                             int filter_size,
-                             int padding,
-                             int stride) {
+inline int MaxPoolOutputSize(
+    int input_size, int filter_size, int padding, int stride, bool ceil_mode) {
PADDLE_ENFORCE_NE(
stride,
0,
phi::errors::InvalidArgument(
"The stride of MaxPool shall not be 0, but received %d.", stride));
-  int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
-  return output_size;
+  if (ceil_mode) {
+    return (input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
+  } else {
+    return (input_size - filter_size + 2 * padding) / stride + 1;
+  }
}

template <typename T = int>
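As a quick sanity check of the two branches above (not part of the PR; plain Python mirroring the C++ integer arithmetic, assuming non-negative operands):

def max_pool_output_size(input_size, filter_size, padding, stride, ceil_mode):
    # Mirrors funcs::MaxPoolOutputSize: ceil_mode rounds the window count up.
    if ceil_mode:
        return (input_size - filter_size + 2 * padding + stride - 1) // stride + 1
    return (input_size - filter_size + 2 * padding) // stride + 1

assert max_pool_output_size(6, 5, 0, 5, ceil_mode=False) == 1  # floor: partial window dropped
assert max_pool_output_size(6, 5, 0, 5, ceil_mode=True) == 2   # ceil: partial window kept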
3 changes: 3 additions & 0 deletions paddle/phi/kernels/impl/pool_grad_kernel_impl.h
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once

#include "paddle/common/ddim.h"
#include "paddle/common/macros.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/pooling.h"
#include "paddle/phi/kernels/pool_grad_kernel.h"
@@ -262,6 +263,7 @@ void MaxPool2dWithIndexGradKernel(const Context& ctx,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode UNUSED,
DenseTensor* dx) {
MaxPoolWithIndexGradRawKernel<Context, T>(ctx,
x,
@@ -317,6 +319,7 @@ void MaxPool3dWithIndexGradKernel(const Context& ctx,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode UNUSED,
DenseTensor* dx) {
MaxPoolWithIndexGradRawKernel<Context, T>(ctx,
x,
2 changes: 2 additions & 0 deletions paddle/phi/kernels/impl/pool_kernel_impl.h
@@ -260,6 +260,7 @@ void MaxPool2dWithIndexKernel(const Context& ctx,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode UNUSED,
DenseTensor* out,
DenseTensor* mask) {
MaxPoolWithIndexRawKernel<Context, T>(ctx,
@@ -309,6 +310,7 @@ void MaxPool3dWithIndexKernel(const Context& ctx,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode UNUSED,
DenseTensor* out,
DenseTensor* mask) {
MaxPoolWithIndexRawKernel<Context, T>(ctx,
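Note: the kernels accept the new flag but mark it ceil_mode UNUSED. That is consistent with the rest of the diff: the out/mask tensors are already shaped by MaxPoolWithIndexInferMeta (which is where ceil_mode actually matters, via MaxPoolOutputSize), so the kernels themselves never need to read it.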
2 changes: 2 additions & 0 deletions paddle/phi/kernels/pool_grad_kernel.h
@@ -96,6 +96,7 @@ void MaxPool2dWithIndexGradKernel(const Context& ctx,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode,
DenseTensor* dx);

template <typename T, typename Context>
@@ -142,6 +143,7 @@ void MaxPool3dWithIndexGradKernel(const Context& ctx,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode,
DenseTensor* dx);

template <typename T, typename Context>
2 changes: 2 additions & 0 deletions paddle/phi/kernels/pool_kernel.h
@@ -60,6 +60,7 @@ void MaxPool2dWithIndexKernel(const Context& ctx,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode,
DenseTensor* out,
DenseTensor* mask);

@@ -101,6 +102,7 @@ void MaxPool3dWithIndexKernel(const Context& ctx,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
+ bool ceil_mode,
DenseTensor* out,
DenseTensor* mask);

1 change: 1 addition & 0 deletions paddle/phi/kernels/xpu/pool_grad_kernel.cc
@@ -386,6 +386,7 @@ void MaxPool2dWithIndexGradKernel(const Context& ctx,
const std::vector<int>& paddings_t,
bool global_pooling,
bool adaptive,
+ bool ceil_mode UNUSED,
DenseTensor* dx) {
using XPUType = typename XPUTypeTrait<T>::Type;

2 changes: 2 additions & 0 deletions paddle/phi/kernels/xpu/pool_kernel.cc
@@ -14,6 +14,7 @@

#include "paddle/phi/kernels/pool_kernel.h"

#include "paddle/common/macros.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/pooling.h"
@@ -308,6 +309,7 @@ void MaxPool2dWithIndexKernel(const Context& ctx,
const std::vector<int>& paddings_t,
bool global_pooling,
bool adaptive,
+ bool ceil_mode UNUSED,
DenseTensor* out,
DenseTensor* mask) {
using XPUType = typename XPUTypeTrait<T>::Type;
15 changes: 9 additions & 6 deletions python/paddle/nn/functional/pooling.py
@@ -629,7 +629,7 @@ def max_pool1d(
if in_dynamic_or_pir_mode():
if return_mask:
pool_out = _C_ops.max_pool2d_with_index(
- x, kernel_size, stride, padding, False, False
+ x, kernel_size, stride, padding, False, False, ceil_mode
)
return (
(squeeze(pool_out[0], [2]), squeeze(pool_out[1], [2]))
@@ -1201,7 +1201,7 @@ def max_pool2d(
if in_dynamic_or_pir_mode():
if return_mask:
output = _C_ops.max_pool2d_with_index(
- x, kernel_size, stride, padding, False, False
+ x, kernel_size, stride, padding, False, False, ceil_mode
)
return output if return_mask else output[0]
else:
@@ -1366,7 +1366,7 @@ def max_pool3d(
if in_dynamic_or_pir_mode():
if return_mask:
output = _C_ops.max_pool3d_with_index(
- x, kernel_size, stride, padding, False, False
+ x, kernel_size, stride, padding, False, False, ceil_mode
)
return output if return_mask else output[0]
else:
@@ -1816,7 +1816,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
x = unsqueeze(x, [2])
if in_dygraph_mode():
pool_out = _C_ops.max_pool2d_with_index(
- x, pool_size, [1, 1], [0, 0], False, True
+ x, pool_size, [1, 1], [0, 0], False, True, False
)
return (
(squeeze(pool_out[0], [2]), squeeze(pool_out[1], [2]))
@@ -1847,6 +1847,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
"pooling_type": 'max',
"ksize": pool_size,
"adaptive": True,
"ceil_mode": False,
},
)

@@ -1910,7 +1911,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
output_size[1] = in_w
if in_dygraph_mode():
pool_out = _C_ops.max_pool2d_with_index(
- x, output_size, [1, 1], [0, 0], False, True
+ x, output_size, [1, 1], [0, 0], False, True, False
)
return pool_out if return_mask else pool_out[0]
else:
@@ -1937,6 +1938,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
"pooling_type": 'max',
"ksize": output_size,
"adaptive": True,
"ceil_mode": False,
},
)
return (pool_out, mask) if return_mask else pool_out
@@ -2002,7 +2004,7 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
if in_dygraph_mode():
# By default, strides is [1,1,1] and paddings is [0, 0, 0]
pool_out = _C_ops.max_pool3d_with_index(
- x, output_size, [1, 1, 1], [0, 0, 0], False, True
+ x, output_size, [1, 1, 1], [0, 0, 0], False, True, False
)
return pool_out if return_mask else pool_out[0]
else:
@@ -2029,6 +2031,7 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
"pooling_type": 'max',
"ksize": output_size,
"adaptive": True,
"ceil_mode": False,
},
)

1 change: 1 addition & 0 deletions python/paddle/signal.py
@@ -362,6 +362,7 @@ def stft(
), f'expected a 1D window tensor of size equal to win_length({win_length}), but got window with shape {window.shape}.'
else:
window = paddle.ones(shape=(win_length,), dtype=x.dtype)
+ window = paddle.to_tensor(window, place=x.place)
Reviewer (Contributor):
Why would the place of window be wrong here? In the normal case there is no need to set the device explicitly for an output.

Author (Contributor):
If the user's default device is GPU but the input x is explicitly placed on CPU, and the user does not supply a window, this line would otherwise create the window on the default device (GPU), so the final result ends up on GPU. Is that the expected behavior?

Reviewer (Contributor):
Normally an API's inputs and outputs are on the same device; there is no need to specify it, since the underlying executor assigns the appropriate place automatically.

if win_length < n_fft:
pad_left = (n_fft - win_length) // 2
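To make the discussion concrete, a hedged sketch of the scenario (assumes a GPU build of Paddle; shapes and values are illustrative):

import paddle

paddle.set_device("gpu")          # the user's default device is GPU
x = paddle.randn([8, 512]).cpu()  # but the input explicitly lives on CPU

# No window argument is given, so stft builds the default all-ones window
# itself. Without the place fix above, that window would be allocated on the
# default (GPU) device, dragging the result onto the GPU instead of CPU.
y = paddle.signal.stft(x, n_fft=64)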
17 changes: 17 additions & 0 deletions test/legacy_test/test_pool1d_api.py
@@ -296,6 +296,22 @@ def check_avg_dygraph_padding_same(self, place):

np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05)

+    def check_max_pool_return_mask_ceil(self, place):
+        with base.dygraph.guard(place):
+            input_np = np.random.random([1, 3, 6]).astype("float32")
+            input = paddle.to_tensor(input_np)
+            result, _ = F.max_pool1d(
+                input, kernel_size=5, ceil_mode=True, return_mask=True
+            )
+            result_np = max_pool1D_forward_naive(
+                input_np,
+                ksize=[5],
+                strides=[5],
+                paddings=[0],
+                ceil_mode=True,
+            )
+            np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05)

def test_pool1d(self):
for place in self.places:
self.check_max_dygraph_results(place)
@@ -306,6 +322,7 @@ def test_pool1d(self):
self.check_avg_dygraph_padding_same(place)
self.check_max_dygraph_return_index_results(place)
self.check_avg_static_results_fp16(place)
+ self.check_max_pool_return_mask_ceil(place)


class TestPool1DError_API(unittest.TestCase):
17 changes: 17 additions & 0 deletions test/legacy_test/test_pool2d_api.py
@@ -357,6 +357,22 @@ def check_avg_divisor(self, place):
result = avg_pool2d_dg(input)
np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05)

+    def check_max_pool_return_mask_ceil(self, place):
+        with base.dygraph.guard(place):
+            input_np = np.random.random([2, 3, 33, 33]).astype("float32")
+            input = paddle.to_tensor(input_np)
+            result, _ = max_pool2d(
+                input, kernel_size=5, ceil_mode=True, return_mask=True
+            )
+            result_np = pool2D_forward_naive(
+                input_np,
+                ksize=[5, 5],
+                strides=[5, 5],
+                paddings=[0, 0],
+                ceil_mode=True,
+            )
+            np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05)

def test_pool2d(self):
for place in self.places:
self.check_max_dygraph_results(place)
@@ -368,6 +384,7 @@ def test_pool2d(self):
self.check_max_dygraph_padding_results(place)
self.check_max_dygraph_ceilmode_results(place)
self.check_max_dygraph_nhwc_results(place)
+ self.check_max_pool_return_mask_ceil(place)

@test_with_pir_api
def test_pool2d_static(self):
17 changes: 17 additions & 0 deletions test/legacy_test/test_pool3d_api.py
@@ -356,6 +356,22 @@ def check_avg_divisor(self, place):
)
np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05)

+    def check_max_pool_return_mask_ceil(self, place):
+        with base.dygraph.guard(place):
+            input_np = np.random.random([1, 2, 6, 33, 33]).astype("float32")
+            input = paddle.to_tensor(input_np)
+            result, _ = max_pool3d(
+                input, kernel_size=5, ceil_mode=True, return_mask=True
+            )
+            result_np = pool3D_forward_naive(
+                input_np,
+                ksize=[5, 5, 5],
+                strides=[5, 5, 5],
+                paddings=[0, 0, 0],
+                ceil_mode=True,
+            )
+            np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05)

def test_pool3d(self):
paddle.enable_static()
for place in self.places:
@@ -368,6 +384,7 @@ def test_pool3d(self):
self.check_avg_divisor(place)
self.check_max_dygraph_ndhwc_results(place)
self.check_max_dygraph_ceilmode_results(place)
+ self.check_max_pool_return_mask_ceil(place)

@test_with_pir_api
def test_static_fp16_gpu(self):