Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add roi pool #35084

Merged
merged 6 commits into from
Sep 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions python/paddle/tests/test_ops_roi_pool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np

import paddle
from paddle.vision.ops import roi_pool, RoIPool


class TestRoIPool(unittest.TestCase):
    """Shape and value checks for ``roi_pool`` and the ``RoIPool`` layer."""

    def setUp(self):
        """Create one random feature map and three random boxes."""
        self.data = np.random.rand(1, 256, 32, 32).astype('float32')
        boxes = np.random.rand(3, 4)
        # Shift x2/y2 past x1/y1 so every box has a positive width and height.
        boxes[:, 2] += boxes[:, 0] + 3
        boxes[:, 3] += boxes[:, 1] + 4
        self.boxes = boxes.astype('float32')
        self.boxes_num = np.array([3], dtype=np.int32)

    def roi_pool_functional(self, output_size):
        """Run ``roi_pool`` in the active execution mode and check the output shape.

        Only the shape is verified here; the computed values themselves are
        checked in ``test_value``.

        Args:
            output_size (int or tuple[int, int]): pooled size handed to ``roi_pool``.
        """
        if isinstance(output_size, int):
            output_shape = (3, 256, output_size, output_size)
        else:
            output_shape = (3, 256, output_size[0], output_size[1])

        if paddle.in_dynamic_mode():
            data = paddle.to_tensor(self.data)
            boxes = paddle.to_tensor(self.boxes)
            boxes_num = paddle.to_tensor(self.boxes_num)

            pool_out = roi_pool(
                data, boxes, boxes_num=boxes_num, output_size=output_size)
            np.testing.assert_equal(pool_out.shape, output_shape)

        else:
            data = paddle.static.data(
                shape=self.data.shape, dtype=self.data.dtype, name='data')
            boxes = paddle.static.data(
                shape=self.boxes.shape, dtype=self.boxes.dtype, name='boxes')
            boxes_num = paddle.static.data(
                shape=self.boxes_num.shape,
                dtype=self.boxes_num.dtype,
                name='boxes_num')

            pool_out = roi_pool(
                data, boxes, boxes_num=boxes_num, output_size=output_size)

            place = paddle.CPUPlace()
            exe = paddle.static.Executor(place)

            pool_out = exe.run(paddle.static.default_main_program(),
                               feed={
                                   'data': self.data,
                                   'boxes': self.boxes,
                                   'boxes_num': self.boxes_num
                               },
                               fetch_list=[pool_out])

            # exe.run returns one entry per fetched variable.
            np.testing.assert_equal(pool_out[0].shape, output_shape)

    def test_roi_pool_functional_dynamic(self):
        """Check the functional API with an int and a tuple output size."""
        self.roi_pool_functional(3)
        self.roi_pool_functional(output_size=(3, 4))

    def test_roi_pool_functional_static(self):
        """Check the functional API after switching to static mode."""
        paddle.enable_static()
        try:
            self.roi_pool_functional(3)
        finally:
            # Switch back even when the check fails, so the remaining tests
            # in this process are not left running in static mode.
            paddle.disable_static()

    def test_RoIPool(self):
        """Check the output shape produced by the ``RoIPool`` layer."""
        roi_pool_c = RoIPool(output_size=(4, 3))
        data = paddle.to_tensor(self.data)
        boxes = paddle.to_tensor(self.boxes)
        boxes_num = paddle.to_tensor(self.boxes_num)

        pool_out = roi_pool_c(data, boxes, boxes_num)
        np.testing.assert_equal(pool_out.shape, (3, 256, 4, 3))

    def test_value(self):
        """Compare the pooled values against a hand-written expected result."""
        # A 4x4 single-channel map holding the values 1..16 in row-major order.
        data = np.arange(1, 17).reshape(1, 1, 4, 4).astype(np.float32)
        boxes = np.array(
            [[1., 1., 2., 2.], [1.5, 1.5, 3., 3.]]).astype(np.float32)
        boxes_num = np.array([2]).astype(np.int32)
        # One expected value per box, since output_size is 1.
        output = np.array([[[[11.]]], [[[16.]]]], dtype=np.float32)

        data = paddle.to_tensor(data)
        boxes = paddle.to_tensor(boxes)
        boxes_num = paddle.to_tensor(boxes_num)

        roi_pool_c = RoIPool(output_size=1)
        pool_out = roi_pool_c(data, boxes, boxes_num)
        np.testing.assert_almost_equal(pool_out.numpy(), output)


# Run the test cases in this file when it is executed directly as a script.
if __name__ == '__main__':
unittest.main()
125 changes: 125 additions & 0 deletions python/paddle/vision/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
'DeformConv2D',
'read_file',
'decode_jpeg',
'roi_pool',
'RoIPool',
'psroi_pool',
'PSRoIPool',
]
Expand Down Expand Up @@ -1013,3 +1015,126 @@ def __init__(self, output_size, spatial_scale=1.0):
# Forward pass of the enclosing layer (its class header lies outside this
# excerpt): hands the inputs to psroi_pool together with the output_size and
# spatial_scale stored on the instance, and returns the result.
def forward(self, x, boxes, boxes_num):
return psroi_pool(x, boxes, boxes_num, self.output_size,
self.spatial_scale)


def roi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None):
    """
    This operator implements the roi_pooling layer.
    Region of interest pooling (also known as RoI pooling) is to perform max pooling on inputs of nonuniform sizes to obtain fixed-size feature maps (e.g. 7*7).
    The operator has three steps: 1. Dividing each region proposal into equal-sized sections with output_size(h, w) 2. Finding the largest value in each section 3. Copying these max values to the output buffer
    For more information, please refer to https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn.

    Args:
        x (Tensor): input feature, 4D-Tensor with the shape of [N,C,H,W],
            where N is the batch size, C is the input channel, H is Height, W is width.
            In static graph mode only float32 passes the dtype check.
        boxes (Tensor): boxes (Regions of Interest) to pool over.
            2D-Tensor with the shape of [num_boxes,4].
            Given as [[x1, y1, x2, y2], ...], (x1, y1) is the top left coordinates,
            and (x2, y2) is the bottom right coordinates.
            In static graph mode only float32 passes the dtype check.
        boxes_num (Tensor): the number of RoIs in each image, data type is int32.
            Must not be None in dynamic graph mode. In static graph mode None may be
            passed, in which case the ``RoisNum`` input of the op is omitted.
        output_size (int or tuple[int, int]): the pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size.
        spatial_scale (float, optional): multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0
        name(str, optional): for detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default.

    Returns:
        pool_out (Tensor): the pooled feature, 4D-Tensor with the shape of [num_boxes, C, output_size[0], output_size[1]].

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.ops import roi_pool

            data = paddle.rand([1, 256, 32, 32])
            boxes = paddle.rand([3, 4])
            boxes[:, 2] += boxes[:, 0] + 3
            boxes[:, 3] += boxes[:, 1] + 4
            boxes_num = paddle.to_tensor([3]).astype('int32')
            pool_out = roi_pool(data, boxes, boxes_num=boxes_num, output_size=3)
            assert pool_out.shape == [3, 256, 3, 3], ''
    """

    check_type(output_size, 'output_size', (int, tuple), 'roi_pool')
    # A single int means a square output.
    if isinstance(output_size, int):
        output_size = (output_size, output_size)

    pooled_height, pooled_width = output_size
    if in_dygraph_mode():
        assert boxes_num is not None, "boxes_num should not be None in dygraph mode."
        # The op also yields the argmax tensor; only the pooled output is returned.
        pool_out, argmaxes = core.ops.roi_pool(
            x, boxes, boxes_num, "pooled_height", pooled_height, "pooled_width",
            pooled_width, "spatial_scale", spatial_scale)
        return pool_out

    else:
        check_variable_and_dtype(x, 'x', ['float32'], 'roi_pool')
        check_variable_and_dtype(boxes, 'boxes', ['float32'], 'roi_pool')
        helper = LayerHelper('roi_pool', **locals())
        # The feature tensor is passed to the helper under the keyword 'x',
        # so name it explicitly instead of relying on the helper's default
        # input parameter name.
        dtype = helper.input_dtype(input_param_name='x')
        pool_out = helper.create_variable_for_type_inference(dtype)
        argmaxes = helper.create_variable_for_type_inference(dtype='int32')

        inputs = {
            "X": x,
            "ROIs": boxes,
        }
        # RoisNum is only wired in when the caller supplied it.
        if boxes_num is not None:
            inputs['RoisNum'] = boxes_num
        helper.append_op(
            type="roi_pool",
            inputs=inputs,
            outputs={"Out": pool_out,
                     "Argmax": argmaxes},
            attrs={
                "pooled_height": pooled_height,
                "pooled_width": pooled_width,
                "spatial_scale": spatial_scale
            })
        return pool_out


class RoIPool(Layer):
    """
    Layer wrapper around :ref:`api_paddle_vision_ops_roi_pool`: the pooling
    configuration is stored at construction time and applied on every call.

    Args:
        output_size (int or tuple[int, int]): the pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size.
        spatial_scale (float, optional): multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0.

    Returns:
        pool_out (Tensor): the pooled feature, 4D-Tensor with the shape of [num_boxes, C, output_size[0], output_size[1]].

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.ops import RoIPool

            data = paddle.rand([1, 256, 32, 32])
            boxes = paddle.rand([3, 4])
            boxes[:, 2] += boxes[:, 0] + 3
            boxes[:, 3] += boxes[:, 1] + 4
            boxes_num = paddle.to_tensor([3]).astype('int32')
            roi_pool = RoIPool(output_size=(4, 3))
            pool_out = roi_pool(data, boxes, boxes_num)
            assert pool_out.shape == [3, 256, 4, 3], ''
    """

    def __init__(self, output_size, spatial_scale=1.0):
        super(RoIPool, self).__init__()
        # Kept as given; roi_pool validates and expands output_size on call.
        self._output_size = output_size
        self._spatial_scale = spatial_scale

    def forward(self, x, boxes, boxes_num):
        """Pool ``x`` over ``boxes`` using the stored configuration."""
        pooled = roi_pool(x, boxes, boxes_num, self._output_size,
                          self._spatial_scale)
        return pooled

    def extra_repr(self):
        """Return the configuration string shown in the layer's repr."""
        return 'output_size={}, spatial_scale={}'.format(
            self._output_size, self._spatial_scale)