Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 15 additions & 12 deletions PaddleCV/rrpn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

## 安装

在当前目录下运行样例代码需要PaddlePaddle Fluid的develop或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/)中的说明来更新PaddlePaddle。
在当前目录下运行样例代码需要PaddlePaddle Fluid的1.8.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/)中的说明来更新PaddlePaddle。


## 简介
Expand All @@ -27,16 +27,23 @@ RRPN是在Faster RCNN基础上拓展出的两阶段目标检测器,可用于

### 编译自定义OP

**注意:** 通过pip方式安装的PaddlePaddle由GCC 4.8编译得到,由于GCC 4.8和GCC 5以上C++11 ABI不兼容,您编写的自定义OP,需要通过GCC 4.8编译。若是GCC 5及以上的环境上使用自定义OP,推荐使用Docker安装PaddlePaddle,使得编译Paddle和编译自定义OP的GCC版本相同。

自定义OP编译方式如下:

进入 `models/ext_op/src` 目录,执行编译脚本
```
cd models/ext_op/src
sh make.sh ${cuda_path} ${cudnn_path} ${nccl_path}
'''
```
其中${cuda_path}、${cudnn_path}和${nccl_path}分别为cuda、cudnn、nccl的安装路径,需通过命令行进行指定。
成功编译后,`ext_op/src` 目录下将会生成 `rrpn_lib.so`

成功编译后,`ext_op/src` 目录下将会生成 `rrpn_lib.so`。
需要将`rrpn_lib.so`所在路径以及libpaddle_framework.so路径(即paddle.sysconfig.get_lib()得到路径)设置到环境变量LD_LIBRARY_PATH中:
```
# 假如rrpn_lib.so路径是:`rrpn/models/ext_op/src/`,对于Linux环境设置:
export LD_LIBRARY_PATH=rrpn/models/ext_op/src/:$( python -c 'import paddle; print(paddle.sysconfig.get_lib())'):$LD_LIBRARY_PATH
```

## 数据准备
### 公开数据集
在[ICDAR2015数据集](https://rrc.cvc.uab.es/?ch=4&com=downloads)上进行训练,数据集需进入官网进行注册后方可下载。
Expand All @@ -58,8 +65,8 @@ dataset/icdar2015/
│ ├── img_112.jpg
| ...
├── ch4_test_localization_transcription_gt
│ ├── img_111.jpg
│ ├── img_112.jpg
│ ├── img_111.txt
│ ├── img_112.txt
| ...
```
### 自定义数据
Expand Down Expand Up @@ -88,7 +95,7 @@ x1, y1, x2, y2, x3, y3, x4, y4, class_name
python train.py \
--model_save_dir=output/ \
--pretrained_model=${path_to_pretrain_model} \
--data_dir=${path_to_data} \
--data_dir=${path_to_icdar2015} \
```


Expand Down Expand Up @@ -126,7 +133,7 @@ x1, y1, x2, y2, x3, y3, x4, y4, class_name

```
python eval.py \
--dataset=icdar2015 \
--data_dir=${path_to_icdar2015} \
--pretrained_model=${path_to_trained_model}
```

Expand All @@ -143,10 +150,6 @@ RRPN
| [RRPN](https://paddleseg.bj.bcebos.com/deploy/temp/model_final.tar) |8 | 17500 | 0.8048 |






## 模型推断及可视化

模型推断可以获取图像中的物体及其对应的类别,`infer.py`是主要执行程序,调用示例如下:
Expand Down
54 changes: 33 additions & 21 deletions PaddleCV/rrpn/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ def _load_state(path):
return state


def _strip_postfix(path):
path, ext = os.path.splitext(path)
assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
"Unknown postfix {} from weights".format(ext)
return path


def load_params(exe, prog, path):
"""
Load model from the given path.
Expand All @@ -50,20 +57,33 @@ def load_params(exe, prog, path):
path (string): URL string or loca model path.
"""

if not os.path.exists(path):
path = _strip_postfix(path)
if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
raise ValueError("Model pretrain path {} does not "
"exists.".format(path))

logger.info('Loading parameters from {}...'.format(path))

def _if_exist(var):
param_exist = os.path.exists(os.path.join(path, var.name))
do_load = param_exist
if do_load:
logger.debug('load weight {}'.format(var.name))
return do_load
ignore_set = set()
state = _load_state(path)

fluid.io.load_vars(exe, path, prog, predicate=_if_exist)
# ignore the parameter which mismatch the shape
# between the model and pretrain weight.
all_var_shape = {}
for block in prog.blocks:
for param in block.all_parameters():
all_var_shape[param.name] = param.shape
ignore_set.update([
name for name, shape in all_var_shape.items()
if name in state and shape != state[name].shape
])

if len(ignore_set) > 0:
for k in ignore_set:
if k in state:
logger.warning('variable {} not used'.format(k))
del state[k]
fluid.io.set_program_state(prog, state)


def save(exe, prog, path):
Expand All @@ -83,6 +103,7 @@ def save(exe, prog, path):
def load_and_fusebn(exe, prog, path):
"""
Fuse params of batch norm to scale and bias.

Args:
exe (fluid.Executor): The fluid.Executor object.
prog (fluid.Program): save weight from which Program object.
Expand All @@ -104,19 +125,12 @@ def load_and_fusebn(exe, prog, path):
# x is any prefix
mean_variances = set()
bn_vars = []

state = None
if os.path.exists(path + '.pdparams'):
state = _load_state(path)
state = _load_state(path)

def check_mean_and_bias(prefix):
m = prefix + 'mean'
v = prefix + 'variance'
if state:
return v in state and m in state
else:
return (os.path.exists(os.path.join(path, m)) and
os.path.exists(os.path.join(path, v)))
return v in state and m in state

has_mean_bias = True

Expand Down Expand Up @@ -156,16 +170,14 @@ def check_mean_and_bias(prefix):
bn_vars.append(
[scale_name, bias_name, mean_name, variance_name])

if state:
fluid.io.set_program_state(prog, state)
else:
load_params(exe, prog, path)
if not has_mean_bias:
fluid.io.set_program_state(prog, state)
logger.warning(
"There is no paramters of batch norm in model {}. "
"Skip to fuse batch norm. And load paramters done.".format(path))
return

fluid.load(prog, path, exe)
eps = 1e-5
for names in bn_vars:
scale_name, bias_name, mean_name, var_name = names
Expand Down
2 changes: 1 addition & 1 deletion PaddleCV/rrpn/models/ext_op/rrpn_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import Variable
fluid.load_op_library('models/ext_op/src/rrpn_lib.so')
fluid.load_op_library('rrpn_lib.so')


def rrpn_target_assign(bbox_pred,
Expand Down
2 changes: 1 addition & 1 deletion PaddleCV/rrpn/models/ext_op/src/make.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ git clone https://github.com/NVlabs/cub.git

nvcc rrpn_generate_proposals_op.cu -c -o rrpn_generate_proposals_op.cu.o -ccbin cc -DPADDLE_WITH_MKLDNN -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -Xcompiler -fPIC -std=c++11 -Xcompiler -fPIC -w --expt-relaxed-constexpr -O3 -DNVCC \
-I ${include_dir} \
-I ${include_dir}/third_party \
-I ${include_dir}/third_party \
-I ${CUDA}/include \
-I ${CUDNN}/include \
-I ${NCCL}/include \
Expand Down
8 changes: 3 additions & 5 deletions PaddleCV/rrpn/models/ext_op/src/rrpn_rotated_roi_align_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,7 @@ class RRPNRotatedROIAlignGradMaker : public framework::SingleGradOpMaker<T> {
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

protected:
std::unique_ptr<T> Apply() const override {
std::unique_ptr<T> op(new T);
void Apply(GradOpPtr<T> op) const override {
op->SetType("rrpn_rotated_roi_align_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("ROIs", this->Input("ROIs"));
Expand All @@ -175,12 +174,11 @@ class RRPNRotatedROIAlignGradMaker : public framework::SingleGradOpMaker<T> {
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetAttrMap(this->Attrs());
return op;
}
};

DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(
RRPNRotatedRoiAlignGradNoNeedBufVarsInferer, "X");
DECLARE_NO_NEED_BUFFER_VARS_INFERER(RRPNRotatedRoiAlignGradNoNeedBufVarsInferer,
"X");

} // namespace operators
} // namespace paddle
Expand Down
1 change: 0 additions & 1 deletion PaddleCV/rrpn/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ def reader():
continue
batch_out.append(datas)
end = time.time()
#print('reader time:', end - start)
if len(batch_out) == batch_size:
yield batch_out
count += 1
Expand Down
12 changes: 0 additions & 12 deletions PaddleCV/rrpn/roidbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def get_roidb(self):
elif edge2 >= edge1:
width = edge2
height = edge1
# print pt2[0], pt3[0]
if pt2[0] - pt3[0] != 0:
angle = -np.arctan(
float(pt2[1] - pt3[1]) /
Expand Down Expand Up @@ -160,7 +159,6 @@ def get_roidb(self):
else:
hard_boxes.append([x_ctr, y_ctr, width, height, angle])

#print(easy_boxes)
if self.mode == 'train':
boxes.extend(easy_boxes)
# hard box only get 1/3 for train
Expand All @@ -173,8 +171,6 @@ def get_roidb(self):
is_difficult = [0] * len(easy_boxes)
is_difficult.extend([1] * int(len(hard_boxes)))
len_of_bboxes = len(boxes)
#is_difficult = [0] * len(easy_boxes)
#is_difficult.extend([1] * int(len(hard_boxes)))
is_difficult = np.array(is_difficult).reshape(
1, len_of_bboxes).astype(np.int32)
if self.mode == 'train':
Expand Down Expand Up @@ -221,11 +217,9 @@ class ICDAR2017Dataset(object):
def __init__(self, mode):
print('Creating: {}'.format(cfg.dataset))
self.name = cfg.data_dir
#print('**************', self.name)
self.mode = mode
data_path = DatasetPath(mode, self.name)
data_dir = data_path.get_data_dir()
#print("&**************", data_dir)
file_list = data_path.get_file_list()
self.image_dir = data_dir
self.gt_dir = file_list
Expand All @@ -245,15 +239,12 @@ def get_roidb(self):
labels_map = get_labels_maps()
for image in image_list:
prefix = image[:-4]
#print(image)

if image.split('.')[-1] not in post_fix:
continue
img_name = os.path.join(self.image_dir, image)
gt_name = os.path.join(self.gt_dir, 'gt_' + prefix + '.txt')
gt_classes = []
#boxes = []
#hard_boxes = []
boxes = []
gt_obj = open(gt_name, 'r', encoding='UTF-8-sig')
gt_txt = gt_obj.read()
Expand Down Expand Up @@ -293,7 +284,6 @@ def get_roidb(self):
elif edge2 >= edge1:
width = edge2
height = edge1
# print pt2[0], pt3[0]
if pt2[0] - pt3[0] != 0:
angle = -np.arctan(
float(pt2[1] - pt3[1]) /
Expand All @@ -312,7 +302,6 @@ def get_roidb(self):
else:
boxes.append([x_ctr, y_ctr, width, height, angle])
len_of_bboxes = len(boxes)
#print(len_of_bboxes)
is_difficult = np.zeros((len_of_bboxes, 1), dtype=np.int32)
if self.mode == 'train':
gt_boxes = np.zeros((len_of_bboxes, 5), dtype=np.int32)
Expand All @@ -332,7 +321,6 @@ def get_roidb(self):
boxes[idx][3], boxes[idx][4], boxes[idx][5],
boxes[idx][6], boxes[idx][7]
]
#gt_classes[idx] = 1
if gt_boxes.shape[0] <= 0:
continue
gt_boxes = gt_boxes.astype(np.float64)
Expand Down
2 changes: 1 addition & 1 deletion PaddleCV/rrpn/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def parse_args():
add_arg('pixel_means', float, [0.485, 0.456, 0.406], "pixel mean")
add_arg('nms_thresh', float, 0.3, "NMS threshold.")
add_arg('score_thresh', float, 0.01, "score threshold for NMS.")
add_arg('snapshot_stride', int, 1000, "save model every snapshot stride.")
add_arg('snapshot_iter', int, 1000, "save model every snapshot iter.")
# SINGLE EVAL AND DRAW
add_arg('draw_threshold', float, 0.8, "Confidence threshold to draw bbox.")
add_arg('image_path', str, 'ICDAR2015/tmp/', "The image path used to inference and visualize.")
Expand Down