Merge pull request PaddlePaddle#25 from LielinJiang/more-cls-models

add some image classification model
qingqing01 · Apr 8, 2020 · f2e17b2 · f2e17b2
2 parents 3c68292 + e6e2f65
commit f2e17b2
Show file tree

Hide file tree

Showing 10 changed files with 944 additions and 26 deletions.
diff --git a/datasets/folder.py b/datasets/folder.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import sys
 import cv2
@@ -6,11 +20,11 @@
 
 
 def has_valid_extension(filename, extensions):
-    """Checks if a file is an allowed extension.
+    """Checks if a file has a valid extension.
 
     Args:
-        filename (string): path to a file
-        extensions (tuple of strings): extensions to consider (lowercase)
+        filename (str): path to a file
+        extensions (tuple of str): extensions to consider (lowercase)
 
     Returns:
         bool: True if the filename ends with one of given extensions

diff --git a/image_classification/README.MD b/image_classification/README.MD
@@ -30,6 +30,7 @@
 ```bash
 python -u main.py --arch resnet50 /path/to/imagenet -d
 ```
+`-d` 表示使用动态图模式训练，默认使用静态图模式。
 
 ### 多卡训练
 执行如下命令进行训练
@@ -64,11 +65,28 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
 * **output-dir**: 模型文件保存的文件夹，默认值：'output'
 * **num-workers**: dataloader的进程数，默认值：4
 * **resume**: 恢复训练的模型路径，默认值：None
-* **eval-only**: 仅仅进行预测，默认值：False
+* **eval-only**: 是否仅仅进行预测
+* **lr-scheduler**: 学习率衰减策略，默认值：piecewise
+* **milestones**: piecewise学习率衰减策略的边界，默认值：[30, 60, 80]
+* **weight-decay**: 模型权重正则化系数，默认值：1e-4
+* **momentum**: SGD优化器的动量，默认值：0.9
 
 
 ## 模型
 
 | 模型 | top1 acc | top5 acc |
 | --- | --- | --- |
-| ResNet50 | 76.28 | 93.04 | 
+| [ResNet50](https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams) | 76.28 | 93.04 |
+| [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.84 | 90.71 | 
+| [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.25 | 89.92 | 
+| [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.27 | 90.66 | 
+
+上述模型的复现参数请参考scripts下的脚本。
+
+
+## 参考文献
+- ResNet: [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385), Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+- MobileNetV1: [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861), Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam
+- MobileNetV2: [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381v4.pdf), Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen
+- VGG: [Very Deep Convolutional Networks for Large-scale Image Recognition](https://arxiv.org/pdf/1409.1556), Karen Simonyan, Andrew Zisserman
+
diff --git a/image_classification/imagenet_dataset.py b/image_classification/imagenet_dataset.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import cv2
 import math

diff --git a/image_classification/main.py b/image_classification/main.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -37,23 +37,36 @@
 
 def make_optimizer(step_per_epoch, parameter_list=None):
     base_lr = FLAGS.lr
-    momentum = 0.9
-    weight_decay = 1e-4
+    lr_scheduler = FLAGS.lr_scheduler
+    momentum = FLAGS.momentum
+    weight_decay = FLAGS.weight_decay
+
+    if lr_scheduler == 'piecewise':
+        milestones = FLAGS.milestones
+        boundaries = [step_per_epoch * e for e in milestones]
+        values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]
+        learning_rate = fluid.layers.piecewise_decay(
+            boundaries=boundaries, values=values)
+    elif lr_scheduler == 'cosine':
+        learning_rate = fluid.layers.cosine_decay(base_lr, step_per_epoch,
+                                                  FLAGS.epoch)
+    else:
+        raise ValueError(
+            "Expected lr_scheduler in ['piecewise', 'cosine'], but got {}".
+            format(lr_scheduler))
 
-    boundaries = [step_per_epoch * e for e in [30, 60, 80]]
-    values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]
-    learning_rate = fluid.layers.piecewise_decay(
-        boundaries=boundaries, values=values)
     learning_rate = fluid.layers.linear_lr_warmup(
         learning_rate=learning_rate,
         warmup_steps=5 * step_per_epoch,
         start_lr=0.,
         end_lr=base_lr)
+
     optimizer = fluid.optimizer.Momentum(
         learning_rate=learning_rate,
         momentum=momentum,
         regularization=fluid.regularizer.L2Decay(weight_decay),
         parameter_list=parameter_list)
+
     return optimizer
 
 
@@ -138,6 +151,20 @@ def main():
         help="checkpoint path to resume")
     parser.add_argument(
         "--eval-only", action='store_true', help="enable dygraph mode")
+    parser.add_argument(
+        "--lr-scheduler",
+        default='piecewise',
+        type=str,
+        help="learning rate scheduler")
+    parser.add_argument(
+        "--milestones",
+        nargs='+',
+        type=int,
+        default=[30, 60, 80],
+        help="piecewise decay milestones")
+    parser.add_argument(
+        "--weight-decay", default=1e-4, type=float, help="weight decay")
+    parser.add_argument("--momentum", default=0.9, type=float, help="momentum")
     FLAGS = parser.parse_args()
     assert FLAGS.data, "error: must provide data path"
     main()
diff --git a/model.py b/model.py
@@ -42,6 +42,14 @@
 
 
 def set_device(device):
+    """
+    Args:
+        device (str): specify device type, 'cpu' or 'gpu'.
+        
+    Returns:
+        fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place.
+    """
+
     assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \
     "Expected device in ['cpu', 'gpu'], but got {}".format(device)
 
@@ -1082,7 +1090,11 @@ def evaluate(
 
         return eval_result
 
-    def predict(self, test_data, batch_size=1, num_workers=0, stack_outputs=True):
+    def predict(self,
+                test_data,
+                batch_size=1,
+                num_workers=0,
+                stack_outputs=True):
         """
         FIXME: add more comments and usage
         Args:

diff --git a/models/__init__.py b/models/__init__.py
@@ -13,13 +13,22 @@
 #limitations under the License.
 
 from . import resnet
+from . import vgg
+from . import mobilenetv1
+from . import mobilenetv2
 from . import darknet
 from . import yolov3
 
 from .resnet import *
+from .mobilenetv1 import *
+from .mobilenetv2 import *
+from .vgg import *
 from .darknet import *
 from .yolov3 import *
 
 __all__ = resnet.__all__ \
+        + vgg.__all__ \
+        + mobilenetv1.__all__ \
+        + mobilenetv2.__all__ \
         + darknet.__all__ \
         + yolov3.__all__