Merge pull request PaddlePaddle#18 from LielinJiang/cv-models

Add models api
qingqing01 · Apr 2, 2020 · e252e28 · e252e28
2 parents 7d1ea67 + 3fe5623
commit e252e28
Show file tree

Hide file tree

Showing 7 changed files with 790 additions and 0 deletions.
diff --git a/datasets/folder.py b/datasets/folder.py
@@ -0,0 +1,154 @@
+import os
+import sys
+import cv2
+
+from paddle.fluid.io import Dataset
+
+
+def has_valid_extension(filename, extensions):
+    """Tell whether ``filename`` ends with one of the allowed extensions.
+
+    Only the filename is lowercased before the comparison, so the entries
+    of ``extensions`` must already be lowercase to match.
+
+    Args:
+        filename (string): path to a file
+        extensions (tuple of strings): extensions to consider (lowercase)
+
+    Returns:
+        bool: True if the filename ends with one of given extensions
+    """
+    lowered = filename.lower()
+    return lowered.endswith(extensions)
+
+
+def make_dataset(dir, class_to_idx, extensions=None, is_valid_file=None):
+    """Collect ``(path, class_index)`` pairs for every accepted file.
+
+    For each key of ``class_to_idx`` (visited in sorted order) the
+    sub-directory of the same name under ``dir`` is walked recursively,
+    following symlinks. Keys without a matching directory are skipped.
+
+    Args:
+        dir (string): root directory; a leading ``~`` is expanded.
+        class_to_idx (dict): maps class (sub-directory) name to class index.
+        extensions (tuple of strings, optional): accepted file extensions.
+        is_valid_file (callable, optional): predicate called with a file
+            path; truthy results keep the file.
+
+    Returns:
+        list: ``(path, class_index)`` tuples in sorted traversal order.
+
+    Raises:
+        ValueError: if ``extensions`` and ``is_valid_file`` are both None
+            or both given; exactly one of them must be supplied.
+    """
+    root_dir = os.path.expanduser(dir)
+    by_extension = extensions is not None
+    by_predicate = is_valid_file is not None
+    if by_extension == by_predicate:
+        raise ValueError(
+            "Both extensions and is_valid_file cannot be None or not None at the same time"
+        )
+    if by_extension:
+
+        def is_valid_file(x):
+            return has_valid_extension(x, extensions)
+
+    samples = []
+    for class_name in sorted(class_to_idx.keys()):
+        class_dir = os.path.join(root_dir, class_name)
+        if os.path.isdir(class_dir):
+            walked = sorted(os.walk(class_dir, followlinks=True))
+            for folder, _, fnames in walked:
+                for fname in sorted(fnames):
+                    candidate = os.path.join(folder, fname)
+                    if is_valid_file(candidate):
+                        samples.append((candidate, class_to_idx[class_name]))
+
+    return samples
+
+
+class DatasetFolder(Dataset):
+    """A generic data loader where the samples are arranged in this way:
+
+        root/class_a/1.ext
+        root/class_a/2.ext
+        root/class_a/3.ext
+
+        root/class_b/123.ext
+        root/class_b/456.ext
+        root/class_b/789.ext
+
+    Args:
+        root (string): Root directory path.
+        loader (callable, optional): A function to load a sample given its path.
+            Defaults to ``cv2_loader``.
+        extensions (tuple[string], optional): A list of allowed extensions.
+            both extensions and is_valid_file should not be passed.
+            When neither is passed, ``IMG_EXTENSIONS`` is used.
+        transform (callable, optional): A function/transform that takes in
+            a sample and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes
+            in the target and transforms it.
+        is_valid_file (callable, optional): A function that takes path of a file
+            and check if the file is a valid file (used to check of corrupt files)
+            both extensions and is_valid_file should not be passed.
+
+     Attributes:
+        classes (list): List of the class names.
+        class_to_idx (dict): Dict with items (class_name, class_index).
+        samples (list): List of (sample path, class_index) tuples
+        targets (list): The class_index value for each image in the dataset
+
+    Raises:
+        ValueError: if both ``extensions`` and ``is_valid_file`` are passed
+            (raised by ``make_dataset``).
+        RuntimeError: if no file under ``root`` passes the filter.
+    """
+
+    def __init__(self,
+                 root,
+                 loader=None,
+                 extensions=None,
+                 transform=None,
+                 target_transform=None,
+                 is_valid_file=None):
+        self.root = root
+        # Apply the default extension filter only when the caller gave no
+        # filter at all. Forcing it whenever ``extensions`` is None made
+        # make_dataset reject every call that passed only ``is_valid_file``.
+        if extensions is None and is_valid_file is None:
+            extensions = IMG_EXTENSIONS
+        classes, class_to_idx = self._find_classes(self.root)
+        samples = make_dataset(self.root, class_to_idx, extensions,
+                               is_valid_file)
+        if not samples:
+            msg = "Found 0 files in subfolders of: " + self.root + "\n"
+            # ``extensions`` is None when filtering is done by is_valid_file.
+            if extensions is not None:
+                msg += "Supported extensions are: " + ",".join(extensions)
+            raise RuntimeError(msg)
+
+        self.loader = cv2_loader if loader is None else loader
+        self.extensions = extensions
+        # __getitem__ reads both attributes, so they must always exist.
+        self.transform = transform
+        self.target_transform = target_transform
+
+        self.classes = classes
+        self.class_to_idx = class_to_idx
+        self.samples = samples
+        self.targets = [s[1] for s in samples]
+
+    def _find_classes(self, dir):
+        """
+        Finds the class folders in a dataset.
+
+        Args:
+            dir (string): Root directory path.
+
+        Returns:
+            tuple: (classes, class_to_idx) where classes are relative to (dir),
+                    and class_to_idx is a dictionary mapping each class name
+                    to its position in the sorted class list.
+
+        """
+        if sys.version_info >= (3, 5):
+            # Faster and available in Python 3.5 and above
+            classes = [d.name for d in os.scandir(dir) if d.is_dir()]
+        else:
+            classes = [
+                d for d in os.listdir(dir)
+                if os.path.isdir(os.path.join(dir, d))
+            ]
+        classes.sort()
+        class_to_idx = {name: i for i, name in enumerate(classes)}
+        return classes, class_to_idx
+
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (sample, target) where target is class_index of the target class.
+        """
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        if self.target_transform is not None:
+            target = self.target_transform(target)
+
+        return sample, target
+
+    def __len__(self):
+        """Return the number of samples found under ``root``."""
+        return len(self.samples)
+
+
+# Lowercase filename suffixes used by DatasetFolder as the default
+# ``extensions`` filter.
+IMG_EXTENSIONS = (
+    '.jpg',
+    '.jpeg',
+    '.png',
+    '.ppm',
+    '.bmp',
+    '.pgm',
+    '.tif',
+    '.tiff',
+    '.webp',
+)
+
+
+def cv2_loader(path):
+    """Default sample loader: read the file at ``path`` with ``cv2.imread``.
+
+    Args:
+        path (string): path of the image file to read.
+
+    Returns:
+        Whatever ``cv2.imread`` returns for ``path``; no check is made here
+        on whether the read succeeded.
+    """
+    image = cv2.imread(path)
+    return image
diff --git a/image_classification/README.MD b/image_classification/README.MD
@@ -0,0 +1,74 @@
+# 高级api图像分类
+
+## 数据集准备
+在开始训练前，请确保已经下载解压好[ImageNet数据集](http://image-net.org/download)，并放在合适的目录下，准备好的数据集的目录结构如下所示：
+
+```bash
+/path/to/imagenet
+    train
+         n01440764
+              xxx.jpg
+              ...
+         n01443537
+              xxx.jpg
+              ...
+         ...
+    val
+         n01440764
+              xxx.jpg
+              ...
+         n01443537
+              xxx.jpg
+              ...
+         ...
+```
+
+
+## 训练
+### 单卡训练
+执行如下命令进行训练
+```bash
+python -u main.py --arch resnet50 /path/to/imagenet -d
+```
+
+### 多卡训练
+执行如下命令进行训练
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 -d /path/to/imagenet
+```
+
+## 预测
+
+### 单卡预测
+执行如下命令进行预测
+```bash
+python -u main.py --arch resnet50 -d --eval-only /path/to/imagenet
+```
+
+### 多卡预测
+执行如下命令进行多卡预测
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --eval-only /path/to/imagenet
+```
+
+
+## 参数说明
+
+
+* **arch**: 要训练或预测的模型名称
+* **device**: 训练使用的设备，'gpu'或'cpu'，默认值：'gpu'
+* **dynamic**: 是否使用动态图模式训练
+* **epoch**: 训练的轮数，默认值：120
+* **learning-rate**: 学习率，默认值：0.1
+* **batch-size**: 每张卡的batch size，默认值：64
+* **output-dir**: 模型文件保存的文件夹，默认值：'output'
+* **num-workers**: dataloader的进程数，默认值：4
+* **resume**: 恢复训练的模型路径，默认值：None
+* **eval-only**: 仅仅进行预测，默认值：False
+
+
+## 模型
+
+| 模型 | top1 acc | top5 acc |
+| --- | --- | --- |
+| ResNet50 | 76.28 | 93.04 | 
diff --git a/image_classification/imagenet_dataset.py b/image_classification/imagenet_dataset.py
@@ -0,0 +1,84 @@
+import os
+import cv2
+import math
+import random
+import numpy as np
+
+from datasets.folder import DatasetFolder
+
+
+def center_crop_resize(img):
+    """Crop the central square of ``img`` and resize it to 224x224.
+
+    The side of the square is 224/256 of the shorter image side.
+
+    Args:
+        img (numpy.ndarray): image indexed as (height, width, channel).
+
+    Returns:
+        The cropped image resized to 224x224 with bilinear interpolation.
+    """
+    h, w = img.shape[:2]
+    # Float literal keeps this a true division on Python 2 as well, where
+    # ``224 / 256`` would evaluate to 0 and yield an empty crop.
+    c = int(224. / 256 * min(h, w))
+    i = (h + 1 - c) // 2
+    j = (w + 1 - c) // 2
+    img = img[i:i + c, j:j + c, :]
+    # Pass the flag by keyword: positionally, the arguments after ``dsize``
+    # are ``dst``, ``fx`` and ``fy``, not the interpolation mode.
+    return cv2.resize(img, (224, 224), interpolation=cv2.INTER_LINEAR)
+
+
+def random_crop_resize(img):
+    """Take a random crop of ``img`` and resize it to 224x224.
+
+    Up to 10 attempts are made. Each attempt draws a target area uniformly
+    between 8% and 100% of the image area and an aspect ratio log-uniformly
+    between 3/4 and 4/3. The first crop that fits inside the image is
+    placed at a random position and used. If no attempt fits, the result
+    of ``center_crop_resize`` is returned instead.
+
+    Args:
+        img (numpy.ndarray): image indexed as (height, width, channel).
+
+    Returns:
+        The cropped image resized to 224x224 with bilinear interpolation.
+    """
+    height, width = img.shape[:2]
+    area = height * width
+    # Loop-invariant bounds. Float literals keep these true divisions on
+    # Python 2 too, where ``3 / 4`` is 0 and ``math.log(0)`` raises.
+    log_ratio = (math.log(3. / 4.), math.log(4. / 3.))
+
+    for attempt in range(10):
+        target_area = random.uniform(0.08, 1.) * area
+        aspect_ratio = math.exp(random.uniform(*log_ratio))
+
+        w = int(round(math.sqrt(target_area * aspect_ratio)))
+        h = int(round(math.sqrt(target_area / aspect_ratio)))
+
+        if w <= width and h <= height:
+            i = random.randint(0, height - h)
+            j = random.randint(0, width - w)
+            img = img[i:i + h, j:j + w, :]
+            # Pass the flag by keyword: positionally, the arguments after
+            # ``dsize`` are ``dst``, ``fx`` and ``fy``.
+            return cv2.resize(
+                img, (224, 224), interpolation=cv2.INTER_LINEAR)
+
+    return center_crop_resize(img)
+
+
+def random_flip(img):
+    """Reverse ``img`` along its second axis with probability 1/2.
+
+    One draw from ``np.random.randint(0, 2)`` decides the outcome: on 1 the
+    reversed view is returned, otherwise ``img`` is returned unchanged.
+    """
+    should_flip = np.random.randint(0, 2) == 1
+    return img[:, ::-1, :] if should_flip else img
+
+
+def normalize_permute(img):
+    """Convert an (H, W, C) image to a normalized float32 (C, H, W) array.
+
+    The image is cast to float32, moved to channel-first layout, and its
+    channel order is reversed (BGR to RGB, per the input convention this
+    pipeline assumes). Each channel then has its mean subtracted and is
+    multiplied by the reciprocal of its standard deviation, in place.
+    """
+    channel_first = img.astype(np.float32).transpose((2, 0, 1))
+    # Reverse the channel axis: BGR -> RGB.
+    rgb = channel_first[::-1, ...]
+    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
+    std = np.array([58.395, 57.120, 57.375], dtype=np.float32)
+    scale = 1. / std
+    # Each ``plane`` is a view into ``rgb``, so the updates land in ``rgb``.
+    for plane, offset, factor in zip(rgb, mean, scale):
+        plane -= offset
+        plane *= factor
+    return rgb
+
+
+def compose(functions):
+    """Chain ``functions`` into one transform over ``(img, label)`` pairs.
+
+    Args:
+        functions (iterable of callables): applied in order, each receiving
+            the previous one's output; the first receives the pair's first
+            element.
+
+    Returns:
+        callable: takes a two-element ``sample`` and returns
+        ``(transformed_first_element, label)``; the label passes through
+        untouched.
+    """
+
+    def process(sample):
+        value, label = sample
+        for step in functions:
+            value = step(value)
+        return (value, label)
+
+    return process
+
+
+class ImageNetDataset(DatasetFolder):
+    """Folder-based dataset with a fixed preprocessing pipeline per mode.
+
+    Args:
+        path (string): root directory, forwarded to ``DatasetFolder``.
+        mode (string): ``'train'`` selects random crop + random flip; any
+            other value selects the center-crop pipeline.
+    """
+
+    def __init__(self, path, mode='train'):
+        super(ImageNetDataset, self).__init__(path)
+        self.mode = mode
+        # Both pipelines start by reading the file and end by normalizing.
+        if self.mode == 'train':
+            steps = [
+                cv2.imread, random_crop_resize, random_flip, normalize_permute
+            ]
+        else:
+            steps = [cv2.imread, center_crop_resize, normalize_permute]
+        self.transform = compose(steps)
+
+    def __getitem__(self, idx):
+        """Return the transformed image and ``[label]`` for sample ``idx``."""
+        # ``samples`` holds (file path, class index); the pipeline loads
+        # the file itself, so the path is passed in place of an image.
+        path, label = self.samples[idx]
+        sample = (path, [label])
+        return self.transform(sample)
+
+    def __len__(self):
+        """Return the number of samples."""
+        return len(self.samples)