forked from PaddlePaddle/Paddle
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request PaddlePaddle#18 from LielinJiang/cv-models
Add models api
- Loading branch information
Showing
7 changed files
with
790 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
import os | ||
import sys | ||
import cv2 | ||
|
||
from paddle.fluid.io import Dataset | ||
|
||
|
||
def has_valid_extension(filename, extensions):
    """Checks if a file is an allowed extension.

    Args:
        filename (string): path to a file
        extensions (string or iterable of strings): extensions to consider
            (lowercase). A tuple is used as-is; a list or set is converted
            to a tuple first.

    Returns:
        bool: True if the filename ends with one of given extensions
    """
    # str.endswith only accepts a str or a tuple of str, so normalise the
    # other common container types instead of raising TypeError on a list.
    if isinstance(extensions, (list, set, frozenset)):
        extensions = tuple(extensions)
    return filename.lower().endswith(extensions)
|
||
|
||
def make_dataset(dir, class_to_idx, extensions=None, is_valid_file=None):
    """Collect (path, class_index) pairs for every accepted file under dir.

    Exactly one of ``extensions`` and ``is_valid_file`` must be supplied.

    Args:
        dir (string): root directory; ``~`` is expanded.
        class_to_idx (dict): maps class sub-directory names to class indices.
        extensions (tuple of strings, optional): accepted file extensions.
        is_valid_file (callable, optional): predicate taking a file path and
            returning True when the file should be included.

    Returns:
        list: (path, class_index) tuples, ordered by class name, then by
        walked directory, then by file name.

    Raises:
        ValueError: if both or neither of ``extensions`` and
            ``is_valid_file`` are given.
    """
    base_dir = os.path.expanduser(dir)

    use_extensions = extensions is not None
    use_predicate = is_valid_file is not None
    if use_extensions == use_predicate:
        raise ValueError(
            "Both extensions and is_valid_file cannot be None or not None at the same time"
        )

    if use_extensions:

        def is_valid_file(x):
            return has_valid_extension(x, extensions)

    samples = []
    for class_name in sorted(class_to_idx.keys()):
        class_dir = os.path.join(base_dir, class_name)
        if os.path.isdir(class_dir):
            # Walk the class directory recursively, following symlinks.
            for folder, _, filenames in sorted(
                    os.walk(class_dir, followlinks=True)):
                for filename in sorted(filenames):
                    candidate = os.path.join(folder, filename)
                    if is_valid_file(candidate):
                        samples.append((candidate, class_to_idx[class_name]))

    return samples
|
||
|
||
class DatasetFolder(Dataset):
    """A generic data loader where the samples are arranged in this way:

        root/class_a/1.ext
        root/class_a/2.ext
        root/class_a/3.ext
        root/class_b/123.ext
        root/class_b/456.ext
        root/class_b/789.ext

    Args:
        root (string): Root directory path.
        loader (callable, optional): A function to load a sample given its
            path. Defaults to ``cv2_loader``.
        extensions (tuple[string], optional): A list of allowed extensions.
            both extensions and is_valid_file should not be passed. When
            neither is passed, ``IMG_EXTENSIONS`` is used.
        transform (callable, optional): A function/transform that takes in
            a sample and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes
            in the target and transforms it.
        is_valid_file (callable, optional): A function that takes path of a
            file and check if the file is a valid file (used to check of
            corrupt files) both extensions and is_valid_file should not be
            passed.

    Attributes:
        classes (list): List of the class names.
        class_to_idx (dict): Dict with items (class_name, class_index).
        samples (list): List of (sample path, class_index) tuples
        targets (list): The class_index value for each image in the dataset

    Raises:
        ValueError: (from ``make_dataset``) if both ``extensions`` and
            ``is_valid_file`` are passed.
        RuntimeError: if no accepted file is found under ``root``.
    """

    def __init__(self,
                 root,
                 loader=None,
                 extensions=None,
                 transform=None,
                 target_transform=None,
                 is_valid_file=None):
        self.root = root
        # make_dataset requires exactly one of extensions / is_valid_file, so
        # only fall back to the default extensions when the caller supplied
        # no filter at all; otherwise a custom is_valid_file could never be
        # used.
        if extensions is None and is_valid_file is None:
            extensions = IMG_EXTENSIONS
        classes, class_to_idx = self._find_classes(self.root)
        samples = make_dataset(self.root, class_to_idx, extensions,
                               is_valid_file)
        if not samples:
            message = "Found 0 files in subfolders of: " + self.root
            if extensions is not None:
                message += ("\n"
                            "Supported extensions are: " + ",".join(extensions))
            raise RuntimeError(message)

        self.loader = cv2_loader if loader is None else loader
        self.extensions = extensions
        # __getitem__ reads both attributes, so they must always exist.
        self.transform = transform
        self.target_transform = target_transform

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples
        self.targets = [s[1] for s in samples]

    def _find_classes(self, dir):
        """
        Finds the class folders in a dataset.

        Args:
            dir (string): Root directory path.

        Returns:
            tuple: (classes, class_to_idx) where classes are relative to (dir),
                and class_to_idx is a dictionary.
        """
        if sys.version_info >= (3, 5):
            # Faster and available in Python 3.5 and above
            classes = [d.name for d in os.scandir(dir) if d.is_dir()]
        else:
            classes = [
                d for d in os.listdir(dir)
                if os.path.isdir(os.path.join(dir, d))
            ]
        classes.sort()
        class_to_idx = {name: idx for idx, name in enumerate(classes)}
        return classes, class_to_idx

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target
                class.
        """
        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target

    def __len__(self):
        return len(self.samples)
|
||
|
||
# Lowercase file extensions accepted as images when no filter is supplied.
IMG_EXTENSIONS = (
    '.jpg',
    '.jpeg',
    '.png',
    '.ppm',
    '.bmp',
    '.pgm',
    '.tif',
    '.tiff',
    '.webp',
)
|
||
|
||
def cv2_loader(path):
    """Default sample loader: read the file at ``path`` with ``cv2.imread``.

    Args:
        path (string): path of the image file to read.

    Returns:
        Whatever ``cv2.imread`` returns for ``path``.
    """
    image = cv2.imread(path)
    return image
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# 高级api图像分类 | ||
|
||
## 数据集准备 | ||
在开始训练前,请确保已经下载解压好[ImageNet数据集](http://image-net.org/download),并放在合适的目录下,准备好的数据集的目录结构如下所示: | ||
|
||
```bash | ||
/path/to/imagenet | ||
train | ||
n01440764 | ||
xxx.jpg | ||
... | ||
n01443537 | ||
xxx.jpg | ||
... | ||
... | ||
val | ||
n01440764 | ||
xxx.jpg | ||
... | ||
n01443537 | ||
xxx.jpg | ||
... | ||
... | ||
``` | ||
|
||
|
||
## 训练 | ||
### 单卡训练 | ||
执行如下命令进行训练 | ||
```bash | ||
python -u main.py --arch resnet50 /path/to/imagenet -d | ||
``` | ||
|
||
### 多卡训练 | ||
执行如下命令进行训练 | ||
```bash | ||
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 -d /path/to/imagenet | ||
``` | ||
|
||
## 预测 | ||
|
||
### 单卡预测 | ||
执行如下命令进行预测 | ||
```bash | ||
python -u main.py --arch resnet50 -d --eval-only /path/to/imagenet | ||
``` | ||
|
||
### 多卡预测 | ||
执行如下命令进行多卡预测 | ||
```bash | ||
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --eval-only /path/to/imagenet | ||
``` | ||
|
||
|
||
## 参数说明 | ||
|
||
|
||
* **arch**: 要训练或预测的模型名称 | ||
* **device**: 训练使用的设备,'gpu'或'cpu',默认值:'gpu' | ||
* **dynamic**: 是否使用动态图模式训练 | ||
* **epoch**: 训练的轮数,默认值:120 | ||
* **learning-rate**: 学习率,默认值:0.1 | ||
* **batch-size**: 每张卡的batch size,默认值:64 | ||
* **output-dir**: 模型文件保存的文件夹,默认值:'output' | ||
* **num-workers**: dataloader的进程数,默认值:4 | ||
* **resume**: 恢复训练的模型路径,默认值:None | ||
* **eval-only**: 仅仅进行预测,默认值:False | ||
|
||
|
||
## 模型 | ||
|
||
| 模型 | top1 acc | top5 acc | | ||
| --- | --- | --- | | ||
| ResNet50 | 76.28 | 93.04 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
import os | ||
import cv2 | ||
import math | ||
import random | ||
import numpy as np | ||
|
||
from datasets.folder import DatasetFolder | ||
|
||
|
||
def center_crop_resize(img):
    """Crop the central square of an image and resize it to 224x224.

    The crop side is 224/256 of the shorter image side.

    Args:
        img (numpy.ndarray): image array indexed as (height, width, channel).

    Returns:
        numpy.ndarray: the cropped image resized to 224x224 with bilinear
        interpolation.
    """
    h, w = img.shape[:2]
    # Float literal keeps the ratio from truncating to 0 under Python 2
    # integer division; max() guards against an empty crop on 1-pixel sides.
    c = max(1, int(224. / 256 * min(h, w)))
    i = (h + 1 - c) // 2
    j = (w + 1 - c) // 2
    img = img[i:i + c, j:j + c, :]
    # Pass interpolation by keyword: positionally, the 3rd-5th arguments of
    # cv2.resize are dst, fx and fy, not the interpolation flag.
    return cv2.resize(img, (224, 224), interpolation=cv2.INTER_LINEAR)
|
||
|
||
def random_crop_resize(img):
    """Take a random-area, random-aspect-ratio crop and resize it to 224x224.

    Up to 10 attempts are made to sample a crop covering 8%-100% of the
    image area with an aspect ratio between 3/4 and 4/3 (sampled
    log-uniformly). If no sampled crop fits inside the image, the result of
    ``center_crop_resize`` is returned instead.

    Args:
        img (numpy.ndarray): image array indexed as (height, width, channel).

    Returns:
        numpy.ndarray: the cropped image resized to 224x224 with bilinear
        interpolation.
    """
    height, width = img.shape[:2]
    area = height * width
    # Loop-invariant bounds; float literals avoid log(0) from Python 2
    # integer division of 3 / 4.
    log_ratio = (math.log(3. / 4.), math.log(4. / 3.))

    for _ in range(10):
        target_area = random.uniform(0.08, 1.) * area
        aspect_ratio = math.exp(random.uniform(*log_ratio))

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        if w <= width and h <= height:
            i = random.randint(0, height - h)
            j = random.randint(0, width - w)
            img = img[i:i + h, j:j + w, :]
            # Pass interpolation by keyword: positionally, the 3rd-5th
            # arguments of cv2.resize are dst, fx and fy.
            return cv2.resize(
                img, (224, 224), interpolation=cv2.INTER_LINEAR)

    return center_crop_resize(img)
|
||
|
||
def random_flip(img):
    """Mirror the image along its second axis with probability one half.

    Args:
        img (numpy.ndarray): 3-D image array; the second axis is reversed
            when the flip is applied.

    Returns:
        numpy.ndarray: either ``img`` itself or a reversed view of it.
    """
    should_flip = np.random.randint(0, 2) == 1
    return img[:, ::-1, :] if should_flip else img
|
||
|
||
def normalize_permute(img):
    """Convert an HWC BGR image to a normalized CHW RGB float32 array.

    Each channel is normalized as ``(value - mean) / std`` using the
    per-channel constants defined below (given in RGB order).

    Args:
        img (numpy.ndarray): 3-channel image array laid out as
            (height, width, channel) with channels in BGR order.

    Returns:
        numpy.ndarray: a new C-contiguous float32 array of shape
        (3, height, width) in RGB order; the input is not modified.
    """
    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
    std = np.array([58.395, 57.120, 57.375], dtype=np.float32)
    invstd = 1. / std
    # Transpose to CHW and reverse the channel axis (BGR -> RGB). np.array
    # always copies here, giving a contiguous float32 buffer that is safe to
    # update in place.
    chw = np.array(
        img.transpose((2, 0, 1))[::-1, ...], dtype=np.float32, order='C')
    # Broadcast the per-channel statistics over the spatial axes.
    chw -= mean.reshape(-1, 1, 1)
    chw *= invstd.reshape(-1, 1, 1)
    return chw
|
||
|
||
def compose(functions):
    """Chain image functions into a single (image, label) sample transform.

    Args:
        functions (iterable of callables): applied to the image in order,
            each receiving the previous one's output.

    Returns:
        callable: takes an ``(img, label)`` pair and returns
        ``(transformed_img, label)``; the label is passed through untouched.
    """

    def process(sample):
        current, label = sample
        for step in functions:
            current = step(current)
        return current, label

    return process
|
||
|
||
class ImageNetDataset(DatasetFolder):
    """Folder dataset that reads and preprocesses images for ImageNet.

    In ``'train'`` mode each image is read, randomly cropped and resized,
    randomly flipped, then normalized. In any other mode it is read,
    center-cropped and resized, then normalized.

    Args:
        path (string): dataset root directory, passed to ``DatasetFolder``.
        mode (string): ``'train'`` selects the training pipeline; any other
            value selects the evaluation pipeline.
    """

    def __init__(self, path, mode='train'):
        super(ImageNetDataset, self).__init__(path)
        self.mode = mode
        if self.mode == 'train':
            steps = [
                cv2.imread, random_crop_resize, random_flip, normalize_permute
            ]
        else:
            steps = [cv2.imread, center_crop_resize, normalize_permute]
        self.transform = compose(steps)

    def __getitem__(self, idx):
        """Return the transformed image and its label wrapped in a list."""
        # The first pipeline step is cv2.imread, so the first element of the
        # stored sample is handed over unloaded.
        path, label = self.samples[idx]
        sample = (path, [label])
        return self.transform(sample)

    def __len__(self):
        return len(self.samples)
Oops, something went wrong.