From f5112adb07f31410e59acfa75ab9f3363e2b0df1 Mon Sep 17 00:00:00 2001
From: wangmeng28
Date: Wed, 17 Jan 2018 12:52:42 +0800
Subject: [PATCH 1/2] Add fluid version of SE-ResNeXt

---
 fluid/image_classification/SE-ResNeXt.py | 197 +++++++++++++++++++++++
 fluid/image_classification/reader.py     | 169 ++++++++++++++++++++
 2 files changed, 366 insertions(+)
 create mode 100644 fluid/image_classification/SE-ResNeXt.py
 create mode 100644 fluid/image_classification/reader.py

diff --git a/fluid/image_classification/SE-ResNeXt.py b/fluid/image_classification/SE-ResNeXt.py
new file mode 100644
index 0000000000..36e4042940
--- /dev/null
+++ b/fluid/image_classification/SE-ResNeXt.py
@@ -0,0 +1,197 @@
+import os
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+import reader
+
+
+def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
+                  act=None):
+    """Apply a bias-free convolution followed by batch normalization.
+
+    The convolution itself has no activation; ``act`` is handed to the
+    batch-norm layer instead.
+    """
+    conv = fluid.layers.conv2d(
+        input=input,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        # Floor division keeps the padding an int on Python 2 and Python 3.
+        padding=(filter_size - 1) // 2,
+        groups=groups,
+        act=None,
+        bias_attr=False)
+    return fluid.layers.batch_norm(input=conv, act=act)
+
+
+def squeeze_excitation(input, num_channels, reduction_ratio):
+    """Rescale ``input`` with a squeeze-and-excitation gate.
+
+    A global average pool feeds two fully connected layers (ReLU, then
+    sigmoid) whose output is multiplied back onto ``input``.
+    """
+    pool = fluid.layers.pool2d(
+        input=input, pool_size=0, pool_type='avg', global_pooling=True)
+    # Floor division: the layer size has to be an int on Python 3 as well.
+    squeeze = fluid.layers.fc(
+        input=pool, size=num_channels // reduction_ratio, act='relu')
+    excitation = fluid.layers.fc(
+        input=squeeze, size=num_channels, act='sigmoid')
+    scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
+    return scale
+
+
+def shortcut(input, ch_out, stride):
+    """Return the residual branch for a block with ``ch_out`` channels.
+
+    ``input`` is returned unchanged when ``input.shape[1]`` already equals
+    ``ch_out``; otherwise it is projected by a 3x3 conv + batch norm that
+    uses ``stride``.
+    """
+    ch_in = input.shape[1]
+    if ch_in != ch_out:
+        return conv_bn_layer(input, ch_out, 3, stride)
+    return input
+
+
+def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
+    """Build one SE-ResNeXt bottleneck block with a residual connection.
+
+    The block chains a 1x1 conv, a grouped 3x3 conv (``cardinality``
+    groups, ``stride``), a 1x1 conv to ``num_filters * 2`` channels and a
+    squeeze-and-excitation gate, then adds the shortcut and applies ReLU.
+    """
+    conv0 = conv_bn_layer(
+        input=input, num_filters=num_filters, filter_size=1, act='relu')
+    conv1 = conv_bn_layer(
+        input=conv0,
+        num_filters=num_filters,
+        filter_size=3,
+        stride=stride,
+        groups=cardinality,
+        act='relu')
+    conv2 = conv_bn_layer(
+        input=conv1, num_filters=num_filters * 2, filter_size=1, act=None)
+    scale = squeeze_excitation(
+        input=conv2,
+        num_channels=num_filters * 2,
+        reduction_ratio=reduction_ratio)
+
+    short = shortcut(input, num_filters * 2, stride)
+
+    return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
+
+
+def SE_ResNeXt(input, class_dim, infer=False):
+    """Build the SE-ResNeXt classifier and return its softmax output.
+
+    Args:
+        input: image variable fed to the first convolution.
+        class_dim: size of the final softmax layer.
+        infer: when true, the dropout in front of the classifier is skipped.
+    """
+    cardinality = 64
+    reduction_ratio = 16
+    depth = [3, 8, 36, 3]
+    num_filters = [128, 256, 512, 1024]
+
+    conv = conv_bn_layer(
+        input=input, num_filters=64, filter_size=3, stride=2, act='relu')
+    conv = conv_bn_layer(
+        input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
+    conv = conv_bn_layer(
+        input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
+    conv = fluid.layers.pool2d(
+        input=conv, pool_size=3, pool_stride=2, pool_type='max')
+
+    for stage, (num_blocks, filters) in enumerate(zip(depth, num_filters)):
+        for i in range(num_blocks):
+            conv = bottleneck_block(
+                input=conv,
+                num_filters=filters,
+                # Downsample in the first block of every stage but the first.
+                stride=2 if i == 0 and stage != 0 else 1,
+                cardinality=cardinality,
+                reduction_ratio=reduction_ratio)
+
+    pool = fluid.layers.pool2d(
+        input=conv, pool_size=0, pool_type='avg', global_pooling=True)
+    if not infer:
+        drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)
+    else:
+        drop = pool
+    out = fluid.layers.fc(input=drop, size=class_dim, act='softmax')
+    return out
+
+
+def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
+    """Train SE-ResNeXt and save an inference model after every pass.
+
+    Args:
+        learning_rate: base learning rate; it is divided by ``batch_size``
+            before being passed to the Momentum optimizer.
+        batch_size: number of samples per training and test batch.
+        num_passes: number of passes over the training reader.
+        model_save_dir: directory that receives one sub-directory per pass.
+    """
+    class_dim = 1000
+    image_shape = [3, 224, 224]
+
+    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    out = SE_ResNeXt(input=image, class_dim=class_dim)
+
+    cost = fluid.layers.cross_entropy(input=out, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    optimizer = fluid.optimizer.Momentum(
+        learning_rate=learning_rate / batch_size,
+        momentum=0.9,
+        regularization=fluid.regularizer.L2Decay(1e-4 * batch_size))
+    optimizer.minimize(avg_cost)
+    accuracy = fluid.evaluator.Accuracy(input=out, label=label)
+
+    inference_program = fluid.default_main_program().clone()
+    with fluid.program_guard(inference_program):
+        test_accuracy = fluid.evaluator.Accuracy(input=out, label=label)
+        test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
+        inference_program = fluid.io.get_inference_program(test_target)
+
+    place = fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    # The data pipeline lives in the sibling module imported as ``reader``.
+    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
+    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
+    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
+
+    for pass_id in range(num_passes):
+        accuracy.reset(exe)
+        for batch_id, data in enumerate(train_reader()):
+            loss, acc = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(data),
+                fetch_list=[avg_cost] + accuracy.metrics)
+            print("Pass {0}, batch {1}, loss {2}, acc {3}".format(
+                pass_id, batch_id, loss[0], acc[0]))
+        pass_acc = accuracy.eval(exe)
+
+        test_accuracy.reset(exe)
+        for data in test_reader():
+            # The fetched values are not needed, and rebinding ``out`` here
+            # would replace the output variable that is saved below.
+            exe.run(inference_program,
+                    feed=feeder.feed(data),
+                    fetch_list=[avg_cost] + test_accuracy.metrics)
+        test_pass_acc = test_accuracy.eval(exe)
+        print("End pass {0}, train_acc {1}, test_acc {2}".format(
+            pass_id, pass_acc, test_pass_acc))
+
+        model_path = os.path.join(model_save_dir, str(pass_id))
+        fluid.io.save_inference_model(model_path, ['image'], [out], exe)
+
+
+if __name__ == '__main__':
+    train(learning_rate=0.1, batch_size=7, num_passes=100)
diff --git a/fluid/image_classification/reader.py b/fluid/image_classification/reader.py
new file mode 100644
index 0000000000..d700810e48
--- /dev/null
+++ b/fluid/image_classification/reader.py
@@ -0,0 +1,169 @@
+import os
+import random
+import functools
+import numpy as np
+import paddle.v2 as paddle
+from PIL import Image, ImageEnhance
+
+random.seed(0)
+
+_R_MEAN = 123.0
+_G_MEAN = 117.0
+_B_MEAN = 104.0
+
+DATA_DIM = 224
+
+THREAD = 8
+BUF_SIZE = 1024
+
+DATA_DIR = 'ILSVRC2012'
+TRAIN_LIST = 'ILSVRC2012/train_list.txt'
+TEST_LIST = 'ILSVRC2012/test_list.txt'
+
+img_mean = np.array([_R_MEAN, _G_MEAN, _B_MEAN]).reshape((3, 1, 1))
+
+
+def resize_short(img, target_size):
+    """Resize ``img`` so that its shorter side becomes ``target_size``."""
+    percent = float(target_size) / min(img.size[0], img.size[1])
+    resized_width = int(round(img.size[0] * percent))
+    resized_height = int(round(img.size[1] * percent))
+    img = img.resize((resized_width, resized_height), Image.LANCZOS)
+    return img
+
+
+def crop_image(img, target_size, center):
+    """Crop a ``target_size`` x ``target_size`` square out of ``img``.
+
+    The square is centered when ``center`` is true and placed at a random
+    offset otherwise.
+    """
+    width, height = img.size
+    size = target_size
+    if center:
+        # Floor division keeps the crop box integral on Python 2 and 3.
+        w_start = (width - size) // 2
+        h_start = (height - size) // 2
+    else:
+        w_start = random.randint(0, width - size)
+        h_start = random.randint(0, height - size)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img.crop((w_start, h_start, w_end, h_end))
+    return img
+
+
+def distort_color(img):
+    """Apply brightness, contrast and color jitter in a random order."""
+
+    def random_brightness(img, lower=0.5, upper=1.5):
+        e = random.uniform(lower, upper)
+        return ImageEnhance.Brightness(img).enhance(e)
+
+    def random_contrast(img, lower=0.5, upper=1.5):
+        e = random.uniform(lower, upper)
+        return ImageEnhance.Contrast(img).enhance(e)
+
+    def random_color(img, lower=0.5, upper=1.5):
+        e = random.uniform(lower, upper)
+        return ImageEnhance.Color(img).enhance(e)
+
+    ops = [random_brightness, random_contrast, random_color]
+    random.shuffle(ops)
+    for op in ops:
+        img = op(img)
+
+    return img
+
+
+def process_image(sample, mode):
+    """Load one image and preprocess it for ``mode``.
+
+    Args:
+        sample: sequence holding the image path and, for 'train' and
+            'test', the label as its second item.
+        mode: 'train', 'test' or 'infer'.
+
+    Returns:
+        ``(image, label)`` for 'train' and 'test', the image alone for
+        'infer'; the image is a float32 array transposed to (2, 0, 1) axis
+        order with ``img_mean`` subtracted.
+
+    Raises:
+        ValueError: if ``mode`` is none of the supported values.
+    """
+    if mode not in ('train', 'test', 'infer'):
+        raise ValueError('unsupported mode: {0!r}'.format(mode))
+
+    img_path = sample[0]
+
+    img = Image.open(img_path)
+    # Convert first so that resizing and the color ops below always work on
+    # a 3-band image, whatever mode the file was stored in.
+    if img.mode != 'RGB':
+        img = img.convert('RGB')
+
+    if mode == 'train':
+        img = resize_short(img, DATA_DIM + 32)
+    else:
+        img = resize_short(img, DATA_DIM)
+    img = crop_image(img, target_size=DATA_DIM, center=(mode != 'train'))
+    if mode == 'train':
+        img = distort_color(img)
+        if random.randint(0, 1) == 1:
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+
+    img = np.array(img).astype('float32').transpose((2, 0, 1))
+    img -= img_mean
+
+    if mode == 'infer':
+        return img
+    return img, sample[1]
+
+
+def _reader_creator(file_list, mode, shuffle=False):
+    """Create a reader that preprocesses the samples listed in ``file_list``.
+
+    Each non-empty line is ``path label`` for 'train' and 'test' and a
+    bare path for 'infer'; paths are resolved against ``DATA_DIR``.
+
+    Raises:
+        ValueError: if ``mode`` is none of the supported values.
+    """
+    if mode not in ('train', 'test', 'infer'):
+        raise ValueError('unsupported mode: {0!r}'.format(mode))
+
+    def reader():
+        with open(file_list) as flist:
+            # Drop blank lines (e.g. a trailing newline); they can neither be
+            # split into path and label nor name an image.
+            lines = [line.strip() for line in flist if line.strip()]
+        if shuffle:
+            random.shuffle(lines)
+        for line in lines:
+            if mode == 'infer':
+                img_path = os.path.join(DATA_DIR, line)
+                yield [img_path]
+            else:
+                img_path, label = line.split()
+                img_path = os.path.join(DATA_DIR, img_path)
+                yield img_path, int(label)
+
+    mapper = functools.partial(process_image, mode=mode)
+
+    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
+
+
+def train():
+    """Return the shuffled training reader over ``TRAIN_LIST``."""
+    return _reader_creator(TRAIN_LIST, 'train', shuffle=True)
+
+
+def test():
+    """Return the test reader over ``TEST_LIST``."""
+    return _reader_creator(TEST_LIST, 'test', shuffle=False)
+
+
+def infer(file_list):
+    """Return an inference reader over the image paths in ``file_list``."""
+    return _reader_creator(file_list, 'infer', shuffle=False)
From 670090abb84950d134635b2e436438087f4fec24 Mon Sep 17 00:00:00 2001
From: wangmeng28
Date: Thu, 18 Jan 2018 10:59:57 +0800
Subject: [PATCH 2/2] rename SE-ResNeXt to se_resnext

---
 fluid/image_classification/{SE-ResNeXt.py => se_resnext.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename fluid/image_classification/{SE-ResNeXt.py => se_resnext.py} (100%)

diff --git a/fluid/image_classification/SE-ResNeXt.py b/fluid/image_classification/se_resnext.py
similarity index 100%
rename from fluid/image_classification/SE-ResNeXt.py
rename to fluid/image_classification/se_resnext.py