From 8fc193225fd65a41bac88078e2f5055c79afc981 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Fri, 22 Nov 2013 21:30:44 -0800 Subject: [PATCH 01/15] another python wrapper that batches image subwindows and provides a selective search provider --- python/caffe/imagenet/wrapper2.py | 227 ++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 python/caffe/imagenet/wrapper2.py diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py new file mode 100644 index 00000000000..fab53790925 --- /dev/null +++ b/python/caffe/imagenet/wrapper2.py @@ -0,0 +1,227 @@ +""" +wrapper2.py classifies a number of images at once, optionally using +selective search crops. + +TODO: also store the window coordinates with the image filenames. +""" +import numpy as np +import os +import sys +import gflags +import pandas as pd +import time +from skimage import io +from skimage import transform +import selective_search_ijcv as selective_search +import caffe + +IMAGE_DIM = 256 +CROPPED_DIM = 227 +IMAGE_CENTER = int((IMAGE_DIM - CROPPED_DIM) / 2) + +CROP_MODES = ['center_only', 'corners', 'selective_search'] + +# Load the imagenet mean file +IMAGENET_MEAN = np.load( + os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy')) + + +def load_image(filename): + """ + Input: + filename: string + Output: + image: an image of size (256 x 256 x 3) of type uint8. + """ + img = io.imread(filename) + if img.ndim == 2: + img = np.tile(img[:, :, np.newaxis], (1, 1, 3)) + elif img.shape[2] == 4: + img = img[:, :, :3] + return img + + +def format_image(image, window=None, dim=IMAGE_DIM): + # Crop a subimage if window is provided. + if window is not None: + image = image[ + window[1]:window[1] + window[3], + window[0]:window[0] + window[2] + ] + + # Resize to ImageNet size, convert to BGR, subtract mean. + image = (transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1] + image -= IMAGENET_MEAN + + # Resize further if needed. 
+ if not np.all(dim == IMAGE_DIM): + image = transform.resize(image, (dim, dim) * 255) + + image = image.swapaxes(1, 2).swapaxes(0, 1) + return image + + +def _assemble_images_center_only(image_fnames, max_batch_size): + all_images = [] + for image_filename in image_fnames: + image = format_image(load_image(image_filename)) + + all_images.append(np.ascontiguousarray( + image[:, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM], + dtype=np.float32 + )) + return all_images[np.newaxis, :], image_fnames + + +def _assemble_images_corners(image_fnames, max_batch_size): + all_images = [] + fnames = [] + for image_filename in image_fnames: + image = format_image(load_image(image_filename)) + indices = [0, IMAGE_DIM - CROPPED_DIM] + + images = np.empty((10, 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32) + curr = 0 + for i in indices: + for j in indices: + images[curr] = image[:, i:i + CROPPED_DIM, j:j + CROPPED_DIM] + curr += 1 + images[4] = image[ + :, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM + ] + + # Add flipped versions. + images[5:] = images[:5, :, :, ::-1] + + all_images.append(images) + fnames.append(image_filename) + return all_images, fnames + + +def _assemble_images_selective_search(image_fnames, max_batch_size): + windows_list = selective_search.get_windows(image_fnames) + all_images = [] + fnames = [] + for image_fname, windows in zip(image_fnames, windows_list): + image = load_image(image_fname) + images = np.empty( + (len(windows), 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32) + for i, window in enumerate(windows): + images[i] = format_image(image, window, CROPPED_DIM) + all_images.append(images) + fnames.append(image_fname) + return all_images, fnames + + +def assemble_batches( + image_fnames, crop_mode='center_only', max_batch_size=256): + """ + Assemble list of batches, each one of at most max_batch_size subimage + objects. 
+ + Input: + image_fnames: list of string + mode: string + 'center_only': the CROPPED_DIM middle of the image is taken as is + 'corners': take CROPPED_DIM-sized boxes at 4 corners and center of + the image, as well as their flipped versions: a total of 10 images + 'selective_search': run Selective Search region proposal on the + image, and take each enclosing subwindow. + + Output: + images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size + """ + if crop_mode == 'center_only': + all_images, fnames = _assemble_images_center_only( + image_fnames, max_batch_size) + + elif crop_mode == 'corners': + all_images, fnames = _assemble_images_corners( + image_fnames, max_batch_size) + + elif crop_mode == 'selective_search': + all_images, fnames = _assemble_images_selective_search( + image_fnames, max_batch_size) + + else: + raise Exception("Unknown mode: not in {}".format(CROP_MODES)) + + # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array, + # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks + all_images = np.concatenate(all_images, 0) + all_fnames = np.repeat(fnames, [len(images) for images in all_images]) + assert(all_images.shape[0] == all_fnames.shape[0]) + + num_batches = int(all_images.shape[0] / max_batch_size) + image_batches = np.array_split(all_images, num_batches, axis=0) + fname_batches = np.array_split(all_fnames, num_batches, axis=0) + return image_batches, fname_batches + + +def compute_feats(batch, layer='imagenet'): + if layer == 'imagenet': + num_output = 1000 + else: + raise ValueError("Unknown layer requested: {}".format(layer)) + + num = batch.shape[0] + output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)] + caffenet.Forward([batch], output_blobs) + feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))] + + return feats + + +if __name__ == "__main__": + ## Parse cmdline options + gflags.DEFINE_string( + "model_def", "", "The model definition file.") + gflags.DEFINE_string( + 
"pretrained_model", "", "The pretrained model.") + gflags.DEFINE_boolean( + "gpu", True, "use gpu for computation") + gflags.DEFINE_string( + "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES)) + gflags.DEFINE_string( + "images_file", "", "File that contains image filenames.") + gflags.DEFINE_string( + "output", "", "The output DataFrame HDF5 filename.") + gflags.DEFINE_string( + "layer", "imagenet", "Layer to output.") + FLAGS = gflags.FLAGS + FLAGS(sys.argv) + + ## Initialize network by loading model definition and weights. + caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model) + caffenet.set_phase_test() + if FLAGS.gpu: + caffenet.set_mode_gpu() + + ## Load list of image filenames and assemble into batches. + with open(gflags.images_file) as f: + image_fnames = [_.strip() for _ in f.readlines()] + image_batches, fname_batches = assemble_batches( + image_fnames, FLAGS.crop_mode) + print 'Running on {} files in {} batches'.format( + len(image_fnames), len(image_batches)) + + # Process the batches. + start = time.time() + all_feats = [] + for i, batch in range(len(image_batches)): + if i % 10 == 0: + print('Batch {}/{}, elapsed {:.3f} s'.format( + i, len(image_batches), time.time() - start)) + all_feats.append(compute_feats(image_batches[i])) + all_feats = np.concatenate(all_feats, 0) + all_fnames = np.concatenate(fname_batches, 0) + + df = pd.DataFrame({'feat': all_feats, 'filename': all_fnames}) + + # Finally, write the results. + df.to_hdf(FLAGS.output, 'df', mode='w') + print 'Done. Saved to %s.' 
% FLAGS.output From 5429729559b39661e5df1091e3312b969c1b1c62 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Mon, 25 Nov 2013 07:22:11 -0800 Subject: [PATCH 02/15] assembling batches works properly for all modes; segfault in pycaffe --- python/caffe/imagenet/wrapper2.py | 62 ++++++++++++++++++------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py index fab53790925..a3bd100ad50 100644 --- a/python/caffe/imagenet/wrapper2.py +++ b/python/caffe/imagenet/wrapper2.py @@ -2,7 +2,12 @@ wrapper2.py classifies a number of images at once, optionally using selective search crops. -TODO: also store the window coordinates with the image filenames. +The selective_search_ijcv_with_python code is available at +https://github.com/sergeyk/selective_search_ijcv_with_python + +TODO: +- [ ] store the window coordinates with the image filenames. +- [ ] batch up image filenames as well: don't want to load all of them into memory """ import numpy as np import os @@ -12,7 +17,7 @@ import time from skimage import io from skimage import transform -import selective_search_ijcv as selective_search +import selective_search_ijcv_with_python as selective_search import caffe IMAGE_DIM = 256 @@ -42,11 +47,19 @@ def load_image(filename): def format_image(image, window=None, dim=IMAGE_DIM): + """ + Input: + image: (H x W x 3) ndarray + window: (4) ndarray + (y, x, h, w) coordinates + dim: int + Final width of the square image. + """ # Crop a subimage if window is provided. if window is not None: image = image[ - window[1]:window[1] + window[3], - window[0]:window[0] + window[2] + window[0]:window[2], + window[1]:window[3] ] # Resize to ImageNet size, convert to BGR, subtract mean. @@ -54,28 +67,26 @@ def format_image(image, window=None, dim=IMAGE_DIM): image -= IMAGENET_MEAN # Resize further if needed. 
- if not np.all(dim == IMAGE_DIM): - image = transform.resize(image, (dim, dim) * 255) - + if not dim == IMAGE_DIM: + image = transform.resize(image, (dim, dim)) image = image.swapaxes(1, 2).swapaxes(0, 1) return image -def _assemble_images_center_only(image_fnames, max_batch_size): +def _assemble_images_center_only(image_fnames): all_images = [] for image_filename in image_fnames: image = format_image(load_image(image_filename)) - all_images.append(np.ascontiguousarray( - image[:, + image[np.newaxis, :, IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM], dtype=np.float32 )) - return all_images[np.newaxis, :], image_fnames + return all_images, image_fnames -def _assemble_images_corners(image_fnames, max_batch_size): +def _assemble_images_corners(image_fnames): all_images = [] fnames = [] for image_filename in image_fnames: @@ -93,16 +104,14 @@ def _assemble_images_corners(image_fnames, max_batch_size): IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM ] - - # Add flipped versions. 
- images[5:] = images[:5, :, :, ::-1] + images[5:] = images[:5, :, :, ::-1] # flipped versions all_images.append(images) fnames.append(image_filename) return all_images, fnames -def _assemble_images_selective_search(image_fnames, max_batch_size): +def _assemble_images_selective_search(image_fnames): windows_list = selective_search.get_windows(image_fnames) all_images = [] fnames = [] @@ -136,27 +145,24 @@ def assemble_batches( images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size """ if crop_mode == 'center_only': - all_images, fnames = _assemble_images_center_only( - image_fnames, max_batch_size) + all_images, fnames = _assemble_images_center_only(image_fnames) elif crop_mode == 'corners': - all_images, fnames = _assemble_images_corners( - image_fnames, max_batch_size) + all_images, fnames = _assemble_images_corners(image_fnames) elif crop_mode == 'selective_search': - all_images, fnames = _assemble_images_selective_search( - image_fnames, max_batch_size) + all_images, fnames = _assemble_images_selective_search(image_fnames) else: raise Exception("Unknown mode: not in {}".format(CROP_MODES)) # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array, # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks - all_images = np.concatenate(all_images, 0) all_fnames = np.repeat(fnames, [len(images) for images in all_images]) + all_images = np.concatenate(all_images, 0) assert(all_images.shape[0] == all_fnames.shape[0]) - num_batches = int(all_images.shape[0] / max_batch_size) + num_batches = 1 + int(all_images.shape[0] / max_batch_size) image_batches = np.array_split(all_images, num_batches, axis=0) fname_batches = np.array_split(all_fnames, num_batches, axis=0) return image_batches, fname_batches @@ -183,7 +189,7 @@ def compute_feats(batch, layer='imagenet'): gflags.DEFINE_string( "pretrained_model", "", "The pretrained model.") gflags.DEFINE_boolean( - "gpu", True, "use gpu for computation") + "gpu", False, "use gpu for computation") 
gflags.DEFINE_string( "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES)) gflags.DEFINE_string( @@ -202,13 +208,17 @@ def compute_feats(batch, layer='imagenet'): caffenet.set_mode_gpu() ## Load list of image filenames and assemble into batches. - with open(gflags.images_file) as f: + with open(FLAGS.images_file) as f: image_fnames = [_.strip() for _ in f.readlines()] image_batches, fname_batches = assemble_batches( image_fnames, FLAGS.crop_mode) print 'Running on {} files in {} batches'.format( len(image_fnames), len(image_batches)) + from IPython import embed; embed() + # TODO: debug this + # F1125 07:17:35.980950 6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0) + # Process the batches. start = time.time() all_feats = [] From 0e012d87fa1d2554ea469c60148849d02f9175b3 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Mon, 25 Nov 2013 12:15:35 -0800 Subject: [PATCH 03/15] debugging pycaffe error --- python/caffe/imagenet/wrapper2.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py index a3bd100ad50..d07d5c75cce 100644 --- a/python/caffe/imagenet/wrapper2.py +++ b/python/caffe/imagenet/wrapper2.py @@ -175,8 +175,11 @@ def compute_feats(batch, layer='imagenet'): raise ValueError("Unknown layer requested: {}".format(layer)) num = batch.shape[0] + input_blobs = [batch] output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)] - caffenet.Forward([batch], output_blobs) + print(len(input_blobs), len(output_blobs)) + print(input_blobs[0].shape, output_blobs[0].shape) + caffenet.Forward(input_blobs, output_blobs) feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))] return feats @@ -215,17 +218,15 @@ def compute_feats(batch, layer='imagenet'): print 'Running on {} files in {} batches'.format( len(image_fnames), len(image_batches)) - from IPython import embed; embed() - # TODO: debug this - # 
F1125 07:17:35.980950 6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0) - # Process the batches. start = time.time() all_feats = [] - for i, batch in range(len(image_batches)): + for i in range(len(image_batches)): if i % 10 == 0: print('Batch {}/{}, elapsed {:.3f} s'.format( i, len(image_batches), time.time() - start)) + # TODO: debug this + # F1125 07:17:35.980950 6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0) all_feats.append(compute_feats(image_batches[i])) all_feats = np.concatenate(all_feats, 0) all_fnames = np.concatenate(fname_batches, 0) From 489be58e49b006ceeae0591d6d75706f94f31a37 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Tue, 26 Nov 2013 03:27:02 -0800 Subject: [PATCH 04/15] in the middle of refactoring to use dataframes --- python/caffe/imagenet/wrapper2.py | 52 ++++++++++++++++++------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py index d07d5c75cce..82795a3d4c6 100644 --- a/python/caffe/imagenet/wrapper2.py +++ b/python/caffe/imagenet/wrapper2.py @@ -6,6 +6,7 @@ https://github.com/sergeyk/selective_search_ijcv_with_python TODO: +- [ ] make sure all batches are the same size - [ ] store the window coordinates with the image filenames. - [ ] batch up image filenames as well: don't want to load all of them into memory """ @@ -26,6 +27,8 @@ CROP_MODES = ['center_only', 'corners', 'selective_search'] +BATCH_SIZE = 256 + # Load the imagenet mean file IMAGENET_MEAN = np.load( os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy')) @@ -112,22 +115,24 @@ def _assemble_images_corners(image_fnames): def _assemble_images_selective_search(image_fnames): + # Get window proposals. 
windows_list = selective_search.get_windows(image_fnames) - all_images = [] - fnames = [] + + # for image_fname, windows in zip(image_fnames, windows_list): image = load_image(image_fname) - images = np.empty( - (len(windows), 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32) for i, window in enumerate(windows): - images[i] = format_image(image, window, CROPPED_DIM) - all_images.append(images) - fnames.append(image_fname) - return all_images, fnames + data.append({ + 'image': format_image(image, window, CROPPED_DIM), + 'window': window, + 'filename': image_fname + }) + images_df = pd.DataFrame(data) + return images_df def assemble_batches( - image_fnames, crop_mode='center_only', max_batch_size=256): + image_fnames, crop_mode='center_only'): """ Assemble list of batches, each one of at most max_batch_size subimage objects. @@ -142,27 +147,30 @@ def assemble_batches( image, and take each enclosing subwindow. Output: - images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size + : list of (X x 3 x 227 x 227) ndarray, where X <= max_batch_size """ if crop_mode == 'center_only': - all_images, fnames = _assemble_images_center_only(image_fnames) + images_df = _assemble_images_center_only(image_fnames) elif crop_mode == 'corners': - all_images, fnames = _assemble_images_corners(image_fnames) + images_df = _assemble_images_corners(image_fnames) elif crop_mode == 'selective_search': - all_images, fnames = _assemble_images_selective_search(image_fnames) + images_df = _assemble_images_selective_search(image_fnames) else: raise Exception("Unknown mode: not in {}".format(CROP_MODES)) - # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array, + # Make sure the DataFrame has a multiple of BATCH_SIZE rows: + # just fill the extra rows with NaN filenames and all-zero images. 
+ pass + # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks all_fnames = np.repeat(fnames, [len(images) for images in all_images]) all_images = np.concatenate(all_images, 0) assert(all_images.shape[0] == all_fnames.shape[0]) - num_batches = 1 + int(all_images.shape[0] / max_batch_size) + num_batches = 1 + int(all_images.shape[0] / BATCH_SIZE) image_batches = np.array_split(all_images, num_batches, axis=0) fname_batches = np.array_split(all_fnames, num_batches, axis=0) return image_batches, fname_batches @@ -211,23 +219,25 @@ def compute_feats(batch, layer='imagenet'): caffenet.set_mode_gpu() ## Load list of image filenames and assemble into batches. + t = time.time() + print('Assembling batches...') with open(FLAGS.images_file) as f: image_fnames = [_.strip() for _ in f.readlines()] image_batches, fname_batches = assemble_batches( image_fnames, FLAGS.crop_mode) - print 'Running on {} files in {} batches'.format( - len(image_fnames), len(image_batches)) + print('{} batches assembled in {:.3f} s'.format(time.time() - t)) # Process the batches. - start = time.time() + t = time.time() + print 'Running on {} files in {} batches'.format( + len(image_fnames), len(image_batches)) all_feats = [] for i in range(len(image_batches)): if i % 10 == 0: print('Batch {}/{}, elapsed {:.3f} s'.format( - i, len(image_batches), time.time() - start)) - # TODO: debug this - # F1125 07:17:35.980950 6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 
0) + i, len(image_batches), time.time() - t)) all_feats.append(compute_feats(image_batches[i])) + all_feats = np.concatenate(all_feats, 0) all_fnames = np.concatenate(fname_batches, 0) From 67e2c9d94fb53168114b42f9ab9b1a01cc86dafc Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Sun, 1 Dec 2013 16:38:24 -0800 Subject: [PATCH 05/15] convert script: spacing --- examples/convert_imageset.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/convert_imageset.cpp b/examples/convert_imageset.cpp index 6a6b86dcdf8..2769f1ea2d6 100644 --- a/examples/convert_imageset.cpp +++ b/examples/convert_imageset.cpp @@ -46,7 +46,7 @@ int main(int argc, char** argv) { LOG(INFO) << "Shuffling data"; std::random_shuffle(lines.begin(), lines.end()); } - LOG(INFO) << "A total of " << lines.size() << "images."; + LOG(INFO) << "A total of " << lines.size() << " images."; leveldb::DB* db; leveldb::Options options; From 524e0956ef58d0f1fac0bf302de341c9eff882e0 Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Mon, 2 Dec 2013 15:19:37 -0800 Subject: [PATCH 06/15] get_mnist.sh: changed the script to generate leveldb as well. --- data/get_mnist.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/data/get_mnist.sh b/data/get_mnist.sh index ec979bd7bd0..2a9ad083fb4 100755 --- a/data/get_mnist.sh +++ b/data/get_mnist.sh @@ -15,4 +15,9 @@ gunzip train-labels-idx1-ubyte.gz gunzip t10k-images-idx3-ubyte.gz gunzip t10k-labels-idx1-ubyte.gz +echo "Creating leveldb..." + +../examples/convert_mnist_data.bin train-images-idx3-ubyte train-labels-idx1-ubyte mnist-train-leveldb +../examples/convert_mnist_data.bin t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist-test-leveldb + echo "Done." 
From ca3e042863e59e9a8a8edd7e78ffe9b2431aea8f Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Mon, 2 Dec 2013 15:44:43 -0800 Subject: [PATCH 07/15] Several changes: changed the paths for the mnist demo removed unused fields in SolverParameter added solver_mode to specify CPU/GPU solving accordingly, removed the set mode code in the examples code removed examples/test_read_imagenet since it is trivial removed get_cifar script since no cifar demo exists. --- data/get_cifar.sh | 12 ------------ data/lenet.prototxt | 2 +- data/lenet_solver.prototxt | 29 ++++++++++++++++++++++------- data/lenet_test.prototxt | 2 +- examples/finetune_net.cpp | 3 --- examples/test_read_imagenet.cpp | 33 --------------------------------- examples/train_net.cpp | 3 --- src/caffe/proto/caffe.proto | 7 ++----- src/caffe/solver.cpp | 4 ++++ 9 files changed, 30 insertions(+), 65 deletions(-) delete mode 100755 data/get_cifar.sh delete mode 100644 examples/test_read_imagenet.cpp diff --git a/data/get_cifar.sh b/data/get_cifar.sh deleted file mode 100755 index 6f42bb09ae2..00000000000 --- a/data/get_cifar.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env sh -# This scripts downloads the mnist data and unzips it. - -echo "Downloading..." - -wget -q http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz - -echo "Unzipping..." - -tar xzf cifar-10-binary.tar.gz - -echo "Done." 
diff --git a/data/lenet.prototxt b/data/lenet.prototxt index 085ed43f232..c8a99946bc7 100644 --- a/data/lenet.prototxt +++ b/data/lenet.prototxt @@ -3,7 +3,7 @@ layers { layer { name: "mnist" type: "data" - source: "data/mnist-train-leveldb" + source: "mnist-train-leveldb" batchsize: 64 scale: 0.00390625 } diff --git a/data/lenet_solver.prototxt b/data/lenet_solver.prototxt index d58255b9e31..d0edc0f0c87 100644 --- a/data/lenet_solver.prototxt +++ b/data/lenet_solver.prototxt @@ -1,12 +1,27 @@ -train_net: "data/lenet.prototxt" -test_net: "data/lenet_test.prototxt" +# The training protocol buffer definition +train_net: "lenet.prototxt" +# The testing protocol buffer definition +test_net: "lenet_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of MNIST, we have test batch size 100 and 100 test iterations, +# covering the full 10,000 testing images. +test_iter: 100 +# Carry out testing every 500 training iterations. +test_interval: 500 +# The base learning rate, momentum and the weight decay of the network. 
base_lr: 0.01 +momentum: 0.9 +weight_decay: 0.0005 +# The learning rate policy lr_policy: "inv" gamma: 0.0001 power: 0.75 +# Display every 100 iterations display: 100 -max_iter: 5000 -momentum: 0.9 -weight_decay: 0.0005 -test_iter: 100 -test_interval: 500 \ No newline at end of file +# The maximum number of iterations +max_iter: 10000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "lenet" +# solver mode: 0 for CPU and 1 for GPU +solver_mode: 1 diff --git a/data/lenet_test.prototxt b/data/lenet_test.prototxt index fdda4a67ee0..676a2a6ab7d 100644 --- a/data/lenet_test.prototxt +++ b/data/lenet_test.prototxt @@ -3,7 +3,7 @@ layers { layer { name: "mnist" type: "data" - source: "data/mnist-test-leveldb" + source: "mnist-test-leveldb" batchsize: 100 scale: 0.00390625 } diff --git a/examples/finetune_net.cpp b/examples/finetune_net.cpp index d35b425c619..559715d6707 100644 --- a/examples/finetune_net.cpp +++ b/examples/finetune_net.cpp @@ -19,9 +19,6 @@ int main(int argc, char** argv) { return 0; } - Caffe::SetDevice(0); - Caffe::set_mode(Caffe::GPU); - SolverParameter solver_param; ReadProtoFromTextFile(argv[1], &solver_param); diff --git a/examples/test_read_imagenet.cpp b/examples/test_read_imagenet.cpp deleted file mode 100644 index 7b61990b941..00000000000 --- a/examples/test_read_imagenet.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2013 Yangqing Jia - -#include -#include - -#include - -int main(int argc, char** argv) { - ::google::InitGoogleLogging(argv[0]); - leveldb::DB* db; - leveldb::Options options; - options.create_if_missing = false; - - LOG(INFO) << "Opening leveldb " << argv[1]; - leveldb::Status status = leveldb::DB::Open( - options, argv[1], &db); - CHECK(status.ok()) << "Failed to open leveldb " << argv[1]; - - leveldb::ReadOptions read_options; - read_options.fill_cache = false; - int count = 0; - leveldb::Iterator* it = db->NewIterator(read_options); - for (it->SeekToFirst(); it->Valid(); it->Next()) { - // just a dummy 
operation - LOG(ERROR) << it->key().ToString(); - if (++count % 10000 == 0) { - LOG(ERROR) << "Processed " << count << " files."; - } - } - - delete db; - return 0; -} diff --git a/examples/train_net.cpp b/examples/train_net.cpp index d84619b2d44..ce62616b118 100644 --- a/examples/train_net.cpp +++ b/examples/train_net.cpp @@ -20,9 +20,6 @@ int main(int argc, char** argv) { return 0; } - Caffe::SetDevice(0); - Caffe::set_mode(Caffe::GPU); - SolverParameter solver_param; ReadProtoFromTextFile(argv[1], &solver_param); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 5b4c9ed5275..e1be61c35e1 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -137,11 +137,8 @@ message SolverParameter { // whether to snapshot diff in the results or not. Snapshotting diff will help // debugging but the final protocol buffer size will be much larger. optional bool snapshot_diff = 16 [ default = false]; - - // the asynchronous solver stuff - optional string tcp_port = 20 [ default = "20901"]; - optional string tcp_server = 21; - optional int32 communication_interval = 22; + // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. + optional int32 solver_mode = 17 [default = 1]; } // A message that stores the solver snapshots diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 5c25641ce9c..e02b72f31f6 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -39,6 +39,7 @@ Solver::Solver(const SolverParameter& param) template void Solver::Solve(const char* resume_file) { + Caffe::set_mode(Caffe::Brew(param_.solver_mode())); Caffe::set_phase(Caffe::TRAIN); LOG(INFO) << "Solving " << net_->name(); PreSolve(); @@ -71,6 +72,9 @@ void Solver::Solve(const char* resume_file) { Caffe::set_phase(Caffe::TRAIN); } } + // After the optimization is done, always do a snapshot. 
+ iter_--; + Snapshot(); LOG(INFO) << "Optimization Done."; } From 91dc83e0bf0ced7f65b5e5a0390b8cc537be971d Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Tue, 3 Dec 2013 10:05:19 -0800 Subject: [PATCH 08/15] mnist train script --- data/train_mnist.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 data/train_mnist.sh diff --git a/data/train_mnist.sh b/data/train_mnist.sh new file mode 100755 index 00000000000..1d1036a108a --- /dev/null +++ b/data/train_mnist.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env sh + +GLOG_logtostderr=1 ../examples/train_net.bin lenet_solver.prototxt From 8c96ac2e87ba1c370ee376152dce99236aac5bfd Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Tue, 3 Dec 2013 10:53:29 -0800 Subject: [PATCH 09/15] lenet.prototxt loss layer rename --- data/lenet.prototxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/lenet.prototxt b/data/lenet.prototxt index c8a99946bc7..f5877ae4804 100644 --- a/data/lenet.prototxt +++ b/data/lenet.prototxt @@ -114,7 +114,7 @@ layers { } layers { layer { - name: "prob" + name: "loss" type: "softmax_loss" } bottom: "ip2" From 521e4712ed313959aae032a4f7c118da32bdcf16 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Tue, 26 Nov 2013 03:51:01 -0800 Subject: [PATCH 10/15] OS X makefile changes --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2e07675c978..11ba70f5453 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ TEST_BINS := ${TEST_OBJS:.o=.testbin} # Derive include and lib directories ############################## CUDA_INCLUDE_DIR := $(CUDA_DIR)/include -CUDA_LIB_DIR := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib +CUDA_LIB_DIR := $(CUDA_DIR)/lib $(CUDA_DIR)/lib64 MKL_INCLUDE_DIR := $(MKL_DIR)/include MKL_LIB_DIR := $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64 From 1786be3289bca001319935abfdfa751ba97c89fa Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Fri, 22 Nov 2013 21:30:44 -0800 Subject: [PATCH 11/15] another python wrapper that batches image 
subwindows and provides a selective search provider --- python/caffe/imagenet/wrapper2.py | 227 ++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 python/caffe/imagenet/wrapper2.py diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py new file mode 100644 index 00000000000..fab53790925 --- /dev/null +++ b/python/caffe/imagenet/wrapper2.py @@ -0,0 +1,227 @@ +""" +wrapper2.py classifies a number of images at once, optionally using +selective search crops. + +TODO: also store the window coordinates with the image filenames. +""" +import numpy as np +import os +import sys +import gflags +import pandas as pd +import time +from skimage import io +from skimage import transform +import selective_search_ijcv as selective_search +import caffe + +IMAGE_DIM = 256 +CROPPED_DIM = 227 +IMAGE_CENTER = int((IMAGE_DIM - CROPPED_DIM) / 2) + +CROP_MODES = ['center_only', 'corners', 'selective_search'] + +# Load the imagenet mean file +IMAGENET_MEAN = np.load( + os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy')) + + +def load_image(filename): + """ + Input: + filename: string + Output: + image: an image of size (256 x 256 x 3) of type uint8. + """ + img = io.imread(filename) + if img.ndim == 2: + img = np.tile(img[:, :, np.newaxis], (1, 1, 3)) + elif img.shape[2] == 4: + img = img[:, :, :3] + return img + + +def format_image(image, window=None, dim=IMAGE_DIM): + # Crop a subimage if window is provided. + if window is not None: + image = image[ + window[1]:window[1] + window[3], + window[0]:window[0] + window[2] + ] + + # Resize to ImageNet size, convert to BGR, subtract mean. + image = (transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1] + image -= IMAGENET_MEAN + + # Resize further if needed. 
+ if not np.all(dim == IMAGE_DIM): + image = transform.resize(image, (dim, dim) * 255) + + image = image.swapaxes(1, 2).swapaxes(0, 1) + return image + + +def _assemble_images_center_only(image_fnames, max_batch_size): + all_images = [] + for image_filename in image_fnames: + image = format_image(load_image(image_filename)) + + all_images.append(np.ascontiguousarray( + image[:, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM], + dtype=np.float32 + )) + return all_images[np.newaxis, :], image_fnames + + +def _assemble_images_corners(image_fnames, max_batch_size): + all_images = [] + fnames = [] + for image_filename in image_fnames: + image = format_image(load_image(image_filename)) + indices = [0, IMAGE_DIM - CROPPED_DIM] + + images = np.empty((10, 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32) + curr = 0 + for i in indices: + for j in indices: + images[curr] = image[:, i:i + CROPPED_DIM, j:j + CROPPED_DIM] + curr += 1 + images[4] = image[ + :, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM + ] + + # Add flipped versions. + images[5:] = images[:5, :, :, ::-1] + + all_images.append(images) + fnames.append(image_filename) + return all_images, fnames + + +def _assemble_images_selective_search(image_fnames, max_batch_size): + windows_list = selective_search.get_windows(image_fnames) + all_images = [] + fnames = [] + for image_fname, windows in zip(image_fnames, windows_list): + image = load_image(image_fname) + images = np.empty( + (len(windows), 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32) + for i, window in enumerate(windows): + images[i] = format_image(image, window, CROPPED_DIM) + all_images.append(images) + fnames.append(image_fname) + return all_images, fnames + + +def assemble_batches( + image_fnames, crop_mode='center_only', max_batch_size=256): + """ + Assemble list of batches, each one of at most max_batch_size subimage + objects. 
+ + Input: + image_fnames: list of string + mode: string + 'center_only': the CROPPED_DIM middle of the image is taken as is + 'corners': take CROPPED_DIM-sized boxes at 4 corners and center of + the image, as well as their flipped versions: a total of 10 images + 'selective_search': run Selective Search region proposal on the + image, and take each enclosing subwindow. + + Output: + images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size + """ + if crop_mode == 'center_only': + all_images, fnames = _assemble_images_center_only( + image_fnames, max_batch_size) + + elif crop_mode == 'corners': + all_images, fnames = _assemble_images_corners( + image_fnames, max_batch_size) + + elif crop_mode == 'selective_search': + all_images, fnames = _assemble_images_selective_search( + image_fnames, max_batch_size) + + else: + raise Exception("Unknown mode: not in {}".format(CROP_MODES)) + + # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array, + # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks + all_images = np.concatenate(all_images, 0) + all_fnames = np.repeat(fnames, [len(images) for images in all_images]) + assert(all_images.shape[0] == all_fnames.shape[0]) + + num_batches = int(all_images.shape[0] / max_batch_size) + image_batches = np.array_split(all_images, num_batches, axis=0) + fname_batches = np.array_split(all_fnames, num_batches, axis=0) + return image_batches, fname_batches + + +def compute_feats(batch, layer='imagenet'): + if layer == 'imagenet': + num_output = 1000 + else: + raise ValueError("Unknown layer requested: {}".format(layer)) + + num = batch.shape[0] + output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)] + caffenet.Forward([batch], output_blobs) + feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))] + + return feats + + +if __name__ == "__main__": + ## Parse cmdline options + gflags.DEFINE_string( + "model_def", "", "The model definition file.") + gflags.DEFINE_string( + 
"pretrained_model", "", "The pretrained model.") + gflags.DEFINE_boolean( + "gpu", True, "use gpu for computation") + gflags.DEFINE_string( + "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES)) + gflags.DEFINE_string( + "images_file", "", "File that contains image filenames.") + gflags.DEFINE_string( + "output", "", "The output DataFrame HDF5 filename.") + gflags.DEFINE_string( + "layer", "imagenet", "Layer to output.") + FLAGS = gflags.FLAGS + FLAGS(sys.argv) + + ## Initialize network by loading model definition and weights. + caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model) + caffenet.set_phase_test() + if FLAGS.gpu: + caffenet.set_mode_gpu() + + ## Load list of image filenames and assemble into batches. + with open(gflags.images_file) as f: + image_fnames = [_.strip() for _ in f.readlines()] + image_batches, fname_batches = assemble_batches( + image_fnames, FLAGS.crop_mode) + print 'Running on {} files in {} batches'.format( + len(image_fnames), len(image_batches)) + + # Process the batches. + start = time.time() + all_feats = [] + for i, batch in range(len(image_batches)): + if i % 10 == 0: + print('Batch {}/{}, elapsed {:.3f} s'.format( + i, len(image_batches), time.time() - start)) + all_feats.append(compute_feats(image_batches[i])) + all_feats = np.concatenate(all_feats, 0) + all_fnames = np.concatenate(fname_batches, 0) + + df = pd.DataFrame({'feat': all_feats, 'filename': all_fnames}) + + # Finally, write the results. + df.to_hdf(FLAGS.output, 'df', mode='w') + print 'Done. Saved to %s.' 
% FLAGS.output From aca88a5f156db90193158774b4e5c8fee6075cb1 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Mon, 25 Nov 2013 07:22:11 -0800 Subject: [PATCH 12/15] assembling batches works properly for all modes; segfault in pycaffe --- python/caffe/imagenet/wrapper2.py | 62 ++++++++++++++++++------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py index fab53790925..a3bd100ad50 100644 --- a/python/caffe/imagenet/wrapper2.py +++ b/python/caffe/imagenet/wrapper2.py @@ -2,7 +2,12 @@ wrapper2.py classifies a number of images at once, optionally using selective search crops. -TODO: also store the window coordinates with the image filenames. +The selective_search_ijcv_with_python code is available at +https://github.com/sergeyk/selective_search_ijcv_with_python + +TODO: +- [ ] store the window coordinates with the image filenames. +- [ ] batch up image filenames as well: don't want to load all of them into memory """ import numpy as np import os @@ -12,7 +17,7 @@ import time from skimage import io from skimage import transform -import selective_search_ijcv as selective_search +import selective_search_ijcv_with_python as selective_search import caffe IMAGE_DIM = 256 @@ -42,11 +47,19 @@ def load_image(filename): def format_image(image, window=None, dim=IMAGE_DIM): + """ + Input: + image: (H x W x 3) ndarray + window: (4) ndarray + (y, x, h, w) coordinates + dim: int + Final width of the square image. + """ # Crop a subimage if window is provided. if window is not None: image = image[ - window[1]:window[1] + window[3], - window[0]:window[0] + window[2] + window[0]:window[2], + window[1]:window[3] ] # Resize to ImageNet size, convert to BGR, subtract mean. @@ -54,28 +67,26 @@ def format_image(image, window=None, dim=IMAGE_DIM): image -= IMAGENET_MEAN # Resize further if needed. 
- if not np.all(dim == IMAGE_DIM): - image = transform.resize(image, (dim, dim) * 255) - + if not dim == IMAGE_DIM: + image = transform.resize(image, (dim, dim)) image = image.swapaxes(1, 2).swapaxes(0, 1) return image -def _assemble_images_center_only(image_fnames, max_batch_size): +def _assemble_images_center_only(image_fnames): all_images = [] for image_filename in image_fnames: image = format_image(load_image(image_filename)) - all_images.append(np.ascontiguousarray( - image[:, + image[np.newaxis, :, IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM], dtype=np.float32 )) - return all_images[np.newaxis, :], image_fnames + return all_images, image_fnames -def _assemble_images_corners(image_fnames, max_batch_size): +def _assemble_images_corners(image_fnames): all_images = [] fnames = [] for image_filename in image_fnames: @@ -93,16 +104,14 @@ def _assemble_images_corners(image_fnames, max_batch_size): IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM ] - - # Add flipped versions. 
- images[5:] = images[:5, :, :, ::-1] + images[5:] = images[:5, :, :, ::-1] # flipped versions all_images.append(images) fnames.append(image_filename) return all_images, fnames -def _assemble_images_selective_search(image_fnames, max_batch_size): +def _assemble_images_selective_search(image_fnames): windows_list = selective_search.get_windows(image_fnames) all_images = [] fnames = [] @@ -136,27 +145,24 @@ def assemble_batches( images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size """ if crop_mode == 'center_only': - all_images, fnames = _assemble_images_center_only( - image_fnames, max_batch_size) + all_images, fnames = _assemble_images_center_only(image_fnames) elif crop_mode == 'corners': - all_images, fnames = _assemble_images_corners( - image_fnames, max_batch_size) + all_images, fnames = _assemble_images_corners(image_fnames) elif crop_mode == 'selective_search': - all_images, fnames = _assemble_images_selective_search( - image_fnames, max_batch_size) + all_images, fnames = _assemble_images_selective_search(image_fnames) else: raise Exception("Unknown mode: not in {}".format(CROP_MODES)) # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array, # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks - all_images = np.concatenate(all_images, 0) all_fnames = np.repeat(fnames, [len(images) for images in all_images]) + all_images = np.concatenate(all_images, 0) assert(all_images.shape[0] == all_fnames.shape[0]) - num_batches = int(all_images.shape[0] / max_batch_size) + num_batches = 1 + int(all_images.shape[0] / max_batch_size) image_batches = np.array_split(all_images, num_batches, axis=0) fname_batches = np.array_split(all_fnames, num_batches, axis=0) return image_batches, fname_batches @@ -183,7 +189,7 @@ def compute_feats(batch, layer='imagenet'): gflags.DEFINE_string( "pretrained_model", "", "The pretrained model.") gflags.DEFINE_boolean( - "gpu", True, "use gpu for computation") + "gpu", False, "use gpu for computation") 
gflags.DEFINE_string( "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES)) gflags.DEFINE_string( @@ -202,13 +208,17 @@ def compute_feats(batch, layer='imagenet'): caffenet.set_mode_gpu() ## Load list of image filenames and assemble into batches. - with open(gflags.images_file) as f: + with open(FLAGS.images_file) as f: image_fnames = [_.strip() for _ in f.readlines()] image_batches, fname_batches = assemble_batches( image_fnames, FLAGS.crop_mode) print 'Running on {} files in {} batches'.format( len(image_fnames), len(image_batches)) + from IPython import embed; embed() + # TODO: debug this + # F1125 07:17:35.980950 6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0) + # Process the batches. start = time.time() all_feats = [] From a21c86cf5d2ac53cb9cee02b3e525e5bf6938620 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Mon, 25 Nov 2013 12:15:35 -0800 Subject: [PATCH 13/15] debugging pycaffe error --- python/caffe/imagenet/wrapper2.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py index a3bd100ad50..d07d5c75cce 100644 --- a/python/caffe/imagenet/wrapper2.py +++ b/python/caffe/imagenet/wrapper2.py @@ -175,8 +175,11 @@ def compute_feats(batch, layer='imagenet'): raise ValueError("Unknown layer requested: {}".format(layer)) num = batch.shape[0] + input_blobs = [batch] output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)] - caffenet.Forward([batch], output_blobs) + print(len(input_blobs), len(output_blobs)) + print(input_blobs[0].shape, output_blobs[0].shape) + caffenet.Forward(input_blobs, output_blobs) feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))] return feats @@ -215,17 +218,15 @@ def compute_feats(batch, layer='imagenet'): print 'Running on {} files in {} batches'.format( len(image_fnames), len(image_batches)) - from IPython import embed; embed() - # TODO: debug this - # 
F1125 07:17:35.980950 6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0) - # Process the batches. start = time.time() all_feats = [] - for i, batch in range(len(image_batches)): + for i in range(len(image_batches)): if i % 10 == 0: print('Batch {}/{}, elapsed {:.3f} s'.format( i, len(image_batches), time.time() - start)) + # TODO: debug this + # F1125 07:17:35.980950 6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0) all_feats.append(compute_feats(image_batches[i])) all_feats = np.concatenate(all_feats, 0) all_fnames = np.concatenate(fname_batches, 0) From de648ba3ef3b913b1898b663e7c7c482559f22c4 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Tue, 26 Nov 2013 03:27:02 -0800 Subject: [PATCH 14/15] in the middle of refactoring to use dataframes --- python/caffe/imagenet/wrapper2.py | 52 ++++++++++++++++++------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py index d07d5c75cce..82795a3d4c6 100644 --- a/python/caffe/imagenet/wrapper2.py +++ b/python/caffe/imagenet/wrapper2.py @@ -6,6 +6,7 @@ https://github.com/sergeyk/selective_search_ijcv_with_python TODO: +- [ ] make sure all batches are the same size - [ ] store the window coordinates with the image filenames. - [ ] batch up image filenames as well: don't want to load all of them into memory """ @@ -26,6 +27,8 @@ CROP_MODES = ['center_only', 'corners', 'selective_search'] +BATCH_SIZE = 256 + # Load the imagenet mean file IMAGENET_MEAN = np.load( os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy')) @@ -112,22 +115,24 @@ def _assemble_images_corners(image_fnames): def _assemble_images_selective_search(image_fnames): + # Get window proposals. 
windows_list = selective_search.get_windows(image_fnames) - all_images = [] - fnames = [] + + # for image_fname, windows in zip(image_fnames, windows_list): image = load_image(image_fname) - images = np.empty( - (len(windows), 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32) for i, window in enumerate(windows): - images[i] = format_image(image, window, CROPPED_DIM) - all_images.append(images) - fnames.append(image_fname) - return all_images, fnames + data.append({ + 'image': format_image(image, window, CROPPED_DIM), + 'window': window, + 'filename': image_fname + }) + images_df = pd.DataFrame(data) + return images_df def assemble_batches( - image_fnames, crop_mode='center_only', max_batch_size=256): + image_fnames, crop_mode='center_only'): """ Assemble list of batches, each one of at most max_batch_size subimage objects. @@ -142,27 +147,30 @@ def assemble_batches( image, and take each enclosing subwindow. Output: - images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size + : list of (X x 3 x 227 x 227) ndarray, where X <= max_batch_size """ if crop_mode == 'center_only': - all_images, fnames = _assemble_images_center_only(image_fnames) + images_df = _assemble_images_center_only(image_fnames) elif crop_mode == 'corners': - all_images, fnames = _assemble_images_corners(image_fnames) + images_df = _assemble_images_corners(image_fnames) elif crop_mode == 'selective_search': - all_images, fnames = _assemble_images_selective_search(image_fnames) + images_df = _assemble_images_selective_search(image_fnames) else: raise Exception("Unknown mode: not in {}".format(CROP_MODES)) - # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array, + # Make sure the DataFrame has a multiple of BATCH_SIZE rows: + # just fill the extra rows with NaN filenames and all-zero images. 
+ pass + # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks all_fnames = np.repeat(fnames, [len(images) for images in all_images]) all_images = np.concatenate(all_images, 0) assert(all_images.shape[0] == all_fnames.shape[0]) - num_batches = 1 + int(all_images.shape[0] / max_batch_size) + num_batches = 1 + int(all_images.shape[0] / BATCH_SIZE) image_batches = np.array_split(all_images, num_batches, axis=0) fname_batches = np.array_split(all_fnames, num_batches, axis=0) return image_batches, fname_batches @@ -211,23 +219,25 @@ def compute_feats(batch, layer='imagenet'): caffenet.set_mode_gpu() ## Load list of image filenames and assemble into batches. + t = time.time() + print('Assembling batches...') with open(FLAGS.images_file) as f: image_fnames = [_.strip() for _ in f.readlines()] image_batches, fname_batches = assemble_batches( image_fnames, FLAGS.crop_mode) - print 'Running on {} files in {} batches'.format( - len(image_fnames), len(image_batches)) + print('{} batches assembled in {:.3f} s'.format(time.time() - t)) # Process the batches. - start = time.time() + t = time.time() + print 'Running on {} files in {} batches'.format( + len(image_fnames), len(image_batches)) all_feats = [] for i in range(len(image_batches)): if i % 10 == 0: print('Batch {}/{}, elapsed {:.3f} s'.format( - i, len(image_batches), time.time() - start)) - # TODO: debug this - # F1125 07:17:35.980950 6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 
0) + i, len(image_batches), time.time() - t)) all_feats.append(compute_feats(image_batches[i])) + all_feats = np.concatenate(all_feats, 0) all_fnames = np.concatenate(fname_batches, 0) From b0e7d645154fd98cebe972d5557c02553ec95f90 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Thu, 5 Dec 2013 16:25:05 -0800 Subject: [PATCH 15/15] now using dataframes to pass data around and storing windows --- python/caffe/imagenet/wrapper2.py | 201 ++++++++++++++++++++---------- 1 file changed, 134 insertions(+), 67 deletions(-) diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py index 82795a3d4c6..e23b48b4079 100644 --- a/python/caffe/imagenet/wrapper2.py +++ b/python/caffe/imagenet/wrapper2.py @@ -1,13 +1,17 @@ """ -wrapper2.py classifies a number of images at once, optionally using -selective search crops. +Classify a number of images at once, optionally using the selective +search window proposal method. + +This implementation follows + Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik. + Rich feature hierarchies for accurate object detection and semantic + segmentation. + http://arxiv.org/abs/1311.2524 The selective_search_ijcv_with_python code is available at -https://github.com/sergeyk/selective_search_ijcv_with_python + https://github.com/sergeyk/selective_search_ijcv_with_python TODO: -- [ ] make sure all batches are the same size -- [ ] store the window coordinates with the image filenames. - [ ] batch up image filenames as well: don't want to load all of them into memory """ import numpy as np @@ -16,8 +20,8 @@ import gflags import pandas as pd import time -from skimage import io -from skimage import transform +import skimage.io +import skimage.transform import selective_search_ijcv_with_python as selective_search import caffe @@ -38,10 +42,11 @@ def load_image(filename): """ Input: filename: string + Output: image: an image of size (256 x 256 x 3) of type uint8. 
""" - img = io.imread(filename) + img = skimage.io.imread(filename) if img.ndim == 2: img = np.tile(img[:, :, np.newaxis], (1, 1, 3)) elif img.shape[2] == 4: @@ -57,6 +62,10 @@ def format_image(image, window=None, dim=IMAGE_DIM): (y, x, h, w) coordinates dim: int Final width of the square image. + + Output: + image: (3 x H x W) ndarray + Resized to (dim, dim). """ # Crop a subimage if window is provided. if window is not None: @@ -66,17 +75,27 @@ def format_image(image, window=None, dim=IMAGE_DIM): ] # Resize to ImageNet size, convert to BGR, subtract mean. - image = (transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1] + image = (skimage.transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1] image -= IMAGENET_MEAN # Resize further if needed. if not dim == IMAGE_DIM: - image = transform.resize(image, (dim, dim)) + image = skimage.transform.resize(image, (dim, dim)) image = image.swapaxes(1, 2).swapaxes(0, 1) return image def _assemble_images_center_only(image_fnames): + """ + For each image, square the image and crop its center. + + Input: + image_fnames: list + + Output: + images_df: pandas.DataFrame + With 'image', 'filename' columns. + """ all_images = [] for image_filename in image_fnames: image = format_image(load_image(image_filename)) @@ -86,12 +105,27 @@ def _assemble_images_center_only(image_fnames): IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM], dtype=np.float32 )) - return all_images, image_fnames + + images_df = pd.DataFrame({ + 'image': all_images, + 'filename': image_fnames + }) + return images_df def _assemble_images_corners(image_fnames): + """ + For each image, square the image and crop its center, four corners, + and mirrored version of the above. + + Input: + image_fnames: list + + Output: + images_df: pandas.DataFrame + With 'image', 'filename' columns. 
+ """ all_images = [] - fnames = [] for image_filename in image_fnames: image = format_image(load_image(image_filename)) indices = [0, IMAGE_DIM - CROPPED_DIM] @@ -110,44 +144,61 @@ def _assemble_images_corners(image_fnames): images[5:] = images[:5, :, :, ::-1] # flipped versions all_images.append(images) - fnames.append(image_filename) - return all_images, fnames + + images_df = pd.DataFrame({ + 'image': [row for row in images for images in all_images], + 'filename': np.repeat(image_fnames, 10) + }) + return images_df def _assemble_images_selective_search(image_fnames): - # Get window proposals. + """ + Run Selective Search window proposals on all images, then for each + image-window pair, extract a square crop. + + Input: + image_fnames: list + + Output: + images_df: pandas.DataFrame + With 'image', 'filename' columns. + """ windows_list = selective_search.get_windows(image_fnames) - # + data = [] for image_fname, windows in zip(image_fnames, windows_list): image = load_image(image_fname) - for i, window in enumerate(windows): + for window in windows: data.append({ - 'image': format_image(image, window, CROPPED_DIM), + 'image': format_image(image, window, CROPPED_DIM)[np.newaxis, :], 'window': window, 'filename': image_fname }) + images_df = pd.DataFrame(data) return images_df -def assemble_batches( - image_fnames, crop_mode='center_only'): +def assemble_batches(image_fnames, crop_mode='center_only', batch_size=256): """ - Assemble list of batches, each one of at most max_batch_size subimage - objects. + Assemble DataFrame of image crops for feature computation. Input: - image_fnames: list of string - mode: string - 'center_only': the CROPPED_DIM middle of the image is taken as is - 'corners': take CROPPED_DIM-sized boxes at 4 corners and center of - the image, as well as their flipped versions: a total of 10 images - 'selective_search': run Selective Search region proposal on the - image, and take each enclosing subwindow. 
+ image_fnames: list of string + mode: string + 'center_only': the CROPPED_DIM middle of the image is taken as is + 'corners': take CROPPED_DIM-sized boxes at 4 corners and center of + the image, as well as their flipped versions: a total of 10. + 'selective_search': run Selective Search region proposal on the + image, and take each enclosing subwindow. Output: - : list of (X x 3 x 227 x 227) ndarray, where X <= max_batch_size + df_batches: list of DataFrames, each one of BATCH_SIZE rows. + Each row has 'image', 'filename', and 'window' info. + Column 'image' contains (X x 3 x 227 x 227) ndarrays. + Column 'filename' contains source filenames. + If 'filename' is None, then the row is just for padding. """ if crop_mode == 'center_only': images_df = _assemble_images_center_only(image_fnames) @@ -163,34 +214,43 @@ def assemble_batches( # Make sure the DataFrame has a multiple of BATCH_SIZE rows: # just fill the extra rows with NaN filenames and all-zero images. - pass - - # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks - all_fnames = np.repeat(fnames, [len(images) for images in all_images]) - all_images = np.concatenate(all_images, 0) - assert(all_images.shape[0] == all_fnames.shape[0]) - - num_batches = 1 + int(all_images.shape[0] / BATCH_SIZE) - image_batches = np.array_split(all_images, num_batches, axis=0) - fname_batches = np.array_split(all_fnames, num_batches, axis=0) - return image_batches, fname_batches - - -def compute_feats(batch, layer='imagenet'): + N = images_df.shape[0] + remainder = N % BATCH_SIZE + if remainder > 0: + zero_image = np.zeros_like(images_df['image'].iloc[0]) + remainder_df = pd.DataFrame([{ + 'filename': None, + 'image': zero_image, + 'window': [0, 0, 0, 0] + }] * (BATCH_SIZE - remainder)) + images_df = images_df.append(remainder_df) + N = images_df.shape[0] + + # Split into batches of BATCH_SIZE. 
+ ind = np.arange(N) / BATCH_SIZE + df_batches = [images_df[ind == i] for i in range(N / BATCH_SIZE)] + return df_batches + + +def compute_feats(images_df, layer='imagenet'): if layer == 'imagenet': num_output = 1000 else: raise ValueError("Unknown layer requested: {}".format(layer)) - num = batch.shape[0] - input_blobs = [batch] + num = images_df.shape[0] + input_blobs = [np.concatenate(images_df['image'].values)] output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)] print(len(input_blobs), len(output_blobs)) print(input_blobs[0].shape, output_blobs[0].shape) - caffenet.Forward(input_blobs, output_blobs) + + #caffenet.Forward(input_blobs, output_blobs) feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))] - return feats + # Add the features and delete the images. + del images_df['image'] + images_df['feat'] = feats + return images_df if __name__ == "__main__": @@ -205,6 +265,8 @@ def compute_feats(batch, layer='imagenet'): "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES)) gflags.DEFINE_string( "images_file", "", "File that contains image filenames.") + gflags.DEFINE_string( + "batch_size", 256, "Number of image crops to let through in one go") gflags.DEFINE_string( "output", "", "The output DataFrame HDF5 filename.") gflags.DEFINE_string( @@ -212,37 +274,42 @@ def compute_feats(batch, layer='imagenet'): FLAGS = gflags.FLAGS FLAGS(sys.argv) - ## Initialize network by loading model definition and weights. - caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model) - caffenet.set_phase_test() - if FLAGS.gpu: - caffenet.set_mode_gpu() - ## Load list of image filenames and assemble into batches. 
 t = time.time() print('Assembling batches...') with open(FLAGS.images_file) as f: image_fnames = [_.strip() for _ in f.readlines()] - image_batches, fname_batches = assemble_batches( - image_fnames, FLAGS.crop_mode) - print('{} batches assembled in {:.3f} s'.format(time.time() - t)) + image_batches = assemble_batches( + image_fnames, FLAGS.crop_mode, FLAGS.batch_size) + print('{} batches assembled in {:.3f} s'.format( + len(image_batches), time.time() - t)) + + # Initialize network by loading model definition and weights. + t = time.time() + print("Loading Caffe model.") + caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model) + caffenet.set_phase_test() + if FLAGS.gpu: + caffenet.set_mode_gpu() + print("Caffe model loaded in {:.3f} s".format(time.time() - t)) # Process the batches. t = time.time() - print 'Running on {} files in {} batches'.format( + print 'Processing {} files in {} batches'.format( len(image_fnames), len(image_batches)) - all_feats = [] + dfs_with_feats = [] for i in range(len(image_batches)): if i % 10 == 0: - print('Batch {}/{}, elapsed {:.3f} s'.format( + print('Batch {}/{}, elapsed time: {:.3f} s'.format( i, len(image_batches), time.time() - t)) - all_feats.append(compute_feats(image_batches[i])) + dfs_with_feats.append(compute_feats(image_batches[i])) - all_feats = np.concatenate(all_feats, 0) - all_fnames = np.concatenate(fname_batches, 0) + # Concatenate, dropping the padding rows. + df = pd.concat(dfs_with_feats).dropna(subset=['filename']) + print("Processing complete after {:.3f} s.".format(time.time() - t)) - df = pd.DataFrame({'feat': all_feats, 'filename': all_fnames}) - - # Finally, write the results. + # Write out the results. + t = time.time() df.to_hdf(FLAGS.output, 'df', mode='w') - print 'Done. Saved to %s.' % FLAGS.output + print("Done. Saving to {} took {:.3f} s.".format( + FLAGS.output, time.time() - t))