diff --git a/Makefile b/Makefile index 2e07675c978..11ba70f5453 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ TEST_BINS := ${TEST_OBJS:.o=.testbin} # Derive include and lib directories ############################## CUDA_INCLUDE_DIR := $(CUDA_DIR)/include -CUDA_LIB_DIR := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib +CUDA_LIB_DIR := $(CUDA_DIR)/lib $(CUDA_DIR)/lib64 MKL_INCLUDE_DIR := $(MKL_DIR)/include MKL_LIB_DIR := $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64 diff --git a/data/get_cifar.sh b/data/get_cifar.sh deleted file mode 100755 index 6f42bb09ae2..00000000000 --- a/data/get_cifar.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env sh -# This scripts downloads the mnist data and unzips it. - -echo "Downloading..." - -wget -q http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz - -echo "Unzipping..." - -tar xzf cifar-10-binary.tar.gz - -echo "Done." diff --git a/data/get_mnist.sh b/data/get_mnist.sh index ec979bd7bd0..2a9ad083fb4 100755 --- a/data/get_mnist.sh +++ b/data/get_mnist.sh @@ -15,4 +15,9 @@ gunzip train-labels-idx1-ubyte.gz gunzip t10k-images-idx3-ubyte.gz gunzip t10k-labels-idx1-ubyte.gz +echo "Creating leveldb..." + +../examples/convert_mnist_data.bin train-images-idx3-ubyte train-labels-idx1-ubyte mnist-train-leveldb +../examples/convert_mnist_data.bin t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist-test-leveldb + echo "Done." 
diff --git a/data/lenet.prototxt b/data/lenet.prototxt index 085ed43f232..f5877ae4804 100644 --- a/data/lenet.prototxt +++ b/data/lenet.prototxt @@ -3,7 +3,7 @@ layers { layer { name: "mnist" type: "data" - source: "data/mnist-train-leveldb" + source: "mnist-train-leveldb" batchsize: 64 scale: 0.00390625 } @@ -114,7 +114,7 @@ layers { } layers { layer { - name: "prob" + name: "loss" type: "softmax_loss" } bottom: "ip2" diff --git a/data/lenet_solver.prototxt b/data/lenet_solver.prototxt index d58255b9e31..d0edc0f0c87 100644 --- a/data/lenet_solver.prototxt +++ b/data/lenet_solver.prototxt @@ -1,12 +1,27 @@ -train_net: "data/lenet.prototxt" -test_net: "data/lenet_test.prototxt" +# The training protocol buffer definition +train_net: "lenet.prototxt" +# The testing protocol buffer definition +test_net: "lenet_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of MNIST, we have test batch size 100 and 100 test iterations, +# covering the full 10,000 testing images. +test_iter: 100 +# Carry out testing every 500 training iterations. +test_interval: 500 +# The base learning rate, momentum and the weight decay of the network. 
base_lr: 0.01 +momentum: 0.9 +weight_decay: 0.0005 +# The learning rate policy lr_policy: "inv" gamma: 0.0001 power: 0.75 +# Display every 100 iterations display: 100 -max_iter: 5000 -momentum: 0.9 -weight_decay: 0.0005 -test_iter: 100 -test_interval: 500 \ No newline at end of file +# The maximum number of iterations +max_iter: 10000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "lenet" +# solver mode: 0 for CPU and 1 for GPU +solver_mode: 1 diff --git a/data/lenet_test.prototxt b/data/lenet_test.prototxt index fdda4a67ee0..676a2a6ab7d 100644 --- a/data/lenet_test.prototxt +++ b/data/lenet_test.prototxt @@ -3,7 +3,7 @@ layers { layer { name: "mnist" type: "data" - source: "data/mnist-test-leveldb" + source: "mnist-test-leveldb" batchsize: 100 scale: 0.00390625 } diff --git a/data/train_mnist.sh b/data/train_mnist.sh new file mode 100755 index 00000000000..1d1036a108a --- /dev/null +++ b/data/train_mnist.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env sh + +GLOG_logtostderr=1 ../examples/train_net.bin lenet_solver.prototxt diff --git a/examples/convert_imageset.cpp b/examples/convert_imageset.cpp index 6a6b86dcdf8..2769f1ea2d6 100644 --- a/examples/convert_imageset.cpp +++ b/examples/convert_imageset.cpp @@ -46,7 +46,7 @@ int main(int argc, char** argv) { LOG(INFO) << "Shuffling data"; std::random_shuffle(lines.begin(), lines.end()); } - LOG(INFO) << "A total of " << lines.size() << "images."; + LOG(INFO) << "A total of " << lines.size() << " images."; leveldb::DB* db; leveldb::Options options; diff --git a/examples/finetune_net.cpp b/examples/finetune_net.cpp index d35b425c619..559715d6707 100644 --- a/examples/finetune_net.cpp +++ b/examples/finetune_net.cpp @@ -19,9 +19,6 @@ int main(int argc, char** argv) { return 0; } - Caffe::SetDevice(0); - Caffe::set_mode(Caffe::GPU); - SolverParameter solver_param; ReadProtoFromTextFile(argv[1], &solver_param); diff --git a/examples/test_read_imagenet.cpp b/examples/test_read_imagenet.cpp deleted file mode 100644 
index 7b61990b941..00000000000 --- a/examples/test_read_imagenet.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2013 Yangqing Jia - -#include -#include - -#include - -int main(int argc, char** argv) { - ::google::InitGoogleLogging(argv[0]); - leveldb::DB* db; - leveldb::Options options; - options.create_if_missing = false; - - LOG(INFO) << "Opening leveldb " << argv[1]; - leveldb::Status status = leveldb::DB::Open( - options, argv[1], &db); - CHECK(status.ok()) << "Failed to open leveldb " << argv[1]; - - leveldb::ReadOptions read_options; - read_options.fill_cache = false; - int count = 0; - leveldb::Iterator* it = db->NewIterator(read_options); - for (it->SeekToFirst(); it->Valid(); it->Next()) { - // just a dummy operation - LOG(ERROR) << it->key().ToString(); - if (++count % 10000 == 0) { - LOG(ERROR) << "Processed " << count << " files."; - } - } - - delete db; - return 0; -} diff --git a/examples/train_net.cpp b/examples/train_net.cpp index d84619b2d44..ce62616b118 100644 --- a/examples/train_net.cpp +++ b/examples/train_net.cpp @@ -20,9 +20,6 @@ int main(int argc, char** argv) { return 0; } - Caffe::SetDevice(0); - Caffe::set_mode(Caffe::GPU); - SolverParameter solver_param; ReadProtoFromTextFile(argv[1], &solver_param); diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py new file mode 100644 index 00000000000..e23b48b4079 --- /dev/null +++ b/python/caffe/imagenet/wrapper2.py @@ -0,0 +1,315 @@ +""" +Classify a number of images at once, optionally using the selective +search window proposal method. + +This implementation follows + Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik. + Rich feature hierarchies for accurate object detection and semantic + segmentation. 
+ http://arxiv.org/abs/1311.2524 + +The selective_search_ijcv_with_python code is available at + https://github.com/sergeyk/selective_search_ijcv_with_python + +TODO: +- [ ] batch up image filenames as well: don't want to load all of them into memory +""" +import numpy as np +import os +import sys +import gflags +import pandas as pd +import time +import skimage.io +import skimage.transform +import selective_search_ijcv_with_python as selective_search +import caffe + +IMAGE_DIM = 256 +CROPPED_DIM = 227 +IMAGE_CENTER = int((IMAGE_DIM - CROPPED_DIM) / 2) + +CROP_MODES = ['center_only', 'corners', 'selective_search'] + +BATCH_SIZE = 256 + +# Load the imagenet mean file +IMAGENET_MEAN = np.load( + os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy')) + + +def load_image(filename): + """ + Input: + filename: string + + Output: + image: an image of size (256 x 256 x 3) of type uint8. + """ + img = skimage.io.imread(filename) + if img.ndim == 2: + img = np.tile(img[:, :, np.newaxis], (1, 1, 3)) + elif img.shape[2] == 4: + img = img[:, :, :3] + return img + + +def format_image(image, window=None, dim=IMAGE_DIM): + """ + Input: + image: (H x W x 3) ndarray + window: (4) ndarray + (y, x, h, w) coordinates + dim: int + Final width of the square image. + + Output: + image: (3 x H x W) ndarray + Resized to (dim, dim). + """ + # Crop a subimage if window is provided. + if window is not None: + image = image[ + window[0]:window[2], + window[1]:window[3] + ] + + # Resize to ImageNet size, convert to BGR, subtract mean. + image = (skimage.transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1] + image -= IMAGENET_MEAN + + # Resize further if needed. + if not dim == IMAGE_DIM: + image = skimage.transform.resize(image, (dim, dim)) + image = image.swapaxes(1, 2).swapaxes(0, 1) + return image + + +def _assemble_images_center_only(image_fnames): + """ + For each image, square the image and crop its center. 
+ + Input: + image_fnames: list + + Output: + images_df: pandas.DataFrame + With 'image', 'filename' columns. + """ + all_images = [] + for image_filename in image_fnames: + image = format_image(load_image(image_filename)) + all_images.append(np.ascontiguousarray( + image[np.newaxis, :, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM], + dtype=np.float32 + )) + + images_df = pd.DataFrame({ + 'image': all_images, + 'filename': image_fnames + }) + return images_df + + +def _assemble_images_corners(image_fnames): + """ + For each image, square the image and crop its center, four corners, + and mirrored version of the above. + + Input: + image_fnames: list + + Output: + images_df: pandas.DataFrame + With 'image', 'filename' columns. + """ + all_images = [] + for image_filename in image_fnames: + image = format_image(load_image(image_filename)) + indices = [0, IMAGE_DIM - CROPPED_DIM] + + images = np.empty((10, 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32) + curr = 0 + for i in indices: + for j in indices: + images[curr] = image[:, i:i + CROPPED_DIM, j:j + CROPPED_DIM] + curr += 1 + images[4] = image[ + :, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, + IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM + ] + images[5:] = images[:5, :, :, ::-1] # flipped versions + + all_images.append(images) + + images_df = pd.DataFrame({ + 'image': [row for row in images for images in all_images], + 'filename': np.repeat(image_fnames, 10) + }) + return images_df + + +def _assemble_images_selective_search(image_fnames): + """ + Run Selective Search window proposals on all images, then for each + image-window pair, extract a square crop. + + Input: + image_fnames: list + + Output: + images_df: pandas.DataFrame + With 'image', 'filename' columns. 
+ """ + windows_list = selective_search.get_windows(image_fnames) + + data = [] + for image_fname, windows in zip(image_fnames, windows_list): + image = load_image(image_fname) + for window in windows: + data.append({ + 'image': format_image(image, window, CROPPED_DIM)[np.newaxis, :], + 'window': window, + 'filename': image_fname + }) + + images_df = pd.DataFrame(data) + return images_df + + +def assemble_batches(image_fnames, crop_mode='center_only', batch_size=256): + """ + Assemble DataFrame of image crops for feature computation. + + Input: + image_fnames: list of string + mode: string + 'center_only': the CROPPED_DIM middle of the image is taken as is + 'corners': take CROPPED_DIM-sized boxes at 4 corners and center of + the image, as well as their flipped versions: a total of 10. + 'selective_search': run Selective Search region proposal on the + image, and take each enclosing subwindow. + + Output: + df_batches: list of DataFrames, each one of BATCH_SIZE rows. + Each row has 'image', 'filename', and 'window' info. + Column 'image' contains (X x 3 x 227 x 227) ndarrays. + Column 'filename' contains source filenames. + If 'filename' is None, then the row is just for padding. + """ + if crop_mode == 'center_only': + images_df = _assemble_images_center_only(image_fnames) + + elif crop_mode == 'corners': + images_df = _assemble_images_corners(image_fnames) + + elif crop_mode == 'selective_search': + images_df = _assemble_images_selective_search(image_fnames) + + else: + raise Exception("Unknown mode: not in {}".format(CROP_MODES)) + + # Make sure the DataFrame has a multiple of BATCH_SIZE rows: + # just fill the extra rows with NaN filenames and all-zero images. 
+ N = images_df.shape[0] + remainder = N % BATCH_SIZE + if remainder > 0: + zero_image = np.zeros_like(images_df['image'].iloc[0]) + remainder_df = pd.DataFrame([{ + 'filename': None, + 'image': zero_image, + 'window': [0, 0, 0, 0] + }] * (BATCH_SIZE - remainder)) + images_df = images_df.append(remainder_df) + N = images_df.shape[0] + + # Split into batches of BATCH_SIZE. + ind = np.arange(N) / BATCH_SIZE + df_batches = [images_df[ind == i] for i in range(N / BATCH_SIZE)] + return df_batches + + +def compute_feats(images_df, layer='imagenet'): + if layer == 'imagenet': + num_output = 1000 + else: + raise ValueError("Unknown layer requested: {}".format(layer)) + + num = images_df.shape[0] + input_blobs = [np.concatenate(images_df['image'].values)] + output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)] + print(len(input_blobs), len(output_blobs)) + print(input_blobs[0].shape, output_blobs[0].shape) + + #caffenet.Forward(input_blobs, output_blobs) + feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))] + + # Add the features and delete the images. + del images_df['image'] + images_df['feat'] = feats + return images_df + + +if __name__ == "__main__": + ## Parse cmdline options + gflags.DEFINE_string( + "model_def", "", "The model definition file.") + gflags.DEFINE_string( + "pretrained_model", "", "The pretrained model.") + gflags.DEFINE_boolean( + "gpu", False, "use gpu for computation") + gflags.DEFINE_string( + "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES)) + gflags.DEFINE_string( + "images_file", "", "File that contains image filenames.") + gflags.DEFINE_string( + "batch_size", 256, "Number of image crops to let through in one go") + gflags.DEFINE_string( + "output", "", "The output DataFrame HDF5 filename.") + gflags.DEFINE_string( + "layer", "imagenet", "Layer to output.") + FLAGS = gflags.FLAGS + FLAGS(sys.argv) + + ## Load list of image filenames and assemble into batches. 
+ t = time.time() + print('Assembling batches...') + with open(FLAGS.images_file) as f: + image_fnames = [_.strip() for _ in f.readlines()] + image_batches = assemble_batches( + image_fnames, FLAGS.crop_mode, FLAGS.batch_size) + print('{} batches assembled in {:.3f} s'.format( + len(image_batches), time.time() - t)) + + # Initialize network by loading model definition and weights. + t = time.time() + print("Loading Caffe model.") + caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model) + caffenet.set_phase_test() + if FLAGS.gpu: + caffenet.set_mode_gpu() + print("Caffe model loaded in {:.3f} s".format(time.time() - t)) + + # Process the batches. + t = time.time() + print 'Processing {} files in {} batches'.format( + len(image_fnames), len(image_batches)) + dfs_with_feats = [] + for i in range(len(image_batches)): + if i % 10 == 0: + print('Batch {}/{}, elapsed time: {:.3f} s'.format( + i, len(image_batches), time.time() - t)) + dfs_with_feats.append(compute_feats(image_batches[i])) + + # Concatenate, dropping the padding rows. + df = pd.concat(dfs_with_feats).dropna(subset=['filename']) + print("Processing complete after {:.3f} s.".format(time.time() - t)) + + # Write out the results. + t = time.time() + df.to_hdf(FLAGS.output, 'df', mode='w') + print("Done. Saving to {} took {:.3f} s.".format( + FLAGS.output, time.time() - t)) diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 5b4c9ed5275..e1be61c35e1 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -137,11 +137,8 @@ message SolverParameter { // whether to snapshot diff in the results or not. Snapshotting diff will help // debugging but the final protocol buffer size will be much larger. 
optional bool snapshot_diff = 16 [ default = false]; - - // the asynchronous solver stuff - optional string tcp_port = 20 [ default = "20901"]; - optional string tcp_server = 21; - optional int32 communication_interval = 22; + // the mode the solver will use: 0 for CPU and 1 for GPU. Uses GPU by default. + optional int32 solver_mode = 17 [default = 1]; } // A message that stores the solver snapshots diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 5c25641ce9c..e02b72f31f6 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -39,6 +39,7 @@ Solver::Solver(const SolverParameter& param) template void Solver::Solve(const char* resume_file) { + Caffe::set_mode(Caffe::Brew(param_.solver_mode())); Caffe::set_phase(Caffe::TRAIN); LOG(INFO) << "Solving " << net_->name(); PreSolve(); @@ -71,6 +72,9 @@ void Solver::Solve(const char* resume_file) { Caffe::set_phase(Caffe::TRAIN); } } + // After the optimization is done, always do a snapshot. + iter_--; + Snapshot(); LOG(INFO) << "Optimization Done."; }