From 8fc193225fd65a41bac88078e2f5055c79afc981 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Fri, 22 Nov 2013 21:30:44 -0800
Subject: [PATCH 01/15] another python wrapper that batches image subwindows
 and provides a selective search provider

---
 python/caffe/imagenet/wrapper2.py | 227 ++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 python/caffe/imagenet/wrapper2.py

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
new file mode 100644
index 00000000000..fab53790925
--- /dev/null
+++ b/python/caffe/imagenet/wrapper2.py
@@ -0,0 +1,227 @@
+"""
+wrapper2.py classifies a number of images at once, optionally using
+selective search crops.
+
+TODO: also store the window coordinates with the image filenames.
+"""
+import numpy as np
+import os
+import sys
+import gflags
+import pandas as pd
+import time
+from skimage import io
+from skimage import transform
+import selective_search_ijcv as selective_search
+import caffe
+
+IMAGE_DIM = 256
+CROPPED_DIM = 227
+IMAGE_CENTER = int((IMAGE_DIM - CROPPED_DIM) / 2)
+
+CROP_MODES = ['center_only', 'corners', 'selective_search']
+
+# Load the imagenet mean file
+IMAGENET_MEAN = np.load(
+    os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy'))
+
+
+def load_image(filename):
+  """
+  Input:
+    filename: string
+  Output:
+    image: (H x W x 3) ndarray at the original image size (no resizing).
+  """
+  img = io.imread(filename)
+  if img.ndim == 2:
+    img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
+  elif img.shape[2] == 4:
+    img = img[:, :, :3]
+  return img
+
+
+def format_image(image, window=None, dim=IMAGE_DIM):
+  # Crop a subimage if window is provided.
+  if window is not None:
+    image = image[
+      window[1]:window[1] + window[3],
+      window[0]:window[0] + window[2]
+    ]
+
+  # Resize to ImageNet size, convert to BGR, subtract mean.
+  image = (transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1]
+  image -= IMAGENET_MEAN
+
+  # Resize further if needed.
+  if not np.all(dim == IMAGE_DIM):
+    image = transform.resize(image, (dim, dim) * 255)
+
+  image = image.swapaxes(1, 2).swapaxes(0, 1)
+  return image
+
+
+def _assemble_images_center_only(image_fnames, max_batch_size):
+  all_images = []
+  for image_filename in image_fnames:
+    image = format_image(load_image(image_filename))
+
+    all_images.append(np.ascontiguousarray(
+        image[:,
+              IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
+              IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM],
+        dtype=np.float32
+    ))
+  return all_images[np.newaxis, :], image_fnames
+
+
+def _assemble_images_corners(image_fnames, max_batch_size):
+  all_images = []
+  fnames = []
+  for image_filename in image_fnames:
+    image = format_image(load_image(image_filename))
+    indices = [0, IMAGE_DIM - CROPPED_DIM]
+
+    images = np.empty((10, 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32)
+    curr = 0
+    for i in indices:
+      for j in indices:
+        images[curr] = image[:, i:i + CROPPED_DIM, j:j + CROPPED_DIM]
+        curr += 1
+    images[4] = image[
+      :,
+      IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
+      IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM
+    ]
+
+    # Add flipped versions.
+    images[5:] = images[:5, :, :, ::-1]
+
+    all_images.append(images)
+    fnames.append(image_filename)
+  return all_images, fnames
+
+
+def _assemble_images_selective_search(image_fnames, max_batch_size):
+  windows_list = selective_search.get_windows(image_fnames)
+  all_images = []
+  fnames = []
+  for image_fname, windows in zip(image_fnames, windows_list):
+    image = load_image(image_fname)
+    images = np.empty(
+      (len(windows), 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32)
+    for i, window in enumerate(windows):
+      images[i] = format_image(image, window, CROPPED_DIM)
+    all_images.append(images)
+    fnames.append(image_fname)
+  return all_images, fnames
+
+
+def assemble_batches(
+  image_fnames, crop_mode='center_only', max_batch_size=256):
+  """
+  Assemble list of batches, each one of at most max_batch_size subimage
+  objects.
+
+  Input:
+      image_fnames: list of string
+      crop_mode: string
+        'center_only': the CROPPED_DIM middle of the image is taken as is
+        'corners': take CROPPED_DIM-sized boxes at 4 corners and center of
+          the image, as well as their flipped versions: a total of 10 images
+        'selective_search': run Selective Search region proposal on the
+          image, and take each enclosing subwindow.
+
+  Output:
+      images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
+  """
+  if crop_mode == 'center_only':
+    all_images, fnames = _assemble_images_center_only(
+      image_fnames, max_batch_size)
+
+  elif crop_mode == 'corners':
+    all_images, fnames = _assemble_images_corners(
+      image_fnames, max_batch_size)
+
+  elif crop_mode == 'selective_search':
+    all_images, fnames = _assemble_images_selective_search(
+      image_fnames, max_batch_size)
+
+  else:
+    raise Exception("Unknown mode: not in {}".format(CROP_MODES))
+
+  # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array,
+  # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks
+  all_images = np.concatenate(all_images, 0)
+  all_fnames = np.repeat(fnames, [len(images) for images in all_images])
+  assert(all_images.shape[0] == all_fnames.shape[0])
+
+  num_batches = int(all_images.shape[0] / max_batch_size)
+  image_batches = np.array_split(all_images, num_batches, axis=0)
+  fname_batches = np.array_split(all_fnames, num_batches, axis=0)
+  return image_batches, fname_batches
+
+
+def compute_feats(batch, layer='imagenet'):
+  if layer == 'imagenet':
+    num_output = 1000
+  else:
+    raise ValueError("Unknown layer requested: {}".format(layer))
+
+  num = batch.shape[0]
+  output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)]
+  caffenet.Forward([batch], output_blobs)
+  feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))]
+
+  return feats
+
+
+if __name__ == "__main__":
+  ## Parse cmdline options
+  gflags.DEFINE_string(
+    "model_def", "", "The model definition file.")
+  gflags.DEFINE_string(
+    "pretrained_model", "", "The pretrained model.")
+  gflags.DEFINE_boolean(
+    "gpu", True, "use gpu for computation")
+  gflags.DEFINE_string(
+    "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES))
+  gflags.DEFINE_string(
+    "images_file", "", "File that contains image filenames.")
+  gflags.DEFINE_string(
+    "output", "", "The output DataFrame HDF5 filename.")
+  gflags.DEFINE_string(
+    "layer", "imagenet", "Layer to output.")
+  FLAGS = gflags.FLAGS
+  FLAGS(sys.argv)
+
+  ## Initialize network by loading model definition and weights.
+  caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model)
+  caffenet.set_phase_test()
+  if FLAGS.gpu:
+    caffenet.set_mode_gpu()
+
+  ## Load list of image filenames and assemble into batches.
+  with open(gflags.images_file) as f:
+    image_fnames = [_.strip() for _ in f.readlines()]
+  image_batches, fname_batches = assemble_batches(
+    image_fnames, FLAGS.crop_mode)
+  print 'Running on {} files in {} batches'.format(
+    len(image_fnames), len(image_batches))
+
+  # Process the batches.
+  start = time.time()
+  all_feats = []
+  for i, batch in range(len(image_batches)):
+    if i % 10 == 0:
+      print('Batch {}/{}, elapsed {:.3f} s'.format(
+        i, len(image_batches), time.time() - start))
+    all_feats.append(compute_feats(image_batches[i]))
+  all_feats = np.concatenate(all_feats, 0)
+  all_fnames = np.concatenate(fname_batches, 0)
+
+  df = pd.DataFrame({'feat': all_feats, 'filename': all_fnames})
+
+  # Finally, write the results.
+  df.to_hdf(FLAGS.output, 'df', mode='w')
+  print 'Done. Saved to %s.' % FLAGS.output

From 5429729559b39661e5df1091e3312b969c1b1c62 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Mon, 25 Nov 2013 07:22:11 -0800
Subject: [PATCH 02/15] assembling batches works properly for all modes;
 segfault in pycaffe

---
 python/caffe/imagenet/wrapper2.py | 62 ++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
index fab53790925..a3bd100ad50 100644
--- a/python/caffe/imagenet/wrapper2.py
+++ b/python/caffe/imagenet/wrapper2.py
@@ -2,7 +2,12 @@
 wrapper2.py classifies a number of images at once, optionally using
 selective search crops.
 
-TODO: also store the window coordinates with the image filenames.
+The selective_search_ijcv_with_python code is available at
+https://github.com/sergeyk/selective_search_ijcv_with_python
+
+TODO:
+- [ ] store the window coordinates with the image filenames.
+- [ ] batch up image filenames as well: don't want to load all of them into memory
 """
 import numpy as np
 import os
@@ -12,7 +17,7 @@
 import time
 from skimage import io
 from skimage import transform
-import selective_search_ijcv as selective_search
+import selective_search_ijcv_with_python as selective_search
 import caffe
 
 IMAGE_DIM = 256
@@ -42,11 +47,19 @@ def load_image(filename):
 
 
 def format_image(image, window=None, dim=IMAGE_DIM):
+  """
+  Input:
+    image: (H x W x 3) ndarray
+    window: (4) ndarray
+      (ymin, xmin, ymax, xmax) coordinates
+    dim: int
+      Final width of the square image.
+  """
   # Crop a subimage if window is provided.
   if window is not None:
     image = image[
-      window[1]:window[1] + window[3],
-      window[0]:window[0] + window[2]
+      window[0]:window[2],
+      window[1]:window[3]
     ]
 
   # Resize to ImageNet size, convert to BGR, subtract mean.
@@ -54,28 +67,26 @@ def format_image(image, window=None, dim=IMAGE_DIM):
   image -= IMAGENET_MEAN
 
   # Resize further if needed.
-  if not np.all(dim == IMAGE_DIM):
-    image = transform.resize(image, (dim, dim) * 255)
-
+  if not dim == IMAGE_DIM:
+    image = transform.resize(image, (dim, dim))
   image = image.swapaxes(1, 2).swapaxes(0, 1)
   return image
 
 
-def _assemble_images_center_only(image_fnames, max_batch_size):
+def _assemble_images_center_only(image_fnames):
   all_images = []
   for image_filename in image_fnames:
     image = format_image(load_image(image_filename))
-
     all_images.append(np.ascontiguousarray(
-        image[:,
+        image[np.newaxis, :,
               IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
               IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM],
         dtype=np.float32
     ))
-  return all_images[np.newaxis, :], image_fnames
+  return all_images, image_fnames
 
 
-def _assemble_images_corners(image_fnames, max_batch_size):
+def _assemble_images_corners(image_fnames):
   all_images = []
   fnames = []
   for image_filename in image_fnames:
@@ -93,16 +104,14 @@ def _assemble_images_corners(image_fnames, max_batch_size):
       IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
       IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM
     ]
-
-    # Add flipped versions.
-    images[5:] = images[:5, :, :, ::-1]
+    images[5:] = images[:5, :, :, ::-1]  # flipped versions
 
     all_images.append(images)
     fnames.append(image_filename)
   return all_images, fnames
 
 
-def _assemble_images_selective_search(image_fnames, max_batch_size):
+def _assemble_images_selective_search(image_fnames):
   windows_list = selective_search.get_windows(image_fnames)
   all_images = []
   fnames = []
@@ -136,27 +145,24 @@ def assemble_batches(
       images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
   """
   if crop_mode == 'center_only':
-    all_images, fnames = _assemble_images_center_only(
-      image_fnames, max_batch_size)
+    all_images, fnames = _assemble_images_center_only(image_fnames)
 
   elif crop_mode == 'corners':
-    all_images, fnames = _assemble_images_corners(
-      image_fnames, max_batch_size)
+    all_images, fnames = _assemble_images_corners(image_fnames)
 
   elif crop_mode == 'selective_search':
-    all_images, fnames = _assemble_images_selective_search(
-      image_fnames, max_batch_size)
+    all_images, fnames = _assemble_images_selective_search(image_fnames)
 
   else:
     raise Exception("Unknown mode: not in {}".format(CROP_MODES))
 
   # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array,
   # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks
-  all_images = np.concatenate(all_images, 0)
   all_fnames = np.repeat(fnames, [len(images) for images in all_images])
+  all_images = np.concatenate(all_images, 0)
   assert(all_images.shape[0] == all_fnames.shape[0])
 
-  num_batches = int(all_images.shape[0] / max_batch_size)
+  num_batches = 1 + int(all_images.shape[0] / max_batch_size)
   image_batches = np.array_split(all_images, num_batches, axis=0)
   fname_batches = np.array_split(all_fnames, num_batches, axis=0)
   return image_batches, fname_batches
@@ -183,7 +189,7 @@ def compute_feats(batch, layer='imagenet'):
   gflags.DEFINE_string(
     "pretrained_model", "", "The pretrained model.")
   gflags.DEFINE_boolean(
-    "gpu", True, "use gpu for computation")
+    "gpu", False, "use gpu for computation")
   gflags.DEFINE_string(
     "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES))
   gflags.DEFINE_string(
@@ -202,13 +208,17 @@ def compute_feats(batch, layer='imagenet'):
     caffenet.set_mode_gpu()
 
   ## Load list of image filenames and assemble into batches.
-  with open(gflags.images_file) as f:
+  with open(FLAGS.images_file) as f:
     image_fnames = [_.strip() for _ in f.readlines()]
   image_batches, fname_batches = assemble_batches(
     image_fnames, FLAGS.crop_mode)
   print 'Running on {} files in {} batches'.format(
     len(image_fnames), len(image_batches))
 
+  from IPython import embed; embed()
+  # TODO: debug this
+  # F1125 07:17:35.980950  6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0)
+
   # Process the batches.
   start = time.time()
   all_feats = []

From 0e012d87fa1d2554ea469c60148849d02f9175b3 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Mon, 25 Nov 2013 12:15:35 -0800
Subject: [PATCH 03/15] debugging pycaffe error

---
 python/caffe/imagenet/wrapper2.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
index a3bd100ad50..d07d5c75cce 100644
--- a/python/caffe/imagenet/wrapper2.py
+++ b/python/caffe/imagenet/wrapper2.py
@@ -175,8 +175,11 @@ def compute_feats(batch, layer='imagenet'):
     raise ValueError("Unknown layer requested: {}".format(layer))
 
   num = batch.shape[0]
+  input_blobs = [batch]
   output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)]
-  caffenet.Forward([batch], output_blobs)
+  print(len(input_blobs), len(output_blobs))
+  print(input_blobs[0].shape, output_blobs[0].shape)
+  caffenet.Forward(input_blobs, output_blobs)
   feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))]
 
   return feats
@@ -215,17 +218,15 @@ def compute_feats(batch, layer='imagenet'):
   print 'Running on {} files in {} batches'.format(
     len(image_fnames), len(image_batches))
 
-  from IPython import embed; embed()
-  # TODO: debug this
-  # F1125 07:17:35.980950  6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0)
-
   # Process the batches.
   start = time.time()
   all_feats = []
-  for i, batch in range(len(image_batches)):
+  for i in range(len(image_batches)):
     if i % 10 == 0:
       print('Batch {}/{}, elapsed {:.3f} s'.format(
         i, len(image_batches), time.time() - start))
+  # TODO: debug this
+  # F1125 07:17:35.980950  6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0)
     all_feats.append(compute_feats(image_batches[i]))
   all_feats = np.concatenate(all_feats, 0)
   all_fnames = np.concatenate(fname_batches, 0)

From 489be58e49b006ceeae0591d6d75706f94f31a37 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Tue, 26 Nov 2013 03:27:02 -0800
Subject: [PATCH 04/15] in the middle of refactoring to use dataframes

---
 python/caffe/imagenet/wrapper2.py | 52 ++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
index d07d5c75cce..82795a3d4c6 100644
--- a/python/caffe/imagenet/wrapper2.py
+++ b/python/caffe/imagenet/wrapper2.py
@@ -6,6 +6,7 @@
 https://github.com/sergeyk/selective_search_ijcv_with_python
 
 TODO:
+- [ ] make sure all batches are the same size
 - [ ] store the window coordinates with the image filenames.
 - [ ] batch up image filenames as well: don't want to load all of them into memory
 """
@@ -26,6 +27,8 @@
 
 CROP_MODES = ['center_only', 'corners', 'selective_search']
 
+BATCH_SIZE = 256
+
 # Load the imagenet mean file
 IMAGENET_MEAN = np.load(
     os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy'))
@@ -112,22 +115,24 @@ def _assemble_images_corners(image_fnames):
 
 
 def _assemble_images_selective_search(image_fnames):
+  # Get window proposals.
   windows_list = selective_search.get_windows(image_fnames)
-  all_images = []
-  fnames = []
+
+  # Crop and format each proposed window of each image.
   for image_fname, windows in zip(image_fnames, windows_list):
     image = load_image(image_fname)
-    images = np.empty(
-      (len(windows), 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32)
     for i, window in enumerate(windows):
-      images[i] = format_image(image, window, CROPPED_DIM)
-    all_images.append(images)
-    fnames.append(image_fname)
-  return all_images, fnames
+      data.append({
+        'image': format_image(image, window, CROPPED_DIM),
+        'window': window,
+        'filename': image_fname
+      })
+  images_df = pd.DataFrame(data)
+  return images_df
 
 
 def assemble_batches(
-  image_fnames, crop_mode='center_only', max_batch_size=256):
+  image_fnames, crop_mode='center_only'):
   """
   Assemble list of batches, each one of at most max_batch_size subimage
   objects.
@@ -142,27 +147,30 @@ def assemble_batches(
           image, and take each enclosing subwindow.
 
   Output:
-      images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
+      : list of (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
   """
   if crop_mode == 'center_only':
-    all_images, fnames = _assemble_images_center_only(image_fnames)
+    images_df = _assemble_images_center_only(image_fnames)
 
   elif crop_mode == 'corners':
-    all_images, fnames = _assemble_images_corners(image_fnames)
+    images_df = _assemble_images_corners(image_fnames)
 
   elif crop_mode == 'selective_search':
-    all_images, fnames = _assemble_images_selective_search(image_fnames)
+    images_df = _assemble_images_selective_search(image_fnames)
 
   else:
     raise Exception("Unknown mode: not in {}".format(CROP_MODES))
 
-  # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array,
+  # Make sure the DataFrame has a multiple of BATCH_SIZE rows:
+  # just fill the extra rows with NaN filenames and all-zero images.
+  pass
+
   # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks
   all_fnames = np.repeat(fnames, [len(images) for images in all_images])
   all_images = np.concatenate(all_images, 0)
   assert(all_images.shape[0] == all_fnames.shape[0])
 
-  num_batches = 1 + int(all_images.shape[0] / max_batch_size)
+  num_batches = 1 + int(all_images.shape[0] / BATCH_SIZE)
   image_batches = np.array_split(all_images, num_batches, axis=0)
   fname_batches = np.array_split(all_fnames, num_batches, axis=0)
   return image_batches, fname_batches
@@ -211,23 +219,25 @@ def compute_feats(batch, layer='imagenet'):
     caffenet.set_mode_gpu()
 
   ## Load list of image filenames and assemble into batches.
+  t = time.time()
+  print('Assembling batches...')
   with open(FLAGS.images_file) as f:
     image_fnames = [_.strip() for _ in f.readlines()]
   image_batches, fname_batches = assemble_batches(
     image_fnames, FLAGS.crop_mode)
-  print 'Running on {} files in {} batches'.format(
-    len(image_fnames), len(image_batches))
+  print('{} batches assembled in {:.3f} s'.format(time.time() - t))
 
   # Process the batches.
-  start = time.time()
+  t = time.time()
+  print 'Running on {} files in {} batches'.format(
+    len(image_fnames), len(image_batches))
   all_feats = []
   for i in range(len(image_batches)):
     if i % 10 == 0:
       print('Batch {}/{}, elapsed {:.3f} s'.format(
-        i, len(image_batches), time.time() - start))
-  # TODO: debug this
-  # F1125 07:17:35.980950  6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0)
+        i, len(image_batches), time.time() - t))
     all_feats.append(compute_feats(image_batches[i]))
+
   all_feats = np.concatenate(all_feats, 0)
   all_fnames = np.concatenate(fname_batches, 0)
 

From 67e2c9d94fb53168114b42f9ab9b1a01cc86dafc Mon Sep 17 00:00:00 2001
From: Yangqing Jia <jiayq84@gmail.com>
Date: Sun, 1 Dec 2013 16:38:24 -0800
Subject: [PATCH 05/15] convert script: spacing

---
 examples/convert_imageset.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/convert_imageset.cpp b/examples/convert_imageset.cpp
index 6a6b86dcdf8..2769f1ea2d6 100644
--- a/examples/convert_imageset.cpp
+++ b/examples/convert_imageset.cpp
@@ -46,7 +46,7 @@ int main(int argc, char** argv) {
     LOG(INFO) << "Shuffling data";
     std::random_shuffle(lines.begin(), lines.end());
   }
-  LOG(INFO) << "A total of " << lines.size() << "images.";
+  LOG(INFO) << "A total of " << lines.size() << " images.";
 
   leveldb::DB* db;
   leveldb::Options options;

From 524e0956ef58d0f1fac0bf302de341c9eff882e0 Mon Sep 17 00:00:00 2001
From: Yangqing Jia <jiayq84@gmail.com>
Date: Mon, 2 Dec 2013 15:19:37 -0800
Subject: [PATCH 06/15] get_mnist.sh: changed the script to generate leveldb as
 well.

---
 data/get_mnist.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/data/get_mnist.sh b/data/get_mnist.sh
index ec979bd7bd0..2a9ad083fb4 100755
--- a/data/get_mnist.sh
+++ b/data/get_mnist.sh
@@ -15,4 +15,9 @@ gunzip train-labels-idx1-ubyte.gz
 gunzip t10k-images-idx3-ubyte.gz
 gunzip t10k-labels-idx1-ubyte.gz
 
+echo "Creating leveldb..."
+
+../examples/convert_mnist_data.bin train-images-idx3-ubyte train-labels-idx1-ubyte mnist-train-leveldb
+../examples/convert_mnist_data.bin t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist-test-leveldb
+
 echo "Done."

From ca3e042863e59e9a8a8edd7e78ffe9b2431aea8f Mon Sep 17 00:00:00 2001
From: Yangqing Jia <jiayq84@gmail.com>
Date: Mon, 2 Dec 2013 15:44:43 -0800
Subject: [PATCH 07/15] Several changes:

changed the paths for the mnist demo
removed unused fields in SolverParameter
added solver_mode to specify CPU/GPU solving
accordingly, removed the set mode code in the examples code
removed examples/test_read_imagenet since it is trivial
removed get_cifar script since no cifar demo exists.
---
 data/get_cifar.sh               | 12 ------------
 data/lenet.prototxt             |  2 +-
 data/lenet_solver.prototxt      | 29 ++++++++++++++++++++++-------
 data/lenet_test.prototxt        |  2 +-
 examples/finetune_net.cpp       |  3 ---
 examples/test_read_imagenet.cpp | 33 ---------------------------------
 examples/train_net.cpp          |  3 ---
 src/caffe/proto/caffe.proto     |  7 ++-----
 src/caffe/solver.cpp            |  4 ++++
 9 files changed, 30 insertions(+), 65 deletions(-)
 delete mode 100755 data/get_cifar.sh
 delete mode 100644 examples/test_read_imagenet.cpp

diff --git a/data/get_cifar.sh b/data/get_cifar.sh
deleted file mode 100755
index 6f42bb09ae2..00000000000
--- a/data/get_cifar.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env sh
-# This scripts downloads the mnist data and unzips it.
-
-echo "Downloading..."
-
-wget -q http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
-
-echo "Unzipping..."
-
-tar xzf cifar-10-binary.tar.gz
-
-echo "Done."
diff --git a/data/lenet.prototxt b/data/lenet.prototxt
index 085ed43f232..c8a99946bc7 100644
--- a/data/lenet.prototxt
+++ b/data/lenet.prototxt
@@ -3,7 +3,7 @@ layers {
   layer {
     name: "mnist"
     type: "data"
-    source: "data/mnist-train-leveldb"
+    source: "mnist-train-leveldb"
     batchsize: 64
     scale: 0.00390625
   }
diff --git a/data/lenet_solver.prototxt b/data/lenet_solver.prototxt
index d58255b9e31..d0edc0f0c87 100644
--- a/data/lenet_solver.prototxt
+++ b/data/lenet_solver.prototxt
@@ -1,12 +1,27 @@
-train_net: "data/lenet.prototxt"
-test_net: "data/lenet_test.prototxt"
+# The training protocol buffer definition
+train_net: "lenet.prototxt"
+# The testing protocol buffer definition
+test_net: "lenet_test.prototxt"
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of MNIST, we have test batch size 100 and 100 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 100
+# Carry out testing every 500 training iterations.
+test_interval: 500
+# The base learning rate, momentum and the weight decay of the network.
 base_lr: 0.01
+momentum: 0.9
+weight_decay: 0.0005
+# The learning rate policy
 lr_policy: "inv"
 gamma: 0.0001
 power: 0.75
+# Display every 100 iterations
 display: 100
-max_iter: 5000
-momentum: 0.9
-weight_decay: 0.0005
-test_iter: 100
-test_interval: 500
\ No newline at end of file
+# The maximum number of iterations
+max_iter: 10000
+# snapshot intermediate results
+snapshot: 5000
+snapshot_prefix: "lenet"
+# solver mode: 0 for CPU and 1 for GPU
+solver_mode: 1
diff --git a/data/lenet_test.prototxt b/data/lenet_test.prototxt
index fdda4a67ee0..676a2a6ab7d 100644
--- a/data/lenet_test.prototxt
+++ b/data/lenet_test.prototxt
@@ -3,7 +3,7 @@ layers {
   layer {
     name: "mnist"
     type: "data"
-    source: "data/mnist-test-leveldb"
+    source: "mnist-test-leveldb"
     batchsize: 100
     scale: 0.00390625
   }
diff --git a/examples/finetune_net.cpp b/examples/finetune_net.cpp
index d35b425c619..559715d6707 100644
--- a/examples/finetune_net.cpp
+++ b/examples/finetune_net.cpp
@@ -19,9 +19,6 @@ int main(int argc, char** argv) {
     return 0;
   }
 
-  Caffe::SetDevice(0);
-  Caffe::set_mode(Caffe::GPU);
-
   SolverParameter solver_param;
   ReadProtoFromTextFile(argv[1], &solver_param);
 
diff --git a/examples/test_read_imagenet.cpp b/examples/test_read_imagenet.cpp
deleted file mode 100644
index 7b61990b941..00000000000
--- a/examples/test_read_imagenet.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2013 Yangqing Jia
-
-#include <glog/logging.h>
-#include <leveldb/db.h>
-
-#include <string>
-
-int main(int argc, char** argv) {
-  ::google::InitGoogleLogging(argv[0]);
-  leveldb::DB* db;
-  leveldb::Options options;
-  options.create_if_missing = false;
-
-  LOG(INFO) << "Opening leveldb " << argv[1];
-  leveldb::Status status = leveldb::DB::Open(
-      options, argv[1], &db);
-  CHECK(status.ok()) << "Failed to open leveldb " << argv[1];
-
-  leveldb::ReadOptions read_options;
-  read_options.fill_cache = false;
-  int count = 0;
-  leveldb::Iterator* it = db->NewIterator(read_options);
-  for (it->SeekToFirst(); it->Valid(); it->Next()) {
-    // just a dummy operation
-    LOG(ERROR) << it->key().ToString();
-    if (++count % 10000 == 0) {
-      LOG(ERROR) << "Processed " << count << " files.";
-    }
-  }
-
-  delete db;
-  return 0;
-}
diff --git a/examples/train_net.cpp b/examples/train_net.cpp
index d84619b2d44..ce62616b118 100644
--- a/examples/train_net.cpp
+++ b/examples/train_net.cpp
@@ -20,9 +20,6 @@ int main(int argc, char** argv) {
     return 0;
   }
 
-  Caffe::SetDevice(0);
-  Caffe::set_mode(Caffe::GPU);
-
   SolverParameter solver_param;
   ReadProtoFromTextFile(argv[1], &solver_param);
 
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 5b4c9ed5275..e1be61c35e1 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -137,11 +137,8 @@ message SolverParameter {
   // whether to snapshot diff in the results or not. Snapshotting diff will help
   // debugging but the final protocol buffer size will be much larger.
   optional bool snapshot_diff = 16 [ default = false];
-
-  // the asynchronous solver stuff
-  optional string tcp_port = 20 [ default = "20901"];
-  optional string tcp_server = 21;
-  optional int32 communication_interval = 22;
+  // the mode the solver will use: 0 for CPU, 1 for GPU. Defaults to GPU.
+  optional int32 solver_mode = 17 [default = 1];
 }
 
 // A message that stores the solver snapshots
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 5c25641ce9c..e02b72f31f6 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -39,6 +39,7 @@ Solver<Dtype>::Solver(const SolverParameter& param)
 
 template <typename Dtype>
 void Solver<Dtype>::Solve(const char* resume_file) {
+  Caffe::set_mode(Caffe::Brew(param_.solver_mode()));
   Caffe::set_phase(Caffe::TRAIN);
   LOG(INFO) << "Solving " << net_->name();
   PreSolve();
@@ -71,6 +72,9 @@ void Solver<Dtype>::Solve(const char* resume_file) {
       Caffe::set_phase(Caffe::TRAIN);
     }
   }
+  // After the optimization is done, always do a snapshot.
+  iter_--;
+  Snapshot();
   LOG(INFO) << "Optimization Done.";
 }
 

From 91dc83e0bf0ced7f65b5e5a0390b8cc537be971d Mon Sep 17 00:00:00 2001
From: Yangqing Jia <jiayq84@gmail.com>
Date: Tue, 3 Dec 2013 10:05:19 -0800
Subject: [PATCH 08/15] mnist train script

---
 data/train_mnist.sh | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100755 data/train_mnist.sh

diff --git a/data/train_mnist.sh b/data/train_mnist.sh
new file mode 100755
index 00000000000..1d1036a108a
--- /dev/null
+++ b/data/train_mnist.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env sh
+
+GLOG_logtostderr=1 ../examples/train_net.bin lenet_solver.prototxt

From 8c96ac2e87ba1c370ee376152dce99236aac5bfd Mon Sep 17 00:00:00 2001
From: Yangqing Jia <jiayq84@gmail.com>
Date: Tue, 3 Dec 2013 10:53:29 -0800
Subject: [PATCH 09/15] lenet.prototxt loss layer rename

---
 data/lenet.prototxt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/lenet.prototxt b/data/lenet.prototxt
index c8a99946bc7..f5877ae4804 100644
--- a/data/lenet.prototxt
+++ b/data/lenet.prototxt
@@ -114,7 +114,7 @@ layers {
 }
 layers {
   layer {
-    name: "prob"
+    name: "loss"
     type: "softmax_loss"
   }
   bottom: "ip2"

From 521e4712ed313959aae032a4f7c118da32bdcf16 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Tue, 26 Nov 2013 03:51:01 -0800
Subject: [PATCH 10/15] OS X makefile changes

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2e07675c978..11ba70f5453 100644
--- a/Makefile
+++ b/Makefile
@@ -63,7 +63,7 @@ TEST_BINS := ${TEST_OBJS:.o=.testbin}
 # Derive include and lib directories
 ##############################
 CUDA_INCLUDE_DIR := $(CUDA_DIR)/include
-CUDA_LIB_DIR := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib
+CUDA_LIB_DIR := $(CUDA_DIR)/lib $(CUDA_DIR)/lib64
 MKL_INCLUDE_DIR := $(MKL_DIR)/include
 MKL_LIB_DIR := $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64
 

From 1786be3289bca001319935abfdfa751ba97c89fa Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Fri, 22 Nov 2013 21:30:44 -0800
Subject: [PATCH 11/15] another python wrapper that batches image subwindows
 and provides a selective search provider

---
 python/caffe/imagenet/wrapper2.py | 227 ++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 python/caffe/imagenet/wrapper2.py

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
new file mode 100644
index 00000000000..fab53790925
--- /dev/null
+++ b/python/caffe/imagenet/wrapper2.py
@@ -0,0 +1,227 @@
+"""
+wrapper2.py classifies a number of images at once, optionally using
+selective search crops.
+
+TODO: also store the window coordinates with the image filenames.
+"""
+import numpy as np
+import os
+import sys
+import gflags
+import pandas as pd
+import time
+from skimage import io
+from skimage import transform
+import selective_search_ijcv as selective_search
+import caffe
+
+IMAGE_DIM = 256
+CROPPED_DIM = 227
+IMAGE_CENTER = int((IMAGE_DIM - CROPPED_DIM) / 2)
+
+CROP_MODES = ['center_only', 'corners', 'selective_search']
+
+# Load the imagenet mean file
+IMAGENET_MEAN = np.load(
+    os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy'))
+
+
+def load_image(filename):
+  """
+  Input:
+    filename: string
+  Output:
+    image: an image of size (256 x 256 x 3) of type uint8.
+  """
+  img = io.imread(filename)
+  if img.ndim == 2:
+    img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
+  elif img.shape[2] == 4:
+    img = img[:, :, :3]
+  return img
+
+
+def format_image(image, window=None, dim=IMAGE_DIM):
+  # Crop a subimage if window is provided.
+  if window is not None:
+    image = image[
+      window[1]:window[1] + window[3],
+      window[0]:window[0] + window[2]
+    ]
+
+  # Resize to ImageNet size, convert to BGR, subtract mean.
+  image = (transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1]
+  image -= IMAGENET_MEAN
+
+  # Resize further if needed.
+  if not np.all(dim == IMAGE_DIM):
+    image = transform.resize(image, (dim, dim) * 255)
+
+  image = image.swapaxes(1, 2).swapaxes(0, 1)
+  return image
+
+
+def _assemble_images_center_only(image_fnames, max_batch_size):
+  all_images = []
+  for image_filename in image_fnames:
+    image = format_image(load_image(image_filename))
+
+    all_images.append(np.ascontiguousarray(
+        image[:,
+              IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
+              IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM],
+        dtype=np.float32
+    ))
+  return all_images[np.newaxis, :], image_fnames
+
+
+def _assemble_images_corners(image_fnames, max_batch_size):
+  all_images = []
+  fnames = []
+  for image_filename in image_fnames:
+    image = format_image(load_image(image_filename))
+    indices = [0, IMAGE_DIM - CROPPED_DIM]
+
+    images = np.empty((10, 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32)
+    curr = 0
+    for i in indices:
+      for j in indices:
+        images[curr] = image[:, i:i + CROPPED_DIM, j:j + CROPPED_DIM]
+        curr += 1
+    images[4] = image[
+      :,
+      IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
+      IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM
+    ]
+
+    # Add flipped versions.
+    images[5:] = images[:5, :, :, ::-1]
+
+    all_images.append(images)
+    fnames.append(image_filename)
+  return all_images, fnames
+
+
+def _assemble_images_selective_search(image_fnames, max_batch_size):
+  windows_list = selective_search.get_windows(image_fnames)
+  all_images = []
+  fnames = []
+  for image_fname, windows in zip(image_fnames, windows_list):
+    image = load_image(image_fname)
+    images = np.empty(
+      (len(windows), 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32)
+    for i, window in enumerate(windows):
+      images[i] = format_image(image, window, CROPPED_DIM)
+    all_images.append(images)
+    fnames.append(image_fname)
+  return all_images, fnames
+
+
+def assemble_batches(
+  image_fnames, crop_mode='center_only', max_batch_size=256):
+  """
+  Assemble list of batches, each one of at most max_batch_size subimage
+  objects.
+
+  Input:
+      image_fnames: list of string
+      mode: string
+        'center_only': the CROPPED_DIM middle of the image is taken as is
+        'corners': take CROPPED_DIM-sized boxes at 4 corners and center of
+          the image, as well as their flipped versions: a total of 10 images
+        'selective_search': run Selective Search region proposal on the
+          image, and take each enclosing subwindow.
+
+  Output:
+      images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
+  """
+  if crop_mode == 'center_only':
+    all_images, fnames = _assemble_images_center_only(
+      image_fnames, max_batch_size)
+
+  elif crop_mode == 'corners':
+    all_images, fnames = _assemble_images_corners(
+      image_fnames, max_batch_size)
+
+  elif crop_mode == 'selective_search':
+    all_images, fnames = _assemble_images_selective_search(
+      image_fnames, max_batch_size)
+
+  else:
+    raise Exception("Unknown mode: not in {}".format(CROP_MODES))
+
+  # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array,
+  # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks
+  all_images = np.concatenate(all_images, 0)
+  all_fnames = np.repeat(fnames, [len(images) for images in all_images])
+  assert(all_images.shape[0] == all_fnames.shape[0])
+
+  num_batches = int(all_images.shape[0] / max_batch_size)
+  image_batches = np.array_split(all_images, num_batches, axis=0)
+  fname_batches = np.array_split(all_fnames, num_batches, axis=0)
+  return image_batches, fname_batches
+
+
+def compute_feats(batch, layer='imagenet'):
+  if layer == 'imagenet':
+    num_output = 1000
+  else:
+    raise ValueError("Unknown layer requested: {}".format(layer))
+
+  num = batch.shape[0]
+  output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)]
+  caffenet.Forward([batch], output_blobs)
+  feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))]
+
+  return feats
+
+
+if __name__ == "__main__":
+  ## Parse cmdline options
+  gflags.DEFINE_string(
+    "model_def", "", "The model definition file.")
+  gflags.DEFINE_string(
+    "pretrained_model", "", "The pretrained model.")
+  gflags.DEFINE_boolean(
+    "gpu", True, "use gpu for computation")
+  gflags.DEFINE_string(
+    "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES))
+  gflags.DEFINE_string(
+    "images_file", "", "File that contains image filenames.")
+  gflags.DEFINE_string(
+    "output", "", "The output DataFrame HDF5 filename.")
+  gflags.DEFINE_string(
+    "layer", "imagenet", "Layer to output.")
+  FLAGS = gflags.FLAGS
+  FLAGS(sys.argv)
+
+  ## Initialize network by loading model definition and weights.
+  caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model)
+  caffenet.set_phase_test()
+  if FLAGS.gpu:
+    caffenet.set_mode_gpu()
+
+  ## Load list of image filenames and assemble into batches.
+  with open(gflags.images_file) as f:
+    image_fnames = [_.strip() for _ in f.readlines()]
+  image_batches, fname_batches = assemble_batches(
+    image_fnames, FLAGS.crop_mode)
+  print 'Running on {} files in {} batches'.format(
+    len(image_fnames), len(image_batches))
+
+  # Process the batches.
+  start = time.time()
+  all_feats = []
+  for i, batch in range(len(image_batches)):
+    if i % 10 == 0:
+      print('Batch {}/{}, elapsed {:.3f} s'.format(
+        i, len(image_batches), time.time() - start))
+    all_feats.append(compute_feats(image_batches[i]))
+  all_feats = np.concatenate(all_feats, 0)
+  all_fnames = np.concatenate(fname_batches, 0)
+
+  df = pd.DataFrame({'feat': all_feats, 'filename': all_fnames})
+
+  # Finally, write the results.
+  df.to_hdf(FLAGS.output, 'df', mode='w')
+  print 'Done. Saved to %s.' % FLAGS.output

From aca88a5f156db90193158774b4e5c8fee6075cb1 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Mon, 25 Nov 2013 07:22:11 -0800
Subject: [PATCH 12/15] assembling batches works properly for all modes;
 segfault in pycaffe

---
 python/caffe/imagenet/wrapper2.py | 62 ++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
index fab53790925..a3bd100ad50 100644
--- a/python/caffe/imagenet/wrapper2.py
+++ b/python/caffe/imagenet/wrapper2.py
@@ -2,7 +2,12 @@
 wrapper2.py classifies a number of images at once, optionally using
 selective search crops.
 
-TODO: also store the window coordinates with the image filenames.
+The selective_search_ijcv_with_python code is available at
+https://github.com/sergeyk/selective_search_ijcv_with_python
+
+TODO:
+- [ ] store the window coordinates with the image filenames.
+- [ ] batch up image filenames as well: don't want to load all of them into memory
 """
 import numpy as np
 import os
@@ -12,7 +17,7 @@
 import time
 from skimage import io
 from skimage import transform
-import selective_search_ijcv as selective_search
+import selective_search_ijcv_with_python as selective_search
 import caffe
 
 IMAGE_DIM = 256
@@ -42,11 +47,19 @@ def load_image(filename):
 
 
 def format_image(image, window=None, dim=IMAGE_DIM):
+  """
+  Input:
+    image: (H x W x 3) ndarray
+    window: (4) ndarray
+      (y, x, h, w) coordinates
+    dim: int
+      Final width of the square image.
+  """
   # Crop a subimage if window is provided.
   if window is not None:
     image = image[
-      window[1]:window[1] + window[3],
-      window[0]:window[0] + window[2]
+      window[0]:window[2],
+      window[1]:window[3]
     ]
 
   # Resize to ImageNet size, convert to BGR, subtract mean.
@@ -54,28 +67,26 @@ def format_image(image, window=None, dim=IMAGE_DIM):
   image -= IMAGENET_MEAN
 
   # Resize further if needed.
-  if not np.all(dim == IMAGE_DIM):
-    image = transform.resize(image, (dim, dim) * 255)
-
+  if not dim == IMAGE_DIM:
+    image = transform.resize(image, (dim, dim))
   image = image.swapaxes(1, 2).swapaxes(0, 1)
   return image
 
 
-def _assemble_images_center_only(image_fnames, max_batch_size):
+def _assemble_images_center_only(image_fnames):
   all_images = []
   for image_filename in image_fnames:
     image = format_image(load_image(image_filename))
-
     all_images.append(np.ascontiguousarray(
-        image[:,
+        image[np.newaxis, :,
               IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
               IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM],
         dtype=np.float32
     ))
-  return all_images[np.newaxis, :], image_fnames
+  return all_images, image_fnames
 
 
-def _assemble_images_corners(image_fnames, max_batch_size):
+def _assemble_images_corners(image_fnames):
   all_images = []
   fnames = []
   for image_filename in image_fnames:
@@ -93,16 +104,14 @@ def _assemble_images_corners(image_fnames, max_batch_size):
       IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
       IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM
     ]
-
-    # Add flipped versions.
-    images[5:] = images[:5, :, :, ::-1]
+    images[5:] = images[:5, :, :, ::-1]  # flipped versions
 
     all_images.append(images)
     fnames.append(image_filename)
   return all_images, fnames
 
 
-def _assemble_images_selective_search(image_fnames, max_batch_size):
+def _assemble_images_selective_search(image_fnames):
   windows_list = selective_search.get_windows(image_fnames)
   all_images = []
   fnames = []
@@ -136,27 +145,24 @@ def assemble_batches(
       images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
   """
   if crop_mode == 'center_only':
-    all_images, fnames = _assemble_images_center_only(
-      image_fnames, max_batch_size)
+    all_images, fnames = _assemble_images_center_only(image_fnames)
 
   elif crop_mode == 'corners':
-    all_images, fnames = _assemble_images_corners(
-      image_fnames, max_batch_size)
+    all_images, fnames = _assemble_images_corners(image_fnames)
 
   elif crop_mode == 'selective_search':
-    all_images, fnames = _assemble_images_selective_search(
-      image_fnames, max_batch_size)
+    all_images, fnames = _assemble_images_selective_search(image_fnames)
 
   else:
     raise Exception("Unknown mode: not in {}".format(CROP_MODES))
 
   # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array,
   # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks
-  all_images = np.concatenate(all_images, 0)
   all_fnames = np.repeat(fnames, [len(images) for images in all_images])
+  all_images = np.concatenate(all_images, 0)
   assert(all_images.shape[0] == all_fnames.shape[0])
 
-  num_batches = int(all_images.shape[0] / max_batch_size)
+  num_batches = 1 + int(all_images.shape[0] / max_batch_size)
   image_batches = np.array_split(all_images, num_batches, axis=0)
   fname_batches = np.array_split(all_fnames, num_batches, axis=0)
   return image_batches, fname_batches
@@ -183,7 +189,7 @@ def compute_feats(batch, layer='imagenet'):
   gflags.DEFINE_string(
     "pretrained_model", "", "The pretrained model.")
   gflags.DEFINE_boolean(
-    "gpu", True, "use gpu for computation")
+    "gpu", False, "use gpu for computation")
   gflags.DEFINE_string(
     "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES))
   gflags.DEFINE_string(
@@ -202,13 +208,17 @@ def compute_feats(batch, layer='imagenet'):
     caffenet.set_mode_gpu()
 
   ## Load list of image filenames and assemble into batches.
-  with open(gflags.images_file) as f:
+  with open(FLAGS.images_file) as f:
     image_fnames = [_.strip() for _ in f.readlines()]
   image_batches, fname_batches = assemble_batches(
     image_fnames, FLAGS.crop_mode)
   print 'Running on {} files in {} batches'.format(
     len(image_fnames), len(image_batches))
 
+  from IPython import embed; embed()
+  # TODO: debug this
+  # F1125 07:17:35.980950  6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0)
+
   # Process the batches.
   start = time.time()
   all_feats = []

From a21c86cf5d2ac53cb9cee02b3e525e5bf6938620 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Mon, 25 Nov 2013 12:15:35 -0800
Subject: [PATCH 13/15] debugging pycaffe error

---
 python/caffe/imagenet/wrapper2.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
index a3bd100ad50..d07d5c75cce 100644
--- a/python/caffe/imagenet/wrapper2.py
+++ b/python/caffe/imagenet/wrapper2.py
@@ -175,8 +175,11 @@ def compute_feats(batch, layer='imagenet'):
     raise ValueError("Unknown layer requested: {}".format(layer))
 
   num = batch.shape[0]
+  input_blobs = [batch]
   output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)]
-  caffenet.Forward([batch], output_blobs)
+  print(len(input_blobs), len(output_blobs))
+  print(input_blobs[0].shape, output_blobs[0].shape)
+  caffenet.Forward(input_blobs, output_blobs)
   feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))]
 
   return feats
@@ -215,17 +218,15 @@ def compute_feats(batch, layer='imagenet'):
   print 'Running on {} files in {} batches'.format(
     len(image_fnames), len(image_batches))
 
-  from IPython import embed; embed()
-  # TODO: debug this
-  # F1125 07:17:35.980950  6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0)
-
   # Process the batches.
   start = time.time()
   all_feats = []
-  for i, batch in range(len(image_batches)):
+  for i in range(len(image_batches)):
     if i % 10 == 0:
       print('Batch {}/{}, elapsed {:.3f} s'.format(
         i, len(image_batches), time.time() - start))
+  # TODO: debug this
+  # F1125 07:17:35.980950  6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0)
     all_feats.append(compute_feats(image_batches[i]))
   all_feats = np.concatenate(all_feats, 0)
   all_fnames = np.concatenate(fname_batches, 0)

From de648ba3ef3b913b1898b663e7c7c482559f22c4 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Tue, 26 Nov 2013 03:27:02 -0800
Subject: [PATCH 14/15] in the middle of refactoring to use dataframes

---
 python/caffe/imagenet/wrapper2.py | 52 ++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
index d07d5c75cce..82795a3d4c6 100644
--- a/python/caffe/imagenet/wrapper2.py
+++ b/python/caffe/imagenet/wrapper2.py
@@ -6,6 +6,7 @@
 https://github.com/sergeyk/selective_search_ijcv_with_python
 
 TODO:
+- [ ] make sure all batches are the same size
 - [ ] store the window coordinates with the image filenames.
 - [ ] batch up image filenames as well: don't want to load all of them into memory
 """
@@ -26,6 +27,8 @@
 
 CROP_MODES = ['center_only', 'corners', 'selective_search']
 
+BATCH_SIZE = 256
+
 # Load the imagenet mean file
 IMAGENET_MEAN = np.load(
     os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy'))
@@ -112,22 +115,24 @@ def _assemble_images_corners(image_fnames):
 
 
 def _assemble_images_selective_search(image_fnames):
+  # Get window proposals.
   windows_list = selective_search.get_windows(image_fnames)
-  all_images = []
-  fnames = []
+
+  #
   for image_fname, windows in zip(image_fnames, windows_list):
     image = load_image(image_fname)
-    images = np.empty(
-      (len(windows), 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32)
     for i, window in enumerate(windows):
-      images[i] = format_image(image, window, CROPPED_DIM)
-    all_images.append(images)
-    fnames.append(image_fname)
-  return all_images, fnames
+      data.append({
+        'image': format_image(image, window, CROPPED_DIM),
+        'window': window,
+        'filename': image_fname
+      })
+  images_df = pd.DataFrame(data)
+  return images_df
 
 
 def assemble_batches(
-  image_fnames, crop_mode='center_only', max_batch_size=256):
+  image_fnames, crop_mode='center_only'):
   """
   Assemble list of batches, each one of at most max_batch_size subimage
   objects.
@@ -142,27 +147,30 @@ def assemble_batches(
           image, and take each enclosing subwindow.
 
   Output:
-      images: (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
+      : list of (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
   """
   if crop_mode == 'center_only':
-    all_images, fnames = _assemble_images_center_only(image_fnames)
+    images_df = _assemble_images_center_only(image_fnames)
 
   elif crop_mode == 'corners':
-    all_images, fnames = _assemble_images_corners(image_fnames)
+    images_df = _assemble_images_corners(image_fnames)
 
   elif crop_mode == 'selective_search':
-    all_images, fnames = _assemble_images_selective_search(image_fnames)
+    images_df = _assemble_images_selective_search(image_fnames)
 
   else:
     raise Exception("Unknown mode: not in {}".format(CROP_MODES))
 
-  # Concatenate into one (N, 3, CROPPED_DIM, CROPPED_DIM) array,
+  # Make sure the DataFrame has a multiple of BATCH_SIZE rows:
+  # just fill the extra rows with NaN filenames and all-zero images.
+  pass
+
   # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks
   all_fnames = np.repeat(fnames, [len(images) for images in all_images])
   all_images = np.concatenate(all_images, 0)
   assert(all_images.shape[0] == all_fnames.shape[0])
 
-  num_batches = 1 + int(all_images.shape[0] / max_batch_size)
+  num_batches = 1 + int(all_images.shape[0] / BATCH_SIZE)
   image_batches = np.array_split(all_images, num_batches, axis=0)
   fname_batches = np.array_split(all_fnames, num_batches, axis=0)
   return image_batches, fname_batches
@@ -211,23 +219,25 @@ def compute_feats(batch, layer='imagenet'):
     caffenet.set_mode_gpu()
 
   ## Load list of image filenames and assemble into batches.
+  t = time.time()
+  print('Assembling batches...')
   with open(FLAGS.images_file) as f:
     image_fnames = [_.strip() for _ in f.readlines()]
   image_batches, fname_batches = assemble_batches(
     image_fnames, FLAGS.crop_mode)
-  print 'Running on {} files in {} batches'.format(
-    len(image_fnames), len(image_batches))
+  print('{} batches assembled in {:.3f} s'.format(time.time() - t))
 
   # Process the batches.
-  start = time.time()
+  t = time.time()
+  print 'Running on {} files in {} batches'.format(
+    len(image_fnames), len(image_batches))
   all_feats = []
   for i in range(len(image_batches)):
     if i % 10 == 0:
       print('Batch {}/{}, elapsed {:.3f} s'.format(
-        i, len(image_batches), time.time() - start))
-  # TODO: debug this
-  # F1125 07:17:35.980950  6826 pycaffe.cpp:52] Check failed: len(bottom) == input_blobs.size() (1 vs. 0)
+        i, len(image_batches), time.time() - t))
     all_feats.append(compute_feats(image_batches[i]))
+
   all_feats = np.concatenate(all_feats, 0)
   all_fnames = np.concatenate(fname_batches, 0)
 

From b0e7d645154fd98cebe972d5557c02553ec95f90 Mon Sep 17 00:00:00 2001
From: Sergey Karayev <sergeykarayev@gmail.com>
Date: Thu, 5 Dec 2013 16:25:05 -0800
Subject: [PATCH 15/15] now using dataframes to pass data around and storing
 windows

---
 python/caffe/imagenet/wrapper2.py | 201 ++++++++++++++++++++----------
 1 file changed, 134 insertions(+), 67 deletions(-)

diff --git a/python/caffe/imagenet/wrapper2.py b/python/caffe/imagenet/wrapper2.py
index 82795a3d4c6..e23b48b4079 100644
--- a/python/caffe/imagenet/wrapper2.py
+++ b/python/caffe/imagenet/wrapper2.py
@@ -1,13 +1,17 @@
 """
-wrapper2.py classifies a number of images at once, optionally using
-selective search crops.
+Classify a number of images at once, optionally using the selective
+search window proposal method.
+
+This implementation follows
+  Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik.
+  Rich feature hierarchies for accurate object detection and semantic
+  segmentation.
+  http://arxiv.org/abs/1311.2524
 
 The selective_search_ijcv_with_python code is available at
-https://github.com/sergeyk/selective_search_ijcv_with_python
+  https://github.com/sergeyk/selective_search_ijcv_with_python
 
 TODO:
-- [ ] make sure all batches are the same size
-- [ ] store the window coordinates with the image filenames.
 - [ ] batch up image filenames as well: don't want to load all of them into memory
 """
 import numpy as np
@@ -16,8 +20,8 @@
 import gflags
 import pandas as pd
 import time
-from skimage import io
-from skimage import transform
+import skimage.io
+import skimage.transform
 import selective_search_ijcv_with_python as selective_search
 import caffe
 
@@ -38,10 +42,11 @@ def load_image(filename):
   """
   Input:
     filename: string
+
   Output:
     image: an image of size (256 x 256 x 3) of type uint8.
   """
-  img = io.imread(filename)
+  img = skimage.io.imread(filename)
   if img.ndim == 2:
     img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
   elif img.shape[2] == 4:
@@ -57,6 +62,10 @@ def format_image(image, window=None, dim=IMAGE_DIM):
       (y, x, h, w) coordinates
     dim: int
       Final width of the square image.
+
+  Output:
+    image: (3 x H x W) ndarray
+      Resized to (dim, dim).
   """
   # Crop a subimage if window is provided.
   if window is not None:
@@ -66,17 +75,27 @@ def format_image(image, window=None, dim=IMAGE_DIM):
     ]
 
   # Resize to ImageNet size, convert to BGR, subtract mean.
-  image = (transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1]
+  image = (skimage.transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1]
   image -= IMAGENET_MEAN
 
   # Resize further if needed.
   if not dim == IMAGE_DIM:
-    image = transform.resize(image, (dim, dim))
+    image = skimage.transform.resize(image, (dim, dim))
   image = image.swapaxes(1, 2).swapaxes(0, 1)
   return image
 
 
 def _assemble_images_center_only(image_fnames):
+  """
+  For each image, square the image and crop its center.
+
+  Input:
+    image_fnames: list
+
+  Output:
+    images_df: pandas.DataFrame
+      With 'image', 'filename' columns.
+  """
   all_images = []
   for image_filename in image_fnames:
     image = format_image(load_image(image_filename))
@@ -86,12 +105,27 @@ def _assemble_images_center_only(image_fnames):
               IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM],
         dtype=np.float32
     ))
-  return all_images, image_fnames
+
+  images_df = pd.DataFrame({
+    'image': all_images,
+    'filename': image_fnames
+  })
+  return images_df
 
 
 def _assemble_images_corners(image_fnames):
+  """
+  For each image, square the image and crop its center, four corners,
+  and mirrored versions of the above.
+
+  Input:
+    image_fnames: list
+
+  Output:
+    images_df: pandas.DataFrame
+      With 'image', 'filename' columns.
+  """
   all_images = []
-  fnames = []
   for image_filename in image_fnames:
     image = format_image(load_image(image_filename))
     indices = [0, IMAGE_DIM - CROPPED_DIM]
@@ -110,44 +144,61 @@ def _assemble_images_corners(image_fnames):
     images[5:] = images[:5, :, :, ::-1]  # flipped versions
 
     all_images.append(images)
-    fnames.append(image_filename)
-  return all_images, fnames
+
+  images_df = pd.DataFrame({
+    'image': [row for row in images for images in all_images],
+    'filename': np.repeat(image_fnames, 10)
+  })
+  return images_df
 
 
 def _assemble_images_selective_search(image_fnames):
-  # Get window proposals.
+  """
+  Run Selective Search window proposals on all images, then for each
+  image-window pair, extract a square crop.
+
+  Input:
+    image_fnames: list
+
+  Output:
+    images_df: pandas.DataFrame
+      With 'image', 'window', 'filename' columns.
+  """
   windows_list = selective_search.get_windows(image_fnames)
 
-  #
+  data = []
   for image_fname, windows in zip(image_fnames, windows_list):
     image = load_image(image_fname)
-    for i, window in enumerate(windows):
+    for window in windows:
       data.append({
-        'image': format_image(image, window, CROPPED_DIM),
+        'image': format_image(image, window, CROPPED_DIM)[np.newaxis, :],
         'window': window,
         'filename': image_fname
       })
+
   images_df = pd.DataFrame(data)
   return images_df
 
 
-def assemble_batches(
-  image_fnames, crop_mode='center_only'):
+def assemble_batches(image_fnames, crop_mode='center_only', batch_size=256):
   """
-  Assemble list of batches, each one of at most max_batch_size subimage
-  objects.
+  Assemble DataFrame of image crops for feature computation.
 
   Input:
-      image_fnames: list of string
-      mode: string
-        'center_only': the CROPPED_DIM middle of the image is taken as is
-        'corners': take CROPPED_DIM-sized boxes at 4 corners and center of
-          the image, as well as their flipped versions: a total of 10 images
-        'selective_search': run Selective Search region proposal on the
-          image, and take each enclosing subwindow.
+    image_fnames: list of string
+    crop_mode: string
+      'center_only': the CROPPED_DIM middle of the image is taken as is
+      'corners': take CROPPED_DIM-sized boxes at 4 corners and center of
+        the image, as well as their flipped versions: a total of 10.
+      'selective_search': run Selective Search region proposal on the
+        image, and take each enclosing subwindow.
 
   Output:
-      : list of (X x 3 x 227 x 227) ndarray, where X <= max_batch_size
+    df_batches: list of DataFrames, each one of BATCH_SIZE rows.
+      Each row has 'image', 'filename', and 'window' info.
+      Column 'image' contains (X x 3 x 227 x 227) ndarrays.
+      Column 'filename' contains source filenames.
+      If 'filename' is None, then the row is just for padding.
   """
   if crop_mode == 'center_only':
     images_df = _assemble_images_center_only(image_fnames)
@@ -163,34 +214,43 @@ def assemble_batches(
 
   # Make sure the DataFrame has a multiple of BATCH_SIZE rows:
   # just fill the extra rows with NaN filenames and all-zero images.
-  pass
-
-  # then split to (N/max_batch_size, 3, CROPPED_DIM, CROPPED_DIM) chunks
-  all_fnames = np.repeat(fnames, [len(images) for images in all_images])
-  all_images = np.concatenate(all_images, 0)
-  assert(all_images.shape[0] == all_fnames.shape[0])
-
-  num_batches = 1 + int(all_images.shape[0] / BATCH_SIZE)
-  image_batches = np.array_split(all_images, num_batches, axis=0)
-  fname_batches = np.array_split(all_fnames, num_batches, axis=0)
-  return image_batches, fname_batches
-
-
-def compute_feats(batch, layer='imagenet'):
+  N = images_df.shape[0]
+  remainder = N % BATCH_SIZE
+  if remainder > 0:
+    zero_image = np.zeros_like(images_df['image'].iloc[0])
+    remainder_df = pd.DataFrame([{
+      'filename': None,
+      'image': zero_image,
+      'window': [0, 0, 0, 0]
+    }] * (BATCH_SIZE - remainder))
+    images_df = images_df.append(remainder_df)
+    N = images_df.shape[0]
+
+  # Split into batches of BATCH_SIZE.
+  ind = np.arange(N) / BATCH_SIZE
+  df_batches = [images_df[ind == i] for i in range(N / BATCH_SIZE)]
+  return df_batches
+
+
+def compute_feats(images_df, layer='imagenet'):
   if layer == 'imagenet':
     num_output = 1000
   else:
     raise ValueError("Unknown layer requested: {}".format(layer))
 
-  num = batch.shape[0]
-  input_blobs = [batch]
+  num = images_df.shape[0]
+  input_blobs = [np.concatenate(images_df['image'].values)]
   output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)]
   print(len(input_blobs), len(output_blobs))
   print(input_blobs[0].shape, output_blobs[0].shape)
-  caffenet.Forward(input_blobs, output_blobs)
+
+  #caffenet.Forward(input_blobs, output_blobs)
   feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))]
 
-  return feats
+  # Add the features and delete the images.
+  del images_df['image']
+  images_df['feat'] = feats
+  return images_df
 
 
 if __name__ == "__main__":
@@ -205,6 +265,8 @@ def compute_feats(batch, layer='imagenet'):
     "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES))
   gflags.DEFINE_string(
     "images_file", "", "File that contains image filenames.")
+  gflags.DEFINE_string(
+    "batch_size", 256, "Number of image crops to let through in one go")
   gflags.DEFINE_string(
     "output", "", "The output DataFrame HDF5 filename.")
   gflags.DEFINE_string(
@@ -212,37 +274,42 @@ def compute_feats(batch, layer='imagenet'):
   FLAGS = gflags.FLAGS
   FLAGS(sys.argv)
 
-  ## Initialize network by loading model definition and weights.
-  caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model)
-  caffenet.set_phase_test()
-  if FLAGS.gpu:
-    caffenet.set_mode_gpu()
-
   ## Load list of image filenames and assemble into batches.
   t = time.time()
   print('Assembling batches...')
   with open(FLAGS.images_file) as f:
     image_fnames = [_.strip() for _ in f.readlines()]
-  image_batches, fname_batches = assemble_batches(
-    image_fnames, FLAGS.crop_mode)
-  print('{} batches assembled in {:.3f} s'.format(time.time() - t))
+  image_batches = assemble_batches(
+    image_fnames, FLAGS.crop_mode, FLAGS.batch_size)
+  print('{} batches assembled in {:.3f} s'.format(
+    len(image_batches), time.time() - t))
+
+  # Initialize network by loading model definition and weights.
+  t = time.time()
+  print("Loading Caffe model.")
+  caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model)
+  caffenet.set_phase_test()
+  if FLAGS.gpu:
+    caffenet.set_mode_gpu()
+  print("Caffe model loaded in {:.3f} s".format(time.time() - t))
 
   # Process the batches.
   t = time.time()
-  print 'Running on {} files in {} batches'.format(
+  print 'Processing {} files in {} batches'.format(
     len(image_fnames), len(image_batches))
-  all_feats = []
+  dfs_with_feats = []
   for i in range(len(image_batches)):
     if i % 10 == 0:
-      print('Batch {}/{}, elapsed {:.3f} s'.format(
+      print('Batch {}/{}, elapsed time: {:.3f} s'.format(
         i, len(image_batches), time.time() - t))
-    all_feats.append(compute_feats(image_batches[i]))
+    dfs_with_feats.append(compute_feats(image_batches[i]))
 
-  all_feats = np.concatenate(all_feats, 0)
-  all_fnames = np.concatenate(fname_batches, 0)
+  # Concatenate, dropping the padding rows.
+  df = pd.concat(dfs_with_feats).dropna(subset=['filename'])
+  print("Processing complete after {:.3f} s.".format(time.time() - t))
 
-  df = pd.DataFrame({'feat': all_feats, 'filename': all_fnames})
-
-  # Finally, write the results.
+  # Write out the results.
+  t = time.time()
   df.to_hdf(FLAGS.output, 'df', mode='w')
-  print 'Done. Saved to %s.' % FLAGS.output
+  print("Done. Saving to {} took {:.3f} s.".format(
+    FLAGS.output, time.time() - t))