Skip to content

Commit

Permalink
1. Used conversion from numpy 1.9 tobytes() to pack LMDB data. Issue …
Browse files Browse the repository at this point in the history
…that image width was scaled twice disappeared

2. Reduced batch size for pairs network and changed parameters in solver (took from imagenet and scaled according to info in BVLC/caffe#430)
Minimization doesn't converge… but at least it is running now
  • Loading branch information
zheden committed Jun 9, 2015
1 parent d2b6d45 commit ffed4c5
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 52 deletions.
21 changes: 12 additions & 9 deletions network/pair_author_rec_solver.prototxt
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@ net: "network/pair_author_rec_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images. EVG was 100
test_iter: 10
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
# mult by sqrt(10) because of batch size
base_lr: 0.04
momentum: 0.9
weight_decay: 0.0000
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# took from imagenet
lr_policy: "step"
gamma: 0.1
# step mult by 10 because of batch size
stepsize: 1000000
# Display every 100 iterations
display: 100
# The maximum number of iterations, EVG was 50000 in mnist
max_iter: 5
max_iter: 5000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "network/pair_author_rec"
snapshot: 1000
snapshot_prefix: "network/snap_pair_author_rec"
# solver mode: CPU or GPU
solver_mode: GPU
34 changes: 4 additions & 30 deletions network/pair_author_rec_train_test.prototxt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ layer {
}
data_param {
source: "network/pairs_train_lmdb"
batch_size: 256
batch_size: 25
backend: LMDB
}
}
Expand All @@ -23,7 +23,7 @@ layer {
}
data_param {
source: "network/pairs_train_lmdb"
batch_size: 50
batch_size: 25
backend: LMDB
}
}
Expand All @@ -38,23 +38,10 @@ layer {
slice_point: 1
}
}
layer {
name: "pool0"
type: "Pooling"
bottom: "data"
top: "pool0"
pooling_param {
pool: MAX
kernel_w: 2
kernel_h: 1
stride_w: 2
stride_h: 1
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "pool0"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 10
Expand Down Expand Up @@ -114,23 +101,10 @@ layer {
bottom: "fc1"
top: "descriptor"
}
layer {
name: "pool0_p"
type: "Pooling"
bottom: "data_p"
top: "pool0_p"
pooling_param {
pool: MAX
kernel_w: 2
kernel_h: 1
stride_w: 2
stride_h: 1
}
}
layer {
name: "conv1_p"
type: "Convolution"
bottom: "pool0_p"
bottom: "data_p"
top: "conv1_p"
convolution_param {
num_output: 10
Expand Down
20 changes: 7 additions & 13 deletions readDB.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 03 14:47:53 2015
@author: Ievgeniia
"""

import os
from scipy import misc
Expand Down Expand Up @@ -153,6 +147,8 @@ def preprocessImages(linesToTrain, minAcceptableWidth):

maxHeight = max(lineHeights)
minWidth = max(minAcceptableWidth, min(willBeNotRejected))
# temp: cut line and take first third, just to train network TODO: take all info from line
minWidth = minWidth / 3
print 'Width of images -', minWidth
print 'Height of images -', maxHeight

Expand All @@ -174,11 +170,9 @@ def getLMDBEntry(i_image1, i_image2, i_label):
datum.height = i_image1.shape[0]
datum.width = i_image1.shape[1]

binStr = binascii.hexlify(image)
datum.data = binStr
datum.data = image.tobytes()

# TODO: why it is twice wider???
#flatIm = np.fromstring(datum.data, dtype=np.int8)
#flatIm = np.fromstring(datum.data, dtype=np.uint8)
#im = flatIm.reshape(datum.channels, datum.height, datum.width)

datum.label = i_label
Expand Down Expand Up @@ -333,16 +327,16 @@ def createLMDBpairs(i_nameLMDB):
env = lmdb.open(nameLMDB, readonly=True)
with env.begin() as txn:
raw_datum = txn.get(b'00000002')
lmdb.close()

datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(raw_datum)

flatIm = np.fromstring(datum.data, dtype=np.int8)
flatIm = np.fromstring(datum.data, dtype=np.uint8)
#print flatIm.shape

# TODO: why it is twice wider????
im = flatIm.reshape(datum.channels, datum.height, datum.width*2)
im = flatIm.reshape(datum.channels, datum.height, datum.width)

#plt.imshow(im)

import scipy
Expand Down

0 comments on commit ffed4c5

Please sign in to comment.