Skip to content

Commit

Permalink
1. Used conversion from numpy 1.9 tobytes() to pack LMDB data. Issue …
Browse files Browse the repository at this point in the history
…that image width was scaled twice disappeared

2. Reduced batch size for pairs network and changed parameters in solver (took from imagenet and scaled according to info in BVLC/caffe#430)
Minimization doesn't converge… but at least it is running now
  • Loading branch information
zheden committed Jun 9, 2015
1 parent d2b6d45 commit ffed4c5
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 52 deletions.
21 changes: 12 additions & 9 deletions network/pair_author_rec_solver.prototxt
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@ net: "network/pair_author_rec_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images. EVG was 100
test_iter: 10
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
# mult by sqrt(10) because of batch size
base_lr: 0.04
momentum: 0.9
weight_decay: 0.0000
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# took from imagenet
lr_policy: "step"
gamma: 0.1
# step mult by 10 because of batch size
stepsize: 1000000
# Display every 100 iterations
display: 100
# The maximum number of iterations, EVG was 50000 in mnist
max_iter: 5
max_iter: 5000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "network/pair_author_rec"
snapshot: 1000
snapshot_prefix: "network/snap_pair_author_rec"
# solver mode: CPU or GPU
solver_mode: GPU
34 changes: 4 additions & 30 deletions network/pair_author_rec_train_test.prototxt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ layer {
}
data_param {
source: "network/pairs_train_lmdb"
batch_size: 256
batch_size: 25
backend: LMDB
}
}
Expand All @@ -23,7 +23,7 @@ layer {
}
data_param {
source: "network/pairs_train_lmdb"
batch_size: 50
batch_size: 25
backend: LMDB
}
}
Expand All @@ -38,23 +38,10 @@ layer {
slice_point: 1
}
}
layer {
name: "pool0"
type: "Pooling"
bottom: "data"
top: "pool0"
pooling_param {
pool: MAX
kernel_w: 2
kernel_h: 1
stride_w: 2
stride_h: 1
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "pool0"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 10
Expand Down Expand Up @@ -114,23 +101,10 @@ layer {
bottom: "fc1"
top: "descriptor"
}
layer {
name: "pool0_p"
type: "Pooling"
bottom: "data_p"
top: "pool0_p"
pooling_param {
pool: MAX
kernel_w: 2
kernel_h: 1
stride_w: 2
stride_h: 1
}
}
layer {
name: "conv1_p"
type: "Convolution"
bottom: "pool0_p"
bottom: "data_p"
top: "conv1_p"
convolution_param {
num_output: 10
Expand Down
20 changes: 7 additions & 13 deletions readDB.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 03 14:47:53 2015
@author: Ievgeniia
"""

import os
from scipy import misc
Expand Down Expand Up @@ -153,6 +147,8 @@ def preprocessImages(linesToTrain, minAcceptableWidth):

maxHeight = max(lineHeights)
minWidth = max(minAcceptableWidth, min(willBeNotRejected))
# temp: cut line and take first third, just to train network TODO: take all info from line
minWidth = minWidth / 3
print 'Width of images -', minWidth
print 'Height of images -', maxHeight

Expand All @@ -174,11 +170,9 @@ def getLMDBEntry(i_image1, i_image2, i_label):
datum.height = i_image1.shape[0]
datum.width = i_image1.shape[1]

binStr = binascii.hexlify(image)
datum.data = binStr
datum.data = image.tobytes()

# TODO: why it is twice wider???
#flatIm = np.fromstring(datum.data, dtype=np.int8)
#flatIm = np.fromstring(datum.data, dtype=np.uint8)
#im = flatIm.reshape(datum.channels, datum.height, datum.width)

datum.label = i_label
Expand Down Expand Up @@ -333,16 +327,16 @@ def createLMDBpairs(i_nameLMDB):
env = lmdb.open(nameLMDB, readonly=True)
with env.begin() as txn:
raw_datum = txn.get(b'00000002')
lmdb.close()

datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(raw_datum)

flatIm = np.fromstring(datum.data, dtype=np.int8)
flatIm = np.fromstring(datum.data, dtype=np.uint8)
#print flatIm.shape

# TODO: why it is twice wider????
im = flatIm.reshape(datum.channels, datum.height, datum.width*2)
im = flatIm.reshape(datum.channels, datum.height, datum.width)

#plt.imshow(im)

import scipy
Expand Down

0 comments on commit ffed4c5

Please sign in to comment.