forked from wulfebw/mergevec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmergevec.py
169 lines (137 loc) · 6.13 KB
/
mergevec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
###############################################################################
# Copyright (c) 2014, Blake Wulfe
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
###############################################################################
"""
File: mergevec.py
Author: blake.w.wulfe@gmail.com
Date: 6/13/2014
File Description:
This file contains a function that merges .vec files called "merge_vec_files".
I made it as a replacement for mergevec.cpp (created by Naotoshi Seo.
See: http://note.sonots.com/SciSoftware/haartraining/mergevec.cpp.html)
in order to avoid recompiling openCV with mergevec.cpp.
To use the function:
(1) Place all .vec files to be merged in a single directory (vec_directory).
(2) Navigate to this file in your CLI (terminal or cmd) and type "python mergevec.py -v your_vec_directory -o your_output_filename".
The first argument (-v) is the name of the directory containing the .vec files
The second argument (-o) is the name of the output file
To test the output of the function:
(1) Install openCV.
(2) Navigate to the output file in your CLI (terminal or cmd).
(2) Type "opencv_createsamples -w img_width -h img_height -vec output_filename".
This should show the .vec files in sequence.
"""
import sys
import glob
import struct
import argparse
import traceback
def exception_response(e):
exc_type, exc_value, exc_traceback = sys.exc_info()
lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
for line in lines:
print(line)
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('-v', dest='vec_directory')
parser.add_argument('-o', dest='output_filename')
args = parser.parse_args()
return (args.vec_directory, args.output_filename)
def merge_vec_files(vec_directory, output_vec_file):
"""
Iterates throught the .vec files in a directory and combines them.
(1) Iterates through files getting a count of the total images in the .vec files
(2) checks that the image sizes in all files are the same
The format of a .vec file is:
4 bytes denoting number of total images (int)
4 bytes denoting size of images (int)
2 bytes denoting min value (short)
2 bytes denoting max value (short)
ex: 6400 0000 4605 0000 0000 0000
hex 6400 0000 4605 0000 0000 0000
# images size of h * w min max
dec 100 1350 0 0
:type vec_directory: string
:param vec_directory: Name of the directory containing .vec files to be combined.
Do not end with slash. Ex: '/Users/username/Documents/vec_files'
:type output_vec_file: string
:param output_vec_file: Name of aggregate .vec file for output.
Ex: '/Users/username/Documents/aggregate_vec_file.vec'
"""
# Check that the .vec directory does not end in '/' and if it does, remove it.
if vec_directory.endswith('/'):
vec_directory = vec_directory[:-1]
# Get .vec files
files = glob.glob('{0}/*.vec'.format(vec_directory))
# Check to make sure there are .vec files in the directory
if len(files) <= 0:
print('Vec files to be mereged could not be found from directory: {0}'.format(vec_directory))
sys.exit(1)
# Check to make sure there are more than one .vec files
if len(files) == 1:
print('Only 1 vec file was found in directory: {0}. Cannot merge a single file.'.format(vec_directory))
sys.exit(1)
# Get the value for the first image size
prev_image_size = 0
try:
with open(files[0], 'rb') as vecfile:
content = b''.join((line) for line in vecfile.readlines())
val = struct.unpack('<iihh', content[:12])
prev_image_size = val[1]
except IOError as e:
print('An IO error occured while processing the file: {0}'.format(f))
exception_response(e)
# Get the total number of images
total_num_images = 0
for f in files:
try:
with open(f, 'rb') as vecfile:
content = b''.join((line) for line in vecfile.readlines())
val = struct.unpack('<iihh', content[:12])
num_images = val[0]
image_size = val[1]
if image_size != prev_image_size:
err_msg = """The image sizes in the .vec files differ. These values must be the same. \n The image size of file {0}: {1}\n
The image size of previous files: {0}""".format(f, image_size, prev_image_size)
sys.exit(err_msg)
total_num_images += num_images
except IOError as e:
print('An IO error occured while processing the file: {0}'.format(f))
exception_response(e)
# Iterate through the .vec files, writing their data (not the header) to the output file
# '<iihh' means 'little endian, int, int, short, short'
header = struct.pack('<iihh', total_num_images, image_size, 0, 0)
try:
with open(output_vec_file, 'wb') as outputfile:
outputfile.write(header)
for f in files:
with open(f, 'rb') as vecfile:
content = b''.join((line) for line in vecfile.readlines())
outputfile.write(bytearray(content[12:]))
except Exception as e:
exception_response(e)
if __name__ == '__main__':
vec_directory, output_filename = get_args()
if not vec_directory:
sys.exit('mergvec requires a directory of vec files. Call mergevec.py with -v /your_vec_directory')
if not output_filename:
sys.exit('mergevec requires an output filename. Call mergevec.py with -o your_output_filename')
merge_vec_files(vec_directory, output_filename)