forked from oreillymedia/Learning-OpenCV-3_examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_15-04.cpp
457 lines (407 loc) · 14 KB
/
example_15-04.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
//Example 15-4. Codebook algorithm implementation
#include <opencv2/opencv.hpp>
#include <vector>
#include <iostream>
#include <cstdlib>
#include <fstream>
using namespace std;
#define CHANNELS 3 // Number of color channels per pixel; always 3 because pixels are handled as YUV
int cbBounds[CHANNELS]; // Per-channel margin around an incoming pixel that updateCodebook uses to set and grow a codeword's learning bounds
int minMod[CHANNELS]; // Per-channel slack below a codeword's min: backgroundDiff still matches a pixel lower than min by up to this amount
int maxMod[CHANNELS]; // Per-channel slack above a codeword's max: backgroundDiff still matches a pixel higher than max by up to this amount
// CodeBook::t counts the learning updates applied to a codebook since it was
// last empty. Here's how the actual codebook elements (codewords) are described:
//
// One codeword of a per-pixel codebook: a per-channel [min, max] box, the
// wider [learnLow, learnHigh] range used while learning, and the
// bookkeeping needed to detect codewords that have gone stale.
//
// Copy construction and assignment are left to the compiler: the implicitly
// generated versions copy every member, including the fixed-size arrays,
// element by element.
//
class CodeElement {
public:
  uchar learnHigh[CHANNELS]; // High side threshold for learning
  uchar learnLow[CHANNELS];  // Low side threshold for learning
  uchar max[CHANNELS];       // High side of box boundary
  uchar min[CHANNELS];       // Low side of box boundary
  int t_last_update;         // Allow us to kill stale entries
  int stale;                 // Max negative run (longest period of inactivity)

  // Creates a codeword with every bound and counter set to zero.
  CodeElement() : t_last_update(0), stale(0) {
    for( int i = 0; i < CHANNELS; i++ )
      learnHigh[i] = learnLow[i] = max[i] = min[i] = 0;
  }
};
// You need one of these for each pixel in the video image (rowXcol)
//
class CodeBook : public vector<CodeElement> {
public:
int t; //Count of every image learned on
// count every access
CodeBook() { t=0; }
// Default is an empty book
CodeBook( int n ) : vector<CodeElement>(n) { t=0; } // Construct book of size n
};
// Updates the codebook of one pixel with a new data point: the pixel either
// widens the first codeword whose learning range contains it or, when no
// codeword matches, starts a new codeword.
//
//   p            incoming YUV pixel
//   c            CodeBook for the pixel (modified in place)
//   cbBounds     per-channel learning margin (usually: {10,10,10}); must hold
//                at least numChannels values
//   numChannels  number of color channels we're learning; must not exceed 3,
//                the size of the local bound arrays
//
// Returns the number of codewords in c after the update.
//
int updateCodebook(
  const cv::Vec3b& p,
  CodeBook& c,
  int* cbBounds,
  int numChannels
) {
  if( c.size() == 0 )
    c.t = 0;
  c.t += 1; // Record learning event

  // SET HIGH AND LOW BOUNDS
  // These are signed on purpose: in unsigned arithmetic p[n] - cbBounds[n]
  // wraps around for dark pixels, a "< 0" clamp can never fire, and the
  // wrapped value would be truncated when stored into the uchar learnLow.
  //
  int high[3], low[3];
  int n;
  for( n=0; n<numChannels; n++ ) {
    high[n] = p[n] + cbBounds[n];
    if( high[n] > 255 ) high[n] = 255;
    if( high[n] < 0 )   high[n] = 0;
    low[n] = p[n] - cbBounds[n];
    if( low[n] < 0 )   low[n] = 0;
    if( low[n] > 255 ) low[n] = 255;
  }

  // SEE IF THIS FITS AN EXISTING CODEWORD
  //
  const int numEntries = static_cast<int>( c.size() );
  int i;
  for( i=0; i<numEntries; i++ ) {
    int matchChannel = 0;
    for( n=0; n<numChannels; n++ ) {
      if( ( c[i].learnLow[n] <= p[n] ) && ( p[n] <= c[i].learnHigh[n] ) )
        matchChannel++; // Found an entry for this channel
    }
    if( matchChannel == numChannels ) { // If an entry was found
      c[i].t_last_update = c.t;
      // Stretch the codeword's box so that it contains the pixel
      //
      for( n=0; n<numChannels; n++ ) {
        if( c[i].max[n] < p[n] )
          c[i].max[n] = p[n];
        else if( c[i].min[n] > p[n] )
          c[i].min[n] = p[n];
      }
      break;
    }
  }

  // OVERHEAD TO TRACK POTENTIAL STALE ENTRIES
  //
  for( int s=0; s<numEntries; s++ ) {
    // Track which codebook entries are going stale:
    //
    const int negRun = c.t - c[s].t_last_update;
    if( c[s].stale < negRun ) c[s].stale = negRun;
  }

  // ENTER A NEW CODEWORD IF NEEDED
  //
  if( i == numEntries ) {
    // No existing codeword found, make one. It lands at index i, so the
    // adjustment below applies to it.
    CodeElement ce;
    for( n=0; n<numChannels; n++ ) {
      ce.learnHigh[n] = static_cast<uchar>( high[n] );
      ce.learnLow[n]  = static_cast<uchar>( low[n] );
      ce.max[n] = p[n];
      ce.min[n] = p[n];
    }
    ce.t_last_update = c.t;
    ce.stale = 0;
    c.push_back( ce );
  }

  // SLOWLY ADJUST LEARNING BOUNDS
  // One step per call toward the bounds computed for this pixel. Because
  // high <= 255 and low >= 0, the uchar fields cannot overflow here.
  //
  for( n=0; n<numChannels; n++ ) {
    if( c[i].learnHigh[n] < high[n] ) c[i].learnHigh[n] += 1;
    if( c[i].learnLow[n]  > low[n]  ) c[i].learnLow[n]  -= 1;
  }
  return static_cast<int>( c.size() );
}
// NOTE(review): foo is not referenced anywhere in the visible part of this
// file; kept in case other code refers to it.
int foo = 0;

// During learning, after you've learned for some period of time,
// periodically call this to clear out stale codebook entries.
//
// An entry is stale when its longest run without a match (stale) exceeds
// half of the codebook's learning count (c.t >> 1). Surviving entries are
// compacted to the front of the vector in their original order and get
// t_last_update reset to 0.
//
// NOTE(review): c.t itself is not reset here, so a surviving entry that is
// not matched on the next updateCodebook call records a negative run of
// c.t. Confirm this is intended if the function is called more than once.
//
int clearStaleEntries(
  // return number of entries cleared
  CodeBook &c
  // Codebook to clean up
){
  const int staleThresh = c.t >> 1;
  const int numEntries = static_cast<int>( c.size() );

  // Single pass: kept never runs ahead of src, so an entry is always
  // inspected before its slot can be overwritten and no scratch array of
  // keep-flags is needed.
  //
  int kept = 0;
  for( int src=0; src<numEntries; src++ ) {
    if( c[src].stale > staleThresh )
      continue; // Too stale: drop it
    if( kept != src )
      c[kept] = c[src];
    // We have to refresh these entries for next clearStale
    c[kept].t_last_update = 0;
    kept++;
  }
  c.resize( kept );
  return numEntries - kept;
}
// Decides whether a pixel is covered by its codebook.
//
// The pixel counts as background when some codeword satisfies, for every
// tested channel n:
//     min[n] - minMod_[n] <= p[n] <= max[n] + maxMod_[n]
//
// NOTES:
// minMod_ and maxMod_ must have length numChannels,
// e.g. 3 channels => minMod_[3], maxMod_[3]. There is one min and
// one max threshold per channel.
//
uchar backgroundDiff( // return 0 => background, 255 => foreground
  const cv::Vec3b& p, // Pixel (YUV)
  CodeBook& c,        // Codebook
  int numChannels,    // Number of channels we are testing
  int* minMod_,       // Subtract this (possibly negative) number from a
                      // codeword's min level when determining whether the
                      // new pixel is foreground
  int* maxMod_        // Add this (possibly negative) number onto a
                      // codeword's max level when determining whether the
                      // new pixel is foreground
) {
  const int numEntries = static_cast<int>( c.size() );
  for( int entry=0; entry<numEntries; entry++ ) {
    // Count leading channels that fall inside the widened box; stop at the
    // first channel that does not.
    int matched = 0;
    while( matched < numChannels
           && ( c[entry].min[matched] - minMod_[matched] <= p[matched] )
           && ( p[matched] <= c[entry].max[matched] + maxMod_[matched] ) ) {
      matched++;
    }
    if( matched == numChannels )
      return 0; // Found an entry that matched all channels => background
  }
  return 255; // No match with codebook => foreground
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////// This part adds a "main" to run the above code. ////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
// Just make a convenience class (assumes the image will not change size during the video).
class CbBackgroudDiff {
public:
cv::Mat Iyuv; //Will hold the yuv converted image
cv::Mat mask; //Will hold the background difference mask
vector<CodeBook> codebooks; //Will hold a CodeBook for each pixel
int row, col, image_length; //How many pixels are in the image
//Constructor
void init(cv::Mat &I_from_video) {
vector<int> v(3,10);
set_global_vecs(cbBounds, v);
v[0] = 6; v[1] = 20; v[2] = 8; //Just some decent defaults for low side
set_global_vecs(minMod, v);
v[0] = 70; v[1] = 20; v[2] = 6; //Decent defaults for high side
set_global_vecs(maxMod, v);
Iyuv.create(I_from_video.size(), I_from_video.type());
mask.create(I_from_video.size(), CV_8UC1);
row = I_from_video.rows;
col = I_from_video.cols;
image_length = row*col;
cout << "(row,col,len) = (" << row << ", " << col << ", " << image_length << ")" << endl;
codebooks.resize(image_length);
}
CbBackgroudDiff(cv::Mat &I_from_video) {
init(I_from_video);
}
CbBackgroudDiff(){};
//Convert to YUV
void convert_to_yuv(cv::Mat &Irgb)
{
cvtColor(Irgb, Iyuv, cv::COLOR_BGR2YUV);
}
int size_check(cv::Mat &I) { //Check that image doesn't change size, return -1 if size doesn't match, else 0
int ret = 0;
if((row != I.rows) || (col != I.cols)) {
cerr << "ERROR: Size changed! old[" << row << ", " << col << "], now [" << I.rows << ", " << I.cols << "]!" << endl;
ret = -1;
}
return ret;
}
//Utilities for setting gloabals
void set_global_vecs(int *globalvec, vector<int> &vec) {
if(vec.size() != CHANNELS) {
cerr << "Input vec[" << vec.size() << "] should equal CHANNELS [" << CHANNELS << "]" << endl;
vec.resize(CHANNELS, 10);
}
int i = 0;
for (vector<int>::iterator it = vec.begin(); it != vec.end(); ++it, ++i) {
globalvec[i] = *it;
}
}
//Background operations
int updateCodebookBackground(cv::Mat &Irgb) { //Learn codebook, -1 if error, else total # of codes
convert_to_yuv(Irgb);
if(size_check(Irgb))
return -1;
int total_codebooks = 0;
cv::Mat_<cv::Vec3b>::iterator Iit = Iyuv.begin<cv::Vec3b>(), IitEnd = Iyuv.end<cv::Vec3b>();
vector<CodeBook>::iterator Cit = codebooks.begin(), CitEnd = codebooks.end();
for(; Iit != IitEnd; ++Iit,++Cit) {
total_codebooks += updateCodebook(*Iit,*Cit,cbBounds,CHANNELS);
}
if(Cit != CitEnd)
cerr << "ERROR: Cit != CitEnd in updateCodeBackground(...) " << endl;
return(total_codebooks);
}
int clearStaleEntriesBackground() { //Clean out stuff that hasn't been updated for a long time
int total_cleared = 0;
vector<CodeBook>::iterator Cit = codebooks.begin(), CitEnd = codebooks.end();
for(; Cit != CitEnd; ++Cit) {
total_cleared += clearStaleEntries(*Cit);
}
return(total_cleared);
}
int backgroundDiffBackground(cv::Mat &Irgb) { //Take the background difference of the image
convert_to_yuv(Irgb);
if(size_check(Irgb))
return -1;
cv::Mat_<cv::Vec3b>::iterator Iit = Iyuv.begin<cv::Vec3b>(), IitEnd = Iyuv.end<cv::Vec3b>();
vector<CodeBook>::iterator Cit = codebooks.begin(), CitEnd = codebooks.end();
cv::Mat_<uchar>::iterator Mit = mask.begin<uchar>(), MitEnd = mask.end<uchar>();
for(; Iit != IitEnd; ++Iit,++Cit,++Mit) {
*Mit = backgroundDiff(*Iit,*Cit,CHANNELS,minMod,maxMod);
}
if((Cit != CitEnd)||(Mit != MitEnd)){
cerr << "ERROR: Cit != CitEnd and, or Mit != MitEnd in updateCodeBackground(...) " << endl;
return -1;
}
return 0;
}
}; // end CbBackgroudDiff
// Prints the usage message to standard output, using argv[0] as the
// program name in the command-line examples.
void help(char** argv ) {
  const char* program = argv[0];
  std::cout << "\n";
  std::cout << "Train a codebook background model on the first <#frames to train on> frames of an incoming video, then run the model\n";
  std::cout << program <<" <#frames to train on> <avi_path/filename>\n";
  std::cout << "For example:\n";
  std::cout << program << " 50 ../tree.avi\n";
  std::cout << "'A' or 'a' to adjust thresholds, esc, 'q' or 'Q' to quit";
  std::cout << std::endl;
}
// Prints which channel (0 = y, 1 = u, 2 = v) and which threshold array
// (thresh 0 = minMod, otherwise maxMod) is currently selected, followed by
// the current contents of the global minMod and maxMod arrays.
static void printThresholdState(int channel, int thresh) {
  const int ySel = (channel == 0) ? 1 : 0;
  const int uSel = (channel == 1) ? 1 : 0;
  const int vSel = (channel == 2) ? 1 : 0;
  std::cout << "yuv[" << ySel << "][" << uSel << "][" << vSel
            << (thresh ? "] maxMod active" : "] minMod active") << std::endl;
  std::cout << "minMod[" << minMod[0] << "][" << minMod[1] << "][" << minMod[2] << "]" << std::endl;
  std::cout << "maxMod[" << maxMod[0] << "][" << maxMod[1] << "][" << maxMod[2] << "]" << std::endl;
}

// Interactive tuning of how far a pixel may lie on the low side (minMod) or
// high side (maxMod) of a codeword and still be recognized as background.
//
// Keys: y/u/v select the channel, l/h select minMod/maxMod, '.' raises and
// ',' lowers the selected value by 4; esc, q or Q returns. After every
// other key the mask for Irgb is recomputed and shown in window charstr.
//
void adjustThresholds(char* charstr, cv::Mat &Irgb, CbBackgroudDiff &bgd) {
  int index = 0;  // Selected channel: 0 = y, 1 = u, 2 = v
  int thresh = 0; // Selected array: 0 = minMod, 1 = maxMod

  printThresholdState(index, thresh);
  for(;;) {
    const int key = cv::waitKey();
    if(key == 27 || key == 'Q' || key == 'q') // Esc or Q or q to exit
      break;

    // The state is reported before the key is applied, so it reflects the
    // selection and values as they were when the key was pressed.
    printThresholdState(index, thresh);

    switch(key) {
      case 'y': index = 0;  break;
      case 'u': index = 1;  break;
      case 'v': index = 2;  break;
      case 'l': thresh = 0; break; // minMod
      case 'h': thresh = 1; break; // maxMod
      case '.': // Up
        if(thresh == 0) minMod[index] += 4;
        else            maxMod[index] += 4;
        break;
      case ',': // Down
        if(thresh == 0) minMod[index] -= 4;
        else            maxMod[index] -= 4;
        break;
      default:
        break;
    }

    std::cout << "y,u,v for channel; l for minMod, h for maxMod threshold; , for down, . for up; esq or q to quit;" << std::endl;
    bgd.backgroundDiffBackground(Irgb);
    cv::imshow(charstr, bgd.mask);
  }
}
////////////////////////////////////////////////////////////////
// Trains a codebook background model on the first <#frames to train on>
// frames of the given video, clears stale codewords, then shows the
// foreground mask for each following frame.
//
// Usage: <program> <#frames to train on> <avi_path/filename>
//
// Returns 0 when the video ends or the user quits during testing, 1 when a
// frame cannot be read during training, and -1 on bad arguments, an
// unopenable video, or a failed background difference.
//
int main( int argc, char** argv) {
  cv::VideoCapture cap;
  if((argc < 3)|| !cap.open(argv[2])) {
    std::cerr << "Couldn't run the program" << std::endl;
    help(argv);
    return -1;
  }
  cv::namedWindow( argv[0], cv::WINDOW_AUTOSIZE );
  const int number_to_train_on = std::atoi( argv[1] );
  cv::Mat image;
  CbBackgroudDiff bgd;

  // FIRST PROCESSING LOOP (TRAINING):
  //
  int frame_count = 0;
  int key;
  std::cout << "Total frames to train on = " << number_to_train_on << std::endl; //db
  char seg[] = "Segmentation";
  while(1) {
    std::cout << "frame#: " << frame_count;
    cap >> image;
    if( image.empty() ) return 1; // Something went wrong, abort
    if(frame_count == 0) { bgd.init(image);} // Size the model from the first frame
    std::cout << ", Codebooks: " << bgd.updateCodebookBackground(image) << std::endl;
    cv::imshow( argv[0], image );
    frame_count++;
    // Allow early exit on esc, q or Q; otherwise stop once enough frames were seen
    key = cv::waitKey(7);
    if( key == 27 || key == 'q' || key == 'Q' || frame_count >= number_to_train_on ) break;
  }

  // We have accumulated our training, now create the models
  //
  std::cout << "Created the background model" << std::endl;
  std::cout << "Total entries cleared = " << bgd.clearStaleEntriesBackground() << std::endl;
  std::cout << "Press a key to start background differencing, 'a' to set thresholds, esc or q or Q to quit" << std::endl;

  // SECOND PROCESSING LOOP (TESTING):
  // Each key press advances one frame. All three quit keys must be tested
  // with &&: chaining them with || after "!= 27" makes the condition true
  // for 'q' and 'Q', so those keys would never leave the loop.
  //
  cv::namedWindow( seg, cv::WINDOW_AUTOSIZE );
  while((key = cv::waitKey()) != 27 && key != 'q' && key != 'Q' ) { // esc, 'q' or 'Q' to exit
    cap >> image;
    if( image.empty() ) return 0; // End of video
    std::cout << frame_count++ << " 'a' to adjust threholds" << std::endl;
    if(key == 'a') {
      std::cout << "Adjusting thresholds" << std::endl;
      std::cout << "y,u,v for channel; l for minMod, h for maxMod threshold; , for down, . for up; esq or q to quit;" << std::endl;
      adjustThresholds(seg,image,bgd);
    }
    else {
      if(bgd.backgroundDiffBackground(image)) {
        std::cerr << "ERROR, bdg.backgroundDiffBackground(...) failed" << std::endl;
        return -1;
      }
    }
    cv::imshow(seg,bgd.mask);
  }
  return 0;
}