-
Notifications
You must be signed in to change notification settings - Fork 9
/
me.c
420 lines (348 loc) · 11.2 KB
/
me.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
/*************************************************************
Copyright (C) 1990, 1991, 1993 Andy C. Hung, all rights reserved.
PUBLIC DOMAIN LICENSE: Stanford University Portable Video Research
Group. If you use this software, you agree to the following: This
program package is purely experimental, and is licensed "as is".
Permission is granted to use, modify, and distribute this program
without charge for any purpose, provided this license/ disclaimer
notice appears in the copies. No warranty or maintenance is given,
either expressed or implied. In no event shall the author(s) be
liable to you or a third party for any special, incidental,
consequential, or other damages, arising out of the use or inability
to use the program for any purpose (or the loss of data), even if we
have been advised of such possibilities. Any public reference or
advertisement of this source code should refer to it as the Portable
Video Research Group (PVRG) code, and not by any author(s) (or
Stanford University) name.
*************************************************************/
/*
************************************************************
me.c
This file does much of the motion estimation and compensation.
************************************************************
*/
/*LABEL me.c */
#include "globals.h"
#ifdef __SSE2__
#include "emmintrin.h"
#endif
/*PUBLIC*/
extern void initmc();
extern void FastBME();
extern void BruteMotionEstimation();
extern void BMC();
extern MEM *MotionCompensation();
/*PRIVATE*/
static int VAR;
static int VAROR;
static int MWOR;
int MeVAR[1024];
int MeVAROR[1024];
int MeMWOR[1024];
int MX;
int MY;
int MV;
int OMV;
int MeX[1024];
int MeY[1024];
int MeVal[1024];
int MeOVal[1024];
int MeN=0;
int SearchLimit = 15;
BLOCK nb,rb;
#define COMPARISON >= /* This is to compare for short-circuit exit */
/*START*/
/*BFUNC
initmc() initializes the block structures used for copying areas
of memory.
EFUNC*/
void initmc()
{
BEGIN("initmc");
nb = MakeBlock();
rb = MakeBlock();
}
/*BFUNC
ComputeError() gives an error score regarding how well a 16x16 block matches
a target. Computation stops once the error reaches or surpasses the
best known solution.
EFUNC*/
__inline int ComputeError(unsigned char *bptr, unsigned char *cptr, MEM *rm, MEM *cm) {
int i,residue,error;
#ifdef ARMSIMD32
int32_t a,b;
#elif __SSE2__
__m128i a,b;
#endif
error = 0;
for(i=0;i<16;i++) {
#ifdef ARMSIMD32
a = *((int32_t*)bptr);
b = *((int32_t*)cptr);
asm volatile ("usad8 %[result], %[op1], %[op2]" : [result] "=r" (a): [op1] "0" (a), [op2] "r" (b));
error += a;
a = *((int32_t*)(bptr + 4));
b = *((int32_t*)(cptr + 4));
asm volatile ("usad8 %[result], %[op1], %[op2]" : [result] "=r" (a): [op1] "0" (a), [op2] "r" (b));
error += a;
a = *((int32_t*)(bptr + 8));
b = *((int32_t*)(cptr + 8));
asm volatile ("usad8 %[result], %[op1], %[op2]" : [result] "=r" (a): [op1] "0" (a), [op2] "r" (b));
error += a;
a = *((int32_t*)(bptr + 12));
b = *((int32_t*)(cptr + 12));
asm volatile ("usad8 %[result], %[op1], %[op2]" : [result] "=r" (a): [op1] "0" (a), [op2] "r" (b));
error += a;
if (error COMPARISON MV) break;
bptr += rm->width;
cptr += cm->width;
#elif __SSE2__
a = _mm_loadu_si128((__m128i *) bptr);
b = _mm_loadu_si128((__m128i *) cptr);
a = _mm_sad_epu8(a, b);
b = _mm_srli_si128(a, 8);
a = _mm_add_epi32(a, b);
error += _mm_cvtsi128_si32(a);
// break if the error exceeds the best known solution
if (error COMPARISON MV) break;
bptr += rm->width;
cptr += cm->width;
#else
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
residue=(*(bptr++)-*(cptr++));
if (residue<0) {error-=residue;} else {error+=residue;}
// break if the error exceeds the best known solution
if (error COMPARISON MV) break;
bptr += (rm->width - 16);
cptr += (cm->width - 16);
#endif
}
return error;
}
/*BFUNC
FastBME() does a fast brute-force motion estimation with two indexes
into two memory structures. The motion estimation has a short-circuit
abort to speed up calculation.
EFUNC*/
void FastBME(int rx, int ry, MEM *rm, int cx, int cy, MEM *cm) {
BEGIN("FastBME");
int px, py, dx, dy, i, j, data, error;
unsigned char *baseptr, *bptr, *cptr;
// set up to check (0,0) vector
MX = MY = MV = 0;
bptr = rm->data + rx + (ry * rm->width);
baseptr = cm->data + cx + (cy * cm->width);
cptr = baseptr;
// make sure we don't short-circuit when computing the error score
MV = 65536;
// determine error for (0,0)
OMV = MV = ComputeError(bptr, cptr, rm, cm);
// do an exhaustive search
for (dx = -SearchLimit / 2; dx < SearchLimit / 2; ++dx) {
px = rx + dx;
for (dy = -SearchLimit / 2; dy < SearchLimit / 2; ++dy) {
py = ry + dy;
// only test vector if we're within frame boundaries
if ((px >= 0) && (px < rm->width - 16) &&
(py >= 0) && (py < rm->height - 16)) {
error = 0;
bptr = rm->data + px + (py * rm->width);
cptr = baseptr;
error = ComputeError(bptr, cptr, rm, cm);
if (error < MV) {
MV = error;
MX = dx;
MY = dy;
}
}
}
}
// gather statistics for search result, used in decision-making
bptr = rm->data + (MX + rx) + ((MY + ry) * rm->width);
cptr = baseptr;
for (VAR = 0, VAROR = 0, MWOR = 0, i = 0; i < 16; i++) {
for (j = 0; j < 16; j++) {
data = *(bptr) - *(cptr);
VAR += data*data;
VAROR += *(bptr)*(*(bptr));
MWOR += *(bptr);
bptr++;
cptr++;
}
bptr += (rm->width - 16);
cptr += (cm->width - 16);
}
VAR = VAR / 256;
VAROR = (VAROR / 256)-(MWOR / 256)*(MWOR / 256);
}
/*BFUNC
StepBME() implements the Three Step Search (TSS) motion search algorithm
proposed by Koga et al. 1981
EFUNC*/
void StepBME(int rx, int ry, MEM *rm, int cx, int cy, MEM *cm) {
BEGIN("StepBME");
int px, py, dx, dy, i, j, data, error, step, dirx, diry, bestx, besty;
unsigned char *baseptr, *bptr, *cptr;
// set up to check (0,0) vector
MX = MY = MV = 0;
bptr = rm->data + rx + (ry * rm->width);
baseptr = cm->data + cx + (cy * cm->width);
cptr = baseptr;
// make sure we don't short-circuit when computing the error score
MV = 65536;
// determine error for (0,0)
OMV = MV = ComputeError(bptr, cptr, rm, cm);
// initialize step search at (0,0)
bestx = rx;
besty = ry;
for (step = 8; step >= 1; step /= 2) {
for (diry = -1; diry <= 1; ++diry) {
py = besty + (diry * step);
dy = py - ry;
for (dirx = -1; dirx <= 1; ++dirx) {
if(dirx == 0 && diry == 0) {
// dont' test center again, we already did that
continue;
}
px = bestx + (dirx * step);
dx = px - rx;
// only test vector if we're within frame boundaries
// and the vector components are within -15..15
if ((px >= 0) && (px < rm->width - 16) &&
(py >= 0) && (py < rm->height - 16) &&
dx >= -15 && dx <= 15 && dy >= -15 && dy <= 15) {
bptr = rm->data + px + (py * rm->width);
cptr = baseptr;
error = ComputeError(bptr, cptr, rm, cm);
if (error < MV) {
MV = error;
MX = dx;
MY = dy;
}
}
}
}
// center search for next iteration on best candidate so far
bestx = rx + MX;
besty = ry + MY;
}
// gather statistics for search result, used in decision-making
bptr = rm->data + (MX + rx) + ((MY + ry) * rm->width);
cptr = baseptr;
for (VAR = 0, VAROR = 0, MWOR = 0, i = 0; i < 16; i++) {
for (j = 0; j < 16; j++) {
data = *(bptr) - *(cptr);
VAR += data*data;
VAROR += *(bptr)*(*(bptr));
MWOR += *(bptr);
bptr++;
cptr++;
}
bptr += (rm->width - 16);
cptr += (cm->width - 16);
}
VAR = VAR / 256;
VAROR = (VAROR / 256)-(MWOR / 256)*(MWOR / 256);
}
/*BFUNC
MotionEstimation() does a motion estimation on all
aligned 16x16 blocks in two memory structures.
EFUNC*/
void MotionEstimation(pmem,fmem)
MEM *pmem;
MEM *fmem;
{
BEGIN("MotionEstimation");
int x,y;
for(MeN=0,y=0;y<fmem->height;y+=16)
{
for(x=0;x<fmem->width;x+=16)
{
//FastBME(x,y,pmem,x,y,fmem);
StepBME(x,y,pmem,x,y,fmem);
MeVAR[MeN] = VAR;
MeVAROR[MeN] = VAROR;
MeMWOR[MeN] = MWOR;
MeX[MeN] = MX;
MeY[MeN] = MY;
MeVal[MeN] = MV;
MeOVal[MeN] = OMV;
MeN++;
}
}
}
/*BFUNC
BMC() does a motion compensated copy from one memory structure to
another memory structure with appropriate indexes.
EFUNC*/
void BMC(rx,ry,rm,cx,cy,cm)
int rx;
int ry;
MEM *rm;
int cx;
int cy;
MEM *cm;
{
BEGIN("BMC");
SetPointerBlock(rx,ry,cm,nb);
SetPointerBlock(cx+MX,cy+MY,rm,rb);
CopyBlock(rb,nb);
}
/*BFUNC
MotionCompensation() does a full motion compensation of all the blocks
based on the motion vectors created by BruteMotionEstimation. Not
actually used in the program. If (omem) is null, it creates a new
memory structure.
EFUNC*/
MEM *MotionCompensation(mem,omem)
MEM *mem;
MEM *omem;
{
BEGIN("MotionCompensation");
int x,y;
int count;
MEM *temp;
if (omem) {temp=omem;}
else {temp = MakeMem(mem->width,mem->height);}
for(count=0,y=0;y<mem->height;y+=16)
{
for(x=0;x<mem->width;x+=16)
{
SetPointerBlock(x,y,temp,nb);
SetPointerBlock(x+MeX[count],y+MeY[count],mem,rb);
count++;
CopyBlock(rb,nb);
}
}
return(temp);
}
/*END*/