Skip to content

Commit ca4f5af

Browse files
committed
Add speed_skin_detection_sse.cpp
1 parent 017a980 commit ca4f5af

File tree

1 file changed

+91
-13
lines changed

1 file changed

+91
-13
lines changed

speed_skin_detection_sse.cpp

Lines changed: 91 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,49 @@
1+
#include "stdafx.h"
12
#include <stdio.h>
23
#include <opencv2/opencv.hpp>
3-
4+
#include <future>
45
using namespace std;
56
using namespace cv;
67

78
#define IM_Max(a, b) (((a) >= (b)) ? (a): (b))
89
#define IM_Min(a, b) (((a) >= (b)) ? (b): (a))
910
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
1011

11-
// Scalar reference implementation of the rough skin-colour classifier.
//
// Src    : packed 3-byte pixels; byte 0 is read as Blue, byte 1 as Green,
//          byte 2 as Red. Consecutive rows start Stride bytes apart.
// Skin   : output mask, one byte per pixel, rows Width bytes apart.
//          255 is written where the rule below holds, 16 otherwise.
// Width  : pixels per row.
// Height : number of rows.
// Stride : byte distance between the starts of two consecutive Src rows.
//
// Rule: Red >= 60 && Green >= 40 && Blue >= 20 && Red >= Blue &&
//       (Red - Green) >= 10 && (max(R,G,B) - min(R,G,B)) >= 10
void IM_GetRoughSkinRegion(unsigned char *Src, unsigned char *Skin, int Width, int Height, int Stride) {
    for (int Y = 0; Y < Height; Y++)
    {
        // Row offsets are computed in 64-bit so large images cannot overflow int.
        const unsigned char *LinePS = Src + static_cast<long long>(Y) * Stride;
        unsigned char *LinePD = Skin + static_cast<long long>(Y) * Width;
        for (int X = 0; X < Width; X++, LinePS += 3)
        {
            const int Blue = LinePS[0], Green = LinePS[1], Red = LinePS[2];

            // Brightest and darkest channel of this pixel.
            const int MaxGB = (Green >= Blue) ? Green : Blue;
            const int MinGB = (Green >= Blue) ? Blue : Green;
            const int Highest = (Red >= MaxGB) ? Red : MaxGB;
            const int Lowest = (Red >= MinGB) ? MinGB : Red;

            const bool IsSkin = Red >= 60 && Green >= 40 && Blue >= 20 &&
                                Red >= Blue && (Red - Green) >= 10 &&
                                (Highest - Lowest) >= 10;
            LinePD[X] = IsSkin ? 255 : 16;
        }
    }
}
2728

29+
// OpenMP variant of the rough skin-colour classifier.
//
// Src    : packed 3-byte pixels; byte 0 is read as Blue, byte 1 as Green,
//          byte 2 as Red. Consecutive rows start Stride bytes apart.
// Skin   : output mask, one byte per pixel, rows Width bytes apart.
//          255 is written where the rule below holds, 16 otherwise.
// Width  : pixels per row.
// Height : number of rows.
// Stride : byte distance between the starts of two consecutive Src rows.
//
// Rule: Red >= 60 && Green >= 40 && Blue >= 20 && Red >= Blue &&
//       (Red - Green) >= 10 && (max(R,G,B) - min(R,G,B)) >= 10
void IM_GetRoughSkinRegion_OpenMP(unsigned char *Src, unsigned char *Skin, int Width, int Height, int Stride) {
    // The work-sharing loop is the row loop: rows are independent, and this
    // opens a single parallel region per call instead of one per row.
    // When the file is built without OpenMP the pragma is ignored and the
    // loop simply runs serially.
#pragma omp parallel for num_threads(4)
    for (int Y = 0; Y < Height; Y++)
    {
        // Declared inside the loop body so every thread gets its own copies.
        // Row offsets are computed in 64-bit so large images cannot overflow int.
        const unsigned char *LinePS = Src + static_cast<long long>(Y) * Stride;
        unsigned char *LinePD = Skin + static_cast<long long>(Y) * Width;
        for (int X = 0; X < Width; X++, LinePS += 3)
        {
            const int Blue = LinePS[0], Green = LinePS[1], Red = LinePS[2];

            // Brightest and darkest channel of this pixel.
            const int MaxGB = (Green >= Blue) ? Green : Blue;
            const int MinGB = (Green >= Blue) ? Blue : Green;
            const int Highest = (Red >= MaxGB) ? Red : MaxGB;
            const int Lowest = (Red >= MinGB) ? MinGB : Red;

            const bool IsSkin = Red >= 60 && Green >= 40 && Blue >= 20 &&
                                Red >= Blue && (Red - Green) >= 10 &&
                                (Highest - Lowest) >= 10;
            LinePD[X] = IsSkin ? 255 : 16;
        }
    }
}
45+
46+
2847
void IM_GetRoughSkinRegion_SSE(unsigned char *Src, unsigned char *Skin, int Width, int Height, int Stride) {
2948
const int NonSkinLevel = 10; //非肤色部分的处理程序,本例取16,最大值取100,那样就是所有区域都为肤色,毫无意义
3049
const int BlockSize = 16;
@@ -58,7 +77,7 @@ void IM_GetRoughSkinRegion_SSE(unsigned char *Src, unsigned char *Skin, int Widt
5877
Result = _mm_and_si128(Result, _mm_cmpge_epu8(Red, Blue)); //Red >= Blue
5978
Result = _mm_and_si128(Result, _mm_cmpge_epu8(_mm_subs_epu8(Red, Green), _mm_set1_epi8(10))); //(Red - Green) >= 10
6079
Result = _mm_and_si128(Result, _mm_cmpge_epu8(_mm_subs_epu8(Max, Min), _mm_set1_epi8(10))); //IM_Max(IM_Max(Red, Green), Blue) - IM_Min(IM_Min(Red, Green), Blue) >= 10
61-
Result = _mm_or_si128(Result, _mm_set1_epi8(16));
80+
Result = _mm_or_si128(Result, _mm_set1_epi8(16));
6281
_mm_storeu_si128((__m128i*)(LinePD + 0), Result);
6382
}
6483
for (int X = Block * BlockSize; X < Width; X++, LinePS += 3, LinePD++)
@@ -72,6 +91,66 @@ void IM_GetRoughSkinRegion_SSE(unsigned char *Src, unsigned char *Skin, int Widt
7291
}
7392
}
7493

94+
void _IM_GetRoughSkinRegion(unsigned char* Src, const int32_t Width, const int32_t start_row, const int32_t thread_stride, const int32_t Stride, unsigned char* Dest) {
95+
const int NonSkinLevel = 10; //非肤色部分的处理程序,本例取16,最大值取100,那样就是所有区域都为肤色,毫无意义
96+
const int BlockSize = 16;
97+
int Block = Width / BlockSize;
98+
for (int Y = start_row; Y < start_row + thread_stride; Y++) {
99+
unsigned char *LinePS = Src + Y * Stride;
100+
unsigned char *LinePD = Dest + Y * Width;
101+
for (int X = 0; X < Block * BlockSize; X += BlockSize, LinePS += BlockSize * 3, LinePD += BlockSize) {
102+
__m128i Src1, Src2, Src3, Blue, Green, Red, Result, Max, Min, AbsDiff;
103+
Src1 = _mm_loadu_si128((__m128i *)(LinePS + 0));
104+
Src2 = _mm_loadu_si128((__m128i *)(LinePS + 16));
105+
Src3 = _mm_loadu_si128((__m128i *)(LinePS + 32));
106+
107+
Blue = _mm_shuffle_epi8(Src1, _mm_setr_epi8(0, 3, 6, 9, 12, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));
108+
Blue = _mm_or_si128(Blue, _mm_shuffle_epi8(Src2, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, 2, 5, 8, 11, 14, -1, -1, -1, -1, -1)));
109+
Blue = _mm_or_si128(Blue, _mm_shuffle_epi8(Src3, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 4, 7, 10, 13)));
110+
111+
Green = _mm_shuffle_epi8(Src1, _mm_setr_epi8(1, 4, 7, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));
112+
Green = _mm_or_si128(Green, _mm_shuffle_epi8(Src2, _mm_setr_epi8(-1, -1, -1, -1, -1, 0, 3, 6, 9, 12, 15, -1, -1, -1, -1, -1)));
113+
Green = _mm_or_si128(Green, _mm_shuffle_epi8(Src3, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, 5, 8, 11, 14)));
114+
115+
Red = _mm_shuffle_epi8(Src1, _mm_setr_epi8(2, 5, 8, 11, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));
116+
Red = _mm_or_si128(Red, _mm_shuffle_epi8(Src2, _mm_setr_epi8(-1, -1, -1, -1, -1, 1, 4, 7, 10, 13, -1, -1, -1, -1, -1, -1)));
117+
Red = _mm_or_si128(Red, _mm_shuffle_epi8(Src3, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 3, 6, 9, 12, 15)));
118+
119+
Max = _mm_max_epu8(_mm_max_epu8(Blue, Green), Red); //IM_Max(IM_Max(Red, Green), Blue)
120+
Min = _mm_min_epu8(_mm_min_epu8(Blue, Green), Red); //IM_Min(IM_Min(Red, Green), Blue)
121+
Result = _mm_cmpge_epu8(Blue, _mm_set1_epi8(20)); //Blue >= 20
122+
Result = _mm_and_si128(Result, _mm_cmpge_epu8(Green, _mm_set1_epi8(40))); //Green >= 40
123+
Result = _mm_and_si128(Result, _mm_cmpge_epu8(Red, _mm_set1_epi8(60))); //Red >= 60
124+
Result = _mm_and_si128(Result, _mm_cmpge_epu8(Red, Blue)); //Red >= Blue
125+
Result = _mm_and_si128(Result, _mm_cmpge_epu8(_mm_subs_epu8(Red, Green), _mm_set1_epi8(10))); //(Red - Green) >= 10
126+
Result = _mm_and_si128(Result, _mm_cmpge_epu8(_mm_subs_epu8(Max, Min), _mm_set1_epi8(10))); //IM_Max(IM_Max(Red, Green), Blue) - IM_Min(IM_Min(Red, Green), Blue) >= 10
127+
Result = _mm_or_si128(Result, _mm_set1_epi8(16));
128+
_mm_storeu_si128((__m128i*)(LinePD + 0), Result);
129+
}
130+
for (int X = Block * BlockSize; X < Width; X++, LinePS += 3, LinePD++)
131+
{
132+
int Blue = LinePS[0], Green = LinePS[1], Red = LinePS[2];
133+
if (Red >= 60 && Green >= 40 && Blue >= 20 && Red >= Blue && (Red - Green) >= 10 && IM_Max(IM_Max(Red, Green), Blue) - IM_Min(IM_Min(Red, Green), Blue) >= 10)
134+
LinePD[0] = 255; // 全为肤色部分
135+
else
136+
LinePD[0] = 16;
137+
}
138+
}
139+
}
140+
141+
void IM_GetRoughSkinRegion_SSE2(unsigned char *Src, unsigned char *Skin, int width, int height, int stride) {
142+
const int32_t hw_concur = std::min(height >> 4, static_cast<int32_t>(std::thread::hardware_concurrency()));
143+
std::vector<std::future<void>> fut(hw_concur);
144+
const int thread_stride = (height - 1) / hw_concur + 1;
145+
int i = 0, start = 0;
146+
for (; i < std::min(height, hw_concur); i++, start += thread_stride)
147+
{
148+
fut[i] = std::async(std::launch::async, _IM_GetRoughSkinRegion, Src, width, start, thread_stride, stride, Skin);
149+
}
150+
for (int j = 0; j < i; ++j)
151+
fut[j].wait();
152+
}
153+
75154
void IM_GrayToRGB(unsigned char *Gray, unsigned char *RGB, int Width, int Height, int Stride)
76155
{
77156
for (int Y = 0; Y < Height; Y++)
@@ -98,18 +177,17 @@ int main() {
98177
int Radius = 11;
99178
int Adjustment = 50;
100179
int64 st = cvGetTickCount();
101-
for (int i = 0; i <50; i++) {
102-
IM_GetRoughSkinRegion_SSE(Src, Skin, Width, Height, Stride);
103-
IM_GrayToRGB(Skin, Dest, Width, Height, Stride);
180+
for (int i = 0; i <1000; i++) {
181+
IM_GetRoughSkinRegion_SSE2(Src, Skin, Width, Height, Stride);
182+
//IM_GrayToRGB(Skin, Dest, Width, Height, Stride);
104183
}
105-
double duration = (cv::getTickCount() - st) / cv::getTickFrequency() * 20;
184+
double duration = (cv::getTickCount() - st) / cv::getTickFrequency();
106185
printf("%.5f\n", duration);
107-
IM_GetRoughSkinRegion_SSE(Src, Skin, Width, Height, Stride);
186+
IM_GetRoughSkinRegion_SSE2(Src, Skin, Width, Height, Stride);
108187
IM_GrayToRGB(Skin, Dest, Width, Height, Stride);
109188
Mat dst(Height, Width, CV_8UC3, Dest);
110189
imshow("origin", src);
111190
imshow("result", dst);
112191
imwrite("F:\\res.jpg", dst);
113192
waitKey(0);
114-
waitKey(0);
115193
}

0 commit comments

Comments
 (0)