From 7875ca6fd4c7962c7a0e7e40f891ab46fb79ee98 Mon Sep 17 00:00:00 2001 From: ReneThrane Date: Tue, 27 Feb 2024 16:10:26 +0100 Subject: [PATCH] Release 6.2.4 - Updated zlib version - OpenCL SoftISP Fix denoise kernel issue Change-Id: I2b47505100e62a8a0672f1a8b6f56021fce3bd0b --- DemoApps/OpenCL/SoftISP/Content/isp_kernel.cl | 1498 +++++++++-------- DemoApps/OpenCL/SoftISP/Fsl.gen | 2 - .../OpenCL/SoftISP/source/OptionParser.cpp | 8 +- .../OpenCL/SoftISP/source/OptionParser.hpp | 10 +- DemoApps/OpenCL/SoftISP/source/SoftISP.cpp | 261 +-- DemoApps/OpenCL/SoftISP/source/SoftISP.hpp | 22 +- Project.gen | 2 +- README.md | 2 +- SCR-gtec-demo-framework.txt | 4 +- ThirdParty/Recipe/zlib_1_3/Fsl.gen | 18 +- 10 files changed, 932 insertions(+), 895 deletions(-) diff --git a/DemoApps/OpenCL/SoftISP/Content/isp_kernel.cl b/DemoApps/OpenCL/SoftISP/Content/isp_kernel.cl index edf660d3e..fc97c4f4e 100644 --- a/DemoApps/OpenCL/SoftISP/Content/isp_kernel.cl +++ b/DemoApps/OpenCL/SoftISP/Content/isp_kernel.cl @@ -1,803 +1,833 @@ /**************************************************************************************************************************************************** -* Copyright 2017 NXP -* All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are met: -* -* * Redistributions of source code must retain the above copyright notice, -* this list of conditions and the following disclaimer. -* -* * Redistributions in binary form must reproduce the above copyright notice, -* this list of conditions and the following disclaimer in the documentation -* and/or other materials provided with the distribution. -* -* * Neither the name of the NXP. nor the names of -* its contributors may be used to endorse or promote products derived from -* this software without specific prior written permission. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -* -****************************************************************************************************************************************************/ + * Copyright 2017 NXP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of the NXP. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************************************************************************************/ #define src_step 1920 #define dst_step 1920 #define dst_step_dsp 1920 #define WIDTH 1920 +#define HEIGHT 1080 -#define R_num (1920*1080/4) -#define G_num (1920*1080/2) -#define B_num (1920*1080/4) - -__kernel void badpixel( __global uchar *src, __global uchar *dst) -{ - int i = get_global_id(0); - int j = get_global_id(1); - - int img_arrayA[4],img_arrayB[4],img_arrayGA[4],img_arrayGB[4]; - uchar8 lineA,lineB,lineC,lineD,lineE,lineF; - lineA = vload8(0, src + mad24(j*2-2, src_step, i*4-2)); - lineB = vload8(0, src + mad24(j*2-1, src_step, i*4-2)); - lineC = vload8(0, src + mad24(j*2, src_step, i*4-2)); - lineD = vload8(0, src + mad24(j*2+1, src_step, i*4-2)); - lineE = vload8(0, src + mad24(j*2+2, src_step, i*4-2)); - lineF = vload8(0, src + mad24(j*2+3, src_step, i*4-2)); - - uchar4 pixel_center0 = (uchar4)(lineC.s2, lineC.s3, lineC.s4, lineC.s5); - uchar4 pixel_center1 = (uchar4)(lineD.s2, lineD.s3, lineD.s4, lineD.s5); - - int temp; - - //B - img_arrayA[0] = (int)lineA.s2; - img_arrayA[1] = (int)lineC.s0; - img_arrayA[2] = (int)lineC.s4; - img_arrayA[3] = (int)lineE.s2; - - #pragma unroll - for (int pass = 1; pass < 4; pass++) - { - for (int k = 0; k< 4 - pass; k++) - { - if (img_arrayA[k] > img_arrayA[k + 1]) - { - temp = img_arrayA[k]; - img_arrayA[k] = img_arrayA[k + 1]; - img_arrayA[k + 1] = temp; - } - } - } - int AVE = (img_arrayA[1] + img_arrayA[2])>>1; - int ABS = img_arrayA[3] - img_arrayA[0]; - int up_value = AVE + ABS; - int down_value = AVE - ABS; - - if (pixel_center0.s0 > up_value || pixel_center0.s0 < down_value) - pixel_center0.s0 = convert_uchar(AVE); - - // G - img_arrayGA[0] = lineB.s2; - img_arrayGA[1] = lineB.s4; - img_arrayGA[2] = lineD.s2; - img_arrayGA[3] = lineD.s4; - - #pragma unroll - for (int pass = 1; pass < 4; pass++) - { - for (int k = 0; k< 4 - pass; k++) - if (img_arrayGA[k] > img_arrayGA[k + 1]) - { - temp = img_arrayGA[k]; - img_arrayGA[k] = img_arrayGA[k + 1]; - img_arrayGA[k + 1] = temp; - } - } - - AVE = (img_arrayGA[1] + img_arrayGA[2])>>1; - ABS = img_arrayGA[3] - img_arrayGA[0]; - up_value = AVE + ABS; - down_value = abs(AVE - ABS); - - if (pixel_center0.s1 > up_value || pixel_center0.s1 < down_value) - pixel_center0.s1 = convert_uchar(AVE); - - img_arrayB[0] = (int)lineA.s4; - img_arrayB[1] = (int)lineC.s2; - img_arrayB[2] = (int)lineC.s6; - img_arrayB[3] = (int)lineE.s4; - - #pragma unroll - for (int pass = 1; pass < 4; pass++) - { - for (int k = 0; k< 4 - pass; k++) - { - if (img_arrayB[k] > img_arrayB[k + 1]) - { - temp = img_arrayB[k]; - img_arrayB[k] = img_arrayB[k + 1]; - img_arrayB[k + 1] = temp; - } - } - - } - AVE = (img_arrayB[2] + img_arrayB[1])>>1; - ABS = img_arrayB[3] - img_arrayB[0]; - up_value = AVE + ABS; - down_value = abs(AVE - ABS); - - if (pixel_center0.s2 > up_value || pixel_center0.s2 < down_value) - pixel_center0.s2 = convert_uchar(AVE); - - - img_arrayGB[0] = (int)lineB.s4; - img_arrayGB[1] = (int)lineB.s6; - img_arrayGB[2] = (int)lineD.s4; - img_arrayGB[3] = (int)lineD.s6; - #pragma unroll - for (int pass = 1; pass < 4; pass++) - { - for (int k = 0; k< 4 - pass; k++) - if (img_arrayGA[k] > img_arrayGB[k + 1]) - { - temp = img_arrayGB[k]; - img_arrayGB[k] = img_arrayGB[k + 1]; - img_arrayGB[k + 1] = temp; - } - } - AVE = (img_arrayGB[1] + img_arrayGB[2])>>1; - ABS = img_arrayGB[3] - img_arrayGB[0]; - up_value = AVE + ABS; - down_value = abs(AVE - ABS); - - if (pixel_center0.s3 > up_value || pixel_center0.s3 < down_value) - pixel_center0.s3 = convert_uchar(AVE); - - vstore4(pixel_center0, 0 ,dst+ mad24(j*2, dst_step, i<<2 )); - - img_arrayGA[0] = (int)lineC.s1; - img_arrayGA[1] = (int)lineC.s3; - img_arrayGA[2] = (int)lineE.s1; - img_arrayGA[3] = (int)lineE.s3; - #pragma unroll - for (int pass = 1; pass < 4; pass++) - { - for (int k = 0; k< 4 - pass; k++) - if (img_arrayGA[k] > img_arrayGA[k + 1]) - { - temp = img_arrayGA[k]; - img_arrayGA[k] = img_arrayGA[k + 1]; - img_arrayGA[k + 1] = temp; - } - } - AVE = (img_arrayGA[1] + img_arrayGA[2])>>1; - ABS = img_arrayGA[3] - img_arrayGA[0]; - up_value = AVE + ABS; - down_value = abs(AVE - ABS); - - if (pixel_center1.s0 > up_value || pixel_center1.s0 < down_value) - pixel_center1.s0 = convert_uchar(AVE); - - img_arrayA[0] = (int)lineB.s3; - img_arrayA[1] = (int)lineD.s1; - img_arrayA[2] = (int)lineD.s5; - img_arrayA[3] = (int)lineF.s3; - #pragma unroll - for (int pass = 1; pass < 4; pass++) - { - for (int k = 0; k< 4 - pass; k++) - { - if (img_arrayA[k] > img_arrayA[k + 1]) - { - temp = img_arrayA[k]; - img_arrayA[k] = img_arrayA[k + 1]; - img_arrayA[k + 1] = temp; - } - } - } - AVE = (img_arrayA[1] + img_arrayA[2] )>>1; - ABS = img_arrayA[3] - img_arrayA[0]; - up_value = AVE + ABS; - down_value = abs(AVE - ABS); - - if (pixel_center1.s1 > up_value || pixel_center1.s1 < down_value) - pixel_center1.s1 = convert_uchar(AVE); - - img_arrayGB[0] = (int)lineC.s3; - img_arrayGB[1] = (int)lineC.s5; - img_arrayGB[2] = (int)lineE.s3; - img_arrayGB[3] = (int)lineE.s5; - #pragma unroll - for (int pass = 1; pass < 4; pass++) - { - for (int k = 0; k< 4 - pass; k++) - if (img_arrayGB[k] > img_arrayGB[k + 1]) - { - temp = img_arrayGB[k]; - img_arrayGB[k] = img_arrayGB[k + 1]; - img_arrayGB[k + 1] = temp; - } - } - AVE = (img_arrayGB[1] + img_arrayGB[2])>>1; - ABS = img_arrayGB[3] - img_arrayGB[0]; - up_value = AVE + ABS; - down_value = abs(AVE - ABS); - - if (pixel_center1.s2 > up_value || pixel_center1.s2 < down_value) - pixel_center1.s2 = convert_uchar(AVE); - - img_arrayB[0] = (int)lineB.s5; - img_arrayB[1] = (int)lineD.s3; - img_arrayB[2] = (int)lineD.s7; - img_arrayB[3] = (int)lineF.s5; - #pragma unroll - for (int pass = 1; pass < 4; pass++) - { - for (int k = 0; k< 4 - pass; k++) - { - if (img_arrayB[k] > img_arrayB[k + 1]) - { - temp = img_arrayB[k]; - img_arrayB[k] = img_arrayB[k + 1]; - img_arrayB[k + 1] = temp; - } - } - - } - AVE = (img_arrayB[1] + img_arrayB[2])>>1; - ABS = img_arrayB[3] - img_arrayB[0]; - up_value = AVE + ABS; - down_value = abs(AVE - ABS); - - if (pixel_center1.s3 > up_value || pixel_center1.s3 < down_value) - pixel_center1.s3 = convert_uchar(AVE); - - vstore4(pixel_center1 , 0 ,dst+ mad24(j*2+1, dst_step, i<<2 )); +#define R_num (1920 * 1080 / 4) +#define G_num (1920 * 1080 / 2) +#define B_num (1920 * 1080 / 4) + +__kernel void badpixel(__global uchar* src, __global uchar* dst) +{ + int i = get_global_id(0); + int j = get_global_id(1); + + int img_arrayA[4], img_arrayB[4], img_arrayGA[4], img_arrayGB[4]; + uchar8 lineA, lineB, lineC, lineD, lineE, lineF; + lineA = vload8(0, src + mad24(j * 2 - 2, src_step, i * 4 - 2)); + lineB = vload8(0, src + mad24(j * 2 - 1, src_step, i * 4 - 2)); + lineC = vload8(0, src + mad24(j * 2, src_step, i * 4 - 2)); + lineD = vload8(0, src + mad24(j * 2 + 1, src_step, i * 4 - 2)); + lineE = vload8(0, src + mad24(j * 2 + 2, src_step, i * 4 - 2)); + lineF = vload8(0, src + mad24(j * 2 + 3, src_step, i * 4 - 2)); + + uchar4 pixel_center0 = (uchar4)(lineC.s2, lineC.s3, lineC.s4, lineC.s5); + uchar4 pixel_center1 = (uchar4)(lineD.s2, lineD.s3, lineD.s4, lineD.s5); + + int temp; + + // B + img_arrayA[0] = (int)lineA.s2; + img_arrayA[1] = (int)lineC.s0; + img_arrayA[2] = (int)lineC.s4; + img_arrayA[3] = (int)lineE.s2; + +#pragma unroll + for (int pass = 1; pass < 4; pass++) + { + for (int k = 0; k < 4 - pass; k++) + { + if (img_arrayA[k] > img_arrayA[k + 1]) + { + temp = img_arrayA[k]; + img_arrayA[k] = img_arrayA[k + 1]; + img_arrayA[k + 1] = temp; + } + } + } + int AVE = (img_arrayA[1] + img_arrayA[2]) >> 1; + int ABS = img_arrayA[3] - img_arrayA[0]; + int up_value = AVE + ABS; + int down_value = AVE - ABS; + + if (pixel_center0.s0 > up_value || pixel_center0.s0 < down_value) + pixel_center0.s0 = convert_uchar(AVE); + + // G + img_arrayGA[0] = lineB.s2; + img_arrayGA[1] = lineB.s4; + img_arrayGA[2] = lineD.s2; + img_arrayGA[3] = lineD.s4; + +#pragma unroll + for (int pass = 1; pass < 4; pass++) + { + for (int k = 0; k < 4 - pass; k++) + if (img_arrayGA[k] > img_arrayGA[k + 1]) + { + temp = img_arrayGA[k]; + img_arrayGA[k] = img_arrayGA[k + 1]; + img_arrayGA[k + 1] = temp; + } + } + + AVE = (img_arrayGA[1] + img_arrayGA[2]) >> 1; + ABS = img_arrayGA[3] - img_arrayGA[0]; + up_value = AVE + ABS; + down_value = abs(AVE - ABS); + + if (pixel_center0.s1 > up_value || pixel_center0.s1 < down_value) + pixel_center0.s1 = convert_uchar(AVE); + + img_arrayB[0] = (int)lineA.s4; + img_arrayB[1] = (int)lineC.s2; + img_arrayB[2] = (int)lineC.s6; + img_arrayB[3] = (int)lineE.s4; + +#pragma unroll + for (int pass = 1; pass < 4; pass++) + { + for (int k = 0; k < 4 - pass; k++) + { + if (img_arrayB[k] > img_arrayB[k + 1]) + { + temp = img_arrayB[k]; + img_arrayB[k] = img_arrayB[k + 1]; + img_arrayB[k + 1] = temp; + } + } + } + AVE = (img_arrayB[2] + img_arrayB[1]) >> 1; + ABS = img_arrayB[3] - img_arrayB[0]; + up_value = AVE + ABS; + down_value = abs(AVE - ABS); + + if (pixel_center0.s2 > up_value || pixel_center0.s2 < down_value) + pixel_center0.s2 = convert_uchar(AVE); + + + img_arrayGB[0] = (int)lineB.s4; + img_arrayGB[1] = (int)lineB.s6; + img_arrayGB[2] = (int)lineD.s4; + img_arrayGB[3] = (int)lineD.s6; +#pragma unroll + for (int pass = 1; pass < 4; pass++) + { + for (int k = 0; k < 4 - pass; k++) + if (img_arrayGA[k] > img_arrayGB[k + 1]) + { + temp = img_arrayGB[k]; + img_arrayGB[k] = img_arrayGB[k + 1]; + img_arrayGB[k + 1] = temp; + } + } + AVE = (img_arrayGB[1] + img_arrayGB[2]) >> 1; + ABS = img_arrayGB[3] - img_arrayGB[0]; + up_value = AVE + ABS; + down_value = abs(AVE - ABS); + + if (pixel_center0.s3 > up_value || pixel_center0.s3 < down_value) + pixel_center0.s3 = convert_uchar(AVE); + + vstore4(pixel_center0, 0, dst + mad24(j * 2, dst_step, i << 2)); + + img_arrayGA[0] = (int)lineC.s1; + img_arrayGA[1] = (int)lineC.s3; + img_arrayGA[2] = (int)lineE.s1; + img_arrayGA[3] = (int)lineE.s3; +#pragma unroll + for (int pass = 1; pass < 4; pass++) + { + for (int k = 0; k < 4 - pass; k++) + if (img_arrayGA[k] > img_arrayGA[k + 1]) + { + temp = img_arrayGA[k]; + img_arrayGA[k] = img_arrayGA[k + 1]; + img_arrayGA[k + 1] = temp; + } + } + AVE = (img_arrayGA[1] + img_arrayGA[2]) >> 1; + ABS = img_arrayGA[3] - img_arrayGA[0]; + up_value = AVE + ABS; + down_value = abs(AVE - ABS); + + if (pixel_center1.s0 > up_value || pixel_center1.s0 < down_value) + pixel_center1.s0 = convert_uchar(AVE); + + img_arrayA[0] = (int)lineB.s3; + img_arrayA[1] = (int)lineD.s1; + img_arrayA[2] = (int)lineD.s5; + img_arrayA[3] = (int)lineF.s3; +#pragma unroll + for (int pass = 1; pass < 4; pass++) + { + for (int k = 0; k < 4 - pass; k++) + { + if (img_arrayA[k] > img_arrayA[k + 1]) + { + temp = img_arrayA[k]; + img_arrayA[k] = img_arrayA[k + 1]; + img_arrayA[k + 1] = temp; + } + } + } + AVE = (img_arrayA[1] + img_arrayA[2]) >> 1; + ABS = img_arrayA[3] - img_arrayA[0]; + up_value = AVE + ABS; + down_value = abs(AVE - ABS); + + if (pixel_center1.s1 > up_value || pixel_center1.s1 < down_value) + pixel_center1.s1 = convert_uchar(AVE); + + img_arrayGB[0] = (int)lineC.s3; + img_arrayGB[1] = (int)lineC.s5; + img_arrayGB[2] = (int)lineE.s3; + img_arrayGB[3] = (int)lineE.s5; +#pragma unroll + for (int pass = 1; pass < 4; pass++) + { + for (int k = 0; k < 4 - pass; k++) + if (img_arrayGB[k] > img_arrayGB[k + 1]) + { + temp = img_arrayGB[k]; + img_arrayGB[k] = img_arrayGB[k + 1]; + img_arrayGB[k + 1] = temp; + } + } + AVE = (img_arrayGB[1] + img_arrayGB[2]) >> 1; + ABS = img_arrayGB[3] - img_arrayGB[0]; + up_value = AVE + ABS; + down_value = abs(AVE - ABS); + + if (pixel_center1.s2 > up_value || pixel_center1.s2 < down_value) + pixel_center1.s2 = convert_uchar(AVE); + + img_arrayB[0] = (int)lineB.s5; + img_arrayB[1] = (int)lineD.s3; + img_arrayB[2] = (int)lineD.s7; + img_arrayB[3] = (int)lineF.s5; +#pragma unroll + for (int pass = 1; pass < 4; pass++) + { + for (int k = 0; k < 4 - pass; k++) + { + if (img_arrayB[k] > img_arrayB[k + 1]) + { + temp = img_arrayB[k]; + img_arrayB[k] = img_arrayB[k + 1]; + img_arrayB[k + 1] = temp; + } + } + } + AVE = (img_arrayB[1] + img_arrayB[2]) >> 1; + ABS = img_arrayB[3] - img_arrayB[0]; + up_value = AVE + ABS; + down_value = abs(AVE - ABS); + + if (pixel_center1.s3 > up_value || pixel_center1.s3 < down_value) + pixel_center1.s3 = convert_uchar(AVE); + + vstore4(pixel_center1, 0, dst + mad24(j * 2 + 1, dst_step, i << 2)); } -__kernel void sigma(__global const uchar *src, - __global int *pixel_valuer, __global int *pixel_valueg,__global int *pixel_valueb) +__kernel void sigma(__global const uchar* src, __global int* pixel_valuer, __global int* pixel_valueg, __global int* pixel_valueb) { - const int i = get_global_id(0); - const int j = get_global_id(1); - - uchar8 pix = vload8(0, src + mad24(j*2, src_step, i*8)); - uchar8 pix2 = vload8(0, src + mad24(j*2+1, src_step, i*8)); - atomic_add(pixel_valueb,pix.s0); - atomic_add(pixel_valueg,pix.s1); - atomic_add(pixel_valueb,pix.s2); - atomic_add(pixel_valueg,pix.s3); - atomic_add(pixel_valueb,pix.s4); - atomic_add(pixel_valueg,pix.s5); - atomic_add(pixel_valueb,pix.s6); - atomic_add(pixel_valueg,pix.s7); - - atomic_add(pixel_valueg,pix2.s0); - atomic_add(pixel_valuer,pix2.s1); - atomic_add(pixel_valueg,pix2.s2); - atomic_add(pixel_valuer,pix2.s3); - atomic_add(pixel_valueg,pix2.s4); - atomic_add(pixel_valuer,pix2.s5); - atomic_add(pixel_valueg,pix2.s6); - atomic_add(pixel_valuer,pix2.s7); + const int i = get_global_id(0); + const int j = get_global_id(1); + + uchar8 pix = vload8(0, src + mad24(j * 2, src_step, i * 8)); + uchar8 pix2 = vload8(0, src + mad24(j * 2 + 1, src_step, i * 8)); + atomic_add(pixel_valueb, pix.s0); + atomic_add(pixel_valueg, pix.s1); + atomic_add(pixel_valueb, pix.s2); + atomic_add(pixel_valueg, pix.s3); + atomic_add(pixel_valueb, pix.s4); + atomic_add(pixel_valueg, pix.s5); + atomic_add(pixel_valueb, pix.s6); + atomic_add(pixel_valueg, pix.s7); + + atomic_add(pixel_valueg, pix2.s0); + atomic_add(pixel_valuer, pix2.s1); + atomic_add(pixel_valueg, pix2.s2); + atomic_add(pixel_valuer, pix2.s3); + atomic_add(pixel_valueg, pix2.s4); + atomic_add(pixel_valuer, pix2.s5); + atomic_add(pixel_valueg, pix2.s6); + atomic_add(pixel_valuer, pix2.s7); } -__kernel void awb(__global const uchar *src, - __global int *pixel_valuer, __global int *pixel_valueg,__global int *pixel_valueb, __global uchar *dst) +__kernel void awb(__global const uchar* src, __global int* pixel_valuer, __global int* pixel_valueg, __global int* pixel_valueb, __global uchar* dst) { - const int i = get_global_id(0); - const int j = get_global_id(1); - - uchar8 pix = vload8(0, src + mad24(j*2, src_step, i*8)); - uchar8 pix2 = vload8(0, src + mad24(j*2+1, src_step, i*8)); - float R_AVG = (float)(*pixel_valuer)/(float)R_num; - float G_AVG = (float)(*pixel_valueg)/(float)G_num; - float B_AVG = (float)(*pixel_valueb)/(float)B_num; - float alpha = G_AVG/R_AVG; - float beta = G_AVG/B_AVG; - - uchar8 out = 0; - uchar8 out2 = 0; - - uchar temp1 = convert_uchar_sat_rte(pix.s0*beta); - uchar temp2 = convert_uchar_sat_rte(pix.s2*beta); - uchar temp3 = convert_uchar_sat_rte(pix.s4*beta); - uchar temp4 = convert_uchar_sat_rte(pix.s6*beta); - out = (uchar8)(temp1, pix.s1, temp2, pix.s3, temp3, pix.s5, temp4, pix.s7); - - temp1 = convert_uchar_sat_rte(pix2.s1*alpha); - temp2 = convert_uchar_sat_rte(pix2.s3*alpha); - temp3 = convert_uchar_sat_rte(pix2.s5*alpha); - temp4 = convert_uchar_sat_rte(pix2.s7*alpha); - out2 = (uchar8)(pix2.s0, temp1, pix2.s2, temp2, pix2.s4, temp3, pix2.s6, temp4); - - vstore8(out , 0 ,dst+ mad24(j*2, dst_step, i*8 )); - vstore8(out2 , 0 ,dst+ mad24(j*2+1, dst_step, i*8 )); + const int i = get_global_id(0); + const int j = get_global_id(1); + + uchar8 pix = vload8(0, src + mad24(j * 2, src_step, i * 8)); + uchar8 pix2 = vload8(0, src + mad24(j * 2 + 1, src_step, i * 8)); + float R_AVG = (float)(*pixel_valuer) / (float)R_num; + float G_AVG = (float)(*pixel_valueg) / (float)G_num; + float B_AVG = (float)(*pixel_valueb) / (float)B_num; + float alpha = G_AVG / R_AVG; + float beta = G_AVG / B_AVG; + + uchar8 out = 0; + uchar8 out2 = 0; + + uchar temp1 = convert_uchar_sat_rte(pix.s0 * beta); + uchar temp2 = convert_uchar_sat_rte(pix.s2 * beta); + uchar temp3 = convert_uchar_sat_rte(pix.s4 * beta); + uchar temp4 = convert_uchar_sat_rte(pix.s6 * beta); + out = (uchar8)(temp1, pix.s1, temp2, pix.s3, temp3, pix.s5, temp4, pix.s7); + + temp1 = convert_uchar_sat_rte(pix2.s1 * alpha); + temp2 = convert_uchar_sat_rte(pix2.s3 * alpha); + temp3 = convert_uchar_sat_rte(pix2.s5 * alpha); + temp4 = convert_uchar_sat_rte(pix2.s7 * alpha); + out2 = (uchar8)(pix2.s0, temp1, pix2.s2, temp2, pix2.s4, temp3, pix2.s6, temp4); + + vstore8(out, 0, dst + mad24(j * 2, dst_step, i * 8)); + vstore8(out2, 0, dst + mad24(j * 2 + 1, dst_step, i * 8)); } -__kernel void equalize1(__global const uchar *src, - __global int *pixel_valuer, __global int *pixel_valueg,__global int *pixel_valueb) +__kernel void equalize1(__global const uchar* src, __global int* pixel_valuer, __global int* pixel_valueg, __global int* pixel_valueb) { - const int i = get_global_id(0); - const int j = get_global_id(1); - - uchar8 pix0 = vload8(0, src + mad24(2*j, src_step, i*8)); - uchar8 pix1 = vload8(0, src + mad24(2*j + 1, src_step, i*8)); - int8 tmp0 = convert_int8(pix0); - atomic_inc(pixel_valueb + tmp0.s0); - atomic_inc(pixel_valueg + tmp0.s1); - atomic_inc(pixel_valueb + tmp0.s2); - atomic_inc(pixel_valueg + tmp0.s3); - atomic_inc(pixel_valueb + tmp0.s4); - atomic_inc(pixel_valueg + tmp0.s5); - atomic_inc(pixel_valueb + tmp0.s6); - atomic_inc(pixel_valueg + tmp0.s7); - - int8 tmp1 = convert_int8(pix1); - atomic_inc(pixel_valueg + tmp1.s0); - atomic_inc(pixel_valuer + tmp1.s1); - atomic_inc(pixel_valueg + tmp1.s2); - atomic_inc(pixel_valuer + tmp1.s3); - atomic_inc(pixel_valueg + tmp1.s4); - atomic_inc(pixel_valuer + tmp1.s5); - atomic_inc(pixel_valueg + tmp1.s6); - atomic_inc(pixel_valuer + tmp1.s7); + const int i = get_global_id(0); + const int j = get_global_id(1); + + uchar8 pix0 = vload8(0, src + mad24(2 * j, src_step, i * 8)); + uchar8 pix1 = vload8(0, src + mad24(2 * j + 1, src_step, i * 8)); + int8 tmp0 = convert_int8(pix0); + atomic_inc(pixel_valueb + tmp0.s0); + atomic_inc(pixel_valueg + tmp0.s1); + atomic_inc(pixel_valueb + tmp0.s2); + atomic_inc(pixel_valueg + tmp0.s3); + atomic_inc(pixel_valueb + tmp0.s4); + atomic_inc(pixel_valueg + tmp0.s5); + atomic_inc(pixel_valueb + tmp0.s6); + atomic_inc(pixel_valueg + tmp0.s7); + + int8 tmp1 = convert_int8(pix1); + atomic_inc(pixel_valueg + tmp1.s0); + atomic_inc(pixel_valuer + tmp1.s1); + atomic_inc(pixel_valueg + tmp1.s2); + atomic_inc(pixel_valuer + tmp1.s3); + atomic_inc(pixel_valueg + tmp1.s4); + atomic_inc(pixel_valuer + tmp1.s5); + atomic_inc(pixel_valueg + tmp1.s6); + atomic_inc(pixel_valuer + tmp1.s7); } -__kernel void equalize2(__global int *in_R, __global int *in_G,__global int *in_B, __global int *out_R, __global int *out_G,__global int *out_B) +__kernel void equalize2(__global int* in_R, __global int* in_G, __global int* in_B, __global int* out_R, __global int* out_G, __global int* out_B) { - const int i = get_global_id(0); - const int j = get_global_id(1); - int gid = get_group_id(1)* get_num_groups(0) + get_group_id(0); - - if (gid == 0){ - float scaler, scaleg, scaleb; - int sumr = 0; - int ir = 0; - while (!(*(in_R + ir))) - ++ir; - if (R_num == (*(in_R + ir))){ - scaler = 1; - *(out_R + ir) = ir; - } - else{ - scaler = 255.f / (R_num - *(in_R + ir)); - *(out_R + ir) = 0; - ir++; - for (; ir < 256; ir++){ - sumr += *(in_R + ir); - *(out_R + ir) = convert_int_sat_rte(convert_float(sumr) * scaler); - } - } - int sumg = 0; - int ig = 0; - while (!(*(in_G + ig))) - ++ig; - if (G_num == *(in_G + ig)){ - scaleg = 1; - *(out_G + ig) = ig; - } - else{ - scaleg = 255.f / (G_num - *(in_G + ig)); - *(out_G + ig) = 0; - ig++; - for (; ig < 256; ig++){ - sumg += *(in_G + ig); - *(out_G + ig) = convert_int_sat_rte(convert_float(sumg) * scaleg); - } - } - int sumb = 0; - int ib = 0; - while (!(*(in_B + ib))) - ++ib; - if (B_num == *(in_B + ib)){ - scaleb = 1; - *(out_B + ib) = ib; - } - else{ - scaleb = 255.f / (B_num - *(in_B + ib)); - *(out_B + ib) = 0; - ib++; - for (; ib < 256; ib++){ - sumb += *(in_B + ib); - *(out_B + ib) = (int)convert_uchar_sat_rte(convert_float(sumb) * scaleb); - } - } - } + const int i = get_global_id(0); + const int j = get_global_id(1); + int gid = get_group_id(1) * get_num_groups(0) + get_group_id(0); + + if (gid == 0) + { + float scaler, scaleg, scaleb; + int sumr = 0; + int ir = 0; + while (!(*(in_R + ir))) + ++ir; + if (R_num == (*(in_R + ir))) + { + scaler = 1; + *(out_R + ir) = ir; + } + else + { + scaler = 255.f / (R_num - *(in_R + ir)); + *(out_R + ir) = 0; + ir++; + for (; ir < 256; ir++) + { + sumr += *(in_R + ir); + *(out_R + ir) = convert_int_sat_rte(convert_float(sumr) * scaler); + } + } + int sumg = 0; + int ig = 0; + while (!(*(in_G + ig))) + ++ig; + if (G_num == *(in_G + ig)) + { + scaleg = 1; + *(out_G + ig) = ig; + } + else + { + scaleg = 255.f / (G_num - *(in_G + ig)); + *(out_G + ig) = 0; + ig++; + for (; ig < 256; ig++) + { + sumg += *(in_G + ig); + *(out_G + ig) = convert_int_sat_rte(convert_float(sumg) * scaleg); + } + } + int sumb = 0; + int ib = 0; + while (!(*(in_B + ib))) + ++ib; + if (B_num == *(in_B + ib)) + { + scaleb = 1; + *(out_B + ib) = ib; + } + else + { + scaleb = 255.f / (B_num - *(in_B + ib)); + *(out_B + ib) = 0; + ib++; + for (; ib < 256; ib++) + { + sumb += *(in_B + ib); + *(out_B + ib) = (int)convert_uchar_sat_rte(convert_float(sumb) * scaleb); + } + } + } } -__kernel void equalize3(__global const uchar *src, - __global int *pixel_valuer, __global int *pixel_valueg,__global int *pixel_valueb, __global uchar *dst) +__kernel void equalize3(__global const uchar* src, __global int* pixel_valuer, __global int* pixel_valueg, __global int* pixel_valueb, + __global uchar* dst) { - const int i = get_global_id(0); - const int j = get_global_id(1); - uchar8 pix0 = vload8(0, src + mad24(2*j, src_step, i * 8)); - uchar8 pix1 = vload8(0, src + mad24(2*j + 1, src_step, i * 8)); - int8 pixelEven, pixelOdd; - uchar8 pixelOut0, pixelOut1; - pixelEven = convert_int8(pix0); - pixelOdd = convert_int8(pix1); - pixelOut0.s0 = convert_uchar(*(pixel_valueb + pixelEven.s0)); - pixelOut0.s1 = convert_uchar(*(pixel_valueg + pixelEven.s1)); - pixelOut0.s2 = convert_uchar(*(pixel_valueb + pixelEven.s2)); - pixelOut0.s3 = convert_uchar(*(pixel_valueg + pixelEven.s3)); - pixelOut0.s4 = convert_uchar(*(pixel_valueb + pixelEven.s4)); - pixelOut0.s5 = convert_uchar(*(pixel_valueg + pixelEven.s5)); - pixelOut0.s6 = convert_uchar(*(pixel_valueb + pixelEven.s6)); - pixelOut0.s7 = convert_uchar(*(pixel_valueg + pixelEven.s7)); - - pixelOut1.s0 = convert_uchar(*(pixel_valueg + pixelOdd.s0)); - pixelOut1.s1 = convert_uchar(*(pixel_valuer + pixelOdd.s1)); - pixelOut1.s2 = convert_uchar(*(pixel_valueg + pixelOdd.s2)); - pixelOut1.s3 = convert_uchar(*(pixel_valuer + pixelOdd.s3)); - pixelOut1.s4 = convert_uchar(*(pixel_valueg + pixelOdd.s4)); - pixelOut1.s5 = convert_uchar(*(pixel_valuer + pixelOdd.s5)); - pixelOut1.s6 = convert_uchar(*(pixel_valueg + pixelOdd.s6)); - pixelOut1.s7 = convert_uchar(*(pixel_valuer + pixelOdd.s7)); - - vstore8(pixelOut0 , 0 ,dst + mad24(j * 2, dst_step, i * 8)); - vstore8(pixelOut1 , 0 ,dst + mad24(j * 2 + 1, dst_step, i * 8)); + const int i = get_global_id(0); + const int j = get_global_id(1); + uchar8 pix0 = vload8(0, src + mad24(2 * j, src_step, i * 8)); + uchar8 pix1 = vload8(0, src + mad24(2 * j + 1, src_step, i * 8)); + int8 pixelEven, pixelOdd; + uchar8 pixelOut0, pixelOut1; + pixelEven = convert_int8(pix0); + pixelOdd = convert_int8(pix1); + pixelOut0.s0 = convert_uchar(*(pixel_valueb + pixelEven.s0)); + pixelOut0.s1 = convert_uchar(*(pixel_valueg + pixelEven.s1)); + pixelOut0.s2 = convert_uchar(*(pixel_valueb + pixelEven.s2)); + pixelOut0.s3 = convert_uchar(*(pixel_valueg + pixelEven.s3)); + pixelOut0.s4 = convert_uchar(*(pixel_valueb + pixelEven.s4)); + pixelOut0.s5 = convert_uchar(*(pixel_valueg + pixelEven.s5)); + pixelOut0.s6 = convert_uchar(*(pixel_valueb + pixelEven.s6)); + pixelOut0.s7 = convert_uchar(*(pixel_valueg + pixelEven.s7)); + + pixelOut1.s0 = convert_uchar(*(pixel_valueg + pixelOdd.s0)); + pixelOut1.s1 = convert_uchar(*(pixel_valuer + pixelOdd.s1)); + pixelOut1.s2 = convert_uchar(*(pixel_valueg + pixelOdd.s2)); + pixelOut1.s3 = convert_uchar(*(pixel_valuer + pixelOdd.s3)); + pixelOut1.s4 = convert_uchar(*(pixel_valueg + pixelOdd.s4)); + pixelOut1.s5 = convert_uchar(*(pixel_valuer + pixelOdd.s5)); + pixelOut1.s6 = convert_uchar(*(pixel_valueg + pixelOdd.s6)); + pixelOut1.s7 = convert_uchar(*(pixel_valuer + pixelOdd.s7)); + + vstore8(pixelOut0, 0, dst + mad24(j * 2, dst_step, i * 8)); + vstore8(pixelOut1, 0, dst + mad24(j * 2 + 1, dst_step, i * 8)); } -inline float4 Pattern(float4 PATTERN , float4 value, float4 temp, float4 Dvec){ - - const float4 kA = (float4)(-1.0/8.0, -1.5/8.0, 0.5/8.0, -1.0/8.0); - const float4 kB = (float4)( 2.0/8.0, 0.0, 0.0, 4.0/8.0); - const float4 kD = (float4)( 0.0, 2.0/8.0, -1.0/8.0, -1.0/8.0); +inline float4 Pattern(float4 PATTERN, float4 value, float4 temp, float4 Dvec) +{ + const float4 kA = (float4)(-1.0 / 8.0, -1.5 / 8.0, 0.5 / 8.0, -1.0 / 8.0); + const float4 kB = (float4)(2.0 / 8.0, 0.0, 0.0, 4.0 / 8.0); + const float4 kD = (float4)(0.0, 2.0 / 8.0, -1.0 / 8.0, -1.0 / 8.0); - const float4 kE = kA.xywz; - const float4 kF = kB.xywz; + const float4 kE = kA.xywz; + const float4 kF = kB.xywz; - value += temp; - - PATTERN.yzw += (kD.yz * Dvec.x).xyy; + value += temp; - PATTERN += (kA.xyz * (float3)(value.x,value.x,value.x)).xyzx + (kE.xyw * (float3)(value.z,value.z,value.z)).xyxz; - PATTERN.xw += kB.xw * (float2)(value.y,value.y); - PATTERN.xz += kF.xz * (float2)(value.w,value.w); + PATTERN.yzw += (kD.yz * Dvec.x).xyy; - return PATTERN; + PATTERN += (kA.xyz * (float3)(value.x, value.x, value.x)).xyzx + (kE.xyw * (float3)(value.z, value.z, value.z)).xyxz; + PATTERN.xw += kB.xw * (float2)(value.y, value.y); + PATTERN.xz += kF.xz * (float2)(value.w, value.w); + return PATTERN; } -__kernel void debayer( __global uchar *psrc, __global uchar *pdst) -{ - int i = get_global_id(0); - int j = get_global_id(1); - - uchar8 lineA, lineB, lineC, lineD, lineE, lineF; +__kernel void debayer(__global uchar* psrc, __global uchar* pdst) +{ + int i = get_global_id(0); + int j = get_global_id(1); + if ((i < 2) || (i >= WIDTH / 4 - 2)) + { + return; + } + if ((j < 2) || (j >= HEIGHT / 2 - 2)) + { + return; + } + + + uchar8 lineA, lineB, lineC, lineD, lineE, lineF; + + lineA = vload8(0, psrc + mad24(j * 2 - 2, src_step, i * 4 - 2)); + lineB = vload8(0, psrc + mad24(j * 2 - 1, src_step, i * 4 - 2)); + lineC = vload8(0, psrc + mad24(j * 2, src_step, i * 4 - 2)); + lineD = vload8(0, psrc + mad24(j * 2 + 1, src_step, i * 4 - 2)); + lineE = vload8(0, psrc + mad24(j * 2 + 2, src_step, i * 4 - 2)); + lineF = vload8(0, psrc + mad24(j * 2 + 3, src_step, i * 4 - 2)); + + const float4 kC = (float4)(4.0 / 8.0, 6.0 / 8.0, 5.0 / 8.0, 5.0 / 8.0); + + uchar16 out1, out2; + float4 Dvec = (float4)(lineB.s1, lineD.s1, lineB.s3, lineD.s3); + + float4 PATTERN = (kC.xyz * lineC.s2).xyzz; - lineA = vload8(0, psrc + mad24(j*2-2, src_step, i*4-2)); - lineB = vload8(0, psrc + mad24(j*2-1, src_step, i*4-2)); - lineC = vload8(0, psrc + mad24(j*2, src_step, i*4-2)); - lineD = vload8(0, psrc + mad24(j*2+1, src_step, i*4-2)); - lineE = vload8(0, psrc + mad24(j*2+2, src_step, i*4-2)); - lineF = vload8(0, psrc + mad24(j*2+3, src_step, i*4-2)); - - const float4 kC = (float4)( 4.0/8.0, 6.0/8.0, 5.0/8.0, 5.0/8.0); + Dvec.xy += Dvec.zw; + Dvec.x += Dvec.y; - uchar16 out1, out2; - float4 Dvec = (float4)(lineB.s1, lineD.s1, lineB.s3, lineD.s3); + float4 value = (float4)(lineA.s2, lineB.s2, lineC.s0, lineC.s1); + float4 temp = (float4)(lineE.s2, lineD.s2, lineC.s4, lineC.s3); - float4 PATTERN = (kC.xyz * lineC.s2).xyzz; - - Dvec.xy += Dvec.zw; - Dvec.x += Dvec.y; + float4 PATTERN_One = Pattern(PATTERN, value, temp, Dvec); - float4 value = (float4)(lineA.s2, lineB.s2, lineC.s0, lineC.s1); - float4 temp = (float4)(lineE.s2, lineD.s2, lineC.s4, lineC.s3); + Dvec = (float4)(lineB.s2, lineD.s2, lineB.s4, lineD.s4); - float4 PATTERN_One = Pattern(PATTERN , value , temp, Dvec); + PATTERN = (kC.xyz * lineC.s3).xyzz; - Dvec = (float4)(lineB.s2, lineD.s2, lineB.s4, lineD.s4); + Dvec.xy += Dvec.zw; + Dvec.x += Dvec.y; - PATTERN = (kC.xyz * lineC.s3).xyzz; - - Dvec.xy += Dvec.zw; - Dvec.x += Dvec.y; + value = (float4)(lineA.s3, lineB.s3, lineC.s1, lineC.s2); + temp = (float4)(lineE.s3, lineD.s3, lineC.s5, lineC.s4); - value = (float4)(lineA.s3, lineB.s3, lineC.s1, lineC.s2); - temp = (float4)(lineE.s3, lineD.s3, lineC.s5, lineC.s4); + float4 PATTERN_Two = Pattern(PATTERN, value, temp, Dvec); - float4 PATTERN_Two = Pattern(PATTERN , value , temp, Dvec); + Dvec = (float4)(lineB.s3, lineD.s3, lineB.s5, lineD.s5); - Dvec = (float4)(lineB.s3, lineD.s3, lineB.s5, lineD.s5); + PATTERN = (kC.xyz * lineC.s4).xyzz; - PATTERN = (kC.xyz * lineC.s4).xyzz; - - Dvec.xy += Dvec.zw; - Dvec.x += Dvec.y; + Dvec.xy += Dvec.zw; + Dvec.x += Dvec.y; - value = (float4)(lineA.s4, lineB.s4, lineC.s2, lineC.s3); - temp = (float4)(lineE.s4, lineD.s4, lineC.s6, lineC.s5); + value = (float4)(lineA.s4, lineB.s4, lineC.s2, lineC.s3); + temp = (float4)(lineE.s4, lineD.s4, lineC.s6, lineC.s5); - float4 PATTERN_Three = Pattern(PATTERN , value , temp, Dvec); + float4 PATTERN_Three = Pattern(PATTERN, value, temp, Dvec); - Dvec = (float4)(lineB.s4, lineD.s4, lineB.s6, lineD.s6); + Dvec = (float4)(lineB.s4, lineD.s4, lineB.s6, lineD.s6); - PATTERN = (kC.xyz * lineC.s5).xyzz; - - Dvec.xy += Dvec.zw; - Dvec.x += Dvec.y; + PATTERN = (kC.xyz * lineC.s5).xyzz; - value = (float4)(lineA.s5, lineB.s5, lineC.s3, lineC.s4); - temp = (float4)(lineE.s5, lineD.s5, lineC.s7, lineC.s6); + Dvec.xy += Dvec.zw; + Dvec.x += Dvec.y; - float4 PATTERN_Four = Pattern(PATTERN , value , temp, Dvec); - - out1 = (uchar16)( - (uchar)(PATTERN_One.y > 255 ? 255 : PATTERN_One.y),(uchar)(PATTERN_One.x > 255 ? 255 : PATTERN_One.x), lineC.s2,255, - (uchar)(PATTERN_Two.w > 255 ? 255 : PATTERN_Two.w),lineC.s3,(uchar)(PATTERN_Two.z > 255 ? 255 : PATTERN_Two.z),255, - (uchar)(PATTERN_Three.y > 255 ? 255 : PATTERN_Three.y),(uchar)(PATTERN_Three.x > 255 ? 255 : PATTERN_Three.x), lineC.s4,255, - (uchar)(PATTERN_Four.w > 255 ? 255 : PATTERN_Four.w),lineC.s5,(uchar)(PATTERN_Four.z > 255 ? 255 : PATTERN_Four.z),255); + value = (float4)(lineA.s5, lineB.s5, lineC.s3, lineC.s4); + temp = (float4)(lineE.s5, lineD.s5, lineC.s7, lineC.s6); - vstore16(out1, 0, pdst + mad24(j*2, dst_step_dsp*4, i<<4)); + float4 PATTERN_Four = Pattern(PATTERN, value, temp, Dvec); - Dvec = (float4)(lineC.s1, lineE.s1, lineC.s3, lineE.s3); + out1 = (uchar16)((uchar)(PATTERN_One.y > 255 ? 255 : PATTERN_One.y), (uchar)(PATTERN_One.x > 255 ? 255 : PATTERN_One.x), lineC.s2, 255, + (uchar)(PATTERN_Two.w > 255 ? 255 : PATTERN_Two.w), lineC.s3, (uchar)(PATTERN_Two.z > 255 ? 255 : PATTERN_Two.z), 255, + (uchar)(PATTERN_Three.y > 255 ? 255 : PATTERN_Three.y), (uchar)(PATTERN_Three.x > 255 ? 255 : PATTERN_Three.x), lineC.s4, 255, + (uchar)(PATTERN_Four.w > 255 ? 255 : PATTERN_Four.w), lineC.s5, (uchar)(PATTERN_Four.z > 255 ? 255 : PATTERN_Four.z), 255); - PATTERN = (kC.xyz * lineD.s2).xyzz; - - Dvec.xy += Dvec.zw; - Dvec.x += Dvec.y; + vstore16(out1, 0, pdst + mad24(j * 2, dst_step_dsp * 4, i << 4)); - value = (float4)(lineB.s2, lineC.s2, lineD.s0, lineD.s1); - temp = (float4)(lineF.s2, lineE.s2, lineD.s4, lineD.s3); + Dvec = (float4)(lineC.s1, lineE.s1, lineC.s3, lineE.s3); - PATTERN_One = Pattern(PATTERN , value , temp, Dvec); + PATTERN = (kC.xyz * lineD.s2).xyzz; - Dvec = (float4)(lineC.s2, lineE.s2, lineC.s4, lineE.s4); + Dvec.xy += Dvec.zw; + Dvec.x += Dvec.y; - PATTERN = (kC.xyz * lineD.s3).xyzz; - - Dvec.xy += Dvec.zw; - Dvec.x += Dvec.y; + value = (float4)(lineB.s2, lineC.s2, lineD.s0, lineD.s1); + temp = (float4)(lineF.s2, lineE.s2, lineD.s4, lineD.s3); - value = (float4)(lineB.s3, lineC.s3, lineD.s1, lineD.s2); - temp = (float4)(lineF.s3, lineE.s3, lineD.s5, lineD.s4); + PATTERN_One = Pattern(PATTERN, value, temp, Dvec); - PATTERN_Two = Pattern(PATTERN , value , temp, Dvec); + Dvec = (float4)(lineC.s2, lineE.s2, lineC.s4, lineE.s4); - Dvec = (float4)(lineC.s3, lineE.s3, lineC.s5, lineE.s5); + PATTERN = (kC.xyz * lineD.s3).xyzz; - PATTERN = (kC.xyz * lineD.s4).xyzz; - - Dvec.xy += Dvec.zw; - Dvec.x += Dvec.y; + Dvec.xy += Dvec.zw; + Dvec.x += Dvec.y; - value = (float4)(lineB.s4, lineC.s4, lineD.s2, lineD.s3); - temp = (float4)(lineF.s4, lineE.s4, lineD.s6, lineD.s5); + value = (float4)(lineB.s3, lineC.s3, lineD.s1, lineD.s2); + temp = (float4)(lineF.s3, lineE.s3, lineD.s5, lineD.s4); - PATTERN_Three = Pattern(PATTERN , value , temp, Dvec); + PATTERN_Two = Pattern(PATTERN, value, temp, Dvec); - Dvec = (float4)(lineC.s4, lineE.s4, lineC.s6, lineE.s6); + Dvec = (float4)(lineC.s3, lineE.s3, lineC.s5, lineE.s5); - PATTERN = (kC.xyz * lineD.s5).xyzz; - - Dvec.xy += Dvec.zw; - Dvec.x += Dvec.y; + PATTERN = (kC.xyz * lineD.s4).xyzz; - value = (float4)(lineB.s5, lineC.s5, lineD.s3, lineD.s4); - temp = (float4)(lineF.s5, lineE.s5, lineD.s7, lineD.s6); + Dvec.xy += Dvec.zw; + Dvec.x += Dvec.y; - PATTERN_Four = Pattern(PATTERN , value , temp, Dvec); + value = (float4)(lineB.s4, lineC.s4, lineD.s2, lineD.s3); + temp = (float4)(lineF.s4, lineE.s4, lineD.s6, lineD.s5); - out2 = (uchar16)( - (uchar)(PATTERN_One.z > 255 ? 255 : PATTERN_One.z),lineD.s2, (uchar)(PATTERN_One.w > 255 ? 255 : PATTERN_One.w), 255, - lineD.s3,(uchar)(PATTERN_Two.x > 255 ? 255 : PATTERN_Two.x),(uchar)(PATTERN_Two.y > 255 ? 255 : PATTERN_Two.y), 255 , - (uchar)(PATTERN_Three.z > 255 ? 255 : PATTERN_Three.z),lineD.s4,(uchar)(PATTERN_Three.w > 255 ? 255 : PATTERN_Three.w), 255, - lineD.s5,(uchar)(PATTERN_Four.x > 255 ? 255 : PATTERN_Four.x),(uchar)(PATTERN_Four.y > 255 ? 255 : PATTERN_Four.y), 255); + PATTERN_Three = Pattern(PATTERN, value, temp, Dvec); - vstore16(out2, 0, pdst+ mad24(j*2 + 1, dst_step_dsp*4, i<<4 )); + Dvec = (float4)(lineC.s4, lineE.s4, lineC.s6, lineE.s6); + + PATTERN = (kC.xyz * lineD.s5).xyzz; + + Dvec.xy += Dvec.zw; + Dvec.x += Dvec.y; + + value = (float4)(lineB.s5, lineC.s5, lineD.s3, lineD.s4); + temp = (float4)(lineF.s5, lineE.s5, lineD.s7, lineD.s6); + + PATTERN_Four = Pattern(PATTERN, value, temp, Dvec); + + out2 = (uchar16)((uchar)(PATTERN_One.z > 255 ? 255 : PATTERN_One.z), lineD.s2, (uchar)(PATTERN_One.w > 255 ? 255 : PATTERN_One.w), 255, lineD.s3, + (uchar)(PATTERN_Two.x > 255 ? 255 : PATTERN_Two.x), (uchar)(PATTERN_Two.y > 255 ? 255 : PATTERN_Two.y), 255, + (uchar)(PATTERN_Three.z > 255 ? 255 : PATTERN_Three.z), lineD.s4, (uchar)(PATTERN_Three.w > 255 ? 255 : PATTERN_Three.w), 255, + lineD.s5, (uchar)(PATTERN_Four.x > 255 ? 255 : PATTERN_Four.x), (uchar)(PATTERN_Four.y > 255 ? 255 : PATTERN_Four.y), 255); + + vstore16(out2, 0, pdst + mad24(j * 2 + 1, dst_step_dsp * 4, i << 4)); } -__kernel void rgba2yuyv(__global const uchar * src, __global uchar * dst1, __global uchar * dst2) -{ - int i = get_global_id(0); - int j = get_global_id(1); - int src_idx = mad24(j, WIDTH * 4, i*4); - int y_idx = mad24(j, WIDTH, i); - int uv_idx = mad24(j, WIDTH * 2, i*2); - - float4 outColor; - - //BGRA - outColor.x = 0.098 * src[src_idx] + 0.504 * src[src_idx+1] + 0.257 * src[src_idx+2] + 16; - outColor.y = 0.439 * src[src_idx] - 0.291 * src[src_idx+1] - 0.148 * src[src_idx+2] + 128; - outColor.z = (-0.071) * src[src_idx] - 0.368 * src[src_idx+1] + 0.439 * src[src_idx+2] + 128; - uchar4 outColor1 = convert_uchar4(outColor); - dst1[y_idx] = outColor1.x; - dst2[uv_idx] = outColor1.y; - dst2[uv_idx + 1] = outColor1.z; +__kernel void rgba2yuyv(__global const uchar* src, __global uchar* dst1, __global uchar* dst2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int src_idx = mad24(j, WIDTH * 4, i * 4); + int y_idx = mad24(j, WIDTH, i); + int uv_idx = mad24(j, WIDTH * 2, i * 2); + + float4 outColor; + + // BGRA + outColor.x = 0.098 * src[src_idx] + 0.504 * src[src_idx + 1] + 0.257 * src[src_idx + 2] + 16; + outColor.y = 0.439 * src[src_idx] - 0.291 * src[src_idx + 1] - 0.148 * src[src_idx + 2] + 128; + outColor.z = (-0.071) * src[src_idx] - 0.368 * src[src_idx + 1] + 0.439 * src[src_idx + 2] + 128; + uchar4 outColor1 = convert_uchar4(outColor); + dst1[y_idx] = outColor1.x; + dst2[uv_idx] = outColor1.y; + dst2[uv_idx + 1] = outColor1.z; } -#define SPCW (30*30) -__kernel void bilateral( __global uchar* input_image, __global uchar* output_image) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - float med1[7][7] = {1.000, 0.722, 0.556, 0.500, 0.556, 0.722, 1.000, - 0.722, 0.444, 0.278, 0.222, 0.278, 0.444, 0.722, - 0.556, 0.278, 0.111, 0.056, 0.111, 0.278, 0.556, - 0.500, 0.222, 0.056, 0.000, 0.056, 0.222, 0.500, - 0.556, 0.278, 0.111, 0.056, 0.111, 0.278, 0.556, - 0.722, 0.444, 0.278, 0.222, 0.278, 0.444, 0.722, - 1.000, 0.722, 0.556, 0.500, 0.556, 0.722, 1.000}; - - uchar m_pixel; - uchar16 lineA, lineB, lineC, lineD; - uchar8 tmp[7], lineP, rangeWeight0, out; - float8 rangeWeight1 = 0, rangeWeight2 = 0, spaceWeight = 0, weight = 0, sumWeight = 0; - float8 tmpPixel = 0, tmpOut = 0; - - lineA = vload16(0, input_image + (y - 3) * WIDTH + x * 8 - 3); - lineB = vload16(0, input_image + (y - 2) * WIDTH + x * 8 - 3); - lineC = vload16(0, input_image + (y - 1) * WIDTH + x * 8 - 3); - lineD = vload16(0, input_image + (y + 0) * WIDTH + x * 8 - 3); - - lineP = lineD.s3456789a; - tmp[0] = lineA.s01234567; - tmp[1] = lineA.s12345678; - tmp[2] = lineA.s23456789; - tmp[3] = lineA.s3456789a; - tmp[4] = lineA.s456789ab; - tmp[5] = lineA.s56789abc; - tmp[6] = lineA.s6789abcd; - for(int i = 0; i < 7; i++){ - rangeWeight0 = abs_diff(tmp[i], lineP); - rangeWeight1 = convert_float8(rangeWeight0); - rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); - spaceWeight = (float8)med1[0][i]; - weight = exp(-(rangeWeight2 + spaceWeight)); - sumWeight += weight; - tmpPixel = convert_float8(tmp[i]); - tmpOut += tmpPixel * weight; - } - - tmp[0] = lineB.s01234567; - tmp[1] = lineB.s12345678; - tmp[2] = lineB.s23456789; - tmp[3] = lineB.s3456789a; - tmp[4] = lineB.s456789ab; - tmp[5] = lineB.s56789abc; - tmp[6] = lineB.s6789abcd; - for(int i = 0; i < 7; i++){ - rangeWeight0 = abs_diff(tmp[i], lineP); - rangeWeight1 = convert_float8(rangeWeight0); - rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); - spaceWeight = (float8)med1[1][i]; - weight = exp(-(rangeWeight2 + spaceWeight)); - sumWeight += weight; - tmpPixel = convert_float8(tmp[i]); - tmpOut += tmpPixel * weight; - } - - tmp[0] = lineC.s01234567; - tmp[1] = lineC.s12345678; - tmp[2] = lineC.s23456789; - tmp[3] = lineC.s3456789a; - tmp[4] = lineC.s456789ab; - tmp[5] = lineC.s56789abc; - tmp[6] = lineC.s6789abcd; - for(int i = 0; i < 7; i++){ - rangeWeight0 = abs_diff(tmp[i], lineP); - rangeWeight1 = convert_float8(rangeWeight0); - rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); - spaceWeight = (float8)med1[2][i]; - weight = exp(-(rangeWeight2 + spaceWeight)); - sumWeight += weight; - tmpPixel = convert_float8(tmp[i]); - tmpOut += tmpPixel * weight; - } - - tmp[0] = lineD.s01234567; - tmp[1] = lineD.s12345678; - tmp[2] = lineD.s23456789; - tmp[3] = lineD.s3456789a; - tmp[4] = lineD.s456789ab; - tmp[5] = lineD.s56789abc; - tmp[6] = lineD.s6789abcd; - for(int i = 0; i < 7; i++){ - rangeWeight0 = abs_diff(tmp[i], lineP); - rangeWeight1 = convert_float8(rangeWeight0); - rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); - spaceWeight = (float8)med1[3][i]; - weight = exp(-(rangeWeight2 + spaceWeight)); - sumWeight += weight; - tmpPixel = convert_float8(tmp[i]); - tmpOut += tmpPixel * weight; - } - - lineA = vload16(0, input_image + (y + 1) * WIDTH + x * 8 - 3); - lineB = vload16(0, input_image + (y + 2) * WIDTH + x * 8 - 3); - lineC = vload16(0, input_image + (y + 3) * WIDTH + x * 8 - 3); - tmp[0] = lineA.s01234567; - tmp[1] = lineA.s12345678; - tmp[2] = lineA.s23456789; - tmp[3] = lineA.s3456789a; - tmp[4] = lineA.s456789ab; - tmp[5] = lineA.s56789abc; - tmp[6] = lineA.s6789abcd; - for(int i = 0; i < 7; i++){ - rangeWeight0 = abs_diff(tmp[i], lineP); - rangeWeight1 = convert_float8(rangeWeight0); - rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); - spaceWeight = (float8)med1[4][i]; - weight = exp(-(rangeWeight2 + spaceWeight)); - sumWeight += weight; - tmpPixel = convert_float8(tmp[i]); - tmpOut += tmpPixel * weight; - } - - tmp[0] = lineB.s01234567; - tmp[1] = lineB.s12345678; - tmp[2] = lineB.s23456789; - tmp[3] = lineB.s3456789a; - tmp[4] = lineB.s456789ab; - tmp[5] = lineB.s56789abc; - tmp[6] = lineB.s6789abcd; - for(int i = 0; i < 7; i++){ - rangeWeight0 = abs_diff(tmp[i], lineP); - rangeWeight1 = convert_float8(rangeWeight0); - rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); - spaceWeight = (float8)med1[5][i]; - weight = exp(-(rangeWeight2 + spaceWeight)); - sumWeight += weight; - tmpPixel = convert_float8(tmp[i]); - tmpOut += tmpPixel * weight; - } - - tmp[0] = lineC.s01234567; - tmp[1] = lineC.s12345678; - tmp[2] = lineC.s23456789; - tmp[3] = lineC.s3456789a; - tmp[4] = lineC.s456789ab; - tmp[5] = lineC.s56789abc; - tmp[6] = lineC.s6789abcd; - for(int i = 0; i < 7; i++){ - rangeWeight0 = abs_diff(tmp[i], lineP); - rangeWeight1 = convert_float8(rangeWeight0); - rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); - spaceWeight = (float8)med1[6][i]; - weight = exp(-(rangeWeight2 + spaceWeight)); - sumWeight = sumWeight + weight; - tmpPixel = convert_float8(tmp[i]); - tmpOut = tmpOut + tmpPixel * weight; - } - tmpOut = tmpOut / sumWeight; - out = convert_uchar8(tmpOut); - vstore8(out, x, output_image + y * WIDTH); +#define SPCW (30 * 30) +__kernel void bilateral(__global uchar* input_image, __global uchar* output_image) +{ + int x = get_global_id(0); + int y = get_global_id(1); + if ((x < 3) || (x >= WIDTH / 8 - 3)) + { + return; + } + if ((y < 3) || (y >= HEIGHT - 3)) + { + return; + } + + float med1[7][7] = { + // clang-format off + 1.000, 0.722, 0.556, 0.500, 0.556, 0.722, 1.000, + 0.722, 0.444, 0.278, 0.222, 0.278, 0.444, 0.722, + 0.556, 0.278, 0.111, 0.056, 0.111, 0.278, 0.556, + 0.500, 0.222, 0.056, 0.000, 0.056, 0.222, 0.500, + 0.556, 0.278, 0.111, 0.056, 0.111, 0.278, 0.556, + 0.722, 0.444, 0.278, 0.222, 0.278, 0.444, 0.722, + 1.000, 0.722, 0.556, 0.500, 0.556, 0.722, 1.000 + // clang-format on + }; + + uchar m_pixel; + uchar16 lineA, lineB, lineC, lineD; + uchar8 tmp[7], lineP, rangeWeight0, out; + float8 rangeWeight1 = 0, rangeWeight2 = 0, spaceWeight = 0, weight = 0, sumWeight = 0; + float8 tmpPixel = 0, tmpOut = 0; + + lineA = vload16(0, input_image + (y - 3) * WIDTH + x * 8 - 3); + lineB = vload16(0, input_image + (y - 2) * WIDTH + x * 8 - 3); + lineC = vload16(0, input_image + (y - 1) * WIDTH + x * 8 - 3); + lineD = vload16(0, input_image + (y + 0) * WIDTH + x * 8 - 3); + + lineP = lineD.s3456789a; + tmp[0] = lineA.s01234567; + tmp[1] = lineA.s12345678; + tmp[2] = lineA.s23456789; + tmp[3] = lineA.s3456789a; + tmp[4] = lineA.s456789ab; + tmp[5] = lineA.s56789abc; + tmp[6] = lineA.s6789abcd; + for (int i = 0; i < 7; i++) + { + rangeWeight0 = abs_diff(tmp[i], lineP); + rangeWeight1 = convert_float8(rangeWeight0); + rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); + spaceWeight = (float8)med1[0][i]; + weight = exp(-(rangeWeight2 + spaceWeight)); + sumWeight += weight; + tmpPixel = convert_float8(tmp[i]); + tmpOut += tmpPixel * weight; + } + + tmp[0] = lineB.s01234567; + tmp[1] = lineB.s12345678; + tmp[2] = lineB.s23456789; + tmp[3] = lineB.s3456789a; + tmp[4] = lineB.s456789ab; + tmp[5] = lineB.s56789abc; + tmp[6] = lineB.s6789abcd; + for (int i = 0; i < 7; i++) + { + rangeWeight0 = abs_diff(tmp[i], lineP); + rangeWeight1 = convert_float8(rangeWeight0); + rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); + spaceWeight = (float8)med1[1][i]; + weight = exp(-(rangeWeight2 + spaceWeight)); + sumWeight += weight; + tmpPixel = convert_float8(tmp[i]); + tmpOut += tmpPixel * weight; + } + + tmp[0] = lineC.s01234567; + tmp[1] = lineC.s12345678; + tmp[2] = lineC.s23456789; + tmp[3] = lineC.s3456789a; + tmp[4] = lineC.s456789ab; + tmp[5] = lineC.s56789abc; + tmp[6] = lineC.s6789abcd; + for (int i = 0; i < 7; i++) + { + rangeWeight0 = abs_diff(tmp[i], lineP); + rangeWeight1 = convert_float8(rangeWeight0); + rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); + spaceWeight = (float8)med1[2][i]; + weight = exp(-(rangeWeight2 + spaceWeight)); + sumWeight += weight; + tmpPixel = convert_float8(tmp[i]); + tmpOut += tmpPixel * weight; + } + + tmp[0] = lineD.s01234567; + tmp[1] = lineD.s12345678; + tmp[2] = lineD.s23456789; + tmp[3] = lineD.s3456789a; + tmp[4] = lineD.s456789ab; + tmp[5] = lineD.s56789abc; + tmp[6] = lineD.s6789abcd; + for (int i = 0; i < 7; i++) + { + rangeWeight0 = abs_diff(tmp[i], lineP); + rangeWeight1 = convert_float8(rangeWeight0); + rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); + spaceWeight = (float8)med1[3][i]; + weight = exp(-(rangeWeight2 + spaceWeight)); + sumWeight += weight; + tmpPixel = convert_float8(tmp[i]); + tmpOut += tmpPixel * weight; + } + + lineA = vload16(0, input_image + (y + 1) * WIDTH + x * 8 - 3); + lineB = vload16(0, input_image + (y + 2) * WIDTH + x * 8 - 3); + lineC = vload16(0, input_image + (y + 3) * WIDTH + x * 8 - 3); + tmp[0] = lineA.s01234567; + tmp[1] = lineA.s12345678; + tmp[2] = lineA.s23456789; + tmp[3] = lineA.s3456789a; + tmp[4] = lineA.s456789ab; + tmp[5] = lineA.s56789abc; + tmp[6] = lineA.s6789abcd; + for (int i = 0; i < 7; i++) + { + rangeWeight0 = abs_diff(tmp[i], lineP); + rangeWeight1 = convert_float8(rangeWeight0); + rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); + spaceWeight = (float8)med1[4][i]; + weight = exp(-(rangeWeight2 + spaceWeight)); + sumWeight += weight; + tmpPixel = convert_float8(tmp[i]); + tmpOut += tmpPixel * weight; + } + + tmp[0] = lineB.s01234567; + tmp[1] = lineB.s12345678; + tmp[2] = lineB.s23456789; + tmp[3] = lineB.s3456789a; + tmp[4] = lineB.s456789ab; + tmp[5] = lineB.s56789abc; + tmp[6] = lineB.s6789abcd; + for (int i = 0; i < 7; i++) + { + rangeWeight0 = abs_diff(tmp[i], lineP); + rangeWeight1 = convert_float8(rangeWeight0); + rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); + spaceWeight = (float8)med1[5][i]; + weight = exp(-(rangeWeight2 + spaceWeight)); + sumWeight += weight; + tmpPixel = convert_float8(tmp[i]); + tmpOut += tmpPixel * weight; + } + + tmp[0] = lineC.s01234567; + tmp[1] = lineC.s12345678; + tmp[2] = lineC.s23456789; + tmp[3] = lineC.s3456789a; + tmp[4] = lineC.s456789ab; + tmp[5] = lineC.s56789abc; + tmp[6] = lineC.s6789abcd; + for (int i = 0; i < 7; i++) + { + rangeWeight0 = abs_diff(tmp[i], lineP); + rangeWeight1 = convert_float8(rangeWeight0); + rangeWeight2 = rangeWeight1 * rangeWeight1 / (SPCW * 2); + spaceWeight = (float8)med1[6][i]; + weight = exp(-(rangeWeight2 + spaceWeight)); + sumWeight = sumWeight + weight; + tmpPixel = convert_float8(tmp[i]); + tmpOut = tmpOut + tmpPixel * weight; + } + tmpOut = tmpOut / sumWeight; + out = convert_uchar8(tmpOut); + vstore8(out, x, output_image + y * WIDTH); } -__kernel void yuyv2rgba( __global const uchar * src1, __global const uchar * src2, __global uchar * dst) -{ - int i = get_global_id(0); - int j = get_global_id(1); - int y_idx = mad24(j, WIDTH, i); - int uv_idx = mad24(j, WIDTH * 2, i*2); - int dst_idx = mad24(j, WIDTH * 4, i*4); - unsigned int temp; - float4 outColor; - float4 out = (float4)( src1[y_idx], src2[uv_idx], src2[uv_idx + 1], 255.0); - - temp = (out.x -16)*1.164 + 1.596 * (out.z - 128); - outColor.z = (temp < 0 ? 0 : temp); - outColor.z = (temp > 255 ? 255 : temp); - - temp = (out.x - 16)*1.164 - 0.392 * (out.y - 128) - 0.813 * (out.z - 128); - outColor.y = (temp < 0 ? 0 : temp); - outColor.y = (temp > 255 ? 255 : temp); - - temp = (out.x - 16)*1.164 + 2.017 * (out.y - 128); - outColor.x = (temp < 0 ? 0 : temp); - outColor.x = (temp > 255 ? 255 : temp); - - outColor.w = 255.0; - - uchar4 outColor1= convert_uchar4(outColor); - vstore4(outColor1, 0, &dst[dst_idx]); - +__kernel void yuyv2rgba(__global const uchar* src1, __global const uchar* src2, __global uchar* dst) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int y_idx = mad24(j, WIDTH, i); + int uv_idx = mad24(j, WIDTH * 2, i * 2); + int dst_idx = mad24(j, WIDTH * 4, i * 4); + unsigned int temp; + float4 outColor; + float4 out = (float4)(src1[y_idx], src2[uv_idx], src2[uv_idx + 1], 255.0); + + temp = (out.x - 16) * 1.164 + 1.596 * (out.z - 128); + outColor.z = (temp < 0 ? 0 : temp); + outColor.z = (temp > 255 ? 255 : temp); + + temp = (out.x - 16) * 1.164 - 0.392 * (out.y - 128) - 0.813 * (out.z - 128); + outColor.y = (temp < 0 ? 0 : temp); + outColor.y = (temp > 255 ? 255 : temp); + + temp = (out.x - 16) * 1.164 + 2.017 * (out.y - 128); + outColor.x = (temp < 0 ? 0 : temp); + outColor.x = (temp > 255 ? 255 : temp); + + outColor.w = 255.0; + + uchar4 outColor1 = convert_uchar4(outColor); + vstore4(outColor1, 0, &dst[dst_idx]); } diff --git a/DemoApps/OpenCL/SoftISP/Fsl.gen b/DemoApps/OpenCL/SoftISP/Fsl.gen index a3d7f0e7e..0c0d33d56 100644 --- a/DemoApps/OpenCL/SoftISP/Fsl.gen +++ b/DemoApps/OpenCL/SoftISP/Fsl.gen @@ -1,8 +1,6 @@ - - diff --git a/DemoApps/OpenCL/SoftISP/source/OptionParser.cpp b/DemoApps/OpenCL/SoftISP/source/OptionParser.cpp index eeaa422be..0e36b7b99 100644 --- a/DemoApps/OpenCL/SoftISP/source/OptionParser.cpp +++ b/DemoApps/OpenCL/SoftISP/source/OptionParser.cpp @@ -61,14 +61,12 @@ namespace Fsl } - OptionParser::~OptionParser() - { - } + OptionParser::~OptionParser() = default; void OptionParser::OnArgumentSetup(std::deque