forked from shobana-mcw/MIVisionX
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
static build option - statistical_operations enabled (ROCm#52)
* Mods for max/min/thresholding * Merge branch 'ar/master' into ar/hip_stat_opt * Remove fork CI
- Loading branch information
Showing
5 changed files
with
310 additions
and
122 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,61 +1,113 @@ | ||
#include <hip/hip_runtime.h> | ||
#define saturate_8u(value) ( (value) > 255 ? 255 : ((value) < 0 ? 0 : (value) )) | ||
extern "C" __global__ void max_hip( unsigned char* input1, | ||
unsigned char* input2, | ||
unsigned char* output, | ||
const unsigned int height, | ||
const unsigned int width, | ||
const unsigned int channel | ||
) | ||
|
||
#if defined(STATIC) | ||
#include "rpp_hip_host_decls.hpp" | ||
#endif | ||
|
||
#define saturate_8u(value) ((value) > 255 ? 255 : ((value) < 0 ? 0 : (value))) | ||
|
||
__device__ unsigned char max_formula(unsigned char input_pixel1, unsigned char input_pixel2) | ||
{ | ||
int id_x = hipBlockIdx_x *hipBlockDim_x + hipThreadIdx_x; | ||
int id_y = hipBlockIdx_y *hipBlockDim_y + hipThreadIdx_y; | ||
int id_z = hipBlockIdx_z *hipBlockDim_z + hipThreadIdx_z; | ||
if (id_x >= width || id_y >= height || id_z >= channel) return; | ||
return saturate_8u((input_pixel1 >= input_pixel2) ? input_pixel1 : input_pixel2); | ||
} | ||
|
||
extern "C" __global__ void max_hip(unsigned char *input1, | ||
unsigned char *input2, | ||
unsigned char *output, | ||
const unsigned int height, | ||
const unsigned int width, | ||
const unsigned int channel) | ||
{ | ||
int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; | ||
int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; | ||
int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; | ||
|
||
if (id_x >= width || id_y >= height || id_z >= channel) | ||
{ | ||
return; | ||
} | ||
|
||
int pixIdx = id_x + id_y * width + id_z * width * height; | ||
|
||
output[pixIdx] = (input1[pixIdx] > input2[pixIdx]) ? input2[pixIdx] : input1[pixIdx]; | ||
} | ||
|
||
__device__ unsigned char max_formula( unsigned char input_pixel1, unsigned char input_pixel2){ | ||
return saturate_8u((input_pixel1 >= input_pixel2) ? input_pixel1 : input_pixel2); | ||
} | ||
extern "C" __global__ void max_batch( unsigned char* input1, | ||
unsigned char* input2, | ||
unsigned char* output, | ||
int *xroi_begin, | ||
int *xroi_end, | ||
int *yroi_begin, | ||
int *yroi_end, | ||
|
||
extern "C" __global__ void max_batch(unsigned char *input1, | ||
unsigned char *input2, | ||
unsigned char *output, | ||
unsigned int *xroi_begin, | ||
unsigned int *xroi_end, | ||
unsigned int *yroi_begin, | ||
unsigned int *yroi_end, | ||
unsigned int *height, | ||
unsigned int *width, | ||
unsigned int *max_width, | ||
unsigned long *batch_index, | ||
const unsigned int channel, | ||
unsigned int *inc, // use width * height for pln and 1 for pkd | ||
const int plnpkdindex // use 1 pln 3 for pkd | ||
) | ||
unsigned long long *batch_index, | ||
const unsigned int channel, | ||
unsigned int *inc, // use width * height for pln and 1 for pkd | ||
const int plnpkdindex) // use 1 pln 3 for pkd | ||
{ | ||
int id_x = hipBlockIdx_x *hipBlockDim_x + hipThreadIdx_x, id_y = hipBlockIdx_y *hipBlockDim_y + hipThreadIdx_y, id_z = hipBlockIdx_z *hipBlockDim_z + hipThreadIdx_z; | ||
int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; | ||
int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; | ||
int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; | ||
|
||
unsigned char valuergb1, valuergb2; | ||
int indextmp=0; | ||
int indextmp = 0; | ||
long pixIdx = 0; | ||
|
||
pixIdx = batch_index[id_z] + (id_x + id_y * max_width[id_z] ) * plnpkdindex ; | ||
if((id_y >= yroi_begin[id_z] ) && (id_y <= yroi_end[id_z]) && (id_x >= xroi_begin[id_z]) && (id_x <= xroi_end[id_z])) | ||
{ | ||
for(indextmp = 0; indextmp < channel; indextmp++){ | ||
pixIdx = batch_index[id_z] + (id_x + id_y * max_width[id_z]) * plnpkdindex; | ||
|
||
if((id_y >= yroi_begin[id_z]) && (id_y <= yroi_end[id_z]) && (id_x >= xroi_begin[id_z]) && (id_x <= xroi_end[id_z])) | ||
{ | ||
for(indextmp = 0; indextmp < channel; indextmp++) | ||
{ | ||
valuergb1 = input1[pixIdx]; | ||
valuergb2 = input2[pixIdx]; | ||
output[pixIdx] = max_formula(valuergb1, valuergb2); | ||
pixIdx += inc[id_z]; | ||
} | ||
} | ||
else if((id_x < width[id_z] ) && (id_y < height[id_z])){ | ||
for(indextmp = 0; indextmp < channel; indextmp++){ | ||
output[pixIdx] = input1[pixIdx]; | ||
pixIdx += inc[id_z]; | ||
} | ||
else if((id_x < width[id_z]) && (id_y < height[id_z])) | ||
{ | ||
for(indextmp = 0; indextmp < channel; indextmp++) | ||
{ | ||
output[pixIdx] = input1[pixIdx]; | ||
pixIdx += inc[id_z]; | ||
} | ||
} | ||
} | ||
} | ||
|
||
#if defined(STATIC) | ||
RppStatus hip_exec_max_batch(Rpp8u *srcPtr1, Rpp8u *srcPtr2, Rpp8u *dstPtr, rpp::Handle& handle, RppiChnFormat chnFormat, Rpp32u channel, Rpp32s plnpkdind, Rpp32u max_height, Rpp32u max_width) | ||
{ | ||
int localThreads_x = 32; | ||
int localThreads_y = 32; | ||
int localThreads_z = 1; | ||
int globalThreads_x = (max_width + 31) & ~31; | ||
int globalThreads_y = (max_height + 31) & ~31; | ||
int globalThreads_z = handle.GetBatchSize(); | ||
|
||
hipLaunchKernelGGL(max_batch, | ||
dim3(ceil((float)globalThreads_x/localThreads_x), ceil((float)globalThreads_y/localThreads_y), ceil((float)globalThreads_z/localThreads_z)), | ||
dim3(localThreads_x, localThreads_y, localThreads_z), | ||
0, | ||
handle.GetStream(), | ||
srcPtr1, | ||
srcPtr2, | ||
dstPtr, | ||
handle.GetInitHandle()->mem.mgpu.roiPoints.x, | ||
handle.GetInitHandle()->mem.mgpu.roiPoints.roiWidth, | ||
handle.GetInitHandle()->mem.mgpu.roiPoints.y, | ||
handle.GetInitHandle()->mem.mgpu.roiPoints.roiHeight, | ||
handle.GetInitHandle()->mem.mgpu.srcSize.height, | ||
handle.GetInitHandle()->mem.mgpu.srcSize.width, | ||
handle.GetInitHandle()->mem.mgpu.maxSrcSize.width, | ||
handle.GetInitHandle()->mem.mgpu.srcBatchIndex, | ||
channel, | ||
handle.GetInitHandle()->mem.mgpu.inc, | ||
plnpkdind); | ||
|
||
return RPP_SUCCESS; | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,61 +1,112 @@ | ||
#include <hip/hip_runtime.h> | ||
#define saturate_8u(value) ( (value) > 255 ? 255 : ((value) < 0 ? 0 : (value) )) | ||
extern "C" __global__ void min_hip( unsigned char* input1, | ||
unsigned char* input2, | ||
unsigned char* output, | ||
const unsigned int height, | ||
const unsigned int width, | ||
const unsigned int channel | ||
) | ||
|
||
#if defined(STATIC) | ||
#include "rpp_hip_host_decls.hpp" | ||
#endif | ||
|
||
#define saturate_8u(value) ((value) > 255 ? 255 : ((value) < 0 ? 0 : (value))) | ||
|
||
__device__ unsigned char min_formula(unsigned char input_pixel1, unsigned char input_pixel2) | ||
{ | ||
int id_x = hipBlockIdx_x *hipBlockDim_x + hipThreadIdx_x; | ||
int id_y = hipBlockIdx_y *hipBlockDim_y + hipThreadIdx_y; | ||
int id_z = hipBlockIdx_z *hipBlockDim_z + hipThreadIdx_z; | ||
if (id_x >= width || id_y >= height || id_z >= channel) return; | ||
return saturate_8u((input_pixel1 <= input_pixel2) ? input_pixel1 : input_pixel2); | ||
} | ||
|
||
extern "C" __global__ void min_hip(unsigned char *input1, | ||
unsigned char *input2, | ||
unsigned char *output, | ||
const unsigned int height, | ||
const unsigned int width, | ||
const unsigned int channel) | ||
{ | ||
int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; | ||
int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; | ||
int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; | ||
|
||
if (id_x >= width || id_y >= height || id_z >= channel) | ||
{ | ||
return; | ||
} | ||
|
||
int pixIdx = id_x + id_y * width + id_z * width * height; | ||
|
||
output[pixIdx] = (input1[pixIdx] > input2[pixIdx]) ? input2[pixIdx] : input1[pixIdx]; | ||
} | ||
|
||
__device__ unsigned char min_formula( unsigned char input_pixel1, unsigned char input_pixel2){ | ||
return saturate_8u((input_pixel1 <= input_pixel2) ? input_pixel1 : input_pixel2); | ||
} | ||
extern "C" __global__ void min_batch( unsigned char* input1, | ||
unsigned char* input2, | ||
unsigned char* output, | ||
int *xroi_begin, | ||
int *xroi_end, | ||
int *yroi_begin, | ||
int *yroi_end, | ||
extern "C" __global__ void min_batch(unsigned char *input1, | ||
unsigned char *input2, | ||
unsigned char *output, | ||
unsigned int *xroi_begin, | ||
unsigned int *xroi_end, | ||
unsigned int *yroi_begin, | ||
unsigned int *yroi_end, | ||
unsigned int *height, | ||
unsigned int *width, | ||
unsigned int *max_width, | ||
unsigned long *batch_index, | ||
const unsigned int channel, | ||
unsigned int *inc, // use width * height for pln and 1 for pkd | ||
const int plnpkdindex // use 1 pln 3 for pkd | ||
) | ||
unsigned long long *batch_index, | ||
const unsigned int channel, | ||
unsigned int *inc, // use width * height for pln and 1 for pkd | ||
const int plnpkdindex) // use 1 pln 3 for pkd | ||
{ | ||
int id_x = hipBlockIdx_x *hipBlockDim_x + hipThreadIdx_x, id_y = hipBlockIdx_y *hipBlockDim_y + hipThreadIdx_y, id_z = hipBlockIdx_z *hipBlockDim_z + hipThreadIdx_z; | ||
int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; | ||
int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; | ||
int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; | ||
|
||
unsigned char valuergb1, valuergb2; | ||
int indextmp=0; | ||
int indextmp = 0; | ||
long pixIdx = 0; | ||
|
||
pixIdx = batch_index[id_z] + (id_x + id_y * max_width[id_z] ) * plnpkdindex ; | ||
pixIdx = batch_index[id_z] + (id_x + id_y * max_width[id_z]) * plnpkdindex; | ||
|
||
if((id_y >= yroi_begin[id_z] ) && (id_y <= yroi_end[id_z]) && (id_x >= xroi_begin[id_z]) && (id_x <= xroi_end[id_z])) | ||
{ | ||
for(indextmp = 0; indextmp < channel; indextmp++){ | ||
{ | ||
for(indextmp = 0; indextmp < channel; indextmp++) | ||
{ | ||
valuergb1 = input1[pixIdx]; | ||
valuergb2 = input2[pixIdx]; | ||
output[pixIdx] = min_formula(valuergb1, valuergb2); | ||
pixIdx += inc[id_z]; | ||
} | ||
} | ||
else if((id_x < width[id_z] ) && (id_y < height[id_z])){ | ||
for(indextmp = 0; indextmp < channel; indextmp++){ | ||
output[pixIdx] = input1[pixIdx]; | ||
pixIdx += inc[id_z]; | ||
} | ||
else if((id_x < width[id_z]) && (id_y < height[id_z])) | ||
{ | ||
for(indextmp = 0; indextmp < channel; indextmp++) | ||
{ | ||
output[pixIdx] = input1[pixIdx]; | ||
pixIdx += inc[id_z]; | ||
} | ||
} | ||
} | ||
} | ||
|
||
#if defined(STATIC) | ||
RppStatus hip_exec_min_batch(Rpp8u *srcPtr1, Rpp8u *srcPtr2, Rpp8u *dstPtr, rpp::Handle& handle, RppiChnFormat chnFormat, Rpp32u channel, Rpp32s plnpkdind, Rpp32u max_height, Rpp32u max_width) | ||
{ | ||
int localThreads_x = 32; | ||
int localThreads_y = 32; | ||
int localThreads_z = 1; | ||
int globalThreads_x = (max_width + 31) & ~31; | ||
int globalThreads_y = (max_height + 31) & ~31; | ||
int globalThreads_z = handle.GetBatchSize(); | ||
|
||
hipLaunchKernelGGL(min_batch, | ||
dim3(ceil((float)globalThreads_x/localThreads_x), ceil((float)globalThreads_y/localThreads_y), ceil((float)globalThreads_z/localThreads_z)), | ||
dim3(localThreads_x, localThreads_y, localThreads_z), | ||
0, | ||
handle.GetStream(), | ||
srcPtr1, | ||
srcPtr2, | ||
dstPtr, | ||
handle.GetInitHandle()->mem.mgpu.roiPoints.x, | ||
handle.GetInitHandle()->mem.mgpu.roiPoints.roiWidth, | ||
handle.GetInitHandle()->mem.mgpu.roiPoints.y, | ||
handle.GetInitHandle()->mem.mgpu.roiPoints.roiHeight, | ||
handle.GetInitHandle()->mem.mgpu.srcSize.height, | ||
handle.GetInitHandle()->mem.mgpu.srcSize.width, | ||
handle.GetInitHandle()->mem.mgpu.maxSrcSize.width, | ||
handle.GetInitHandle()->mem.mgpu.srcBatchIndex, | ||
channel, | ||
handle.GetInitHandle()->mem.mgpu.inc, | ||
plnpkdind); | ||
|
||
return RPP_SUCCESS; | ||
} | ||
#endif |
Oops, something went wrong.