Skip to content

Commit

Permalink
OpenVX 1.3 - Laplacian Pyramid Node fix - GPU OpenCL (#636)
Browse files Browse the repository at this point in the history
* pad the first and last rows for the HIP kernel

* kernel optimization

* code cleanup

* initialize the output buffer to 0
  • Loading branch information
hansely123 authored Oct 7, 2021
1 parent d3ca3fe commit cea5213
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 5 deletions.
3 changes: 1 addition & 2 deletions amd_openvx/openvx/ago/ago_haf_cpu_pyramid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,5 +403,4 @@ int HafCpu_ScaleGaussianOrb_U8_U8_5x5
pDstImage += dstImageStrideInBytes;
}
return AGO_SUCCESS;
}

}
5 changes: 3 additions & 2 deletions amd_openvx/openvx/ago/ago_haf_gpu_special_filters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1403,7 +1403,7 @@ int HafGpu_ScaleGaussianHalf(AgoNode * node)
int work_group_width = 16;
int work_group_height = 16;
int width = node->paramList[0]->u.img.width;
int height = node->paramList[0]->u.img.height;
int height = node->paramList[0]->u.img.height - 1;
int N = 0;
if (node->akernel->id == VX_KERNEL_AMD_SCALE_GAUSSIAN_HALF_U8_U8_3x3) {
N = 3;
Expand Down Expand Up @@ -1437,6 +1437,7 @@ int HafGpu_ScaleGaussianHalf(AgoNode * node)
" int gx = get_global_id(0);\n"
" int gy = get_global_id(1);\n"
" p0_buf += p0_offset + (gy * p0_stride) + (gx << 2);\n"
" *(__global uint *)p0_buf = 0;\n"
" int gstride = p1_stride;\n"
" __global uchar * gbuf = p1_buf + p1_offset + (((gy - ly) << 1) + 1) * gstride + ((gx - lx) << 3);\n"
" bool valid = ((gx < %d) && (gy < %d)) ? true : false;\n" // (width+3)/4, height
Expand Down Expand Up @@ -1540,7 +1541,7 @@ int HafGpu_ScaleGaussianHalf(AgoNode * node)
" L0.s01 = vload2(0, (__local uint *)&lbuf_ptr[%d]);\n" // LMemStride * 4
" sum.s0 += (float)(L0.s0 & 0xffff); sum.s1 += (float)(L0.s0 >> 16); sum.s2 += (float)(L0.s1 & 0xffff); sum.s3 += (float)(L0.s1 >> 16);\n"
" sum = sum * (float4)0.00390625f;\n"
" if (valid) {;\n"
" if (valid) {\n"
" *(__global uint *)p0_buf = amd_pack(sum);\n"
" }\n"
"}\n"
Expand Down
3 changes: 2 additions & 1 deletion amd_openvx/openvx/ago/ago_kernel_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15493,8 +15493,9 @@ int agoKernel_ScaleGaussianHalf_U8_U8_5x5(AgoNode * node, AgoKernelCommand cmd)
status = VX_SUCCESS;
AgoData * oImg = node->paramList[0];
AgoData * iImg = node->paramList[1];
hipMemset(oImg->hip_memory, 0, oImg->size + oImg->gpu_buffer_offset);
if (HipExec_ScaleGaussianHalf_U8_U8_5x5(
node->hip_stream0, oImg->u.img.width, oImg->u.img.height,
node->hip_stream0, oImg->u.img.width, oImg->u.img.height - 1,
oImg->hip_memory + oImg->gpu_buffer_offset,oImg->u.img.stride_in_bytes,
iImg->u.img.width, iImg->u.img.height,
iImg->hip_memory + iImg->gpu_buffer_offset, iImg->u.img.stride_in_bytes, iImg->size)) {
Expand Down

0 comments on commit cea5213

Please sign in to comment.