diff --git a/amd_openvx/openvx/ago/ago_kernel_api.cpp b/amd_openvx/openvx/ago/ago_kernel_api.cpp index a6b035c482..dc076e0f94 100644 --- a/amd_openvx/openvx/ago/ago_kernel_api.cpp +++ b/amd_openvx/openvx/ago/ago_kernel_api.cpp @@ -18142,7 +18142,7 @@ int agoKernel_NonMaxSupp_XY_ANY_3x3(AgoNode * node, AgoKernelCommand cmd) AgoData * oList = node->paramList[0]; AgoData * iImg = node->paramList[1]; if (HipExec_NonMaxSupp_XY_ANY_3x3( - node->hip_stream0, (vx_uint32)oList->u.arr.capacity, (ago_keypoint_xys_t *)(oList->hip_memory + oList->gpu_buffer_offset), + node->hip_stream0, (vx_uint32)oList->u.arr.capacity, oList->hip_memory, oList->gpu_buffer_offset, /* NOTE(review): the list buffer and its offset are now passed as two separate arguments instead of one pre-offset pointer; confirm the HipExec_NonMaxSupp_XY_ANY_3x3 prototype and definition were updated to match */ iImg->u.img.width, iImg->u.img.height, (vx_float32 *)(iImg->hip_memory + iImg->gpu_buffer_offset), iImg->u.img.stride_in_bytes)) { status = VX_FAILURE; @@ -19116,7 +19116,7 @@ int agoKernel_WarpAffine_U8_U8_Nearest_Constant(AgoNode * node, AgoKernelCommand iImg->u.img.width, iImg->u.img.height, iImg->hip_memory + iImg->gpu_buffer_offset, iImg->u.img.stride_in_bytes, (ago_affine_matrix_t *)(iMat->hip_memory + iMat->gpu_buffer_offset), - node->paramList[3]->u.scalar.u.u)) { + node->paramList[3]->u.scalar.u.u, iImg->u.img.rect_valid)) { /* passes iImg's rect_valid as the new trailing argument of the HIP launcher */ status = VX_FAILURE; } } diff --git a/amd_openvx/openvx/hipvx/geometric_kernels.cpp b/amd_openvx/openvx/hipvx/geometric_kernels.cpp index 72a6be726a..e4f650b702 100644 --- a/amd_openvx/openvx/hipvx/geometric_kernels.cpp +++ b/amd_openvx/openvx/hipvx/geometric_kernels.cpp @@ -838,7 +838,7 @@ __global__ void __attribute__((visibility("default"))) Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, uchar *pDstImage, uint dstImageStrideInBytes, const uchar *pSrcImage, uint srcImageStrideInBytes, - d_affine_matrix_t *affineMatrix, uint borderValue) { + d_affine_matrix_t *affineMatrix, uint borderValue, vx_rectangle_t rect_valid) { /* rect_valid: rectangle whose start/end x and y values bound the coordinates tested by the mask expressions below */ int x = (hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x) * 8; int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; @@ -859,12 +859,17 @@ 
Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, sy = fmaf(dy, affineMatrix->m[1][1], affineMatrix->m[2][1]); sy = fmaf(dx, affineMatrix->m[0][1], sy); + uint vl = rect_valid.start_x; + uint vr = rect_valid.end_x; + uint vt = rect_valid.start_y; + uint vb = rect_valid.end_y; /* vl/vr hold the rectangle's start_x/end_x and vt/vb its start_y/end_y, read once and reused by every mask computation */ + x = (uint)(int)sx; y = (uint)(int)sy; - dstWidth -= 2; - dstHeight -= 2; + dstWidth -= vl; + dstHeight -= vt; /* NOTE(review): none of the rewritten mask expressions in the visible hunks read dstWidth or dstHeight any more; confirm whether these two adjustments are still needed */ - mask = ((int)(x | (dstWidth - x) | y | (dstHeight - y))) >> 31; + mask = ((int)((x - vl) | (vr - 1 - x) | (y - vt) | (vb - 1 - y))) >> 31; /* ORs the four edge differences: the top bit ends up set when any of them wraps, i.e. x is below vl or above vr - 1, or y is below vt or above vb - 1; the >> 31 on the int cast presumably spreads that bit over the whole word (arithmetic shift assumed) before the ~mask that follows inverts it. The same expression is repeated for each subsequent sample. */ mask = ~mask; x &= mask; y &= mask; @@ -876,7 +881,7 @@ Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, sy += affineMatrix->m[0][1]; x = (uint)(int)sx; y = (uint)(int)sy; - mask = ((int)(x | (dstWidth - x) | y | (dstHeight - y))) >> 31; + mask = ((int)((x - vl) | (vr - 1 - x) | (y - vt) | (vb - 1 - y))) >> 31; mask = ~mask; x &= mask; y &= mask; @@ -888,7 +893,7 @@ Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, sy += affineMatrix->m[0][1]; x = (uint)(int)sx; y = (uint)(int)sy; - mask = ((int)(x | (dstWidth - x) | y | (dstHeight - y))) >> 31; + mask = ((int)((x - vl) | (vr - 1 - x) | (y - vt) | (vb - 1 - y))) >> 31; mask = ~mask; x &= mask; y &= mask; @@ -900,7 +905,7 @@ Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, sy += affineMatrix->m[0][1]; x = (uint)(int)sx; y = (uint)(int)sy; - mask = ((int)(x | (dstWidth - x) | y | (dstHeight - y))) >> 31; + mask = ((int)((x - vl) | (vr - 1 - x) | (y - vt) | (vb - 1 - y))) >> 31; mask = ~mask; x &= mask; y &= mask; @@ -913,7 +918,7 @@ Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, x = (uint)(int)sx; y = (uint)(int)sy; - mask = ((int)(x | (dstWidth - x) | y | (dstHeight - y))) >> 31; + mask = ((int)((x - vl) | (vr - 1 - x) | (y - vt) | (vb - 1 - y))) >> 31; mask = ~mask; x &= mask; y &= mask; @@ -925,7 +930,7 @@ Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, sy += 
affineMatrix->m[0][1]; x = (uint)(int)sx; y = (uint)(int)sy; - mask = ((int)(x | (dstWidth - x) | y | (dstHeight - y))) >> 31; + mask = ((int)((x - vl) | (vr - 1 - x) | (y - vt) | (vb - 1 - y))) >> 31; mask = ~mask; x &= mask; y &= mask; @@ -937,7 +942,7 @@ Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, sy += affineMatrix->m[0][1]; x = (uint)(int)sx; y = (uint)(int)sy; - mask = ((int)(x | (dstWidth - x) | y | (dstHeight - y))) >> 31; + mask = ((int)((x - vl) | (vr - 1 - x) | (y - vt) | (vb - 1 - y))) >> 31; mask = ~mask; x &= mask; y &= mask; @@ -949,7 +954,7 @@ Hip_WarpAffine_U8_U8_Nearest_Constant(uint dstWidth, uint dstHeight, sy += affineMatrix->m[0][1]; x = (uint)(int)sx; y = (uint)(int)sy; - mask = ((int)(x | (dstWidth - x) | y | (dstHeight - y))) >> 31; + mask = ((int)((x - vl) | (vr - 1 - x) | (y - vt) | (vb - 1 - y))) >> 31; mask = ~mask; x &= mask; y &= mask; @@ -963,7 +968,7 @@ int HipExec_WarpAffine_U8_U8_Nearest_Constant(hipStream_t stream, vx_uint32 dstW vx_uint8 *pHipDstImage, vx_uint32 dstImageStrideInBytes, vx_uint32 srcWidth, vx_uint32 srcHeight, const vx_uint8 *pHipSrcImage, vx_uint32 srcImageStrideInBytes, - ago_affine_matrix_t *affineMatrix, vx_uint8 borderValue) { + ago_affine_matrix_t *affineMatrix, vx_uint8 borderValue, vx_rectangle_t rect_valid) { /* rect_valid is taken by value and handed on to the kernel launch below */ int localThreads_x = 16; int localThreads_y = 16; int globalThreads_x = (dstWidth + 7) >> 3; @@ -972,7 +977,7 @@ int HipExec_WarpAffine_U8_U8_Nearest_Constant(hipStream_t stream, vx_uint32 dstW hipLaunchKernelGGL(Hip_WarpAffine_U8_U8_Nearest_Constant, dim3(ceil((float)globalThreads_x/localThreads_x), ceil((float)globalThreads_y/localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, dstWidth, dstHeight, (uchar *)pHipDstImage , dstImageStrideInBytes, (const uchar *)pHipSrcImage, srcImageStrideInBytes, - (d_affine_matrix_t *) affineMatrix, (uint) borderValue); + (d_affine_matrix_t *) affineMatrix, (uint) borderValue, rect_valid); /* rect_valid is appended as the last kernel argument, matching the kernel's new trailing parameter */ return VX_SUCCESS; } diff --git 
a/amd_openvx/openvx/hipvx/hip_host_decls.h b/amd_openvx/openvx/hipvx/hip_host_decls.h index c91607c903..1e9da49689 100644 --- a/amd_openvx/openvx/hipvx/hip_host_decls.h +++ b/amd_openvx/openvx/hipvx/hip_host_decls.h @@ -758,7 +758,7 @@ int HipExec_WarpAffine_U8_U8_Nearest_Constant( vx_uint32 srcWidth, vx_uint32 srcHeight, const vx_uint8 *pHipSrcImage, vx_uint32 srcImageStrideInBytes, ago_affine_matrix_t *affineMatrix, - vx_uint8 borderValue); + vx_uint8 borderValue, vx_rectangle_t rect_valid); /* prototype gains the same trailing rect_valid parameter as the definition in geometric_kernels.cpp */ int HipExec_WarpAffine_U8_U8_Bilinear( hipStream_t stream, vx_uint32 dstWidth, vx_uint32 dstHeight, vx_uint8 *pHipDstImage, vx_uint32 dstImageStrideInBytes,