Skip to content

Commit

Permalink
GPU Fix - multiply gpu (ROCm#39)
Browse files: browse the repository at this point in the history.
* CMake

* multiply fix

* code cleanup
  • Loading branch information
hansely authored Feb 18, 2021
1 parent 698d731 commit a0aa933
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 27 deletions.
16 changes: 8 additions & 8 deletions amd_openvx/openvx/ago/ago_kernel_api.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -8987,14 +8987,14 @@ int agoKernel_Mul_S16_S16S16_Wrap_Round(AgoNode * node, AgoKernelCommand cmd)
"void %s (S16x8 * p0, S16x8 p1, S16x8 p2, float p3)\n"
"{\n"
" S16x8 r;\n"
" r.s0 = ((int)convert_short_rte(p3 * (float)((((int)(p1.s0)) << 16) >> 16) * (float)((((int)(p2.s0)) << 16) >> 16)) & 0x0000ffff) ;\n"
" r.s0 |= ((int)convert_short_rte(p3 * (float)(((int)(p1.s0)) >> 16) * (float)(((int)(p2.s0)) >> 16)) ) << 16;\n"
" r.s1 = ((int)convert_short_rte(p3 * (float)((((int)(p1.s1)) << 16) >> 16) * (float)((((int)(p2.s1)) << 16) >> 16)) & 0x0000ffff);\n"
" r.s1 |= ((int)convert_short_rte(p3 * (float)(((int)(p1.s1)) >> 16) * (float)(((int)(p2.s1)) >> 16)) ) << 16;\n"
" r.s2 = ((int)convert_short_rte(p3 * (float)((((int)(p1.s2)) << 16) >> 16) * (float)((((int)(p2.s2)) << 16) >> 16)) & 0x0000ffff) ;\n"
" r.s2 |= ((int)convert_short_rte(p3 * (float)(((int)(p1.s2)) >> 16) * (float)(((int)(p2.s2)) >> 16)) ) << 16;\n"
" r.s3 = ((int)convert_short_rte(p3 * (float)((((int)(p1.s3)) << 16) >> 16) * (float)((((int)(p2.s3)) << 16) >> 16)) & 0x0000ffff) ;\n"
" r.s3 |= ((int)convert_short_rte(p3 * (float)(((int)(p1.s3)) >> 16) * (float)(((int)(p2.s3)) >> 16)) ) << 16;\n"
" r.s0 = ((int)convert_int_rte(p3 * (float)((((int)(p1.s0)) << 16) >> 16) * (float)((((int)(p2.s0)) << 16) >> 16)) & 0x0000ffff) ;\n"
" r.s0 |= ((int)convert_int_rte(p3 * (float)(((int)(p1.s0)) >> 16) * (float)(((int)(p2.s0)) >> 16)) ) << 16;\n"
" r.s1 = ((int)convert_int_rte(p3 * (float)((((int)(p1.s1)) << 16) >> 16) * (float)((((int)(p2.s1)) << 16) >> 16)) & 0x0000ffff);\n"
" r.s1 |= ((int)convert_int_rte(p3 * (float)(((int)(p1.s1)) >> 16) * (float)(((int)(p2.s1)) >> 16)) ) << 16;\n"
" r.s2 = ((int)convert_int_rte(p3 * (float)((((int)(p1.s2)) << 16) >> 16) * (float)((((int)(p2.s2)) << 16) >> 16)) & 0x0000ffff) ;\n"
" r.s2 |= ((int)convert_int_rte(p3 * (float)(((int)(p1.s2)) >> 16) * (float)(((int)(p2.s2)) >> 16)) ) << 16;\n"
" r.s3 = ((int)convert_int_rte(p3 * (float)((((int)(p1.s3)) << 16) >> 16) * (float)((((int)(p2.s3)) << 16) >> 16)) & 0x0000ffff) ;\n"
" r.s3 |= ((int)convert_int_rte(p3 * (float)(((int)(p1.s3)) >> 16) * (float)(((int)(p2.s3)) >> 16)) ) << 16;\n"
" *p0 = r;\n"
"}\n"
), node->opencl_name);
Expand Down
38 changes: 19 additions & 19 deletions amd_openvx/openvx/ago/ago_util_opencl.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1738,25 +1738,25 @@ int agoGpuOclSuperNodeFinalize(AgoGraph * graph, AgoSuperNode * supernode)
" r |= (p1.s1 >> 17) & 128;\n"
" *p0 = r;\n"
"}\n"
"void Convert_U8_S16 (U8x8 *p0, S16X8 p1)\n"
"{\n"
" U8x8 r;\n"
" p1.s0 = (~p1.s0) + 1; \n"
" p1.s1 = (~p1.s1) + 1; \n"
" p1.s2 = (~p1.s2) + 1; \n"
" p1.s3 = (~p1.s3) + 1; \n"
" r = p1.s0 & 0x000000ff;\n"
" r |= (p1.s0 >> 15) & 0x0000ff00;\n"
" r |= (p1.s1) & 0x000000ff;\n"
" r |= (p1.s1 >> 15) & 0x0000ff00;\n"
" p0.s0 = r;\n"
" U8x8 q;\n"
" q = p1.s2 & 0x000000ff;\n"
" q |= (p1.s2 >> 15) & 0x0000ff00;\n"
" q |= (p1.s3) & 0x000000ff;\n"
" q |= (p1.s3 >> 15) & 0x0000ff00;\n"
" p0.s1 = q;\n"
"}\n"
// "void Convert_U8_S16 (U8x8 *p0, S16X8 p1)\n"
// "{\n"
// " U8x8 r;\n"
// " p1.s0 = (~p1.s0) + 1; \n"
// " p1.s1 = (~p1.s1) + 1; \n"
// " p1.s2 = (~p1.s2) + 1; \n"
// " p1.s3 = (~p1.s3) + 1; \n"
// " r = p1.s0 & 0x000000ff;\n"
// " r |= (p1.s0 >> 15) & 0x0000ff00;\n"
// " r |= (p1.s1) & 0x000000ff;\n"
// " r |= (p1.s1 >> 15) & 0x0000ff00;\n"
// " p0.s0 = r;\n"
// " U8x8 q;\n"
// " q = p1.s2 & 0x000000ff;\n"
// " q |= (p1.s2 >> 15) & 0x0000ff00;\n"
// " q |= (p1.s3) & 0x000000ff;\n"
// " q |= (p1.s3 >> 15) & 0x0000ff00;\n"
// " p0.s1 = q;\n"
// "}\n"
);
for (size_t index = 0; index < supernode->nodeList.size(); index++) {
// get node and set node name
Expand Down

0 comments on commit a0aa933

Please sign in to comment.