device-libs: Optimize odd/even integer checks in pow*

There are apparently some missing optimizations around comparisons against the previous pseudo-enum (0 = not integer, 1 = odd, 2 = even). The comparison of the conditional sum of two booleans against the constant 1 did not fold away. We would need to implement an optimization such as icmp eq (add (zext i1 x), (zext i1 y)), 1 => xor x, y, which I filed here: llvm#64859. Just do this manually, since it's more legible anyway. Saves 5 instructions for the f32 case. Change-Id: Iee7befb093561cf66b72a9df6b37d0cacb2154ee
ROCm · Aug 24, 2023 · ee84ee4 · ee84ee4
1 parent 47ec0c9
commit ee84ee4
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 52 deletions.
diff --git a/amd/device-libs/ocml/src/powD_base.h b/amd/device-libs/ocml/src/powD_base.h
@@ -21,18 +21,18 @@ samesign(double x, double y)
     return ((xh ^ yh) & 0x80000000U) == 0;
 }
 
-// Check if a double is an integral value, and whether it's even or
-// odd.
-//
-// status: 0=not integer, 1=odd, 2=even
-static int classify_integer(double ay)
+static bool is_integer(double ay)
 {
-    int inty = BUILTIN_TRUNC_F64(ay) == ay;
-    double half_ay = 0.5 * ay;
+    return BUILTIN_TRUNC_F64(ay) == ay;
+}
+
+static bool is_even_integer(double ay) {
+    // Even integers are still integers after division by 2.
+    return is_integer(0.5 * ay);
+}
 
-    // Even integers are still even after division by 2.
-    inty += inty & (BUILTIN_TRUNC_F64(half_ay) == half_ay);
-    return inty;
+static bool is_odd_integer(double ay) {
+    return is_integer(ay) && !is_even_integer(ay);
 }
 
 #if defined(COMPILING_POW)
@@ -44,19 +44,20 @@ MATH_MANGLE(pow)(double x, double y)
     double expylnx = MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));
 
     double ay = BUILTIN_ABS_F64(y);
-    int inty = classify_integer(ay);
-    double ret = BUILTIN_COPYSIGN_F64(expylnx, ((inty == 1) & (x < 0.0)) ? -0.0 : 0.0);
+    bool is_odd_y = is_odd_integer(ay);
+
+    double ret = BUILTIN_COPYSIGN_F64(expylnx, (is_odd_y & (x < 0.0)) ? -0.0 : 0.0);
 
     // Now all the edge cases
-    if (x < 0.0 && !inty)
+    if (x < 0.0 && !is_integer(ay))
         ret = QNAN_F64;
 
     if (BUILTIN_ISINF_F64(ay))
         ret = ax == 1.0 ? ax : (samesign(y, ax - 1.0) ? ay : 0.0);
 
     if (BUILTIN_ISINF_F64(ax) || x == 0.0)
         ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (y < 0.0) ? 0.0 : PINF_F64,
-                                   inty == 1 ? x : 0.0);
+                                   is_odd_y ? x : 0.0);
 
     if (BUILTIN_ISUNORDERED_F64(x, y))
         ret = QNAN_F64;
@@ -77,9 +78,7 @@ MATH_MANGLE(powr)(double x, double y)
     double expylnx = MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));
 
     double ay = BUILTIN_ABS_F64(y);
-    int inty = classify_integer(ay);
-
-    double ret = BUILTIN_COPYSIGN_F64(expylnx, ((inty == 1) & (x < 0.0)) ? -0.0 : 0.0);
+    double ret = BUILTIN_COPYSIGN_F64(expylnx, (is_odd_integer(ay) & (x < 0.0)) ? -0.0 : 0.0);
 
     // Now all the edge cases
     double iz = y < 0.0 ? PINF_F64 : 0.0;

diff --git a/amd/device-libs/ocml/src/powF_base.h b/amd/device-libs/ocml/src/powF_base.h
@@ -55,18 +55,18 @@ static float compute_expylnx_float(float ax, float y)
     return MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));
 }
 
-// Check if a float is an integral value, and whether it's even or
-// odd.
-//
-// status: 0=not integer, 1=odd, 2=even
-static int classify_integer(float ay)
+static bool is_integer(float ay)
 {
-    int inty = BUILTIN_TRUNC_F32(ay) == ay;
-    float half_ay = 0.5f * ay;
+    return BUILTIN_TRUNC_F32(ay) == ay;
+}
+
+static bool is_even_integer(float ay) {
+    // Even integers are still integers after division by 2.
+    return is_integer(0.5f * ay);
+}
 
-    // Even integers are still even after division by 2.
-    inty += inty & (BUILTIN_TRUNC_F32(half_ay) == half_ay);
-    return inty;
+static bool is_odd_integer(float ay) {
+    return is_integer(ay) && !is_even_integer(ay);
 }
 
 #if defined(COMPILING_POW)
@@ -78,20 +78,20 @@ MATH_MANGLE(pow)(float x, float y)
     float expylnx = compute_expylnx_float(ax, y);
 
     float ay = BUILTIN_ABS_F32(y);
-    int inty = classify_integer(ay);
+    bool is_odd_y = is_odd_integer(ay);
 
-    float ret = BUILTIN_COPYSIGN_F32(expylnx, ((inty == 1) & (x < 0.0f)) ? -0.0f : 0.0f);
+    float ret = BUILTIN_COPYSIGN_F32(expylnx, (is_odd_y & (x < 0.0f)) ? -0.0f : 0.0f);
 
     // Now all the edge cases
-    if (x < 0.0f && !inty)
+    if (x < 0.0f && !is_integer(ay))
         ret = QNAN_F32;
 
     if (BUILTIN_ISINF_F32(ay))
         ret = ax == 1.0f ? ax : (samesign(y, ax - 1.0f) ? ay : 0.0f);
 
     if (BUILTIN_ISINF_F32(ax) || x == 0.0f)
         ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (y < 0.0f) ? 0.0f : PINF_F32,
-                                   inty == 1 ? x : 0.0f);
+                                   is_odd_y ? x : 0.0f);
 
     if (BUILTIN_ISUNORDERED_F32(x, y))
         ret = QNAN_F32;
@@ -111,9 +111,7 @@ MATH_MANGLE(powr)(float x, float y)
     float expylnx = compute_expylnx_float(ax, y);
 
     float ay = BUILTIN_ABS_F32(y);
-    int inty = classify_integer(ay);
-
-    float ret = BUILTIN_COPYSIGN_F32(expylnx, ((inty == 1) & (x < 0.0f)) ? -0.0f : 0.0f);
+    float ret = BUILTIN_COPYSIGN_F32(expylnx, (is_odd_integer(ay) & (x < 0.0f)) ? -0.0f : 0.0f);
 
     // Now all the edge cases
     float iz = y < 0.0f ? PINF_F32 : 0.0f;

diff --git a/amd/device-libs/ocml/src/powH_base.h b/amd/device-libs/ocml/src/powH_base.h
@@ -16,18 +16,18 @@ static float compute_expylnx_f16(half ax, half y)
     return BUILTIN_AMDGPU_EXP2_F32((float)y * BUILTIN_AMDGPU_LOG2_F32((float)ax));
 }
 
-// Check if a half is an integral value, and whether it's even or
-// odd.
-//
-// status: 0=not integer, 1=odd, 2=even
-static int classify_integer(half ay)
+static bool is_integer(half ay)
 {
-    bool inty = BUILTIN_TRUNC_F16(ay) == ay;
-    half half_ay = 0.5h * ay;
+    return BUILTIN_TRUNC_F16(ay) == ay;
+}
+
+static bool is_even_integer(half ay) {
+    // Even integers are still integers after division by 2.
+    return is_integer(0.5h * ay);
+}
 
-    // Even integers are still even after division by 2.
-    inty += inty & (BUILTIN_TRUNC_F16(half_ay) == half_ay);
-    return inty;
+static bool is_odd_integer(half ay) {
+    return is_integer(ay) && !is_even_integer(ay);
 }
 
 #if defined(COMPILING_POW)
@@ -39,20 +39,19 @@ MATH_MANGLE(pow)(half x, half y)
     float p = compute_expylnx_f16(ax, y);
 
     half ay = BUILTIN_ABS_F16(y);
-    int inty = classify_integer(ay);
-
-    half ret = BUILTIN_COPYSIGN_F16((half)p, ((inty == 1) & (x < 0.0h)) ? -0.0f : 0.0f);
+    bool is_odd_y = is_odd_integer(ay);
+    half ret = BUILTIN_COPYSIGN_F16((half)p, (is_odd_y & (x < 0.0h)) ? -0.0f : 0.0f);
 
     // Now all the edge cases
-    if (x < 0.0h && !inty)
+    if (x < 0.0h && !is_integer(ay))
         ret = QNAN_F16;
 
     if (BUILTIN_ISINF_F16(ay))
         ret = ax == 1.0h ? ax : (samesign(y, ax - 1.0h) ? ay : 0.0h);
 
     if (BUILTIN_ISINF_F16(ax) || x == 0.0h)
         ret = BUILTIN_COPYSIGN_F16((x == 0.0h) ^ (y < 0.0h) ? 0.0h : PINF_F16,
-                                   inty == 1 ? x : 0.0h);
+                                   is_odd_y ? x : 0.0h);
 
     if (BUILTIN_ISUNORDERED_F16(x, y))
         ret = QNAN_F16;
@@ -72,9 +71,7 @@ MATH_MANGLE(powr)(half x, half y)
     float p = compute_expylnx_f16(ax, y);
 
     half ay = BUILTIN_ABS_F16(y);
-    int inty = classify_integer(ay);
-
-    half ret = BUILTIN_COPYSIGN_F16((half)p, ((inty == 1) & (x < 0.0h)) ? -0.0f : 0.0f);
+    half ret = BUILTIN_COPYSIGN_F16((half)p, (is_odd_integer(ay) & (x < 0.0h)) ? -0.0f : 0.0f);
 
     // Now all the edge cases
     half iz = y < 0.0h ? PINF_F16 : 0.0h;