From ee84ee4272d91dc9b0ee17ee44fce11ddaa17b5f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 20 Aug 2023 18:32:48 -0400 Subject: [PATCH] device-libs: Optimize odd/even integer checks in pow* There are apparently some missing optimizations surrounding comparisons to the previous pseudo-enum. The comparison of the conditional add of booleans against the constant 1 was not folded away. We would need to implement an optimization such as icmp eq (add (zext i1 x), (zext i1 y)), 1 => xor x, y which I filed here: https://github.com/llvm/llvm-project/issues/64859 Just do this manually since it's more legible anyway. Saves 5 instructions for the f32 case. Change-Id: Iee7befb093561cf66b72a9df6b37d0cacb2154ee --- amd/device-libs/ocml/src/powD_base.h | 33 ++++++++++++++-------------- amd/device-libs/ocml/src/powF_base.h | 32 +++++++++++++-------------- amd/device-libs/ocml/src/powH_base.h | 33 +++++++++++++--------------- 3 files changed, 46 insertions(+), 52 deletions(-) diff --git a/amd/device-libs/ocml/src/powD_base.h b/amd/device-libs/ocml/src/powD_base.h index 263319461c120ca..1c63ac61fcd961b 100644 --- a/amd/device-libs/ocml/src/powD_base.h +++ b/amd/device-libs/ocml/src/powD_base.h @@ -21,18 +21,18 @@ samesign(double x, double y) return ((xh ^ yh) & 0x80000000U) == 0; } -// Check if a double is an integral value, and whether it's even or -// odd. -// -// status: 0=not integer, 1=odd, 2=even -static int classify_integer(double ay) +static bool is_integer(double ay) { - int inty = BUILTIN_TRUNC_F64(ay) == ay; - double half_ay = 0.5 * ay; + return BUILTIN_TRUNC_F64(ay) == ay; +} + +static bool is_even_integer(double ay) { + // Even integers are still integers after division by 2. + return is_integer(0.5 * ay); +} - // Even integers are still even after division by 2.
- inty += inty & (BUILTIN_TRUNC_F64(half_ay) == half_ay); - return inty; +static bool is_odd_integer(double ay) { + return is_integer(ay) && !is_even_integer(ay); } #if defined(COMPILING_POW) @@ -44,11 +44,12 @@ MATH_MANGLE(pow)(double x, double y) double expylnx = MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax))); double ay = BUILTIN_ABS_F64(y); - int inty = classify_integer(ay); - double ret = BUILTIN_COPYSIGN_F64(expylnx, ((inty == 1) & (x < 0.0)) ? -0.0 : 0.0); + bool is_odd_y = is_odd_integer(ay); + + double ret = BUILTIN_COPYSIGN_F64(expylnx, (is_odd_y & (x < 0.0)) ? -0.0 : 0.0); // Now all the edge cases - if (x < 0.0 && !inty) + if (x < 0.0 && !is_integer(ay)) ret = QNAN_F64; if (BUILTIN_ISINF_F64(ay)) @@ -56,7 +57,7 @@ MATH_MANGLE(pow)(double x, double y) if (BUILTIN_ISINF_F64(ax) || x == 0.0) ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (y < 0.0) ? 0.0 : PINF_F64, - inty == 1 ? x : 0.0); + is_odd_y ? x : 0.0); if (BUILTIN_ISUNORDERED_F64(x, y)) ret = QNAN_F64; @@ -77,9 +78,7 @@ MATH_MANGLE(powr)(double x, double y) double expylnx = MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax))); double ay = BUILTIN_ABS_F64(y); - int inty = classify_integer(ay); - - double ret = BUILTIN_COPYSIGN_F64(expylnx, ((inty == 1) & (x < 0.0)) ? -0.0 : 0.0); + double ret = BUILTIN_COPYSIGN_F64(expylnx, (is_odd_integer(ay) & (x < 0.0)) ? -0.0 : 0.0); // Now all the edge cases double iz = y < 0.0 ? PINF_F64 : 0.0; diff --git a/amd/device-libs/ocml/src/powF_base.h b/amd/device-libs/ocml/src/powF_base.h index 0d96128e12c0cab..3975db9ba2597b5 100644 --- a/amd/device-libs/ocml/src/powF_base.h +++ b/amd/device-libs/ocml/src/powF_base.h @@ -55,18 +55,18 @@ static float compute_expylnx_float(float ax, float y) return MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax))); } -// Check if a float is an integral value, and whether it's even or -// odd. 
-// -// status: 0=not integer, 1=odd, 2=even -static int classify_integer(float ay) +static bool is_integer(float ay) { - int inty = BUILTIN_TRUNC_F32(ay) == ay; - float half_ay = 0.5f * ay; + return BUILTIN_TRUNC_F32(ay) == ay; +} + +static bool is_even_integer(float ay) { + // Even integers are still integers after division by 2. + return is_integer(0.5f * ay); +} - // Even integers are still even after division by 2. - inty += inty & (BUILTIN_TRUNC_F32(half_ay) == half_ay); - return inty; +static bool is_odd_integer(float ay) { + return is_integer(ay) && !is_even_integer(ay); } #if defined(COMPILING_POW) @@ -78,12 +78,12 @@ MATH_MANGLE(pow)(float x, float y) float expylnx = compute_expylnx_float(ax, y); float ay = BUILTIN_ABS_F32(y); - int inty = classify_integer(ay); + bool is_odd_y = is_odd_integer(ay); - float ret = BUILTIN_COPYSIGN_F32(expylnx, ((inty == 1) & (x < 0.0f)) ? -0.0f : 0.0f); + float ret = BUILTIN_COPYSIGN_F32(expylnx, (is_odd_y & (x < 0.0f)) ? -0.0f : 0.0f); // Now all the edge cases - if (x < 0.0f && !inty) + if (x < 0.0f && !is_integer(ay)) ret = QNAN_F32; if (BUILTIN_ISINF_F32(ay)) @@ -91,7 +91,7 @@ MATH_MANGLE(pow)(float x, float y) if (BUILTIN_ISINF_F32(ax) || x == 0.0f) ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (y < 0.0f) ? 0.0f : PINF_F32, - inty == 1 ? x : 0.0f); + is_odd_y ? x : 0.0f); if (BUILTIN_ISUNORDERED_F32(x, y)) ret = QNAN_F32; @@ -111,9 +111,7 @@ MATH_MANGLE(powr)(float x, float y) float expylnx = compute_expylnx_float(ax, y); float ay = BUILTIN_ABS_F32(y); - int inty = classify_integer(ay); - - float ret = BUILTIN_COPYSIGN_F32(expylnx, ((inty == 1) & (x < 0.0f)) ? -0.0f : 0.0f); + float ret = BUILTIN_COPYSIGN_F32(expylnx, (is_odd_integer(ay) & (x < 0.0f)) ? -0.0f : 0.0f); // Now all the edge cases float iz = y < 0.0f ? 
PINF_F32 : 0.0f; diff --git a/amd/device-libs/ocml/src/powH_base.h b/amd/device-libs/ocml/src/powH_base.h index fb3a272d2b52962..e26bebfa21a0567 100644 --- a/amd/device-libs/ocml/src/powH_base.h +++ b/amd/device-libs/ocml/src/powH_base.h @@ -16,18 +16,18 @@ static float compute_expylnx_f16(half ax, half y) return BUILTIN_AMDGPU_EXP2_F32((float)y * BUILTIN_AMDGPU_LOG2_F32((float)ax)); } -// Check if a half is an integral value, and whether it's even or -// odd. -// -// status: 0=not integer, 1=odd, 2=even -static int classify_integer(half ay) +static bool is_integer(half ay) { - bool inty = BUILTIN_TRUNC_F16(ay) == ay; - half half_ay = 0.5h * ay; + return BUILTIN_TRUNC_F16(ay) == ay; +} + +static bool is_even_integer(half ay) { + // Even integers are still integers after division by 2. + return is_integer(0.5h * ay); +} - // Even integers are still even after division by 2. - inty += inty & (BUILTIN_TRUNC_F16(half_ay) == half_ay); - return inty; +static bool is_odd_integer(half ay) { + return is_integer(ay) && !is_even_integer(ay); } #if defined(COMPILING_POW) @@ -39,12 +39,11 @@ MATH_MANGLE(pow)(half x, half y) float p = compute_expylnx_f16(ax, y); half ay = BUILTIN_ABS_F16(y); - int inty = classify_integer(ay); - - half ret = BUILTIN_COPYSIGN_F16((half)p, ((inty == 1) & (x < 0.0h)) ? -0.0f : 0.0f); + bool is_odd_y = is_odd_integer(ay); + half ret = BUILTIN_COPYSIGN_F16((half)p, (is_odd_y & (x < 0.0h)) ? -0.0f : 0.0f); // Now all the edge cases - if (x < 0.0h && !inty) + if (x < 0.0h && !is_integer(ay)) ret = QNAN_F16; if (BUILTIN_ISINF_F16(ay)) @@ -52,7 +51,7 @@ MATH_MANGLE(pow)(half x, half y) if (BUILTIN_ISINF_F16(ax) || x == 0.0h) ret = BUILTIN_COPYSIGN_F16((x == 0.0h) ^ (y < 0.0h) ? 0.0h : PINF_F16, - inty == 1 ? x : 0.0h); + is_odd_y ? 
x : 0.0h); if (BUILTIN_ISUNORDERED_F16(x, y)) ret = QNAN_F16; @@ -72,9 +71,7 @@ MATH_MANGLE(powr)(half x, half y) float p = compute_expylnx_f16(ax, y); half ay = BUILTIN_ABS_F16(y); - int inty = classify_integer(ay); - - half ret = BUILTIN_COPYSIGN_F16((half)p, ((inty == 1) & (x < 0.0h)) ? -0.0f : 0.0f); + half ret = BUILTIN_COPYSIGN_F16((half)p, (is_odd_integer(ay) & (x < 0.0h)) ? -0.0f : 0.0f); // Now all the edge cases half iz = y < 0.0h ? PINF_F16 : 0.0h;