Skip to content

Commit

Permalink
device-libs: Optimize odd/even integer checks in pow*
Browse files Browse the repository at this point in the history
There are apparently some missing optimizations surrounding
comparisons to the previous pseudo-enum. The comparison of the
conditional add of booleans against the constant 1 did not fold
out.

We would need to implement an optimization such as
  icmp eq (add (zext i1 x), (zext i1 y)), 1 => xor x, y

which I filed here: llvm#64859

Just do this manually since it's more legible anyway. Saves 5
instructions for the f32 case.

Change-Id: Iee7befb093561cf66b72a9df6b37d0cacb2154ee
  • Loading branch information
arsenm authored and ronlieb committed Aug 24, 2023
1 parent 47ec0c9 commit ee84ee4
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 52 deletions.
33 changes: 16 additions & 17 deletions amd/device-libs/ocml/src/powD_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,18 @@ samesign(double x, double y)
return ((xh ^ yh) & 0x80000000U) == 0;
}

// Check if a double is an integral value, and whether it's even or
// odd.
//
// status: 0=not integer, 1=odd, 2=even
static int classify_integer(double ay)
static bool is_integer(double ay)
{
int inty = BUILTIN_TRUNC_F64(ay) == ay;
double half_ay = 0.5 * ay;
return BUILTIN_TRUNC_F64(ay) == ay;
}

// Returns true when half of ay is an integral value, which is how an
// even integer is recognized: halving an even integer yields another
// integer.
//
// NOTE(review): this does not itself check is_integer(ay);
// is_odd_integer performs that check before relying on this result.
static bool is_even_integer(double ay) {
    const double half_ay = 0.5 * ay;
    return is_integer(half_ay);
}

// Even integers are still even after division by 2.
inty += inty & (BUILTIN_TRUNC_F64(half_ay) == half_ay);
return inty;
// Returns true when ay is an integral value that is not even.
static bool is_odd_integer(double ay) {
    // Non-integers are neither odd nor even.
    if (!is_integer(ay))
        return false;

    return !is_even_integer(ay);
}

#if defined(COMPILING_POW)
Expand All @@ -44,19 +44,20 @@ MATH_MANGLE(pow)(double x, double y)
double expylnx = MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));

double ay = BUILTIN_ABS_F64(y);
int inty = classify_integer(ay);
double ret = BUILTIN_COPYSIGN_F64(expylnx, ((inty == 1) & (x < 0.0)) ? -0.0 : 0.0);
bool is_odd_y = is_odd_integer(ay);

double ret = BUILTIN_COPYSIGN_F64(expylnx, (is_odd_y & (x < 0.0)) ? -0.0 : 0.0);

// Now all the edge cases
if (x < 0.0 && !inty)
if (x < 0.0 && !is_integer(ay))
ret = QNAN_F64;

if (BUILTIN_ISINF_F64(ay))
ret = ax == 1.0 ? ax : (samesign(y, ax - 1.0) ? ay : 0.0);

if (BUILTIN_ISINF_F64(ax) || x == 0.0)
ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (y < 0.0) ? 0.0 : PINF_F64,
inty == 1 ? x : 0.0);
is_odd_y ? x : 0.0);

if (BUILTIN_ISUNORDERED_F64(x, y))
ret = QNAN_F64;
Expand All @@ -77,9 +78,7 @@ MATH_MANGLE(powr)(double x, double y)
double expylnx = MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));

double ay = BUILTIN_ABS_F64(y);
int inty = classify_integer(ay);

double ret = BUILTIN_COPYSIGN_F64(expylnx, ((inty == 1) & (x < 0.0)) ? -0.0 : 0.0);
double ret = BUILTIN_COPYSIGN_F64(expylnx, (is_odd_integer(ay) & (x < 0.0)) ? -0.0 : 0.0);

// Now all the edge cases
double iz = y < 0.0 ? PINF_F64 : 0.0;
Expand Down
32 changes: 15 additions & 17 deletions amd/device-libs/ocml/src/powF_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,18 @@ static float compute_expylnx_float(float ax, float y)
return MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));
}

// Check if a float is an integral value, and whether it's even or
// odd.
//
// status: 0=not integer, 1=odd, 2=even
static int classify_integer(float ay)
static bool is_integer(float ay)
{
int inty = BUILTIN_TRUNC_F32(ay) == ay;
float half_ay = 0.5f * ay;
return BUILTIN_TRUNC_F32(ay) == ay;
}

// Returns true when half of ay is an integral value, which is how an
// even integer is recognized: halving an even integer yields another
// integer.
//
// NOTE(review): this does not itself check is_integer(ay);
// is_odd_integer performs that check before relying on this result.
static bool is_even_integer(float ay) {
    const float half_ay = 0.5f * ay;
    return is_integer(half_ay);
}

// Even integers are still even after division by 2.
inty += inty & (BUILTIN_TRUNC_F32(half_ay) == half_ay);
return inty;
// Returns true when ay is an integral value that is not even.
static bool is_odd_integer(float ay) {
    // Non-integers are neither odd nor even.
    if (!is_integer(ay))
        return false;

    return !is_even_integer(ay);
}

#if defined(COMPILING_POW)
Expand All @@ -78,20 +78,20 @@ MATH_MANGLE(pow)(float x, float y)
float expylnx = compute_expylnx_float(ax, y);

float ay = BUILTIN_ABS_F32(y);
int inty = classify_integer(ay);
bool is_odd_y = is_odd_integer(ay);

float ret = BUILTIN_COPYSIGN_F32(expylnx, ((inty == 1) & (x < 0.0f)) ? -0.0f : 0.0f);
float ret = BUILTIN_COPYSIGN_F32(expylnx, (is_odd_y & (x < 0.0f)) ? -0.0f : 0.0f);

// Now all the edge cases
if (x < 0.0f && !inty)
if (x < 0.0f && !is_integer(ay))
ret = QNAN_F32;

if (BUILTIN_ISINF_F32(ay))
ret = ax == 1.0f ? ax : (samesign(y, ax - 1.0f) ? ay : 0.0f);

if (BUILTIN_ISINF_F32(ax) || x == 0.0f)
ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (y < 0.0f) ? 0.0f : PINF_F32,
inty == 1 ? x : 0.0f);
is_odd_y ? x : 0.0f);

if (BUILTIN_ISUNORDERED_F32(x, y))
ret = QNAN_F32;
Expand All @@ -111,9 +111,7 @@ MATH_MANGLE(powr)(float x, float y)
float expylnx = compute_expylnx_float(ax, y);

float ay = BUILTIN_ABS_F32(y);
int inty = classify_integer(ay);

float ret = BUILTIN_COPYSIGN_F32(expylnx, ((inty == 1) & (x < 0.0f)) ? -0.0f : 0.0f);
float ret = BUILTIN_COPYSIGN_F32(expylnx, (is_odd_integer(ay) & (x < 0.0f)) ? -0.0f : 0.0f);

// Now all the edge cases
float iz = y < 0.0f ? PINF_F32 : 0.0f;
Expand Down
33 changes: 15 additions & 18 deletions amd/device-libs/ocml/src/powH_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ static float compute_expylnx_f16(half ax, half y)
return BUILTIN_AMDGPU_EXP2_F32((float)y * BUILTIN_AMDGPU_LOG2_F32((float)ax));
}

// Check if a half is an integral value, and whether it's even or
// odd.
//
// status: 0=not integer, 1=odd, 2=even
static int classify_integer(half ay)
static bool is_integer(half ay)
{
bool inty = BUILTIN_TRUNC_F16(ay) == ay;
half half_ay = 0.5h * ay;
return BUILTIN_TRUNC_F16(ay) == ay;
}

// Returns true when half of ay is an integral value, which is how an
// even integer is recognized: halving an even integer yields another
// integer.
//
// NOTE(review): this does not itself check is_integer(ay);
// is_odd_integer performs that check before relying on this result.
static bool is_even_integer(half ay) {
    const half half_ay = 0.5h * ay;
    return is_integer(half_ay);
}

// Even integers are still even after division by 2.
inty += inty & (BUILTIN_TRUNC_F16(half_ay) == half_ay);
return inty;
// Returns true when ay is an integral value that is not even.
static bool is_odd_integer(half ay) {
    // Non-integers are neither odd nor even.
    if (!is_integer(ay))
        return false;

    return !is_even_integer(ay);
}

#if defined(COMPILING_POW)
Expand All @@ -39,20 +39,19 @@ MATH_MANGLE(pow)(half x, half y)
float p = compute_expylnx_f16(ax, y);

half ay = BUILTIN_ABS_F16(y);
int inty = classify_integer(ay);

half ret = BUILTIN_COPYSIGN_F16((half)p, ((inty == 1) & (x < 0.0h)) ? -0.0f : 0.0f);
bool is_odd_y = is_odd_integer(ay);
half ret = BUILTIN_COPYSIGN_F16((half)p, (is_odd_y & (x < 0.0h)) ? -0.0f : 0.0f);

// Now all the edge cases
if (x < 0.0h && !inty)
if (x < 0.0h && !is_integer(ay))
ret = QNAN_F16;

if (BUILTIN_ISINF_F16(ay))
ret = ax == 1.0h ? ax : (samesign(y, ax - 1.0h) ? ay : 0.0h);

if (BUILTIN_ISINF_F16(ax) || x == 0.0h)
ret = BUILTIN_COPYSIGN_F16((x == 0.0h) ^ (y < 0.0h) ? 0.0h : PINF_F16,
inty == 1 ? x : 0.0h);
is_odd_y ? x : 0.0h);

if (BUILTIN_ISUNORDERED_F16(x, y))
ret = QNAN_F16;
Expand All @@ -72,9 +71,7 @@ MATH_MANGLE(powr)(half x, half y)
float p = compute_expylnx_f16(ax, y);

half ay = BUILTIN_ABS_F16(y);
int inty = classify_integer(ay);

half ret = BUILTIN_COPYSIGN_F16((half)p, ((inty == 1) & (x < 0.0h)) ? -0.0f : 0.0f);
half ret = BUILTIN_COPYSIGN_F16((half)p, (is_odd_integer(ay) & (x < 0.0h)) ? -0.0f : 0.0f);

// Now all the edge cases
half iz = y < 0.0h ? PINF_F16 : 0.0h;
Expand Down

0 comments on commit ee84ee4

Please sign in to comment.