Closed
Description
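The "#pragma float_control(precise, on)" setting no longer has an effect if the function it applies to is inlined. In the example below, adding the "noinline" attribute to the function e changes the instruction used for the division: vdivss is emitted when e is kept out of line, while vrcpps appears once e is inlined into b.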
$ cat inline.cpp
#pragma float_control(push)
#pragma float_control(precise, on)
float e(float a, float b)
{
return a / b;
}
// e() is the only function defined between the push/pop pair, i.e. the only
// one placed under float_control(precise, on). It returns a / b; the report
// is about which instruction that single division is lowered to.
#pragma float_control(pop)
// Aggregate of four floats; used as the return type of b().
struct c { float a, b, c, d; };
// Calls e() four times (twice dividing by X, twice by Y) and packs the
// scaled results into a c. Defined after float_control(pop), so this function
// itself is not under the precise setting.
// b() is marked noinline; presumably so that its code (with e() inlined into
// it) stays visible under its own symbol in the generated assembly.
__attribute__((noinline)) c
b(float X, float Y)
{
return c {
2.0f * e(0.0f, X) - 1.0f,
1.0f - 2.0f * e(3.0f, Y),
2.0f * e(4.0f, X) - 1.0f,
1.0f - 2.0f * e(5.0f, Y)
};
}
// Driver: calls b(1.0f, 2.0f) and returns a value derived from all four
// fields so the result of b() is used.
int main()
{
c f = b(1.0f, 2.0f);
// NOTE: the (int) cast binds to f.a only; the other fields are then added as
// floats and the sum is implicitly converted to the int return type.
return (int) f.a + f.b + f.c + f.d;
}
$ diff -u -d inline.cpp noinline.cpp
--- inline.cpp 2024-02-23 17:26:53.041466900 -0800
+++ noinline.cpp 2024-02-23 17:27:16.320344400 -0800
@@ -1,5 +1,6 @@
#pragma float_control(push)
#pragma float_control(precise, on)
+__attribute__((noinline))
float e(float a, float b)
{
return a / b;
$ clang -S -O3 -ffast-math inline.cpp
$ clang -S -O3 -ffast-math noinline.cpp
$ cat inline.s
...
_Z1bff: # @_Z1bff
...
vmovss .LCPI1_2(%rip), %xmm3 # xmm3 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
vdivss %xmm1, %xmm2, %xmm2
vrcpps %xmm0, %xmm1 # <----------
vaddss .LCPI1_1(%rip), %xmm2, %xmm2
vmulps %xmm4, %xmm1, %xmm5
...
$ cat noinline.s
...
_Z1eff: # @_Z1eff
.cfi_startproc
# %bb.0: # %entry
vdivss %xmm1, %xmm0, %xmm0 # <----------
retq
...
$