forked from gcc-mirror/gcc
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
arm: Stop vadcq, vsbcq intrinsics from overwriting the FPSCR NZ flags
Hi all, We noticed that calls to the vadcq and vsbcq intrinsics, both of which use __builtin_arm_set_fpscr_nzcvqc to set the Carry flag in the FPSCR, would produce the following code: ``` < r2 is the *carry input > vmrs r3, FPSCR_nzcvqc bic r3, r3, #536870912 orr r3, r3, r2, lsl #29 vmsr FPSCR_nzcvqc, r3 ``` when the MVE ACLE instead gives a different instruction sequence of: ``` < Rt is the *carry input > VMRS Rs,FPSCR_nzcvqc BFI Rs,Rt,#29,#1 VMSR FPSCR_nzcvqc,Rs ``` the bic + orr pair is slower and it's also wrong, because, if the *carry input is greater than 1, then we risk overwriting the top two bits of the FPSCR register (the N and Z flags). This turned out to be a problem in the header file and the solution was to simply add a `& 0x1u` to the `*carry` input: then the compiler knows that we only care about the lowest bit and can optimise to a BFI. Ok for trunk? Thanks, Stam Markianos-Wright gcc/ChangeLog: * config/arm/arm_mve.h (__arm_vadcq_s32): Fix arithmetic. (__arm_vadcq_u32): Likewise. (__arm_vadcq_m_s32): Likewise. (__arm_vadcq_m_u32): Likewise. (__arm_vsbcq_s32): Likewise. (__arm_vsbcq_u32): Likewise. (__arm_vsbcq_m_s32): Likewise. (__arm_vsbcq_m_u32): Likewise. * config/arm/mve.md (get_fpscr_nzcvqc): Make unspec_volatile. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/mve_vadcq_vsbcq_fpscr_overwrite.c: New.
- Loading branch information
Showing
3 changed files
with
76 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 67 additions & 0 deletions
67
gcc/testsuite/gcc.target/arm/mve/mve_vadcq_vsbcq_fpscr_overwrite.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* { dg-do run } */ | ||
/* { dg-require-effective-target arm_mve_hw } */ | ||
/* { dg-options "-O2" } */ | ||
/* { dg-add-options arm_v8_1m_mve } */ | ||
|
||
#include <arm_mve.h> | ||
|
||
/* Destinations for the intrinsic results: main stores the value returned by
   every vadcq/vsbcq call into one of these two objects.  Presumably volatile
   so the stores (and the calls feeding them) are not optimised away --
   TODO confirm.  */
volatile int32x4_t c1; | ||
volatile uint32x4_t c2; | ||
/* Carry in/out argument passed to each intrinsic call in main.
   NOTE(review): no initializer is visible here, so this pointer has to be
   made to reference valid storage before main writes through it.  */
int *carry; | ||
|
||
int | ||
main () | ||
{ | ||
int32x4_t a1 = vcreateq_s32 (0, 0); | ||
int32x4_t b1 = vcreateq_s32 (0, 0); | ||
int32x4_t inactive1 = vcreateq_s32 (0, 0); | ||
|
||
uint32x4_t a2 = vcreateq_u32 (0, 0); | ||
uint32x4_t b2 = vcreateq_u32 (0, 0); | ||
uint32x4_t inactive2 = vcreateq_u32 (0, 0); | ||
|
||
mve_pred16_t p = 0xFFFF; | ||
(*carry) = 0xFFFFFFFF; | ||
|
||
__builtin_arm_set_fpscr_nzcvqc (0); | ||
c1 = vadcq (a1, b1, carry); | ||
if (__builtin_arm_get_fpscr_nzcvqc () & !0x20000000) | ||
__builtin_abort (); | ||
(*carry) = 0xFFFFFFFF; | ||
__builtin_arm_set_fpscr_nzcvqc (0); | ||
c2 = vadcq (a2, b2, carry); | ||
if (__builtin_arm_get_fpscr_nzcvqc () & !0x20000000) | ||
__builtin_abort (); | ||
(*carry) = 0xFFFFFFFF; | ||
__builtin_arm_set_fpscr_nzcvqc (0); | ||
c1 = vsbcq (a1, b1, carry); | ||
if (__builtin_arm_get_fpscr_nzcvqc () & !0x20000000) | ||
__builtin_abort (); | ||
(*carry) = 0xFFFFFFFF; | ||
__builtin_arm_set_fpscr_nzcvqc (0); | ||
c2 = vsbcq (a2, b2, carry); | ||
if (__builtin_arm_get_fpscr_nzcvqc () & !0x20000000) | ||
__builtin_abort (); | ||
(*carry) = 0xFFFFFFFF; | ||
__builtin_arm_set_fpscr_nzcvqc (0); | ||
c1 = vadcq_m (inactive1, a1, b1, carry, p); | ||
if (__builtin_arm_get_fpscr_nzcvqc () & !0x20000000) | ||
__builtin_abort (); | ||
(*carry) = 0xFFFFFFFF; | ||
__builtin_arm_set_fpscr_nzcvqc (0); | ||
c2 = vadcq_m (inactive2, a2, b2, carry, p); | ||
if (__builtin_arm_get_fpscr_nzcvqc () & !0x20000000) | ||
__builtin_abort (); | ||
(*carry) = 0xFFFFFFFF; | ||
__builtin_arm_set_fpscr_nzcvqc (0); | ||
c1 = vsbcq_m (inactive1, a1, b1, carry, p); | ||
if (__builtin_arm_get_fpscr_nzcvqc () & !0x20000000) | ||
__builtin_abort (); | ||
(*carry) = 0xFFFFFFFF; | ||
__builtin_arm_set_fpscr_nzcvqc (0); | ||
c2 = vsbcq_m (inactive2, a2, b2, carry, p); | ||
if (__builtin_arm_get_fpscr_nzcvqc () & !0x20000000) | ||
__builtin_abort (); | ||
|
||
return 0; | ||
} |