Skip to content

Commit

Permalink
arm: Replace arm_builtin_vectorized_function [PR106253]
Browse files Browse the repository at this point in the history
This patch extends the fix for PR106253 to AArch32.  As with AArch64,
we were using ACLE intrinsics to vectorise scalar built-ins, even
though the two sometimes have different ECF_* flags.  (That in turn
is because the ACLE intrinsics should follow the instruction semantics
as closely as possible, whereas the scalar built-ins follow language
specs.)

The patch also removes the copysignf built-in, which only existed
for this purpose and wasn't a “real” arm_neon.h built-in.

Doing this also has the side-effect of enabling vectorisation of
rint and roundeven.  Logically that should be a separate patch,
but making it one would have meant adding a new int iterator
for the original set of instructions and then removing it again
when including new functions.

I've restricted the bswap tests to little-endian because we end
up with excessive spilling on big-endian.  E.g.:

        sub     sp, sp, gcc-mirror#8
        vstr    d1, [sp]
        vldr    d16, [sp]
        vrev16.8        d16, d16
        vstr    d16, [sp]
        vldr    d0, [sp]
        add     sp, sp, gcc-mirror#8
        @ sp needed
        bx      lr

Similarly, the copysign tests require little-endian because on
big-endian we unnecessarily load the constant from the constant pool:

        vldr.32 s15, .L3
        vdup.32 d0, d7[1]
        vbsl    d0, d2, d1
        bx      lr
.L3:
        .word   -2147483648

gcc/
	PR target/106253
	* config/arm/arm-builtins.cc (arm_builtin_vectorized_function):
	Delete.
	* config/arm/arm-protos.h (arm_builtin_vectorized_function): Delete.
	* config/arm/arm.cc (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION):
	Delete.
	* config/arm/arm_neon_builtins.def (copysignf): Delete.
	* config/arm/iterators.md (nvrint_pattern): New attribute.
	* config/arm/neon.md (<NEON_VRINT:nvrint_pattern><VCVTF:mode>2):
	New pattern.
	(l<NEON_VCVT:nvrint_pattern><su_optab><VCVTF:mode><v_cmp_result>2):
	Likewise.
	(neon_copysignf<mode>): Rename to...
	(copysign<mode>3): ...this.

gcc/testsuite/
	PR target/106253
	* gcc.target/arm/vect_unary_1.c: New test.
	* gcc.target/arm/vect_binary_1.c: Likewise.
  • Loading branch information
rsandifo-arm committed Jul 18, 2022
1 parent 9c8349e commit 7313381
Show file tree
Hide file tree
Showing 8 changed files with 297 additions and 130 deletions.
123 changes: 0 additions & 123 deletions gcc/config/arm/arm-builtins.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4026,129 +4026,6 @@ arm_expand_builtin (tree exp,
return NULL_RTX;
}

tree
arm_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in)
{
machine_mode in_mode, out_mode;
int in_n, out_n;
bool out_unsigned_p = TYPE_UNSIGNED (type_out);

/* Can't provide any vectorized builtins when we can't use NEON. */
if (!TARGET_NEON)
return NULL_TREE;

if (TREE_CODE (type_out) != VECTOR_TYPE
|| TREE_CODE (type_in) != VECTOR_TYPE)
return NULL_TREE;

out_mode = TYPE_MODE (TREE_TYPE (type_out));
out_n = TYPE_VECTOR_SUBPARTS (type_out);
in_mode = TYPE_MODE (TREE_TYPE (type_in));
in_n = TYPE_VECTOR_SUBPARTS (type_in);

/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
decl of the vectorized builtin for the appropriate vector mode.
NULL_TREE is returned if no such builtin is available. */
#undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C) \
(TARGET_VFP5 \
&& flag_unsafe_math_optimizations \
&& ARM_CHECK_BUILTIN_MODE_1 (C))

#undef ARM_CHECK_BUILTIN_MODE_1
#define ARM_CHECK_BUILTIN_MODE_1(C) \
(out_mode == SFmode && out_n == C \
&& in_mode == SFmode && in_n == C)

#undef ARM_FIND_VRINT_VARIANT
#define ARM_FIND_VRINT_VARIANT(N) \
(ARM_CHECK_BUILTIN_MODE (2) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
: (ARM_CHECK_BUILTIN_MODE (4) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
: NULL_TREE))

switch (fn)
{
CASE_CFN_FLOOR:
return ARM_FIND_VRINT_VARIANT (vrintm);
CASE_CFN_CEIL:
return ARM_FIND_VRINT_VARIANT (vrintp);
CASE_CFN_TRUNC:
return ARM_FIND_VRINT_VARIANT (vrintz);
CASE_CFN_ROUND:
return ARM_FIND_VRINT_VARIANT (vrinta);
#undef ARM_CHECK_BUILTIN_MODE_1
#define ARM_CHECK_BUILTIN_MODE_1(C) \
(out_mode == SImode && out_n == C \
&& in_mode == SFmode && in_n == C)

#define ARM_FIND_VCVT_VARIANT(N) \
(ARM_CHECK_BUILTIN_MODE (2) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
: (ARM_CHECK_BUILTIN_MODE (4) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
: NULL_TREE))

#define ARM_FIND_VCVTU_VARIANT(N) \
(ARM_CHECK_BUILTIN_MODE (2) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
: (ARM_CHECK_BUILTIN_MODE (4) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
: NULL_TREE))
CASE_CFN_LROUND:
return (out_unsigned_p
? ARM_FIND_VCVTU_VARIANT (vcvta)
: ARM_FIND_VCVT_VARIANT (vcvta));
CASE_CFN_LCEIL:
return (out_unsigned_p
? ARM_FIND_VCVTU_VARIANT (vcvtp)
: ARM_FIND_VCVT_VARIANT (vcvtp));
CASE_CFN_LFLOOR:
return (out_unsigned_p
? ARM_FIND_VCVTU_VARIANT (vcvtm)
: ARM_FIND_VCVT_VARIANT (vcvtm));
#undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C, N) \
(out_mode == N##mode && out_n == C \
&& in_mode == N##mode && in_n == C)
case CFN_BUILT_IN_BSWAP16:
if (ARM_CHECK_BUILTIN_MODE (4, HI))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
else if (ARM_CHECK_BUILTIN_MODE (8, HI))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
else
return NULL_TREE;
case CFN_BUILT_IN_BSWAP32:
if (ARM_CHECK_BUILTIN_MODE (2, SI))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
else if (ARM_CHECK_BUILTIN_MODE (4, SI))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
else
return NULL_TREE;
case CFN_BUILT_IN_BSWAP64:
if (ARM_CHECK_BUILTIN_MODE (2, DI))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
else
return NULL_TREE;
CASE_CFN_COPYSIGN:
if (ARM_CHECK_BUILTIN_MODE (2, SF))
return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
else if (ARM_CHECK_BUILTIN_MODE (4, SF))
return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
else
return NULL_TREE;

default:
return NULL_TREE;
}
return NULL_TREE;
}
#undef ARM_FIND_VCVT_VARIANT
#undef ARM_FIND_VCVTU_VARIANT
#undef ARM_CHECK_BUILTIN_MODE
#undef ARM_FIND_VRINT_VARIANT

void
arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
Expand Down
1 change: 0 additions & 1 deletion gcc/config/arm/arm-protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
rtx (*) (rtx, rtx, rtx));
extern rtx mve_bool_vec_to_const (rtx const_vec);
extern rtx neon_make_constant (rtx, bool generate = true);
extern tree arm_builtin_vectorized_function (unsigned int, tree, tree);
extern void neon_expand_vector_init (rtx, rtx);
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
extern void arm_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
Expand Down
4 changes: 0 additions & 4 deletions gcc/config/arm/arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -739,10 +739,6 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

Expand Down
1 change: 0 additions & 1 deletion gcc/config/arm/arm_neon_builtins.def
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ VAR1 (UNOP, vcvtv4hf, v4sf)
VAR10 (TERNOP, vbsl,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (TERNOP, vbsl, v8hf, v4hf)
VAR2 (UNOP, copysignf, v2sf, v4sf)
VAR2 (UNOP, vrintn, v2sf, v4sf)
VAR2 (UNOP, vrinta, v2sf, v4sf)
VAR2 (UNOP, vrintp, v2sf, v4sf)
Expand Down
7 changes: 7 additions & 0 deletions gcc/config/arm/iterators.md
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,13 @@
(UNSPEC_VRINTA "unconditional") (UNSPEC_VRINTM "unconditional")
(UNSPEC_VRINTR "nocond") (UNSPEC_VRINTX "nocond")])

(define_int_attr nvrint_pattern [(UNSPEC_NVRINTZ "btrunc")
(UNSPEC_NVRINTP "ceil")
(UNSPEC_NVRINTA "round")
(UNSPEC_NVRINTM "floor")
(UNSPEC_NVRINTX "rint")
(UNSPEC_NVRINTN "roundeven")])

(define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p")
(UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m")
(UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")])
Expand Down
17 changes: 16 additions & 1 deletion gcc/config/arm/neon.md
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,13 @@
[(set_attr "type" "neon_fp_mla_s<q>")]
)

(define_expand "<NEON_VRINT:nvrint_pattern><VCVTF:mode>2"
[(set (match_operand:VCVTF 0 "s_register_operand")
(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand")]
NEON_VRINT))]
"TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations"
)

(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
[(set (match_operand:VCVTF 0 "s_register_operand" "=w")
(unspec:VCVTF [(match_operand:VCVTF 1
Expand All @@ -645,6 +652,14 @@
[(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)

(define_expand "l<NEON_VCVT:nvrint_pattern><su_optab><VCVTF:mode><v_cmp_result>2"
[(set (match_operand:<V_cmp_result> 0 "register_operand")
(FIXUORS:<V_cmp_result>
(unspec:VCVTF [(match_operand:VCVTF 1 "register_operand")]
NEON_VCVT)))]
"TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations"
)

(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
(FIXUORS:<V_cmp_result> (unspec:VCVTF
Expand Down Expand Up @@ -3059,7 +3074,7 @@
"TARGET_I8MM"
)

(define_expand "neon_copysignf<mode>"
(define_expand "copysign<mode>3"
[(match_operand:VCVTF 0 "register_operand")
(match_operand:VCVTF 1 "register_operand")
(match_operand:VCVTF 2 "register_operand")]
Expand Down
50 changes: 50 additions & 0 deletions gcc/testsuite/gcc.target/arm/vect_binary_1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/* { dg-do compile { target { arm*-*-* } } } */
/* { dg-require-effective-target arm_hard_ok } */
/* { dg-require-effective-target arm_v8_neon_ok } */
/* { dg-add-options arm_v8_neon } */
/* { dg-additional-options "-O3 -mfloat-abi=hard" } */
/* { dg-final { check-function-bodies "**" "" "" } } */

#include <stdint.h>

#define TEST2(OUT, NAME, IN) \
OUT __attribute__((vector_size(sizeof(OUT) * 2))) \
test2_##OUT##_##NAME##_##IN (float dummy, \
IN __attribute__((vector_size(sizeof(IN) * 2))) y, \
IN __attribute__((vector_size(sizeof(IN) * 2))) z) \
{ \
OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \
x[0] = __builtin_##NAME (y[0], z[0]); \
x[1] = __builtin_##NAME (y[1], z[1]); \
return x; \
}

#define TEST4(OUT, NAME, IN) \
OUT __attribute__((vector_size(sizeof(OUT) * 4))) \
test4_##OUT##_##NAME##_##IN (float dummy, \
IN __attribute__((vector_size(sizeof(OUT) * 4))) y, \
IN __attribute__((vector_size(sizeof(OUT) * 4))) z) \
{ \
OUT __attribute__((vector_size(sizeof(OUT) * 4))) x; \
x[0] = __builtin_##NAME (y[0], z[0]); \
x[1] = __builtin_##NAME (y[1], z[1]); \
x[2] = __builtin_##NAME (y[2], z[2]); \
x[3] = __builtin_##NAME (y[3], z[3]); \
return x; \
}

/*
** test2_float_copysignf_float: { target arm_little_endian }
** vmov.i32 d0, #(0x80000000|2147483648)(\s+.*)
** vbsl d0, d2, d1
** bx lr
*/
TEST2 (float, copysignf, float)

/*
** test4_float_copysignf_float: { target arm_little_endian }
** vmov.i32 q0, #(0x80000000|2147483648)(\s+.*)
** vbsl q0, q2, q1
** bx lr
*/
TEST4 (float, copysignf, float)
Loading

0 comments on commit 7313381

Please sign in to comment.