Skip to content

Commit

Permalink
arm: [MVE intrinsics] rework vddup vidup
Browse files Browse the repository at this point in the history
Implement vddup and vidup using the new MVE builtins framework.

We generate better code because we take advantage of the two outputs
produced by the v[id]dup instructions.

For instance, before:
	ldr	r3, [r0]
	sub	r2, r3, #8
	str	r2, [r0]
	mov	r2, r3
	vddup.u16	q3, r2, #1

now:
	ldr	r2, [r0]
	vddup.u16	q3, r2, #1
	str	r2, [r0]

2024-08-21  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (class viddup_impl): New.
	(vddup): New.
	(vidup): New.
	* config/arm/arm-mve-builtins-base.def (vddupq): New.
	(vidupq): New.
	* config/arm/arm-mve-builtins-base.h (vddupq): New.
	(vidupq): New.
	* config/arm/arm_mve.h (vddupq_m): Delete.
	(vddupq_u8): Delete.
	(vddupq_u32): Delete.
	(vddupq_u16): Delete.
	(vidupq_m): Delete.
	(vidupq_u8): Delete.
	(vidupq_u32): Delete.
	(vidupq_u16): Delete.
	(vddupq_x_u8): Delete.
	(vddupq_x_u16): Delete.
	(vddupq_x_u32): Delete.
	(vidupq_x_u8): Delete.
	(vidupq_x_u16): Delete.
	(vidupq_x_u32): Delete.
	(vddupq_m_n_u8): Delete.
	(vddupq_m_n_u32): Delete.
	(vddupq_m_n_u16): Delete.
	(vddupq_m_wb_u8): Delete.
	(vddupq_m_wb_u16): Delete.
	(vddupq_m_wb_u32): Delete.
	(vddupq_n_u8): Delete.
	(vddupq_n_u32): Delete.
	(vddupq_n_u16): Delete.
	(vddupq_wb_u8): Delete.
	(vddupq_wb_u16): Delete.
	(vddupq_wb_u32): Delete.
	(vidupq_m_n_u8): Delete.
	(vidupq_m_n_u32): Delete.
	(vidupq_m_n_u16): Delete.
	(vidupq_m_wb_u8): Delete.
	(vidupq_m_wb_u16): Delete.
	(vidupq_m_wb_u32): Delete.
	(vidupq_n_u8): Delete.
	(vidupq_n_u32): Delete.
	(vidupq_n_u16): Delete.
	(vidupq_wb_u8): Delete.
	(vidupq_wb_u16): Delete.
	(vidupq_wb_u32): Delete.
	(vddupq_x_n_u8): Delete.
	(vddupq_x_n_u16): Delete.
	(vddupq_x_n_u32): Delete.
	(vddupq_x_wb_u8): Delete.
	(vddupq_x_wb_u16): Delete.
	(vddupq_x_wb_u32): Delete.
	(vidupq_x_n_u8): Delete.
	(vidupq_x_n_u16): Delete.
	(vidupq_x_n_u32): Delete.
	(vidupq_x_wb_u8): Delete.
	(vidupq_x_wb_u16): Delete.
	(vidupq_x_wb_u32): Delete.
	(__arm_vddupq_m_n_u8): Delete.
	(__arm_vddupq_m_n_u32): Delete.
	(__arm_vddupq_m_n_u16): Delete.
	(__arm_vddupq_m_wb_u8): Delete.
	(__arm_vddupq_m_wb_u16): Delete.
	(__arm_vddupq_m_wb_u32): Delete.
	(__arm_vddupq_n_u8): Delete.
	(__arm_vddupq_n_u32): Delete.
	(__arm_vddupq_n_u16): Delete.
	(__arm_vidupq_m_n_u8): Delete.
	(__arm_vidupq_m_n_u32): Delete.
	(__arm_vidupq_m_n_u16): Delete.
	(__arm_vidupq_n_u8): Delete.
	(__arm_vidupq_m_wb_u8): Delete.
	(__arm_vidupq_m_wb_u16): Delete.
	(__arm_vidupq_m_wb_u32): Delete.
	(__arm_vidupq_n_u32): Delete.
	(__arm_vidupq_n_u16): Delete.
	(__arm_vidupq_wb_u8): Delete.
	(__arm_vidupq_wb_u16): Delete.
	(__arm_vidupq_wb_u32): Delete.
	(__arm_vddupq_wb_u8): Delete.
	(__arm_vddupq_wb_u16): Delete.
	(__arm_vddupq_wb_u32): Delete.
	(__arm_vddupq_x_n_u8): Delete.
	(__arm_vddupq_x_n_u16): Delete.
	(__arm_vddupq_x_n_u32): Delete.
	(__arm_vddupq_x_wb_u8): Delete.
	(__arm_vddupq_x_wb_u16): Delete.
	(__arm_vddupq_x_wb_u32): Delete.
	(__arm_vidupq_x_n_u8): Delete.
	(__arm_vidupq_x_n_u16): Delete.
	(__arm_vidupq_x_n_u32): Delete.
	(__arm_vidupq_x_wb_u8): Delete.
	(__arm_vidupq_x_wb_u16): Delete.
	(__arm_vidupq_x_wb_u32): Delete.
	(__arm_vddupq_m): Delete.
	(__arm_vddupq_u8): Delete.
	(__arm_vddupq_u32): Delete.
	(__arm_vddupq_u16): Delete.
	(__arm_vidupq_m): Delete.
	(__arm_vidupq_u8): Delete.
	(__arm_vidupq_u32): Delete.
	(__arm_vidupq_u16): Delete.
	(__arm_vddupq_x_u8): Delete.
	(__arm_vddupq_x_u16): Delete.
	(__arm_vddupq_x_u32): Delete.
	(__arm_vidupq_x_u8): Delete.
	(__arm_vidupq_x_u16): Delete.
	(__arm_vidupq_x_u32): Delete.
  • Loading branch information
Christophe Lyon committed Oct 18, 2024
1 parent e38566a commit d7250b6
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 676 deletions.
112 changes: 112 additions & 0 deletions gcc/config/arm/arm-mve-builtins-base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "basic-block.h"
#include "function.h"
#include "gimple.h"
#include "emit-rtl.h"
#include "arm-mve-builtins.h"
#include "arm-mve-builtins-shapes.h"
#include "arm-mve-builtins-base.h"
Expand Down Expand Up @@ -402,6 +403,115 @@ class vcvtxq_impl : public function_base
}
};

/* Map the vidup / vddup function directly to CODE (UNSPEC, M) where M is the
   vector mode associated with type suffix 0.  We need this special case
   because in MODE_wb the builtins dereference the first parameter and update
   its contents.  We also have to insert the two additional parameters needed
   by the builtins compared to the intrinsics.  */
class viddup_impl : public function_base
{
public:
  CONSTEXPR viddup_impl (bool inc_dec)
    : m_inc_dec (inc_dec)
  {}

  /* Increment (true) or decrement (false).  */
  bool m_inc_dec;

  /* Only the _wb forms access memory: they read the start offset through
     the pointer argument and write the updated offset back through it.
     All other forms report no special call properties.  */
  unsigned int
  call_properties (const function_instance &fi) const override
  {
    if (fi.mode_suffix_id == MODE_wb)
      return CP_WRITE_MEMORY | CP_READ_MEMORY;
    else
      return 0;
  }

  /* The scalar accessed through the pointer in _wb mode is a uint32.  */
  tree
  memory_scalar_type (const function_instance &) const override
  {
    return get_typenode_from_name (UINT32_TYPE);
  }

  /* Expand a call to one of the vidupq / vddupq forms (_n or _wb mode;
     unpredicated, "m" or "x" predicated) and return the result of the
     selected use_*_insn helper.  In _wb mode, also emit the load of the
     start offset before the instruction and the store of the updated
     offset after it.  */
  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    insn_code code;
    rtx insns;
    /* Only set and used in _wb mode.  */
    rtx offset_ptr = nullptr;
    rtx new_offset;
    int offset_arg_no;
    rtx incr, total_incr;

    if (! e.type_suffix (0).integer_p)
      gcc_unreachable ();

    if ((e.mode_suffix_id != MODE_n)
        && (e.mode_suffix_id != MODE_wb))
      gcc_unreachable ();

    /* With "m" predication the offset is argument 1 instead of 0
       (presumably because argument 0 is then the "inactive" vector --
       see use_cond_insn (code, 0) below).  */
    offset_arg_no = (e.pred == PRED_m) ? 1 : 0;

    /* In _wb mode, the start offset is passed via a pointer,
       dereference it.  */
    if (e.mode_suffix_id == MODE_wb)
      {
        rtx offset = gen_reg_rtx (SImode);
        offset_ptr = e.args[offset_arg_no];
        emit_insn (gen_rtx_SET (offset, gen_rtx_MEM (SImode, offset_ptr)));
        e.args[offset_arg_no] = offset;
      }

    /* We have to shuffle parameters because the builtin needs additional
       arguments:
       - the updated "new_offset"
       - total increment (incr * number of lanes)  */
    new_offset = gen_reg_rtx (SImode);
    e.args.quick_insert (offset_arg_no, new_offset);

    /* After the insertion above, the start offset sits at
       offset_arg_no + 1 and the increment at offset_arg_no + 2.  The
       increment is read with INTVAL, so it must be a CONST_INT here.  */
    incr = e.args[offset_arg_no + 2];
    total_incr = gen_int_mode (INTVAL (incr) * GET_MODE_NUNITS (mode),
                               SImode);
    e.args.quick_push (total_incr);

    /* _wb mode uses the _n builtins and adds code to update the
       offset.  */
    switch (e.pred)
      {
      case PRED_none:
        /* No predicate.  */
        code = m_inc_dec
          ? code_for_mve_q_u_insn (VIDUPQ, mode)
          : code_for_mve_q_u_insn (VDDUPQ, mode);
        insns = e.use_exact_insn (code);
        break;

      case PRED_m:
      case PRED_x:
        /* "m" or "x" predicate.  */
        code = m_inc_dec
          ? code_for_mve_q_m_wb_u_insn (VIDUPQ_M, mode)
          : code_for_mve_q_m_wb_u_insn (VDDUPQ_M, mode);

        if (e.pred == PRED_m)
          insns = e.use_cond_insn (code, 0);
        else
          insns = e.use_pred_x_insn (code);
        break;

      default:
        gcc_unreachable ();
      }

    /* Update offset as appropriate.  The store uses SImode, the mode of
       the uint32 being written: it matches the load above and the SImode
       NEW_OFFSET register.  */
    if (e.mode_suffix_id == MODE_wb)
      emit_insn (gen_rtx_SET (gen_rtx_MEM (SImode, offset_ptr), new_offset));

    return insns;
  }
};

} /* end anonymous namespace */

namespace arm_mve {
Expand Down Expand Up @@ -614,7 +724,9 @@ FUNCTION_WITHOUT_N_NO_F (vcvtmq, VCVTMQ)
FUNCTION_WITHOUT_N_NO_F (vcvtnq, VCVTNQ)
FUNCTION_WITHOUT_N_NO_F (vcvtpq, VCVTPQ)
FUNCTION (vcvttq, vcvtxq_impl, (VCVTTQ_F16_F32, VCVTTQ_M_F16_F32, VCVTTQ_F32_F16, VCVTTQ_M_F32_F16))
/* The (false) argument is presumably forwarded to the viddup_impl
   constructor as inc_dec: false selects the decrementing variant.  */
FUNCTION (vddupq, viddup_impl, (false))
FUNCTION (vdupq, vdupq_impl, (VDUPQ_M_N_S, VDUPQ_M_N_U, VDUPQ_M_N_F))
/* The (true) argument is presumably forwarded to the viddup_impl
   constructor as inc_dec: true selects the incrementing variant.  */
FUNCTION (vidupq, viddup_impl, (true))
FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F))
FUNCTION (vfmasq, unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_F, -1, -1, -1, -1, -1, VFMASQ_M_N_F))
Expand Down
2 changes: 2 additions & 0 deletions gcc/config/arm/arm-mve-builtins-base.def
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,14 @@ DEF_MVE_FUNCTION (vctp16q, vctp, none, m_or_none)
DEF_MVE_FUNCTION (vctp32q, vctp, none, m_or_none)
DEF_MVE_FUNCTION (vctp64q, vctp, none, m_or_none)
DEF_MVE_FUNCTION (vctp8q, vctp, none, m_or_none)
DEF_MVE_FUNCTION (vddupq, viddup, all_unsigned, mx_or_none)
DEF_MVE_FUNCTION (vdupq, unary_n, all_integer, mx_or_none)
DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vhcaddq_rot270, binary, all_signed, mx_or_none)
DEF_MVE_FUNCTION (vhcaddq_rot90, binary, all_signed, mx_or_none)
DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vidupq, viddup, all_unsigned, mx_or_none)
DEF_MVE_FUNCTION (vld1q, load, all_integer, none)
DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none)
DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none)
Expand Down
2 changes: 2 additions & 0 deletions gcc/config/arm/arm-mve-builtins-base.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ extern const function_base *const vcvtnq;
extern const function_base *const vcvtpq;
extern const function_base *const vcvtq;
extern const function_base *const vcvttq;
extern const function_base *const vddupq;
extern const function_base *const vdupq;
extern const function_base *const veorq;
extern const function_base *const vfmaq;
Expand All @@ -75,6 +76,7 @@ extern const function_base *const vhaddq;
extern const function_base *const vhcaddq_rot270;
extern const function_base *const vhcaddq_rot90;
extern const function_base *const vhsubq;
extern const function_base *const vidupq;
extern const function_base *const vld1q;
extern const function_base *const vmaxaq;
extern const function_base *const vmaxavq;
Expand Down
Loading

0 comments on commit d7250b6

Please sign in to comment.