Skip to content

Commit

Permalink
arm: Use uxtb rN, rM, ror #8 to implement zero_extract on armv6.
Browse files Browse the repository at this point in the history
Examining the code generated for the following C snippet on a
raspberry pi:

int popcount_lut8(unsigned *buf, int n)
{
  int cnt=0;
  unsigned int i;
  do {
    i = *buf;
    cnt += lut[i&255];
    cnt += lut[i>>8&255];
    cnt += lut[i>>16&255];
    cnt += lut[i>>24];
    buf++;
  } while(--n);
  return cnt;
}

I was surprised to see the following instruction sequence generated by the
compiler:

  mov    r5, r2, lsr #8
  uxtb   r5, r5

This sequence can be performed by a single ARM instruction:

  uxtb   r5, r2, ror #8

The attached patch allows GCC's combine pass to take advantage of ARM's
uxtb-with-rotate functionality to implement the above zero_extract, and
likewise to use sxtb with rotate to implement sign_extract.  ARM's
uxtb and sxtb can only be used with rotates of 0, 8, 16 and 24, and of
these only 8 and 16 are useful [ror #0 is a nop, and extends with
ror #24 can be implemented using regular shifts], so the approach here
is to add the six missing but useful instructions (uxtb/sxtb with ror #8
and ror #16, plus uxth/sxth with ror #8) as six separate define_insn
patterns in arm.md, rather than try to be clever with new predicates.

Later ARM hardware has advanced bit field instructions, and earlier
ARM cores didn't support extend-with-rotate, so this appears to only
benefit armv6 era CPUs (e.g. the raspberry pi).

Patch posted:
https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01339.html
Approved by Kyrill Tkachov:
https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01881.html

2024-05-12  Roger Sayle  <roger@nextmovesoftware.com>
	    Kyrill Tkachov  <kyrylo.tkachov@foss.arm.com>

	* config/arm/arm.md (*arm_zeroextractsi2_8_8, *arm_signextractsi2_8_8,
	*arm_zeroextractsi2_8_16, *arm_signextractsi2_8_16,
	*arm_zeroextractsi2_16_8, *arm_signextractsi2_16_8): New.

2024-05-12  Roger Sayle  <roger@nextmovesoftware.com>
	    Kyrill Tkachov  <kyrylo.tkachov@foss.arm.com>

	* gcc.target/arm/extend-ror.c: New test.
  • Loading branch information
rogersayle committed May 12, 2024
1 parent 83fb5e6 commit 4607799
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 0 deletions.
66 changes: 66 additions & 0 deletions gcc/config/arm/arm.md
Original file line number Diff line number Diff line change
Expand Up @@ -12647,6 +12647,72 @@
""
)

;; Implement zero_extract using the uxtb/uxth instructions with
;; the ror #N qualifier when applicable.

;; Zero-extract the 8-bit field that starts at bit 8 of operand 1
;; (bits 8-15) into operand 0, i.e. the RTL form of
;; (unsigned char)(x >> 8).  Emitted as a single "uxtb ..., ror #8"
;; instead of a shift followed by a separate uxtb.
;; Only enabled when TARGET_ARM && arm_arch6 holds.
(define_insn "*arm_zeroextractsi2_8_8"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(zero_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(const_int 8) (const_int 8)))]
"TARGET_ARM && arm_arch6"
"uxtb%?\\t%0, %1, ror #8"
[(set_attr "predicable" "yes")
(set_attr "type" "extend")]
)

;; Zero-extract the 8-bit field that starts at bit 16 of operand 1
;; (bits 16-23) into operand 0, i.e. the RTL form of
;; (unsigned char)(x >> 16).  Emitted as a single "uxtb ..., ror #16".
;; Only enabled when TARGET_ARM && arm_arch6 holds.
(define_insn "*arm_zeroextractsi2_8_16"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(zero_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(const_int 8) (const_int 16)))]
"TARGET_ARM && arm_arch6"
"uxtb%?\\t%0, %1, ror #16"
[(set_attr "predicable" "yes")
(set_attr "type" "extend")]
)

;; Zero-extract the 16-bit field that starts at bit 8 of operand 1
;; (bits 8-23) into operand 0, i.e. the RTL form of
;; (unsigned short)(x >> 8).  Emitted as a single "uxth ..., ror #8".
;; Only enabled when TARGET_ARM && arm_arch6 holds.
(define_insn "*arm_zeroextractsi2_16_8"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(zero_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(const_int 16) (const_int 8)))]
"TARGET_ARM && arm_arch6"
"uxth%?\\t%0, %1, ror #8"
[(set_attr "predicable" "yes")
(set_attr "type" "extend")]
)

;; Implement sign_extract using the sxtb/sxth instructions with
;; the ror #N qualifier when applicable.

;; Sign-extract the 8-bit field that starts at bit 8 of operand 1
;; (bits 8-15) into operand 0, i.e. the RTL form of
;; (int)(signed char)(x >> 8).  Emitted as a single "sxtb ..., ror #8".
;; Only enabled when TARGET_ARM && arm_arch6 holds.
(define_insn "*arm_signextractsi2_8_8"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(const_int 8) (const_int 8)))]
"TARGET_ARM && arm_arch6"
"sxtb%?\\t%0, %1, ror #8"
[(set_attr "predicable" "yes")
(set_attr "type" "extend")]
)

;; Sign-extract the 8-bit field that starts at bit 16 of operand 1
;; (bits 16-23) into operand 0, i.e. the RTL form of
;; (int)(signed char)(x >> 16).  Emitted as a single "sxtb ..., ror #16".
;; Only enabled when TARGET_ARM && arm_arch6 holds.
(define_insn "*arm_signextractsi2_8_16"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(const_int 8) (const_int 16)))]
"TARGET_ARM && arm_arch6"
"sxtb%?\\t%0, %1, ror #16"
[(set_attr "predicable" "yes")
(set_attr "type" "extend")]
)

;; Sign-extract the 16-bit field that starts at bit 8 of operand 1
;; (bits 8-23) into operand 0, i.e. the RTL form of
;; (int)(short)(x >> 8).  Emitted as a single "sxth ..., ror #8".
;; Only enabled when TARGET_ARM && arm_arch6 holds.
(define_insn "*arm_signextractsi2_16_8"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(const_int 16) (const_int 8)))]
"TARGET_ARM && arm_arch6"
"sxth%?\\t%0, %1, ror #8"
[(set_attr "predicable" "yes")
(set_attr "type" "extend")]
)

;; Patterns for LDRD/STRD in Thumb2 mode

(define_insn "*thumb2_ldrd"
Expand Down
38 changes: 38 additions & 0 deletions gcc/testsuite/gcc.target/arm/extend-ror.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-march=*" } { "-march=armv6" } } */
/* { dg-require-effective-target arm_arm_ok } */
/* { dg-add-options arm_arch_v6 } */
/* { dg-additional-options "-O -marm" } */

/* Return bits 8-15 of X, zero-extended.  Intended to exercise the
   *arm_zeroextractsi2_8_8 pattern; contributes one of the four
   ", ror #8" matches counted by the dg-final check.  */
unsigned int zeroextractsi2_8_8(unsigned int x)
{
return (unsigned char)(x>>8);
}

/* Return bits 16-23 of X, zero-extended.  Intended to exercise the
   *arm_zeroextractsi2_8_16 pattern; contributes one of the two
   ", ror #16" matches counted by the dg-final check.  */
unsigned int zeroextractsi2_8_16(unsigned int x)
{
return (unsigned char)(x>>16);
}

/* Return bits 8-15 of X, sign-extended to int (then converted to the
   unsigned return type).  Intended to exercise the
   *arm_signextractsi2_8_8 pattern; contributes one of the four
   ", ror #8" matches counted by the dg-final check.  */
unsigned int signextractsi2_8_8(unsigned int x)
{
return (int)(signed char)(x>>8);
}

/* Return bits 16-23 of X, sign-extended to int (then converted to the
   unsigned return type).  Intended to exercise the
   *arm_signextractsi2_8_16 pattern; contributes one of the two
   ", ror #16" matches counted by the dg-final check.  */
unsigned int signextractsi2_8_16(unsigned int x)
{
return (int)(signed char)(x>>16);
}

/* Return bits 8-23 of X, zero-extended.  Intended to exercise the
   *arm_zeroextractsi2_16_8 pattern; contributes one of the four
   ", ror #8" matches counted by the dg-final check.  */
unsigned int zeroextractsi2_16_8(unsigned int x)
{
return (unsigned short)(x>>8);
}

/* Return bits 8-23 of X, sign-extended to int (then converted to the
   unsigned return type).  Intended to exercise the
   *arm_signextractsi2_16_8 pattern; contributes one of the four
   ", ror #8" matches counted by the dg-final check.  */
unsigned int signextractsi2_16_8(unsigned int x)
{
return (int)(short)(x>>8);
}

/* { dg-final { scan-assembler-times ", ror #8" 4 } } */
/* { dg-final { scan-assembler-times ", ror #16" 2 } } */

0 comments on commit 4607799

Please sign in to comment.