Skip to content

Commit

Permalink
[AArch64] Use INDEX for constant Neon step vectors (llvm#113424)
Browse files Browse the repository at this point in the history
When compiling for an SVE target we can use INDEX to generate constant
fixed-length step vectors, e.g.:
```
uint32x4_t foo() {
  return (uint32x4_t){0, 1, 2, 3};
}
```
Currently, this is compiled to a constant-pool load:
```
foo():
        adrp    x8, .LCPI1_0
        ldr     q0, [x8, :lo12:.LCPI1_0]
        ret
```
With this patch, it is compiled to a single INDEX instruction:
```
foo():
        index   z0.s, #0, #1
        ret
```

The logic for this was already in `LowerBUILD_VECTOR`, though it was
hidden under a check for `!Subtarget->isNeonAvailable()`. This patch
refactors this to enable the corresponding code path unconditionally for
constant step vectors (as long as we can use SVE for them).
  • Loading branch information
rj-jesus authored Oct 23, 2024
1 parent 294726d commit 8a9921f
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 13 deletions.
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14520,7 +14520,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();

if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
bool OverrideNEON = !Subtarget->isNeonAvailable() ||
cast<BuildVectorSDNode>(Op)->isConstantSequence();
if (useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerFixedLengthBuildVectorToSVE(Op, DAG);

// Try to build a simple constant vector.
Expand Down
20 changes: 8 additions & 12 deletions llvm/test/CodeGen/AArch64/active_lane_mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -430,10 +430,9 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v16i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI24_0
; CHECK-NEXT: dup v0.16b, w0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0]
; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: dup v1.16b, w0
; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b
; CHECK-NEXT: dup v1.16b, w1
; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
Expand All @@ -444,10 +443,9 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v8i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.8b, w0
; CHECK-NEXT: adrp x8, .LCPI25_0
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0]
; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: dup v1.8b, w0
; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
; CHECK-NEXT: dup v1.8b, w1
; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret
Expand All @@ -459,9 +457,8 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v4i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.4h, w0
; CHECK-NEXT: adrp x8, .LCPI26_0
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI26_0]
; CHECK-NEXT: dup v3.4h, w1
; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: bic v3.4h, #255, lsl #8
Expand All @@ -478,8 +475,7 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0x0000ff000000ff
; CHECK-NEXT: dup v1.2s, w0
; CHECK-NEXT: adrp x8, .LCPI27_0
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI27_0]
; CHECK-NEXT: index z2.s, #0, #1
; CHECK-NEXT: dup v3.2s, w1
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
Expand Down
135 changes: 135 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; 128-bit vectors

; Returns the constant step vector <0, 1, ..., 15> as <16 x i8>.
; Expected codegen: a single INDEX on byte elements with start #0 and step #1.
define <16 x i8> @v16i8() #0 {
; CHECK-LABEL: v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
}

; Returns the constant step vector <0, 1, ..., 7> as <8 x i16>.
; Expected codegen: a single INDEX on halfword elements with start #0 and step #1.
define <8 x i16> @v8i16() #0 {
; CHECK-LABEL: v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
}

; Returns the constant step vector <0, 1, 2, 3> as <4 x i32>.
; Expected codegen: a single INDEX on word elements with start #0 and step #1.
define <4 x i32> @v4i32() #0 {
; CHECK-LABEL: v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
}

; Returns the constant step vector <0, 1> as <2 x i64>.
; Expected codegen: a single INDEX on doubleword elements with start #0 and step #1.
define <2 x i64> @v2i64() #0 {
; CHECK-LABEL: v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <2 x i64> <i64 0, i64 1>
}

; 64-bit vectors

; 64-bit variant: returns the constant step vector <0, 1, ..., 7> as <8 x i8>.
; Expected codegen: a single INDEX on byte elements with start #0 and step #1;
; the kill annotation below shows the result being taken from $d0.
define <8 x i8> @v8i8() #0 {
; CHECK-LABEL: v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
ret <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
}

; 64-bit variant: returns the constant step vector <0, 1, 2, 3> as <4 x i16>.
; Expected codegen: a single INDEX on halfword elements with start #0 and step #1;
; the kill annotation below shows the result being taken from $d0.
define <4 x i16> @v4i16() #0 {
; CHECK-LABEL: v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
ret <4 x i16> <i16 0, i16 1, i16 2, i16 3>
}

; 64-bit variant: returns the constant step vector <0, 1> as <2 x i32>.
; Expected codegen: a single INDEX on word elements with start #0 and step #1;
; the kill annotation below shows the result being taken from $d0.
define <2 x i32> @v2i32() #0 {
; CHECK-LABEL: v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
ret <2 x i32> <i32 0, i32 1>
}

; Positive test: non-zero start and non-unit step (<1, 3, 5, 7>, i.e. start 1,
; step 2).
; Note: ideally this would be a single INDEX z0.s, #1, #2. The output checked
; below is instead INDEX z0.s, #0, #2 followed by an ORR with #0x1, which sets
; the low bit of each (even) element to produce the odd sequence.
define <4 x i32> @v4i32_non_zero_non_one() #0 {
; CHECK-LABEL: v4i32_non_zero_non_one:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #2
; CHECK-NEXT: orr z0.s, z0.s, #0x1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 1, i32 3, i32 5, i32 7>
}

; Positive test: non-zero start and non-unit step with negative immediates
; (<-1, -3, -5, -7>, i.e. start -1, step -2).
; Expected codegen: a single INDEX z0.s, #-1, #-2 with no fix-up instruction.
define <4 x i32> @v4i32_neg_immediates() #0 {
; CHECK-LABEL: v4i32_neg_immediates:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #-1, #-2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 -1, i32 -3, i32 -5, i32 -7>
}

; Positive test: the start value (16) is outside INDEX's immediate range.
; Expected codegen: INDEX z0.s, #0, #1 to build <0, 1, 2, 3>, then an ADD of
; #16 to every element to reach <16, 17, 18, 19>.
define <4 x i32> @v4i32_out_range_start() #0 {
; CHECK-LABEL: v4i32_out_range_start:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 16, i32 17, i32 18, i32 19>
}

; Positive test: the step value (16) is outside INDEX's immediate range.
; Expected codegen: the step is first materialised in w8 with a MOV, then
; passed to INDEX as a register operand (INDEX z0.s, #0, w8).
define <4 x i32> @v4i32_out_range_step() #0 {
; CHECK-LABEL: v4i32_out_range_step:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16 // =0x10
; CHECK-NEXT: index z0.s, #0, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 0, i32 16, i32 32, i32 48>
}

; Positive test: both the start (16) and the step (16) are outside INDEX's
; immediate range.
; Expected codegen: the step is materialised in w8 and used as a register
; operand of INDEX (starting from #0), then #16 is added to every element to
; apply the start offset.
define <4 x i32> @v4i32_out_range_start_step() #0 {
; CHECK-LABEL: v4i32_out_range_start_step:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16 // =0x10
; CHECK-NEXT: index z0.s, #0, w8
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 16, i32 32, i32 48, i32 64>
}

; Negative test: <0, 2, 2, 3> is not a constant sequence (the differences
; between consecutive elements are 2, 0, 1), so INDEX must not be used.
; Expected codegen: the vector is loaded from the constant pool (ADRP + LDR).
define <4 x i32> @v4i32_non_sequential() #0 {
; CHECK-LABEL: v4i32_non_sequential:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: ret
ret <4 x i32> <i32 0, i32 2, i32 2, i32 3>
}

0 comments on commit 8a9921f

Please sign in to comment.