@@ -671,4 +671,52 @@ entry:
671
671
ret void
672
672
}
673
673
674
+ ; Test where 14 scalable bytes (so up to 14 * 16 = 224 bytes) of unprobed
675
+ ; stack lie below the save location of `p9`.
676
+ define void @sve_unprobed_area (<vscale x 4 x float > %a , i32 %n ) #0 {
677
+ ; CHECK-LABEL: sve_unprobed_area:
678
+ ; CHECK: // %bb.0: // %entry
679
+ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
680
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
681
+ ; CHECK-NEXT: .cfi_offset w29, -16
682
+ ; CHECK-NEXT: addvl sp, sp, #-4
683
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
684
+ ; CHECK-NEXT: str xzr, [sp]
685
+ ; CHECK-NEXT: str p9, [sp, #7, mul vl] // 2-byte Folded Spill
686
+ ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill
687
+ ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill
688
+ ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill
689
+ ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
690
+ ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
691
+ ; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
692
+ ; CHECK-NEXT: addvl sp, sp, #-4
693
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
694
+ ; CHECK-NEXT: //APP
695
+ ; CHECK-NEXT: //NO_APP
696
+ ; CHECK-NEXT: addvl sp, sp, #4
697
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
698
+ ; CHECK-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload
699
+ ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload
700
+ ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload
701
+ ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload
702
+ ; CHECK-NEXT: addvl sp, sp, #4
703
+ ; CHECK-NEXT: .cfi_def_cfa wsp, 16
704
+ ; CHECK-NEXT: .cfi_restore z8
705
+ ; CHECK-NEXT: .cfi_restore z9
706
+ ; CHECK-NEXT: .cfi_restore z10
707
+ ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
708
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
709
+ ; CHECK-NEXT: .cfi_restore w29
710
+ ; CHECK-NEXT: ret
711
+ entry:
712
+ ; Empty inline asm whose only effect is to clobber z8, z9, z10 and p9; the
+ ; expected output above spills and reloads exactly these four registers.
+ call void asm sideeffect "" , "~{z8},~{z9},~{z10},~{p9}" ()
713
+
714
+ ; Four scalable-vector stack slots that are never read or written.
+ ; NOTE(review): presumably these account for the second `addvl sp, sp, #-4`
+ ; in the expected output - confirm.
+ %v0 = alloca <vscale x 4 x float >, align 16
715
+ %v1 = alloca <vscale x 4 x float >, align 16
716
+ %v2 = alloca <vscale x 4 x float >, align 16
717
+ %v3 = alloca <vscale x 4 x float >, align 16
718
+
719
+ ret void
720
+ }
721
+
674
722
; Attribute group #0: async unwind tables, "inline-asm" stack probing, no frame pointer, SVE target feature enabled.
attributes #0 = { uwtable (async) "probe-stack" ="inline-asm" "frame-pointer" ="none" "target-features" ="+sve" }
0 commit comments