Skip to content

Commit 8d07d9f

Browse files
[AArch64][SME] Zero reserved bytes when allocating a new TPIDR2 object (#68411)
The SME support routines expect the reserved bytes (bytes 10-15) of the TPIDR2 block to be zero. This patch ensures that those reserved bytes are cleared when a new TPIDR2 block is allocated.
1 parent 42c564d commit 8d07d9f

File tree

4 files changed

+48
-18
lines changed

4 files changed

+48
-18
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6383,6 +6383,17 @@ AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
63836383
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
63846384
Chain = DAG.getStore(Chain, DL, Buffer, Ptr, MPI);
63856385

6386+
// Set the reserved bytes (10-15) to zero
6387+
EVT PtrTy = Ptr.getValueType();
6388+
SDValue ReservedPtr =
6389+
DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(10, DL, PtrTy));
6390+
Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i16), ReservedPtr,
6391+
MPI);
6392+
ReservedPtr =
6393+
DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(12, DL, PtrTy));
6394+
Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i32), ReservedPtr,
6395+
MPI);
6396+
63866397
return TPIDR2Obj;
63876398
}
63886399

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o
220220
; CHECK-COMMON-NEXT: msub x8, x8, x8, x9
221221
; CHECK-COMMON-NEXT: mov sp, x8
222222
; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
223+
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
224+
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
223225
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
224226
; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
225227
; CHECK-COMMON-NEXT: b .LBB6_1
@@ -255,6 +257,8 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
255257
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
256258
; CHECK-COMMON-NEXT: mov sp, x9
257259
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
260+
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
261+
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
258262
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
259263
; CHECK-COMMON-NEXT: sub x8, x29, #16
260264
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
@@ -292,10 +296,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
292296
; CHECK-COMMON-NEXT: mov x9, sp
293297
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
294298
; CHECK-COMMON-NEXT: mov sp, x9
299+
; CHECK-COMMON-NEXT: sub x10, x29, #16
300+
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
301+
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
295302
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
296-
; CHECK-COMMON-NEXT: sub x9, x29, #16
297303
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
298-
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
304+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
299305
; CHECK-COMMON-NEXT: bl __addtf3
300306
; CHECK-COMMON-NEXT: smstart za
301307
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
@@ -352,10 +358,12 @@ define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nou
352358
; CHECK-COMMON-NEXT: mov x9, sp
353359
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
354360
; CHECK-COMMON-NEXT: mov sp, x9
361+
; CHECK-COMMON-NEXT: sub x10, x29, #16
362+
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
363+
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
355364
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
356-
; CHECK-COMMON-NEXT: sub x9, x29, #16
357365
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
358-
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
366+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
359367
; CHECK-COMMON-NEXT: bl fmod
360368
; CHECK-COMMON-NEXT: smstart za
361369
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0

llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
1616
; CHECK-NEXT: mov x9, sp
1717
; CHECK-NEXT: msub x9, x8, x8, x9
1818
; CHECK-NEXT: mov sp, x9
19+
; CHECK-NEXT: sub x10, x29, #16
20+
; CHECK-NEXT: stur wzr, [x29, #-4]
21+
; CHECK-NEXT: sturh wzr, [x29, #-6]
1922
; CHECK-NEXT: stur x9, [x29, #-16]
20-
; CHECK-NEXT: sub x9, x29, #16
2123
; CHECK-NEXT: sturh w8, [x29, #-8]
22-
; CHECK-NEXT: msr TPIDR2_EL0, x9
24+
; CHECK-NEXT: msr TPIDR2_EL0, x10
2325
; CHECK-NEXT: bl private_za_callee
2426
; CHECK-NEXT: smstart za
2527
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -49,6 +51,8 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
4951
; CHECK-NEXT: msub x8, x19, x19, x8
5052
; CHECK-NEXT: mov sp, x8
5153
; CHECK-NEXT: sub x20, x29, #16
54+
; CHECK-NEXT: stur wzr, [x29, #-4]
55+
; CHECK-NEXT: sturh wzr, [x29, #-6]
5256
; CHECK-NEXT: stur x8, [x29, #-16]
5357
; CHECK-NEXT: sturh w19, [x29, #-8]
5458
; CHECK-NEXT: msr TPIDR2_EL0, x20
@@ -92,10 +96,12 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_psta
9296
; CHECK-NEXT: mov x9, sp
9397
; CHECK-NEXT: msub x9, x8, x8, x9
9498
; CHECK-NEXT: mov sp, x9
99+
; CHECK-NEXT: sub x10, x29, #16
100+
; CHECK-NEXT: stur wzr, [x29, #-4]
101+
; CHECK-NEXT: sturh wzr, [x29, #-6]
95102
; CHECK-NEXT: stur x9, [x29, #-16]
96-
; CHECK-NEXT: sub x9, x29, #16
97103
; CHECK-NEXT: sturh w8, [x29, #-8]
98-
; CHECK-NEXT: msr TPIDR2_EL0, x9
104+
; CHECK-NEXT: msr TPIDR2_EL0, x10
99105
; CHECK-NEXT: bl cosf
100106
; CHECK-NEXT: smstart za
101107
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -128,10 +134,12 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
128134
; CHECK-NEXT: mov x9, sp
129135
; CHECK-NEXT: msub x9, x8, x8, x9
130136
; CHECK-NEXT: mov sp, x9
137+
; CHECK-NEXT: sub x10, x29, #80
138+
; CHECK-NEXT: stur wzr, [x29, #-68]
139+
; CHECK-NEXT: sturh wzr, [x29, #-70]
131140
; CHECK-NEXT: stur x9, [x29, #-80]
132-
; CHECK-NEXT: sub x9, x29, #80
133141
; CHECK-NEXT: sturh w8, [x29, #-72]
134-
; CHECK-NEXT: msr TPIDR2_EL0, x9
142+
; CHECK-NEXT: msr TPIDR2_EL0, x10
135143
; CHECK-NEXT: bl __arm_sme_state
136144
; CHECK-NEXT: and x19, x0, #0x1
137145
; CHECK-NEXT: tbz w19, #0, .LBB3_2
@@ -181,10 +189,9 @@ define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_
181189
; CHECK-NEXT: mov x9, sp
182190
; CHECK-NEXT: msub x8, x8, x8, x9
183191
; CHECK-NEXT: mov sp, x8
184-
; CHECK-NEXT: stur x8, [x29, #-80]
185-
; CHECK-NEXT: sub x8, x29, #80
186-
; CHECK-NEXT: sturh wzr, [x29, #-72]
187-
; CHECK-NEXT: msr TPIDR2_EL0, x8
192+
; CHECK-NEXT: sub x9, x29, #80
193+
; CHECK-NEXT: stp x8, xzr, [x29, #-80]
194+
; CHECK-NEXT: msr TPIDR2_EL0, x9
188195
; CHECK-NEXT: bl __arm_sme_state
189196
; CHECK-NEXT: and x19, x0, #0x1
190197
; CHECK-NEXT: tbz w19, #0, .LBB4_2

llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
1414
; CHECK-NEXT: mov x9, sp
1515
; CHECK-NEXT: msub x9, x8, x8, x9
1616
; CHECK-NEXT: mov sp, x9
17+
; CHECK-NEXT: sub x10, x29, #16
18+
; CHECK-NEXT: stur wzr, [x29, #-4]
19+
; CHECK-NEXT: sturh wzr, [x29, #-6]
1720
; CHECK-NEXT: stur x9, [x29, #-16]
18-
; CHECK-NEXT: sub x9, x29, #16
1921
; CHECK-NEXT: sturh w8, [x29, #-8]
20-
; CHECK-NEXT: msr TPIDR2_EL0, x9
22+
; CHECK-NEXT: msr TPIDR2_EL0, x10
2123
; CHECK-NEXT: bl private_za_callee
2224
; CHECK-NEXT: smstart za
2325
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -45,10 +47,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
4547
; CHECK-NEXT: mov x9, sp
4648
; CHECK-NEXT: msub x9, x8, x8, x9
4749
; CHECK-NEXT: mov sp, x9
50+
; CHECK-NEXT: sub x10, x29, #16
51+
; CHECK-NEXT: stur wzr, [x29, #-4]
52+
; CHECK-NEXT: sturh wzr, [x29, #-6]
4853
; CHECK-NEXT: stur x9, [x29, #-16]
49-
; CHECK-NEXT: sub x9, x29, #16
5054
; CHECK-NEXT: sturh w8, [x29, #-8]
51-
; CHECK-NEXT: msr TPIDR2_EL0, x9
55+
; CHECK-NEXT: msr TPIDR2_EL0, x10
5256
; CHECK-NEXT: bl __addtf3
5357
; CHECK-NEXT: smstart za
5458
; CHECK-NEXT: mrs x8, TPIDR2_EL0

0 commit comments

Comments
 (0)