Skip to content

Commit 1fccba5

Browse files
authored
[AArch64][PAC] Eliminate excessive MOVs when computing blend (#115185)
As function calls do not generally preserve X16 and X17, it is beneficial to allow AddrDisc operand of BLRA instruction to reside in these registers and make use of this condition when computing the discriminator. This can save up to two MOVs in cases such as loading a (signed) virtual function pointer via a (signed) pointer to vtable, for example ldr x9, [x16] mov x8, x16 mov x17, x8 movk x17, #34646, lsl #48 blraa x9, x17 can be simplified to ldr x8, [x16] movk x16, #34646, lsl #48 blraa x8, x16
1 parent 345b331 commit 1fccba5

File tree

3 files changed

+103
-48
lines changed

3 files changed

+103
-48
lines changed

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

+69-41
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,27 @@ class AArch64AsmPrinter : public AsmPrinter {
162162
// Emit the sequence for AUT or AUTPAC.
163163
void emitPtrauthAuthResign(const MachineInstr *MI);
164164

165-
// Emit the sequence to compute a discriminator into x17, or reuse AddrDisc.
166-
unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc);
165+
// Emit the sequence to compute the discriminator.
166+
//
167+
// ScratchReg should be x16/x17.
168+
//
169+
// The returned register is either unmodified AddrDisc or x16/x17.
170+
//
171+
// If the expanded pseudo is allowed to clobber AddrDisc register, setting
172+
// MayUseAddrAsScratch may save one MOV instruction, provided the address
173+
// is already in x16/x17 (i.e. return x16/x17 which is the *modified* AddrDisc
174+
// register at the same time):
175+
//
176+
// mov x17, x16
177+
// movk x17, #1234, lsl #48
178+
// ; x16 is not used anymore
179+
//
180+
// can be replaced by
181+
//
182+
// movk x16, #1234, lsl #48
183+
Register emitPtrauthDiscriminator(uint16_t Disc, Register AddrDisc,
184+
Register ScratchReg,
185+
bool MayUseAddrAsScratch = false);
167186

168187
// Emit the sequence for LOADauthptrstatic
169188
void LowerLOADauthptrstatic(const MachineInstr &MI);
@@ -1726,8 +1745,11 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
17261745
}
17271746
}
17281747

1729-
unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
1730-
unsigned AddrDisc) {
1748+
Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
1749+
Register AddrDisc,
1750+
Register ScratchReg,
1751+
bool MayUseAddrAsScratch) {
1752+
assert(ScratchReg == AArch64::X16 || ScratchReg == AArch64::X17);
17311753
// So far we've used NoRegister in pseudos. Now we need real encodings.
17321754
if (AddrDisc == AArch64::NoRegister)
17331755
AddrDisc = AArch64::XZR;
@@ -1737,16 +1759,24 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
17371759
if (!Disc)
17381760
return AddrDisc;
17391761

1740-
// If there's only a constant discriminator, MOV it into x17.
1762+
// If there's only a constant discriminator, MOV it into the scratch register.
17411763
if (AddrDisc == AArch64::XZR) {
1742-
emitMOVZ(AArch64::X17, Disc, 0);
1743-
return AArch64::X17;
1764+
emitMOVZ(ScratchReg, Disc, 0);
1765+
return ScratchReg;
17441766
}
17451767

1746-
// If there are both, emit a blend into x17.
1747-
emitMovXReg(AArch64::X17, AddrDisc);
1748-
emitMOVK(AArch64::X17, Disc, 48);
1749-
return AArch64::X17;
1768+
// If there are both, emit a blend into the scratch register.
1769+
1770+
// Check if we can save one MOV instruction.
1771+
assert(MayUseAddrAsScratch || ScratchReg != AddrDisc);
1772+
bool AddrDiscIsSafe = AddrDisc == AArch64::X16 || AddrDisc == AArch64::X17;
1773+
if (MayUseAddrAsScratch && AddrDiscIsSafe)
1774+
ScratchReg = AddrDisc;
1775+
else
1776+
emitMovXReg(ScratchReg, AddrDisc);
1777+
1778+
emitMOVK(ScratchReg, Disc, 48);
1779+
return ScratchReg;
17501780
}
17511781

17521782
/// Emits a code sequence to check an authenticated pointer value.
@@ -1963,7 +1993,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
19631993

19641994
// Compute aut discriminator into x17
19651995
assert(isUInt<16>(AUTDisc));
1966-
unsigned AUTDiscReg = emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc);
1996+
Register AUTDiscReg =
1997+
emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc, AArch64::X17);
19671998
bool AUTZero = AUTDiscReg == AArch64::XZR;
19681999
unsigned AUTOpc = getAUTOpcodeForKey(AUTKey, AUTZero);
19692000

@@ -2004,7 +2035,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
20042035

20052036
// Compute pac discriminator into x17
20062037
assert(isUInt<16>(PACDisc));
2007-
unsigned PACDiscReg = emitPtrauthDiscriminator(PACDisc, PACAddrDisc);
2038+
Register PACDiscReg =
2039+
emitPtrauthDiscriminator(PACDisc, PACAddrDisc, AArch64::X17);
20082040
bool PACZero = PACDiscReg == AArch64::XZR;
20092041
unsigned PACOpc = getPACOpcodeForKey(PACKey, PACZero);
20102042

@@ -2036,8 +2068,20 @@ void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
20362068

20372069
unsigned AddrDisc = MI->getOperand(3).getReg();
20382070

2039-
// Compute discriminator into x17
2040-
unsigned DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc);
2071+
// Make sure AddrDisc is solely used to compute the discriminator.
2072+
// While hardly meaningful, it is still possible to describe an authentication
2073+
// of a pointer against its own value (instead of storage address) with
2074+
// intrinsics, so use report_fatal_error instead of assert.
2075+
if (BrTarget == AddrDisc)
2076+
report_fatal_error("Branch target is signed with its own value");
2077+
2078+
// If we are printing BLRA pseudo instruction, then x16 and x17 are
2079+
// implicit-def'ed by the MI and AddrDisc is not used as any other input, so
2080+
// try to save one MOV by setting MayUseAddrAsScratch.
2081+
// Unlike BLRA, BRA pseudo is used to perform computed goto, and thus not
2082+
// declared as clobbering x16/x17.
2083+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, AArch64::X17,
2084+
/*MayUseAddrAsScratch=*/IsCall);
20412085
bool IsZeroDisc = DiscReg == AArch64::XZR;
20422086

20432087
unsigned Opc;
@@ -2331,16 +2375,7 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) {
23312375
}
23322376
}
23332377

2334-
unsigned DiscReg = AddrDisc;
2335-
if (Disc != 0) {
2336-
if (AddrDisc != AArch64::XZR) {
2337-
emitMovXReg(AArch64::X17, AddrDisc);
2338-
emitMOVK(AArch64::X17, Disc, 48);
2339-
} else {
2340-
emitMOVZ(AArch64::X17, Disc, 0);
2341-
}
2342-
DiscReg = AArch64::X17;
2343-
}
2378+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, AArch64::X17);
23442379

23452380
auto MIB = MCInstBuilder(getPACOpcodeForKey(Key, DiscReg == AArch64::XZR))
23462381
.addReg(AArch64::X16)
@@ -2608,6 +2643,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
26082643
// instruction here.
26092644
case AArch64::AUTH_TCRETURN:
26102645
case AArch64::AUTH_TCRETURN_BTI: {
2646+
Register Callee = MI->getOperand(0).getReg();
26112647
const uint64_t Key = MI->getOperand(2).getImm();
26122648
assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
26132649
"Invalid auth key for tail-call return");
@@ -2617,31 +2653,23 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
26172653

26182654
Register AddrDisc = MI->getOperand(4).getReg();
26192655

2620-
Register ScratchReg = MI->getOperand(0).getReg() == AArch64::X16
2621-
? AArch64::X17
2622-
: AArch64::X16;
2656+
Register ScratchReg = Callee == AArch64::X16 ? AArch64::X17 : AArch64::X16;
26232657

26242658
emitPtrauthTailCallHardening(MI);
26252659

2626-
unsigned DiscReg = AddrDisc;
2627-
if (Disc) {
2628-
if (AddrDisc != AArch64::NoRegister) {
2629-
if (ScratchReg != AddrDisc)
2630-
emitMovXReg(ScratchReg, AddrDisc);
2631-
emitMOVK(ScratchReg, Disc, 48);
2632-
} else {
2633-
emitMOVZ(ScratchReg, Disc, 0);
2634-
}
2635-
DiscReg = ScratchReg;
2636-
}
2660+
// See the comments in emitPtrauthBranch.
2661+
if (Callee == AddrDisc)
2662+
report_fatal_error("Call target is signed with its own value");
2663+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, ScratchReg,
2664+
/*MayUseAddrAsScratch=*/true);
26372665

2638-
const bool IsZero = DiscReg == AArch64::NoRegister;
2666+
const bool IsZero = DiscReg == AArch64::XZR;
26392667
const unsigned Opcodes[2][2] = {{AArch64::BRAA, AArch64::BRAAZ},
26402668
{AArch64::BRAB, AArch64::BRABZ}};
26412669

26422670
MCInst TmpInst;
26432671
TmpInst.setOpcode(Opcodes[Key][IsZero]);
2644-
TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
2672+
TmpInst.addOperand(MCOperand::createReg(Callee));
26452673
if (!IsZero)
26462674
TmpInst.addOperand(MCOperand::createReg(DiscReg));
26472675
EmitToStreamer(*OutStreamer, TmpInst);

llvm/lib/Target/AArch64/AArch64InstrInfo.td

+7-7
Original file line numberDiff line numberDiff line change
@@ -1841,28 +1841,28 @@ let Predicates = [HasPAuth] in {
18411841
// materialization here), in part because they're handled in a safer way by
18421842
// the kernel, notably on Darwin.
18431843
def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1844-
GPR64noip:$AddrDisc),
1844+
GPR64:$AddrDisc),
18451845
[(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc,
1846-
GPR64noip:$AddrDisc)]>, Sched<[]> {
1846+
GPR64:$AddrDisc)]>, Sched<[]> {
18471847
let isCodeGenOnly = 1;
18481848
let hasSideEffects = 1;
18491849
let mayStore = 0;
18501850
let mayLoad = 0;
18511851
let isCall = 1;
18521852
let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1853-
let Defs = [X17,LR];
1853+
let Defs = [X16,X17,LR];
18541854
let Uses = [SP];
18551855
}
18561856

18571857
def BLRA_RVMARKER : Pseudo<
18581858
(outs), (ins i64imm:$rvfunc, GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1859-
GPR64noip:$AddrDisc),
1859+
GPR64:$AddrDisc),
18601860
[(AArch64authcall_rvmarker tglobaladdr:$rvfunc,
18611861
GPR64noip:$Rn, timm:$Key, timm:$Disc,
1862-
GPR64noip:$AddrDisc)]>, Sched<[]> {
1862+
GPR64:$AddrDisc)]>, Sched<[]> {
18631863
let isCodeGenOnly = 1;
18641864
let isCall = 1;
1865-
let Defs = [X17,LR];
1865+
let Defs = [X16,X17,LR];
18661866
let Uses = [SP];
18671867
}
18681868

@@ -1972,7 +1972,7 @@ let Predicates = [HasPAuth] in {
19721972
// make sure at least one register is usable as a scratch one - for that
19731973
// purpose, use tcGPRnotx16x17 register class for one of the operands.
19741974
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16,
1975-
Uses = [SP] in {
1975+
Defs = [X16,X17], Uses = [SP] in {
19761976
def AUTH_TCRETURN
19771977
: Pseudo<(outs), (ins tcGPRnotx16x17:$dst, i32imm:$FPDiff, i32imm:$Key,
19781978
i64imm:$Disc, tcGPR64:$AddrDisc),

llvm/test/CodeGen/AArch64/ptrauth-call.ll

+27
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,33 @@ define void @test_tailcall_omit_mov_x16_x16(ptr %objptr) #0 {
188188
ret void
189189
}
190190

191+
define i32 @test_call_omit_extra_moves(ptr %objptr) #0 {
192+
; CHECK-LABEL: test_call_omit_extra_moves:
193+
; DARWIN-NEXT: stp x29, x30, [sp, #-16]!
194+
; ELF-NEXT: str x30, [sp, #-16]!
195+
; CHECK-NEXT: ldr x16, [x0]
196+
; CHECK-NEXT: mov x17, x0
197+
; CHECK-NEXT: movk x17, #6503, lsl #48
198+
; CHECK-NEXT: autda x16, x17
199+
; CHECK-NEXT: ldr x8, [x16]
200+
; CHECK-NEXT: movk x16, #34646, lsl #48
201+
; CHECK-NEXT: blraa x8, x16
202+
; CHECK-NEXT: mov w0, #42
203+
; DARWIN-NEXT: ldp x29, x30, [sp], #16
204+
; ELF-NEXT: ldr x30, [sp], #16
205+
; CHECK-NEXT: ret
206+
%vtable.signed = load ptr, ptr %objptr
207+
%objptr.int = ptrtoint ptr %objptr to i64
208+
%vtable.discr = tail call i64 @llvm.ptrauth.blend(i64 %objptr.int, i64 6503)
209+
%vtable.signed.int = ptrtoint ptr %vtable.signed to i64
210+
%vtable.int = tail call i64 @llvm.ptrauth.auth(i64 %vtable.signed.int, i32 2, i64 %vtable.discr)
211+
%vtable = inttoptr i64 %vtable.int to ptr
212+
%callee.signed = load ptr, ptr %vtable
213+
%callee.discr = tail call i64 @llvm.ptrauth.blend(i64 %vtable.int, i64 34646)
214+
%call.result = tail call i32 %callee.signed(ptr %objptr) [ "ptrauth"(i32 0, i64 %callee.discr) ]
215+
ret i32 42
216+
}
217+
191218
define i32 @test_call_ia_arg(ptr %arg0, i64 %arg1) #0 {
192219
; DARWIN-LABEL: test_call_ia_arg:
193220
; DARWIN-NEXT: stp x29, x30, [sp, #-16]!

0 commit comments

Comments
 (0)