-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[ARM,ELF] Fix access to dso_preemptable __stack_chk_guard with static relocation model #70014
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lto @llvm/pr-subscribers-backend-arm Author: Fangrui Song (MaskRay) ChangesThe ELF code from https://reviews.llvm.org/D112811 emits LDRLIT_ga_pcrel when To fix this, prefer Due to using Fix #64999 Full diff: https://github.com/llvm/llvm-project/pull/70014.diff 4 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1ffdde0360cf623..4c78379ccf5c467 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4978,7 +4978,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
TargetFlags |= ARMII::MO_DLLIMPORT;
else if (IsIndirect)
TargetFlags |= ARMII::MO_COFFSTUB;
- } else if (Subtarget.isGVInGOT(GV)) {
+ } else if (IsIndirect) {
TargetFlags |= ARMII::MO_GOT;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 00db13f2eb520d7..1199e6f1981cca6 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -104,11 +104,12 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
const GlobalValue *GV =
cast<GlobalValue>((*MI->memoperands_begin())->getValue());
- if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) {
- if (TM.isPositionIndependent())
- expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
- else
+ if (!Subtarget.useMovt() ||
+ (Subtarget.isTargetELF() && !TM.shouldAssumeDSOLocal(M, GV))) {
+ if (TM.shouldAssumeDSOLocal(M, GV))
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12);
+ else
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
return;
}
diff --git a/llvm/test/CodeGen/ARM/stack_guard_remat.ll b/llvm/test/CodeGen/ARM/stack_guard_remat.ll
index ad40212c9b6f1d0..8904884e3d4f444 100644
--- a/llvm/test/CodeGen/ARM/stack_guard_remat.ll
+++ b/llvm/test/CodeGen/ARM/stack_guard_remat.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC
-; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=NO-PIC -check-prefix=STATIC
-; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefix=NO-PIC -check-prefix=DYNAMIC-NO-PIC
-; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC-V7
-; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=STATIC-V7
-; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefix=DYNAMIC-NO-PIC-V7
+; RUN: rm -rf %t && split-file %s %t && cd %t
+; RUN: cat main.ll pic-flag.ll > pic.ll
+; RUN: llc < pic.ll -mtriple=arm-apple-ios -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC
+; RUN: llc < main.ll -mtriple=arm-apple-ios -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=NO-PIC -check-prefix=STATIC
+; RUN: llc < main.ll -mtriple=arm-apple-ios -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefix=PIC
+; RUN: llc < pic.ll -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC-V7
+; RUN: llc < main.ll -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=STATIC-V7
+; RUN: llc < main.ll -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefix=DYNAMIC-NO-PIC-V7
;PIC: foo2
;PIC: ldr [[R0:r[0-9]+]], [[LABEL0:LCPI[0-9_]+]]
@@ -47,6 +49,7 @@
;DYNAMIC-NO-PIC-V7: L___stack_chk_guard$non_lazy_ptr:
;DYNAMIC-NO-PIC-V7: .indirect_symbol ___stack_chk_guard
+;--- main.ll
; Function Attrs: nounwind ssp
define i32 @test_stack_guard_remat() #0 {
%a1 = alloca [256 x i32], align 4
@@ -67,5 +70,6 @@ declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+;--- pic-flag.ll
!llvm.module.flags = !{!0}
!0 = !{i32 7, !"PIC Level", i32 2}
diff --git a/llvm/test/LTO/ARM/ssp-static-reloc.ll b/llvm/test/LTO/ARM/ssp-static-reloc.ll
index c8825c2aae0fbb6..2ea6977ef78e752 100644
--- a/llvm/test/LTO/ARM/ssp-static-reloc.ll
+++ b/llvm/test/LTO/ARM/ssp-static-reloc.ll
@@ -19,11 +19,12 @@ entry:
; CHECK: <foo>:
; CHECK: [[#%x,CURPC:]]:{{.*}} ldr r[[REG1:[0-9]+]], [pc, #0x[[#%x,OFFSET:]]]
+; CHECK-NEXT: add r0, pc, r0
; CHECK-NEXT: ldr r[[REG2:[0-9]+]], [r[[REG1]]]
; CHECK-NEXT: ldr r[[REG3:[0-9]+]], [r[[REG2]]]
; CHECK-NEXT: str r[[REG3]],
; CHECK: [[#CURPC + OFFSET + 8]]:{{.*}}.word
-; CHECK-NEXT: R_ARM_ABS32 __stack_chk_guard
+; CHECK-NEXT: R_ARM_GOT_PREL __stack_chk_guard
declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
|
@@ -104,11 +104,12 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const { | |||
const GlobalValue *GV = | |||
cast<GlobalValue>((*MI->memoperands_begin())->getValue()); | |||
|
|||
if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
isGVInGOT
is defined as
bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const {
return isTargetELF() && TM.isPositionIndependent() &&
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
}
I believe the function name is misleading and TM.isPositionIndependent() &&
probably should be removed, so I avoid using it.
However, removing TM.isPositionIndependent() &&
from the function uncovers other problems, so I do not touch ARMSubtarget::isGVInGOT
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure I understand the change to the inner if statement here if (TM.isPositionIndependent())
-> if (TM.shouldAssumeDSOLocal(M, GV))
. The only thing that matters choosing between LDRLIT_ga_abs and LDRLIT_ga_pcrel should be whether we're generating position-independent code, which isn't directly related to whether the symbol is dso_local.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that we can technically use LDRLIT_ga_abs
to load the GOT entry address but
ARMExpandPseudoInsts.cpp:2640 does not use GOT for LDRLIT_ga_abs
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically with dso_preemptable __stack_chk_guard
with static relocation model, we could emit
ldr r0, .LCPI0_0 // from LDRLIT_ga_abs
ldr r0, [r0]
ldr r0, [r0]
...
.LCPI0_0:
.long __stack_chk_guard@GOT
Unfortunately, such a GOT-generating relocation with computation GOT(S) + A
(R_ARM_GOT_ABS
) does not have assembler support.
Therefore, we have to use LDRLIT_ga_pcrel and expand to the PIC+GOT code sequence:
.LCPI0_0:
.Ltmp0:
.long __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, it's also a relocation thing... that makes sense. Could you add a comment in the code explaining this so it's clear why we're using this specific path on ELF?
Maybe for clarity, also compute "do we need to force a got-pic sequence" as a boolean, then make the inner check something like if (TM.isPositionIndependent() || ForceELFGOTPIC
)`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a comment.
Edited my previous comment: R_ARM_GOT_ABS
is present, but it does not have assembler support.
I have tested that this patch does not change code generation for the following configurations:
|
@@ -2019,7 +2019,9 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { | |||
if (M.getDirectAccessExternalData() && | |||
!TM.getTargetTriple().isWindowsGNUEnvironment() && | |||
!TM.getTargetTriple().isOSFreeBSD() && | |||
!TM.getTargetTriple().isOSDarwin()) | |||
(!TM.getTargetTriple().isOSDarwin() || | |||
(TM.getTargetTriple().isARM() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This ensures that code generation for arm*-apple-darwin -fno-pic does not change.
If arm*-apple-darwin -fno-pic
is ok to have -fpic
-like code sequence for __stack_chk_guard
, we can remove this special case and simplify Darwin-specific code in ARMInstrInfo::expandLoadStackGuard
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does the current version of the patch actually change the logic for MachO targets outside of this change? I can't find any logical differences (the ARMInstrInfo.cpp changes are guarded by isTargetELF(), and the ARMBaseInstrInfo.cpp change is unreachable due to an earlier isTargetMachO()).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe this patch does not change Mach-O behavior.
This llvm/lib/CodeGen/TargetLoweringBase.cpp
change can also be removed and the behavior does not change. Sorry for including it here. However, the file is somewhat related.
I've added a commit to this PR and this now becomes
if (M.getDirectAccessExternalData() &&
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
!TM.getTargetTriple().isOSFreeBSD() &&
(!TM.getTargetTriple().isOSDarwin() ||
TM.getRelocationModel() == Reloc::Static))
GV->setDSOLocal(true);
}
This TM.getRelocationModel() == Reloc::Static
condition refines 1ec3010 .
I think Mach-O's static relocation model largely ignores dso_local
IR markers and relies on TM.shouldAssumeDSOLocal
to return true for static relocation code model. However, it seems more appropriate to me to encode this information correctly, which can probably enable code share between ELF and Mach-O.
Is that the right link? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
… relocation model The ELF code from https://reviews.llvm.org/D112811 emits LDRLIT_ga_pcrel when `TM.isPositionIndependent()` but uses a different condition `Subtarget.isGVIndirectSymbol(GV)` (aka dso_preemptable on ELF targets). This would cause incorrect access for dso_preemptable `__stack_chk_guard` with the static relocation model. Regarding whether `__stack_chk_guard` gets the dso_local specifier, https://reviews.llvm.org/D150841 switched to `M.getDirectAccessExternalData()` (implied by "PIC Level") instead of `TM.getRelocationModel() == Reloc::Static`. The result is that when non-zero "PIC Level" is used with static relocation model (e.g. -fPIE/-fPIC LTO compiles with -no-pie linking), `__stack_chk_guard` accesses are incorrect. ``` ldr r0, .LCPI0_0 ldr r0, [r0] ldr r0, [r0] // incorrectly dereferences __stack_chk_guard ... .LCPI0_0: .long __stack_chk_guard ``` To fix this, for dso_preemptable `__stack_chk_guard`, emit a GOT PIC code sequence like for -fpic using `LDRLIT_ga_pcrel`: ``` ldr r0, .LCPI0_0 .LPC0_0: add r0, pc, r0 ldr r0, [r0] ldr r0, [r0] ... LCPI0_0: .Ltmp0: .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0) ``` Technically, `LDRLIT_ga_abs` with `R_ARM_GOT_ABS` could be used, but `R_ARM_GOT_ABS` does not have GNU or integrated assembler support. (Note, `.LCPI0_0: .long __stack_chk_guard@GOT` produces an `R_ARM_GOT_BREL`, which is not desired). This patch fixes llvm#6499 while not changing behavior for the following configurations: ``` run arm.linux.nopic --target=arm-linux-gnueabi -fno-pic run arm.linux.pie --target=arm-linux-gnueabi -fpie run arm.linux.pic --target=arm-linux-gnueabi -fpic run armv6.darwin.nopic --target=armv6-apple-darwin -fno-pic run armv6.darwin.dynamicnopic --target=armv6-apple-darwin -mdynamic-no-pic run armv6.darwin.pic --target=armv6-apple-darwin -fpic run armv7.darwin.nopic --target=armv7-apple-darwin -mcpu=cortex-a8 -fno-pic run armv7.darwin.dynamicnopic --target=armv7-apple-darwin -mcpu=cortex-a8 -mdynamic-no-pic run armv7.darwin.pic --target=armv7-apple-darwin -mcpu=cortex-a8 -fpic run arm64.darwin.pic --target=arm64-apple-darwin ```
Rebase after test adjustment at 6ae7b73
|
…ic relocation model (#78950) PR #70014 fixes A32 to use GOT for dso_preemptable `__stack_chk_guard` with static relocation model (e.g. -fPIE/-fPIC LTO compiles with -no-pie linking). This patch fixes such `__stack_chk_guard` access for Thumb1 and Thumb2. Note: `t2LDRLIT_ga_pcrel` is only for ELF. mingw needs `.refptr.__stack_chk_guard` (https://reviews.llvm.org/D92738). Fix #64999
The ELF code from https://reviews.llvm.org/D112811 emits LDRLIT_ga_pcrel
when
TM.isPositionIndependent()
but uses a different conditionSubtarget.isGVIndirectSymbol(GV)
(aka dso_preemptable on ELF targets).This would cause incorrect access for dso_preemptable
__stack_chk_guard
with the static relocation model.Regarding whether
__stack_chk_guard
gets the dso_local specifier,https://reviews.llvm.org/D150841 switched to
M.getDirectAccessExternalData()
(implied by "PIC Level") instead ofTM.getRelocationModel() == Reloc::Static
.The result is that when non-zero "PIC Level" is used with static
relocation model (e.g. -fPIE/-fPIC LTO compiles with -no-pie linking),
__stack_chk_guard
accesses are incorrect.To fix this, for dso_preemptable
__stack_chk_guard
, emit a GOT PICcode sequence like for -fpic using
LDRLIT_ga_pcrel
:Technically,
LDRLIT_ga_abs
withR_ARM_GOT_ABS
could be used, butR_ARM_GOT_ABS
does not have GNU or integrated assembler support.(Note,
.LCPI0_0: .long __stack_chk_guard@GOT
produces anR_ARM_GOT_BREL
, which is not desired).This patch fixes #6499 while not changing behavior for the following
configurations: