-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARC] Allow overaligned alloca
s
#107223
Conversation
SPARC doesn't do stack realignment, so let LLVM know about it in `SparcFrameLowering`. This has the side effect of making all overaligned allocations go through `LowerDYNAMIC_STACKALLOC`, so implement the missing logic there for overaligned allocations. This allows the SPARC backend to not crash on overaligned `alloca`s.
@llvm/pr-subscribers-compiler-rt-sanitizer @llvm/pr-subscribers-backend-sparc Author: Koakuma (koachan) ChangesSPARC doesn't do stack realignment, so let LLVM know about it in Full diff: https://github.com/llvm/llvm-project/pull/107223.diff 9 Files Affected:
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 000418be9a9e33..b28642aee4ca06 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -35,7 +35,8 @@ DisableLeafProc("disable-sparc-leaf-proc",
SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
ST.is64Bit() ? Align(16) : Align(8), 0,
- ST.is64Bit() ? Align(16) : Align(8)) {}
+ ST.is64Bit() ? Align(16) : Align(8),
+ /* StackRealignable */ false) {}
void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -97,12 +98,6 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
- bool NeedsStackRealignment = RegInfo.shouldRealignStack(MF);
-
- if (NeedsStackRealignment && !RegInfo.canRealignStack(MF))
- report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required "
- "stack re-alignment, but LLVM couldn't handle it "
- "(probably because it has a dynamic alloca).");
// Get the number of bytes to allocate from the FrameInfo
int NumBytes = (int) MFI.getStackSize();
@@ -168,31 +163,6 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
-
- if (NeedsStackRealignment) {
- int64_t Bias = Subtarget.getStackPointerBias();
- unsigned regUnbiased;
- if (Bias) {
- // This clobbers G1 which we always know is available here.
- regUnbiased = SP::G1;
- // add %o6, BIAS, %g1
- BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), regUnbiased)
- .addReg(SP::O6).addImm(Bias);
- } else
- regUnbiased = SP::O6;
-
- // andn %regUnbiased, MaxAlign-1, %regUnbiased
- Align MaxAlign = MFI.getMaxAlign();
- BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), regUnbiased)
- .addReg(regUnbiased)
- .addImm(MaxAlign.value() - 1U);
-
- if (Bias) {
- // add %g1, -BIAS, %o6
- BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
- .addReg(regUnbiased).addImm(-Bias);
- }
- }
}
MachineBasicBlock::iterator SparcFrameLowering::
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 42b8248006d1fd..06bd2aa27e904f 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -2764,20 +2764,27 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
SDValue Chain = Op.getOperand(0); // Legalize the chain.
SDValue Size = Op.getOperand(1); // Legalize the size.
- MaybeAlign Alignment =
- cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
+ SDValue Alignment = Op.getOperand(2); // Legalize the alignment.
+ MaybeAlign MaybeAlignment =
+ cast<ConstantSDNode>(Alignment)->getMaybeAlignValue();
Align StackAlign = Subtarget->getFrameLowering()->getStackAlign();
EVT VT = Size->getValueType(0);
SDLoc dl(Op);
- // TODO: implement over-aligned alloca. (Note: also implies
- // supporting support for overaligned function frames + dynamic
- // allocations, at all, which currently isn't supported)
- if (Alignment && *Alignment > StackAlign) {
- const MachineFunction &MF = DAG.getMachineFunction();
- report_fatal_error("Function \"" + Twine(MF.getName()) + "\": "
- "over-aligned dynamic alloca not supported.");
- }
+ int64_t Bias = Subtarget->getStackPointerBias();
+ unsigned SPReg = SP::O6;
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ SDValue AlignedPtr = SP;
+
+ bool IsOveraligned = MaybeAlignment && *MaybeAlignment > StackAlign;
+ if (IsOveraligned)
+ AlignedPtr = DAG.getNode(
+ ISD::AND, dl, VT,
+ DAG.getNode(
+ ISD::SUB, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, SP, DAG.getConstant(Bias, dl, VT)),
+ Alignment),
+ DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Alignment));
// The resultant pointer needs to be above the register spill area
// at the bottom of the stack.
@@ -2811,12 +2818,17 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
regSpillArea = 96;
}
- unsigned SPReg = SP::O6;
- SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
- SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
- Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
+ AlignedPtr = DAG.getNode(ISD::SUB, dl, VT, AlignedPtr, Size);
- regSpillArea += Subtarget->getStackPointerBias();
+ // If we are allocating overaligned memory then the bias is already accounted
+ // for in AlignedPtr calculation, so:
+ // - We do not need to adjust the regSpillArea; but
+ // - We do need to decrement AlignedPtr by bias to obtain the new SP.
+ regSpillArea += IsOveraligned ? 0 : Bias;
+ SDValue NewSP =
+ DAG.getNode(ISD::SUB, dl, VT, AlignedPtr,
+ DAG.getConstant(IsOveraligned ? Bias : 0, dl, VT));
+ Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP);
SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
DAG.getConstant(regSpillArea, dl, VT));
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 71a27f77d2c6bf..e4db27a63076d8 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -226,26 +226,3 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
-
-// Sparc has no architectural need for stack realignment support,
-// except that LLVM unfortunately currently implements overaligned
-// stack objects by depending upon stack realignment support.
-// If that ever changes, this can probably be deleted.
-bool SparcRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- if (!TargetRegisterInfo::canRealignStack(MF))
- return false;
-
- // Sparc always has a fixed frame pointer register, so don't need to
- // worry about needing to reserve it. [even if we don't have a frame
- // pointer for our frame, it still cannot be used for other things,
- // or register window traps will be SADNESS.]
-
- // If there's a reserved call frame, we can use SP to access locals.
- if (getFrameLowering(MF)->hasReservedCallFrame(MF))
- return true;
-
- // Otherwise, we'd need a base pointer, but those aren't implemented
- // for SPARC at the moment.
-
- return false;
-}
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 58c85f33635f2d..eae859ce1a519e 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -40,9 +40,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
RegScavenger *RS = nullptr) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
-
- bool canRealignStack(const MachineFunction &MF) const override;
-
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/Generic/ForceStackAlign.ll b/llvm/test/CodeGen/Generic/ForceStackAlign.ll
index 7993b3eff65b68..811a192e752ab1 100644
--- a/llvm/test/CodeGen/Generic/ForceStackAlign.ll
+++ b/llvm/test/CodeGen/Generic/ForceStackAlign.ll
@@ -5,9 +5,6 @@
; CHECK-LABEL: @f
; CHECK-LABEL: @g
-; Stack realignment not supported.
-; XFAIL: target=sparc{{.*}}
-
; NVPTX can only select dynamic_stackalloc on sm_52+ and with ptx73+
; XFAIL: target=nvptx{{.*}}
diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll
new file mode 100644
index 00000000000000..1ece601437ea4a
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/alloca-align.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=sparc < %s | FileCheck %s --check-prefixes=CHECK32
+; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefixes=CHECK64
+
+define void @variable_alloca_with_overalignment(i32 %num) {
+; CHECK32-LABEL: variable_alloca_with_overalignment:
+; CHECK32: .cfi_startproc
+; CHECK32-NEXT: ! %bb.0:
+; CHECK32-NEXT: save %sp, -96, %sp
+; CHECK32-NEXT: .cfi_def_cfa_register %fp
+; CHECK32-NEXT: .cfi_window_save
+; CHECK32-NEXT: .cfi_register %o7, %i7
+; CHECK32-NEXT: add %sp, -64, %i1
+; CHECK32-NEXT: and %i1, -64, %i1
+; CHECK32-NEXT: add %i1, -16, %sp
+; CHECK32-NEXT: add %i0, 7, %i0
+; CHECK32-NEXT: and %i0, -8, %i0
+; CHECK32-NEXT: sub %sp, %i0, %i0
+; CHECK32-NEXT: add %i1, 80, %o0
+; CHECK32-NEXT: add %i0, -8, %sp
+; CHECK32-NEXT: call foo
+; CHECK32-NEXT: add %i0, 88, %o1
+; CHECK32-NEXT: ret
+; CHECK32-NEXT: restore
+;
+; CHECK64-LABEL: variable_alloca_with_overalignment:
+; CHECK64: .cfi_startproc
+; CHECK64-NEXT: ! %bb.0:
+; CHECK64-NEXT: save %sp, -128, %sp
+; CHECK64-NEXT: .cfi_def_cfa_register %fp
+; CHECK64-NEXT: .cfi_window_save
+; CHECK64-NEXT: .cfi_register %o7, %i7
+; CHECK64-NEXT: add %sp, 1983, %i1
+; CHECK64-NEXT: and %i1, -64, %i1
+; CHECK64-NEXT: add %i1, -2063, %sp
+; CHECK64-NEXT: add %i1, -1935, %o0
+; CHECK64-NEXT: srl %i0, 0, %i0
+; CHECK64-NEXT: add %i0, 15, %i0
+; CHECK64-NEXT: sethi 4194303, %i1
+; CHECK64-NEXT: or %i1, 1008, %i1
+; CHECK64-NEXT: sethi 0, %i2
+; CHECK64-NEXT: or %i2, 1, %i2
+; CHECK64-NEXT: sllx %i2, 32, %i2
+; CHECK64-NEXT: or %i2, %i1, %i1
+; CHECK64-NEXT: and %i0, %i1, %i0
+; CHECK64-NEXT: sub %sp, %i0, %i0
+; CHECK64-NEXT: add %i0, 2175, %o1
+; CHECK64-NEXT: mov %i0, %sp
+; CHECK64-NEXT: call foo
+; CHECK64-NEXT: add %sp, -48, %sp
+; CHECK64-NEXT: add %sp, 48, %sp
+; CHECK64-NEXT: ret
+; CHECK64-NEXT: restore
+ %aligned = alloca i32, align 64
+ %var_size = alloca i8, i32 %num, align 4
+ call void @foo(ptr %aligned, ptr %var_size)
+ ret void
+}
+
+;; Same but with the alloca itself overaligned
+define void @variable_alloca_with_overalignment_2(i32 %num) {
+; CHECK32-LABEL: variable_alloca_with_overalignment_2:
+; CHECK32: .cfi_startproc
+; CHECK32-NEXT: ! %bb.0:
+; CHECK32-NEXT: save %sp, -96, %sp
+; CHECK32-NEXT: .cfi_def_cfa_register %fp
+; CHECK32-NEXT: .cfi_window_save
+; CHECK32-NEXT: .cfi_register %o7, %i7
+; CHECK32-NEXT: add %i0, 7, %i0
+; CHECK32-NEXT: and %i0, -8, %i0
+; CHECK32-NEXT: add %sp, -64, %i1
+; CHECK32-NEXT: and %i1, -64, %i1
+; CHECK32-NEXT: sub %i1, %i0, %i0
+; CHECK32-NEXT: add %i0, -8, %sp
+; CHECK32-NEXT: add %i0, 88, %o1
+; CHECK32-NEXT: call foo
+; CHECK32-NEXT: mov %g0, %o0
+; CHECK32-NEXT: ret
+; CHECK32-NEXT: restore
+;
+; CHECK64-LABEL: variable_alloca_with_overalignment_2:
+; CHECK64: .cfi_startproc
+; CHECK64-NEXT: ! %bb.0:
+; CHECK64-NEXT: save %sp, -128, %sp
+; CHECK64-NEXT: .cfi_def_cfa_register %fp
+; CHECK64-NEXT: .cfi_window_save
+; CHECK64-NEXT: .cfi_register %o7, %i7
+; CHECK64-NEXT: srl %i0, 0, %i0
+; CHECK64-NEXT: add %i0, 15, %i0
+; CHECK64-NEXT: sethi 4194303, %i1
+; CHECK64-NEXT: or %i1, 1008, %i1
+; CHECK64-NEXT: sethi 0, %i2
+; CHECK64-NEXT: or %i2, 1, %i2
+; CHECK64-NEXT: sllx %i2, 32, %i2
+; CHECK64-NEXT: or %i2, %i1, %i1
+; CHECK64-NEXT: and %i0, %i1, %i0
+; CHECK64-NEXT: add %sp, 1983, %i1
+; CHECK64-NEXT: and %i1, -64, %i1
+; CHECK64-NEXT: sub %i1, %i0, %i0
+; CHECK64-NEXT: add %i0, -2047, %sp
+; CHECK64-NEXT: add %i0, -1919, %o1
+; CHECK64-NEXT: add %sp, -48, %sp
+; CHECK64-NEXT: call foo
+; CHECK64-NEXT: mov %g0, %o0
+; CHECK64-NEXT: add %sp, 48, %sp
+; CHECK64-NEXT: ret
+; CHECK64-NEXT: restore
+ %var_size = alloca i8, i32 %num, align 64
+ call void @foo(ptr null, ptr %var_size)
+ ret void
+}
+
+declare void @foo(ptr, ptr);
diff --git a/llvm/test/CodeGen/SPARC/fail-alloca-align.ll b/llvm/test/CodeGen/SPARC/fail-alloca-align.ll
deleted file mode 100644
index e2dc235389b1dc..00000000000000
--- a/llvm/test/CodeGen/SPARC/fail-alloca-align.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-;; Sparc backend can't currently handle variable allocas with
-;; alignment greater than the stack alignment. This code ought to
-;; compile, but doesn't currently.
-
-;; RUN: not --crash llc -march=sparc < %s 2>&1 | FileCheck %s
-;; RUN: not --crash llc -march=sparcv9 < %s 2>&1 | FileCheck %s
-;; CHECK: ERROR: Function {{.*}} required stack re-alignment
-
-define void @variable_alloca_with_overalignment(i32 %num) {
- %aligned = alloca i32, align 64
- %var_size = alloca i8, i32 %num, align 4
- call void @foo(ptr %aligned, ptr %var_size)
- ret void
-}
-
-;; Same but with the alloca itself overaligned
-define void @variable_alloca_with_overalignment_2(i32 %num) {
- %var_size = alloca i8, i32 %num, align 64
- call void @foo(ptr null, ptr %var_size)
- ret void
-}
-
-declare void @foo(ptr, ptr);
diff --git a/llvm/test/CodeGen/SPARC/fp128.ll b/llvm/test/CodeGen/SPARC/fp128.ll
index 80f3da285e053f..3e43d3eb5da70f 100644
--- a/llvm/test/CodeGen/SPARC/fp128.ll
+++ b/llvm/test/CodeGen/SPARC/fp128.ll
@@ -54,18 +54,10 @@ entry:
; CHECK-LABEL: f128_spill_large:
; CHECK: sethi 4, %g1
-; CHECK: sethi 4, %g1
-; CHECK-NEXT: add %g1, %sp, %g1
-; CHECK-NEXT: std %f{{.+}}, [%g1]
-; CHECK: sethi 4, %g1
-; CHECK-NEXT: add %g1, %sp, %g1
-; CHECK-NEXT: std %f{{.+}}, [%g1+8]
-; CHECK: sethi 4, %g1
-; CHECK-NEXT: add %g1, %sp, %g1
-; CHECK-NEXT: ldd [%g1], %f{{.+}}
-; CHECK: sethi 4, %g1
-; CHECK-NEXT: add %g1, %sp, %g1
-; CHECK-NEXT: ldd [%g1+8], %f{{.+}}
+; CHECK: std %f{{.+}}, [%fp+-16]
+; CHECK-NEXT: std %f{{.+}}, [%fp+-8]
+; CHECK: ldd [%fp+-16], %f{{.+}}
+; CHECK-NEXT: ldd [%fp+-8], %f{{.+}}
define void @f128_spill_large(ptr noalias sret(<251 x fp128>) %scalar.result, ptr byval(<251 x fp128>) %a) {
entry:
diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll
index 6632237f08e274..e2dfe854d643a1 100644
--- a/llvm/test/CodeGen/SPARC/stack-align.ll
+++ b/llvm/test/CodeGen/SPARC/stack-align.ll
@@ -1,24 +1,47 @@
-; RUN: llc -march=sparc < %s | FileCheck %s --check-prefixes=CHECK,CHECK32
-; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=sparc < %s | FileCheck %s --check-prefixes=CHECK32
+; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefixes=CHECK64
declare void @stack_realign_helper(i32 %a, ptr %b)
;; This is a function where we have a local variable of 64-byte
-;; alignment. We want to see that the stack is aligned (the initial
-;; andn), that the local var is accessed via stack pointer (to %o1), and that
+;; alignment. We want to see that the stack is aligned (the initial add/and),
+;; that the local var is accessed via stack pointer (to %o1), and that
;; the argument is accessed via frame pointer not stack pointer (to %o0).
-;; CHECK-LABEL: stack_realign:
-;; CHECK32: andn %sp, 63, %sp
-;; CHECK32-NEXT: ld [%fp+92], %o0
-;; CHECK64: add %sp, 2047, %g1
-;; CHECK64-NEXT: andn %g1, 63, %g1
-;; CHECK64-NEXT: add %g1, -2047, %sp
-;; CHECK64-NEXT: ld [%fp+2227], %o0
-;; CHECK-NEXT: call stack_realign_helper
-;; CHECK32-NEXT: add %sp, 128, %o1
-;; CHECK64-NEXT: add %sp, 2239, %o1
-
define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {
+; CHECK32-LABEL: stack_realign:
+; CHECK32: .cfi_startproc
+; CHECK32-NEXT: ! %bb.0: ! %entry
+; CHECK32-NEXT: save %sp, -96, %sp
+; CHECK32-NEXT: .cfi_def_cfa_register %fp
+; CHECK32-NEXT: .cfi_window_save
+; CHECK32-NEXT: .cfi_register %o7, %i7
+; CHECK32-NEXT: ld [%fp+92], %o0
+; CHECK32-NEXT: add %sp, -64, %i0
+; CHECK32-NEXT: and %i0, -64, %i0
+; CHECK32-NEXT: add %i0, -16, %sp
+; CHECK32-NEXT: call stack_realign_helper
+; CHECK32-NEXT: add %i0, 80, %o1
+; CHECK32-NEXT: ret
+; CHECK32-NEXT: restore
+;
+; CHECK64-LABEL: stack_realign:
+; CHECK64: .cfi_startproc
+; CHECK64-NEXT: ! %bb.0: ! %entry
+; CHECK64-NEXT: save %sp, -128, %sp
+; CHECK64-NEXT: .cfi_def_cfa_register %fp
+; CHECK64-NEXT: .cfi_window_save
+; CHECK64-NEXT: .cfi_register %o7, %i7
+; CHECK64-NEXT: add %sp, 1983, %i0
+; CHECK64-NEXT: and %i0, -64, %i0
+; CHECK64-NEXT: add %i0, -2063, %sp
+; CHECK64-NEXT: add %i0, -1935, %o1
+; CHECK64-NEXT: add %sp, -48, %sp
+; CHECK64-NEXT: call stack_realign_helper
+; CHECK64-NEXT: ld [%fp+2227], %o0
+; CHECK64-NEXT: add %sp, 48, %sp
+; CHECK64-NEXT: ret
+; CHECK64-NEXT: restore
entry:
%aligned = alloca i32, align 64
call void @stack_realign_helper(i32 %g, ptr %aligned)
|
Tagging this as WIP since I feel like the calculation could be done better. |
Also, @cpython-java, can you please try this patch too? ^_^ |
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); | ||
SDValue AlignedPtr = SP; | ||
|
||
bool IsOveraligned = MaybeAlignment && *MaybeAlignment > StackAlign; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is no need for the second check.
Alignment, if present, is guaranteed to be greater than the stack alignment.
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); | ||
SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value | ||
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain | ||
AlignedPtr = DAG.getNode(ISD::SUB, dl, VT, AlignedPtr, Size); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The subtraction should be done before aligning to avoid excessive stack usage.
Take for example
old sp = 40, size = 8, align = 16
The current formula gives:
new sp = (40 - 16) & -16 - 8 = 8
That is, it allocates 32 bytes instead of just 8 (which would be sufficient because 40 - 8 = 32
is 16-byte aligned).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A little question here: why is the aligning formula (ptr - align) & -align
instead of just ptr & -align
?
In my understanding the latter should avoid excessive allocation too; using your example of old sp = 40, size = 8, align = 16
:
ptr = 40 - 8 // 32
// using (ptr - align) & -align
new sp = (32 - 16) & (-16) // 16, overallocating by 16 bytes
// using ptr & -align
new sp = 32 & -16 // 32
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should be (ptr - size) & -align
.
I'm currently building with this patch on Solaris/sparcv9, initially as is. Later I'm going to enable ASan on 32-bit SPARC to see how this goes (Issue #57626). |
I've made some progess on the 32-bit Solaris/sparc side:
Some of the failures are generic issues with the testcases (e.g. performing unaligned accesses on a strict-alignment target), others might be an endianess issue, but a considerable part of the failures may be related to an issue with your patch, e.g.
ISTM that your patch doesn't yet heed The Linux/sparc64 side of things is considerably worse, unfortunately: quite a number of ASan tests just hang, and while the unittests do work, several 64-bit tests |
With PR llvm#107223 and PR llvm#107403, ASan testing can be enabled on SPARC. This patch does so, 32-bit only on Solaris (there is no 64-bit support even in GCC), and both 32 and 64-bit on Linux (although GCC only support 32-bit here). Apart from the obvious CMake changes, this patch includes a couple of testcase adjustments necessary for SPARC: - In `asan_oob_test.cpp`, the `OOB_int` subtest needs to be disabled: it performs unaligned accesses that cannot work on a strict-alignment target like SPARC. - `asan_test.cpp` needs to disable subtests that depend on support for `__builtin_setjmp` and `__builtin_longjmp`. - `zero_page_pc.cpp` reports `0x5` as the faulting address on access to `0x4`. I don't really know why, but it's consistent between Solaris and Linux. Tested on `sparcv9-sun-solaris2.11` and `sparc64-unknown-linux-gnu`. Solaris results are not too bad (36 `FAIL`s) while the Linux ones are quite bad, in particular because quite a number of tests just hang. For those reasons, I'm just posting the patch for reference in case someone else wants to try this on Linux.
Upon calculating NewSP we end up with a biased SP again, so we need to account for that in NewVal return.
Are these patches for Solaris only or do they apply for Linux too?
|
The patches work on Linux/sparc64, too. You don't need the driver one ( I guess it's best if you give me a day or two to finish this stuff. Thanks a lot for your work, which finally will allow |
Most patches should be in place now. On Solaris/sparcv9, there are only two failures left:
The necessary patches are PRs #107405, #108206, #108200, and #108542. There's one patch left that's in final testing: tests like There are several more failures on Linux/sparc64, some of which involve (lack of) |
Your last change brought the Solaris/sparcv9 failures down to just one:
same as reported before:
Linux/sparc64 is worse, but AFAICS none of the failures there are related to this patch, just Linux/sparc64 specific issues. Thanks again! |
I admit I still find it hard to understand what is this testing, so may I ask for some explaination? |
Neither am I. I guess it's best to ask the author of the testcase, originally submitted in Implement variable-sized alloca instrumentation (take 2).. However, I cannot find a github account for him (m.ostapenko). Maybe @yugr or @vitalybuka can shed some light? FWIW, I've tried to build the testcase with gcc (which supports 32-bit ASan on SPARC just fine), but the test fails there, too. |
|
Once PR #107223 lands, ASan can be enabled on Solaris/SPARC. This patch does just that. As on Solaris/x86, the dynamic ASan runtime lib needs to be linked with `-z now` to avoid an `AsanInitInternal` cycle. Tested on `sparcv9-sun-solaris2.11` and `sparc64-unknown-linux-gnu`.
Once PR llvm#107223 lands, ASan can be enabled on Solaris/SPARC. This patch does just that. As on Solaris/x86, the dynamic ASan runtime lib needs to be linked with `-z now` to avoid an `AsanInitInternal` cycle. Tested on `sparcv9-sun-solaris2.11` and `sparc64-unknown-linux-gnu`.
alloca
salloca
s
Taking out the WIP status since I think this is ready for merging now. |
I've just retried the testcase on Linux/i386 with That said, I'm fine with merging the patch now, with one caveat: please first file an Issue for the remaining failure with everything that's known about the issue, then |
Hi, sorry for the late response, I took some time for me to restore my GH account... I don't remember all the details, but from what I remember, when implementing VLA/alloca poisoning, LLVM showed different behavior for allocation slots management between VLAs/allocas into loop nest:
So the test case was indented to check that ASAN can handle poisoning/unpoisoning correctly in presence of both VLAs/allocas into same loop nest. However, looking on freshly generated LLVM IR, it seems this difference is not the case anymore, maybe it was a bug in LLVM back then... I'm sorry that I can't provide more technical details now, this patch is pretty old... |
Posted the issue tracker at #110956.
The buildbot should be green again now :) |
Once PR llvm#107223 lands, ASan can be enabled on Solaris/SPARC. This patch does just that. As on Solaris/x86, the dynamic ASan runtime lib needs to be linked with `-z now` to avoid an `AsanInitInternal` cycle. Tested on `sparcv9-sun-solaris2.11` and `sparc64-unknown-linux-gnu`.
Fine, thanks.
It is indeed. I wonder what to do about the testcase now. I guess there are 3 options:
|
Okay, XFAIL-ed the test on SPARC for now. |
Ping? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM (I didn't look closely at the tests)
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/4193 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/5702 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/145/builds/2912 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/5893 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/5084 Here is the relevant piece of the build log for the reference
|
With PR #107223 and PR #107403, ASan testing can be enabled on SPARC. This patch does so, 32-bit only on both Solaris and Linux. There is no 64-bit support even in GCC. Apart from the obvious CMake changes, this patch includes a couple of testcase adjustments necessary for SPARC: - In `asan_oob_test.cpp`, the `OOB_int` subtest needs to be disabled: it performs unaligned accesses that cannot work on a strict-alignment target like SPARC. - `asan_test.cpp` needs to disable subtests that depend on support for `__builtin_setjmp` and `__builtin_longjmp`. - `zero_page_pc.cpp` reports `0x5` as the faulting address on access to `0x4`. I don't really know why, but it's consistent between Solaris and Linux. Tested on `sparcv9-sun-solaris2.11` and `sparc64-unknown-linux-gnu`.
SPARC ABI doesn't use stack realignment, so let LLVM know about it in `SparcFrameLowering`. This has the side effect of making all overaligned allocations go through `LowerDYNAMIC_STACKALLOC`, so implement the missing logic there too for overaligned allocations. This makes the SPARC backend not crash on overaligned `alloca`s and fix llvm#89569.
With PR llvm#107223 and PR llvm#107403, ASan testing can be enabled on SPARC. This patch does so, 32-bit only on both Solaris and Linux. There is no 64-bit support even in GCC. Apart from the obvious CMake changes, this patch includes a couple of testcase adjustments necessary for SPARC: - In `asan_oob_test.cpp`, the `OOB_int` subtest needs to be disabled: it performs unaligned accesses that cannot work on a strict-alignment target like SPARC. - `asan_test.cpp` needs to disable subtests that depend on support for `__builtin_setjmp` and `__builtin_longjmp`. - `zero_page_pc.cpp` reports `0x5` as the faulting address on access to `0x4`. I don't really know why, but it's consistent between Solaris and Linux. Tested on `sparcv9-sun-solaris2.11` and `sparc64-unknown-linux-gnu`.
SPARC doesn't do stack realignment, so let LLVM know about it in
SparcFrameLowering
. This has the side effect of making all overaligned allocations go throughLowerDYNAMIC_STACKALLOC
, so implement the missing logic there too for overaligned allocations.This makes the SPARC backend not crash on overaligned
alloca
s and fix #89569.