-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[LLVM] Add patches for PPC knownbits and AArch64 globalisel
- Loading branch information
Showing
3 changed files
with
188 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
From 8bec64e2c0386934d4e38344907f0f4b0de4d8a3 Mon Sep 17 00:00:00 2001 | ||
From: Valentin Churavy <v.churavy@gmail.com> | ||
Date: Tue, 15 Dec 2020 09:59:18 -0500 | ||
Subject: [PATCH] [PowerPC] KnownBits should be constant when performing | ||
non-sign comparison | ||
|
||
In `PPCTargetLowering::DAGCombineTruncBoolExt`, when checking whether the transformation is valid for a comparison that is neither signed nor unsigned, the code comment says: | ||
``` | ||
// This is neither a signed nor an unsigned comparison, just make sure | ||
// that the high bits are equal. | ||
``` | ||
The original check | ||
``` | ||
if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) | ||
return SDValue(); | ||
``` | ||
is not strong enough. For example, | ||
``` | ||
Op1Known = 111x000x; | ||
Op2Known = 111x000x; | ||
``` | ||
Bit 0 is deliberately ignored, but bit 4 is also unknown in both operands, so the two values can still differ there and the unknown bit affects the final result. | ||
|
||
This patch fixes https://bugs.llvm.org/show_bug.cgi?id=48388. | ||
|
||
Differential Revision: https://reviews.llvm.org/D93092 | ||
--- | ||
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +- | ||
llvm/test/CodeGen/PowerPC/pr48388.ll | 42 +++++++++++++++++++++ | ||
2 files changed, 44 insertions(+), 1 deletion(-) | ||
create mode 100644 llvm/test/CodeGen/PowerPC/pr48388.ll | ||
|
||
diff --git llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp | ||
index f54f1673526d..76b32db44656 100644 | ||
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | ||
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | ||
@@ -13291,7 +13291,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, | ||
Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); | ||
Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); | ||
|
||
- if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) | ||
+ if (!Op1Known.isConstant() || !Op2Known.isConstant() || | ||
+ Op1Known.getConstant() != Op2Known.getConstant()) | ||
return SDValue(); | ||
} | ||
} | ||
diff --git llvm/test/CodeGen/PowerPC/pr48388.ll llvm/test/CodeGen/PowerPC/pr48388.ll | ||
new file mode 100644 | ||
index 000000000000..138fb6147832 | ||
--- /dev/null | ||
+++ llvm/test/CodeGen/PowerPC/pr48388.ll | ||
@@ -0,0 +1,42 @@ | ||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -ppc-asm-full-reg-names \ | ||
+; RUN: < %s | FileCheck %s | ||
+ | ||
+define i64 @julia_div_i64(i64 %0, i64 %1) local_unnamed_addr #0 { | ||
+; CHECK-LABEL: julia_div_i64: | ||
+; CHECK: # %bb.0: # %entry | ||
+; CHECK-NEXT: divd r6, r3, r4 | ||
+; CHECK-NEXT: li r5, 32767 | ||
+; CHECK-NEXT: sldi r5, r5, 32 | ||
+; CHECK-NEXT: oris r7, r5, 40069 | ||
+; CHECK-NEXT: oris r5, r5, 40079 | ||
+; CHECK-NEXT: cmpdi r3, 0 | ||
+; CHECK-NEXT: ori r7, r7, 13456 | ||
+; CHECK-NEXT: ori r5, r5, 65264 | ||
+; CHECK-NEXT: iselgt r9, r5, r7 | ||
+; CHECK-NEXT: cmpdi r4, 0 | ||
+; CHECK-NEXT: mulld r8, r6, r4 | ||
+; CHECK-NEXT: iselgt r4, r5, r7 | ||
+; CHECK-NEXT: xor r4, r9, r4 | ||
+; CHECK-NEXT: cntlzd r4, r4 | ||
+; CHECK-NEXT: rldicl r4, r4, 58, 63 | ||
+; CHECK-NEXT: xor r3, r8, r3 | ||
+; CHECK-NEXT: addic r5, r3, -1 | ||
+; CHECK-NEXT: subfe r3, r5, r3 | ||
+; CHECK-NEXT: and r3, r4, r3 | ||
+; CHECK-NEXT: add r3, r6, r3 | ||
+; CHECK-NEXT: blr | ||
+entry: | ||
+ %2 = sdiv i64 %0, %1 | ||
+ %3 = icmp sgt i64 %0, 0 | ||
+ %4 = icmp sgt i64 %1, 0 | ||
+ %5 = select i1 %3, i64 140735820070640, i64 140735819363472 | ||
+ %6 = select i1 %4, i64 140735820070640, i64 140735819363472 | ||
+ %7 = icmp eq i64 %5, %6 | ||
+ %8 = mul i64 %2, %1 | ||
+ %9 = icmp ne i64 %8, %0 | ||
+ %10 = and i1 %7, %9 | ||
+ %11 = zext i1 %10 to i64 | ||
+ %12 = add i64 %2, %11 | ||
+ ret i64 %12 | ||
+} | ||
-- | ||
2.29.2 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
From af809ec100ef60cdeeef776e54c123e4fc8f1071 Mon Sep 17 00:00:00 2001 | ||
From: Jameson Nash <vtjnash@gmail.com> | ||
Date: Tue, 15 Dec 2020 10:04:08 -0500 | ||
Subject: [PATCH] GlobalISel: remove assert that memcpy Src and Dst addrspace | ||
must be identical | ||
|
||
The LangRef does not require these arguments to have the same type. | ||
|
||
Differential Revision: https://reviews.llvm.org/D93154 | ||
--- | ||
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 27 +++++++++++-------- | ||
1 file changed, 16 insertions(+), 11 deletions(-) | ||
|
||
diff --git llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | ||
index 79f74a47d83c..7bd6f8f52c8b 100644 | ||
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | ||
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | ||
@@ -1240,7 +1240,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, | ||
// of that value loaded. This can result in a sequence of loads and stores | ||
// mixed types, depending on what the target specifies as good types to use. | ||
unsigned CurrOffset = 0; | ||
- LLT PtrTy = MRI.getType(Src); | ||
unsigned Size = KnownLen; | ||
for (auto CopyTy : MemOps) { | ||
// Issuing an unaligned load / store pair that overlaps with the previous | ||
@@ -1258,15 +1257,20 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, | ||
Register LoadPtr = Src; | ||
Register Offset; | ||
if (CurrOffset != 0) { | ||
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) | ||
- .getReg(0); | ||
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); | ||
+ LLT LoadTy = MRI.getType(Src); | ||
+ Offset = | ||
+ MIB.buildConstant(LLT::scalar(LoadTy.getSizeInBits()), CurrOffset) | ||
+ .getReg(0); | ||
+ LoadPtr = MIB.buildPtrAdd(LoadTy, Src, Offset).getReg(0); | ||
} | ||
auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); | ||
|
||
// Create the store. | ||
- Register StorePtr = | ||
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); | ||
+ Register StorePtr = Dst; | ||
+ if (CurrOffset != 0) { | ||
+ LLT StoreTy = MRI.getType(Dst); | ||
+ StorePtr = MIB.buildPtrAdd(StoreTy, Dst, Offset).getReg(0); | ||
+ } | ||
MIB.buildStore(LdVal, StorePtr, *StoreMMO); | ||
CurrOffset += CopyTy.getSizeInBytes(); | ||
Size -= CopyTy.getSizeInBytes(); | ||
@@ -1343,7 +1347,6 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, | ||
// Apart from that, this loop is pretty much doing the same thing as the | ||
// memcpy codegen function. | ||
unsigned CurrOffset = 0; | ||
- LLT PtrTy = MRI.getType(Src); | ||
SmallVector<Register, 16> LoadVals; | ||
for (auto CopyTy : MemOps) { | ||
// Construct MMO for the load. | ||
@@ -1353,9 +1356,10 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, | ||
// Create the load. | ||
Register LoadPtr = Src; | ||
if (CurrOffset != 0) { | ||
+ LLT LoadTy = MRI.getType(Src); | ||
auto Offset = | ||
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); | ||
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); | ||
+ MIB.buildConstant(LLT::scalar(LoadTy.getSizeInBits()), CurrOffset); | ||
+ LoadPtr = MIB.buildPtrAdd(LoadTy, Src, Offset).getReg(0); | ||
} | ||
LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); | ||
CurrOffset += CopyTy.getSizeInBytes(); | ||
@@ -1370,9 +1374,10 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, | ||
|
||
Register StorePtr = Dst; | ||
if (CurrOffset != 0) { | ||
+ LLT StoreTy = MRI.getType(Dst); | ||
auto Offset = | ||
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); | ||
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); | ||
+ MIB.buildConstant(LLT::scalar(StoreTy.getSizeInBits()), CurrOffset); | ||
+ StorePtr = MIB.buildPtrAdd(StoreTy, Dst, Offset).getReg(0); | ||
} | ||
MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); | ||
CurrOffset += CopyTy.getSizeInBytes(); | ||
-- | ||
2.29.2 | ||
|