-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add patch that works around a issue with partword-atomics
- Loading branch information
Showing
2 changed files
with
152 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
From e060ffb4b20e294ecb8429bd8a925f9f12b63b17 Mon Sep 17 00:00:00 2001 | ||
From: Hal Finkel <hfinkel@anl.gov> | ||
Date: Mon, 29 Aug 2016 22:25:36 +0000 | ||
Subject: [PATCH] [PowerPC] Fix i8/i16 atomics for little-Endian targets | ||
without partword atomics | ||
|
||
For little-Endian PowerPC, we generally target only P8 and later by default. | ||
However, generic (older) 64-bit configurations are still an option, and in that | ||
case, partword atomics are not available (e.g. stbcx.). To lower i8/i16 atomics | ||
without true i8/i16 atomic operations, we emulate using i32 atomics in | ||
combination with a bunch of shifting and masking, etc. The amount by which to | ||
shift in little-Endian mode is different from the amount in big-Endian mode (it | ||
is inverted -- meaning we can leave off the xor when computing the amount). | ||
|
||
Fixes PR22923. | ||
|
||
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280022 91177308-0d34-0410-b5e6-96231b3b80d8 | ||
--- | ||
lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------ | ||
test/CodeGen/PowerPC/atomic-2.ll | 15 ++++++++++++++- | ||
2 files changed, 26 insertions(+), 7 deletions(-) | ||
|
||
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp | ||
index e89b6ca..f895b06 100644 | ||
--- a/lib/Target/PowerPC/PPCISelLowering.cpp | ||
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp | ||
@@ -8513,6 +8513,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, | ||
// registers without caring whether they're 32 or 64, but here we're | ||
// doing actual arithmetic on the addresses. | ||
bool is64bit = Subtarget.isPPC64(); | ||
+ bool isLittleEndian = Subtarget.isLittleEndian(); | ||
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; | ||
|
||
const BasicBlock *LLVM_BB = BB->getBasicBlock(); | ||
@@ -8542,7 +8543,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, | ||
: &PPC::GPRCRegClass; | ||
unsigned PtrReg = RegInfo.createVirtualRegister(RC); | ||
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); | ||
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC); | ||
+ unsigned ShiftReg = | ||
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); | ||
unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); | ||
unsigned MaskReg = RegInfo.createVirtualRegister(RC); | ||
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); | ||
@@ -8587,8 +8589,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, | ||
} | ||
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) | ||
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27); | ||
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) | ||
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); | ||
+ if (!isLittleEndian) | ||
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) | ||
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); | ||
if (is64bit) | ||
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) | ||
.addReg(Ptr1Reg).addImm(0).addImm(61); | ||
@@ -9293,6 +9296,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, | ||
// since we're actually doing arithmetic on them. Other registers | ||
// can be 32-bit. | ||
bool is64bit = Subtarget.isPPC64(); | ||
+ bool isLittleEndian = Subtarget.isLittleEndian(); | ||
bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; | ||
|
||
unsigned dest = MI.getOperand(0).getReg(); | ||
@@ -9319,7 +9323,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, | ||
: &PPC::GPRCRegClass; | ||
unsigned PtrReg = RegInfo.createVirtualRegister(RC); | ||
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); | ||
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC); | ||
+ unsigned ShiftReg = | ||
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); | ||
unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); | ||
unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); | ||
unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); | ||
@@ -9374,8 +9379,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, | ||
} | ||
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) | ||
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27); | ||
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) | ||
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); | ||
+ if (!isLittleEndian) | ||
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) | ||
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); | ||
if (is64bit) | ||
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) | ||
.addReg(Ptr1Reg).addImm(0).addImm(61); | ||
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll | ||
index 1857d5d..bafabdb 100644 | ||
--- a/test/CodeGen/PowerPC/atomic-2.ll | ||
+++ b/test/CodeGen/PowerPC/atomic-2.ll | ||
@@ -1,4 +1,5 @@ | ||
-; RUN: llc < %s -march=ppc64 | FileCheck %s | ||
+; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE | ||
+; RUN: llc < %s -march=ppc64le | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE | ||
; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s | ||
; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U | ||
|
||
@@ -12,6 +13,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind { | ||
|
||
define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind { | ||
; CHECK-LABEL: exchange_and_add8: | ||
+; CHECK-BE: xori | ||
+; CHECK-LE-NOT: xori | ||
; CHECK-P8U: lbarx | ||
%tmp = atomicrmw add i8* %mem, i8 %val monotonic | ||
; CHECK-P8U: stbcx. | ||
@@ -20,6 +23,8 @@ define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind { | ||
|
||
define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind { | ||
; CHECK-LABEL: exchange_and_add16: | ||
+; CHECK-BE: xori | ||
+; CHECK-LE-NOT: xori | ||
; CHECK-P8U: lharx | ||
%tmp = atomicrmw add i16* %mem, i16 %val monotonic | ||
; CHECK-P8U: sthcx. | ||
@@ -38,6 +43,8 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind { | ||
|
||
define i8 @exchange_and_cmp8(i8* %mem) nounwind { | ||
; CHECK-LABEL: exchange_and_cmp8: | ||
+; CHECK-BE: xori | ||
+; CHECK-LE-NOT: xori | ||
; CHECK-P8U: lbarx | ||
%tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic | ||
%tmp = extractvalue { i8, i1 } %tmppair, 0 | ||
@@ -48,6 +55,8 @@ define i8 @exchange_and_cmp8(i8* %mem) nounwind { | ||
|
||
define i16 @exchange_and_cmp16(i16* %mem) nounwind { | ||
; CHECK-LABEL: exchange_and_cmp16: | ||
+; CHECK-BE: xori | ||
+; CHECK-LE-NOT: xori | ||
; CHECK-P8U: lharx | ||
%tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic | ||
%tmp = extractvalue { i16, i1 } %tmppair, 0 | ||
@@ -66,6 +75,8 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind { | ||
|
||
define i8 @exchange8(i8* %mem, i8 %val) nounwind { | ||
; CHECK-LABEL: exchange8: | ||
+; CHECK-BE: xori | ||
+; CHECK-LE-NOT: xori | ||
; CHECK-P8U: lbarx | ||
%tmp = atomicrmw xchg i8* %mem, i8 1 monotonic | ||
; CHECK-P8U: stbcx. | ||
@@ -74,6 +85,8 @@ define i8 @exchange8(i8* %mem, i8 %val) nounwind { | ||
|
||
define i16 @exchange16(i16* %mem, i16 %val) nounwind { | ||
; CHECK-LABEL: exchange16: | ||
+; CHECK-BE: xori | ||
+; CHECK-LE-NOT: xori | ||
; CHECK-P8U: lharx | ||
%tmp = atomicrmw xchg i16* %mem, i16 1 monotonic | ||
; CHECK-P8U: sthcx. |