Skip to content

Commit

Permalink
add patch that works around a issue with partword-atomics
Browse files Browse the repository at this point in the history
(cherry picked from commit c38d1f8)
x-ref: #18557
  • Loading branch information
vchuravy committed Oct 6, 2016
1 parent 99d0539 commit f96624e
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 0 deletions.
1 change: 1 addition & 0 deletions deps/llvm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ $(eval $(call LLVM_PATCH,llvm-win64-reloc-dwarf))
else ifeq ($(LLVM_VER_SHORT),3.9)
# fix lowering for atomics on ppc
$(eval $(call LLVM_PATCH,llvm-rL279933-ppc-atomicrmw-lowering)) # Remove for 4.0
$(eval $(call LLVM_PATCH,llvm-PR22923)) # Remove for 4.0
endif # LLVM_VER

ifeq ($(LLVM_VER),3.7.1)
Expand Down
151 changes: 151 additions & 0 deletions deps/patches/llvm-PR22923.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
From e060ffb4b20e294ecb8429bd8a925f9f12b63b17 Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Mon, 29 Aug 2016 22:25:36 +0000
Subject: [PATCH] [PowerPC] Fix i8/i16 atomics for little-Endian targets
without partword atomics

For little-Endian PowerPC, we generally target only P8 and later by default.
However, generic (older) 64-bit configurations are still an option, and in that
case, partword atomics are not available (e.g. stbcx.). To lower i8/i16 atomics
without true i8/i16 atomic operations, we emulate using i32 atomics in
combination with a bunch of shifting and masking, etc. The amount by which to
shift in little-Endian mode is different from the amount in big-Endian mode (it
is inverted -- meaning we can leave off the xor when computing the amount).

Fixes PR22923.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280022 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------
test/CodeGen/PowerPC/atomic-2.ll | 15 ++++++++++++++-
2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e89b6ca..f895b06 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8513,6 +8513,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = Subtarget.isPPC64();
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -8542,7 +8543,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
: &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg =
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
unsigned MaskReg = RegInfo.createVirtualRegister(RC);
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
@@ -8587,8 +8589,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (!isLittleEndian)
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
@@ -9293,6 +9296,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
bool is64bit = Subtarget.isPPC64();
+ bool isLittleEndian = Subtarget.isLittleEndian();
bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

unsigned dest = MI.getOperand(0).getReg();
@@ -9319,7 +9323,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
: &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg =
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
@@ -9374,8 +9379,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (!isLittleEndian)
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 1857d5d..bafabdb 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
+; RUN: llc < %s -march=ppc64le | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U

@@ -12,6 +13,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {

define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange_and_add8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lbarx
%tmp = atomicrmw add i8* %mem, i8 %val monotonic
; CHECK-P8U: stbcx.
@@ -20,6 +23,8 @@ define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {

define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange_and_add16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lharx
%tmp = atomicrmw add i16* %mem, i16 %val monotonic
; CHECK-P8U: sthcx.
@@ -38,6 +43,8 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind {

define i8 @exchange_and_cmp8(i8* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lbarx
%tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic
%tmp = extractvalue { i8, i1 } %tmppair, 0
@@ -48,6 +55,8 @@ define i8 @exchange_and_cmp8(i8* %mem) nounwind {

define i16 @exchange_and_cmp16(i16* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lharx
%tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic
%tmp = extractvalue { i16, i1 } %tmppair, 0
@@ -66,6 +75,8 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {

define i8 @exchange8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lbarx
%tmp = atomicrmw xchg i8* %mem, i8 1 monotonic
; CHECK-P8U: stbcx.
@@ -74,6 +85,8 @@ define i8 @exchange8(i8* %mem, i8 %val) nounwind {

define i16 @exchange16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lharx
%tmp = atomicrmw xchg i16* %mem, i16 1 monotonic
; CHECK-P8U: sthcx.

0 comments on commit f96624e

Please sign in to comment.