Skip to content

Commit 292d9e8

Browse files
authored
[PowerPC] Mask constant operands in ValueBit tracking (#67653)
In IR or C code, shift amount larger than value size is undefined behavior. But in practice, backend lowering for shift_parts produces add/sub of shift amounts, thus constant shift amounts might be negative or larger than value size, which depends on ISA definition. PowerPC ISA says, the lowest 7 bits (6 bits for 32-bit instruction) will be taken, and if the highest among them is 1, result will be zero, otherwise the low 6 bits (or 5 on 32-bit) are used as shift amount. This commit emulates the behavior and avoids array overflow in bit permutation's value bits calculator.
1 parent 726cf60 commit 292d9e8

File tree

2 files changed

+156
-13
lines changed

2 files changed

+156
-13
lines changed

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

+24-13
Original file line numberDiff line numberDiff line change
@@ -1632,7 +1632,8 @@ class BitPermutationSelector {
16321632
default: break;
16331633
case ISD::ROTL:
16341634
if (isa<ConstantSDNode>(V.getOperand(1))) {
1635-
unsigned RotAmt = V.getConstantOperandVal(1);
1635+
assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1636+
unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
16361637

16371638
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
16381639

@@ -1645,31 +1646,41 @@ class BitPermutationSelector {
16451646
case ISD::SHL:
16461647
case PPCISD::SHL:
16471648
if (isa<ConstantSDNode>(V.getOperand(1))) {
1648-
unsigned ShiftAmt = V.getConstantOperandVal(1);
1649+
// sld takes 7 bits, slw takes 6.
1650+
unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
16491651

16501652
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
16511653

1652-
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1653-
Bits[i] = LHSBits[i - ShiftAmt];
1654-
1655-
for (unsigned i = 0; i < ShiftAmt; ++i)
1656-
Bits[i] = ValueBit(ValueBit::ConstZero);
1654+
if (ShiftAmt >= NumBits) {
1655+
for (unsigned i = 0; i < NumBits; ++i)
1656+
Bits[i] = ValueBit(ValueBit::ConstZero);
1657+
} else {
1658+
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1659+
Bits[i] = LHSBits[i - ShiftAmt];
1660+
for (unsigned i = 0; i < ShiftAmt; ++i)
1661+
Bits[i] = ValueBit(ValueBit::ConstZero);
1662+
}
16571663

16581664
return std::make_pair(Interesting = true, &Bits);
16591665
}
16601666
break;
16611667
case ISD::SRL:
16621668
case PPCISD::SRL:
16631669
if (isa<ConstantSDNode>(V.getOperand(1))) {
1664-
unsigned ShiftAmt = V.getConstantOperandVal(1);
1670+
// srd takes lowest 7 bits, srw takes 6.
1671+
unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
16651672

16661673
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
16671674

1668-
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1669-
Bits[i] = LHSBits[i + ShiftAmt];
1670-
1671-
for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1672-
Bits[i] = ValueBit(ValueBit::ConstZero);
1675+
if (ShiftAmt >= NumBits) {
1676+
for (unsigned i = 0; i < NumBits; ++i)
1677+
Bits[i] = ValueBit(ValueBit::ConstZero);
1678+
} else {
1679+
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1680+
Bits[i] = LHSBits[i + ShiftAmt];
1681+
for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1682+
Bits[i] = ValueBit(ValueBit::ConstZero);
1683+
}
16731684

16741685
return std::make_pair(Interesting = true, &Bits);
16751686
}

llvm/test/CodeGen/PowerPC/pr59074.ll

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE64
3+
; RUN: llc -mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE32
4+
; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE64
5+
; RUN: llc -mtriple=powerpc-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE32
6+
7+
; To verify this doesn't crash due to array out of bound.
8+
define void @pr59074(ptr %0) {
9+
; LE64-LABEL: pr59074:
10+
; LE64: # %bb.0: # %entry
11+
; LE64-NEXT: lwz 6, 0(3)
12+
; LE64-NEXT: li 7, 12
13+
; LE64-NEXT: ld 4, 16(3)
14+
; LE64-NEXT: ld 5, 24(3)
15+
; LE64-NEXT: addi 6, 6, -12
16+
; LE64-NEXT: std 4, 16(3)
17+
; LE64-NEXT: std 5, 24(3)
18+
; LE64-NEXT: srd 6, 7, 6
19+
; LE64-NEXT: li 7, 0
20+
; LE64-NEXT: std 7, 8(3)
21+
; LE64-NEXT: std 6, 0(3)
22+
; LE64-NEXT: blr
23+
;
24+
; LE32-LABEL: pr59074:
25+
; LE32: # %bb.0: # %entry
26+
; LE32-NEXT: stwu 1, -80(1)
27+
; LE32-NEXT: .cfi_def_cfa_offset 80
28+
; LE32-NEXT: lwz 4, 0(3)
29+
; LE32-NEXT: xxlxor 0, 0, 0
30+
; LE32-NEXT: li 5, 4
31+
; LE32-NEXT: addi 6, 1, 16
32+
; LE32-NEXT: li 7, 0
33+
; LE32-NEXT: li 8, 12
34+
; LE32-NEXT: xxswapd 0, 0
35+
; LE32-NEXT: addi 4, 4, -12
36+
; LE32-NEXT: rlwinm 9, 4, 29, 28, 31
37+
; LE32-NEXT: stxvd2x 0, 6, 5
38+
; LE32-NEXT: stw 7, 44(1)
39+
; LE32-NEXT: stw 7, 40(1)
40+
; LE32-NEXT: stw 7, 36(1)
41+
; LE32-NEXT: stw 8, 16(1)
42+
; LE32-NEXT: lwzux 5, 9, 6
43+
; LE32-NEXT: li 6, 7
44+
; LE32-NEXT: lwz 7, 8(9)
45+
; LE32-NEXT: nand 6, 4, 6
46+
; LE32-NEXT: lwz 8, 4(9)
47+
; LE32-NEXT: clrlwi 4, 4, 29
48+
; LE32-NEXT: lwz 9, 12(9)
49+
; LE32-NEXT: clrlwi 6, 6, 27
50+
; LE32-NEXT: subfic 11, 4, 32
51+
; LE32-NEXT: srw 5, 5, 4
52+
; LE32-NEXT: slwi 10, 7, 1
53+
; LE32-NEXT: srw 7, 7, 4
54+
; LE32-NEXT: slw 6, 10, 6
55+
; LE32-NEXT: srw 10, 8, 4
56+
; LE32-NEXT: slw 8, 8, 11
57+
; LE32-NEXT: slw 11, 9, 11
58+
; LE32-NEXT: srw 4, 9, 4
59+
; LE32-NEXT: or 5, 8, 5
60+
; LE32-NEXT: or 7, 11, 7
61+
; LE32-NEXT: or 6, 10, 6
62+
; LE32-NEXT: stw 4, 12(3)
63+
; LE32-NEXT: stw 7, 8(3)
64+
; LE32-NEXT: stw 5, 0(3)
65+
; LE32-NEXT: stw 6, 4(3)
66+
; LE32-NEXT: addi 1, 1, 80
67+
; LE32-NEXT: blr
68+
;
69+
; BE64-LABEL: pr59074:
70+
; BE64: # %bb.0: # %entry
71+
; BE64-NEXT: lwz 6, 12(3)
72+
; BE64-NEXT: li 7, 12
73+
; BE64-NEXT: ld 4, 24(3)
74+
; BE64-NEXT: ld 5, 16(3)
75+
; BE64-NEXT: addi 6, 6, -12
76+
; BE64-NEXT: std 4, 24(3)
77+
; BE64-NEXT: std 5, 16(3)
78+
; BE64-NEXT: srd 6, 7, 6
79+
; BE64-NEXT: li 7, 0
80+
; BE64-NEXT: std 7, 0(3)
81+
; BE64-NEXT: std 6, 8(3)
82+
; BE64-NEXT: blr
83+
;
84+
; BE32-LABEL: pr59074:
85+
; BE32: # %bb.0: # %entry
86+
; BE32-NEXT: lwz 4, 12(3)
87+
; BE32-NEXT: xxlxor 0, 0, 0
88+
; BE32-NEXT: addi 5, 1, -64
89+
; BE32-NEXT: li 6, 12
90+
; BE32-NEXT: li 7, 0
91+
; BE32-NEXT: addi 8, 1, -48
92+
; BE32-NEXT: li 10, 7
93+
; BE32-NEXT: stxvw4x 0, 0, 5
94+
; BE32-NEXT: addi 4, 4, -12
95+
; BE32-NEXT: stw 6, -36(1)
96+
; BE32-NEXT: stw 7, -40(1)
97+
; BE32-NEXT: stw 7, -44(1)
98+
; BE32-NEXT: rlwinm 9, 4, 29, 28, 31
99+
; BE32-NEXT: stw 7, -48(1)
100+
; BE32-NEXT: sub 5, 8, 9
101+
; BE32-NEXT: nand 6, 4, 10
102+
; BE32-NEXT: clrlwi 4, 4, 29
103+
; BE32-NEXT: clrlwi 6, 6, 27
104+
; BE32-NEXT: lwz 7, 4(5)
105+
; BE32-NEXT: lwz 8, 8(5)
106+
; BE32-NEXT: lwz 9, 0(5)
107+
; BE32-NEXT: lwz 5, 12(5)
108+
; BE32-NEXT: slwi 10, 7, 1
109+
; BE32-NEXT: srw 11, 8, 4
110+
; BE32-NEXT: srw 7, 7, 4
111+
; BE32-NEXT: srw 5, 5, 4
112+
; BE32-NEXT: slw 6, 10, 6
113+
; BE32-NEXT: subfic 10, 4, 32
114+
; BE32-NEXT: srw 4, 9, 4
115+
; BE32-NEXT: slw 8, 8, 10
116+
; BE32-NEXT: slw 10, 9, 10
117+
; BE32-NEXT: or 6, 11, 6
118+
; BE32-NEXT: or 7, 10, 7
119+
; BE32-NEXT: or 5, 8, 5
120+
; BE32-NEXT: stw 4, 0(3)
121+
; BE32-NEXT: stw 6, 8(3)
122+
; BE32-NEXT: stw 5, 12(3)
123+
; BE32-NEXT: stw 7, 4(3)
124+
; BE32-NEXT: blr
125+
entry:
126+
%v1 = load <2 x i128>, <2 x i128>* %0
127+
%v2 = insertelement <2 x i128> %v1, i128 12, i32 0
128+
%v3 = sub <2 x i128> %v1, %v2
129+
%v4 = lshr <2 x i128> %v2, %v3
130+
store <2 x i128> %v4, <2 x i128>* %0
131+
ret void
132+
}

0 commit comments

Comments
 (0)