Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 5012548

Browse files
committed
This patch teaches x86 fast-isel to generate the native div/idiv instructions
for the sdiv/srem/udiv/urem bitcode instructions. This is done for the i8, i16, and i32 types, as well as i64 for the x86_64 target. Patch by Jim Stichnoth.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179715 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 9c63f0d commit 5012548

File tree

3 files changed

+288
-0
lines changed

3 files changed

+288
-0
lines changed

lib/Target/X86/X86FastISel.cpp

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ class X86FastISel : public FastISel {
107107

108108
bool X86SelectShift(const Instruction *I);
109109

110+
bool X86SelectDivRem(const Instruction *I);
111+
110112
bool X86SelectSelect(const Instruction *I);
111113

112114
bool X86SelectTrunc(const Instruction *I);
@@ -1235,6 +1237,124 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
12351237
return true;
12361238
}
12371239

1240+
// Fast-isel selection for the sdiv/srem/udiv/urem IR instructions using the
// native x86 DIV/IDIV instructions.
//
// Looks up a static table indexed first by result type (i8/i16/i32/i64) and
// then by operation, and emits in order: a copy (or, for i8, an extension) of
// operand 0 into the low input register, an optional extension into the high
// input register, the DIV/IDIV on operand 1, and a copy of the fixed output
// register into a register obtained from createResultReg.
//
// Returns false when the result type is not legal, is not one of the four
// handled integer types, is i64 on a non-64-bit subtarget, or when
// getRegForValue yields 0 for either operand. Returns true after the result
// register has been recorded with UpdateValueMap.
bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1241+
const static unsigned NumTypes = 4; // i8, i16, i32, i64
1242+
const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1243+
const static bool S = true; // IsSigned
1244+
const static bool U = false; // !IsSigned
1245+
const static unsigned Copy = TargetOpcode::COPY;
1246+
// For the X86 DIV/IDIV instruction, in most cases the dividend
1247+
// (numerator) must be in a specific register pair highreg:lowreg,
1248+
// producing the quotient in lowreg and the remainder in highreg.
1249+
// For most data types, to set up the instruction, the dividend is
1250+
// copied into lowreg, and lowreg is sign-extended or zero-extended
1251+
// into highreg. The exception is i8, where the dividend is defined
1252+
// as a single register rather than a register pair, and we
1253+
// therefore directly sign-extend or zero-extend the dividend into
1254+
// lowreg, instead of copying, and ignore the highreg.
1255+
const static struct DivRemEntry {
1256+
// The following portion depends only on the data type.
1257+
const TargetRegisterClass *RC;
1258+
unsigned LowInReg; // low part of the register pair
1259+
unsigned HighInReg; // high part of the register pair
1260+
// The following portion depends on both the data type and the operation.
1261+
struct DivRemResult {
1262+
unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1263+
unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1264+
// highreg, or copying a zero into highreg.
1265+
unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1266+
// zero/sign-extending into lowreg for i8.
1267+
unsigned DivRemResultReg; // Register containing the desired result.
1268+
bool IsOpSigned; // Whether to use signed or unsigned form.
1269+
} ResultTable[NumOps];
1270+
} OpTable[NumTypes] = {
1271+
// Row order (i8, i16, i32, i64) and entry order (SDiv, SRem, UDiv, URem)
// must match the TypeIndex/OpIndex values assigned by the switches below.
{ &X86::GR8RegClass, X86::AX, 0, {
1272+
{ X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1273+
{ X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1274+
{ X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1275+
{ X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1276+
}
1277+
}, // i8
1278+
{ &X86::GR16RegClass, X86::AX, X86::DX, {
1279+
{ X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1280+
{ X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1281+
{ X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv
1282+
{ X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem
1283+
}
1284+
}, // i16
1285+
{ &X86::GR32RegClass, X86::EAX, X86::EDX, {
1286+
{ X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1287+
{ X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1288+
{ X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1289+
{ X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1290+
}
1291+
}, // i32
1292+
{ &X86::GR64RegClass, X86::RAX, X86::RDX, {
1293+
{ X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1294+
{ X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1295+
{ X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv
1296+
{ X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem
1297+
}
1298+
}, // i64
1299+
};
1300+
1301+
MVT VT;
1302+
if (!isTypeLegal(I->getType(), VT))
1303+
return false;
1304+
1305+
unsigned TypeIndex, OpIndex;
1306+
// Map the legal value type to its row in OpTable; other types are rejected.
switch (VT.SimpleTy) {
1307+
default: return false;
1308+
case MVT::i8: TypeIndex = 0; break;
1309+
case MVT::i16: TypeIndex = 1; break;
1310+
case MVT::i32: TypeIndex = 2; break;
1311+
case MVT::i64: TypeIndex = 3;
1312+
// The i64 row is only used when the subtarget is 64-bit.
if (!Subtarget->is64Bit())
1313+
return false;
1314+
break;
1315+
}
1316+
1317+
// Map the IR opcode to its entry in ResultTable.
switch (I->getOpcode()) {
1318+
// Any other opcode is treated as a programming error, not a selection failure.
default: llvm_unreachable("Unexpected div/rem opcode");
1319+
case Instruction::SDiv: OpIndex = 0; break;
1320+
case Instruction::SRem: OpIndex = 1; break;
1321+
case Instruction::UDiv: OpIndex = 2; break;
1322+
case Instruction::URem: OpIndex = 3; break;
1323+
}
1324+
1325+
const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1326+
const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1327+
unsigned Op0Reg = getRegForValue(I->getOperand(0));
1328+
if (Op0Reg == 0)
1329+
return false;
1330+
unsigned Op1Reg = getRegForValue(I->getOperand(1));
1331+
if (Op1Reg == 0)
1332+
return false;
1333+
1334+
// Move op0 into low-order input register.
1335+
// For i8 entries OpCopy is MOVSX16rr8/MOVZX16rr8, so this step also extends.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1336+
TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1337+
// Zero-extend or sign-extend into high-order input register.
1338+
// OpSignExtend is 0 in every i8 entry, so nothing is emitted here for i8.
if (OpEntry.OpSignExtend) {
1339+
if (OpEntry.IsOpSigned)
1340+
// Signed entries (CWD/CDQ/CQO) are built with no explicit operands.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1341+
TII.get(OpEntry.OpSignExtend));
1342+
else
1343+
// Unsigned entries (MOV16r0/MOV32r0/MOV64r0) take HighInReg as their
// explicit destination register.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1344+
TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
1345+
}
1346+
// Generate the DIV/IDIV instruction.
1347+
// Only the divisor register is added as an explicit operand.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1348+
TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1349+
// Copy output register into result register.
1350+
unsigned ResultReg = createResultReg(TypeEntry.RC);
1351+
// NOTE(review): for i8 srem/urem DivRemResultReg is AH. Copying AH into a
// GR8 virtual register may be problematic on x86-64 if that virtual register
// is later assigned a register that needs a REX prefix -- confirm.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1352+
TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
1353+
UpdateValueMap(I, ResultReg);
1354+
1355+
return true;
1356+
}
1357+
12381358
bool X86FastISel::X86SelectSelect(const Instruction *I) {
12391359
MVT VT;
12401360
if (!isTypeLegal(I->getType(), VT))
@@ -2084,6 +2204,11 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
20842204
case Instruction::AShr:
20852205
case Instruction::Shl:
20862206
return X86SelectShift(I);
2207+
case Instruction::SDiv:
2208+
case Instruction::UDiv:
2209+
case Instruction::SRem:
2210+
case Instruction::URem:
2211+
return X86SelectDivRem(I);
20872212
case Instruction::Select:
20882213
return X86SelectSelect(I);
20892214
case Instruction::Trunc:
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
2+
3+
; Signed i64 division: returns the quotient of %dividend by %divisor.
; The FileCheck patterns after this function look for cqto, then idivq.
define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind {
4+
entry:
5+
%result = sdiv i64 %dividend, %divisor
6+
ret i64 %result
7+
}
8+
9+
; CHECK: test_sdiv64:
10+
; CHECK: cqto
11+
; CHECK: idivq
12+
13+
; Signed i64 remainder: returns %dividend srem %divisor.
; The FileCheck patterns after this function look for cqto, then idivq.
define i64 @test_srem64(i64 %dividend, i64 %divisor) nounwind {
14+
entry:
15+
%result = srem i64 %dividend, %divisor
16+
ret i64 %result
17+
}
18+
19+
; CHECK: test_srem64:
20+
; CHECK: cqto
21+
; CHECK: idivq
22+
23+
; Unsigned i64 division: returns the quotient of %dividend by %divisor.
; The FileCheck patterns after this function look for xorl, then divq.
define i64 @test_udiv64(i64 %dividend, i64 %divisor) nounwind {
24+
entry:
25+
%result = udiv i64 %dividend, %divisor
26+
ret i64 %result
27+
}
28+
29+
; CHECK: test_udiv64:
30+
; CHECK: xorl
31+
; CHECK: divq
32+
33+
; Unsigned i64 remainder: returns %dividend urem %divisor.
; The FileCheck patterns after this function look for xorl, then divq.
define i64 @test_urem64(i64 %dividend, i64 %divisor) nounwind {
34+
entry:
35+
%result = urem i64 %dividend, %divisor
36+
ret i64 %result
37+
}
38+
39+
; CHECK: test_urem64:
40+
; CHECK: xorl
41+
; CHECK: divq

test/CodeGen/X86/fast-isel-divrem.ll

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
2+
; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
3+
4+
; Signed i8 division: returns the quotient of %dividend by %divisor.
; The FileCheck patterns after this function look for movsbw, then idivb.
define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind {
5+
entry:
6+
%result = sdiv i8 %dividend, %divisor
7+
ret i8 %result
8+
}
9+
10+
; CHECK: test_sdiv8:
11+
; CHECK: movsbw
12+
; CHECK: idivb
13+
14+
; Signed i8 remainder: returns %dividend srem %divisor.
; The FileCheck patterns after this function look for movsbw, then idivb.
define i8 @test_srem8(i8 %dividend, i8 %divisor) nounwind {
15+
entry:
16+
%result = srem i8 %dividend, %divisor
17+
ret i8 %result
18+
}
19+
20+
; CHECK: test_srem8:
21+
; CHECK: movsbw
22+
; CHECK: idivb
23+
24+
; Unsigned i8 division: returns the quotient of %dividend by %divisor.
; The FileCheck patterns after this function look for movzbw, then divb.
define i8 @test_udiv8(i8 %dividend, i8 %divisor) nounwind {
25+
entry:
26+
%result = udiv i8 %dividend, %divisor
27+
ret i8 %result
28+
}
29+
30+
; CHECK: test_udiv8:
31+
; CHECK: movzbw
32+
; CHECK: divb
33+
34+
; Unsigned i8 remainder: returns %dividend urem %divisor.
; The FileCheck patterns after this function look for movzbw, then divb.
define i8 @test_urem8(i8 %dividend, i8 %divisor) nounwind {
35+
entry:
36+
%result = urem i8 %dividend, %divisor
37+
ret i8 %result
38+
}
39+
40+
; CHECK: test_urem8:
41+
; CHECK: movzbw
42+
; CHECK: divb
43+
44+
; Signed i16 division: returns the quotient of %dividend by %divisor.
; The FileCheck patterns after this function look for cwtd, then idivw.
define i16 @test_sdiv16(i16 %dividend, i16 %divisor) nounwind {
45+
entry:
46+
%result = sdiv i16 %dividend, %divisor
47+
ret i16 %result
48+
}
49+
50+
; CHECK: test_sdiv16:
51+
; CHECK: cwtd
52+
; CHECK: idivw
53+
54+
; Signed i16 remainder: returns %dividend srem %divisor.
; The FileCheck patterns after this function look for cwtd, then idivw.
define i16 @test_srem16(i16 %dividend, i16 %divisor) nounwind {
55+
entry:
56+
%result = srem i16 %dividend, %divisor
57+
ret i16 %result
58+
}
59+
60+
; CHECK: test_srem16:
61+
; CHECK: cwtd
62+
; CHECK: idivw
63+
64+
; Unsigned i16 division: returns the quotient of %dividend by %divisor.
; The FileCheck patterns after this function look for xorl, then divw.
define i16 @test_udiv16(i16 %dividend, i16 %divisor) nounwind {
65+
entry:
66+
%result = udiv i16 %dividend, %divisor
67+
ret i16 %result
68+
}
69+
70+
; CHECK: test_udiv16:
71+
; CHECK: xorl
72+
; CHECK: divw
73+
74+
; Unsigned i16 remainder: returns %dividend urem %divisor.
; The FileCheck patterns after this function look for xorl, then divw.
define i16 @test_urem16(i16 %dividend, i16 %divisor) nounwind {
75+
entry:
76+
%result = urem i16 %dividend, %divisor
77+
ret i16 %result
78+
}
79+
80+
; CHECK: test_urem16:
81+
; CHECK: xorl
82+
; CHECK: divw
83+
84+
; Signed i32 division: returns the quotient of %dividend by %divisor.
; The FileCheck patterns after this function look for cltd, then idivl.
define i32 @test_sdiv32(i32 %dividend, i32 %divisor) nounwind {
85+
entry:
86+
%result = sdiv i32 %dividend, %divisor
87+
ret i32 %result
88+
}
89+
90+
; CHECK: test_sdiv32:
91+
; CHECK: cltd
92+
; CHECK: idivl
93+
94+
; Signed i32 remainder: returns %dividend srem %divisor.
; The FileCheck patterns after this function look for cltd, then idivl.
define i32 @test_srem32(i32 %dividend, i32 %divisor) nounwind {
95+
entry:
96+
%result = srem i32 %dividend, %divisor
97+
ret i32 %result
98+
}
99+
100+
; CHECK: test_srem32:
101+
; CHECK: cltd
102+
; CHECK: idivl
103+
104+
; Unsigned i32 division: returns the quotient of %dividend by %divisor.
; The FileCheck patterns after this function look for xorl, then divl.
define i32 @test_udiv32(i32 %dividend, i32 %divisor) nounwind {
105+
entry:
106+
%result = udiv i32 %dividend, %divisor
107+
ret i32 %result
108+
}
109+
110+
; CHECK: test_udiv32:
111+
; CHECK: xorl
112+
; CHECK: divl
113+
114+
; Unsigned i32 remainder: returns %dividend urem %divisor.
; The FileCheck patterns after this function look for xorl, then divl.
define i32 @test_urem32(i32 %dividend, i32 %divisor) nounwind {
115+
entry:
116+
%result = urem i32 %dividend, %divisor
117+
ret i32 %result
118+
}
119+
120+
; CHECK: test_urem32:
121+
; CHECK: xorl
122+
; CHECK: divl

0 commit comments

Comments
 (0)