Commit 4607999
[3.9.1] Merging r280837:
[X86] Don't reduce the width of vector mul if the target doesn't support SSE2.

The patch fixes PR30298, which was caused by rL272694; the fix is to bail out
of the width reduction if the target has no SSE2.

Differential Revision: https://reviews.llvm.org/D24288

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@282753 91177308-0d34-0410-b5e6-96231b3b80d8

1 parent 2eb3d6d
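To reproduce the original failure, run the new test against an llc built without this change (a sketch based on the test's RUN line; the exact failure output depends on the build):

    llc -mtriple=i386-pc-linux-gnu -mattr=+sse < test/CodeGen/X86/pr30298.ll

With the fix applied, the same invocation compiles cleanly to the scalar movzbl/imull sequence checked in the test below.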

2 files changed: +45 -1

Diff for: lib/Target/X86/X86ISelLowering.cpp

+2 -1

@@ -27516,7 +27516,8 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
   // pmulld is supported since SSE41. It is better to use pmulld
   // instead of pmullw+pmulhw.
-  if (Subtarget.hasSSE41())
+  // pmullw/pmulhw are not supported by SSE.
+  if (Subtarget.hasSSE41() || !Subtarget.hasSSE2())
     return SDValue();
 
   ShrinkMode Mode;
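Reassembled from the hunk above, the guarded early-exit in reduceVMULWidth now reads:

    // pmulld is supported since SSE41. It is better to use pmulld
    // instead of pmullw+pmulhw.
    // pmullw/pmulhw are not supported by SSE.
    if (Subtarget.hasSSE41() || !Subtarget.hasSSE2())
      return SDValue();

Returning an empty SDValue tells the DAG combiner that this combine does not apply, so the original wide multiply is left to normal lowering and pre-SSE2 targets never see the unsupported pmullw/pmulhw nodes.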

Diff for: test/CodeGen/X86/pr30298.ll

+43 -0

@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i386-pc-linux-gnu -mattr=+sse < %s | FileCheck %s
+
+@c = external global i32*, align 8
+
+define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) nounwind {
+; CHECK-LABEL: mul_2xi8:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    pushl %ebx
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movl c, %esi
+; CHECK-NEXT:    movzbl 1(%edx,%ecx), %edi
+; CHECK-NEXT:    movzbl (%edx,%ecx), %edx
+; CHECK-NEXT:    movzbl 1(%eax,%ecx), %ebx
+; CHECK-NEXT:    movzbl (%eax,%ecx), %eax
+; CHECK-NEXT:    imull %edx, %eax
+; CHECK-NEXT:    imull %edi, %ebx
+; CHECK-NEXT:    movl %ebx, 4(%esi,%ecx,4)
+; CHECK-NEXT:    movl %eax, (%esi,%ecx,4)
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    retl
+entry:
+  %pre = load i32*, i32** @c
+  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
+  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
+  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
+  %tmp8 = zext <2 x i8> %wide.load to <2 x i32>
+  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
+  %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
+  %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
+  %tmp12 = zext <2 x i8> %wide.load17 to <2 x i32>
+  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
+  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
+  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
+  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
+  ret void
+}
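The CHECK lines in this test were generated rather than written by hand; after a codegen change they can be refreshed by rerunning the script named in the NOTE line (an invocation sketch; option names vary between LLVM revisions, and the script needs to find an llc binary):

    utils/update_llc_test_checks.py test/CodeGen/X86/pr30298.ll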
