@@ -13,36 +13,17 @@ declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
13
13
; FIXME: All four versions are semantically equivalent and should produce the same asm as the scalar version.
14
14
15
15
define i1 @intrinsic_v2i8 (ptr align 1 %arg , ptr align 1 %arg1 ) {
16
- ; SSE2-LABEL: intrinsic_v2i8:
17
- ; SSE2: # %bb.0: # %bb
18
- ; SSE2-NEXT: movzwl (%rsi), %eax
19
- ; SSE2-NEXT: movd %eax, %xmm0
20
- ; SSE2-NEXT: movzwl (%rdi), %eax
21
- ; SSE2-NEXT: movd %eax, %xmm1
22
- ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
23
- ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
24
- ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
25
- ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
26
- ; SSE2-NEXT: movmskpd %xmm0, %eax
27
- ; SSE2-NEXT: cmpb $3, %al
28
- ; SSE2-NEXT: sete %al
29
- ; SSE2-NEXT: retq
30
- ;
31
- ; SSE42-LABEL: intrinsic_v2i8:
32
- ; SSE42: # %bb.0: # %bb
33
- ; SSE42-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
34
- ; SSE42-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
35
- ; SSE42-NEXT: psubq %xmm1, %xmm0
36
- ; SSE42-NEXT: ptest %xmm0, %xmm0
37
- ; SSE42-NEXT: sete %al
38
- ; SSE42-NEXT: retq
16
+ ; SSE-LABEL: intrinsic_v2i8:
17
+ ; SSE: # %bb.0: # %bb
18
+ ; SSE-NEXT: movzwl (%rdi), %eax
19
+ ; SSE-NEXT: cmpw %ax, (%rsi)
20
+ ; SSE-NEXT: sete %al
21
+ ; SSE-NEXT: retq
39
22
;
40
23
; AVX-LABEL: intrinsic_v2i8:
41
24
; AVX: # %bb.0: # %bb
42
- ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
43
- ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
44
- ; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0
45
- ; AVX-NEXT: vptest %xmm0, %xmm0
25
+ ; AVX-NEXT: movzwl (%rdi), %eax
26
+ ; AVX-NEXT: cmpw %ax, (%rsi)
46
27
; AVX-NEXT: sete %al
47
28
; AVX-NEXT: retq
48
29
;
@@ -63,10 +44,8 @@ define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
63
44
; X86: # %bb.0: # %bb
64
45
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
65
46
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
66
- ; X86-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
67
- ; X86-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
68
- ; X86-NEXT: vpsubq %xmm1, %xmm0, %xmm0
69
- ; X86-NEXT: vptest %xmm0, %xmm0
47
+ ; X86-NEXT: movzwl (%eax), %eax
48
+ ; X86-NEXT: cmpw %ax, (%ecx)
70
49
; X86-NEXT: sete %al
71
50
; X86-NEXT: retl
72
51
bb:
78
57
}
79
58
80
59
define i1 @intrinsic_v4i8 (ptr align 1 %arg , ptr align 1 %arg1 ) {
81
- ; SSE2-LABEL: intrinsic_v4i8:
82
- ; SSE2: # %bb.0: # %bb
83
- ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
84
- ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
85
- ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
86
- ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
87
- ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
88
- ; SSE2-NEXT: movmskps %xmm0, %eax
89
- ; SSE2-NEXT: cmpb $15, %al
90
- ; SSE2-NEXT: sete %al
91
- ; SSE2-NEXT: retq
92
- ;
93
- ; SSE42-LABEL: intrinsic_v4i8:
94
- ; SSE42: # %bb.0: # %bb
95
- ; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
96
- ; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
97
- ; SSE42-NEXT: psubd %xmm1, %xmm0
98
- ; SSE42-NEXT: ptest %xmm0, %xmm0
99
- ; SSE42-NEXT: sete %al
100
- ; SSE42-NEXT: retq
60
+ ; SSE-LABEL: intrinsic_v4i8:
61
+ ; SSE: # %bb.0: # %bb
62
+ ; SSE-NEXT: movl (%rdi), %eax
63
+ ; SSE-NEXT: cmpl %eax, (%rsi)
64
+ ; SSE-NEXT: sete %al
65
+ ; SSE-NEXT: retq
101
66
;
102
67
; AVX-LABEL: intrinsic_v4i8:
103
68
; AVX: # %bb.0: # %bb
104
- ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
105
- ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
106
- ; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
107
- ; AVX-NEXT: vptest %xmm0, %xmm0
69
+ ; AVX-NEXT: movl (%rdi), %eax
70
+ ; AVX-NEXT: cmpl %eax, (%rsi)
108
71
; AVX-NEXT: sete %al
109
72
; AVX-NEXT: retq
110
73
;
@@ -123,10 +86,8 @@ define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
123
86
; X86: # %bb.0: # %bb
124
87
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
125
88
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
126
- ; X86-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
127
- ; X86-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
128
- ; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0
129
- ; X86-NEXT: vptest %xmm0, %xmm0
89
+ ; X86-NEXT: movl (%eax), %eax
90
+ ; X86-NEXT: cmpl %eax, (%ecx)
130
91
; X86-NEXT: sete %al
131
92
; X86-NEXT: retl
132
93
bb:
@@ -140,21 +101,15 @@ bb:
140
101
define i1 @intrinsic_v8i8 (ptr align 1 %arg , ptr align 1 %arg1 ) {
141
102
; SSE-LABEL: intrinsic_v8i8:
142
103
; SSE: # %bb.0: # %bb
143
- ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
144
- ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
145
- ; SSE-NEXT: pcmpeqb %xmm0, %xmm1
146
- ; SSE-NEXT: pmovmskb %xmm1, %eax
147
- ; SSE-NEXT: cmpb $-1, %al
104
+ ; SSE-NEXT: movq (%rdi), %rax
105
+ ; SSE-NEXT: cmpq %rax, (%rsi)
148
106
; SSE-NEXT: sete %al
149
107
; SSE-NEXT: retq
150
108
;
151
109
; AVX-LABEL: intrinsic_v8i8:
152
110
; AVX: # %bb.0: # %bb
153
- ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
154
- ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
155
- ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
156
- ; AVX-NEXT: vpmovmskb %xmm0, %eax
157
- ; AVX-NEXT: cmpb $-1, %al
111
+ ; AVX-NEXT: movq (%rdi), %rax
112
+ ; AVX-NEXT: cmpq %rax, (%rsi)
158
113
; AVX-NEXT: sete %al
159
114
; AVX-NEXT: retq
160
115
;
0 commit comments