@@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;
16
16
17
17
/// Constructs a 64-bit integer vector initialized to zero.
18
18
#[ inline( always) ]
19
- #[ target_feature = "+mmx,+sse " ]
19
+ #[ target_feature = "+mmx" ]
20
20
// FIXME: this produces a movl instead of xorps on x86
21
21
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
22
22
#[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( xor) ) ]
@@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
30
30
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
31
31
/// less than 0x80 are saturated to 0x80.
32
32
#[ inline( always) ]
33
- #[ target_feature = "+mmx,+sse " ]
33
+ #[ target_feature = "+mmx" ]
34
34
#[ cfg_attr( test, assert_instr( packsswb) ) ]
35
35
pub unsafe fn _mm_packs_pi16 ( a : i16x4 , b : i16x4 ) -> i8x8 {
36
36
mem:: transmute ( packsswb ( mem:: transmute ( a) , mem:: transmute ( b) ) )
@@ -42,63 +42,93 @@ pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
42
42
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
43
43
/// less than 0x80 are saturated to 0x80.
44
44
#[ inline( always) ]
45
- #[ target_feature = "+mmx,+sse " ]
45
+ #[ target_feature = "+mmx" ]
46
46
#[ cfg_attr( test, assert_instr( packssdw) ) ]
47
47
pub unsafe fn _mm_packs_pi32 ( a : i32x2 , b : i32x2 ) -> i16x4 {
48
48
mem:: transmute ( packssdw ( mem:: transmute ( a) , mem:: transmute ( b) ) )
49
49
}
50
50
51
- /// Compares the 8-bit integer elements of two 64-bit integer vectors of
52
- /// [8 x i8] to determine if the element of the first vector is greater than
53
- /// the corresponding element of the second vector.
54
- ///
55
- /// The comparison yields 0 for false, 0xFF for true.
51
+ /// Compares whether each element of `a` is greater than the corresponding
52
+ /// element of `b` returning `0` for `false` and `-1` for `true`.
56
53
#[ inline( always) ]
57
54
#[ target_feature = "+mmx" ]
58
55
#[ cfg_attr( test, assert_instr( pcmpgtb) ) ]
59
56
pub unsafe fn _mm_cmpgt_pi8 ( a : i8x8 , b : i8x8 ) -> i8x8 {
60
57
mem:: transmute ( pcmpgtb ( mem:: transmute ( a) , mem:: transmute ( b) ) )
61
58
}
62
59
63
- /// Compares the 16-bit integer elements of two 64-bit integer vectors of
64
- /// [4 x i16] to determine if the element of the first vector is greater than
65
- /// the corresponding element of the second vector.
66
- ///
67
- /// The comparison yields 0 for false, 0xFFFF for true.
60
+ /// Compares whether each element of `a` is greater than the corresponding
61
+ /// element of `b` returning `0` for `false` and `-1` for `true`.
68
62
#[ inline( always) ]
69
63
#[ target_feature = "+mmx" ]
70
64
#[ cfg_attr( test, assert_instr( pcmpgtw) ) ]
71
65
pub unsafe fn _mm_cmpgt_pi16 ( a : i16x4 , b : i16x4 ) -> i16x4 {
72
66
mem:: transmute ( pcmpgtw ( mem:: transmute ( a) , mem:: transmute ( b) ) )
73
67
}
74
68
75
- /// Unpacks the upper 32 bits from two 64-bit integer vectors of
76
- /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16] .
69
+ /// Unpacks the upper two elements from two `i16x4` vectors and interleaves
70
+ /// them into the result: `[a.2, b.2, a.3, b.3]` .
77
71
#[ inline( always) ]
78
72
#[ target_feature = "+mmx" ]
79
- #[ cfg_attr( test, assert_instr( punpckhwd ) ) ] // FIXME punpcklbw expected
73
+ #[ cfg_attr( test, assert_instr( punpcklbw ) ) ] // TODO: check
80
74
pub unsafe fn _mm_unpackhi_pi16 ( a : i16x4 , b : i16x4 ) -> i16x4 {
81
- mem:: transmute ( punpckhwd ( mem:: transmute ( a) , mem:: transmute ( b) ) )
75
+ mem:: transmute ( punpcklbw ( mem:: transmute ( a) , mem:: transmute ( b) ) )
76
+ }
77
+
78
+ /// Compares whether each element of `a` is greater than the corresponding
79
+ /// element of `b` returning `0` for `false` and `-1` for `true`.
80
+ #[ inline( always) ]
81
+ #[ target_feature = "+mmx" ]
82
+ #[ cfg_attr( test, assert_instr( pcmpgtd) ) ]
83
+ pub unsafe fn _mm_cmpgt_pi32 ( a : i32x2 , b : i32x2 ) -> i32x2 {
84
+ mem:: transmute ( pcmpgtd ( mem:: transmute ( a) , mem:: transmute ( b) ) )
85
+ }
86
+
87
+ /// Unpacks the upper four elements from two `i8x8` vectors and interleaves
88
+ /// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
89
+ #[ inline( always) ]
90
+ #[ target_feature = "+mmx" ]
91
+ #[ cfg_attr( test, assert_instr( punpckhbw) ) ]
92
+ pub unsafe fn _mm_unpackhi_pi8 ( a : i8x8 , b : i8x8 ) -> i8x8 {
93
+ mem:: transmute ( punpckhbw ( mem:: transmute ( a) , mem:: transmute ( b) ) )
82
94
}
83
95
84
- /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
85
- /// and interleaves them into a 64-bit integer vector of [8 x i8] .
96
+ /// Unpacks the lower four elements from two `i8x8` vectors and interleaves
97
+ /// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]` .
86
98
#[ inline( always) ]
87
99
#[ target_feature = "+mmx" ]
88
100
#[ cfg_attr( test, assert_instr( punpcklbw) ) ]
89
101
pub unsafe fn _mm_unpacklo_pi8 ( a : i8x8 , b : i8x8 ) -> i8x8 {
90
102
mem:: transmute ( punpcklbw ( mem:: transmute ( a) , mem:: transmute ( b) ) )
91
103
}
92
104
93
- /// Unpacks the lower 32 bits from two 64-bit integer vectors of
94
- /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16] .
105
+ /// Unpacks the lower two elements from two `i16x4` vectors and interleaves
106
+ /// them into the result: `[a.0 b.0 a.1 b.1]` .
95
107
#[ inline( always) ]
96
108
#[ target_feature = "+mmx" ]
97
109
#[ cfg_attr( test, assert_instr( punpcklwd) ) ]
98
110
pub unsafe fn _mm_unpacklo_pi16 ( a : i16x4 , b : i16x4 ) -> i16x4 {
99
111
mem:: transmute ( punpcklwd ( mem:: transmute ( a) , mem:: transmute ( b) ) )
100
112
}
101
113
114
+ /// Unpacks the upper element from two `i32x2` vectors and interleaves them
115
+ /// into the result: `[a.1, b.1]`.
116
+ #[ inline( always) ]
117
+ #[ target_feature = "+mmx" ]
118
+ #[ cfg_attr( test, assert_instr( punpckhdq) ) ]
119
+ pub unsafe fn _mm_unpackhi_pi32 ( a : i32x2 , b : i32x2 ) -> i32x2 {
120
+ mem:: transmute ( punpckhdq ( mem:: transmute ( a) , mem:: transmute ( b) ) )
121
+ }
122
+
123
+ /// Unpacks the lower element from two `i32x2` vectors and interleaves them
124
+ /// into the result: `[a.0, b.0]`.
125
+ #[ inline( always) ]
126
+ #[ target_feature = "+mmx" ]
127
+ #[ cfg_attr( test, assert_instr( punpckldq) ) ]
128
+ pub unsafe fn _mm_unpacklo_pi32 ( a : i32x2 , b : i32x2 ) -> i32x2 {
129
+ mem:: transmute ( punpckldq ( mem:: transmute ( a) , mem:: transmute ( b) ) )
130
+ }
131
+
102
132
#[ allow( improper_ctypes) ]
103
133
extern "C" {
104
134
#[ link_name = "llvm.x86.mmx.packsswb" ]
@@ -109,12 +139,20 @@ extern "C" {
109
139
fn pcmpgtb ( a : __m64 , b : __m64 ) -> __m64 ;
110
140
#[ link_name = "llvm.x86.mmx.pcmpgt.w" ]
111
141
fn pcmpgtw ( a : __m64 , b : __m64 ) -> __m64 ;
142
+ #[ link_name = "llvm.x86.mmx.pcmpgt.d" ]
143
+ fn pcmpgtd ( a : __m64 , b : __m64 ) -> __m64 ;
112
144
#[ link_name = "llvm.x86.mmx.punpckhwd" ]
113
145
fn punpckhwd ( a : __m64 , b : __m64 ) -> __m64 ;
114
- #[ link_name = "llvm.x86.mmx.punpcklbw" ]
115
- fn punpcklbw ( a : __m64 , b : __m64 ) -> __m64 ;
116
146
#[ link_name = "llvm.x86.mmx.punpcklwd" ]
117
147
fn punpcklwd ( a : __m64 , b : __m64 ) -> __m64 ;
148
+ #[ link_name = "llvm.x86.mmx.punpckhbw" ]
149
+ fn punpckhbw ( a : __m64 , b : __m64 ) -> __m64 ;
150
+ #[ link_name = "llvm.x86.mmx.punpcklbw" ]
151
+ fn punpcklbw ( a : __m64 , b : __m64 ) -> __m64 ;
152
+ #[ link_name = "llvm.x86.mmx.punpckhdq" ]
153
+ fn punpckhdq ( a : __m64 , b : __m64 ) -> __m64 ;
154
+ #[ link_name = "llvm.x86.mmx.punpckldq" ]
155
+ fn punpckldq ( a : __m64 , b : __m64 ) -> __m64 ;
118
156
}
119
157
120
158
#[ cfg( test) ]
@@ -123,21 +161,21 @@ mod tests {
123
161
use x86:: i686:: mmx;
124
162
use stdsimd_test:: simd_test;
125
163
126
- #[ simd_test = "sse" ] // FIXME: should be mmx
164
+ #[ simd_test = "mmx" ]
127
165
unsafe fn _mm_setzero_si64 ( ) {
128
166
let r: __m64 = :: std:: mem:: transmute ( 0_i64 ) ;
129
167
assert_eq ! ( r, mmx:: _mm_setzero_si64( ) ) ;
130
168
}
131
169
132
- #[ simd_test = "sse" ] // FIXME: should be mmx
170
+ #[ simd_test = "mmx" ]
133
171
unsafe fn _mm_packs_pi16 ( ) {
134
172
let a = i16x4:: new ( -1 , 2 , -3 , 4 ) ;
135
173
let b = i16x4:: new ( -5 , 6 , -7 , 8 ) ;
136
174
let r = i8x8:: new ( -1 , 2 , -3 , 4 , -5 , 6 , -7 , 8 ) ;
137
175
assert_eq ! ( r, mmx:: _mm_packs_pi16( a, b) ) ;
138
176
}
139
177
140
- #[ simd_test = "sse" ] // FIXME: should be mmx
178
+ #[ simd_test = "mmx" ]
141
179
unsafe fn _mm_packs_pi32 ( ) {
142
180
let a = i32x2:: new ( -1 , 2 ) ;
143
181
let b = i32x2:: new ( -5 , 6 ) ;
@@ -147,41 +185,128 @@ mod tests {
147
185
148
186
#[ simd_test = "mmx" ]
149
187
unsafe fn _mm_cmpgt_pi8 ( ) {
150
- let a = i8x8:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
151
- let b = i8x8:: new ( 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 ) ;
152
- let r = i8x8:: new ( 0 , 0 , 0 , 0 , 0 , -1 , -1 , -1 ) ;
153
- assert_eq ! ( r, mmx:: _mm_cmpgt_pi8( a, b) ) ;
188
+ {
189
+ let a = i8x8:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
190
+ let b = i8x8:: new ( 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 ) ;
191
+ let r = i8x8:: new ( 0 , 0 , 0 , 0 , 0 , -1 , -1 , -1 ) ;
192
+ assert_eq ! ( r, mmx:: _mm_cmpgt_pi8( a, b) ) ;
193
+ }
194
+ {
195
+ let a = i8x8:: new ( 0 , 3 , 4 , 7 , 8 , 11 , 12 , 15 ) ;
196
+ let b = i8x8:: new ( 1 , 2 , 5 , 6 , 9 , 10 , 13 , 14 ) ;
197
+ let r0 = i8x8:: new ( 0 , -1 , 0 , -1 , 0 , -1 , 0 , -1 ) ;
198
+ let r1 = i8x8:: new ( -1 , 0 , -1 , 0 , -1 , 0 , -1 , 0 ) ;
199
+
200
+ assert_eq ! ( r0, mmx:: _mm_cmpgt_pi8( a, b) ) ;
201
+ assert_eq ! ( r1, mmx:: _mm_cmpgt_pi8( b, a) ) ;
202
+ }
154
203
}
155
204
156
205
#[ simd_test = "mmx" ]
157
206
unsafe fn _mm_cmpgt_pi16 ( ) {
158
- let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
159
- let b = i16x4:: new ( 4 , 3 , 2 , 1 ) ;
160
- let r = i16x4:: new ( 0 , 0 , 0 , -1 ) ;
161
- assert_eq ! ( r, mmx:: _mm_cmpgt_pi16( a, b) ) ;
207
+ {
208
+ let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
209
+ let b = i16x4:: new ( 4 , 3 , 2 , 1 ) ;
210
+ let r = i16x4:: new ( 0 , 0 , 0 , -1 ) ;
211
+ assert_eq ! ( r, mmx:: _mm_cmpgt_pi16( a, b) ) ;
212
+ }
213
+ {
214
+ let a = i16x4:: new ( 0 , 3 , 4 , 7 ) ;
215
+ let b = i16x4:: new ( 1 , 2 , 5 , 6 ) ;
216
+ let r0 = i16x4:: new ( 0 , -1 , 0 , -1 ) ;
217
+ let r1 = i16x4:: new ( -1 , 0 , -1 , 0 ) ;
218
+
219
+ assert_eq ! ( r0, mmx:: _mm_cmpgt_pi16( a, b) ) ;
220
+ assert_eq ! ( r1, mmx:: _mm_cmpgt_pi16( b, a) ) ;
221
+ }
162
222
}
163
223
164
224
#[ simd_test = "mmx" ]
165
- unsafe fn _mm_unpackhi_pi16 ( ) {
166
- let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
167
- let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
168
- let r = i16x4:: new ( 2 , 6 , 3 , 7 ) ;
169
- assert_eq ! ( r, mmx:: _mm_unpackhi_pi16( a, b) ) ;
225
+ unsafe fn _mm_cmpgt_pi32 ( ) {
226
+ let a = i32x2:: new ( 0 , 3 ) ;
227
+ let b = i32x2:: new ( 1 , 2 ) ;
228
+ let r0 = i32x2:: new ( 0 , -1 ) ;
229
+ let r1 = i32x2:: new ( -1 , 0 ) ;
230
+
231
+ assert_eq ! ( r0, mmx:: _mm_cmpgt_pi32( a, b) ) ;
232
+ assert_eq ! ( r1, mmx:: _mm_cmpgt_pi32( b, a) ) ;
233
+ }
234
+
235
+ #[ simd_test = "mmx" ]
236
+ unsafe fn _mm_unpackhi_pi8 ( ) {
237
+ let a = i8x8:: new ( 0 , 3 , 4 , 7 , 8 , 11 , 12 , 15 ) ;
238
+ let b = i8x8:: new ( 1 , 2 , 5 , 6 , 9 , 10 , 13 , 14 ) ;
239
+ let r = i8x8:: new ( 8 , 9 , 11 , 10 , 12 , 13 , 15 , 14 ) ;
240
+
241
+ assert_eq ! ( r, mmx:: _mm_unpackhi_pi8( a, b) ) ;
170
242
}
171
243
172
244
#[ simd_test = "mmx" ]
173
245
unsafe fn _mm_unpacklo_pi8 ( ) {
174
- let a = i8x8:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
175
- let b = i8x8:: new ( 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ) ;
176
- let r = i8x8:: new ( 0 , 8 , 1 , 9 , 2 , 10 , 3 , 11 ) ;
177
- assert_eq ! ( r, mmx:: _mm_unpacklo_pi8( a, b) ) ;
246
+ {
247
+ let a = i8x8:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
248
+ let b = i8x8:: new ( 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ) ;
249
+ let r = i8x8:: new ( 0 , 8 , 1 , 9 , 2 , 10 , 3 , 11 ) ;
250
+ assert_eq ! ( r, mmx:: _mm_unpacklo_pi8( a, b) ) ;
251
+ }
252
+ {
253
+ let a = i8x8:: new ( 0 , 3 , 4 , 7 , 8 , 11 , 12 , 15 ) ;
254
+ let b = i8x8:: new ( 1 , 2 , 5 , 6 , 9 , 10 , 13 , 14 ) ;
255
+ let r = i8x8:: new ( 0 , 1 , 3 , 2 , 4 , 5 , 7 , 6 ) ;
256
+ assert_eq ! ( r, mmx:: _mm_unpacklo_pi8( a, b) ) ;
257
+ }
258
+ }
259
+
260
+ #[ simd_test = "mmx" ]
261
+ unsafe fn _mm_unpackhi_pi16 ( ) {
262
+ {
263
+ let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
264
+ let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
265
+ let r = i16x4:: new ( 2 , 6 , 3 , 7 ) ;
266
+ assert_eq ! ( r, mmx:: _mm_unpackhi_pi16( a, b) ) ;
267
+ }
268
+ {
269
+ let a = i16x4:: new ( 0 , 3 , 4 , 7 ) ;
270
+ let b = i16x4:: new ( 1 , 2 , 5 , 6 ) ;
271
+ let r = i16x4:: new ( 4 , 5 , 7 , 6 ) ;
272
+
273
+ assert_eq ! ( r, mmx:: _mm_unpackhi_pi16( a, b) ) ;
274
+ }
178
275
}
179
276
180
277
#[ simd_test = "mmx" ]
181
278
unsafe fn _mm_unpacklo_pi16 ( ) {
182
- let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
183
- let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
184
- let r = i16x4:: new ( 0 , 4 , 1 , 5 ) ;
185
- assert_eq ! ( r, mmx:: _mm_unpacklo_pi16( a, b) ) ;
279
+ {
280
+ let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
281
+ let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
282
+ let r = i16x4:: new ( 0 , 4 , 1 , 5 ) ;
283
+ assert_eq ! ( r, mmx:: _mm_unpacklo_pi16( a, b) ) ;
284
+ }
285
+ {
286
+ let a = i16x4:: new ( 0 , 3 , 4 , 7 ) ;
287
+ let b = i16x4:: new ( 1 , 2 , 5 , 6 ) ;
288
+ let r = i16x4:: new ( 0 , 1 , 3 , 2 ) ;
289
+
290
+ assert_eq ! ( r, mmx:: _mm_unpacklo_pi16( a, b) ) ;
291
+ }
186
292
}
293
+
294
+ #[ simd_test = "mmx" ]
295
+ unsafe fn _mm_unpackhi_pi32 ( ) {
296
+ let a = i32x2:: new ( 0 , 3 ) ;
297
+ let b = i32x2:: new ( 1 , 2 ) ;
298
+ let r = i32x2:: new ( 3 , 2 ) ;
299
+
300
+ assert_eq ! ( r, mmx:: _mm_unpackhi_pi32( a, b) ) ;
301
+ }
302
+
303
+ #[ simd_test = "mmx" ]
304
+ unsafe fn _mm_unpacklo_pi32 ( ) {
305
+ let a = i32x2:: new ( 0 , 3 ) ;
306
+ let b = i32x2:: new ( 1 , 2 ) ;
307
+ let r = i32x2:: new ( 0 , 1 ) ;
308
+
309
+ assert_eq ! ( r, mmx:: _mm_unpacklo_pi32( a, b) ) ;
310
+ }
311
+
187
312
}
0 commit comments