@@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;
16
16
17
17
/// Constructs a 64-bit integer vector initialized to zero.
18
18
#[ inline( always) ]
19
- #[ target_feature = "+mmx,+sse " ]
19
+ #[ target_feature = "+mmx" ]
20
20
// FIXME: this produces a movl instead of xorps on x86
21
21
// FIXME: this produces an xor instruction instead of xorps on x86_64
22
22
#[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( xor) ) ]
@@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
30
30
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
31
31
/// less than 0x80 are saturated to 0x80.
32
32
#[ inline( always) ]
33
- #[ target_feature = "+mmx,+sse " ]
33
+ #[ target_feature = "+mmx" ]
34
34
#[ cfg_attr( test, assert_instr( packsswb) ) ]
35
35
pub unsafe fn _mm_packs_pi16 ( a : __m64 , b : __m64 ) -> __m64 {
36
36
packsswb ( a, b)
@@ -42,63 +42,94 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
42
42
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
43
43
/// values less than 0x8000 are saturated to 0x8000.
44
44
#[ inline( always) ]
45
- #[ target_feature = "+mmx,+sse " ]
45
+ #[ target_feature = "+mmx" ]
46
46
#[ cfg_attr( test, assert_instr( packssdw) ) ]
47
47
pub unsafe fn _mm_packs_pi32 ( a : __m64 , b : __m64 ) -> __m64 {
48
48
packssdw ( a, b)
49
49
}
50
50
51
- /// Compares the 8-bit integer elements of two 64-bit integer vectors of
52
- /// [8 x i8] to determine if the element of the first vector is greater than
53
- /// the corresponding element of the second vector.
54
- ///
55
- /// The comparison yields 0 for false, 0xFF for true.
51
+ /// Compares each signed 8-bit element of `a` with the corresponding element
52
+ /// of `b`, returning `-1` (all bits set) if it is greater and `0` otherwise.
56
53
#[ inline( always) ]
57
54
#[ target_feature = "+mmx" ]
58
55
#[ cfg_attr( test, assert_instr( pcmpgtb) ) ]
59
56
pub unsafe fn _mm_cmpgt_pi8 ( a : __m64 , b : __m64 ) -> __m64 {
60
57
pcmpgtb ( a, b)
61
58
}
62
59
63
- /// Compares the 16-bit integer elements of two 64-bit integer vectors of
64
- /// [4 x i16] to determine if the element of the first vector is greater than
65
- /// the corresponding element of the second vector.
66
- ///
67
- /// The comparison yields 0 for false, 0xFFFF for true.
60
+ /// Compares each signed 16-bit element of `a` with the corresponding element
61
+ /// of `b`, returning `-1` (all bits set) if it is greater and `0` otherwise.
68
62
#[ inline( always) ]
69
63
#[ target_feature = "+mmx" ]
70
64
#[ cfg_attr( test, assert_instr( pcmpgtw) ) ]
71
65
pub unsafe fn _mm_cmpgt_pi16 ( a : __m64 , b : __m64 ) -> __m64 {
72
66
pcmpgtw ( a, b)
73
67
}
74
68
75
- /// Unpacks the upper 32 bits from two 64-bit integer vectors of
76
- /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
69
+ /// Compares each signed 32-bit element of `a` with the corresponding element
70
+ /// of `b`, returning `-1` (all bits set) if it is greater and `0` otherwise.
71
+ #[ inline( always) ]
72
+ #[ target_feature = "+mmx" ]
73
+ #[ cfg_attr( test, assert_instr( pcmpgtd) ) ]
74
+ pub unsafe fn _mm_cmpgt_pi32 ( a : __m64 , b : __m64 ) -> __m64 {
75
+ pcmpgtd ( a, b)
76
+ }
77
+
78
+
79
+ /// Unpacks the upper two elements from two `i16x4` vectors and interleaves
80
+ /// them into the result: `[a.2, b.2, a.3, b.3]`.
77
81
#[ inline( always) ]
78
82
#[ target_feature = "+mmx" ]
79
83
#[ cfg_attr( test, assert_instr( punpckhwd) ) ] // FIXME punpcklbw expected
80
84
pub unsafe fn _mm_unpackhi_pi16 ( a : __m64 , b : __m64 ) -> __m64 {
81
85
punpckhwd ( a, b)
82
86
}
83
87
84
- /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
85
- /// and interleaves them into a 64-bit integer vector of [8 x i8].
88
+ /// Unpacks the upper four elements from two `i8x8` vectors and interleaves
89
+ /// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
90
+ #[ inline( always) ]
91
+ #[ target_feature = "+mmx" ]
92
+ #[ cfg_attr( test, assert_instr( punpckhbw) ) ]
93
+ pub unsafe fn _mm_unpackhi_pi8 ( a : __m64 , b : __m64 ) -> __m64 {
94
+ punpckhbw ( a, b)
95
+ }
96
+
97
+ /// Unpacks the lower four elements from two `i8x8` vectors and interleaves
98
+ /// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
86
99
#[ inline( always) ]
87
100
#[ target_feature = "+mmx" ]
88
101
#[ cfg_attr( test, assert_instr( punpcklbw) ) ]
89
102
pub unsafe fn _mm_unpacklo_pi8 ( a : __m64 , b : __m64 ) -> __m64 {
90
103
punpcklbw ( a, b)
91
104
}
92
105
93
- /// Unpacks the lower 32 bits from two 64-bit integer vectors of
94
- /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16] .
106
+ /// Unpacks the lower two elements from two `i16x4` vectors and interleaves
107
+ /// them into the result: `[a.0, b.0, a.1, b.1]`.
95
108
#[ inline( always) ]
96
109
#[ target_feature = "+mmx" ]
97
110
#[ cfg_attr( test, assert_instr( punpcklwd) ) ]
98
111
pub unsafe fn _mm_unpacklo_pi16 ( a : __m64 , b : __m64 ) -> __m64 {
99
112
punpcklwd ( a, b)
100
113
}
101
114
115
+ /// Unpacks the upper element from two `i32x2` vectors and interleaves them
116
+ /// into the result: `[a.1, b.1]`.
117
+ #[ inline( always) ]
118
+ #[ target_feature = "+mmx" ]
119
+ #[ cfg_attr( test, assert_instr( punpckhdq) ) ]
120
+ pub unsafe fn _mm_unpackhi_pi32 ( a : __m64 , b : __m64 ) -> __m64 {
121
+ punpckhdq ( a, b)
122
+ }
123
+
124
+ /// Unpacks the lower element from two `i32x2` vectors and interleaves them
125
+ /// into the result: `[a.0, b.0]`.
126
+ #[ inline( always) ]
127
+ #[ target_feature = "+mmx" ]
128
+ #[ cfg_attr( test, assert_instr( punpckldq) ) ]
129
+ pub unsafe fn _mm_unpacklo_pi32 ( a : __m64 , b : __m64 ) -> __m64 {
130
+ punpckldq ( a, b)
131
+ }
132
+
102
133
#[ allow( improper_ctypes) ]
103
134
extern "C" {
104
135
#[ link_name = "llvm.x86.mmx.packsswb" ]
@@ -109,12 +140,20 @@ extern "C" {
109
140
fn pcmpgtb ( a : __m64 , b : __m64 ) -> __m64 ;
110
141
#[ link_name = "llvm.x86.mmx.pcmpgt.w" ]
111
142
fn pcmpgtw ( a : __m64 , b : __m64 ) -> __m64 ;
143
+ #[ link_name = "llvm.x86.mmx.pcmpgt.d" ]
144
+ fn pcmpgtd ( a : __m64 , b : __m64 ) -> __m64 ;
112
145
#[ link_name = "llvm.x86.mmx.punpckhwd" ]
113
146
fn punpckhwd ( a : __m64 , b : __m64 ) -> __m64 ;
114
- #[ link_name = "llvm.x86.mmx.punpcklbw" ]
115
- fn punpcklbw ( a : __m64 , b : __m64 ) -> __m64 ;
116
147
#[ link_name = "llvm.x86.mmx.punpcklwd" ]
117
148
fn punpcklwd ( a : __m64 , b : __m64 ) -> __m64 ;
149
+ #[ link_name = "llvm.x86.mmx.punpckhbw" ]
150
+ fn punpckhbw ( a : __m64 , b : __m64 ) -> __m64 ;
151
+ #[ link_name = "llvm.x86.mmx.punpcklbw" ]
152
+ fn punpcklbw ( a : __m64 , b : __m64 ) -> __m64 ;
153
+ #[ link_name = "llvm.x86.mmx.punpckhdq" ]
154
+ fn punpckhdq ( a : __m64 , b : __m64 ) -> __m64 ;
155
+ #[ link_name = "llvm.x86.mmx.punpckldq" ]
156
+ fn punpckldq ( a : __m64 , b : __m64 ) -> __m64 ;
118
157
}
119
158
120
159
#[ cfg( test) ]
@@ -123,21 +162,21 @@ mod tests {
123
162
use x86:: i686:: mmx;
124
163
use stdsimd_test:: simd_test;
125
164
126
- #[ simd_test = "sse" ] // FIXME: should be mmx
165
+ #[ simd_test = "mmx" ]
127
166
unsafe fn _mm_setzero_si64 ( ) {
128
167
let r: __m64 = :: std:: mem:: transmute ( 0_i64 ) ;
129
168
assert_eq ! ( r, mmx:: _mm_setzero_si64( ) ) ;
130
169
}
131
170
132
- #[ simd_test = "sse" ] // FIXME: should be mmx
171
+ #[ simd_test = "mmx" ]
133
172
unsafe fn _mm_packs_pi16 ( ) {
134
173
let a = i16x4:: new ( -1 , 2 , -3 , 4 ) ;
135
174
let b = i16x4:: new ( -5 , 6 , -7 , 8 ) ;
136
175
let r = i8x8:: new ( -1 , 2 , -3 , 4 , -5 , 6 , -7 , 8 ) ;
137
176
assert_eq ! ( r, i8x8:: from( mmx:: _mm_packs_pi16( a. into( ) , b. into( ) ) ) ) ;
138
177
}
139
178
140
- #[ simd_test = "sse" ] // FIXME: should be mmx
179
+ #[ simd_test = "mmx" ]
141
180
unsafe fn _mm_packs_pi32 ( ) {
142
181
let a = i32x2:: new ( -1 , 2 ) ;
143
182
let b = i32x2:: new ( -5 , 6 ) ;
@@ -162,11 +201,23 @@ mod tests {
162
201
}
163
202
164
203
#[ simd_test = "mmx" ]
165
- unsafe fn _mm_unpackhi_pi16 ( ) {
166
- let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
167
- let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
168
- let r = i16x4:: new ( 2 , 6 , 3 , 7 ) ;
169
- assert_eq ! ( r, i16x4:: from( mmx:: _mm_unpackhi_pi16( a. into( ) , b. into( ) ) ) ) ;
204
+ unsafe fn _mm_cmpgt_pi32 ( ) {
205
+ let a = i32x2:: new ( 0 , 3 ) ;
206
+ let b = i32x2:: new ( 1 , 2 ) ;
207
+ let r0 = i32x2:: new ( 0 , -1 ) ;
208
+ let r1 = i32x2:: new ( -1 , 0 ) ;
209
+
210
+ assert_eq ! ( r0, mmx:: _mm_cmpgt_pi32( a. into( ) , b. into( ) ) . into( ) ) ;
211
+ assert_eq ! ( r1, mmx:: _mm_cmpgt_pi32( b. into( ) , a. into( ) ) . into( ) ) ;
212
+ }
213
+
214
+ #[ simd_test = "mmx" ]
215
+ unsafe fn _mm_unpackhi_pi8 ( ) {
216
+ let a = i8x8:: new ( 0 , 3 , 4 , 7 , 8 , 11 , 12 , 15 ) ;
217
+ let b = i8x8:: new ( 1 , 2 , 5 , 6 , 9 , 10 , 13 , 14 ) ;
218
+ let r = i8x8:: new ( 8 , 9 , 11 , 10 , 12 , 13 , 15 , 14 ) ;
219
+
220
+ assert_eq ! ( r, mmx:: _mm_unpackhi_pi8( a. into( ) , b. into( ) ) . into( ) ) ;
170
221
}
171
222
172
223
#[ simd_test = "mmx" ]
@@ -177,11 +228,37 @@ mod tests {
177
228
assert_eq ! ( r, i8x8:: from( mmx:: _mm_unpacklo_pi8( a. into( ) , b. into( ) ) ) ) ;
178
229
}
179
230
231
+ #[ simd_test = "mmx" ]
232
+ unsafe fn _mm_unpackhi_pi16 ( ) {
233
+ let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
234
+ let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
235
+ let r = i16x4:: new ( 2 , 6 , 3 , 7 ) ;
236
+ assert_eq ! ( r, i16x4:: from( mmx:: _mm_unpackhi_pi16( a. into( ) , b. into( ) ) ) ) ;
237
+ }
238
+
180
239
#[ simd_test = "mmx" ]
181
240
unsafe fn _mm_unpacklo_pi16 ( ) {
182
241
let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
183
242
let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
184
243
let r = i16x4:: new ( 0 , 4 , 1 , 5 ) ;
185
244
assert_eq ! ( r, i16x4:: from( mmx:: _mm_unpacklo_pi16( a. into( ) , b. into( ) ) ) ) ;
186
245
}
246
+
247
+ #[ simd_test = "mmx" ]
248
+ unsafe fn _mm_unpackhi_pi32 ( ) {
249
+ let a = i32x2:: new ( 0 , 3 ) ;
250
+ let b = i32x2:: new ( 1 , 2 ) ;
251
+ let r = i32x2:: new ( 3 , 2 ) ;
252
+
253
+ assert_eq ! ( r, mmx:: _mm_unpackhi_pi32( a. into( ) , b. into( ) ) . into( ) ) ;
254
+ }
255
+
256
+ #[ simd_test = "mmx" ]
257
+ unsafe fn _mm_unpacklo_pi32 ( ) {
258
+ let a = i32x2:: new ( 0 , 3 ) ;
259
+ let b = i32x2:: new ( 1 , 2 ) ;
260
+ let r = i32x2:: new ( 0 , 1 ) ;
261
+
262
+ assert_eq ! ( r, mmx:: _mm_unpacklo_pi32( a. into( ) , b. into( ) ) . into( ) ) ;
263
+ }
187
264
}
0 commit comments