Skip to content

Commit a9d10f7

Browse files
committed
Completes SSE and adds some MMX intrinsics
MMX: - `_mm_cmpgt_pi{8,16,32}` - `_mm_unpack{hi,lo}_pi{8,16,32}` SSE (is now complete): - `_mm_cvtp{i,u}{8,16}_ps` - add test for `_m_pmulhuw`
1 parent dda7157 commit a9d10f7

File tree

9 files changed

+463
-268
lines changed

9 files changed

+463
-268
lines changed

Diff for: coresimd/src/x86/i586/sse.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -3304,7 +3304,8 @@ mod tests {
33043304
use v64::*;
33053305

33063306
let a = mem::transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
3307-
let mut mem = ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
3307+
let mut mem =
3308+
::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
33083309
sse::_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
33093310
assert_eq!(a, *mem);
33103311
}

Diff for: coresimd/src/x86/i686/mmx.rs

+172-47
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;
1616

1717
/// Constructs a 64-bit integer vector initialized to zero.
1818
#[inline(always)]
19-
#[target_feature = "+mmx,+sse"]
19+
#[target_feature = "+mmx"]
2020
// FIXME: this produces a movl instead of xorps on x86
2121
// FIXME: this produces an xor instruction instead of xorps on x86_64
2222
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
@@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
3030
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
3131
/// less than 0x80 are saturated to 0x80.
3232
#[inline(always)]
33-
#[target_feature = "+mmx,+sse"]
33+
#[target_feature = "+mmx"]
3434
#[cfg_attr(test, assert_instr(packsswb))]
3535
pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
3636
mem::transmute(packsswb(mem::transmute(a), mem::transmute(b)))
@@ -42,63 +42,93 @@ pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
4242
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative values
4343
/// less than 0x8000 are saturated to 0x8000.
4444
#[inline(always)]
45-
#[target_feature = "+mmx,+sse"]
45+
#[target_feature = "+mmx"]
4646
#[cfg_attr(test, assert_instr(packssdw))]
4747
pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
4848
mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
4949
}
5050

51-
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
52-
/// [8 x i8] to determine if the element of the first vector is greater than
53-
/// the corresponding element of the second vector.
54-
///
55-
/// The comparison yields 0 for false, 0xFF for true.
51+
/// Compares whether each element of `a` is greater than the corresponding
52+
/// element of `b`, returning `0` for `false` and `-1` for `true`.
5653
#[inline(always)]
5754
#[target_feature = "+mmx"]
5855
#[cfg_attr(test, assert_instr(pcmpgtb))]
5956
pub unsafe fn _mm_cmpgt_pi8(a: i8x8, b: i8x8) -> i8x8 {
6057
mem::transmute(pcmpgtb(mem::transmute(a), mem::transmute(b)))
6158
}
6259

63-
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
64-
/// [4 x i16] to determine if the element of the first vector is greater than
65-
/// the corresponding element of the second vector.
66-
///
67-
/// The comparison yields 0 for false, 0xFFFF for true.
60+
/// Compares whether each element of `a` is greater than the corresponding
61+
/// element of `b`, returning `0` for `false` and `-1` for `true`.
6862
#[inline(always)]
6963
#[target_feature = "+mmx"]
7064
#[cfg_attr(test, assert_instr(pcmpgtw))]
7165
pub unsafe fn _mm_cmpgt_pi16(a: i16x4, b: i16x4) -> i16x4 {
7266
mem::transmute(pcmpgtw(mem::transmute(a), mem::transmute(b)))
7367
}
7468

75-
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
76-
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
69+
/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
70+
/// them into the result: `[a.2, b.2, a.3, b.3]`.
7771
#[inline(always)]
7872
#[target_feature = "+mmx"]
79-
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
73+
#[cfg_attr(test, assert_instr(punpcklbw))] // TODO: check
8074
pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 {
81-
mem::transmute(punpckhwd(mem::transmute(a), mem::transmute(b)))
75+
mem::transmute(punpcklbw(mem::transmute(a), mem::transmute(b)))
76+
}
77+
78+
/// Compares whether each element of `a` is greater than the corresponding
79+
/// element of `b`, returning `0` for `false` and `-1` for `true`.
80+
#[inline(always)]
81+
#[target_feature = "+mmx"]
82+
#[cfg_attr(test, assert_instr(pcmpgtd))]
83+
pub unsafe fn _mm_cmpgt_pi32(a: i32x2, b: i32x2) -> i32x2 {
84+
mem::transmute(pcmpgtd(mem::transmute(a), mem::transmute(b)))
85+
}
86+
87+
/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
88+
/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
89+
#[inline(always)]
90+
#[target_feature = "+mmx"]
91+
#[cfg_attr(test, assert_instr(punpckhbw))]
92+
pub unsafe fn _mm_unpackhi_pi8(a: i8x8, b: i8x8) -> i8x8 {
93+
mem::transmute(punpckhbw(mem::transmute(a), mem::transmute(b)))
8294
}
8395

84-
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
85-
/// and interleaves them into a 64-bit integer vector of [8 x i8].
96+
/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
97+
/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
8698
#[inline(always)]
8799
#[target_feature = "+mmx"]
88100
#[cfg_attr(test, assert_instr(punpcklbw))]
89101
pub unsafe fn _mm_unpacklo_pi8(a: i8x8, b: i8x8) -> i8x8 {
90102
mem::transmute(punpcklbw(mem::transmute(a), mem::transmute(b)))
91103
}
92104

93-
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
94-
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
105+
/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
106+
/// them into the result: `[a.0, b.0, a.1, b.1]`.
95107
#[inline(always)]
96108
#[target_feature = "+mmx"]
97109
#[cfg_attr(test, assert_instr(punpcklwd))]
98110
pub unsafe fn _mm_unpacklo_pi16(a: i16x4, b: i16x4) -> i16x4 {
99111
mem::transmute(punpcklwd(mem::transmute(a), mem::transmute(b)))
100112
}
101113

114+
/// Unpacks the upper element from two `i32x2` vectors and interleaves them
115+
/// into the result: `[a.1, b.1]`.
116+
#[inline(always)]
117+
#[target_feature = "+mmx"]
118+
#[cfg_attr(test, assert_instr(punpckhdq))]
119+
pub unsafe fn _mm_unpackhi_pi32(a: i32x2, b: i32x2) -> i32x2 {
120+
mem::transmute(punpckhdq(mem::transmute(a), mem::transmute(b)))
121+
}
122+
123+
/// Unpacks the lower element from two `i32x2` vectors and interleaves them
124+
/// into the result: `[a.0, b.0]`.
125+
#[inline(always)]
126+
#[target_feature = "+mmx"]
127+
#[cfg_attr(test, assert_instr(punpckldq))]
128+
pub unsafe fn _mm_unpacklo_pi32(a: i32x2, b: i32x2) -> i32x2 {
129+
mem::transmute(punpckldq(mem::transmute(a), mem::transmute(b)))
130+
}
131+
102132
#[allow(improper_ctypes)]
103133
extern "C" {
104134
#[link_name = "llvm.x86.mmx.packsswb"]
@@ -109,12 +139,20 @@ extern "C" {
109139
fn pcmpgtb(a: __m64, b: __m64) -> __m64;
110140
#[link_name = "llvm.x86.mmx.pcmpgt.w"]
111141
fn pcmpgtw(a: __m64, b: __m64) -> __m64;
142+
#[link_name = "llvm.x86.mmx.pcmpgt.d"]
143+
fn pcmpgtd(a: __m64, b: __m64) -> __m64;
112144
#[link_name = "llvm.x86.mmx.punpckhwd"]
113145
fn punpckhwd(a: __m64, b: __m64) -> __m64;
114-
#[link_name = "llvm.x86.mmx.punpcklbw"]
115-
fn punpcklbw(a: __m64, b: __m64) -> __m64;
116146
#[link_name = "llvm.x86.mmx.punpcklwd"]
117147
fn punpcklwd(a: __m64, b: __m64) -> __m64;
148+
#[link_name = "llvm.x86.mmx.punpckhbw"]
149+
fn punpckhbw(a: __m64, b: __m64) -> __m64;
150+
#[link_name = "llvm.x86.mmx.punpcklbw"]
151+
fn punpcklbw(a: __m64, b: __m64) -> __m64;
152+
#[link_name = "llvm.x86.mmx.punpckhdq"]
153+
fn punpckhdq(a: __m64, b: __m64) -> __m64;
154+
#[link_name = "llvm.x86.mmx.punpckldq"]
155+
fn punpckldq(a: __m64, b: __m64) -> __m64;
118156
}
119157

120158
#[cfg(test)]
@@ -123,21 +161,21 @@ mod tests {
123161
use x86::i686::mmx;
124162
use stdsimd_test::simd_test;
125163

126-
#[simd_test = "sse"] // FIXME: should be mmx
164+
#[simd_test = "mmx"]
127165
unsafe fn _mm_setzero_si64() {
128166
let r: __m64 = ::std::mem::transmute(0_i64);
129167
assert_eq!(r, mmx::_mm_setzero_si64());
130168
}
131169

132-
#[simd_test = "sse"] // FIXME: should be mmx
170+
#[simd_test = "mmx"]
133171
unsafe fn _mm_packs_pi16() {
134172
let a = i16x4::new(-1, 2, -3, 4);
135173
let b = i16x4::new(-5, 6, -7, 8);
136174
let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
137175
assert_eq!(r, mmx::_mm_packs_pi16(a, b));
138176
}
139177

140-
#[simd_test = "sse"] // FIXME: should be mmx
178+
#[simd_test = "mmx"]
141179
unsafe fn _mm_packs_pi32() {
142180
let a = i32x2::new(-1, 2);
143181
let b = i32x2::new(-5, 6);
@@ -147,41 +185,128 @@ mod tests {
147185

148186
#[simd_test = "mmx"]
149187
unsafe fn _mm_cmpgt_pi8() {
150-
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
151-
let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
152-
let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1);
153-
assert_eq!(r, mmx::_mm_cmpgt_pi8(a, b));
188+
{
189+
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
190+
let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
191+
let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1);
192+
assert_eq!(r, mmx::_mm_cmpgt_pi8(a, b));
193+
}
194+
{
195+
let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
196+
let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
197+
let r0 = i8x8::new(0, -1, 0, -1, 0, -1, 0, -1);
198+
let r1 = i8x8::new(-1, 0, -1, 0, -1, 0, -1, 0);
199+
200+
assert_eq!(r0, mmx::_mm_cmpgt_pi8(a, b));
201+
assert_eq!(r1, mmx::_mm_cmpgt_pi8(b, a));
202+
}
154203
}
155204

156205
#[simd_test = "mmx"]
157206
unsafe fn _mm_cmpgt_pi16() {
158-
let a = i16x4::new(0, 1, 2, 3);
159-
let b = i16x4::new(4, 3, 2, 1);
160-
let r = i16x4::new(0, 0, 0, -1);
161-
assert_eq!(r, mmx::_mm_cmpgt_pi16(a, b));
207+
{
208+
let a = i16x4::new(0, 1, 2, 3);
209+
let b = i16x4::new(4, 3, 2, 1);
210+
let r = i16x4::new(0, 0, 0, -1);
211+
assert_eq!(r, mmx::_mm_cmpgt_pi16(a, b));
212+
}
213+
{
214+
let a = i16x4::new(0, 3, 4, 7);
215+
let b = i16x4::new(1, 2, 5, 6);
216+
let r0 = i16x4::new(0, -1, 0, -1);
217+
let r1 = i16x4::new(-1, 0, -1, 0);
218+
219+
assert_eq!(r0, mmx::_mm_cmpgt_pi16(a, b));
220+
assert_eq!(r1, mmx::_mm_cmpgt_pi16(b, a));
221+
}
162222
}
163223

164224
#[simd_test = "mmx"]
165-
unsafe fn _mm_unpackhi_pi16() {
166-
let a = i16x4::new(0, 1, 2, 3);
167-
let b = i16x4::new(4, 5, 6, 7);
168-
let r = i16x4::new(2, 6, 3, 7);
169-
assert_eq!(r, mmx::_mm_unpackhi_pi16(a, b));
225+
unsafe fn _mm_cmpgt_pi32() {
226+
let a = i32x2::new(0, 3);
227+
let b = i32x2::new(1, 2);
228+
let r0 = i32x2::new(0, -1);
229+
let r1 = i32x2::new(-1, 0);
230+
231+
assert_eq!(r0, mmx::_mm_cmpgt_pi32(a, b));
232+
assert_eq!(r1, mmx::_mm_cmpgt_pi32(b, a));
233+
}
234+
235+
#[simd_test = "mmx"]
236+
unsafe fn _mm_unpackhi_pi8() {
237+
let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
238+
let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
239+
let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14);
240+
241+
assert_eq!(r, mmx::_mm_unpackhi_pi8(a, b));
170242
}
171243

172244
#[simd_test = "mmx"]
173245
unsafe fn _mm_unpacklo_pi8() {
174-
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
175-
let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15);
176-
let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11);
177-
assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b));
246+
{
247+
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
248+
let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15);
249+
let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11);
250+
assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b));
251+
}
252+
{
253+
let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
254+
let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
255+
let r = i8x8::new(0, 1, 3, 2, 4, 5, 7, 6);
256+
assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b));
257+
}
258+
}
259+
260+
#[simd_test = "mmx"]
261+
unsafe fn _mm_unpackhi_pi16() {
262+
{
263+
let a = i16x4::new(0, 1, 2, 3);
264+
let b = i16x4::new(4, 5, 6, 7);
265+
let r = i16x4::new(2, 6, 3, 7);
266+
assert_eq!(r, mmx::_mm_unpackhi_pi16(a, b));
267+
}
268+
{
269+
let a = i16x4::new(0, 3, 4, 7);
270+
let b = i16x4::new(1, 2, 5, 6);
271+
let r = i16x4::new(4, 5, 7, 6);
272+
273+
assert_eq!(r, mmx::_mm_unpackhi_pi16(a, b));
274+
}
178275
}
179276

180277
#[simd_test = "mmx"]
181278
unsafe fn _mm_unpacklo_pi16() {
182-
let a = i16x4::new(0, 1, 2, 3);
183-
let b = i16x4::new(4, 5, 6, 7);
184-
let r = i16x4::new(0, 4, 1, 5);
185-
assert_eq!(r, mmx::_mm_unpacklo_pi16(a, b));
279+
{
280+
let a = i16x4::new(0, 1, 2, 3);
281+
let b = i16x4::new(4, 5, 6, 7);
282+
let r = i16x4::new(0, 4, 1, 5);
283+
assert_eq!(r, mmx::_mm_unpacklo_pi16(a, b));
284+
}
285+
{
286+
let a = i16x4::new(0, 3, 4, 7);
287+
let b = i16x4::new(1, 2, 5, 6);
288+
let r = i16x4::new(0, 1, 3, 2);
289+
290+
assert_eq!(r, mmx::_mm_unpacklo_pi16(a, b));
291+
}
186292
}
293+
294+
#[simd_test = "mmx"]
295+
unsafe fn _mm_unpackhi_pi32() {
296+
let a = i32x2::new(0, 3);
297+
let b = i32x2::new(1, 2);
298+
let r = i32x2::new(3, 2);
299+
300+
assert_eq!(r, mmx::_mm_unpackhi_pi32(a, b));
301+
}
302+
303+
#[simd_test = "mmx"]
304+
unsafe fn _mm_unpacklo_pi32() {
305+
let a = i32x2::new(0, 3);
306+
let b = i32x2::new(1, 2);
307+
let r = i32x2::new(0, 1);
308+
309+
assert_eq!(r, mmx::_mm_unpacklo_pi32(a, b));
310+
}
311+
187312
}

0 commit comments

Comments
 (0)