Skip to content

Commit 875c926

Browse files
committed
Completes SSE and adds some MMX intrinsics
MMX: - `_mm_cmpgt_pi{8,16,32}` - `_mm_unpack{hi,lo}_pi{8,16,32}` SSE (is now complete): - `_mm_cvtp{i,u}{8,16}_ps` - add test for `_m_pmulhuw`
1 parent 43039ef commit 875c926

File tree

8 files changed

+344
-226
lines changed

8 files changed

+344
-226
lines changed

Diff for: coresimd/src/x86/i586/sse.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -3299,7 +3299,8 @@ mod tests {
32993299
use v64::*;
33003300

33013301
let a = mem::transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
3302-
let mut mem = ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
3302+
let mut mem =
3303+
::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
33033304
sse::_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
33043305
assert_eq!(a, *mem);
33053306
}

Diff for: coresimd/src/x86/i686/mmx.rs

+106-29
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;
1616

1717
/// Constructs a 64-bit integer vector initialized to zero.
1818
#[inline(always)]
19-
#[target_feature = "+mmx,+sse"]
19+
#[target_feature = "+mmx"]
2020
// FIXME: this produces a movl instead of xorps on x86
2121
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
2222
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
@@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
3030
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
3131
/// less than 0x80 are saturated to 0x80.
3232
#[inline(always)]
33-
#[target_feature = "+mmx,+sse"]
33+
#[target_feature = "+mmx"]
3434
#[cfg_attr(test, assert_instr(packsswb))]
3535
pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
3636
packsswb(a, b)
@@ -42,63 +42,94 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
4242
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
4343
/// values less than 0x8000 are saturated to 0x8000.
4444
#[inline(always)]
45-
#[target_feature = "+mmx,+sse"]
45+
#[target_feature = "+mmx"]
4646
#[cfg_attr(test, assert_instr(packssdw))]
4747
pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
4848
packssdw(a, b)
4949
}
5050

51-
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
52-
/// [8 x i8] to determine if the element of the first vector is greater than
53-
/// the corresponding element of the second vector.
54-
///
55-
/// The comparison yields 0 for false, 0xFF for true.
51+
/// Compares whether each element of `a` is greater than the corresponding
52+
/// element of `b` returning `0` for `false` and `-1` for `true`.
5653
#[inline(always)]
5754
#[target_feature = "+mmx"]
5855
#[cfg_attr(test, assert_instr(pcmpgtb))]
5956
pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
6057
pcmpgtb(a, b)
6158
}
6259

63-
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
64-
/// [4 x i16] to determine if the element of the first vector is greater than
65-
/// the corresponding element of the second vector.
66-
///
67-
/// The comparison yields 0 for false, 0xFFFF for true.
60+
/// Compares whether each element of `a` is greater than the corresponding
61+
/// element of `b` returning `0` for `false` and `-1` for `true`.
6862
#[inline(always)]
6963
#[target_feature = "+mmx"]
7064
#[cfg_attr(test, assert_instr(pcmpgtw))]
7165
pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
7266
pcmpgtw(a, b)
7367
}
7468

75-
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
76-
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
69+
/// Compares whether each element of `a` is greater than the corresponding
70+
/// element of `b` returning `0` for `false` and `-1` for `true`.
71+
#[inline(always)]
72+
#[target_feature = "+mmx"]
73+
#[cfg_attr(test, assert_instr(pcmpgtd))]
74+
pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
75+
pcmpgtd(a, b)
76+
}
77+
78+
79+
/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
80+
/// them into the result: `[a.2, b.2, a.3, b.3]`.
7781
#[inline(always)]
7882
#[target_feature = "+mmx"]
7983
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
8084
pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
8185
punpckhwd(a, b)
8286
}
8387

84-
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
85-
/// and interleaves them into a 64-bit integer vector of [8 x i8].
88+
/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
89+
/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
90+
#[inline(always)]
91+
#[target_feature = "+mmx"]
92+
#[cfg_attr(test, assert_instr(punpckhbw))]
93+
pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
94+
punpckhbw(a, b)
95+
}
96+
97+
/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
98+
/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
8699
#[inline(always)]
87100
#[target_feature = "+mmx"]
88101
#[cfg_attr(test, assert_instr(punpcklbw))]
89102
pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
90103
punpcklbw(a, b)
91104
}
92105

93-
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
94-
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
106+
/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
107+
/// them into the result: `[a.0, b.0, a.1, b.1]`.
95108
#[inline(always)]
96109
#[target_feature = "+mmx"]
97110
#[cfg_attr(test, assert_instr(punpcklwd))]
98111
pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
99112
punpcklwd(a, b)
100113
}
101114

115+
/// Unpacks the upper element from two `i32x2` vectors and interleaves them
116+
/// into the result: `[a.1, b.1]`.
117+
#[inline(always)]
118+
#[target_feature = "+mmx"]
119+
#[cfg_attr(test, assert_instr(punpckhdq))]
120+
pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
121+
punpckhdq(a, b)
122+
}
123+
124+
/// Unpacks the lower element from two `i32x2` vectors and interleaves them
125+
/// into the result: `[a.0, b.0]`.
126+
#[inline(always)]
127+
#[target_feature = "+mmx"]
128+
#[cfg_attr(test, assert_instr(punpckldq))]
129+
pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
130+
punpckldq(a, b)
131+
}
132+
102133
#[allow(improper_ctypes)]
103134
extern "C" {
104135
#[link_name = "llvm.x86.mmx.packsswb"]
@@ -109,12 +140,20 @@ extern "C" {
109140
fn pcmpgtb(a: __m64, b: __m64) -> __m64;
110141
#[link_name = "llvm.x86.mmx.pcmpgt.w"]
111142
fn pcmpgtw(a: __m64, b: __m64) -> __m64;
143+
#[link_name = "llvm.x86.mmx.pcmpgt.d"]
144+
fn pcmpgtd(a: __m64, b: __m64) -> __m64;
112145
#[link_name = "llvm.x86.mmx.punpckhwd"]
113146
fn punpckhwd(a: __m64, b: __m64) -> __m64;
114-
#[link_name = "llvm.x86.mmx.punpcklbw"]
115-
fn punpcklbw(a: __m64, b: __m64) -> __m64;
116147
#[link_name = "llvm.x86.mmx.punpcklwd"]
117148
fn punpcklwd(a: __m64, b: __m64) -> __m64;
149+
#[link_name = "llvm.x86.mmx.punpckhbw"]
150+
fn punpckhbw(a: __m64, b: __m64) -> __m64;
151+
#[link_name = "llvm.x86.mmx.punpcklbw"]
152+
fn punpcklbw(a: __m64, b: __m64) -> __m64;
153+
#[link_name = "llvm.x86.mmx.punpckhdq"]
154+
fn punpckhdq(a: __m64, b: __m64) -> __m64;
155+
#[link_name = "llvm.x86.mmx.punpckldq"]
156+
fn punpckldq(a: __m64, b: __m64) -> __m64;
118157
}
119158

120159
#[cfg(test)]
@@ -123,21 +162,21 @@ mod tests {
123162
use x86::i686::mmx;
124163
use stdsimd_test::simd_test;
125164

126-
#[simd_test = "sse"] // FIXME: should be mmx
165+
#[simd_test = "mmx"]
127166
unsafe fn _mm_setzero_si64() {
128167
let r: __m64 = ::std::mem::transmute(0_i64);
129168
assert_eq!(r, mmx::_mm_setzero_si64());
130169
}
131170

132-
#[simd_test = "sse"] // FIXME: should be mmx
171+
#[simd_test = "mmx"]
133172
unsafe fn _mm_packs_pi16() {
134173
let a = i16x4::new(-1, 2, -3, 4);
135174
let b = i16x4::new(-5, 6, -7, 8);
136175
let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
137176
assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into())));
138177
}
139178

140-
#[simd_test = "sse"] // FIXME: should be mmx
179+
#[simd_test = "mmx"]
141180
unsafe fn _mm_packs_pi32() {
142181
let a = i32x2::new(-1, 2);
143182
let b = i32x2::new(-5, 6);
@@ -162,11 +201,23 @@ mod tests {
162201
}
163202

164203
#[simd_test = "mmx"]
165-
unsafe fn _mm_unpackhi_pi16() {
166-
let a = i16x4::new(0, 1, 2, 3);
167-
let b = i16x4::new(4, 5, 6, 7);
168-
let r = i16x4::new(2, 6, 3, 7);
169-
assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
204+
unsafe fn _mm_cmpgt_pi32() {
205+
let a = i32x2::new(0, 3);
206+
let b = i32x2::new(1, 2);
207+
let r0 = i32x2::new(0, -1);
208+
let r1 = i32x2::new(-1, 0);
209+
210+
assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into());
211+
assert_eq!(r1, mmx::_mm_cmpgt_pi32(b.into(), a.into()).into());
212+
}
213+
214+
#[simd_test = "mmx"]
215+
unsafe fn _mm_unpackhi_pi8() {
216+
let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
217+
let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
218+
let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14);
219+
220+
assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into());
170221
}
171222

172223
#[simd_test = "mmx"]
@@ -177,11 +228,37 @@ mod tests {
177228
assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into())));
178229
}
179230

231+
#[simd_test = "mmx"]
232+
unsafe fn _mm_unpackhi_pi16() {
233+
let a = i16x4::new(0, 1, 2, 3);
234+
let b = i16x4::new(4, 5, 6, 7);
235+
let r = i16x4::new(2, 6, 3, 7);
236+
assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
237+
}
238+
180239
#[simd_test = "mmx"]
181240
unsafe fn _mm_unpacklo_pi16() {
182241
let a = i16x4::new(0, 1, 2, 3);
183242
let b = i16x4::new(4, 5, 6, 7);
184243
let r = i16x4::new(0, 4, 1, 5);
185244
assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into())));
186245
}
246+
247+
#[simd_test = "mmx"]
248+
unsafe fn _mm_unpackhi_pi32() {
249+
let a = i32x2::new(0, 3);
250+
let b = i32x2::new(1, 2);
251+
let r = i32x2::new(3, 2);
252+
253+
assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into());
254+
}
255+
256+
#[simd_test = "mmx"]
257+
unsafe fn _mm_unpacklo_pi32() {
258+
let a = i32x2::new(0, 3);
259+
let b = i32x2::new(1, 2);
260+
let r = i32x2::new(0, 1);
261+
262+
assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into());
263+
}
187264
}

Diff for: coresimd/src/x86/i686/sse.rs

+32-25
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,31 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
221221
_mm_cvtpi32_ps(a, b)
222222
}
223223

224-
/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x
225-
/// float].
224+
/// Converts the lower four signed 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
226225
#[inline(always)]
227226
#[target_feature = "+sse"]
227+
#[cfg_attr(test, assert_instr(cvtpi2ps))]
228+
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
229+
let b = mmx::_mm_setzero_si64();
230+
let b = mmx::_mm_cmpgt_pi8(b, a);
231+
let b = mmx::_mm_unpacklo_pi8(a, b);
232+
_mm_cvtpi16_ps(b)
233+
}
234+
235+
/// Converts the lower four unsigned 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
236+
#[inline(always)]
237+
#[target_feature = "+sse"]
238+
#[cfg_attr(test, assert_instr(cvtpi2ps))]
239+
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
240+
let b = mmx::_mm_setzero_si64();
241+
let b = mmx::_mm_unpacklo_pi8(a, b);
242+
_mm_cvtpi16_ps(b)
243+
}
244+
245+
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
246+
#[inline(always)]
247+
#[target_feature = "+sse"]
248+
#[cfg_attr(test, assert_instr(cvtpi2ps))]
228249
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
229250
let b = mmx::_mm_setzero_si64();
230251
let b = mmx::_mm_cmpgt_pi16(mem::transmute(b), a);
@@ -236,10 +257,10 @@ pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
236257
cvtpi2ps(r, mem::transmute(c))
237258
}
238259

239-
/// Converts a 64-bit vector of 16-bit unsigned integer values into a
240-
/// 128-bit vector of [4 x float].
260+
/// Converts a 64-bit vector of `u16`s into a 128-bit vector of 4 `f32`s.
241261
#[inline(always)]
242262
#[target_feature = "+sse"]
263+
#[cfg_attr(test, assert_instr(cvtpi2ps))]
243264
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
244265
let b = mmx::_mm_setzero_si64();
245266
let c = mmx::_mm_unpackhi_pi16(a, b);
@@ -250,27 +271,6 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
250271
cvtpi2ps(r, c)
251272
}
252273

253-
/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
254-
/// into a 128-bit vector of [4 x float].
255-
#[inline(always)]
256-
#[target_feature = "+sse"]
257-
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
258-
let b = mmx::_mm_setzero_si64();
259-
let b = mmx::_mm_cmpgt_pi8(b, a);
260-
let b = mmx::_mm_unpacklo_pi8(a, b);
261-
_mm_cvtpi16_ps(b)
262-
}
263-
264-
/// Converts the lower four unsigned 8-bit integer values from a 64-bit
265-
/// vector of [8 x u8] into a 128-bit vector of [4 x float].
266-
#[inline(always)]
267-
#[target_feature = "+sse"]
268-
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
269-
let b = mmx::_mm_setzero_si64();
270-
let b = mmx::_mm_unpacklo_pi8(a, b);
271-
_mm_cvtpi16_ps(b)
272-
}
273-
274274
/// Converts the two 32-bit signed integer values from each 64-bit vector
275275
/// operand of [2 x i32] into a 128-bit vector of [4 x float].
276276
#[inline(always)]
@@ -512,6 +512,13 @@ mod tests {
512512
assert_eq!(r, u16x4::splat(15));
513513
}
514514

515+
#[simd_test = "sse"]
516+
unsafe fn _m_pmulhuw() {
517+
let (a, b) = (u16x4::splat(1000), u16x4::splat(1001));
518+
let r = sse::_m_pmulhuw(a.into(), b.into());
519+
assert_eq!(r, u16x4::splat(15).into());
520+
}
521+
515522
#[simd_test = "sse"]
516523
unsafe fn _mm_avg_pu8() {
517524
let (a, b) = (u8x8::splat(3), u8x8::splat(9));

0 commit comments

Comments
 (0)