Skip to content

Commit

Permalink
Make rustc's job a little easier in sse42 (rust-lang#277)
Browse files Browse the repository at this point in the history
Move all the casts from `__m128i` to `i8x16` outside the macro invocations so
rustc only has to resolve a few function calls, not thousands!
  • Loading branch information
alexcrichton authored Jan 12, 2018
1 parent 94bee32 commit 5cf1db0
Showing 1 changed file with 44 additions and 19 deletions.
63 changes: 44 additions & 19 deletions coresimd/src/x86/i586/sse42.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use stdsimd_test::assert_instr;

use v128::*;
use x86::*;

/// String contains unsigned 8-bit characters *(Default)*
pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
Expand Down Expand Up @@ -51,10 +52,12 @@ pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { __m128i::from(pcmpistrm128(i8x16::from(a), i8x16::from(b), $imm8)) }
($imm8:expr) => { pcmpistrm128(a, b, $imm8) }
}
constify_imm8!(imm8, call)
mem::transmute(constify_imm8!(imm8, call))
}

/// Compare packed strings with implicit lengths in `a` and `b` using the
Expand Down Expand Up @@ -271,8 +274,10 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpistri128(i8x16::from(a), i8x16::from(b), $imm8) }
($imm8:expr) => { pcmpistri128(a, b, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -284,10 +289,10 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpistriz128(i8x16::from(a),
i8x16::from(b),
$imm8) }
($imm8:expr) => { pcmpistriz128(a, b, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -299,8 +304,10 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpistric128(i8x16::from(a), i8x16::from(b), $imm8) }
($imm8:expr) => { pcmpistric128(a, b, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -312,8 +319,10 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpistris128(i8x16::from(a), i8x16::from(b), $imm8) }
($imm8:expr) => { pcmpistris128(a, b, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -324,8 +333,10 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpistrio128(i8x16::from(a), i8x16::from(b), $imm8) }
($imm8:expr) => { pcmpistrio128(a, b, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -337,8 +348,10 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpistria128(i8x16::from(a), i8x16::from(b), $imm8) }
($imm8:expr) => { pcmpistria128(a, b, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -351,10 +364,10 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
pub unsafe fn _mm_cmpestrm(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> __m128i {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { __m128i::from(pcmpestrm128(i8x16::from(a), la,
i8x16::from(b), lb,
$imm8)) }
($imm8:expr) => { __m128i::from(pcmpestrm128(a, la, b, lb, $imm8)) }
}
constify_imm8!(imm8, call)
}
Expand Down Expand Up @@ -447,8 +460,10 @@ pub unsafe fn _mm_cmpestrm(
pub unsafe fn _mm_cmpestri(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpestri128(i8x16::from(a), la, i8x16::from(b), lb, $imm8) }
($imm8:expr) => { pcmpestri128(a, la, b, lb, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -462,8 +477,10 @@ pub unsafe fn _mm_cmpestri(
pub unsafe fn _mm_cmpestrz(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpestriz128(i8x16::from(a), la, i8x16::from(b), lb, $imm8) }
($imm8:expr) => { pcmpestriz128(a, la, b, lb, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -477,8 +494,10 @@ pub unsafe fn _mm_cmpestrz(
pub unsafe fn _mm_cmpestrc(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpestric128(i8x16::from(a), la, i8x16::from(b), lb, $imm8) }
($imm8:expr) => { pcmpestric128(a, la, b, lb, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -492,8 +511,10 @@ pub unsafe fn _mm_cmpestrc(
pub unsafe fn _mm_cmpestrs(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpestris128(i8x16::from(a), la, i8x16::from(b), lb, $imm8) }
($imm8:expr) => { pcmpestris128(a, la, b, lb, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -507,8 +528,10 @@ pub unsafe fn _mm_cmpestrs(
pub unsafe fn _mm_cmpestro(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpestrio128(i8x16::from(a), la, i8x16::from(b), lb, $imm8) }
($imm8:expr) => { pcmpestrio128(a, la, b, lb, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand All @@ -523,8 +546,10 @@ pub unsafe fn _mm_cmpestro(
pub unsafe fn _mm_cmpestra(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
macro_rules! call {
($imm8:expr) => { pcmpestria128(i8x16::from(a), la, i8x16::from(b), lb, $imm8) }
($imm8:expr) => { pcmpestria128(a, la, b, lb, $imm8) }
}
constify_imm8!(imm8, call)
}
Expand Down

0 comments on commit 5cf1db0

Please sign in to comment.