diff --git a/coresimd/src/lib.rs b/coresimd/src/lib.rs
index d244a3c8c3..fa4820d16b 100644
--- a/coresimd/src/lib.rs
+++ b/coresimd/src/lib.rs
@@ -14,8 +14,8 @@
 #![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
            simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
            integer_atomics, stmt_expr_attributes, core_intrinsics,
-           crate_in_paths)]
-#![cfg_attr(test, feature(proc_macro, test, attr_literals, abi_vectorcall))]
+           crate_in_paths, attr_literals, rustc_attrs)]
+#![cfg_attr(test, feature(proc_macro, test, abi_vectorcall))]
 #![cfg_attr(feature = "cargo-clippy",
             allow(inline_always, too_many_arguments, cast_sign_loss,
                   cast_lossless, cast_possible_wrap,
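The `rustc_attrs` feature gate added above is what permits the unstable `#[rustc_args_required_const]` attribute used throughout the rest of this patch. A minimal sketch of what the attribute enforces (the `demo` function below is illustrative, not part of the patch): the argument at the given zero-based index must be a compile-time constant at every call site.

```rust
// Illustrative only: a nightly-only rustc-internal attribute, enabled by
// the `rustc_attrs` feature gate added to lib.rs above.
#![feature(rustc_attrs)]

#[rustc_args_required_const(1)] // zero-based: argument 1 (`imm8`) must be const
fn demo(a: i32, imm8: i32) -> i32 {
    a.wrapping_shl(imm8 as u32)
}

fn main() {
    let _ = demo(1, 2); // OK: literal immediate
    // let n = 2;
    // let _ = demo(1, n); // rejected: `n` is not a constant
}
```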
diff --git a/coresimd/src/x86/i586/avx.rs b/coresimd/src/x86/i586/avx.rs
index 6d8200c4d3..cbff4bf8e6 100644
--- a/coresimd/src/x86/i586/avx.rs
+++ b/coresimd/src/x86/i586/avx.rs
@@ -96,6 +96,7 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! shuffle4 {
@@ -138,6 +139,7 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! shuffle4 {
@@ -330,6 +332,7 @@ pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundpd, b = 0x3))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_round_pd(a: __m256d, b: i32) -> __m256d {
     macro_rules! call {
         ($imm8:expr) => { roundpd256(a, $imm8) }
@@ -369,6 +372,7 @@ pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundps, b = 0x00))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_round_ps(a: __m256, b: i32) -> __m256 {
     macro_rules! call {
         ($imm8:expr) => {
@@ -419,6 +423,7 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! blend4 {
@@ -461,6 +466,7 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! blend4 {
@@ -531,6 +537,7 @@ pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vdpps, imm8 = 0x0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_dp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
     macro_rules! call {
         ($imm8:expr) => { vdpps(a, b, $imm8) }
@@ -678,6 +685,7 @@ pub const _CMP_TRUE_US: i32 = 0x1f;
 #[inline]
 #[target_feature(enable = "avx,sse2")]
 #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
     macro_rules! call {
         ($imm8:expr) => { vcmppd(a, b, $imm8) }
@@ -691,6 +699,7 @@ pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
     macro_rules! call {
         ($imm8:expr) => { vcmppd256(a, b, $imm8) }
@@ -704,6 +713,7 @@ pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx,sse")]
 #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
     macro_rules! call {
         ($imm8:expr) => { vcmpps(a, b, $imm8) }
@@ -717,6 +727,7 @@ pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
     macro_rules! call {
         ($imm8:expr) => { vcmpps256(a, b, $imm8) }
@@ -732,6 +743,7 @@ pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx,sse2")]
 #[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
     macro_rules! call {
         ($imm8:expr) => { vcmpsd(a, b, $imm8) }
@@ -747,6 +759,7 @@ pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx,sse")]
 #[cfg_attr(test, assert_instr(vcmpeqss, imm8 = 0))] // TODO Validate vcmpss
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 {
     macro_rules! call {
         ($imm8:expr) => { vcmpss(a, b, $imm8) }
@@ -830,7 +843,8 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
 /// floating-point elements) from `a`, selected with `imm8`.
 #[inline]
 #[target_feature(enable = "avx")]
-#[cfg_attr(test, assert_instr(vextractf128))]
+#[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 {
     match imm8 & 1 {
         0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
@@ -842,7 +856,8 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 {
 /// floating-point elements) from `a`, selected with `imm8`.
 #[inline]
 #[target_feature(enable = "avx")]
-#[cfg_attr(test, assert_instr(vextractf128))]
+#[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d {
     match imm8 & 1 {
         0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]),
@@ -853,7 +868,8 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d {
 /// Extract 128 bits (composed of integer data) from `a`, selected with `imm8`.
 #[inline]
 #[target_feature(enable = "avx")]
-#[cfg_attr(test, assert_instr(vextractf128))]
+#[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i {
     let b = _mm256_undefined_si256().as_i64x4();
     let dst: i64x2 = match imm8 & 1 {
@@ -903,6 +919,7 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! shuffle4 {
@@ -955,6 +972,7 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx,sse")]
 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 {
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! shuffle4 {
@@ -1025,6 +1043,7 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! shuffle4 {
@@ -1067,6 +1086,7 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx,sse2")]
 #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! shuffle2 {
@@ -1093,6 +1113,7 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x5))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_permute2f128_ps(
     a: __m256, b: __m256, imm8: i32
 ) -> __m256 {
@@ -1107,6 +1128,7 @@ pub unsafe fn _mm256_permute2f128_ps(
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_permute2f128_pd(
     a: __m256d, b: __m256d, imm8: i32
 ) -> __m256d {
@@ -1121,6 +1143,7 @@ pub unsafe fn _mm256_permute2f128_pd(
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_permute2f128_si256(
     a: __m256i, b: __m256i, imm8: i32
 ) -> __m256i {
@@ -1184,6 +1207,7 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 {
     let b = _mm256_castps128_ps256(b);
     match imm8 & 1 {
@@ -1198,6 +1222,7 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_insertf128_pd(
     a: __m256d, b: __m128d, imm8: i32
 ) -> __m256d {
@@ -1212,6 +1237,7 @@ pub unsafe fn _mm256_insertf128_pd(
 #[inline]
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_insertf128_si256(
     a: __m256i, b: __m128i, imm8: i32
 ) -> __m256i {
@@ -1228,6 +1254,7 @@ pub unsafe fn _mm256_insertf128_si256(
 #[inline]
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i {
     mem::transmute(simd_insert(a.as_i8x32(), (index as u32) & 31, i))
 }
@@ -1237,6 +1264,7 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i {
     mem::transmute(simd_insert(a.as_i16x16(), (index as u32) & 15, i))
 }
@@ -1246,6 +1274,7 @@ pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i {
     mem::transmute(simd_insert(a.as_i32x8(), (index as u32) & 7, i))
 }
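All of the `call!`/`constify_imm8!` pairs above rely on the crate's constify macros, which turn a runtime `i32` into a literal immediate by branching to one macro expansion per possible value. A reduced sketch of the idea, assuming the shape of the crate's own macros (shown here as a four-arm `constify_imm2!`-style macro; the real `constify_imm8!` has 256 arms):

```rust
// Sketch of the constify pattern: expand a 2-bit runtime value into four
// branches, each invoking the callback macro with a literal immediate.
macro_rules! constify_imm2 {
    ($imm2:expr, $expand:ident) => {
        match $imm2 & 0b11 {
            0 => $expand!(0),
            1 => $expand!(1),
            2 => $expand!(2),
            _ => $expand!(3),
        }
    };
}
```

With `#[rustc_args_required_const]` in place the selector is always a compile-time constant, so the branch is resolved during optimization and no runtime dispatch survives.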
diff --git a/coresimd/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs
index ac5d78f171..d47dcbc804 100644
--- a/coresimd/src/x86/i586/avx2.rs
+++ b/coresimd/src/x86/i586/avx2.rs
@@ -123,6 +123,7 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpalignr, n = 15))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
     let n = n as u32;
     // If palignr is shifting the pair of vectors more than the size of two
@@ -227,6 +228,7 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
     let imm8 = (imm8 & 0xFF) as u8;
     let a = a.as_i32x4();
@@ -259,6 +261,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_blend_epi32(
     a: __m256i, b: __m256i, imm8: i32
 ) -> __m256i {
@@ -313,6 +316,7 @@ pub unsafe fn _mm256_blend_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_blend_epi16(
     a: __m256i, b: __m256i, imm8: i32
 ) -> __m256i {
@@ -697,6 +701,7 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i {
     let a = a.as_i64x4();
     let b = _mm256_undefined_si256().as_i64x4();
@@ -763,6 +768,7 @@ pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_i32gather_epi32(
     slice: *const i32, offsets: __m128i, scale: i32
 ) -> __m128i {
@@ -784,6 +790,7 @@ pub unsafe fn _mm_i32gather_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_mask_i32gather_epi32(
     src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i,
     scale: i32,
@@ -805,6 +812,7 @@ pub unsafe fn _mm_mask_i32gather_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_i32gather_epi32(
     slice: *const i32, offsets: __m256i, scale: i32
 ) -> __m256i {
@@ -826,6 +834,7 @@ pub unsafe fn _mm256_i32gather_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm256_mask_i32gather_epi32(
     src: __m256i, slice: *const i32, offsets: __m256i, mask: __m256i,
     scale: i32,
@@ -847,6 +856,7 @@ pub unsafe fn _mm256_mask_i32gather_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_i32gather_ps(
     slice: *const f32, offsets: __m128i, scale: i32
 ) -> __m128 {
@@ -867,6 +877,7 @@ pub unsafe fn _mm_i32gather_ps(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_mask_i32gather_ps(
     src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32
 ) -> __m128 {
@@ -884,6 +895,7 @@ pub unsafe fn _mm_mask_i32gather_ps(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_i32gather_ps(
     slice: *const f32, offsets: __m256i, scale: i32
 ) -> __m256 {
@@ -904,6 +916,7 @@ pub unsafe fn _mm256_i32gather_ps(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm256_mask_i32gather_ps(
     src: __m256, slice: *const f32, offsets: __m256i, mask: __m256, scale: i32
 ) -> __m256 {
@@ -921,6 +934,7 @@ pub unsafe fn _mm256_mask_i32gather_ps(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_i32gather_epi64(
     slice: *const i64, offsets: __m128i, scale: i32
 ) -> __m128i {
@@ -942,6 +956,7 @@ pub unsafe fn _mm_i32gather_epi64(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_mask_i32gather_epi64(
     src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i,
     scale: i32,
@@ -963,6 +978,7 @@ pub unsafe fn _mm_mask_i32gather_epi64(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_i32gather_epi64(
     slice: *const i64, offsets: __m128i, scale: i32
 ) -> __m256i {
@@ -984,6 +1000,7 @@ pub unsafe fn _mm256_i32gather_epi64(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm256_mask_i32gather_epi64(
     src: __m256i, slice: *const i64, offsets: __m128i, mask: __m256i,
     scale: i32,
@@ -1005,6 +1022,7 @@ pub unsafe fn _mm256_mask_i32gather_epi64(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_i32gather_pd(
     slice: *const f64, offsets: __m128i, scale: i32
 ) -> __m128d {
@@ -1025,6 +1043,7 @@ pub unsafe fn _mm_i32gather_pd(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_mask_i32gather_pd(
     src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d,
     scale: i32,
@@ -1043,6 +1062,7 @@ pub unsafe fn _mm_mask_i32gather_pd(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_i32gather_pd(
     slice: *const f64, offsets: __m128i, scale: i32
 ) -> __m256d {
@@ -1063,6 +1083,7 @@ pub unsafe fn _mm256_i32gather_pd(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm256_mask_i32gather_pd(
     src: __m256d, slice: *const f64, offsets: __m128i, mask: __m256d,
     scale: i32,
@@ -1081,6 +1102,7 @@ pub unsafe fn _mm256_mask_i32gather_pd(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_i64gather_epi32(
     slice: *const i32, offsets: __m128i, scale: i32
 ) -> __m128i {
@@ -1102,6 +1124,7 @@ pub unsafe fn _mm_i64gather_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_mask_i64gather_epi32(
     src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i,
     scale: i32,
@@ -1123,6 +1146,7 @@ pub unsafe fn _mm_mask_i64gather_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_i64gather_epi32(
     slice: *const i32, offsets: __m256i, scale: i32
 ) -> __m128i {
@@ -1144,6 +1168,7 @@ pub unsafe fn _mm256_i64gather_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm256_mask_i64gather_epi32(
     src: __m128i, slice: *const i32, offsets: __m256i, mask: __m128i,
     scale: i32,
@@ -1165,6 +1190,7 @@ pub unsafe fn _mm256_mask_i64gather_epi32(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_i64gather_ps(
     slice: *const f32, offsets: __m128i, scale: i32
 ) -> __m128 {
@@ -1185,6 +1211,7 @@ pub unsafe fn _mm_i64gather_ps(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_mask_i64gather_ps(
     src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32
 ) -> __m128 {
@@ -1202,6 +1229,7 @@ pub unsafe fn _mm_mask_i64gather_ps(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_i64gather_ps(
     slice: *const f32, offsets: __m256i, scale: i32
 ) -> __m128 {
@@ -1222,6 +1250,7 @@ pub unsafe fn _mm256_i64gather_ps(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm256_mask_i64gather_ps(
     src: __m128, slice: *const f32, offsets: __m256i, mask: __m128, scale: i32
 ) -> __m128 {
@@ -1239,6 +1268,7 @@ pub unsafe fn _mm256_mask_i64gather_ps(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_i64gather_epi64(
     slice: *const i64, offsets: __m128i, scale: i32
 ) -> __m128i {
@@ -1260,6 +1290,7 @@ pub unsafe fn _mm_i64gather_epi64(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_mask_i64gather_epi64(
     src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i,
     scale: i32,
@@ -1281,6 +1312,7 @@ pub unsafe fn _mm_mask_i64gather_epi64(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_i64gather_epi64(
     slice: *const i64, offsets: __m256i, scale: i32
 ) -> __m256i {
@@ -1302,6 +1334,7 @@ pub unsafe fn _mm256_i64gather_epi64(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm256_mask_i64gather_epi64(
     src: __m256i, slice: *const i64, offsets: __m256i, mask: __m256i,
     scale: i32,
@@ -1323,6 +1356,7 @@ pub unsafe fn _mm256_mask_i64gather_epi64(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_i64gather_pd(
     slice: *const f64, offsets: __m128i, scale: i32
 ) -> __m128d {
@@ -1343,6 +1377,7 @@ pub unsafe fn _mm_i64gather_pd(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_mask_i64gather_pd(
     src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d,
     scale: i32,
@@ -1361,6 +1396,7 @@ pub unsafe fn _mm_mask_i64gather_pd(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_i64gather_pd(
     slice: *const f64, offsets: __m256i, scale: i32
 ) -> __m256d {
@@ -1381,6 +1417,7 @@ pub unsafe fn _mm256_i64gather_pd(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm256_mask_i64gather_pd(
     src: __m256d, slice: *const f64, offsets: __m256i, mask: __m256d,
     scale: i32,
@@ -1398,6 +1435,7 @@ pub unsafe fn _mm256_mask_i64gather_pd(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_inserti128_si256(
     a: __m256i, b: __m128i, imm8: i32
 ) -> __m256i {
@@ -1654,6 +1692,7 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_mpsadbw_epu8(
     a: __m256i, b: __m256i, imm8: i32
 ) -> __m256i {
@@ -1799,6 +1838,7 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
     let imm8 = (imm8 & 0xFF) as u8;
     let zero = _mm256_setzero_si256().as_i64x4();
@@ -1851,6 +1891,7 @@ pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm256_permute2x128_si256(
     a: __m256i, b: __m256i, imm8: i32
 ) -> __m256i {
@@ -1869,6 +1910,7 @@ pub unsafe fn _mm256_permute2x128_si256(
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
     use x86::i586::avx::_mm256_undefined_pd;
     let imm8 = (imm8 & 0xFF) as u8;
@@ -1989,11 +2031,8 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
 ///
 /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
 ///
-/// let shuffle1 = 0b00_11_10_01;
-/// let shuffle2 = 0b01_00_10_11;
-///
-/// let c1 = _mm256_shuffle_epi32(a, shuffle1);
-/// let c2 = _mm256_shuffle_epi32(a, shuffle2);
+/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
+/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
 ///
 /// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
 /// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
@@ -2008,6 +2047,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
     // simd_shuffleX requires that its selector parameter be made up of
     // constant values, but we can't enforce that here. In spirit, we need
@@ -2069,6 +2109,7 @@ pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i {
     let imm8 = (imm8 & 0xFF) as u8;
     let a = a.as_i16x16();
@@ -2126,6 +2167,7 @@ pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i {
     let imm8 = (imm8 & 0xFF) as u8;
     let a = a.as_i16x16();
@@ -2265,6 +2307,7 @@ pub unsafe fn _mm256_slli_epi64(a: __m256i, imm8: i32) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
     let a = a.as_i64x4();
     macro_rules! call {
@@ -2279,8 +2322,15 @@ pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
-    _mm256_slli_si256(a, imm8)
+    let a = a.as_i64x4();
+    macro_rules! call {
+        ($imm8:expr) => {
+            vpslldq(a, $imm8)
+        }
+    }
+    mem::transmute(constify_imm8!(imm8 * 8, call))
 }
 
 /// Shift packed 32-bit integers in `a` left by the amount
@@ -2381,6 +2431,7 @@ pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
     let a = a.as_i64x4();
     macro_rules! call {
@@ -2395,8 +2446,15 @@ pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
-    _mm256_srli_si256(a, imm8)
+    let a = a.as_i64x4();
+    macro_rules! call {
+        ($imm8:expr) => {
+            vpsrldq(a, $imm8)
+        }
+    }
+    mem::transmute(constify_imm8!(imm8 * 8, call))
 }
 
 /// Shift packed 16-bit integers in `a` right by `count` while shifting in
@@ -2897,6 +2955,7 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 // This intrinsic has no corresponding instruction.
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 {
     let imm8 = (imm8 & 31) as u32;
     simd_extract(a.as_i8x32(), imm8)
@@ -2909,6 +2968,7 @@ pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 {
 #[inline]
 #[target_feature(enable = "avx2")]
 // This intrinsic has no corresponding instruction.
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 {
     let imm8 = (imm8 & 15) as u32;
     simd_extract(a.as_i16x16(), imm8)
@@ -2918,6 +2978,7 @@ pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 {
 #[inline]
 #[target_feature(enable = "avx2")]
 // This intrinsic has no corresponding instruction.
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm256_extract_epi32(a: __m256i, imm8: i32) -> i32 {
     let imm8 = (imm8 & 7) as u32;
     simd_extract(a.as_i32x8(), imm8)
diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs
index f103333492..4aca580d1c 100644
--- a/coresimd/src/x86/i586/sse.rs
+++ b/coresimd/src/x86/i586/sse.rs
@@ -764,6 +764,7 @@ pub unsafe fn _mm_setzero_ps() -> __m128 {
 #[inline]
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(shufps, mask = 3))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: u32) -> __m128 {
     let mask = (mask & 0xFF) as u8;
 
@@ -1548,6 +1549,7 @@ pub const _MM_HINT_NTA: i32 = 0;
 #[cfg_attr(test, assert_instr(prefetcht1, strategy = _MM_HINT_T1))]
 #[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))]
 #[cfg_attr(test, assert_instr(prefetchnta, strategy = _MM_HINT_NTA))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
     // The `strategy` must be a compile-time constant, so we use a short form
     // of `constify_imm8!` for now.
@@ -2739,8 +2741,7 @@ mod tests {
     unsafe fn test_mm_shuffle_ps() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
-        let mask = 0b00_01_01_11;
-        let r = _mm_shuffle_ps(a, b, mask);
+        let r = _mm_shuffle_ps(a, b, 0b00_01_01_11);
         assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
     }
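A call-site sketch for the `_mm_prefetch` change, assuming this patch's signatures (where the hint is still a plain `i32` parameter): the `strategy` argument must now be a constant at every call site, in practice one of the crate's `_MM_HINT_*` constants.

```rust
// Sketch, with this patch's signatures: the hint must be a constant, so a
// `pub const` such as `_MM_HINT_T0` qualifies, but a runtime value does not.
#[target_feature(enable = "sse")]
unsafe fn warm_cache(p: *const i8) {
    _mm_prefetch(p, _MM_HINT_T0);
}
```

The updated `test_mm_shuffle_ps` above shows the same constraint from the test side: the mask can no longer be bound to a `let` variable first.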
diff --git a/coresimd/src/x86/i586/sse2.rs b/coresimd/src/x86/i586/sse2.rs
index 6011d1a372..b8763305ff 100644
--- a/coresimd/src/x86/i586/sse2.rs
+++ b/coresimd/src/x86/i586/sse2.rs
@@ -314,7 +314,14 @@ pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
+    _mm_slli_si128_impl(a, imm8)
+}
+
+#[inline]
+#[target_feature(enable = "sse2")]
+unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
     let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32);
     let a = a.as_i8x16();
     macro_rules! shuffle {
@@ -357,22 +364,25 @@ pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i {
-    _mm_slli_si128(a, imm8)
+    _mm_slli_si128_impl(a, imm8)
 }
 
 /// Shift `a` right by `imm8` bytes while shifting in zeros.
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i {
-    _mm_srli_si128(a, imm8)
+    _mm_srli_si128_impl(a, imm8)
 }
 
 /// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros.
 #[inline]
 #[target_feature(enable = "sse2")]
-#[cfg_attr(test, assert_instr(psllw))]
+#[cfg_attr(test, assert_instr(psllw, imm8 = 7))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i {
     mem::transmute(pslliw(a.as_i16x8(), imm8))
 }
@@ -389,7 +399,8 @@ pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
 /// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros.
 #[inline]
 #[target_feature(enable = "sse2")]
-#[cfg_attr(test, assert_instr(pslld))]
+#[cfg_attr(test, assert_instr(pslld, imm8 = 7))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i {
     mem::transmute(psllid(a.as_i32x4(), imm8))
 }
@@ -406,7 +417,8 @@ pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
 /// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros.
 #[inline]
 #[target_feature(enable = "sse2")]
-#[cfg_attr(test, assert_instr(psllq))]
+#[cfg_attr(test, assert_instr(psllq, imm8 = 7))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i {
     mem::transmute(pslliq(a.as_i64x2(), imm8))
 }
@@ -424,7 +436,8 @@ pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
 /// bits.
 #[inline]
 #[target_feature(enable = "sse2")]
-#[cfg_attr(test, assert_instr(psraw))]
+#[cfg_attr(test, assert_instr(psraw, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i {
     mem::transmute(psraiw(a.as_i16x8(), imm8))
 }
@@ -442,7 +455,8 @@ pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
 /// bits.
 #[inline]
 #[target_feature(enable = "sse2")]
-#[cfg_attr(test, assert_instr(psrad))]
+#[cfg_attr(test, assert_instr(psrad, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i {
     mem::transmute(psraid(a.as_i32x4(), imm8))
 }
@@ -460,7 +474,14 @@ pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
+    _mm_srli_si128_impl(a, imm8)
+}
+
+#[inline]
+#[target_feature(enable = "sse2")]
+unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
     let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32);
     let a = a.as_i8x16();
     macro_rules! shuffle {
@@ -503,7 +524,8 @@ pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
 /// zeros.
 #[inline]
 #[target_feature(enable = "sse2")]
-#[cfg_attr(test, assert_instr(psrlw))]
+#[cfg_attr(test, assert_instr(psrlw, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i {
     mem::transmute(psrliw(a.as_i16x8(), imm8))
 }
@@ -521,7 +543,8 @@ pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
 /// zeros.
 #[inline]
 #[target_feature(enable = "sse2")]
-#[cfg_attr(test, assert_instr(psrld))]
+#[cfg_attr(test, assert_instr(psrld, imm8 = 8))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i {
     mem::transmute(psrlid(a.as_i32x4(), imm8))
 }
@@ -539,7 +562,8 @@ pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
 /// zeros.
 #[inline]
 #[target_feature(enable = "sse2")]
-#[cfg_attr(test, assert_instr(psrlq))]
+#[cfg_attr(test, assert_instr(psrlq, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i {
     mem::transmute(psrliq(a.as_i64x2(), imm8))
 }
@@ -985,6 +1009,7 @@ pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pextrw, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
     simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32
 }
@@ -993,6 +1018,7 @@ pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i {
     mem::transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16))
 }
@@ -1009,6 +1035,7 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pshufd, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
     // simd_shuffleX requires that its selector parameter be made up of
     // constant values, but we can't enforce that here. In spirit, we need
@@ -1072,6 +1099,7 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
     // See _mm_shuffle_epi32.
     let imm8 = (imm8 & 0xFF) as u8;
@@ -1130,6 +1158,7 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i {
     // See _mm_shuffle_epi32.
     let imm8 = (imm8 & 0xFF) as u8;
@@ -2078,6 +2107,7 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(shufpd, imm8 = 1))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
     match imm8 & 0b11 {
         0b00 => simd_shuffle2(a, b, [0, 2]),
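The `_impl` indirection in sse2.rs above is worth calling out: once `_mm_slli_si128` itself requires a constant argument, `_mm_bslli_si128` can no longer just forward `imm8` to it, because inside the wrapper `imm8` is an ordinary runtime parameter. Both public functions therefore route through a private, un-attributed helper. A reduced sketch of the pattern, with simplified names and bodies:

```rust
#![feature(rustc_attrs)]

// Both public entry points carry the attribute; the shared body does not,
// so forwarding a plain parameter to it is still legal.
#[rustc_args_required_const(1)]
pub fn public_slli(a: i32, imm8: i32) -> i32 {
    slli_impl(a, imm8) // fine: `slli_impl` imposes no const requirement
}

#[rustc_args_required_const(1)]
pub fn public_bslli(a: i32, imm8: i32) -> i32 {
    // Calling `public_slli(a, imm8)` here would be rejected instead, since
    // `imm8` is not a constant from this function's point of view.
    slli_impl(a, imm8)
}

fn slli_impl(a: i32, imm8: i32) -> i32 {
    a.wrapping_shl(imm8 as u32) // stand-in for the real shuffle body
}
```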
diff --git a/coresimd/src/x86/i586/sse41.rs b/coresimd/src/x86/i586/sse41.rs
index fb853a3640..0e8edd86bc 100644
--- a/coresimd/src/x86/i586/sse41.rs
+++ b/coresimd/src/x86/i586/sse41.rs
@@ -64,6 +64,7 @@ pub unsafe fn _mm_blendv_epi8(
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
     let a = a.as_i16x8();
     let b = b.as_i16x8();
@@ -96,6 +97,7 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
     macro_rules! call {
         ($imm2:expr) => { blendpd(a, b, $imm2) }
@@ -108,6 +110,7 @@ pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
     macro_rules! call {
         ($imm4:expr) => { blendps(a, b, $imm4) }
@@ -121,6 +124,7 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
 #[target_feature(enable = "sse4.1")]
 // TODO: Add test for Windows
 #[cfg_attr(test, assert_instr(extractps, imm8 = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
     mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11))
 }
@@ -132,6 +136,7 @@ pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
     let imm8 = (imm8 & 15) as u32;
     simd_extract::<_, u8>(a.as_u8x16(), imm8) as i32
@@ -142,6 +147,7 @@ pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
 #[target_feature(enable = "sse4.1")]
 // TODO: Add test for Windows
 #[cfg_attr(test, assert_instr(extractps, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
     let imm8 = (imm8 & 3) as u32;
     simd_extract::<_, i32>(a.as_i32x4(), imm8)
@@ -172,6 +178,7 @@ pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
     macro_rules! call {
         ($imm8:expr) => { insertps(a, b, $imm8) }
@@ -184,6 +191,7 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
     mem::transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i as i8))
 }
@@ -193,6 +201,7 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
     mem::transmute(simd_insert(a.as_i32x4(), (imm8 & 0b11) as u32, i))
 }
@@ -420,6 +429,7 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(dppd, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
     macro_rules! call {
         ($imm8:expr) => { dppd(a, b, $imm8) }
@@ -437,6 +447,7 @@ pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(dpps, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
     macro_rules! call {
         ($imm8:expr) => { dpps(a, b, $imm8) }
@@ -554,6 +565,7 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundpd, rounding = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
     macro_rules! call {
         ($imm4:expr) => { roundpd(a, $imm4) }
@@ -583,6 +595,7 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundps, rounding = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
     macro_rules! call {
         ($imm4:expr) => { roundps(a, $imm4) }
@@ -614,6 +627,7 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundsd, rounding = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
     macro_rules! call {
         ($imm4:expr) => { roundsd(a, b, $imm4) }
@@ -645,6 +659,7 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundss, rounding = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
     macro_rules! call {
         ($imm4:expr) => { roundss(a, b, $imm4) }
@@ -734,6 +749,7 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
     let a = a.as_u8x16();
     let b = b.as_u8x16();
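For the sse4.1 rounding intrinsics, note that a constant *expression* should still qualify, not only a bare literal, so the usual pattern of OR-ing `_MM_FROUND_*` flags keeps working. A call-site sketch under this patch's signatures, assuming the crate's `_MM_FROUND_*` constants are in scope:

```rust
// Sketch: the rounding mode is a compile-time constant expression built
// from two `pub const` flags, which satisfies the const requirement.
#[target_feature(enable = "sse4.1")]
unsafe fn round_to_nearest(a: __m128) -> __m128 {
    _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}
```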
diff --git a/coresimd/src/x86/i586/sse42.rs b/coresimd/src/x86/i586/sse42.rs
index e53836efa2..5a2804349c 100644
--- a/coresimd/src/x86/i586/sse42.rs
+++ b/coresimd/src/x86/i586/sse42.rs
@@ -51,6 +51,7 @@ pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
     let a = a.as_i8x16();
     let b = b.as_i8x16();
@@ -261,6 +262,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     let a = a.as_i8x16();
     let b = b.as_i8x16();
@@ -276,6 +278,7 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     let a = a.as_i8x16();
     let b = b.as_i8x16();
@@ -291,6 +294,7 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     let a = a.as_i8x16();
     let b = b.as_i8x16();
@@ -306,6 +310,7 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     let a = a.as_i8x16();
     let b = b.as_i8x16();
@@ -320,6 +325,7 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     let a = a.as_i8x16();
     let b = b.as_i8x16();
@@ -335,6 +341,7 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     let a = a.as_i8x16();
     let b = b.as_i8x16();
@@ -349,6 +356,7 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_cmpestrm(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
 ) -> __m128i {
@@ -442,6 +450,7 @@ pub unsafe fn _mm_cmpestrm(
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_cmpestri(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
 ) -> i32 {
@@ -459,6 +468,7 @@ pub unsafe fn _mm_cmpestri(
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_cmpestrz(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
 ) -> i32 {
@@ -476,6 +486,7 @@ pub unsafe fn _mm_cmpestrz(
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_cmpestrc(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
 ) -> i32 {
@@ -493,6 +504,7 @@ pub unsafe fn _mm_cmpestrc(
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_cmpestrs(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
 ) -> i32 {
@@ -510,6 +522,7 @@ pub unsafe fn _mm_cmpestrs(
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_cmpestro(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
 ) -> i32 {
@@ -528,6 +541,7 @@ pub unsafe fn _mm_cmpestro(
 #[inline]
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
+#[rustc_args_required_const(4)]
 pub unsafe fn _mm_cmpestra(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
 ) -> i32 {
diff --git a/coresimd/src/x86/i586/ssse3.rs b/coresimd/src/x86/i586/ssse3.rs
index 4efcee388d..821d7fad1d 100644
--- a/coresimd/src/x86/i586/ssse3.rs
+++ b/coresimd/src/x86/i586/ssse3.rs
@@ -74,6 +74,7 @@ pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(palignr, n = 15))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
     let n = n as u32;
     // If palignr is shifting the pair of vectors more than the size of two
diff --git a/coresimd/src/x86/i686/aes.rs b/coresimd/src/x86/i686/aes.rs
index ed2251aa81..509a184caa 100644
--- a/coresimd/src/x86/i686/aes.rs
+++ b/coresimd/src/x86/i686/aes.rs
@@ -75,6 +75,7 @@ pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "aes")]
 #[cfg_attr(test, assert_instr(aeskeygenassist, imm8 = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i, imm8: i32) -> __m128i {
     macro_rules! call {
         ($imm8:expr) => (aeskeygenassist(a, $imm8))
diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs
index bf999c51e0..b6789d540d 100644
--- a/coresimd/src/x86/i686/sse.rs
+++ b/coresimd/src/x86/i686/sse.rs
@@ -312,6 +312,7 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
 #[inline]
 #[target_feature(enable = "sse,mmx")]
 #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
     macro_rules! call {
         ($imm2:expr) => { pextrw(a, $imm2) as i32 }
@@ -324,8 +325,12 @@ pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse,mmx")]
 #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
-    _mm_extract_pi16(a, imm2)
+    macro_rules! call {
+        ($imm2:expr) => { pextrw(a, $imm2) as i32 }
+    }
+    constify_imm2!(imm2, call)
 }
 
 /// Copies data from the 64-bit vector of [4 x i16] to the destination,
@@ -334,6 +339,7 @@ pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse,mmx")]
 #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
     macro_rules! call {
         ($imm2:expr) => { pinsrw(a, d, $imm2) }
@@ -347,8 +353,12 @@ pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
 #[inline]
 #[target_feature(enable = "sse,mmx")]
 #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
-    _mm_insert_pi16(a, d, imm2)
+    macro_rules! call {
+        ($imm2:expr) => { pinsrw(a, d, $imm2) }
+    }
+    constify_imm2!(imm2, call)
 }
 
 /// Takes the most significant bit from each 8-bit element in a 64-bit
@@ -376,6 +386,7 @@ pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
 #[inline]
 #[target_feature(enable = "sse,mmx")]
 #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
     macro_rules! call {
         ($imm8:expr) => { pshufw(a, $imm8) }
@@ -388,8 +399,12 @@ pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
 #[inline]
 #[target_feature(enable = "sse,mmx")]
 #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
-    _mm_shuffle_pi16(a, imm8)
+    macro_rules! call {
+        ($imm8:expr) => { pshufw(a, $imm8) }
+    }
+    constify_imm8!(imm8, call)
 }
 
 /// Convert the two lower packed single-precision (32-bit) floating-point
diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs
index c386d8a0a4..ccfd36dede 100644
--- a/coresimd/src/x86/i686/ssse3.rs
+++ b/coresimd/src/x86/i686/ssse3.rs
@@ -46,6 +46,7 @@ pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
 #[inline]
 #[target_feature(enable = "ssse3,mmx")]
 #[cfg_attr(test, assert_instr(palignr, n = 15))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
     macro_rules! call {
         ($imm8:expr) => {
diff --git a/coresimd/src/x86/x86_64/avx.rs b/coresimd/src/x86/x86_64/avx.rs
index 33338bfb26..214d883eb5 100644
--- a/coresimd/src/x86/x86_64/avx.rs
+++ b/coresimd/src/x86/x86_64/avx.rs
@@ -21,6 +21,7 @@ use x86::*;
 /// Copy `a` to result, and insert the 64-bit integer `i` into result
 /// at the location specified by `index`.
 #[inline]
+#[rustc_args_required_const(2)]
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i {
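Note the different fix in i686/sse.rs compared with sse2.rs: the mmx aliases (`_m_pextrw`, `_m_pinsrw`, `_m_pshufw`) get their `constify_imm*!` bodies duplicated rather than routed through a shared helper, because the one-line delegation they used before would no longer compile. A sketch of the failure mode (the alias name here is hypothetical, chosen only for illustration):

```rust
// Hypothetical: what the old delegation would look like under the new
// attribute. It fails because `imm2` is an ordinary runtime value by the
// time it reaches the inner call, which itself demands a constant.
#[rustc_args_required_const(1)]
pub unsafe fn _m_pextrw_alias(a: __m64, imm2: i32) -> i32 {
    _mm_extract_pi16(a, imm2) // error: this argument must be a constant
}
```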
diff --git a/coresimd/src/x86/x86_64/avx2.rs b/coresimd/src/x86/x86_64/avx2.rs
index 4786ef4d5d..d37897ddc6 100644
--- a/coresimd/src/x86/x86_64/avx2.rs
+++ b/coresimd/src/x86/x86_64/avx2.rs
@@ -24,6 +24,7 @@ use x86::*;
 /// Extract a 64-bit integer from `a`, selected with `imm8`.
 #[inline]
 #[target_feature(enable = "avx2")]
+#[rustc_args_required_const(1)]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
     let imm8 = (imm8 & 3) as u32;
diff --git a/coresimd/src/x86/x86_64/sse41.rs b/coresimd/src/x86/x86_64/sse41.rs
index cfabb24ccc..5f9158e628 100644
--- a/coresimd/src/x86/x86_64/sse41.rs
+++ b/coresimd/src/x86/x86_64/sse41.rs
@@ -13,6 +13,7 @@ use stdsimd_test::assert_instr;
 #[target_feature(enable = "sse4.1")]
 // TODO: Add test for Windows
 #[cfg_attr(test, assert_instr(pextrq, imm8 = 1))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
     let imm8 = (imm8 & 1) as u32;
     simd_extract(a.as_i64x2(), imm8)
@@ -23,6 +24,7 @@ pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
 #[inline]
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
     mem::transmute(simd_insert(a.as_i64x2(), (imm8 & 1) as u32, i))
 }
diff --git a/stdsimd-verify/src/lib.rs b/stdsimd-verify/src/lib.rs
index 5e55e9d296..c0a60a5aa2 100644
--- a/stdsimd-verify/src/lib.rs
+++ b/stdsimd-verify/src/lib.rs
@@ -4,6 +4,7 @@ extern crate proc_macro;
 extern crate proc_macro2;
 #[macro_use]
 extern crate quote;
+#[macro_use] extern crate syn;
 
 use std::path::Path;
 
@@ -77,6 +78,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
                 Some(i) => my_quote! { Some(#i) },
                 None => my_quote! { None },
             };
+            let required_const = find_required_const(&f.attrs);
            my_quote! {
                 Function {
                     name: stringify!(#name),
@@ -85,6 +87,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
                     target_feature: #target_feature,
                     instrs: &[#(stringify!(#instrs)),*],
                     file: stringify!(#path),
+                    required_const: &[#(#required_const),*],
                 }
             }
         })
@@ -236,3 +239,29 @@ fn find_target_feature(attrs: &[syn::Attribute]) -> Option<syn::Lit> {
         })
         .next()
 }
+
+fn find_required_const(attrs: &[syn::Attribute]) -> Vec<usize> {
+    attrs.iter()
+        .filter(|a| a.path.segments[0].ident == "rustc_args_required_const")
+        .map(|a| a.tts.clone())
+        .map(|a| syn::parse::<RustcArgsRequiredConst>(a.into()).unwrap())
+        .flat_map(|a| a.args)
+        .collect()
+}
+
+struct RustcArgsRequiredConst {
+    args: Vec<usize>,
+}
+
+impl syn::synom::Synom for RustcArgsRequiredConst {
+    named!(parse -> Self, do_parse!(
+        items: parens!(
+            call!(syn::punctuated::Punctuated::<syn::LitInt, syn::token::Comma>::parse_terminated)
+        ) >>
+        (RustcArgsRequiredConst {
+            args: items.1.into_iter()
+                .map(|a| a.value() as usize)
+                .collect(),
+        })
+    ));
+}
diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs
index d53fae30af..2d4b5f1f85 100644
--- a/stdsimd-verify/tests/x86-intel.rs
+++ b/stdsimd-verify/tests/x86-intel.rs
@@ -25,9 +25,9 @@ struct Function {
     target_feature: Option<&'static str>,
     instrs: &'static [&'static str],
     file: &'static str,
+    required_const: &'static [usize],
 }
 
-static BOOL: Type = Type::Bool;
 static F32: Type = Type::PrimFloat(32);
 static F64: Type = Type::PrimFloat(64);
 static I16: Type = Type::PrimSigned(16);
@@ -63,7 +63,6 @@ enum Type {
     M256,
     M256D,
     M256I,
-    Bool,
     Tuple,
     CpuidResult,
 }
@@ -301,7 +300,7 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
 
     // Make sure we've got the right return type.
     if let Some(t) = rust.ret {
-        equate(t, &intel.rettype, rust.name)?;
+        equate(t, &intel.rettype, rust.name, false)?;
     } else if intel.rettype != "" && intel.rettype != "void" {
         bail!(
             "{} returns `{}` with intel, void in rust",
@@ -321,8 +320,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
         if rust.arguments.len() != intel.parameters.len() {
             bail!("wrong number of arguments on {}", rust.name)
         }
-        for (a, b) in intel.parameters.iter().zip(rust.arguments) {
-            equate(b, &a.type_, &intel.name)?;
+        for (i, (a, b)) in intel.parameters.iter().zip(rust.arguments).enumerate() {
+            let is_const = rust.required_const.contains(&i);
+            equate(b, &a.type_, &intel.name, is_const)?;
         }
     }
 
@@ -361,16 +361,25 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
     Ok(())
 }
 
-fn equate(t: &Type, intel: &str, intrinsic: &str) -> Result<(), String> {
+fn equate(t: &Type,
+          intel: &str,
+          intrinsic: &str,
+          is_const: bool) -> Result<(), String> {
     let intel = intel.replace(" *", "*");
     let intel = intel.replace(" const*", "*");
+    let require_const = || {
+        if is_const {
+            return Ok(())
+        }
+        Err(format!("argument required to be const but isn't"))
+    };
     match (t, &intel[..]) {
         (&Type::PrimFloat(32), "float") => {}
         (&Type::PrimFloat(64), "double") => {}
         (&Type::PrimSigned(16), "__int16") => {}
         (&Type::PrimSigned(16), "short") => {}
         (&Type::PrimSigned(32), "__int32") => {}
-        (&Type::PrimSigned(32), "const int") => {}
+        (&Type::PrimSigned(32), "const int") => require_const()?,
         (&Type::PrimSigned(32), "int") => {}
         (&Type::PrimSigned(64), "__int64") => {}
         (&Type::PrimSigned(64), "long long") => {}
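The verifier changes close the loop: stdsimd-verify now scrapes each intrinsic's `#[rustc_args_required_const]` indices into `required_const`, and the comparison against Intel's XML only accepts a `const int` parameter when the matching Rust argument index is flagged. Distilled to a sketch of the new rule (not the verifier's actual code, which threads this through `equate`):

```rust
// Sketch of the check's effect: every parameter Intel declares as
// `const int` must also be marked required-const on the Rust side.
fn check_consts(required_const: &[usize], intel_params: &[&str]) -> Result<(), String> {
    for (i, ty) in intel_params.iter().enumerate() {
        if *ty == "const int" && !required_const.contains(&i) {
            return Err(format!("argument {} required to be const but isn't", i));
        }
    }
    Ok(())
}
```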