Skip to content

Commit eb5177c

Browse files
jackmott authored and BurntSushi committed
Start on AVX (#3)
start on avx
1 parent dcd1712 commit eb5177c

File tree

4 files changed

+83
-11
lines changed

4 files changed

+83
-11
lines changed

Diff for: TODO.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -540,9 +540,9 @@ sse4.2
540540

541541
avx
542542
---
543-
* [ ] `_mm256_add_pd`
544-
* [ ] `_mm256_add_ps`
545-
* [ ] `_mm256_addsub_pd`
543+
* [x] `_mm256_add_pd`
544+
* [x] `_mm256_add_ps`
545+
* [x] `_mm256_addsub_pd`
546546
* [ ] `_mm256_addsub_ps`
547547
* [ ] `_mm256_and_pd`
548548
* [ ] `_mm256_and_ps`

Diff for: src/x86/avx.rs

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
use v256::*;
2+
3+
/// Adds the packed double-precision (64-bit) floating-point elements
4+
/// in `a` and `b`, returning the four lane-wise sums.
5+
#[inline(always)]
6+
#[target_feature = "+avx"]
7+
pub fn _mm256_add_pd(a: f64x4, b: f64x4) -> f64x4 {
8+
a + b
9+
}
10+
11+
/// Adds the packed single-precision (32-bit) floating-point elements in `a` and `b`, returning the eight lane-wise sums.
12+
#[inline(always)]
13+
#[target_feature = "+avx"]
14+
pub fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 {
15+
a + b
16+
}
17+
18+
/// Alternately subtracts and adds packed double-precision (64-bit)
19+
/// floating-point elements: lanes 0 and 2 hold `a - b`, lanes 1 and 3 hold `a + b`.
20+
#[inline(always)]
21+
#[target_feature = "+avx"]
22+
pub fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
23+
unsafe { addsubpd256(a, b) }
24+
}
25+
26+
27+
// Foreign declaration backing `_mm256_addsub_pd`: `addsubpd256` is bound, via
// `link_name`, to the symbol `llvm.x86.avx.addsub.pd.256`.
// NOTE(review): `improper_ctypes` is presumably silenced because `f64x4` is not
// an FFI-safe type for an `extern "C"` signature — confirm.
#[allow(improper_ctypes)]
28+
extern "C" {
29+
#[link_name = "llvm.x86.avx.addsub.pd.256"]
30+
fn addsubpd256(a: f64x4, b:f64x4) -> f64x4;
31+
}
32+
33+
34+
// Unit tests for the AVX wrappers in this file: each test builds two input
// vectors, calls the wrapper through `avx::`, and compares the result against
// a hand-computed expected vector.
#[cfg(test)]
35+
mod tests {
36+
use v256::*;
37+
use x86::avx;
38+
39+
#[test]
40+
#[target_feature = "+avx"]
41+
fn _mm256_add_pd() {
42+
let a = f64x4::new(1.0, 2.0, 3.0, 4.0);
43+
let b = f64x4::new(5.0, 6.0, 7.0, 8.0);
44+
let r = avx::_mm256_add_pd(a, b);
45+
let e = f64x4::new(6.0, 8.0, 10.0, 12.0);
46+
assert_eq!(r, e);
47+
}
48+
49+
#[test]
50+
#[target_feature = "+avx"]
51+
fn _mm256_add_ps() {
52+
let a = f32x8::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53+
let b = f32x8::new(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
54+
let r = avx::_mm256_add_ps(a, b);
55+
let e = f32x8::new(10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0);
56+
assert_eq!(r, e);
57+
}
58+
59+
#[test]
60+
#[target_feature = "+avx"]
61+
fn _mm256_addsub_pd() {
62+
let a = f64x4::new(1.0, 2.0, 3.0, 4.0);
63+
let b = f64x4::new(5.0, 6.0, 7.0, 8.0);
64+
let r = avx::_mm256_addsub_pd(a, b);
65+
// Expected: a - b in lanes 0 and 2 (1-5, 3-7), a + b in lanes 1 and 3 (2+6, 4+8).
let e = f64x4::new(-4.0,8.0,-4.0,12.0);
66+
assert_eq!(r, e);
67+
}
68+
69+
70+
71+
}

Diff for: src/x86/avx2.rs

+7-8
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ mod tests {
568568
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
569569
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
570570
let b = i8x32::new(
571-
31, 30, 2, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
571+
31, 30, 2, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
572572
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
573573
let r = avx2::_mm256_cmpeq_epi8(a, b);
574574
assert_eq!(r, i8x32::splat(0).replace(2,0xFFu8 as i8));
@@ -641,7 +641,7 @@ mod tests {
641641
let b = i16x16::splat(4);
642642
let r = avx2::_mm256_hadd_epi16(a, b);
643643
let e = i16x16::new(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
644-
assert_eq!(r,e);
644+
assert_eq!(r, e);
645645
}
646646

647647
#[test]
@@ -651,7 +651,7 @@ mod tests {
651651
let b = i32x8::splat(4);
652652
let r = avx2::_mm256_hadd_epi32(a, b);
653653
let e = i32x8::new(4, 4, 8, 8, 4, 4, 8, 8);
654-
assert_eq!(r,e);
654+
assert_eq!(r, e);
655655
}
656656

657657
#[test]
@@ -662,7 +662,7 @@ mod tests {
662662
let r = avx2::_mm256_hadds_epi16(a, b);
663663
let e = i16x16::new(
664664
0x7FFF, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
665-
assert_eq!(r,e);
665+
assert_eq!(r, e);
666666
}
667667

668668
#[test]
@@ -672,7 +672,7 @@ mod tests {
672672
let b = i16x16::splat(4);
673673
let r = avx2::_mm256_hsub_epi16(a, b);
674674
let e = i16x16::splat(0);
675-
assert_eq!(r,e);
675+
assert_eq!(r, e);
676676
}
677677

678678
#[test]
@@ -682,7 +682,7 @@ mod tests {
682682
let b = i32x8::splat(4);
683683
let r = avx2::_mm256_hsub_epi32(a, b);
684684
let e = i32x8::splat(0);
685-
assert_eq!(r,e);
685+
assert_eq!(r, e);
686686
}
687687

688688
#[test]
@@ -692,8 +692,7 @@ mod tests {
692692
let b = i16x16::splat(4);
693693
let r = avx2::_mm256_hsubs_epi16(a, b);
694694
let e = i16x16::splat(0).replace(0,0x7FFF);
695-
assert_eq!(r,e);
695+
assert_eq!(r, e);
696696
}
697697

698-
699698
}

Diff for: src/x86/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ pub use self::sse::*;
22
pub use self::sse2::*;
33
pub use self::ssse3::*;
44
pub use self::sse42::*;
5+
pub use self::avx::*;
56
pub use self::avx2::*;
67

78
#[allow(non_camel_case_types)]
@@ -13,4 +14,5 @@ mod sse;
1314
mod sse2;
1415
mod ssse3;
1516
mod sse42;
17+
mod avx;
1618
mod avx2;

0 commit comments

Comments
 (0)