diff --git a/Cargo.toml b/Cargo.toml index 56de806b0..65f3d3693 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ approx-0_5 = { package = "approx", version = "0.5", optional = true , default-fe cblas-sys = { version = "0.1.4", optional = true, default-features = false } libc = { version = "0.2.82", optional = true } -matrixmultiply = { version = "0.3.0", default-features = false} +matrixmultiply = { version = "0.3.2", default-features = false, features=["cgemm"] } serde = { version = "1.0", optional = true, default-features = false, features = ["alloc"] } rawpointer = { version = "0.2" } diff --git a/benches/gemv.rs b/benches/gemv_gemm.rs similarity index 61% rename from benches/gemv.rs rename to benches/gemv_gemm.rs index 4bca08319..cfa14beac 100644 --- a/benches/gemv.rs +++ b/benches/gemv_gemm.rs @@ -9,8 +9,13 @@ extern crate test; use test::Bencher; +use num_complex::Complex; +use num_traits::{Float, One, Zero}; + use ndarray::prelude::*; +use ndarray::LinalgScalar; +use ndarray::linalg::general_mat_mul; use ndarray::linalg::general_mat_vec_mul; #[bench] @@ -45,3 +50,27 @@ fn gemv_64_32(bench: &mut Bencher) { general_mat_vec_mul(1.0, &a, &x, 1.0, &mut y); }); } + +#[bench] +fn cgemm_100(bench: &mut Bencher) { + cgemm_bench::(100, bench); +} + +#[bench] +fn zgemm_100(bench: &mut Bencher) { + cgemm_bench::(100, bench); +} + +fn cgemm_bench(size: usize, bench: &mut Bencher) +where + A: LinalgScalar + Float, +{ + let (m, k, n) = (size, size, size); + let a = Array::, _>::zeros((m, k)); + + let x = Array::zeros((k, n)); + let mut y = Array::zeros((m, n)); + bench.iter(|| { + general_mat_mul(Complex::one(), &a, &x, Complex::zero(), &mut y); + }); +} diff --git a/src/lib.rs b/src/lib.rs index 8cf23e915..aadef4c9c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,6 @@ clippy::manual_map, // is not an error clippy::while_let_on_iterator, // is not an error clippy::from_iter_instead_of_collect, // using from_iter is good style - clippy::if_then_panic, // is not an error clippy::redundant_closure, // false positives clippy #7812 )] #![doc(test(attr(deny(warnings))))] diff --git a/src/linalg/impl_linalg.rs b/src/linalg/impl_linalg.rs index 0e6700d65..d953dfdb4 100644 --- a/src/linalg/impl_linalg.rs +++ b/src/linalg/impl_linalg.rs @@ -18,6 +18,9 @@ use std::any::TypeId; use std::mem::MaybeUninit; use alloc::vec::Vec; +use num_complex::Complex; +use num_complex::{Complex32 as c32, Complex64 as c64}; + #[cfg(feature = "blas")] use libc::c_int; #[cfg(feature = "blas")] @@ -30,9 +33,6 @@ use cblas_sys as blas_sys; #[cfg(feature = "blas")] use cblas_sys::{CblasNoTrans, CblasRowMajor, CblasTrans, CBLAS_LAYOUT}; -#[cfg(feature = "blas")] -use num_complex::{Complex32 as c32, Complex64 as c64}; - /// len of vector before we use blas #[cfg(feature = "blas")] const DOT_BLAS_CUTOFF: usize = 32; @@ -505,7 +505,7 @@ fn mat_mul_general( let (rsc, csc) = (c.strides()[0], c.strides()[1]); if same_type::() { unsafe { - ::matrixmultiply::sgemm( + matrixmultiply::sgemm( m, k, n, @@ -524,7 +524,7 @@ fn mat_mul_general( } } else if same_type::() { unsafe { - ::matrixmultiply::dgemm( + matrixmultiply::dgemm( m, k, n, @@ -541,6 +541,48 @@ fn mat_mul_general( csc, ); } + } else if same_type::() { + unsafe { + matrixmultiply::cgemm( + matrixmultiply::CGemmOption::Standard, + matrixmultiply::CGemmOption::Standard, + m, + k, + n, + complex_array(cast_as(&alpha)), + ap as *const _, + lhs.strides()[0], + lhs.strides()[1], + bp as *const _, + rhs.strides()[0], + rhs.strides()[1], + complex_array(cast_as(&beta)), + cp as *mut _, + rsc, + csc, + ); + } + } else if same_type::() { + unsafe { + matrixmultiply::zgemm( + matrixmultiply::CGemmOption::Standard, + matrixmultiply::CGemmOption::Standard, + m, + k, + n, + complex_array(cast_as(&alpha)), + ap as *const _, + lhs.strides()[0], + lhs.strides()[1], + bp as *const _, + rhs.strides()[0], + rhs.strides()[1], + complex_array(cast_as(&beta)), + cp as *mut _, + rsc, + csc, + ); + } } else { // It's a no-op if `c` has zero length. if c.is_empty() { @@ -768,10 +810,17 @@ fn same_type() -> bool { // // **Panics** if `A` and `B` are not the same type fn cast_as(a: &A) -> B { - assert!(same_type::()); + assert!(same_type::(), "expect type {} and {} to match", + std::any::type_name::(), std::any::type_name::()); unsafe { ::std::ptr::read(a as *const _ as *const B) } } +/// Return the complex in the form of an array [re, im] +#[inline] +fn complex_array(z: Complex) -> [A; 2] { + [z.re, z.im] +} + #[cfg(feature = "blas")] fn blas_compat_1d(a: &ArrayBase) -> bool where diff --git a/xtest-numeric/Cargo.toml b/xtest-numeric/Cargo.toml index 14bbd16ee..7a6c3cedd 100644 --- a/xtest-numeric/Cargo.toml +++ b/xtest-numeric/Cargo.toml @@ -3,6 +3,7 @@ name = "numeric-tests" version = "0.1.0" authors = ["bluss"] publish = false +edition = "2018" [dependencies] approx = "0.4" @@ -17,6 +18,10 @@ openblas-src = { optional = true, version = "0.10", default-features = false, fe version = "0.8.0" features = ["small_rng"] +[dev-dependencies] +num-traits = { version = "0.2.14", default-features = false } +num-complex = { version = "0.4", default-features = false } + [lib] test = false diff --git a/xtest-numeric/tests/accuracy.rs b/xtest-numeric/tests/accuracy.rs index 438b73705..1f8e9a82c 100644 --- a/xtest-numeric/tests/accuracy.rs +++ b/xtest-numeric/tests/accuracy.rs @@ -6,6 +6,8 @@ extern crate rand; extern crate numeric_tests; +use std::fmt; + use ndarray_rand::RandomExt; use rand::{Rng, SeedableRng}; use rand::rngs::SmallRng; @@ -17,10 +19,28 @@ use ndarray::{ }; use ndarray::linalg::general_mat_mul; -use rand_distr::Normal; +use rand_distr::{Normal, StandardNormal, Distribution}; +use num_traits::{Float, AsPrimitive}; +use num_complex::Complex; use approx::{assert_abs_diff_eq, assert_relative_eq}; +fn kahan_sum(iter: impl Iterator) -> A + where A: LinalgScalar +{ + let mut sum = A::zero(); + let mut compensation = A::zero(); + + for elt in iter { + let y = elt - compensation; + let t = sum + y; + compensation = (t - sum) - y; + sum = t; + } + + sum +} + // simple, slow, correct (hopefully) mat mul fn reference_mat_mul(lhs: &ArrayBase, rhs: &ArrayBase) -> Array @@ -29,46 +49,48 @@ fn reference_mat_mul(lhs: &ArrayBase, rhs: &ArrayBase S2: Data, { let ((m, k), (_, n)) = (lhs.dim(), rhs.dim()); - let mut res_elems = Vec::::with_capacity(m * n); - unsafe { - res_elems.set_len(m * n); - } + let mut res_elems = Array::zeros(m * n); let mut i = 0; let mut j = 0; for rr in &mut res_elems { - unsafe { - *rr = (0..k).fold(A::zero(), - move |s, x| s + *lhs.uget((i, x)) * *rhs.uget((x, j))); - } + let lhs_i = lhs.row(i); + let rhs_j = rhs.column(j); + *rr = kahan_sum((0..k).map(move |x| lhs_i[x] * rhs_j[x])); + j += 1; if j == n { j = 0; i += 1; } } - unsafe { - ArrayBase::from_shape_vec_unchecked((m, n), res_elems) - } + + res_elems.into_shape((m, n)).unwrap() } -fn gen(d: D) -> Array +fn gen(d: D, rng: &mut SmallRng) -> Array where D: Dimension, + A: Float, + StandardNormal: Distribution, { - Array::random(d, Normal::new(0., 1.).unwrap()) + Array::random_using(d, Normal::new(A::zero(), A::one()).unwrap(), rng) } -fn gen_f64(d: D) -> Array + +fn gen_complex(d: D, rng: &mut SmallRng) -> Array, D> where D: Dimension, + A: Float, + StandardNormal: Distribution, { - Array::random(d, Normal::new(0., 1.).unwrap()) + gen(d.clone(), rng).mapv(Complex::from) + gen(d, rng).mapv(|x| Complex::new(A::zero(), x)) } #[test] fn accurate_eye_f32() { + let rng = &mut SmallRng::from_entropy(); for i in 0..20 { let eye = Array::eye(i); for j in 0..20 { - let a = gen(Ix2(i, j)); + let a = gen::(Ix2(i, j), rng); let a2 = eye.dot(&a); assert_abs_diff_eq!(a, a2, epsilon = 1e-6); let a3 = a.t().dot(&eye); @@ -76,12 +98,11 @@ fn accurate_eye_f32() { } } // pick a few random sizes - let mut rng = SmallRng::from_entropy(); for _ in 0..10 { let i = rng.gen_range(15..512); let j = rng.gen_range(15..512); println!("Testing size {} by {}", i, j); - let a = gen(Ix2(i, j)); + let a = gen::(Ix2(i, j), rng); let eye = Array::eye(i); let a2 = eye.dot(&a); assert_abs_diff_eq!(a, a2, epsilon = 1e-6); @@ -92,11 +113,12 @@ fn accurate_eye_f32() { #[test] fn accurate_eye_f64() { + let rng = &mut SmallRng::from_entropy(); let abs_tol = 1e-15; for i in 0..20 { let eye = Array::eye(i); for j in 0..20 { - let a = gen_f64(Ix2(i, j)); + let a = gen::(Ix2(i, j), rng); let a2 = eye.dot(&a); assert_abs_diff_eq!(a, a2, epsilon = abs_tol); let a3 = a.t().dot(&eye); @@ -104,12 +126,11 @@ fn accurate_eye_f64() { } } // pick a few random sizes - let mut rng = SmallRng::from_entropy(); for _ in 0..10 { let i = rng.gen_range(15..512); let j = rng.gen_range(15..512); println!("Testing size {} by {}", i, j); - let a = gen_f64(Ix2(i, j)); + let a = gen::(Ix2(i, j), rng); let eye = Array::eye(i); let a2 = eye.dot(&a); assert_abs_diff_eq!(a, a2, epsilon = 1e-6); @@ -119,114 +140,125 @@ fn accurate_eye_f64() { } #[test] -fn accurate_mul_f32() { - // pick a few random sizes - let mut rng = SmallRng::from_entropy(); - for i in 0..20 { - let m = rng.gen_range(15..512); - let k = rng.gen_range(15..512); - let n = rng.gen_range(15..1560); - let a = gen(Ix2(m, k)); - let b = gen(Ix2(n, k)); - let b = b.t(); - let (a, b) = if i > 10 { - (a.slice(s![..;2, ..;2]), - b.slice(s![..;2, ..;2])) - } else { (a.view(), b) }; - - println!("Testing size {} by {} by {}", a.shape()[0], a.shape()[1], b.shape()[1]); - let c = a.dot(&b); - let reference = reference_mat_mul(&a, &b); - - assert_relative_eq!(c, reference, epsilon = 1e-4, max_relative = 1e-3); - } +fn accurate_mul_f32_dot() { + accurate_mul_float_general::(1e-5, false); } #[test] fn accurate_mul_f32_general() { - // pick a few random sizes - let mut rng = SmallRng::from_entropy(); - for i in 0..20 { - let m = rng.gen_range(15..512); - let k = rng.gen_range(15..512); - let n = rng.gen_range(15..1560); - let a = gen(Ix2(m, k)); - let b = gen(Ix2(n, k)); - let mut c = gen(Ix2(m, n)); - let b = b.t(); - let (a, b, mut c) = if i > 10 { - (a.slice(s![..;2, ..;2]), - b.slice(s![..;2, ..;2]), - c.slice_mut(s![..;2, ..;2])) - } else { (a.view(), b, c.view_mut()) }; - - println!("Testing size {} by {} by {}", a.shape()[0], a.shape()[1], b.shape()[1]); - general_mat_mul(1., &a, &b, 0., &mut c); - let reference = reference_mat_mul(&a, &b); - - assert_relative_eq!(c, reference, epsilon = 1e-4, max_relative = 1e-3); - } + accurate_mul_float_general::(1e-5, true); +} + +#[test] +fn accurate_mul_f64_dot() { + accurate_mul_float_general::(1e-14, false); } #[test] -fn accurate_mul_f64() { +fn accurate_mul_f64_general() { + accurate_mul_float_general::(1e-14, true); +} + +/// Generate random sized matrices using the given generator function. +/// Compute gemm using either .dot() (if use_general is false) otherwise general_mat_mul. +/// Return tuple of actual result matrix and reference matrix, which should be equal. +fn random_matrix_mul(rng: &mut SmallRng, use_stride: bool, use_general: bool, + generator: fn(Ix2, &mut SmallRng) -> Array2) + -> (Array2, Array2) + where A: LinalgScalar, +{ + let m = rng.gen_range(15..512); + let k = rng.gen_range(15..512); + let n = rng.gen_range(15..1560); + let a = generator(Ix2(m, k), rng); + let b = generator(Ix2(n, k), rng); + let c = if use_general { + Some(generator(Ix2(m, n), rng)) + } else { + None + }; + + let b = b.t(); + let (a, b, mut c) = if use_stride { + (a.slice(s![..;2, ..;2]), + b.slice(s![..;2, ..;2]), + c.map(|c_| c_.slice_move(s![..;2, ..;2]))) + } else { + (a.view(), + b, + c) + }; + + println!("Testing size {} by {} by {}", a.shape()[0], a.shape()[1], b.shape()[1]); + if let Some(c) = &mut c { + general_mat_mul(A::one(), &a, &b, A::zero(), c); + } else { + c = Some(a.dot(&b)); + } + let c = c.unwrap(); + let reference = reference_mat_mul(&a, &b); + + (c, reference) +} + +fn accurate_mul_float_general(limit: f64, use_general: bool) + where A: Float + Copy + 'static + AsPrimitive, + StandardNormal: Distribution, + A: fmt::Debug, +{ // pick a few random sizes let mut rng = SmallRng::from_entropy(); for i in 0..20 { - let m = rng.gen_range(15..512); - let k = rng.gen_range(15..512); - let n = rng.gen_range(15..1560); - let a = gen_f64(Ix2(m, k)); - let b = gen_f64(Ix2(n, k)); - let b = b.t(); - let (a, b) = if i > 10 { - (a.slice(s![..;2, ..;2]), - b.slice(s![..;2, ..;2])) - } else { (a.view(), b) }; - - println!("Testing size {} by {} by {}", a.shape()[0], a.shape()[1], b.shape()[1]); - let c = a.dot(&b); - let reference = reference_mat_mul(&a, &b); - - assert_relative_eq!(c, reference, epsilon = 1e-12, max_relative = 1e-7); + let (c, reference) = random_matrix_mul(&mut rng, i > 10, use_general, gen::); + + let diff = &c - &reference; + let max_diff = diff.iter().copied().fold(A::zero(), A::max); + let max_elt = reference.iter().copied().fold(A::zero(), A::max); + println!("Max elt diff={:?}, max={:?}, ratio={:.4e}", max_diff, max_elt, (max_diff/max_elt).as_()); + assert!((max_diff / max_elt).as_() < limit, + "Expected relative norm diff < {:e}, found {:?} / {:?}", limit, max_diff, max_elt); } } #[test] -fn accurate_mul_f64_general() { +fn accurate_mul_complex32() { + accurate_mul_complex_general::(1e-5); +} + +#[test] +fn accurate_mul_complex64() { + accurate_mul_complex_general::(1e-14); +} + +fn accurate_mul_complex_general(limit: f64) + where A: Float + Copy + 'static + AsPrimitive, + StandardNormal: Distribution, + A: fmt::Debug, +{ // pick a few random sizes let mut rng = SmallRng::from_entropy(); for i in 0..20 { - let m = rng.gen_range(15..512); - let k = rng.gen_range(15..512); - let n = rng.gen_range(15..1560); - let a = gen_f64(Ix2(m, k)); - let b = gen_f64(Ix2(n, k)); - let mut c = gen_f64(Ix2(m, n)); - let b = b.t(); - let (a, b, mut c) = if i > 10 { - (a.slice(s![..;2, ..;2]), - b.slice(s![..;2, ..;2]), - c.slice_mut(s![..;2, ..;2])) - } else { (a.view(), b, c.view_mut()) }; - - println!("Testing size {} by {} by {}", a.shape()[0], a.shape()[1], b.shape()[1]); - general_mat_mul(1., &a, &b, 0., &mut c); - let reference = reference_mat_mul(&a, &b); - - assert_relative_eq!(c, reference, epsilon = 1e-12, max_relative = 1e-7); + let (c, reference) = random_matrix_mul(&mut rng, i > 10, true, gen_complex::); + + let diff = &c - &reference; + let max_elt = |elt: &Complex<_>| A::max(A::abs(elt.re), A::abs(elt.im)); + let max_diff = diff.iter().map(max_elt).fold(A::zero(), A::max); + let max_elt = reference.iter().map(max_elt).fold(A::zero(), A::max); + println!("Max elt diff={:?}, max={:?}, ratio={:.4e}", max_diff, max_elt, (max_diff/max_elt).as_()); + assert!((max_diff / max_elt).as_() < limit, + "Expected relative norm diff < {:e}, found {:?} / {:?}", limit, max_diff, max_elt); } } #[test] fn accurate_mul_with_column_f64() { // pick a few random sizes - let mut rng = SmallRng::from_entropy(); + let rng = &mut SmallRng::from_entropy(); for i in 0..10 { let m = rng.gen_range(1..350); let k = rng.gen_range(1..350); - let a = gen_f64(Ix2(m, k)); - let b_owner = gen_f64(Ix2(k, k)); + let a = gen::(Ix2(m, k), rng); + let b_owner = gen::(Ix2(k, k), rng); let b_row_col; let b_sq;