Remove duplicate public functions

QuState · Mar 20, 2024 · 8b2bf5d · 8b2bf5d
1 parent 1d3a1b3
commit 8b2bf5d
Show file tree

Hide file tree

Showing 3 changed files with 190 additions and 239 deletions.
diff --git a/src/lib.rs b/src/lib.rs
@@ -13,7 +13,7 @@ use crate::kernels::{
     fft_32_chunk_n_simd, fft_64_chunk_n_simd, fft_chunk_2, fft_chunk_4, fft_chunk_n,
 };
 use crate::options::Options;
-use crate::planner::{Direction, Planner};
+use crate::planner::{Direction, Planner32, Planner64};
 use crate::twiddles::filter_twiddles;
 
 pub mod cobra;
@@ -22,177 +22,124 @@ pub mod options;
 pub mod planner;
 mod twiddles;
 
-/// FFT -- Decimation in Frequency. This is just the decimation-in-time algorithm, reversed.
-/// This call to FFT is run, in-place.
-/// The input should be provided in normal order, and then the modified input is bit-reversed.
-///
-/// # Panics
-///
-/// Panics if `reals.len() != imags.len()`
-///
-/// ## References
-/// <https://inst.eecs.berkeley.edu/~ee123/sp15/Notes/Lecture08_FFT_and_SpectAnalysis.key.pdf>
-pub fn fft_64(reals: &mut [f64], imags: &mut [f64], direction: Direction) {
-    assert_eq!(
-        reals.len(),
-        imags.len(),
-        "real and imaginary inputs must be of equal size, but got: {} {}",
-        reals.len(),
-        imags.len()
-    );
-
-    let mut planner = Planner::new(reals.len(), direction);
-    assert!(planner.num_twiddles().is_power_of_two() && planner.num_twiddles() == reals.len() / 2);
-
-    let opts = Options::guess_options(reals.len());
-    fft_64_with_opts_and_plan(reals, imags, &opts, &mut planner);
-}
-
-/// FFT -- Decimation in Frequency. This is just the decimation-in-time algorithm, reversed.
-/// This call to FFT is run, in-place.
-/// The input should be provided in normal order, and then the modified input is bit-reversed.
-///
-/// # Panics
-///
-/// Panics if `reals.len() != imags.len()`
-///
-/// ## References
-/// <https://inst.eecs.berkeley.edu/~ee123/sp15/Notes/Lecture08_FFT_and_SpectAnalysis.key.pdf>
-pub fn fft_32(reals: &mut [f32], imags: &mut [f32], direction: Direction) {
-    assert_eq!(
-        reals.len(),
-        imags.len(),
-        "real and imaginary inputs must be of equal size, but got: {} {}",
-        reals.len(),
-        imags.len()
-    );
-
-    let mut planner = Planner::new(reals.len(), direction);
-    assert!(planner.num_twiddles().is_power_of_two() && planner.num_twiddles() == reals.len() / 2);
-
-    let opts = Options::guess_options(reals.len());
-    fft_32_with_opts_and_plan(reals, imags, &opts, &mut planner);
+macro_rules! impl_fft_for {
+    ($func_name:ident, $precision:ty, $planner:ty, $opts_and_plan:ident) => {
+        /// FFT -- Decimation in Frequency. This is just the decimation-in-time algorithm, reversed.
+        /// This call to FFT is run in-place.
+        /// The input should be provided in normal order, and then the modified input is bit-reversed.
+        ///
+        /// # Panics
+        ///
+        /// Panics if `reals.len() != imags.len()`, or if the input length is *not* a
+        /// power of 2.
+        ///
+        /// ## References
+        /// <https://inst.eecs.berkeley.edu/~ee123/sp15/Notes/Lecture08_FFT_and_SpectAnalysis.key.pdf>
+        pub fn $func_name(
+            reals: &mut [$precision],
+            imags: &mut [$precision],
+            direction: Direction,
+        ) {
+            assert_eq!(
+                reals.len(),
+                imags.len(),
+                "real and imaginary inputs must be of equal size, but got: {} {}",
+                reals.len(),
+                imags.len()
+            );
+
+            let mut planner = <$planner>::new(reals.len(), direction);
+            assert!(
+                planner.num_twiddles().is_power_of_two()
+                    && planner.num_twiddles() == reals.len() / 2
+            );
+
+            let opts = Options::guess_options(reals.len());
+            $opts_and_plan(reals, imags, &opts, &mut planner);
+        }
+    };
 }
 
-/// Same as [fft], but also accepts [`Options`] that control optimization strategies, as well as
-/// a [`Planner`] in the case that this FFT will need to be run multiple times.
-///
-/// `fft` automatically guesses the best strategy for a given input,
-/// so you only need to call this if you are tuning performance for a specific hardware platform.
-///
-/// In addition, `fft` automatically creates a planner to be used. In the case that you plan
-/// on running an FFT many times on inputs of the same size, use this function with the pre-built
-/// [`Planner`].
-///
-/// # Panics
-///
-/// Panics if `reals.len() != imags.len()`, or if the input length is *not* a power of two.
-pub fn fft_32_with_opts_and_plan(
-    reals: &mut [f32],
-    imags: &mut [f32],
-    opts: &Options,
-    planner: &mut Planner<f32>,
-) {
-    assert!(reals.len() == imags.len() && reals.len().is_power_of_two());
-    let n: usize = reals.len().ilog2() as usize;
-
-    let twiddles_re = &mut planner.twiddles_re;
-    let twiddles_im = &mut planner.twiddles_im;
-
-    // We shouldn't be able to execute FFT if the # of twiddles isn't equal to the distance
-    // between pairs
-    assert!(twiddles_re.len() == reals.len() / 2 && twiddles_im.len() == imags.len() / 2);
-
-    for t in (0..n).rev() {
-        let dist = 1 << t;
-        let chunk_size = dist << 1;
-
-        if chunk_size > 4 {
-            if t < n - 1 {
-                filter_twiddles(twiddles_re, twiddles_im);
+impl_fft_for!(fft_64, f64, Planner64, fft_64_with_opts_and_plan);
+impl_fft_for!(fft_32, f32, Planner32, fft_32_with_opts_and_plan);
+
+macro_rules! impl_fft_with_opts_and_plan_for {
+    ($func_name:ident, $precision:ty, $planner:ty, $simd_butterfly_kernel:ident) => {
+        /// Same as [`fft_64`]/[`fft_32`], but also accepts [`Options`] that control optimization
+        /// strategies, plus a [`Planner64`]/[`Planner32`] for when this FFT is run multiple times.
+        ///
+        /// `fft_64`/`fft_32` automatically guess the best strategy for a given input,
+        /// so you only need to call this if you are tuning performance for a specific hardware platform.
+        ///
+        /// In addition, `fft_64`/`fft_32` automatically create a planner to be used. If you plan on
+        /// running an FFT many times on inputs of the same size, use this function with a pre-built
+        /// [`Planner64`]/[`Planner32`].
+        ///
+        /// # Panics
+        ///
+        /// Panics if `reals.len() != imags.len()`, or if the input length is *not* a power of 2.
+        pub fn $func_name(
+            reals: &mut [$precision],
+            imags: &mut [$precision],
+            opts: &Options,
+            planner: &mut $planner,
+        ) {
+            assert!(reals.len() == imags.len() && reals.len().is_power_of_two());
+            let n: usize = reals.len().ilog2() as usize;
+
+            let twiddles_re = &mut planner.twiddles_re;
+            let twiddles_im = &mut planner.twiddles_im;
+
+            // We shouldn't be able to execute FFT if the # of twiddles isn't equal to the distance
+            // between pairs
+            assert!(twiddles_re.len() == reals.len() / 2 && twiddles_im.len() == imags.len() / 2);
+
+            for t in (0..n).rev() {
+                let dist = 1 << t;
+                let chunk_size = dist << 1;
+
+                if chunk_size > 4 {
+                    if t < n - 1 {
+                        filter_twiddles(twiddles_re, twiddles_im);
+                    }
+                    if chunk_size >= 16 {
+                        $simd_butterfly_kernel(reals, imags, twiddles_re, twiddles_im, dist);
+                    } else {
+                        fft_chunk_n(reals, imags, twiddles_re, twiddles_im, dist);
+                    }
+                } else if chunk_size == 2 {
+                    fft_chunk_2(reals, imags);
+                } else if chunk_size == 4 {
+                    fft_chunk_4(reals, imags);
+                }
             }
-            if chunk_size >= 16 {
-                fft_32_chunk_n_simd(reals, imags, twiddles_re, twiddles_im, dist);
+
+            if opts.multithreaded_bit_reversal {
+                std::thread::scope(|s| {
+                    s.spawn(|| cobra_apply(reals, n));
+                    s.spawn(|| cobra_apply(imags, n));
+                });
             } else {
-                fft_chunk_n(reals, imags, twiddles_re, twiddles_im, dist);
+                cobra_apply(reals, n);
+                cobra_apply(imags, n);
             }
-        } else if chunk_size == 2 {
-            fft_chunk_2(reals, imags);
-        } else if chunk_size == 4 {
-            fft_chunk_4(reals, imags);
         }
-    }
-
-    if opts.multithreaded_bit_reversal {
-        std::thread::scope(|s| {
-            s.spawn(|| cobra_apply(reals, n));
-            s.spawn(|| cobra_apply(imags, n));
-        });
-    } else {
-        cobra_apply(reals, n);
-        cobra_apply(imags, n);
-    }
+    };
 }
 
-/// Same as [fft], but also accepts [`Options`] that control optimization strategies, as well as
-/// a [`Planner`] in the case that this FFT will need to be run multiple times.
-///
-/// `fft` automatically guesses the best strategy for a given input,
-/// so you only need to call this if you are tuning performance for a specific hardware platform.
-///
-/// In addition, `fft` automatically creates a planner to be used. In the case that you plan
-/// on running an FFT many times on inputs of the same size, use this function with the pre-built
-/// [`Planner`].
-///
-/// # Panics
-///
-/// Panics if `reals.len() != imags.len()`, or if the input length is *not* a power of two.
-pub fn fft_64_with_opts_and_plan(
-    reals: &mut [f64],
-    imags: &mut [f64],
-    opts: &Options,
-    planner: &mut Planner<f64>,
-) {
-    assert!(reals.len() == imags.len() && reals.len().is_power_of_two());
-    let n: usize = reals.len().ilog2() as usize;
-
-    let twiddles_re = &mut planner.twiddles_re;
-    let twiddles_im = &mut planner.twiddles_im;
-
-    // We shouldn't be able to execute FFT if the # of twiddles isn't equal to the distance
-    // between pairs
-    assert!(twiddles_re.len() == reals.len() / 2 && twiddles_im.len() == imags.len() / 2);
-
-    for t in (0..n).rev() {
-        let dist = 1 << t;
-        let chunk_size = dist << 1;
-
-        if chunk_size > 4 {
-            if t < n - 1 {
-                filter_twiddles(twiddles_re, twiddles_im);
-            }
-            if chunk_size >= 16 {
-                fft_64_chunk_n_simd(reals, imags, twiddles_re, twiddles_im, dist);
-            } else {
-                fft_chunk_n(reals, imags, twiddles_re, twiddles_im, dist);
-            }
-        } else if chunk_size == 2 {
-            fft_chunk_2(reals, imags);
-        } else if chunk_size == 4 {
-            fft_chunk_4(reals, imags);
-        }
-    }
+impl_fft_with_opts_and_plan_for!(
+    fft_64_with_opts_and_plan,
+    f64,
+    Planner64,
+    fft_64_chunk_n_simd
+);
 
-    if opts.multithreaded_bit_reversal {
-        std::thread::scope(|s| {
-            s.spawn(|| cobra_apply(reals, n));
-            s.spawn(|| cobra_apply(imags, n));
-        });
-    } else {
-        cobra_apply(reals, n);
-        cobra_apply(imags, n);
-    }
-}
+impl_fft_with_opts_and_plan_for!(
+    fft_32_with_opts_and_plan,
+    f32,
+    Planner32,
+    fft_32_chunk_n_simd
+);
 
 #[cfg(test)]
 mod tests {
@@ -211,7 +158,7 @@ mod tests {
         let num_points = 5;
 
         // this test will actually always fail at this stage
-        let mut planner = Planner::new(num_points, Direction::Forward);
+        let mut planner = Planner64::new(num_points, Direction::Forward);
 
         let mut reals = vec![0.0; num_points];
         let mut imags = vec![0.0; num_points];
@@ -234,7 +181,7 @@ mod tests {
         // size of the generated twiddle factors is half the size of the input.
         // In this case, we have an input of size 1024 (used for mp3), but we tell the planner the
         // input size is 16.
-        let mut planner = Planner::new(n, Direction::Forward);
+        let mut planner = Planner64::new(n, Direction::Forward);
 
         let mut reals = vec![0.0; num_points];
         let mut imags = vec![0.0; num_points];