diff --git a/examples/heat_1d_ap_fft.rs b/examples/heat_1d_ap_fft.rs new file mode 100644 index 0000000..da58b7c --- /dev/null +++ b/examples/heat_1d_ap_fft.rs @@ -0,0 +1,44 @@ +use nhls::domain::*; +use nhls::image_1d_example::*; +use nhls::solver::*; + +fn main() { + let (args, output_image_path) = Args::cli_parse("heat_1d_ap_fft"); + + let stencil = nhls::standard_stencils::heat_1d(1.0, 1.0, 0.5); + + // Create domains + let grid_bound = args.grid_bounds(); + let mut input_domain = OwnedDomain::new(grid_bound); + let mut output_domain = OwnedDomain::new(grid_bound); + + // Create BC + let bc = ConstantCheck::new(1.0, grid_bound); + + // Create AP Solver + let cutoff = 40; + let ratio = 0.5; + let mut solver = APSolver::new( + &bc, + &stencil, + cutoff, + ratio, + &grid_bound, + args.chunk_size, + ); + + // Make image + let mut img = nhls::image::Image1D::new(grid_bound, args.lines as u32); + img.add_line(0, input_domain.buffer()); + for t in 1..args.lines as u32 { + solver.loop_solve( + &mut input_domain, + &mut output_domain, + args.steps_per_line, + ); + std::mem::swap(&mut input_domain, &mut output_domain); + img.add_line(t, input_domain.buffer()); + } + + img.write(&output_image_path); +} diff --git a/src/decomposition/mod.rs b/src/decomposition/mod.rs index 3ed0f7d..e3e79b8 100644 --- a/src/decomposition/mod.rs +++ b/src/decomposition/mod.rs @@ -1,37 +1,91 @@ -// Recursive domain decomposition. -// -// We need boundary condition functions -// -// What is value out of bounds - -// -// Face vectors - -/* -#[derive(Debug)] -pub enum Boundary { - SPACE, - PERIODIC, - FIXED, +use crate::util::*; + +pub struct FFTSolveParams { + pub slopes: Bounds, + pub cutoff: i32, + pub ratio: f64, } -#[derive(Debug)] -pub struct FFTParams { - sigma: i32, - cutoff: i32, - ratio: f64, +pub struct FFTSolve { + pub solve_region: AABB, + pub steps: usize, } -#[derive(Debug)] -pub struct FFTSolveArgs { - boundaries: [Boundary; 2], - t0: usize, - t_end: usize, - x0: usize, - x_end: usize, +pub fn try_fftsolve( + bounds: &AABB, + params: &FFTSolveParams, + max_steps: Option, +) -> Option> { + if bounds.min_size_len() <= params.cutoff { + return None; + } + + let (steps, solve_region) = + bounds.shrink(params.ratio, params.slopes, max_steps); + + println!( + "Found fft solve, steps: {}, region: {:?}", + steps, solve_region + ); + + Some(FFTSolve { + solve_region, + steps, + }) } -pub fn recursive_solve(args: FFTSolveArgs, p: FFTParams, level: usize) { - println!("RS: args: {:?}, p: {:?}", args, p); +/// This needs to match logic from AABB::decomposition +pub fn decomposition_slopes( +) -> [[Bounds; 2]; DIMENSION] { + let lower = 0; + let upper = 1; + + // Start will all ones, turn off things that don't slope + let mut result = [[Bounds::from_element(1); 2]; DIMENSION]; + + for d in 0..DIMENSION { + result[d][lower][(d, lower)] = 0; + result[d][upper][(d, upper)] = 0; + + for d0 in d + 1..DIMENSION { + result[d][lower][(d0, lower)] = 0; + result[d][lower][(d0, upper)] = 0; + result[d][upper][(d0, lower)] = 0; + result[d][upper][(d0, upper)] = 0; + } + } + + result +} + +#[cfg(test)] +mod unit_tests { + use super::*; + + #[test] + fn decomposition_slopes_test() { + { + let d1 = decomposition_slopes::<1>(); + assert_eq!(d1, [[matrix![0, 1], matrix![1, 0]]]); + } + + { + let d2 = decomposition_slopes::<2>(); + let expected = [ + [matrix![0, 1; 0, 0], matrix![1, 0; 0, 0]], + [matrix![1, 1; 0, 1], matrix![1, 1; 1, 0]], + ]; + assert_eq!(d2, expected); + } + + { + let d3 = decomposition_slopes::<3>(); + let expected = [ + [matrix![0, 1; 0,0; 0,0], matrix![1, 0; 0, 0; 0, 0]], + [matrix![1, 1; 0, 1; 0,0], matrix![1, 1; 1, 0; 0,0]], + [matrix![1, 1; 1, 1; 0, 1], matrix![1, 1; 1, 1; 1, 0]], + ]; + assert_eq!(d3, expected); + } + } } -*/ diff --git a/src/domain/gather_args.rs b/src/domain/gather_args.rs index dbfed8f..5b9be48 100644 --- a/src/domain/gather_args.rs +++ b/src/domain/gather_args.rs @@ -68,12 +68,6 @@ mod unit_tests { for i in 0..n_r { let coord = bound.linear_to_coord(i); buffer.as_slice_mut()[i] = (coord[0] + 3 * coord[1]) as f64; - println!( - "i: {}, c: {:?}, r: {}", - i, - coord, - buffer.as_slice_mut()[i] - ); } let mut domain = OwnedDomain::new(bound); domain.par_set_values(|coord| (coord[0] + 3 * coord[1]) as f64, 1); @@ -90,7 +84,6 @@ mod unit_tests { (8 + 3 * 9) as f64, (9 + 3 * 9) as f64, ]; - println!("r: {:?}, e: {:?}", r, e); for n in 0..r.len() { assert_approx_eq!(f64, r[n], e[n]); } diff --git a/src/solver/ap_solve.rs b/src/solver/ap_solve.rs new file mode 100644 index 0000000..1934edd --- /dev/null +++ b/src/solver/ap_solve.rs @@ -0,0 +1,149 @@ +use crate::decomposition::*; +use crate::domain::*; +use crate::solver::*; +use crate::stencil::*; +use crate::util::*; + +pub struct APSolver< + 'a, + BC, + Operation, + const GRID_DIMENSION: usize, + const NEIGHBORHOOD_SIZE: usize, +> where + Operation: StencilOperation, + BC: BCCheck, +{ + bc: &'a BC, + stencil: &'a StencilF64, + params: FFTSolveParams, + periodic_lib: + PeriodicPlanLibrary<'a, Operation, GRID_DIMENSION, NEIGHBORHOOD_SIZE>, + chunk_size: usize, + slopes: Bounds, +} + +impl< + 'a, + BC, + Operation, + const GRID_DIMENSION: usize, + const NEIGHBORHOOD_SIZE: usize, + > APSolver<'a, BC, Operation, GRID_DIMENSION, NEIGHBORHOOD_SIZE> +where + Operation: StencilOperation, + BC: BCCheck, +{ + pub fn new( + bc: &'a BC, + stencil: &'a StencilF64, + cutoff: i32, + ratio: f64, + max_bound: &AABB, + chunk_size: usize, + ) -> Self { + let params = FFTSolveParams { + slopes: stencil.slopes(), + cutoff, + ratio, + }; + + let periodic_lib = PeriodicPlanLibrary::new(max_bound, stencil); + + let slopes = stencil.slopes(); + + APSolver { + bc, + stencil, + params, + periodic_lib, + chunk_size, + slopes, + } + } + + /// Performs outermost loop of AP algorithm + /// Here we find the next root FFT solve, + /// which may not get us to the desired number of steps, + /// we we have to keep finding root FFT Solves and applying + /// the recursion until the desired steps are reached. + /// + /// NOTE: incomplete implementation + pub fn loop_solve + Sync>( + &mut self, + input: &mut DomainType, + output: &mut DomainType, + steps: usize, + ) { + // TODO: we just frustrum solve each of the reguins, no recusion + let mut remaining_steps = steps; + while remaining_steps != 0 { + let maybe_fft_solve = + try_fftsolve(input.aabb(), &self.params, Some(remaining_steps)); + if maybe_fft_solve.is_none() { + box_apply( + self.bc, + self.stencil, + input, + output, + remaining_steps, + self.chunk_size, + ); + return; + } + + let fft_solve = maybe_fft_solve.unwrap(); + let iter_steps = fft_solve.steps; + + // Output domain now has FFT solve + self.periodic_lib + .apply(input, output, iter_steps, self.chunk_size); + + // For each degree make domain + let sub_domain_bounds = + input.aabb().decomposition(&fft_solve.solve_region); + let sub_domain_sloped_sides = + decomposition_slopes::(); + + for d in 0..GRID_DIMENSION { + for r in 0..2 { + // get bounds + let output_aabb = sub_domain_bounds[d][r]; + let sloped_sides = sub_domain_sloped_sides[d][r]; + let input_aabb = trapezoid_input_region( + iter_steps, + &output_aabb, + &sloped_sides, + &self.slopes, + ); + + // Make sub domain + let mut input_domain = OwnedDomain::new(input_aabb); + let mut output_domain = OwnedDomain::new(input_aabb); + + // copy input + input_domain.par_from_superset(input, self.chunk_size); + + trapezoid_apply( + self.bc, + self.stencil, + &mut input_domain, + &mut output_domain, + &sloped_sides, + &self.slopes, + iter_steps, + self.chunk_size, + ); + + // copy output + debug_assert_eq!(output_domain.aabb(), &output_aabb); + output.par_set_subdomain(&output_domain, self.chunk_size); + } + } + + remaining_steps -= iter_steps; + std::mem::swap(input, output); + } + std::mem::swap(input, output); + } +} diff --git a/src/solver/mod.rs b/src/solver/mod.rs index 4062cc3..b81e976 100644 --- a/src/solver/mod.rs +++ b/src/solver/mod.rs @@ -1,9 +1,12 @@ +pub mod ap_solve; pub mod direct; pub mod fft_plan; pub mod periodic_direct; pub mod periodic_plan; pub mod trapezoid; +pub use ap_solve::*; pub use direct::*; pub use periodic_direct::*; pub use periodic_plan::*; +pub use trapezoid::*; diff --git a/src/solver/trapezoid.rs b/src/solver/trapezoid.rs index 7be309c..f4a35b1 100644 --- a/src/solver/trapezoid.rs +++ b/src/solver/trapezoid.rs @@ -43,16 +43,11 @@ pub fn trapezoid_apply< trapezoid_slopes.set_column(1, &negative_slopes); let mut output_box = *input.aabb(); - for t in 0..steps { - println!("trapezoid t: {}", t); + for _ in 0..steps { output_box = output_box.add_bounds_diff(trapezoid_slopes); debug_assert!(input.aabb().buffer_size() >= output_box.buffer_size()); output.set_aabb(output_box); - println!(" output_box: {:?}", output_box); - par_stencil::apply(bc, stencil, input, output, chunk_size); - println!(" done with apply"); - std::mem::swap(input, output); } std::mem::swap(input, output); diff --git a/tests/base_solver_compare.rs b/tests/base_solver_compare.rs index 01c72d2..22a0063 100644 --- a/tests/base_solver_compare.rs +++ b/tests/base_solver_compare.rs @@ -8,11 +8,11 @@ use float_cmp::assert_approx_eq; use nalgebra::matrix; #[test] -fn heat_1d_compare() { +fn heat_1d_p_compare() { // Grid size let grid_bound = AABB::new(matrix![0, 999]); - let n_steps = 16; + let n_steps = 400; let chunk_size = 100; @@ -122,3 +122,52 @@ fn periodic_compare() { } } } + +#[test] +fn heat_1d_ap_compare() { + // Grid size + let grid_bound = AABB::new(matrix![0, 999]); + + let n_steps = 400; + + let chunk_size = 100; + + let stencil = nhls::standard_stencils::heat_1d(1.0, 1.0, 0.5); + + // Create domains + let buffer_size = grid_bound.buffer_size(); + let mut direct_input_domain = OwnedDomain::new(grid_bound); + let mut direct_output_domain = OwnedDomain::new(grid_bound); + + let mut fft_input_domain = OwnedDomain::new(grid_bound); + let mut fft_output_domain = OwnedDomain::new(grid_bound); + + // Create BC + let bc = ConstantCheck::new(1.0, grid_bound); + + // Create AP Solver + let cutoff = 40; + let ratio = 0.5; + let mut solver = + APSolver::new(&bc, &stencil, cutoff, ratio, &grid_bound, chunk_size); + + solver.loop_solve(&mut fft_input_domain, &mut fft_output_domain, n_steps); + + box_apply( + &bc, + &stencil, + &mut direct_input_domain, + &mut direct_output_domain, + n_steps, + chunk_size, + ); + + for i in 0..buffer_size { + assert_approx_eq!( + f64, + fft_output_domain.buffer()[i], + direct_output_domain.buffer()[i], + epsilon = 0.00000000000001 + ); + } +}