Try to autovectorize rate calculation

TeamAtomECS · Feb 11, 2021 · 4098053 · 4098053
1 parent 1e2925d
commit 4098053
Show file tree

Hide file tree

Showing 3 changed files with 92 additions and 45 deletions.
diff --git a/src/laser/intensity.rs b/src/laser/intensity.rs
@@ -10,6 +10,7 @@ extern crate specs;
 use super::cooling::CoolingLightIndex;
 use super::gaussian::{get_gaussian_beam_intensity, CircularMask, GaussianBeam};
 use crate::atom::Position;
+use nalgebra::Vector3;
 use specs::{Component, Entities, Join, ReadStorage, System, VecStorage, WriteStorage};
 
 const LASER_CACHE_SIZE: usize = 16;
@@ -19,13 +20,17 @@ const LASER_CACHE_SIZE: usize = 16;
 pub struct LaserIntensitySampler {
     /// Intensity in SI units of W/m^2
     pub intensity: f64,
+
+    /// Normalised vector showing the direction of the beam.
+    pub direction: Vector3<f64>,
 }
 
 impl Default for LaserIntensitySampler {
     fn default() -> Self {
         LaserIntensitySampler {
             /// Intensity in SI units of W/m^2
             intensity: f64::NAN,
+            direction: Vector3::new(0.0, 0.0, 0.0),
         }
     }
 }
@@ -109,6 +114,7 @@ impl<'a> System<'a> for SampleLaserIntensitySystem {
                         let (index, gaussian, mask) = laser_array[i];
                         samplers.contents[index.index].intensity =
                             get_gaussian_beam_intensity(&gaussian, &pos, mask.as_ref());
+                        samplers.contents[index.index].direction = gaussian.direction.normalize();
                     }
                 });
         }

diff --git a/src/laser/rate.rs b/src/laser/rate.rs
@@ -5,13 +5,14 @@ extern crate specs;
 
 use super::cooling::{CoolingLight, CoolingLightIndex};
 use crate::atom::AtomicTransition;
+use crate::constant::PI;
 use crate::laser::gaussian::GaussianBeam;
 use crate::laser::intensity::LaserIntensitySamplers;
 use crate::laser::sampler::LaserDetuningSamplers;
+use crate::laser::sampler::LaserSamplerMasks;
 use crate::magnetic::MagneticFieldSampler;
-use specs::{Component, Join, ReadStorage, System, VecStorage, WriteStorage};
-
-use crate::constant::PI;
+use specs::Join;
+use specs::{Component, ReadStorage, System, VecStorage, WriteStorage};
 
 /// Represents the rate coefficient of the atom with respect to a specific CoolingLight entity
 #[derive(Clone, Copy)]
@@ -69,70 +70,100 @@ pub struct CalculateRateCoefficientsSystem;
 
 impl<'a> System<'a> for CalculateRateCoefficientsSystem {
     type SystemData = (
-        ReadStorage<'a, CoolingLight>,
-        ReadStorage<'a, CoolingLightIndex>,
         ReadStorage<'a, LaserDetuningSamplers>,
         ReadStorage<'a, LaserIntensitySamplers>,
         ReadStorage<'a, AtomicTransition>,
-        ReadStorage<'a, GaussianBeam>,
         ReadStorage<'a, MagneticFieldSampler>,
+        ReadStorage<'a, LaserSamplerMasks>,
         WriteStorage<'a, RateCoefficients>,
     );
     fn run(
         &mut self,
         (
-            cooling_light,
-            cooling_index,
             laser_detunings,
             laser_intensities,
             atomic_transition,
-            gaussian_beam,
             magnetic_field_sampler,
+            masks,
             mut rate_coefficients,
         ): Self::SystemData,
     ) {
         use rayon::prelude::*;
         use specs::ParJoin;
 
-        for (cooling, index, gaussian) in (&cooling_light, &cooling_index, &gaussian_beam).join() {
-            (
-                &laser_detunings,
-                &laser_intensities,
-                &atomic_transition,
-                &magnetic_field_sampler,
-                &mut rate_coefficients,
-            )
-                .par_join()
-                .for_each(|(detunings, intensities, atominfo, bfield, rates)| {
-                    let beam_direction_vector = gaussian.direction.normalize();
+        (
+            &laser_detunings,
+            &laser_intensities,
+            &atomic_transition,
+            &magnetic_field_sampler,
+            &masks,
+            &mut rate_coefficients,
+        )
+            .par_join()
+            .for_each(|(detunings, intensities, atominfo, bfield, masks, rates)| {
+                // LLVM should auto vectorize this but does not!
+                for (rate, (detuning, (intensity, mask))) in rates.contents.iter_mut().zip(
+                    detunings
+                        .contents
+                        .iter()
+                        .zip(intensities.contents.iter().zip(masks.contents.iter())),
+                ) {
+                    if !mask.filled {
+                        continue;
+                    }
+
                     let costheta = if &bfield.field.norm_squared() < &(10.0 * f64::EPSILON) {
                         0.0
                     } else {
-                        beam_direction_vector
-                            .normalize()
-                            .dot(&bfield.field.normalize())
+                        intensity.direction.dot(&bfield.field.normalize())
                     };
 
-                    let prefactor =
-                        atominfo.rate_prefactor * intensities.contents[index.index].intensity;
+                    let prefactor = atominfo.rate_prefactor * intensity.intensity;
 
                     let scatter1 =
-                        0.25 * (cooling.polarization as f64 * costheta + 1.).powf(2.) * prefactor
-                            / (detunings.contents[index.index].detuning_sigma_plus.powf(2.)
+                        0.25 * (detuning.polarization * costheta + 1.).powf(2.) * prefactor
+                            / (detuning.detuning_sigma_plus.powf(2.)
                                 + (PI * atominfo.linewidth).powf(2.));
 
                     let scatter2 =
-                        0.25 * (cooling.polarization as f64 * costheta - 1.).powf(2.) * prefactor
-                            / (detunings.contents[index.index]
-                                .detuning_sigma_minus
-                                .powf(2.)
+                        0.25 * (detuning.polarization * costheta - 1.).powf(2.) * prefactor
+                            / (detuning.detuning_sigma_minus.powf(2.)
                                 + (PI * atominfo.linewidth).powf(2.));
 
                     let scatter3 = 0.5 * (1. - costheta.powf(2.)) * prefactor
-                        / (detunings.contents[index.index].detuning_pi.powf(2.)
-                            + (PI * atominfo.linewidth).powf(2.));
-                    rates.contents[index.index].rate = scatter1 + scatter2 + scatter3;
-                });
-        }
+                        / (detuning.detuning_pi.powf(2.) + (PI * atominfo.linewidth).powf(2.));
+                    rate.rate = scatter1 + scatter2 + scatter3;
+                }
+
+                // LLVM doesn't vectorize this either, even though it is explict about being a fixed size slice.
+                // for i in 0..crate::laser::COOLING_BEAM_LIMIT {
+                //     let costheta = if &bfield.field.norm_squared() < &(10.0 * f64::EPSILON) {
+                //         0.0
+                //     } else {
+                //         intensities.contents[i]
+                //             .direction
+                //             .dot(&bfield.field.normalize())
+                //     };
+
+                //     let prefactor = atominfo.rate_prefactor * intensities.contents[i].intensity;
+
+                //     let scatter1 = 0.25
+                //         * (detunings.contents[i].polarization * costheta + 1.).powf(2.)
+                //         * prefactor
+                //         / (detunings.contents[i].detuning_sigma_plus.powf(2.)
+                //             + (PI * atominfo.linewidth).powf(2.));
+
+                //     let scatter2 = 0.25
+                //         * (detunings.contents[i].polarization * costheta - 1.).powf(2.)
+                //         * prefactor
+                //         / (detunings.contents[i].detuning_sigma_minus.powf(2.)
+                //             + (PI * atominfo.linewidth).powf(2.));
+
+                //     let scatter3 = 0.5 * (1. - costheta.powf(2.)) * prefactor
+                //         / (detunings.contents[i].detuning_pi.powf(2.)
+                //             + (PI * atominfo.linewidth).powf(2.));
+                //     rates.contents[i].rate = scatter1 + scatter2 + scatter3;
+                // }
+            });
     }
 }
diff --git a/src/laser/sampler.rs b/src/laser/sampler.rs
@@ -32,6 +32,14 @@ impl Component for LaserSamplerMasks {
     type Storage = VecStorage<Self>;
 }
 
+// /// Component that holds a vector of `CoolingLight`
+// pub struct CoolingLightSamplers {
+//     pub contents: [CoolingLight; crate::laser::COOLING_BEAM_LIMIT],
+// }
+// impl Component for CoolingLightSamplers {
+//     type Storage = VecStorage<Self>;
+// }
+
 /// Marks all laser sampler mask slots as empty.
 pub struct InitialiseLaserSamplerMasksSystem;
 impl<'a> System<'a> for InitialiseLaserSamplerMasksSystem {
@@ -75,17 +83,17 @@ pub struct LaserDetuningSampler {
     pub detuning_sigma_minus: f64,
     /// Laser detuning of the pi transition with respect to laser beam, in SI units of Hz
     pub detuning_pi: f64,
+    /// Polarisation of the laser beam
+    pub polarization: f64,
 }
 
 impl Default for LaserDetuningSampler {
     fn default() -> Self {
         LaserDetuningSampler {
-            /// Laser detuning of the sigma plus transition with respect to laser beam, in SI units of Hz
             detuning_sigma_plus: f64::NAN,
-            /// Laser detuning of the sigma minus transition with respect to laser beam, in SI units of Hz
             detuning_sigma_minus: f64::NAN,
-            /// Laser detuning of the pi transition with respect to laser beam, in SI units of Hz
             detuning_pi: f64::NAN,
+            polarization: f64::NAN,
         }
     }
 }
@@ -176,12 +184,14 @@ impl<'a> System<'a> for CalculateLaserDetuningSystem {
                                 * constant::PI
                                 - doppler_samplers.contents[index.index].doppler_shift;
 
-                            detuning_sampler.contents[index.index].detuning_sigma_plus =
-                                without_zeeman.clone() - zeeman_sampler.sigma_plus;
-                            detuning_sampler.contents[index.index].detuning_sigma_minus =
-                                without_zeeman.clone() - zeeman_sampler.sigma_minus;
-                            detuning_sampler.contents[index.index].detuning_pi =
-                                without_zeeman.clone() - zeeman_sampler.sigma_pi;
+                            detuning_sampler.contents[index.index] = LaserDetuningSampler {
+                                detuning_sigma_plus: without_zeeman.clone()
+                                    - zeeman_sampler.sigma_plus,
+                                detuning_sigma_minus: without_zeeman.clone()
+                                    - zeeman_sampler.sigma_minus,
+                                detuning_pi: without_zeeman.clone() - zeeman_sampler.sigma_pi,
+                                polarization: cooling.polarization as f64,
+                            };
                         }
                     },
                 )