diff --git a/examples/gaussian_weight/analysis.py b/examples/gaussian_weight/analysis.py index c1e766c6b3..8e4d437f25 100755 --- a/examples/gaussian_weight/analysis.py +++ b/examples/gaussian_weight/analysis.py @@ -118,7 +118,7 @@ assert(np.abs((np.average(xp)-x_avg)) < 5e-7) assert(np.abs((np.average(yp)-y_avg)/y_avg) < .03) - assert( np.abs((np.average(zp)-z_avg)/z_avg) < .035) + assert( np.abs((np.average(zp)-z_avg)/z_avg) < .05) assert(np.abs((np.std(xp)-x_std)/x_std) < .03) assert(np.abs((np.std(yp)-y_std)/y_std) < .03) assert(np.abs((np.std(zp)-z_std)/z_std) < .03) diff --git a/src/particles/beam/BeamParticleContainer.H b/src/particles/beam/BeamParticleContainer.H index a0cbb1070b..c901b7c6f8 100644 --- a/src/particles/beam/BeamParticleContainer.H +++ b/src/particles/beam/BeamParticleContainer.H @@ -87,17 +87,8 @@ public: void InitBeamFixedPPCSlice (const int islice, const int which_beam_slice); /** Initialize a beam with a fix number of particles, and fixed weight */ - void InitBeamFixedWeight (int num_to_add, - const GetInitialMomentum& get_momentum, - const amrex::ParserExecutor<1>& pos_mean_x, - const amrex::ParserExecutor<1>& pos_mean_y, - const amrex::Real pos_mean_z, - const amrex::RealVect pos_std, - const amrex::Real total_charge, - const amrex::Real z_foc, - const bool do_symmetrize, - const bool can, const amrex::Real zmin, const amrex::Real zmax, - const amrex::Real radius); + void InitBeamFixedWeight3D (); + void InitBeamFixedWeightSlice (const int islice, const int which_beam_slice); #ifdef HIPACE_USE_OPENPMD /** Checks the input file first to determine its Datatype @@ -213,37 +204,48 @@ private: std::string m_name; /**< name of the species */ /** injection type, fixed_width or fixed_ppc */ std::string m_injection_type; + uint64_t m_id64 = 1; /**< 64 bit ID to initialize many particles without overflowing */ + /** Min longitudinal particle position of the beam */ + amrex::Real m_zmin = -std::numeric_limits::infinity(); + /** Max longitudinal particle position of the beam */ + amrex::Real m_zmax = std::numeric_limits::infinity(); + amrex::Real m_radius {std::numeric_limits::infinity()}; /**< Radius of the beam */ + GetInitialMomentum m_get_momentum {}; /**< momentum profile of the beam */ // fixed_ppc: amrex::IntVect m_ppc {1, 1, 1}; /**< Number of particles per cell in each direction */ - amrex::Real m_zmin; /**< Min longitudinal particle position of the beam */ - amrex::Real m_zmax; /**< Max longitudinal particle position of the beam */ - amrex::Real m_radius {std::numeric_limits::infinity()}; /**< Radius of the beam */ amrex::RealVect m_position_mean {0., 0., 0.}; /**< mean position of the beam */ amrex::Real m_min_density {0.}; /**< minimum density at which beam particles are generated */ amrex::IntVect m_random_ppc {0, 0, 0}; /**< if the cell position is random in each direction */ GetInitialDensity m_get_density {}; /**< density profile of the beam */ - GetInitialMomentum m_get_momentum {}; /**< momentum profile of the beam */ /** Density parser for fixed-ppc beam. Owns data for m_density_func */ amrex::Parser m_density_parser; - uint64_t m_id64 = 1; /**< 64 bit ID to initialize many particles without overflowing */ // fixed_weight: + bool m_can_profile = false; /** Average x position of the fixed-weight beam depending on z */ amrex::Parser m_pos_mean_x_parser; + /** Average x position of the fixed-weight beam depending on z */ + amrex::ParserExecutor<1> m_pos_mean_x_func; /** Average y position of the fixed-weight beam depending on z */ amrex::Parser m_pos_mean_y_parser; + /* Average y position of the fixed-weight beam depending on z */ + amrex::ParserExecutor<1> m_pos_mean_y_func; + /* Average z position of the fixed-weight beam depending on z */ + amrex::Real m_pos_mean_z = 0; /** Width of the Gaussian beam. Only used for a fixed-weight beam */ amrex::RealVect m_position_std {0., 0., 0.}; /** Distance at which the beam is focused, starting from its initial position */ amrex::Real m_z_foc {0.}; amrex::Real m_duz_per_uz0_dzeta {0.}; /**< relative energy spread per dzeta */ - int m_num_particles; /**< Number of particles for fixed-weigth Gaussian beam */ + amrex::Long m_num_particles; /**< Number of particles for fixed-weight Gaussian beam */ amrex::Real m_total_charge; /**< Total beam charge for fixed-weight Gaussian beam */ amrex::Real m_density; /**< Peak density for fixed-weight Gaussian beam */ bool m_do_symmetrize {0}; /**< Option to symmetrize the beam */ + /** Array for the z position of all beam particles */ + amrex::PODVector> m_z_array {}; // from_file: diff --git a/src/particles/beam/BeamParticleContainer.cpp b/src/particles/beam/BeamParticleContainer.cpp index 2c507e2ab8..6b1d24977f 100644 --- a/src/particles/beam/BeamParticleContainer.cpp +++ b/src/particles/beam/BeamParticleContainer.cpp @@ -104,22 +104,19 @@ BeamParticleContainer::InitData (const amrex::Geometry& geom) } else if (m_injection_type == "fixed_weight") { - bool can = false; - amrex::Real zmin = -std::numeric_limits::infinity(); - amrex::Real zmax = std::numeric_limits::infinity(); std::string profile = "gaussian"; queryWithParser(pp, "profile", profile); queryWithParser(pp, "radius", m_radius); if (profile == "can") { - can = true; - getWithParser(pp, "zmin", zmin); - getWithParser(pp, "zmax", zmax); + m_can_profile = true; + getWithParser(pp, "zmin", m_zmin); + getWithParser(pp, "zmax", m_zmax); } else if (profile == "gaussian") { - queryWithParser(pp, "zmin", zmin); - queryWithParser(pp, "zmax", zmax); + queryWithParser(pp, "zmin", m_zmin); + queryWithParser(pp, "zmax", m_zmax); AMREX_ALWAYS_ASSERT_WITH_MESSAGE( !m_do_salame || - (zmin != -std::numeric_limits::infinity() && - zmax != std::numeric_limits::infinity()), + (m_zmin != -std::numeric_limits::infinity() && + m_zmax != std::numeric_limits::infinity()), "For the SALAME algorithm it is mandatory to either use a 'can' profile or " "'zmin' and 'zmax' with a gaussian profile"); } else { @@ -129,10 +126,9 @@ BeamParticleContainer::InitData (const amrex::Geometry& geom) std::array pos_mean_arr{"","",""}; getWithParser(pp, "position_mean", pos_mean_arr); queryWithParser(pp, "z_foc", m_z_foc); - auto pos_mean_x = makeFunctionWithParser<1>(pos_mean_arr[0], m_pos_mean_x_parser, {"z"}); - auto pos_mean_y = makeFunctionWithParser<1>(pos_mean_arr[1], m_pos_mean_y_parser, {"z"}); - amrex::Real pos_mean_z = 0; - Parser::fillWithParser(pos_mean_arr[2], pos_mean_z); + m_pos_mean_x_func = makeFunctionWithParser<1>(pos_mean_arr[0], m_pos_mean_x_parser, {"z"}); + m_pos_mean_y_func = makeFunctionWithParser<1>(pos_mean_arr[1], m_pos_mean_y_parser, {"z"}); + Parser::fillWithParser(pos_mean_arr[2], m_pos_mean_z); getWithParser(pp, "position_std", m_position_std); getWithParser(pp, "num_particles", m_num_particles); @@ -161,10 +157,13 @@ BeamParticleContainer::InitData (const amrex::Geometry& geom) m_total_charge *= geom.InvCellSize(0)*geom.InvCellSize(1)*geom.InvCellSize(2); } - const GetInitialMomentum get_momentum(m_name); - InitBeamFixedWeight(m_num_particles, get_momentum, pos_mean_x, pos_mean_y, pos_mean_z, - m_position_std, m_total_charge, m_z_foc, m_do_symmetrize, can, zmin, zmax, m_radius); - m_total_num_particles = getBeamInitSlice().size(); + m_get_momentum = GetInitialMomentum{m_name}; + InitBeamFixedWeight3D(); + m_total_num_particles = m_num_particles; + if (Hipace::HeadRank()) { + m_init_sorter.sortParticlesByBox(m_z_array.dataPtr(), m_z_array.size(), + m_initialize_on_cpu, geom); + } } else if (m_injection_type == "from_file") { #ifdef HIPACE_USE_OPENPMD @@ -182,6 +181,11 @@ BeamParticleContainer::InitData (const amrex::Geometry& geom) geom, m_plasma_density, m_num_iteration, m_species_name, species_specified); m_total_num_particles = getBeamInitSlice().size(); + if (Hipace::HeadRank()) { + m_init_sorter.sortParticlesByBox( + getBeamInitSlice().GetStructOfArrays().GetRealData(BeamIdx::z).dataPtr(), + getBeamInitSlice().size(), m_initialize_on_cpu, geom); + } #else amrex::Abort("beam particle injection via external_file requires openPMD support: " "Add HiPACE_OPENPMD=ON when compiling HiPACE++.\n"); @@ -200,10 +204,6 @@ BeamParticleContainer::InitData (const amrex::Geometry& geom) amrex::ParallelDescriptor::Communicator()); #endif - if (Hipace::HeadRank() && m_injection_type != "fixed_ppc") { - m_init_sorter.sortParticlesByBox(*this, geom); - } - if (m_insitu_period > 0) { #ifdef HIPACE_USE_OPENPMD AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_insitu_file_prefix != @@ -275,6 +275,8 @@ BeamParticleContainer::intializeSlice (int slice, int which_slice) { if (m_injection_type == "fixed_ppc") { InitBeamFixedPPCSlice(slice, which_slice); + } else if (m_injection_type == "fixed_weight") { + InitBeamFixedWeightSlice(slice, which_slice); } else { const int num_particles = m_init_sorter.m_box_counts_cpu[slice]; diff --git a/src/particles/beam/BeamParticleContainerInit.cpp b/src/particles/beam/BeamParticleContainerInit.cpp index 5c5c9bc3f9..709be6ce8f 100644 --- a/src/particles/beam/BeamParticleContainerInit.cpp +++ b/src/particles/beam/BeamParticleContainerInit.cpp @@ -56,6 +56,48 @@ namespace iarrdata[BeamIdx::id ][ip] = pid > 0 ? pid + ip : pid; } + + /** \brief Adds a single beam particle into the per-slice BeamTile + * + * \param[in,out] rarrdata real array with SoA beam data + * \param[in,out] iarrdata int array with SoA beam data + * \param[in] x position in x + * \param[in] y position in y + * \param[in] z position in z + * \param[in] ux gamma * beta_x + * \param[in] uy gamma * beta_y + * \param[in] uz gamma * beta_z + * \param[in] weight weight of the single particle + * \param[in] pid particle ID to be assigned to the particle at index 0 + * \param[in] ip index of the particle + * \param[in] speed_of_light speed of light in the current units + * \param[in] is_valid if the particle is valid + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void AddOneBeamParticleSlice ( + amrex::GpuArray rarrdata, + amrex::GpuArray iarrdata,const amrex::Real x, + const amrex::Real y, const amrex::Real z, const amrex::Real ux, const amrex::Real uy, + const amrex::Real uz, const amrex::Real weight, const amrex::Long pid, + const amrex::Long ip, const amrex::Real speed_of_light, const bool is_valid=true) noexcept + { + rarrdata[BeamIdx::x ][ip] = x; + rarrdata[BeamIdx::y ][ip] = y; + rarrdata[BeamIdx::z ][ip] = z; + rarrdata[BeamIdx::ux ][ip] = ux * speed_of_light; + rarrdata[BeamIdx::uy ][ip] = uy * speed_of_light; + rarrdata[BeamIdx::uz ][ip] = uz * speed_of_light; + rarrdata[BeamIdx::w ][ip] = std::abs(weight); + + // ensure id is always valid + // it will repeat if there are more than 2^31-1 particles + int id = int((pid + ip) & ((1ull << 31) - 1)); + if (id == 0) id = 1; + if (!is_valid) id = -1; + + iarrdata[BeamIdx::id ][ip] = id; + iarrdata[BeamIdx::cpu][ip] = 0; + } } void @@ -281,21 +323,8 @@ InitBeamFixedPPCSlice (const int islice, const int which_beam_slice) const amrex::Real weight = density * scale_fac; - rarrdata[BeamIdx::x ][pidx] = x; - rarrdata[BeamIdx::y ][pidx] = y; - rarrdata[BeamIdx::z ][pidx] = z; - rarrdata[BeamIdx::ux ][pidx] = u[0] * speed_of_light; - rarrdata[BeamIdx::uy ][pidx] = u[1] * speed_of_light; - rarrdata[BeamIdx::uz ][pidx] = u[2] * speed_of_light; - rarrdata[BeamIdx::w ][pidx] = std::abs(weight); - - // ensure id is always valid - // it will repeat if there are more than 2^31-1 particles - int id = int((pid + pidx) & ((1ull << 31) - 1)); - if (id == 0) id = 1; - - iarrdata[BeamIdx::id ][pidx] = id; - iarrdata[BeamIdx::cpu][pidx] = 0; + AddOneBeamParticleSlice(rarrdata, iarrdata, x, y, z, u[0], u[1], u[2], weight, + pid, pidx, speed_of_light, true); ++pidx; } @@ -304,93 +333,131 @@ InitBeamFixedPPCSlice (const int islice, const int which_beam_slice) void BeamParticleContainer:: -InitBeamFixedWeight (int num_to_add, - const GetInitialMomentum& get_momentum, - const amrex::ParserExecutor<1>& pos_mean_x, - const amrex::ParserExecutor<1>& pos_mean_y, - const amrex::Real pos_mean_z, - const amrex::RealVect pos_std, - const amrex::Real total_charge, - const amrex::Real z_foc, - const bool do_symmetrize, - const bool can, const amrex::Real zmin, const amrex::Real zmax, - const amrex::Real radius) +InitBeamFixedWeight3D () { - HIPACE_PROFILE("BeamParticleContainer::InitParticles()"); + HIPACE_PROFILE("BeamParticleContainer::InitBeamFixedWeight3D()"); + using namespace amrex::literals; + + if (!Hipace::HeadRank() || m_num_particles == 0) { return; } + + amrex::Long num_to_add = m_num_particles; + if (m_do_symmetrize) num_to_add /= 4; + + m_z_array.setArena(m_initialize_on_cpu ? amrex::The_Pinned_Arena() : amrex::The_Arena()); + m_z_array.resize(num_to_add); + amrex::Real * const pos_z = m_z_array.dataPtr(); + + const bool can = m_can_profile; + const amrex::Real z_min = m_zmin; + const amrex::Real z_max = m_zmax; + const amrex::Real z_mean = m_pos_mean_z; + const amrex::Real z_std = m_position_std[2]; + + amrex::ParallelForRNG( + num_to_add, + [=] AMREX_GPU_DEVICE (amrex::Long i, const amrex::RandomEngine& engine) noexcept + { + pos_z[i] = can + ? amrex::Random(engine) * (z_max - z_min) + z_min + : amrex::RandomNormal(z_mean, z_std, engine); + }); + + return; +} + +void +BeamParticleContainer:: +InitBeamFixedWeightSlice (int slice, int which_slice) +{ + HIPACE_PROFILE("BeamParticleContainer::InitBeamFixedWeightSlice()"); using namespace amrex::literals; + if (!Hipace::HeadRank() || m_num_particles == 0) { return; } + + const int num_to_add = m_init_sorter.m_box_counts_cpu[slice]; + if (m_do_symmetrize) { + resize(which_slice, 4*num_to_add, 0); + } else { + resize(which_slice, num_to_add, 0); + } + if (num_to_add == 0) return; - if (do_symmetrize) num_to_add /=4; - PhysConst phys_const = get_phys_const(); + const amrex::Real clight = get_phys_const().c; + + auto& particle_tile = getBeamSlice(which_slice); + + // Access particles' SoA + amrex::GpuArray rarrdata = + particle_tile.GetStructOfArrays().realarray(); + amrex::GpuArray iarrdata = + particle_tile.GetStructOfArrays().intarray(); + + const amrex::Long slice_offset = m_init_sorter.m_box_offsets_cpu[slice]; + const auto permutations = m_init_sorter.m_box_permutations.dataPtr(); + amrex::Real * const pos_z = m_z_array.dataPtr(); + + const uint64_t pid = m_id64; + m_id64 += num_to_add; + + const bool can = m_can_profile; + const bool do_symmetrize = m_do_symmetrize; + const amrex::Real duz_per_uz0_dzeta = m_duz_per_uz0_dzeta; + const amrex::Real z_min = m_zmin; + const amrex::Real z_max = m_zmax; + const amrex::Real z_mean = can ? 0.5_rt * (z_min + z_max) : m_pos_mean_z; + const amrex::RealVect pos_std = m_position_std; + const amrex::Real z_foc = m_z_foc; + const amrex::Real radius = m_radius; + auto pos_mean_x = m_pos_mean_x_func; + auto pos_mean_y = m_pos_mean_y_func; + const amrex::Real weight = m_total_charge / (m_num_particles * m_charge); + const GetInitialMomentum get_momentum = m_get_momentum; - if (Hipace::HeadRank()) { + amrex::ParallelForRNG( + num_to_add, + [=] AMREX_GPU_DEVICE (amrex::Long i, const amrex::RandomEngine& engine) noexcept + { + const amrex::Real z_central = pos_z[permutations[slice_offset + i]]; + amrex::Real x = amrex::RandomNormal(0, pos_std[0], engine); + amrex::Real y = amrex::RandomNormal(0, pos_std[1], engine); - auto& particle_tile = getBeamInitSlice(); - auto old_size = particle_tile.size(); - auto new_size = do_symmetrize? old_size + 4*num_to_add : old_size + num_to_add; - particle_tile.resize(new_size); + amrex::Real u[3] = {0.,0.,0.}; + get_momentum(u[0], u[1], u[2], engine, z_central - z_mean, duz_per_uz0_dzeta); - // Access particles' SoA - amrex::GpuArray rarrdata = - particle_tile.GetStructOfArrays().realarray(); - amrex::GpuArray iarrdata = - particle_tile.GetStructOfArrays().intarray(); + bool is_valid = true; + if (z_central < z_min || z_central > z_max || x*x + y*y > radius*radius) { + is_valid = false; + } - const int pid = BeamTileInit::ParticleType::NextID(); - BeamTileInit::ParticleType::NextID(pid + num_to_add); + // Propagate each electron ballistically for z_foc + x -= z_foc*u[0]/u[2]; + y -= z_foc*u[1]/u[2]; - const amrex::Real duz_per_uz0_dzeta = m_duz_per_uz0_dzeta; - const amrex::Real single_charge = m_charge; - const amrex::Real z_mean = can ? 0.5_rt * (zmin + zmax) : pos_mean_z; + const amrex::Real cental_x_pos = pos_mean_x(z_central); + const amrex::Real cental_y_pos = pos_mean_y(z_central); - amrex::ParallelForRNG( - num_to_add, - [=] AMREX_GPU_DEVICE (int i, const amrex::RandomEngine& engine) noexcept + if (!do_symmetrize) { - amrex::Real x = amrex::RandomNormal(0, pos_std[0], engine); - amrex::Real y = amrex::RandomNormal(0, pos_std[1], engine); - const amrex::Real z = can - ? (amrex::Random(engine) - 0.5_rt) * (zmax - zmin) - : amrex::RandomNormal(0, pos_std[2], engine); - amrex::Real u[3] = {0.,0.,0.}; - get_momentum(u[0],u[1],u[2], engine, z, duz_per_uz0_dzeta); - - const amrex::Real z_central = z + z_mean; - int valid_id = pid; - if (z_central < zmin || z_central > zmax || - x*x + y*y > radius*radius) valid_id = -1; - - // Propagate each electron ballistically for z_foc - x -= z_foc*u[0]/u[2]; - y -= z_foc*u[1]/u[2]; - - const amrex::Real cental_x_pos = pos_mean_x(z_central); - const amrex::Real cental_y_pos = pos_mean_y(z_central); - - amrex::Real weight = total_charge / (num_to_add * single_charge); - if (!do_symmetrize) - { - AddOneBeamParticle(rarrdata, iarrdata, cental_x_pos+x, cental_y_pos+y, - z_central, u[0], u[1], u[2], weight, - valid_id, i, phys_const.c); - } else { - weight /= 4; - AddOneBeamParticle(rarrdata, iarrdata, cental_x_pos+x, cental_y_pos+y, - z_central, u[0], u[1], u[2], weight, - valid_id, 4*i, phys_const.c); - AddOneBeamParticle(rarrdata, iarrdata, cental_x_pos-x, cental_y_pos+y, - z_central, -u[0], u[1], u[2], weight, - valid_id, 4*i+1, phys_const.c); - AddOneBeamParticle(rarrdata, iarrdata, cental_x_pos+x, cental_y_pos-y, - z_central, u[0], -u[1], u[2], weight, - valid_id, 4*i+2, phys_const.c); - AddOneBeamParticle(rarrdata, iarrdata, cental_x_pos-x, cental_y_pos-y, - z_central, -u[0], -u[1], u[2], weight, - valid_id, 4*i+3, phys_const.c); - } - }); - } + AddOneBeamParticleSlice(rarrdata, iarrdata, cental_x_pos+x, cental_y_pos+y, + z_central, u[0], u[1], u[2], weight, + pid, i, clight, is_valid); + + } else { + AddOneBeamParticleSlice(rarrdata, iarrdata, cental_x_pos+x, cental_y_pos+y, + z_central, u[0], u[1], u[2], weight, + pid, 4*i, clight, is_valid); + AddOneBeamParticleSlice(rarrdata, iarrdata, cental_x_pos-x, cental_y_pos+y, + z_central, -u[0], u[1], u[2], weight, + pid, 4*i+1, clight, is_valid); + AddOneBeamParticleSlice(rarrdata, iarrdata, cental_x_pos+x, cental_y_pos-y, + z_central, u[0], -u[1], u[2], weight, + pid, 4*i+2, clight, is_valid); + AddOneBeamParticleSlice(rarrdata, iarrdata, cental_x_pos-x, cental_y_pos-y, + z_central, -u[0], -u[1], u[2], weight, + pid, 4*i+3, clight, is_valid); + } + }); return; } diff --git a/src/particles/sorting/BoxSort.H b/src/particles/sorting/BoxSort.H index 63a37ac283..530184a84c 100644 --- a/src/particles/sorting/BoxSort.H +++ b/src/particles/sorting/BoxSort.H @@ -18,9 +18,12 @@ class BeamParticleContainer; class BoxSorter { public: - using index_type = unsigned int; + using index_type = unsigned long long; - void sortParticlesByBox (BeamParticleContainer& a_beam, const amrex::Geometry& a_geom); + void sortParticlesByBox (const amrex::Real * z_array, + const index_type num_particles, + const bool init_on_cpu, + const amrex::Geometry& a_geom); //! \brief returns the pointer to the permutation array index_type* boxCountsPtr () noexcept { return m_box_counts_cpu.dataPtr(); } diff --git a/src/particles/sorting/BoxSort.cpp b/src/particles/sorting/BoxSort.cpp index e990b2aad3..1bbe7b66d6 100644 --- a/src/particles/sorting/BoxSort.cpp +++ b/src/particles/sorting/BoxSort.cpp @@ -11,60 +11,53 @@ #include -void BoxSorter::sortParticlesByBox (BeamParticleContainer& a_beam, const amrex::Geometry& a_geom) +void BoxSorter::sortParticlesByBox (const amrex::Real * z_array, const index_type num_particles, + const bool init_on_cpu, const amrex::Geometry& a_geom) { HIPACE_PROFILE("sortBeamParticlesByBox()"); m_box_permutations.setArena( - a_beam.m_initialize_on_cpu ? amrex::The_Pinned_Arena() : amrex::The_Arena()); - - const index_type np = a_beam.getBeamInitSlice().numParticles(); - auto ptd = a_beam.getBeamInitSlice().getParticleTileData(); + init_on_cpu ? amrex::The_Pinned_Arena() : amrex::The_Arena()); int num_boxes = a_geom.Domain().length(2); m_box_counts_cpu.resize(num_boxes+1); m_box_offsets_cpu.resize(num_boxes+1); - m_box_permutations.resize(np); - - amrex::PODVector> local_offsets {}; - local_offsets.setArena( - a_beam.m_initialize_on_cpu ? amrex::The_Pinned_Arena() : amrex::The_Arena()); - local_offsets.resize(np); + m_box_permutations.resize(num_particles); amrex::Gpu::DeviceVector box_counts (num_boxes+1, 0); amrex::Gpu::DeviceVector box_offsets (num_boxes+1, 0); auto p_box_counts = box_counts.dataPtr(); - auto p_local_offsets = local_offsets.dataPtr(); auto p_permutations = m_box_permutations.dataPtr(); // Extract box properties const amrex::Real dzi = a_geom.InvCellSize(2); const amrex::Real plo_z = a_geom.ProbLo(2); - amrex::ParallelFor(np, + amrex::ParallelFor(num_particles, [=] AMREX_GPU_DEVICE (const index_type i) { - int dst_box = static_cast((ptd.pos(2, i) - plo_z) * dzi); - if (ptd.id(i) < 0) dst_box = num_boxes; // if pid is invalid, remove particle + int dst_box = static_cast((z_array[i] - plo_z) * dzi); if (dst_box < 0 || dst_box > num_boxes) { // particle has left domain transversely, stick it at the end and invalidate dst_box = num_boxes; - ptd.id(i) = -std::abs(ptd.id(i)); } - unsigned int index = amrex::Gpu::Atomic::Add(&p_box_counts[dst_box], 1u); - p_local_offsets[i] = index; + amrex::Gpu::Atomic::Add(&p_box_counts[dst_box], 1ull); }); amrex::Gpu::exclusive_scan(box_counts.begin(), box_counts.end(), box_offsets.begin()); + box_counts.assign(num_boxes+1, 0); + auto p_box_offsets = box_offsets.dataPtr(); - amrex::ParallelFor(np, + amrex::ParallelFor(num_particles, [=] AMREX_GPU_DEVICE (const index_type i) { - int dst_box = static_cast((ptd.pos(2, i) - plo_z) * dzi); - if (ptd.id(i) < 0) dst_box = num_boxes; // if pid is invalid, remove particle - if (dst_box < 0 || dst_box > num_boxes) dst_box = num_boxes; - p_permutations[p_local_offsets[i] + p_box_offsets[dst_box]] = i; + int dst_box = static_cast((z_array[i] - plo_z) * dzi); + if (dst_box < 0 || dst_box > num_boxes) { + dst_box = num_boxes; + } + const index_type local_offset = amrex::Gpu::Atomic::Add(&p_box_counts[dst_box], 1ull); + p_permutations[local_offset + p_box_offsets[dst_box]] = i; }); #ifdef AMREX_USE_GPU diff --git a/src/particles/sorting/SliceSort.cpp b/src/particles/sorting/SliceSort.cpp index c785a70662..6fd8914db1 100644 --- a/src/particles/sorting/SliceSort.cpp +++ b/src/particles/sorting/SliceSort.cpp @@ -53,6 +53,7 @@ shiftSlippedParticles (BeamParticleContainer& beam, const int slice, amrex::Geom if (num_invalid == 0 && num_slipped == 0) { // nothing to do + beam.resize(WhichBeamSlice::This, num_stay, 0); return; } diff --git a/src/utils/AdaptiveTimeStep.cpp b/src/utils/AdaptiveTimeStep.cpp index c3119f92d0..6f2f8d1c2e 100644 --- a/src/utils/AdaptiveTimeStep.cpp +++ b/src/utils/AdaptiveTimeStep.cpp @@ -96,7 +96,7 @@ AdaptiveTimeStep::GatherMinUzSlice (MultiBeam& beams, const bool initial) for (int ibeam = 0; ibeam < nbeams; ibeam++) { auto& beam = beams.getBeam(ibeam); - if (initial && beam.m_injection_type == "fixed_ppc") { + if (initial && beam.m_injection_type != "from_file") { // estimate values before the beam is initialized m_timestep_data[ibeam][WhichDouble::SumWeights] = 1._rt; m_timestep_data[ibeam][WhichDouble::SumWeightsTimesUz] = diff --git a/src/utils/Parser.H b/src/utils/Parser.H index 0dd4c61ab9..7b522edb7a 100755 --- a/src/utils/Parser.H +++ b/src/utils/Parser.H @@ -138,6 +138,39 @@ namespace Parser return result; } + /** \brief return valid Long, asserts if inf or NaN + * + * \param[in] x value to cast + * \param[in] real_name name of value for error message + */ + inline amrex::Long + safeCastToLong (const double x, const std::string& real_name) { + amrex::Long result = 0; + bool error_detected = false; + std::string assert_msg; + // (2.0*(numeric_limits::max()/2+1)) converts + // numeric_limits::max()+1 to a real + // ensuring accuracy to all digits. This accepts x = 2**31-1 but rejects 2**31. + if (x < (2.0*(std::numeric_limits::max()/2+1))) { + if (std::ceil(x) >= std::numeric_limits::min()) { + result = static_cast(x); + } else { + error_detected = true; + assert_msg = "Error: Negative overflow detected when casting " + + real_name + " = " + std::to_string(x) + " to amrex::Long"; + } + } else if (x > 0) { + error_detected = true; + assert_msg = "Error: Overflow detected when casting " + + real_name + " = " + std::to_string(x) + " to amrex::Long"; + } else { + error_detected = true; + assert_msg = "Error: NaN detected when casting " + real_name + " to amrex::Long"; + } + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(!error_detected, assert_msg); + return result; + } + /** \brief init Parser ready to compile * * \param[in,out] parser Parser that has been defined @@ -222,6 +255,13 @@ namespace Parser val = safeCastToInt(std::round(parser.compileHost<0>()()),str); } + inline void + fillWithParser (std::string const& str, amrex::Long& val) { + amrex::Parser parser(str); + initParser(parser, {}); + val = safeCastToLong(std::round(parser.compileHost<0>()()),str); + } + inline void fillWithParser (std::string const& str, bool& val) { amrex::Parser parser(str); diff --git a/tests/checksum/benchmarks_json/ion_motion.SI.1Rank.json b/tests/checksum/benchmarks_json/ion_motion.SI.1Rank.json index cfd4a21975..1fc30a0642 100755 --- a/tests/checksum/benchmarks_json/ion_motion.SI.1Rank.json +++ b/tests/checksum/benchmarks_json/ion_motion.SI.1Rank.json @@ -1,32 +1,32 @@ { "lev=0": { - "Bx": 368119.6679272, - "By": 346482.87538383, - "Bz": 120787.84520733, - "ExmBy": 137270194999020.0, - "EypBx": 150972083157650.0, - "Ez": 99855921358316.0, - "Psi": 2617932445.2268, - "Sx": 8422092354132900.0, - "Sy": 8634152312393900.0, - "chi": 9743692179257600.0, - "jx": 2.5629912811215e+16, - "jx_beam": 1250804162997800.0, - "jy": 3.1961203980136e+16, - "jy_beam": 2501608325995600.0, - "jz_beam": 1.2512142721809e+16, - "rhomjz": 216668663.78759 + "Bx": 368250.02966047, + "By": 346571.7274177, + "Bz": 120789.47518633, + "ExmBy": 137277586784810.0, + "EypBx": 150986513841620.0, + "Ez": 99831489576094.0, + "Psi": 2618422274.543, + "Sx": 8424844871581100.0, + "Sy": 8637522213709700.0, + "chi": 9743687370856800.0, + "jx": 2.5625031950017e+16, + "jx_beam": 1250816704563000.0, + "jy": 3.1959847061636e+16, + "jy_beam": 2501633409126100.0, + "jz_beam": 1.2511979681461e+16, + "rhomjz": 216766805.52053 }, "beam": { - "charge": 1.5984179276166e-13, - "id": 498824278143, - "mass": 9.0880130873363e-25, - "x": 3.7821905645792, - "y": 3.8835739998596, - "z": 20.82689752569, - "ux": 9976540.0, - "uy": 19953080.0, - "uz": 99765400.0, - "w": 1001030223.113 + "charge": 1.5983970993204e-13, + "id": 497644281261, + "mass": 9.0878946653482e-25, + "x": 3.7827409105503, + "y": 3.8783689163145, + "z": 20.841963429926, + "ux": 9976410.0, + "uy": 19952820.0, + "uz": 99764100.0, + "w": 1001017179.1189 } } diff --git a/tests/checksum/benchmarks_json/production.SI.2Rank_pwfa.json b/tests/checksum/benchmarks_json/production.SI.2Rank_pwfa.json index d64cfe8a0e..92ade7f067 100644 --- a/tests/checksum/benchmarks_json/production.SI.2Rank_pwfa.json +++ b/tests/checksum/benchmarks_json/production.SI.2Rank_pwfa.json @@ -1,44 +1,44 @@ { "lev=0": { - "Bx": 0.022057958790113, - "By": 8971.5483724174, - "Bz": 0.031326817056901, - "ExmBy": 3100247994049.6, - "EypBx": 8139988.7850516, - "Ez": 2412711942438.3, - "Psi": 119922462.44227, - "Sx": 77236067809066.0, - "Sy": 126525172.5326, - "chi": 4383268612110.4, - "jx": 160531516980650.0, - "jx_beam": 160873179770.07, - "jy": 1020048145.9975, - "jy_beam": 89529.98621689, - "jz_beam": 483430842955410.0, - "rhomjz": 2214095.3620009 + "Bx": 0.023955172897645, + "By": 8968.4071704363, + "Bz": 0.03057756427414, + "ExmBy": 3097416692918.7, + "EypBx": 8069510.9151862, + "Ez": 2409212015861.5, + "Psi": 119780957.0913, + "Sx": 77250436730650.0, + "Sy": 123909269.95519, + "chi": 4383175623974.3, + "jx": 160282718815310.0, + "jx_beam": 159216666161.06, + "jy": 1001084833.0105, + "jy_beam": 426936.55837821, + "jz_beam": 483557337917750.0, + "rhomjz": 2207749.6638023 }, "driver": { - "charge": 1.6011704670738e-13, - "id": 499672863758, - "mass": 9.1036630085355e-25, - "x": 3.136541976873, - "y": 3.1216622704449, - "z": 23.911844573419, - "ux": 1034229.5695309, - "uy": 1032618.9441968, - "uz": 983011526.49251, - "w": 3742553644.0572 + "charge": 1.6019651466843e-13, + "id": 137618956954, + "mass": 9.1081812628514e-25, + "x": 3.1191135058977, + "y": 3.1261217325557, + "z": 23.988150404876, + "ux": 1031950.4676146, + "uy": 1032272.0886339, + "uz": 983561212.10909, + "w": 3744411117.1578 }, "witness": { - "charge": 1.6019907815105e-13, - "id": 749914353870, - "mass": 9.1083270129906e-25, - "x": 2.4516736944533, - "y": 2.4574927757864, - "z": 159.98788995953, - "ux": 1127472.0551172, - "uy": 1131961.5289339, - "uz": 1032420143.5627, - "w": 1248157011.8816 + "charge": 1.602176634e-13, + "id": 199717622704, + "mass": 9.1093837015e-25, + "x": 2.4585086291692, + "y": 2.4568962478141, + "z": 160.01734597671, + "ux": 1129069.5777406, + "uy": 1129690.4645841, + "uz": 1032518717.3144, + "w": 1248301814.8922 } } diff --git a/tests/checksum/benchmarks_json/radiation_reaction.1Rank.json b/tests/checksum/benchmarks_json/radiation_reaction.1Rank.json index 3c82a93311..d2838d97f9 100644 --- a/tests/checksum/benchmarks_json/radiation_reaction.1Rank.json +++ b/tests/checksum/benchmarks_json/radiation_reaction.1Rank.json @@ -1,32 +1,32 @@ { "lev=0": { - "Bx": 3.9728499837158e-19, - "By": 5.8910609879039e-14, - "Bz": 1.6891286774736e-24, + "Bx": 5.0635586991256e-19, + "By": 5.8876460574254e-14, + "Bz": 3.0204131781696e-24, "ExmBy": 0.0, "EypBx": 0.0, - "Ez": 3.1259472441681e-09, + "Ez": 3.6721700850032e-09, "Psi": 0.0, - "Sx": 0.0024041543676677, - "Sy": 7.6433815475634e-12, + "Sx": 0.0023995684827594, + "Sy": 8.7310291859833e-12, "chi": 0.0, - "jx": 3.1950444198537e-06, - "jx_beam": 3.1950444198537e-06, - "jy": 3.7374939092852e-13, - "jy_beam": 3.7374939092852e-13, - "jz_beam": 0.013037977938616, + "jx": 3.2835861877915e-06, + "jx_beam": 3.2835861877915e-06, + "jy": 6.8656879230286e-13, + "jy_beam": 6.8656879230286e-13, + "jz_beam": 0.013038004315708, "rhomjz": 0.0 }, "beam": { "charge": 1.602176634e-14, "id": 5000050000, "mass": 9.1093837015e-26, - "x": 0.37632823449197, - "y": 4.6797140205514e-08, - "z": 0.079644759023598, - "ux": 5191150.7069722, - "uy": 0.7570356636811, - "uz": 196732680.56282, + "x": 0.37730114126382, + "y": 4.6692843134276e-08, + "z": 0.079861609444484, + "ux": 5180406.3905248, + "uy": 0.75757302600776, + "uz": 196741786.60696, "w": 3.8193025298947e-08 } }