Skip to content

Commit

Permalink
Reduce memory footprint of plasma initialization (#885)
Browse files Browse the repository at this point in the history
* Reduce memory footprint of plasma initialization

* add more doc
  • Loading branch information
AlexanderSinn authored Mar 10, 2023
1 parent c24580f commit a9316e1
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 59 deletions.
2 changes: 1 addition & 1 deletion src/laser/MultiLaser.H
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ namespace WhichLaserSlice {
np1jp2_i,
N
};
};
}

class Fields;

Expand Down
6 changes: 3 additions & 3 deletions src/laser/MultiLaser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,9 @@ MultiLaser::GetEnvelopeFromFile (const amrex::Box& domain) {
//hipace: xyt in Fortran order
//lasy: tyx in C order

if (domain.length(0) != extend[2] ||
domain.length(1) != extend[1] ||
domain.length(2) != extend[0]) {
if (static_cast<std::uint64_t>(domain.length(0)) != extend[2] ||
static_cast<std::uint64_t>(domain.length(1)) != extend[1] ||
static_cast<std::uint64_t>(domain.length(2)) != extend[0]) {
amrex::Abort("Incompatible box sizes. HiPACE++ (" +
std::to_string(domain.length(0)) + ", " + std::to_string(domain.length(1)) + ", "
+ std::to_string(domain.length(2)) + "), "
Expand Down
138 changes: 83 additions & 55 deletions src/particles/plasma/PlasmaParticleContainerInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,28 +58,69 @@ InitParticles (const amrex::IntVect& a_num_particles_per_cell,

for(amrex::MFIter mfi = MakeMFIter(lev, DfltMfi); mfi.isValid(); ++mfi)
{

const amrex::Box& tile_box = mfi.tilebox(box_nodal, box_grow);

const auto lo = amrex::lbound(tile_box);
const auto hi = amrex::ubound(tile_box);

amrex::Gpu::DeviceVector<unsigned int> counts(tile_box.numPts()*num_ppc, 0);
unsigned int* pcount = counts.dataPtr();

amrex::Gpu::DeviceVector<unsigned int> offsets(tile_box.numPts()*num_ppc);
unsigned int* poffset = offsets.dataPtr();

UpdateDensityFunction();
auto density_func = m_density_func;
const amrex::Real c_light = get_phys_const().c;
const amrex::Real c_t = c_light * Hipace::m_physical_time;
const amrex::Real min_density = m_min_density;

amrex::ParallelFor(tile_box,
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
// Count the total number of particles so only one resize is needed
const amrex::Long total_num_particles = amrex::Reduce::Sum<amrex::Long>(tile_box.numPts(),
[=] AMREX_GPU_DEVICE (amrex::Long idx) noexcept
{
auto [i,j,k] = tile_box.atOffset3d(idx).arr;

amrex::Long num_particles_cell = 0;
for (int i_part=0; i_part<num_ppc; ++i_part)
{
amrex::Real r[3];
ParticleUtil::get_position_unit_cell(r, a_num_particles_per_cell, i_part);

amrex::Real x = plo[0] + (i + r[0] + x_offset)*dx[0];
amrex::Real y = plo[1] + (j + r[1] + y_offset)*dx[1];

const amrex::Real rsq = x*x + y*y;
if (x >= a_bounds.hi(0) || x < a_bounds.lo(0) ||
y >= a_bounds.hi(1) || y < a_bounds.lo(1) ||
rsq > a_radius*a_radius ||
rsq < a_hollow_core_radius*a_hollow_core_radius ||
density_func(x, y, c_t) < min_density) continue;

num_particles_cell += 1;
}
return num_particles_cell;
});

auto& particles = GetParticles(lev);
auto& particle_tile = particles[std::make_pair(mfi.index(), mfi.LocalTileIndex())];

auto old_size = particle_tile.GetArrayOfStructs().size();
auto new_size = old_size + total_num_particles;
particle_tile.resize(new_size);
m_init_num_par[mfi.tileIndex()] = new_size;

int procID = amrex::ParallelDescriptor::MyProc();
int pid = ParticleType::NextID();
ParticleType::NextID(pid + total_num_particles);

// The loop over particles is outside the loop over cells
// so that particles in the same cell are far apart.
// This makes current deposition faster.
for (int i_part=0; i_part<num_ppc; ++i_part)
{
for (int i_part=0; i_part<num_ppc;i_part++)
amrex::Gpu::DeviceVector<unsigned int> counts(tile_box.numPts(), 0);
unsigned int* pcount = counts.dataPtr();

amrex::Gpu::DeviceVector<unsigned int> offsets(tile_box.numPts());
unsigned int* poffset = offsets.dataPtr();

amrex::ParallelFor(tile_box,
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
amrex::Real r[3];

Expand All @@ -93,7 +134,7 @@ InitParticles (const amrex::IntVect& a_num_particles_per_cell,
y >= a_bounds.hi(1) || y < a_bounds.lo(1) ||
rsq > a_radius*a_radius ||
rsq < a_hollow_core_radius*a_hollow_core_radius ||
density_func(x, y, c_t) < min_density) continue;
density_func(x, y, c_t) < min_density) return;

int ix = i - lo.x;
int iy = j - lo.y;
Expand All @@ -113,69 +154,54 @@ InitParticles (const amrex::IntVect& a_num_particles_per_cell,
// y
// z (not used)
// ppc
cellid = ((i_part * nz + uiz) * ny + uiy) * nx +
cellid = (uiz * ny + uiy) * nx +
uix/depos_order_1 + ((uix%depos_order_1)*nx+depos_order_1-1)/depos_order_1;
} else {
// ordering of axes from fastest to slowest:
// x
// y
// z (not used)
// ppc
cellid = ((i_part * nz + uiz) * ny + uiy) * nx + uix;
cellid = (uiz * ny + uiy) * nx + uix;
}

pcount[cellid] = 1;
}
});
});

int num_to_add = amrex::Scan::ExclusiveSum(counts.size(), counts.data(), offsets.data());
unsigned int num_to_add =
amrex::Scan::ExclusiveSum(counts.size(), counts.data(), offsets.data());

auto& particles = GetParticles(lev);
auto& particle_tile = particles[std::make_pair(mfi.index(), mfi.LocalTileIndex())];

auto old_size = particle_tile.GetArrayOfStructs().size();
auto new_size = old_size + num_to_add;
particle_tile.resize(new_size);

m_init_num_par[mfi.tileIndex()] = new_size;
if (num_to_add == 0) continue;

if (num_to_add == 0) continue;
ParticleType* pstruct = particle_tile.GetArrayOfStructs()().data();

ParticleType* pstruct = particle_tile.GetArrayOfStructs()().data();
auto arrdata = particle_tile.GetStructOfArrays().realarray();
auto int_arrdata = particle_tile.GetStructOfArrays().intarray();

auto arrdata = particle_tile.GetStructOfArrays().realarray();
auto int_arrdata = particle_tile.GetStructOfArrays().intarray();
const int init_ion_lev = m_init_ion_lev;

int procID = amrex::ParallelDescriptor::MyProc();
int pid = ParticleType::NextID();
ParticleType::NextID(pid + num_to_add);

const int init_ion_lev = m_init_ion_lev;

amrex::ParallelForRNG(tile_box,
[=] AMREX_GPU_DEVICE (int i, int j, int k, const amrex::RandomEngine& engine) noexcept
{
int ix = i - lo.x;
int iy = j - lo.y;
int iz = k - lo.z;
int nx = hi.x-lo.x+1;
int ny = hi.y-lo.y+1;
int nz = hi.z-lo.z+1;
unsigned int uix = amrex::min(nx-1,amrex::max(0,ix));
unsigned int uiy = amrex::min(ny-1,amrex::max(0,iy));
unsigned int uiz = amrex::min(nz-1,amrex::max(0,iz));

for (int i_part=0; i_part<num_ppc;i_part++)
amrex::ParallelForRNG(tile_box,
[=] AMREX_GPU_DEVICE (int i, int j, int k, const amrex::RandomEngine& engine) noexcept
{
int ix = i - lo.x;
int iy = j - lo.y;
int iz = k - lo.z;
int nx = hi.x-lo.x+1;
int ny = hi.y-lo.y+1;
int nz = hi.z-lo.z+1;
unsigned int uix = amrex::min(nx-1,amrex::max(0,ix));
unsigned int uiy = amrex::min(ny-1,amrex::max(0,iy));
unsigned int uiz = amrex::min(nz-1,amrex::max(0,iz));

unsigned int cellid = 0;
if (outer_depos_loop) {
cellid = ((i_part * nz + uiz) * ny + uiy) * nx +
cellid = (uiz * ny + uiy) * nx +
uix/depos_order_1 + ((uix%depos_order_1)*nx+depos_order_1-1)/depos_order_1;
} else {
cellid = ((i_part * nz + uiz) * ny + uiy) * nx + uix;
cellid = (uiz * ny + uiy) * nx + uix;
}

const int pidx = int(poffset[cellid] - poffset[0]);
const amrex::Long pidx = poffset[cellid] - poffset[0] + old_size;

amrex::Real r[3] = {0.,0.,0.};

Expand All @@ -190,13 +216,13 @@ InitParticles (const amrex::IntVect& a_num_particles_per_cell,
y >= a_bounds.hi(1) || y < a_bounds.lo(1) ||
rsq > a_radius*a_radius ||
rsq < a_hollow_core_radius*a_hollow_core_radius ||
density_func(x, y, c_t) < min_density) continue;
density_func(x, y, c_t) < min_density) return;

amrex::Real u[3] = {0.,0.,0.};
ParticleUtil::get_gaussian_random_momentum(u, a_u_mean, a_u_std, engine);

ParticleType& p = pstruct[pidx];
p.id() = pid + pidx;
p.id() = pid + int(pidx);
p.cpu() = procID;
p.pos(0) = x;
p.pos(1) = y;
Expand All @@ -223,8 +249,10 @@ InitParticles (const amrex::IntVect& a_num_particles_per_cell,
arrdata[PlasmaIdx::x0 ][pidx] = x;
arrdata[PlasmaIdx::y0 ][pidx] = y;
int_arrdata[PlasmaIdx::ion_lev][pidx] = init_ion_lev;
}
});
});

old_size += num_to_add;
}
}

AMREX_ASSERT(OK());
Expand Down

0 comments on commit a9316e1

Please sign in to comment.