Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize track vector data layout for particle types #1322

Merged
merged 12 commits into from
Aug 29, 2024
1 change: 1 addition & 0 deletions src/celeritas/Types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ char const* to_cstring(TrackOrder value)
{
static EnumStringMapper<TrackOrder> const to_cstring_impl{
"unsorted",
"partition_charge",
"shuffled",
"partition_status",
"sort_along_step_action",
Expand Down
4 changes: 3 additions & 1 deletion src/celeritas/Types.hh
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,10 @@ enum class StepPoint
enum class TrackOrder
{
unsorted, //!< Don't do any sorting: tracks are in an arbitrary order
// Reorder track data layout
partition_charge, //!< Partition data layout of tracks by charged/neutral
// Reorder track slot indices
shuffled, //!< Shuffle at the start of the simulation

partition_status, //!< Partition by status at the start of each step
sort_along_step_action, //!< Sort only by the along-step action id
sort_step_limit_action, //!< Sort only by the step limit action id
Expand Down
1 change: 1 addition & 0 deletions src/celeritas/global/CoreParams.cc
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ CoreParams::CoreParams(Input input) : input_(std::move(input))
switch (TrackOrder track_order = input_.init->host_ref().track_order)
{
case TrackOrder::unsorted:
case TrackOrder::partition_charge:
case TrackOrder::shuffled:
break;
case TrackOrder::partition_status:
Expand Down
3 changes: 2 additions & 1 deletion src/celeritas/global/CoreTrackData.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ void resize(CoreStateData<Ownership::value, M>* state,
resize(&state->init, params.init, size);
state->stream_id = stream_id;

if (params.init.track_order != TrackOrder::unsorted)
if (params.init.track_order != TrackOrder::unsorted
&& params.init.track_order != TrackOrder::partition_charge)
{
resize(&state->track_slots, size);
Span track_slots{
Expand Down
13 changes: 13 additions & 0 deletions src/celeritas/track/InitializeTracksAction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
#include "celeritas/global/ActionLauncher.hh"
#include "celeritas/global/CoreParams.hh"
#include "celeritas/global/CoreState.hh"
#include "celeritas/track/TrackInitParams.hh"

#include "detail/InitTracksExecutor.hh" // IWYU pragma: associated
#include "detail/TrackInitAlgorithms.hh"

namespace celeritas
{
Expand Down Expand Up @@ -61,6 +63,17 @@ void InitializeTracksAction::execute_impl(CoreParams const& core_params,
= std::min(counters.num_vacancies, counters.num_initializers);
if (num_new_tracks > 0 || core_state.warming_up())
{
if (core_params.init()->host_ref().track_order
== TrackOrder::partition_charge)
amandalund marked this conversation as resolved.
Show resolved Hide resolved
{
// Partition tracks by whether they are charged or neutral
detail::partition_initializers(core_params,
core_state.ref().init.initializers,
counters,
num_new_tracks,
core_state.stream_id());
}

// Launch a kernel to initialize tracks
this->execute_impl(core_params, core_state, num_new_tracks);

Expand Down
12 changes: 9 additions & 3 deletions src/celeritas/track/TrackInitData.hh
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,9 @@ struct TrackInitStateData
//! Whether the data are assigned
explicit CELER_FUNCTION operator bool() const
{
return !parents.empty() && secondary_counts.size() == parents.size() + 1
&& !track_counters.empty() && vacancies.size() == parents.size()
return secondary_counts.size() == vacancies.size() + 1
&& !track_counters.empty()
&& (vacancies.size() == parents.size() || parents.empty())
&& !initializers.empty();
}

Expand Down Expand Up @@ -169,7 +170,12 @@ void resize(TrackInitStateData<Ownership::value, M>* data,
CELER_EXPECT(M == MemSpace::host || celeritas::device());

// Allocate device data
resize(&data->parents, size);
if (params.track_order != TrackOrder::partition_charge)
{
// The parent's geometry states are not currently reused when
// partitioning the track slot data layout
resize(&data->parents, size);
}
resize(&data->secondary_counts, size + 1);
resize(&data->track_counters, params.max_events);

Expand Down
21 changes: 16 additions & 5 deletions src/celeritas/track/detail/InitTracksExecutor.hh
Original file line number Diff line number Diff line change
Expand Up @@ -78,25 +78,36 @@ CELER_FUNCTION void InitTracksExecutor::operator()(ThreadId tid) const
// parent they can copy the geometry state from.
auto const& data = state->init;
ItemId<TrackInitializer> idx{index_before(counters.num_initializers, tid)};
TrackInitializer const& init = data.initializers[idx];

// View to the new track to be initialized
CoreTrackView vacancy{
*params, *state, [&] {
TrackSlotId idx{index_before(counters.num_vacancies, tid)};
return data.vacancies[idx];
if (params->init.track_order == TrackOrder::partition_charge)
{
return data.vacancies[TrackSlotId(
index_partitioned(num_new_tracks,
counters.num_vacancies,
IsNeutral{params}(init),
tid))];
}
return data.vacancies[TrackSlotId(
index_before(counters.num_vacancies, tid))];
}()};

// Initialize the simulation state and particle attributes
TrackInitializer const& init = data.initializers[idx];
vacancy.make_sim_view() = init.sim;
vacancy.make_particle_view() = init.particle;

// Initialize the geometry
{
auto geo = vacancy.make_geo_view();
if (tid < counters.num_secondaries)
if (tid < counters.num_secondaries
&& params->init.track_order != TrackOrder::partition_charge)
{
// Copy the geometry state from the parent for improved performance
// Copy the geometry state from the parent for improved
// performance, unless the track initializers have been
// partitioned by charge
TrackSlotId parent_id = data.parents[TrackSlotId{
index_before(data.parents.size(), tid)}];
GeoTrackView const parent_geo(
Expand Down
3 changes: 2 additions & 1 deletion src/celeritas/track/detail/LocateAliveExecutor.hh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ CELER_FUNCTION void LocateAliveExecutor::operator()(TrackSlotId tid) const
// The track is alive: mark this track slot as occupied
return occupied();
}
else if (num_secondaries > 0)
else if (num_secondaries > 0
&& params->init.track_order != TrackOrder::partition_charge)
{
// The track was killed and produced secondaries: in this case, the
// empty track slot will be filled with the first secondary. Mark
Expand Down
6 changes: 4 additions & 2 deletions src/celeritas/track/detail/ProcessSecondariesExecutor.hh
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ ProcessSecondariesExecutor::operator()(TrackSlotId tid) const
ti.particle.energy = secondary.energy;
CELER_ASSERT(ti);

if (!initialized && sim.status() != TrackStatus::alive)
if (!initialized && sim.status() != TrackStatus::alive
&& params->init.track_order != TrackOrder::partition_charge)
amandalund marked this conversation as resolved.
Show resolved Hide resolved
{
ParticleTrackView particle(
params->particles, state->particles, tid);
Expand Down Expand Up @@ -155,7 +156,8 @@ ProcessSecondariesExecutor::operator()(TrackSlotId tid) const

// Store the thread ID of the secondary's parent if the
// secondary could be initialized in the next step
if (offset <= data.parents.size())
if (offset <= data.parents.size()
&& params->init.track_order != TrackOrder::partition_charge)
{
data.parents[TrackSlotId(data.parents.size() - offset)]
= tid;
Expand Down
18 changes: 18 additions & 0 deletions src/celeritas/track/detail/TrackInitAlgorithms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,24 @@ size_type exclusive_scan_counts(
return *(stop - 1);
}

//---------------------------------------------------------------------------//
/*!
* Sort the tracks that will be initialized in this step by charged/neutral.
*/
void partition_initializers(
CoreParams const& params,
Collection<TrackInitializer, Ownership::reference, MemSpace::host> const& init,
CoreStateCounters const& counters,
size_type count,
StreamId)
{
auto* end = static_cast<TrackInitializer*>(init.data())
+ counters.num_initializers;
auto* start = end - count;
std::stable_partition(
start, end, IsNeutral{params.ptr<MemSpace::native>()});
}

//---------------------------------------------------------------------------//
} // namespace detail
} // namespace celeritas
23 changes: 23 additions & 0 deletions src/celeritas/track/detail/TrackInitAlgorithms.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <thrust/device_ptr.h>
#include <thrust/execution_policy.h>
#include <thrust/partition.h>
#include <thrust/remove.h>
#include <thrust/scan.h>

Expand Down Expand Up @@ -76,6 +77,28 @@ size_type exclusive_scan_counts(
return *(stop - 1);
}

//---------------------------------------------------------------------------//
/*!
* Sort the tracks that will be initialized in this step by charged/neutral.
*/
void partition_initializers(
CoreParams const& params,
Collection<TrackInitializer, Ownership::reference, MemSpace::device> const&
init,
CoreStateCounters const& counters,
size_type count,
StreamId stream_id)
{
ScopedProfiling profile_this{"partition-initializers"};
auto end = device_pointer_cast(init.data()) + counters.num_initializers;
auto start = end - count;
thrust::stable_partition(thrust_execute_on(stream_id),
start,
end,
IsNeutral{params.ptr<MemSpace::native>()});
CELER_DEVICE_CHECK_ERROR();
}

//---------------------------------------------------------------------------//
} // namespace detail
} // namespace celeritas
29 changes: 29 additions & 0 deletions src/celeritas/track/detail/TrackInitAlgorithms.hh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#include "corecel/Types.hh"
#include "corecel/data/Collection.hh"
#include "corecel/sys/ThreadId.hh"
#include "celeritas/global/CoreParams.hh"
#include "celeritas/track/CoreStateCounters.hh"

#include "Utils.hh"

namespace celeritas
{
Expand All @@ -35,6 +39,21 @@ size_type exclusive_scan_counts(
StateCollection<size_type, Ownership::reference, MemSpace::device> const&,
StreamId);

//---------------------------------------------------------------------------//
// Sort the tracks that will be initialized in this step by charged/neutral
void partition_initializers(
CoreParams const&,
Collection<TrackInitializer, Ownership::reference, MemSpace::host> const&,
CoreStateCounters const&,
size_type,
StreamId);
void partition_initializers(
CoreParams const&,
Collection<TrackInitializer, Ownership::reference, MemSpace::device> const&,
CoreStateCounters const&,
size_type,
StreamId);

//---------------------------------------------------------------------------//
// INLINE DEFINITIONS
//---------------------------------------------------------------------------//
Expand All @@ -53,6 +72,16 @@ inline size_type exclusive_scan_counts(
CELER_NOT_CONFIGURED("CUDA or HIP");
}

inline void partition_initializers(
CoreParams const&,
Collection<TrackInitializer, Ownership::reference, MemSpace::device> const&,
CoreStateCounters const&,
size_type,
StreamId)
{
CELER_NOT_CONFIGURED("CUDA or HIP");
}

#endif
//---------------------------------------------------------------------------//
} // namespace detail
Expand Down
31 changes: 31 additions & 0 deletions src/celeritas/track/detail/Utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include "corecel/Types.hh"
#include "corecel/data/Collection.hh"
#include "corecel/sys/ThreadId.hh"
#include "celeritas/global/CoreTrackData.hh"
#include "celeritas/phys/ParticleView.hh"

namespace celeritas
{
Expand All @@ -25,6 +27,21 @@ struct IsEqual
CELER_FUNCTION bool operator()(TrackSlotId x) const { return x == value; }
};

//---------------------------------------------------------------------------//
//! Predicate for sorting charged from neutral tracks
struct IsNeutral
{
using ParamsPtr = CRefPtr<CoreParamsData, MemSpace::native>;

ParamsPtr params;

CELER_FUNCTION bool operator()(TrackInitializer const& ti) const
{
return ParticleView(params->particles, ti.particle.particle_id).charge()
== zero_quantity();
}
};

//---------------------------------------------------------------------------//
//! Indicate that a track slot is occupied by a still-alive track
CELER_CONSTEXPR_FUNCTION TrackSlotId occupied()
Expand All @@ -48,6 +65,20 @@ CELER_FORCEINLINE_FUNCTION size_type index_after(size_type size, ThreadId tid)
return size + tid.unchecked_get();
}

//---------------------------------------------------------------------------//
//! Get an initializer index starting from one end or the other
CELER_FORCEINLINE_FUNCTION size_type index_partitioned(size_type num_new_tracks,
size_type num_vacancies,
bool get_from_front,
ThreadId tid)
{
CELER_EXPECT(tid.get() < num_new_tracks);
CELER_EXPECT(num_new_tracks <= num_vacancies);

return get_from_front ? index_before(num_new_tracks, tid)
: index_before(num_vacancies, tid);
}

//---------------------------------------------------------------------------//
} // namespace detail
} // namespace celeritas
Loading
Loading