Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename max_num_tracks to num_track_slots and divide by num_streams #785

Merged
merged 7 commits into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions app/celer-sim/Runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "corecel/io/Logger.hh"
#include "corecel/io/OutputRegistry.hh"
#include "corecel/io/StringUtils.hh"
#include "corecel/math/Algorithms.hh"
#include "corecel/sys/Device.hh"
#include "corecel/sys/Environment.hh"
#include "corecel/sys/ScopedMem.hh"
Expand Down Expand Up @@ -317,13 +318,14 @@ void Runner::build_core_params(RunnerInput const& inp,
*/
void Runner::build_transporter_input(RunnerInput const& inp)
{
CELER_VALIDATE(inp.max_num_tracks > 0,
<< "nonpositive max_num_tracks=" << inp.max_num_tracks);
CELER_VALIDATE(inp.num_track_slots > 0,
<< "nonpositive num_track_slots=" << inp.num_track_slots);
CELER_VALIDATE(inp.max_steps > 0,
<< "nonpositive max_steps=" << inp.max_steps);

transporter_input_ = std::make_shared<TransporterInput>();
transporter_input_->num_track_slots = inp.max_num_tracks;
transporter_input_->num_track_slots
= ceil_div(inp.num_track_slots, core_params_->max_streams());
transporter_input_->max_steps = inp.max_steps;
transporter_input_->sync = inp.sync;
transporter_input_->params = core_params_;
Expand Down
18 changes: 0 additions & 18 deletions app/celer-sim/Runner.hh
Original file line number Diff line number Diff line change
Expand Up @@ -32,24 +32,6 @@ namespace app
//---------------------------------------------------------------------------//
struct RunnerInput;

//---------------------------------------------------------------------------//
/*!
* Results from transporting all events.
*/
struct SimulationResult
{
//// DATA ////

real_type total_time{}; //!< Total simulation time
real_type setup_time{}; //!< One-time initialization cost
std::vector<TransporterResult> events; //!< Results tallied for each event
};

//---------------------------------------------------------------------------//
/*!
* Manage execution of Celeritas.
Expand Down
4 changes: 2 additions & 2 deletions app/celer-sim/RunnerInput.hh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ struct RunnerInput

// Control
unsigned int seed{};
size_type max_num_tracks{};
size_type num_track_slots{}; //!< Divided among streams
size_type max_steps{unspecified};
size_type initializer_capacity{};
size_type max_events{};
Expand Down Expand Up @@ -86,7 +86,7 @@ struct RunnerInput
{
return !geometry_filename.empty() && !physics_filename.empty()
&& (primary_gen_options || !hepmc3_filename.empty())
&& max_num_tracks > 0 && max_steps > 0
&& num_track_slots > 0 && max_steps > 0
&& initializer_capacity > 0 && max_events > 0
&& secondary_stack_factor > 0
&& (step_diagnostic_maxsteps > 0 || !step_diagnostic)
Expand Down
11 changes: 9 additions & 2 deletions app/celer-sim/RunnerInputIO.json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "corecel/cont/ArrayIO.json.hh"
#include "corecel/io/LabelIO.json.hh"
#include "corecel/io/Logger.hh"
#include "corecel/io/StringEnumMapper.hh"
#include "corecel/io/StringUtils.hh"
#include "corecel/sys/EnvironmentIO.json.hh"
Expand Down Expand Up @@ -74,8 +75,14 @@ void from_json(nlohmann::json const& j, RunnerInput& v)
LDIO_LOAD_OPTION(step_diagnostic);
LDIO_LOAD_OPTION(step_diagnostic_maxsteps);

if (j.contains("max_num_tracks"))
{
CELER_LOG(warning) << "Deprecated option 'max_num_tracks'";
j.at("max_num_tracks").get_to(v.num_track_slots);
}

LDIO_LOAD_OPTION(seed);
LDIO_LOAD_OPTION(max_num_tracks);
LDIO_LOAD_OPTION(num_track_slots);
LDIO_LOAD_OPTION(max_steps);
LDIO_LOAD_REQUIRED(initializer_capacity);
LDIO_LOAD_REQUIRED(max_events);
Expand Down Expand Up @@ -147,7 +154,7 @@ void to_json(nlohmann::json& j, RunnerInput const& v)
LDIO_SAVE_OPTION(step_diagnostic_maxsteps);

LDIO_SAVE_OPTION(seed);
LDIO_SAVE_OPTION(max_num_tracks);
LDIO_SAVE_OPTION(num_track_slots);
LDIO_SAVE_OPTION(max_steps);
LDIO_SAVE_REQUIRED(initializer_capacity);
LDIO_SAVE_REQUIRED(max_events);
Expand Down
4 changes: 4 additions & 0 deletions app/celer-sim/RunnerOutput.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ void RunnerOutput::output(JsonPimpl* j) const
auto active = json::array();
auto alive = json::array();
auto initializers = json::array();
auto num_track_slots = json::array();
auto step_times = json::array();
MapStrReal action_times;

Expand All @@ -58,6 +59,7 @@ void RunnerOutput::output(JsonPimpl* j) const
active.push_back(event.active);
alive.push_back(event.alive);
initializers.push_back(event.initializers);
num_track_slots.push_back(event.num_track_slots);
if (!event.step_times.empty())
{
step_times.push_back(event.step_times);
Expand All @@ -71,12 +73,14 @@ void RunnerOutput::output(JsonPimpl* j) const
obj["active"] = std::move(active);
obj["alive"] = std::move(alive);
obj["initializers"] = std::move(initializers);
obj["num_track_slots"] = std::move(num_track_slots);
obj["time"] = {
{"steps", std::move(step_times)},
{"actions", std::move(action_times)},
{"total", result_.total_time},
{"setup", result_.setup_time},
};
obj["num_streams"] = result_.num_streams;

j->obj = std::move(obj);
#else
Expand Down
16 changes: 15 additions & 1 deletion app/celer-sim/RunnerOutput.hh
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,26 @@

#include "corecel/io/OutputInterface.hh"

#include "Runner.hh"
#include "Transporter.hh"

namespace celeritas
{
namespace app
{
//---------------------------------------------------------------------------//
/*!
 * Results from transporting all events.
 *
 * Plain aggregate holding timing data, the per-event transport results, and
 * the stream count. An instance is handed to \c RunnerOutput, which writes
 * these values to the output.
 */
struct SimulationResult
{
//// DATA ////

real_type total_time{}; //!< Total simulation time
real_type setup_time{}; //!< One-time initialization cost
std::vector<TransporterResult> events; //!< Results tallied for each event
size_type num_streams{}; //!< Number of CPU/OpenMP threads
};

//---------------------------------------------------------------------------//
/*!
* Output demo loop results.
Expand Down
1 change: 1 addition & 0 deletions app/celer-sim/Transporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ auto Transporter<M>::operator()(SpanConstPrimary primaries)
result.action_times[label] = times[i];
}
}
result.num_track_slots = stepper_->state().size();
return result;
}

Expand Down
1 change: 1 addition & 0 deletions app/celer-sim/Transporter.hh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ struct TransporterResult
VecCount alive; //!< Num living tracks at end of step
MapStrReal action_times{}; //!< Accumulated action timing
VecReal step_times; //!< Real time per step
size_type num_track_slots{}; //!< Number of total track slots
};

//---------------------------------------------------------------------------//
Expand Down
5 changes: 5 additions & 0 deletions app/celer-sim/celer-sim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "celeritas_config.h"
#include "celeritas_version.h"
#include "corecel/device_runtime_api.h"
#include "corecel/io/BuildOutput.hh"
#include "corecel/io/ExceptionOutput.hh"
#include "corecel/io/Logger.hh"
Expand Down Expand Up @@ -100,6 +101,7 @@ void run(std::istream* is, std::shared_ptr<OutputRegistry> output)
CELER_ASSERT(device());
device().create_streams(num_streams);
}
result.num_streams = num_streams;

Stopwatch get_transport_time;
if (run_input->merge_events)
Expand All @@ -125,6 +127,8 @@ void run(std::istream* is, std::shared_ptr<OutputRegistry> output)
#endif
for (size_type event = 0; event < run_stream.num_events(); ++event)
{
activate_device_local();

// Run a single event on a single thread
CELER_TRY_HANDLE(result.events[event] = run_stream(
StreamId(get_openmp_thread()), EventId(event)),
Expand All @@ -133,6 +137,7 @@ void run(std::istream* is, std::shared_ptr<OutputRegistry> output)
log_and_rethrow(std::move(capture_exception));
}
result.total_time = get_transport_time();
record_mem = {};
output->insert(std::make_shared<RunnerOutput>(std::move(result)));
}

Expand Down
2 changes: 1 addition & 1 deletion app/celer-sim/simple-driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
'hepmc3_filename': hepmc3_filename,
'mctruth_filename': rootout_filename,
'seed': 12345,
'max_num_tracks': num_tracks,
'num_track_slots': num_tracks,
'max_steps': max_steps,
'initializer_capacity': 100 * max([num_tracks, num_primaries]),
'max_events': 1000,
Expand Down
42 changes: 35 additions & 7 deletions src/celeritas/global/CoreState.hh
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,48 @@
namespace celeritas
{
class CoreParams;
//---------------------------------------------------------------------------//
/*!
 * Abstract base class for CoreState.
 *
 * Declares the operations of a core state that do not depend on the memory
 * space template parameter of \c CoreState, so that a state can be queried
 * through a non-templated reference.
 */
class CoreStateInterface
{
public:
//!@{
//! \name Type aliases
using size_type = TrackSlotId::size_type;
using PrimaryRange = ItemRange<Primary>;
//!@}

public:
//! Thread/stream ID
virtual StreamId stream_id() const = 0;

//! Number of track slots
virtual size_type size() const = 0;

//! Access track initialization counters
virtual CoreStateCounters const& counters() const = 0;

//! Inject primaries to be turned into TrackInitializers
virtual void insert_primaries(Span<Primary const> host_primaries) = 0;

protected:
// Protected nonvirtual destructor prevents deletion through a
// pointer-to-interface
~CoreStateInterface() = default;
};

//---------------------------------------------------------------------------//
/*!
* Store all state data for a single thread.
*/
template<MemSpace M>
class CoreState
class CoreState final : public CoreStateInterface
{
public:
//!@{
//! \name Type aliases
using Ref = CoreStateData<Ownership::reference, M>;
using Ptr = ObserverPtr<Ref, M>;
using size_type = TrackSlotId::size_type;
using PrimaryRange = ItemRange<Primary>;
using PrimaryCRef = Collection<Primary, Ownership::const_reference, M>;
//!@}

Expand All @@ -46,10 +74,10 @@ class CoreState
size_type num_track_slots);

//! Thread/stream ID
StreamId stream_id() const { return this->ref().stream_id; }
StreamId stream_id() const final { return this->ref().stream_id; }

//! Number of track slots
size_type size() const { return states_.size(); }
size_type size() const final { return states_.size(); }

//! Get a reference to the mutable state data
Ref& ref() { return states_.ref(); }
Expand All @@ -64,12 +92,12 @@ class CoreState
CoreStateCounters& counters() { return counters_; }

//! Track initialization counters
CoreStateCounters const& counters() const { return counters_; }
CoreStateCounters const& counters() const final { return counters_; }

//// PRIMARY STORAGE ////

// Inject primaries to be turned into TrackInitializers
void insert_primaries(Span<Primary const> host_primaries);
void insert_primaries(Span<Primary const> host_primaries) final;

// Get the range of valid primaries
inline PrimaryRange primary_range() const;
Expand Down
6 changes: 6 additions & 0 deletions src/celeritas/global/Stepper.hh
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ class StepperInterface
//! Get action sequence for timing diagnostics
virtual ActionSequence const& actions() const = 0;

//! Get the core state interface
virtual CoreStateInterface const& state() const = 0;

protected:
// Protected destructor prevents deletion of pointer-to-interface
~StepperInterface() = default;
Expand Down Expand Up @@ -142,6 +145,9 @@ class Stepper final : public StepperInterface
//! Access core data, primarily for debugging
StateRef const& state_ref() const { return state_.ref(); }

//! Get the core state interface for diagnostic output
CoreStateInterface const& state() const final { return state_; }

private:
// Params and call sequence
std::shared_ptr<CoreParams const> params_;
Expand Down
17 changes: 17 additions & 0 deletions src/corecel/sys/Device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,23 @@ void activate_device(MpiCommunicator const& comm)
}
}

//---------------------------------------------------------------------------//
/*!
 * Re-select the already-activated device for the calling thread.
 *
 * This issues the runtime's "set device" call with the ID of the global
 * \c device() so that a worker thread does not rely on whichever device
 * happens to be current for it. It does nothing when no device is active.
 *
 * Background:
 * https://developer.nvidia.com/blog/cuda-pro-tip-always-set-current-device-avoid-multithreading-bugs
 *
 * \pre activate_device was called to set \c device()
 */
void activate_device_local()
{
    if (!device())
    {
        // No device is active: nothing to select
        return;
    }

    CELER_DEVICE_CALL_PREFIX(SetDevice(device().device_id()));
}

//---------------------------------------------------------------------------//
/*!
* Print device info.
Expand Down
3 changes: 3 additions & 0 deletions src/corecel/sys/Device.hh
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ void activate_device();
// Initialize a device in a round-robin fashion from a communicator.
void activate_device(MpiCommunicator const&);

// Call cudaSetDevice using the existing device, for thread-local safety
void activate_device_local();

// Print device info
std::ostream& operator<<(std::ostream&, Device const&);

Expand Down
2 changes: 2 additions & 0 deletions src/corecel/sys/ScopedMem.hh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class ScopedMem
{
public:
// Default constructor for "null-op" recording
ScopedMem() = default;

// Construct with name and registries
ScopedMem(std::string_view label, MemRegistry* registry);

Expand Down