diff --git a/app/celer-g4/GlobalSetup.cc b/app/celer-g4/GlobalSetup.cc index 36f4648c28..f616713efb 100644 --- a/app/celer-g4/GlobalSetup.cc +++ b/app/celer-g4/GlobalSetup.cc @@ -167,6 +167,7 @@ void GlobalSetup::ReadInput(std::string const& filename) options_->cuda_heap_size = input_.cuda_heap_size; options_->sync = input_.sync; options_->default_stream = input_.default_stream; + options_->auto_flush = input_.auto_flush; } else if (ends_with(filename, ".mac")) { diff --git a/app/celer-g4/RunInput.cc b/app/celer-g4/RunInput.cc index 157592574b..67b1e74e02 100644 --- a/app/celer-g4/RunInput.cc +++ b/app/celer-g4/RunInput.cc @@ -51,7 +51,8 @@ RunInput::operator bool() const && physics_list < PhysicsListSelection::size_ && (field == no_field() || field_options) && ((num_track_slots > 0 && max_steps > 0 - && initializer_capacity > 0 && secondary_stack_factor > 0) + && initializer_capacity > 0 && secondary_stack_factor > 0 + && auto_flush > 0 && auto_flush <= initializer_capacity) || SharedParams::CeleritasDisabled()) && (step_diagnostic_bins > 0 || !step_diagnostic); } diff --git a/app/celer-g4/RunInput.hh b/app/celer-g4/RunInput.hh index 99f48bc1c6..d8c17adee1 100644 --- a/app/celer-g4/RunInput.hh +++ b/app/celer-g4/RunInput.hh @@ -67,6 +67,7 @@ struct RunInput size_type max_steps{unspecified}; size_type initializer_capacity{}; real_type secondary_stack_factor{}; + size_type auto_flush{}; //!< Defaults to num_track_slots bool sync{false}; bool default_stream{false}; //!< Launch all kernels on the default stream diff --git a/app/celer-g4/RunInputIO.json.cc b/app/celer-g4/RunInputIO.json.cc index b14bcd7f32..10130f2301 100644 --- a/app/celer-g4/RunInputIO.json.cc +++ b/app/celer-g4/RunInputIO.json.cc @@ -72,6 +72,14 @@ void from_json(nlohmann::json const& j, RunInput& v) RI_LOAD_OPTION(secondary_stack_factor); RI_LOAD_OPTION(sync); RI_LOAD_OPTION(default_stream); + if (auto iter = j.find("auto_flush"); iter != j.end()) + { + iter->get_to(v.auto_flush); + } + else + { + v.auto_flush = v.num_track_slots; + } RI_LOAD_OPTION(physics_list); RI_LOAD_OPTION(physics_options); @@ -162,6 +170,7 @@ void to_json(nlohmann::json& j, RunInput const& v) RI_SAVE_OPTION(cuda_heap_size); RI_SAVE(sync); RI_SAVE(default_stream); + RI_SAVE(auto_flush); RI_SAVE(physics_list); if (v.physics_list == PhysicsListSelection::geant_physics_list) diff --git a/src/accel/LocalTransporter.cc b/src/accel/LocalTransporter.cc index 270b345929..52743ed2ba 100644 --- a/src/accel/LocalTransporter.cc +++ b/src/accel/LocalTransporter.cc @@ -50,7 +50,8 @@ namespace celeritas */ LocalTransporter::LocalTransporter(SetupOptions const& options, SharedParams const& params) - : auto_flush_(options.max_num_tracks) + : auto_flush_(options.auto_flush ? options.auto_flush + : options.max_num_tracks) , max_steps_(options.max_steps) , dump_primaries_{params.offload_writer()} , hit_manager_{params.hit_manager()} diff --git a/src/accel/SetupOptions.hh b/src/accel/SetupOptions.hh index 44df17d859..d3d24aa8b2 100644 --- a/src/accel/SetupOptions.hh +++ b/src/accel/SetupOptions.hh @@ -126,6 +126,8 @@ struct SetupOptions size_type initializer_capacity{}; //! At least the average number of secondaries per track slot real_type secondary_stack_factor{3.0}; + //! Number of tracks to buffer before offloading (if unset: max num tracks) + size_type auto_flush{}; //!@} //! Set the number of streams (defaults to run manager # threads) diff --git a/src/accel/SetupOptionsMessenger.cc b/src/accel/SetupOptionsMessenger.cc index 3be3b41ce6..7c51cf226f 100644 --- a/src/accel/SetupOptionsMessenger.cc +++ b/src/accel/SetupOptionsMessenger.cc @@ -190,6 +190,9 @@ SetupOptionsMessenger::SetupOptionsMessenger(SetupOptions* options) add_cmd(&options->secondary_stack_factor, "secondaryStackFactor", "At least the average number of secondaries per track slot"); + add_cmd(&options->auto_flush, + "autoFlush", + "Number of tracks to buffer before offloading"); directories_.emplace_back(new CelerDirectory( "/celer/detector/", "Celeritas sensitive detector setup options")); diff --git a/src/accel/SetupOptionsMessenger.hh b/src/accel/SetupOptionsMessenger.hh index 60344b306e..e4657f65f9 100644 --- a/src/accel/SetupOptionsMessenger.hh +++ b/src/accel/SetupOptionsMessenger.hh @@ -33,6 +33,7 @@ struct SetupOptions; maxNumSteps | Limit on number of step iterations before aborting maxInitializers | Maximum number of track initializers secondaryStackFactor | At least the average number of secondaries per track + autoFlush | Number of tracks to buffer before offloading * The following option is exposed in the \c /celer/detector/ command * "directory":