Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix resetting of CUDA streams when running through accel #927

Merged
merged 10 commits into from
Sep 7, 2023
3 changes: 3 additions & 0 deletions src/accel/SharedParams.cc
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ void SharedParams::Finalize()
CELER_LOG_LOCAL(debug) << "Resetting shared parameters";
*this = {};

// Reset streams before the static destructor does
celeritas::device().create_streams(0);

CELER_ENSURE(!*this);
}

Expand Down
14 changes: 11 additions & 3 deletions src/corecel/sys/Device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ Device::Device(int id) : id_{id}, streams_{new detail::StreamStorage{}}
{
CELER_EXPECT(id >= 0 && id < Device::num_devices());

CELER_LOG_LOCAL(debug) << "Constructing device ID " << id;

unsigned int max_threads_per_block = 0;
#if CELER_USE_DEVICE
# if CELERITAS_USE_CUDA
Expand Down Expand Up @@ -242,8 +244,8 @@ Device::Device(int id) : id_{id}, streams_{new detail::StreamStorage{}}
*/
StreamId::size_type Device::num_streams() const
{
CELER_EXPECT(streams_);

if (!streams_)
return 0;
return streams_->size();
}

Expand Down Expand Up @@ -314,7 +316,13 @@ void activate_device(Device&& device)
// the global_device function (in debug mode) also uses this lock.
std::lock_guard<std::mutex> scoped_lock{device_setter_mutex()};
CELER_DEVICE_CALL_PREFIX(SetDevice(device.device_id()));
d = std::move(device);
if (!d || (d.device_id() != device.device_id()))
{
// The device ID is different than the global device; replace it
auto num_streams = d.num_streams();
d = std::move(device);
d.create_streams(num_streams);
}
}

// Call cudaFree to wake up the device, making other timers more accurate
Expand Down
9 changes: 9 additions & 0 deletions src/corecel/sys/Stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ void AsyncMemoryResource<Pointer>::do_deallocate([[maybe_unused]] pointer p,
Stream::Stream() : memory_resource_(stream_)
{
CELER_DEVICE_CALL_PREFIX(StreamCreate(&stream_));
#if CUDART_VERSION >= 12000
unsigned long long stream_id = -1;
CELER_CUDA_CALL(cudaStreamGetId(stream_, &stream_id));
CELER_LOG_LOCAL(debug) << "Created stream ID " << stream_id;
#else
CELER_LOG_LOCAL(debug) << "Created stream " << static_cast<void*>(stream_);
#endif
}

//---------------------------------------------------------------------------//
Expand All @@ -80,6 +87,8 @@ Stream::~Stream()
try
{
CELER_DEVICE_CALL_PREFIX(StreamDestroy(stream_));
CELER_LOG_LOCAL(debug)
<< "Destroyed stream " << static_cast<void*>(stream_);
}
catch (RuntimeError const& e)
{
Expand Down