From e61e9be2e04a2eec4fdb234946dc24e97875fdc7 Mon Sep 17 00:00:00 2001 From: Haoran Xu Date: Tue, 15 Sep 2020 17:49:08 -0700 Subject: [PATCH] Fix a deadlock condition (#331) In OpenSegment(), we are holding a lock to do a bunch of operations, which includes bumping an epoch. Bumping an epoch could trigger drain callbacks, which includes the flush callback. The flush callback may in turn call OpenSegment() again, which would take the lock again, resulting in a self-deadlock. The solution is to release the lock before bumping epoch: the lock is only to protect the file list, which is already releasable at this point. --- cc/src/device/file_system_disk.h | 46 ++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/cc/src/device/file_system_disk.h b/cc/src/device/file_system_disk.h index 1e074418c..b2a7c5db5 100644 --- a/cc/src/device/file_system_disk.h +++ b/cc/src/device/file_system_disk.h @@ -87,6 +87,38 @@ class FileSystemFile { environment::FileOptions file_options_; }; +// Similar to std::lock_guard, but allows manual early unlock +// +class ReleasableLockGuard +{ +public: + ReleasableLockGuard(std::mutex* mutex) + { + m_mutex = mutex; + m_mutex->lock(); + m_released = false; + } + + ~ReleasableLockGuard() + { + if (!m_released) + { + m_mutex->unlock(); + } + } + + void Unlock() + { + assert(!m_released); + m_mutex->unlock(); + m_released = true; + } + +private: + std::mutex* m_mutex; + bool m_released; +}; + /// Manages a bundle of segment files. template class FileSystemSegmentBundle { @@ -310,7 +342,7 @@ class FileSystemSegmentedFile { }; // Only one thread can modify the list of files at a given time. - std::lock_guard lock{ mutex_ }; + ReleasableLockGuard lock{ &mutex_ }; bundle_t* files = files_.load(); if(segment < begin_segment_) { @@ -343,6 +375,11 @@ class FileSystemSegmentedFile { core::IAsyncContext* context_copy; core::Status result = context.DeepCopy(context_copy); assert(result == core::Status::Ok); + // unlock the lock before calling BumpCurrentEpoch(), + // which may call completion callbacks which call this function again, + // resulting in self-deadlock. + // + lock.Unlock(); epoch_->BumpCurrentEpoch(callback, context_copy); return core::Status::Ok; } @@ -386,7 +423,7 @@ class FileSystemSegmentedFile { }; // Only one thread can modify the list of files at a given time. - std::lock_guard lock{ mutex_ }; + ReleasableLockGuard lock{ &mutex_ }; bundle_t* files = files_.load(); assert(files); if(files->begin_segment >= new_begin_segment) { @@ -407,6 +444,11 @@ class FileSystemSegmentedFile { core::IAsyncContext* context_copy; core::Status result = context.DeepCopy(context_copy); assert(result == core::Status::Ok); + // unlock the lock before calling BumpCurrentEpoch(), + // which may call completion callbacks which call this function again, + // resulting in self-deadlock. + // + lock.Unlock(); epoch_->BumpCurrentEpoch(callback, context_copy); }