-
Notifications
You must be signed in to change notification settings - Fork 8.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Attempt to make the monarch more thread safe. #11189
Changes from 8 commits
4eedfcc
825dfc3
91f1845
e386d6f
f765728
5f632f4
ee463aa
3aeaeed
2f0db9e
e1c2f53
80646b3
be2cdbe
d95629b
8fd2b75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,10 +71,24 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
{ | ||
// Peasant already had an ID (from an older monarch). Leave that one | ||
// be. Make sure that the next peasant's ID is higher than it. | ||
_nextPeasantID = providedID >= _nextPeasantID ? providedID + 1 : _nextPeasantID; | ||
// If multiple peasants are added concurrently we keep trying to update | ||
// until we get to set the new id. | ||
uint64_t current; | ||
do | ||
{ | ||
current = _nextPeasantID.load(std::memory_order_relaxed); | ||
} while (current <= providedID && !_nextPeasantID.compare_exchange_weak(current, providedID + 1, std::memory_order_relaxed)); | ||
} | ||
|
||
auto newPeasantsId = peasant.GetID(); | ||
|
||
// Keep track of which peasant we are | ||
// SAFETY: this is only true for one peasant, and each peasant | ||
// is only added to a monarch once, so we do not need synchronization here. | ||
if (peasant.GetPID() == _ourPID) | ||
{ | ||
_ourPeasantId = newPeasantsId; | ||
} | ||
// Add an event listener to the peasant's WindowActivated event. | ||
peasant.WindowActivated({ this, &Monarch::_peasantWindowActivated }); | ||
peasant.IdentifyWindowsRequested({ this, &Monarch::_identifyWindows }); | ||
|
@@ -84,7 +98,10 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
peasant.HideNotificationIconRequested([this](auto&&, auto&&) { _HideNotificationIconRequestedHandlers(*this, nullptr); }); | ||
peasant.QuitAllRequested({ this, &Monarch::_handleQuitAll }); | ||
|
||
_peasants[newPeasantsId] = peasant; | ||
{ | ||
std::unique_lock lock{ _peasantsMutex }; | ||
_peasants[newPeasantsId] = peasant; | ||
} | ||
|
||
TraceLoggingWrite(g_hRemotingProvider, | ||
"Monarch_AddPeasant", | ||
|
@@ -124,9 +141,15 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
// closing all windows. | ||
_QuitAllRequestedHandlers(*this, nullptr); | ||
|
||
_quitting.store(true); | ||
// Tell all peasants to exit. | ||
const auto callback = [&](const auto& /*id*/, const auto& p) { | ||
p.Quit(); | ||
const auto callback = [&](const auto& id, const auto& p) { | ||
// We want to tell our peasant to quit last, so that we don't try | ||
// to perform a bunch of elections on quit. | ||
if (id != _ourPeasantId) | ||
{ | ||
p.Quit(); | ||
} | ||
}; | ||
const auto onError = [&](const auto& id) { | ||
TraceLoggingWrite(g_hRemotingProvider, | ||
|
@@ -137,6 +160,21 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
}; | ||
|
||
_forEachPeasant(callback, onError); | ||
|
||
{ | ||
std::shared_lock lock{ _peasantsMutex }; | ||
const auto peasantSearch = _peasants.find(_ourPeasantId); | ||
if (peasantSearch != _peasants.end()) | ||
{ | ||
peasantSearch->second.Quit(); | ||
} | ||
else | ||
{ | ||
// Somehow we don't have our own peasant, this should never happen. | ||
// We are trying to quit anyways so just fail here. | ||
assert(peasantSearch != _peasants.end()); | ||
} | ||
} | ||
} | ||
|
||
// Method Description: | ||
|
@@ -147,8 +185,19 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
// - <none> | ||
void Monarch::SignalClose(const uint64_t peasantId) | ||
{ | ||
// If we are quitting we don't care about maintaining our list of | ||
// peasants anymore, and don't need to notify the host that something | ||
// changed. | ||
if (_quitting.load(std::memory_order_acquire)) | ||
{ | ||
return; | ||
} | ||
|
||
_clearOldMruEntries(peasantId); | ||
_peasants.erase(peasantId); | ||
{ | ||
std::unique_lock lock{ _peasantsMutex }; | ||
_peasants.erase(peasantId); | ||
} | ||
_WindowClosedHandlers(nullptr, nullptr); | ||
} | ||
|
||
|
@@ -160,23 +209,8 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
// - the number of active peasants. | ||
uint64_t Monarch::GetNumberOfPeasants() | ||
{ | ||
auto num = 0; | ||
auto callback = [&](const auto& /*id*/, const auto& p) { | ||
// Check that the peasant is alive, and if so increment the count | ||
p.GetID(); | ||
num += 1; | ||
}; | ||
auto onError = [](const auto& id) { | ||
TraceLoggingWrite(g_hRemotingProvider, | ||
"Monarch_GetNumberOfPeasants_Failed", | ||
TraceLoggingInt64(id, "peasantID", "The ID of the peasant which we could not enumerate"), | ||
TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), | ||
TraceLoggingKeyword(TIL_KEYWORD_TRACE)); | ||
}; | ||
|
||
_forEachPeasant(callback, onError); | ||
|
||
return num; | ||
std::shared_lock lock{ _peasantsMutex }; | ||
return _peasants.size(); | ||
} | ||
|
||
// Method Description: | ||
|
@@ -205,8 +239,12 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
{ | ||
try | ||
{ | ||
const auto peasantSearch = _peasants.find(peasantID); | ||
auto maybeThePeasant = peasantSearch == _peasants.end() ? nullptr : peasantSearch->second; | ||
IPeasant maybeThePeasant = nullptr; | ||
{ | ||
std::shared_lock lock{ _peasantsMutex }; | ||
const auto peasantSearch = _peasants.find(peasantID); | ||
maybeThePeasant = peasantSearch == _peasants.end() ? nullptr : peasantSearch->second; | ||
} | ||
// Ask the peasant for their PID. This will validate that they're | ||
// actually still alive. | ||
if (maybeThePeasant) | ||
|
@@ -218,8 +256,12 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
catch (...) | ||
{ | ||
LOG_CAUGHT_EXCEPTION(); | ||
|
||
// Remove the peasant from the list of peasants | ||
_peasants.erase(peasantID); | ||
{ | ||
std::unique_lock lock{ _peasantsMutex }; | ||
_peasants.erase(peasantID); | ||
} | ||
|
||
// Remove the peasant from the list of MRU windows. They're dead. | ||
// They can't be the MRU anymore. | ||
|
@@ -244,39 +286,27 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
return 0; | ||
} | ||
|
||
std::vector<uint64_t> peasantsToErase{}; | ||
uint64_t result = 0; | ||
for (const auto& [id, p] : _peasants) | ||
{ | ||
try | ||
{ | ||
auto otherName = p.WindowName(); | ||
if (otherName == name) | ||
{ | ||
result = id; | ||
break; | ||
} | ||
} | ||
catch (...) | ||
|
||
const auto callback = [&](const auto& id, const auto& p) { | ||
auto otherName = p.WindowName(); | ||
if (otherName == name) | ||
{ | ||
LOG_CAUGHT_EXCEPTION(); | ||
// Normally, we'd just erase the peasant here. However, we can't | ||
// erase from the map while we're iterating over it like this. | ||
// Instead, pull a good ole Java and collect this id for removal | ||
// later. | ||
peasantsToErase.push_back(id); | ||
result = id; | ||
return false; | ||
} | ||
} | ||
return true; | ||
}; | ||
|
||
// Remove the dead peasants we came across while iterating. | ||
for (const auto& id : peasantsToErase) | ||
{ | ||
// Remove the peasant from the list of peasants | ||
_peasants.erase(id); | ||
// Remove the peasant from the list of MRU windows. They're dead. | ||
// They can't be the MRU anymore. | ||
_clearOldMruEntries(id); | ||
} | ||
const auto onError = [&](const auto& id) { | ||
TraceLoggingWrite(g_hRemotingProvider, | ||
"Monarch_lookupPeasantIdForName_Failed", | ||
TraceLoggingInt64(id, "peasantID", "The ID of the peasant which we could not get the name of"), | ||
TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), | ||
TraceLoggingKeyword(TIL_KEYWORD_TRACE)); | ||
}; | ||
|
||
_forEachPeasant(callback, onError); | ||
|
||
TraceLoggingWrite(g_hRemotingProvider, | ||
"Monarch_lookupPeasantIdForName", | ||
|
@@ -334,6 +364,7 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
// - <none> | ||
void Monarch::_clearOldMruEntries(const uint64_t peasantID) | ||
{ | ||
std::lock_guard lock{ _mruPeasantsMutex }; | ||
auto result = std::find_if(_mruPeasants.begin(), | ||
_mruPeasants.end(), | ||
[peasantID](auto&& other) { | ||
|
@@ -370,14 +401,17 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
// * If the current desktop doesn't have a vector, add one. | ||
const auto desktopGuid{ localArgs->DesktopID() }; | ||
|
||
// * Add this args list. By using lower_bound with insert, we can get it | ||
// into exactly the right spot, without having to re-sort the whole | ||
// array. | ||
_mruPeasants.insert(std::lower_bound(_mruPeasants.begin(), | ||
_mruPeasants.end(), | ||
*localArgs, | ||
[](const auto& first, const auto& second) { return first.ActivatedTime() > second.ActivatedTime(); }), | ||
*localArgs); | ||
{ | ||
std::lock_guard lock{ _mruPeasantsMutex }; | ||
// * Add this args list. By using lower_bound with insert, we can get it | ||
// into exactly the right spot, without having to re-sort the whole | ||
// array. | ||
_mruPeasants.insert(std::lower_bound(_mruPeasants.begin(), | ||
_mruPeasants.end(), | ||
*localArgs, | ||
[](const auto& first, const auto& second) { return first.ActivatedTime() > second.ActivatedTime(); }), | ||
*localArgs); | ||
} | ||
|
||
TraceLoggingWrite(g_hRemotingProvider, | ||
"Monarch_SetMostRecentPeasant", | ||
|
@@ -403,8 +437,11 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
// - the ID of the most recent peasant, otherwise 0 if we could not find one. | ||
uint64_t Monarch::_getMostRecentPeasantID(const bool limitToCurrentDesktop, const bool ignoreQuakeWindow) | ||
{ | ||
std::lock_guard lock{ _mruPeasantsMutex }; | ||
if (_mruPeasants.empty()) | ||
{ | ||
// Only need a shared lock for read | ||
std::shared_lock lock{ _peasantsMutex }; | ||
// We haven't yet been told the MRU peasant. Just use the first one. | ||
// This is just gonna be a random one, but really shouldn't happen | ||
// in practice. The WindowManager should set the MRU peasant | ||
|
@@ -855,7 +892,10 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
Windows::Foundation::Collections::IVectorView<PeasantInfo> Monarch::GetPeasantInfos() | ||
{ | ||
std::vector<PeasantInfo> names; | ||
names.reserve(_peasants.size()); | ||
{ | ||
std::shared_lock lock{ _peasantsMutex }; | ||
names.reserve(_peasants.size()); | ||
} | ||
Comment on lines
+933
to
+936
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BTW This makes me certain that we should create a thread safe proxy class for the Monarch in the future. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is something to say about the simplicity of just throwing a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah if you consider how we got event handlers, it's clear that recursive mutexes are the best solution here. You already have me entirely convinced about that. 😄 |
||
|
||
const auto func = [&](const auto& id, const auto& p) -> void { | ||
names.push_back({ id, p.WindowName(), p.ActiveTabTitle() }); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
#include "Peasant.h" | ||
#include "../cascadia/inc/cppwinrt_utils.h" | ||
#include "WindowActivatedArgs.h" | ||
#include <atomic> | ||
|
||
// We sure different GUIDs here depending on whether we're running a Release, | ||
// Preview, or Dev build. This ensures that different installs don't | ||
|
@@ -69,14 +70,19 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
private: | ||
uint64_t _ourPID; | ||
|
||
uint64_t _nextPeasantID{ 1 }; | ||
uint64_t _thisPeasantID{ 0 }; | ||
std::atomic<uint64_t> _nextPeasantID{ 1 }; | ||
uint64_t _ourPeasantId{ 0 }; | ||
|
||
// When we're quitting we do not care as much about handling some events that we know will be triggered | ||
std::atomic<bool> _quitting{ false }; | ||
|
||
winrt::com_ptr<IVirtualDesktopManager> _desktopManager{ nullptr }; | ||
|
||
std::unordered_map<uint64_t, winrt::Microsoft::Terminal::Remoting::IPeasant> _peasants; | ||
std::shared_mutex _peasantsMutex{}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider using til::shared_mutex<std::unordered_map<uint64_t, IPeasant>> _peasants There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did originally consider it, but disregarded because the first iteration of this had a single Once we are confident that this locking strategy is what we want I can take another look at using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If possibly I would like this refactor to be deferred for now. I took a shot at doing so, but it proved to be a bit more difficult than I hoped. |
||
|
||
std::vector<Remoting::WindowActivatedArgs> _mruPeasants; | ||
std::recursive_mutex _mruPeasantsMutex{}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there ever a situation where this mutex is locked recursively? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. _getMostRecentPeasantID calls (through a couple hops) _clearOldMruEntries There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh right... That's unfortunate - recursive mutexes are really annoying (it uses critical sections brrrr). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In normal circumstances, yes. The problem is with loading / quitting where there are potentially "many" (O(#windows)) calls, and of course that is where I found these problems in the first place. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea, I', not terribly worried about the perf here, there's not really going to be that many calls coming through here per second, and they're pretty much all user-input driven. Plus, O(N) where N=number of windows? That's gonna be pretty small unless you're actively trying to spawn 100's of windows per second and close them quickly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can take a look if there is an easy refactor to get rid of the recursive mutex for a shared_mutex, but I think it is still better to have the more granular locking in general. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Without rewriting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I made There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Spent more time trying to make sure we can't deadlock. I am ~90% confident, but have not proved, that the current version of the code cannot deadlock. Currently |
||
|
||
winrt::Microsoft::Terminal::Remoting::IPeasant _getPeasant(uint64_t peasantID); | ||
uint64_t _getMostRecentPeasantID(bool limitToCurrentDesktop, const bool ignoreQuakeWindow); | ||
|
@@ -121,42 +127,50 @@ namespace winrt::Microsoft::Terminal::Remoting::implementation | |
|
||
std::vector<uint64_t> peasantsToErase; | ||
|
||
for (const auto& [id, p] : _peasants) | ||
{ | ||
try | ||
std::shared_lock lock{ _peasantsMutex }; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you hold this lock for the entire duration of the function you don't need to lock it again below. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I deliberately wanted to separate the read only vs the rw portions of the lock to reduce contention if there was nothing to be removed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The lock is largely uncontented and as such there shouldn't be any noticeable advantage to using read-only locking. Personally I'd just acquire an exclusive lock in all cases. But I don't mind the current code either. |
||
|
||
for (const auto& [id, p] : _peasants) | ||
{ | ||
if constexpr (IsVoid) | ||
try | ||
{ | ||
func(id, p); | ||
} | ||
else | ||
{ | ||
if (!func(id, p)) | ||
if constexpr (IsVoid) | ||
{ | ||
break; | ||
func(id, p); | ||
} | ||
else | ||
{ | ||
if (!func(id, p)) | ||
{ | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
catch (const winrt::hresult_error& exception) | ||
{ | ||
onError(id); | ||
|
||
if (exception.code() == 0x800706ba) // The RPC server is unavailable. | ||
{ | ||
peasantsToErase.emplace_back(id); | ||
} | ||
else | ||
catch (const winrt::hresult_error& exception) | ||
{ | ||
LOG_CAUGHT_EXCEPTION(); | ||
throw; | ||
onError(id); | ||
|
||
if (exception.code() == 0x800706ba) // The RPC server is unavailable. | ||
{ | ||
peasantsToErase.emplace_back(id); | ||
} | ||
else | ||
{ | ||
LOG_CAUGHT_EXCEPTION(); | ||
throw; | ||
} | ||
} | ||
} | ||
} | ||
|
||
for (const auto& id : peasantsToErase) | ||
if (peasantsToErase.size() > 0) | ||
{ | ||
_peasants.erase(id); | ||
_clearOldMruEntries(id); | ||
std::unique_lock lock{ _peasantsMutex }; | ||
for (const auto& id : peasantsToErase) | ||
{ | ||
_peasants.erase(id); | ||
_clearOldMruEntries(id); | ||
} | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does this need to sync the memory with
Monarch::_handleQuitAll
?Is there a reason
_quitting
is needed? Why would a listener care if it got notified during shutdown?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it is necessary, but since each peasant calls this on close I don't want them waiting for the
unique_lock
until after the_handleQuitAll
finishes. This would also trigger _GetNumberOfPeasants through a couple of event hops and that would be a bunch of needless churn and lock contention.