Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix exclusion in repairDeadDatacenter to be remote only [release-7.2] #9385

Merged
merged 3 commits into from
Feb 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions fdbserver/QuietDatabase.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ ACTOR Future<Void> repairDeadDatacenter(Database cx,
std::string context) {
if (g_network->isSimulated() && g_simulator->usableRegions > 1 && !g_simulator->quiesced) {
state bool primaryDead = g_simulator->datacenterDead(g_simulator->primaryDcId);
bool remoteDead = g_simulator->datacenterDead(g_simulator->remoteDcId);
state bool remoteDead = g_simulator->datacenterDead(g_simulator->remoteDcId);

// FIXME: the primary and remote can both be considered dead because excludes are not handled properly by the
// datacenterDead function
Expand All @@ -667,21 +667,23 @@ ACTOR Future<Void> repairDeadDatacenter(Database cx,
return Void();
}
if (primaryDead || remoteDead) {
if (remoteDead) {
std::vector<AddressExclusion> servers =
g_simulator->getAllAddressesInDCToExclude(g_simulator->remoteDcId);
TraceEvent(SevWarnAlways, "DisablingFearlessConfiguration")
.detail("Location", context)
.detail("Stage", "ExcludeServers")
.detail("Servers", describe(servers));
wait(excludeServers(cx, servers, false));
}

TraceEvent(SevWarnAlways, "DisablingFearlessConfiguration")
.detail("Location", context)
.detail("Stage", "Repopulate")
.detail("RemoteDead", remoteDead)
.detail("PrimaryDead", primaryDead);
g_simulator->usableRegions = 1;

state std::vector<AddressExclusion> servers = g_simulator->getAllAddressesInDCToExclude(
primaryDead ? g_simulator->primaryDcId : g_simulator->remoteDcId);
wait(excludeServers(cx, servers, false));
TraceEvent(SevWarnAlways, "DisablingFearlessConfiguration")
.detail("Location", context)
.detail("Stage", "ServerExcluded")
.detail("Servers", describe(servers));

wait(success(ManagementAPI::changeConfig(
cx.getReference(),
(primaryDead ? g_simulator->disablePrimary : g_simulator->disableRemote) + " repopulate_anti_quorum=1",
Expand Down
31 changes: 18 additions & 13 deletions fdbserver/workloads/SkewedReadWrite.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,21 +99,26 @@ struct SkewedReadWriteWorkload : ReadWriteCommon {
}

ACTOR static Future<Void> updateServerShards(Database cx, SkewedReadWriteWorkload* self) {
state Future<RangeResult> serverList =
runRYWTransaction(cx, [](Reference<ReadYourWritesTransaction> tr) -> Future<RangeResult> {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
return tr->getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY);
});
state RangeResult range =
wait(runRYWTransaction(cx, [](Reference<ReadYourWritesTransaction> tr) -> Future<RangeResult> {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
return tr->getRange(serverKeysRange, CLIENT_KNOBS->TOO_MANY);
}));
wait(success(serverList));
state RangeResult serverList;
state RangeResult range;
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
loop {
// read both key ranges in a single transaction so that the two results are transactionally consistent
try {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
state Future<RangeResult> serverListF = tr->getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY);
state Future<RangeResult> rangeF = tr->getRange(serverKeysRange, CLIENT_KNOBS->TOO_MANY);
wait(store(serverList, serverListF));
wait(store(range, rangeF));
break;
} catch (Error& e) {
wait(tr->onError(e));
}
}
// decode server interfaces
self->serverInterfaces.clear();
for (int i = 0; i < serverList.get().size(); i++) {
auto ssi = decodeServerListValue(serverList.get()[i].value);
for (int i = 0; i < serverList.size(); i++) {
auto ssi = decodeServerListValue(serverList[i].value);
self->serverInterfaces.emplace(ssi.id(), ssi);
}
// clear self->serverShards
Expand Down