Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix audit_storage issues #9265

Merged
merged 2 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions fdbcli/AuditStorageCommand.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,8 @@ ACTOR Future<UID> auditStorageCommandActor(Reference<IClusterConnectionRecord> c
return UID();
}

Key begin, end;
if (tokens.size() == 2) {
begin = allKeys.begin;
end = allKeys.end;
} else if (tokens.size() == 3) {
Key begin = allKeys.begin, end = allKeys.end;
if (tokens.size() == 3) {
begin = tokens[2];
} else if (tokens.size() == 4) {
begin = tokens[2];
Expand Down
11 changes: 9 additions & 2 deletions fdbcli/GetAuditStatusCommand.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
namespace fdb_cli {

ACTOR Future<bool> getAuditStatusCommandActor(Database cx, std::vector<StringRef> tokens) {
if (tokens.size() != 4) {
if (tokens.size() < 3 || tokens.size() > 4) {
printUsage(tokens[0]);
return false;
}
Expand All @@ -45,11 +45,18 @@ ACTOR Future<bool> getAuditStatusCommandActor(Database cx, std::vector<StringRef
}

if (tokencmp(tokens[2], "id")) {
if (tokens.size() != 4) {
printUsage(tokens[0]);
return false;
}
const UID id = UID::fromString(tokens[3].toString());
AuditStorageState res = wait(getAuditState(cx, type, id));
printf("Audit result is:\n%s", res.toString().c_str());
} else if (tokencmp(tokens[2], "recent")) {
const int count = std::stoi(tokens[3].toString());
int count = 5;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this? 5 'rows'? We should add this to the help output, that 5 is the default 'Count' (I can do that over in #9266).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think default should be ROW_LIMIT_UNLIMITED -- I think that would make more sense to the user. What do you think ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point, I put CLIENT_KNOBS->TOO_MANY as the default.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is TOO_MANY too small? Its only 1M? Thanks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My current plan is to keep only the most recent N audit records per audit type, I feel N would be smaller than 1M, and a background process will be added to GC the earlier audit results.

if (tokens.size() == 4) {
count = std::stoi(tokens[3].toString());
}
std::vector<AuditStorageState> res = wait(getLatestAuditStates(cx, type, count));
for (const auto& it : res) {
printf("Audit result is:\n%s\n", it.toString().c_str());
Expand Down
10 changes: 6 additions & 4 deletions fdbserver/DataDistribution.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {

Promise<Void> initialized;

std::unordered_map<AuditType, std::vector<std::shared_ptr<DDAudit>>> audits;
std::unordered_map<AuditType, std::unordered_map<UID, std::shared_ptr<DDAudit>>> audits;
Future<Void> auditInitialized;

Optional<Reference<TenantCache>> ddTenantCache;
Expand Down Expand Up @@ -1382,7 +1382,7 @@ ACTOR Future<Void> resumeAuditStorage(Reference<DataDistributor> self, AuditStor

state std::shared_ptr<DDAudit> audit =
std::make_shared<DDAudit>(auditState.id, auditState.range, auditState.getType());
self->audits[auditState.getType()].push_back(audit);
self->audits[auditState.getType()][audit->id] = audit;
audit->actors.add(loadAndDispatchAuditRange(self, audit, auditState.range));
TraceEvent(SevDebug, "DDResumAuditStorageBegin", self->ddId)
.detail("AuditID", audit->id)
Expand Down Expand Up @@ -1411,6 +1411,7 @@ ACTOR Future<Void> resumeAuditStorage(Reference<DataDistributor> self, AuditStor
self->addActor.send(resumeAuditStorage(self, auditState));
}
}
self->audits[auditState.getType()].erase(audit->id);

return Void();
}
Expand All @@ -1425,7 +1426,7 @@ ACTOR Future<Void> auditStorage(Reference<DataDistributor> self, TriggerAuditReq
try {
auto it = self->audits.find(req.getType());
if (it != self->audits.end() && !it->second.empty()) {
for (auto& currentAudit : it->second) {
for (auto& [id, currentAudit] : it->second) {
if (currentAudit->range.contains(req.range)) {
auditState.id = currentAudit->id;
auditState.range = currentAudit->range;
Expand All @@ -1444,7 +1445,7 @@ ACTOR Future<Void> auditStorage(Reference<DataDistributor> self, TriggerAuditReq
UID auditId = wait(persistNewAuditState(self->txnProcessor->context(), auditState));
auditState.id = auditId;
audit = std::make_shared<DDAudit>(auditId, req.range, req.getType());
self->audits[req.getType()].push_back(audit);
self->audits[req.getType()][audit->id] = audit;
audit->actors.add(loadAndDispatchAuditRange(self, audit, req.range));
// Simulate restarting.
if (g_network->isSimulated() && deterministicRandom()->coinflip()) {
Expand Down Expand Up @@ -1497,6 +1498,7 @@ ACTOR Future<Void> auditStorage(Reference<DataDistributor> self, TriggerAuditReq
throw e;
}
}
self->audits[auditState.getType()].erase(auditState.id);

return Void();
}
Expand Down
15 changes: 15 additions & 0 deletions fdbserver/workloads/ValidateStorage.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,21 @@ struct ValidateStorage : TestWorkload {
}
}

loop {
try {
Optional<UID> auditId_ = wait(timeout(
auditStorage(cx->getConnectionRecord(), allKeys, AuditType::ValidateHA, /*async=*/true), 30));
if (!auditId_.present()) {
throw audit_storage_failed();
}
ASSERT(auditId_ != auditId);
break;
} catch (Error& e) {
TraceEvent(SevWarn, "StartAuditStorageError").errorUnsuppressed(e);
wait(delay(1));
}
}

return Void();
}

Expand Down