Skip to content

Commit

Permalink
Handle EKP Tenant Not Found Errors (#9261)
Browse files Browse the repository at this point in the history
handle EKP tenant not found errors
  • Loading branch information
sfc-gh-nwijetunga authored Feb 2, 2023
1 parent de670b7 commit 86f3665
Show file tree
Hide file tree
Showing 20 changed files with 357 additions and 71 deletions.
82 changes: 65 additions & 17 deletions fdbclient/BackupAgentBase.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BlobCipher.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GetEncryptCipherKeys.actor.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/ManagementAPI.actor.h"
Expand All @@ -32,7 +33,6 @@
#include "fdbclient/TenantManagement.actor.h"
#include "fdbrpc/simulator.h"
#include "flow/ActorCollection.h"
#include "flow/Trace.h"
#include "flow/actorcompiler.h" // has to be last include

FDB_DEFINE_BOOLEAN_PARAM(LockDB);
Expand Down Expand Up @@ -252,6 +252,34 @@ Version getLogKeyVersion(Key key) {
return bigEndian64(*(int64_t*)(key.begin() + backupLogPrefixBytes + sizeof(UID) + sizeof(uint8_t)));
}

bool validTenantAccess(std::map<int64_t, TenantName>* tenantMap,
MutationRef m,
bool provisionalProxy,
Version version) {
if (isSystemKey(m.param1)) {
return true;
}
int64_t tenantId = TenantInfo::INVALID_TENANT;
if (m.isEncrypted()) {
tenantId = m.encryptionHeader()->cipherTextDetails.encryptDomainId;
} else {
tenantId = TenantAPI::extractTenantIdFromMutation(m);
}
ASSERT(tenantMap != nullptr);
if (m.isEncrypted() && isReservedEncryptDomain(tenantId)) {
// These are valid encrypt domains so don't check the tenant map
} else if (tenantMap->find(tenantId) == tenantMap->end()) {
// If a tenant is not found for a given mutation then exclude it from the batch
ASSERT(!provisionalProxy);
TraceEvent(SevWarnAlways, "MutationLogRestoreTenantNotFound")
.detail("Version", version)
.detail("TenantId", tenantId);
CODE_PROBE(true, "mutation log restore tenant not found");
return false;
}
return true;
}

// Given a key from one of the ranges returned by get_log_ranges,
// returns(version, part) where version is the database version number of
// the transaction log data in the value, and part is 0 for the first such
Expand Down Expand Up @@ -320,29 +348,49 @@ ACTOR static Future<Void> decodeBackupLogValue(Arena* arena,
offset += len2;
state Optional<MutationRef> encryptedLogValue = Optional<MutationRef>();

// Check for valid tenant in required tenant mode. If the tenant does not exist in our tenant map then
// we EXCLUDE the mutation (of that respective tenant) during the restore. NOTE: This simply allows a
// restore to make progress in the event of tenant deletion, but tenant deletion should be considered
// carefully so that we do not run into this case. We do this check here so if encrypted mutations are not
// found in the tenant map then we exit early without needing to reach out to the EKP.
if (config.tenantMode == TenantMode::REQUIRED &&
config.encryptionAtRestMode.mode != EncryptionAtRestMode::CLUSTER_AWARE &&
!validTenantAccess(tenantMap, logValue, provisionalProxy, version)) {
consumed += BackupAgentBase::logHeaderSize + len1 + len2;
continue;
}

// Decrypt mutation ref if encrypted
if (logValue.isEncrypted()) {
encryptedLogValue = logValue;
state EncryptCipherDomainId domainId = logValue.encryptionHeader()->cipherTextDetails.encryptDomainId;
Reference<AsyncVar<ClientDBInfo> const> dbInfo = cx->clientInfo;
TextAndHeaderCipherKeys cipherKeys =
wait(getEncryptCipherKeys(dbInfo, *logValue.encryptionHeader(), BlobCipherMetrics::BACKUP));
logValue = logValue.decrypt(cipherKeys, tempArena, BlobCipherMetrics::BACKUP);
try {
TextAndHeaderCipherKeys cipherKeys =
wait(getEncryptCipherKeys(dbInfo, *logValue.encryptionHeader(), BlobCipherMetrics::RESTORE));
logValue = logValue.decrypt(cipherKeys, tempArena, BlobCipherMetrics::BACKUP);
} catch (Error& e) {
// It's possible a tenant was deleted and the encrypt key fetch failed
TraceEvent(SevWarnAlways, "MutationLogRestoreEncryptKeyFetchFailed")
.detail("Version", version)
.detail("TenantId", domainId);
if (e.code() == error_code_encrypt_keys_fetch_failed) {
CODE_PROBE(true, "mutation log restore encrypt keys not found");
consumed += BackupAgentBase::logHeaderSize + len1 + len2;
continue;
} else {
throw;
}
}
}
ASSERT(!logValue.isEncrypted());

if (config.tenantMode == TenantMode::REQUIRED && !isSystemKey(logValue.param1)) {
// If a tenant is not found for a given mutation then exclude it from the batch
int64_t tenantId = TenantAPI::extractTenantIdFromMutation(logValue);
ASSERT(tenantMap != nullptr);
if (tenantMap->find(tenantId) == tenantMap->end()) {
ASSERT(!provisionalProxy);
TraceEvent(SevWarnAlways, "MutationLogRestoreTenantNotFound")
.detail("Version", version)
.detail("TenantId", tenantId);
CODE_PROBE(true, "mutation log restore tenant not found");
consumed += BackupAgentBase::logHeaderSize + len1 + len2;
continue;
}
// If the mutation was encrypted using cluster aware encryption then check after decryption
if (config.tenantMode == TenantMode::REQUIRED &&
config.encryptionAtRestMode.mode == EncryptionAtRestMode::CLUSTER_AWARE &&
!validTenantAccess(tenantMap, logValue, provisionalProxy, version)) {
consumed += BackupAgentBase::logHeaderSize + len1 + len2;
continue;
}

MutationRef originalLogValue = logValue;
Expand Down
17 changes: 0 additions & 17 deletions fdbclient/BackupContainerFileSystem.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -973,23 +973,6 @@ class BackupContainerFileSystemImpl {
continue;

restorable.snapshot = snapshots[i];
// TODO: Reenable the sanity check after TooManyFiles error is resolved
if (false && g_network->isSimulated()) {
// Sanity check key ranges
// TODO: If we want to re-enable this codepath, make sure that we are passing a valid DB object (instead
// of the DB object created on the line below)
ASSERT(false);
state Database cx;
state std::map<std::string, KeyRange>::iterator rit;
for (rit = restorable.keyRanges.begin(); rit != restorable.keyRanges.end(); rit++) {
auto it = std::find_if(restorable.ranges.begin(),
restorable.ranges.end(),
[file = rit->first](const RangeFile f) { return f.fileName == file; });
ASSERT(it != restorable.ranges.end());
KeyRange result = wait(bc->getSnapshotFileKeyRange(*it, cx));
ASSERT(rit->second.begin <= result.begin && rit->second.end >= result.end);
}
}

// No logs needed if there is a complete filtered key space snapshot at the target version.
if (minKeyRangeVersion == maxKeyRangeVersion && maxKeyRangeVersion == restorable.targetVersion) {
Expand Down
3 changes: 3 additions & 0 deletions fdbclient/BlobCipher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ BlobCipherMetrics::BlobCipherMetrics()
CounterSet(cc, "KVRedwood"),
CounterSet(cc, "BlobGranule"),
CounterSet(cc, "Backup"),
CounterSet(cc, "Restore"),
CounterSet(cc, "Test") }) {
specialCounter(cc, "CacheSize", []() { return BlobCipherKeyCache::getInstance()->getSize(); });
traceFuture = cc.traceCounters("BlobCipherMetrics", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL);
Expand All @@ -102,6 +103,8 @@ std::string toString(BlobCipherMetrics::UsageType type) {
return "BlobGranule";
case BlobCipherMetrics::UsageType::BACKUP:
return "Backup";
case BlobCipherMetrics::UsageType::RESTORE:
return "Restore";
case BlobCipherMetrics::UsageType::TEST:
return "Test";
default:
Expand Down
1 change: 1 addition & 0 deletions fdbclient/ClientKnobs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( CLIENT_ENABLE_USING_CLUSTER_ID_KEY, false );

init( ENABLE_ENCRYPTION_CPU_TIME_LOGGING, false );
init( SIMULATION_EKP_TENANT_IDS_TO_DROP, "-1" );
// clang-format on
}

Expand Down
39 changes: 31 additions & 8 deletions fdbclient/FileBackupAgent.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbclient/TenantEntryCache.actor.h"
#include "fdbclient/TenantManagement.actor.h"
#include "fdbrpc/TenantInfo.h"
#include "fdbrpc/simulator.h"
#include "flow/FastRef.h"
#include "fmt/format.h"
Expand Down Expand Up @@ -610,7 +611,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
int64_t dataLen,
Arena* arena) {
Reference<AsyncVar<ClientDBInfo> const> dbInfo = cx->clientInfo;
TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::BACKUP));
TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::RESTORE));
ASSERT(cipherKeys.cipherHeaderKey.isValid() && cipherKeys.cipherTextKey.isValid());
validateEncryptionHeader(cipherKeys.cipherHeaderKey, cipherKeys.cipherTextKey, header);
DecryptBlobCipherAes256Ctr decryptor(
Expand Down Expand Up @@ -1131,6 +1132,7 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
wait(tenantCache.get()->init());
}
state EncryptionAtRestMode encryptMode = config.encryptionAtRestMode;
state int64_t blockTenantId = TenantInfo::INVALID_TENANT;

try {
// Read header, currently only decoding BACKUP_AGENT_SNAPSHOT_FILE_VERSION or
Expand All @@ -1142,17 +1144,25 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
} else if (file_version == BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION) {
CODE_PROBE(true, "decoding encrypted block");
// decode options struct
uint32_t optionsLen = reader.consumeNetworkUInt32();
state uint32_t optionsLen = reader.consumeNetworkUInt32();
const uint8_t* o = reader.consume(optionsLen);
StringRef optionsStringRef = StringRef(o, optionsLen);
EncryptedRangeFileWriter::Options options =
ObjectReader::fromStringRef<EncryptedRangeFileWriter::Options>(optionsStringRef, IncludeVersion());
ASSERT(!options.compressionEnabled);

// read encryption header
const uint8_t* headerStart = reader.consume(BlobCipherEncryptHeader::headerSize);
state const uint8_t* headerStart = reader.consume(BlobCipherEncryptHeader::headerSize);
StringRef headerS = StringRef(headerStart, BlobCipherEncryptHeader::headerSize);
state BlobCipherEncryptHeader header = BlobCipherEncryptHeader::fromStringRef(headerS);
blockTenantId = header.cipherTextDetails.encryptDomainId;
if (config.tenantMode == TenantMode::REQUIRED && !isReservedEncryptDomain(blockTenantId)) {
ASSERT(tenantCache.present());
Optional<TenantEntryCachePayload<Void>> payload = wait(tenantCache.get()->getById(blockTenantId));
if (!payload.present()) {
throw tenant_not_found();
}
}
const uint8_t* dataPayloadStart = headerStart + BlobCipherEncryptHeader::headerSize;
// calculate the total bytes read up to (and including) the header
int64_t bytesRead = sizeof(int32_t) + sizeof(uint32_t) + optionsLen + BlobCipherEncryptHeader::headerSize;
Expand All @@ -1167,6 +1177,13 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
}
return results;
} catch (Error& e) {
if (e.code() == error_code_encrypt_keys_fetch_failed) {
TraceEvent(SevWarnAlways, "SnapshotRestoreEncryptKeyFetchFailed").detail("TenantId", blockTenantId);
CODE_PROBE(true, "Snapshot restore encrypt keys not found");
} else if (e.code() == error_code_tenant_not_found) {
TraceEvent(SevWarnAlways, "EncryptedSnapshotRestoreTenantNotFound").detail("TenantId", blockTenantId);
CODE_PROBE(true, "Encrypted Snapshot restore tenant not found");
}
TraceEvent(SevWarn, "FileRestoreDecodeRangeFileBlockFailed")
.error(e)
.detail("Filename", file->getFilename())
Expand Down Expand Up @@ -3552,9 +3569,6 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase {
}
state int64_t tenantId = TenantAPI::extractTenantIdFromKeyRef(key);
Optional<TenantEntryCachePayload<Void>> payload = wait(tenantCache->getById(tenantId));
if (!payload.present()) {
TraceEvent(SevError, "SnapshotRestoreInvalidTenantAccess").detail("Tenant", tenantId);
}
ASSERT(payload.present());
return Void();
}
Expand Down Expand Up @@ -3607,8 +3621,17 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase {
}

state Reference<IAsyncFile> inFile = wait(bc.get()->readFile(rangeFile.fileName));
state Standalone<VectorRef<KeyValueRef>> blockData =
wait(decodeRangeFileBlock(inFile, readOffset, readLen, cx));
state Standalone<VectorRef<KeyValueRef>> blockData;
try {
Standalone<VectorRef<KeyValueRef>> data = wait(decodeRangeFileBlock(inFile, readOffset, readLen, cx));
blockData = data;
} catch (Error& e) {
// It's possible a tenant was deleted and the encrypt key fetch failed
if (e.code() == error_code_encrypt_keys_fetch_failed || e.code() == error_code_tenant_not_found) {
return Void();
}
throw;
}
state Optional<Reference<TenantEntryCache<Void>>> tenantCache;
state std::vector<Future<Void>> validTenantCheckFutures;
state Arena arena;
Expand Down
1 change: 1 addition & 0 deletions fdbclient/include/fdbclient/BlobCipher.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class BlobCipherMetrics : public NonCopyable {
KV_REDWOOD,
BLOB_GRANULE,
BACKUP,
RESTORE,
TEST,
MAX,
};
Expand Down
4 changes: 4 additions & 0 deletions fdbclient/include/fdbclient/ClientKnobs.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,10 @@ class ClientKnobs : public KnobsImpl<ClientKnobs> {

// Encryption-at-rest
bool ENABLE_ENCRYPTION_CPU_TIME_LOGGING;
// This Knob will be a comma-delimited string (i.e 0,1,2,3) that specifies which tenants the the EKP should throw
// key_not_found errors for. If TenantInfo::INVALID_TENANT is contained within the list then no tenants will be
// dropped. This Knob should ONLY be used in simulation for testing purposes
std::string SIMULATION_EKP_TENANT_IDS_TO_DROP;

ClientKnobs(Randomize randomize);
void initialize(Randomize randomize);
Expand Down
15 changes: 15 additions & 0 deletions fdbclient/include/fdbclient/GetEncryptCipherKeys.actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
*/
#pragma once
#include "flow/EncryptUtils.h"
#include "flow/genericactors.actor.h"
#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_GETCIPHERKEYS_ACTOR_G_H)
#define FDBCLIENT_GETCIPHERKEYS_ACTOR_G_H
#include "fdbclient/GetEncryptCipherKeys.actor.g.h"
Expand All @@ -27,7 +28,9 @@

#include "fdbclient/BlobCipher.h"
#include "fdbclient/EncryptKeyProxyInterface.h"
#include "fdbclient/Knobs.h"
#include "fdbrpc/Stats.h"
#include "fdbrpc/TenantInfo.h"
#include "flow/Knobs.h"
#include "flow/IRandom.h"

Expand Down Expand Up @@ -182,6 +185,18 @@ Future<EKPGetBaseCipherKeysByIdsReply> getUncachedEncryptCipherKeys(Reference<As
TraceEvent(SevWarn, "GetEncryptCipherKeys_RequestFailed").error(reply.error.get());
throw encrypt_keys_fetch_failed();
}
if (g_network && g_network->isSimulated() && usageType == BlobCipherMetrics::RESTORE) {
std::unordered_set<int64_t> tenantIdsToDrop =
parseStringToUnorderedSet<int64_t>(CLIENT_KNOBS->SIMULATION_EKP_TENANT_IDS_TO_DROP, ',');
if (!tenantIdsToDrop.count(TenantInfo::INVALID_TENANT)) {
for (auto& baseCipherInfo : request.baseCipherInfos) {
if (tenantIdsToDrop.count(baseCipherInfo.domainId)) {
TraceEvent("GetEncryptCipherKeys_SimulatedError").detail("DomainId", baseCipherInfo.domainId);
throw encrypt_keys_fetch_failed();
}
}
}
}
return reply;
} catch (Error& e) {
TraceEvent("GetEncryptCipherKeys_CaughtError").error(e);
Expand Down
2 changes: 1 addition & 1 deletion fdbclient/include/fdbclient/TenantEntryCache.actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ class TenantEntryCache : public ReferenceCounted<TenantEntryCache<T>>, NonCopyab
if (!cache->lastTenantId.present()) {
return false;
}
return cache->lastTenantId.get() > 0;
return cache->lastTenantId.get() >= 0;
}
return true;
}
Expand Down
Loading

0 comments on commit 86f3665

Please sign in to comment.