Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Blob files are included in SstFileManager DB size calculation #5127

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,8 @@ Note: The next release will be major release 7.0. See https://github.com/faceboo
## 6.1.1 (2019-04-09)
### New Features
* When reading from option file/string/map, customized comparators and/or merge operators can be filled according to object registry.
* WAL files are subjected to rate limited deletion.
* Blob files are deleted in the foreground or background based on the trash-to-DB size ratio, instead of always being deleted in the background.

### Public API Change

Expand Down
1 change: 1 addition & 0 deletions file/delete_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ void DeleteScheduler::BackgroundEmptyTrash() {
InstrumentedMutexLock l(&mu_);
while (queue_.empty() && !closing_) {
cv_.Wait();
TEST_SYNC_POINT("DeleteScheduler::BackgroundEmptyTrash:Wakeup");
}

if (closing_) {
Expand Down
20 changes: 19 additions & 1 deletion file/sst_file_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,14 +206,32 @@ bool SstFileManagerImpl::EnoughRoomForCompaction(
return true;
}

// Records `size_counter` under `dbname` in stacked_db_sizes_ while holding
// mu_. Only the raw pointer is stored, so the counter is not copied.
// NOTE(review): presumably the caller must keep the counter alive until
// UnregisterStackedDB(dbname) is called -- confirm against callers.
void SstFileManagerImpl::RegisterStackedDB(
    std::string dbname, std::atomic<uint64_t>* size_counter) {
  MutexLock guard(&mu_);
  stacked_db_sizes_.emplace(dbname, size_counter);
}

// Drops the size counter entry stored under `dbname` from stacked_db_sizes_
// while holding mu_.
void SstFileManagerImpl::UnregisterStackedDB(std::string dbname) {
  MutexLock guard(&mu_);
  stacked_db_sizes_.erase(dbname);
}

// Returns a snapshot of cur_compactions_reserved_size_, read under mu_.
uint64_t SstFileManagerImpl::GetCompactionsReservedSize() {
  MutexLock guard(&mu_);
  const uint64_t reserved_size = cur_compactions_reserved_size_;
  return reserved_size;
}

// Returns total_files_size_ plus the current value of every size counter
// registered in stacked_db_sizes_. The whole computation runs under mu_.
//
// The previous body returned total_files_size_ before reaching the loop,
// which left the stacked DB accumulation unreachable.
uint64_t SstFileManagerImpl::GetTotalSize() {
  MutexLock l(&mu_);
  uint64_t total_db_size = total_files_size_;
  // Bind by const reference so the (name, counter) pair -- including its
  // std::string key -- is not copied on every iteration.
  for (const auto& db : stacked_db_sizes_) {
    // Relaxed load: only the counter's current value is read here.
    total_db_size += db.second->load(std::memory_order_relaxed);
  }
  return total_db_size;
}

std::unordered_map<std::string, uint64_t>
Expand Down
11 changes: 11 additions & 0 deletions file/sst_file_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,17 @@ class SstFileManagerImpl : public SstFileManager {
const std::vector<CompactionInputFiles>& inputs,
const Status& bg_error);

// Register/De-register atomic size counters for stacked DBs using this
// instance of SstFileManagerImpl. This is used to allow SFM to calculate
// the total DB size for Blob DB. Since Blob files keep growing, in order
// to accurately track the size we need to know the current file size. Since
// Blob DB already keeps track of the total blob size, we simply keep a
// pointer to that counter and read it when needed (which happens to be in
// GetTotalSize()).
//
// Only the raw pointer is kept; the counter itself is not copied.
// NOTE(review): this presumably means the counter must stay valid until
// UnregisterStackedDB() is called for the same dbname -- confirm.
void RegisterStackedDB(std::string dbname,
std::atomic<uint64_t>* size_counter);
void UnregisterStackedDB(std::string dbname);

// Bookkeeping so total_file_sizes_ goes back to normal after compaction
// finishes
void OnCompactionCompletion(Compaction* c);
Expand Down
11 changes: 11 additions & 0 deletions utilities/blob_db/blob_db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ Status BlobDBImpl::Close() {
}
closed_ = true;

SstFileManagerImpl* sfm = static_cast<SstFileManagerImpl*>(
db_impl_->immutable_db_options().sst_file_manager.get());
if (sfm != nullptr) {
sfm->UnregisterStackedDB(dbname_);
}

// Close base DB before BlobDBImpl destructs to stop event listener and
// compaction filter call.
Status s = db_->Close();
Expand Down Expand Up @@ -269,6 +275,11 @@ Status BlobDBImpl::Open(std::vector<ColumnFamilyHandle*>* handles) {
// Add trash files in blob dir to file delete scheduler.
SstFileManagerImpl* sfm = static_cast<SstFileManagerImpl*>(
db_impl_->immutable_db_options().sst_file_manager.get());
// Register the blob size counter with SFM so it can make rate limiting
// decisions based on the total DB size
if (sfm != nullptr) {
sfm->RegisterStackedDB(dbname_, &total_blob_size_);
}
DeleteScheduler::CleanupDirectory(env_, sfm, blob_dir_);

UpdateLiveSSTSize();
Expand Down
69 changes: 69 additions & 0 deletions utilities/blob_db/blob_db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,75 @@ TEST_F(BlobDBTest, SstFileManagerRestart) {
SyncPoint::GetInstance()->DisableProcessing();
}

// Checks how obsolete blob file deletions are split between being scheduled
// through SstFileManagerImpl and being deleted directly by DeleteScheduler
// when an explicit SstFileManager is attached to a BlobDB.
TEST_F(BlobDBTest, SstFileManagerDBSize) {
  BlobDBOptions bdb_options;
  bdb_options.min_blob_size = 0;
  Options db_options;

  DestroyBlobDB(dbname_, db_options, bdb_options);

  int files_deleted_directly = 0;
  int files_scheduled_to_delete = 0;
  // Order the background trash thread's wakeup sync point after the sync
  // point this test hits at its very end.
  // NOTE(review): presumably this keeps the background thread from emptying
  // the trash while the counters below are being checked -- confirm.
  rocksdb::SyncPoint::GetInstance()->LoadDependency({
      {"BlobDBTest::SstFileManagerDBSize:1",
       "DeleteScheduler::BackgroundEmptyTrash:Wakeup"},
  });
  // Count every deletion request and every direct (non-trash) deletion.
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "SstFileManagerImpl::ScheduleFileDeletion",
      [&](void* /*arg*/) { files_scheduled_to_delete++; });
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DeleteScheduler::DeleteFile",
      [&](void* /*arg*/) { files_deleted_directly++; });

  std::shared_ptr<SstFileManager> sst_file_manager(
      NewSstFileManager(mock_env_.get()));
  sst_file_manager->SetDeleteRateBytesPerSecond(1);
  SstFileManagerImpl* sfm =
      static_cast<SstFileManagerImpl*>(sst_file_manager.get());
  db_options.sst_file_manager = sst_file_manager;

  Open(bdb_options, db_options);
  sfm->WaitForEmptyTrash();
  SyncPoint::GetInstance()->EnableProcessing();

  // Create one obsolete file. The Put status is checked so that a failed
  // write is reported here instead of surfacing later as a confusing file
  // count mismatch.
  ASSERT_OK(blob_db_->Put(WriteOptions(), "foo1", "bar"));
  auto blob_files = blob_db_impl()->TEST_GetBlobFiles();
  ASSERT_EQ(1, blob_files.size());
  std::shared_ptr<BlobFile> bfile = blob_files[0];
  ASSERT_OK(blob_db_impl()->TEST_CloseBlobFile(bfile));
  GCStats gc_stats;
  ASSERT_OK(blob_db_impl()->TEST_GCFileAndUpdateLSM(bfile, &gc_stats));
  // Create second obsolete file
  ASSERT_OK(blob_db_->Put(WriteOptions(), "foo2", "bar"));
  blob_files = blob_db_impl()->TEST_GetBlobFiles();
  // 2 valid files and 1 obsolete file
  ASSERT_EQ(3, blob_files.size());
  bfile = blob_files[2];
  ASSERT_OK(blob_db_impl()->TEST_CloseBlobFile(bfile));
  ASSERT_OK(blob_db_impl()->TEST_GCFileAndUpdateLSM(bfile, &gc_stats));
  // Clean up the 2 obsolete files
  blob_db_impl()->TEST_DeleteObsoleteFiles();

  // Both obsolete files were handed to the SstFileManager; one of them was
  // deleted directly rather than left in the trash.
  ASSERT_EQ(2, files_scheduled_to_delete);
  ASSERT_EQ(1, files_deleted_directly);
  Destroy();
  // Make sure that DestroyBlobDB() also goes through delete scheduler.
  ASSERT_EQ(5, files_scheduled_to_delete);
  // The first obsolete blob file is marked as trash first and it will be
  // > .25 of DB size, so the 3 blob file and 1 WAL file deletions after that
  // are done directly.
  ASSERT_EQ(4, files_deleted_directly);
  // Release the background trash thread (see LoadDependency above) before
  // waiting for the trash to drain.
  TEST_SYNC_POINT("BlobDBTest::SstFileManagerDBSize:1");
  SyncPoint::GetInstance()->DisableProcessing();
  sfm->WaitForEmptyTrash();
}

TEST_F(BlobDBTest, SnapshotAndGarbageCollection) {
BlobDBOptions bdb_options;
bdb_options.min_blob_size = 0;
Expand Down