Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](gc tablet) fix get shutdown tablet cost a lot time #27693

Merged
merged 4 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 143 additions & 119 deletions be/src/olap/tablet_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,24 +619,12 @@ TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, bool in
TabletSharedPtr tablet;
tablet = _get_tablet_unlocked(tablet_id);
if (tablet == nullptr && include_deleted) {
{
std::shared_lock rdlock(_shutdown_tablets_lock);
for (auto& deleted_tablet : _shutdown_tablets) {
CHECK(deleted_tablet != nullptr) << "deleted tablet is nullptr";
if (deleted_tablet->tablet_id() == tablet_id) {
tablet = deleted_tablet;
break;
}
}
}
if (tablet == nullptr) {
std::shared_lock rdlock(_shutdown_deleting_tablets_lock);
for (auto& deleted_tablet : _shutdown_deleting_tablets) {
CHECK(deleted_tablet != nullptr) << "deleted tablet is nullptr";
if (deleted_tablet->tablet_id() == tablet_id) {
tablet = deleted_tablet;
break;
}
std::shared_lock rdlock(_shutdown_tablets_lock);
for (auto& deleted_tablet : _shutdown_tablets) {
CHECK(deleted_tablet != nullptr) << "deleted tablet is nullptr";
if (deleted_tablet->tablet_id() == tablet_id) {
tablet = deleted_tablet;
break;
}
}
}
Expand Down Expand Up @@ -1040,119 +1028,155 @@ Status TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>
}

Status TabletManager::start_trash_sweep() {
std::unique_lock<std::mutex> lock(_gc_tablets_lock, std::defer_lock);
if (!lock.try_lock()) {
return Status::OK();
}

SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
for_each_tablet([](const TabletSharedPtr& tablet) { tablet->delete_expired_stale_rowset(); },
filter_all_tablets);

std::list<TabletSharedPtr>::iterator last_it;
{
for_each_tablet(
[](const TabletSharedPtr& tablet) { tablet->delete_expired_stale_rowset(); },
filter_all_tablets);
std::shared_lock rdlock(_shutdown_tablets_lock);
last_it = _shutdown_tablets.begin();
if (last_it == _shutdown_tablets.end()) {
return Status::OK();
}
}

int32_t clean_num = 0;
do {
#ifndef BE_TEST
sleep(1);
#endif
clean_num = 0;
// should get write lock here, because it will remove tablet from shut_down_tablets
// and get tablet will access shut_down_tablets
{
std::lock_guard<std::shared_mutex> wrlock1(_shutdown_tablets_lock);
std::lock_guard<std::shared_mutex> wrlock2(_shutdown_deleting_tablets_lock);
for (const auto& tablet : _shutdown_tablets) {
_shutdown_deleting_tablets.push_back(tablet);
auto get_batch_tablets = [this, &last_it](int limit) {
std::vector<TabletSharedPtr> batch_tablets;
std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock);
while (last_it != _shutdown_tablets.end() && batch_tablets.size() < limit) {
// it means current tablet is referenced by other thread
if (last_it->use_count() > 1) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if tablet is referenced during _move_tablet_to_trash, will the query being correct?

Copy link
Collaborator Author

@yujun777 yujun777 Nov 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如果tablet被其他线程拿住,不会回收这个tablet;
如果tablet正在执行_move_tablet_to_trash,其他线程不会拿到这个tablet(因为其他线程拿tablet需要获得shutdown_tablets_lock 的读锁)

last_it++;
} else {
batch_tablets.push_back(*last_it);
last_it = _shutdown_tablets.erase(last_it);
}
_shutdown_tablets.clear();
}
std::lock_guard<std::shared_mutex> wrlock(_shutdown_deleting_tablets_lock);
auto it = _shutdown_deleting_tablets.begin();
while (it != _shutdown_deleting_tablets.end()) {
// check if the meta has the tablet info and its state is shutdown
if (it->use_count() > 1) {
// it means current tablet is referenced by other thread
++it;
continue;
}
TabletMetaSharedPtr tablet_meta(new TabletMeta());
int64_t get_meta_ts = MonotonicMicros();
Status check_st = TabletMetaManager::get_meta((*it)->data_dir(), (*it)->tablet_id(),
(*it)->schema_hash(), tablet_meta);
if (check_st.ok()) {
if (tablet_meta->tablet_state() != TABLET_SHUTDOWN ||
tablet_meta->tablet_uid() != (*it)->tablet_uid()) {
LOG(WARNING) << "tablet's state changed to normal, skip remove dirs"
<< " tablet id = " << tablet_meta->tablet_id()
<< " schema hash = " << tablet_meta->schema_hash()
<< " old tablet_uid=" << (*it)->tablet_uid()
<< " cur tablet_uid=" << tablet_meta->tablet_uid();
// remove it from list
it = _shutdown_deleting_tablets.erase(it);
continue;
}
// move data to trash
const auto& tablet_path = (*it)->tablet_path();
bool exists = false;
Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists);
if (!exists_st) {
continue;
}
if (exists) {
// take snapshot of tablet meta
auto meta_file_path = fmt::format("{}/{}.hdr", tablet_path, (*it)->tablet_id());
int64_t save_meta_ts = MonotonicMicros();
RETURN_IF_ERROR((*it)->tablet_meta()->save(meta_file_path));
int64_t now = MonotonicMicros();
LOG(INFO) << "start to move tablet to trash. " << tablet_path
<< ". rocksdb get meta cost " << (save_meta_ts - get_meta_ts)
<< " us, rocksdb save meta cost " << (now - save_meta_ts)
<< " us, clean_num=" << clean_num;
Status rm_st = (*it)->data_dir()->move_to_trash(tablet_path);
if (!rm_st.ok()) {
LOG(WARNING) << "fail to move dir to trash. " << tablet_path;
++it;
continue;
}
}
// remove tablet meta
RETURN_IF_ERROR(TabletMetaManager::remove((*it)->data_dir(), (*it)->tablet_id(),
(*it)->schema_hash()));
LOG(INFO) << "successfully move tablet to trash. "
<< "tablet_id=" << (*it)->tablet_id()
<< ", schema_hash=" << (*it)->schema_hash()
<< ", tablet_path=" << tablet_path;
it = _shutdown_deleting_tablets.erase(it);
++clean_num;
} else {
// if could not find tablet info in meta store, then check if dir existed
const auto& tablet_path = (*it)->tablet_path();
bool exists = false;
Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists);
if (!exists_st) {
continue;
}
if (exists) {
LOG(WARNING) << "errors while load meta from store, skip this tablet. "
<< "tablet_id=" << (*it)->tablet_id()
<< ", schema_hash=" << (*it)->schema_hash();
++it;

return batch_tablets;
};

std::list<TabletSharedPtr> failed_tablets;
// return true if need continue delete
auto delete_one_batch = [this, get_batch_tablets, &failed_tablets]() -> bool {
int limit = 200;
for (;;) {
auto batch_tablets = get_batch_tablets(limit);
for (const auto& tablet : batch_tablets) {
if (_move_tablet_to_trash(tablet)) {
limit--;
} else {
LOG(INFO) << "could not find tablet dir, skip it and remove it from gc-queue. "
<< "tablet_id=" << (*it)->tablet_id()
<< ", schema_hash=" << (*it)->schema_hash()
<< ", tablet_path=" << tablet_path;
it = _shutdown_deleting_tablets.erase(it);
failed_tablets.push_back(tablet);
}
}

// yield to avoid holding _tablet_map_lock for too long
if (clean_num >= 200) {
break;
if (limit <= 0) {
return true;
}
if (batch_tablets.empty()) {
return false;
}
}
// >= 200 means there may be more tablets need to be handled
// So continue
} while (clean_num >= 200);

return false;
};

while (delete_one_batch()) {
#ifndef BE_TEST
sleep(1);
#endif
}

if (!failed_tablets.empty()) {
std::lock_guard<std::shared_mutex> wrlock(_shutdown_tablets_lock);
_shutdown_tablets.splice(_shutdown_tablets.end(), failed_tablets);
}

return Status::OK();
} // start_trash_sweep
}

bool TabletManager::_move_tablet_to_trash(const TabletSharedPtr& tablet) {
TabletMetaSharedPtr tablet_meta(new TabletMeta());
int64_t get_meta_ts = MonotonicMicros();
Status check_st = TabletMetaManager::get_meta(tablet->data_dir(), tablet->tablet_id(),
tablet->schema_hash(), tablet_meta);
if (check_st.ok()) {
if (tablet_meta->tablet_state() != TABLET_SHUTDOWN ||
tablet_meta->tablet_uid() != tablet->tablet_uid()) {
LOG(WARNING) << "tablet's state changed to normal, skip remove dirs"
<< " tablet id = " << tablet_meta->tablet_id()
<< " schema hash = " << tablet_meta->schema_hash()
<< " old tablet_uid=" << tablet->tablet_uid()
<< " cur tablet_uid=" << tablet_meta->tablet_uid();
return true;
}
// move data to trash
const auto& tablet_path = tablet->tablet_path();
bool exists = false;
Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists);
if (!exists_st) {
return false;
}
if (exists) {
// take snapshot of tablet meta
auto meta_file_path = fmt::format("{}/{}.hdr", tablet_path, tablet->tablet_id());
int64_t save_meta_ts = MonotonicMicros();
auto save_st = tablet->tablet_meta()->save(meta_file_path);
if (!save_st.ok()) {
LOG(WARNING) << "failed to save meta, tablet_id=" << tablet_meta->tablet_id()
<< ", tablet_uid=" << tablet_meta->tablet_uid()
<< ", error=" << save_st;
return false;
}
int64_t now = MonotonicMicros();
LOG(INFO) << "start to move tablet to trash. " << tablet_path
<< ". rocksdb get meta cost " << (save_meta_ts - get_meta_ts)
<< " us, rocksdb save meta cost " << (now - save_meta_ts) << " us";
Status rm_st = tablet->data_dir()->move_to_trash(tablet_path);
if (!rm_st.ok()) {
LOG(WARNING) << "fail to move dir to trash. " << tablet_path;
return false;
}
}
// remove tablet meta
auto remove_st = TabletMetaManager::remove(tablet->data_dir(), tablet->tablet_id(),
tablet->schema_hash());
if (!remove_st.ok()) {
LOG(WARNING) << "failed to remove meta, tablet_id=" << tablet_meta->tablet_id()
<< ", tablet_uid=" << tablet_meta->tablet_uid() << ", error=" << remove_st;
return false;
}
LOG(INFO) << "successfully move tablet to trash. "
<< "tablet_id=" << tablet->tablet_id()
<< ", schema_hash=" << tablet->schema_hash() << ", tablet_path=" << tablet_path;
return true;
} else {
// if could not find tablet info in meta store, then check if dir existed
const auto& tablet_path = tablet->tablet_path();
bool exists = false;
Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists);
if (!exists_st) {
return false;
}
if (exists) {
LOG(WARNING) << "errors while load meta from store, skip this tablet. "
<< "tablet_id=" << tablet->tablet_id()
<< ", schema_hash=" << tablet->schema_hash();
return false;
} else {
LOG(INFO) << "could not find tablet dir, skip it and remove it from gc-queue. "
<< "tablet_id=" << tablet->tablet_id()
<< ", schema_hash=" << tablet->schema_hash()
<< ", tablet_path=" << tablet_path;
return true;
}
}
}

bool TabletManager::register_clone_tablet(int64_t tablet_id) {
tablets_shard& shard = _get_tablets_shard(tablet_id);
Expand Down
10 changes: 5 additions & 5 deletions be/src/olap/tablet_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ class TabletManager {

std::shared_mutex& _get_tablets_shard_lock(TTabletId tabletId);

bool _move_tablet_to_trash(const TabletSharedPtr& tablet);

private:
DISALLOW_COPY_AND_ASSIGN(TabletManager);

Expand Down Expand Up @@ -237,11 +239,9 @@ class TabletManager {
std::shared_mutex _shutdown_tablets_lock;
// partition_id => tablet_info
std::map<int64_t, std::set<TabletInfo>> _partition_tablet_map;
std::vector<TabletSharedPtr> _shutdown_tablets;

// gc thread will move _shutdown_tablets to _shutdown_deleting_tablets
std::shared_mutex _shutdown_deleting_tablets_lock;
std::list<TabletSharedPtr> _shutdown_deleting_tablets;
// the delete tablets. notice only allow function `start_trash_sweep` can erase tablets in _shutdown_tablets
std::list<TabletSharedPtr> _shutdown_tablets;
std::mutex _gc_tablets_lock;

std::mutex _tablet_stat_cache_mutex;
std::shared_ptr<std::vector<TTabletStat>> _tablet_stat_list_cache =
Expand Down
Loading