diff --git a/include/sta/Search.hh b/include/sta/Search.hh index 934c1006..ee3c1ff9 100644 --- a/include/sta/Search.hh +++ b/include/sta/Search.hh @@ -72,6 +72,8 @@ typedef Vector VertexSlackMapSeq; typedef Vector WorstSlacksSeq; typedef std::vector DelayDblSeq; +class ThreadLocalCacheTagSet; + class Search : public StaState { public: @@ -627,7 +629,7 @@ protected: ClkInfoSet *clk_info_set_; std::mutex clk_info_lock_; // Use pointer to tag set so Tag.hh does not need to be included. - TagSet *tag_set_; + ThreadLocalCacheTagSet *tag_set_; // Entries in tags_ may be missing where previous filter tags were deleted. TagIndex tag_capacity_; std::atomic tags_; diff --git a/search/Search.cc b/search/Search.cc index 812c98d9..ba1aac9f 100644 --- a/search/Search.cc +++ b/search/Search.cc @@ -25,7 +25,9 @@ #include "Search.hh" #include +#include #include // abs +#include #include "Mutex.hh" #include "Report.hh" @@ -71,6 +73,162 @@ namespace sta { +// A thread-local cache of tags wrapping a tag set. +// The goal is to make findKey lock free when possible. +class ThreadLocalCacheTagSet +{ +public: + ThreadLocalCacheTagSet(uint64_t tagset_capacity, + const StaState* sta, + void* search_parent_ptr) : + tag_set_(tagset_capacity, + TagSet::hasher(sta), + TagSet::key_equal(sta)), + search_parent_ptr_(search_parent_ptr), + sta_(sta) + { + } + + // Lock free lookup in the thread-local cache. + Tag* + findCachedTag(Tag* tag) const + { + TagSet& cache = getThreadLocalCache(); + auto it = cache.find(tag); + if (it != cache.end()) + return *it; + return nullptr; + } + + // Requires the Search to hold a lock. + Tag* + findKey(Tag* tag) const + { + auto it = tag_set_.find(tag); + if (it == tag_set_.end()) + return nullptr; + + // Insert into this thread's cache to speed up future lookups. + TagSet& cache = getThreadLocalCache(); + cache.insert(*it); + return *it; + } + + void + insert(Tag* tag) + { + // Also insert the tag into the cache for this thread. + TagSet& cache = getThreadLocalCache(); + cache.insert(tag); + tag_set_.insert(tag); + } + + std::size_t + size() const + { + return tag_set_.size(); + } + + void + deleteContentsClear() + { + incrClearCount(); // Clear caches if used again. + tag_set_.deleteContentsClear(); + } + + template + std::vector + eraseIf(Fn&& fn) + { + static_assert( + std::is_invocable_r_v, + "EraseIf predicate must be a function taking a Tag* and returning a bool"); + + std::vector to_delete; + for (auto it = tag_set_.begin(); it != tag_set_.end();) { + if (fn(*it)) { + to_delete.push_back(*it); + it = tag_set_.erase(it); + } else { + ++it; + } + } + + if (to_delete.size() > 0) + incrClearCount(); // Clear caches if used again. + return to_delete; + } + + std::size_t + bucket_count() const + { + return tag_set_.bucket_count(); + } + + std::size_t + bucket_size(std::size_t n) const + { + return tag_set_.bucket_size(n); + } + +private: + TagSet tag_set_; + std::atomic clear_count_ = 0; + void* search_parent_ptr_ = nullptr; + const StaState* sta_ = nullptr; + + // Can be marked const because the object doesn't own the caches. + TagSet& + getThreadLocalCache() const + { + // Both thread-local variables are unique to each thread and live for the + // lifetime of the thread that created them. Because multiple Search + // objects execute on the same thread, we need to make the cache Search + // object specific for safety. + thread_local std::unordered_map tl_cache_map; + thread_local std::unordered_map tl_clear_count_map; + + // Because TagSet is not default constructable we have to explicitly make + // one if it doesn't already exist. + auto cache_it = tl_cache_map.find(search_parent_ptr_); + if (cache_it == tl_cache_map.end()) { + bool emplaced = false; + std::tie(cache_it, emplaced) + = tl_cache_map.emplace(search_parent_ptr_, + TagSet(/* bucket_count= */ 0, + TagSet::hasher(sta_), + TagSet::key_equal(sta_))); + } + + // Find/emplace the clear count for this parent pointer. + auto clear_count_it = tl_clear_count_map.find(search_parent_ptr_); + if (clear_count_it == tl_clear_count_map.end()) { + bool emplaced = false; + std::tie(clear_count_it, emplaced) + = tl_clear_count_map.emplace(search_parent_ptr_, 0); + } + + auto& cache = cache_it->second; + auto& clear_count = clear_count_it->second; + + // To avoid needing to destroy threads and/or worrying about thread + // lifetimes we use a counter to ensure that any removals from the tag set + // trigger a clearing of the cache. This will prevent any stale data in + // the cache. + if (clear_count != clear_count_.load()) { + clear_count = clear_count_.load(); + cache.clear(); + } + return cache; + } + + void + incrClearCount() + { + clear_count_.fetch_add(1); + } +}; + using std::min; using std::max; using std::abs; @@ -235,7 +393,7 @@ Search::init(StaState *sta) arrival_iter_ = new BfsFwdIterator(BfsIndex::arrival, nullptr, sta); required_iter_ = new BfsBkwdIterator(BfsIndex::required, search_adj_, sta); tag_capacity_ = 128; - tag_set_ = new TagSet(tag_capacity_, TagHash(sta), TagEqual(sta)); + tag_set_ = new ThreadLocalCacheTagSet(tag_capacity_, sta, this); clk_info_set_ = new ClkInfoSet(ClkInfoLess(sta)); tag_next_ = 0; tags_ = new Tag*[tag_capacity_]; @@ -573,16 +731,17 @@ Search::deleteTagGroup(TagGroup *group) void Search::deleteFilterTags() { - for (TagIndex i = 0; i < tag_next_; i++) { - Tag *tag = tags_[i]; - if (tag - && (tag->isFilter() - || tag->clkInfo()->crprPathRefsFilter())) { - tags_[i] = nullptr; - tag_set_->erase(tag); - delete tag; - tag_free_indices_.push_back(i); + std::vector to_delete = + tag_set_->eraseIf([](Tag *tag) { return tag->isFilter(); }); + for (Tag *tag : to_delete) { + if (tag == nullptr) { + continue; } + + auto index = tag->index(); + tags_[index] = nullptr; + delete tag; + tag_free_indices_.push_back(index); } } @@ -2982,8 +3141,13 @@ Search::findTag(const RiseFall *rf, { Tag probe(0, rf->index(), path_ap->index(), clk_info, is_clk, input_delay, is_segment_start, states, false, this); + Tag *tag = tag_set_->findCachedTag(&probe); + if (tag) { + return tag; + } + LockGuard lock(tag_lock_); - Tag *tag = tag_set_->findKey(&probe); + tag = tag_set_->findKey(&probe); if (tag == nullptr) { ExceptionStateSet *new_states = !own_states && states ? new ExceptionStateSet(*states) : states; @@ -3012,7 +3176,6 @@ Search::findTag(const RiseFall *rf, tags_prev_.push_back(tags_); tags_ = tags; tag_capacity_ = tag_capacity; - tag_set_->reserve(tag_capacity); } if (tag_next_ == tag_index_max) report_->critical(1511, "max tag index exceeded");