Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion include/sta/Search.hh
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ typedef Vector<VertexSlackMap> VertexSlackMapSeq;
typedef Vector<WorstSlacks> WorstSlacksSeq;
typedef std::vector<DelayDbl> DelayDblSeq;

class ThreadLocalCacheTagSet;

class Search : public StaState
{
public:
Expand Down Expand Up @@ -627,7 +629,7 @@ protected:
ClkInfoSet *clk_info_set_;
std::mutex clk_info_lock_;
// Use pointer to tag set so Tag.hh does not need to be included.
TagSet *tag_set_;
ThreadLocalCacheTagSet *tag_set_;
// Entries in tags_ may be missing where previous filter tags were deleted.
TagIndex tag_capacity_;
std::atomic<Tag **> tags_;
Expand Down
187 changes: 175 additions & 12 deletions search/Search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
#include "Search.hh"

#include <algorithm>
#include <atomic>
#include <cmath> // abs
#include <tuple>

#include "Mutex.hh"
#include "Report.hh"
Expand Down Expand Up @@ -71,6 +73,162 @@

namespace sta {

// A thread-local cache of tags wrapping a tag set.
// The goal is to make findKey lock free when possible.
class ThreadLocalCacheTagSet
{
public:
ThreadLocalCacheTagSet(uint64_t tagset_capacity,
const StaState* sta,
void* search_parent_ptr) :
tag_set_(tagset_capacity,
TagSet::hasher(sta),
TagSet::key_equal(sta)),
search_parent_ptr_(search_parent_ptr),
sta_(sta)
{
}

// Lock free lookup in the thread-local cache.
Tag*
findCachedTag(Tag* tag) const
{
TagSet& cache = getThreadLocalCache();
auto it = cache.find(tag);
if (it != cache.end())
return *it;
return nullptr;
}

// Requires the Search to hold a lock.
Tag*
findKey(Tag* tag) const
{
auto it = tag_set_.find(tag);
if (it == tag_set_.end())
return nullptr;

// Insert into this thread's cache to speed up future lookups.
TagSet& cache = getThreadLocalCache();
cache.insert(*it);
return *it;
}

void
insert(Tag* tag)
{
// Also insert the tag into the cache for this thread.
TagSet& cache = getThreadLocalCache();
cache.insert(tag);
tag_set_.insert(tag);
}

std::size_t
size() const
{
return tag_set_.size();
}

void
deleteContentsClear()
{
incrClearCount(); // Clear caches if used again.
tag_set_.deleteContentsClear();
}

template <typename Fn>
std::vector<Tag*>
eraseIf(Fn&& fn)
{
static_assert(
std::is_invocable_r_v<bool, Fn, Tag*>,
"EraseIf predicate must be a function taking a Tag* and returning a bool");

std::vector<Tag*> to_delete;
for (auto it = tag_set_.begin(); it != tag_set_.end();) {
if (fn(*it)) {
to_delete.push_back(*it);
it = tag_set_.erase(it);
} else {
++it;
}
}

if (to_delete.size() > 0)
incrClearCount(); // Clear caches if used again.
return to_delete;
}

std::size_t
bucket_count() const
{
return tag_set_.bucket_count();
}

std::size_t
bucket_size(std::size_t n) const
{
return tag_set_.bucket_size(n);
}

private:
TagSet tag_set_;
std::atomic<uint64_t> clear_count_ = 0;
void* search_parent_ptr_ = nullptr;
const StaState* sta_ = nullptr;

// Can be marked const because the object doesn't own the caches.
TagSet&
getThreadLocalCache() const
{
// Both thread-local variables are unique to each thread and live for the
// lifetime of the thread that created them. Because multiple Search
// objects execute on the same thread, we need to make the cache Search
// object specific for safety.
thread_local std::unordered_map<void*, TagSet> tl_cache_map;
thread_local std::unordered_map<void*, uint64_t> tl_clear_count_map;

// Because TagSet is not default constructable we have to explicitly make
// one if it doesn't already exist.
auto cache_it = tl_cache_map.find(search_parent_ptr_);
if (cache_it == tl_cache_map.end()) {
bool emplaced = false;
std::tie(cache_it, emplaced)
= tl_cache_map.emplace(search_parent_ptr_,
TagSet(/* bucket_count= */ 0,
TagSet::hasher(sta_),
TagSet::key_equal(sta_)));
}

// Find/emplace the clear count for this parent pointer.
auto clear_count_it = tl_clear_count_map.find(search_parent_ptr_);
if (clear_count_it == tl_clear_count_map.end()) {
bool emplaced = false;
std::tie(clear_count_it, emplaced)
= tl_clear_count_map.emplace(search_parent_ptr_, 0);
}

auto& cache = cache_it->second;
auto& clear_count = clear_count_it->second;

// To avoid needing to destroy threads and/or worrying about thread
// lifetimes we use a counter to ensure that any removals from the tag set
// trigger a clearing of the cache. This will prevent any stale data in
// the cache.
if (clear_count != clear_count_.load()) {
clear_count = clear_count_.load();
cache.clear();
}
return cache;
}

void
incrClearCount()
{
clear_count_.fetch_add(1);
}
};

using std::min;
using std::max;
using std::abs;
Expand Down Expand Up @@ -235,7 +393,7 @@ Search::init(StaState *sta)
arrival_iter_ = new BfsFwdIterator(BfsIndex::arrival, nullptr, sta);
required_iter_ = new BfsBkwdIterator(BfsIndex::required, search_adj_, sta);
tag_capacity_ = 128;
tag_set_ = new TagSet(tag_capacity_, TagHash(sta), TagEqual(sta));
tag_set_ = new ThreadLocalCacheTagSet(tag_capacity_, sta, this);
clk_info_set_ = new ClkInfoSet(ClkInfoLess(sta));
tag_next_ = 0;
tags_ = new Tag*[tag_capacity_];
Expand Down Expand Up @@ -573,16 +731,17 @@ Search::deleteTagGroup(TagGroup *group)
void
Search::deleteFilterTags()
{
for (TagIndex i = 0; i < tag_next_; i++) {
Tag *tag = tags_[i];
if (tag
&& (tag->isFilter()
|| tag->clkInfo()->crprPathRefsFilter())) {
tags_[i] = nullptr;
tag_set_->erase(tag);
delete tag;
tag_free_indices_.push_back(i);
std::vector<Tag *> to_delete =
tag_set_->eraseIf([](Tag *tag) { return tag->isFilter(); });
for (Tag *tag : to_delete) {
if (tag == nullptr) {
continue;
}

auto index = tag->index();
tags_[index] = nullptr;
delete tag;
tag_free_indices_.push_back(index);
}
}

Expand Down Expand Up @@ -2982,8 +3141,13 @@ Search::findTag(const RiseFall *rf,
{
Tag probe(0, rf->index(), path_ap->index(), clk_info, is_clk, input_delay,
is_segment_start, states, false, this);
Tag *tag = tag_set_->findCachedTag(&probe);
if (tag) {
return tag;
}

LockGuard lock(tag_lock_);
Tag *tag = tag_set_->findKey(&probe);
tag = tag_set_->findKey(&probe);
if (tag == nullptr) {
ExceptionStateSet *new_states = !own_states && states
? new ExceptionStateSet(*states) : states;
Expand Down Expand Up @@ -3012,7 +3176,6 @@ Search::findTag(const RiseFall *rf,
tags_prev_.push_back(tags_);
tags_ = tags;
tag_capacity_ = tag_capacity;
tag_set_->reserve(tag_capacity);
}
if (tag_next_ == tag_index_max)
report_->critical(1511, "max tag index exceeded");
Expand Down