diff --git a/cachelib/allocator/Cache.cpp b/cachelib/allocator/Cache.cpp index b871a04782..37457cc3e9 100644 --- a/cachelib/allocator/Cache.cpp +++ b/cachelib/allocator/Cache.cpp @@ -478,6 +478,10 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const { visitEstimates(uploadStatsNanoToMicro, stats.allocateLatencyNs, statPrefix + "allocate.latency_us"); + visitEstimates(uploadStatsNanoToMicro, stats.bgEvictLatencyNs, + statPrefix + "background.eviction.latency_us"); + visitEstimates(uploadStatsNanoToMicro, stats.bgPromoteLatencyNs, + statPrefix + "background.promotion.latency_us"); visitEstimates(uploadStatsNanoToMicro, stats.moveChainedLatencyNs, statPrefix + "move.chained.latency_us"); visitEstimates(uploadStatsNanoToMicro, stats.moveRegularLatencyNs, diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index a898d9b4fd..11e9058a34 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -426,8 +426,7 @@ CacheAllocator::allocateInternalTier(TierId tid, uint32_t expiryTime, bool fromBgThread, bool evict) { - util::LatencyTracker tracker{stats().allocateLatency_}; - + util::LatencyTracker tracker{stats().allocateLatency_, static_cast(!fromBgThread)}; SCOPE_FAIL { stats_.invalidAllocs.inc(); }; // number of bytes required for this item @@ -435,8 +434,8 @@ CacheAllocator::allocateInternalTier(TierId tid, // the allocation class in our memory allocator. 
const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); - util::RollingLatencyTracker rollTracker{ - (*stats_.classAllocLatency)[tid][pid][cid]}; + + util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]}; (*stats_.allocAttempts)[tid][pid][cid].inc(); diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 0ec7224603..b3201daf76 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -2016,7 +2016,8 @@ class CacheAllocator : public CacheBase { // exposed for the background evictor to iterate through the memory and evict // in batch. This should improve insertion path for tiered memory config size_t traverseAndEvictItems(unsigned int tid, unsigned int pid, unsigned int cid, size_t batch) { -auto& mmContainer = getMMContainer(tid, pid, cid); + util::LatencyTracker tracker{stats().bgEvictLatency_, batch}; + auto& mmContainer = getMMContainer(tid, pid, cid); size_t evictions = 0; size_t evictionCandidates = 0; std::vector candidates; @@ -2089,6 +2090,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid); } size_t traverseAndPromoteItems(unsigned int tid, unsigned int pid, unsigned int cid, size_t batch) { + util::LatencyTracker tracker{stats().bgPromoteLatency_, batch}; auto& mmContainer = getMMContainer(tid, pid, cid); size_t promotions = 0; std::vector candidates; diff --git a/cachelib/allocator/CacheStats.cpp b/cachelib/allocator/CacheStats.cpp index a5dd962dc2..453d3a0abb 100644 --- a/cachelib/allocator/CacheStats.cpp +++ b/cachelib/allocator/CacheStats.cpp @@ -106,6 +106,8 @@ void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const { ret.numNvmItemDestructorAllocErrors = numNvmItemDestructorAllocErrors.get(); ret.allocateLatencyNs = this->allocateLatency_.estimate(); + ret.bgEvictLatencyNs = this->bgEvictLatency_.estimate(); + ret.bgPromoteLatencyNs = this->bgPromoteLatency_.estimate(); ret.moveChainedLatencyNs = 
this->moveChainedLatency_.estimate(); ret.moveRegularLatencyNs = this->moveRegularLatency_.estimate(); ret.nvmLookupLatencyNs = this->nvmLookupLatency_.estimate(); diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h index 48ea90f6b8..cda2690bf8 100644 --- a/cachelib/allocator/CacheStats.h +++ b/cachelib/allocator/CacheStats.h @@ -510,6 +510,8 @@ struct GlobalCacheStats { // latency and percentile stats of various cachelib operations util::PercentileStats::Estimates allocateLatencyNs{}; + util::PercentileStats::Estimates bgEvictLatencyNs{}; + util::PercentileStats::Estimates bgPromoteLatencyNs{}; util::PercentileStats::Estimates moveChainedLatencyNs{}; util::PercentileStats::Estimates moveRegularLatencyNs{}; util::PercentileStats::Estimates nvmLookupLatencyNs{}; diff --git a/cachelib/allocator/CacheStatsInternal.h b/cachelib/allocator/CacheStatsInternal.h index ef41ea7bbc..da48df2d8f 100644 --- a/cachelib/allocator/CacheStatsInternal.h +++ b/cachelib/allocator/CacheStatsInternal.h @@ -189,6 +189,8 @@ struct Stats { // latency stats of various cachelib operations mutable util::PercentileStats allocateLatency_; + mutable util::PercentileStats bgEvictLatency_; + mutable util::PercentileStats bgPromoteLatency_; mutable util::PercentileStats moveChainedLatency_; mutable util::PercentileStats moveRegularLatency_; mutable util::PercentileStats nvmLookupLatency_; diff --git a/cachelib/cachebench/cache/Cache-inl.h b/cachelib/cachebench/cache/Cache-inl.h index 23481ad099..f1c5248718 100644 --- a/cachelib/cachebench/cache/Cache-inl.h +++ b/cachelib/cachebench/cache/Cache-inl.h @@ -743,6 +743,8 @@ Stats Cache::getStats() const { static_cast(itemRecords_.count()) - totalDestructor_; ret.cacheAllocateLatencyNs = cacheStats.allocateLatencyNs; + ret.cacheBgEvictLatencyNs = cacheStats.bgEvictLatencyNs; + ret.cacheBgPromoteLatencyNs = cacheStats.bgPromoteLatencyNs; ret.cacheFindLatencyNs = cacheFindLatency_.estimate(); // Populate counters. 
diff --git a/cachelib/cachebench/cache/CacheStats.h b/cachelib/cachebench/cache/CacheStats.h index eecb89bb96..72d71f4faa 100644 --- a/cachelib/cachebench/cache/CacheStats.h +++ b/cachelib/cachebench/cache/CacheStats.h @@ -91,6 +91,8 @@ struct Stats { uint64_t numNvmItemRemovedSetSize{0}; util::PercentileStats::Estimates cacheAllocateLatencyNs; + util::PercentileStats::Estimates cacheBgEvictLatencyNs; + util::PercentileStats::Estimates cacheBgPromoteLatencyNs; util::PercentileStats::Estimates cacheFindLatencyNs; double nvmReadLatencyMicrosP50{0}; @@ -282,6 +284,8 @@ struct Stats { printLatencies("Cache Find API latency", cacheFindLatencyNs); printLatencies("Cache Allocate API latency", cacheAllocateLatencyNs); + printLatencies("Cache Background Eviction API latency", cacheBgEvictLatencyNs); + printLatencies("Cache Background Promotion API latency", cacheBgPromoteLatencyNs); } } @@ -510,6 +514,8 @@ struct Stats { counters["find_latency_p99"] = cacheFindLatencyNs.p99; counters["alloc_latency_p99"] = cacheAllocateLatencyNs.p99; + counters["bg_evict_latency_p99"] = cacheBgEvictLatencyNs.p99; + counters["bg_promote_latency_p99"] = cacheBgPromoteLatencyNs.p99; counters["ram_hit_rate"] = calcInvertPctFn(numCacheGetMiss, numCacheGets); counters["nvm_hit_rate"] = calcInvertPctFn(numCacheGetMiss, numCacheGets); diff --git a/cachelib/common/PercentileStats.h b/cachelib/common/PercentileStats.h index bdd3255eba..c308671ee9 100644 --- a/cachelib/common/PercentileStats.h +++ b/cachelib/common/PercentileStats.h @@ -107,16 +107,16 @@ class PercentileStats { class LatencyTracker { public: - explicit LatencyTracker(PercentileStats& stats) - : stats_(&stats), begin_(std::chrono::steady_clock::now()) {} + explicit LatencyTracker(PercentileStats& stats, size_t nSamples = 1) + : stats_(&stats), nSamples_(nSamples), begin_(std::chrono::steady_clock::now()) {} LatencyTracker() {} ~LatencyTracker() { - if (stats_) { + if (nSamples_ > 0 && stats_) { auto tp = 
std::chrono::steady_clock::now(); auto diffNanos = std::chrono::duration_cast(tp - begin_) .count(); - stats_->trackValue(static_cast(diffNanos), tp); + stats_->trackValue(static_cast(diffNanos/nSamples_), tp); } } @@ -124,7 +124,7 @@ class LatencyTracker { LatencyTracker& operator=(const LatencyTracker&) = delete; LatencyTracker(LatencyTracker&& rhs) noexcept - : stats_(rhs.stats_), begin_(rhs.begin_) { + : stats_(rhs.stats_), nSamples_(rhs.nSamples_), begin_(rhs.begin_) { rhs.stats_ = nullptr; } @@ -138,6 +138,7 @@ class LatencyTracker { private: PercentileStats* stats_{nullptr}; + size_t nSamples_{1}; std::chrono::time_point begin_; }; } // namespace util diff --git a/cachelib/external/fbthrift b/cachelib/external/fbthrift index fb3c6ce37a..cbc3de581f 160000 --- a/cachelib/external/fbthrift +++ b/cachelib/external/fbthrift @@ -1 +1 @@ -Subproject commit fb3c6ce37aab5aecbb39c827e0ae84256c64a44b +Subproject commit cbc3de581fdf36ba474b0c135b9e785e504f1c1e diff --git a/cachelib/external/fizz b/cachelib/external/fizz index 5551610370..80ba4b64d1 160000 --- a/cachelib/external/fizz +++ b/cachelib/external/fizz @@ -1 +1 @@ -Subproject commit 555161037025db59658ae5d0277c4c3e1e49817e +Subproject commit 80ba4b64d1138025a3f61e4cd3c826405cd9e8cb diff --git a/cachelib/external/folly b/cachelib/external/folly index 017e426621..ce2b95715d 160000 --- a/cachelib/external/folly +++ b/cachelib/external/folly @@ -1 +1 @@ -Subproject commit 017e42662179411f83eb24c7100b3af7f8a61518 +Subproject commit ce2b95715de229fcb51bd97410469a3ad4d2bfb2 diff --git a/cachelib/external/wangle b/cachelib/external/wangle index 68b1ec08f2..44690e7894 160000 --- a/cachelib/external/wangle +++ b/cachelib/external/wangle @@ -1 +1 @@ -Subproject commit 68b1ec08f23196e0ad1dd2dfbb2308c095caf440 +Subproject commit 44690e7894842a7127245837b69627d4b964aabd diff --git a/docker/images/centos-8streams.Dockerfile b/docker/images/centos-8streams.Dockerfile index b916ab760c..e0c31226a1 100644 --- 
a/docker/images/centos-8streams.Dockerfile +++ b/docker/images/centos-8streams.Dockerfile @@ -17,6 +17,8 @@ json-c-devel \ perf \ numactl +# updated to fix compile errors and improve symbol +# resolution in VTune RUN dnf -y install gcc-toolset-12 RUN echo "source /opt/rh/gcc-toolset-12/enable" >> /etc/bashrc SHELL ["/bin/bash", "--login", "-c"]