From 0f3c7d38c4115ff689f9e43f28d10dd4362a8dec Mon Sep 17 00:00:00 2001 From: Feng Li Date: Mon, 20 Mar 2023 15:24:36 -0700 Subject: [PATCH] add sorted data to Historgram --- Common/Histogram.h | 146 +++++++++++++++++++++++++++++++-------------- 1 file changed, 102 insertions(+), 44 deletions(-) diff --git a/Common/Histogram.h b/Common/Histogram.h index 8d15ad9..64fd5fb 100644 --- a/Common/Histogram.h +++ b/Common/Histogram.h @@ -44,27 +44,33 @@ SOFTWARE. template class Histogram { - private: +private: unsigned _samples; #define USE_HASH_TABLE #ifdef USE_HASH_TABLE std::unordered_map _data; + std::map _sorteddata; - std::map _GetSortedData() const + std::map _GetSortedData() { - return std::map(_data.begin(), _data.end()); + if (_sorteddata.empty()) + { + _sorteddata = std::map(_data.begin(), _data.end()); + _data.clear(); + } + return _sorteddata; } #else std::map _data; std::map _GetSortedData() const { - return _data; + return _data; } #endif - public: +public: Histogram() : _samples(0) @@ -77,7 +83,7 @@ class Histogram } void Add(T v) - { + { _data[ v ]++; _samples++; } @@ -93,17 +99,27 @@ class Histogram } T GetMin() const - { + { T min(std::numeric_limits::max()); - for (auto i : _data) + if (_sorteddata.empty()) { - if (i.first < min) + for (auto i : _data) { - min = i.first; + if (i.first < min) + { + min = i.first; + } + } + } + else + { + auto i = _sorteddata.begin(); + if (i->first < min) + { + min = i->first; } } - return min; } @@ -111,23 +127,34 @@ class Histogram { T max(std::numeric_limits::min()); - for (auto i : _data) + if (_sorteddata.empty()) + { + for (auto i : _data) + { + if (i.first > max) + { + max = i.first; + } + } + } + else { - if (i.first > max) + auto i = _sorteddata.rbegin(); + if (i->first > max) { - max = i.first; + max = i->first; } } return max; } - unsigned GetSampleSize() const + unsigned GetSampleSize() const { return _samples; } - - T GetPercentile(double p) const + + T GetPercentile(double p) const { // ISSUE-REVIEW // What do the 0th and 100th percentile really mean? @@ -139,7 +166,8 @@ class Histogram const double target = GetSampleSize() * p; unsigned cur = 0; - for (auto i : _GetSortedData()) + + for (auto i : const_cast(this)->_GetSortedData()) { cur += i.second; if (cur >= target) @@ -152,51 +180,81 @@ class Histogram // we don't want to throw an exception and crash return 0; } - - T GetPercentile(int p) const + + T GetPercentile(int p) const { - return GetPercentile(static_cast(p)/100); + return GetPercentile(static_cast(p) / 100); } - T GetMedian() const - { - return GetPercentile(0.5); + T GetMedian() const + { + return GetPercentile(0.5); } double GetStdDev() const { return GetStandardDeviation(); } double GetAvg() const { return GetMean(); } - double GetMean() const - { + double GetMean() const + { double sum(0); unsigned samples = GetSampleSize(); - for (auto i : _data) + if (_sorteddata.empty()) { - double bucket_val = - static_cast(i.first) * i.second / samples; - - if (sum + bucket_val < 0) + for (auto i : _data) { - throw std::overflow_error("while trying to accumulate sum"); + double bucket_val = + static_cast(i.first) * i.second / samples; + + if (sum + bucket_val < 0) + { + throw std::overflow_error("while trying to accumulate sum"); + } + + sum += bucket_val; } + } + else + { + for (auto i : _sorteddata) + { + double bucket_val = + static_cast(i.first) * i.second / samples; + + if (sum + bucket_val < 0) + { + throw std::overflow_error("while trying to accumulate sum"); + } - sum += bucket_val; + sum += bucket_val; + } } return sum; } double GetStandardDeviation() const - { + { double mean(GetMean()); double ssd(0); - for (auto i : _data) + if (_sorteddata.empty()) + { + for (auto i : _data) + { + double dev = static_cast(i.first) - mean; + double sqdev = dev * dev; + ssd += i.second * sqdev; + } + } + else { - double dev = static_cast(i.first) - mean; - double sqdev = dev*dev; - ssd += i.second * sqdev; + for (auto i : _sorteddata) + { + double dev = static_cast(i.first) - mean; + double sqdev = dev * dev; + ssd += i.second * sqdev; + } } return sqrt(ssd / GetSampleSize()); @@ -219,9 +277,9 @@ class Histogram std::ostringstream os; os.precision(std::numeric_limits::digits10); - std::map sortedData = _GetSortedData(); + std::map sortedData = const_cast(this)->_GetSortedData(); - auto pos = sortedData.begin(); + auto pos = sortedData.begin(); unsigned cumulative = 0; @@ -230,8 +288,8 @@ class Histogram unsigned count = 0; limit += binSize; - while (pos != sortedData.end() && - (pos->first < limit || bin == bins)) + while (pos != sortedData.end() && + (pos->first < limit || bin == bins)) { count += pos->second; ++pos; @@ -250,7 +308,7 @@ class Histogram std::ostringstream os; os.precision(std::numeric_limits::digits10); - for (auto i : _GetSortedData()) + for (auto i : const_cast(this)->_GetSortedData();) { os << i.first << "," << i.second << std::endl; } @@ -262,7 +320,7 @@ class Histogram { std::ostringstream os; - for (auto i : _GetSortedData()) + for (auto i : const_cast(this)->_GetSortedData();) { os << i.second << " " << i.first << std::endl; }