Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add sorted data to Histogram #197

Merged
merged 1 commit into from
Mar 23, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 102 additions & 44 deletions Common/Histogram.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,27 +44,33 @@ SOFTWARE.
template<typename T>
class Histogram
{
private:
private:

unsigned _samples;

#define USE_HASH_TABLE
#ifdef USE_HASH_TABLE
std::unordered_map<T,unsigned> _data;
std::map<T,unsigned> _sorteddata;

std::map<T,unsigned> _GetSortedData() const
std::map<T,unsigned> _GetSortedData()
{
return std::map<T,unsigned>(_data.begin(), _data.end());
if (_sorteddata.empty())
{
_sorteddata = std::map<T, unsigned>(_data.begin(), _data.end());
_data.clear();
}
return _sorteddata;
}
#else
std::map<T,unsigned> _data;

std::map<T,unsigned> _GetSortedData() const
{
return _data;
return _data;
}
#endif
public:
public:

Histogram()
: _samples(0)
Expand All @@ -77,7 +83,7 @@ class Histogram
}

void Add(T v)
{
{
_data[ v ]++;
_samples++;
}
Expand All @@ -93,41 +99,62 @@ class Histogram
}

T GetMin() const
{
{
T min(std::numeric_limits<T>::max());

for (auto i : _data)
if (_sorteddata.empty())
{
if (i.first < min)
for (auto i : _data)
{
min = i.first;
if (i.first < min)
{
min = i.first;
}
}
}
else
{
auto i = _sorteddata.begin();
if (i->first < min)
{
min = i->first;
}
}

return min;
}

T GetMax() const
{
T max(std::numeric_limits<T>::min());

for (auto i : _data)
if (_sorteddata.empty())
{
for (auto i : _data)
{
if (i.first > max)
{
max = i.first;
}
}
}
else
{
if (i.first > max)
auto i = _sorteddata.rbegin();
if (i->first > max)
{
max = i.first;
max = i->first;
}
}

return max;
}

unsigned GetSampleSize() const
unsigned GetSampleSize() const
{
return _samples;
}
T GetPercentile(double p) const

T GetPercentile(double p) const
{
// ISSUE-REVIEW
// What do the 0th and 100th percentile really mean?
Expand All @@ -139,7 +166,8 @@ class Histogram
const double target = GetSampleSize() * p;

unsigned cur = 0;
for (auto i : _GetSortedData())

for (auto i : const_cast<Histogram*>(this)->_GetSortedData())
{
cur += i.second;
if (cur >= target)
Expand All @@ -152,51 +180,81 @@ class Histogram
// we don't want to throw an exception and crash
return 0;
}
T GetPercentile(int p) const

T GetPercentile(int p) const
{
return GetPercentile(static_cast<double>(p)/100);
return GetPercentile(static_cast<double>(p) / 100);
}

T GetMedian() const
{
return GetPercentile(0.5);
T GetMedian() const
{
return GetPercentile(0.5);
}

double GetStdDev() const { return GetStandardDeviation(); }
double GetAvg() const { return GetMean(); }

double GetMean() const
{
double GetMean() const
{
double sum(0);
unsigned samples = GetSampleSize();

for (auto i : _data)
if (_sorteddata.empty())
{
double bucket_val =
static_cast<double>(i.first) * i.second / samples;

if (sum + bucket_val < 0)
for (auto i : _data)
{
throw std::overflow_error("while trying to accumulate sum");
double bucket_val =
static_cast<double>(i.first) * i.second / samples;

if (sum + bucket_val < 0)
{
throw std::overflow_error("while trying to accumulate sum");
}

sum += bucket_val;
}
}
else
{
for (auto i : _sorteddata)
{
double bucket_val =
static_cast<double>(i.first) * i.second / samples;

if (sum + bucket_val < 0)
{
throw std::overflow_error("while trying to accumulate sum");
}

sum += bucket_val;
sum += bucket_val;
}
}

return sum;
}

double GetStandardDeviation() const
{
{
double mean(GetMean());
double ssd(0);

for (auto i : _data)
if (_sorteddata.empty())
{
for (auto i : _data)
{
double dev = static_cast<double>(i.first) - mean;
double sqdev = dev * dev;
ssd += i.second * sqdev;
}
}
else
{
double dev = static_cast<double>(i.first) - mean;
double sqdev = dev*dev;
ssd += i.second * sqdev;
for (auto i : _sorteddata)
{
double dev = static_cast<double>(i.first) - mean;
double sqdev = dev * dev;
ssd += i.second * sqdev;
}
}

return sqrt(ssd / GetSampleSize());
Expand All @@ -219,9 +277,9 @@ class Histogram
std::ostringstream os;
os.precision(std::numeric_limits<T>::digits10);

std::map<T,unsigned> sortedData = _GetSortedData();
std::map<T, unsigned> sortedData = const_cast<Histogram*>(this)->_GetSortedData();

auto pos = sortedData.begin();
auto pos = sortedData.begin();

unsigned cumulative = 0;

Expand All @@ -230,8 +288,8 @@ class Histogram
unsigned count = 0;
limit += binSize;

while (pos != sortedData.end() &&
(pos->first < limit || bin == bins))
while (pos != sortedData.end() &&
(pos->first < limit || bin == bins))
{
count += pos->second;
++pos;
Expand All @@ -250,7 +308,7 @@ class Histogram
std::ostringstream os;
os.precision(std::numeric_limits<T>::digits10);

for (auto i : _GetSortedData())
for (auto i : const_cast<Histogram*>(this)->_GetSortedData();)
{
os << i.first << "," << i.second << std::endl;
}
Expand All @@ -262,7 +320,7 @@ class Histogram
{
std::ostringstream os;

for (auto i : _GetSortedData())
for (auto i : const_cast<Histogram*>(this)->_GetSortedData();)
{
os << i.second << " " << i.first << std::endl;
}
Expand Down