Skip to content

Commit

Permalink
add support for uint64 bucket element type for summary. (#858)
Browse files Browse the repository at this point in the history
  • Loading branch information
poor-circle authored Dec 19, 2024
1 parent f3e7189 commit 89f23d4
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 41 deletions.
10 changes: 6 additions & 4 deletions include/ylt/metric/summary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ YLT_REFL(json_summary_t, name, help, type, labels_name, quantiles_key, metrics);
class summary_t : public static_metric {
public:
summary_t(std::string name, std::string help, std::vector<double> quantiles,
std::chrono::seconds max_age = std::chrono::seconds{60})
std::chrono::seconds max_age = std::chrono::seconds{0})
: static_metric(MetricType::Summary, std::move(name), std::move(help)),
quantiles_(std::move(quantiles)),
impl_(quantiles_,
Expand Down Expand Up @@ -133,14 +133,16 @@ class summary_t : public static_metric {

private:
std::vector<double> quantiles_;
ylt::metric::detail::summary_impl<> impl_;
ylt::metric::detail::summary_impl<uint64_t> impl_;
};

template <size_t N>
class basic_dynamic_summary
: public dynamic_metric_impl<ylt::metric::detail::summary_impl<>, N> {
: public dynamic_metric_impl<ylt::metric::detail::summary_impl<uint32_t>,
N> {
private:
using Base = dynamic_metric_impl<ylt::metric::detail::summary_impl<>, N>;
using Base =
dynamic_metric_impl<ylt::metric::detail::summary_impl<uint32_t>, N>;

public:
basic_dynamic_summary(
Expand Down
63 changes: 36 additions & 27 deletions include/ylt/metric/summary_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@
#include <iterator>
#include <limits>
#include <memory>
#include <type_traits>
#include <vector>

namespace ylt::metric::detail {

template <std::size_t frac_bit = 6>
template <typename uint_type, std::size_t frac_bit = 6>
class summary_impl {
static_assert(sizeof(uint_type) >= 4);
static_assert(std::is_unsigned_v<uint_type>);
constexpr static uint32_t decode_impl(uint16_t float16_value) {
float16_value <<= (8 - frac_bit);
uint32_t sign = float16_value >> 15;
Expand Down Expand Up @@ -57,7 +60,8 @@ class summary_impl {
static constexpr float float16_max = (1ull << 63) * 2.0f; // 2^64

static uint16_t encode(float flt) {
unsigned int& fltInt32 = *(unsigned int*)&flt;
static_assert(sizeof(float) == 4);
uint32_t& fltInt32 = *(uint32_t*)&flt;
if (std::abs(flt) >= float16_max || std::isnan(flt)) {
flt = (fltInt32 & 0x8000'0000) ? (-float16_max) : (float16_max);
}
Expand Down Expand Up @@ -88,9 +92,9 @@ class summary_impl {

struct data_t {
static constexpr size_t piece_size = bucket_size / piece_cnt;
using piece_t = std::array<std::atomic<uint32_t>, piece_size>;
using piece_t = std::array<std::atomic<uint_type>, piece_size>;

std::atomic<uint32_t>& operator[](std::size_t index) {
std::atomic<uint_type>& operator[](std::size_t index) {
piece_t* piece = arr[index / piece_size];
if (piece == nullptr) {
auto ptr = new piece_t{};
Expand Down Expand Up @@ -122,7 +126,7 @@ class summary_impl {
}
template <bool inc_order>
void stat_impl(uint64_t& count,
std::vector<std::pair<int16_t, uint32_t>>& result, int i) {
std::vector<std::pair<int16_t, uint_type>>& result, int i) {
auto piece = arr[i].load(std::memory_order_relaxed);
if (piece) {
if constexpr (inc_order) {
Expand All @@ -146,7 +150,7 @@ class summary_impl {
}
}
void stat(uint64_t& count,
std::vector<std::pair<int16_t, uint32_t>>& result) {
std::vector<std::pair<int16_t, uint_type>>& result) {
for (int i = piece_cnt - 1; i >= piece_cnt / 2; --i) {
stat_impl<false>(count, result, i);
}
Expand Down Expand Up @@ -182,36 +186,38 @@ class summary_impl {
static inline const unsigned long ms_count =
std::chrono::steady_clock::duration{std::chrono::milliseconds{1}}.count();

constexpr static unsigned int near_uint32_max = 4290000000U;
constexpr static uint32_t near_uint32_max = 4290000000U;

void increase(data_t& arr, uint16_t pos) {
if (arr[pos].fetch_add(1, std::memory_order::relaxed) >
near_uint32_max) /*no overflow*/ [[likely]] {
arr[pos].fetch_sub(1, std::memory_order::relaxed);
int upper = (pos < bucket_size / 2) ? (bucket_size / 2) : (bucket_size);
int lower = (pos < bucket_size / 2) ? (0) : (bucket_size / 2);
for (int delta = 1, lim = (std::max)(upper - pos, pos - lower + 1);
delta < lim; ++delta) {
if (pos + delta < upper) {
if (arr[pos + delta].fetch_add(1, std::memory_order::relaxed) <=
near_uint32_max) {
break;
auto res = arr[pos].fetch_add(1, std::memory_order::relaxed);
if constexpr (std::is_same_v<uint_type, uint32_t>) {
if (res > near_uint32_max) /*no overflow*/ [[likely]] {
arr[pos].fetch_sub(1, std::memory_order::relaxed);
int upper = (pos < bucket_size / 2) ? (bucket_size / 2) : (bucket_size);
int lower = (pos < bucket_size / 2) ? (0) : (bucket_size / 2);
for (int delta = 1, lim = (std::max)(upper - pos, pos - lower + 1);
delta < lim; ++delta) {
if (pos + delta < upper) {
if (arr[pos + delta].fetch_add(1, std::memory_order::relaxed) <=
near_uint32_max) {
break;
}
arr[pos + delta].fetch_sub(1, std::memory_order::relaxed);
}
arr[pos + delta].fetch_sub(1, std::memory_order::relaxed);
}
if (pos - delta >= lower) {
if (arr[pos - delta].fetch_add(1, std::memory_order::relaxed) <=
near_uint32_max) {
break;
if (pos - delta >= lower) {
if (arr[pos - delta].fetch_add(1, std::memory_order::relaxed) <=
near_uint32_max) {
break;
}
arr[pos - delta].fetch_sub(1, std::memory_order::relaxed);
}
arr[pos - delta].fetch_sub(1, std::memory_order::relaxed);
}
}
}
}

struct data_copy_t {
std::vector<std::pair<int16_t, uint32_t>> arr[2];
std::vector<std::pair<int16_t, uint_type>> arr[2];
int index[2] = {}, smaller_one;
void init() {
if (arr[0][0] <= arr[1][0]) {
Expand All @@ -231,7 +237,7 @@ class summary_impl {
}
}
int16_t value() { return arr[smaller_one][index[smaller_one]].first; }
uint32_t count() { return arr[smaller_one][index[smaller_one]].second; }
uint_type count() { return arr[smaller_one][index[smaller_one]].second; }
};

public:
Expand Down Expand Up @@ -304,6 +310,9 @@ class summary_impl {
e = 1;
}
auto target_count = std::min<double>(e * count, count);
if (e == 0) {
target_count = std::min(uint64_t{1}, count);
}
while (true) {
if (target_count <= count_now) [[unlikely]] {
result.push_back(v);
Expand Down
24 changes: 21 additions & 3 deletions src/metric/tests/test_metric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -925,15 +925,15 @@ TEST_CASE("test summary with illegal quantities") {
CHECK(str.find("test_summary_sum") != std::string::npos);
CHECK(str.find("test_summary{quantile=\"") != std::string::npos);
CHECK(result[0] < 0);
CHECK(result[1] < 0);
CHECK(result[1] == 0);
CHECK(result[result.size() - 1] > result[result.size() - 2]);

#ifdef CINATRA_ENABLE_METRIC_JSON
std::string str_json;
summary.serialize_to_json(str_json);
std::cout << str_json << "\n";
std::cout << str_json.size() << std::endl;
CHECK(str_json.size() == 233);
CHECK(str_json.size() == 222);
#endif
}

Expand Down Expand Up @@ -969,7 +969,7 @@ TEST_CASE("test summary with many quantities") {
summary.serialize_to_json(str_json);
std::cout << str_json << "\n";
std::cout << str_json.size() << std::endl;
CHECK(str_json.size() == 8868);
CHECK(str_json.size() == 8857);
#endif
}

Expand Down Expand Up @@ -1998,6 +1998,24 @@ TEST_CASE("test remove label value") {
CHECK(!counter.has_label_value(std::vector<std::string>{}));
}

// Checks get_rates() on a static summary_t configured with the two boundary
// quantiles 0 and 1, once with observations and once without any.
TEST_CASE("test static summary with 0 and 1 quantiles") {
{
// Populated case: all 100 observations have the value 1, so both the
// 0-quantile and the 1-quantile are expected to be 1.
ylt::metric::summary_t s("test", "help", {0, 1});
for (uint64_t i = 0; i < 100ull; ++i) {
s.observe(1);
}
auto result = s.get_rates();
CHECK(result[0] == 1);
CHECK(result[1] == 1);
}
{
// Empty case: nothing has been observed, so both returned entries are
// expected to be 0.
ylt::metric::summary_t s("test", "help", {0, 1});
auto result = s.get_rates();
CHECK(result[0] == 0);
CHECK(result[1] == 0);
}
}

// Test binary entry point: forwards the command-line arguments to a
// doctest::Context and returns the value of its run() call as the exit code.
// NOTE(review): the surrounding macros presumably push/pop a suppression of
// MSVC warning 4007 around this definition — confirm against doctest's docs.
DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(4007)
int main(int argc, char** argv) { return doctest::Context(argc, argv).run(); }
DOCTEST_MSVC_SUPPRESS_WARNING_POP
21 changes: 14 additions & 7 deletions website/docs/zh/metric/metric_introduction.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# metric 介绍
metric 用于统计应用程序的各种指标,这些指标被用于系统监视和警报,常见的指标类型有四种:Counter、Gauge、Histogram和Summary,这些指标遵循[Prometheus](https://hulining.gitbook.io/prometheus/introduction)的数据格式。yalantinglibs提供了一系列高性能且线程安全的统计工具。

metric 包括4种指标类型:
- counter:只会增加的指标;
- gauge:可以增加或减少的指标,它派生于counter;
- histogram:直方图,初始化的时候需要设置桶(bucket);
- summary:分位数指标,初始化的时候需要设置桶和误差;


## Counter 计数器类型
Counter是一个累计类型的数据指标,它代表单调递增的计数器,其值只能在重新启动时增加或重置为 0。例如,您可以使用计数器来表示已响应的请求数,已完成或出错的任务数。

Expand Down Expand Up @@ -71,11 +78,7 @@ prometheus_tsdb_wal_fsync_duration_seconds_count 216
```
# 概述
metric 包括4种指标类型:
- counter:只会增加的指标;
- gauge:可以增加或减少的指标,它派生于counter;
- histogram:直方图,初始化的时候需要设置桶(bucket);
- summary:分位数指标,初始化的时候需要设置桶和误差;
# label
Expand Down Expand Up @@ -453,7 +456,11 @@ std::vector<std::shared_ptr<counter_t>> get_bucket_counts();
// 序列化
void serialize(std::string& str);
```
## 例子
## 例子
```cpp
histogram_t h("test", "help", {5.0, 10.0, 20.0, 50.0, 100.0});
h.observe(23);
Expand Down Expand Up @@ -548,7 +555,7 @@ summary每次写入的数据会被映射到一个14位的浮点数中,其指
### 大量重复数字导致的误差
为节省内存空间,summary内部的每个桶仅能存储一个32位数字,因此,在一个过期时间周期内同一个数字被插入超过2^32次后,为了避免溢出,新的数字会被插入到与该数字临近的桶(相差约1%)中,这可能导致一定误差。
为节省内存空间,dynamic summary内部的每个桶仅能存储一个32位数字,因此,在一个过期时间周期内同一个数字被插入超过2^32次后,为了避免溢出,新的数字会被插入到与该数字临近的桶(相差约1%)中,这可能导致一定误差。非 dynamic 的 summary 不会有这个问题,因为它内部使用的是64位数字,不可能出现溢出。
### 过期时间误差
Expand Down

0 comments on commit 89f23d4

Please sign in to comment.