-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathprometheus_consul_coordinates.erb
137 lines (124 loc) · 4.88 KB
/
prometheus_consul_coordinates.erb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# HELP consul_node_rtt_min_seconds Min Round trip with other servers of DC
# TYPE consul_node_rtt_min_seconds gauge
# HELP consul_node_rtt_max_seconds Max Round trip with other servers of DC
# TYPE consul_node_rtt_max_seconds gauge
# HELP consul_node_rtt_sum_seconds Sum in seconds of all RTTs with all other servers
# TYPE consul_node_rtt_sum_seconds gauge
# HELP consul_node_rtt_count Number of nodes
# TYPE consul_node_rtt_count gauge
<%
# One-time setup of the state kept between renders: cached results, the time of
# the last computation, the CPU budget and the list of percentiles to report.
@consul_node_settings ||= begin
  # CPU budget: explicit override first, then the Marathon allocation; a negative
  # value means "detect from the machine".
  cpu_hint = ENV.fetch('CONSUL_COORDINATES_CPUS') { ENV.fetch('MARATHON_APP_RESOURCE_CPUS', '-1') }
  cpu_budget = cpu_hint.to_f.floor
  if cpu_budget < 0
    require 'etc'
    cpu_budget = Etc.nprocessors - 1
  end
  quantiles = ENV.fetch('CONSUL_COORDINATES_PERCENTILES', '.5,.9,.99,.999').split(',').map(&:to_f)
  {
    compute_duration: 0,
    initialized: false,
    last_results: {},
    # A date far in the past, so the first render always triggers a computation.
    last_update: Time.parse('2019-01-01 00:00:00 +0100'),
    num_cpus: cpu_budget,
    percentiles: quantiles,
  }
end
# Declare HELP/TYPE metadata for every percentile gauge. The declared name has to
# be the same as the emitted series name, which is "consul_node_rtt_" followed by
# the "q<pctl>_seconds" result key (dots in the percentile replaced by "_").
@consul_node_settings[:percentiles].each do |pctl|
fname = "consul_node_rtt_q#{pctl.to_s.sub('.', '_')}_seconds"
%># HELP <%= fname %> Get the percentile <%= pctl %>
# TYPE <%= fname %> gauge
<%
end
# Index the catalog by node name: metadata per node, and the full coordinate
# entry per node.
meta_pairs = nodes.map { |entry| [entry['Node'], entry['Meta']] }
nodes_meta = Hash[meta_pairs]
coordinate_pairs = coordinate.nodes.map { |entry| [entry['Node'], entry] }
nodes_coordinates = Hash[coordinate_pairs]
unless @consul_node_settings[:initialized]
@consul_node_settings[:initialized] = true
# Compute a percentile of already-sorted values, interpolating linearly between
# the two closest ranks.
#
# values_sorted - Array of numeric samples sorted in ascending order.
# percentile    - requested quantile as a fraction; values outside 0..1 are
#                 clamped to that range.
#
# Returns the interpolated value, or nil when values_sorted is empty.
def percentile(values_sorted, percentile)
  return nil if values_sorted.empty?

  last = values_sorted.length - 1
  quantile = [[percentile, 0].max, 1].min
  rank = quantile * last + 1
  k = rank.floor - 1
  lower = values_sorted[k]
  # With a single sample, or when the rank lands on the last element, there is
  # no upper neighbour to interpolate with: the answer is that element itself.
  return lower if k >= last

  lower + (rank.modulo(1) * (values_sorted[k + 1] - lower))
end
# Create a fresh, empty statistics accumulator.
#
# 'min_seconds' starts at 3600 and 'max_seconds' at 0 so that the first recorded
# sample replaces both. '_values' collects the raw samples.
#
# Returns a new Hash (with its own '_values' Array) on every call.
def build_stats
  stats = { '_values' => [] }
  stats['min_seconds'] = 3600
  stats['max_seconds'] = 0
  stats['sum_seconds'] = 0
  stats['count'] = 0
  stats
end
# Record one RTT sample in a statistics Hash.
#
# node_info - statistics Hash with '_values', 'min_seconds', 'max_seconds',
#             'sum_seconds' and 'count' entries; it is updated in place.
# rtt_val   - the sample to record.
#
# Returns node_info.
def add_node_rtt(node_info, rtt_val)
  samples = node_info['_values']
  # Keep the sample list sorted: insert before the first strictly greater sample,
  # or at the end when there is none.
  position = samples.bsearch_index { |sample| sample > rtt_val } || samples.length
  samples.insert(position, rtt_val)

  if node_info['min_seconds'] > rtt_val
    node_info['min_seconds'] = rtt_val
  end
  if node_info['max_seconds'] < rtt_val
    node_info['max_seconds'] = rtt_val
  end
  node_info['sum_seconds'] += rtt_val
  node_info['count'] += 1
  node_info
end
# Build the RTT statistics and the Prometheus label string for one source node.
#
# src               - name of the source node.
# nodes_meta        - Hash of node name => metadata Hash; it is not modified.
# nodes_coordinates - Hash of node name => coordinate entry with a 'Coord' key.
#
# Returns a Hash with:
#   'results' - the statistics Hash, plus one "q<pctl>_seconds" entry per
#               configured percentile when at least one sample was recorded;
#   'metas'   - comma-separated `name="value"` label pairs, always including src.
def compute_node_data(src, nodes_meta, nodes_coordinates)
  node_info = {
    'results' => build_stats,
  }
  # merge returns a copy, so the 'src' label never leaks into the caller's metadata.
  metas = (nodes_meta[src] || {}).merge('src' => src)
  # The Prometheus text format requires backslash, double quote and line feed to
  # be escaped inside label values.
  escapes = { '\\' => '\\\\', '"' => '\\"', "\n" => '\\n' }
  label_pairs = metas.reject { |_, v| v.to_s.empty? }.map do |k, v|
    escaped = v.to_s.gsub(/[\\"\n]/, escapes)
    "#{k}=\"#{escaped}\""
  end
  node_info['metas'] = label_pairs.join(',')

  results = node_info['results']
  n1 = nodes_coordinates[src]
  # Handle coordinates not there yet
  return node_info unless n1

  # Every entry of nodes_coordinates contributes a sample, src's own entry included.
  nodes_coordinates.each do |_name, n2|
    rtt_val = coordinate.rtt(n1['Coord'], n2['Coord'])
    add_node_rtt(results, rtt_val)
  end
  unless results['_values'].empty?
    @consul_node_settings[:percentiles].each do |pctl|
      results["q#{pctl.to_s.sub('.', '_')}_seconds"] = percentile(results['_values'], pctl)
    end
  end
  node_info
end
# Pair a node's name with its computed data.
#
# nodeCoords        - coordinate entry; its 'Node' value is the node name.
# nodes_meta        - passed through to compute_node_data.
# nodes_coordinates - passed through to compute_node_data.
#
# Returns a two-element Array: [node name, result of compute_node_data].
def compute_map_keys(nodeCoords, nodes_meta, nodes_coordinates)
  source = nodeCoords['Node']
  node_data = compute_node_data(source, nodes_meta, nodes_coordinates)
  [source, node_data]
end
# Return the per-node results, recomputing them only when the stored ones are
# more than 30 seconds old and both inputs are non-empty.
#
# nodes_meta        - Hash of node name => metadata.
# nodes_coordinates - Hash of node name => coordinate entry.
#
# A recomputation stores the new results, the completion time and the elapsed
# time in @consul_node_settings. When more than one CPU is configured the work is
# spread over (num_cpus - 1) processes through the 'parallel' gem.
#
# Returns the stored results: after a computation, an Array of
# [node name, node data] pairs.
def compute_all_results(nodes_meta, nodes_coordinates)
  settings = @consul_node_settings
  started_at = Time.now.utc
  # We compute every 30 seconds max
  computed_recently = (started_at - settings[:last_update]).round <= 30
  return settings[:last_results] if computed_recently || nodes_meta.empty? || nodes_coordinates.empty?

  worker_count = settings[:num_cpus] - 1
  per_node =
    if worker_count > 0
      require 'parallel'
      Parallel.map(nodes_coordinates, in_processes: worker_count) do |_, entry|
        compute_map_keys(entry, nodes_meta, nodes_coordinates)
      end
    else
      nodes_coordinates.map do |_, entry|
        compute_map_keys(entry, nodes_meta, nodes_coordinates)
      end
    end

  settings[:last_results] = per_node
  settings[:last_update] = Time.now.utc
  settings[:compute_duration] = settings[:last_update] - started_at
  settings[:last_results]
end
end
# Node name / node info pairs to render. compute_all_results hands back the
# stored results instead of recomputing when they are at most 30 seconds old or
# when either input is empty.
all_nodes = compute_all_results(nodes_meta, nodes_coordinates)
%>
# HELP consul_node_rtt_compute_duration Time needed to compute all results in seconds
# TYPE consul_node_rtt_compute_duration gauge
consul_node_rtt_compute_duration <%= @consul_node_settings[:compute_duration] %>
consul_node_rtt_compute_cpus <%= @consul_node_settings[:num_cpus] %>
consul_node_rtt_compute_nodes_meta_count <%= nodes_meta.count %>
consul_node_rtt_compute_nodes_coordinates_count <%= nodes_coordinates.count %>
<%
# Emit one sample line per node and per statistic, named "consul_node_rtt_" plus
# the statistic key and labelled with the node's pre-built label string.
all_nodes.each do |node, node_info|
node_info['results'].each do |k, v|
# Keys starting with '_' (such as '_values', the raw sample list) are internal
# and are not printed.
next if k.start_with? '_'
%>consul_node_rtt_<%= k %>{<%= node_info['metas'] %>} <%= v.round(5) %>
<%
end
end
%>