Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
chrzaszcz committed Jul 10, 2024
1 parent a5407fb commit 0d16732
Show file tree
Hide file tree
Showing 12 changed files with 343 additions and 348 deletions.
58 changes: 20 additions & 38 deletions big_tests/tests/graphql_metric_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -119,38 +119,23 @@ type_to_keys(<<"counter">>) ->
type_to_keys(<<"spiral">>) ->
[<<"one">>, <<"count">>];
type_to_keys(<<"gauge">>) ->
[<<"value">>];
type_to_keys(<<"merged_inet_stats">>) ->
[<<"connections">>, <<"recv_cnt">>, <<"recv_max">>, <<"recv_oct">>,
<<"send_cnt">>, <<"send_max">>, <<"send_oct">>, <<"send_pend">>];
type_to_keys(<<"rdbms_stats">>) ->
[<<"workers">>, <<"recv_cnt">>, <<"recv_max">>, <<"recv_oct">>,
<<"send_cnt">>, <<"send_max">>, <<"send_oct">>, <<"send_pend">>];
type_to_keys(<<"vm_stats_memory">>) ->
[<<"atom_used">>, <<"binary">>, <<"ets">>,
<<"processes_used">>, <<"system">>, <<"total">>];
type_to_keys(<<"vm_system_info">>) ->
[<<"ets_limit">>, <<"port_count">>, <<"port_limit">>,
<<"process_count">>, <<"process_limit">>];
type_to_keys(<<"probe_queues">>) ->
[<<"fsm">>, <<"regular">>, <<"total">>].
[<<"value">>].

cets_info_keys() ->
[<<"available_nodes">>, <<"unavailable_nodes">>,
<<"remote_nodes_without_disco">>, <<"joined_nodes">>,
<<"remote_nodes_with_unknown_tables">>, <<"remote_unknown_tables">>,
<<"remote_nodes_with_missing_tables">>, <<"remote_missing_tables">>,
<<"conflict_nodes">>, <<"conflict_tables">>,
<<"discovered_nodes">>, <<"discovery_works">>].
<<"remote_nodes_without_disco">>, <<"joined_nodes">>,
<<"remote_nodes_with_unknown_tables">>, <<"remote_unknown_tables">>,
<<"remote_nodes_with_missing_tables">>, <<"remote_missing_tables">>,
<<"conflict_nodes">>, <<"conflict_tables">>,
<<"discovered_nodes">>, <<"discovery_works">>].

get_by_name_global_erlang_metrics(Config) ->
%% Filter by name works
Result = get_metrics([<<"global">>, <<"erlang">>], Config),
Result = get_metrics([<<"global">>, <<"system_info">>], Config),
ParsedResult = get_ok_value([data, metric, getMetrics], Result),
Map = maps:from_list([{Name, X} || X = #{<<"name">> := Name} <- ParsedResult]),
Info = maps:get([<<"global">>, <<"erlang">>, <<"system_info">>], Map),
%% VMSystemInfoMetric type
#{<<"type">> := <<"vm_system_info">>} = Info,
Info = maps:get([<<"global">>, <<"system_info">>, <<"port_count">>], Map),
#{<<"type">> := <<"counter">>} = Info,
check_metric_by_type(Info),
%% Other metrics are filtered out
undef = maps:get(roster_reads_key(), Map, undef).
Expand Down Expand Up @@ -178,30 +163,27 @@ get_metrics_for_specific_host_type(Config) ->
[_|_] = ParsedResult.

get_process_queue_length(Config) ->
Result = get_metrics([<<"global">>, <<"processQueueLengths">>], Config),
Result = get_metrics([<<"global">>, <<"system_process_queue_lengths">>], Config),
ParsedResult = get_ok_value([data, metric, getMetrics], Result),
Map = maps:from_list([{Name, X} || X = #{<<"name">> := Name} <- ParsedResult]),
Lens = maps:get([<<"global">>, <<"processQueueLengths">>], Map),
%% ProbeQueuesMetric type
#{<<"type">> := <<"probe_queues">>} = Lens,
Lens = maps:get([<<"global">>, <<"system_process_queue_lengths">>, <<"total">>], Map),
#{<<"type">> := <<"counter">>} = Lens,
check_metric_by_type(Lens).

get_inet_stats(Config) ->
Result = get_metrics([<<"global">>, <<"data">>, <<"dist">>], Config),
Result = get_metrics([<<"global">>, <<"system_dist_data">>], Config),
ParsedResult = get_ok_value([data, metric, getMetrics], Result),
Map = maps:from_list([{Name, X} || X = #{<<"name">> := Name} <- ParsedResult]),
Stats = maps:get([<<"global">>, <<"data">>, <<"dist">>], Map),
%% MergedInetStatsMetric type
#{<<"type">> := <<"merged_inet_stats">>} = Stats,
Stats = maps:get([<<"global">>, <<"system_dist_data">>, <<"connections">>], Map),
#{<<"type">> := <<"counter">>} = Stats,
check_metric_by_type(Stats).

get_vm_stats_memory(Config) ->
Result = get_metrics([<<"global">>], Config),
ParsedResult = get_ok_value([data, metric, getMetrics], Result),
Map = maps:from_list([{Name, X} || X = #{<<"name">> := Name} <- ParsedResult]),
Mem = maps:get([<<"global">>, <<"erlang">>, <<"memory">>], Map),
%% VMStatsMemoryMetric type
#{<<"type">> := <<"vm_stats_memory">>} = Mem,
Mem = maps:get([<<"global">>, <<"system_memory">>, <<"total">>], Map),
#{<<"type">> := <<"counter">>} = Mem,
check_metric_by_type(Mem).

get_cets_system(Config) ->
Expand Down Expand Up @@ -265,17 +247,17 @@ get_metrics_as_dicts_empty_args(Config) ->
[#{<<"key">> := <<"median">>, <<"value">> := Median}] = maps:get(RecvName, Map),
?assert(is_integer(Median)),
%% Empty keys
Result2 = get_metrics_as_dicts([<<"global">>, <<"erlang">>], [], Config),
Result2 = get_metrics_as_dicts([<<"global">>, <<"system_info">>], [], Config),
ParsedResult2 = get_ok_value([data, metric, getMetricsAsDicts], Result2),
?assertEqual(length(ParsedResult2), 2).
?assertEqual(6, length(ParsedResult2)).

get_metrics_as_dicts_empty_strings(Config) ->
%% Name is an empty string
Result = get_metrics_as_dicts([<<>>], [<<"median">>], Config),
ParsedResult = get_ok_value([data, metric, getMetricsAsDicts], Result),
[] = ParsedResult,
%% Key is an empty string
Result2 = get_metrics_as_dicts([<<"global">>, <<"erlang">>], [<<>>], Config),
Result2 = get_metrics_as_dicts([<<"global">>, <<"system_info">>], [<<>>], Config),
ParsedResult2 = get_ok_value([data, metric, getMetricsAsDicts], Result2),
[_|_] = ParsedResult2.

Expand Down
184 changes: 184 additions & 0 deletions big_tests/tests/instrument_SUITE.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
%%% @doc Instrumentation tests that don't fit into the XMPP-specific test suites

-module(instrument_SUITE).
-include_lib("eunit/include/eunit.hrl").

-compile([export_all, nowarn_export_all]).

-import(distributed_helper, [mim/0, require_rpc_nodes/1, rpc/4]).
-import(domain_helper, [host_type/1]).

%% @doc Top-level CT spec: one group for CETS probes, one for generic system probes.
all() ->
    [{group, cets}, {group, system}].

%% @doc Group definitions. System cases are independent of each other,
%% so they can safely run in parallel.
groups() ->
    CetsGroup = {cets, [], cets_cases()},
    SystemGroup = {system, [parallel], system_cases()},
    [CetsGroup, SystemGroup].

%% @doc Test cases for the CETS probe group.
cets_cases() -> [cets_info].

%% @doc Test cases for the system probe group, one per emitted event.
system_cases() ->
    [system_up_time, system_tcp_ports, system_process_queue_lengths,
     system_info, system_memory, system_dist_data].

%% @doc Suite setup: inject this module into the MIM node (for the RPC'd
%% helper processes below), shorten probe_interval to 1s so tests don't wait
%% long for probe events, and restart the probes to pick up the new interval.
init_per_suite(Config) ->
    mongoose_helper:inject_module(?MODULE),
    ConfigWithBackup = mongoose_helper:backup_and_set_config_option(
                         Config, [instrumentation, probe_interval], 1),
    restart_probes(),
    require_rpc_nodes([mim]) ++ ConfigWithBackup.

%% @doc Suite teardown: restore the original probe_interval backed up in
%% init_per_suite, then restart the probes so other suites run with the
%% default interval again.
end_per_suite(Config) ->
    mongoose_helper:restore_config_option(Config, [instrumentation, probe_interval]),
    restart_probes().

%% @doc Start collecting the instrumentation events relevant to each group.
%% The cets group is skipped entirely when CETS is not the configured
%% internal database.
init_per_group(cets, Config) ->
    case rpc(mim(), mongoose_config, lookup_opt, [[internal_databases, cets]]) of
        {ok, _} ->
            instrument_helper:start([{cets_info, #{}}]),
            Config;
        {error, not_found} ->
            {skip, "CETS is not configured"}
    end;
init_per_group(system, Config) ->
    instrument_helper:start(instrument_helper:declared_events(mongoose_system_probe, [])),
    Config.

%% @doc Stop collecting instrumentation events after each group.
end_per_group(_, _Config) ->
    instrument_helper:stop().

%% @doc No per-testcase setup is needed; pass the config through unchanged.
init_per_testcase(_CaseName, Config) ->
    Config.

%% @doc Wait for a cets_info probe event emitted after the test started
%% (wait_and_assert_new) and validate its measurement map.
cets_info(_Config) ->
    instrument_helper:wait_and_assert_new(cets_info, #{}, fun check_cets_info/1).

%% Check that all expected measurements are present with integer values,
%% and that the map contains no fields beyond the expected ones.
check_cets_info(Measurements) ->
    Names = cets_metrics_names(),
    AllIntegers = lists:all(fun(Name) -> is_integer(maps:get(Name, Measurements)) end, Names),
    AllIntegers andalso maps:without(Names, Measurements) =:= #{}.

%% All measurement keys emitted by the cets_info probe:
%% node counts first, then table-level diagnostics.
cets_metrics_names() ->
    [available_nodes, unavailable_nodes, joined_nodes,
     discovered_nodes, discovery_works,
     remote_nodes_without_disco,
     remote_nodes_with_unknown_tables, remote_unknown_tables,
     remote_nodes_with_missing_tables, remote_missing_tables,
     conflict_nodes, conflict_tables].

%% @doc The probe reports a positive uptime that keeps increasing.
system_up_time(_Config) ->
    %% Query the probe directly once to get a baseline
    #{seconds := InitialUpTime} =
        rpc(mim(), mongoose_system_probe, probe, [system_up_time, #{}]),
    ?assert(InitialUpTime > 0),
    %% A later, periodically emitted probe event must report a larger value
    instrument_helper:wait_and_assert(
      system_up_time, #{},
      fun(#{seconds := LaterUpTime}) -> LaterUpTime > InitialUpTime end).

%% @doc The TCP port count reacts to ports being opened and closed.
system_tcp_ports(_Config) ->
    %% A system without any users should have less than 100 TCP ports open
    IsBaseline = fun(#{count := PortCount}) -> PortCount > 0 andalso PortCount < 100 end,
    instrument_helper:wait_and_assert(system_tcp_ports, #{}, IsBaseline),

    %% Open additional ports, and check the increased value
    OpenerPid = rpc(mim(), ?MODULE, spawn_process_with_ports, []),
    instrument_helper:wait_and_assert_new(
      system_tcp_ports, #{},
      fun(#{count := PortCount}) -> PortCount >= 1000 end),

    %% Close additional ports, and wait for the value to decrease
    OpenerPid ! stop,
    instrument_helper:wait_and_assert(system_tcp_ports, #{}, IsBaseline).

%% @doc The total message queue length reacts to a process accumulating messages.
system_process_queue_lengths(_Config) ->
    %% A system without any users shouldn't have more than 1000 messages accumulated
    IsBaseline = fun(#{total := Total}) -> Total >= 0 andalso Total < 1000 end,
    instrument_helper:wait_and_assert(system_process_queue_lengths, #{}, IsBaseline),

    %% The spawned process will accumulate 10000 messages in its mailbox
    AccumulatorPid = rpc(mim(), ?MODULE, spawn_process_with_queue, []),
    instrument_helper:wait_and_assert_new(
      system_process_queue_lengths, #{},
      fun(#{total := Total}) -> Total >= 10000 end),

    %% Stop the process, and wait for the value to decrease again
    AccumulatorPid ! stop,
    instrument_helper:wait_and_assert_new(system_process_queue_lengths, #{}, IsBaseline).

%% @doc Wait for a new system_info probe event and validate counts and limits.
system_info(_Config) ->
    instrument_helper:wait_and_assert_new(system_info, #{}, fun check_system_info/1).

%% There should be at least one port, process and ETS table, and no limit
%% should be reached. Any other measurement map crashes with function_clause,
%% which fails the assertion.
check_system_info(#{port_count := Ports, port_limit := MaxPorts,
                    process_count := Processes, process_limit := MaxProcesses,
                    ets_count := Tables, ets_limit := MaxTables})
  when Ports > 0, MaxPorts > Ports,
       Processes > 0, MaxProcesses > Processes,
       Tables > 0, MaxTables > Tables ->
    true.

%% @doc Wait for a new system_memory probe event and validate the reported
%% memory breakdown.
system_memory(_Config) ->
    instrument_helper:wait_and_assert_new(system_memory, #{}, fun check_system_memory/1).

%% All memory categories should be present and positive, with the invariants
%% documented for erlang:memory/0: total = processes + system,
%% atom >= atom_used, processes >= processes_used, and system covers at least
%% the atom, binary and ETS memory. Any other map fails with function_clause.
check_system_memory(#{total := Total, atom := Atom, atom_used := AtomUsed,
                      binary := Binary, code := Code, ets := Ets,
                      processes := Processes, processes_used := ProcessesUsed,
                      system := System})
  when Total =:= Processes + System,
       Atom >= AtomUsed, AtomUsed > 0, Binary > 0, Code > 0, Ets > 0,
       Processes >= ProcessesUsed, System >= Atom + Binary + Ets ->
    true.

%% @doc Wait for a new system_dist_data probe event and validate the
%% distribution traffic statistics.
system_dist_data(_Config) ->
    instrument_helper:wait_and_assert_new(system_dist_data, #{}, fun check_system_dist_data/1).

%% There should already be distribution data sent and received (the test
%% framework talks to the node over RPC), and at least one dist connection.
%% Octet counts must exceed packet counts; pending send may be zero.
check_system_dist_data(#{connections := Connections,
                         recv_oct := RecvOct, recv_cnt := RecvCnt, recv_max := RecvMax,
                         send_oct := SendOct, send_cnt := SendCnt, send_max := SendMax,
                         send_pend := SendPend})
  when Connections >= 1,
       RecvOct > RecvCnt, RecvCnt > 0, RecvMax > 0,
       SendOct > SendCnt, SendCnt > 0, SendMax > 0, SendPend >= 0 ->
    true.

%% Restart both probe modules so that a changed probe_interval takes effect.
restart_probes() ->
    restart(mongoose_instrument_probe_cets),
    restart(mongoose_system_probe).

%% Stop and start the given probe module on the mim node via RPC.
%% NOTE(review): assumes Module exports stop/0 and start/0 — holds for both
%% modules passed in by restart_probes/0.
restart(Module) ->
    rpc(mim(), Module, stop, []),
    rpc(mim(), Module, start, []).

%% Functions injected to MIM

%% Spawn a process (injected and run on the MIM node) that piles up
%% messages in its own mailbox until told to stop.
spawn_process_with_queue() ->
    spawn(fun() -> accumulate_messages() end).

%% Fill the calling process' own mailbox with 10000 messages,
%% then block until a 'stop' message arrives.
accumulate_messages() ->
    Self = self(),
    lists:foreach(fun(N) -> Self ! {msg, N} end, lists:seq(1, 10000)),
    receive stop -> ok end.

%% Spawn a process (injected and run on the MIM node) that holds many
%% listening TCP sockets open until told to stop.
spawn_process_with_ports() ->
    spawn(fun() -> open_ports() end).

%% Open 1000 listening TCP sockets, keep them open until 'stop' arrives,
%% then close them all. If any listen call fails, extracting the socket
%% crashes the process on purpose (function_clause).
open_ports() ->
    ListenResults = [gen_tcp:listen(0, []) || _ <- lists:seq(1, 1000)],
    ExtractSocket = fun({ok, ListenSocket}) -> ListenSocket end,
    Sockets = lists:map(ExtractSocket, ListenResults),
    receive stop -> ok end,
    lists:foreach(fun gen_tcp:close/1, Sockets).
69 changes: 0 additions & 69 deletions big_tests/tests/instrument_cets_SUITE.erl

This file was deleted.

6 changes: 2 additions & 4 deletions big_tests/tests/metrics_api_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,8 @@ session_counters(Config) ->
end).

node_uptime(Config) ->
X = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
timer:sleep(timer:seconds(1)),
Y = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
?assertEqual(true, Y > X, [{counter, nodeUpTime}, {first, X}, {second, Y}]).
UpTime = fetch_global_incrementing_gauge_value('system_up_time.seconds', Config),
?assert(UpTime > 0).

cluster_size(Config) ->
SingleNodeClusterState =
Expand Down
Loading

0 comments on commit 0d16732

Please sign in to comment.