Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add status to cets api #4116

Merged
merged 25 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e17c2d1
Rename systemInfo to tableInfo
arcusfelis Sep 4, 2023
4e76f31
Add unavailableNodesCount metric
arcusfelis Sep 4, 2023
b7dd667
Add more fields into CETS systemInfo
arcusfelis Sep 4, 2023
cb08b30
Add more tests for graphql node lists in CETS
arcusfelis Sep 5, 2023
3cd913f
Change the way we get available_nodes list
arcusfelis Sep 5, 2023
1d4b078
Add discoveryWorks
arcusfelis Sep 7, 2023
9148912
Add metrics for CETS
arcusfelis Sep 7, 2023
5d61d42
Properly skip graphql_cets_SUITE
arcusfelis Sep 7, 2023
62597d9
Better output of commands
arcusfelis Sep 7, 2023
0f2d801
Add partiallyJoinedTables metric
arcusfelis Sep 7, 2023
2dbc41b
Fix xref
arcusfelis Sep 7, 2023
01efb19
Propery calculate partially_joined_tables
arcusfelis Sep 8, 2023
92f3632
Refactor the way we select partial nodes
arcusfelis Sep 8, 2023
dd8237e
Use cets_status for metrics
arcusfelis Sep 12, 2023
a584ea3
Reorder metrics
arcusfelis Sep 12, 2023
974c6a7
Expose node lists over graphql API
arcusfelis Sep 12, 2023
33f3dbd
Add wait_for_ready into graphql_cets_SUITE
arcusfelis Sep 12, 2023
60d5b9e
Stop cets server on mod_muc stop
arcusfelis Sep 12, 2023
7d23edc
Update cets dep
arcusfelis Sep 14, 2023
8f75a6a
Use main branch of cets
arcusfelis Sep 14, 2023
a9693d5
Erase badnode before/after graphql_cets_SUITE
arcusfelis Sep 19, 2023
6c846b2
Remove Only for global admin comment from the cets.gql schema
arcusfelis Sep 19, 2023
d1ba890
Fix comments for graphql schema
arcusfelis Sep 19, 2023
db8d831
Create map at once in mongoose_metrics_probe_cets
arcusfelis Sep 19, 2023
28a4ee0
Update priv/graphql/schemas/admin/metric.gql
arcusfelis Sep 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 177 additions & 15 deletions big_tests/tests/graphql_cets_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

-compile([export_all, nowarn_export_all]).

-import(distributed_helper, [mim/0]).
-import(distributed_helper, [mim/0, mim2/0, rpc/4]).
-import(domain_helper, [host_type/1]).
-import(mongooseimctl_helper, [rpc_call/3]).
-import(graphql_helper, [execute_command/4, get_unauthorized/1, get_ok_value/2]).
Expand All @@ -14,21 +14,43 @@ all() ->
{group, domain_admin_cets}].

groups() ->
[{admin_cets_http, [sequence], admin_cets_tests()},
{admin_cets_cli, [sequence], admin_cets_tests()},
[{admin_cets_http, [parallel], admin_cets_tests()},
{admin_cets_cli, [parallel], admin_cets_tests()},
{domain_admin_cets, [], domain_admin_tests()}].

admin_cets_tests() ->
[has_sm_table_in_info].
[has_sm_table_in_info,
available_nodes,
unavailable_nodes,
joined_nodes,
discovered_nodes,
remote_nodes_without_disco,
remote_nodes_with_unknown_tables,
remote_unknown_tables,
remote_nodes_with_missing_tables,
remote_missing_tables,
conflict_nodes,
conflict_tables,
discovery_works].

domain_admin_tests() ->
[domain_admin_get_info_test].
[domain_admin_get_table_info_test,
domain_admin_get_system_info_test].

init_per_suite(Config) ->
Config1 = escalus:init_per_suite(Config),
ejabberd_node_utils:init(mim(), Config1).
case rpc_call(mongoose_config, get_opt, [[internal_databases, cets, backend], undefined]) of
rdbms ->
Config1 = escalus:init_per_suite(Config),
Config2 = ejabberd_node_utils:init(mim(), Config1),
add_bad_node(),
ok = rpc_call(cets_discovery, wait_for_ready, [mongoose_cets_discovery, 5000]),
Config2 ++ distributed_helper:require_rpc_nodes([mim, mim2]);
_ ->
{skip, "CETS is not configured with RDBMS"}
end.

%% Suite teardown: delete the fake discovery entry first, then let escalus
%% finish its own suite cleanup.
end_per_suite(Config) ->
%% Issues the delete query for badnode@localhost (see ensure_bad_node_unregistered/0).
ensure_bad_node_unregistered(),
escalus:end_per_suite(Config).

init_per_group(admin_cets_http, Config) ->
Expand All @@ -55,23 +77,163 @@ init_per_testcase(_, Config) ->
% Admin tests

has_sm_table_in_info(Config) ->
Res = get_info(Config),
Tables = get_ok_value([data, cets, systemInfo], Res),
Res = get_table_info(Config),
Tables = get_ok_value([data, cets, tableInfo], Res),
[T] = [T || T = #{<<"tableName">> := <<"cets_session">>} <- Tables],
#{<<"memory">> := Mem, <<"nodes">> := Nodes, <<"size">> := Size} = T,
true = is_integer(Mem),
true = is_integer(Size),
?assert(is_integer(Mem), T),
?assert(is_integer(Size), T),
#{node := Node1} = mim(),
lists:member(Node1, Nodes).
assert_member(atom_to_binary(Node1), Nodes).

%% systemInfo.availableNodes must contain both MIM nodes and must not contain
%% the fake node registered by add_bad_node/0.
available_nodes(Config) ->
    #{node := MimNode} = mim(),
    #{node := Mim2Node} = mim2(),
    SystemInfo = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    #{<<"availableNodes">> := Available} = SystemInfo,
    lists:foreach(fun(Node) -> assert_member(atom_to_binary(Node), Available) end,
                  [MimNode, Mim2Node]),
    assert_not_member(<<"badnode@localhost">>, Available).

%% systemInfo.unavailableNodes must list the fake node and neither of the
%% two MIM nodes.
unavailable_nodes(Config) ->
    #{node := MimNode} = mim(),
    #{node := Mim2Node} = mim2(),
    SystemInfo = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    #{<<"unavailableNodes">> := Unavailable} = SystemInfo,
    assert_member(<<"badnode@localhost">>, Unavailable),
    assert_not_member(atom_to_binary(MimNode), Unavailable),
    assert_not_member(atom_to_binary(Mim2Node), Unavailable).

%% systemInfo.joinedNodes must contain both MIM nodes and must not contain
%% the fake node.
joined_nodes(Config) ->
    #{node := MimNode} = mim(),
    #{node := Mim2Node} = mim2(),
    SystemInfo = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    #{<<"joinedNodes">> := Joined} = SystemInfo,
    lists:foreach(fun(Node) -> assert_member(atom_to_binary(Node), Joined) end,
                  [MimNode, Mim2Node]),
    assert_not_member(<<"badnode@localhost">>, Joined).

%% Only checks that systemInfo.remoteNodesWithoutDisco is present and is a list.
remote_nodes_without_disco(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assert(is_list(maps:get(<<"remoteNodesWithoutDisco">>, Info)), Info).

%% Only checks that systemInfo.remoteNodesWithUnknownTables is present and is a list.
remote_nodes_with_unknown_tables(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assert(is_list(maps:get(<<"remoteNodesWithUnknownTables">>, Info)), Info).

%% Only checks that systemInfo.remoteUnknownTables is present and is a list.
remote_unknown_tables(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assert(is_list(maps:get(<<"remoteUnknownTables">>, Info)), Info).

%% Only checks that systemInfo.remoteNodesWithMissingTables is present and is a list.
remote_nodes_with_missing_tables(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assert(is_list(maps:get(<<"remoteNodesWithMissingTables">>, Info)), Info).

%% Only checks that systemInfo.remoteMissingTables is present and is a list.
remote_missing_tables(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assert(is_list(maps:get(<<"remoteMissingTables">>, Info)), Info).

%% Expects systemInfo.conflictNodes to be an empty list.
conflict_nodes(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assertMatch(#{<<"conflictNodes">> := []}, Info).

%% Expects systemInfo.conflictTables to be an empty list.
conflict_tables(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assertMatch(#{<<"conflictTables">> := []}, Info).

%% Expects systemInfo.conflictNodesCount to be 0.
%% NOTE(review): this case is not listed in admin_cets_tests/0, and the
%% CETSSystemInfo GraphQL type declares no conflictNodesCount field -
%% confirm whether it is still meant to run or should be removed.
conflict_nodes_count(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assertMatch(#{<<"conflictNodesCount">> := 0}, Info).

%% systemInfo.discoveredNodes must contain both MIM nodes and also the fake
%% node, since discovery lists every entry registered in the backend.
discovered_nodes(Config) ->
    #{node := MimNode} = mim(),
    #{node := Mim2Node} = mim2(),
    SystemInfo = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    #{<<"discoveredNodes">> := Discovered} = SystemInfo,
    lists:foreach(fun(Node) -> assert_member(atom_to_binary(Node), Discovered) end,
                  [MimNode, Mim2Node]),
    assert_member(<<"badnode@localhost">>, Discovered).

%% Expects systemInfo.discoveredNodesCount to be an integer greater than 2.
%% NOTE(review): this case is not listed in admin_cets_tests/0, and the
%% CETSSystemInfo GraphQL type declares no discoveredNodesCount field -
%% confirm whether it is still meant to run or should be removed.
discovered_nodes_count(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    #{<<"discoveredNodesCount">> := Count} = Info,
    ?assert(is_integer(Count), Info),
    ?assert(Count > 2, Info).

%% Expects systemInfo.discoveryWorks to be true.
discovery_works(Config) ->
    Info = get_ok_value([data, cets, systemInfo], get_system_info(Config)),
    ?assertMatch(#{<<"discoveryWorks">> := true}, Info).

% Domain admin tests

domain_admin_get_info_test(Config) ->
get_unauthorized(get_info(Config)).
domain_admin_get_table_info_test(Config) ->
get_unauthorized(get_table_info(Config)).

%% A domain admin must get an "unauthorized" response for cets systemInfo.
domain_admin_get_system_info_test(Config) ->
    Response = get_system_info(Config),
    get_unauthorized(Response).

%--------------------------------------------------------------------------------------------------
% Helpers
%--------------------------------------------------------------------------------------------------

get_info(Config) ->
get_table_info(Config) ->
execute_command(<<"cets">>, <<"tableInfo">>, #{}, Config).

%% Runs the `cets systemInfo` GraphQL command with no arguments.
get_system_info(Config) ->
    Category = <<"cets">>,
    Command = <<"systemInfo">>,
    NoArgs = #{},
    execute_command(Category, Command, NoArgs, Config).

%% Makes the discovery process on mim() see a node that cannot be reached:
%% inserts a badnode@localhost row, triggers a check and waits until the node
%% is reported as unavailable.
add_bad_node() ->
%% Remove any leftover row first, so the insert below affects exactly one row.
ensure_bad_node_unregistered(),
register_bad_node(),
%% Send `check` to the discovery process instead of waiting for its own schedule.
force_check(),
wait_for_has_bad_node().

%% Inserts the fake node into the discovery table through the
%% cets_disco_insert_new query on mim(). Crashes unless exactly one row
%% is reported as updated.
register_bad_node() ->
    Now = rpc(mim(), mongoose_rdbms_timestamp, select, []),
    %% Query arguments, in order: cluster name, node name, node number, timestamp.
    Row = [<<"mim">>, <<"badnode@localhost">>, 100, Now],
    {updated, 1} = rpc(mim(), mongoose_rdbms, execute,
                       [global, cets_disco_insert_new, Row]).

%% Deletes the fake node from the discovery table through the
%% cets_delete_node_from_db query on mim(). Any updated-row count is
%% accepted, so this also succeeds when the row is not there.
ensure_bad_node_unregistered() ->
    %% Query arguments, in order: cluster name, node name.
    Key = [<<"mim">>, <<"badnode@localhost">>],
    {updated, _} = rpc(mim(), mongoose_rdbms, execute,
                       [global, cets_delete_node_from_db, Key]).

%% Sends `check` to the mongoose_cets_discovery process registered on mim().
%% Crashes with badmatch if the name is not registered there.
force_check() ->
    DiscoPid = rpc(mim(), erlang, whereis, [mongoose_cets_discovery]),
    true = is_pid(DiscoPid),
    erlang:send(DiscoPid, check).

%% Returns true when the discovery process on mim() reports
%% badnode@localhost among its unavailable nodes.
has_bad_node() ->
    SystemInfo = rpc(mim(), cets_discovery, system_info, [mongoose_cets_discovery]),
    #{unavailable_nodes := Unavailable} = SystemInfo,
    lists:member('badnode@localhost', Unavailable).

%% Polls has_bad_node/0 through mongoose_helper:wait_until/2 until it yields true.
wait_for_has_bad_node() ->
    Expected = true,
    mongoose_helper:wait_until(fun has_bad_node/0, Expected).

%% Returns true when Elem is in List; otherwise fails the test case with
%% {assert_member_failed, Elem, List}.
assert_member(Elem, List) ->
    case lists:member(Elem, List) of
        true ->
            true;
        false ->
            ct:fail({assert_member_failed, Elem, List})
    end.

%% Returns false when Elem is absent from List; otherwise fails the test case
%% with {assert_not_member_failed, Elem, List}.
%% The failure tag is deliberately different from the one used by
%% assert_member/2, so that a CT report shows which assertion was violated.
assert_not_member(Elem, List) ->
    case lists:member(Elem, List) of
        false ->
            false;
        true ->
            ct:fail({assert_not_member_failed, Elem, List})
    end.
31 changes: 30 additions & 1 deletion big_tests/tests/graphql_metric_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ metrics_tests() ->
get_process_queue_length,
get_inet_stats,
get_vm_stats_memory,
get_cets_system,
get_all_metrics_as_dicts,
get_by_name_metrics_as_dicts,
get_metrics_as_dicts_by_nonexistent_name,
Expand Down Expand Up @@ -78,6 +79,13 @@ init_per_group(domain_admin_metrics, Config) ->
end_per_group(_GroupName, _Config) ->
graphql_helper:clean().

%% Per-testcase setup. get_cets_system is skipped when CETS is not configured;
%% every other case goes straight to escalus.
init_per_testcase(CaseName, Config) ->
    %% is_cets_enabled/0 is only consulted for the CETS metric case.
    Runnable = CaseName =/= get_cets_system orelse is_cets_enabled(),
    case Runnable of
        true ->
            escalus:init_per_testcase(CaseName, Config);
        false ->
            {skip, cets_not_enabled}
    end.

Expand Down Expand Up @@ -125,7 +133,14 @@ type_to_keys(<<"vm_system_info">>) ->
[<<"ets_limit">>, <<"port_count">>, <<"port_limit">>,
<<"process_count">>, <<"process_limit">>];
type_to_keys(<<"probe_queues">>) ->
[<<"fsm">>, <<"regular">>, <<"total">>].
[<<"fsm">>, <<"regular">>, <<"total">>];
type_to_keys(<<"cets_system">>) ->
[<<"available_nodes">>, <<"unavailable_nodes">>,
<<"remote_nodes_without_disco">>, <<"joined_nodes">>,
<<"remote_nodes_with_unknown_tables">>, <<"remote_unknown_tables">>,
<<"remote_nodes_with_missing_tables">>, <<"remote_missing_tables">>,
<<"conflict_nodes">>, <<"conflict_tables">>,
<<"discovered_nodes">>, <<"discovery_works">>].

get_by_name_global_erlang_metrics(Config) ->
%% Filter by name works
Expand Down Expand Up @@ -189,6 +204,12 @@ get_vm_stats_memory(Config) ->
#{<<"type">> := <<"vm_stats_memory">>} = Mem,
check_metric_by_type(Mem).

%% Fetches the global cets/system metric and checks it is a single entry of
%% type cets_system that passes check_metric_by_type/1.
get_cets_system(Config) ->
    MetricName = [<<"global">>, <<"cets">>, <<"system">>],
    Metrics = get_ok_value([data, metric, getMetrics], get_metrics(MetricName, Config)),
    [#{<<"type">> := <<"cets_system">>} = CetsSystem] = Metrics,
    check_metric_by_type(CetsSystem).

get_all_metrics_as_dicts(Config) ->
Result = get_metrics_as_dicts(Config),
ParsedResult = get_ok_value([data, metric, getMetricsAsDicts], Result),
Expand Down Expand Up @@ -454,3 +475,11 @@ values_are_integers(Map, Keys) ->

metric_host_type() ->
binary:replace(domain_helper:host_type(), <<" ">>, <<"_">>, [global]).

%% Returns true when mim() has an [internal_databases, cets] option set,
%% i.e. when lookup_opt answers {ok, _}; false for any other answer.
is_cets_enabled() ->
    Lookup = rpc(mim(), mongoose_config, lookup_opt, [[internal_databases, cets]]),
    case Lookup of
        {ok, _} -> true;
        _ -> false
    end.
2 changes: 1 addition & 1 deletion big_tests/tests/graphql_muc_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ init_per_suite(Config) ->
end_per_suite(Config) ->
escalus_fresh:clean(),
mongoose_helper:ensure_muc_clean(),
muc_helper:unload_muc(),
ensure_muc_stopped(),
dynamic_modules:restore_modules(Config),
escalus:end_per_suite(Config).

Expand Down
4 changes: 3 additions & 1 deletion big_tests/tests/mongooseimctl_helper.erl
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,6 @@ loop(Cmd, Args, Port, Data, Timeout) ->
log_command(Cmd, Args, Data, ExitStatus) ->
Pattern = lists:flatten(lists:duplicate(length(Args), " ~s")),
ct:log("Execute ~s " ++ Pattern ++ "~nResult ~p~nExitStatus ~p",
[Cmd] ++ Args ++ [Data, ExitStatus]).
[Cmd] ++ Args ++ [Data, ExitStatus]),
%% For easy read of multiline outputs:
ct:log("ResultString:~n~ts", [Data]).
36 changes: 33 additions & 3 deletions priv/graphql/schemas/admin/cets.gql
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"Allow admin to get information about CETS status"
type CETSAdminQuery @protected{
"Get from the local node. Only for global admin"
systemInfo: [CETSInfo]
"Get a list of tables from the local node"
tableInfo: [CETSTableInfo]
@protected(type: GLOBAL)
"Get status of CETS"
systemInfo: CETSSystemInfo
@protected(type: GLOBAL)
}

type CETSInfo {
type CETSTableInfo {
"ETS table name"
tableName: String
"Memory (in words)"
Expand All @@ -15,3 +18,30 @@ type CETSInfo {
"A list of clustered nodes"
nodes: [String]
}

# Result type of CETSAdminQuery.systemInfo.
# Every field is a list of node or table names, except discoveryWorks, which is a Boolean.
type CETSSystemInfo {
"Available nodes (nodes that are connected to us and have the CETS disco process started)"
availableNodes: [String]
"Unavailable nodes (nodes that do not respond to our pings)"
unavailableNodes: [String]
"Joined nodes (nodes that have our local tables running)"
joinedNodes: [String]
"Discovered nodes (nodes that are extracted from the discovery backend)."
discoveredNodes: [String]
"Nodes with stopped CETS discovery"
remoteNodesWithoutDisco: [String]
"Nodes that have more tables registered than the local node"
remoteNodesWithUnknownTables: [String]
"Unknown remote tables"
remoteUnknownTables: [String]
"Nodes that are available, but do not host some of our local tables"
remoteNodesWithMissingTables: [String]
"Missing remote tables"
remoteMissingTables: [String]
"Nodes that replicate at least one of our local tables to a different list of nodes"
conflictNodes: [String]
"Tables that have conflicting replication destinations"
conflictTables: [String]
"Returns true if the last discovery attempt is successful"
discoveryWorks: Boolean
}
Loading