Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Instrument/hook handlers - session #4305

Merged
merged 18 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion big_tests/default.spec
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
{suites, "tests", mam_send_message_SUITE}.
{suites, "tests", metrics_api_SUITE}.
{suites, "tests", metrics_c2s_SUITE}.
{suites, "tests", metrics_register_SUITE}.
{suites, "tests", metrics_roster_SUITE}.
{suites, "tests", metrics_session_SUITE}.
{suites, "tests", mod_blocking_SUITE}.
Expand Down
2 changes: 0 additions & 2 deletions big_tests/dynamic_domains.spec
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@

{suites, "tests", metrics_c2s_SUITE}.

{suites, "tests", metrics_register_SUITE}.

{suites, "tests", metrics_roster_SUITE}.

{suites, "tests", metrics_session_SUITE}.
Expand Down
2 changes: 1 addition & 1 deletion big_tests/tests/bosh_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ wait_for_zero_bosh_sessions() ->
instrumentation_events() ->
instrument_helper:declared_events(mod_bosh, [])
++ instrument_helper:declared_events(mongoose_c2s, [global])
++ instrument_helper:declared_events(mongoose_c2s). %% For host_type()
++ [{c2s_message_processing_time, #{host_type => host_type()}}].

negative_instrumentation_events() ->
[{Name, #{}} || Name <- negative_instrumentation_events_names()].
Expand Down
2 changes: 1 addition & 1 deletion big_tests/tests/connect_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -818,4 +818,4 @@ proxy_info() ->
instrumentation_events() ->
instrument_helper:declared_events(mongoose_c2s_listener, [#{}])
++ instrument_helper:declared_events(mongoose_c2s, [global])
++ instrument_helper:declared_events(mongoose_c2s). %% For host_type()
++ [{c2s_message_processing_time, #{host_type => domain_helper:host_type()}}].
12 changes: 8 additions & 4 deletions big_tests/tests/instrument_helper.erl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

-export([declared_events/1, declared_events/2,
start/1, start/2, stop/0,
assert/3, assert/4,
assert/3, assert/4, filter/2,
assert_not_emitted/1, assert_not_emitted/2,
wait_for/2, wait_for_new/2,
lookup/2, take/2]).
Expand Down Expand Up @@ -67,9 +67,7 @@ assert(EventName, Labels, CheckF) ->
%% This is for convenience - you only have to code one clause.
-spec assert(event_name(), labels(), [measurements()], fun((measurements()) -> boolean())) -> ok.
assert(EventName, Labels, MeasurementsList, CheckF) ->
case lists:filter(fun(Measurements) ->
try CheckF(Measurements) catch error:function_clause -> false end
end, MeasurementsList) of
case filter(CheckF, MeasurementsList) of
[] ->
ct:log("All measurements for event ~p with labels ~p:~n~p",
[EventName, Labels, MeasurementsList]),
Expand All @@ -91,6 +89,12 @@ assert_not_emitted(EventName, Labels) ->
assert_not_emitted(Events) ->
[assert_not_emitted(Event, Label) || {Event, Label} <- Events].

-spec filter(fun((measurements()) -> boolean()), [measurements()]) -> [measurements()].
filter(CheckF, MeasurementsList) ->
lists:filter(fun(Measurements) ->
try CheckF(Measurements) catch error:function_clause -> false end
end, MeasurementsList).

%% @doc Remove previous events, and wait for a new one. Use for probes only.
-spec wait_for_new(event_name(), labels()) -> [measurements()].
wait_for_new(EventName, Labels) ->
Expand Down
9 changes: 5 additions & 4 deletions big_tests/tests/last_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ groups() ->
valid_test_cases() -> [online_user_query,
last_online_user,
last_offline_user,
last_server, sessions_cleanup].
last_server].

invalid_test_cases() -> [user_not_subscribed_receives_error].

Expand Down Expand Up @@ -170,7 +170,10 @@ user_not_subscribed_receives_error(Config) ->
ok
end).

sessions_cleanup(Config) ->
%% This test is disabled on CI, because it puts the system in an inconsistent state.
%% The sessions are created with Pids from the test node, which is incorrect.
%% This incorrectly generates 'sm_session' events, leading to inconsistent session count metrics.
sessions_cleanup(_Config) ->
N = distributed_helper:mim(),
HostType = domain_helper:host_type(),
Server = domain_helper:domain(),
Expand All @@ -197,10 +200,8 @@ sessions_cleanup(Config) ->
{ok, #{timestamp := TS, status := Status} = Data} = distributed_helper:rpc(N, mod_last_api, get_last, [Jid3]),
?assertNotEqual(TS, 1714000000, Data),
?assertEqual(Status, <<>>, Data),
distributed_helper:rpc(N, mongoose_metrics, update, [HostType, sessionCount, -345]),
{ok, _} = distributed_helper:rpc(N, mongoose_account_api, unregister_user, [<<"user3">>, Server]).


%%-----------------------------------------------------------------
%% Helpers
%%-----------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion big_tests/tests/login_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ log_one_scram_sha224(Config) ->
log_one_scram_sha256(Config) ->
log_one([{escalus_auth_method, <<"SCRAM-SHA-256">>} | Config]).

log_one_scram_sha384(Config) ->
log_one_scram_sha384(Config) ->
log_one([{escalus_auth_method, <<"SCRAM-SHA-384">>} | Config]).

log_one_scram_sha512(Config) ->
Expand Down
8 changes: 4 additions & 4 deletions big_tests/tests/metrics_api_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ one_client_just_logs_in(Config) ->
{xmppPresenceReceived, 0 + user_alpha(1)},
{xmppStanzaSent, 0 + user_alpha(3)},
{xmppStanzaReceived, 0 + user_alpha(3)},
{sessionSuccessfulLogins, 0 + user_alpha(1)},
{sessionLogouts, 0 + user_alpha(1)}
{'sm_session.logins', 0 + user_alpha(1)},
{'sm_session.logouts', 0 + user_alpha(1)}
]).

two_clients_just_log_in(Config) ->
Expand All @@ -139,8 +139,8 @@ two_clients_just_log_in(Config) ->
{xmppPresenceReceived, 0 + user_alpha(2)},
{xmppStanzaSent, 0 + user_alpha(6)},
{xmppStanzaReceived, 0 + user_alpha(6)},
{sessionSuccessfulLogins, 0 + user_alpha(2)},
{sessionLogouts, 0 + user_alpha(2)}
{'sm_session.logins', 0 + user_alpha(2)},
{'sm_session.logouts', 0 + user_alpha(2)}
]).

one_message_sent(Config) ->
Expand Down
2 changes: 1 addition & 1 deletion big_tests/tests/metrics_roster_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ declared_events() ->
declared_backend_events() ++ declared_sm_events() ++ instrument_helper:declared_events(mod_roster).

declared_sm_events() ->
[{sm_presence_subscription, #{}}].
[{sm_presence_subscription, #{host_type => host_type()}}].

declared_backend_events() ->
BackendMod = backend_mod(),
Expand Down
79 changes: 49 additions & 30 deletions big_tests/tests/metrics_session_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,15 @@
-module(metrics_session_SUITE).
-compile([export_all, nowarn_export_all]).

-include_lib("escalus/include/escalus.hrl").
-include_lib("common_test/include/ct.hrl").

-define(RT_WINDOW, 3). % seconds
-include_lib("stdlib/include/assert.hrl").

-import(metrics_helper, [assert_counter/2,
assert_counter/3,
get_counter_value/1,
wait_for_counter/2,
wait_for_counter/3]).
-import(domain_helper, [host_type/0]).

%%--------------------------------------------------------------------
%% Suite configuration
%%--------------------------------------------------------------------
Expand All @@ -36,7 +35,7 @@ all() ->
{group, session_global}].

groups() ->
[{session, [sequence], [login_one,
[{session, [parallel], [login_one,
login_many,
auth_failed]},
{session_global, [sequence], [session_global,
Expand All @@ -50,10 +49,14 @@ suite() ->
%%--------------------------------------------------------------------

init_per_suite(Config) ->
instrument_helper:start([{sm_session, #{host_type => host_type()}},
{c2s_auth_failed, #{host_type => host_type()}}]),
escalus:init_per_suite(Config).

end_per_suite(Config) ->
escalus:end_per_suite(Config).
escalus_fresh:clean(),
escalus:end_per_suite(Config),
instrument_helper:stop().

init_per_group(_GroupName, Config) ->
escalus:create_users(Config, escalus:get_users([alice, bob])).
Expand All @@ -71,39 +74,31 @@ end_per_testcase(CaseName, Config) ->
%% Tests
%%--------------------------------------------------------------------


login_one(Config) ->
{value, Logins} = get_counter_value(sessionSuccessfulLogins),
escalus:story(Config, [{alice, 1}], fun(Alice) ->

assert_counter(1, sessionCount),
assert_counter(Logins + 1, sessionSuccessfulLogins),

{value, Logouts} = get_counter_value(sessionLogouts),
escalus_client:stop(Config, Alice),
wait_for_counter(0, sessionCount),
wait_for_counter(Logouts + 1, sessionLogouts)
escalus:fresh_story(Config, [{alice, 1}], fun login_one_story/1).

end).
login_one_story(Alice) ->
assert_sm_login_event(Alice),
sm_helper:stop_client_and_wait_for_termination(Alice),
assert_sm_logout_event(Alice).

login_many(Config) ->
{value, Logins} = get_counter_value(sessionSuccessfulLogins),
escalus:story(Config, [{alice, 1}, {bob, 1}], fun(_Alice, _Bob) ->
escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun login_many_story/2).

assert_counter(2, sessionCount),
assert_counter(Logins + 2, sessionSuccessfulLogins)

end).
login_many_story(Alice, Bob) ->
assert_sm_login_event(Alice),
assert_sm_login_event(Bob),
sm_helper:stop_client_and_wait_for_termination(Alice),
assert_sm_logout_event(Alice),
sm_helper:stop_client_and_wait_for_termination(Bob),
assert_sm_logout_event(Bob).

auth_failed(Config) ->
{value, AuthFails} = get_counter_value(sessionAuthFails),

[{_, UserSpec} | _] = escalus_config:get_config(escalus_users, Config),
UserSpec = escalus_fresh:create_fresh_user(Config, alice),
UserSpecM = proplists:delete(password, UserSpec) ++ [{password, <<"mazabe">>}],

{error, _} = escalus_client:start(Config, UserSpecM, <<"res1">>),
assert_counter(0, sessionCount),
assert_counter(AuthFails + 1, sessionAuthFails).
assert_no_sm_login_event(UserSpec),
assert_c2s_auth_failed(UserSpec).

%% Global

Expand All @@ -120,3 +115,27 @@ session_unique(Config) ->
wait_for_counter(global, 1, uniqueSessionCount),
wait_for_counter(global, 2, totalSessionCount)
end).

%% Instrumentation events

assert_sm_login_event(Client) ->
JID = jid:from_binary(escalus_client:full_jid(Client)),
F = fun(M) -> M =:= #{logins => 1, count => 1, jid => JID} end,
instrument_helper:assert(sm_session, #{host_type => host_type()}, F).

assert_no_sm_login_event(UserSpec) ->
LUser = jid:nodeprep(proplists:get_value(username, UserSpec)),
LoginEvents = instrument_helper:lookup(sm_session, #{host_type => host_type()}),
F = fun(#{jid := JID}) -> jid:luser(JID) =:= LUser end,
?assertEqual([], instrument_helper:filter(F, LoginEvents)).

assert_sm_logout_event(Client) ->
JID = jid:from_binary(escalus_client:full_jid(Client)),
F = fun(M) -> M =:= #{logouts => 1, count => -1, jid => JID} end,
instrument_helper:assert(sm_session, #{host_type => host_type()}, F).

assert_c2s_auth_failed(UserSpec) ->
Server = proplists:get_value(server, UserSpec),
UserName = proplists:get_value(username, UserSpec),
F = fun(M) -> M =:= #{count => 1, server => Server, username => UserName} end,
instrument_helper:assert(c2s_auth_failed, #{host_type => host_type()}, F).
4 changes: 2 additions & 2 deletions big_tests/tests/roster_helper.erl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ set_versioning(Versioning, VersionStore, Config) ->
store_current_id => VersionStore}}]),
Config.

%% Intrumentation events
%% Instrumentation events

assert_roster_event(Client, Event) ->
ClientJid = jid:from_binary(escalus_utils:get_jid(Client)),
Expand All @@ -23,7 +23,7 @@ assert_subscription_event(FromClient, ToClient, CheckF) ->
FromClientJid = jid:from_binary(escalus_utils:get_short_jid(FromClient)),
ToClientJid = jid:from_binary(escalus_utils:get_short_jid(ToClient)),
instrument_helper:assert(
sm_presence_subscription, #{},
sm_presence_subscription, #{host_type => host_type()},
fun(#{from_jid := FromJid, to_jid := ToJid} = M) ->
FromClientJid =:= FromJid andalso ToClientJid =:= ToJid andalso CheckF(M)
end).
2 changes: 1 addition & 1 deletion big_tests/tests/websockets_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ escape_attrs(Config) ->
instrumentation_events() ->
instrument_helper:declared_events(mod_websockets, [])
++ instrument_helper:declared_events(mongoose_c2s, [global])
++ instrument_helper:declared_events(mongoose_c2s). %% For host_type()
++ [{c2s_message_processing_time, #{host_type => domain_helper:host_type()}}].

negative_instrumentation_events() ->
[{Name, #{}} || Name <- negative_instrumentation_events_names()].
Expand Down
4 changes: 2 additions & 2 deletions doc/modules/mod_register.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ If you'd like to learn more about metrics in MongooseIM, please visit [MongooseI

| Name | Type | Description (when it gets incremented) |
| ---- | ---- | -------------------------------------- |
| `[Host, modRegisterCount]` | spiral | A user registers via `mod_register` module. |
| `[Host, modUnregisterCount]` | spiral | A user unregisters via `mod_register` module. |
| `[Host, auth_register, count]` | spiral | A user registers via `mod_register` module. |
| `[Host, auth_unregister, count]` | spiral | A user unregisters via `mod_register` module. |

## Entropy calculation algorithm

Expand Down
4 changes: 2 additions & 2 deletions doc/operation-and-maintenance/Logging-&-monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ For time-based metrics, you can choose to display multiple calculated values for
Session count: <prefix>.global.totalSessionCount.value
XMPP messages received: <prefix>.<domain>.xmppMessageReceived.one
XMPP messages sent: <prefix>.<domain>.xmppMessageSent.one
Successful logins: <prefix>.<domain>.sessionSuccessfulLogins.one
Logouts: <prefix>.<domain>.sessionLogouts.one
Successful logins: <prefix>.<domain>.sm_session.logins.one
Logouts: <prefix>.<domain>.sm_session.logouts.one
Authorization time: <prefix>.<domain>.backends.auth.authorize.<value-type>
RDBMS "simple" query time: <prefix>.<domain>.backends.mongoose_rdbms.query.<value-type>
RDBMS prepared query time: <prefix>.<domain>.backends.mongoose_rdbms.execute.<value-type>
Expand Down
34 changes: 17 additions & 17 deletions doc/operation-and-maintenance/MongooseIM-metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,33 +96,33 @@ As a result it makes more sense to maintain a list of the most relevant or usefu

| Name | Type | Description (when it gets incremented) |
| ---- | ---- | -------------------------------------- |
| `[HostType, modPresenceSubscriptions]` | spiral | Presence subscription is processed. |
| `[HostType, modPresenceUnsubscriptions]` | spiral | Presence unsubscription is processed. |
| `[HostType, modRosterGets]` | spiral | User's roster is fetched. |
| `[HostType, modRosterPush]` | spiral | A roster update is pushed to a single session. |
| `[HostType, modRosterSets]` | spiral | User's roster is updated. |
| `[HostType, sm_presence_subscription, subscription_count]` | spiral | Presence subscription is processed. |
| `[HostType, sm_presence_subscription, unsubscription_count]` | spiral | Presence unsubscription is processed. |
| `[HostType, mod_roster_get, count]` | spiral | User's roster is fetched. |
| `[HostType, mod_roster_push, count]` | spiral | A roster update is pushed to a single session. |
| `[HostType, mod_roster_set, count]` | spiral | User's roster is updated. |

### Privacy lists

| Name | Type | Description (when it gets incremented) |
| ---- | ---- | -------------------------------------- |
| `[HostType, modPrivacyGets]` | spiral | IQ privacy `get` is processed. |
| `[HostType, modPrivacyPush]` | spiral | Privacy list update is sent to a single session. |
| `[HostType, modPrivacySets]` | spiral | IQ privacy `set` is processed. |
| `[HostType, modPrivacySetsActive]` | spiral | Active privacy list is changed. |
| `[HostType, modPrivacySetsDefault]` | spiral | Default privacy list is changed. |
| `[HostType, modPrivacyStanzaAll]` | spiral | A packet is checked against the privacy list. |
| `[HostType, modPrivacyStanzaDenied]` | spiral | Privacy list check resulted in `deny`. |
| `[HostType, modPrivacyStanzaBlocked]` | spiral | Privacy list check resulted in `block`. |
| `[HostType, mod_privacy_get, count]` | spiral | IQ privacy `get` is processed. |
| `[HostType, mod_privacy_push_item, count]` | spiral | Privacy list update is sent to a single session. |
| `[HostType, mod_privacy_set, count]` | spiral | IQ privacy `set` is processed. |
| `[HostType, mod_privacy_set, active_count]` | spiral | Active privacy list is changed. |
| `[HostType, mod_privacy_set, default_count]` | spiral | Default privacy list is changed. |
| `[HostType, mod_privacy_check_packet, count]` | spiral | A packet is checked against the privacy list. |
| `[HostType, mod_privacy_check_packet, denied_count]` | spiral | Privacy list check resulted in `deny`. |
| `[HostType, mod_privacy_check_packet, blocked_count]` | spiral | Privacy list check resulted in `block`. |

### Other

| Name | Type | Description (when it gets incremented) |
| ---- | ---- | -------------------------------------- |
| `[HostType, sessionAuthFails]` | spiral | A client failed to authenticate. |
| `[HostType, sessionCount]` | counter | Number of active sessions. |
| `[HostType, sessionLogouts]` | spiral | A client session is closed. |
| `[HostType, sessionSuccessfulLogins]` | spiral | A client session is opened. |
| `[HostType, c2s_auth_failed, count]` | spiral | A client failed to authenticate. |
| `[HostType, sm_session, count]` | counter | Number of active sessions. |
| `[HostType, sm_session, logouts]` | spiral | A client session is closed. |
| `[HostType, sm_session, logins]` | spiral | A client session is opened. |
| `[HostType, xmppErrorIq]` | spiral | An `error` IQ is sent to a client. |
| `[HostType, xmppErrorMessage]` | spiral | An `error` message is sent to a client. |
| `[HostType, xmppErrorPresence]` | spiral | An `error` presence is sent to a client. |
Expand Down
4 changes: 2 additions & 2 deletions doc/rest-api/Administration-backend_swagger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ paths:
metrics:
type: object
example:
modRosterPush:
mod_roster_push.count:
one: 0
count: 0
/metrics/all/{metric}:
Expand Down Expand Up @@ -881,7 +881,7 @@ paths:
metrics:
type: object
example:
modRosterPush:
mod_roster_push.count:
one: 0
count: 0
404:
Expand Down
Loading