diff --git a/big_tests/tests/gdpr_SUITE.erl b/big_tests/tests/gdpr_SUITE.erl index c52e03cc535..2c0bd85af5f 100644 --- a/big_tests/tests/gdpr_SUITE.erl +++ b/big_tests/tests/gdpr_SUITE.erl @@ -56,7 +56,7 @@ groups() -> retrieve_vcard, retrieve_roster, %retrieve_mam, - %retrieve_offline, + retrieve_offline, %retrieve_private_xml, %retrieve_inbox, retrieve_logs, @@ -71,6 +71,7 @@ groups() -> ]}, {retrieve_personal_data_with_mods_disabled, [], [ retrieve_vcard, + retrieve_offline, retrieve_logs, retrieve_roster, retrieve_all_pubsub_data @@ -205,25 +206,38 @@ retrieve_mam(_Config) -> ok. retrieve_offline(Config) -> - escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun(Alice, Bob) -> + escalus:fresh_story(Config, [{alice, 1}, {bob, 1}, {kate, 1}], fun(Alice, Bob, Kate) -> mongoose_helper:logout_user(Config, Alice), - Body = <<"Here's Johnny!">>, - escalus:send(Bob, escalus_stanza:chat_to(Alice, Body)), + Body1 = <<"Hey!">>, + Body2 = <<"Here is Johnny!">>, + Body3 = <<"Where is Johnny ?">>, + escalus:send(Bob, escalus_stanza:chat_to(Alice, Body1)), + escalus:send(Bob, escalus_stanza:chat_to(Alice, Body2)), + escalus:send(Kate, escalus_stanza:chat_to(Alice, Body3)), %% Well, jid_to_lower works for any binary :) AliceU = escalus_utils:jid_to_lower(escalus_client:username(Alice)), AliceS = escalus_utils:jid_to_lower(escalus_client:server(Alice)), mongoose_helper:wait_until( fun() -> mongoose_helper:successful_rpc(mod_offline_backend, count_offline_messages, - [AliceU, AliceS, 1]) - end, 1), + [AliceU, AliceS, 10]) + end, 3), - BobJid = escalus_client:short_jid(Bob), + BobJid = escalus_client:full_jid(Bob), + AliceJid = escalus_client:short_jid(Alice), + KateJid = escalus_client:full_jid(Kate), ExpectedHeader = ["timestamp", "from", "to", "packet"], - ExpectedItems = [ - #{ "packet" => [{contains, Body}], "from" => BobJid } - ], + Expected = [{Body1, BobJid, AliceJid}, {Body2, BobJid, AliceJid}, {Body3, KateJid, AliceJid}], + + ExpectedItems = lists:map(fun({Body, From ,To}) -> + #{ "packet" => [{contains, Body}], + "from" => binary_to_list(From), + "to" => binary_to_list(To), + "timestamp" => [{validate, fun validate_datetime/1}]} + end, Expected), + maybe_stop_and_unload_module(mod_offline, mod_offline_backend, Config), + retrieve_and_validate_personal_data( Alice, Config, "offline", ExpectedHeader, ExpectedItems) end). @@ -473,7 +487,7 @@ validate_sorted_personal_maps([Map | RMaps], [Checks | RChecks]) -> maps:fold(fun(K, Conditions, _) -> validate_personal_item(maps:get(K, Map), Conditions) end, ok, Checks), -validate_sorted_personal_maps(RMaps, RChecks). + validate_sorted_personal_maps(RMaps, RChecks). validate_personal_item(_Value, []) -> ok; @@ -481,6 +495,9 @@ validate_personal_item(ExactValue, ExactValue) -> ok; validate_personal_item(Value, [{contains, String} | RConditions]) -> {match, _} = re:run(Value, String), + validate_personal_item(Value, RConditions); +validate_personal_item(Value, [{validate, Validator} | RConditions]) when is_function(Validator) -> + true = Validator(Value), validate_personal_item(Value, RConditions). retrieve_and_decode_personal_data(Client, Config, FilePrefix) -> @@ -555,3 +572,24 @@ item_content_xml(Data) -> attrs = [{<<"xmlns">>, <<"http://www.w3.org/2005/Atom">>}], children = [#xmlcdata{content = Data}]}. +validate_datetime(TimeStr) -> + [Date, Time] = string:tokens(TimeStr, "T"), + validate_date(Date), + validate_time(Time). + +validate_date(Date) -> + [Y, M, D] = string:tokens(Date, "-"), + Date1 = {list_to_integer(Y), list_to_integer(M), list_to_integer(D)}, + calendar:valid_date(Date1). + +validate_time(Time) -> + [T | _] = string:tokens(Time, "Z"), + validate_time1(T). + + +validate_time1(Time) -> + [H, M, S] = string:tokens(Time, ":"), + check_list([{H, 24}, {M, 60}, {S, 60}]). + +check_list(List) -> + lists:all(fun({V, L}) -> I = list_to_integer(V), I >= 0 andalso I < L end, List). diff --git a/src/mod_offline.erl b/src/mod_offline.erl index cf985dc71f9..2b0d82953e4 100644 --- a/src/mod_offline.erl +++ b/src/mod_offline.erl @@ -32,6 +32,7 @@ -xep([{xep, 22}, {version, "1.4"}]). -xep([{xep, 85}, {version, "2.1"}]). -behaviour(gen_mod). +-behaviour(gdpr). %% gen_mod handlers -export([start/2, stop/1]). @@ -57,6 +58,9 @@ %% helpers to be used from backend moudules -export([is_expired_message/2]). +%% GDPR related +-export([get_personal_data/2]). + -include("mongoose.hrl"). -include("jlib.hrl"). -include("amp.hrl"). @@ -94,6 +98,11 @@ LServer :: jid:lserver(), Reason :: term(), Result :: list(#offline_msg{}). +-callback fetch_messages(LUser, LServer) -> {ok, Result} | {error, Reason} when + LUser :: jid:luser(), + LServer :: jid:lserver(), + Reason :: term(), + Result :: list(#offline_msg{}). -callback write_messages(LUser, LServer, Msgs) -> ok | {error, Reason} when LUser :: jid:luser(), @@ -510,6 +519,28 @@ pop_messages(LUser, LServer) -> Other end. +get_personal_data(Username, Server) -> + AllMessages = lists:flatmap(fun(B) -> + try B:fetch_messages(Username, Server) of + {ok, Messages} -> + Messages; + _ -> [] + catch + _:_ -> + [] + end + end, mongoose_lib:find_behaviour_implementations(mod_offline)), + [{offline, ["timestamp", "from", "to", "packet"], offline_messages_to_gdpr_format(AllMessages)}]. + +offline_messages_to_gdpr_format(MsgList) -> + [offline_msg_to_gdpr_format(Msg) || Msg <- MsgList]. + +offline_msg_to_gdpr_format(#offline_msg{timestamp = Timestamp, from = From, to = To, packet = Packet}) -> + NowUniversal = calendar:now_to_universal_time(Timestamp), + {UTCTime, UTCDiff} = jlib:timestamp_to_iso(NowUniversal, utc), + UTC = list_to_binary(UTCTime ++ UTCDiff), + {UTC, jid:to_binary(From), jid:to_binary(jid:to_bare(To)), exml:to_binary(Packet)}. + skip_expired_messages(TimeStamp, Rs) -> [R || R <- Rs, not is_expired_message(TimeStamp, R)]. diff --git a/src/mod_offline_mnesia.erl b/src/mod_offline_mnesia.erl index 821b597c61a..4beac230964 100644 --- a/src/mod_offline_mnesia.erl +++ b/src/mod_offline_mnesia.erl @@ -30,6 +30,7 @@ -behaviour(mod_offline). -export([init/2, pop_messages/2, + fetch_messages/2, write_messages/3, count_offline_messages/3, remove_expired_messages/1, @@ -65,6 +66,18 @@ pop_messages(LUser, LServer) -> {error, Reason} end. +fetch_messages(User, Server) -> + LUser = jid:nodeprep(User), + LServer = jid:nodeprep(Server), + US = {LUser, LServer}, + F = fun() -> mnesia:wread({offline_msg, US}) end, + case mnesia:transaction(F) of + {atomic, Rs} -> + {ok, Rs}; + {aborted, Reason} -> + {error, Reason} + end. + write_messages(_LUser, _LServer, Msgs) -> F = fun() -> write_messages_t(Msgs) end, case mnesia:transaction(F) of diff --git a/src/mod_offline_rdbms.erl b/src/mod_offline_rdbms.erl index 7b4ed16a3cd..afbd9638e78 100644 --- a/src/mod_offline_rdbms.erl +++ b/src/mod_offline_rdbms.erl @@ -28,6 +28,7 @@ -behaviour(mod_offline). -export([init/2, pop_messages/2, + fetch_messages/2, write_messages/3, count_offline_messages/3, remove_expired_messages/1, @@ -59,6 +60,22 @@ pop_messages(LUser, LServer) -> {error, Reason} end. +fetch_messages(User, Server) -> + LUser = jid:nodeprep(User), + LServer = jid:nodeprep(Server), + US = {LUser, LServer}, + To = jid:make(User, LServer, <<>>), + TimeStamp = p1_time_compat:timestamp(), + SUser = mongoose_rdbms:escape_string(LUser), + SServer = mongoose_rdbms:escape_string(LServer), + STimeStamp = encode_timestamp(TimeStamp), + case rdbms_queries:fetch_offline_messages(LServer, SUser, SServer, STimeStamp) of + {selected, Rows} -> + {ok, rows_to_records(US, To, Rows)}; + {error, Reason} -> + {error, Reason} + end. + rows_to_records(US, To, Rows) -> [row_to_record(US, To, Row) || Row <- Rows]. diff --git a/src/mod_offline_riak.erl b/src/mod_offline_riak.erl index 9abd814f8a2..edb47af93c6 100644 --- a/src/mod_offline_riak.erl +++ b/src/mod_offline_riak.erl @@ -26,6 +26,7 @@ -export([init/2]). -export([pop_messages/2]). +-export([fetch_messages/2]). -export([write_messages/3]). -export([remove_expired_messages/1]). -export([remove_old_messages/2]). @@ -184,3 +185,36 @@ maybe_decode_timestamp(?INFINITY) -> maybe_decode_timestamp(TS) -> usec:to_now(TS). + +fetch_messages(User, Server) -> + LUser = jid:nodeprep(User), + LServer = jid:nodeprep(Server), + Keys = read_user_idx(LUser, LServer), + To = jid:make({User, LServer, <<>>}), + {ok, [fetch_msg(Key, LUser, LServer, To) || Key <- Keys]}. + +fetch_msg(Key, LUser, LServer, To) -> + try + {ok, Obj} = mongoose_riak:get(bucket_type(LServer), Key), + + PacketRaw = riakc_obj:get_value(Obj), + {ok, Packet} = exml:parse(PacketRaw), + MD = riakc_obj:get_update_metadata(Obj), + [Timestamp] = riakc_obj:get_secondary_index(MD, ?TIMESTAMP_IDX), + From = riakc_obj:get_user_metadata_entry(MD, <<"from">>), + [Expire] = riakc_obj:get_secondary_index(MD, ?EXPIRE_IDX), + + #offline_msg{us = {LUser, LServer}, + timestamp = usec:to_now(Timestamp), + expire = maybe_decode_timestamp(Expire), + from = jid:from_binary(From), + to = To, + packet = Packet} + + catch + Error:Reason -> + ?WARNING_MSG("issue=~p, action=reading_key, host=~s, reason=~p, stack_trace=~p", + [Error, LServer, Reason, erlang:get_stacktrace()]), + [] + end. + diff --git a/src/rdbms/rdbms_queries.erl b/src/rdbms/rdbms_queries.erl index 27e2ab6d496..f282263d043 100644 --- a/src/rdbms/rdbms_queries.erl +++ b/src/rdbms/rdbms_queries.erl @@ -96,6 +96,7 @@ prepare_offline_message/6, push_offline_messages/2, pop_offline_messages/4, + fetch_offline_messages/4, count_offline_messages/4, remove_old_offline_messages/2, remove_expired_offline_messages/2, @@ -896,6 +897,9 @@ pop_offline_messages(LServer, SUser, SServer, STimeStamp) -> end, mongoose_rdbms:sql_transaction(LServer, F). +fetch_offline_messages(LServer, SUser, SServer, STimeStamp) -> + mongoose_rdbms:sql_query(LServer, select_offline_messages_sql(SUser, SServer, STimeStamp)). + select_offline_messages_sql(SUser, SServer, STimeStamp) -> [<<"select timestamp, from_jid, packet from offline_message " "where server = ">>, mongoose_rdbms:use_escaped_string(SServer), <<" and "