From 261ec62b79e35da8704eeb5db5273f60a74f5878 Mon Sep 17 00:00:00 2001 From: Piotr Nosek Date: Thu, 25 Apr 2019 15:09:50 +0200 Subject: [PATCH 1/3] Add GDPR retrieve base with logs and vcard retrieval --- .travis.yml | 1 + big_tests/default.spec | 1 + big_tests/rebar.config | 1 + big_tests/rebar.lock | 2 + big_tests/tests/ejabberdctl_helper.erl | 2 + big_tests/tests/gdpr_SUITE.erl | 368 +++++++++++++++++++ priv/parse_logs.sh | 21 ++ rebar.config | 3 +- rebar.lock | 4 + rel/files/mongooseim.cfg | 2 +- src/admin_extra/service_admin_extra.erl | 2 +- src/admin_extra/service_admin_extra_gdpr.erl | 149 ++++++++ src/ejabberd_config.erl | 2 + src/ejabberd_ctl.erl | 16 +- src/ejabberd_loglevel.erl | 60 +-- src/gdpr.erl | 14 + src/mod_vcard.erl | 26 +- src/mod_vcard_mnesia.erl | 4 - src/mod_vcard_rdbms.erl | 4 - src/mongooseim.app.src | 3 +- 20 files changed, 628 insertions(+), 57 deletions(-) create mode 100644 big_tests/tests/gdpr_SUITE.erl create mode 100755 priv/parse_logs.sh create mode 100644 src/admin_extra/service_admin_extra_gdpr.erl create mode 100644 src/gdpr.erl diff --git a/.travis.yml b/.travis.yml index 5398a5bbcdb..ed505c541a1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -77,6 +77,7 @@ branches: - stable - /^rel\-\d+\.\d+$/ - /^\d+\.\d+\.\d+([a-z0-9\-\+])*/ + - gdpr-retrieve-clean otp_release: - 19.3 diff --git a/big_tests/default.spec b/big_tests/default.spec index ba19ee0447b..d021bcc90da 100644 --- a/big_tests/default.spec +++ b/big_tests/default.spec @@ -26,6 +26,7 @@ {suites, "tests", component_SUITE}. {suites, "tests", disco_and_caps_SUITE}. {suites, "tests", ejabberdctl_SUITE}. +{suites, "tests", gdpr_SUITE}. {suites, "tests", inbox_SUITE}. {suites, "tests", jingle_SUITE}. {suites, "tests", last_SUITE}. diff --git a/big_tests/rebar.config b/big_tests/rebar.config index cba5da13c75..69da237cfe9 100644 --- a/big_tests/rebar.config +++ b/big_tests/rebar.config @@ -15,6 +15,7 @@ {escalus, {git, "https://github.com/esl/escalus.git", {ref, "8911491"}}}, {gen_fsm_compat, "0.3.0"}, {cowboy, "2.4.0"}, + {csv, "3.0.3", {pkg, csve}}, {shotgun, {git, "https://github.com/inaka/shotgun.git", {ref, "636d14e"}}}, {amqp_client, "3.7.11-rc.1"}, %% Esip needs to find stun.hrl to compile diff --git a/big_tests/rebar.lock b/big_tests/rebar.lock index 9ddee0c1610..a2afdee29ff 100644 --- a/big_tests/rebar.lock +++ b/big_tests/rebar.lock @@ -9,6 +9,7 @@ {<<"chatterbox">>,{pkg,<<"chatterbox">>,<<"0.8.0">>},0}, {<<"cowboy">>,{pkg,<<"cowboy">>,<<"2.4.0">>},0}, {<<"cowlib">>,{pkg,<<"cowlib">>,<<"2.3.0">>},1}, + {<<"csv">>,{pkg,<<"csve">>,<<"3.0.3">>},0}, {<<"erlsh">>, {git,"https://github.com/proger/erlsh.git", {ref,"4e8a107e4a082b5e687877cf08d152a45a715bf4"}}, @@ -98,6 +99,7 @@ {<<"chatterbox">>, <<"AF8B95EF45D5B9783B67ECEA1E23A4CCE058FCE5E950B42F84F4DF8BC7620D0B">>}, {<<"cowboy">>, <<"F1B72FABE9C8A5FC64AC5AC85FB65474D64733D1DF52A26FAD5D4BA3D9F70A9F">>}, {<<"cowlib">>, <<"BBD58EF537904E4F7C1DD62E6AA8BC831C8183CE4EFA9BD1150164FE15BE4CAA">>}, + {<<"csv">>, <<"69E7D9B3FDC72016644368762C6A3E6CBFEB85BCCADBF1BD99AB6C827E360E04">>}, {<<"gen_fsm_compat">>, <<"5903549F67D595F58A7101154CBE0FDD46955FBFBE40813F1E53C23A970FF5F4">>}, {<<"goldrush">>, <<"F06E5D5F1277DA5C413E84D5A2924174182FB108DABB39D5EC548B27424CD106">>}, {<<"gun">>, <<"18E5D269649C987AF95AEC309F68A27FFC3930531DD227A6EAA0884D6684286E">>}, diff --git a/big_tests/tests/ejabberdctl_helper.erl b/big_tests/tests/ejabberdctl_helper.erl index 20b572c33c5..5a4f956fc90 100644 --- a/big_tests/tests/ejabberdctl_helper.erl +++ 
b/big_tests/tests/ejabberdctl_helper.erl @@ -16,6 +16,8 @@ -import(distributed_helper, [mim/0, rpc/4]). +-spec ejabberdctl(Cmd :: string(), Args :: [binary() | string()], Config :: list()) -> + {Data :: iolist(), ExitStatus :: integer()} | no_return(). ejabberdctl(Cmd, Args, Config) -> Node = mim(), ejabberdctl(Node, Cmd, Args, Config). diff --git a/big_tests/tests/gdpr_SUITE.erl b/big_tests/tests/gdpr_SUITE.erl new file mode 100644 index 00000000000..c67b3546ce4 --- /dev/null +++ b/big_tests/tests/gdpr_SUITE.erl @@ -0,0 +1,368 @@ +-module(gdpr_SUITE). + +%% Tests for features related to GDPR compliance. + +-include_lib("common_test/include/ct.hrl"). +-include_lib("escalus/include/escalus.hrl"). +-include_lib("escalus/include/escalus_xmlns.hrl"). +-include_lib("exml/include/exml.hrl"). + +-export([suite/0, all/0, groups/0]). +-export([init_per_suite/1, end_per_suite/1]). +-export([init_per_group/2, end_per_group/2]). +-export([init_per_testcase/2, end_per_testcase/2]). +-export([ + retrieve_vcard/1, + retrieve_roster/1, + retrieve_mam/1, + retrieve_offline/1, + retrieve_pubsub/1, + retrieve_private_xml/1, + retrieve_inbox/1, + retrieve_logs/1 + ]). +-export([ + data_is_not_retrieved_for_missing_user/1 + ]). + +-import(ejabberdctl_helper, [ejabberdctl/3]). + +-import(distributed_helper, [mim/0, + rpc/4]). + +%% ------------------------------------------------------------- +%% Common Test stuff +%% ------------------------------------------------------------- + +suite() -> + escalus:suite(). + +all() -> + [ + {group, retrieve_personal_data}, + {group, data_is_not_retrieved_for_missing_user} + ]. + +groups() -> + [ + {retrieve_personal_data, [parallel], [ + % per type + retrieve_vcard, + %retrieve_roster, + %retrieve_mam, + %retrieve_offline, + %retrieve_pubsub, + %retrieve_private_xml, + %retrieve_inbox, + retrieve_logs + ]}, + {data_is_not_retrieved_for_missing_user, [], + [data_is_not_retrieved_for_missing_user] + } + ]. + +init_per_suite(Config) -> + Config1 = [{{ejabberd_cwd, mim()}, get_mim_cwd()} | dynamic_modules:save_modules(domain(), Config)], + escalus:init_per_suite(Config1). + +end_per_suite(Config) -> + dynamic_modules:restore_modules(domain(), Config), + escalus_fresh:clean(), + escalus:end_per_suite(Config). + +init_per_group(_GN, Config) -> + Config. + +end_per_group(_GN, Config) -> + Config. + +init_per_testcase(retrieve_inbox = CN, Config) -> + case (not ct_helper:is_ct_running()) + orelse mongoose_helper:is_rdbms_enabled(domain()) of + true -> + dynamic_modules:ensure_modules(domain(), inbox_required_modules()), + escalus:init_per_testcase(CN, Config); + false -> + {skip, require_rdbms} + end; +init_per_testcase(retrieve_vcard = CN, Config) -> + case vcard_update:is_vcard_ldap() of + true -> + {skip, skipped_for_simplicity_for_now}; % TODO: Fix the case for LDAP as well + _ -> + escalus:init_per_testcase(CN, Config) + end; +init_per_testcase(retrieve_mam = CN, Config) -> + case pick_backend_for_mam() of + skip -> + {skip, no_supported_backends}; + Backend -> + dynamic_modules:ensure_modules(domain(), mam_required_modules(Backend)), + escalus:init_per_testcase(CN, Config) + end; +init_per_testcase(retrieve_pubsub = CN, Config) -> + dynamic_modules:ensure_modules(domain(), pubsub_required_modules()), + escalus:init_per_testcase(CN, Config); +init_per_testcase(CN, Config) -> + escalus:init_per_testcase(CN, Config). 
+ +end_per_testcase(retrieve_vcard = CN, Config) -> + delete_files(), + escalus:end_per_testcase(CN, Config); +end_per_testcase(CN, Config) -> + escalus:end_per_testcase(CN, Config). + +inbox_required_modules() -> + [{mod_inbox, []}]. + +pick_backend_for_mam() -> + BackendsList = [ + {mam_helper:is_cassandra_enabled(domain()), cassandra}, + {mam_helper:is_riak_enabled(domain()), riak}, + {mam_helper:is_elasticsearch_enabled(domain()), elasticsearch}, + {mongoose_helper:is_rdbms_enabled(domain()), rdbms} + ], + lists:foldl(fun({true, Backend}, skip) -> + Backend; + (_, BackendOrSkip) -> + BackendOrSkip + end, skip, BackendsList). + +mam_required_modules(Backend) -> + [{mod_mam_meta, [{backend, Backend}, {pm, []}]}]. + +pubsub_required_modules() -> + [{mod_caps, []}, {mod_pubsub, [ + {backend, mongoose_helper:mnesia_or_rdbms_backend()}, + {host, "pubsub.@HOST@"}, + {nodetree, <<"tree">>}, + {plugins, [<<"flat">>, <<"pep">>]} + ] + }]. + +%% ------------------------------------------------------------- +%% Test cases +%% ------------------------------------------------------------- + +%% ------------------------- Data retrieval - per type verification ------------------------- + +retrieve_vcard(Config) -> + escalus:fresh_story(Config, [{alice, 1}], fun(Alice) -> + AliceFields = [{<<"FN">>, <<"Alice">>}, {<<"LN">>, <<"Ecila">>}], + AliceSetResultStanza + = escalus:send_and_wait(Alice, escalus_stanza:vcard_update(AliceFields)), + escalus:assert(is_iq_result, AliceSetResultStanza), + AliceU = escalus_utils:jid_to_lower(escalus_client:username(Alice)), + AliceS = escalus_utils:jid_to_lower(escalus_client:server(Alice)), + ExpectedHeader = ["jid", "vcard"], + ExpectedItems = [ + #{ "jid" => [{contains, AliceU}, + {contains, AliceS}], + "vcard" => [{contains, "Alice"}, + {contains, "Ecila"}] } + ], + retrieve_and_validate_personal_data( + Alice, Config, "vcard", ExpectedHeader, ExpectedItems) + end). + +retrieve_roster(Config) -> + escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun(Alice, Bob) -> + escalus_story:make_all_clients_friends([Alice, Bob]), + ExpectedHeader = ["jid", "name", "groups"], % TODO + ExpectedItems = [ + #{ "jid" => escalus_client:short_jid(Bob) } + ], + retrieve_and_validate_personal_data( + Alice, Config, "roster", ExpectedHeader, ExpectedItems) + end). + +retrieve_mam(_Config) -> + ok. + +retrieve_offline(Config) -> + escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun(Alice, Bob) -> + mongoose_helper:logout_user(Config, Alice), + Body = <<"Here's Johnny!">>, + escalus:send(Bob, escalus_stanza:chat_to(Alice, Body)), + %% Well, jid_to_lower works for any binary :) + AliceU = escalus_utils:jid_to_lower(escalus_client:username(Alice)), + AliceS = escalus_utils:jid_to_lower(escalus_client:server(Alice)), + mongoose_helper:wait_until( + fun() -> + mongoose_helper:successful_rpc(mod_offline_backend, count_offline_messages, + [AliceU, AliceS, 1]) + end, 1), + + BobJid = escalus_client:short_jid(Bob), + ExpectedHeader = ["timestamp", "from", "to", "packet"], + ExpectedItems = [ + #{ "packet" => [{contains, Body}], "from" => BobJid } + ], + retrieve_and_validate_personal_data( + Alice, Config, "offline", ExpectedHeader, ExpectedItems) + end). 
+ +retrieve_pubsub(Config) -> + escalus:fresh_story(Config, [{alice, 1}], fun(Alice) -> + Node = pubsub_tools:pubsub_node(), + ItemId = <<"puczjorhendsindier">>, + pubsub_tools:publish(Alice, ItemId, Node, [{with_payload, true}]), + PepNS = <<"gdpr:pep">>, + PepItemId = <<"puczjorhendsap">>, + pubsub_tools:publish(Alice, PepItemId, {pep, PepNS}, []), + + ExpectedHeader = ["node_id", "item_id", "payload"], + ExpectedItems = [ + ], + retrieve_and_validate_personal_data( + Alice, Config, "pubsub", ExpectedHeader, ExpectedItems) + end). + +retrieve_private_xml(Config) -> + escalus:fresh_story(Config, [{alice, 1}], fun(Alice) -> + NS = <<"alice:gdpr:ns">>, + Content = <<"dGhlcmUgYmUgZHJhZ29ucw==">>, + XML = #xmlel{ name = <<"fingerprint">>, + attrs = [{<<"xmlns">>, NS}], + children = [#xmlcdata{ content = Content }]}, + PrivateStanza = escalus_stanza:private_set(XML), + escalus_client:send(Alice, PrivateStanza), + escalus:assert(is_iq_result, [PrivateStanza], escalus_client:wait_for_stanza(Alice)), + ExpectedHeader = ["ns", "xml"], % TODO? + ExpectedItems = [ + #{ "xml" => [{contains, "alice:gdpr:ns"}, + {contains, binary_to_list(Content)}] } + ], + retrieve_and_validate_personal_data( + Alice, Config, "private", ExpectedHeader, ExpectedItems) + end). + +retrieve_inbox(Config) -> + escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun(Alice, Bob) -> + Body = <<"With spam?">>, + escalus:send(Bob, escalus_stanza:chat_to(Alice, Body)), + Msg = escalus:wait_for_stanza(Alice), + escalus:assert(is_chat_message, [Body], Msg), + + BobJid = escalus_client:short_jid(Bob), + ExpectedHeader = ["jid", "content", "unread_count", "msg_id", "timestamp"], + ExpectedItems = [ + #{ "content" => Body, "jid" => BobJid } + ], + retrieve_and_validate_personal_data( + Alice, Config, "inbox", ExpectedHeader, ExpectedItems) + end). + +retrieve_logs(Config) -> + escalus:fresh_story(Config, [{alice, 1}], + fun(Alice) -> + User = string:lowercase(escalus_client:username(Alice)), + Domain = string:lowercase(escalus_client:server(Alice)), + JID = string:uppercase(escalus_client:short_jid(Alice)), + MIM2Node = distributed_helper:mim2(), + mongoose_helper:successful_rpc(net_kernel, connect_node, [MIM2Node]), + mongoose_helper:successful_rpc(MIM2Node, error_logger, error_msg, + ["event=disturbance_in_the_force, jid=~s", [JID]]), + Dir = request_and_unzip_personal_data(User, Domain, Config), + Filename = filename:join(Dir, "logs-" ++ atom_to_list(MIM2Node) ++ ".txt"), + {ok, Content} = file:read_file(Filename), + {match, _} = re:run(Content, "disturbance_in_the_force") + end). + +%% ------------------------- Data retrieval - Negative case ------------------------- + +data_is_not_retrieved_for_missing_user(Config) -> + {Filename, 1} = retrieve_personal_data("non-person", "oblivion", Config), + {error, _} = file:read_file_info(Filename). + +%% ------------------------------------------------------------- +%% Internal functions +%% ------------------------------------------------------------- + +domain() -> + <<"localhost">>. % TODO: Make dynamic? + +retrieve_and_validate_personal_data(Alice, Config, FilePrefix, ExpectedHeader, ExpectedItems) -> + PersonalCSV = retrieve_and_decode_personal_data(Alice, Config, FilePrefix), + PersonalMaps = csv_to_maps(ExpectedHeader, PersonalCSV), + try validate_personal_maps(PersonalMaps, ExpectedItems) of + _ -> ok + catch + C:R -> + ct:fail(#{ + class => C, + reason => R, + stacktrace => erlang:get_stacktrace(), + personal_maps => PersonalMaps, + expected_items => ExpectedItems + }) + end. 
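%% A short recap of the ExpectedItems format consumed above, mirroring the
%% retrieve_vcard case earlier in this file: each key is a CSV column name and each
%% value is either an exact value or a list of {contains, Substring} conditions that
%% validate_personal_item/2 (defined below) checks with re:run/2, e.g.
%%
%%     ExpectedItems = [#{ "jid"   => [{contains, AliceU}, {contains, AliceS}],
%%                         "vcard" => [{contains, "Alice"}, {contains, "Ecila"}] }],
%%     retrieve_and_validate_personal_data(Alice, Config, "vcard",
%%                                         ["jid", "vcard"], ExpectedItems).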
+ +csv_to_maps(ExpectedHeader, [ExpectedHeader | Rows]) -> + lists:foldl(fun(Row, Maps) -> [ csv_row_to_map(ExpectedHeader, Row) | Maps ] end, [], Rows). + +csv_row_to_map(Header, Row) -> + maps:from_list(lists:zip(Header, Row)). + +validate_personal_maps(_, []) -> ok; +validate_personal_maps([Map | RMaps], [Checks | RChecks]) -> + maps:fold(fun(K, Conditions, _) -> + validate_personal_item(maps:get(K, Map), Conditions) + end, ok, Checks), + validate_personal_maps(RMaps, RChecks). + +validate_personal_item(_Value, []) -> + ok; +validate_personal_item(ExactValue, ExactValue) -> + ok; +validate_personal_item(Value, [{contains, String} | RConditions]) -> + {match, _} = re:run(Value, String), + validate_personal_item(Value, RConditions). + +retrieve_and_decode_personal_data(Client, Config, FilePrefix) -> + User = escalus_client:username(Client), + Domain = escalus_client:server(Client), + Dir = request_and_unzip_personal_data(User, Domain, Config), + CSVPath = filename:join(Dir, FilePrefix ++ ".csv"), + {ok, Content} = file:read_file(CSVPath), + % We expect non-empty list because it must contain at least header with columns names + [_ | _] = csv:decode_binary(Content). + +request_and_unzip_personal_data(User, Domain, Config) -> + {Filename, 0} = retrieve_personal_data(User, Domain, Config), + FullPath = get_mim_cwd() ++ "/" ++ Filename, + Dir = Filename ++ ".unzipped", + {ok, _} = zip:extract(FullPath, [{cwd,Dir}]), + Dir. + +retrieve_personal_data(User, Domain, Config) -> + Filename = random_filename(Config), + {_, Code} = ejabberdctl("retrieve_personal_data", [User, Domain, Filename], Config), + {Filename, Code}. + +random_filename(Config) -> + TCName = atom_to_list(?config(tc_name, Config)), + TCName ++ "." ++ integer_to_list(erlang:system_time()) ++ ".zip". + +get_mim_cwd() -> + {ok, Cwd} = rpc(mim(), file, get_cwd, []), + Cwd. + +delete_files() -> + Cwd = get_mim_cwd(), + {ok, Filenames} = rpc(mim(), file, list_dir, [Cwd]), + FilteredFilenames = lists:filter( + fun is_file_to_be_deleted/1, + Filenames), + lists:foreach( + fun(Filename) -> rpc(mim(), file, delete, [Cwd ++ "/" ++ Filename]) end, + FilteredFilenames), + ok. + +is_file_to_be_deleted(Filename) -> + DeletableRegexes = ["\.csv", "\.zip"], + lists:any( + fun(Regex) -> + re:run(Filename, Regex) =/= nomatch + end, + DeletableRegexes). diff --git a/priv/parse_logs.sh b/priv/parse_logs.sh new file mode 100755 index 00000000000..e0c24c552bc --- /dev/null +++ b/priv/parse_logs.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +## Args = [FileName, Username, Domain | FileList] + +echo "$@" >$1 + +file=$1 +user=$2 +domain=$3 + +shift 3 + +pattern="${user}@${domain}" + +filelist=() + +for f in "$@"; do +filelist+=("${f}"*) +done + +grep -iF "$pattern" "${filelist[@]}" > "$file" || true diff --git a/rebar.config b/rebar.config index 318cd56231e..b03a75c1f9f 100644 --- a/rebar.config +++ b/rebar.config @@ -74,7 +74,8 @@ {amqp_client, "3.7.11-rc.1"}, {eredis, {git, "https://github.com/igors/eredis.git", {ref, "e9688a1"}}}, {gen_fsm_compat, "0.3.0"}, - {erlang_pmp, "0.1.1"} + {erlang_pmp, "0.1.1"}, + {erl_csv_generator, ".*", {git, "https://github.com/aleklisi/erl_csv_generator.git"}} ]}. 
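%% Note: two CSV libraries are added on purpose. The test side (big_tests/rebar.config
%% above) pulls in the `csv` (csve) hex package so gdpr_SUITE can decode the retrieved
%% files with csv:decode_binary/1, while the server side uses the erl_csv_generator
%% dependency added here (csv_gen:row/2 in service_admin_extra_gdpr) and lists it in
%% src/mongooseim.app.src further down.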
{relx, [{release, { mongooseim, {cmd, "cat VERSION | tr -d '\r\n'"} }, diff --git a/rebar.lock b/rebar.lock index 6be084c60e9..99137340751 100644 --- a/rebar.lock +++ b/rebar.lock @@ -31,6 +31,10 @@ {git,"https://github.com/igors/eredis.git", {ref,"e9688a1ec98e10acca310f19266bd95f976bc262"}}, 0}, + {<<"erl_csv_generator">>, + {git,"https://github.com/aleklisi/erl_csv_generator.git", + {ref,"72a61b0f96db46bfb8905e933c8ec11e12cfd21e"}}, + 0}, {<<"erlang_pmp">>,{pkg,<<"erlang_pmp">>,<<"0.1.1">>},0}, {<<"erlcloud">>,{pkg,<<"erlcloud">>,<<"3.2.2">>},0}, {<<"exml">>, diff --git a/rel/files/mongooseim.cfg b/rel/files/mongooseim.cfg index 776146f8347..d15344b5fc0 100755 --- a/rel/files/mongooseim.cfg +++ b/rel/files/mongooseim.cfg @@ -655,7 +655,7 @@ {services, [ - {service_admin_extra, [{submods, [node, accounts, sessions, vcard, + {service_admin_extra, [{submods, [node, accounts, sessions, vcard, gdpr, roster, last, private, stanza, stats]}]} ] }. diff --git a/src/admin_extra/service_admin_extra.erl b/src/admin_extra/service_admin_extra.erl index 11491e2791e..5a3040286bc 100644 --- a/src/admin_extra/service_admin_extra.erl +++ b/src/admin_extra/service_admin_extra.erl @@ -32,7 +32,7 @@ -export([start/1, stop/0]). -define(SUBMODS, [node, accounts, sessions, vcard, roster, last, - private, stanza, stats + private, stanza, stats, gdpr %, srg %% Disabled until we add mod_shared_roster ]). diff --git a/src/admin_extra/service_admin_extra_gdpr.erl b/src/admin_extra/service_admin_extra_gdpr.erl new file mode 100644 index 00000000000..b112415d702 --- /dev/null +++ b/src/admin_extra/service_admin_extra_gdpr.erl @@ -0,0 +1,149 @@ +-module(service_admin_extra_gdpr). + +-include("ejabberd_commands.hrl"). + +-export([commands/0, + retrieve_all/3]). + +% Exported for RPC call +-export([retrieve_logs/2]). + +-define(CMD_TIMEOUT, 300000). + +-spec commands() -> [ejabberd_commands:cmd()]. +commands() -> [ + #ejabberd_commands{name = retrieve_personal_data, tags = [gdpr], + desc = "Retrieve user's presonal data.", + longdesc = "Retrieves all personal data from MongooseIM for a given user. Example:\n" + " mongooseimctl alice localhost /home/mim/alice.smith.zip ", + module = ?MODULE, + function = retrieve_all, + args = [{username, binary}, {domain, binary}, {path, binary}], + result = {res, rescode}} + + ]. + +-spec retrieve_all(jid:user(), jid:server(), Path :: binary()) -> + RetrievedFilesInZipName :: binary() | {error, Reason :: any()}. +retrieve_all(Username, Domain, ResultFilePath) -> + case user_exists(Username, Domain) of + true -> + DataFromModules = get_data_from_modules(Username, Domain), + TmpDir = make_tmp_dir(), + + CsvFiles = lists:map( + fun({DataGroup, Schema, Entries}) -> + BinDataGroup = atom_to_binary(DataGroup, utf8), + FileName = <>, + to_csv_file(FileName, Schema, Entries, TmpDir), + binary_to_list(FileName) + end, + DataFromModules), + + LogFiles = get_all_logs(Username, Domain, TmpDir), + + ZipFile = binary_to_list(ResultFilePath), + {ok, ZipFile} = zip:create(ZipFile, CsvFiles ++ LogFiles, [{cwd, TmpDir}]), + remove_tmp_dir(TmpDir), + ok; + false -> + {error, "User does not exist"} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Private funs +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec modules_with_personal_data() -> [module()]. +modules_with_personal_data() -> + [ + mod_vcard + ]. + +-spec get_data_from_modules(jid:user(), jid:server()) -> + [{gdpr:data_group(), gdpr:schema(), gdpr:entries()}]. 
+get_data_from_modules(Username, Domain) -> + Modules = modules_with_personal_data(), + lists:flatmap(fun(M) -> try_get_data_from_module(M, Username, Domain) end, Modules). + +try_get_data_from_module(Module, Username, Domain) -> + try Module:get_personal_data(Username, Domain) of + [{_, _, []}] -> []; + Val -> Val + catch + _:_ -> [] + end. + +-spec to_csv_file(CsvFilename :: binary(), gdpr:schema(), gdpr:entities(), file:name()) -> ok. +to_csv_file(Filename, DataSchema, DataRows, TmpDir) -> + FilePath = <<(list_to_binary(TmpDir))/binary, "/", Filename/binary>>, + {ok, File} = file:open(FilePath, [write]), + csv_gen:row(File, DataSchema), + lists:foreach(fun(Row) -> csv_gen:row(File, Row) end, DataRows), + file:close(File). + +-spec user_exists(gdpr:username(), gdpr:domain()) -> boolean(). +user_exists(Username, Domain) -> + ejabberd_auth:is_user_exists(Username, Domain). + +-spec make_tmp_dir() -> file:name(). +make_tmp_dir() -> + TmpDirName = lists:flatten(io_lib:format("/tmp/gdpr-~4.36.0b", [rand:uniform(36#zzzz)])), + case file:make_dir(TmpDirName) of + ok -> TmpDirName; + {error, eexist} -> make_tmp_dir(); + {error, Error} -> {error, Error} + end. + +-spec remove_tmp_dir(file:name()) -> ok. +remove_tmp_dir(TmpDir) -> + {ok, FileNames} = file:list_dir(TmpDir), + [file:delete(TmpDir ++ "/" ++ File) || File <- FileNames], + file:del_dir(TmpDir). + +-type cmd() :: string() | binary(). +-spec run(cmd(), [cmd()], timeout()) -> non_neg_integer() | timeout. +run(Cmd, Args, Timeout) -> + Port = erlang:open_port({spawn_executable, Cmd}, [exit_status, {args, Args}]), + receive + {Port, {exit_status, ExitStatus}} -> ExitStatus + after Timeout -> + timeout + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Logs retrieval +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec retrieve_logs(gdpr:username(), gdpr:domain()) -> {ok, ZippedLogs :: binary()}. +retrieve_logs(Username, Domain) -> + TmpDir = make_tmp_dir(), + LogFile = get_logs(Username, Domain, TmpDir), + {ok, {_, ZippedLogs}} = zip:create("archive.zip", [LogFile], [memory, {cwd, TmpDir}]), + remove_tmp_dir(TmpDir), + {ok, ZippedLogs}. + +-spec get_all_logs(gdpr:username(), gdpr:domain(), file:name()) -> [file:name()]. +get_all_logs(Username, Domain, TmpDir) -> + OtherNodes = ejabberd_config:other_cluster_nodes(), + LogFile = get_logs(Username, Domain, TmpDir), + LogFilesFromOtherNodes = [get_logs_from_node(Node, Username, Domain, TmpDir) || Node <- OtherNodes], + [LogFile | LogFilesFromOtherNodes]. + +-spec get_logs(gdpr:username(), gdpr:domain(), file:name()) -> file:name(). +get_logs(Username, Domain, TmpDir) -> + FileList = [filename:absname(F) || F <- ejabberd_loglevel:get_log_files()], + Cmd = code:priv_dir(mongooseim) ++ "/parse_logs.sh", + FileName = "logs-" ++ atom_to_list(node()) ++ ".txt", + FilePath = TmpDir ++ "/" ++ FileName, + Args = [FilePath, Username, Domain | FileList], + 0 = run(Cmd, Args, ?CMD_TIMEOUT), + FileName. + +-spec get_logs_from_node(node(), gdpr:username(), gdpr:domain(), file:name()) -> file:name(). +get_logs_from_node(Node, Username, Domain, TmpDir) -> + {ok, ZippedData} = rpc:call(Node, ?MODULE, retrieve_logs, [Username, Domain]), + {ok, [File]} = zip:unzip(ZippedData, [{cwd, TmpDir}]), + string:prefix(File, TmpDir ++ "/"). + + diff --git a/src/ejabberd_config.erl b/src/ejabberd_config.erl index 8b885b2db38..1b6aae2957a 100644 --- a/src/ejabberd_config.erl +++ b/src/ejabberd_config.erl @@ -54,6 +54,8 @@ -export([config_state/0]). 
-export([config_states/0]). +-export([other_cluster_nodes/0]). + -import(mongoose_config_parser, [can_be_ignored/1]). -export([apply_reloading_change/1]). diff --git a/src/ejabberd_ctl.erl b/src/ejabberd_ctl.erl index a9171b56fe6..5cf9045d073 100644 --- a/src/ejabberd_ctl.erl +++ b/src/ejabberd_ctl.erl @@ -152,7 +152,7 @@ process(["status"]) -> {mongoose_status, MongooseStatus}, {os_pid, os:getpid()}, get_uptime(), {dist_proto, get_dist_proto()}, - {logs, get_log_files()}])]), + {logs, ejabberd_loglevel:get_log_files()}])]), case MongooseStatus of not_running -> ?STATUS_ERROR; {running, _, _Version} -> ?STATUS_SUCCESS @@ -923,17 +923,3 @@ get_dist_proto() -> _ -> "inet_tcp" end. -%%----------------------------- -%% Lager specific helpers -%%----------------------------- - -get_log_files() -> - Handlers = case catch sys:get_state(lager_event) of - {'EXIT', _} -> []; - Hs when is_list(Hs) -> Hs - end, - [ file_backend_path(State) - || {lager_file_backend, _File, State} <- Handlers ]. - -file_backend_path(LagerFileBackendState) when element(1, LagerFileBackendState) =:= state -> - element(2, LagerFileBackendState). diff --git a/src/ejabberd_loglevel.erl b/src/ejabberd_loglevel.erl index a308619c56b..ccff06a290f 100644 --- a/src/ejabberd_loglevel.erl +++ b/src/ejabberd_loglevel.erl @@ -32,18 +32,20 @@ set_custom/2, clear_custom/0, clear_custom/1]). +-export([get_log_files/0]). + -include("mongoose.hrl"). -export_type([loglevel/0]). -type loglevel() :: none | critical | error | warning | info | debug. -define(LOG_LEVELS, - [{0, none}, - {1, critical}, - {2, error}, - {3, warning}, - {4, info}, - {5, debug}]). + [{0, none}, + {1, critical}, + {2, error}, + {3, warning}, + {4, info}, + {5, debug}]). -define(ETS_TRACE_TAB, ejabberd_lager_traces). @@ -54,39 +56,39 @@ init() -> -spec get() -> [{{atom(), term()} | atom(), {non_neg_integer(), loglevel()}}]. get() -> - Backends = gen_event:which_handlers(lager_event), - [ {Backend, lists:keyfind(lager:get_loglevel(Backend), 2, ?LOG_LEVELS)} - || Backend <- Backends, Backend /= lager_backend_throttle ]. + Backends = [Backend || Sink <- lager:list_all_sinks(), Backend <- gen_event:which_handlers(Sink)], + [{Backend, lists:keyfind(lager:get_loglevel(Backend), 2, ?LOG_LEVELS)} + || Backend <- Backends, Backend /= lager_backend_throttle]. -spec set(loglevel() | integer()) -> [Result] when - Result :: { LagerBackend, ok | {error, Reason} }, - %% Yes, these are two different errors! - Reason :: bad_log_level | bad_loglevel, - LagerBackend :: lager_console_backend | {lager_file_backend, Path}, - Path :: string(). + Result :: {LagerBackend, ok | {error, Reason}}, + %% Yes, these are two different errors! + Reason :: bad_log_level | bad_loglevel, + LagerBackend :: lager_console_backend | {lager_file_backend, Path}, + Path :: string(). 
set(Level) when is_integer(Level) -> {_, Name} = lists:keyfind(Level, 1, ?LOG_LEVELS), set(Name); set(Level) -> - Backends = gen_event:which_handlers(lager_event), - Files = [ { B, lager:set_loglevel(lager_file_backend, File, Level) } - || B = {lager_file_backend, File} <- Backends ], - Consoles = [ { B, lager:set_loglevel(lager_console_backend, Level) } - || B = lager_console_backend <- Backends ], + Backends = [Backend || Sink <- lager:list_all_sinks(), Backend <- gen_event:which_handlers(Sink)], + Files = [{B, lager:set_loglevel(lager_file_backend, File, Level)} + || B = {lager_file_backend, File} <- Backends], + Consoles = [{B, lager:set_loglevel(lager_console_backend, Level)} + || B = lager_console_backend <- Backends], Files ++ Consoles. -spec set_custom(Module :: atom(), loglevel() | integer()) -> [Result] when - Result :: {lager_console_backend | {lager_file_backend, string()}, - ok | {error, any()}}. + Result :: {lager_console_backend | {lager_file_backend, string()}, + ok | {error, any()}}. set_custom(Module, Level) when is_integer(Level) -> {_, Name} = lists:keyfind(Level, 1, ?LOG_LEVELS), set_custom(Module, Name); set_custom(Module, Level) when is_atom(Level) -> clear_custom(Module), - Backends = gen_event:which_handlers(lager_event), - [ {Backend, set_trace(Backend, Module, Level)} - || Backend <- Backends, - Backend /= lager_backend_throttle ]. + Backends = [Backend || Sink <- lager:list_all_sinks(), Backend <- gen_event:which_handlers(Sink)], + [{Backend, set_trace(Backend, Module, Level)} + || Backend <- Backends, + Backend /= lager_backend_throttle]. set_trace(Backend, Module, Level) -> case lager:trace(Backend, [{module, Module}], Level) of @@ -111,8 +113,12 @@ clear_trace({_Module, Trace}, ok) -> clear_custom(Module) when is_atom(Module) -> case ets:lookup(?ETS_TRACE_TAB, Module) of [] -> ok; - [_|_] = Traces -> + [_ | _] = Traces -> ets:delete(?ETS_TRACE_TAB, Module), - [ lager:stop_trace(Trace) || {_, Trace} <- Traces ], + [lager:stop_trace(Trace) || {_, Trace} <- Traces], ok end. + +get_log_files() -> + [lager_util:expand_path(File) || {{lager_file_backend, File}, _, _} <- lager_config:global_get(handlers)]. + diff --git a/src/gdpr.erl b/src/gdpr.erl new file mode 100644 index 00000000000..bda66b50fe4 --- /dev/null +++ b/src/gdpr.erl @@ -0,0 +1,14 @@ +-module(gdpr). + +-export_type( + [data_group/0, + schema/0, + entries/0]). + +-type data_group() :: atom(). +-type entry() :: [string() | binary()]. +-type entries() :: [entry()]. +-type schema() :: [string()]. + +-callback get_personal_data(jid:user(), jid:server()) -> + [{data_group(), schema(), entries()}]. diff --git a/src/mod_vcard.erl b/src/mod_vcard.erl index 7eb81dafa05..72926ffd446 100644 --- a/src/mod_vcard.erl +++ b/src/mod_vcard.erl @@ -37,6 +37,7 @@ -xep([{xep, 55}, {version, "1.3"}]). -behaviour(gen_mod). -behaviour(gen_server). +-behaviour(gdpr). -include("mongoose.hrl"). -include("jlib.hrl"). @@ -73,6 +74,8 @@ -export([config_change/4]). +-export([get_personal_data/2]). + -define(PROCNAME, ejabberd_mod_vcard). -record(state, {search :: boolean(), @@ -82,9 +85,6 @@ -type error() :: error | {error, any()}. -%%-------------------------------------------------------------------- -%% backend callbacks -%%-------------------------------------------------------------------- -callback init(Host, Opts) -> ok when Host :: binary(), Opts :: list(). @@ -123,6 +123,25 @@ -optional_callbacks([tear_down/1]). 
+%%-------------------------------------------------------------------- +%% gdpr callback +%%-------------------------------------------------------------------- + +-spec get_personal_data(jid:user(), jid:server()) -> + [{gdpr:data_group(), gdpr:schema(), gdpr:entries()}]. +get_personal_data(Username, Server) -> + LUser = jid:nodeprep(Username), + LServer = jid:nameprep(Server), + Jid = jid:to_binary({LUser, LServer}), + Schema = ["jid", "vcard"], + Entries = case mod_vcard_backend:get_vcard(LUser, LServer) of + {ok, Record} -> + SerializedRecords = exml:to_binary(Record), + [{Jid, SerializedRecords}]; + _ -> [] + end, + [{vcard, Schema, Entries}]. + -spec default_search_fields() -> list(). default_search_fields() -> [{<<"User">>, <<"user">>}, @@ -159,6 +178,7 @@ default_host() -> %%-------------------------------------------------------------------- %% gen_mod callbacks %%-------------------------------------------------------------------- + start(VHost, Opts) -> gen_mod:start_backend_module(?MODULE, Opts, [set_vcard, get_vcard, search]), Proc = gen_mod:get_module_proc(VHost, ?PROCNAME), diff --git a/src/mod_vcard_mnesia.erl b/src/mod_vcard_mnesia.erl index 09dc9b39297..53bd9416960 100644 --- a/src/mod_vcard_mnesia.erl +++ b/src/mod_vcard_mnesia.erl @@ -15,10 +15,6 @@ -include("jlib.hrl"). -include("mod_vcard.hrl"). -%%-------------------------------------------------------------------- -%% mod_vcards callbacks -%%-------------------------------------------------------------------- - init(_VHost, _Options) -> prepare_db(), ok. diff --git a/src/mod_vcard_rdbms.erl b/src/mod_vcard_rdbms.erl index c74b9791266..80b503be7d9 100644 --- a/src/mod_vcard_rdbms.erl +++ b/src/mod_vcard_rdbms.erl @@ -41,10 +41,6 @@ -include("jlib.hrl"). -include("mod_vcard.hrl"). -%%-------------------------------------------------------------------- -%% mod_vcards callbacks -%%-------------------------------------------------------------------- - init(_VHost, _Options) -> ok. diff --git a/src/mongooseim.app.src b/src/mongooseim.app.src index 22e44dca3dc..50f146988d1 100644 --- a/src/mongooseim.app.src +++ b/src/mongooseim.app.src @@ -48,7 +48,8 @@ uuid, xmerl, worker_pool, - gen_fsm_compat + gen_fsm_compat, + erl_csv_generator ]}, {env, []}, {mod, {ejabberd_app, []}}]}. 
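A minimal usage sketch of the command introduced in this first patch, as it could be
driven from an attached Erlang shell; the user, domain and target path below are
placeholders, while the module, function and return values follow the patch:

    %% retrieve_all/3 backs the new `retrieve_personal_data` mongooseimctl command.
    %% On success it writes a zip with one CSV per data group returned by the gdpr
    %% behaviour (vcard only in this patch; the third patch adds mod_pubsub) plus a
    %% logs-<nodename>.txt per cluster node produced by priv/parse_logs.sh:
    ok = service_admin_extra_gdpr:retrieve_all(<<"alice">>, <<"localhost">>,
                                               <<"/tmp/alice.zip">>),
    %% for an unknown user no archive is produced:
    {error, "User does not exist"} =
        service_admin_extra_gdpr:retrieve_all(<<"non-person">>, <<"oblivion">>,
                                              <<"/tmp/nobody.zip">>).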
From e9694f83a24e480445a330c57085ad1dee2818b4 Mon Sep 17 00:00:00 2001 From: Piotr Nosek Date: Fri, 26 Apr 2019 08:38:00 +0200 Subject: [PATCH 2/3] Replace functions from OTP 20 in GDPR test --- big_tests/tests/gdpr_SUITE.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/big_tests/tests/gdpr_SUITE.erl b/big_tests/tests/gdpr_SUITE.erl index c67b3546ce4..92f3930e926 100644 --- a/big_tests/tests/gdpr_SUITE.erl +++ b/big_tests/tests/gdpr_SUITE.erl @@ -256,9 +256,9 @@ retrieve_inbox(Config) -> retrieve_logs(Config) -> escalus:fresh_story(Config, [{alice, 1}], fun(Alice) -> - User = string:lowercase(escalus_client:username(Alice)), - Domain = string:lowercase(escalus_client:server(Alice)), - JID = string:uppercase(escalus_client:short_jid(Alice)), + User = string:to_lower(binary_to_list(escalus_client:username(Alice))), + Domain = string:to_lower(binary_to_list(escalus_client:server(Alice))), + JID = string:to_upper(binary_to_list(escalus_client:short_jid(Alice))), MIM2Node = distributed_helper:mim2(), mongoose_helper:successful_rpc(net_kernel, connect_node, [MIM2Node]), mongoose_helper:successful_rpc(MIM2Node, error_logger, error_msg, From 71635731f838cf76b272910f27bd0b3bfc90b153 Mon Sep 17 00:00:00 2001 From: Ludwik Bukowski Date: Wed, 10 Apr 2019 14:14:11 +0200 Subject: [PATCH 3/3] Implement GDPR retrieve for pubsub --- big_tests/default.spec | 130 ++++++------ big_tests/tests/gdpr_SUITE.erl | 205 +++++++++++++++++-- big_tests/tests/pep_SUITE.erl | 1 + big_tests/tests/pubsub_tools.erl | 1 + src/admin_extra/service_admin_extra_gdpr.erl | 3 +- src/gdpr.erl | 1 + src/pubsub/mod_pubsub.erl | 20 ++ src/pubsub/mod_pubsub_db.erl | 11 + src/pubsub/mod_pubsub_db_mnesia.erl | 51 ++++- src/pubsub/mod_pubsub_db_rdbms.erl | 34 ++- src/pubsub/mod_pubsub_db_rdbms_sql.erl | 46 +++++ 11 files changed, 413 insertions(+), 90 deletions(-) diff --git a/big_tests/default.spec b/big_tests/default.spec index d021bcc90da..af48a5339db 100644 --- a/big_tests/default.spec +++ b/big_tests/default.spec @@ -11,72 +11,72 @@ %% do not remove below SUITE if testing mongoose {suites, "tests", mongoose_sanity_checks_SUITE}. -{suites, "tests", rdbms_SUITE}. -{suites, "tests", race_conditions_SUITE}. -{suites, "tests", acc_e2e_SUITE}. -{suites, "tests", accounts_SUITE}. -{suites, "tests", adhoc_SUITE}. -{suites, "tests", amp_big_SUITE}. -{suites, "tests", anonymous_SUITE}. -{suites, "tests", bosh_SUITE}. -{suites, "tests", carboncopy_SUITE}. -{suites, "tests", cluster_commands_SUITE}. -{suites, "tests", conf_reload_SUITE}. -{suites, "tests", connect_SUITE}. -{suites, "tests", component_SUITE}. -{suites, "tests", disco_and_caps_SUITE}. -{suites, "tests", ejabberdctl_SUITE}. +%{suites, "tests", rdbms_SUITE}. +%{suites, "tests", race_conditions_SUITE}. +%{suites, "tests", acc_e2e_SUITE}. +%{suites, "tests", accounts_SUITE}. +%{suites, "tests", adhoc_SUITE}. +%{suites, "tests", amp_big_SUITE}. +%{suites, "tests", anonymous_SUITE}. +%{suites, "tests", bosh_SUITE}. +%{suites, "tests", carboncopy_SUITE}. +%{suites, "tests", cluster_commands_SUITE}. +%{suites, "tests", conf_reload_SUITE}. +%{suites, "tests", connect_SUITE}. +%{suites, "tests", component_SUITE}. +%{suites, "tests", disco_and_caps_SUITE}. +%{suites, "tests", ejabberdctl_SUITE}. {suites, "tests", gdpr_SUITE}. -{suites, "tests", inbox_SUITE}. -{suites, "tests", jingle_SUITE}. -{suites, "tests", last_SUITE}. -{suites, "tests", login_SUITE}. -{suites, "tests", mam_SUITE}. -{suites, "tests", metrics_api_SUITE}. -{suites, "tests", metrics_c2s_SUITE}. 
-{suites, "tests", metrics_roster_SUITE}. -{suites, "tests", metrics_register_SUITE}. -{suites, "tests", metrics_session_SUITE}. -{suites, "tests", mod_aws_sns_SUITE}. -{suites, "tests", mod_blocking_SUITE}. -{suites, "tests", mod_event_pusher_rabbit_SUITE}. -{suites, "tests", mod_http_notification_SUITE}. -{suites, "tests", mod_http_upload_SUITE}. -{suites, "tests", mod_ping_SUITE}. -{suites, "tests", mod_time_SUITE}. -{suites, "tests", mod_version_SUITE}. -{suites, "tests", muc_SUITE}. -{suites, "tests", muc_light_SUITE}. -{suites, "tests", muc_light_legacy_SUITE}. -{suites, "tests", muc_http_api_SUITE}. -{suites, "tests", muc_light_http_api_SUITE}. -{suites, "tests", oauth_SUITE}. -{suites, "tests", offline_SUITE}. -{suites, "tests", pep_SUITE}. -{suites, "tests", presence_SUITE}. -{suites, "tests", privacy_SUITE}. -{suites, "tests", private_SUITE}. -{suites, "tests", pubsub_SUITE}. -{suites, "tests", push_SUITE}. -{suites, "tests", push_http_SUITE}. -{suites, "tests", push_integration_SUITE}. -{suites, "tests", push_pubsub_SUITE}. -{suites, "tests", rest_SUITE}. -{suites, "tests", rest_client_SUITE}. -{suites, "tests", s2s_SUITE}. -{suites, "tests", sasl_SUITE}. -{suites, "tests", shared_roster_SUITE}. -{suites, "tests", sic_SUITE}. -{suites, "tests", sm_SUITE}. -{suites, "tests", users_api_SUITE}. -{suites, "tests", vcard_simple_SUITE}. -{suites, "tests", vcard_SUITE}. -{suites, "tests", websockets_SUITE}. -{suites, "tests", xep_0352_csi_SUITE}. -{suites, "tests", mod_global_distrib_SUITE}. -{suites, "tests", mongoose_cassandra_SUITE}. -{suites, "tests", mongoose_elasticsearch_SUITE}. -{suites, "tests", sasl_external_SUITE}. +%{suites, "tests", inbox_SUITE}. +%{suites, "tests", jingle_SUITE}. +%{suites, "tests", last_SUITE}. +%{suites, "tests", login_SUITE}. +%{suites, "tests", mam_SUITE}. +%{suites, "tests", metrics_api_SUITE}. +%{suites, "tests", metrics_c2s_SUITE}. +%{suites, "tests", metrics_roster_SUITE}. +%{suites, "tests", metrics_register_SUITE}. +%{suites, "tests", metrics_session_SUITE}. +%{suites, "tests", mod_aws_sns_SUITE}. +%{suites, "tests", mod_blocking_SUITE}. +%{suites, "tests", mod_event_pusher_rabbit_SUITE}. +%{suites, "tests", mod_http_notification_SUITE}. +%{suites, "tests", mod_http_upload_SUITE}. +%{suites, "tests", mod_ping_SUITE}. +%{suites, "tests", mod_time_SUITE}. +%{suites, "tests", mod_version_SUITE}. +%{suites, "tests", muc_SUITE}. +%{suites, "tests", muc_light_SUITE}. +%{suites, "tests", muc_light_legacy_SUITE}. +%{suites, "tests", muc_http_api_SUITE}. +%{suites, "tests", muc_light_http_api_SUITE}. +%{suites, "tests", oauth_SUITE}. +%{suites, "tests", offline_SUITE}. +%{suites, "tests", pep_SUITE}. +%{suites, "tests", presence_SUITE}. +%{suites, "tests", privacy_SUITE}. +%{suites, "tests", private_SUITE}. +%{suites, "tests", pubsub_SUITE}. +%{suites, "tests", push_SUITE}. +%{suites, "tests", push_http_SUITE}. +%{suites, "tests", push_integration_SUITE}. +%{suites, "tests", push_pubsub_SUITE}. +%{suites, "tests", rest_SUITE}. +%{suites, "tests", rest_client_SUITE}. +%{suites, "tests", s2s_SUITE}. +%{suites, "tests", sasl_SUITE}. +%{suites, "tests", shared_roster_SUITE}. +%{suites, "tests", sic_SUITE}. +%{suites, "tests", sm_SUITE}. +%{suites, "tests", users_api_SUITE}. +%{suites, "tests", vcard_simple_SUITE}. +%{suites, "tests", vcard_SUITE}. +%{suites, "tests", websockets_SUITE}. +%{suites, "tests", xep_0352_csi_SUITE}. +%{suites, "tests", mod_global_distrib_SUITE}. +%{suites, "tests", mongoose_cassandra_SUITE}. 
+%{suites, "tests", mongoose_elasticsearch_SUITE}. +%{suites, "tests", sasl_external_SUITE}. {config, ["test.config"]}. {logdir, "ct_report"}. diff --git a/big_tests/tests/gdpr_SUITE.erl b/big_tests/tests/gdpr_SUITE.erl index 92f3930e926..69fdc605993 100644 --- a/big_tests/tests/gdpr_SUITE.erl +++ b/big_tests/tests/gdpr_SUITE.erl @@ -16,7 +16,11 @@ retrieve_roster/1, retrieve_mam/1, retrieve_offline/1, - retrieve_pubsub/1, + retrieve_pubsub_payloads/1, + retrieve_created_pubsub_nodes/1, + retrieve_all_pubsub_data/1, + dont_retrieve_other_user_pubsub_payload/1, + retrieve_pubsub_subscriptions/1, retrieve_private_xml/1, retrieve_inbox/1, retrieve_logs/1 @@ -40,6 +44,7 @@ suite() -> all() -> [ {group, retrieve_personal_data}, + {group, retrieve_personal_data_pubsub}, {group, data_is_not_retrieved_for_missing_user} ]. @@ -51,11 +56,17 @@ groups() -> %retrieve_roster, %retrieve_mam, %retrieve_offline, - %retrieve_pubsub, %retrieve_private_xml, %retrieve_inbox, retrieve_logs ]}, + {retrieve_personal_data_pubsub, [], [ + retrieve_pubsub_payloads, + dont_retrieve_other_user_pubsub_payload, + retrieve_pubsub_subscriptions, + retrieve_created_pubsub_nodes, + retrieve_all_pubsub_data + ]}, {data_is_not_retrieved_for_missing_user, [], [data_is_not_retrieved_for_missing_user] } @@ -100,16 +111,15 @@ init_per_testcase(retrieve_mam = CN, Config) -> dynamic_modules:ensure_modules(domain(), mam_required_modules(Backend)), escalus:init_per_testcase(CN, Config) end; -init_per_testcase(retrieve_pubsub = CN, Config) -> - dynamic_modules:ensure_modules(domain(), pubsub_required_modules()), - escalus:init_per_testcase(CN, Config); init_per_testcase(CN, Config) -> + dynamic_modules:ensure_modules(domain(), pubsub_required_modules()), escalus:init_per_testcase(CN, Config). end_per_testcase(retrieve_vcard = CN, Config) -> delete_files(), escalus:end_per_testcase(CN, Config); end_per_testcase(CN, Config) -> + delete_files(), escalus:end_per_testcase(CN, Config). inbox_required_modules() -> @@ -136,7 +146,7 @@ pubsub_required_modules() -> {backend, mongoose_helper:mnesia_or_rdbms_backend()}, {host, "pubsub.@HOST@"}, {nodetree, <<"tree">>}, - {plugins, [<<"flat">>, <<"pep">>]} + {plugins, [<<"flat">>, <<"pep">>, <<"push">>]} ] }]. @@ -202,22 +212,145 @@ retrieve_offline(Config) -> Alice, Config, "offline", ExpectedHeader, ExpectedItems) end). 
-retrieve_pubsub(Config) -> +retrieve_pubsub_payloads(Config) -> escalus:fresh_story(Config, [{alice, 1}], fun(Alice) -> - Node = pubsub_tools:pubsub_node(), - ItemId = <<"puczjorhendsindier">>, - pubsub_tools:publish(Alice, ItemId, Node, [{with_payload, true}]), - PepNS = <<"gdpr:pep">>, - PepItemId = <<"puczjorhendsap">>, - pubsub_tools:publish(Alice, PepItemId, {pep, PepNS}, []), - - ExpectedHeader = ["node_id", "item_id", "payload"], - ExpectedItems = [ - ], - retrieve_and_validate_personal_data( - Alice, Config, "pubsub", ExpectedHeader, ExpectedItems) + Node1 = {_Domain, NodeName1} = pubsub_tools:pubsub_node(), + Node2 = {_Domain, NodeName2} = pubsub_tools:pubsub_node(), + {BinItem1, StringItem1} = item_content(<<"Item1Data">>), + {BinItem2, StringItem2} = item_content(<<"Item2Data">>), + {BinItem3, StringItem3} = item_content(<<"Item3Data">>), + {BinOther, StringOther} = item_content(<<"OtherItemData">>), + + pubsub_tools:publish(Alice, <<"Item1">>, Node1, [{with_payload, BinItem1}]), + pubsub_tools:publish(Alice, <<"Item2">>, Node1, [{with_payload, BinItem2}]), + pubsub_tools:publish(Alice, <<"Item3">>, Node1, [{with_payload, BinItem3}]), + pubsub_tools:publish(Alice, <<"OtherItem">>, Node2, [{with_payload, BinOther}]), + + ExpectedItems = [pubsub_payloads_row_map(NodeName1, "Item1", StringItem1), + pubsub_payloads_row_map(NodeName1, "Item2",StringItem2), + pubsub_payloads_row_map(NodeName1, "Item3", StringItem3), + pubsub_payloads_row_map(NodeName2, "OtherItem", StringOther)], + retrieve_and_validate_personal_data( + Alice, Config, "pubsub_payloads", ["node_name", "item_id", "payload"], ExpectedItems) + end). + +dont_retrieve_other_user_pubsub_payload(Config) -> + escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun(Alice, Bob) -> + Node1 = {_Domain, NodeName1} = pubsub_tools:pubsub_node(), + pubsub_tools:create_node(Alice, Node1, []), + AffChange = [{Bob, <<"publish-only">>}], + + {BinItem1, StringItem1} = item_content(<<"Item1Data">>), + {BinItem2, StringItem2} = item_content(<<"Item2Data">>), + + pubsub_tools:set_affiliations(Alice, Node1, AffChange, []), + pubsub_tools:publish(Alice, <<"Item1">>, Node1, [{with_payload, {true, BinItem1}}]), + pubsub_tools:publish(Bob, <<"Item2">>, Node1, [{with_payload, {true, BinItem2}}]), + + retrieve_and_validate_personal_data( + Alice, Config, "pubsub_payloads", ["node_name", "item_id", "payload"], + [pubsub_payloads_row_map(NodeName1, "Item1", StringItem1)]), + + retrieve_and_validate_personal_data( + Bob, Config, "pubsub_payloads", ["node_name","item_id", "payload"], + [pubsub_payloads_row_map(NodeName1, "Item2", StringItem2)]), + + pubsub_tools:delete_node(Alice, Node1, []) + end). 
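%% The {with_payload, {true, BinItem}} option used in the test above relies on the
%% new clause this patch adds to pubsub_tools:publish_request/5; it publishes the
%% caller-provided payload element instead of the default item content:
%%
%%     pubsub_tools:publish(Bob, <<"Item2">>, Node1,
%%                          [{with_payload, {true, BinItem2}}]),
%%
%% retrieve_pubsub_payloads earlier passes the #xmlel{} directly
%% ({with_payload, BinItem1}), which hits the pre-existing #xmlel{} clause and has
%% the same effect.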
+ +retrieve_created_pubsub_nodes(Config) -> + escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun(Alice, Bob) -> + Node1 = {_Domain, NodeName1} = pubsub_tools:pubsub_node(), + Node2 = {_Domain, NodeName2} = pubsub_tools:pubsub_node(), + Node3 = {_Domain, NodeName3} = pubsub_tools:pubsub_node(), + + NodeNS = random_node_ns(), + PepNode = make_pep_node_info(Alice, NodeNS), + AccessModel = {<<"pubsub#access_model">>, <<"authorize">>}, + + pubsub_tools:create_node(Alice, Node1, []), + pubsub_tools:create_node(Alice, Node2, []), + pubsub_tools:create_node(Alice, PepNode, [{config, [AccessModel]}]), + pubsub_tools:create_node(Bob, Node3, [{type, <<"push">>}]), + + ExpectedHeader = ["node_name", "type"], + + retrieve_and_validate_personal_data( + Alice, Config, "pubsub_nodes", ExpectedHeader, + [pubsub_nodes_row_map(NodeName1, "flat"), + pubsub_nodes_row_map(NodeName2, "flat"), + pubsub_nodes_row_map(NodeNS, "pep") + ]), + + retrieve_and_validate_personal_data( + Bob, Config, "pubsub_nodes", ExpectedHeader, + [pubsub_nodes_row_map(NodeName3, "push")]), + + + Nodes = [{Alice, Node1}, {Alice, Node2}, {Alice, PepNode}, {Bob, Node3}], + [pubsub_tools:delete_node(User, Node, []) || {User, Node} <- Nodes] + end). + +retrieve_pubsub_subscriptions(Config) -> + escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun(Alice, Bob) -> + Node = {_Domain, NodeName} = pubsub_tools:pubsub_node(), + pubsub_tools:create_node(Alice, Node, []), + pubsub_tools:subscribe(Bob, Node, []), + retrieve_and_validate_personal_data(Bob, Config, "pubsub_subscriptions", ["node_name"], + [pubsub_subscription_row_map(NodeName)]), + + pubsub_tools:delete_node(Alice, Node, []) end). +retrieve_all_pubsub_data(Config) -> + escalus:fresh_story(Config, [{alice, 1}, {bob, 1}], fun(Alice, Bob) -> + Node1 = {_Domain, NodeName1} = pubsub_tools:pubsub_node(), + Node2 = {_Domain, NodeName2} = pubsub_tools:pubsub_node(), + Node3 = {_Domain, NodeName3} = pubsub_tools:pubsub_node(), + pubsub_tools:create_node(Alice, Node1, []), + pubsub_tools:create_node(Alice, Node2, []), + pubsub_tools:create_node(Bob, Node3, []), + AffChange = [{Bob, <<"publish-only">>}], + pubsub_tools:set_affiliations(Alice, Node1, AffChange, []), + pubsub_tools:subscribe(Bob, Node2, []), + + {BinItem1, StringItem1} = item_content(<<"Item1Data">>), + {BinItem2, StringItem2} = item_content(<<"Item2Data">>), + {BinItem3, StringItem3} = item_content(<<"Item3Data">>), + + pubsub_tools:publish(Alice, <<"Item1">>, Node1, [{with_payload, {true, BinItem1}}]), + pubsub_tools:publish(Alice, <<"Item2">>, Node2, [{with_payload, {true, BinItem2}}]), + pubsub_tools:receive_item_notification(Bob, <<"Item2">>, Node2, []), + pubsub_tools:publish(Bob, <<"Item3">>, Node1, [{with_payload, {true, BinItem3}}]), + + %% Bob has one subscription, one node created and one payload sent + retrieve_and_validate_personal_data( + Bob, Config, "pubsub_subscriptions", ["node_name"], + [pubsub_subscription_row_map(NodeName2)]), + + retrieve_and_validate_personal_data( + Bob, Config, "pubsub_nodes", ["node_name", "type"], + [pubsub_nodes_row_map(NodeName3, "flat")]), + + retrieve_and_validate_personal_data( + Bob, Config, "pubsub_payloads", ["node_name", "item_id", "payload"], + [pubsub_payloads_row_map(NodeName1, "Item3", StringItem3)]), + + %% Alice has two nodes created and two payloads sent + retrieve_and_validate_personal_data( + Alice, Config, "pubsub_nodes", ["node_name", "type"], + [pubsub_nodes_row_map(NodeName1, "flat"), + pubsub_nodes_row_map(NodeName2, "flat")]), + 
retrieve_and_validate_personal_data( + Alice, Config, "pubsub_payloads", ["node_name", "item_id","payload"], + [pubsub_payloads_row_map(NodeName1, "Item1", StringItem1), + pubsub_payloads_row_map(NodeName2, "Item2", StringItem2)]), + + Nodes = [{Alice, Node1}, {Alice, Node2}, {Bob, Node3}], + [pubsub_tools:delete_node(User, Node, []) || {User, Node} <- Nodes] + end). + + retrieve_private_xml(Config) -> escalus:fresh_story(Config, [{alice, 1}], fun(Alice) -> NS = <<"alice:gdpr:ns">>, @@ -304,12 +437,15 @@ csv_to_maps(ExpectedHeader, [ExpectedHeader | Rows]) -> csv_row_to_map(Header, Row) -> maps:from_list(lists:zip(Header, Row)). -validate_personal_maps(_, []) -> ok; -validate_personal_maps([Map | RMaps], [Checks | RChecks]) -> +validate_personal_maps(PersonalMaps, ExpectedItems) -> + validate_sorted_personal_maps(lists:sort(PersonalMaps), lists:sort(ExpectedItems)). + +validate_sorted_personal_maps(_, []) -> ok; +validate_sorted_personal_maps([Map | RMaps], [Checks | RChecks]) -> maps:fold(fun(K, Conditions, _) -> validate_personal_item(maps:get(K, Map), Conditions) end, ok, Checks), - validate_personal_maps(RMaps, RChecks). +validate_sorted_personal_maps(RMaps, RChecks). validate_personal_item(_Value, []) -> ok; @@ -366,3 +502,28 @@ is_file_to_be_deleted(Filename) -> re:run(Filename, Regex) =/= nomatch end, DeletableRegexes). + +pubsub_payloads_row_map(Node, ItemId, Payload) -> + #{"node_name" => binary_to_list(Node), "item_id" => ItemId, "payload" => Payload}. + +pubsub_nodes_row_map(Node, Type) -> + #{"node_name" => binary_to_list(Node), "type" => Type}. + +pubsub_subscription_row_map(Node) -> + #{"node_name" => binary_to_list(Node)}. + +make_pep_node_info(Client, NodeName) -> + {escalus_utils:jid_to_lower(escalus_utils:get_short_jid(Client)), NodeName}. + +random_node_ns() -> + base64:encode(crypto:strong_rand_bytes(16)). + +item_content(Data) -> + Bin = item_content_xml(Data), + {Bin, binary_to_list(exml:to_binary(Bin))}. + +item_content_xml(Data) -> + #xmlel{name = <<"entry">>, + attrs = [{<<"xmlns">>, <<"http://www.w3.org/2005/Atom">>}], + children = [#xmlcdata{content = Data}]}. + diff --git a/big_tests/tests/pep_SUITE.erl b/big_tests/tests/pep_SUITE.erl index c3e047fc6b6..bea5e90fb3c 100644 --- a/big_tests/tests/pep_SUITE.erl +++ b/big_tests/tests/pep_SUITE.erl @@ -442,3 +442,4 @@ item_content() -> enable_sm(User) -> escalus_client:send(User, escalus_stanza:enable_sm()), #xmlel{name = <<"enabled">>} = escalus:wait_for_stanza(User). + diff --git a/big_tests/tests/pubsub_tools.erl b/big_tests/tests/pubsub_tools.erl index 5b29da149a3..4959169cc2c 100644 --- a/big_tests/tests/pubsub_tools.erl +++ b/big_tests/tests/pubsub_tools.erl @@ -160,6 +160,7 @@ publish_without_node_attr(User, ItemId, Node, Options) -> publish_request(Id, User, ItemId, Node, Options) -> case proplists:get_value(with_payload, Options, true) of true -> escalus_pubsub_stanza:publish(User, ItemId, item_content(), Id, Node); + {true, Payload} -> escalus_pubsub_stanza:publish(User, ItemId, Payload, Id, Node); false -> escalus_pubsub_stanza:publish(User, Id, Node); #xmlel{} = El -> escalus_pubsub_stanza:publish(User, ItemId, El, Id, Node) end. diff --git a/src/admin_extra/service_admin_extra_gdpr.erl b/src/admin_extra/service_admin_extra_gdpr.erl index b112415d702..af312997245 100644 --- a/src/admin_extra/service_admin_extra_gdpr.erl +++ b/src/admin_extra/service_admin_extra_gdpr.erl @@ -57,7 +57,8 @@ retrieve_all(Username, Domain, ResultFilePath) -> -spec modules_with_personal_data() -> [module()]. 
modules_with_personal_data() -> [ - mod_vcard + mod_vcard, + mod_pubsub ]. -spec get_data_from_modules(jid:user(), jid:server()) -> diff --git a/src/gdpr.erl b/src/gdpr.erl index bda66b50fe4..b81653d9ef6 100644 --- a/src/gdpr.erl +++ b/src/gdpr.erl @@ -12,3 +12,4 @@ -callback get_personal_data(jid:user(), jid:server()) -> [{data_group(), schema(), entries()}]. + diff --git a/src/pubsub/mod_pubsub.erl b/src/pubsub/mod_pubsub.erl index 88c53647bb2..b466000b905 100644 --- a/src/pubsub/mod_pubsub.erl +++ b/src/pubsub/mod_pubsub.erl @@ -44,6 +44,7 @@ -module(mod_pubsub). -behaviour(gen_mod). -behaviour(gen_server). +-behaviour(gdpr). -behaviour(mongoose_packet_handler). -author('christophe.romain@process-one.net'). @@ -94,6 +95,8 @@ terminate/2, code_change/3]). -export([default_host/0]). +-export([get_personal_data/2]). + %% packet handler export -export([process_packet/5]). @@ -250,6 +253,23 @@ process_packet(Acc, From, To, El, #state{server_host = ServerHost, access = Acce Packet = mongoose_acc:element(Acc2), do_route(ServerHost, Access, Plugins, To#jid.lserver, From, To, Packet). +%%==================================================================== +%% GDPR callback +%%==================================================================== + +-spec get_personal_data(Username :: jid:user(), Server :: jid:server()) -> + [{gdpr:data_group(), gdpr:schema(), gdpr:entities()}]. +get_personal_data(Username, Server) -> + LUser = jid:nodeprep(Username), + LServer = jid:nodeprep(Server), + Payloads = mod_pubsub_db_backend:get_user_payloads(LUser, LServer), + Nodes = mod_pubsub_db_backend:get_user_nodes(LUser, LServer), + Subscriptions = mod_pubsub_db_backend:get_user_subscriptions(LUser, LServer), + + [{pubsub_payloads, ["node_name", "item_id", "payload"], Payloads}, + {pubsub_nodes, ["node_name", "type"], Nodes}, + {pubsub_subscriptions, ["node_name"], Subscriptions}]. + %%==================================================================== %% gen_server callbacks %%==================================================================== diff --git a/src/pubsub/mod_pubsub_db.erl b/src/pubsub/mod_pubsub_db.erl index 9b51128e470..be6535269d0 100644 --- a/src/pubsub/mod_pubsub_db.erl +++ b/src/pubsub/mod_pubsub_db.erl @@ -174,6 +174,17 @@ -callback del_items(Nidx :: mod_pubsub:nodeIdx(), [ItemId :: mod_pubsub:itemId()]) -> ok. +%% ----------------------- GDPR-related ------------------------ + +-callback get_user_payloads(LUser :: jid:luser(), LServer :: jid:lserver()) -> + [NodeNameItemIDAndPayload :: [binary()]]. + +-callback get_user_nodes(LUser :: jid:luser(), LServer :: jid:lserver()) -> + [NodeNameAndType :: [binary()]]. + +-callback get_user_subscriptions(LUser :: jid:luser(), LServer :: jid:lserver()) -> + [NodeName :: [binary()]]. + %%==================================================================== %% API %%==================================================================== diff --git a/src/pubsub/mod_pubsub_db_mnesia.erl b/src/pubsub/mod_pubsub_db_mnesia.erl index 3af0d0da01a..0f9bf185593 100644 --- a/src/pubsub/mod_pubsub_db_mnesia.erl +++ b/src/pubsub/mod_pubsub_db_mnesia.erl @@ -57,7 +57,6 @@ ]). % Whole Items - -export([ get_items/2, get_item/2, @@ -66,6 +65,13 @@ del_items/2 ]). +% GDPR +-export([ + get_user_payloads/2, + get_user_nodes/2, + get_user_subscriptions/2 + ]). 
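%% A hedged summary of the row shapes the three GDPR accessors exported above return
%% (the shapes follow the mod_pubsub_db callback specs added in this patch):
%%
%%     get_user_payloads(LUser, LServer)      -> [[NodeName, ItemId, PayloadBin] | _]
%%     get_user_nodes(LUser, LServer)         -> [[NodeName, NodeType] | _]
%%     get_user_subscriptions(LUser, LServer) -> [[NodeName] | _]
%%
%% mod_pubsub:get_personal_data/2 pairs these with the CSV headers
%% ["node_name", "item_id", "payload"], ["node_name", "type"] and ["node_name"].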
+ %%==================================================================== %% Internal records definitions %%==================================================================== @@ -159,6 +165,49 @@ dirty(Fun, ErrorDebug) -> mod_pubsub_db:db_error(ReasonData, ErrorDebug, dirty_failed) end. +%% ------------------------ GDPR-related ------------------------ + +-spec get_user_payloads(LUser :: jid:luser(), LServer :: jid:lserver()) -> + [NodeNameItemIDAndPayload :: [binary()]]. +get_user_payloads(LUser, LServer) -> + {atomic, Recs} = mnesia:transaction(fun() -> get_user_payloads_t(LUser, LServer) end), + Recs. + +get_user_payloads_t(LUser, LServer) -> + BareUserMatchSpec = {'_', {LUser, LServer, '_'}}, + Items = mnesia:match_object(#pubsub_item{creation = BareUserMatchSpec, _ = '_'}), + [[node_name(Nidx), ItemId, << <<(exml:to_binary(P))/binary>> || P <- Payload >>] || + #pubsub_item{itemid = {ItemId, Nidx}, payload = Payload} <- Items]. + +-spec get_user_nodes(LUser :: jid:luser(), LServer :: jid:lserver()) -> + [NodeNameAndType :: [binary()]]. +get_user_nodes(LUser, LServer) -> + LJID = {LUser, LServer, <<>>}, + {atomic, Recs} = mnesia:transaction(fun() -> + Nodes = mnesia:match_object(#pubsub_node{owners = [LJID], _ = '_'}), + [[NodeName, Type] || #pubsub_node{nodeid = {_, NodeName}, type = Type} <- Nodes] + end), + Recs. + +-spec get_user_subscriptions(LUser :: jid:luser(), LServer :: jid:lserver()) -> + [NodeName :: [binary()]]. +get_user_subscriptions(LUser, LServer) -> + {atomic, Recs} = mnesia:transaction(fun() -> get_user_subscriptions_t(LUser, LServer) end), + Recs. + +get_user_subscriptions_t(LUser, LServer) -> + UserMatchSpec = {LUser, LServer, '_'}, + SubscriptionStates + = mnesia:match_object(#pubsub_state{stateid = {UserMatchSpec, '_'}, + subscriptions = [{subscribed, '_'}], _ = '_'}), + [ [node_name(Nidx)] || #pubsub_state{stateid = {_, Nidx}} <- SubscriptionStates]. + +node_name(Nidx) -> + case find_node_by_id(Nidx) of + {ok, #pubsub_node{ nodeid = {_, NodeName} }} -> NodeName; + _ -> <<>> + end. + %% ------------------------ Direct #pubsub_state access ------------------------ -spec get_state(Nidx :: mod_pubsub:nodeIdx(), diff --git a/src/pubsub/mod_pubsub_db_rdbms.erl b/src/pubsub/mod_pubsub_db_rdbms.erl index 395fdb71da1..ff3092f9d95 100644 --- a/src/pubsub/mod_pubsub_db_rdbms.erl +++ b/src/pubsub/mod_pubsub_db_rdbms.erl @@ -65,6 +65,13 @@ del_items/2 ]). +%% GDPR related +-export([ + get_user_payloads/2, + get_user_nodes/2, + get_user_subscriptions/2 + ]). + % For SQL queries -export([aff2int/1, sub2int/1]). @@ -117,7 +124,6 @@ dirty(Fun, ErrorDebug) -> end. %% ------------------------ Direct #pubsub_state access ------------------------ - %% TODO: Functions for direct #pubsub_access are currently inefficient for RDBMS %% - refactor them or remove as many of them as possible from the API at some point -spec get_state(Nidx :: mod_pubsub:nodeIdx(), @@ -579,6 +585,31 @@ remove_all_items(Nidx) -> {updated, _} = mongoose_rdbms:sql_query_t(SQL), ok. +% ------------------- GDPR-related -------------------------------- + +get_user_payloads(LUser, LServer) -> + SQL = mod_pubsub_db_rdbms_sql:get_user_items(LUser, LServer), + case mongoose_rdbms:sql_query(global, SQL) of + {selected, Items} -> + [[NodeName, ItemId, strip_payload(PayloadDB)] || {NodeName, ItemId, PayloadDB} <- Items] + end. 
+ +get_user_nodes(LUser, LServer) -> + LJID = jid:to_binary({LUser, LServer, <<>>}), + SQL = mod_pubsub_db_rdbms_sql:select_nodes_by_owner(LJID), + {selected, Nodes} = mongoose_rdbms:sql_query(global, SQL), + lists:map(fun tuple_to_list/1, Nodes). + +get_user_subscriptions(LUser, LServer) -> + SQL = mod_pubsub_db_rdbms_sql:get_user_subscriptions(LUser, LServer), + {selected, Nodes} = mongoose_rdbms:sql_query(global, SQL), + lists:map(fun tuple_to_list/1, Nodes). + +strip_payload(PayloadDB) -> + PayloadXML = mongoose_rdbms:unescape_binary(global, PayloadDB), + {ok, #xmlel{children = Payload}} = exml:parse(PayloadXML), + exml:to_binary(Payload). + %%==================================================================== %% Helpers %%==================================================================== @@ -741,3 +772,4 @@ key_to_existing_atom({Key, Value}) when is_atom(Key)-> {Key, Value}; key_to_existing_atom({Key, Value}) -> {binary_to_existing_atom(Key, utf8), Value}. + diff --git a/src/pubsub/mod_pubsub_db_rdbms_sql.erl b/src/pubsub/mod_pubsub_db_rdbms_sql.erl index 5a88fff7538..5508ad9a730 100644 --- a/src/pubsub/mod_pubsub_db_rdbms_sql.erl +++ b/src/pubsub/mod_pubsub_db_rdbms_sql.erl @@ -66,6 +66,11 @@ set_parents/2, del_parents/1]). +% GDPR +-export([get_user_items/2, + select_nodes_by_owner/1, + get_user_subscriptions/2]). + %%==================================================================== %% SQL queries %%==================================================================== @@ -325,6 +330,24 @@ get_entity_items(Nidx, LU, LS) -> " AND created_luser = ", esc_string(LU), " AND created_lserver = ", esc_string(LS) ]. +-spec get_user_items(LU :: jid:luser(), LS :: jid:lserver()) -> iolist(). +get_user_items(LU, LS) -> + ["SELECT name, itemid, payload" + " FROM pubsub_items" + " INNER JOIN pubsub_nodes" + " ON pubsub_items.nidx = pubsub_nodes.nidx" + " WHERE created_luser = ", esc_string(LU), + " AND created_lserver = ", esc_string(LS) ]. + +-spec get_user_subscriptions(LU :: jid:luser(), LS :: jid:lserver()) -> iolist(). +get_user_subscriptions(LU, LS) -> + ["SELECT name" + " FROM pubsub_subscriptions" + " INNER JOIN pubsub_nodes" + " ON pubsub_subscriptions.nidx = pubsub_nodes.nidx" + " WHERE luser = ", esc_string(LU), + " AND lserver = ", esc_string(LS) ]. + -spec delete_item(Nidx :: mod_pubsub:nodeIdx(), LU :: jid:luser(), LS :: jid:lserver(), @@ -424,6 +447,29 @@ select_nodes_by_key(Key) -> ["SELECT ", pubsub_node_fields(), " from pubsub_nodes " "WHERE p_key = ", esc_string(Key)]. +-spec select_nodes_by_owner(LJID :: binary()) -> iolist(). +select_nodes_by_owner(LJID) -> + %% TODO I wrote that code in tears in my eyes. Its super inefficient, + %% there should be separate table for many-to-many relation and index + case {mongoose_rdbms:db_engine(global), mongoose_rdbms_type:get()} of + {mysql, _} -> + ["SELECT name, type" + " FROM pubsub_nodes" + " WHERE owners = convert(", esc_string(iolist_to_binary(["[\"", LJID, "\"]"])), ", JSON);" + ]; + {pgsql, _} -> + ["SELECT name, type" + " FROM pubsub_nodes" + " WHERE owners ::json->>0 like ", esc_string(LJID), + " AND JSON_ARRAY_LENGTH(owners) = 1" + ]; + {odbc, mssql} -> + ["SELECT name, type" + " FROM pubsub_nodes" + " WHERE cast(owners as varchar) = ", esc_string(iolist_to_binary(["[\"", LJID, "\"]"])) + ] + end. + -spec select_nodes_in_list_with_key(Key :: binary(), Nodes :: [binary()]) -> iolist(). select_nodes_in_list_with_key(Key, Nodes) -> EscapedNames = [esc_string(Node) || Node <- Nodes],