Adding gdpr resolver #3711

Merged (3 commits) on Jul 20, 2022
1 change: 1 addition & 0 deletions big_tests/default.spec
@@ -38,6 +38,7 @@
{suites, "tests", graphql_session_SUITE}.
{suites, "tests", graphql_stanza_SUITE}.
{suites, "tests", graphql_stats_SUITE}.
{suites, "tests", graphql_gdpr_SUITE}.
{suites, "tests", graphql_vcard_SUITE}.
{suites, "tests", graphql_http_upload_SUITE}.
{suites, "tests", graphql_metric_SUITE}.
1 change: 1 addition & 0 deletions big_tests/dynamic_domains.spec
@@ -55,6 +55,7 @@
{suites, "tests", graphql_vcard_SUITE}.
{suites, "tests", graphql_offline_SUITE}.
{suites, "tests", graphql_stats_SUITE}.
{suites, "tests", graphql_gdpr_SUITE}.
{suites, "tests", graphql_http_upload_SUITE}.
{suites, "tests", graphql_metric_SUITE}.

2 changes: 1 addition & 1 deletion big_tests/tests/gdpr_SUITE.erl
@@ -1583,7 +1583,7 @@ get_personal_data_via_rpc(Client, ExpectedKeys) ->
ClientU = escalus_utils:jid_to_lower(escalus_client:username(Client)),
ClientS = escalus_utils:jid_to_lower(escalus_client:server(Client)),
AllPersonalData = mongoose_helper:successful_rpc(
service_admin_extra_gdpr, get_data_from_modules, [ClientU, ClientS]),
gdpr_api, get_data_from_modules, [ClientU, ClientS]),
%% We don't use lists:filter/2 because this line also ensures order
[ lists:keyfind(Key, 1, AllPersonalData) || Key <- ExpectedKeys ].

96 changes: 96 additions & 0 deletions big_tests/tests/graphql_gdpr_SUITE.erl
@@ -0,0 +1,96 @@
-module(graphql_gdpr_SUITE).

-compile([export_all, nowarn_export_all]).

-import(distributed_helper, [require_rpc_nodes/1]).
-import(domain_helper, [host_type/0, domain/0]).
-import(graphql_helper, [execute_user/3, execute_auth/2, user_to_bin/1]).
-import(distributed_helper, [mim/0, rpc/4]).

-include_lib("common_test/include/ct.hrl").
-include_lib("eunit/include/eunit.hrl").

suite() ->
require_rpc_nodes([mim]) ++ escalus:suite().

all() ->
[{group, admin_gdpr}].

groups() ->
[{admin_gdpr, [], admin_stats_handler()}].

admin_stats_handler() ->
[admin_gdpr_test,
admin_gdpr_no_user_test].

init_per_suite(Config) ->
escalus:init_per_suite(Config).

end_per_suite(Config) ->
escalus:end_per_suite(Config).

init_per_group(_, Config) ->
graphql_helper:init_admin_handler(Config).

end_per_group(_, _Config) ->
escalus_fresh:clean().

init_per_testcase(CaseName, Config) ->
escalus:init_per_testcase(CaseName, Config).

end_per_testcase(CaseName, Config) ->
escalus_fresh:clean(),
escalus:end_per_testcase(CaseName, Config).

% Admin test cases

admin_gdpr_test(Config) ->
escalus:fresh_story_with_config(Config, [{alice, 1}], fun admin_gdpr_test/2).

admin_gdpr_test(Config, Alice) ->
Filename = random_filename(Config),
Vars = #{<<"username">> => escalus_client:username(Alice),
<<"domain">> => escalus_client:server(Alice),
<<"resultFilepath">> => list_to_binary(Filename)},
Result = admin_retrieve_personal_data(Config, Vars),
ParsedResult = ok_result(<<"gdpr">>, <<"retrievePersonalData">>, Result),
?assertEqual(<<"Data retrieved">>, ParsedResult),
FullPath = get_mim_cwd() ++ "/" ++ Filename,
Dir = make_dir_name(Filename, escalus_client:username(Alice)),
ct:log("extracting logs ~s", [Dir]),
?assertMatch({ok, _}, zip:extract(FullPath, [{cwd, Dir}])).

admin_gdpr_no_user_test(Config) ->
Vars = #{<<"username">> => <<"AAAA">>, <<"domain">> => domain(),
<<"resultFilepath">> => <<"AAA">>},
Result = admin_retrieve_personal_data(Config, Vars),
ParsedResult = error_result(<<"extensions">>, <<"code">>, Result),
?assertEqual(<<"user_does_not_exist_error">>, ParsedResult).

% Helpers

admin_retrieve_personal_data(Config, Vars) ->
Query = <<"query Q1($username: String!, $domain: String!, $resultFilepath: String!)
{gdpr{retrievePersonalData(username: $username, domain: $domain,
resultFilepath: $resultFilepath)}}">>,
Body = #{query => Query, operationName => <<"Q1">>, variables => Vars},
execute_auth(Body, Config).

error_result(What1, What2, {{<<"200">>, <<"OK">>}, #{<<"errors">> := [Data]}}) ->
maps:get(What2, maps:get(What1, Data)).

ok_result(What1, What2, {{<<"200">>, <<"OK">>}, #{<<"data">> := Data}}) ->
maps:get(What2, maps:get(What1, Data)).

random_filename(Config) ->
TCName = atom_to_list(?config(tc_name, Config)),
TCName ++ "." ++ integer_to_list(erlang:system_time()) ++ ".zip".

get_mim_cwd() ->
{ok, Cwd} = rpc(mim(), file, get_cwd, []),
Cwd.

make_dir_name(Filename, User) when is_binary(User) ->
make_dir_name(Filename, binary_to_list(User));
make_dir_name(Filename, User) when is_list(User) ->
Filename ++ "." ++ User ++ ".unzipped".
2 changes: 2 additions & 0 deletions priv/graphql/schemas/admin/admin_schema.gql
@@ -34,6 +34,8 @@ type AdminQuery{
metric: MetricAdminQuery
"Statistics"
stats: StatsAdminQuery
"Personal data management according to GDPR"
gdpr: GdprAdminQuery
}

"""
5 changes: 5 additions & 0 deletions priv/graphql/schemas/admin/gdpr.gql
@@ -0,0 +1,5 @@
"Retrieve user's presonal data"
type GdprAdminQuery {
"Retrieves all personal data from MongooseIM for a given user"
retrievePersonalData(username: String!, domain: String!, resultFilepath: String!): String
}
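
For reference, the new field can be exercised with a standalone operation against the admin GraphQL endpoint. The sketch below mirrors the Q1 operation used in graphql_gdpr_SUITE; the argument values are only placeholders taken from the command's longdesc example, not part of this diff.

# Hypothetical admin query; any existing user and a path writable by the server will do.
query Q1 {
  gdpr {
    retrievePersonalData(username: "alice", domain: "localhost",
                         resultFilepath: "/home/mim/alice.smith.zip")
  }
}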
129 changes: 3 additions & 126 deletions src/admin_extra/service_admin_extra_gdpr.erl
@@ -4,140 +4,17 @@
-include("mongoose_logger.hrl").
-include("jlib.hrl").

-export([commands/0, retrieve_all/3]).

% Exported for RPC call
-export([retrieve_logs/2, get_data_from_modules/2]).

-ignore_xref([commands/0, retrieve_all/3, retrieve_logs/2, get_data_from_modules/2]).

-define(CMD_TIMEOUT, 300000).
-export([commands/0]).
-ignore_xref([commands/0]).

-spec commands() -> [ejabberd_commands:cmd()].
commands() -> [
#ejabberd_commands{name = retrieve_personal_data, tags = [gdpr],
desc = "Retrieve user's personal data.",
longdesc = "Retrieves all personal data from MongooseIM for a given user. Example:\n"
" mongooseimctl retrieve_personal_data alice localhost /home/mim/alice.smith.zip ",
module = ?MODULE,
module = gdpr_api,
function = retrieve_all,
args = [{username, binary}, {domain, binary}, {path, binary}],
result = {res, rescode}}
].

-spec retrieve_all(jid:user(), jid:server(), Path :: binary()) -> ok | {error, Reason :: any()}.
retrieve_all(Username, Domain, ResultFilePath) ->
JID = jid:make(Username, Domain, <<>>),
case user_exists(JID) of
true ->
DataFromModules = get_data_from_modules(JID),
% The contract is that we create personal data files only when there are any items
% returned for the data group.
DataToWrite = lists:filter(fun({_, _, Items}) -> Items /= [] end, DataFromModules),

TmpDir = make_tmp_dir(),

CsvFiles = lists:map(
fun({DataGroup, Schema, Entries}) ->
BinDataGroup = atom_to_binary(DataGroup, utf8),
FileName = <<BinDataGroup/binary, ".csv">>,
to_csv_file(FileName, Schema, Entries, TmpDir),
binary_to_list(FileName)
end,
DataToWrite),

LogFiles = get_all_logs(Username, Domain, TmpDir),

ZipFile = binary_to_list(ResultFilePath),
{ok, ZipFile} = zip:create(ZipFile, CsvFiles ++ LogFiles, [{cwd, TmpDir}]),
remove_tmp_dir(TmpDir),
ok;
false ->
{error, "User does not exist"}
end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Private funs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-spec get_data_from_modules(jid:user(), jid:server()) -> gdpr:personal_data().
get_data_from_modules(Username, Domain) ->
JID = jid:make(Username, Domain, <<>>),
get_data_from_modules(JID).

-spec get_data_from_modules(jid:jid()) -> gdpr:personal_data().
get_data_from_modules(JID) ->
{ok, HostType} = mongoose_domain_api:get_domain_host_type(JID#jid.lserver),
mongoose_hooks:get_personal_data(HostType, JID).

-spec to_csv_file(file:name_all(), gdpr:schema(), gdpr:entries(), file:name()) -> ok.
to_csv_file(Filename, DataSchema, DataRows, TmpDir) ->
FilePath = <<(list_to_binary(TmpDir))/binary, "/", Filename/binary>>,
{ok, File} = file:open(FilePath, [write]),
Encoded = erl_csv:encode([DataSchema | DataRows]),
file:write(File, Encoded),
file:close(File).

-spec user_exists(jid:jid()) -> boolean().
user_exists(JID) ->
ejabberd_auth:does_user_exist(JID).

-spec make_tmp_dir() -> file:name().
make_tmp_dir() ->
TmpDirName = lists:flatten(io_lib:format("/tmp/gdpr-~4.36.0b", [rand:uniform(36#zzzz)])),
case file:make_dir(TmpDirName) of
ok -> TmpDirName;
{error, eexist} -> make_tmp_dir();
{error, Error} -> {error, Error}
end.

-spec remove_tmp_dir(file:name()) -> ok.
remove_tmp_dir(TmpDir) ->
{ok, FileNames} = file:list_dir(TmpDir),
[file:delete(TmpDir ++ "/" ++ File) || File <- FileNames],
file:del_dir(TmpDir).

-type cmd() :: string() | binary().
-spec run(cmd(), [cmd()], timeout()) -> non_neg_integer() | timeout.
run(Cmd, Args, Timeout) ->
Port = erlang:open_port({spawn_executable, Cmd}, [exit_status, {args, Args}]),
receive
{Port, {exit_status, ExitStatus}} -> ExitStatus
after Timeout ->
timeout
end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Logs retrieval
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-spec retrieve_logs(gdpr:username(), gdpr:domain()) -> {ok, ZippedLogs :: binary()}.
retrieve_logs(Username, Domain) ->
TmpDir = make_tmp_dir(),
LogFile = get_logs(Username, Domain, TmpDir),
{ok, {_, ZippedLogs}} = zip:create("archive.zip", [LogFile], [memory, {cwd, TmpDir}]),
remove_tmp_dir(TmpDir),
{ok, ZippedLogs}.

-spec get_all_logs(gdpr:username(), gdpr:domain(), file:name()) -> [file:name()].
get_all_logs(Username, Domain, TmpDir) ->
OtherNodes = mongoose_cluster:other_cluster_nodes(),
LogFile = get_logs(Username, Domain, TmpDir),
LogFilesFromOtherNodes = [get_logs_from_node(Node, Username, Domain, TmpDir) || Node <- OtherNodes],
[LogFile | LogFilesFromOtherNodes].

-spec get_logs(gdpr:username(), gdpr:domain(), file:name()) -> file:name().
get_logs(Username, Domain, TmpDir) ->
FileList = [filename:absname(F) || F <- mongoose_logs:get_log_files()],
Cmd = code:priv_dir(mongooseim) ++ "/parse_logs.sh",
FileName = "logs-" ++ atom_to_list(node()) ++ ".txt",
FilePath = TmpDir ++ "/" ++ FileName,
Args = [FilePath, Username, Domain | FileList],
0 = run(Cmd, Args, ?CMD_TIMEOUT),
FileName.

-spec get_logs_from_node(node(), gdpr:username(), gdpr:domain(), file:name()) -> file:name().
get_logs_from_node(Node, Username, Domain, TmpDir) ->
{ok, ZippedData} = rpc:call(Node, ?MODULE, retrieve_logs, [Username, Domain]),
{ok, [File]} = zip:unzip(ZippedData, [{cwd, TmpDir}]),
filename:basename(File).
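
Both removed entry points are now expected to live in gdpr_api: the retrieve_personal_data command above points at module = gdpr_api, function = retrieve_all, and gdpr_SUITE fetches data over RPC from gdpr_api:get_data_from_modules/2. A minimal usage sketch on a MongooseIM node shell, assuming the ok | {error, Reason} contract of the removed retrieve_all/3 is preserved (user, domain and path are example values):

%% Whole-account export to a zip archive, as the new GraphQL resolver and the
%% retrieve_personal_data command trigger it (assumed return contract: ok | {error, _}).
Res = gdpr_api:retrieve_all(<<"alice">>, <<"localhost">>, <<"/tmp/alice.smith.zip">>),

%% Per-module personal data, as gdpr_SUITE now retrieves it over RPC.
Data = gdpr_api:get_data_from_modules(<<"alice">>, <<"localhost">>).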