Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GDPR removal for pubsub (alternative) #2349

Merged
merged 2 commits into from
Jun 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
283 changes: 255 additions & 28 deletions big_tests/tests/gdpr_SUITE.erl

Large diffs are not rendered by default.

22 changes: 19 additions & 3 deletions big_tests/tests/pubsub_tools.erl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
-include_lib("eunit/include/eunit.hrl").
%% Send request, receive (optional) response
-export([pubsub_node/0,
pubsub_node/1,
domain/0,
node_addr/0,
rand_name/1,
Expand Down Expand Up @@ -50,7 +51,10 @@
submit_subscription_response/5,
get_pending_subscriptions/3,
get_pending_subscriptions/4,
modify_node_subscriptions/4
modify_node_subscriptions/4,

create_node_names/1,
create_nodes/1
]).

%% Receive notification or response
Expand Down Expand Up @@ -636,8 +640,11 @@ decode_affiliations(IQResult) ->

[ {exml_query:attr(F, <<"jid">>), exml_query:attr(F, <<"affiliation">>)} || F <- Fields ].

pubsub_node() ->
{node_addr(), pubsub_node_name()}.
pubsub_node() -> pubsub_node(1).
pubsub_node(Num) ->
{pubsub_tools:node_addr(), <<"node_",
(integer_to_binary(Num))/binary, "_",
(base64:encode(crypto:strong_rand_bytes(6)))/binary>>}.

domain() ->
ct:get_config({hosts, mim, domain}).
Expand All @@ -661,3 +668,12 @@ encode_group_name(BaseName, NodeTree) ->
decode_group_name(ComplexName) ->
[NodeTree, BaseName] = binary:split(atom_to_binary(ComplexName, utf8), <<"+">>),
#{node_tree => NodeTree, base_name => binary_to_atom(BaseName, utf8)}.

create_node_names(Count) ->
[pubsub_node(N) || N <- lists:seq(1, Count)].

create_nodes(List) ->
lists:map(fun({User, Node, Opts}) ->
pubsub_tools:create_node(User, Node, Opts)
end, List).

8 changes: 7 additions & 1 deletion src/pubsub/gen_pubsub_node.erl
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,10 @@

-callback path_to_node(Path :: [nodeId()]) -> nodeId().

-callback should_delete_when_owner_removed() -> boolean().

-callback remove_user(LUser :: jid:luser(), LServer :: jid:lserver(), Backend :: module()) -> any().

-optional_callbacks([create_node_permission/6,
create_node/2,
delete_node/1,
Expand All @@ -222,7 +226,9 @@
get_item/2,
set_item/1,
get_item_name/3,
path_to_node/1]).
path_to_node/1,
should_delete_when_owner_removed/0,
remove_user/3]).

%% --------------------------------------------------------
%% API
Expand Down
53 changes: 25 additions & 28 deletions src/pubsub/mod_pubsub.erl
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
-export([subscription_to_string/1, affiliation_to_string/1,
string_to_subscription/1, string_to_affiliation/1,
extended_error/2, extended_error/3, service_jid/1,
tree/1, tree/2, plugin/2, plugins/1, config/3,
tree/1, tree/2, plugin/2, plugin/1, plugins/1, plugin_call/3, config/3,
host/1, serverhost/1]).

%% API and gen_server callbacks
Expand Down Expand Up @@ -879,35 +879,26 @@ remove_user(Acc, User, Server) ->
remove_user(User, Server) ->
LUser = jid:nodeprep(User),
LServer = jid:nameprep(Server),
Entity = jid:make(LUser, LServer, <<>>),
Host = host(LServer),
HomeTreeBase = <<"/home/", LServer/binary, "/", LUser/binary>>,
spawn(fun() -> lists:foreach(
fun(PType) ->
catch remove_user_per_plugin(PType, Host, Entity, HomeTreeBase)
end, plugins(Host)) end),

BackendModules = mongoose_lib:find_behaviour_implementations(mod_pubsub_db),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this just makes me think of one thing: why did we spawn a different process and return ok immediately before? Was it because of some timeout? Do we want to receive a notification for failure or so?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Original pubsub's remove_user implementation probably assumed that the operator doesn't care if the removal action actually finished when the function exits. Currently all GDPR ops we've added are synchronous, even for MAM.

PluginModules = mongoose_lib:find_behaviour_implementations(gen_pubsub_node),
% The line below will currently lead to unnecessary DB selects,
% because plugins don't really filter DB data by type.
% TODO: This should be optimised during GDPR load tests.
[ remove_user_per_backend_and_plugin_safe(LUser, LServer, Plugin, Backend)
|| Plugin <- PluginModules, Backend <- BackendModules ],
ok.

remove_user_per_plugin(PType, Host, Entity, HomeTreeBase) ->
{result, Subs} = node_action(Host, PType, get_entity_subscriptions, [Host, Entity]),
lists:foreach(fun({#pubsub_node{id = Nidx}, _, _, JID}) ->
node_action(Host, PType, unsubscribe_node, [Nidx, Entity, JID, all]);
(_) ->
ok
end, Subs),
{result, Affs} = node_action(Host, PType, get_entity_affiliations, [Host, Entity]),
lists:foreach(fun({#pubsub_node{nodeid = {H, N}, parents = []}, owner}) ->
delete_node(H, N, Entity);
({#pubsub_node{nodeid = {H, N}, type = Type}, owner})
when N == HomeTreeBase, Type == <<"hometree">> ->
delete_node(H, N, Entity);
({#pubsub_node{id = Nidx}, publisher}) ->
node_action(Host, PType,
set_affiliation,
[Nidx, Entity, none]);
(_) ->
ok
end, Affs).
remove_user_per_backend_and_plugin_safe(LUser, LServer, Plugin, Backend) ->
try
plugin_call(Plugin, remove_user, [LUser, LServer, Backend])
catch
Class:Reason ->
StackTrace = erlang:get_stacktrace(),
?WARNING_MSG("event=cannot_delete_pubsub_user,"
"luser=~s,lserver=~s,backend=~p,class=~p,reason=~p,stacktrace=~p",
[LUser, LServer, Backend, Class, Reason, StackTrace])
end.

handle_call(server_host, _From, State) ->
{reply, State#state.server_host, State};
Expand Down Expand Up @@ -4174,6 +4165,9 @@ tree(_Host, Name) ->
binary_to_atom(<<"nodetree_", Name/binary>>, utf8).

plugin(_Host, Name) ->
plugin(Name).

plugin(Name) ->
binary_to_atom(<<"node_", Name/binary>>, utf8).

plugins(Host) ->
Expand Down Expand Up @@ -4282,6 +4276,9 @@ tree_action(Host, Function, Args) ->
node_call(Host, Type, Function, Args) ->
?DEBUG("node_call ~p ~p ~p", [Type, Function, Args]),
PluginModule = plugin(Host, Type),
plugin_call(PluginModule, Function, Args).

plugin_call(PluginModule, Function, Args) ->
CallModule = maybe_default_node(PluginModule, Function, Args),
case apply(CallModule, Function, Args) of
{result, Result} ->
Expand Down
17 changes: 15 additions & 2 deletions src/pubsub/mod_pubsub_db.erl
Original file line number Diff line number Diff line change
Expand Up @@ -185,14 +185,22 @@
-callback get_user_subscriptions(LUser :: jid:luser(), LServer :: jid:lserver()) ->
[NodeName :: [binary()]].

-callback find_nodes_by_affiliated_user(JID :: jid:ljid()) ->
[{mod_pubsub:pubsubNode(), mod_pubsub:affiliation()}].

-callback delete_user_subscriptions(JID :: jid:ljid()) ->
ok.

%%====================================================================
%% API
%%====================================================================

-spec db_error(ReasonData :: map(), ErrorDebug :: map(), Event :: any()) ->
% ReasonData may either be a debug map provided by mod_pubsub
% or some other term if the crash is serious enough to lose the debug map somewhere.
-spec db_error(ReasonData :: map() | any(), ErrorDebug :: map(), Event :: any()) ->
{error, Details :: map()}.
db_error(ReasonData, ErrorDebug, Event) ->
{error, maps:merge(ErrorDebug#{ event => Event }, ReasonData)}.
{error, maps:merge(ErrorDebug#{ event => Event }, sanitize_reason(ReasonData))}.

%% transaction and sync_dirty return very truncated error data so we add extra
%% try to gather stack trace etc.
Expand All @@ -214,3 +222,8 @@ extra_debug_fun(Fun) ->
%% Internal functions
%%====================================================================

sanitize_reason(Map) when is_map(Map) ->
Map;
sanitize_reason(Other) ->
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is that possible? The spec above says ReasonData :: map(), so this either is a dead code or should be a dead code. Either way something is not right (spec or this clause).
On different topic, don't we prefer to simply match on map in db_error and crash if something weird is passed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For disabled backends pool_not_started is returned or similar, instead of map. This was a quick fix I needed for debugging but indeed maybe it can be done better.

#{ unexpected_reason => Other }.

42 changes: 33 additions & 9 deletions src/pubsub/mod_pubsub_db_mnesia.erl
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@
-export([
get_user_payloads/2,
get_user_nodes/2,
get_user_subscriptions/2
get_user_subscriptions/2,
delete_user_subscriptions/1,
find_nodes_by_affiliated_user/1
]).

%%====================================================================
Expand Down Expand Up @@ -208,6 +210,15 @@ node_name(Nidx) ->
_ -> <<>>
end.

-spec find_nodes_by_affiliated_user(JID :: jid:ljid()) ->
[{mod_pubsub:pubsubNode(), mod_pubsub:affiliation()}].
find_nodes_by_affiliated_user(LJID) ->
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need this function on such low-level module? Isn't there a plugin API that would do the same?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually think that such function fits better in DB layer because it should be a simple data retrieval instead the processing done in node_flat, which is actually a workaround for strange Mnesia data model.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't suggesting to move it, I was just asking if there is similar functionality already there :)

{ok, States} = get_states_by_lus(LJID),
lists:map(fun(#pubsub_state{ stateid = {_, Nidx}, affiliation = Aff }) ->
{ok, Node} = find_node_by_id(Nidx),
{Node, Aff}
end, States).

%% ------------------------ Direct #pubsub_state access ------------------------

-spec get_state(Nidx :: mod_pubsub:nodeIdx(),
Expand Down Expand Up @@ -269,7 +280,7 @@ del_node(Nidx) ->
{ok, States} = get_states(Nidx),
lists:foreach(fun (#pubsub_state{stateid = {LJID, _}, items = Items}) ->
del_items(Nidx, Items),
del_state(Nidx, LJID)
del_state_by_idx_and_ljid(Nidx, LJID)
end, States),
{ok, States}.

Expand Down Expand Up @@ -458,7 +469,7 @@ set_affiliation(Nidx, LJID, Affiliation) ->
BareLJID = jid:to_bare(LJID),
{ok, State} = get_state(Nidx, BareLJID, write),
case {Affiliation, State#pubsub_state.subscriptions} of
{none, []} -> del_state(Nidx, BareLJID);
{none, []} -> del_state_by_idx_and_ljid(Nidx, BareLJID);
_ -> mnesia:write(State#pubsub_state{ affiliation = Affiliation })
end.

Expand Down Expand Up @@ -517,7 +528,7 @@ delete_subscription(Nidx, LJID, SubId) ->
NewSubs = lists:keydelete(SubId, 2, State#pubsub_state.subscriptions),
mnesia:delete({pubsub_subscription, SubId}),
case {State#pubsub_state.affiliation, NewSubs} of
{none, []} -> del_state(Nidx, LJID);
{none, []} -> del_state_by_idx_and_ljid(Nidx, LJID);
_ -> mnesia:write(State#pubsub_state{subscriptions = NewSubs})
end.

Expand All @@ -527,10 +538,19 @@ delete_subscription(Nidx, LJID, SubId) ->
ok.
delete_all_subscriptions(Nidx, LJID) ->
{ok, State} = get_state(Nidx, LJID, write),
delete_all_subscriptions_by_state(State).

-spec delete_user_subscriptions(jid:ljid()) -> ok.
delete_user_subscriptions(LJID) ->
{ok, States} = get_states_by_lus(LJID),
lists:foreach(fun delete_all_subscriptions_by_state/1, States).

-spec delete_all_subscriptions_by_state(mod_pubsub:pubsubState()) -> ok.
delete_all_subscriptions_by_state(State) ->
lists:foreach(fun({_, SubId}) -> mnesia:delete({pubsub_subscription, SubId}) end,
State#pubsub_state.subscriptions),
case State#pubsub_state.affiliation of
none -> del_state(Nidx, LJID);
none -> del_state(State);
_ -> mnesia:write(State#pubsub_state{subscriptions = []})
end.

Expand Down Expand Up @@ -623,10 +643,14 @@ del_items(Nidx, ItemIds) ->
%% Internal functions
%%====================================================================

-spec del_state(Nidx :: mod_pubsub:nodeIdx(),
LJID :: jid:ljid()) -> ok.
del_state(Nidx, LJID) ->
{ok, #pubsub_state{ subscriptions = Subs }} = get_state(Nidx, LJID, write),
-spec del_state_by_idx_and_ljid(Nidx :: mod_pubsub:nodeIdx(),
LJID :: jid:ljid()) -> ok.
del_state_by_idx_and_ljid(Nidx, LJID) ->
{ok, State} = get_state(Nidx, LJID, write),
del_state(State).

-spec del_state(mod_pubsub:pubsubState()) -> ok.
del_state(#pubsub_state{ stateid = {LJID, Nidx}, subscriptions = Subs }) ->
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really like the fact that this function behaves differently based on type of the argument. I know that arity is different, but the name is the same, so I would expect this function to do the same thing, only maybe based on different data. I would suggest renaming here.

lists:foreach(fun({_, SubId}) -> mnesia:delete({pubsub_subscription, SubId}) end, Subs),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, why do we even need this function? The code above seem to be the same as in https://github.com/esl/MongooseIM/pull/2349/files#diff-76947e0e8d171032fa48bd99ed39698dR550 . What is the difference?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

del_state removes the state unconditionally, while delete_all_subscriptions_by_state only when there is no affiliation in the state (with all subscriptions removed).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, but do we need to delete subs here? This function is called only from https://github.com/esl/MongooseIM/pull/2349/files#diff-76947e0e8d171032fa48bd99ed39698dR553, just after removing subs. Isn't this code redundant?

mnesia:delete({pubsub_state, {LJID, Nidx}}).

Expand Down
20 changes: 19 additions & 1 deletion src/pubsub/mod_pubsub_db_rdbms.erl
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@
-export([
get_user_payloads/2,
get_user_nodes/2,
get_user_subscriptions/2
get_user_subscriptions/2,
delete_user_subscriptions/1,
find_nodes_by_affiliated_user/1
]).

% For SQL queries
Expand Down Expand Up @@ -437,6 +439,7 @@ find_subnodes(Key, Nodes, Depth, Acc) ->
{Subnodes, NewNodes} = maps:fold(MapTransformer, {[], []}, Map),
NewAcc = [{Depth, NewNodes} | Acc],
find_subnodes(Key, lists:flatten(Subnodes), Depth + 1, NewAcc).

% ------------------- Affiliations --------------------------------

-spec set_affiliation(Nidx :: mod_pubsub:nodeIdx(),
Expand Down Expand Up @@ -610,6 +613,21 @@ strip_payload(PayloadDB) ->
{ok, #xmlel{children = Payload}} = exml:parse(PayloadXML),
exml:to_binary(Payload).

-spec delete_user_subscriptions(jid:ljid()) -> ok.
delete_user_subscriptions({ LU, LS, _ }) ->
SQL = mod_pubsub_db_rdbms_sql:delete_user_subscriptions(LU, LS),
{updated, _} = mongoose_rdbms:sql_query_t(SQL),
ok.

find_nodes_by_affiliated_user({ LU, LS, _ }) ->
SQL = mod_pubsub_db_rdbms_sql:select_nodes_by_affiliated_user(LU, LS),
{selected, NodesWithAffs} = mongoose_rdbms:sql_query(global, SQL),
lists:map(fun decode_pubsub_node_with_aff_row/1, NodesWithAffs).

decode_pubsub_node_with_aff_row(Row) ->
[Aff | NodeRow] = tuple_to_list(Row),
{decode_pubsub_node_row(list_to_tuple(NodeRow)), sql2aff(Aff)}.

%%====================================================================
%% Helpers
%%====================================================================
Expand Down
16 changes: 16 additions & 0 deletions src/pubsub/mod_pubsub_db_rdbms_sql.erl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
delete_subscription/5,
delete_all_subscriptions/4,
delete_all_subscriptions/1,
delete_user_subscriptions/2,
update_subscription/6
]).

Expand All @@ -61,6 +62,7 @@
select_nodes_in_list_with_key/2,
select_nodes_by_key_and_names_in_list_with_parents/2,
select_nodes_by_key_and_names_in_list_with_children/2,
select_nodes_by_affiliated_user/2,
select_subnodes/2,
delete_node/2,
set_parents/2,
Expand Down Expand Up @@ -247,6 +249,12 @@ delete_all_subscriptions(Nidx) ->
["DELETE FROM pubsub_subscriptions"
" WHERE nidx = ", esc_int(Nidx)].

-spec delete_user_subscriptions(LU :: jid:luser(), LS :: jid:lserver()) -> iolist().
delete_user_subscriptions(LU, LS) ->
["DELETE FROM pubsub_subscriptions"
" WHERE luser = ", esc_string(LU),
" AND lserver = ", esc_string(LS)].

-spec update_subscription(Nidx :: mod_pubsub:nodeIdx(),
LU :: jid:luser(),
LS :: jid:lserver(),
Expand Down Expand Up @@ -470,6 +478,14 @@ select_nodes_by_owner(LJID) ->
]
end.

-spec select_nodes_by_affiliated_user(LU :: jid:luser(), LS :: jid:lserver()) -> iolist().
select_nodes_by_affiliated_user(LU, LS) ->
["SELECT aff, ", pubsub_node_fields("pn"),
" FROM pubsub_affiliations AS pa"
" INNER JOIN pubsub_nodes AS pn ON pa.nidx = pn.nidx"
" WHERE luser = ", esc_string(LU),
" AND lserver = ", esc_string(LS)].

-spec select_nodes_in_list_with_key(Key :: binary(), Nodes :: [binary()]) -> iolist().
select_nodes_in_list_with_key(Key, Nodes) ->
EscapedNames = [esc_string(Node) || Node <- Nodes],
Expand Down
Loading