Skip to content

Commit

Permalink
[GDPR] DB schema rework for MAM MUC cassandra
Browse files Browse the repository at this point in the history
  • Loading branch information
DenysGonchar committed May 30, 2019
1 parent 2f1fb1a commit 71dab2b
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 7 deletions.
2 changes: 1 addition & 1 deletion big_tests/tests/gdpr_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ groups() ->
{retrieve_personal_data_mam_rdbms, [], mam_testcases()},
{retrieve_personal_data_mam_riak, [], mam_testcases()},
{retrieve_personal_data_mam_cassandra, [], mam_testcases()},
{retrieve_personal_data_mam_elasticsearch, [], [retrieve_mam_pm]},
{retrieve_personal_data_mam_elasticsearch, [], mam_testcases()},
{remove_personal_data, [], removal_testcases()},
{remove_personal_data_with_mods_disabled, [], removal_testcases()},
{remove_personal_data_inbox, [], [remove_inbox, remove_inbox_muclight, remove_inbox_muc]}].
Expand Down
3 changes: 3 additions & 0 deletions priv/elasticsearch/muc.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
"room": {
"type": "keyword"
},
"from_jid" : {
"type": "keyword"
},
"source_jid": {
"type": "keyword"
},
Expand Down
2 changes: 1 addition & 1 deletion src/mam/mod_mam_muc.erl
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ archive_room_packet(Packet, FromNick, FromJID=#jid{}, RoomJID=#jid{}, Role, Affi
MessID = generate_message_id(),
Packet1 = replace_x_user_element(FromJID, Role, Affiliation, Packet),
Result = archive_message(Host, MessID, ArcID,
RoomJID, SrcJID, SrcJID, incoming, Packet1),
RoomJID, mod_mam_utils:bare_jid(FromJID), SrcJID, incoming, Packet1),
%% Packet2 goes to archive, Packet to other users
case Result of
ok ->
Expand Down
35 changes: 30 additions & 5 deletions src/mam/mod_mam_muc_elasticsearch_arch.erl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
-export([remove_archive/4]).
-export([archive_size/4]).

%% GDPR personal data retrieval
-export([get_mam_muc_gdpr_data/2]).

-include("mongoose.hrl").
-include("mongoose_rsm.hrl").
-include("mod_mam.hrl").
Expand All @@ -56,12 +59,28 @@ stop(Host) ->
%%-------------------------------------------------------------------
%% ejabberd_gen_mam_archive callbacks
%%-------------------------------------------------------------------
%% @doc Fetch the archived MUC messages sent by `User@Host' for a GDPR export.
%%
%% Builds the user's bare JID (empty resource), searches the MUC index for
%% documents whose `from_jid' term equals that JID, sorted by ascending
%% `mam_id', and converts every hit with `hit_to_gdpr_mam_message/1'.
%%
%% Always returns `{ok, Messages}'; a failed search is reported as `{ok, []}'.
%%
%% NOTE(review): the `{error, _}' branch hides search failures from the caller,
%% so an export cannot tell "no messages" from "backend unavailable" - confirm
%% this best-effort behaviour is intended.
%% NOTE(review): the query sets no `size', so the number of hits returned is
%% whatever the backend's default page size is - TODO confirm this covers users
%% with many archived messages.
-spec get_mam_muc_gdpr_data(jid:username(), jid:server()) ->
    {ok, ejabberd_gen_mam_archive:mam_muc_gdpr_data()}.
get_mam_muc_gdpr_data(User, Host) ->
    OwnerBareJid = mod_mam_utils:bare_jid(jid:make(User, Host, <<"">>)),
    Query = #{sort => #{mam_id => #{order => asc}},
              query => #{bool => #{filter => #{term => #{from_jid => OwnerBareJid}}}}},
    case mongoose_elasticsearch:search(?INDEX_NAME, ?TYPE_NAME, Query) of
        {error, _Reason} ->
            {ok, []};
        {ok, #{<<"hits">> := #{<<"hits">> := Hits}}} ->
            {ok, lists:map(fun hit_to_gdpr_mam_message/1, Hits)}
    end.

archive_message(_Result, Host, MessageId, _UserId, RoomJid, _SourceJid, SourceJid, _Dir, Packet) ->
archive_message(_Result, Host, MessageId, _UserId, RoomJid, FromJID, SourceJid, _Dir, Packet) ->
Room = mod_mam_utils:bare_jid(RoomJid),
SourceBinJid = mod_mam_utils:full_jid(SourceJid),
DocId = make_document_id(Room, MessageId),
Doc = make_document(MessageId, Room, SourceBinJid, Packet),
Doc = make_document(MessageId, Room, SourceBinJid, Packet, FromJID),
case mongoose_elasticsearch:insert_document(?INDEX_NAME, ?TYPE_NAME, DocId, Doc) of
{ok, _} ->
ok;
Expand Down Expand Up @@ -144,10 +163,11 @@ hooks(Host) ->
make_document_id(Room, MessageId) ->
    %% Document id layout: <room bare JID> "$" <decimal message id>.
    MessageIdBin = integer_to_binary(MessageId),
    <<Room/binary, $$, MessageIdBin/binary>>.

-spec make_document(mod_mam:message_id(), binary(), binary(), exml:element()) ->
map().
make_document(MessageId, Room, SourceBinJid, Packet) ->
-spec make_document(mod_mam:message_id(), binary(), binary(), exml:element(),
binary()) -> map().
make_document(MessageId, Room, SourceBinJid, Packet, FromJID) ->
#{mam_id => MessageId,
from_jid => FromJID,
room => Room,
source_jid => SourceBinJid,
message => exml:to_binary(Packet),
Expand Down Expand Up @@ -257,6 +277,11 @@ hit_to_mam_message(#{<<"_source">> := JSON}) ->
{ok, Stanza} = exml:parse(Packet),
{MessageId, jid:from_binary(SourceJid), Stanza}.

%% Turn one search hit into the `{MessageIdBinary, Packet}' pair used for the
%% GDPR export. Only the `mam_id' and `message' fields of the hit's `_source'
%% document are read; the packet is passed through as stored, without parsing.
hit_to_gdpr_mam_message(#{<<"_source">> := Doc}) ->
    MamId = maps:get(<<"mam_id">>, Doc),
    StoredPacket = maps:get(<<"message">>, Doc),
    {integer_to_binary(MamId), StoredPacket}.

%% Usage of RSM affects the `"total"' value returned by ElasticSearch. Per RSM spec, the count
%% returned by the query should represent the size of the whole result set, which in case of MAM
%% is bound only by the MAM filters.
Expand Down

0 comments on commit 71dab2b

Please sign in to comment.