From 9b40943e326ea208c3740912f2c70fe18abb8677 Mon Sep 17 00:00:00 2001 From: Denys Gonchar Date: Tue, 21 May 2019 11:27:46 +0200 Subject: [PATCH] [GDPR] DB schema rework for MAM MUC cassandra --- big_tests/tests/gdpr_SUITE.erl | 2 +- priv/elasticsearch/muc.json | 3 ++ src/mam/mod_mam_muc.erl | 2 +- src/mam/mod_mam_muc_cassandra_arch.erl | 2 +- src/mam/mod_mam_muc_elasticsearch_arch.erl | 36 +++++++++++++++++++--- src/mam/mod_mam_rdbms_arch.erl | 13 ++++++-- src/mam/mod_mam_riak_timed_arch_yz.erl | 4 +-- 7 files changed, 50 insertions(+), 12 deletions(-) diff --git a/big_tests/tests/gdpr_SUITE.erl b/big_tests/tests/gdpr_SUITE.erl index fb9725d255b..eb0a8c1f83a 100644 --- a/big_tests/tests/gdpr_SUITE.erl +++ b/big_tests/tests/gdpr_SUITE.erl @@ -131,7 +131,7 @@ groups() -> {retrieve_personal_data_mam_rdbms, [], mam_testcases()}, {retrieve_personal_data_mam_riak, [], mam_testcases()}, {retrieve_personal_data_mam_cassandra, [], mam_testcases()}, - {retrieve_personal_data_mam_elasticsearch, [], [retrieve_mam_pm]}, + {retrieve_personal_data_mam_elasticsearch, [], mam_testcases()}, {remove_personal_data, [], removal_testcases()}, {remove_personal_data_with_mods_disabled, [], removal_testcases()}, {remove_personal_data_inbox, [], [remove_inbox, remove_inbox_muclight, remove_inbox_muc]}]. diff --git a/priv/elasticsearch/muc.json b/priv/elasticsearch/muc.json index a7ffac89d18..548759e8c61 100644 --- a/priv/elasticsearch/muc.json +++ b/priv/elasticsearch/muc.json @@ -8,6 +8,9 @@ "room": { "type": "keyword" }, + "from_jid" : { + "type": "keyword" + }, "source_jid": { "type": "keyword" }, diff --git a/src/mam/mod_mam_muc.erl b/src/mam/mod_mam_muc.erl index 434265418c3..2bddb2b3246 100644 --- a/src/mam/mod_mam_muc.erl +++ b/src/mam/mod_mam_muc.erl @@ -242,7 +242,7 @@ archive_room_packet(Packet, FromNick, FromJID=#jid{}, RoomJID=#jid{}, Role, Affi MessID = generate_message_id(), Packet1 = replace_x_user_element(FromJID, Role, Affiliation, Packet), Result = archive_message(Host, MessID, ArcID, - RoomJID, SrcJID, SrcJID, incoming, Packet1), + RoomJID, FromJID, SrcJID, incoming, Packet1), %% Packet2 goes to archive, Packet to other users case Result of ok -> diff --git a/src/mam/mod_mam_muc_cassandra_arch.erl b/src/mam/mod_mam_muc_cassandra_arch.erl index 2b9d3b961ee..e32c61cc19c 100644 --- a/src/mam/mod_mam_muc_cassandra_arch.erl +++ b/src/mam/mod_mam_muc_cassandra_arch.erl @@ -156,7 +156,7 @@ insert_query_cql() -> "VALUES (?, ?, ?, ?, ?)". archive_message(Result, Host, MessID, _RoomID, - LocJID, NickName, NickName, Dir, Packet) -> + LocJID, _FromJID, NickName, Dir, Packet) -> try archive_message2(Result, Host, MessID, LocJID, NickName, NickName, Dir, Packet) diff --git a/src/mam/mod_mam_muc_elasticsearch_arch.erl b/src/mam/mod_mam_muc_elasticsearch_arch.erl index 3f0fea32ff8..a7e3a1e8a9c 100644 --- a/src/mam/mod_mam_muc_elasticsearch_arch.erl +++ b/src/mam/mod_mam_muc_elasticsearch_arch.erl @@ -31,6 +31,9 @@ -export([remove_archive/4]). -export([archive_size/4]). +%gdpr +-export([get_mam_muc_gdpr_data/2]). + -include("mongoose.hrl"). -include("mongoose_rsm.hrl"). -include("mod_mam.hrl"). @@ -56,12 +59,29 @@ stop(Host) -> %%------------------------------------------------------------------- %% ejabberd_gen_mam_archive callbacks %%------------------------------------------------------------------- +-spec get_mam_muc_gdpr_data(jid:username(), jid:server()) -> + {ok, ejabberd_gen_mam_archive:mam_muc_gdpr_data()}. +get_mam_muc_gdpr_data(User, Host) -> + Source = jid:make(User, Host, <<"">>), + BinSource = mod_mam_utils:bare_jid(Source), + Filter = #{term => #{from_jid => BinSource}}, + Sorting = #{mam_id => #{order => asc}}, + SearchQuery = #{query => #{bool => #{filter => Filter}}, + sort => Sorting}, + case mongoose_elasticsearch:search(?INDEX_NAME, ?TYPE_NAME, SearchQuery) of + {ok, #{<<"hits">> := #{<<"hits">> := Hits}}} -> + Messages = lists:map(fun hit_to_gdpr_mam_message/1, Hits), + {ok, Messages}; + {error, _} -> + {ok, []} + end. -archive_message(_Result, Host, MessageId, _UserId, RoomJid, _SourceJid, SourceJid, _Dir, Packet) -> +archive_message(_Result, Host, MessageId, _UserId, RoomJid, FromJID, SourceJid, _Dir, Packet) -> Room = mod_mam_utils:bare_jid(RoomJid), SourceBinJid = mod_mam_utils:full_jid(SourceJid), + From = mod_mam_utils:bare_jid(FromJID), DocId = make_document_id(Room, MessageId), - Doc = make_document(MessageId, Room, SourceBinJid, Packet), + Doc = make_document(MessageId, Room, SourceBinJid, Packet, From), case mongoose_elasticsearch:insert_document(?INDEX_NAME, ?TYPE_NAME, DocId, Doc) of {ok, _} -> ok; @@ -144,10 +164,11 @@ hooks(Host) -> make_document_id(Room, MessageId) -> <>. --spec make_document(mod_mam:message_id(), binary(), binary(), exml:element()) -> - map(). -make_document(MessageId, Room, SourceBinJid, Packet) -> +-spec make_document(mod_mam:message_id(), binary(), binary(), exml:element(), + binary()) -> map(). +make_document(MessageId, Room, SourceBinJid, Packet, FromJID) -> #{mam_id => MessageId, + from_jid => FromJID, room => Room, source_jid => SourceBinJid, message => exml:to_binary(Packet), @@ -257,6 +278,11 @@ hit_to_mam_message(#{<<"_source">> := JSON}) -> {ok, Stanza} = exml:parse(Packet), {MessageId, jid:from_binary(SourceJid), Stanza}. +hit_to_gdpr_mam_message(#{<<"_source">> := JSON}) -> + MessageId = maps:get(<<"mam_id">>, JSON), + Packet = maps:get(<<"message">>, JSON), + {integer_to_binary(MessageId), Packet}. + %% Usage of RSM affects the `"total"' value returned by ElasticSearch. Per RSM spec, the count %% returned by the query should represent the size of the whole result set, which in case of MAM %% is bound only by the MAM filters. diff --git a/src/mam/mod_mam_rdbms_arch.erl b/src/mam/mod_mam_rdbms_arch.erl index 9d846553409..42496b4fc77 100644 --- a/src/mam/mod_mam_rdbms_arch.erl +++ b/src/mam/mod_mam_rdbms_arch.erl @@ -20,6 +20,7 @@ -export([archive_size/4, archive_message/9, + archive_message_muc/9, lookup_messages/3, remove_archive/4]). @@ -162,7 +163,7 @@ start_muc(Host, _Opts) -> true -> ok; false -> - ejabberd_hooks:add(mam_muc_archive_message, Host, ?MODULE, archive_message, 50) + ejabberd_hooks:add(mam_muc_archive_message, Host, ?MODULE, archive_message_muc, 50) end, ejabberd_hooks:add(mam_muc_archive_size, Host, ?MODULE, archive_size, 50), ejabberd_hooks:add(mam_muc_lookup_messages, Host, ?MODULE, lookup_messages, 50), @@ -176,7 +177,7 @@ stop_muc(Host) -> true -> ok; false -> - ejabberd_hooks:delete(mam_muc_archive_message, Host, ?MODULE, archive_message, 50) + ejabberd_hooks:delete(mam_muc_archive_message, Host, ?MODULE, archive_message_muc, 50) end, ejabberd_hooks:delete(mam_muc_archive_size, Host, ?MODULE, archive_size, 50), ejabberd_hooks:delete(mam_muc_lookup_messages, Host, ?MODULE, lookup_messages, 50), @@ -208,6 +209,14 @@ index_hint_sql(Host) -> end. +-spec archive_message_muc(_Result, Host :: jid:server(), + MessID :: mod_mam:message_id(), UserID :: mod_mam:archive_id(), + LocJID :: jid:jid(), RemJID :: jid:jid(), + SrcJID :: jid:jid(), Dir :: atom(), Packet :: any()) -> ok. +archive_message_muc(Result, Host, MessID, UserID, LocJID, _RemJID, SrcJID, Dir, Packet) -> + archive_message(Result, Host, MessID, UserID, LocJID, SrcJID, SrcJID, Dir, Packet). + + -spec archive_message(_Result, Host :: jid:server(), MessID :: mod_mam:message_id(), UserID :: mod_mam:archive_id(), LocJID :: jid:jid(), RemJID :: jid:jid(), diff --git a/src/mam/mod_mam_riak_timed_arch_yz.erl b/src/mam/mod_mam_riak_timed_arch_yz.erl index 03c16dde5b7..b2522c03079 100644 --- a/src/mam/mod_mam_riak_timed_arch_yz.erl +++ b/src/mam/mod_mam_riak_timed_arch_yz.erl @@ -132,8 +132,8 @@ archive_message(_Result, Host, MessId, _UserID, LocJID, RemJID, SrcJID, _Dir, Pa {error, Reason} end. -archive_message_muc(_Result, Host, MessId, _UserID, LocJID, RemJID, SrcJID, _Dir, Packet) -> - RemJIDMuc = maybe_muc_jid(RemJID), +archive_message_muc(_Result, Host, MessId, _UserID, LocJID, _FromJID, SrcJID, _Dir, Packet) -> + RemJIDMuc = maybe_muc_jid(SrcJID), try archive_message(Host, MessId, LocJID, RemJIDMuc, SrcJID, Packet, muc) catch _Type:Reason ->