From 51bbceff4983abe421a86ae90fd6b58f8bf85de9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20S=C5=82ota?= Date: Mon, 17 Jun 2019 17:54:56 +0200 Subject: [PATCH] Add migration instructions for Riak @ MIM 3.3.0+ * `3.3.0_3.3.0plus.md` has been modified by adding migration guide for Riak @ MIM 3.3.0+ * `jid-from-mam-muc-script.md` has been modified reflect the changes made to `sender-jid-from-mam-message.escript` script * `sender-jid-from-mam-message.escript` script has been changed so that it returns `-2\n` when the given stanza is not a correct MUC message. --- doc/migrations/3.3.0_3.3.0plus.md | 39 +++++++++++++++- doc/migrations/jid-from-mam-muc-script.md | 7 ++- test/migration_scripts_SUITE.erl | 46 +++++++++++++++++-- .../sender-jid-from-mam-message.escript | 42 ++++++++++++++--- 4 files changed, 119 insertions(+), 15 deletions(-) diff --git a/doc/migrations/3.3.0_3.3.0plus.md b/doc/migrations/3.3.0_3.3.0plus.md index b064b9a46c7..96522bdb56d 100644 --- a/doc/migrations/3.3.0_3.3.0plus.md +++ b/doc/migrations/3.3.0_3.3.0plus.md @@ -51,7 +51,44 @@ TODO ### Riak -TODO +Changes to Riak schema are backwards compatible with the current MongooseIM release. +This means that in case of skipping of the migrations, only some new features (namely GDPR data retrival) won't work correctly. + +#### Step 1 + +Please update the Riak schema: +```bash +# Set the RIAK_HOST to your Riak HTTP endpoint +# Set the RIAK_MAM_SCHEMA_PATH to point to new schema path, which +# by default is: RIAK_MAM_SCHEMA_PATH=tools/mam_search_schema.xml +curl -v -XPUT $RIAK_HOST/search/schema/mam \ + -H 'Content-Type:application/xml' \ + --data-binary @${RIAK_MAM_SCHEMA_PATH} +``` + +After that we need to either reload all Riak nodes (restart them) or manually reload the schema on live nodes. +Reloading of schema of live nodes requires access to Erlang Shell of one of the Riak nodes (any of them). +The instruction on how to get to Riak's Erlang shell is beyond this guide, but if you manage to get to it, just call: + +```erlang +yz_index:reload(<<"mam">>). +``` + +#### Step 2 + +After the schema is posted and reloaded, all "new" objects will be indexed properly as long they contain 2 new fields: `msg_owner_jid` and `mam_type`. +The new MongooseIM code will insert both of them for all new MAM entires, but for all existing ones, we need to "update" by adding those fields. +In order to do that, we need to create a migration script (just pick your favourite scripting/programming language) that will do the following for *each* object in *each* bucket of type `mam_yz` (the object will be referred as `obj`): + +* Use [this dedicated script](jid-from-mam-muc-script.md) to convert `obj.packet_register` field value into so called `$SENDER_JID`. +* If the script returns `$SENDER_JID` correctly: + * set `obj.mam_type = 'muc'` + * set `obj.msg_owner_jid = $SENDER_JID` +* If the script returns error code `-2` + * set `obj.mam_type = 'pm'` + * based on `obj_yz_rk` which is in format `$LOCAL_JID/$REMOTE_JID/$MSG_ID`, set `obj.msg_owner_jid = $LOCAL_JID` +* Save the modified `obj` + ### ElasticSearch diff --git a/doc/migrations/jid-from-mam-muc-script.md b/doc/migrations/jid-from-mam-muc-script.md index 6dbd7f65067..6c85850c06f 100644 --- a/doc/migrations/jid-from-mam-muc-script.md +++ b/doc/migrations/jid-from-mam-muc-script.md @@ -57,8 +57,11 @@ LENGTH\nJID * `JID` is a sequence of bytes, which encodes a Unicode string * `LENGTH` and `PAYLOAD` are separated with a newline character (ASCII code 10 / 0x0a) -If JID couldn't be extracted for some reason (and it's not a critical error, like I/O failure), the script will continue to work and will return `-1\n`. -It's `-1` for `LENGTH`, followed by a newline character and no `PAYLOAD` part (or 0-length `PAYLOAD` if you like). +In case of an error (that is not a critical error, like I/O failure), script will print `-N\n` (where `N` is an error code) and will continue to work. +Technically it's `-N` for `LENGTH`, followed by a newline character and no `PAYLOAD` part (or 0-length `PAYLOAD` if you like). +The following error codes are supported: +* `-1\n` - Unknown error. Something went wrong with the JID extraction (most likely malformed input). +* `-2\n` - Invalid message type. The message / stanza has been decoded successfully, but it's not a groupchat message. ## Examples diff --git a/test/migration_scripts_SUITE.erl b/test/migration_scripts_SUITE.erl index b8af9b80832..6027df3fa8c 100644 --- a/test/migration_scripts_SUITE.erl +++ b/test/migration_scripts_SUITE.erl @@ -7,7 +7,9 @@ sender_jid_from_mam_muc_eterm_stream/1, sender_jid_from_mam_muc_xml_stream/1, sender_jid_from_mam_muc_doesnt_crash_on_unsupported_eterm_input/1, - sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input/1 + sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input/1, + sender_jid_from_mam_muc_doesnt_crash_on_malformed_eterm_input/1, + sender_jid_from_mam_muc_doesnt_crash_on_malformed_xml_input/1 ]). %% ---------------------------------------------------------- @@ -25,7 +27,9 @@ groups() -> sender_jid_from_mam_muc_eterm_stream, sender_jid_from_mam_muc_xml_stream, sender_jid_from_mam_muc_doesnt_crash_on_unsupported_eterm_input, - sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input + sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input, + sender_jid_from_mam_muc_doesnt_crash_on_malformed_eterm_input, + sender_jid_from_mam_muc_doesnt_crash_on_malformed_xml_input ]} ]. @@ -50,11 +54,11 @@ sender_jid_from_mam_muc_eterm_stream(_Config) -> sender_jid_from_mam_muc_xml_stream(_Config) -> Port = script_helper:start("tools/migration/sender-jid-from-mam-message.escript", ["xml"]), - sender_jid_from_mam_muc_data_stream(Port, fun(B) -> B end). + sender_jid_from_mam_muc_data_stream(Port, fun(B) -> B end). sender_jid_from_mam_muc_data_stream(Port, PayloadConverterFun) -> lists:foreach(fun(JID) -> - MsgBin = sample_archived_muc_message(JID), + MsgBin = sample_archived_muc_message(JID), script_helper:write(Port, PayloadConverterFun(MsgBin)), JID = script_helper:read(Port) end, [<<"alice@localhost">>, <<"zażółćgęśląjaźń@localhost2"/utf8>>, @@ -68,10 +72,28 @@ sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input(_Config) -> Port = script_helper:start("tools/migration/sender-jid-from-mam-message.escript", ["xml"]), sender_jid_from_mam_muc_doesnt_crash_on_unsupported_input(Port, fun(B) -> B end). + +sender_jid_from_mam_muc_doesnt_crash_on_malformed_eterm_input(_Config) -> + Port = script_helper:start("tools/migration/sender-jid-from-mam-message.escript", ["eterm"]), + sender_jid_from_mam_muc_doesnt_crash_on_malformed_input(Port, fun binary_string_to_eterm/1). + +sender_jid_from_mam_muc_doesnt_crash_on_malformed_xml_input(_Config) -> + Port = script_helper:start("tools/migration/sender-jid-from-mam-message.escript", ["xml"]), + sender_jid_from_mam_muc_doesnt_crash_on_malformed_input(Port, fun(B) -> B end). + sender_jid_from_mam_muc_doesnt_crash_on_unsupported_input(Port, PayloadConverterFun) -> - %% First we expect that the script replies with -1 length.... + %% First we expect that the script replies with -2 length (non MUC message).... InvalidPayload = PayloadConverterFun(sample_archived_1_to_1_message()), script_helper:write(Port, InvalidPayload), + {error, -2} = script_helper:read(Port), + + %% Then we confirm with valid payload that the script actually still works + sender_jid_from_mam_muc_data_stream(Port, PayloadConverterFun). + +sender_jid_from_mam_muc_doesnt_crash_on_malformed_input(Port, PayloadConverterFun) -> + %% First we expect that the script replies with -1 length (malformed message).... + InvalidPayload = PayloadConverterFun(sample_malformed_muc_message()), + script_helper:write(Port, InvalidPayload), {error, -1} = script_helper:read(Port), %% Then we confirm with valid payload that the script actually still works @@ -98,6 +120,20 @@ sample_archived_1_to_1_message() -> <<"" "Zażółć gęślą jaźń"/utf8>>. +sample_malformed_muc_message() -> + <<" + Zażółć gęślą jaźń + + + + "/utf8>>. + + binary_string_to_eterm(Bin) -> {ok, XmlEl} = exml:parse(Bin), term_to_binary(XmlEl). diff --git a/tools/migration/sender-jid-from-mam-message.escript b/tools/migration/sender-jid-from-mam-message.escript index 46bcb9b8025..3976984eaff 100755 --- a/tools/migration/sender-jid-from-mam-message.escript +++ b/tools/migration/sender-jid-from-mam-message.escript @@ -48,21 +48,45 @@ common_loop(ExtractionFun) -> % We skip trailing \n InLen = binary_to_integer(binary:part(InLenBin, 0, byte_size(InLenBin) - 1)), {ok, Data} = file:read(standard_io, InLen), - safe_jid_extraction(ExtractionFun, Data), + safe_jid_extraction(ExtractionFun, Data), common_loop(ExtractionFun) end. jid_from_eterm(ETerm) -> - {xmlel, <<"message">>, _, MsgChildren} = binary_to_term(ETerm), - {xmlel, <<"x">>, _, XChildren} = - lists:keyfind([{<<"xmlns">>, <<"http://jabber.org/protocol/muc#user">>}], 3, MsgChildren), - {xmlel, _, ItemAttrs, _} = lists:keyfind(<<"item">>, 2, XChildren), - {_, JID} = lists:keyfind(<<"jid">>, 1, ItemAttrs), - JID. + {xmlel, <<"message">>, MsgAttrs, MsgChildren} = binary_to_term(ETerm), + case lists:keyfind(<<"type">>, 1, MsgAttrs) of + {_, <<"groupchat">>} -> ok; + _ -> throw(not_muc_message) + end, + case lists:keyfind([{<<"xmlns">>, <<"http://jabber.org/protocol/muc#user">>}], 3, MsgChildren) of + {xmlel, <<"x">>, _, XChildren} -> + {xmlel, _, ItemAttrs, _} = lists:keyfind(<<"item">>, 2, XChildren), + {_, JID} = lists:keyfind(<<"jid">>, 1, ItemAttrs), + JID; + _ -> + throw(not_muc_message) + end. + jid_from_xml(XML) -> XmerlFriendlyXML = "" ++ binary_to_list(XML), {Doc, _} = xmerl_scan:string(XmerlFriendlyXML), + case xmerl_xpath:string("/message/@type", Doc) of + [#xmlAttribute{ value = "groupchat" }] -> + ok; + _ -> + throw(not_muc_message) + end, + Xs = xmerl_xpath:string("/message/x", Doc), + IsMUC = + lists:any(fun + (#xmlElement{ namespace = #xmlNamespace{ default = 'http://jabber.org/protocol/muc#user' }}) -> + true; + (_Elem) -> + false + end, Xs), + IsMUC orelse throw(not_muc_message), + [#xmlAttribute{ value = JID }] = xmerl_xpath:string("/message/x/item/@jid", Doc), unicode:characters_to_binary(JID). @@ -73,6 +97,10 @@ safe_jid_extraction(JIDExtractorFun, Data) -> OutLenBin = integer_to_binary(OutLen), ok = file:write(standard_io, <>) catch + throw:R -> + Extra = #{ type => invalid_message_type, data => Data }, + debug(throw, R, erlang:get_stacktrace(), Extra), + ok = io:put_chars("-2\n"); C:R -> Extra = #{ type => cannot_extract_jid, data => Data }, debug(C, R, erlang:get_stacktrace(), Extra),