Skip to content

Commit

Permalink
Merge 064873e into daacc8b
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafał Słota authored Jun 19, 2019
2 parents daacc8b + 064873e commit 954ca8a
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 15 deletions.
39 changes: 38 additions & 1 deletion doc/migrations/3.3.0_3.3.0plus.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,44 @@ TODO

### Riak

TODO
Changes to Riak schema are backward compatible with the current MongooseIM release.
This means that if you skip the migration, only some of the new features (namely GDPR data retrieval) will not work correctly.

#### Step 1

Please update the Riak schema:
```bash
# Set the RIAK_HOST to your Riak HTTP endpoint
# Set the RIAK_MAM_SCHEMA_PATH to point to new schema path, which
# by default is: RIAK_MAM_SCHEMA_PATH=tools/mam_search_schema.xml
curl -v -XPUT $RIAK_HOST/search/schema/mam \
-H 'Content-Type:application/xml' \
--data-binary @${RIAK_MAM_SCHEMA_PATH}
```

After that, you need to either restart all Riak nodes or manually reload the schema on the live nodes.
Reloading the schema on live nodes requires access to the Erlang shell of one of the Riak nodes (any of them).
Instructions on how to attach to Riak's Erlang shell are beyond the scope of this guide, but once you are attached, just call:

```erlang
yz_index:reload(<<"mam">>).
```

#### Step 2

After the schema is posted and reloaded, all "new" objects will be indexed properly as long as they contain 2 new fields: `msg_owner_jid` and `mam_type`.
The new MongooseIM code will insert both of them for all new MAM entries, but all existing entries need to have the fields added.
In order to do that, we need to create a migration script (just pick your favourite scripting/programming language) that will do the following for *each* object in *each* bucket of type `mam_yz` (the object will be referred to as `obj`):

* Use [this dedicated script](jid-from-mam-muc-script.md) to convert the `obj.packet_register` field value into a so-called `$SENDER_JID`.
* If the script returns `$SENDER_JID` correctly:
  * set `obj.mam_type = 'muc'`
  * set `obj.msg_owner_jid = $SENDER_JID`
* If the script returns error code `-2`:
  * set `obj.mam_type = 'pm'`
  * based on `obj_yz_rk` formatted as `$LOCAL_JID/$REMOTE_JID/$MSG_ID`, set `obj.msg_owner_jid = $LOCAL_JID`
* Save the modified `obj`


### ElasticSearch

Expand Down
7 changes: 5 additions & 2 deletions doc/migrations/jid-from-mam-muc-script.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ LENGTH\nJID
* `JID` is a sequence of bytes, which encodes a Unicode string
* `LENGTH` and `PAYLOAD` are separated with a newline character (ASCII code 10 / 0x0a)

If JID couldn't be extracted for some reason (and it's not a critical error, like I/O failure), the script will continue to work and will return `-1\n`.
It's `-1` for `LENGTH`, followed by a newline character and no `PAYLOAD` part (or 0-length `PAYLOAD` if you like).
In case of an error (that is not a critical error, like I/O failure), the script will print `-N\n` (where `N` is an error code) and will continue to work.
Technically, it's `-N` for `LENGTH`, followed by a newline character and no `PAYLOAD` part (or a 0-length `PAYLOAD` if you like).
The following error codes are supported:
* `-1\n` - Unknown error. Something went wrong with the JID extraction (most likely malformed input).
* `-2\n` - Invalid message type. The message / stanza has been decoded successfully, but it's not a groupchat message.

## Examples

Expand Down
46 changes: 41 additions & 5 deletions test/migration_scripts_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
sender_jid_from_mam_muc_eterm_stream/1,
sender_jid_from_mam_muc_xml_stream/1,
sender_jid_from_mam_muc_doesnt_crash_on_unsupported_eterm_input/1,
sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input/1
sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input/1,
sender_jid_from_mam_muc_doesnt_crash_on_malformed_eterm_input/1,
sender_jid_from_mam_muc_doesnt_crash_on_malformed_xml_input/1
]).

%% ----------------------------------------------------------
Expand All @@ -25,7 +27,9 @@ groups() ->
sender_jid_from_mam_muc_eterm_stream,
sender_jid_from_mam_muc_xml_stream,
sender_jid_from_mam_muc_doesnt_crash_on_unsupported_eterm_input,
sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input
sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input,
sender_jid_from_mam_muc_doesnt_crash_on_malformed_eterm_input,
sender_jid_from_mam_muc_doesnt_crash_on_malformed_xml_input
]}
].

Expand All @@ -50,11 +54,11 @@ sender_jid_from_mam_muc_eterm_stream(_Config) ->

%% Starts the migration script in "xml" mode and runs the shared MUC
%% data-stream scenario against it. The payload converter is the identity
%% function, so each sample message is written to the port unchanged.
sender_jid_from_mam_muc_xml_stream(_Config) ->
    Port = script_helper:start("tools/migration/sender-jid-from-mam-message.escript", ["xml"]),
    sender_jid_from_mam_muc_data_stream(Port, fun(B) -> B end).

sender_jid_from_mam_muc_data_stream(Port, PayloadConverterFun) ->
lists:foreach(fun(JID) ->
MsgBin = sample_archived_muc_message(JID),
MsgBin = sample_archived_muc_message(JID),
script_helper:write(Port, PayloadConverterFun(MsgBin)),
JID = script_helper:read(Port)
end, [<<"alice@localhost">>, <<"zażółćgęśląjaźń@localhost2"/utf8>>,
Expand All @@ -68,10 +72,28 @@ sender_jid_from_mam_muc_doesnt_crash_on_unsupported_xml_input(_Config) ->
Port = script_helper:start("tools/migration/sender-jid-from-mam-message.escript", ["xml"]),
sender_jid_from_mam_muc_doesnt_crash_on_unsupported_input(Port, fun(B) -> B end).


%% Runs the malformed-input scenario against the script started in "eterm"
%% mode; payloads go through binary_string_to_eterm/1 before being written
%% to the port.
sender_jid_from_mam_muc_doesnt_crash_on_malformed_eterm_input(_Config) ->
    ScriptPath = "tools/migration/sender-jid-from-mam-message.escript",
    ScriptPort = script_helper:start(ScriptPath, ["eterm"]),
    ToETerm = fun binary_string_to_eterm/1,
    sender_jid_from_mam_muc_doesnt_crash_on_malformed_input(ScriptPort, ToETerm).

%% Runs the malformed-input scenario against the script started in "xml"
%% mode; payloads are written to the port unchanged (identity converter).
sender_jid_from_mam_muc_doesnt_crash_on_malformed_xml_input(_Config) ->
    ScriptPath = "tools/migration/sender-jid-from-mam-message.escript",
    ScriptPort = script_helper:start(ScriptPath, ["xml"]),
    Identity = fun(B) -> B end,
    sender_jid_from_mam_muc_doesnt_crash_on_malformed_input(ScriptPort, Identity).

%% Feeds the script a 1-to-1 (non-MUC) message and then valid MUC traffic.
%% Port is the script port; PayloadConverterFun turns a sample XML binary
%% into the input format of the mode the script was started in.
sender_jid_from_mam_muc_doesnt_crash_on_unsupported_input(Port, PayloadConverterFun) ->
    %% A non-groupchat message must be answered with error code -2 ...
    OneToOnePayload = PayloadConverterFun(sample_archived_1_to_1_message()),
    script_helper:write(Port, OneToOnePayload),
    {error, -2} = script_helper:read(Port),

    %% ... and the script must still handle valid payloads afterwards.
    sender_jid_from_mam_muc_data_stream(Port, PayloadConverterFun).

sender_jid_from_mam_muc_doesnt_crash_on_malformed_input(Port, PayloadConverterFun) ->
%% First we expect that the script replies with -1 length (malformed message)....
InvalidPayload = PayloadConverterFun(sample_malformed_muc_message()),
script_helper:write(Port, InvalidPayload),
{error, -1} = script_helper:read(Port),

%% Then we confirm with valid payload that the script actually still works
Expand All @@ -98,6 +120,20 @@ sample_archived_1_to_1_message() ->
<<"<message from='a@localhost' to='b@localhost' type='chat'><body>"
"Zażółć gęślą jaźń</body></message>"/utf8>>.

%% Sample groupchat stanza used by the "malformed input" test cases.
%% The XML itself is well-formed and carries the muc#user `x` element, but
%% that element holds `item_malformed` where the JID extraction looks up
%% `item`, so no sender JID can be extracted; the tests expect the script to
%% answer such input with error code -1.
sample_malformed_muc_message() ->
<<"<message xmlns='jabber:client'
from='coven@chat.shakespeare.lit/firstwitch'
id='162BEBB1-F6DB-4D9A-9BD8-CFDCC801A0B2'
type='groupchat'>
<body>Zażółć gęślą jaźń</body>
<x xmlns='http://jabber.org/protocol/muc#user'>
<item_malformed affiliation='none'
jid='a@localhost'
role='participant' />
</x>
</message>"/utf8>>.


%% Parses an XML binary with exml and returns the parsed element serialized
%% in the external term format. Crashes with badmatch unless exml:parse/1
%% returns {ok, _}.
binary_string_to_eterm(Bin) ->
    {ok, ParsedStanza} = exml:parse(Bin),
    erlang:term_to_binary(ParsedStanza).
Expand Down
42 changes: 35 additions & 7 deletions tools/migration/sender-jid-from-mam-message.escript
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,45 @@ common_loop(ExtractionFun) ->
% We skip trailing \n
InLen = binary_to_integer(binary:part(InLenBin, 0, byte_size(InLenBin) - 1)),
{ok, Data} = file:read(standard_io, InLen),
safe_jid_extraction(ExtractionFun, Data),
safe_jid_extraction(ExtractionFun, Data),
common_loop(ExtractionFun)
end.

%% Extracts the sender JID from an archived MUC message given as an
%% external-term binary encoding an `{xmlel, <<"message">>, Attrs, Children}`
%% tuple.
%%
%% Returns the value of the `jid` attribute of the `item` element found under
%% the `x` child whose attribute list is exactly the muc#user `xmlns`.
%% Throws `not_muc_message` when the message `type` attribute is not
%% <<"groupchat">> or when no such `x` child exists. Any other structural
%% mismatch (no `item`, no `jid`, non-message root) fails with badmatch.
jid_from_eterm(ETerm) ->
    %% NOTE(review): binary_to_term/1 is used without the `safe` option, so the
    %% input is assumed to come from a trusted archive -- confirm before
    %% feeding this script data from an untrusted source.
    {xmlel, <<"message">>, MsgAttrs, MsgChildren} = binary_to_term(ETerm),
    case lists:keyfind(<<"type">>, 1, MsgAttrs) of
        {_, <<"groupchat">>} -> ok;
        _ -> throw(not_muc_message)
    end,
    %% The key is the whole attribute list (element 3 of the xmlel tuple), so
    %% only an `x` element carrying nothing but the xmlns attribute matches.
    case lists:keyfind([{<<"xmlns">>, <<"http://jabber.org/protocol/muc#user">>}], 3, MsgChildren) of
        {xmlel, <<"x">>, _, XChildren} ->
            {xmlel, _, ItemAttrs, _} = lists:keyfind(<<"item">>, 2, XChildren),
            {_, JID} = lists:keyfind(<<"jid">>, 1, ItemAttrs),
            JID;
        _ ->
            throw(not_muc_message)
    end.


%% Extracts the sender JID from an archived MUC message given as an XML binary.
%%
%% An XML declaration is prepended before the text is handed to xmerl_scan.
%% Throws `not_muc_message` when /message/@type is not "groupchat" or when no
%% /message/x element has the muc#user default namespace. Returns the value
%% of /message/x/item/@jid as a binary; fails with badmatch unless exactly
%% one such attribute exists.
jid_from_xml(XML) ->
    Prolog = "<?xml version='1.0' encoding='utf-8'?>",
    {Doc, _} = xmerl_scan:string(Prolog ++ binary_to_list(XML)),
    case xmerl_xpath:string("/message/@type", Doc) of
        [#xmlAttribute{value = "groupchat"}] -> ok;
        _ -> throw(not_muc_message)
    end,
    %% Keep only the <x/> children declared in the muc#user namespace.
    MucUserXs =
        [X || #xmlElement{namespace = #xmlNamespace{default = 'http://jabber.org/protocol/muc#user'}} = X
                  <- xmerl_xpath:string("/message/x", Doc)],
    case MucUserXs of
        [] -> throw(not_muc_message);
        _ -> ok
    end,
    [#xmlAttribute{value = JID}] = xmerl_xpath:string("/message/x/item/@jid", Doc),
    unicode:characters_to_binary(JID).

Expand All @@ -73,6 +97,10 @@ safe_jid_extraction(JIDExtractorFun, Data) ->
OutLenBin = integer_to_binary(OutLen),
ok = file:write(standard_io, <<OutLenBin/binary, $\n, JID/binary>>)
catch
throw:R ->
Extra = #{ type => invalid_message_type, data => Data },
debug(throw, R, erlang:get_stacktrace(), Extra),
ok = io:put_chars("-2\n");
C:R ->
Extra = #{ type => cannot_extract_jid, data => Data },
debug(C, R, erlang:get_stacktrace(), Extra),
Expand Down

0 comments on commit 954ca8a

Please sign in to comment.