From 73e12a128c659cccd0ebb659fd4ff41d7ca4c891 Mon Sep 17 00:00:00 2001 From: Nelson Vides Date: Fri, 12 Jan 2024 15:02:20 +0100 Subject: [PATCH 1/5] Improve documentation --- src/exml.erl | 21 +++++----- src/exml_nif.erl | 2 +- src/exml_query.erl | 94 ++++++++++++++++++++++++++------------------- src/exml_stream.erl | 30 +++++++++++---- 4 files changed, 90 insertions(+), 57 deletions(-) diff --git a/src/exml.erl b/src/exml.erl index f98e1fa..5503453 100644 --- a/src/exml.erl +++ b/src/exml.erl @@ -1,5 +1,5 @@ %%%------------------------------------------------------------------- -%%% @copyright (C) 2011-2021, Erlang Solutions Ltd. +%%% @copyright (C) 2011-2024, Erlang Solutions Ltd. %%% @doc %%% @end %%% Created : 12 Jul 2011 by Michal Ptaszek @@ -31,6 +31,7 @@ -type item() :: element() | attr() | cdata() | exml_stream:start() | exml_stream:stop(). -type prettify() :: pretty | not_pretty. +%% @doc Calculate the length of the original XML payload -spec xml_size(item() | [item()]) -> non_neg_integer(). xml_size([]) -> 0; @@ -54,14 +55,15 @@ xml_size({Key, Value}) -> + 4 % ="" and whitespace before + byte_size(Value). -%% @doc Sort a (list of) `xmlel()'. +%% @doc Sort in ascending order a list of xml `t:item()'. %% %% Sorting is defined as calling `lists:sort/1' at: -%% * all the `xmlel's provided (if there is a list of them) AND -%% * all the `xmlel' elements' attributes recursively (the root and descendants) AND -%% * all the `xmlel' children recursively (the root and descendants). -%% The order is ascending. -%% +%% +%% @end %% The implementation of this function is a subtle modification of %% https://github.com/erszcz/rxml/commit/e8483408663f0bc2af7896e786c1cdea2e86e43d -spec xml_sort(item() | [item()]) -> item() | [item()]. @@ -99,15 +101,16 @@ to_iolist(Element) -> to_pretty_iolist(Element) -> to_iolist(Element, pretty). +%% @doc Parses a binary or a list of binaries into an XML `t:element()'. 
-spec parse(binary() | [binary()]) -> {ok, exml:element()} | {error, any()}. parse(XML) -> exml_nif:parse(XML). -%% @doc Turn a –list of– exml element into iodata for IO interactions. +%% @doc Turn a –list of– exml elements into iodata for IO interactions. %% %% The `Pretty' argument indicates if the generated XML should have new lines and indentation, %% which is useful for the debugging eye, or should rather be a minified version, -%% which is better for IO. +%% which is better for IO performance. -spec to_iolist(exml_stream:element() | [exml_stream:element()], prettify()) -> iodata(). to_iolist(#xmlel{} = Element, Pretty) -> to_binary_nif(Element, Pretty); diff --git a/src/exml_nif.erl b/src/exml_nif.erl index 7823f2a..9509a4e 100644 --- a/src/exml_nif.erl +++ b/src/exml_nif.erl @@ -1,5 +1,5 @@ %%%------------------------------------------------------------------- -%%% @copyright (C) 2018-2022, Erlang Solutions Ltd. +%%% @copyright (C) 2018-2024, Erlang Solutions Ltd. %%% @private %%%------------------------------------------------------------------- diff --git a/src/exml_query.erl b/src/exml_query.erl index 8486f1a..2f7dadc 100644 --- a/src/exml_query.erl +++ b/src/exml_query.erl @@ -1,5 +1,5 @@ %%%------------------------------------------------------------------- -%%% @copyright (C) 2011-2021, Erlang Solutions Ltd. +%%% @copyright (C) 2011-2024, Erlang Solutions Ltd. %%% @doc Easy navigation in XML trees %%% @end %%%------------------------------------------------------------------- @@ -20,23 +20,36 @@ -export([attr/2, attr/3]). -export([cdata/1]). --type element_with_ns() :: {element_with_ns, binary()}. --type element_with_name_and_ns () :: {element_with_ns, binary(), binary()}. --type element_with_attr_of_value () :: {element_with_attr, binary(), binary()}. 
--type path() :: [cdata | %% selects cdata from the element - {element, binary()} | % selects subelement with given name - {attr, binary()} | % selects attr of given name - element_with_ns() | % selects subelement with given namespace - element_with_name_and_ns() | % selects subelement with given name and namespace - element_with_attr_of_value() % selects subelement with given attribute and value - ]. +-type path() :: [cdata | + {attr, binary()} | + {element, binary()} | + {element_with_ns, binary()} | + {element_with_ns, binary(), binary()} | + {element_with_attr, binary(), binary()}]. +%% Path definition in an XML query, each step is defined by one of these types. +%% +%% -export_type([path/0]). -%% @doc Gets the element/attr/cdata contained in the leftmost path -%% Find an element in the xml tree by a path -%% that is pattern-matched against such xml tree structure +%%% @doc Like `path/3' but with default `undefined'. +%%% @see path/3 +-spec path(exml:element(), path()) -> exml:element() | binary() | undefined. +path(Element, Path) -> + path(Element, Path, undefined). + +%% @doc Gets the element/attr/cdata in the leftmost possible described path, +%% or `Default' if there is no match. +%% +%% Find an element in the xml tree by a path that is pattern-matched against such xml tree structure. %% %% For example, given an xml document like %% ``` @@ -59,16 +72,9 @@ %% {element, <<"body">>}, %% cdata}], %% ''' -%% would return `<<"Message from bob to alice">>' +%% will return `<<"Message from bob to alice">>' %% @end --spec path(exml:element(), path()) -> exml:element() | binary() | undefined. -path(Element, Path) -> - path(Element, Path, undefined). - -%% @doc Gets the element/attr/cdata in the leftmost possible described path. -%% Like `path/2' but returns the given `Default' if no element matches the path. -%%% @see path/2 --spec path(exml:element(), path(), Other) -> exml:element() | binary() | Other. 
+-spec path(exml:element(), path(), Default) -> exml:element() | binary() | Default. path(#xmlel{} = Element, [], _) -> Element; path(#xmlel{} = Element, [{element, Name} | Rest], Default) -> @@ -90,8 +96,8 @@ path(#xmlel{} = Element, [{attr, Name}], Default) -> path(_, _, Default) -> Default. -%% @doc gets the elements/attrs/cdatas reachable by the described path -%% @see path/2 +%% @doc Gets the elements/attrs/cdatas reachable by the described path +%% @see path/3 -spec paths(exml:element(), path()) -> [exml:element() | binary()]. paths(#xmlel{} = Element, []) -> [Element]; @@ -114,11 +120,13 @@ paths(#xmlel{attrs = Attrs}, [{attr, Name}]) -> paths(#xmlel{} = El, Path) when is_list(Path) -> erlang:error(invalid_path, [El, Path]). +%% @equiv path(Element, [{element, Name}]) -spec subelement(exml:element(), binary()) -> exml:element() | undefined. subelement(Element, Name) -> subelement(Element, Name, undefined). --spec subelement(exml:element(), binary(), Other) -> exml:element() | Other. +%% @equiv path(Element, [{element, Name}], Default) +-spec subelement(exml:element(), binary(), Default) -> exml:element() | Default. subelement(#xmlel{children = Children}, Name, Default) -> case lists:keyfind(Name, #xmlel.name, Children) of false -> @@ -127,11 +135,13 @@ subelement(#xmlel{children = Children}, Name, Default) -> Result end. +%% @equiv path(Element, [{element_with_ns, NS}]) -spec subelement_with_ns(exml:element(), binary()) -> exml:element() | undefined. subelement_with_ns(Element, NS) -> subelement_with_ns(Element, NS, undefined). --spec subelement_with_ns(exml:element(), binary(), Other) -> exml:element() | Other. +%% @equiv path(Element, [{element_with_ns, NS}], Default) +-spec subelement_with_ns(exml:element(), binary(), Default) -> exml:element() | Default. subelement_with_ns(#xmlel{children = Children}, NS, Default) -> child_with_ns(Children, NS, Default). 
@@ -147,17 +157,19 @@ child_with_ns([#xmlel{} = Element | Rest], NS, Default) -> child_with_ns([_ | Rest], NS, Default) -> child_with_ns(Rest, NS, Default). +%% @equiv path(Element, [{element_with_attr, AttrName, AttrValue}]) -spec subelement_with_attr(exml:element(), AttrName :: binary(), AttrValue :: binary()) -> exml:element() | undefined. subelement_with_attr(Element, AttrName, AttrValue) -> subelement_with_attr(Element, AttrName, AttrValue, undefined). --spec subelement_with_attr(Element, AttrName, AttrValue, Other) -> SubElement | Other when +%% @equiv path(Element, [{element_with_attr, AttrName, AttrValue}], Default) +-spec subelement_with_attr(Element, AttrName, AttrValue, Default) -> SubElement | Default when Element :: exml:element(), AttrName :: binary(), AttrValue :: binary(), SubElement :: exml:element(), - Other :: term(). + Default :: term(). subelement_with_attr(#xmlel{children = Children}, AttrName, AttrValue, Default) -> child_with_attr(Children, AttrName, AttrValue, Default). @@ -173,14 +185,15 @@ child_with_attr([#xmlel{} = Element | Rest], AttrName, AttrVal, Default) -> child_with_attr([_ | Rest], AttrName, AttrVal, Default) -> child_with_attr(Rest, AttrName, AttrVal, Default). - +%% @equiv path(Element, [{element_with_ns, Name, NS}]) -spec subelement_with_name_and_ns(exml:element(), binary(), binary()) -> exml:element() | undefined. subelement_with_name_and_ns(Element, Name, NS) -> subelement_with_name_and_ns(Element, Name, NS, undefined). --spec subelement_with_name_and_ns(exml:element(), binary(), binary(), Other) -> - exml:element() | Other. +%% @equiv path(Element, [{element_with_ns, Name, NS}], Default) +-spec subelement_with_name_and_ns(exml:element(), binary(), binary(), Default) -> + exml:element() | Default. subelement_with_name_and_ns(Element, Name, NS, Default) -> case subelements_with_name_and_ns(Element, Name, NS) of [] -> @@ -189,6 +202,7 @@ subelement_with_name_and_ns(Element, Name, NS, Default) -> FirstElem end. 
+%% @equiv paths(Element, [{element, Name}]) -spec subelements(exml:element(), binary()) -> [exml:element()]. subelements(#xmlel{children = Children}, Name) -> lists:filter(fun(#xmlel{name = N}) when N =:= Name -> @@ -197,6 +211,7 @@ subelements(#xmlel{children = Children}, Name) -> false end, Children). +%% @equiv paths(Element, [{element_with_ns, NS}]) -spec subelements_with_ns(exml:element(), binary()) -> [exml:element()]. subelements_with_ns(#xmlel{children = Children}, NS) -> lists:filter(fun(#xmlel{} = Child) -> @@ -205,6 +220,7 @@ subelements_with_ns(#xmlel{children = Children}, NS) -> false end, Children). +%% @equiv paths(Element, [{element_with_ns, Name, NS}]) -spec subelements_with_name_and_ns(exml:element(), binary(), binary()) -> [exml:element()]. subelements_with_name_and_ns(#xmlel{children = Children}, Name, NS) -> lists:filter(fun(#xmlel{name = SubName} = Child) -> @@ -214,6 +230,7 @@ subelements_with_name_and_ns(#xmlel{children = Children}, Name, NS) -> false end, Children). +%% @equiv paths(Element, [{element_with_attr, AttrName, AttrValue}]) -spec subelements_with_attr(exml:element(), binary(), binary()) -> [exml:element()]. subelements_with_attr(#xmlel{children = Children}, AttrName, Value) -> lists:filter(fun(#xmlel{} = Child) -> @@ -222,22 +239,19 @@ subelements_with_attr(#xmlel{children = Children}, AttrName, Value) -> false end, Children). +%% @equiv path(Element, [cdata]) -spec cdata(exml:element()) -> binary(). cdata(#xmlel{children = Children}) -> list_to_binary([C || #xmlcdata{content = C} <- Children]). -%% @doc Query attribute value by name. -%% Returns the attribute value associated with `Name' if `Element' contains such attribute. -%% Otherwise returns `undefined' +%% @see attr/3 +%% @equiv path(Element, [{attr, Name}]) -spec attr(exml:element(), binary()) -> binary() | undefined. attr(Element, Name) -> attr(Element, Name, undefined). -%% @doc Query attribute value by name. 
-%% Returns the attribute value associated with `Name' if `Element' contains such attribute. -%% Otherwise returns `Default' -%% @see attr/2 --spec attr(exml:element(), binary(), Other) -> binary() | Other. +%% @equiv path(Element, [{attr, Name}], Default) +-spec attr(exml:element(), binary(), Default) -> binary() | Default. attr(#xmlel{attrs = Attrs}, Name, Default) -> case lists:keyfind(Name, 1, Attrs) of {Name, Value} -> diff --git a/src/exml_stream.erl b/src/exml_stream.erl index 9a276f3..998c071 100644 --- a/src/exml_stream.erl +++ b/src/exml_stream.erl @@ -26,23 +26,34 @@ }). -type start() :: #xmlstreamstart{}. +%% `#xmlstreamstart{}' record. -type stop() :: #xmlstreamend{}. --type element() :: exml_nif:stream_element(). +%% `#xmlstreamend{}' record. -type parser() :: #parser{}. -%% infinite_stream - No distinct "stream start" or "stream end", only #xmlel{} will be returned. -%% max_element_size - Specifies maximum byte size of any parsed XML element. -%% The only exception is the "stream start" element, -%% for which only the size of the opening tag is limited. +%% `#parser{}' record. +-type element() :: exml_nif:stream_element(). +%% One of `t:start()', `t:stop()' or `t:exml:element()'. + -type parser_opt() :: {infinite_stream, boolean()} | {max_element_size, non_neg_integer()}. +%% Parser options +%% +%% %%%=================================================================== %%% Public API %%%=================================================================== +%% @see new_parser/1 -spec new_parser() -> {ok, parser()} | {error, any()}. new_parser() -> new_parser([]). +%% @doc Creates a new parser -spec new_parser([parser_opt()]) -> {ok, parser()} | {error, any()}. new_parser(Opts)-> MaxElementSize = proplists:get_value(max_element_size, Opts, 0), @@ -54,8 +65,9 @@ new_parser(Opts)-> Error end. --spec parse(parser(), binary()) -> {ok, parser(), [exml_stream:element()]} - | {error, Reason :: any()}. 
+%% @doc Makes a parser parse input +-spec parse(parser(), binary()) -> + {ok, parser(), [exml_stream:element()]} | {error, Reason :: any()}. parse(Parser, Input) when is_binary(Input) -> #parser{event_parser = EventParser, buffer = OldBuf} = Parser, Buffer = OldBuf ++ [Input], @@ -66,11 +78,15 @@ parse(Parser, Input) when is_binary(Input) -> Other end. +%% @doc Resets the parser's buffers -spec reset_parser(parser()) -> {ok, parser()}. reset_parser(#parser{event_parser = NifParser} = Parser) -> exml_nif:reset_parser(NifParser), {ok, Parser#parser{buffer = []}}. +%% @doc Free a parser +%% +%% Kept for backwards-compatibility, it is a no-op. -spec free_parser(parser()) -> ok. free_parser(#parser{}) -> ok. From b8e35d13e509035c1048ea182d6c2af20d7a9ec4 Mon Sep 17 00:00:00 2001 From: Nelson Vides Date: Fri, 12 Jan 2024 15:03:13 +0100 Subject: [PATCH 2/5] Optimise not looking at the end of the list if it is not a stream --- src/exml.erl | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/exml.erl b/src/exml.erl index 5503453..486ae47 100644 --- a/src/exml.erl +++ b/src/exml.erl @@ -111,22 +111,9 @@ parse(XML) -> %% The `Pretty' argument indicates if the generated XML should have new lines and indentation, %% which is useful for the debugging eye, or should rather be a minified version, %% which is better for IO performance. --spec to_iolist(exml_stream:element() | [exml_stream:element()], prettify()) -> iodata(). +-spec to_iolist(cdata() | exml_stream:element() | [exml_stream:element()], prettify()) -> iodata(). 
to_iolist(#xmlel{} = Element, Pretty) -> to_binary_nif(Element, Pretty); -to_iolist([Element], Pretty) -> - to_iolist(Element, Pretty); -to_iolist([Head | _] = Elements, Pretty) -> - [Last | RevChildren] = lists:reverse(tl(Elements)), - case {Head, Last} of - {#xmlstreamstart{name = Name, attrs = Attrs}, - #xmlstreamend{name = Name}} -> - Element = #xmlel{name = Name, attrs = Attrs, - children = lists:reverse(RevChildren)}, - to_binary_nif(Element, Pretty); - _ -> - [to_iolist(El, Pretty) || El <- Elements] - end; to_iolist(#xmlstreamstart{name = Name, attrs = Attrs}, _Pretty) -> Result = to_binary_nif(#xmlel{name = Name, attrs = Attrs}, not_pretty), FrontSize = byte_size(Result) - 2, @@ -135,7 +122,21 @@ to_iolist(#xmlstreamstart{name = Name, attrs = Attrs}, _Pretty) -> to_iolist(#xmlstreamend{name = Name}, _Pretty) -> [<<"</">>, Name, <<">">>]; to_iolist(#xmlcdata{content = Content}, _Pretty) -> - exml_nif:escape_cdata(Content). + exml_nif:escape_cdata(Content); +to_iolist([Element], Pretty) -> + to_iolist(Element, Pretty); +to_iolist([#xmlstreamstart{name = Name, attrs = Attrs} | Tail] = Elements, Pretty) -> + [Last | RevChildren] = lists:reverse(Tail), + case Last of + #xmlstreamend{name = Name} -> + %% Add extra nesting for streams so pretty-printing would be indented properly + Element = #xmlel{name = Name, attrs = Attrs, children = lists:reverse(RevChildren)}, + to_binary_nif(Element, Pretty); + _ -> + [to_iolist(El, Pretty) || El <- Elements] + end; +to_iolist(Elements, Pretty) when is_list(Elements) -> + [to_iolist(El, Pretty) || El <- Elements]. -spec to_binary_nif(element(), prettify()) -> binary().
to_binary_nif(#xmlel{} = Element, Pretty) -> From e0bd57399c71a5b360e7c46cb28db93905ca88e6 Mon Sep 17 00:00:00 2001 From: Nelson Vides Date: Fri, 12 Jan 2024 15:05:32 +0100 Subject: [PATCH 3/5] Upgrade github actions --- .github/workflows/ci.yml | 43 ++++++++++++++++++++++------------------ rebar.config | 2 +- src/exml.app.src | 2 +- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dd54369..155cd05 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,28 +9,33 @@ on: jobs: test: - name: OTP ${{matrix.otp}} + name: OTP ${{matrix.otp_vsn}} strategy: matrix: - otp: ['25.0', '24.3', '23.3', '22.3', '21.3'] - runs-on: 'ubuntu-20.04' + otp_vsn: ['26.2', '25.3', '24.3'] + rebar_vsn: ['3.22.0'] + runs-on: 'ubuntu-22.04' env: OTPVER: ${{ matrix.otp }} steps: - - uses: actions/checkout@v2 - - uses: ErlGang/setup-erlang@v1.0.0 + - uses: actions/checkout@v4 + - uses: erlef/setup-beam@v1 with: - otp-version: ${{ matrix.otp }} - - run: make rebar3 - - run: make deps - - run: make test - - run: make dialyzer - if: ${{ matrix.otp == '25.0' }} - - run: make codecov - if: ${{ matrix.otp == '25.0' }} - - run: make gcov - if: ${{ matrix.otp == '25.0' }} - - run: pip install --user codecov - if: ${{ matrix.otp == '25.0' }} - - run: /home/runner/.local/bin/codecov - if: ${{ matrix.otp == '25.0' }} + otp-version: ${{ matrix.otp_vsn }} + rebar3-version: ${{ matrix.rebar_vsn }} + - uses: actions/cache@v3 + name: Cache + with: + path: _build + key: ${{ runner.os }}-build-${{ matrix.otp_vsn }}-${{ hashFiles(format('rebar.lock')) }}-1 + restore-keys: ${{ runner.os }}-build-${{ matrix.otp_vsn }}-1- + - run: rebar3 as test compile -d + - run: rebar3 eunit + - run: rebar3 dialyzer + - run: rebar3 as test codecov analyze + - run: gcov -o c_src exml + - uses: codecov/codecov-action@v3 + with: + name: Upload coverage reports to Codecov + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: true diff 
--git a/rebar.config b/rebar.config index 30fa210..7c017f5 100644 --- a/rebar.config +++ b/rebar.config @@ -14,7 +14,7 @@ {proper, "1.4.0"} ]}, {plugins, [ - {rebar3_codecov, "0.3.0"} + {rebar3_codecov, "0.6.0"} ]}, {port_env, [ {"CXXFLAGS", "$CXXFLAGS -O3 -std=c++11 -g -Wall -Wextra -fPIC --coverage"}, diff --git a/src/exml.app.src b/src/exml.app.src index 8745c13..d3d734b 100644 --- a/src/exml.app.src +++ b/src/exml.app.src @@ -10,7 +10,7 @@ {modules, []}, {maintainers, ["ESL"]}, {pkg_name, "hexml"}, - {licenses, ["Apache 2.0", "Boost Software License 1.0 (C++ code)", "GPL (tests)"]}, + {licenses, ["Apache-2.0", "BSL-1.0", "GPL (tests)"]}, {links, [{"GitHub", "https://github.com/esl/exml/"}]}, {exclude_files, ["c_src/exml.d"]} ]}. From a3fb2cc438de7f67a5840a7d0c56cab9f8e6bfde Mon Sep 17 00:00:00 2001 From: Nelson Vides Date: Fri, 12 Jan 2024 15:10:07 +0100 Subject: [PATCH 4/5] Add doc dir to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 23867df..ae2791b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ _build/ *.beam *.lock /rebar3 +doc/ From 3df87ca9334f2f95b9f8d8d18e7cfb0153a84fcb Mon Sep 17 00:00:00 2001 From: Nelson Vides Date: Fri, 12 Jan 2024 15:10:34 +0100 Subject: [PATCH 5/5] Remove now unused Makefile --- Makefile | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 Makefile diff --git a/Makefile b/Makefile deleted file mode 100644 index fb7df3f..0000000 --- a/Makefile +++ /dev/null @@ -1,45 +0,0 @@ -.PHONY: rel deps test - -REBARVER = 3.15.2 -ifeq ($(OTPVER),24.3) - REBARVER = 3.17.0 -endif -ifeq ($(OTPVER),25.0) - REBARVER = 3.18.0 -endif - -all: deps compile - -compile: rebar3 - ./rebar3 compile - -deps: rebar3 - ./rebar3 get-deps - -clean: rebar3 - ./rebar3 clean && rm -f c_src/exml.gc* - -test-deps: rebar3 - ./rebar3 get-deps - -test-compile: rebar3 test-deps - ./rebar3 as test compile - -test: test-compile - ./rebar3 eunit - 
-coverage-report: _build/test/cover/eunit.coverdata - ./rebar3 as test coveralls send - -codecov: _build/test/cover/eunit.coverdata - ./rebar3 as test codecov analyze - -gcov: test-compile - gcov -o c_src exml - -rebar3: - wget https://github.com/erlang/rebar3/releases/download/${REBARVER}/rebar3 &&\ - chmod u+x rebar3 - -dialyzer: rebar3 - ./rebar3 dialyzer