diff --git a/.gitignore b/.gitignore index 667578a3..fcfe912a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,7 @@ cover cover_* .eqc-info leveled_data/* +compile_commands.json +*parser.erl +*lexer.erl elp diff --git a/README.md b/README.md index f5b8efae..7dc7898a 100644 --- a/README.md +++ b/README.md @@ -86,4 +86,4 @@ To have rebar3 execute the full set of tests, run: For those with a Quickcheck license, property-based tests can also be run using: -```./rebar3 as eqc do eunit --module=leveled_simpleeqc, eunit --module=leveled_statemeqc``` +```./rebar3 as eqc do eunit``` diff --git a/include/leveled.hrl b/include/leveled.hrl index 6295b4c5..644f347f 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -70,6 +70,12 @@ %% Inker key type used for tombstones %%%============================================================================ +%%%============================================================================ +%%% Test +%%%============================================================================ + +-define(EQC_TIME_BUDGET, 120). + %%%============================================================================ %%% Helper Function %%%============================================================================ diff --git a/rebar.config b/rebar.config index 1953a964..b11ab6b6 100644 --- a/rebar.config +++ b/rebar.config @@ -2,11 +2,13 @@ {xref_checks, [undefined_function_calls,undefined_functions, - locals_not_used, deprecated_function_calls, deprecated_functions]}. {cover_excl_mods, - [testutil, + [leveled_filterlexer, leveled_filterparser, + leveled_evallexer, leveled_evalparser, + leveled_setoplexer, leveled_setopparser, + testutil, appdefined_SUITE, basic_SUITE, iterator_SUITE, perf_SUITE, recovery_SUITE, riak_SUITE, tictac_SUITE]}. diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 4aee189a..ea3cd2fe 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -82,6 +82,7 @@ -export([ book_returnfolder/2, book_indexfold/5, + book_multiindexfold/5, book_bucketlist/4, book_keylist/3, book_keylist/4, @@ -695,23 +696,24 @@ book_returnfolder(Pid, RunnerType) -> %% be interrupted by a throw, which will be forwarded to the worker (whilst %% still closing down the snapshot). This may be used, for example, to %% curtail a fold in the application at max_results --spec book_indexfold(pid(), - Constraint:: {Bucket, StartKey}, - FoldAccT :: {FoldFun, Acc}, - Range :: {IndexField, Start, End}, - TermHandling :: {ReturnTerms, TermRegex}) -> - {async, Runner::fun(() -> dynamic())} - when Bucket::term(), - Key :: term(), - StartKey::term(), - FoldFun::fun((Bucket, Key | {IndexVal, Key}, Acc) -> Acc), - Acc::dynamic(), - IndexField::term(), - IndexVal::term(), - Start::IndexVal, - End::IndexVal, - ReturnTerms::boolean(), - TermRegex :: leveled_codec:regular_expression(). +-spec book_indexfold( + pid(), + Constraint:: {Bucket, StartKey}, + FoldAccT :: {FoldFun, Acc}, + Range :: {IndexField, Start, End}, + TermHandling :: {ReturnTerms, TermExpression}) -> + {async, Runner::fun(() -> dynamic())} + when Bucket::term(), + Key :: term(), + StartKey::term(), + FoldFun::fun((Bucket, Key | {IndexVal, Key}, Acc) -> Acc), + Acc::dynamic(), + IndexField::term(), + IndexVal::term(), + Start::IndexVal, + End::IndexVal, + ReturnTerms::boolean()|binary(), + TermExpression :: leveled_codec:term_expression(). 
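The widened `TermHandling` is easier to read with a usage sketch. This is hedged, not part of the changeset: the bookie pid, bucket and index data are illustrative, mirroring the `ttl_test/0` changes further down.

```erlang
%% A plain index fold, as before; the second TermHandling element may now
%% also carry a {query, EvalFun, FilterFun} term_expression(), and the
%% first may be a binary naming a captured term to return.
FoldKeysFun = fun(_Bucket, TermOrKey, Acc) -> [TermOrKey | Acc] end,
{async, Runner} =
    leveled_bookie:book_indexfold(
        Bookie,                                % assumed: a running bookie pid
        {<<"Bucket">>, null},                  % fold the whole bucket
        {FoldKeysFun, []},                     % FoldAccT
        {<<"idx1_bin">>, <<"f8">>, <<"f9">>},  % index field and term range
        {true, undefined}),                    % return terms, no expression
TermKeyPairs = Runner().
```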
book_indexfold(Pid, Constraint, FoldAccT, Range, TermHandling) when is_tuple(Constraint) -> @@ -727,6 +729,23 @@ book_indexfold(Pid, Bucket, FoldAccT, Range, TermHandling) -> leveled_log:log(b0019, [Bucket]), book_indexfold(Pid, {Bucket, null}, FoldAccT, Range, TermHandling). +-type query() + :: {binary(), binary(), binary(), leveled_codec:term_expression()}. +-type combo_fun() + :: fun((list(sets:set(leveled_codec:key()))) + -> sets:set(leveled_codec:key())). + +-spec book_multiindexfold( + pid(), + leveled_codec:key(), + fun((leveled_codec:key(), leveled_codec:key(), term()) -> term()), + list({non_neg_integer(), query()}), + combo_fun()) + -> {async, fun(() -> term())}. +book_multiindexfold(Pid, Bucket, FoldAccT, Queries, ComboFun) -> + RunnerType = + {multi_index_query, Bucket, FoldAccT, Queries, ComboFun}, + book_returnfolder(Pid, RunnerType). %% @doc list buckets. Folds over the ledger only. Given a `Tag' folds %% over the keyspace calling `FoldFun' from `FoldAccT' for each @@ -828,7 +847,7 @@ book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT) -> StartKey :: Key, EndKey :: Key, Key :: term(), - TermRegex :: leveled_codec:regular_expression(), + TermRegex :: leveled_codec:term_expression(), Runner :: fun(() -> Acc). book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT, TermRegex) -> RunnerType = {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}, @@ -1997,22 +2016,53 @@ snaptype_by_presence(false) -> %% Get an {async, Runner} for a given fold type. Fold types have different %% tuple inputs get_runner(State, {index_query, Constraint, FoldAccT, Range, TermHandling}) -> - {IdxFld, StartT, EndT} = Range, - {Bucket, ObjKey0} = - case Constraint of - {B, SK} -> - {B, SK}; - B -> - {B, null} - end, - StartKey = - leveled_codec:to_querykey(Bucket, ObjKey0, ?IDX_TAG, IdxFld, StartT), - EndKey = - leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, IdxFld, EndT), + {StartKey, EndKey} = index_range(Constraint, Range), SnapFun = return_snapfun(State, ledger, {StartKey, EndKey}, false, false), - leveled_runner:index_query(SnapFun, - {StartKey, EndKey, TermHandling}, - FoldAccT); + leveled_runner:index_query( + SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT); +get_runner( + State, + {multi_index_query, Bucket, FoldAccT, Queries, ComboFun}) -> + {FoldFun, InitAcc} = FoldAccT, + KeyFolder = fun(_B, K, Acc) -> [K|Acc] end, + QueryRunners = + lists:map( + fun({SetId, {IdxFld, StartTerm, EndTerm, Expr}}) -> + {SK, EK} = + index_range( + {Bucket, null}, {IdxFld, StartTerm, EndTerm}), + SnapFun = + return_snapfun(State, ledger, {SK, EK}, false, true), + {async, Runner} = + leveled_runner:index_query( + SnapFun, {SK, EK, {false, Expr}}, {KeyFolder, []} + ), + {SetId, Runner} + end, + Queries + ), + OverallRunner = + fun() -> + FinalSet = + ComboFun( + maps:from_list( + lists:map( + fun({SetId, R}) -> + case R() of + KLR when is_list(KLR) -> + {SetId, sets:from_list(KLR)} + end + end, + QueryRunners) + ) + ), + lists:foldl( + fun(K, Acc) -> FoldFun(Bucket, K, Acc) end, + InitAcc, + sets:to_list(FinalSet) + ) + end, + {async, OverallRunner}; get_runner(State, {keylist, Tag, FoldAccT}) -> SnapFun = return_snapfun(State, ledger, no_lookup, true, true), leveled_runner:bucketkey_query(SnapFun, Tag, null, FoldAccT); @@ -2021,40 +2071,49 @@ get_runner(State, {keylist, Tag, Bucket, FoldAccT}) -> leveled_runner:bucketkey_query(SnapFun, Tag, Bucket, FoldAccT); get_runner(State, {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}) -> SnapFun = return_snapfun(State, ledger, no_lookup, true, true), - 
leveled_runner:bucketkey_query(SnapFun, - Tag, Bucket, KeyRange, - FoldAccT, TermRegex); + leveled_runner:bucketkey_query( + SnapFun, Tag, Bucket, KeyRange, FoldAccT, TermRegex); %% Set of runners for object or metadata folds -get_runner(State, - {foldheads_allkeys, - Tag, FoldFun, - JournalCheck, SnapPreFold, SegmentList, - LastModRange, MaxObjectCount}) -> +get_runner( + State, + {foldheads_allkeys, + Tag, FoldFun, + JournalCheck, SnapPreFold, SegmentList, + LastModRange, MaxObjectCount}) -> SnapType = snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, no_lookup, true, SnapPreFold), - leveled_runner:foldheads_allkeys(SnapFun, - Tag, FoldFun, - JournalCheck, SegmentList, - LastModRange, MaxObjectCount); -get_runner(State, - {foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) -> - get_runner(State, - {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order}); -get_runner(State, - {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order}) -> - SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold), - leveled_runner:foldobjects_allkeys(SnapFun, Tag, FoldFun, key_order); -get_runner(State, - {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, sqn_order}) -> - SnapFun = return_snapfun(State, store, undefined, true, SnapPreFold), - leveled_runner:foldobjects_allkeys(SnapFun, Tag, FoldFun, sqn_order); -get_runner(State, - {foldheads_bybucket, - Tag, - BucketList, bucket_list, - FoldFun, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}) -> + leveled_runner:foldheads_allkeys( + SnapFun, + Tag, + FoldFun, + JournalCheck, + SegmentList, + LastModRange, + MaxObjectCount); +get_runner(State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) -> + get_runner( + State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order}); +get_runner(State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, Order}) -> + case Order of + key_order -> + SnapFun = + return_snapfun(State, store, no_lookup, true, SnapPreFold), + leveled_runner:foldobjects_allkeys( + SnapFun, Tag, FoldFun, key_order); + sqn_order -> + SnapFun = + return_snapfun(State, store, undefined, true, SnapPreFold), + leveled_runner:foldobjects_allkeys( + SnapFun, Tag, FoldFun, sqn_order) + end; +get_runner( + State, + {foldheads_bybucket, + Tag, + BucketList, bucket_list, + FoldFun, + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}) -> KeyRangeFun = fun(Bucket) -> {StartKey, EndKey, _} = return_ledger_keyrange(Tag, Bucket, all), @@ -2062,50 +2121,52 @@ get_runner(State, end, SnapType = snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, no_lookup, true, SnapPreFold), - leveled_runner:foldheads_bybucket(SnapFun, - Tag, - lists:map(KeyRangeFun, BucketList), - FoldFun, - JournalCheck, - SegmentList, - LastModRange, MaxObjectCount); -get_runner(State, - {foldheads_bybucket, - Tag, - Bucket, KeyRange, - FoldFun, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}) -> + leveled_runner:foldheads_bybucket( + SnapFun, + Tag, + lists:map(KeyRangeFun, BucketList), + FoldFun, + JournalCheck, + SegmentList, + LastModRange, MaxObjectCount); +get_runner( + State, + {foldheads_bybucket, + Tag, + Bucket, KeyRange, + FoldFun, + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}) -> {StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange), SnapType = snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, SnapQ, true, SnapPreFold), - 
leveled_runner:foldheads_bybucket(SnapFun, - Tag, - [{StartKey, EndKey}], - FoldFun, - JournalCheck, - SegmentList, - LastModRange, MaxObjectCount); -get_runner(State, - {foldobjects_bybucket, - Tag, Bucket, KeyRange, - FoldFun, - SnapPreFold}) -> + leveled_runner:foldheads_bybucket( + SnapFun, + Tag, + [{StartKey, EndKey}], + FoldFun, + JournalCheck, + SegmentList, + LastModRange, MaxObjectCount); +get_runner( + State, + {foldobjects_bybucket, + Tag, Bucket, KeyRange, + FoldFun, + SnapPreFold}) -> {StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange), SnapFun = return_snapfun(State, store, SnapQ, true, SnapPreFold), - leveled_runner:foldobjects_bybucket(SnapFun, - Tag, - [{StartKey, EndKey}], - FoldFun); -get_runner(State, - {foldobjects_byindex, - Tag, Bucket, {Field, FromTerm, ToTerm}, - FoldObjectsFun, - SnapPreFold}) -> + leveled_runner:foldobjects_bybucket( + SnapFun, Tag, [{StartKey, EndKey}], FoldFun); +get_runner( + State, + {foldobjects_byindex, + Tag, Bucket, {Field, FromTerm, ToTerm}, + FoldObjectsFun, + SnapPreFold}) -> SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold), - leveled_runner:foldobjects_byindex(SnapFun, - {Tag, Bucket, Field, FromTerm, ToTerm}, - FoldObjectsFun); + leveled_runner:foldobjects_byindex( + SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldObjectsFun); get_runner(State, {bucket_list, Tag, FoldAccT}) -> {FoldBucketsFun, Acc} = FoldAccT, SnapFun = return_snapfun(State, ledger, no_lookup, false, false), @@ -2119,6 +2180,21 @@ get_runner(State, DeprecatedQuery) -> get_deprecatedrunner(State, DeprecatedQuery). +index_range(Constraint, Range) -> + {IdxFld, StartT, EndT} = Range, + {Bucket, ObjKey0} = + case Constraint of + {B, SK} -> + {B, SK}; + B -> + {B, null} + end, + StartKey = + leveled_codec:to_querykey(Bucket, ObjKey0, ?IDX_TAG, IdxFld, StartT), + EndKey = + leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, IdxFld, EndT), + {StartKey, EndKey}. + -spec get_deprecatedrunner(book_state(), tuple()) -> {async, fun(() -> term())}. %% @doc @@ -2799,17 +2875,16 @@ ttl_test() -> KeyList = IndexFolder(), ?assertMatch(20, length(KeyList)), - {ok, Regex} = re:compile("f8"), + {ok, Regex} = leveled_util:regex_compile("f8"), {async, IndexFolderTR} = book_returnfolder( Bookie1, - { - index_query, - <<"Bucket">>, - {FoldKeysFun, []}, - {<<"idx1_bin">>, <<"f8">>, <<"f9">>}, - {true, Regex}} - ), + {index_query, + <<"Bucket">>, + {FoldKeysFun, []}, + {<<"idx1_bin">>, <<"f8">>, <<"f9">>}, + {true, Regex}} + ), TermKeyList = IndexFolderTR(), ?assertMatch(10, length(TermKeyList)), diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 44e65ae0..9588ac31 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -128,10 +128,17 @@ {index_specs(), infinity|integer()}. % {KeyChanges, TTL} -type maybe_lookup() :: lookup|no_lookup. --type regular_expression() :: - {re_pattern, term(), term(), term(), term()}|undefined. - % first element must be re_pattern, but tuple may change legnth with - % versions +-type actual_regex() :: + {re_pattern, term(), term(), term(), term()}. +-type capture_value() :: binary()|integer(). +-type query_filter_fun() :: + fun((#{binary() => capture_value()}) -> boolean()). +-type query_eval_fun() :: + fun((binary(), binary()) -> #{binary() => capture_value()}). +-type query_expression() :: + {query, query_eval_fun(), query_filter_fun()}. +-type term_expression() :: + actual_regex()|undefined|query_expression(). 
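The new `term_expression()` widens the old `regular_expression()`: alongside a compiled regex or `undefined`, an index fold can now carry a `{query, EvalFun, FilterFun}` pair. A hedged sketch of assembling one from the new modules added later in this diff (the expression strings are illustrative):

```erlang
%% EvalFun captures attributes from the index term and object key;
%% FilterFun then decides whether the entry is accumulated.
EvalFun =
    leveled_eval:generate_eval_function(
        "delim($term, \"|\", ($fn, $dob))", maps:new()),
FilterFun =
    leveled_filter:generate_filter_function(
        "$dob >= \"19740301\"", maps:new()),
TermExpression = {query, EvalFun, FilterFun}.
%% As TermHandling, {<<"fn">>, TermExpression} folds over
%% {CapturedFn, ObjKey} pairs rather than raw index terms.
```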
-type value_fetcher() :: {fun((pid(), leveled_codec:journal_key()) -> any()), @@ -174,7 +181,8 @@ maybe_lookup/0, last_moddate/0, lastmod_range/0, - regular_expression/0, + term_expression/0, + actual_regex/0, value_fetcher/0, proxy_object/0, slimmed_key/0 @@ -308,7 +316,7 @@ maybe_accumulate( maybe_accumulate(T, Acc, Count, Filter, AccFun). -spec accumulate_index( - {boolean(), undefined|leveled_runner:mp()}, + {boolean()|binary(), term_expression()}, leveled_runner:fold_keys_fun()) -> leveled_penciller:pclacc_fun(). accumulate_index({false, undefined}, FoldKeysFun) -> @@ -323,11 +331,21 @@ accumulate_index({true, undefined}, FoldKeysFun) -> when IdxValue =/= null, ObjKey =/= null -> FoldKeysFun(Bucket, {IdxValue, ObjKey}, Acc) end; +accumulate_index( + {AddTerm, {query, EvalFun, FilterFun}}, FoldKeysFun) -> + fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) + when is_binary(ObjKey) -> + CptMap = EvalFun(IdxValue, ObjKey), + check_captured_terms( + CptMap, + FilterFun, AddTerm, FoldKeysFun, + Bucket, IdxValue, ObjKey, + Acc) + end; accumulate_index({AddTerm, TermRegex}, FoldKeysFun) -> - fun( - {?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) - when IdxValue =/= null, ObjKey =/= null -> - case re:run(IdxValue, TermRegex) of + fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) + when IdxValue =/= null, ObjKey =/= null, ?IS_DEF(TermRegex) -> + case leveled_util:regex_run(IdxValue, TermRegex, []) of nomatch -> Acc; _ -> @@ -340,6 +358,29 @@ accumulate_index({AddTerm, TermRegex}, FoldKeysFun) -> end end. +check_captured_terms( + CptMap, FilterFun, AddTerm, FoldKeysFun, B, IdxValue, ObjKey, Acc) -> + case FilterFun(CptMap) of + true -> + case AddTerm of + true -> + FoldKeysFun(B, {IdxValue, ObjKey}, Acc); + false -> + FoldKeysFun(B, ObjKey, Acc); + CptKey when is_binary(CptKey) -> + case maps:get(CptKey, CptMap, undefined) of + undefined -> + Acc; + CptValue -> + FoldKeysFun(B, {CptValue, ObjKey}, Acc) + end + end; + false -> + Acc + end. + + + -spec key_dominates(ledger_kv(), ledger_kv()) -> boolean(). %% @doc %% When comparing two keys in the ledger need to find if one key comes before diff --git a/src/leveled_eval.erl b/src/leveled_eval.erl new file mode 100644 index 00000000..2afde87b --- /dev/null +++ b/src/leveled_eval.erl @@ -0,0 +1,855 @@ +%% -------- Eval Functions --------- +%% +%% Support for different eval expressions within leveled +%% + +-module(leveled_eval). + +-export([generate_eval_function/2]). + +%%%============================================================================ +%%% External API +%%%============================================================================ + +-spec generate_eval_function( + string(), map()) -> fun((binary(), binary()) -> map()). +generate_eval_function(EvalString, Substitutions) -> + {ok, ParsedEval} = generate_eval_expression(EvalString, Substitutions), + fun(Term, Key) -> + apply_eval(ParsedEval, Term, Key, maps:new()) + end. + +%%%============================================================================ +%%% Internal functions +%%%============================================================================ + + +generate_eval_expression(EvalString, Substitutions) -> + CodePointList = unicode:characters_to_list(EvalString), + {ok, Tokens, _EndLine} = leveled_evallexer:string(CodePointList), + case leveled_filter:substitute_items(Tokens, Substitutions, []) of + {error, Error} -> + {error, Error}; + UpdTokens -> + leveled_evalparser:parse(UpdTokens) + end. 
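A minimal round trip through the entry point above, with illustrative input (the pipe feeds each eval's attribute map into the next):

```erlang
%% generate_eval_function/2 returns fun(Term, Key) -> AttrMap.
EvalFun =
    leveled_eval:generate_eval_function(
        "delim($term, \"|\", ($fn, $dob)) | index($dob, 0, 4, $yob)",
        maps:new()),
AttrMap = EvalFun(<<"SMITH|19861216">>, <<"9000000001">>),
%% => #{<<"fn">> => <<"SMITH">>,
%%      <<"dob">> => <<"19861216">>,
%%      <<"yob">> => <<"1986">>}
```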
+ + +apply_eval({eval, Eval}, Term, Key, AttrMap) -> + apply_eval(Eval, Term, Key, AttrMap); +apply_eval({'PIPE', Eval1, 'INTO', Eval2}, Term, Key, AttrMap) -> + apply_eval(Eval2, Term, Key, apply_eval(Eval1, Term, Key, AttrMap)); +apply_eval({ + delim, {identifier, _, InKey}, {string, _, Delim}, ExpKeys}, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToSplit when is_binary(TermToSplit) -> + CptTerms = string:split(TermToSplit, Delim, all), + L = min(length(CptTerms), length(ExpKeys)), + maps:merge( + AttrMap, + maps:from_list( + lists:zip( + lists:sublist(ExpKeys, L), + lists:sublist(CptTerms, L) + ) + ) + ); + _ -> + AttrMap + end; +apply_eval( + {join, InKeys, {string, _, Delim}, {identifier, _, OutKey}}, + _Term, _Key, AttrMap) -> + NewTerm = + unicode:characters_to_binary( + lists:join( + Delim, + lists:filter( + fun(V) -> is_binary(V) end, + lists:map( + fun(InKey) -> maps:get(InKey, AttrMap, <<"">>) end, + InKeys + ) + ) + ) + ), + maps:put(OutKey, NewTerm, AttrMap); +apply_eval({ + split, {identifier, _, InKey}, DelimAttr, {identifier, _, OutKey}}, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToSplit when is_binary(TermToSplit) -> + TermList = string:split(TermToSplit, element(3, DelimAttr), all), + maps:put(OutKey, TermList, AttrMap); + _ -> + AttrMap + end; +apply_eval( + {slice, {identifier, _, InKey}, WidthAttr, {identifier, _, OutKey}}, + Term, Key, AttrMap) -> + Width = element(3, WidthAttr), + case term_to_process(InKey, Term, Key, AttrMap) of + TermToSlice when is_binary(TermToSlice) -> + TermCount = string:length(TermToSlice) div Width, + TermList = + lists:map( + fun(S) -> string:slice(TermToSlice, S, Width) end, + lists:map( + fun(I) -> Width * I end, + lists:seq(0, TermCount - 1))), + maps:put(OutKey, TermList, AttrMap); + _ -> + AttrMap + end; +apply_eval( + {index, + {identifier, _, InKey}, + StartAtr, LengthAttr, + {identifier, _, OutKey}}, + Term, Key, AttrMap) -> + Start = element(3, StartAtr), + Length = element(3, LengthAttr), + case term_to_process(InKey, Term, Key, AttrMap) of + TermToIndex when is_binary(TermToIndex) -> + case string:length(TermToIndex) of + L when L >= (Start + Length) -> + maps:put( + OutKey, + string:slice(TermToIndex, Start, Length), + AttrMap + ); + _ -> + AttrMap + end; + _ -> + AttrMap + end; +apply_eval( + {kvsplit, + {identifier, _, InKey}, + {string, _, DelimPair}, {string, _, DelimKV}}, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToSplit when is_binary(TermToSplit) -> + lists:foldl( + fun(S, AccMap) -> + case string:split(S, DelimKV, all) of + [K, V] -> + maps:put(K, V, AccMap); + _ -> + AccMap + end + end, + AttrMap, + string:split(TermToSplit, DelimPair, all) + ); + _ -> + AttrMap + end; +apply_eval( + {to_integer, {identifier, _, InKey}, {identifier, _, OutKey}}, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToConvert when is_binary(TermToConvert) -> + case string:to_integer(TermToConvert) of + {I, _Rest} when is_integer(I) -> + maps:put(OutKey, I, AttrMap); + _ -> + AttrMap + end; + AlreadyInteger when is_integer(AlreadyInteger) -> + maps:put(OutKey, AlreadyInteger, AttrMap); + _ -> + AttrMap + end; +apply_eval( + {to_string, {identifier, _, InKey}, {identifier, _, OutKey}}, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToConvert when is_integer(TermToConvert) -> + maps:put( + OutKey, + list_to_binary(integer_to_list(TermToConvert)), + 
AttrMap + ); + AlreadyString when is_binary(AlreadyString) -> + maps:put(OutKey, AlreadyString, AttrMap); + _ -> + AttrMap + end; +apply_eval( + {map, InID, Comparator, MapList, Default, OutID}, + Term, Key, AttrMap) -> + {identifier, _, InKey} = InID, + {identifier, _, OutKey} = OutID, + TermToCompare = term_to_process(InKey, Term, Key, AttrMap), + F = reverse_compare_mapping(element(2, Comparator), TermToCompare), + case lists:dropwhile(F, MapList) of + [] -> + maps:put(OutKey, element(3, Default), AttrMap); + [{mapping, _T, Assignment}|_Rest] -> + maps:put(OutKey, element(3, Assignment), AttrMap) + end; +apply_eval( + {MathOp, OperandX, OperandY, {identifier, _, OutKey}}, + _Term, _Key, AttrMap) + when MathOp == add; MathOp == subtract -> + X = maybe_fetch_operand(OperandX, AttrMap), + Y = maybe_fetch_operand(OperandY, AttrMap), + case MathOp of + add when is_integer(X), is_integer(Y) -> + maps:put(OutKey, X + Y, AttrMap); + subtract when is_integer(X), is_integer(Y) -> + maps:put(OutKey, X - Y, AttrMap); + _ -> + AttrMap + end; +apply_eval( + {regex, {identifier, _, InKey}, CompiledRE, ExpKeys}, + Term, Key, AttrMap) -> + ExpectedKeyLength = length(ExpKeys), + Opts = [{capture, all_but_first, binary}], + case term_to_process(InKey, Term, Key, AttrMap) of + TermToCapture when is_binary(TermToCapture)-> + case leveled_util:regex_run(TermToCapture, CompiledRE, Opts) of + {match, CptTerms} -> + L = min(length(CptTerms), ExpectedKeyLength), + CptMap = + maps:from_list( + lists:zip( + lists:sublist(ExpKeys, L), + lists:sublist(CptTerms, L))), + maps:merge(AttrMap, CptMap); + _ -> + AttrMap + end; + _ -> + AttrMap + end. + +maybe_fetch_operand({identifier, _, ID}, AttrMap) -> + maps:get(ID, AttrMap, 0); +maybe_fetch_operand(Op, _AttrMap) -> + element(3, Op). + +term_to_process(<<"term">>, Term, _Key, _AttrMap) -> + Term; +term_to_process(<<"key">>, _Term, Key, _AttrMap) -> + Key; +term_to_process(AttrKey, _Term, _Key, AttrMap) -> + maps:get(AttrKey, AttrMap, not_found). + +reverse_compare_mapping('<', Term) -> + fun({mapping, T, _A}) -> Term >= element(3, T) end; +reverse_compare_mapping('<=', Term) -> + fun({mapping, T, _A}) -> Term > element(3, T) end; +reverse_compare_mapping('>', Term) -> + fun({mapping, T, _A}) -> Term =< element(3, T) end; +reverse_compare_mapping('>=', Term) -> + fun({mapping, T, _A}) -> Term < element(3, T) end; +reverse_compare_mapping('=', Term) -> + fun({mapping, T, _A}) -> Term =/= element(3, T) end. + +%%%============================================================================ +%%% Test +%%%============================================================================ + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). 
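Since `reverse_compare_mapping/2` reads inside-out, a short hedged sketch of the `map` eval it supports (labels illustrative): for `<` it drops each mapping whose threshold the term has already reached, assigning from the first mapping the term is still below, else the default.

```erlang
%% With '<', the output is the assignment of the first mapping whose
%% threshold the input term is below; otherwise the default applies.
GenerationFun =
    leveled_eval:generate_eval_function(
        "map($term, <, ((\"1980\", \"GenX\"), (\"1997\", \"GenY\")),"
        " \"GenZ\", $generation)",
        maps:new()),
#{<<"generation">> := <<"GenX">>} = GenerationFun(<<"1975">>, <<"K1">>),
#{<<"generation">> := <<"GenZ">>} = GenerationFun(<<"2001">>, <<"K1">>).
```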
+ +basic_test() -> + EvalString1 = "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))", + EvalString2 = "delim($gns, \"#\", ($gn1, $gn2, $gn3))", + + EvalString3 = EvalString1 ++ " | " ++ EvalString2, + {ok, Tokens3, _EndLine3} = leveled_evallexer:string(EvalString3), + {ok, ParsedExp3} = leveled_evalparser:parse(Tokens3), + EvalOut3 = + apply_eval( + ParsedExp3, + <<"SMITH|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut3)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut3)), + ?assertMatch(<<"">>, maps:get(<<"dod">>, EvalOut3)), + ?assertMatch(<<"Willow#Mia">>, maps:get(<<"gns">>, EvalOut3)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut3)), + ?assertMatch(<<"Willow">>, maps:get(<<"gn1">>, EvalOut3)), + ?assertMatch(<<"Mia">>, maps:get(<<"gn2">>, EvalOut3)), + ?assertNot(maps:is_key(<<"gn3">>, EvalOut3)), + + + EvalString4 = EvalString3 ++ " | join(($dob, $fn), \"|\", $dobfn)", + {ok, Tokens4, _EndLine4} = leveled_evallexer:string(EvalString4), + {ok, ParsedExp4} = leveled_evalparser:parse(Tokens4), + EvalOut4 = + apply_eval( + ParsedExp4, + <<"SMITH|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut4)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut4)), + ?assertMatch(<<"">>, maps:get(<<"dod">>, EvalOut4)), + ?assertMatch(<<"Willow#Mia">>, maps:get(<<"gns">>, EvalOut4)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut4)), + ?assertMatch(<<"Willow">>, maps:get(<<"gn1">>, EvalOut4)), + ?assertMatch(<<"Mia">>, maps:get(<<"gn2">>, EvalOut4)), + ?assertNot(maps:is_key(<<"gn3">>, EvalOut4)), + ?assertMatch(<<"19861216|SMITH">>, maps:get(<<"dobfn">>, EvalOut4)), + + + EvalString5 = EvalString4 ++ " | index($dob, 0, 4, $yob) | to_integer($yob, $yob)", + {ok, Tokens5, _EndLine5} = leveled_evallexer:string(EvalString5), + {ok, ParsedExp5} = leveled_evalparser:parse(Tokens5), + EvalOut5 = + apply_eval( + ParsedExp5, + <<"SMITH|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut5)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut5)), + ?assertMatch(<<"">>, maps:get(<<"dod">>, EvalOut5)), + ?assertMatch(<<"Willow#Mia">>, maps:get(<<"gns">>, EvalOut5)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut5)), + ?assertMatch(<<"Willow">>, maps:get(<<"gn1">>, EvalOut5)), + ?assertMatch(<<"Mia">>, maps:get(<<"gn2">>, EvalOut5)), + ?assertNot(maps:is_key(<<"gn3">>, EvalOut5)), + ?assertMatch(<<"19861216|SMITH">>, maps:get(<<"dobfn">>, EvalOut5)), + ?assertMatch(1986, maps:get(<<"yob">>, EvalOut5)), + + EvalString6 = EvalString1 ++ " | slice($gns, 2, $gns)", + {ok, Tokens6, _EndLine6} = leveled_evallexer:string(EvalString6), + {ok, ParsedExp6} = leveled_evalparser:parse(Tokens6), + EvalOut6 = + apply_eval( + ParsedExp6, + <<"SMITH|19861216||MAN1Ve|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut6)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut6)), + ?assertMatch(<<"">>, maps:get(<<"dod">>, EvalOut6)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut6)), + ?assertMatch([<<"MA">>, <<"N1">>, <<"Ve">>], maps:get(<<"gns">>, EvalOut6)), + + EvalOut7 = + apply_eval( + ParsedExp6, + <<"SMITH|19861216||MAN1VeZ|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + 
?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut7)),
+    ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut7)),
+    ?assertMatch(<<"">>, maps:get(<<"dod">>, EvalOut7)),
+    ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut7)),
+    ?assertMatch([<<"MA">>, <<"N1">>, <<"Ve">>], maps:get(<<"gns">>, EvalOut7)),
+
+    EvalString8 = EvalString1 ++ " | split($gns, \"#\", $gns)",
+    {ok, Tokens8, _EndLine8} = leveled_evallexer:string(EvalString8),
+    {ok, ParsedExp8} = leveled_evalparser:parse(Tokens8),
+    EvalOut8 =
+        apply_eval(
+            ParsedExp8,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut8)),
+    ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut8)),
+    ?assertMatch(<<"">>, maps:get(<<"dod">>, EvalOut8)),
+    ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut8)),
+    ?assertMatch([<<"Willow">>, <<"Mia">>, <<"Vera">>], maps:get(<<"gns">>, EvalOut8)),
+
+    EvalString9 =
+        "delim($term, \"|\", ($name, $height, $weight, $pick)) |"
+        " to_integer($height, $height) |"
+        " to_integer($weight, $weight) |"
+        " to_integer($pick, $pick) |"
+        " delim($key, \"|\", ($team, $number)) |"
+        " index($team, 0, 9, $doh)",
+    {ok, Tokens9, _EndLine9} = leveled_evallexer:string(EvalString9),
+    {ok, ParsedExp9} = leveled_evalparser:parse(Tokens9),
+    EvalOut9 =
+        apply_eval(
+            ParsedExp9,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"WEMBANYAMA">>, maps:get(<<"name">>, EvalOut9)),
+    ?assertMatch(224, maps:get(<<"height">>, EvalOut9)),
+    ?assertMatch(95, maps:get(<<"weight">>, EvalOut9)),
+    ?assertMatch(<<"#1">>, maps:get(<<"pick">>, EvalOut9)),
+    % Not changed as not starting with an integer
+    ?assertMatch(<<"SPURS">>, maps:get(<<"team">>, EvalOut9)),
+    ?assertMatch(<<"00001">>, maps:get(<<"number">>, EvalOut9)),
+    ?assertNot(maps:is_key(<<"doh">>, EvalOut9)),
+
+    %% Age at 30 April 2024
+    EvalString10 =
+        EvalString5 ++
+        " | index($dob, 4, 4, $birthday)"
+        " | map($birthday, <=, ((\"0430\", 2024)), 2023, $yoc)"
+        " | subtract($yoc, $yob, $age)"
+        " | add($age, 1, $age_next)"
+        " | to_string($age, $age)"
+        ,
+    {ok, Tokens10, _EndLine10} = leveled_evallexer:string(EvalString10),
+    {ok, ParsedExp10} = leveled_evalparser:parse(Tokens10),
+    EvalOut10A =
+        apply_eval(
+            ParsedExp10,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"37">>, maps:get(<<"age">>, EvalOut10A)),
+    ?assertMatch(38, maps:get(<<"age_next">>, EvalOut10A)),
+    EvalOut10B =
+        apply_eval(
+            ParsedExp10,
+            <<"SMITH|19860216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"38">>, maps:get(<<"age">>, EvalOut10B)),
+    EvalString10F =
+        EvalString1 ++
+        " | index($dob, 0, 4, $yob)"
+        " | index($dob, 4, 4, $birthday)"
+        " | map($birthday, <=, ((\"0430\", 2024)), 2023, $yoc)"
+        " | subtract($yoc, $yob, $age)"
+        % yob has not been converted to an integer,
+        % so the age will not be set
+        " | to_string($age, $age)"
+        ,
+    {ok, Tokens10F, _EndLine10F} = leveled_evallexer:string(EvalString10F),
+    {ok, ParsedExp10F} = leveled_evalparser:parse(Tokens10F),
+    EvalOut10F =
+        apply_eval(
+            ParsedExp10F,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertNot(maps:is_key(<<"age">>, EvalOut10F)),
+
+    EvalString11A =
+        EvalString1 ++
+        " | map($dob, <, "
+        "((\"1946\", \"Silent\"), (\"1966\", \"Boomer\"),"
+        "(\"1980\", \"GenX\"), (\"1997\", \"Millenial\")), \"GenZ\","
+        " $generation)",
+    {ok, Tokens11A, _EndLine11A} = leveled_evallexer:string(EvalString11A),
+    {ok, ParsedExp11A} = leveled_evalparser:parse(Tokens11A),
+    EvalOut11A =
+        apply_eval(
+            ParsedExp11A,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11A)),
+    EvalString11B =
+        EvalString1 ++
+        " | map($dob, <=, "
+        "((\"1945\", \"Silent\"), (\"1965\", \"Boomer\"),"
+        "(\"1979\", \"GenX\"), (\"1996\", \"Millenial\")), \"GenZ\","
+        " $generation)",
+    {ok, Tokens11B, _EndLine11B} = leveled_evallexer:string(EvalString11B),
+    {ok, ParsedExp11B} = leveled_evalparser:parse(Tokens11B),
+    EvalOut11B =
+        apply_eval(
+            ParsedExp11B,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11B)),
+    EvalString11C =
+        EvalString1 ++
+        " | map($dob, >, "
+        "((\"1996\", \"GenZ\"), (\"1979\", \"Millenial\"),"
+        "(\"1965\", \"GenX\"), (\"1945\", \"Boomer\")), \"Silent\","
+        " $generation)",
+    {ok, Tokens11C, _EndLine11C} = leveled_evallexer:string(EvalString11C),
+    {ok, ParsedExp11C} = leveled_evalparser:parse(Tokens11C),
+    EvalOut11C =
+        apply_eval(
+            ParsedExp11C,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11C)),
+    EvalString11D =
+        EvalString1 ++
+        " | map($dob, >=, "
+        "((\"1997\", \"GenZ\"), (\"1980\", \"Millenial\"),"
+        "(\"1966\", \"GenX\"), (\"1946\", \"Boomer\")), \"Silent\","
+        " $generation)",
+    {ok, Tokens11D, _EndLine11D} = leveled_evallexer:string(EvalString11D),
+    {ok, ParsedExp11D} = leveled_evalparser:parse(Tokens11D),
+    EvalOut11D =
+        apply_eval(
+            ParsedExp11D,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11D)),
+
+    EvalString12 =
+        "kvsplit($term, \"|\", \"=\") | index($term, 0, 12, $ts) |"
+        " to_integer($ts, $ts) |"
+        " to_integer($DEBUG, $DEBUG) |"
+        " to_integer($INFO, $INFO) |"
+        " to_integer($WARN, $WARN) |"
+        " to_integer($ERROR, $ERROR) |"
+        " to_integer($CRITICAL, $CRITICAL) |"
+        " add($DEBUG, $INFO, $TOTAL) |"
+        " add($TOTAL, $WARN, $TOTAL) |"
+        " add($TOTAL, $ERROR, $TOTAL) |"
+        " add($TOTAL, $CRITICAL, $TOTAL)"
+        ,
+    {ok, Tokens12, _EndLine12} = leveled_evallexer:string(EvalString12),
+    {ok, ParsedExp12} = leveled_evalparser:parse(Tokens12),
+    EvalOut12 =
+        apply_eval(
+            ParsedExp12,
+            <<"063881703147|DEBUG=804|INFO=186|WARN=10">>,
+            <<"ABC1233">>,
+            maps:new()
+        ),
+    ?assertMatch(63881703147, maps:get(<<"ts">>, EvalOut12)),
+    ?assertMatch(1000, maps:get(<<"TOTAL">>, EvalOut12)),
+    ?assertNot(maps:is_key(<<"CRITICAL">>, EvalOut12)),
+
+    EvalString13 =
+        "kvsplit($term, \"|\", \":\") |"
+        " map($cup_year, =, "
+        "((\"1965\", \"bad\"), (\"1970\", \"bad\"), "
+        "(\"1972\", \"good\"), (\"1974\", \"bad\")), "
+        "\"indifferent\", $cup_happy) ",
+    {ok, Tokens13, _EndLine13} = leveled_evallexer:string(EvalString13),
+    {ok, ParsedExp13} = leveled_evalparser:parse(Tokens13),
+    EvalOut13A =
+        apply_eval(ParsedExp13, <<"cup_year:1972">>, <<"ABC1">>, maps:new()),
+    ?assertMatch(<<"good">>, maps:get(<<"cup_happy">>, EvalOut13A)),
+    EvalOut13B =
+        apply_eval(ParsedExp13, <<"cup_year:1970">>, <<"ABC1">>, maps:new()),
+    ?assertMatch(<<"bad">>, maps:get(<<"cup_happy">>, EvalOut13B)),
+    EvalOut13C =
+        apply_eval(ParsedExp13, <<"cup_year:2024">>, <<"ABC1">>, maps:new()),
+    ?assertMatch(<<"indifferent">>, maps:get(<<"cup_happy">>, EvalOut13C)),
+
+    ExtractRegex =
+        "(?P<fn>[^\\|]*)\\|(?P<dob>[0-9]{8})\\|(?P<dod>[0-9]{0,8})\\|"
+        "(?P<gns>[^\\|]*)\\|(?P<pcs>[^\\|]*)|.",
+    ok =
+        check_regex_eval(
+            "regex($term, :regex, pcre, ($fn, $dob, $dod, $gns, $pcs))",
+            ExtractRegex
+        ),
+    ok =
+        check_regex_eval(
+            "regex($term, :regex, ($fn, $dob, $dod, $gns, $pcs))",
+            ExtractRegex
+        )
+    .
+
+unicode_test() ->
+    EvalString1 = "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))",
+    EvalString2 = "delim($gns, \"#\", ($gn1, $gn2, $gn3))",
+
+    EvalString3 = EvalString1 ++ " | " ++ EvalString2,
+    {ok, Tokens3, _EndLine3} = leveled_evallexer:string(EvalString3),
+    {ok, ParsedExp3} = leveled_evalparser:parse(Tokens3),
+
+    EvalOutUnicode0 =
+        apply_eval(
+            ParsedExp3,
+            <<"ÅßERG|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ"/utf8>>,
+            % Note index terms will have to be unicode_binary() type
+            % for this to work a latin-1 binary of
+            % <<"ÅßERG|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">> will fail to
+            % match - use unicode:characters_to_binary(B, latin1, utf8) to
+            % convert
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"ÅßERG"/utf8>>, maps:get(<<"fn">>, EvalOutUnicode0)),
+    FE19 = "begins_with($fn, :prefix)",
+    {ok, Filter19} =
+        leveled_filter:generate_filter_expression(
+            FE19,
+            #{<<"prefix">> => <<"ÅßE"/utf8>>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter19,
+            EvalOutUnicode0
+        )
+    ),
+
+    EvalString4 = EvalString1 ++ "| slice($gns, 2, $gns)",
+    {ok, Tokens4, _EndLine4} = leveled_evallexer:string(EvalString4),
+    {ok, ParsedExp4} = leveled_evalparser:parse(Tokens4),
+    EvalOutUnicode1 =
+        apply_eval(
+            ParsedExp4,
+            <<"ÅßERG|19861216||Åbß0Ca|LS1 4BT#LS8 1ZZ"/utf8>>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    FE20 = ":gsc_check IN $gns",
+    {ok, Filter20} =
+        leveled_filter:generate_filter_expression(
+            FE20,
+            #{<<"gsc_check">> => <<"Åb"/utf8>>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter20,
+            EvalOutUnicode1
+        )
+    ),
+    {ok, Filter21} =
+        leveled_filter:generate_filter_expression(
+            FE20,
+            #{<<"gsc_check">> => <<"ß0"/utf8>>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter21,
+            EvalOutUnicode1
+        )
+    ),
+    {ok, Filter22} =
+        leveled_filter:generate_filter_expression(
+            FE20,
+            #{<<"gsc_check">> => <<"Ca">>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter22,
+            EvalOutUnicode1
+        )
+    ),
+    {ok, Filter23} =
+        leveled_filter:generate_filter_expression(
+            FE20,
+            #{<<"gsc_check">> => <<"Ca"/utf8>>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter23,
+            EvalOutUnicode1
+        )
+    )
+    .
+
+
+check_regex_eval(EvalString14, ExtractRegex) ->
+    {ok, ParsedExp14} =
+        generate_eval_expression(
+            EvalString14,
+            #{<<"regex">> => list_to_binary(ExtractRegex)}
+        ),
+    EvalOut14 =
+        apply_eval(
+            ParsedExp14,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut14)),
+    ok.
+
+bad_type_test() ->
+    EvalString9 =
+        "delim($term, \"|\", ($name, $height, $weight, $pick)) |"
+        " to_integer($height, $height) |"
+        " to_integer($weight, $weight) |"
+        " to_integer($pick, $pick) |"
+        " delim($key, \"|\", ($team, $number)) |"
+        " index($team, 0, 9, $doh)",
+    {ok, Tokens9, _EndLine9} = leveled_evallexer:string(EvalString9),
+    {ok, ParsedExp9} = leveled_evalparser:parse(Tokens9),
+    EvalOut9 =
+        apply_eval(
+            ParsedExp9,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"WEMBANYAMA">>, maps:get(<<"name">>, EvalOut9)),
+    ?assertMatch(224, maps:get(<<"height">>, EvalOut9)),
+    ?assertMatch(95, maps:get(<<"weight">>, EvalOut9)),
+    ?assertMatch(<<"#1">>, maps:get(<<"pick">>, EvalOut9)),
+    % Not changed as not starting with an integer
+    ?assertMatch(<<"SPURS">>, maps:get(<<"team">>, EvalOut9)),
+    ?assertMatch(<<"00001">>, maps:get(<<"number">>, EvalOut9)),
+    ?assertNot(maps:is_key(<<"doh">>, EvalOut9)),
+
+    EvalStringF1 = EvalString9 ++ " | delim($height, \"|\", ($foo, $bar))",
+    {ok, TokensF1, _EndLineF1} = leveled_evallexer:string(EvalStringF1),
+    {ok, ParsedExpF1} = leveled_evalparser:parse(TokensF1),
+    EvalOutF1 =
+        apply_eval(
+            ParsedExpF1,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertNot(maps:is_key(<<"foo">>, EvalOutF1)),
+    ?assertNot(maps:is_key(<<"bar">>, EvalOutF1)),
+    ?assertMatch(224, maps:get(<<"height">>, EvalOutF1)),
+
+    EvalStringF2 = EvalString9 ++ " | split($height, \"|\", $foo)",
+    {ok, TokensF2, _EndLineF2} = leveled_evallexer:string(EvalStringF2),
+    {ok, ParsedExpF2} = leveled_evalparser:parse(TokensF2),
+    EvalOutF2 =
+        apply_eval(
+            ParsedExpF2,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertNot(maps:is_key(<<"foo">>, EvalOutF2)),
+    ?assertMatch(224, maps:get(<<"height">>, EvalOutF2)),
+
+    EvalStringF3 = EvalString9 ++ " | slice($height, 1, $foo)",
+    {ok, TokensF3, _EndLineF3} = leveled_evallexer:string(EvalStringF3),
+    {ok, ParsedExpF3} = leveled_evalparser:parse(TokensF3),
+    EvalOutF3 =
+        apply_eval(
+            ParsedExpF3,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertNot(maps:is_key(<<"foo">>, EvalOutF3)),
+    ?assertMatch(224, maps:get(<<"height">>, EvalOutF3)),
+
+    EvalStringF4 = EvalString9 ++ " | index($height, 1, 1, $foo)",
+    {ok, TokensF4, _EndLineF4} = leveled_evallexer:string(EvalStringF4),
+    {ok, ParsedExpF4} = leveled_evalparser:parse(TokensF4),
+    EvalOutF4 =
+        apply_eval(
+            ParsedExpF4,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertNot(maps:is_key(<<"foo">>, EvalOutF4)),
+    ?assertMatch(224, maps:get(<<"height">>, EvalOutF4)),
+
+    EvalStringF5 = EvalString9 ++ " | kvsplit($height, \"|\", \"#\")",
+    {ok, TokensF5, _EndLineF5} = leveled_evallexer:string(EvalStringF5),
+    {ok, ParsedExpF5} = leveled_evalparser:parse(TokensF5),
+    EvalOutF5 =
+        apply_eval(
+            ParsedExpF5,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertNot(maps:is_key(<<"foo">>, EvalOutF5)),
+    ?assertMatch(224, maps:get(<<"height">>, EvalOutF5)),
+
+    EvalStringF6 = EvalString9 ++ " | to_integer($height, $height_int)",
+    {ok, TokensF6, _EndLineF6} = leveled_evallexer:string(EvalStringF6),
+    {ok, ParsedExpF6} = leveled_evalparser:parse(TokensF6),
+    EvalOutF6 =
+        apply_eval(
+            ParsedExpF6,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertMatch(224, maps:get(<<"height">>, EvalOutF6)),
+    ?assertMatch(224, maps:get(<<"height_int">>, EvalOutF6)),
+
+    EvalStringF7 = EvalString9 ++ " | to_string($name, $name_str)",
+    {ok, TokensF7, _EndLineF7} = leveled_evallexer:string(EvalStringF7),
+    {ok, ParsedExpF7} = leveled_evalparser:parse(TokensF7),
+    EvalOutF7 =
+        apply_eval(
+            ParsedExpF7,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"WEMBANYAMA">>, maps:get(<<"name">>, EvalOutF7)),
+    ?assertMatch(<<"WEMBANYAMA">>, maps:get(<<"name_str">>, EvalOutF7)),
+
+    EvalStringF8 =
+        EvalString9 ++
+        " | regex($height, :regex, ($height_int)) |"
+        " to_integer($height_int, $height_int)",
+
+    {ok, ParsedExpF8} =
+        generate_eval_expression(
+            EvalStringF8,
+            #{<<"regex">> => list_to_binary("(?P<height_int>[0-9]+)")}
+        ),
+    EvalOutF8 =
+        apply_eval(
+            ParsedExpF8,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertNot(maps:is_key(<<"height_int">>, EvalOutF8)),
+
+    EvalStringF9 =
+        EvalString9 ++
+        " | to_string($height, $height)"
+        " | regex($height, :regex, ($height_int)) |"
+        " to_integer($height_int, $height_int)",
+
+    {ok, ParsedExpF9} =
+        generate_eval_expression(
+            EvalStringF9,
+            #{<<"regex">> => list_to_binary("(?P<height_int>[0-9]+)")}
+        ),
+    EvalOutF9 =
+        apply_eval(
+            ParsedExpF9,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertMatch(224, maps:get(<<"height_int">>, EvalOutF9))
+    .
+
+
+generate_test() ->
+    EvalString13 =
+        "kvsplit($term, \"|\", \":\") |"
+        " map($cup_year, =, "
+        "((\"1965\", \"bad\"), (\"1970\", \"bad\"), "
+        "(:clarke, \"good\"), (\"1974\", \"bad\")), "
+        "\"indifferent\", $cup_happy) ",
+    {ok, ParsedExp13} =
+        generate_eval_expression(EvalString13, #{<<"clarke">> => <<"1972">>}),
+    EvalOut13A =
+        apply_eval(ParsedExp13, <<"cup_year:1972">>, <<"ABC1">>, maps:new()),
+    ?assertMatch(<<"good">>, maps:get(<<"cup_happy">>, EvalOut13A)),
+    ?assertMatch(
+        {error, "Substitution <<\"clarke\">> not found"},
+        generate_eval_expression(EvalString13, maps:new())
+    ).
+
+-endif.
\ No newline at end of file
diff --git a/src/leveled_evallexer.xrl b/src/leveled_evallexer.xrl
new file mode 100644
index 00000000..f0016d03
--- /dev/null
+++ b/src/leveled_evallexer.xrl
@@ -0,0 +1,54 @@
+%% Lexer for eval expressions
+
+Definitions.
+WhiteSpace = ([\t\f\v\r\n\s]+)
+
+Rules.
+
+{WhiteSpace} : skip_token.
+
+\( : {token, {'(', TokenLine}}.
+\) : {token, {')', TokenLine}}.
+, : {token, {',', TokenLine}}.
+\| : {token, {'PIPE', TokenLine}}.
+
+delim : {token, {delim, TokenLine}}.
+join : {token, {join, TokenLine}}.
+split : {token, {split, TokenLine}}.
+slice : {token, {slice, TokenLine}}.
+index : {token, {index, TokenLine}}.
+kvsplit : {token, {kvsplit, TokenLine}}.
+regex : {token, {regex, TokenLine}}.
+to_integer : {token, {to_integer, TokenLine}}.
+to_string : {token, {to_string, TokenLine}}.
+add : {token, {add, TokenLine}}.
+subtract : {token, {subtract, TokenLine}}.
+map : {token, {map, TokenLine}}.
+pcre : {token, {pcre, TokenLine}}.
+
+= : {token, {comparator, '=', TokenLine}}.
+< : {token, {comparator, '<', TokenLine}}.
+> : {token, {comparator, '>', TokenLine}}.
+<= : {token, {comparator, '<=', TokenLine}}.
+>= : {token, {comparator, '>=', TokenLine}}.
+
+\$[a-zA-Z_][a-zA-Z_0-9]* : {token, {identifier, TokenLine, strip_identifier(TokenChars)}}.
+\:[a-zA-Z_][a-zA-Z_0-9]* : {token, {substitution, TokenLine, strip_substitution(TokenChars)}}.
+[1-9][0-9]* : {token, {pos_integer, TokenLine, list_to_integer(TokenChars)}}.
+0 : {token, {zero, TokenLine, list_to_integer(TokenChars)}}.
+\-[0-9]+ : {token, {neg_integer, TokenLine, list_to_integer(TokenChars)}}.
+\"[^"]+\" : {token, {string, TokenLine, strip_string(TokenChars)}}. %" + +Erlang code. + +strip_string(TokenChars) -> + unicode:characters_to_binary(lists:droplast(tl(TokenChars))). + +strip_identifier(TokenChars) -> + [36|StrippedChars] = TokenChars, + unicode:characters_to_binary(StrippedChars). + +strip_substitution(TokenChars) -> + [58|StrippedChars] = TokenChars, + unicode:characters_to_binary(StrippedChars). + diff --git a/src/leveled_evalparser.yrl b/src/leveled_evalparser.yrl new file mode 100644 index 00000000..ef4c01e2 --- /dev/null +++ b/src/leveled_evalparser.yrl @@ -0,0 +1,84 @@ +%% Grammar for eval expressions + +Nonterminals +top_level eval +operand math_operand +integer non_neg_integer +regex_method +mapping mappings mappings_list +identifiers identifier_list. + +Terminals +'(' ')' ',' +identifier string +pos_integer neg_integer zero +comparator +'PIPE' +delim join split slice index kvsplit regex map +add subtract +to_integer to_string +pcre. + +Rootsymbol top_level. + +top_level -> eval: {eval, '$1'}. + +eval -> eval 'PIPE' eval : {'PIPE', '$1', 'INTO', '$3'}. +eval -> delim '(' identifier ',' string ',' identifier_list ')' : {delim, '$3', '$5', '$7'}. +eval -> join '(' identifier_list ',' string ',' identifier ')' : {join, '$3', '$5', '$7'}. +eval -> split '(' identifier ',' string ',' identifier ')' : {split, '$3', '$5', '$7'}. +eval -> slice '(' identifier ',' pos_integer ',' identifier ')' : {slice, '$3', '$5', '$7'}. +eval -> index '(' identifier ',' non_neg_integer ',' pos_integer ',' 'identifier' ')' : {index, '$3', '$5', '$7', '$9'}. +eval -> kvsplit '(' identifier ',' string ',' string ')' : {kvsplit, '$3', '$5', '$7'}. +eval -> regex '(' identifier ',' string ',' regex_method ',' identifier_list ')' : {regex, '$3', re_compile('$5', '$7'), '$9'}. +eval -> regex '(' identifier ',' string ',' identifier_list ')' : {regex, '$3', re_compile('$5'), '$7'}. +eval -> map '(' identifier ',' comparator ',' mappings_list ',' operand ',' identifier ')' : {map, '$3', '$5', '$7', '$9', '$11'}. +eval -> to_integer '(' identifier ',' identifier ')' : {to_integer, '$3', '$5'}. +eval -> to_string '(' identifier ',' identifier ')' : {to_string, '$3', '$5'}. +eval -> subtract '(' math_operand ',' math_operand ',' identifier ')' : {subtract, '$3', '$5', '$7'}. +eval -> add '(' math_operand ',' math_operand ',' identifier ')' : {add, '$3', '$5', '$7'}. + +mappings_list -> '(' mappings ')' : '$2'. + +mappings -> mapping ',' mappings : ['$1' | '$3']. +mappings -> mapping : ['$1']. + +mapping -> '(' operand ',' operand ')' : {mapping, '$2', '$4'}. + +non_neg_integer -> pos_integer : '$1'. +non_neg_integer -> zero : '$1'. + +integer -> non_neg_integer : '$1'. +integer -> neg_integer : '$1'. + +operand -> string : '$1'. +operand -> integer : '$1'. + +math_operand -> integer : '$1'. +math_operand -> identifier : '$1'. + +regex_method -> pcre : '$1'. + +identifier_list -> '(' identifiers ')' : strip_ids('$2'). + +identifiers -> identifier ',' identifiers : ['$1' | '$3']. +identifiers -> identifier : ['$1']. + +Endsymbol '$end'. + +Right 100 'PIPE'. + +Erlang code. + +strip_ids(IDL) -> + lists:map( + fun(ID) -> element(3, ID) end, + lists:flatten(IDL) + ). + +re_compile(RegexStr) -> + re_compile(RegexStr, {pcre, element(2, RegexStr)}). + +re_compile({string, _LN, Regex}, Method) -> + {ok, CRE} = leveled_util:regex_compile(Regex, element(1, Method)), + CRE. 
\ No newline at end of file diff --git a/src/leveled_filter.erl b/src/leveled_filter.erl new file mode 100644 index 00000000..582ff31e --- /dev/null +++ b/src/leveled_filter.erl @@ -0,0 +1,672 @@ +%% -------- Filter Functions --------- +%% +%% Support for different filter expressions within leveled +%% + +-module(leveled_filter). + +-export( + [ + generate_filter_function/2, + generate_filter_expression/2, + apply_filter/2, + substitute_items/3 + ]). + +%%%============================================================================ +%%% External API +%%%============================================================================ + +-spec generate_filter_function(string(), map()) -> fun((map()) -> boolean()). +generate_filter_function(FilterString, Substitutions) -> + {ok, ParsedFilter} = + generate_filter_expression(FilterString, Substitutions), + fun(AttrMap) -> + apply_filter(ParsedFilter, AttrMap) + end. + + +%%%============================================================================ +%%% Internal functions +%%%============================================================================ + +apply_filter({condition, Condition}, AttrMap) -> + apply_filter(Condition, AttrMap); +apply_filter({'OR', P1, P2}, AttrMap) -> + apply_filter(P1, AttrMap) orelse apply_filter(P2, AttrMap); +apply_filter({'AND', P1, P2}, AttrMap) -> + apply_filter(P1, AttrMap) andalso apply_filter(P2, AttrMap); +apply_filter({'NOT', P1}, AttrMap) -> + not apply_filter(P1, AttrMap); +apply_filter({'BETWEEN', {identifier, _, ID}, CmpA, CmpB}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_integer(V) -> + apply_filter({'BETWEEN', {integer, 0, V}, CmpA, CmpB}, AttrMap); + V when is_binary(V) -> + apply_filter({'BETWEEN', {string, 0, V}, CmpA, CmpB}, AttrMap); + _ -> + false + end; +apply_filter( + {'BETWEEN', {Type, _, V0}, {Type, _, VL}, {Type, _, VH}}, _) + when VL =< VH -> + V0 >= VL andalso V0 =< VH; +apply_filter( + {'BETWEEN', {integer, TL0, I0}, {identifier, _, ID}, CmpB}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_integer(V) -> + apply_filter( + {'BETWEEN', {integer, TL0, I0}, {integer, 0, V}, CmpB}, + AttrMap + ); + _ -> + false + end; +apply_filter( + {'BETWEEN', + {integer, TL0, I0}, {integer, TLL, IL}, {identifier, _, ID} + }, + AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_integer(V) -> + apply_filter( + {'BETWEEN', + {integer, TL0, I0}, {integer, TLL, IL}, {integer, 0, V} + }, + AttrMap + ); + _ -> + false + end; +apply_filter( + {'BETWEEN', {string, TL0, S0}, {identifier, _, ID}, CmpB}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_binary(V) -> + apply_filter( + {'BETWEEN', {string, TL0, S0}, {string, 0, V}, CmpB}, AttrMap); + _ -> + false + end; +apply_filter( + {'BETWEEN', + {string, TL0, S0}, {string, TLL, SL}, {identifier, _, ID} + }, + AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_binary(V) -> + apply_filter( + {'BETWEEN', + {string, TL0, S0}, {string, TLL, SL}, {string, 0, V} + }, + AttrMap + ); + _ -> + false + end; +apply_filter({'BETWEEN', _, _, _}, _) -> + false; +apply_filter({'IN', {string, _, TestString}, {identifier, _, ID}}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + CheckList when is_list(CheckList) -> + lists:member(TestString, CheckList); + _ -> + false + end; +apply_filter( + {'IN', {identifier, _, ID}, CheckList}, AttrMap) + when is_list(CheckList) -> + case maps:get(ID, AttrMap, notfound) of + notfound -> + false; + V -> + lists:member(V, lists:map(fun(C) -> element(3, 
C) end, CheckList))
+    end;
+apply_filter({{comparator, Cmp, TLC}, {identifier, _ , ID}, CmpB}, AttrMap) ->
+    case maps:get(ID, AttrMap, notfound) of
+        notfound ->
+            false;
+        V when is_integer(V) ->
+            apply_filter(
+                {{comparator, Cmp, TLC}, {integer, 0, V}, CmpB}, AttrMap
+            );
+        V when is_binary(V) ->
+            apply_filter(
+                {{comparator, Cmp, TLC}, {string, 0, V}, CmpB}, AttrMap
+            )
+    end;
+apply_filter({{comparator, Cmp, TLC}, CmpA, {identifier, _, ID}}, AttrMap) ->
+    case maps:get(ID, AttrMap, notfound) of
+        notfound ->
+            false;
+        V when is_integer(V) ->
+            apply_filter(
+                {{comparator, Cmp, TLC}, CmpA, {integer, 0, V}}, AttrMap
+            );
+        V when is_binary(V) ->
+            apply_filter(
+                {{comparator, Cmp, TLC}, CmpA, {string, 0, V}}, AttrMap
+            )
+    end;
+apply_filter({{comparator, Cmp, _}, {Type, _, TL}, {Type, _, TR}}, _AttrMap) ->
+    compare(Cmp, TL, TR);
+apply_filter({{comparator, _, _}, _, _}, _AttrMap) ->
+    false;
+apply_filter({contains, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) ->
+    case maps:get(ID, AttrMap, notfound) of
+        V when is_binary(V) ->
+            case string:find(V, SubStr) of
+                nomatch ->
+                    false;
+                _ ->
+                    true
+            end;
+        _ ->
+            false
+    end;
+apply_filter(
+    {begins_with, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) ->
+    case maps:get(ID, AttrMap, notfound) of
+        V when is_binary(V) ->
+            case string:prefix(V, SubStr) of
+                nomatch ->
+                    false;
+                _ ->
+                    true
+            end;
+        _ ->
+            false
+    end;
+apply_filter(
+    {ends_with, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) ->
+    case maps:get(ID, AttrMap, notfound) of
+        V when is_binary(V) ->
+            case string:prefix(string:reverse(V), string:reverse(SubStr)) of
+                nomatch ->
+                    false;
+                _ ->
+                    true
+            end;
+        _ ->
+            false
+    end;
+apply_filter({attribute_exists, {identifier, _, ID}}, AttrMap) ->
+    maps:is_key(ID, AttrMap);
+apply_filter({attribute_not_exists, {identifier, _, ID}}, AttrMap) ->
+    not maps:is_key(ID, AttrMap);
+apply_filter({attribute_empty, {identifier, _, ID}}, AttrMap) ->
+    case maps:get(ID, AttrMap, notfound) of
+        <<>> ->
+            true;
+        _ ->
+            false
+    end.
+
+generate_filter_expression(FilterString, Substitutions) ->
+    {ok, Tokens, _EndLine} = leveled_filterlexer:string(FilterString),
+    case substitute_items(Tokens, Substitutions, []) of
+        {error, Error} ->
+            {error, Error};
+        UpdTokens ->
+            leveled_filterparser:parse(UpdTokens)
+    end.
+
+substitute_items([], _Subs, UpdTokens) ->
+    lists:reverse(UpdTokens);
+substitute_items([{substitution, LN, ID}|Rest], Subs, UpdTokens) ->
+    case maps:get(ID, Subs, notfound) of
+        notfound ->
+            {error,
+                lists:flatten(
+                    io_lib:format("Substitution ~p not found", [ID]))};
+        Value when is_binary(Value) ->
+            substitute_items(
+                Rest, Subs, [{string, LN, Value}|UpdTokens]);
+        Value when is_integer(Value) ->
+            substitute_items(Rest, Subs, [{integer, LN, Value}|UpdTokens]);
+        _UnexpectedValue ->
+            {error,
+                lists:flatten(
+                    io_lib:format("Substitution ~p unexpected type", [ID]))}
+    end;
+substitute_items([Token|Rest], Subs, UpdTokens) ->
+    substitute_items(Rest, Subs, [Token|UpdTokens]).
+
+compare('>', V, CmpA) -> V > CmpA;
+compare('>=', V, CmpA) -> V >= CmpA;
+compare('<', V, CmpA) -> V < CmpA;
+compare('<=', V, CmpA) -> V =< CmpA;
+compare('=', V, CmpA) -> V == CmpA;
+compare('<>', V, CmpA) -> V =/= CmpA.
+
+
+%%%============================================================================
+%%% Test
+%%%============================================================================
+
+-ifdef(TEST).
+
+-include_lib("eunit/include/eunit.hrl").
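A corresponding hedged sketch for the filter side before the tests (the expression and attribute maps are illustrative; `:p` is resolved when the expression is generated):

```erlang
%% generate_filter_function/2 returns a predicate over an attribute map,
%% typically the output of a leveled_eval evaluation.
FilterFun =
    leveled_filter:generate_filter_function(
        "($dob BETWEEN \"19700101\" AND \"19791231\")"
        " AND begins_with($pc, :p)",
        #{<<"p">> => <<"LS">>}),
true = FilterFun(#{<<"dob">> => <<"19751124">>, <<"pc">> => <<"LS1 4BT">>}),
false = FilterFun(#{<<"dob">> => <<"19851124">>, <<"pc">> => <<"LS1 4BT">>}).
```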
+ +invalid_filterexpression_test() -> + FE1 = "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", + SubsMissing = maps:from_list([{<<"a">>, <<"MA">>}]), + ?assertMatch( + {error, "Substitution <<\"d\">> not found"}, + generate_filter_expression(FE1, SubsMissing) + ), + SubsWrongType = maps:from_list([{<<"d">>, "42"}]), + ?assertMatch( + {error, "Substitution <<\"d\">> unexpected type"}, + generate_filter_expression(FE1, SubsWrongType) + ), + SubsPresent = maps:from_list([{<<"d">>, <<"MA">>}]), + FE2 = "($a IN (\"A\", 12)) OR (($b >= \"30\") AND contains($c, :d))", + ?assertMatch( + {error, {1, leveled_filterparser,["syntax error before: ","12"]}}, + generate_filter_expression(FE2, SubsPresent) + ), + SubsWrongTypeForContains = maps:from_list([{<<"d">>, 42}]), + FE4 = "($a BETWEEN 12 AND 12) OR (($b >= \"30\") AND contains($c, :d))", + ?assertMatch( + {error, {1, leveled_filterparser, ["syntax error before: ","42"]}}, + generate_filter_expression(FE4, SubsWrongTypeForContains) + ). + +filterexpression_test() -> + FE1 = "($a BETWEEN \"A\" AND \"A12\") AND (($b >= 30) AND contains($c, :d))", + SubsPresent = maps:from_list([{<<"d">>, <<"MA">>}]), + {ok, Filter1} = generate_filter_expression(FE1, SubsPresent), + M1 = #{<<"a">> => <<"A11">>, <<"b">> => 100, <<"c">> => <<"CARTMAN">>}, + ?assert(apply_filter(Filter1, M1)), + % ok + + M2 = #{<<"a">> => <<"A11">>, <<"b">> => 10, <<"c">> => <<"CARTMAN">>}, + ?assertNot(apply_filter(Filter1, M2)), + % $b < 30 + + FE2 = "($a BETWEEN \"A\" AND \"A12\") AND (($b >= 30) OR contains($c, :d))", + {ok, Filter2} = generate_filter_expression(FE2, SubsPresent), + ?assert(apply_filter(Filter2, M2)), + % OR used so ($b >= 30) = false is ok + + FE3 = "($a BETWEEN \"A12\" AND \"A\") AND (($b >= 30) OR contains($c, :d))", + {ok, Filter3} = generate_filter_expression(FE3, SubsPresent), + ?assertNot(apply_filter(Filter3, M2)), + % swapping the low/high - not ok - between explicitly requires low/high + + M3 = #{<<"a">> => <<"A11">>, <<"b">> => <<"100">>, <<"c">> => <<"CARTMAN">>}, + ?assertNot(apply_filter(Filter1, M3)), + % substitution b is not an integer + M3A = #{<<"a">> => 11, <<"b">> => 100, <<"c">> => <<"CARTMAN">>}, + ?assertNot(apply_filter(Filter1, M3A)), + % substitution a is an integer + + FE4 = + "($dob BETWEEN \"19700101\" AND \"19791231\") " + "AND (contains($gns, \"#Willow\") AND contains($pcs, \"#LS\"))", + {ok, Filter4} = generate_filter_expression(FE4, maps:new()), + M4 = + #{ + <<"dob">> => <<"19751124">>, + <<"gns">> => <<"#Mia#Willow#Chloe">>, + <<"pcs">> => <<"#BD1 1DU#LS1 4BT">> + }, + ?assert(apply_filter(Filter4, M4)), + + FE5 = + "($dob >= \"19740301\" AND $dob <= \"19761030\")" + " OR ($dod > \"20200101\" AND $dod < \"20230101\")", + + {ok, Filter5} = generate_filter_expression(FE5, maps:new()), + F = fun(M) -> apply_filter(Filter5, M) end, + + M5 = maps:from_list([{<<"dob">>, <<"19750202">>}, {<<"dod">>, <<"20221216">>}]), + M6 = maps:from_list([{<<"dob">>, <<"19750202">>}, {<<"dod">>, <<"20191216">>}]), + M7 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20221216">>}]), + M8 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20191216">>}]), + M9 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20241216">>}]), + M10 = maps:new(), + ?assertMatch(true, F(M5)), + ?assertMatch(true, F(M6)), + ?assertMatch(true, F(M7)), + ?assertMatch(false, F(M8)), + ?assertMatch(false, F(M9)), + ?assertMatch(false, F(M10)), + + FE5A = + "($dob >= \"19740301\" AND $dob <= \"19761030\")" + " AND ($dod 
= \"20221216\")", + {ok, Filter5A} = generate_filter_expression(FE5A, maps:new()), + ?assert(apply_filter(Filter5A, M5)), + ?assertNot(apply_filter(Filter5A, M6)), + FE5B = + "$dob >= \"19740301\" AND $dob <= \"19761030\"" + " AND $dod = \"20221216\"", + {ok, Filter5B} = generate_filter_expression(FE5B, maps:new()), + ?assert(apply_filter(Filter5B, M5)), + ?assertNot(apply_filter(Filter5B, M6)), + + FE6 = + "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + " OR $dob <> \"19993112\"", + {ok, Filter6} = generate_filter_expression(FE6, maps:new()), + M11 = maps:from_list([{<<"dob">>, <<"19993112">>}]), + ?assertMatch(false, apply_filter(Filter6, M11)), + + FE7 = + "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + " OR $dob = \"19993112\"", + {ok, Filter7} = generate_filter_expression(FE7, maps:new()), + ?assert(apply_filter(Filter7, M11)), + + FE8 = "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + " OR $dob IN (\"19910301\", \"19910103\")", + {ok, Filter8} = generate_filter_expression(FE8, maps:new()), + ?assert(apply_filter(Filter8, #{<<"dob">> => <<"19910301">>})), + ?assert(apply_filter(Filter8, #{<<"dob">> => <<"19910103">>})), + ?assertNot(apply_filter(Filter8, #{<<"dob">> => <<"19910102">>})), + ?assertNot(apply_filter(Filter8, #{<<"gn">> => <<"Nikki">>})), + + FE9 = "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + " OR $dob IN (\"19910301\", \"19910103\")", + % Only match with a type match + {ok, Filter9} = generate_filter_expression(FE9, maps:new()), + ?assert(apply_filter(Filter9, #{<<"dob">> => <<"19910301">>})), + ?assert(apply_filter(Filter9, #{<<"dob">> => <<"19910103">>})), + ?assertNot(apply_filter(Filter9, #{<<"dob">> => <<"19910401">>})), + ?assertNot(apply_filter(Filter9, #{<<"dob">> => <<"19910104">>})), + + FE10 = "NOT contains($gn, \"MA\") AND " + "(NOT $dob IN (\"19910301\", \"19910103\"))", + {ok, Filter10} = generate_filter_expression(FE10, maps:new()), + ?assert( + apply_filter( + Filter10, + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910201">>})), + ?assertNot( + apply_filter( + Filter10, + #{<<"gn">> => <<"EMMA">>, <<"dob">> => <<"19910201">>})), + ?assertNot( + apply_filter( + Filter10, + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910301">>})), + + FE11 = "NOT contains($gn, \"MA\") AND " + "NOT $dob IN (\"19910301\", \"19910103\")", + {ok, Filter11} = generate_filter_expression(FE11, maps:new()), + ?assert( + apply_filter( + Filter11, + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910201">>})), + ?assertNot( + apply_filter( + Filter11, + #{<<"gn">> => <<"EMMA">>, <<"dob">> => <<"19910201">>})), + ?assertNot( + apply_filter( + Filter11, + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910301">>})), + + FE12 = "begins_with($gn, \"MA\") AND begins_with($fn, :fn)", + {ok, Filter12} = generate_filter_expression(FE12, #{<<"fn">> => <<"SU">>}), + ?assert( + apply_filter( + Filter12, + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SUMMER">>})), + ?assertNot( + apply_filter( + Filter12, + #{<<"gn">> => <<"MITTY">>, <<"fn">> => <<"SUMMER">>})), + ?assertNot( + apply_filter( + Filter12, + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SIMMS">>})), + ?assertNot( + apply_filter( + Filter12, + #{<<"gn">> => 42, <<"fn">> => <<"SUMMER">>})), + + FE12E = "ends_with($gn, \"TY\") AND begins_with($fn, :fn)", + {ok, Filter12E} = generate_filter_expression(FE12E, #{<<"fn">> => <<"SU">>}), + ?assert( + apply_filter( + Filter12E, + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SUMMER">>})), + ?assertNot( + apply_filter( + Filter12E, + #{<<"gn">> => 
<<"MATTI">>, <<"fn">> => <<"SUMMER">>})), + ?assertNot( + apply_filter( + Filter12E, + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SIMMS">>})), + ?assertNot( + apply_filter( + Filter12E, + #{<<"gn">> => 42, <<"fn">> => <<"SUMMER">>})), + + FE13 = "attribute_exists($dob) AND attribute_not_exists($consent) " + "AND attribute_empty($dod)", + {ok, Filter13} = generate_filter_expression(FE13, maps:new()), + ?assert( + apply_filter( + Filter13, + #{<<"dob">> => <<"19440812">>, <<"dod">> => <<>>})), + ?assertNot( + apply_filter( + Filter13, + #{<<"dod">> => <<>>})), + ?assertNot( + apply_filter( + Filter13, + #{<<"dob">> => <<"19440812">>, + <<"consent">> => <<>>, + <<"dod">> => <<>>})), + ?assertNot( + apply_filter( + Filter13, + #{<<"dob">> => <<"19440812">>, <<"dod">> => <<"20240213">>})), + + FE14 = "\"M1\" IN $gns", + {ok, Filter14} = generate_filter_expression(FE14, maps:new()), + ?assert( + apply_filter( + Filter14, + #{<<"gns">> => [<<"MA">>, <<"M1">>, <<"A0">>]})), + ?assertNot( + apply_filter( + Filter14, + #{<<"gns">> => [<<"MA">>, <<"M2">>, <<"A0">>]})), + ?assertNot( + apply_filter( + Filter14, + #{<<"gns">> => <<"M1">>})), + + FE15 = + "(attribute_empty($dod) AND $dob < :date)" + "OR :date BETWEEN $dob AND $dod", + {ok, Filter15} = + generate_filter_expression(FE15, #{<<"date">> => <<"20200101">>}), + ?assert( + apply_filter( + Filter15, + #{<<"dob">> => <<"199900303">>, <<"dod">> => <<>>} + ) + ), + ?assert( + apply_filter( + Filter15, + #{<<"dob">> => <<"199900303">>, <<"dod">> => <<"20210105">>} + ) + ), + ?assertNot( + apply_filter( + Filter15, + #{<<"dob">> => <<"20210303">>, <<"dod">> => <<"20230105">>} + ) + ), + ?assertNot( + apply_filter( + Filter15, + #{<<"dob">> => <<"196900303">>, <<"dod">> => <<"19890105">>} + ) + ), + ?assertNot( + apply_filter( + Filter15, + #{<<"dob">> => 199900303, <<"dod">> => <<>>} + ) + ), + + FE15A = + "(attribute_empty($dod) AND :date > $dob)" + "OR :date BETWEEN $dob AND $dod", + {ok, Filter15A} = + generate_filter_expression(FE15A, #{<<"date">> => <<"20200101">>}), + ?assert( + apply_filter( + Filter15A, + #{<<"dob">> => <<"199900303">>, <<"dod">> => <<>>} + ) + ), + ?assertNot( + apply_filter( + Filter15A, + #{<<"dob">> => <<"202300303">>, <<"dod">> => <<>>} + ) + ), + ?assertNot( + apply_filter( + Filter15A, + #{<<"dob">> => <<"202300303">>} + ) + ), + ?assert( + apply_filter( + Filter15A, + #{<<"dob">> => <<"199900303">>, <<"dod">> => <<"20210105">>} + ) + ), + + FE16 = ":response_time BETWEEN $low_point AND $high_point", + {ok, Filter16} = + generate_filter_expression( + FE16, + #{<<"response_time">> => 346} + ), + ?assert( + apply_filter( + Filter16, + #{<<"low_point">> => 200, <<"high_point">> => 420} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => 360, <<"high_point">> => 420} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => 210, <<"high_point">> => 320} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => <<"200">>, <<"high_point">> => 420} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => 200, <<"high_point">> => <<"420">>} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"high_point">> => 420} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => 200} + ) + ), + + FE17 = ":response_time > $high_point", + {ok, Filter17} = + generate_filter_expression( + FE17, + #{<<"response_time">> => 350} + ), + ?assert( + apply_filter( + Filter17, + #{<<"high_point">> => 310} + ) + ), + ?assertNot( + apply_filter( + Filter17, + 
#{<<"high_point">> => <<"310">>} + ) + ), + ?assertNot( + apply_filter( + Filter17, + #{} + ) + ), + + FE18 = "$dod BETWEEN $dob AND :today", + {ok, Filter18} = + generate_filter_expression(FE18, #{<<"today">> => <<"20240520">>}), + ?assert( + apply_filter( + Filter18, + #{<<"dob">> => <<"19900505">>, <<"dod">> => <<"20231015">>} + ) + ), + ?assertNot( + apply_filter( + Filter18, + #{<<"dob">> => <<"19900505">>, <<"dod">> => <<"20261015">>} + ) + ), + ?assertNot( + apply_filter( + Filter18, + #{<<"dob">> => <<"19900505">>} + ) + ), + + FE19 = "begins_with($fn, :prefix)", + {ok, Filter19} = + generate_filter_expression(FE19, #{<<"prefix">> => <<"Åb"/utf8>>}), + ?assert( + apply_filter( + Filter19, + #{<<"fn">> => <<"Åberg"/utf8>>} + ) + ), + ?assertNot( + apply_filter( + Filter19, + #{<<"fn">> => <<"Aberg">>} + ) + ), + ?assertNot( + apply_filter( + Filter19, + #{<<"fn">> => <<"Aberg"/utf8>>} + ) + ) + + . + +-endif. diff --git a/src/leveled_filterlexer.xrl b/src/leveled_filterlexer.xrl new file mode 100644 index 00000000..d85b51f9 --- /dev/null +++ b/src/leveled_filterlexer.xrl @@ -0,0 +1,51 @@ +%% Lexer for filter and conditional expressions +%% Author: Thomas Arts + +Definitions. +WhiteSpace = ([\t\f\v\r\n\s]+) + +Rules. + +{WhiteSpace} : skip_token. + +\( : {token, {'(', TokenLine}}. +\) : {token, {')', TokenLine}}. + +, : {token, {',', TokenLine}}. +NOT : {token, {'NOT', TokenLine}}. +AND : {token, {'AND', TokenLine}}. +OR : {token, {'OR', TokenLine}}. +BETWEEN : {token, {'BETWEEN', TokenLine}}. +IN : {token, {'IN', TokenLine}}. += : {token, {comparator, '=', TokenLine}}. +< : {token, {comparator, '<', TokenLine}}. +> : {token, {comparator, '>', TokenLine}}. +<> : {token, {comparator, '<>', TokenLine}}. +<= : {token, {comparator, '<=', TokenLine}}. +>= : {token, {comparator, '>=', TokenLine}}. + +contains : {token, {contains, TokenLine}}. +begins_with : {token, {begins_with, TokenLine}}. +ends_with : {token, {ends_with, TokenLine}}. +attribute_exists : {token, {attribute_exists, TokenLine}}. +attribute_not_exists : {token, {attribute_not_exists, TokenLine}}. +attribute_empty : {token, {attribute_empty, TokenLine}}. + +\$[a-zA-Z_][a-zA-Z_0-9]* : {token, {identifier, TokenLine, strip_identifier(TokenChars)}}. +\:[a-zA-Z_][a-zA-Z_0-9]* : {token, {substitution, TokenLine, strip_substitution(TokenChars)}}. +\-[0-9]+ : {token, {integer, TokenLine, list_to_integer(TokenChars)}}. +[0-9]+ : {token, {integer, TokenLine, list_to_integer(TokenChars)}}. +\"[^"]+\" : {token, {string, TokenLine, strip_string(TokenChars)}}. %" + +Erlang code. + +strip_string(TokenChars) -> + unicode:characters_to_binary(lists:droplast(tl(TokenChars))). + +strip_identifier(TokenChars) -> + [36|StrippedChars] = TokenChars, + unicode:characters_to_binary(StrippedChars). + +strip_substitution(TokenChars) -> + [58|StrippedChars] = TokenChars, + unicode:characters_to_binary(StrippedChars). diff --git a/src/leveled_filterparser.yrl b/src/leveled_filterparser.yrl new file mode 100644 index 00000000..4a94a798 --- /dev/null +++ b/src/leveled_filterparser.yrl @@ -0,0 +1,53 @@ +%% Grammar for filter expressions +%% Author: Thomas Arts + +Nonterminals +top_level condition operand str_list strings. + + +Terminals +'(' ')' comparator identifier string integer +',' +'NOT' 'AND' 'OR' 'IN' 'BETWEEN' +contains begins_with ends_with +attribute_exists attribute_not_exists attribute_empty. + + +Rootsymbol top_level. + +top_level -> condition: {condition, '$1'}. + +condition -> operand comparator operand : {'$2', '$1', '$3'}. 
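+%% e.g. the rule above reduces $a >= "30" to
+%% {{comparator, '>=', L}, {identifier, L, <<"a">>}, {string, L, <<"30">>}},
+%% with the comparator token itself heading the condition triple.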
+condition -> operand 'BETWEEN' operand 'AND' operand : {'BETWEEN', '$1', '$3', '$5'}. +condition -> identifier 'IN' str_list : {'IN', '$1', '$3'}. +condition -> string 'IN' identifier : {'IN', '$1', '$3'}. + +condition -> contains '(' identifier ',' string ')' : {contains, '$3', '$5'}. +condition -> begins_with '(' identifier ',' string ')' : {begins_with, '$3', '$5'}. +condition -> ends_with '(' identifier ',' string ')' : {ends_with, '$3', '$5'}. +condition -> attribute_exists '(' identifier ')' : {attribute_exists, '$3'}. +condition -> attribute_not_exists '(' identifier ')' : {attribute_not_exists, '$3'}. +condition -> attribute_empty '(' identifier ')' : {attribute_empty, '$3'}. + +condition -> condition 'AND' condition : {'AND', '$1', '$3'}. +condition -> condition 'OR' condition : {'OR', '$1', '$3'}. +condition -> 'NOT' condition : {'NOT', '$2'}. +condition -> '(' condition ')' : '$2'. + +operand -> identifier : '$1'. +operand -> integer : '$1'. +operand -> string : '$1'. + +str_list -> '(' strings ')' : '$2'. + +strings -> string ',' strings : ['$1' | '$3']. +strings -> string : ['$1']. + +Endsymbol '$end'. + +Right 200 'NOT'. +Nonassoc 200 comparator. +Left 150 'AND'. +Left 100 'OR'. + +Erlang code. diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index 9737162f..f9eee25b 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -61,7 +61,7 @@ -type objectacc_fun() :: fun((leveled_codec:object_key(), any(), foldacc()) -> foldacc()). -type mp() - :: {re_pattern, term(), term(), term(), term()}. + :: any(). -export_type([fold_keys_fun/0, mp/0]). @@ -128,12 +128,11 @@ bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) -> end, {async, Runner}. --spec index_query(snap_fun(), - {leveled_codec:ledger_key(), - leveled_codec:ledger_key(), - {boolean(), undefined|mp()}}, - {fold_keys_fun(), foldacc()}) - -> {async, runner_fun()}. +-spec index_query( + snap_fun(), + {leveled_codec:ledger_key(), leveled_codec:ledger_key(), + {boolean()|binary(), leveled_codec:term_expression()}}, + {fold_keys_fun(), foldacc()}) -> {async, runner_fun()}. %% @doc %% Secondary index query %% This has the special capability that it will expect a message to be thrown @@ -166,7 +165,7 @@ index_query(SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT) -> leveled_codec:key()|null, {leveled_codec:single_key()|null, leveled_codec:single_key()|null}, {fold_keys_fun(), foldacc()}, - leveled_codec:regular_expression()) + leveled_codec:term_expression()) -> {async, runner_fun()}. %% @doc %% Fold over all keys in `KeyRange' under tag (restricted to a given bucket) @@ -511,7 +510,7 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) -> %% the full object), or {true, CheckPresence} - in which case a proxy object %% will be created that if understood by the fold function will allow the fold %% function to work on the head of the object, and defer fetching the body in -%% case such a fetch is unecessary. +%% case such a fetch is unnecessary. 
foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList, LastModRange, MaxObjectCount) -> {FoldFun, InitAcc} = @@ -683,7 +682,7 @@ accumulate_keys(FoldKeysFun, undefined) -> accumulate_keys(FoldKeysFun, TermRegex) -> fun(Key, _Value, Acc) -> {B, K} = leveled_codec:from_ledgerkey(Key), - case re:run(K, TermRegex) of + case leveled_util:regex_run(K, TermRegex, []) of nomatch -> Acc; _ -> diff --git a/src/leveled_setop.erl b/src/leveled_setop.erl new file mode 100644 index 00000000..1677783f --- /dev/null +++ b/src/leveled_setop.erl @@ -0,0 +1,145 @@ +%% -------- Set Operations --------- +%% +%% Support for set operations (i.e on sets of keys) within leveled +%% + +-module(leveled_setop). + +-export([generate_setop_function/1]). + + +%%%============================================================================ +%%% External API +%%%============================================================================ + +-spec generate_setop_function( + string()) -> + fun((#{non_neg_integer() => sets:set(binary())}) + -> sets:set(binary()) + ). +generate_setop_function(EvalString) -> + {ok, ParsedEval} = generate_setop_expression(EvalString), + fun(MapOfSets) -> + apply_setop(ParsedEval, MapOfSets) + end. + +%%%============================================================================ +%%% Internal functions +%%%============================================================================ + +generate_setop_expression(EvalString) -> + String = unicode:characters_to_list(EvalString), + {ok, Tokens, _EndLine} = leveled_setoplexer:string(String), + leveled_setopparser:parse(Tokens). + +apply_setop({setop, SetOp}, SetList) -> + apply_setop(SetOp, SetList); +apply_setop({set_id, _, SetID}, SetList) -> + get_set(SetID, SetList); +apply_setop( + {SetFunctionName, {set_id, _, SetIDa}, {set_id, _, SetIDb}}, + SetList) -> + SetFunction = set_function(SetFunctionName), + SetFunction(get_set(SetIDa, SetList), get_set(SetIDb, SetList)); +apply_setop( + {SetFunctionName, {set_id, _, SetIDa}, Condition}, + SetList) -> + SetFunction = set_function(SetFunctionName), + SetFunction(get_set(SetIDa, SetList), apply_setop(Condition, SetList)); +apply_setop( + {SetFunctionName, Condition, {set_id, _, SetIDb}}, + SetList) -> + SetFunction = set_function(SetFunctionName), + SetFunction(apply_setop(Condition, SetList), get_set(SetIDb, SetList)); +apply_setop({SetFunctionName, ConditionA, ConditionB}, SetList) -> + SetFunction = set_function(SetFunctionName), + SetFunction( + apply_setop(ConditionA, SetList), apply_setop(ConditionB, SetList) + ). + +set_function('UNION') -> + fun(A, B) -> sets:union(A, B) end; +set_function('INTERSECT') -> + fun(A, B) -> sets:intersection(A, B) end; +set_function('SUBTRACT') -> + fun(A, B) -> sets:subtract(A, B) end. + +%% Return empty set if index not present in given set +%% (That is, do not throw an error) +get_set(SetID, SetMap) -> + maps:get(SetID, SetMap, sets:new()). + + +%%%============================================================================ +%%% Test +%%%============================================================================ + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + +parser_formal_test() -> + Q1 = "($1 INTERSECT $2) UNION $3", + Q2 = "($1 INTERSECT $2) UNION ($3 INTERSECT $4)", + Q3 = "($1 INTERSECT $2 INTERSECT $5) UNION ($3 INTERSECT $4)", + Q4 = "($1 INTERSECT $2 INTERSECT $5) UNION ($3 SUBTRACT $4)", + parser_tester(Q1, Q2, Q3, Q4). 
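+
+%% Minimal usage sketch of the exported API, mirroring the tests below:
+setop_usage_test() ->
+    F = generate_setop_function("$1 SUBTRACT $2"),
+    Keys =
+        F(#{1 => sets:from_list([<<"K1">>, <<"K2">>]),
+            2 => sets:from_list([<<"K2">>])}),
+    ?assertMatch([<<"K1">>], lists:sort(sets:to_list(Keys))).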
+ +parser_tester(Q1, Q2, Q3, Q4) -> + S1 = sets:from_list([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>]), + S2 = sets:from_list([<<"K3">>, <<"K4">>, <<"K5">>, <<"K6">>, <<"K7">>]), + S3 = sets:from_list([<<"K7">>, <<"K8">>, <<"K9">>]), + S4 = sets:from_list([<<"K7">>, <<"K9">>, <<"K0">>]), + S5 = sets:from_list([<<"K1">>, <<"K2">>, <<"K3">>, <<"K8">>, <<"K9">>]), + + F1 = generate_setop_function(Q1), + F2 = generate_setop_function(Q2), + F3 = generate_setop_function(Q3), + F4 = generate_setop_function(Q4), + + R1 = + lists:sort( + sets:to_list(F1(#{1 => S1, 2 => S2, 3 => S3}) + ) + ), + R2 = + lists:sort( + sets:to_list(F2(#{1 => S1, 2 => S2, 3 => S3, 4 => S4}) + ) + ), + R3 = + lists:sort( + sets:to_list(F3(#{1 => S1, 2 => S2, 3 => S3, 4 => S4, 5 => S5}) + ) + ), + R4 = + lists:sort( + sets:to_list(F4(#{1 => S1, 2 => S2, 3 => S3, 4 => S4, 5 => S5}) + ) + ), + + ?assertMatch( + [<<"K3">>, <<"K4">>, <<"K5">>, <<"K7">>, <<"K8">>, <<"K9">>], R1), + ?assertMatch( + [<<"K3">>, <<"K4">>, <<"K5">>, <<"K7">>, <<"K9">>], R2), + ?assertMatch( + [<<"K3">>, <<"K7">>, <<"K9">>], R3), + ?assertMatch( + [<<"K3">>, <<"K8">>], R4). + +minimal_test() -> + S1 = sets:from_list([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>]), + F1 = generate_setop_function("$1"), + R1 = lists:sort(sets:to_list(F1(#{1 => S1}))), + ?assertMatch([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>], R1), + S2 = sets:from_list([<<"K3">>, <<"K4">>, <<"K5">>, <<"K6">>, <<"K7">>]), + S3 = sets:from_list([<<"K1">>, <<"K2">>]), + F2 = generate_setop_function("$1 INTERSECT ($2 UNION $3)"), + R2 = lists:sort(sets:to_list(F2(#{1 => S1, 2 => S2, 3 => S3}))), + ?assertMatch([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>], R2), + F3 = generate_setop_function("$1 INTERSECT ($2 UNION $2)"), + R3 = lists:sort(sets:to_list(F3(#{1 => S1, 2 => S2}))), + ?assertMatch([<<"K3">>, <<"K4">>, <<"K5">>], R3). + + +-endif. \ No newline at end of file diff --git a/src/leveled_setoplexer.xrl b/src/leveled_setoplexer.xrl new file mode 100644 index 00000000..1a94cdf5 --- /dev/null +++ b/src/leveled_setoplexer.xrl @@ -0,0 +1,21 @@ +Definitions. +WhiteSpace = ([\t\f\v\r\n\s]+) + +Rules. + +{WhiteSpace} : skip_token. + +\( : {token, {'(', TokenLine}}. +\) : {token, {')', TokenLine}}. + +UNION : {token, {'UNION', TokenLine}}. +INTERSECT : {token, {'INTERSECT', TokenLine}}. +SUBTRACT : {token, {'SUBTRACT', TokenLine}}. + +\$[1-9][0-9]* : {token, {set_id, TokenLine, strip_identifier(TokenChars)}}. + +Erlang code. + +strip_identifier(TokenChars) -> + [36|StrippedChars] = TokenChars, + list_to_integer(StrippedChars). \ No newline at end of file diff --git a/src/leveled_setopparser.yrl b/src/leveled_setopparser.yrl new file mode 100644 index 00000000..e261efc0 --- /dev/null +++ b/src/leveled_setopparser.yrl @@ -0,0 +1,28 @@ +%% Grammar for key set operations + +Nonterminals +top_level condition. + + +Terminals +'(' ')' set_id +'UNION' 'INTERSECT' 'SUBTRACT'. + + +Rootsymbol top_level. + +top_level -> condition: {setop, '$1'}. + +condition -> condition 'UNION' condition : {'UNION', '$1', '$3'}. +condition -> condition 'INTERSECT' condition : {'INTERSECT', '$1', '$3'}. +condition -> condition 'SUBTRACT' condition : {'SUBTRACT', '$1', '$3'}. +condition -> '(' condition ')' : '$2'. +condition -> set_id : '$1'. + +Endsymbol '$end'. + +Right 200 'SUBTRACT'. +Left 150 'INTERSECT'. +Left 100 'UNION'. + +Erlang code. 
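+
+%% Note: in yecc a higher precedence number binds tighter, so with the
+%% declarations above "$1 UNION $2 INTERSECT $3" groups as
+%% {'UNION', Set1, {'INTERSECT', Set2, Set3}}, and the right-associative
+%% 'SUBTRACT' binds tightest of all; parentheses override as usual.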
diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 15cfde26..19c409a2 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -950,15 +950,19 @@ empty_test() -> search_range_idx_test() -> Tree = {idxt,1, - {{[{{o_rkv,"Bucket1","Key1",null}, - {manifest_entry,{o_rkv,"Bucket","Key9083",null}, - {o_rkv,"Bucket1","Key1",null}, - "<0.320.0>","./16_1_6.sst", none}}]}, - {1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}}, + {{[{{o_rkv,<<"Bucket1">>,<<"Key1">>,null}, + leveled_pmanifest:new_entry( + {o_rkv, <<"Bucket">>, <<"Key9083">>, null}, + {o_rkv, <<"Bucket1">>, <<"Key1">>, null}, + list_to_pid("<0.320.0>"), + "./16_1_6.sst", + none + )}]}, + {1, {{o_rkv, <<"Bucket1">>, <<"Key1">>, null}, 1, nil, nil}}}}, R = search_range( - {o_rkv, "Bucket", null, null}, - {o_rkv, "Bucket", null, null}, + {o_rkv, <<"Bucket">>, null, null}, + {o_rkv, <<"Bucket">>, null, null}, Tree, fun leveled_pmanifest:entry_startkey/1 ), diff --git a/src/leveled_util.erl b/src/leveled_util.erl index 630e9f42..9bb0ab71 100644 --- a/src/leveled_util.erl +++ b/src/leveled_util.erl @@ -11,9 +11,11 @@ integer_time/1, magic_hash/1, t2b/1, - safe_rename/4 - ] - ). + safe_rename/4, + regex_run/3, + regex_compile/1, + regex_compile/2 + ]). -define(WRITE_OPS, [binary, raw, read, write]). @@ -42,6 +44,34 @@ integer_time(TS) -> calendar:datetime_to_gregorian_seconds(DT). +-type match_option() :: + caseless | + {offset, non_neg_integer()} | + {capture, value_spec()} | + {capture, value_spec(), value_spec_type()}. +-type value_spec() :: + all | all_but_first | first | none | [value_id()]. +-type value_spec_type() :: binary. +-type value_id() :: string(). +-type match_index() :: {non_neg_integer(), non_neg_integer()}. + +-spec regex_run( + iodata(), leveled_codec:actual_regex(), list(match_option())) -> + match | + nomatch | + {match, list(match_index())} | + {match, list(binary())} | + {error, atom()}. +regex_run(Subject, CompiledPCRE, Opts) -> + re:run(Subject, CompiledPCRE, Opts). + +-spec regex_compile(iodata()) -> {ok, leveled_codec:actual_regex()}. +regex_compile(PlainRegex) -> + regex_compile(PlainRegex, pcre). + +regex_compile(PlainRegex, pcre) -> + re:compile(PlainRegex). + -spec magic_hash(any()) -> 0..16#FFFFFFFF. %% @doc %% Use DJ Bernstein magic hash function. Note, this is more expensive than diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 13beb8d4..5a4d4a3f 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -802,7 +802,12 @@ space_clear_ondelete(_Config) -> {ok, Book1} = leveled_bookie:book_start(StartOpts1), G2 = fun testutil:generate_compressibleobjects/2, testutil:load_objects( - 20000, [uuid, uuid, uuid, uuid], Book1, no_check, G2), + 20000, + [binary_uuid, binary_uuid, binary_uuid, binary_uuid], + Book1, + no_check, + G2 + ), FoldKeysFun = fun(B, K, Acc) -> [{B, K}|Acc] end, diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl index 855aad8e..145a9b11 100644 --- a/test/end_to_end/iterator_SUITE.erl +++ b/test/end_to_end/iterator_SUITE.erl @@ -12,7 +12,10 @@ query_count/1, multibucket_fold/1, foldobjects_bybucket_range/1, - rotating_objects/1]). + rotating_objects/1, + capture_and_filter_terms/1, + complex_queries/1 + ]). all() -> [ expiring_indexes, @@ -22,7 +25,9 @@ all() -> [ query_count, multibucket_fold, rotating_objects, - foldobjects_bybucket_range + foldobjects_bybucket_range, + capture_and_filter_terms, + complex_queries ]. 
init_per_suite(Config) -> @@ -147,13 +152,6 @@ expiring_indexes(_Config) -> Bookie1, B0, K0, 5, <<"value">>, leveled_util:integer_now() + 10), timer:sleep(1000), {async, Folder2} = IndexFold(), - leveled_bookie:book_indexfold( - Bookie1, - B0, - {FoldFun, InitAcc}, - {<<"temp_int">>, 5, 8}, - {true, undefined} - ), QR2 = Folder2(), io:format("Query with additional entry length ~w~n", [length(QR2)]), true = lists:sort(QR2) == lists:sort([{5, B0, K0}|LoadedEntriesInRange]), @@ -481,7 +479,7 @@ small_load_with2i(_Config) -> IndexGen = testutil:get_randomindexes_generator(8), ObjL1 = testutil:generate_objects( - 10000, uuid, [], ObjectGen, IndexGen), + 10000, binary_uuid, [], ObjectGen, IndexGen), testutil:riakload(Bookie1, ObjL1), ChkList1 = lists:sublist(lists:sort(ObjL1), 100), testutil:check_forlist(Bookie1, ChkList1), @@ -569,8 +567,9 @@ small_load_with2i(_Config) -> lists:foldl(SumFromObjLFun, 0, ObjL1), ChkList1Total = lists:foldl(SumFromObjLFun, 0, ChkList1), - io:format("Total in original object list ~w and from removed list ~w~n", - [ObjL1Total, ChkList1Total]), + io:format( + "Total in original object list ~w and from removed list ~w~n", + [ObjL1Total, ChkList1Total]), Total1 = ObjL1Total - ChkList1Total, @@ -608,7 +607,7 @@ query_count(_Config) -> testutil:check_forobject(Book1, TestObject), lists:foreach( fun(_X) -> - V = testutil:get_compressiblevalue(), + V = <<"TestValue">>, Indexes = testutil:get_randomindexes_generator(8), SW = os:timestamp(), ObjL1 = @@ -686,12 +685,13 @@ query_count(_Config) -> Mia2000Count2 = lists:foldl( fun({Term, _Key}, Acc) -> - case re:run(Term, RegMia) of + case leveled_util:regex_run(Term, RegMia, []) of nomatch -> Acc; _ -> Acc + 1 - end end, + end + end, 0, Mia2KFolder2()), ok = case Mia2000Count2 of @@ -700,15 +700,24 @@ query_count(_Config) -> [Mia2000Count1]), ok end, - {ok, RxMia2K} = re:compile("^2000[0-9]+Mia"), + {ok, RxMia2K} = leveled_util:regex_compile("^2000[0-9]+Mia"), Query3 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, {false, RxMia2K}}, - {async, - Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3), + {async, Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3), Mia2000Count1 = length(Mia2KFolder3()), + {ok, RxMia2KPCRE} = re:compile("^2000[0-9]+Mia"), + Query3PCRE = + {index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, + {false, RxMia2KPCRE}}, + {async, Mia2KFolder3PCRE} = + leveled_bookie:book_returnfolder(Book2, Query3PCRE), + Mia2000Count1 = length(Mia2KFolder3PCRE()), V9 = testutil:get_compressiblevalue(), Indexes9 = testutil:get_randomindexes_generator(8), @@ -840,6 +849,446 @@ query_count(_Config) -> testutil:reset_filestructure(). 
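+%% The folds below fetch the same key set (people born mid-70s, given name
+%% Willow, LS postcode) by different routes: a compiled regex applied to
+%% the returned terms outside the fold, {query, EvalFun, FilterFun} term
+%% expressions applied inside the fold, and a single capture projected out
+%% by passing a binary as the ReturnTerms element - all must agree.
+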
+capture_and_filter_terms(_Config) ->
+    RootPath = testutil:reset_filestructure(),
+    Bucket = {<<"Type1">>, <<"Bucket1">>},
+    IdxName = <<"people_bin">>,
+    {ok, Book1} =
+        leveled_bookie:book_start(
+            RootPath, 2000, 50000000, testutil:sync_strategy()),
+    V1 = <<"V1">>,
+    IndexGen =
+        fun() ->
+            [{add, IdxName, list_to_binary(perf_SUITE:random_people_index())}]
+        end,
+    ObjL1 =
+        testutil:generate_objects(
+            100000, binary_uuid, [], V1, IndexGen, Bucket),
+    testutil:riakload(Book1, ObjL1),
+
+    StartDoB = <<"19740301">>,
+    EndDoB = <<"19761031">>,
+
+    WillowLeedsFinder =
+        "[^\\|]*\\|[0-9]{8}\\|[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|"
+        "[^\\|]*#LS[^\\|]*",
+
+    SW0 = os:timestamp(),
+    {ok, WillowLeedsPCRE} = re:compile(WillowLeedsFinder),
+
+    QueryPCRE0 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {true, WillowLeedsPCRE}},
+    {async, Runner0} = leveled_bookie:book_returnfolder(Book1, QueryPCRE0),
+    Results0 = Runner0(),
+    BornMid70s0 =
+        lists:filtermap(
+            fun({IdxValue, Key}) ->
+                DoB =
+                    list_to_binary(
+                        lists:nth(
+                            2,
+                            string:tokens(binary_to_list(IdxValue), "|")
+                        )
+                    ),
+                case (DoB >= StartDoB) andalso (DoB =< EndDoB) of
+                    true ->
+                        {true, Key};
+                    false ->
+                        false
+                end
+            end,
+            Results0
+        ),
+
+    SW1 = os:timestamp(),
+
+    WillowLeedsExtractor =
+        "[^\\|]*\\|(?P<dob>[0-9]{8})\\|[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|"
+        "[^\\|]*#LS[^\\|]*",
+    FilterFun1 =
+        fun(Captures) ->
+            DoB = maps:get(<<"dob">>, Captures, notfound),
+            (DoB >= StartDoB) andalso (DoB =< EndDoB)
+        end,
+    EvalFunPCRE =
+        leveled_eval:generate_eval_function(
+            "regex($term, :regex, pcre, ($dob))",
+            #{<<"regex">> => list_to_binary(WillowLeedsExtractor)}
+        ),
+
+    QueryPCRE1 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {false, {query, EvalFunPCRE, FilterFun1}}
+        },
+    {async, RunnerPCRE1} = leveled_bookie:book_returnfolder(Book1, QueryPCRE1),
+    BornMid70sPCRE1 = RunnerPCRE1(),
+
+    SW2 = os:timestamp(),
+
+    EvalFunRE2 =
+        leveled_eval:generate_eval_function(
+            "regex($term, :regex, pcre, ($dob))",
+            #{<<"regex">> => list_to_binary(WillowLeedsExtractor)}
+        ),
+    QueryRE2_2 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {false, {query, EvalFunRE2, FilterFun1}}
+        },
+    {async, RunnerRE2_2} = leveled_bookie:book_returnfolder(Book1, QueryRE2_2),
+    BornMid70sRE2_2 = RunnerRE2_2(),
+
+    SW3 = os:timestamp(),
+
+    AllFun = fun(_) -> true end,
+    QueryRE2_3 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {<<"dob">>, {query, EvalFunRE2, AllFun}}
+        },
+    {async, RunnerRE2_3} = leveled_bookie:book_returnfolder(Book1, QueryRE2_3),
+    Results3 = RunnerRE2_3(),
+    BornMid70sRE2_3 =
+        lists:filtermap(
+            fun({DoB, Key}) ->
+                case (DoB >= StartDoB) andalso (DoB =< EndDoB) of
+                    true ->
+                        {true, Key};
+                    false ->
+                        false
+                end
+            end,
+            Results3
+        ),
+
+    SW4 = os:timestamp(),
+
+    WillowLeedsDoubleExtractor =
+        "[^\\|]*\\|(?P<dob>[0-9]{8})\\|(?P<dod>[0-9]{0,8})\\|"
+        "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*",
+    EvalFunRE2_2 =
+        leveled_eval:generate_eval_function(
+            "regex($term, :regex, pcre, ($dob, $dod))",
+            #{<<"regex">> => list_to_binary(WillowLeedsDoubleExtractor)}
+        ),
+
+    FilterFun2 =
+        fun(Captures) ->
+            DoB = maps:get(<<"dob">>, Captures, notfound),
+            (DoB >= StartDoB) andalso (DoB =< EndDoB)
+        end,
+    QueryRE2_4 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {false, {query, EvalFunRE2_2, FilterFun2}}
+        },
+    {async, RunnerRE2_4} = leveled_bookie:book_returnfolder(Book1, QueryRE2_4),
+    BornMid70sRE2_4 = RunnerRE2_4(),
+
+    SW5 = os:timestamp(),
+
+    QueryRE2_5 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {true, {query, EvalFunRE2, FilterFun1}}
+        },
+    {async, RunnerRE2_5} = leveled_bookie:book_returnfolder(Book1, QueryRE2_5),
+    {ok, WillowLeedsExtractorRE} = re:compile(WillowLeedsExtractor),
+    BornMid70sRE2_5 =
+        lists:filtermap(
+            fun({T, K}) ->
+                {match, _} =
+                    leveled_util:regex_run(T, WillowLeedsExtractorRE, []),
+                {true, K}
+            end,
+            RunnerRE2_5()),
+
+    SW8 = os:timestamp(),
+
+    FilterExpression1 = "($dob BETWEEN \"19740301\" AND \"19761030\")",
+    FilterFun5 =
+        leveled_filter:generate_filter_function(FilterExpression1, maps:new()),
+
+    QueryRE2_8 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {false, {query, EvalFunRE2, FilterFun5}}
+        },
+    {async, RunnerRE2_8} = leveled_bookie:book_returnfolder(Book1, QueryRE2_8),
+    BornMid70sRE2_8 = RunnerRE2_8(),
+
+    SW9 = os:timestamp(),
+
+    PreFilterRE =
+        "[^\\|]*\\|(?P<dob>197[4-6]{1}[0-9]{4})\\|"
+        "[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|"
+        "[^\\|]*#LS[^\\|]*",
+    PreFilterEvalFun =
+        leveled_eval:generate_eval_function(
+            "regex($term, :regex, pcre, ($dob))",
+            #{<<"regex">> => list_to_binary(PreFilterRE)}
+        ),
+
+    QueryRE2_9 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {false, {query, PreFilterEvalFun, FilterFun5}}
+        },
+    {async, RunnerRE2_9} = leveled_bookie:book_returnfolder(Book1, QueryRE2_9),
+    BornMid70sRE2_9 = RunnerRE2_9(),
+
+    SW10 = os:timestamp(),
+
+    WillowLeedsExtractor =
+        "[^\\|]*\\|(?P<dob>[0-9]{8})\\|[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|"
+        "[^\\|]*#LS[^\\|]*",
+
+    FilterExpression2 =
+        "($dob BETWEEN \"19740301\" AND \"19761030\") "
+        "AND (contains($gns, \"#Willow\") AND contains($pcs, \"#LS\"))",
+    FilterFun6 =
+        leveled_filter:generate_filter_function(FilterExpression2, maps:new()),
+    EvalFun2 =
+        leveled_eval:generate_eval_function(
+            "delim($term, \"|\", ($surname, $dob, $dod, $gns, $pcs))",
+            maps:new()
+        ),
+    QueryRE2_10 =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {false, {query, EvalFun2, FilterFun6}}
+        },
+    {async, RunnerRE2_10} = leveled_bookie:book_returnfolder(Book1, QueryRE2_10),
+    BornMid70sRE2_10 = RunnerRE2_10(),
+
+    SW11 = os:timestamp(),
+
+    true = length(BornMid70s0) > 0,
+
+    true = lists:sort(BornMid70s0) == lists:sort(BornMid70sPCRE1),
+    true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_2),
+    true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_3),
+    true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_4),
+    true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_5),
+    true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_8),
+    true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_9),
+    true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_10),
+
+    maybe_log_toscreen(
+        "~nFilter outside took ~w ms~n",
+        [timer:now_diff(SW1, SW0) div 1000]),
+    maybe_log_toscreen(
+        "~nPCRE Capture filter inside took ~w ms~n",
+        [timer:now_diff(SW2, SW1) div 1000]),
+    maybe_log_toscreen(
+        "~nRE2 Capture filter inside took ~w ms~n",
+        [timer:now_diff(SW3, SW2) div 1000]),
+    maybe_log_toscreen(
+        "~nRE2 Capture filter outside took ~w ms~n",
+        [timer:now_diff(SW4, SW3) div 1000]),
+    maybe_log_toscreen(
+        "~nRE2 double-capture filter outside took ~w ms~n",
+        [timer:now_diff(SW5, SW4) div 1000]),
+    maybe_log_toscreen(
+        "~nRE2 single-capture filter with parsed filter expression took ~w ms~n",
+        [timer:now_diff(SW9, SW8) div 1000]),
+    maybe_log_toscreen(
+        "~nRE2 single-capture pre-filter with parsed query string took ~w ms~n",
+        [timer:now_diff(SW10, SW9) div 1000]),
+    maybe_log_toscreen(
+        "~nEval processed index with parsed filter expression took ~w ms~n",
+        [timer:now_diff(SW11, SW10) div 1000]),
+
+
+    QueryRE2_3_WrongCapture =
+        {index_query,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxName, <<"M">>, <<"Z">>},
+            {<<"gns">>, {query, EvalFunRE2, FilterFun6}}
+        },
+    {async, RunnerRE2_3_WC} =
+        leveled_bookie:book_returnfolder(Book1, QueryRE2_3_WrongCapture),
+    true = [] == RunnerRE2_3_WC(),
+
+    ok = leveled_bookie:book_close(Book1),
+
+    testutil:reset_filestructure().
+
+maybe_log_toscreen(Log, Subs) ->
+    io:format(
+        % user,
+        Log,
+        Subs
+    ).
+
+complex_queries(_Config) ->
+    KeyCount = 200000,
+    RootPath = testutil:reset_filestructure(),
+    Bucket = {<<"Type1">>, <<"Bucket1">>},
+    IdxGivenName = <<"given_bin">>,
+    IdxFamilyName = <<"family_bin">>,
+    IdxPostCode = <<"postcode_bin">>,
+    IdxFullData = <<"fulldata_bin">>,
+    {ok, Book1} =
+        leveled_bookie:book_start(
+            RootPath, 2000, 50000000, testutil:sync_strategy()),
+    V1 = <<"V1">>,
+    IndexGen =
+        fun() ->
+            DoB = perf_SUITE:get_random_dob(),
+            DoD = perf_SUITE:get_random_dod(),
+            FN = perf_SUITE:get_random_surname(),
+            GN1 = perf_SUITE:get_random_givenname(),
+            GN2 = perf_SUITE:get_random_givenname(),
+            GN3 = perf_SUITE:get_random_givenname(),
+            PC1 = perf_SUITE:get_random_postcode(),
+            PC2 = perf_SUITE:get_random_postcode(),
+            PC3 = perf_SUITE:get_random_postcode(),
+            FNIdx1 = set_index_term(FN, DoB, DoD),
+            GNIdx1 = set_index_term(GN1, DoB, DoD),
+            GNIdx2 = set_index_term(GN2, DoB, DoD),
+            GNIdx3 = set_index_term(GN3, DoB, DoD),
+            PCIdx1 = set_index_term(PC1, DoB, DoD),
+            PCIdx2 = set_index_term(PC2, DoB, DoD),
+            PCIdx3 = set_index_term(PC3, DoB, DoD),
+            FullIdx =
+                set_full_index_term(
+                    FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3),
+            [
+                {add, IdxFamilyName, FNIdx1},
+                {add, IdxGivenName, GNIdx1},
+                {add, IdxGivenName, GNIdx2},
+                {add, IdxGivenName, GNIdx3},
+                {add, IdxPostCode, PCIdx1},
+                {add, IdxPostCode, PCIdx2},
+                {add, IdxPostCode, PCIdx3},
+                {add, IdxFullData, FullIdx}
+            ]
+        end,
+    ObjL1 =
+        testutil:generate_objects(
+            KeyCount, binary_uuid, [], V1, IndexGen, Bucket),
+    testutil:riakload(Book1, ObjL1),
+
+    DoBLow = <<"19730930">>,
+    DobHigh = <<"19770301">>,
+    GivenName = <<"#Willow">>,
+    PostCode = <<"#LS8 ">>,
+
+    %% Search for SM*, Leeds postcode, born in the mid-70s
+    FullIndexEvalFun =
+        leveled_eval:generate_eval_function(
+            "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))",
+            maps:new()),
+    FilterString =
+        "($dob BETWEEN :doblow AND :dobhigh) AND (contains($gcs, :givenname) "
+        "OR contains($pcs, :postcode))",
+    FullIndexFilterFun =
+        leveled_filter:generate_filter_function(
+            FilterString,
+            #{<<"doblow">> => DoBLow,
+                <<"dobhigh">> => DobHigh,
+                <<"givenname">> => GivenName,
+                <<"postcode">> => PostCode
+            }),
+    {async, FullR0} =
+        leveled_bookie:book_indexfold(
+            Book1,
+            {Bucket, null},
+            {fun testutil:foldkeysfun/3, []},
+            {IdxFullData, <<"Sm">>, <<"Sm~">>},
+            {false, {query, FullIndexEvalFun, FullIndexFilterFun}}
+        ),
+    STFull0 = os:system_time(millisecond),
+    FullKL0 = lists:sort(FullR0()),
+    print_query_results(STFull0, single_index, FullKL0),
+
+    SplitIndexEvalFun =
+        leveled_eval:generate_eval_function(
+            "delim($term, 
\"|\", ($sk, $dob, $dod))", + maps:new()), + SplitIndexFilterFun = + leveled_filter:generate_filter_function( + "$dob BETWEEN :doblow AND :dobhigh", + #{<<"doblow">> => DoBLow, <<"dobhigh">> => DobHigh}), + Q1 = + {IdxFamilyName, + <<"Sm">>, <<"Sm~">>, + {query, SplitIndexEvalFun, SplitIndexFilterFun}}, + Q2 = + {IdxGivenName, + <<"Willow">>, <<"Willow#">>, + {query, SplitIndexEvalFun, SplitIndexFilterFun}}, + Q3 = + {IdxPostCode, + <<"LS8 ">>, <<"LS8#">>, + {query, SplitIndexEvalFun, SplitIndexFilterFun}}, + + ComboFun = + leveled_setop:generate_setop_function("$1 INTERSECT ($2 UNION $3)"), + + {async, SplitR0} = + leveled_bookie:book_multiindexfold( + Book1, + Bucket, + {fun testutil:foldkeysfun/3, []}, + [{1, Q1}, {2, Q2}, {3, Q3}], + ComboFun), + STSplit0 = os:system_time(millisecond), + SplitKL0 = lists:sort(SplitR0()), + print_query_results(STSplit0, multi_index, SplitKL0), + + true = FullKL0 == SplitKL0, + + ok = leveled_bookie:book_close(Book1), + + testutil:reset_filestructure(). + +print_query_results(ST, QT, Results) -> + io:format( + % user, + "Query type ~w took ~w ms with ~w results~n", + [QT, os:system_time(millisecond) - ST, length(Results)] + ). + +set_index_term(SortKey, DoB, DoD) -> + list_to_binary( + lists:flatten( + io_lib:format( + "~s|~s|~s", + [SortKey, DoB, DoD]) + )). + +set_full_index_term(FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3) -> + list_to_binary( + lists:flatten( + io_lib:format( + "~s|~s|~s|#~s#~s#~s|#~s#~s#~s", + [FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3]) + )). + count_termsonindex(Bucket, IdxField, Book, QType) -> lists:foldl( fun(X, Acc) -> @@ -865,31 +1314,30 @@ count_termsonindex(Bucket, IdxField, Book, QType) -> multibucket_fold(_Config) -> RootPath = testutil:reset_filestructure(), - {ok, Bookie1} = leveled_bookie:book_start(RootPath, - 2000, - 50000000, - testutil:sync_strategy()), - ObjectGen = testutil:get_compressiblevalue_andinteger(), + {ok, Bookie1} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), + ObjectGen = <<"V1">>, IndexGen = fun() -> [] end, + B1 = {<<"Type1">>, <<"Bucket1">>}, + B2 = <<"Bucket2">>, + B3 = <<"Bucket3">>, + B4 = {<<"Type2">>, <<"Bucket4">>}, ObjL1 = testutil:generate_objects( - 13000, uuid, [], ObjectGen, IndexGen, {<<"Type1">>, <<"Bucket1">>} - ), + 13000, binary_uuid, [], ObjectGen, IndexGen, B1), testutil:riakload(Bookie1, ObjL1), ObjL2 = testutil:generate_objects( - 17000, uuid, [], ObjectGen, IndexGen, <<"Bucket2">> - ), + 17000, binary_uuid, [], ObjectGen, IndexGen, B2), testutil:riakload(Bookie1, ObjL2), ObjL3 = testutil:generate_objects( - 7000, uuid, [], ObjectGen, IndexGen, <<"Bucket3">> - ), + 7000, binary_uuid, [], ObjectGen, IndexGen, B3), testutil:riakload(Bookie1, ObjL3), - ObjL4 = + ObjL4 = testutil:generate_objects( - 23000, uuid, [], ObjectGen, IndexGen, {<<"Type2">>, <<"Bucket4">>} - ), + 23000, binary_uuid, [], ObjectGen, IndexGen, B4), testutil:riakload(Bookie1, ObjL4), FF = fun(B, K, _PO, Acc) -> @@ -964,7 +1412,7 @@ foldobjects_bybucket_range(_Config) -> 1300, {fixed_binary, 1}, [], ObjectGen, IndexGen, <<"Bucket1">>), testutil:riakload(Bookie1, ObjL1), - FoldKeysFun = fun(_B, K,_V, Acc) -> [ K |Acc] end, + FoldKeysFun = fun(_B, K, _V, Acc) -> [ K |Acc] end, StartKey = testutil:fixed_bin_key(123), EndKey = testutil:fixed_bin_key(779), diff --git a/test/end_to_end/perf_SUITE.erl b/test/end_to_end/perf_SUITE.erl index 59d746fd..e2922dd9 100644 --- a/test/end_to_end/perf_SUITE.erl +++ b/test/end_to_end/perf_SUITE.erl @@ -7,11 +7,21 @@ -export([ riak_ctperf/1, 
riak_fullperf/1, riak_profileperf/1, riak_miniperf/1
 ]).
+-export(
+    [
+        random_people_index/0,
+        get_random_dob/0,
+        get_random_dod/0,
+        get_random_givenname/0,
+        get_random_surname/0,
+        get_random_postcode/0
+    ]).
 
--define(PEOPLE_INDEX, <<"people_bin">>).
--define(MINI_QUERY_DIVISOR, 8).
--define(RGEX_QUERY_DIVISOR, 32).
--define(PUT_PAUSE, 40).
+-ifdef(test_filter_expression).
+    -define(TEST_FE, true).
+-else.
+    -define(TEST_FE, false).
+-endif.
 
 -ifndef(performance).
 -define(performance, riak_ctperf).
@@ -25,6 +35,11 @@ all() -> [?performance].
 -define(ACCOUNTING, false).
 -endif.
 
+-define(PEOPLE_INDEX, <<"people_bin">>).
+-define(MINI_QUERY_DIVISOR, 8).
+-define(RGEX_QUERY_DIVISOR, 32).
+-define(PUT_PAUSE, 40).
+
 suite() -> [{timetrap, {hours, 16}}].
 
 init_per_suite(Config) ->
@@ -650,8 +665,56 @@ random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) ->
     ),
     TC div 1000.
 
-
 random_people_queries(Bookie, Bucket, IndexesReturned) ->
+    random_people_queries(?TEST_FE, Bookie, Bucket, IndexesReturned).
+
+random_people_queries(true, Bookie, Bucket, IndexesReturned) ->
+    FilterExpression =
+        "($dob BETWEEN \"19700101\" AND \"19791231\") "
+        "AND (contains($gns, \"#Willow\") AND contains($pcs, \"#LS\"))",
+    {ok, ParsedFilter} =
+        leveled_filter:generate_filter_expression(
+            FilterExpression, maps:new()),
+    FilterFun =
+        fun(AttrMap) -> leveled_filter:apply_filter(ParsedFilter, AttrMap) end,
+    EvalExpression = "delim($term, \"|\", ($surname, $dob, $dod, $gns, $pcs))",
+    {ok, ParsedEval} =
+        leveled_eval:generate_eval_expression(EvalExpression, maps:new()),
+    EvalFun =
+        fun(Term, Key) ->
+            leveled_eval:apply_eval(ParsedEval, Term, Key, maps:new())
+        end,
+
+    QueryFun =
+        fun() ->
+            Surname = get_random_surname(),
+            Range =
+                {?PEOPLE_INDEX,
+                    Surname,
+                    <<Surname/binary, 126:8>>
+                },
+            FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end,
+            {async, R} =
+                leveled_bookie:book_indexfold(
+                    Bookie,
+                    {Bucket, <<>>},
+                    {FoldKeysFun, 0},
+                    Range,
+                    {true, {eval, EvalFun, FilterFun}}),
+            R()
+        end,
+
+    {TC, {QC, EF}} =
+        timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end),
+    ct:log(
+        ?INFO,
+        "Fetch of ~w index entries by regex in ~w queries took ~w ms"
+        " with filter_expression=~w",
+        [EF, QC, TC div 1000, true]
+    ),
+    TC div 1000;
+random_people_queries(false, Bookie, Bucket, IndexesReturned) ->
     SeventiesWillowRegex =
         "[^\\|]*\\|197[0-9]{5}\\|[^\\|]*\\|"
         "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*",
@@ -664,8 +727,7 @@ random_people_queries(Bookie, Bucket, IndexesReturned) ->
                     Surname,
                     <<Surname/binary, 126:8>>
                 },
-            {ok, TermRegex} =
-                re:compile(SeventiesWillowRegex),
+            {ok, TermRegex} = leveled_util:regex_compile(SeventiesWillowRegex),
             FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end,
             {async, R} =
                 leveled_bookie:book_indexfold(
@@ -681,8 +743,9 @@ random_people_queries(Bookie, Bucket, IndexesReturned) ->
         timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end),
     ct:log(
         ?INFO,
-        "Fetch of ~w index entries by regex in ~w queries took ~w ms",
-        [EF, QC, TC div 1000]
+        "Fetch of ~w index entries by regex in ~w queries took ~w ms"
+        " with filter_expression=~w",
+        [EF, QC, TC div 1000, false]
     ),
     TC div 1000.
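+
+%% Note: ?TEST_FE (above) is true only when this module is compiled with the
+%% test_filter_expression macro defined - e.g. {erl_opts,
+%% [{d, test_filter_expression}]} in rebar.config - so the parsed
+%% filter-expression query path is opt-in at build time.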
diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 5b35970b..78536fa4 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -64,10 +64,10 @@ replace_everything(_Config) -> {reload_strategy, [{?RIAK_TAG, recalc}]}] end, {ok, Book1} = leveled_bookie:book_start(BookOpts(StdJournalCount)), - BKT = "ReplaceAll", - BKT1 = "ReplaceAll1", - BKT2 = "ReplaceAll2", - BKT3 = "ReplaceAll3", + BKT = <<"ReplaceAll">>, + BKT1 = <<"ReplaceAll1">>, + BKT2 = <<"ReplaceAll2">>, + BKT3 = <<"ReplaceAll3">>, {KSpcL1, V1} = testutil:put_indexed_objects(Book1, BKT, 50000), ok = testutil:check_indexed_objects(Book1, BKT, KSpcL1, V1), @@ -199,12 +199,12 @@ close_duringcompaction(_Config) -> {cache_size, 2000}, {max_journalsize, 2000000}, {sync_strategy, testutil:sync_strategy()}], - {ok, Spcl1, LastV1} = rotating_object_check(BookOpts, "Bucket1", 6400), + {ok, Spcl1, LastV1} = rotating_object_check(BookOpts, <<"Bucket1">>, 6400), {ok, Book1} = leveled_bookie:book_start(BookOpts), ok = leveled_bookie:book_compactjournal(Book1, 30000), ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start(BookOpts), - ok = testutil:check_indexed_objects(Book2, "Bucket1", Spcl1, LastV1), + ok = testutil:check_indexed_objects(Book2, <<"Bucket1">>, Spcl1, LastV1), ok = leveled_bookie:book_close(Book2). recovery_with_samekeyupdates(_Config) -> @@ -385,7 +385,7 @@ hot_backup_changes(_Config) -> {cache_size, 1000}, {max_journalsize, 10000000}, {sync_strategy, testutil:sync_strategy()}], - B = "Bucket0", + B = <<"Bucket0">>, {ok, Book1} = leveled_bookie:book_start(BookOpts), {KSpcL1, _V1} = testutil:put_indexed_objects(Book1, B, 20000), @@ -459,41 +459,62 @@ rotate_wipe_compact(Strategy1, Strategy2) -> {sync_strategy, testutil:sync_strategy()}, {reload_strategy, [{?RIAK_TAG, Strategy2}]}, {max_run_length, 8}], - {ok, Spcl3, LastV3} = rotating_object_check(BookOpts, "Bucket3", 400), - ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}]), - {ok, Spcl4, LastV4} = rotating_object_check(BookOpts, "Bucket4", 800), - ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}, - {"Bucket4", Spcl4, LastV4}]), - {ok, Spcl5, LastV5} = rotating_object_check(BookOpts, "Bucket5", 1600), - ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}, - {"Bucket5", Spcl5, LastV5}]), - {ok, Spcl6, LastV6} = rotating_object_check(BookOpts, "Bucket6", 3200), + {ok, Spcl3, LastV3} = + rotating_object_check(BookOpts, <<"Bucket3">>, 400), + ok = restart_from_blankledger(BookOpts, [{<<"Bucket3">>, Spcl3, LastV3}]), + {ok, Spcl4, LastV4} = + rotating_object_check(BookOpts, <<"Bucket4">>, 800), + ok = + restart_from_blankledger( + BookOpts, + [{<<"Bucket3">>, Spcl3, LastV3}, {<<"Bucket4">>, Spcl4, LastV4}] + ), + {ok, Spcl5, LastV5} = rotating_object_check(BookOpts, <<"Bucket5">>, 1600), + ok = + restart_from_blankledger( + BookOpts, + [{<<"Bucket3">>, Spcl3, LastV3}, {<<"Bucket5">>, Spcl5, LastV5}] + ), + {ok, Spcl6, LastV6} = rotating_object_check(BookOpts, <<"Bucket6">>, 3200), {ok, Book1} = leveled_bookie:book_start(BookOpts), compact_and_wait(Book1), ok = leveled_bookie:book_close(Book1), - ok = restart_from_blankledger(BookOptsAlt, [{"Bucket3", Spcl3, LastV3}, - {"Bucket4", Spcl4, LastV4}, - {"Bucket5", Spcl5, LastV5}, - {"Bucket6", Spcl6, LastV6}]), + ok = + restart_from_blankledger( + BookOptsAlt, + [ + {<<"Bucket3">>, Spcl3, LastV3}, + {<<"Bucket4">>, Spcl4, LastV4}, + {<<"Bucket5">>, Spcl5, LastV5}, + {<<"Bucket6">>, 
Spcl6, LastV6} + ] + ), {ok, Book2} = leveled_bookie:book_start(BookOptsAlt), compact_and_wait(Book2), ok = leveled_bookie:book_close(Book2), - ok = restart_from_blankledger(BookOptsAlt, [{"Bucket3", Spcl3, LastV3}, - {"Bucket4", Spcl4, LastV4}, - {"Bucket5", Spcl5, LastV5}, - {"Bucket6", Spcl6, LastV6}]), + ok = + restart_from_blankledger( + BookOptsAlt, + [ + {<<"Bucket3">>, Spcl3, LastV3}, + {<<"Bucket4">>, Spcl4, LastV4}, + {<<"Bucket5">>, Spcl5, LastV5}, + {<<"Bucket6">>, Spcl6, LastV6} + ] + ), {ok, Book3} = leveled_bookie:book_start(BookOptsAlt), - {KSpcL2, _V2} = testutil:put_indexed_objects(Book3, "AltBucket6", 3000), + {KSpcL2, _V2} = + testutil:put_indexed_objects(Book3, <<"AltBucket6">>, 3000), Q2 = fun(RT) -> {index_query, - "AltBucket6", + <<"AltBucket6">>, {fun testutil:foldkeysfun/3, []}, {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} @@ -504,10 +525,13 @@ rotate_wipe_compact(Strategy1, Strategy2) -> DeleteFun = fun({DK, [{add, DIdx, DTerm}]}) -> - ok = testutil:book_riakdelete(Book3, - "AltBucket6", - DK, - [{remove, DIdx, DTerm}]) + ok = + testutil:book_riakdelete( + Book3, + <<"AltBucket6">>, + DK, + [{remove, DIdx, DTerm}] + ) end, lists:foreach(DeleteFun, KSpcL2), @@ -615,33 +639,37 @@ recovr_strategy(_Config) -> {sync_strategy, testutil:sync_strategy()}, {reload_strategy, [{?RIAK_TAG, recovr}]}], - R6 = rotating_object_check(BookOpts, "Bucket6", 6400), + R6 = rotating_object_check(BookOpts, <<"Bucket6">>, 6400), {ok, AllSpcL, V4} = R6, - leveled_penciller:clean_testdir(proplists:get_value(root_path, BookOpts) ++ - "/ledger"), + leveled_penciller:clean_testdir( + proplists:get_value(root_path, BookOpts) ++ "/ledger"), {ok, Book1} = leveled_bookie:book_start(BookOpts), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Book1, TestObject, TestSpec), - ok = testutil:book_riakdelete(Book1, - testutil:get_bucket(TestObject), - testutil:get_key(TestObject), - []), + ok = + testutil:book_riakdelete( + Book1, + testutil:get_bucket(TestObject), + testutil:get_key(TestObject), + [] + ), - lists:foreach(fun({K, _SpcL}) -> - {ok, OH} = testutil:book_riakhead(Book1, "Bucket6", K), - VCH = testutil:get_vclock(OH), - {ok, OG} = testutil:book_riakget(Book1, "Bucket6", K), - V = testutil:get_value(OG), - VCG = testutil:get_vclock(OG), - true = V == V4, - true = VCH == VCG - end, - lists:nthtail(6400, AllSpcL)), + lists:foreach( + fun({K, _SpcL}) -> + {ok, OH} = testutil:book_riakhead(Book1, <<"Bucket6">>, K), + VCH = testutil:get_vclock(OH), + {ok, OG} = testutil:book_riakget(Book1, <<"Bucket6">>, K), + V = testutil:get_value(OG), + VCG = testutil:get_vclock(OG), + true = V == V4, + true = VCH == VCG + end, + lists:nthtail(6400, AllSpcL)), Q = fun(RT) -> {index_query, - "Bucket6", + <<"Bucket6">>, {fun testutil:foldkeysfun/3, []}, {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} @@ -666,7 +694,7 @@ recovr_strategy(_Config) -> {ok, Book2} = leveled_bookie:book_start(RevisedOpts), - {KSpcL2, _V2} = testutil:put_indexed_objects(Book2, "AltBucket6", 3000), + {KSpcL2, _V2} = testutil:put_indexed_objects(Book2, <<"AltBucket6">>, 3000), {async, KFolder2} = leveled_bookie:book_returnfolder(Book2, Q(false)), KeyList2 = lists:usort(KFolder2()), true = length(KeyList2) == 6400, @@ -674,7 +702,7 @@ recovr_strategy(_Config) -> Q2 = fun(RT) -> {index_query, - "AltBucket6", + <<"AltBucket6">>, {fun testutil:foldkeysfun/3, []}, {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} @@ -685,10 +713,9 @@ recovr_strategy(_Config) -> DeleteFun = fun({DK, [{add, 
DIdx, DTerm}]}) -> - ok = testutil:book_riakdelete(Book2, - "AltBucket6", - DK, - [{remove, DIdx, DTerm}]) + ok = + testutil:book_riakdelete( + Book2, <<"AltBucket6">>, DK, [{remove, DIdx, DTerm}]) end, lists:foreach(DeleteFun, KSpcL2), diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index e5288c79..987ee9f0 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -1484,8 +1484,8 @@ dollar_key_index(_Config) -> io:format("Length of Result of folder ~w~n", [ResLen]), true = 657 == ResLen, - {ok, REMatch} = re:compile("K.y"), - {ok, REMiss} = re:compile("key"), + {ok, REMatch} = leveled_util:regex_compile("K.y"), + {ok, REMiss} = leveled_util:regex_compile("key"), {async, FolderREMatch} = leveled_bookie:book_keylist(Bookie1, @@ -1554,26 +1554,17 @@ dollar_bucket_index(_Config) -> testutil:sync_strategy()), ObjectGen = testutil:get_compressiblevalue_andinteger(), IndexGen = fun() -> [] end, - ObjL1 = testutil:generate_objects(1300, - uuid, - [], - ObjectGen, - IndexGen, - <<"Bucket1">>), + ObjL1 = + testutil:generate_objects( + 1300, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket1">>), testutil:riakload(Bookie1, ObjL1), - ObjL2 = testutil:generate_objects(1700, - uuid, - [], - ObjectGen, - IndexGen, - <<"Bucket2">>), + ObjL2 = + testutil:generate_objects( + 1700, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket2">>), testutil:riakload(Bookie1, ObjL2), - ObjL3 = testutil:generate_objects(7000, - uuid, - [], - ObjectGen, - IndexGen, - <<"Bucket3">>), + ObjL3 = + testutil:generate_objects( + 7000, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket3">>), testutil:riakload(Bookie1, ObjL3), @@ -1583,40 +1574,44 @@ dollar_bucket_index(_Config) -> FoldAccT = {FoldKeysFun, []}, {async, Folder} = - leveled_bookie:book_keylist(Bookie1, - ?RIAK_TAG, - <<"Bucket2">>, - FoldAccT), + leveled_bookie:book_keylist( + Bookie1, ?RIAK_TAG, <<"Bucket2">>, FoldAccT), Results = Folder(), true = 1700 == length(Results), {<<"Bucket2">>, SampleKey} = lists:nth(100, Results), UUID = "[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}", - {ok, RESingleMatch} = re:compile(SampleKey), - {ok, REAllMatch} = re:compile(UUID), - {ok, REMiss} = re:compile("no_key"), + {ok, RESingleMatch} = leveled_util:regex_compile(SampleKey), + {ok, REAllMatch} = leveled_util:regex_compile(UUID), + {ok, REMiss} = leveled_util:regex_compile("no_key"), {async, FolderREMiss} = - leveled_bookie:book_keylist(Bookie1, - ?RIAK_TAG, - <<"Bucket2">>, - {null, null}, - {FoldKeysFun, []}, - REMiss), + leveled_bookie:book_keylist( + Bookie1, + ?RIAK_TAG, + <<"Bucket2">>, + {null, null}, + {FoldKeysFun, []}, + REMiss + ), {async, FolderRESingleMatch} = - leveled_bookie:book_keylist(Bookie1, - ?RIAK_TAG, - <<"Bucket2">>, - {null, null}, - {FoldKeysFun, []}, - RESingleMatch), + leveled_bookie:book_keylist( + Bookie1, + ?RIAK_TAG, + <<"Bucket2">>, + {null, null}, + {FoldKeysFun, []}, + RESingleMatch + ), {async, FolderREAllMatch} = - leveled_bookie:book_keylist(Bookie1, - ?RIAK_TAG, - <<"Bucket2">>, - {null, null}, - {FoldKeysFun, []}, - REAllMatch), + leveled_bookie:book_keylist( + Bookie1, + ?RIAK_TAG, + <<"Bucket2">>, + {null, null}, + {FoldKeysFun, []}, + REAllMatch + ), true = 0 == length(FolderREMiss()), true = 1 == length(FolderRESingleMatch()), @@ -1628,10 +1623,13 @@ dollar_bucket_index(_Config) -> bigobject_memorycheck(_Config) -> RootPath = testutil:reset_filestructure(), - {ok, Bookie} = leveled_bookie:book_start(RootPath, - 200, - 1000000000, - testutil:sync_strategy()), + {ok, 
Bookie} = + leveled_bookie:book_start( + RootPath, + 200, + 1000000000, + testutil:sync_strategy() + ), Bucket = <<"B">>, IndexGen = fun() -> [] end, ObjPutFun = @@ -1653,10 +1651,13 @@ bigobject_memorycheck(_Config) -> % All processes {_TotalCDBBinMem, _TotalCDBProcesses} = cdb_memory_check(), ok = leveled_bookie:book_close(Bookie), - {ok, BookieR} = leveled_bookie:book_start(RootPath, - 2000, - 1000000000, - testutil:sync_strategy()), + {ok, BookieR} = + leveled_bookie:book_start( + RootPath, + 2000, + 1000000000, + testutil:sync_strategy() + ), {RS_TotalCDBBinMem, _RS_TotalCDBProcesses} = cdb_memory_check(), true = RS_TotalCDBBinMem < 1024 * 1024, % No binary object references exist after startup @@ -1666,25 +1667,29 @@ bigobject_memorycheck(_Config) -> cdb_memory_check() -> TotalCDBProcesses = - lists:filter(fun(P) -> - {dictionary, PD} = - process_info(P, dictionary), - case lists:keyfind('$initial_call', 1, PD) of - {'$initial_call',{leveled_cdb,init,1}} -> - true; - _ -> - false - end - end, - processes()), + lists:filter( + fun(P) -> + {dictionary, PD} = + process_info(P, dictionary), + case lists:keyfind('$initial_call', 1, PD) of + {'$initial_call',{leveled_cdb,init,1}} -> + true; + _ -> + false + end + end, + processes() + ), TotalCDBBinMem = - lists:foldl(fun(P, Acc) -> - BinMem = calc_total_binary_memory(P), - io:format("Memory for pid ~w is ~w~n", [P, BinMem]), - BinMem + Acc - end, - 0, - TotalCDBProcesses), + lists:foldl( + fun(P, Acc) -> + BinMem = calc_total_binary_memory(P), + io:format("Memory for pid ~w is ~w~n", [P, BinMem]), + BinMem + Acc + end, + 0, + TotalCDBProcesses + ), io:format("Total binary memory ~w in ~w CDB processes~n", [TotalCDBBinMem, length(TotalCDBProcesses)]), {TotalCDBBinMem, TotalCDBProcesses}. diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 5816d112..82084f3e 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -549,7 +549,7 @@ generate_objects( ); generate_objects( Count, binary_uuid, ObjL, Value, IndexGen, Bucket) - when is_binary(Bucket) -> + when is_binary(Bucket); is_tuple(Bucket) -> {Obj1, Spec1} = set_object( Bucket, @@ -557,23 +557,14 @@ generate_objects( Value, IndexGen ), - generate_objects(Count - 1, - binary_uuid, - [{rand:uniform(), Obj1, Spec1}|ObjL], - Value, - IndexGen, - Bucket); -generate_objects(Count, uuid, ObjL, Value, IndexGen, Bucket) -> - {Obj1, Spec1} = set_object(Bucket, - leveled_util:generate_uuid(), - Value, - IndexGen), - generate_objects(Count - 1, - uuid, - [{rand:uniform(), Obj1, Spec1}|ObjL], - Value, - IndexGen, - Bucket); + generate_objects( + Count - 1, + binary_uuid, + [{rand:uniform(), Obj1, Spec1}|ObjL], + Value, + IndexGen, + Bucket + ); generate_objects( Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket) when is_list(Bucket) -> @@ -879,7 +870,7 @@ put_indexed_objects(Book, Bucket, Count, V) -> IndexGen = get_randomindexes_generator(1), SW = os:timestamp(), ObjL1 = - generate_objects(Count, uuid, [], V, IndexGen, Bucket), + generate_objects(Count, binary_uuid, [], V, IndexGen, Bucket), KSpecL = lists:map( fun({_RN, Obj, Spc}) -> diff --git a/test/property/evallang_eqc.erl b/test/property/evallang_eqc.erl new file mode 100644 index 00000000..7653690b --- /dev/null +++ b/test/property/evallang_eqc.erl @@ -0,0 +1,161 @@ +-module(evallang_eqc). + +-ifdef(EQC). + +-compile([export_all, nowarn_export_all]). + +-include_lib("eqc/include/eqc.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("../include/leveled.hrl"). 
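+
+%% ?EQC_TIME_BUDGET (in seconds) comes from include/leveled.hrl; each
+%% property below gets that much eqc testing time, plus a 10s margin on
+%% the eunit timeout.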
+ +-define(QC_OUT(P), + eqc:on_output(fun(Str, Args) -> + io:format(user, Str, Args) end, P)). + +eqc_prop1_test_() -> + {timeout, + ?EQC_TIME_BUDGET + 10, + ?_assertEqual( + true, + eqc:quickcheck( + eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_lang()))))}. + +eqc_prop2_test_() -> + {timeout, + ?EQC_TIME_BUDGET + 10, + ?_assertEqual( + true, + eqc:quickcheck( + eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_negative()))))}. + +identifier() -> + FirstChars = lists:seq($a,$z)++lists:seq($A,$Z)++["_"], + OtherChars = FirstChars++lists:seq($0,$9), + ?LET({X, Xs}, {oneof(FirstChars), list(elements(OtherChars))}, unicode:characters_to_binary([X|Xs])). + +identifier(Context, Type) -> + ?LET(TypedVars, vars(Context, Type), + ?LET(V, oneof([identifier() || TypedVars == []] ++ TypedVars), [ws(), "$", V, ws()])). + +vars(Context, Type) -> + fault([ V || {V, {T, _}} <- Context, T /= Type ], + [ V || {V, {T, _}} <- Context, T == Type ]). + +%% No quotes in strings +%% Filter the quote with `re` instead of string:find to +%% be compatible with lexer +string() -> + ?SUCHTHAT(String, non_empty(utf8()), re:run(String, "\"") == nomatch). + +typed_context() -> + ?SUCHTHAT(KVs, list({identifier(), oneof([{int, int()}, {string, string()}])}), + unique([K || {K, _} <- KVs])). + +unique(Elems) -> + lists:usort(Elems) == lists:sort(Elems). + +ppvalue(string) -> + ppstring(); +ppvalue(Int) -> + ppint(Int). + +ppregex() -> + [ws(), "\"", regex(), "\"", ws()]. + +regex() -> + elements(["a", ".*", "[^0]*"]). + +ws() -> + ?SHRINK(list(elements(" \t\f\v\r\n\s")), " "). + +comparator() -> + oneof([">", "<", "=", "<=", ">="]). + +ppint(Kind) -> + Gen = case Kind of + pos -> ?LET(N, nat(), N+1); + nat -> nat(); + neg -> ?LET(N, nat(), -N); + _ -> int() + end, + [ws(), ?LET(X, fault(int(), Gen), integer_to_list(X)), ws()]. + +ppstring() -> + [ws(), "\"", string(), "\"", ws()]. + +operand(_Context) -> + oneof([ ppint(any), ppstring() ]). + +math_operand(Context) -> + oneof([ identifier(Context, int) || Context /= []] ++ + [ ppint(any) ]). + +pplist(Gen) -> + ?LET(List, non_empty(list(Gen)), + [ws(), "("] ++ lists:join(",", List) ++ [")", ws()]). + + +identifier_list(Context, Type) -> + pplist(identifier(Context, Type)). + +mapping(int, string) -> + [ ws(), "(", ppint(any), ", ", ppstring(), ws(), ")" ]; +mapping(string, string) -> + [ ws(), "(", ppstring(), ", ", ppstring(), ws(), ")" ]; +mapping(string, int) -> + [ ws(), "(", ppstring(), ", ", ppint(any), ws(), ")" ]; +mapping(int, int) -> + [ ws(), "(", ppint(any), ", ", ppint(any), ws(), ")" ]. + +mappings(InType, OutType) -> + pplist(mapping(InType, OutType)). 
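+
+%% For illustration: mapping(int, string) generates a pair such as (1, "a"),
+%% and mappings(int, string) wraps one or more such pairs in parentheses,
+%% e.g. ((1, "a"), (2, "b")), forming the lookup table consumed by the
+%% map(...) expressions generated below.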
+
+expr(0, Context) ->
+    oneof([ [ "delim(", identifier(Context, string), ",", ppstring(), ",", identifier_list(Context, string), ")" ]
+          , [ "join(", identifier_list(Context, string), ",", ppstring(), ",", identifier(Context, string), ")" ]
+          , [ "split(", identifier(Context, string), ",", ppstring(), ",", identifier(Context, string), ")" ]
+          , [ "slice(", identifier(Context, string), ",", ppint(pos), ",", identifier(Context, string), ")" ]
+          , [ "index(", identifier(Context, string), ",", ppint(nat), ",", ppint(pos), ",", identifier(Context, string), ")" ]
+          , [ "kvsplit(", identifier(Context, string), ",", ppstring(), ",", ppstring(), ")" ]
+          , [ "regex(", identifier(Context, string), ",", ppregex() , ", pcre, ", identifier_list(Context, string), ")"]
+          , [ "regex(", identifier(Context, string), ",", ppregex() , ",", identifier_list(Context, string), ")"]
+          , [ "to_integer(", identifier(Context, string), ",", identifier(Context, int), ")" ]
+          , [ "to_string(", identifier(Context, int), ",", identifier(Context, string), ")" ]
+          , [ "subtract(", math_operand(Context), ",", math_operand(Context), ",", identifier(Context, int), ")" ]
+          , [ "add(", math_operand(Context), ",", math_operand(Context), ",", identifier(Context, int), ")" ]
+          ] ++
+          [ [ "map(", lists:join(",", [identifier(Context, LHS), comparator(), mappings(LHS, RHS),
+                                       ppvalue(LHS), identifier(Context, RHS)]), ")" ]
+          || LHS <- [int, string],
+             RHS <- [int, string],
+             Context /= [] ]
+    );
+expr(N, Context) ->
+    oneof([ expr(0, Context)
+          , ?LETSHRINK([E1, E2], [expr(N div 2, Context), expr(N div 2, Context)], [E1, "|", E2])
+          ]).
+
+%% A generator for syntactically and semantically correct expressions
+evallang(Context) ->
+    ?SIZED(Size, expr(Size, Context)).
+
+%% The property.
+%% The Context supplies typed variables that the generated expression may
+%% reference.
+prop_lang() ->
+    eqc:dont_print_counterexample(
+    ?FORALL(Context, typed_context(),
+    ?FORALL(String, evallang(Context),
+    ?WHENFAIL(eqc:format("Failing for\n~ts\nwith context ~p\n", [String, Context]),
+        try Map = maps:from_list([{Var, Val} || {Var, {_Type, Val}} <- Context]),
+            F = leveled_eval:generate_eval_function(unicode:characters_to_list(String), Map),
+            is_map(F(<<"hello">>, <<"world">>))
+        catch Error:Reason:St ->
+            eqc:format("~n~p Failed with ~p ~p~n~p~n", [String, Error, Reason, St]),
+            equals(Error, true)
+        end)))).
+
+prop_negative() ->
+    fails(fault_rate(1, 10, prop_lang())).
+
+-endif.
\ No newline at end of file
diff --git a/test/property/filterlang_eqc.erl b/test/property/filterlang_eqc.erl
new file mode 100644
index 00000000..7b8a5ea0
--- /dev/null
+++ b/test/property/filterlang_eqc.erl
@@ -0,0 +1,111 @@
+-module(filterlang_eqc).
+
+-ifdef(EQC).
+
+-compile([export_all, nowarn_export_all]).
+
+-include_lib("eqc/include/eqc.hrl").
+-include_lib("eunit/include/eunit.hrl").
+-include("../include/leveled.hrl").
+
+-define(lazy_oneof(Gens), ?LAZY(oneof(Gens))).
+
+-define(QC_OUT(P),
+        eqc:on_output(fun(Str, Args) ->
+                              io:format(user, Str, Args) end, P)).
+
+eqc_test_() ->
+    {timeout,
+        ?EQC_TIME_BUDGET + 10,
+        ?_assertEqual(
+            true,
+            eqc:quickcheck(
+                eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_lang()))))}.
+
+identifier() ->
+    FirstChars = lists:seq($a,$z)++lists:seq($A,$Z)++["_"],
+    OtherChars = FirstChars++lists:seq($0,$9),
+    ?LET({X, Xs}, {oneof(FirstChars), list(elements(OtherChars))}, unicode:characters_to_binary([X|Xs])).
+
+ppidentifier(Vars) ->
+    ?LET(V, oneof([identifier() | Vars]), [ws(), "$", V, " ",ws()]).
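+
+%% ppidentifier/1 renders a field reference: a "$" followed by either a
+%% freshly generated identifier or one of the context variables, padded
+%% with whitespace, e.g. " $age ".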
+
+%% No quotes in strings
+%% Filter the quote with `re` instead of string:find to
+%% be compatible with lexer
+string() ->
+    ?SUCHTHAT(String, non_empty(utf8()), re:run(String, "\"") == nomatch).
+
+context() ->
+    list({identifier(), oneof([int(), string()])}).
+
+ws() ->
+    ?SHRINK(list(elements(" \t\f\v\r\n\s")), [" "]).
+
+comparator() ->
+    oneof([">", "<", "=", "<>", "<=", ">="]).
+
+ppint() ->
+    [ws(), ?LET(X, int(), integer_to_list(X)), ws()].
+
+ppstring() ->
+    [ws(), "\"", string(), "\"", ws()].
+
+pplist(Gen) ->
+    ?LET(List, non_empty(list(Gen)),
+         [ws(), "("] ++ lists:join(",", List) ++ [")", ws()]).
+
+operand(Vars) ->
+    oneof([ ppidentifier(Vars) ] ++
+          [ [ws(), ":", oneof(Vars), " ", ws()] || Vars /= []] ++
+          %% Substitution vars are always drawn from the context, because
+          %% parsing should fail with an error if a var is not in the context
+          [ ppint(), ppstring() ]).
+
+operand_list(Vars) ->
+    ?LET(OpList, non_empty(list(operand(Vars))),
+         [ws(), "("] ++ lists:join(",", OpList) ++ [")", ws()]).
+
+condition(0, Vars) ->
+    oneof([ [ operand(Vars), comparator(), operand(Vars) ]
+          , [ operand(Vars), "BETWEEN", operand(Vars), "AND", operand(Vars) ]
+          , [ ppidentifier(Vars), " IN", pplist(ppstring()) ]
+          , [ ppstring(), " IN", ppidentifier(Vars) ]
+          , [ "contains(", ppidentifier(Vars), ", ", ppstring(), ")" ]
+          , [ "begins_with(", ppidentifier(Vars), ", ", ppstring(), ")" ]
+          , [ "attribute_exists(", ppidentifier(Vars), ")" ]
+          , [ "attribute_not_exists(", ppidentifier(Vars), ")" ]
+          , [ "attribute_empty(", ppidentifier(Vars), ")" ]
+          ]);
+condition(N, Vars) ->
+    ?lazy_oneof([ condition(0, Vars)
+                , ?LETSHRINK([C], [condition(N - 1, Vars)],
+                      ?lazy_oneof([ ["NOT", C] , ["(", ws(), C, ws(), ")"] ]))
+                , ?LETSHRINK([C1, C2], [condition(N div 2, Vars), condition(N div 2, Vars)],
+                      ?lazy_oneof([ [C1, "AND", C2] , [C1, "OR", C2] ]))
+                ]).
+
+%% A generator for syntactically and semantically correct expressions
+filterlang(Vars) ->
+    ?SIZED(Size, filterlang(Size, Vars)).
+
+filterlang(N, Vars) ->
+    condition(N, Vars).
+
+%% The property.
+%% The Context variables are used to replace ":x" substitution vars in the
+%% tokens being parsed.
+prop_lang() ->
+    eqc:dont_print_counterexample(
+    ?FORALL(Context, context(),
+    ?FORALL(String, filterlang([V || {V, _} <- Context]),
+    ?WHENFAIL(eqc:format("Failing for\n~ts\nwith context ~p\n", [String, Context]),
+        try Map = maps:from_list(Context),
+            {ok, Expr} = leveled_filter:generate_filter_expression(unicode:characters_to_list(String), Map),
+            is_boolean(leveled_filter:apply_filter(Expr, Map))
+        catch Error:Reason:St ->
+            eqc:format("~n~p Failed with ~p ~p~n~p~n", [String, Error, Reason, St]),
+            equals(Error, true)
+        end)))).
+
+-endif.
diff --git a/test/property/leveled_simpleeqc.erl b/test/property/leveled_simpleeqc.erl
index 9bb6eade..c02c2cff 100644
--- a/test/property/leveled_simpleeqc.erl
+++ b/test/property/leveled_simpleeqc.erl
@@ -42,7 +42,6 @@
     }).
 
 -define(NUMTESTS, 10000).
--define(TIME_BUDGET, 300).
 -define(QC_OUT(P),
         eqc:on_output(fun(Str, Args) ->
                               io:format(user, Str, Args) end, P)).
@@ -51,11 +50,11 @@
 
 eqc_test_() ->
     {timeout,
-        ?TIME_BUDGET + 10,
+        ?EQC_TIME_BUDGET + 10,
         ?_assertEqual(
             true,
             eqc:quickcheck(
-                eqc:testing_time(?TIME_BUDGET, ?QC_OUT(prop_db()))))}.
+                eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_db()))))}.
 
 run() ->
     run(?NUMTESTS).
diff --git a/test/property/leveled_statemeqc.erl b/test/property/leveled_statemeqc.erl
index e0cb13e5..65a4a276 100644
--- a/test/property/leveled_statemeqc.erl
+++ b/test/property/leveled_statemeqc.erl
@@ -680,7 +680,7 @@ indexfold(Pid, Constraint, FoldAccT, Range, {_, undefined} = TermHandling, _Coun
     {async, Folder} = leveled_bookie:book_indexfold(Pid, Constraint, FoldAccT, Range, TermHandling),
     Folder;
 indexfold(Pid, Constraint, FoldAccT, Range, {ReturnTerms, RegExp}, _Counter) ->
-    {ok, RE} = re:compile(RegExp),
+    {ok, RE} = leveled_util:regex_compile(RegExp),
     {async, Folder} = leveled_bookie:book_indexfold(Pid, Constraint, FoldAccT, Range, {ReturnTerms, RE}),
     Folder.
diff --git a/test/property/setoplang_eqc.erl b/test/property/setoplang_eqc.erl
new file mode 100644
index 00000000..18e5b547
--- /dev/null
+++ b/test/property/setoplang_eqc.erl
@@ -0,0 +1,105 @@
+%%% File : setoplang_eqc.erl
+%%% Created : 14 May 2024 by Thomas Arts
+%%%
+%%% The lexer does not seem to accept binary strings (in OTP 26):
+%%% 3> leveled_setoplexer:string("$7").
+%%% {ok,[{set_id,1,7}],1}
+%%% 4> leveled_setoplexer:string(<<"$7">>).
+%%% ** exception error: no function clause matching lists:sublist(<<"$7">>,1) (lists.erl, line 394)
+%%%
+-module(setoplang_eqc).
+
+-ifdef(EQC).
+
+-compile([export_all, nowarn_export_all]).
+
+-include_lib("eqc/include/eqc.hrl").
+-include_lib("eunit/include/eunit.hrl").
+-include("../include/leveled.hrl").
+
+-define(
+    QC_OUT(P),
+    eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)).
+
+eqc_prop1_test_() ->
+    {timeout,
+        ?EQC_TIME_BUDGET + 10,
+        ?_assertEqual(
+            true,
+            eqc:quickcheck(
+                eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_gen_fun()))))}.
+
+eqc_prop2_test_() ->
+    {timeout,
+        ?EQC_TIME_BUDGET + 10,
+        ?_assertEqual(
+            true,
+            eqc:quickcheck(
+                eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_check_eval()))))}.
+
+set_id() ->
+    ?LET(N, choose(1,20), integer_to_list(N)).
+
+value() ->
+    ?LET(Set, list(int()), sets:from_list(Set)).
+
+%% This context is always an enumeration.
+%% Consider implementing a context in which keys are not consecutive.
+context() ->
+    ?LET(Sets, list(value()), lists:enumerate(Sets)).
+%context() ->
+%    ?LET(Map, map(set_id(), value()),
+%         lists:sort(maps:to_list(Map))).
+
+ws() ->
+    ?SHRINK(list(elements(" \t\f\v\r\n\s")), " ").
+
+setoplang(Context) ->
+    ?SIZED(Size, setoplang(Size, Context)).
+
+setoplang(0, Vars) ->
+    ["$", oneof(Vars), ws()];
+setoplang(Size, Vars) ->
+    ?LAZY(
+        oneof([setoplang(0, Vars),
+               ?LETSHRINK([Cond], [setoplang(Size - 1, Vars)],
+                   ["(", ws(), Cond, ws(), " )"]),
+               ?LETSHRINK([Cond1, Cond2],
+                   [setoplang(Size div 2, Vars),
+                    setoplang(Size div 2, Vars)],
+                   [Cond1, ws(), oneof(["SUBTRACT", "UNION", "INTERSECT"]), ws(), Cond2])])).
+
+
+
+%% -- Property ---------------------------------------------------------------
+
+%% The property: any generated expression compiles, and the compiled fun
+%% returns a set.
+prop_gen_fun() ->
+    ?FORALL(Context, non_empty(context()),
+    ?FORALL(String, setoplang([integer_to_list(V) || {V, _} <- Context]),
+        try F = leveled_setop:generate_setop_function(String),
+            sets:is_set(F(maps:from_list(Context)))
+        catch Error:Reason ->
+            eqc:format("~n~ts Failed with ~p ~p~n", [String, Error, Reason]),
+            equals(Error, true)
+        end)).
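+
+%% prop_check_eval checks one fixed expression shape against a model: for a
+%% context holding sets S1..Sn, "($1 UNION ... UNION $n) SUBTRACT $1" must
+%% evaluate to the union of all the sets minus S1.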
+ +prop_check_eval() -> + ?FORALL(Context, non_empty(context()), + begin + Vars = [ "$"++integer_to_list(Id) || {Id,_} <- Context], + String = "(" ++ lists:flatten(lists:join(" UNION ", Vars) ++ ") SUBTRACT " ++ hd(Vars)), + ?WHENFAIL(eqc:format("setop ~ts~n", [String]), + begin + F = leveled_setop:generate_setop_function(String), + equal_sets(F(maps:from_list(Context)), + sets:subtract(sets:union([Set || {_, Set} <- Context]), + element(2, hd(Context)))) + end) + end). + +equal_sets(S1, S2) -> + ?WHENFAIL(eqc:format("~p /= ~p", [sets:to_list(S1), sets:to_list(S2)]), + sets:is_subset(S1, S2) andalso sets:is_subset(S2, S1)). + +-endif.