Skip to content

Commit 514526f

Browse files
authored
Add ability to defer fetching a value in the fold - but without requiring Journal check (#472)
* Add ability to defer fetching a value in the fold - but without requiring a presence check. Reduces the cost of using head folds in partition repairs, where the check is not necessary as a fetch failure will be caught and prompt read repair. * Update types for JournalCheck * Fix tuple mistake (#473) Otherwise leveled_pclerk will crash if there is a partial merge that results in an empty addition
1 parent 8960234 commit 514526f

File tree

4 files changed

+255
-168
lines changed

4 files changed

+255
-168
lines changed

src/leveled_bookie.erl

+57-41
Original file line numberDiff line numberDiff line change
@@ -946,15 +946,22 @@ book_objectfold(Pid, Tag, Bucket, Limiter, FoldAccT, SnapPreFold) ->
946946
%% `Acc'. The ProxyObject is an object that only contains the
947947
%% head/metadata, and no object data from the journal. The `Acc' in
948948
%% the first call is that provided as the second element of `FoldAccT'
949-
%% and thereafter the return of the previous all to the fold fun. If
950-
%% `JournalCheck' is `true' then the journal is checked to see if the
951-
%% object in the ledger is present, which means a snapshot of the
952-
%% whole store is required, if `false', then no such check is
953-
%% performed, and onlt ledger need be snapshotted. `SnapPreFold' is a
954-
%% boolean that determines if the snapshot is taken when the folder is
955-
%% requested `true', or when when run `false'. `SegmentList' can be
956-
%% `false' meaning, all heads, or a list of integers that designate
957-
%% segments in a TicTac Tree.
949+
%% and thereafter the return of the previous call to the fold fun.
950+
%%
951+
%% If `JournalCheck' is `true' then the journal is checked to see if the
952+
%% object in the ledger is present, which means a snapshot of the whole store
953+
%% is required, if `false', then no such check is performed, and only ledger
954+
%% need be snapshotted. However, if the intention is to defer fetching the
955+
%% value but not incur the cost of checking the Journal during the
956+
%% fold (e.g. as any exception will be handled later), then the `defer'
957+
%% option can be used. This will snapshot the Journal, but not check for
958+
%% presence. Note that the fetch must still be made within the timeframe of
959+
%% the fold (as the snapshot will expire with the fold).
960+
%%
961+
%% `SnapPreFold' is a boolean that determines if the snapshot is taken when
962+
%% the folder is requested `true', or when run `false'. `SegmentList' can
963+
%% be `false' meaning, all heads, or a list of integers that designate segments
964+
%% in a TicTac Tree.
958965
-spec book_headfold(pid(), Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList) ->
959966
{async, Runner} when
960967
Tag :: leveled_codec:tag(),
@@ -964,7 +971,7 @@ book_objectfold(Pid, Tag, Bucket, Limiter, FoldAccT, SnapPreFold) ->
964971
Bucket :: term(),
965972
Key :: term(),
966973
Value :: term(),
967-
JournalCheck :: boolean(),
974+
JournalCheck :: boolean()|defer,
968975
SnapPreFold :: boolean(),
969976
SegmentList :: false | list(integer()),
970977
Runner :: fun(() -> Acc).
@@ -999,7 +1006,7 @@ book_headfold(Pid, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList) ->
9991006
Bucket :: term(),
10001007
Key :: term(),
10011008
Value :: term(),
1002-
JournalCheck :: boolean(),
1009+
JournalCheck :: boolean()|defer,
10031010
SnapPreFold :: boolean(),
10041011
SegmentList :: false | list(integer()),
10051012
Runner :: fun(() -> Acc).
@@ -1032,7 +1039,7 @@ book_headfold(Pid, Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentLis
10321039
Bucket :: term(),
10331040
Key :: term(),
10341041
Value :: term(),
1035-
JournalCheck :: boolean(),
1042+
JournalCheck :: boolean()|defer,
10361043
SnapPreFold :: boolean(),
10371044
SegmentList :: false | list(integer()),
10381045
LastModRange :: false | leveled_codec:lastmod_range(),
@@ -1989,7 +1996,7 @@ return_snapfun(
19891996
fun() -> {ok, LS, JS, fun() -> ok end} end
19901997
end.
19911998

1992-
-spec snaptype_by_presence(boolean()) -> store|ledger.
1999+
-spec snaptype_by_presence(boolean()|defer) -> store|ledger.
19932000
%% @doc
19942001
%% Folds that traverse over object heads, may also either require to return
19952002
%% the object, or at least confirm the object is present in the Ledger. This
@@ -1998,6 +2005,8 @@ return_snapfun(
19982005
%% rather than just the ledger.
19992006
snaptype_by_presence(true) ->
20002007
store;
2008+
snaptype_by_presence(defer) ->
2009+
store;
20012010
snaptype_by_presence(false) ->
20022011
ledger.
20032012

@@ -2030,9 +2039,8 @@ get_runner(State, {keylist, Tag, Bucket, FoldAccT}) ->
20302039
leveled_runner:bucketkey_query(SnapFun, Tag, Bucket, FoldAccT);
20312040
get_runner(State, {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}) ->
20322041
SnapFun = return_snapfun(State, ledger, no_lookup, true, true),
2033-
leveled_runner:bucketkey_query(SnapFun,
2034-
Tag, Bucket, KeyRange,
2035-
FoldAccT, TermRegex);
2042+
leveled_runner:bucketkey_query(
2043+
SnapFun, Tag, Bucket, KeyRange, FoldAccT, TermRegex);
20362044
%% Set of runners for object or metadata folds
20372045
get_runner(State,
20382046
{foldheads_allkeys,
@@ -2041,10 +2049,15 @@ get_runner(State,
20412049
LastModRange, MaxObjectCount}) ->
20422050
SnapType = snaptype_by_presence(JournalCheck),
20432051
SnapFun = return_snapfun(State, SnapType, no_lookup, true, SnapPreFold),
2044-
leveled_runner:foldheads_allkeys(SnapFun,
2045-
Tag, FoldFun,
2046-
JournalCheck, SegmentList,
2047-
LastModRange, MaxObjectCount);
2052+
leveled_runner:foldheads_allkeys(
2053+
SnapFun,
2054+
Tag,
2055+
FoldFun,
2056+
JournalCheck,
2057+
SegmentList,
2058+
LastModRange,
2059+
MaxObjectCount
2060+
);
20482061
get_runner(State,
20492062
{foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) ->
20502063
get_runner(State,
@@ -2071,13 +2084,16 @@ get_runner(State,
20712084
end,
20722085
SnapType = snaptype_by_presence(JournalCheck),
20732086
SnapFun = return_snapfun(State, SnapType, no_lookup, true, SnapPreFold),
2074-
leveled_runner:foldheads_bybucket(SnapFun,
2075-
Tag,
2076-
lists:map(KeyRangeFun, BucketList),
2077-
FoldFun,
2078-
JournalCheck,
2079-
SegmentList,
2080-
LastModRange, MaxObjectCount);
2087+
leveled_runner:foldheads_bybucket(
2088+
SnapFun,
2089+
Tag,
2090+
lists:map(KeyRangeFun, BucketList),
2091+
FoldFun,
2092+
JournalCheck,
2093+
SegmentList,
2094+
LastModRange,
2095+
MaxObjectCount
2096+
);
20812097
get_runner(State,
20822098
{foldheads_bybucket,
20832099
Tag,
@@ -2088,33 +2104,33 @@ get_runner(State,
20882104
{StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange),
20892105
SnapType = snaptype_by_presence(JournalCheck),
20902106
SnapFun = return_snapfun(State, SnapType, SnapQ, true, SnapPreFold),
2091-
leveled_runner:foldheads_bybucket(SnapFun,
2092-
Tag,
2093-
[{StartKey, EndKey}],
2094-
FoldFun,
2095-
JournalCheck,
2096-
SegmentList,
2097-
LastModRange, MaxObjectCount);
2107+
leveled_runner:foldheads_bybucket(
2108+
SnapFun,
2109+
Tag,
2110+
[{StartKey, EndKey}],
2111+
FoldFun,
2112+
JournalCheck,
2113+
SegmentList,
2114+
LastModRange,
2115+
MaxObjectCount
2116+
);
20982117
get_runner(State,
20992118
{foldobjects_bybucket,
21002119
Tag, Bucket, KeyRange,
21012120
FoldFun,
21022121
SnapPreFold}) ->
21032122
{StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange),
21042123
SnapFun = return_snapfun(State, store, SnapQ, true, SnapPreFold),
2105-
leveled_runner:foldobjects_bybucket(SnapFun,
2106-
Tag,
2107-
[{StartKey, EndKey}],
2108-
FoldFun);
2124+
leveled_runner:foldobjects_bybucket(
2125+
SnapFun, Tag, [{StartKey, EndKey}], FoldFun);
21092126
get_runner(State,
21102127
{foldobjects_byindex,
21112128
Tag, Bucket, {Field, FromTerm, ToTerm},
21122129
FoldObjectsFun,
21132130
SnapPreFold}) ->
21142131
SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold),
2115-
leveled_runner:foldobjects_byindex(SnapFun,
2116-
{Tag, Bucket, Field, FromTerm, ToTerm},
2117-
FoldObjectsFun);
2132+
leveled_runner:foldobjects_byindex(
2133+
SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm},FoldObjectsFun);
21182134
get_runner(State, {bucket_list, Tag, FoldAccT}) ->
21192135
{FoldBucketsFun, Acc} = FoldAccT,
21202136
SnapFun = return_snapfun(State, ledger, no_lookup, false, false),

src/leveled_pclerk.erl

+1-1
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ do_merge(
421421

422422
add_entry(empty, FileName, _TS1, Additions) ->
423423
leveled_log:log(pc013, [FileName]),
424-
{[], [], Additions};
424+
{Additions, [], []};
425425
add_entry({ok, Pid, Reply, Bloom}, FileName, TS1, Additions) ->
426426
{{KL1Rem, KL2Rem}, SmallestKey, HighestKey} = Reply,
427427
Entry =

src/leveled_runner.erl

+18-10
Original file line numberDiff line numberDiff line change
@@ -269,11 +269,14 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
269269
end,
270270
{async, Runner}.
271271

272-
-spec foldheads_allkeys(snap_fun(), leveled_codec:tag(),
273-
fold_objects_fun()|{fold_objects_fun(), foldacc()},
274-
boolean(), false|list(integer()),
275-
false|leveled_codec:lastmod_range(),
276-
false|pos_integer()) -> {async, runner_fun()}.
272+
-spec foldheads_allkeys(
273+
snap_fun(),
274+
leveled_codec:tag(),
275+
fold_objects_fun()|{fold_objects_fun(), foldacc()},
276+
boolean()|defer,
277+
false|list(integer()),
278+
false|leveled_codec:lastmod_range(),
279+
false|pos_integer()) -> {async, runner_fun()}.
277280
%% @doc
278281
%% Fold over all heads in the store for a given tag - applying the passed
279282
%% function to each proxy object
@@ -412,7 +415,7 @@ foldobjects_bybucket(SnapFun, Tag, KeyRanges, FoldFun) ->
412415
leveled_codec:tag(),
413416
list(key_range()),
414417
fold_objects_fun()|{fold_objects_fun(), foldacc()},
415-
boolean(),
418+
boolean()|defer,
416419
false|list(integer()),
417420
false|leveled_codec:lastmod_range(),
418421
false|pos_integer())
@@ -501,7 +504,7 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) ->
501504

502505
-spec foldobjects(snap_fun(), atom(), list(),
503506
fold_objects_fun()|{fold_objects_fun(), foldacc()},
504-
false|{true, boolean()},
507+
false|{true, boolean()|defer},
505508
false|list(integer()),
506509
false|leveled_codec:lastmod_range(),
507510
false|pos_integer()) -> {async, runner_fun()}.
@@ -609,9 +612,14 @@ get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun) ->
609612
AccFun.
610613

611614
-spec accumulate_objects
612-
(fold_objects_fun(), pid(), leveled_head:object_tag(), false|{true, boolean()})
615+
(fold_objects_fun(),
616+
pid(),
617+
leveled_head:object_tag(),
618+
false|{true, boolean()|defer})
613619
-> objectacc_fun();
614-
(fold_objects_fun(), null, leveled_head:headonly_tag(), {true, false})
620+
(fold_objects_fun(),
621+
null, leveled_head:headonly_tag(),
622+
{true, false})
615623
-> objectacc_fun().
616624
accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) ->
617625
AccFun =
@@ -652,7 +660,7 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) ->
652660
missing ->
653661
Acc
654662
end;
655-
{false, _} ->
663+
_ ->
656664
FoldObjectsFun(B, K, ProxyObj, Acc)
657665
end;
658666
false ->

0 commit comments

Comments
 (0)