diff --git a/priv/riak_kv.schema b/priv/riak_kv.schema index 9e5ac234d..6f3cbc1c1 100644 --- a/priv/riak_kv.schema +++ b/priv/riak_kv.schema @@ -298,6 +298,19 @@ {commented, 2} ]}. +%% @doc Deferred reap on failure +%% Should one or more primaries be unavilable the reap following delete will +%% not be triggered. Rather than being ignored, it can be deferred by enabling +%% defer_reap_on_failure. This will queue the reap on the reaper untill all +%% primaries are available. The reaper queue will be erased on restart, so +%% further failure may lead to the loss of deferred reaps +{mapping, "defer_reap_on_failure", "riak_kv.defer_reap_on_failure", [ + {datatype, flag}, + {default, on}, + hidden +]}. + + %% @doc Whether to allow node to participate in coverage queries. %% This is used as a manual switch to stop nodes in incomplete states %% (E.g. doing a full partition repair, or node replace) from participating diff --git a/src/riak_kv_get_core.erl b/src/riak_kv_get_core.erl index 811bfa57f..aa8bb5f2c 100644 --- a/src/riak_kv_get_core.erl +++ b/src/riak_kv_get_core.erl @@ -45,7 +45,7 @@ {delete_repair, [{non_neg_integer(), repair_reason()}], riak_object:riak_object()} | - delete. + {delete, riak_object:riak_object()}. -type idxresult() :: {non_neg_integer(), result()}. -type idx_type() :: [{non_neg_integer, 'primary' | 'fallback'}]. @@ -379,7 +379,7 @@ final_action(GetCore = #getcore{n = N, merged = Merged0, results = Results, Action = case ReadRepairs of [] when ObjState == tombstone, AllResults -> - delete; + {delete, MObj}; [] -> nop; _ when ObjState == tombstone, AllResults -> diff --git a/src/riak_kv_get_fsm.erl b/src/riak_kv_get_fsm.erl index 8dd1937fa..8bcec8d4a 100644 --- a/src/riak_kv_get_fsm.erl +++ b/src/riak_kv_get_fsm.erl @@ -598,13 +598,13 @@ finalize(StateData=#state{get_core = GetCore, trace = Trace}) -> UpdStateData = StateData#state{get_core = UpdGetCore}, case Action of - delete -> - maybe_delete(UpdStateData); + {delete, TombObj} -> + maybe_delete(UpdStateData, TombObj); {read_repair, Indices, RepairObj} -> maybe_read_repair(Indices, RepairObj, UpdStateData); {delete_repair, Indices, RepairObj} -> maybe_read_repair(Indices, RepairObj, UpdStateData), - maybe_delete(UpdStateData); + maybe_delete(UpdStateData, RepairObj); _Nop -> ?DTRACE(Trace, ?C_GET_FSM_FINALIZE, [], ["finalize"]), ok @@ -615,8 +615,10 @@ finalize(StateData=#state{get_core = GetCore, trace = Trace}) -> %% Maybe issue deletes if all primary nodes are available. %% Get core will only requestion deletion if all vnodes %% replies with the same value. -maybe_delete(StateData=#state{n = N, preflist2=Sent, trace=Trace, - req_id=ReqId, bkey=BKey}) -> +maybe_delete( + StateData=#state{ + n = N, preflist2=Sent, trace=Trace, req_id=ReqId, bkey=BKey}, + TombObj) -> %% Check sent to a perfect preflist and we can delete IdealNodes = [{I, Node} || {{I, Node}, primary} <- Sent], NotCustomN = not using_custom_n_val(StateData), @@ -628,9 +630,27 @@ maybe_delete(StateData=#state{n = N, preflist2=Sent, trace=Trace, _ -> ?DTRACE(Trace, ?C_GET_FSM_MAYBE_DELETE, [0], ["maybe_delete", "nop"]), + maybe_defer_reap(BKey, TombObj), nop end. +-spec maybe_defer_reap( + {riak_object:bucket(), riak_object:key()}, riak_object:object()) -> ok. +maybe_defer_reap(BKey, TombstoneObj) -> + case app_helper:get_env(riak_kv, delete_mode, 3000) of + keep -> + ok; + _ -> + case app_helper:get_env(riak_kv, defer_reap_on_failure, true) of + true -> + VC = riak_object:vclock(TombstoneObj), + riak_kv_reaper:request_reap( + {BKey, riak_object:delete_hash(VC)}); + _ -> + ok + end + end. + using_custom_n_val(#state{n=N, bucket_props=BucketProps}) -> case lists:keyfind(n_val, 1, BucketProps) of {_, N} ->