Skip to content

Commit

Permalink
Log potentially long count calls
Browse files Browse the repository at this point in the history
Potentially long count calls on user spaces tend to be dangerous.

A critical log entry containing the current stack traceback is created
upon potentially long count calls — a user can explicitly request a
full scan though by passing fullscan = true to count's options table
argument in which a case a log entry will not be created.

Closes #276
  • Loading branch information
oleg-jukovec committed May 25, 2022
1 parent 717dc21 commit aed33cc
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 29 deletions.
9 changes: 5 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Added

### Changed
* Behaviour of potentially long `select` calls: a critical log entry containing
the current stack traceback is created upon such function calls — an user can
explicitly request a full scan through by passing `fullscan=true` to `select`
options table argument in which a case a log entry will not be created (#276).
* Behaviour of potentially long `select` and `count` calls: a critical log entry
containing the current stack traceback is created upon such function calls —
an user can explicitly request a full scan through by passing `fullscan=true`
to `select` or `count` options table argument in which a case a log entry will
not be created (#276).

### Fixed

Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,8 @@ where:
* `force_map_call` (`?boolean`) - if `true`
then the map call is performed without any optimizations even,
default value is `false`
* `fullscan` (`?boolean`) - if `true` then a critical log entry will be skipped
on potentially long `count`, see [avoiding full scan](https://github.com/tarantool/crud/blob/master/doc/select.md#avoiding-full-scan).
* `mode` (`?string`, `read` or `write`) - if `write` is specified then `count` is
performed on master, default value is `read`
* `prefer_replica` (`?boolean`) - if `true` then the preferred target is one of
Expand All @@ -675,9 +677,9 @@ where:
default value is `false`

```lua
crud.count('customers', {{'<=', 'age', 35}})
crud.count('customers', {{'==', 'age', 35}})
---
- 5
- 1
...
```

Expand Down
20 changes: 20 additions & 0 deletions crud/count.lua
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ local sharding = require('crud.common.sharding')
local filters = require('crud.compare.filters')
local count_plan = require('crud.compare.plan')
local dev_checks = require('crud.common.dev_checks')
local ratelimit = require('crud.ratelimit')
local schema = require('crud.common.schema')
local sharding_metadata_module = require('crud.common.sharding.sharding_metadata')

Expand Down Expand Up @@ -90,6 +91,22 @@ function count.init()
_G._crud.count_on_storage = count_on_storage
end

local check_count_safety_rl = ratelimit.new()
local function check_count_safety(space_name, plan, opts)
if opts.fullscan == true then
return
end

local iter = plan.tarantool_iter
if iter == box.index.EQ or iter == box.index.REQ then
return
end

local rl = check_count_safety_rl
local traceback = debug.traceback()
rl:log_crit("Potentially long count from space '%s'\n %s", space_name, traceback)
end

-- returns result, err, need_reload
-- need_reload indicates if reloading schema could help
-- see crud.common.schema.wrap_func_reload()
Expand All @@ -98,6 +115,7 @@ local function call_count_on_router(space_name, user_conditions, opts)
timeout = '?number',
bucket_id = '?number|cdata',
force_map_call = '?boolean',
fullscan = '?boolean',
yield_every = '?number',
prefer_replica = '?boolean',
balance = '?boolean',
Expand Down Expand Up @@ -147,6 +165,7 @@ local function call_count_on_router(space_name, user_conditions, opts)
if err ~= nil then
return nil, CountError:new("Failed to plan count: %s", err), const.NEED_SCHEMA_RELOAD
end
check_count_safety(space_name, plan, opts)

-- set replicasets to count from
local replicasets_to_count = replicasets
Expand Down Expand Up @@ -297,6 +316,7 @@ function count.call(space_name, user_conditions, opts)
timeout = '?number',
bucket_id = '?number|cdata',
force_map_call = '?boolean',
fullscan = '?boolean',
yield_every = '?number',
prefer_replica = '?boolean',
balance = '?boolean',
Expand Down
13 changes: 13 additions & 0 deletions test/helper.lua
Original file line number Diff line number Diff line change
Expand Up @@ -509,4 +509,17 @@ function helpers.assert_timeout_error(value, message)
error(err, 2)
end

function helpers.fflush_cluster_stdout(cluster, capture)
-- Sometimes we have a delay here. This hack helps to wait for the end of
-- the output. It shouldn't take much time.
cluster:server('router').net_box:eval([[
require('log').error("crud fflush stdout message")
]])
local captured = ""
while not string.find(captured, "crud fflush stdout message", 1, true) do
captured = captured .. (capture:flush().stdout or "")
end
return captured
end

return helpers
104 changes: 91 additions & 13 deletions test/integration/count_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ local fio = require('fio')
local clock = require('clock')

local t = require('luatest')
local luatest_capture = require('luatest.capture')

local helpers = require('test.helper')

Expand All @@ -26,6 +27,9 @@ pgroup.before_all(function(g)
g.cluster:server('router').net_box:eval([[
require('crud').cfg{ stats = true }
]])
g.cluster:server('router').net_box:eval([[
require('crud.ratelimit').disable()
]])
end)

pgroup.after_all(function(g) helpers.stop_cluster(g.cluster) end)
Expand All @@ -37,14 +41,14 @@ pgroup.before_each(function(g)
end)

pgroup.test_count_non_existent_space = function(g)
local result, err = g.cluster.main_server.net_box:call('crud.count', {'non_existent_space'})
local result, err = g.cluster.main_server.net_box:call('crud.count', {'non_existent_space', nil, {fullscan = true}})

t.assert_equals(result, nil)
t.assert_str_contains(err.err, "Space \"non_existent_space\" doesn't exist")
end

pgroup.test_count_empty_space = function(g)
local result, err = g.cluster.main_server.net_box:call('crud.count', {'customers'})
local result, err = g.cluster.main_server.net_box:call('crud.count', {'customers', nil, {fullscan = true}})

t.assert_equals(err, nil)
t.assert_equals(result, 0)
Expand All @@ -69,7 +73,7 @@ pgroup.test_not_valid_operation = function(g)
}

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers', conditions}
{'customers', conditions, {fullscan = true}}
)

t.assert_equals(result, nil)
Expand All @@ -89,6 +93,79 @@ pgroup.test_conditions_with_non_existed_field = function(g)
t.assert_equals(result, 0)
end


local count_safety_cases = {
nil_and_nil_opts = {
has_crit = true,
user_conditions = nil,
opts = nil,
},
fullscan_false = {
has_crit = true,
user_conditions = nil,
opts = {fullscan = false},
},
fullscan_true = {
has_crit = false,
user_conditions = nil,
opts = {fullscan = true},
},
non_equal_conditions = {
has_crit = true,
user_conditions = {
{'>=', 'last_name', 'A'},
{'<=', 'last_name', 'Z'},
{'>', 'age', 20},
{'<', 'age', 30},
},
opts = nil,
},
equal_condition = {
has_crit = false,
user_conditions = {
{'>=', 'last_name', 'A'},
{'<=', 'last_name', 'Z'},
{'=', 'age', 25},
},
opts = nil,
},
equal_condition2 = {
has_crit = false,
user_conditions = {
{'>=', 'last_name', 'A'},
{'<=', 'last_name', 'Z'},
{'==', 'age', 25},
},
opts = nil,
},
}

for name, case in pairs(count_safety_cases) do
local space = 'customers'
local crit_log = "C> Potentially long count from space '" .. space .. "'"
local test_name = ('test_count_safety_%s'):format(name)

pgroup[test_name] = function(g)
local uc = case.user_conditions
local opts = case.opts
local capture = luatest_capture:new()
capture:enable()

local _, err = g.cluster.main_server.net_box:call('crud.count', {space, uc, opts})
t.assert_equals(err, nil)

local captured = helpers.fflush_cluster_stdout(g.cluster, capture)

if case.has_crit then
t.assert_str_contains(captured, crit_log)
else
t.assert_equals(string.find(captured, crit_log, 1, true), nil)
end

capture:disable()
end
end

pgroup.test_count_all = function(g)
-- let's insert five tuples on different replicasets
-- (two tuples on one replica and three on the other)
Expand Down Expand Up @@ -118,7 +195,7 @@ pgroup.test_count_all = function(g)
})

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers'}
{'customers', nil, {fullscan = true}}
)

t.assert_equals(err, nil)
Expand Down Expand Up @@ -154,7 +231,7 @@ pgroup.test_count_all_with_yield_every = function(g)
})

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers', nil, {yield_every = 1}}
{'customers', nil, {yield_every = 1, fullscan = true}}
)

t.assert_equals(err, nil)
Expand Down Expand Up @@ -190,7 +267,7 @@ pgroup.test_count_all_with_yield_every_0 = function(g)
})

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers', nil, {yield_every = 0}}
{'customers', nil, {yield_every = 0, fullscan = true}}
)

t.assert_equals(result, nil)
Expand Down Expand Up @@ -312,7 +389,7 @@ pgroup.test_ge_condition_with_index = function(g)
local expected_len = 3

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers', conditions}
{'customers', conditions, {fullscan = true}}
)

t.assert_equals(err, nil)
Expand Down Expand Up @@ -354,7 +431,7 @@ pgroup.test_gt_condition_with_index = function(g)
local expected_len = 1

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers', conditions}
{'customers', conditions, {fullscan = true}}
)

t.assert_equals(err, nil)
Expand Down Expand Up @@ -396,7 +473,7 @@ pgroup.test_le_condition_with_index = function(g)
local expected_len = 4

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers', conditions}
{'customers', conditions, {fullscan = true}}
)

t.assert_equals(err, nil)
Expand Down Expand Up @@ -438,7 +515,7 @@ pgroup.test_lt_condition_with_index = function(g)
local expected_len = 2

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers', conditions}
{'customers', conditions, {fullscan = true}}
)

t.assert_equals(err, nil)
Expand Down Expand Up @@ -543,6 +620,7 @@ pgroup.test_opts_not_damaged = function(g)
mode = 'read',
prefer_replica = false,
balance = false,
fullscan = true
}
local new_count_opts, err = g.cluster.main_server:eval([[
local crud = require('crud')
Expand Down Expand Up @@ -586,7 +664,7 @@ pgroup.test_count_no_map_reduce = function(g)
local result, err = g.cluster.main_server.net_box:call('crud.count', {
'customers',
nil,
{bucket_id = 2804, timeout = 1},
{bucket_id = 2804, timeout = 1, fullscan = true},
})
t.assert_equals(err, nil)
t.assert_equals(result, 1)
Expand Down Expand Up @@ -647,7 +725,7 @@ pgroup.test_count_timeout = function(g)
local begin = clock.proc()

local result, err = g.cluster.main_server.net_box:call('crud.count',
{'customers', conditions, {timeout = timeout}}
{'customers', conditions, {timeout = timeout, fullscan = true}}
)

t.assert_equals(err, nil)
Expand Down Expand Up @@ -688,7 +766,7 @@ pgroup.test_composite_index = function(g)
}

-- no after
local result, err = g.cluster.main_server.net_box:call('crud.count', {'customers', conditions})
local result, err = g.cluster.main_server.net_box:call('crud.count', {'customers', conditions}, {fullscan = true})

t.assert_equals(err, nil)
t.assert_equals(result, 4)
Expand Down
3 changes: 2 additions & 1 deletion test/integration/read_calls_strategies_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ pgroup.test_count = function(g)
local _, err = g.cluster.main_server.net_box:call('crud.count', {'customers', nil, {
mode = g.params.mode,
balance = g.params.balance,
prefer_replica = g.params.prefer_replica
prefer_replica = g.params.prefer_replica,
fullscan = true
}})
t.assert_equals(err, nil)
local vshard_calls = g.get_vshard_calls('call_impl')
Expand Down
10 changes: 1 addition & 9 deletions test/integration/select_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,7 @@ for name, case in pairs(select_safety_cases) do
local _, err = g.cluster.main_server.net_box:call('crud.select', {space, uc, opts})
t.assert_equals(err, nil)

-- We have a delay here. This hack helps to wait for the end of the output.
-- It shouldn't take much time.
g.cluster:server('router').net_box:eval([[
require('log').error("end of test_select_nil case")
]])
local captured = ""
while not string.find(captured, "end of test_select_nil case", 1, true) do
captured = captured .. (capture:flush().stdout or "")
end
local captured = helpers.fflush_cluster_stdout(g.cluster, capture)

if case.has_crit then
t.assert_str_contains(captured, crit_log)
Expand Down

0 comments on commit aed33cc

Please sign in to comment.