Skip to content

Commit

Permalink
Add caching of sharding function
Browse files Browse the repository at this point in the history
The ddl.bucket_id() function needs to know a sharding function.
It is costly to obtain the function declaration /
definition stored in the _ddl_sharding_func space.

Cache contains:
raw_tuple - raw sharding metadata, used for get_schema()
parsed_func_name - parsed dot notation (like {'foo', 'bar'})
callable - function ready to call, this offloads using of loadstring()
error - string with an error: not nil only if setting callable fails

Cache will be rebuilded if:
* _ddl_sharding_func space changed: cache sets _ddl_sharding_func:on_replace
  trigger
* schema changed: cache checks box.internal.schema_version changes

This patch does not serve hot reload techniques.
This entails an on_replace trigger duplication if hot reload occurs.
Hot reload support will be done in separate task:
#87

Benchmarks - 10000000 bucket_id() calls (file test/bench_cache.lua):
Baseline (no DDL):     3.38s user 0.01s system 99% cpu 3.389 total
After this patch:
DDL with function body: 3.81s user 0.01s system 99% cpu 3.818 total
DDL with function name: 5.49s user 0.00s system 99% cpu 5.495 total
Before patch:
DDL with function body: 55.95s user 0.40s system 99% cpu 56.354 total
DDL with function name: 13.68s user 0.13s system 99% cpu 13.807 total

Closes #82
  • Loading branch information
0x501D committed Jan 21, 2022
1 parent 4f0fbd1 commit adefb00
Show file tree
Hide file tree
Showing 7 changed files with 507 additions and 13 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,20 @@ jobs:
# Cleanup cached paths
- run: tarantoolctl rocks remove cartridge
- run: tarantoolctl rocks remove ddl
benchmark:
strategy:
fail-fast: false
matrix:
tarantool: ['1.10', '2.5', '2.6', '2.7']
coveralls: [false]
include:
- tarantool: '2.8'
coveralls: true
runs-on: [ubuntu-latest]
steps:
- uses: actions/checkout@v2
- uses: tarantool/setup-tarantool@v1
with:
tarantool-version: ${{ matrix.tarantool }}

- run: tarantool ./test/bench_cache.lua
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ bucket identifier (number)
- Calculate bucket id for a specified space and sharding key.
Method uses sharding function specified in DDL schema.

Method is not transactional in the sense that it catches up
_ddl_sharding_func changes immediatelly: it may see changes that're
not committed yet and may see a state from another transaction,
which should not be visible in the current transaction.

Return values: bucket_id if no error, otherwise return `nil, err`

## Input data format
Expand Down
124 changes: 124 additions & 0 deletions ddl/cache.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
local cache = nil

local SPACE_NAME_IDX = 1
local SHARD_FUNC_NAME_IDX = 2
local SHARD_FUNC_BODY_IDX = 3

-- Build cache.
--
-- We don't need to call this function with any type of locking:
-- _ddl_sharding_func is memtx space, so calling :pairs() on it
-- is atomic
--
-- Cache structure format:
--
-- cache = {
-- spaces = {
-- [space_name] = {
-- -- raw sharding metadata, used for ddl.get()
-- raw_tuple = <tuple object> (<nil> at error),
-- -- parsed dot notation (like {'foo', 'bar'})
-- parsed_func_name = <table> or <nil>
-- -- a function ready to call
-- callable = <function> or <nil>,
-- -- string with an error: not nil only if setting callable fails
-- error = <string> or <nil>,
-- }
-- },
-- -- current schema version
-- schema_version = <...>,
-- }

-- function returns nothing
local function cache_build()
-- clear cache
cache.spaces = {}

if box.space._ddl_sharding_func == nil then
return
end

for _, tuple in box.space._ddl_sharding_func:pairs() do
local space_name = tuple[SPACE_NAME_IDX]
local func_name = tuple[SHARD_FUNC_NAME_IDX]
local func_body = tuple[SHARD_FUNC_BODY_IDX]

cache.spaces[space_name] = {
raw_tuple = tuple
}

if func_body ~= nil then
local sharding_func, err = loadstring('return ' .. func_body)
if sharding_func == nil then
cache.spaces[space_name].error =
string.format("Body is incorrect in sharding_func for space (%s): %s",
space_name, err)
else
cache.spaces[space_name].callable = sharding_func()
end
elseif func_name ~= nil then
-- we cannot save the function itself into the cache,
-- because the function can be changed in runtime and
-- there is no way to catch this change
local chunks = string.split(func_name, '.')
cache.spaces[space_name].parsed_func_name = chunks
end
end

cache.schema_version = box.internal.schema_version()

end

-- Rebuild cache if _ddl_sharding_func space changed.
local function cache_set_trigger()
if box.space._ddl_sharding_func == nil then
return
end

local trigger_found = false

for _, func in pairs(box.space._ddl_sharding_func:on_replace()) do
if func == cache_build then
trigger_found = true
break
end
end

if not trigger_found then
box.space._ddl_sharding_func:on_replace(cache_build)
end
end

-- Get data from cache.
-- Returns all cached data for "space_name" or nil.
local function cache_get(space_name)
if space_name == nil then
return nil
end

-- using tarantool internal API.
-- this is not reliable, but it is the only way to track
-- schema_version changes. Fix it if a public method appears:
-- https://github.com/tarantool/tarantool/issues/6544
local schema_version = box.internal.schema_version()

if not cache then
cache = {}
cache_build()
cache_set_trigger()
end

-- rebuild cache if database schema changed
if schema_version ~= cache.schema_version then
cache_build()
cache_set_trigger()
end

return cache.spaces[space_name]
end

return {
internal = {
get = cache_get,
}
}
31 changes: 19 additions & 12 deletions ddl/get.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
local utils = require('ddl.utils')
local cache = require('ddl.cache')
local ddl_check = require('ddl.check')

local function _get_index_field_path(space, index_part)
Expand Down Expand Up @@ -66,11 +67,18 @@ local function get_metadata(space_name, metadata_name)
end

local function get_sharding_func(space_name)
local record = get_metadata(space_name, "sharding_func")
if not record then
return cache.internal.get(space_name)
end

local function get_sharding_func_raw(space_name)
local record = cache.internal.get(space_name)

if not record or not record.raw_tuple then
return nil
end

record = record.raw_tuple

if record.sharding_func_body ~= nil then
return {body = record.sharding_func_body}
end
Expand All @@ -97,7 +105,7 @@ local function get_space_schema(space_name)
space_ddl.engine = box_space.engine
space_ddl.format = box_space:format()
space_ddl.sharding_key = get_sharding_key(space_name)
space_ddl.sharding_func = get_sharding_func(space_name)
space_ddl.sharding_func = get_sharding_func_raw(space_name)
for _, field in ipairs(space_ddl.format) do
if field.is_nullable == nil then
field.is_nullable = false
Expand All @@ -115,21 +123,20 @@ local function get_space_schema(space_name)
end

local function prepare_sharding_func_for_call(space_name, sharding_func_def)
if type(sharding_func_def) == 'string' then
local sharding_func = utils.get_G_function(sharding_func_def)
if sharding_func_def.error ~= nil then
return nil, sharding_func_def.error
end

if sharding_func_def.parsed_func_name ~= nil then
local sharding_func = utils.get_G_function(sharding_func_def.parsed_func_name)
if sharding_func ~= nil and
ddl_check.internal.is_callable(sharding_func) == true then
return sharding_func
end
end

if type(sharding_func_def) == 'table' then
local sharding_func, err = loadstring('return ' .. sharding_func_def.body)
if sharding_func == nil then
return nil, string.format(
"Body is incorrect in sharding_func for space (%s): %s", space_name, err)
end
return sharding_func()
if sharding_func_def.callable ~= nil then
return sharding_func_def.callable
end

return nil, string.format(
Expand Down
12 changes: 11 additions & 1 deletion ddl/utils.lua
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,19 @@ end
-- split sharding func name in dot notation by dot
-- foo.bar.baz -> chunks: foo bar baz
-- foo -> chunks: foo
--
-- func_name parameter may be a string in dot notation or table
-- if func_name type is of type table it is assumed that it is already split
local function get_G_function(func_name)
local chunks = string.split(func_name, '.')
local sharding_func = _G
local chunks

if type(func_name) == 'string' then
chunks = string.split(func_name, '.')
else
chunks = func_name
end

-- check is the each chunk an identifier
for _, chunk in pairs(chunks) do
if not check_name_isident(chunk) or sharding_func == nil then
Expand Down
Loading

0 comments on commit adefb00

Please sign in to comment.