-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The ddl.bucket_id() function needs to know the sharding function. It is costly to obtain the function declaration / definition stored in the _ddl_sharding_func space. This patch adds a sharding function cache divided into two parts: raw and processed. The raw part is used for the get_schema() method and is stored as is. The processed part is used for bucket_id(). A processed sharding_func cache entry may be: * a table with parsed dot notation (like {'foo', 'bar'}) * a function ready to call, which avoids repeated loadstring() calls * a string with an error. The cache is rebuilt if: * the _ddl_sharding_func space changed: the cache sets a _ddl_sharding_func:on_replace trigger * the schema changed: the cache checks for box.internal.schema_version changes. This patch does not support hot reload techniques: if a hot reload occurs, the on_replace trigger is duplicated. Hot reload support will be done in a separate task: #87 Benchmarks - 10000000 bucket_id() calls (file test/bench_cache.lua): Baseline (no DDL): 3.38s user 0.01s system 99% cpu 3.389 total After this patch: DDL with function body: 3.81s user 0.01s system 99% cpu 3.818 total DDL with function name: 5.49s user 0.00s system 99% cpu 5.495 total Before patch: DDL with function body: 55.95s user 0.40s system 99% cpu 56.354 total DDL with function name: 13.68s user 0.13s system 99% cpu 13.807 total Closes #82
- Loading branch information
Showing
5 changed files
with
493 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
local cache = nil | ||
|
||
local SPACE_NAME_IDX = 1 | ||
local SHARD_FUNC_NAME_IDX = 2 | ||
local SHARD_FUNC_BODY_IDX = 3 | ||
|
||
-- Build cache. | ||
-- | ||
-- Cache structure format: | ||
-- | ||
-- cache = { | ||
-- spaces = { | ||
-- [space_name] = { | ||
-- -- raw sharding metadata, used for ddl.get() | ||
-- raw_tuple = <tuple object> (<nil> at error), | ||
-- -- parsed dot notation (like {'foo', 'bar'}) | ||
-- parsed_func_name = <table> or <nil> | ||
-- -- a function ready to call | ||
-- callable = <function> or <nil>, | ||
-- -- string with an error: not nil only if setting callable fails | ||
-- error = <string> or <nil>, | ||
-- } | ||
-- }, | ||
-- -- current schema version | ||
-- schema_version = <...>, | ||
-- } | ||
|
||
-- Rebuild the sharding function cache from the _ddl_sharding_func space.
--
-- Every tuple of the space produces one entry in cache.spaces, keyed by the
-- space name, with the layout described in the "Cache structure format"
-- comment above. Besides being called directly, this function is also
-- registered as an on_replace trigger (see cache_set_trigger), so it must
-- not depend on its arguments.
--
-- A sharding function body that cannot be compiled, or whose evaluation
-- raises, is recorded in the entry's `error` field instead of propagating
-- the failure to the caller.
--
-- Returns nothing.
local function cache_build()
    -- Always start from an empty table so stale entries never survive.
    cache.spaces = {}

    local sharding_space = box.space._ddl_sharding_func
    if sharding_space == nil then
        -- NOTE: schema_version is intentionally left untouched here, so the
        -- next cache_get() call attempts a rebuild again.
        return
    end

    for _, tuple in sharding_space:pairs() do
        local space_name = tuple[SPACE_NAME_IDX]
        local func_name = tuple[SHARD_FUNC_NAME_IDX]
        local func_body = tuple[SHARD_FUNC_BODY_IDX]

        local entry = {
            raw_tuple = tuple
        }

        if func_body ~= nil then
            -- 'return <body>' turns the stored expression into a chunk whose
            -- evaluation yields the sharding function itself.
            local chunk, err = loadstring('return ' .. func_body)
            local ok = chunk ~= nil
            local result = err
            if ok then
                -- Evaluating the chunk may raise (e.g. the body indexes an
                -- undefined global), so it is called in protected mode.
                ok, result = pcall(chunk)
            end

            if ok then
                entry.callable = result
            else
                entry.error = string.format(
                    "Body is incorrect in sharding_func for space (%s): %s",
                    space_name, tostring(result))
            end
        elseif func_name ~= nil then
            -- we cannot save the function itself into the cache,
            -- because the function can be changed in runtime and
            -- there is no way to catch this change
            entry.parsed_func_name = string.split(func_name, '.')
        end

        cache.spaces[space_name] = entry
    end

    cache.schema_version = box.internal.schema_version()
end
|
||
-- Make sure cache_build runs on every change of the _ddl_sharding_func space.
--
-- The trigger is installed at most once: if cache_build is already among the
-- space's on_replace triggers, nothing is done. Does nothing as well when the
-- space does not exist.
local function cache_set_trigger()
    local sharding_space = box.space._ddl_sharding_func
    if sharding_space == nil then
        return
    end

    -- on_replace() without arguments returns the list of installed triggers.
    for _, installed in pairs(sharding_space:on_replace()) do
        if installed == cache_build then
            -- already registered
            return
        end
    end

    sharding_space:on_replace(cache_build)
end
|
||
-- Look up the cached sharding function data of a space.
--
-- The cache is built lazily on the first call and rebuilt whenever the
-- database schema version differs from the one remembered by the cache.
--
-- Returns the cache entry for "space_name", or nil when the name is nil or
-- nothing is cached for that space.
local function cache_get(space_name)
    if space_name == nil then
        return nil
    end

    -- box.internal.schema_version() is Tarantool internal API and therefore
    -- not reliable, but it is the only way to track schema_version changes.
    -- Switch to a public method once one appears:
    -- https://github.com/tarantool/tarantool/issues/6544
    local current_version = box.internal.schema_version()

    -- first use: create the cache and subscribe to space changes
    if not cache then
        cache = {}
        box.atomic(cache_build)
        cache_set_trigger()
    end

    -- the database schema changed since the last build: rebuild
    if cache.schema_version ~= current_version then
        box.atomic(cache_build)
        cache_set_trigger()
    end

    return cache.spaces[space_name]
end
|
||
-- Module exports: only the cache accessor is exposed, under `internal`.
local internal_api = {
    get = cache_get,
}

return {
    internal = internal_api,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
#!/usr/bin/env tarantool | ||
|
||
local db = require('test.db') | ||
local ddl = require('ddl') | ||
local helper = require('test.helper') | ||
|
||
local ITERS = 10000000 | ||
|
||
-- CRC32-based sharding function used by the benchmarks.
--
-- A table key is hashed by feeding tostring() of each array element, in
-- order, into one CRC32 accumulator; any other key is hashed as the CRC32 of
-- its tostring() representation.
local function mpcrc32(shard_key)
    local digest = require('digest')

    if type(shard_key) == 'table' then
        local hasher = digest.crc32.new()
        for _, part in ipairs(shard_key) do
            hasher:update(tostring(part))
        end
        return hasher:result()
    end

    return digest.crc32(tostring(shard_key))
end
|
||
local sharding_func_body = [[ | ||
function(shard_key) | ||
local digest = require('digest') | ||
if type(shard_key) ~= 'table' then | ||
return digest.crc32(tostring(shard_key)) | ||
else | ||
local crc32 = digest.crc32.new() | ||
for _, v in ipairs(shard_key) do | ||
crc32:update(tostring(v)) | ||
end | ||
return crc32:result() | ||
end | ||
end | ||
]] | ||
|
||
local primary_index = { | ||
type = 'HASH', | ||
unique = true, | ||
parts = { | ||
{path = 'string_nonnull', is_nullable = false, type = 'string'}, | ||
{path = 'unsigned_nonnull', is_nullable = false, type = 'unsigned'}, | ||
}, | ||
name = 'primary' | ||
} | ||
|
||
local bucket_id_idx = { | ||
type = 'TREE', | ||
unique = false, | ||
parts = {{path = 'bucket_id', type = 'unsigned', is_nullable = false}}, | ||
name = 'bucket_id' | ||
} | ||
|
||
-- Drop everything via db.drop_all() and build the benchmark schema.
--
-- Returns a schema table with a single space named "space": a local memtx
-- space whose format is a deep copy of helper.test_space_format() with a
-- 'bucket_id' field prepended, indexed by copies of primary_index and
-- bucket_id_idx. The sharding function is left for the caller to set.
local function space_init()
    db.drop_all()

    -- Work on a private copy so the helper's format is never mutated.
    local format = table.deepcopy(helper.test_space_format())
    table.insert(format, 1, {
        name = 'bucket_id', type = 'unsigned', is_nullable = false
    })

    return {
        spaces = {
            space = {
                engine = 'memtx',
                is_local = true,
                temporary = false,
                format = format,
                indexes = {
                    table.deepcopy(primary_index),
                    table.deepcopy(bucket_id_idx),
                },
                sharding_key = {'unsigned_nonnull', 'integer_nonnull'},
            },
        },
    }
end
|
||
-- Benchmark ddl.bucket_id() with a sharding function stored as a body.
--
-- Applies the benchmark schema with sharding_func = {body = ...} and then
-- calls ddl.bucket_id('space', i) ITERS times. Never returns: the process
-- exits with status 0 after printing "Done", or with status 1 right after
-- printing the first error, so that a failed run is detectable by the caller.
local function run_body()
    db.init()
    local schema = space_init()
    schema.spaces.space.sharding_func = {
        body = sharding_func_body
    }

    local _, set_err = ddl.set_schema(schema)
    if set_err then
        print(set_err)
        os.exit(1)
    end

    for i = 1, ITERS do
        local _, call_err = ddl.bucket_id('space', i)
        if call_err then
            print(call_err)
            os.exit(1)
        end
    end

    print("Done")
    os.exit()
end
|
||
-- Baseline benchmark: call the sharding function directly, bypassing ddl.
--
-- The schema is still applied exactly as in the body benchmark so that the
-- set-up cost is the same; only the measured loop differs, calling
-- mpcrc32(i) ITERS times. Never returns: the process exits with status 0
-- after printing "Done", or with status 1 if applying the schema fails.
local function run_baseline()
    db.init()
    local schema = space_init()
    schema.spaces.space.sharding_func = {
        body = sharding_func_body
    }

    local _, set_err = ddl.set_schema(schema)
    if set_err then
        print(set_err)
        os.exit(1)
    end

    -- mpcrc32() reports no error value, so there is nothing to check per
    -- iteration.
    for i = 1, ITERS do
        mpcrc32(i)
    end

    print("Done")
    os.exit()
end
|
||
-- Benchmark ddl.bucket_id() with a sharding function referenced by name.
--
-- Publishes mpcrc32 as the global 'mpcrc32', applies the benchmark schema
-- with sharding_func set to that name and then calls
-- ddl.bucket_id('space', i) ITERS times. Never returns: the process exits
-- with status 0 after printing "Done", or with status 1 right after printing
-- the first error, so that a failed run is detectable by the caller.
local function run_name()
    db.init()
    local schema = space_init()
    local sharding_func_name = 'mpcrc32'
    -- the function must be reachable through _G to be resolved by name
    rawset(_G, sharding_func_name, mpcrc32)
    schema.spaces.space.sharding_func = sharding_func_name

    local _, set_err = ddl.set_schema(schema)
    if set_err then
        print(set_err)
        os.exit(1)
    end

    for i = 1, ITERS do
        local _, call_err = ddl.bucket_id('space', i)
        if call_err then
            print(call_err)
            os.exit(1)
        end
    end

    print("Done")
    os.exit()
end
|
||
-- Entry point: parse command line options and start the chosen benchmark.
--
-- Without arguments the usage text is printed and the process exits.
-- Options: -l baseline, -n function name caching, -b function body caching.
-- Each benchmark terminates the process itself, so only the first recognized
-- option takes effect.
local function main()
    local getopt = require('posix.unistd').getopt

    if #arg == 0 then
        print("Usage:", arg[0], "\n -l - baseline benchmark",
            "\n -n - function name caching benchmark",
            "\n -b - function body caching benchmark")
        os.exit()
    end

    -- option letter -> benchmark to run
    local runners = {
        l = run_baseline,
        n = run_name,
        b = run_body,
    }

    for opt, _, optind in getopt(arg, 'lnb') do
        if opt == '?' then
            return print('unrecognized option', arg[optind -1])
        end

        local runner = runners[opt]
        if runner ~= nil then
            runner()
        end
    end
end
|
||
main() |
Oops, something went wrong.