Skip to content

Commit

Permalink
Test: Lua Hive Partition pager (#6691)
Browse files Browse the repository at this point in the history
  • Loading branch information
Isan-Rivkin authored Oct 9, 2023
1 parent 27fa1cc commit 1ab19fc
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 0 deletions.
5 changes: 5 additions & 0 deletions pkg/actions/lua_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,11 @@ func TestLuaRunTable(t *testing.T) {
Input: "testdata/lua/strings_partition.lua",
Output: "testdata/lua/strings_partition.output",
},
{
Name: "catalogexport_hive_partition_pager",
Input: "testdata/lua/catalogexport_hive_partition_pager.lua",
Output: "testdata/lua/catalogexport_hive_partition_pager.output",
},
}

for _, testCase := range tests {
Expand Down
72 changes: 72 additions & 0 deletions pkg/actions/testdata/lua/catalogexport_hive_partition_pager.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
local hive = require("lakefs/catalogexport/hive")

--- Copy a (possibly strided) index range of an array into a new array.
-- @param tbl   source array
-- @param first index of the first element to copy (defaults to 1)
-- @param last  index of the last element to copy, inclusive (defaults to #tbl)
-- @param step  stride between visited indices (defaults to 1)
-- @return a new array holding the visited elements in order; indices where
--         tbl holds nil (for example past its end) contribute nothing
local function slice_dense_array(tbl, first, last, step)
    local start_idx = first or 1
    local stop_idx = last or #tbl
    local stride = step or 1

    local result = {}
    local count = 0
    for idx = start_idx, stop_idx, stride do
        local value = tbl[idx]
        -- Skipping nil keeps the result dense when the range overruns tbl.
        if value ~= nil then
            count = count + 1
            result[count] = value
        end
    end

    return result
end

-- lakefs mock package
--
-- In-memory stand-in for the lakeFS client used by this test. It exposes only
-- list_objects and serves a fixed, ordered listing keyed by
-- action.repository_id / action.commit_id.
local lakefs = {
    --- Return one page of the mocked object listing.
    -- @param repo_id     repository to list (only action.repository_id is populated)
    -- @param commit_id   commit to list (only action.commit_id is populated)
    -- @param next_offset 1-based index of the first entry to return; "" or nil
    --                    means "start from the first entry"
    -- @param prefix      unused by this mock
    -- @param delimiter   unused by this mock
    -- @param page_size   maximum number of entries in the returned page
    -- @return 200, and a table with `results` (the page entries) and
    --         `pagination` ({ has_more, next_offset })
    list_objects = function(repo_id, commit_id, next_offset, prefix, delimiter, page_size)
        local fs = {
            [action.repository_id] = {
                [action.commit_id] = {{
                    physical_address = "s3://bucket/a1/b1/b",
                    path = "letters/a=1/b=1/b.csv"
                }, {
                    physical_address = "s3://bucket/a2/b2/a",
                    path = "letters/a=2/b=2/a.csv"
                }, {
                    physical_address = "s3://bucket/a2/b2/b",
                    path = "letters/a=2/b=2/b.csv"
                }, {
                    physical_address = "",
                    path = "letters/a=2/b=3/_SUCCESS"
                }, {
                    physical_address = "s3://bucket/a2/b3/a",
                    path = "letters/a=2/b=3/a.csv"
                }, {
                    physical_address = "s3://bucket/a3/b4/a",
                    path = "letters/a=3/b=4/a.csv"
                }, {
                    physical_address = "s3://bucket/a3/b4/b",
                    path = "letters/a=3/b=4/b.csv"
                }}
            }
        }
        local all_entries = fs[repo_id][commit_id]
        -- The first request carries no offset yet; start at the first entry.
        if next_offset == nil or next_offset == "" then
            next_offset = 1
        end
        -- end_idx is an inclusive bound, so a page of page_size entries ends at
        -- next_offset + page_size - 1 (without the -1 every page would hold
        -- page_size + 1 entries and a one-entry page would never be exercised).
        local end_idx = next_offset + page_size - 1
        return 200, {
            results = slice_dense_array(all_entries, next_offset, end_idx),
            pagination = {
                has_more = end_idx < #all_entries,
                next_offset = end_idx + 1
            }
        }
    end
}

-- Partition column names and listing prefix handed to the pager.
local partitions = {"a", "b"}
local prefix = "letters/"

-- Print one partition header followed by a line per entry in that partition.
local function print_partition(part_key, entries)
    print("# partition: " .. part_key)
    for _, entry in ipairs(entries) do
        print("path: " .. entry.path .. " physical: " .. entry.physical_address)
    end
end

-- Drain the partition pager once for every page size from 1 to 10 and dump
-- what it yields, so the runs can be compared in the expected-output file.
for page_size = 1, 10 do
    local pager = hive.extract_partition_pager(
        lakefs,
        action.repository_id,
        action.commit_id,
        prefix,
        partitions,
        page_size
    )
    print("result for page_size " .. tostring(page_size))
    for part_key, entries in pager do
        print_partition(part_key, entries)
    end
end
110 changes: 110 additions & 0 deletions pkg/actions/testdata/lua/catalogexport_hive_partition_pager.output
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
result for page_size 1
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 2
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 3
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 4
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 5
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 6
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 7
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 8
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 9
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b
result for page_size 10
# partition: letters/a=1/b=1/
path: letters/a=1/b=1/b.csv physical: s3://bucket/a1/b1/b
# partition: letters/a=2/b=2/
path: letters/a=2/b=2/a.csv physical: s3://bucket/a2/b2/a
path: letters/a=2/b=2/b.csv physical: s3://bucket/a2/b2/b
# partition: letters/a=2/b=3/
path: letters/a=2/b=3/a.csv physical: s3://bucket/a2/b3/a
# partition: letters/a=3/b=4/
path: letters/a=3/b=4/a.csv physical: s3://bucket/a3/b4/a
path: letters/a=3/b=4/b.csv physical: s3://bucket/a3/b4/b

0 comments on commit 1ab19fc

Please sign in to comment.