Skip to content

Commit

Permalink
linux: added support for eBPF
Browse files Browse the repository at this point in the history
eBPF is an interface added in Linux 3.19 for
executing sandboxed code directly in the kernel, used for
* socket filtering
* kprobe tracing

doc: https://www.kernel.org/doc/Documentation/networking/filter.txt
introduction: https://lwn.net/Articles/603983/
  • Loading branch information
vavrusa committed Apr 2, 2016
1 parent e095295 commit aaa89cb
Show file tree
Hide file tree
Showing 10 changed files with 233 additions and 0 deletions.
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
unreleased

+ Fix seccomp on arm64
+ Linux: added support for eBPF

0.11 release

Expand Down
7 changes: 7 additions & 0 deletions syscall/linux/c.lua
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,13 @@ if sys.time then
function C.time(t) return syscall(sys.time, void(t)) end
end

-- bpf(2): one multiplexed entry point for every eBPF command.
-- Only defined when the syscall table provides sys.bpf (Linux 3.19+).
-- cmd selects the operation, attr points at a union bpf_attr; the size of
-- that union is always passed as the third argument.
if sys.bpf then
  C.bpf = function(cmd, attr)
    local attr_size = ffi.sizeof('union bpf_attr')
    return syscall(sys.bpf, int(cmd), void(attr), u64(attr_size))
  end
end

-- socketcalls
if not sys.socketcall then
function C.socket(domain, tp, protocol) return syscall(sys.socket, int(domain), int(tp), int(protocol)) end
Expand Down
43 changes: 43 additions & 0 deletions syscall/linux/constants.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2013,13 +2013,15 @@ c.BPF = multiflags {
ST = 0x02,
STX = 0x03,
ALU = 0x04,
ALU64 = 0x07,
JMP = 0x05,
RET = 0x06,
MISC = 0x07,
-- size
W = 0x00,
H = 0x08,
B = 0x10,
DW = 0x18,
-- mode
IMM = 0x00,
ABS = 0x20,
Expand All @@ -2036,12 +2038,23 @@ c.BPF = multiflags {
AND = 0x50,
LSH = 0x60,
RSH = 0x70,
ARSH = 0xc0,
NEG = 0x80,
MOD = 0x90,
XOR = 0xa0,
MOV = 0xb0,
XADD = 0xc0,
END = 0xd0,
JA = 0x00,
JEQ = 0x10,
JGT = 0x20,
JGE = 0x30,
JSET = 0x40,
JNE = 0x50,
JSGT = 0x60,
JSGE = 0x70,
CALL = 0x80,
EXIT = 0x90,
-- src
K = 0x00,
X = 0x08,
Expand All @@ -2050,6 +2063,36 @@ c.BPF = multiflags {
-- miscop
TAX = 0x00,
TXA = 0x80,
TO_LE = 0x00,
TO_BE = 0x08,
}

-- eBPF flags
-- Map kinds accepted as the `type` argument of S.bpf_map_create.
-- Presumably mirrors enum bpf_map_type from <linux/bpf.h>; verify on update.
c.BPF_MAP = {
  UNSPEC           = 0,
  HASH             = 1,
  ARRAY            = 2,
  PROG_ARRAY       = 3,
  PERF_EVENT_ARRAY = 4,
}

-- Command numbers passed as the first argument of the bpf syscall.
-- Presumably mirrors enum bpf_cmd from <linux/bpf.h>; verify on update.
c.BPF_CMD = {
  -- map commands
  MAP_CREATE       = 0,
  MAP_LOOKUP_ELEM  = 1,
  MAP_UPDATE_ELEM  = 2,
  MAP_DELETE_ELEM  = 3,
  MAP_GET_NEXT_KEY = 4,
  -- program loading
  PROG_LOAD        = 5,
  -- object commands
  OBJ_PIN          = 6,
  OBJ_GET          = 7,
}

-- Program kinds accepted as the `type` argument of S.bpf_prog_load.
-- Presumably mirrors enum bpf_prog_type from <linux/bpf.h>; verify on update.
c.BPF_PROG = {
  UNSPEC        = 0,
  SOCKET_FILTER = 1,
  KPROBE        = 2,
  SCHED_CLS     = 3,
  SCHED_ACT     = 4,
}

-- termios - c_cc characters
Expand Down
38 changes: 38 additions & 0 deletions syscall/linux/ffi.lua
Original file line number Diff line number Diff line change
Expand Up @@ -498,10 +498,48 @@ struct sock_filter {
uint8_t jf;
uint32_t k;
};
/* One eBPF instruction (8 bytes). off and imm are signed quantities, so they
   are declared with signed types: a backward jump or a negative immediate
   must read back as a negative number rather than a large unsigned one. */
struct bpf_insn {
  uint8_t code;       /* opcode */
  uint8_t dst_reg:4;  /* dest register */
  uint8_t src_reg:4;  /* source register */
  int16_t off;        /* signed offset */
  int32_t imm;        /* signed immediate constant */
};
struct sock_fprog {
unsigned short len;
struct sock_filter *filter;
};
/* Argument block for the bpf syscall. Each anonymous struct below is the
   view used by one family of commands; all views overlay the same storage.
   Pointers are carried as 8-byte-aligned uint64_t fields. */
union bpf_attr {
/* map creation (filled in by S.bpf_map_create) */
struct {
uint32_t map_type;
uint32_t key_size;
uint32_t value_size;
uint32_t max_entries;
};
/* map element operations (filled in by S.bpf_map_op) */
struct {
uint32_t map_fd;
uint64_t key __attribute__((aligned(8)));
/* value and next_key share storage; which one applies depends on the command */
union {
uint64_t value __attribute__((aligned(8)));
uint64_t next_key __attribute__((aligned(8)));
};
uint64_t flags;
};
/* program load (filled in by S.bpf_prog_load) */
struct {
uint32_t prog_type;
uint32_t insn_cnt;
uint64_t insns __attribute__((aligned(8)));
uint64_t license __attribute__((aligned(8)));
uint32_t log_level;
uint32_t log_size;
uint64_t log_buf __attribute__((aligned(8)));
uint32_t kern_version;
};
/* presumably the view for the OBJ_PIN / OBJ_GET commands; no visible caller uses it yet */
struct {
uint64_t pathname __attribute__((aligned(8)));
uint32_t bpf_fd;
};
} __attribute__((aligned(8)));
struct mq_attr {
long mq_flags, mq_maxmsg, mq_msgsize, mq_curmsgs, __unused[4];
};
Expand Down
61 changes: 61 additions & 0 deletions syscall/linux/syscalls.lua
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,67 @@ function S.sysctl(name, new)
return old
end

-- The BPF syscall has complex semantics, with one union serving all purposes.
-- The interface exports both the raw syscall and helper functions based on libbpf.
if C.bpf then
-- Convert a pointer-like value to the uint64_t form that union bpf_attr
-- uses for its pointer fields, going through void * first.
local function ptr_to_u64(p)
  local as_void = ffi.cast('void *', p)
  return ffi.cast('uint64_t', as_void)
end
-- Raw bpf syscall: forwards cmd and attr to C.bpf untouched and hands back
-- whatever C.bpf returns. No error translation is done at this level.
S.bpf = function(cmd, attr)
  return C.bpf(cmd, attr)
end
-- Pack the first dotted "X.Y.Z" triple found in str into the 0xXXYYZZ layout
-- expected in kern_version. Components may have several digits ("4.15.0").
-- Returns nil when str is nil or contains no such triple.
local function kernel_version_code(str)
  if not str then return nil end
  local major, minor, patch = str:match('(%d+)%.(%d+)%.(%d+)')
  if not major then return nil end
  patch = tonumber(patch)
  -- Z only has one byte; keep it there so it cannot corrupt Y
  if patch > 255 then patch = 255 end
  return bit.bor(bit.lshift(tonumber(major), 16), bit.lshift(tonumber(minor), 8), patch)
end

-- Load an eBPF program.
--   type     program type (a c.BPF_PROG value)
--   insns    instruction array (e.g. from t.bpf_insns)
--   len      number of instructions in insns
--   license  license string, defaults to "GPL"
--   version  kernel version code as 0xXXYYZZ; detected from sysctl when omitted
-- Returns the program fd on success, or nil, error, verifier log on failure.
function S.bpf_prog_load(type, insns, len, license, version)
  local log_size = 4096
  if not license then license = "GPL" end -- Must stay alive during the syscall
  local bpf_log_buf = ffi.new('char [?]', log_size) -- Must stay alive during the syscall
  if not version then
    -- We have no better way to extract the current kernel version other
    -- than parsing headers, compiling a helper function or reading /proc.
    -- kernel.version is free-form and may not contain a version triple at
    -- all, so fall back to kernel.osrelease. If neither yields one, use 0
    -- and let the kernel decide instead of raising here.
    version = kernel_version_code((S.sysctl('kernel.version')))
      or kernel_version_code((S.sysctl('kernel.osrelease')))
      or 0
  end
  local attr = t.bpf_attr1()
  attr[0].prog_type = type
  attr[0].insns = ptr_to_u64(insns)
  attr[0].insn_cnt = len
  attr[0].license = ptr_to_u64(license)
  attr[0].log_buf = ptr_to_u64(bpf_log_buf)
  attr[0].log_size = log_size
  attr[0].log_level = 1
  attr[0].kern_version = version -- MUST match current kernel version
  local fd = S.bpf(c.BPF_CMD.PROG_LOAD, attr)
  if fd < 0 then
    return nil, t.error(errno()), ffi.string(bpf_log_buf)
  end
  return fd
end
-- Create an eBPF map.
--   type         map type (a c.BPF_MAP value)
--   key_size     size of one key in bytes
--   value_size   size of one value in bytes
--   max_entries  capacity of the map
-- Returns the map fd on success, or nil plus an error object on failure.
function S.bpf_map_create(type, key_size, value_size, max_entries)
  local attr = t.bpf_attr1()
  local map = attr[0]
  map.map_type = type
  map.key_size = key_size
  map.value_size = value_size
  map.max_entries = max_entries
  local fd = S.bpf(c.BPF_CMD.MAP_CREATE, attr)
  if fd >= 0 then return fd end
  return nil, t.error(errno())
end
-- Run a map element command (op is a c.BPF_CMD value) against the map fd.
--   key          pointer to the key
--   val_or_next  pointer to the value buffer or to the next-key buffer
--   flags        optional flags, defaults to 0
-- Returns the syscall result (0) on success, or nil plus an error object.
function S.bpf_map_op(op, fd, key, val_or_next, flags)
  local attr = t.bpf_attr1()
  local elem = attr[0]
  elem.map_fd = fd
  elem.key = ptr_to_u64(key)
  -- value and next_key share storage in the union, so one store serves both
  elem.value = ptr_to_u64(val_or_next)
  elem.flags = flags or 0
  local ret = S.bpf(op, attr)
  if ret == 0 then return ret end
  return nil, t.error(errno())
end
end

return S

end
Expand Down
14 changes: 14 additions & 0 deletions syscall/linux/types.lua
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ local addstructs = {
ff_rumble_effect = "struct ff_rumble_effect",
ff_effect = "struct ff_effect",
sock_fprog = "struct sock_fprog",
bpf_attr = "union bpf_attr",
user_cap_header = "struct user_cap_header",
user_cap_data = "struct user_cap_data",
xt_get_revision = "struct xt_get_revision",
Expand All @@ -139,6 +140,7 @@ pt.inotify_event = ptt("struct inotify_event") -- still need pointer to this

t.aio_context1 = ffi.typeof("aio_context_t[1]")
t.sock_fprog1 = ffi.typeof("struct sock_fprog[1]")
t.bpf_attr1 = ffi.typeof("union bpf_attr[1]")

t.user_cap_data2 = ffi.typeof("struct user_cap_data[2]")

Expand All @@ -147,6 +149,8 @@ local iocbs = ffi.typeof("struct iocb[?]")
t.iocbs = function(n, ...) return ffi.new(iocbs, n, ...) end
local sock_filters = ffi.typeof("struct sock_filter[?]")
t.sock_filters = function(n, ...) return ffi.new(sock_filters, n, ...) end
-- Variable-length array of eBPF instructions; n is the element count and
-- any further arguments are passed to ffi.new as initializers.
local bpf_insns = ffi.typeof("struct bpf_insn[?]")
t.bpf_insns = function(n, ...)
  return ffi.new(bpf_insns, n, ...)
end
local iocb_ptrs = ffi.typeof("struct iocb *[?]")
t.iocb_ptrs = function(n, ...) return ffi.new(iocb_ptrs, n, ...) end

Expand Down Expand Up @@ -760,6 +764,14 @@ mt.sock_filter = {

addtype(types, "sock_filter", "struct sock_filter", mt.sock_filter)

-- Constructor for a single eBPF instruction. `code` is looked up in c.BPF
-- (e.g. "ALU64,MOV,K"); every other operand is optional and defaults to 0.
mt.bpf_insn = {
  __new = function(tp, code, dst_reg, src_reg, off, imm)
    local opcode = c.BPF[code]
    dst_reg = dst_reg or 0
    src_reg = src_reg or 0
    off = off or 0
    imm = imm or 0
    return ffi.new(tp, opcode, dst_reg, src_reg, off, imm)
  end
}

addtype(types, "bpf_insn", "struct bpf_insn", mt.bpf_insn)

-- capabilities data is an array so cannot put metatable on it. Also depends on version, so combine into one structure.

-- TODO maybe add caching
Expand Down Expand Up @@ -1163,6 +1175,8 @@ mt.mmsghdrs = {

addtype_var(types, "mmsghdrs", "struct {int count; struct mmsghdr msg[?];}", mt.mmsghdrs)

-- Register union bpf_attr as a type, without a metatable.
-- NOTE(review): "bpf_attr" is also listed in the addstructs table earlier in
-- this file; confirm whether this second registration is still needed.
addtype(types, "bpf_attr", "union bpf_attr")

-- this is declared above
samap_pt = {
[c.AF.UNIX] = pt.sockaddr_un,
Expand Down
21 changes: 21 additions & 0 deletions test/ctest-linux.lua
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,27 @@ ctypes["struct termios"] = nil
-- not defined by glibc
ctypes["struct k_sigaction"] = nil

-- eBPF not available on Travis / opaque types
-- Remove the eBPF structs from the ctypes table.
ctypes["struct bpf_insn"] = nil
ctypes["union bpf_attr"] = nil
-- Replace the eBPF-only constant tables with empty ones.
c.BPF_MAP = {}
c.BPF_CMD = {}
c.BPF_PROG = {}
-- Remove the eBPF additions from c.BPF; the remaining entries stay in place.
c.BPF.ALU64 = nil
c.BPF.DW = nil
c.BPF.JSGT = nil
c.BPF.JSGE = nil
c.BPF.CALL = nil
c.BPF.EXIT = nil
c.BPF.TO_LE = nil
c.BPF.TO_BE = nil
c.BPF.END = nil
c.BPF.ARSH = nil
c.BPF.XADD = nil
c.BPF.JNE = nil
c.BPF.MOV = nil
-- Remove the bpf syscall number as well.
c.SYS.bpf = nil

if abi.arch == "arm" then ctypes["struct statfs64"] = nil end -- padding difference, not that important

for k, v in pairs(c.IOCTL) do if type(v) == "table" then c.IOCTL[k] = v.number end end
Expand Down
18 changes: 18 additions & 0 deletions test/linux-constants.lua
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,27 @@ local function fixup_constants(abi, c)
c.SYS.getrandom = nil
c.SYS.memfd_create = nil
c.SYS.kexec_file_load = nil
c.SYS.bpf = nil

-- new constants
c.GRND = nil
-- requires Linux 3.19+, not supported on Travis
c.BPF_MAP = {}
c.BPF_CMD = {}
c.BPF_PROG = {}
c.BPF.ALU64 = nil
c.BPF.DW = nil
c.BPF.JSGT = nil
c.BPF.JSGE = nil
c.BPF.CALL = nil
c.BPF.EXIT = nil
c.BPF.TO_LE = nil
c.BPF.TO_BE = nil
c.BPF.END = nil
c.BPF.ARSH = nil
c.BPF.XADD = nil
c.BPF.JNE = nil
c.BPF.MOV = nil

return c
end
Expand Down
2 changes: 2 additions & 0 deletions test/linux-structures.lua
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ local function fixup_structs(abi, ctypes)
ctypes["struct sockaddr_storage"] = nil -- uses __kernel_
ctypes["struct k_sigaction"] = nil -- seems to be incorrect in headers
ctypes["struct mmsghdr"] = nil -- too new for our headers
ctypes["union bpf_attr"] = nil -- too new for our headers
ctypes["struct bpf_insn"] = nil -- too new for our headers

ctypes["sigset_t"] = nil -- still some issues

Expand Down
28 changes: 28 additions & 0 deletions test/linux.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1310,6 +1310,34 @@ test.bpf = {
end,
}

-- test eBPF filters
if S.bpf and not S.__rump then
test.bpf_root = {}
-- Create a hash map, store one element and read it back.
test.bpf_root.test_bpf_map_create = function()
  local klen = ffi.sizeof('int')
  local key = ffi.new('int [1]', 0xdead)
  local fd = assert(S.bpf_map_create(c.BPF_MAP.HASH, klen, klen, 10))
  -- Update: the key buffer doubles as the value, so map[0xdead] = 0xdead
  assert(S.bpf_map_op(c.BPF_CMD.MAP_UPDATE_ELEM, fd, key, key) == 0)
  -- Retrieve: val starts out different and must be overwritten by the lookup
  local val = ffi.new('int [1]', 0xbeef)
  local ok, err = S.bpf_map_op(c.BPF_CMD.MAP_LOOKUP_ELEM, fd, key, val)
  assert(ok, err) -- surface the syscall error instead of a bare failure
  assert(key[0] == val[0])
  S.close(fd)
end
-- Load a two-instruction socket filter program (a MOV followed by EXIT).
test.bpf_root.test_bpf_prog_load = function()
  local bpf = t.bpf_insns(2, {
    t.bpf_insn("ALU64,MOV,K", 0, 0, 0, 1),
    t.bpf_insn("JMP,EXIT"),
  })
  local fd, err, log = S.bpf_prog_load(c.BPF_PROG.SOCKET_FILTER, bpf, 2)
  -- err comes from t.error(...) and may not support `..`, so stringify it
  -- (and the log) before building the failure message
  if not fd then assert(false, tostring(err)..': '..tostring(log)) end
  S.close(fd)
end
end

-- TODO remove arch tests. Unclear if my ppc/arm does not support or a bug, retest later with newer kernel
-- still ppc issues with 3.12.6 ppc, need to debug more, and mips issues
if not (abi.arch == "ppc64le" or abi.arch == "ppc" or abi.arch == "mips" or S.__rump) then -- cannot test on rump as uses clone()
Expand Down

0 comments on commit aaa89cb

Please sign in to comment.