Skip to content

Commit

Permalink
feat(aya): Add iterator program type
Browse files Browse the repository at this point in the history
BPF iterators[0] are a way to dump kernel data into user space and an
alternative to the `/proc` filesystem.

This change adds support for BPF iterators on the user-space side. It
makes it possible to retrieve the output of BPF iterator programs from
both sync and async Rust code.

[0] https://docs.kernel.org/bpf/bpf_iterators.html
  • Loading branch information
vadorovsky committed Nov 24, 2024
1 parent c81f5e4 commit bf2164c
Show file tree
Hide file tree
Showing 12 changed files with 503 additions and 5 deletions.
5 changes: 5 additions & 0 deletions aya-obj/src/obj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ pub enum ProgramSection {
attach_type: CgroupSockAttachType,
},
CgroupDevice,
Iter {
sleepable: bool,
},
}

impl FromStr for ProgramSection {
Expand Down Expand Up @@ -439,6 +442,8 @@ impl FromStr for ProgramSection {
"fexit.s" => FExit { sleepable: true },
"freplace" => Extension,
"sk_lookup" => SkLookup,
"iter" => Iter { sleepable: false },
"iter.s" => Iter { sleepable: true },
_ => {
return Err(ParseError::InvalidProgramSection {
section: section.to_owned(),
Expand Down
17 changes: 13 additions & 4 deletions aya/src/bpf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ use crate::{
},
programs::{
BtfTracePoint, CgroupDevice, CgroupSkb, CgroupSkbAttachType, CgroupSock, CgroupSockAddr,
CgroupSockopt, CgroupSysctl, Extension, FEntry, FExit, KProbe, LircMode2, Lsm, PerfEvent,
ProbeKind, Program, ProgramData, ProgramError, RawTracePoint, SchedClassifier, SkLookup,
SkMsg, SkSkb, SkSkbKind, SockOps, SocketFilter, TracePoint, UProbe, Xdp,
CgroupSockopt, CgroupSysctl, Extension, FEntry, FExit, Iter, KProbe, LircMode2, Lsm,
PerfEvent, ProbeKind, Program, ProgramData, ProgramError, RawTracePoint, SchedClassifier,
SkLookup, SkMsg, SkSkb, SkSkbKind, SockOps, SocketFilter, TracePoint, UProbe, Xdp,
},
sys::{
bpf_load_btf, is_bpf_cookie_supported, is_bpf_global_data_supported,
Expand Down Expand Up @@ -410,7 +410,8 @@ impl<'a> EbpfLoader<'a> {
| ProgramSection::FEntry { sleepable: _ }
| ProgramSection::FExit { sleepable: _ }
| ProgramSection::Lsm { sleepable: _ }
| ProgramSection::BtfTracePoint => {
| ProgramSection::BtfTracePoint
| ProgramSection::Iter { sleepable: _ } => {
return Err(EbpfError::BtfError(err))
}
ProgramSection::KRetProbe
Expand Down Expand Up @@ -688,6 +689,14 @@ impl<'a> EbpfLoader<'a> {
ProgramSection::CgroupDevice => Program::CgroupDevice(CgroupDevice {
data: ProgramData::new(prog_name, obj, btf_fd, *verifier_log_level),
}),
ProgramSection::Iter { sleepable } => {
let mut data =
ProgramData::new(prog_name, obj, btf_fd, *verifier_log_level);
if *sleepable {
data.flags = BPF_F_SLEEPABLE;
}
Program::Iter(Iter { data })
}
}
};
(name, program)
Expand Down
164 changes: 164 additions & 0 deletions aya/src/programs/iter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
//! Iterators.
use std::{
fs::File,
os::fd::{AsFd, BorrowedFd},
};

use crate::{
generated::{
bpf_attach_type::BPF_TRACE_ITER, bpf_link_type::BPF_LINK_TYPE_ITER,
bpf_prog_type::BPF_PROG_TYPE_TRACING,
},
obj::btf::{Btf, BtfKind},
programs::{
define_link_wrapper, load_program, FdLink, LinkError, PerfLinkIdInner, PerfLinkInner,
ProgramData, ProgramError,
},
sys::{bpf_create_iter, bpf_link_create, bpf_link_get_info_by_fd, LinkTarget, SyscallError},
};

/// A BPF iterator program, which dumps data from kernel space into user
/// space.
///
/// It can be seen as an alternative to the `/proc` filesystem, as it offers
/// more flexibility about what information is retrieved and how it is
/// formatted.
///
/// # Minimum kernel version
///
/// The minimum kernel version required to use this feature is 5.8.
///
/// # Example
///
/// ```no_run
/// use std::io::{BufRead, BufReader};
/// use aya::{programs::Iter, Btf, Ebpf};
/// # let mut ebpf = Ebpf::load_file("ebpf_programs.o")?;
///
/// let btf = Btf::from_sys_fs()?;
/// let program: &mut Iter = ebpf.program_mut("iter_prog").unwrap().try_into()?;
/// program.load("task", &btf)?;
///
/// let link_id = program.attach()?;
/// let link = program.take_link(link_id)?;
/// let file = link.into_file()?;
/// let reader = BufReader::new(file);
///
/// for line in reader.lines() {
///     let line = line?;
///     println!("{line}");
/// }
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[derive(Debug)]
pub struct Iter {
// Program state: holds the attach settings written by `load`
// (`expected_attach_type`, `attach_btf_id`) and the links created by `attach`.
pub(crate) data: ProgramData<IterLink>,
}

impl Iter {
    /// Loads the program inside the kernel.
    ///
    /// `iter_type` selects what the iterator walks over (for example
    /// `"task"`). It is appended to the `bpf_iter_` prefix, and the resulting
    /// function name is looked up in `btf` to determine the attach target.
    ///
    /// # Errors
    ///
    /// Returns an error if the function cannot be found in `btf` or if
    /// loading the program fails.
    pub fn load(&mut self, iter_type: &str, btf: &Btf) -> Result<(), ProgramError> {
        self.data.expected_attach_type = Some(BPF_TRACE_ITER);

        let target_fn = format!("bpf_iter_{iter_type}");
        let btf_id = btf.id_by_type_name_kind(target_fn.as_str(), BtfKind::Func)?;
        self.data.attach_btf_id = Some(btf_id);

        load_program(BPF_PROG_TYPE_TRACING, &mut self.data)
    }

    /// Attaches the program.
    ///
    /// Creates a BPF link for the loaded program and stores it in this
    /// program's link collection. The returned identifier can be passed to
    /// [`Self::detach`] or [`Self::take_link`].
    pub fn attach(&mut self) -> Result<IterLinkId, ProgramError> {
        let link_fd = {
            let program_fd = self.fd()?;
            bpf_link_create(
                program_fd.as_fd(),
                LinkTarget::Iter,
                BPF_TRACE_ITER,
                None,
                0,
                None,
            )
            .map_err(|(_, io_error)| SyscallError {
                call: "bpf_link_create",
                io_error,
            })?
        };

        let link = IterLink::new(PerfLinkInner::FdLink(FdLink::new(link_fd)));
        self.data.links.insert(link)
    }

    /// Detaches the program.
    ///
    /// `link_id` is the identifier previously returned by [`Self::attach`].
    pub fn detach(&mut self, link_id: IterLinkId) -> Result<(), ProgramError> {
        self.data.links.remove(link_id)
    }

    /// Takes ownership of the link referenced by the provided `link_id`.
    ///
    /// After this call the caller is responsible for the lifetime of the
    /// link: dropping the returned [`IterLink`] detaches it.
    pub fn take_link(&mut self, link_id: IterLinkId) -> Result<IterLink, ProgramError> {
        self.data.take_link(link_id)
    }
}

/// An iterator descriptor.
///
/// Wraps a file descriptor and exposes it through [`AsFd`].
// NOTE(review): nothing in the visible part of this file constructs `IterFd`;
// confirm whether it is used elsewhere or can be removed.
#[derive(Debug)]
pub struct IterFd {
fd: crate::MockableFd,
}

impl AsFd for IterFd {
    /// Borrows the wrapped file descriptor.
    fn as_fd(&self) -> BorrowedFd<'_> {
        self.fd.as_fd()
    }
}

impl TryFrom<IterLink> for FdLink {
    type Error = LinkError;

    /// Extracts the underlying [`FdLink`] from an [`IterLink`].
    ///
    /// Fails with [`LinkError::InvalidLink`] when the link is not backed by
    /// an [`FdLink`].
    fn try_from(value: IterLink) -> Result<Self, Self::Error> {
        match value.into_inner() {
            PerfLinkInner::FdLink(fd_link) => Ok(fd_link),
            _ => Err(LinkError::InvalidLink),
        }
    }
}

impl TryFrom<FdLink> for IterLink {
    type Error = LinkError;

    /// Wraps an [`FdLink`] as an [`IterLink`] after checking its link type.
    ///
    /// The link info is queried from the link's file descriptor; the
    /// conversion fails with [`LinkError::InvalidLink`] unless the reported
    /// type is `BPF_LINK_TYPE_ITER`.
    fn try_from(fd_link: FdLink) -> Result<Self, Self::Error> {
        let info = bpf_link_get_info_by_fd(fd_link.fd.as_fd())?;
        if info.type_ != (BPF_LINK_TYPE_ITER as u32) {
            return Err(LinkError::InvalidLink);
        }
        Ok(Self::new(PerfLinkInner::FdLink(fd_link)))
    }
}

// Declares the `IterLink` and `IterLinkId` types through the
// `define_link_wrapper!` macro (defined elsewhere in the crate). `IterLink`
// wraps a `PerfLinkInner`; presumably `IterLinkId` wraps `PerfLinkIdInner` in
// the same way.
define_link_wrapper!(
/// The link used by [`Iter`] programs.
IterLink,
/// The type returned by [`Iter::attach`]. Can be passed to [`Iter::detach`].
IterLinkId,
PerfLinkInner,
PerfLinkIdInner
);

impl IterLink {
    /// Consumes the [`IterLink`] and returns a [`File`] from which the output
    /// of the iterator program can be read.
    ///
    /// # Errors
    ///
    /// Returns [`LinkError::InvalidLink`] if the link is not backed by an
    /// [`FdLink`], or [`LinkError::SyscallError`] if the iterator file
    /// descriptor cannot be created.
    pub fn into_file(self) -> Result<File, LinkError> {
        match self.into_inner() {
            PerfLinkInner::FdLink(link) => {
                let iter_fd = bpf_create_iter(link.fd.as_fd()).map_err(|(_, io_error)| {
                    LinkError::SyscallError(SyscallError {
                        call: "bpf_iter_create",
                        io_error,
                    })
                })?;
                Ok(iter_fd.into_inner().into())
            }
            _ => Err(LinkError::InvalidLink),
        }
    }
}
27 changes: 26 additions & 1 deletion aya/src/programs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub mod cgroup_sysctl;
pub mod extension;
pub mod fentry;
pub mod fexit;
pub mod iter;
pub mod kprobe;
pub mod links;
pub mod lirc_mode2;
Expand Down Expand Up @@ -94,6 +95,7 @@ pub use crate::programs::{
extension::{Extension, ExtensionError},
fentry::FEntry,
fexit::FExit,
iter::Iter,
kprobe::{KProbe, KProbeError},
links::{CgroupAttachMode, Link, LinkOrder},
lirc_mode2::LircMode2,
Expand Down Expand Up @@ -303,6 +305,8 @@ pub enum Program {
CgroupSock(CgroupSock),
/// A [`CgroupDevice`] program
CgroupDevice(CgroupDevice),
/// An [`Iter`] program
Iter(Iter),
}

impl Program {
Expand All @@ -324,7 +328,18 @@ impl Program {
Self::PerfEvent(_) => ProgramType::PerfEvent,
Self::RawTracePoint(_) => ProgramType::RawTracePoint,
Self::Lsm(_) => ProgramType::Lsm,
Self::BtfTracePoint(_) | Self::FEntry(_) | Self::FExit(_) => ProgramType::Tracing,
// The following program types are a subset of `TRACING` programs:
//
// - `BPF_TRACE_RAW_TP` (`BtfTracePoint`)
// - `BPF_TRACE_FENTRY` (`FEntry`)
// - `BPF_MODIFY_RETURN` (not supported yet in Aya)
// - `BPF_TRACE_FEXIT` (`FExit`)
// - `BPF_TRACE_ITER` (`Iter`)
//
// https://github.com/torvalds/linux/blob/v6.12/kernel/bpf/syscall.c#L3935-L3940
Self::BtfTracePoint(_) | Self::FEntry(_) | Self::FExit(_) | Self::Iter(_) => {
ProgramType::Tracing
}
Self::Extension(_) => ProgramType::Extension,
Self::CgroupSockAddr(_) => ProgramType::CgroupSockAddr,
Self::SkLookup(_) => ProgramType::SkLookup,
Expand Down Expand Up @@ -360,6 +375,7 @@ impl Program {
Self::SkLookup(p) => p.pin(path),
Self::CgroupSock(p) => p.pin(path),
Self::CgroupDevice(p) => p.pin(path),
Self::Iter(p) => p.pin(path),
}
}

Expand Down Expand Up @@ -390,6 +406,7 @@ impl Program {
Self::SkLookup(mut p) => p.unload(),
Self::CgroupSock(mut p) => p.unload(),
Self::CgroupDevice(mut p) => p.unload(),
Self::Iter(mut p) => p.unload(),
}
}

Expand Down Expand Up @@ -422,6 +439,7 @@ impl Program {
Self::SkLookup(p) => p.fd(),
Self::CgroupSock(p) => p.fd(),
Self::CgroupDevice(p) => p.fd(),
Self::Iter(p) => p.fd(),
}
}

Expand Down Expand Up @@ -455,6 +473,7 @@ impl Program {
Self::SkLookup(p) => p.info(),
Self::CgroupSock(p) => p.info(),
Self::CgroupDevice(p) => p.info(),
Self::Iter(p) => p.info(),
}
}
}
Expand Down Expand Up @@ -771,6 +790,7 @@ impl_program_unload!(
SockOps,
CgroupSock,
CgroupDevice,
Iter,
);

macro_rules! impl_fd {
Expand Down Expand Up @@ -811,6 +831,7 @@ impl_fd!(
SockOps,
CgroupSock,
CgroupDevice,
Iter,
);

/// Trait implemented by the [`Program`] types which support the kernel's
Expand Down Expand Up @@ -916,6 +937,7 @@ impl_program_pin!(
SockOps,
CgroupSock,
CgroupDevice,
Iter,
);

macro_rules! impl_from_pin {
Expand Down Expand Up @@ -954,6 +976,7 @@ impl_from_pin!(
SkLookup,
SockOps,
CgroupDevice,
Iter,
);

macro_rules! impl_try_from_program {
Expand Down Expand Up @@ -1009,6 +1032,7 @@ impl_try_from_program!(
SkLookup,
CgroupSock,
CgroupDevice,
Iter,
);

impl_info!(
Expand Down Expand Up @@ -1036,6 +1060,7 @@ impl_info!(
SockOps,
CgroupSock,
CgroupDevice,
Iter,
);

// TODO(https://github.com/aya-rs/aya/issues/645): this API is currently used in tests. Stabilize
Expand Down
16 changes: 16 additions & 0 deletions aya/src/sys/bpf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ use crate::{
Btf, Pod, VerifierLogLevel, BPF_OBJ_NAME_LEN, FEATURES,
};

/// Issues the `BPF_ITER_CREATE` command for the link referred to by `link_fd`
/// and returns the resulting file descriptor.
pub(crate) fn bpf_create_iter(link_fd: BorrowedFd<'_>) -> SysResult<crate::MockableFd> {
    // Start from an all-zero attribute so every field other than `link_fd`
    // is zero.
    let mut attr = unsafe { mem::zeroed::<bpf_attr>() };
    attr.iter_create.link_fd = link_fd.as_raw_fd() as u32;

    // SAFETY: BPF_ITER_CREATE returns a new file descriptor.
    unsafe { fd_sys_bpf(bpf_cmd::BPF_ITER_CREATE, &mut attr) }
}

pub(crate) fn bpf_create_map(
name: &CStr,
def: &obj::Map,
Expand Down Expand Up @@ -377,6 +387,7 @@ pub(crate) fn bpf_map_freeze(fd: BorrowedFd<'_>) -> SysResult<i64> {
/// The target passed to `bpf_link_create`, describing what a program is
/// attached to.
pub(crate) enum LinkTarget<'f> {
/// A target identified by a borrowed file descriptor.
Fd(BorrowedFd<'f>),
/// A target identified by an interface index (written to `target_ifindex`).
IfIndex(u32),
/// An iterator program: no target is written into the attribute, since the
/// kernel rejects non-zero target FDs for iterators.
Iter,
}

// since kernel 5.7
Expand All @@ -399,6 +410,11 @@ pub(crate) fn bpf_link_create(
LinkTarget::IfIndex(ifindex) => {
attr.link_create.__bindgen_anon_2.target_ifindex = ifindex;
}
// When attaching to an iterator program, no target FD is needed. In
// fact, the kernel explicitly rejects non-zero target FDs for
// iterators:
// https://github.com/torvalds/linux/blob/v6.12/kernel/bpf/bpf_iter.c#L517-L518
LinkTarget::Iter => {}
};
attr.link_create.attach_type = attach_type as u32;

Expand Down
Loading

0 comments on commit bf2164c

Please sign in to comment.