diff --git a/src/elf_parser/consts.rs b/src/elf_parser/consts.rs
new file mode 100644
index 000000000..cc393c483
--- /dev/null
+++ b/src/elf_parser/consts.rs
@@ -0,0 +1,78 @@
+//! ELF constants (magic, classes, types and flags) used by the 64 bit ELF parser
+
+// Only some of these constants are referenced by the parser.
+#![allow(dead_code)]
+
+use super::types::*;
+
+/// Magic number at the start of every ELF file: 0x7F followed by "ELF"
+pub const ELFMAG: [u8; 4] = [0x7F, 0x45, 0x4C, 0x46];
+
+// File class, compared against `e_ident.ei_class`
+pub const ELFCLASSNONE: u8 = 0;
+pub const ELFCLASS32: u8 = 1;
+pub const ELFCLASS64: u8 = 2;
+
+// Data encoding, compared against `e_ident.ei_data`
+pub const ELFDATANONE: u8 = 0;
+pub const ELFDATA2LSB: u8 = 1;
+pub const ELFDATA2MSB: u8 = 2;
+
+// Object file type, compared against `e_type`
+pub const ET_NONE: Elf64Half = 0;
+pub const ET_REL: Elf64Half = 1;
+pub const ET_EXEC: Elf64Half = 2;
+pub const ET_DYN: Elf64Half = 3;
+pub const ET_CORE: Elf64Half = 4;
+
+// Object file version, compared against `e_version` and `e_ident.ei_version`
+pub const EV_NONE: Elf64Word = 0;
+pub const EV_CURRENT: Elf64Word = 1;
+
+// Program header (segment) type, compared against `p_type`
+pub const PT_NULL: Elf64Word = 0;
+pub const PT_LOAD: Elf64Word = 1;
+pub const PT_DYNAMIC: Elf64Word = 2;
+pub const PT_INTERP: Elf64Word = 3;
+pub const PT_NOTE: Elf64Word = 4;
+pub const PT_SHLIB: Elf64Word = 5;
+pub const PT_PHDR: Elf64Word = 6;
+pub const PT_TLS: Elf64Word = 7;
+pub const PT_GNU_EH_FRAME: Elf64Word = 0x6474E550;
+pub const PT_GNU_STACK: Elf64Word = 0x6474E551;
+
+// Segment permission flags, compared against `p_flags`
+pub const PF_X: Elf64Word = 0x1;
+pub const PF_W: Elf64Word = 0x2;
+pub const PF_R: Elf64Word = 0x4;
+
+// Section type, compared against `sh_type`
+pub const SHT_NULL: Elf64Word = 0;
+pub const SHT_PROGBITS: Elf64Word = 1;
+pub const SHT_SYMTAB: Elf64Word = 2;
+pub const SHT_STRTAB: Elf64Word = 3;
+pub const SHT_RELA: Elf64Word = 4;
+pub const SHT_HASH: Elf64Word = 5;
+pub const SHT_DYNAMIC: Elf64Word = 6;
+pub const SHT_NOTE: Elf64Word = 7;
+pub const SHT_NOBITS: Elf64Word = 8;
+pub const SHT_REL: Elf64Word = 9;
+pub const SHT_SHLIB: Elf64Word = 10;
+pub const SHT_DYNSYM: Elf64Word = 11;
+pub const SHT_INIT_ARRAY: Elf64Word = 14;
+pub const SHT_FINI_ARRAY: Elf64Word = 15;
+pub const SHT_PREINIT_ARRAY: Elf64Word = 16;
+pub const SHT_GROUP: Elf64Word = 17;
+pub const SHT_SYMTAB_SHNDX: Elf64Word = 18;
+
+// Section attribute flags (same integer type as `sh_flags`)
+pub const SHF_WRITE: Elf64Xword = 0x1;
+pub const SHF_ALLOC: Elf64Xword = 0x2;
+pub const SHF_EXECINSTR: Elf64Xword = 0x4;
+pub const SHF_MERGE: Elf64Xword = 0x10;
+pub const SHF_STRINGS: Elf64Xword = 0x20;
+pub const SHF_INFO_LINK: Elf64Xword = 0x40;
+pub const SHF_LINK_ORDER: Elf64Xword = 0x80;
+pub const SHF_OS_NONCONFORMING: Elf64Xword = 0x100;
+pub const SHF_GROUP: Elf64Xword = 0x200;
+pub const SHF_TLS: Elf64Xword = 0x400;
diff --git a/src/elf_parser/mod.rs b/src/elf_parser/mod.rs
new file mode 100644
index 000000000..1ac837d8b
--- /dev/null
+++ b/src/elf_parser/mod.rs
@@ -0,0 +1,444 @@
+//! Dependency-less 64 bit ELF parser
+
+mod consts;
+pub mod types;
+
+use std::convert::TryInto;
+use {crate::ebpf, consts::*, types::*};
+
+/// Expected `(p_type, p_flags, address)` of each program header, in file order
+const EXPECTED_PROGRAM_HEADERS: [(u32, u32, u64); 3] = [
+    (PT_LOAD, PF_R | PF_X, ebpf::MM_PROGRAM_START),
+    (PT_GNU_STACK, PF_R | PF_W, ebpf::MM_STACK_START),
+    (PT_NULL, PF_R | PF_W, ebpf::MM_HEAP_START),
+];
+/// Maximum number of section headers accepted by `check_platform_specific`
+const SECTION_COUNT_MAXIMUM: usize = 16;
+/// Maximum length of a section name in bytes, including the NUL terminator
+const SECTION_NAME_LENGTH_MAXIMUM: usize = 16;
+/// Maximum length of a symbol name in bytes, including the NUL terminator
+const SYMBOL_NAME_LENGTH_MAXIMUM: usize = 64;
+/// Printed by the `Debug` implementation in place of a name which can not be resolved
+const UNRESOLVED_NAME: &str = "<unresolved>";
+
+/// Error definitions
+#[derive(Debug, PartialEq, Eq)]
+pub enum ElfParserError {
+    /// ELF file header is inconsistent or unsupported
+    InvalidFileHeader,
+    /// Program header is inconsistent or unsupported
+    InvalidProgramHeader,
+    /// Section header is inconsistent or unsupported
+    InvalidSectionHeader,
+    /// Section or symbol name is not UTF8 or too long
+    InvalidString,
+    /// An index or memory range exceeds its boundaries
+    OutOfBounds,
+    /// Headers, tables or sections overlap in the file
+    Overlap,
+    /// Sections are not sorted in ascending order
+    SectionNotInOrder,
+}
+
+/// Returns `Overlap` unless one of the two ranges ends at or before the start of the other
+fn check_that_there_is_no_overlap(
+    range_a: &std::ops::Range<usize>,
+    range_b: &std::ops::Range<usize>,
+) -> Result<(), ElfParserError> {
+    if range_a.end <= range_b.start || range_b.end <= range_a.start {
+        Ok(())
+    } else {
+        Err(ElfParserError::Overlap)
+    }
+}
+
+/// Returns true if `bytes` starts at an address which is suitably aligned for `T`
+fn is_aligned_for<T>(bytes: &[u8]) -> bool {
+    (bytes.as_ptr() as usize)
+        .checked_rem(std::mem::align_of::<T>())
+        .map(|remainder| remainder == 0)
+        .unwrap_or(false)
+}
+
+/// The parsed structure of an ELF file
+pub struct Elf64<'a> {
+    elf_bytes: &'a [u8],
+    file_header: &'a Elf64Ehdr,
+    program_header_table: &'a [Elf64Phdr],
+    section_header_table: &'a [Elf64Shdr],
+    text_section_header: Option<&'a Elf64Shdr>,
+    readonly_data_section_header: Option<&'a Elf64Shdr>,
+    symbol_section_header: Option<&'a Elf64Shdr>,
+    symbol_names_section_header: Option<&'a Elf64Shdr>,
+    section_names_section_header: Option<&'a Elf64Shdr>,
+}
+
+impl<'a> Elf64<'a> {
+    /// Parses and validates the file header and the program and section header tables
+    ///
+    /// The headers are borrowed from `elf_bytes` without copying them, so `elf_bytes` has to be
+    /// suitably aligned for the header types. The named section headers stay unset until
+    /// `check_platform_specific` is called.
+    ///
+    /// # Errors
+    ///
+    /// - `OutOfBounds` if a header, table, loaded segment or section exceeds `elf_bytes`
+    /// - `InvalidFileHeader`, `InvalidProgramHeader` or `InvalidSectionHeader` if the file
+    ///   header is unsupported or the respective header (table) is misaligned in memory
+    /// - `Overlap` if headers, tables, loaded segments or sections overlap in the file
+    /// - `SectionNotInOrder` if the sections are not sorted by their file offset
+    pub fn from(elf_bytes: &'a [u8]) -> Result<Self, ElfParserError> {
+        let file_header_range = 0..std::mem::size_of::<Elf64Ehdr>();
+        let file_header_bytes: &[u8; std::mem::size_of::<Elf64Ehdr>()] = elf_bytes
+            .get(file_header_range.clone())
+            .and_then(|slice| slice.try_into().ok())
+            .ok_or(ElfParserError::OutOfBounds)?;
+        if !is_aligned_for::<Elf64Ehdr>(file_header_bytes) {
+            return Err(ElfParserError::InvalidFileHeader);
+        }
+        // SAFETY: `file_header_bytes` has exactly the size of `Elf64Ehdr`, was checked to be
+        // suitably aligned and `Elf64Ehdr` only contains integers, so any bit pattern is valid.
+        let file_header = unsafe {
+            std::mem::transmute::<&[u8; std::mem::size_of::<Elf64Ehdr>()], &Elf64Ehdr>(
+                file_header_bytes,
+            )
+        };
+        if file_header.e_ident.ei_mag != ELFMAG
+            || file_header.e_ident.ei_class != ELFCLASS64
+            || file_header.e_ident.ei_data != ELFDATA2LSB
+            || file_header.e_ident.ei_version != EV_CURRENT as u8
+            || file_header.e_version != EV_CURRENT
+            || file_header.e_ehsize != std::mem::size_of::<Elf64Ehdr>() as u16
+            || file_header.e_phentsize != std::mem::size_of::<Elf64Phdr>() as u16
+            || file_header.e_shentsize != std::mem::size_of::<Elf64Shdr>() as u16
+            || file_header.e_shstrndx >= file_header.e_shnum
+        {
+            return Err(ElfParserError::InvalidFileHeader);
+        }
+
+        let program_header_table_range = file_header.e_phoff as usize
+            ..std::mem::size_of::<Elf64Phdr>()
+                .saturating_mul(file_header.e_phnum as usize)
+                .saturating_add(file_header.e_phoff as usize);
+        check_that_there_is_no_overlap(&file_header_range, &program_header_table_range)?;
+        let program_header_table_bytes = elf_bytes
+            .get(program_header_table_range.clone())
+            .ok_or(ElfParserError::OutOfBounds)?;
+        if !is_aligned_for::<Elf64Phdr>(program_header_table_bytes) {
+            return Err(ElfParserError::InvalidProgramHeader);
+        }
+        // SAFETY: The bytes cover exactly `e_phnum` entries, were checked to be suitably
+        // aligned and `Elf64Phdr` only contains integers, so any bit pattern is valid.
+        let program_header_table = unsafe {
+            std::slice::from_raw_parts::<Elf64Phdr>(
+                program_header_table_bytes.as_ptr() as *const Elf64Phdr,
+                file_header.e_phnum as usize,
+            )
+        };
+
+        let section_header_table_range = file_header.e_shoff as usize
+            ..std::mem::size_of::<Elf64Shdr>()
+                .saturating_mul(file_header.e_shnum as usize)
+                .saturating_add(file_header.e_shoff as usize);
+        check_that_there_is_no_overlap(&file_header_range, &section_header_table_range)?;
+        check_that_there_is_no_overlap(&program_header_table_range, &section_header_table_range)?;
+        let section_header_table_bytes = elf_bytes
+            .get(section_header_table_range.clone())
+            .ok_or(ElfParserError::OutOfBounds)?;
+        if !is_aligned_for::<Elf64Shdr>(section_header_table_bytes) {
+            return Err(ElfParserError::InvalidSectionHeader);
+        }
+        // SAFETY: The bytes cover exactly `e_shnum` entries, were checked to be suitably
+        // aligned and `Elf64Shdr` only contains integers, so any bit pattern is valid.
+        let section_header_table = unsafe {
+            std::slice::from_raw_parts::<Elf64Shdr>(
+                section_header_table_bytes.as_ptr() as *const Elf64Shdr,
+                file_header.e_shnum as usize,
+            )
+        };
+
+        for program_header in program_header_table.iter() {
+            if program_header.p_type != PT_LOAD {
+                continue;
+            }
+            let program_range = program_header.p_offset as usize
+                ..(program_header.p_offset as usize)
+                    .saturating_add(program_header.p_filesz as usize);
+            check_that_there_is_no_overlap(&program_range, &file_header_range)?;
+            check_that_there_is_no_overlap(&program_range, &program_header_table_range)?;
+            check_that_there_is_no_overlap(&program_range, &section_header_table_range)?;
+            // The range end is exclusive, so a segment may end exactly at the end of the file
+            if program_range.end > elf_bytes.len() {
+                return Err(ElfParserError::OutOfBounds);
+            }
+        }
+
+        let mut offset = 0usize;
+        for section_header in section_header_table.iter() {
+            // Sections of type SHT_NOBITS are exempt from the file layout checks
+            if section_header.sh_type == SHT_NOBITS {
+                continue;
+            }
+            let section_range = section_header.sh_offset as usize
+                ..(section_header.sh_offset as usize)
+                    .saturating_add(section_header.sh_size as usize);
+            check_that_there_is_no_overlap(&section_range, &file_header_range)?;
+            check_that_there_is_no_overlap(&section_range, &program_header_table_range)?;
+            check_that_there_is_no_overlap(&section_range, &section_header_table_range)?;
+            if section_range.start < offset {
+                return Err(ElfParserError::SectionNotInOrder);
+            }
+            // The range end is exclusive, so a section may end exactly at the end of the file
+            if section_range.end > elf_bytes.len() {
+                return Err(ElfParserError::OutOfBounds);
+            }
+            offset = section_range.end;
+        }
+
+        Ok(Self {
+            elf_bytes,
+            file_header,
+            program_header_table,
+            section_header_table,
+            text_section_header: None,
+            readonly_data_section_header: None,
+            symbol_section_header: None,
+            symbol_names_section_header: None,
+            section_names_section_header: None,
+        })
+    }
+
+    /// Check that the platform supports the layout and configuration
+    ///
+    /// On success the `.text`, `.rodata`, `.symtab` and `.strtab` section headers (if present)
+    /// and the section names section header are remembered. Calling this a second time fails
+    /// with `InvalidSectionHeader` as soon as one of these named sections is encountered
+    /// again.
+    pub fn check_platform_specific(&mut self) -> Result<(), ElfParserError> {
+        if self.file_header.e_type != ET_EXEC
+            || self.file_header.e_machine != 0xF7
+            || self.file_header.e_ident.ei_osabi != 0x00
+            || self.file_header.e_ident.ei_abiversion != 0x00
+            || self.program_header_table.len() != EXPECTED_PROGRAM_HEADERS.len()
+            || self.section_header_table.len() > SECTION_COUNT_MAXIMUM
+        {
+            return Err(ElfParserError::InvalidFileHeader);
+        }
+        for (program_header, (p_type, p_flags, addr)) in self
+            .program_header_table
+            .iter()
+            .zip(EXPECTED_PROGRAM_HEADERS.iter())
+        {
+            if program_header.p_type != *p_type
+                || program_header.p_flags != *p_flags
+                || program_header.p_vaddr != *addr
+                || program_header.p_paddr != *addr
+                || program_header.p_memsz >= 0x100000000
+            {
+                return Err(ElfParserError::InvalidProgramHeader);
+            }
+        }
+        // The entry point has to lie on an instruction boundary inside of the first segment
+        let program_header = self
+            .program_header_table
+            .get(0)
+            .ok_or(ElfParserError::OutOfBounds)?;
+        let program_range = program_header.p_vaddr
+            ..program_header
+                .p_vaddr
+                .saturating_add(program_header.p_filesz);
+        if !program_range.contains(&self.file_header.e_entry)
+            || (self.file_header.e_entry as usize)
+                .checked_rem(ebpf::INSN_SIZE)
+                .map(|remainder| remainder != 0)
+                .unwrap_or(true)
+        {
+            return Err(ElfParserError::InvalidSectionHeader);
+        }
+        let section_names_section_header = self
+            .section_header_table
+            .get(self.file_header.e_shstrndx as usize)
+            .ok_or(ElfParserError::OutOfBounds)?;
+        // Remembers a section header under its name and rejects a second section of that name
+        macro_rules! section_header_by_name {
+            ($self:expr, $section_header:expr, $section_name:expr,
+             $($name:literal => $field:ident,)*) => {
+                match $section_name {
+                    $($name => {
+                        if $self.$field.is_some() {
+                            return Err(ElfParserError::InvalidSectionHeader);
+                        }
+                        $self.$field = Some($section_header);
+                    })*
+                    _ => {}
+                }
+            }
+        }
+        for section_header in self.section_header_table.iter() {
+            let section_name = self.get_string_in_section(
+                section_names_section_header,
+                section_header.sh_name,
+                SECTION_NAME_LENGTH_MAXIMUM,
+            )?;
+            section_header_by_name!(
+                self, section_header, section_name,
+                ".text" => text_section_header,
+                ".rodata" => readonly_data_section_header,
+                ".symtab" => symbol_section_header,
+                ".strtab" => symbol_names_section_header,
+            )
+        }
+        self.section_names_section_header = Some(section_names_section_header);
+        Ok(())
+    }
+
+    /// Query a single string from a section which is marked as SHT_STRTAB
+    ///
+    /// The string starts `offset_in_section` bytes into the section and has to be terminated
+    /// by a NUL byte within `maximum_length` bytes and before the end of the section.
+    ///
+    /// # Errors
+    ///
+    /// - `InvalidSectionHeader` if the section is not of type `SHT_STRTAB`
+    /// - `OutOfBounds` if the offset is past the section end or the bytes are not in the file
+    /// - `InvalidString` if the NUL terminator is missing or the string is not UTF8
+    pub fn get_string_in_section(
+        &self,
+        section_header: &Elf64Shdr,
+        offset_in_section: Elf64Word,
+        maximum_length: usize,
+    ) -> Result<&'a str, ElfParserError> {
+        if section_header.sh_type != SHT_STRTAB {
+            return Err(ElfParserError::InvalidSectionHeader);
+        }
+        let offset_in_file =
+            (section_header.sh_offset as usize).saturating_add(offset_in_section as usize);
+        let string_range = offset_in_file
+            ..(section_header.sh_offset as usize)
+                .saturating_add(section_header.sh_size as usize)
+                .min(offset_in_file.saturating_add(maximum_length));
+        let unterminated_string_bytes = self
+            .elf_bytes
+            .get(string_range)
+            .ok_or(ElfParserError::OutOfBounds)?;
+        unterminated_string_bytes
+            .iter()
+            .position(|byte| *byte == 0x00)
+            .and_then(|string_length| unterminated_string_bytes.get(0..string_length))
+            .and_then(|string_bytes| std::str::from_utf8(string_bytes).ok())
+            .ok_or(ElfParserError::InvalidString)
+    }
+
+    /// Returns the symbol table of a section which is marked as SHT_SYMTAB
+    ///
+    /// # Errors
+    ///
+    /// - `InvalidSectionHeader` if the section is not of type `SHT_SYMTAB`, its size is not a
+    ///   multiple of the symbol entry size or its bytes are misaligned in memory
+    /// - `OutOfBounds` if the section lies outside of the file
+    pub fn get_symbol_table_of_section(
+        &self,
+        section_header: &Elf64Shdr,
+    ) -> Result<&'a [Elf64Sym], ElfParserError> {
+        if section_header.sh_type != SHT_SYMTAB
+            || (section_header.sh_size as usize)
+                .checked_rem(std::mem::size_of::<Elf64Sym>())
+                .map(|remainder| remainder != 0)
+                .unwrap_or(true)
+        {
+            return Err(ElfParserError::InvalidSectionHeader);
+        }
+        let symbol_table_range = section_header.sh_offset as usize
+            ..(section_header.sh_offset as usize).saturating_add(section_header.sh_size as usize);
+        let symbol_table_bytes = self
+            .elf_bytes
+            .get(symbol_table_range)
+            .ok_or(ElfParserError::OutOfBounds)?;
+        if !is_aligned_for::<Elf64Sym>(symbol_table_bytes) {
+            return Err(ElfParserError::InvalidSectionHeader);
+        }
+        // SAFETY: The bytes cover a whole number of entries, were checked to be suitably
+        // aligned and `Elf64Sym` only contains integers, so any bit pattern is valid.
+        let symbol_table = unsafe {
+            std::slice::from_raw_parts::<Elf64Sym>(
+                symbol_table_bytes.as_ptr() as *const Elf64Sym,
+                (section_header.sh_size as usize)
+                    .checked_div(std::mem::size_of::<Elf64Sym>())
+                    .unwrap_or(0),
+            )
+        };
+        Ok(symbol_table)
+    }
+}
+
+impl<'a> std::fmt::Debug for Elf64<'a> {
+    /// Prints all headers, section names and symbol names
+    ///
+    /// Names which can not be resolved are printed as `<unresolved>` instead of panicking.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        writeln!(f, "{:#X?}", self.file_header)?;
+        for program_header in self.program_header_table.iter() {
+            writeln!(f, "{:#X?}", program_header)?;
+        }
+        for section_header in self.section_header_table.iter() {
+            // The section names section is only known once `check_platform_specific` set it
+            let section_name = self
+                .section_names_section_header
+                .and_then(|section_names_section_header| {
+                    self.get_string_in_section(
+                        section_names_section_header,
+                        section_header.sh_name,
+                        SECTION_NAME_LENGTH_MAXIMUM,
+                    )
+                    .ok()
+                })
+                .unwrap_or(UNRESOLVED_NAME);
+            writeln!(f, "{}", section_name)?;
+            writeln!(f, "{:#X?}", section_header)?;
+        }
+        if let Some(section_header) = self.symbol_section_header {
+            match self.get_symbol_table_of_section(section_header) {
+                Ok(symbol_table) => {
+                    writeln!(f, "{:#X?}", symbol_table)?;
+                    for symbol in symbol_table.iter().filter(|symbol| symbol.st_name != 0) {
+                        let symbol_name = self
+                            .symbol_names_section_header
+                            .and_then(|symbol_names_section_header| {
+                                self.get_string_in_section(
+                                    symbol_names_section_header,
+                                    symbol.st_name,
+                                    SYMBOL_NAME_LENGTH_MAXIMUM,
+                                )
+                                .ok()
+                            })
+                            .unwrap_or(UNRESOLVED_NAME);
+                        writeln!(f, "{}", symbol_name)?;
+                    }
+                }
+                // Neither `from` nor `check_platform_specific` validates the symbol table
+                Err(error) => writeln!(f, "{:?}", error)?,
+            }
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_loading_static_executable() {
+        let elf_bytes = std::fs::read("tests/elfs/static.elf").unwrap();
+        let mut parsed_elf = Elf64::from(&elf_bytes).unwrap();
+        // Formatting has to work before the section names are resolved as well
+        println!("{:?}", parsed_elf);
+        parsed_elf.check_platform_specific().unwrap();
+        println!("{:?}", parsed_elf);
+    }
+
+    #[test]
+    fn test_truncated_file_header() {
+        assert_eq!(Elf64::from(&[0u8; 8]).err(), Some(ElfParserError::OutOfBounds));
+    }
+}
diff --git a/src/elf_parser/types.rs b/src/elf_parser/types.rs
new file mode 100644
index 000000000..e1f2344e8
--- /dev/null
+++ b/src/elf_parser/types.rs
@@ -0,0 +1,89 @@
+//! Type aliases and `#[repr(C)]` structures which mirror the ELF-64 file layout
+//!
+//! The parser reinterprets raw file bytes as these structures. They therefore have to
+//! consist of integer fields only and must not require any padding.
+
+// The individual aliases and fields below carry no doc comments.
+#![allow(missing_docs)]
+
+pub type Elf64Half = u16;
+pub type Elf64Word = u32;
+pub type Elf64Xword = u64;
+pub type Elf64Addr = u64;
+pub type Elf64Off = u64;
+pub type Elf64Section = u16;
+
+/// Identification bytes at the start of the file header
+#[derive(Debug)]
+#[repr(C)]
+pub struct ElfIdent {
+    pub ei_mag: [u8; 4],
+    pub ei_class: u8,
+    pub ei_data: u8,
+    pub ei_version: u8,
+    pub ei_osabi: u8,
+    pub ei_abiversion: u8,
+    pub ei_pad: [u8; 7],
+}
+
+/// File header
+#[derive(Debug)]
+#[repr(C)]
+pub struct Elf64Ehdr {
+    pub e_ident: ElfIdent,
+    pub e_type: Elf64Half,
+    pub e_machine: Elf64Half,
+    pub e_version: Elf64Word,
+    pub e_entry: Elf64Addr,
+    pub e_phoff: Elf64Off,
+    pub e_shoff: Elf64Off,
+    pub e_flags: Elf64Word,
+    pub e_ehsize: Elf64Half,
+    pub e_phentsize: Elf64Half,
+    pub e_phnum: Elf64Half,
+    pub e_shentsize: Elf64Half,
+    pub e_shnum: Elf64Half,
+    pub e_shstrndx: Elf64Half,
+}
+
+/// Program header (one entry of the program header table)
+#[derive(Debug)]
+#[repr(C)]
+pub struct Elf64Phdr {
+    pub p_type: Elf64Word,
+    pub p_flags: Elf64Word,
+    pub p_offset: Elf64Off,
+    pub p_vaddr: Elf64Addr,
+    pub p_paddr: Elf64Addr,
+    pub p_filesz: Elf64Xword,
+    pub p_memsz: Elf64Xword,
+    pub p_align: Elf64Xword,
+}
+
+/// Section header (one entry of the section header table)
+#[derive(Debug)]
+#[repr(C)]
+pub struct Elf64Shdr {
+    pub sh_name: Elf64Word,
+    pub sh_type: Elf64Word,
+    pub sh_flags: Elf64Xword,
+    pub sh_addr: Elf64Addr,
+    pub sh_offset: Elf64Off,
+    pub sh_size: Elf64Xword,
+    pub sh_link: Elf64Word,
+    pub sh_info: Elf64Word,
+    pub sh_addralign: Elf64Xword,
+    pub sh_entsize: Elf64Xword,
+}
+
+/// Symbol table entry
+#[derive(Debug)]
+#[repr(C)]
+pub struct Elf64Sym {
+    pub st_name: Elf64Word,
+    pub st_info: u8,
+    pub st_other: u8,
+    pub st_shndx: Elf64Section,
+    pub st_value: Elf64Addr,
+    pub st_size: Elf64Xword,
+}
diff --git a/src/lib.rs b/src/lib.rs index
677c678c8..1936f9654 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -30,6 +30,7 @@ pub mod call_frames;
 pub mod disassembler;
 pub mod ebpf;
 pub mod elf;
+pub mod elf_parser;
 pub mod error;
 pub mod fuzz;
 pub mod insn_builder;
diff --git a/tests/elfs/static.elf b/tests/elfs/static.elf
new file mode 100755
index 000000000..9aed92034
Binary files /dev/null and b/tests/elfs/static.elf differ
diff --git a/tests/elfs/static.ld b/tests/elfs/static.ld
new file mode 100644
index 000000000..e9524589b
--- /dev/null
+++ b/tests/elfs/static.ld
@@ -0,0 +1,28 @@
+PHDRS { /* same types, flags and order as EXPECTED_PROGRAM_HEADERS in src/elf_parser/mod.rs */
+  /*header PT_PHDR FILEHDR PHDRS;*/
+  prog PT_LOAD FLAGS(5); /* 5 = PF_R | PF_X */
+  stack PT_GNU_STACK FLAGS(6); /* 6 = PF_R | PF_W */
+  heap PT_NULL FLAGS(6); /* 6 = PF_R | PF_W */
+}
+
+SECTIONS {
+  .text 0x100000000 : { /* NOTE(review): presumably ebpf::MM_PROGRAM_START - confirm */
+    *(.text*)
+  } :prog
+  .rodata : {
+    *(.rodata*)
+  } :prog
+  .stack 0x200000000 (NOLOAD) : { /* NOTE(review): presumably ebpf::MM_STACK_START - confirm */
+    . = .; /* NOTE(review): presumably keeps the otherwise empty section - confirm */
+  } :stack
+  .heap 0x300000000 (NOLOAD) : { /* NOTE(review): presumably ebpf::MM_HEAP_START - confirm */
+    *(.bss*)
+  } :heap
+  /DISCARD/ : { /* input sections matching these patterns are left out of the output */
+    *(.data*)
+    *(*.hash)
+    *(.eh_frame)
+    *(.dynamic)
+    *(*.dyn*)
+  }
+}